├── .gitignore ├── Makefile.inc ├── README.md ├── SVF-all.patch ├── SVF-contextdda-fix.patch ├── SVF-node-allocator.patch ├── SVF-padded-vtables.patch ├── aflpp-link-safe.c ├── bin ├── wrap-gclang ├── wrap-gclang++ └── wrap_gclang.py ├── build.sh ├── clean_remake.sh ├── install_svf.sh ├── passes ├── Makefile ├── Makefile.inc ├── Makefile.svf.inc ├── add-sanitize-attr │ ├── Makefile │ └── add-sanitize-attr.cpp ├── cgc-planner │ ├── Makefile │ └── cgc-planner.cpp ├── cgc │ ├── Makefile │ ├── cgc.cpp │ └── cgc_old.cpp ├── dump-call-tree │ ├── Makefile │ └── dump-call-tree.cpp ├── dump-calls │ ├── Makefile │ └── dump-calls.cpp ├── dump-extlib │ ├── Makefile │ └── dump-extlib.cpp ├── func-stats │ ├── Makefile │ └── func-stats.cpp ├── icp │ ├── Makefile │ └── icp.cpp ├── include │ ├── common │ │ ├── cgc_magics.h │ │ └── pass.h │ ├── sdag │ │ ├── sdag-print.h │ │ └── sdag.h │ └── svfa │ │ └── SVFAPass.h └── set-norecurse-ext │ ├── Makefile │ └── set-norecurse-ext.cpp ├── remake.sh └── tests ├── driver.c ├── driver.cc ├── opt └── test ├── build.sh └── target.c /.gitignore: -------------------------------------------------------------------------------- 1 | *.bc 2 | *.bcc 3 | *.so 4 | *.pdf 5 | *.ll 6 | *.o 7 | *.resolution.txt 8 | *.S 9 | binutils*/ 10 | *.csv 11 | *.png 12 | *.out 13 | /setup.sh 14 | SVF/ 15 | SVF2/ 16 | llvm-9/ 17 | *.taint 18 | dft.log 19 | dfsan_abilist.txt 20 | *.dot 21 | callgrind.* 22 | cachegrind.* 23 | *.function.list 24 | *.color 25 | *.txt 26 | *.indent 27 | *.log 28 | .vscode/ 29 | benchmarks/* 30 | *.svg 31 | .DS_Store 32 | -------------------------------------------------------------------------------- /Makefile.inc: -------------------------------------------------------------------------------- 1 | V?=0 2 | ifneq ($V,0) 3 | QUIET= 4 | ECHO:=@\# 5 | QMAKE=VERBOSE=1 make 6 | else 7 | QUIET= @ 8 | ECHO= echo 9 | QMAKE=make -s 10 | endif 11 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # Predictive Context-sensitive Fuzzing 2 | 3 | This repository hosts the code for the paper [Predictive Context-sensitive Fuzzing](https://www.ndss-symposium.org/ndss-paper/predictive-context-sensitive-fuzzing/) appeared at NDSS 2024. 4 | 5 | ### Getting started 6 | 7 | Install the dependencies with: 8 | ```bash 9 | # install the dependencies 10 | $ apt-get update && \ 11 | apt-get install -y wget libstdc++-5-dev libtool-bin automake flex bison \ 12 | libglib2.0-dev libpixman-1-dev python3-setuptools unzip \ 13 | apt-utils apt-transport-https ca-certificates \ 14 | binutils 15 | 16 | # install llvm-10 17 | $ apt install -y lsb-release wget software-properties-common && wget https://apt.llvm.org/llvm.sh && chmod +x llvm.sh && ./llvm.sh 10 18 | 19 | # Download and install the latest stable Go (for gllvm) 20 | $ wget https://storage.googleapis.com/golang/getgo/installer_linux && \ 21 | chmod +x ./installer_linux && \ 22 | ./installer_linux 23 | $ export PATH=$PATH:$HOME/.go/bin:/go/bin 24 | 25 | # Download and compile afl++ of 08/2020. 26 | $ git clone https://github.com/AFLplusplus/AFLplusplus.git ./afl && \ 27 | cd ./afl && \ 28 | git checkout 2e15661f184c77ac1fbb6f868c894e946cbb7f17 29 | 30 | # Build without Python support as we don't need it. 31 | # Set AFL_NO_X86 to skip flaky tests. 
32 | $ cd ./afl && unset CFLAGS && unset CXXFLAGS && \ 33 | export CC=clang && export AFL_NO_X86=1 && \ 34 | PYTHON_INCLUDE=/ make LLVM_CONFIG=llvm-config-10 && make install 35 | 36 | # Build the AFL wrapper with gclang 37 | wget https://raw.githubusercontent.com/llvm/llvm-project/5feb80e748924606531ba28c97fe65145c65372e/compiler-rt/lib/fuzzer/afl/afl_driver.cpp -O afl_driver.cpp 38 | clang++-10 -std=c++11 -O2 -c afl_driver.cpp 39 | ar r libAFLDriver.a afl_driver.o 40 | gclang++ -std=c++11 -O2 -c afl_driver.cpp -o afl_driver_gclang.o 41 | ar r libAFLDriverGclang.a afl_driver_gclang.o 42 | ``` 43 | 44 | Build the function cloning passes with: 45 | ```bash 46 | $ export LLVM_DIR="/usr/lib/llvm-10" # or the llvm-10 path 47 | $ ./build.sh 48 | ``` 49 | 50 | And compile a harness with the drop-in wrapper that we provide in the `bin` folder: `wrap_gclang` automatically runs the needed passes. 51 | 52 | To set up a correct env for the build process, do the following steps (`OUT` is your build output directory, we follow the [FuzzBench envs](https://google.github.io/fuzzbench/getting-started/adding-a-new-fuzzer/#what-is-fuzzer_lib)): 53 | 54 | ```bash 55 | export CC=./afl/afl-clang-fast 56 | export CXX=./afl/afl-clang-fast++ 57 | export FUZZER_LIB=./afl/libAFLDriverGclang.a 58 | 59 | export AFL_LLVM_DICT2FILE=$OUT/afl++.dict 60 | 61 | export AFL_QUIET=1 62 | export AFL_MAP_SIZE=2621440 63 | 64 | export REAL_CC_PATH=$CC 65 | export REAL_CXX_PATH=$CXX 66 | export CC=./bin/wrap-gclang 67 | export CXX=./bin/wrap-gclang++ 68 | 69 | export LLVM_BITCODE_GENERATION_FLAGS=-flto 70 | export WLLVM_OUTPUT_LEVEL=ERROR 71 | ``` 72 | 73 | You can tune the env `CGC_STRATEGY` to change prioritization strategy (default is dataflow) and `CGC_MAXMAP` to enlarge the max map size. 74 | 75 | Now you can compile your target simply using CC/CXX and link with: 76 | 77 | ```bash 78 | $CXX yourfiles.o[...] 
$FUZZER_LIB -o youroutput.bin 79 | ``` 80 | 81 | If you want sanitization, we suggest adding `-O1 -fsanitize=address -fsanitize=array-bounds,bool,builtin,enum,float-divide-by-zero,function,integer-divide-by-zero,null,object-size,return,returns-nonnull-attribute,shift,signed-integer-overflow,unreachable,vla-bound,vptr`. 82 | 83 | The last step is just to [fuzz with AFL++](https://github.com/AFLplusplus/AFLplusplus/blob/stable/docs/fuzzing_in_depth.md#a-running-afl-fuzz), we suggest using a CmpLog-instrumented binary in addition. 84 | 85 | ### Cite 86 | ``` 87 | @inproceedings{pred-ctx-fuzz, 88 | author = {Borrello, Pietro and Fioraldi, Andrea and D'Elia, Daniele Cono and Balzarotti, Davide and Querzoni, Leonardo and Giuffrida, Cristiano}, 89 | title = {Predictive Context-sensitive Fuzzing}, 90 | year = {2024}, 91 | booktitle = {Network and Distributed System Security Symposium (NDSS)} 92 | } 93 | ``` 94 | -------------------------------------------------------------------------------- /SVF-all.patch: -------------------------------------------------------------------------------- 1 | From 12ede5e903bd806c984217b0fc37f873f9718248 Mon Sep 17 00:00:00 2001 2 | From: Pietro Borrello 3 | Date: Fri, 28 May 2021 13:44:45 +0200 4 | Subject: Fix handling of padded vtables (common in asan builds) 5 | 6 | --- 7 | lib/SVF-FE/CHG.cpp | 28 ++++++++++++++++++++++++++-- 8 | 1 file changed, 26 insertions(+), 2 deletions(-) 9 | 10 | diff --git a/lib/SVF-FE/CHG.cpp b/lib/SVF-FE/CHG.cpp 11 | index 00bfbcf..fef1902 100644 12 | --- a/lib/SVF-FE/CHG.cpp 13 | +++ b/lib/SVF-FE/CHG.cpp 14 | @@ -128,7 +128,19 @@ void CHGraph::buildCHGNodes(const GlobalValue *globalvalue) 15 | 16 | for (unsigned int ei = 0; ei < vtblStruct->getNumOperands(); ++ei) 17 | { 18 | - const ConstantArray *vtbl = SVFUtil::dyn_cast(vtblStruct->getOperand(ei)); 19 | + Constant *operand = vtblStruct->getOperand(ei); 20 | + // Sometimes ASAN adds padding to vtable by embedding them in structs 21 | + // so we should check 
and unpack them 22 | + if (!SVFUtil::isa(operand)) { 23 | + ConstantStruct *opStruct = SVFUtil::dyn_cast(operand); 24 | + if(!opStruct) { 25 | + // We should skip handling the padding, in the form of an array 26 | + assert(SVFUtil::isa(operand->getType())); 27 | + continue; 28 | + } 29 | + operand = opStruct->getOperand(0); 30 | + } 31 | + const ConstantArray *vtbl = SVFUtil::dyn_cast(operand); 32 | assert(vtbl && "Element of initializer not an array?"); 33 | for (u32_t i = 0; i < vtbl->getNumOperands(); ++i) 34 | { 35 | @@ -434,8 +446,20 @@ void CHGraph::analyzeVTables(const Module &M) 36 | 37 | for (unsigned int ei = 0; ei < vtblStruct->getNumOperands(); ++ei) 38 | { 39 | + Constant *operand = vtblStruct->getOperand(ei); 40 | + // Sometimes ASAN adds padding to vtable by embedding them in structs 41 | + // so we should check and unpack them 42 | + if (!SVFUtil::isa(operand)) { 43 | + ConstantStruct *opStruct = SVFUtil::dyn_cast(operand); 44 | + if(!opStruct) { 45 | + // We should skip handling the padding, in the form of an array 46 | + assert(SVFUtil::isa(operand->getType())); 47 | + continue; 48 | + } 49 | + operand = opStruct->getOperand(0); 50 | + } 51 | const ConstantArray *vtbl = 52 | - SVFUtil::dyn_cast(vtblStruct->getOperand(ei)); 53 | + SVFUtil::dyn_cast(operand); 54 | assert(vtbl && "Element of initializer not an array?"); 55 | 56 | /* 57 | -- 58 | 2.17.1 59 | 60 | 61 | From eebe3d824cb29455732e9d7dac9911bc9711efde Mon Sep 17 00:00:00 2001 62 | From: Pietro Borrello 63 | Date: Fri, 28 May 2021 13:49:33 +0200 64 | Subject: contextDDA: add check on NULL refVal in isHeapCondMemObj 65 | 66 | --- 67 | lib/DDA/ContextDDA.cpp | 12 +++++++++++- 68 | 1 file changed, 11 insertions(+), 1 deletion(-) 69 | 70 | diff --git a/lib/DDA/ContextDDA.cpp b/lib/DDA/ContextDDA.cpp 71 | index 6b37821..b53d1e0 100644 72 | --- a/lib/DDA/ContextDDA.cpp 73 | +++ b/lib/DDA/ContextDDA.cpp 74 | @@ -316,7 +316,17 @@ bool ContextDDA::isHeapCondMemObj(const CxtVar& var, const 
StoreSVFGNode*) 75 | assert(mem && "memory object is null??"); 76 | if(mem->isHeap()) 77 | { 78 | - if(const Instruction* mallocSite = SVFUtil::dyn_cast(mem->getRefVal())) 79 | + if (!mem->getRefVal()) { 80 | + PAGNode *pnode = _pag->getPAGNode(getPtrNodeID(var)); 81 | + if(GepObjPN* gepobj = SVFUtil::dyn_cast(pnode)) { 82 | + assert(SVFUtil::isa(_pag->getPAGNode(gepobj->getBaseNode())) && "emtpy refVal in a gep object whose base is a non-dummy object"); 83 | + } 84 | + else { 85 | + assert((SVFUtil::isa(pnode) || SVFUtil::isa(pnode)) && "empty refVal in non-dummy object"); 86 | + } 87 | + return true; 88 | + } 89 | + else if(const Instruction* mallocSite = SVFUtil::dyn_cast(mem->getRefVal())) 90 | { 91 | const Function* fun = mallocSite->getFunction(); 92 | const SVFFunction* svfFun = LLVMModuleSet::getLLVMModuleSet()->getSVFFunction(fun); 93 | -- 94 | 2.17.1 95 | 96 | 97 | From c5be7f023f4456eaacd917df9c44f58956feb516 Mon Sep 17 00:00:00 2001 98 | From: Pietro Borrello 99 | Date: Fri, 28 May 2021 13:57:46 +0200 100 | Subject: NodeIDAllocator: set Strategy::SEQ as the default 101 | 102 | --- 103 | lib/Util/Options.cpp | 2 +- 104 | 1 file changed, 1 insertion(+), 1 deletion(-) 105 | 106 | diff --git a/lib/Util/Options.cpp b/lib/Util/Options.cpp 107 | index ac71de5..495a317 100644 108 | --- a/lib/Util/Options.cpp 109 | +++ b/lib/Util/Options.cpp 110 | @@ -14,7 +14,7 @@ namespace SVF 111 | 112 | const llvm::cl::opt Options::NodeAllocStrat( 113 | "node-alloc-strat", 114 | - llvm::cl::init(NodeIDAllocator::Strategy::DEBUG), 115 | + llvm::cl::init(NodeIDAllocator::Strategy::SEQ), 116 | llvm::cl::desc("Method of allocating (LLVM) values and memory objects as node IDs"), 117 | llvm::cl::values( 118 | clEnumValN(NodeIDAllocator::Strategy::DENSE, "dense", "allocate objects together and values together, separately (default)"), 119 | -- 120 | 2.17.1 121 | 122 | -------------------------------------------------------------------------------- /SVF-contextdda-fix.patch: 
-------------------------------------------------------------------------------- 1 | From eebe3d824cb29455732e9d7dac9911bc9711efde Mon Sep 17 00:00:00 2001 2 | From: Pietro Borrello 3 | Date: Fri, 28 May 2021 13:49:33 +0200 4 | Subject: contextDDA: add check on NULL refVal in isHeapCondMemObj 5 | 6 | --- 7 | lib/DDA/ContextDDA.cpp | 12 +++++++++++- 8 | 1 file changed, 11 insertions(+), 1 deletion(-) 9 | 10 | diff --git a/lib/DDA/ContextDDA.cpp b/lib/DDA/ContextDDA.cpp 11 | index 6b37821..b53d1e0 100644 12 | --- a/lib/DDA/ContextDDA.cpp 13 | +++ b/lib/DDA/ContextDDA.cpp 14 | @@ -316,7 +316,17 @@ bool ContextDDA::isHeapCondMemObj(const CxtVar& var, const StoreSVFGNode*) 15 | assert(mem && "memory object is null??"); 16 | if(mem->isHeap()) 17 | { 18 | - if(const Instruction* mallocSite = SVFUtil::dyn_cast(mem->getRefVal())) 19 | + if (!mem->getRefVal()) { 20 | + PAGNode *pnode = _pag->getPAGNode(getPtrNodeID(var)); 21 | + if(GepObjPN* gepobj = SVFUtil::dyn_cast(pnode)) { 22 | + assert(SVFUtil::isa(_pag->getPAGNode(gepobj->getBaseNode())) && "emtpy refVal in a gep object whose base is a non-dummy object"); 23 | + } 24 | + else { 25 | + assert((SVFUtil::isa(pnode) || SVFUtil::isa(pnode)) && "empty refVal in non-dummy object"); 26 | + } 27 | + return true; 28 | + } 29 | + else if(const Instruction* mallocSite = SVFUtil::dyn_cast(mem->getRefVal())) 30 | { 31 | const Function* fun = mallocSite->getFunction(); 32 | const SVFFunction* svfFun = LLVMModuleSet::getLLVMModuleSet()->getSVFFunction(fun); 33 | -- 34 | 2.17.1 35 | 36 | -------------------------------------------------------------------------------- /SVF-node-allocator.patch: -------------------------------------------------------------------------------- 1 | From c5be7f023f4456eaacd917df9c44f58956feb516 Mon Sep 17 00:00:00 2001 2 | From: Pietro Borrello 3 | Date: Fri, 28 May 2021 13:57:46 +0200 4 | Subject: NodeIDAllocator: set Strategy::SEQ as the default 5 | 6 | --- 7 | lib/Util/Options.cpp | 2 +- 8 | 1 file 
changed, 1 insertion(+), 1 deletion(-) 9 | 10 | diff --git a/lib/Util/Options.cpp b/lib/Util/Options.cpp 11 | index ac71de5..495a317 100644 12 | --- a/lib/Util/Options.cpp 13 | +++ b/lib/Util/Options.cpp 14 | @@ -14,7 +14,7 @@ namespace SVF 15 | 16 | const llvm::cl::opt Options::NodeAllocStrat( 17 | "node-alloc-strat", 18 | - llvm::cl::init(NodeIDAllocator::Strategy::DEBUG), 19 | + llvm::cl::init(NodeIDAllocator::Strategy::SEQ), 20 | llvm::cl::desc("Method of allocating (LLVM) values and memory objects as node IDs"), 21 | llvm::cl::values( 22 | clEnumValN(NodeIDAllocator::Strategy::DENSE, "dense", "allocate objects together and values together, separately (default)"), 23 | -- 24 | 2.17.1 25 | 26 | -------------------------------------------------------------------------------- /SVF-padded-vtables.patch: -------------------------------------------------------------------------------- 1 | From 12ede5e903bd806c984217b0fc37f873f9718248 Mon Sep 17 00:00:00 2001 2 | From: Pietro Borrello 3 | Date: Fri, 28 May 2021 13:44:45 +0200 4 | Subject: Fix handling of padded vtables (common in asan builds) 5 | 6 | --- 7 | lib/SVF-FE/CHG.cpp | 28 ++++++++++++++++++++++++++-- 8 | 1 file changed, 26 insertions(+), 2 deletions(-) 9 | 10 | diff --git a/lib/SVF-FE/CHG.cpp b/lib/SVF-FE/CHG.cpp 11 | index 00bfbcf..fef1902 100644 12 | --- a/lib/SVF-FE/CHG.cpp 13 | +++ b/lib/SVF-FE/CHG.cpp 14 | @@ -128,7 +128,19 @@ void CHGraph::buildCHGNodes(const GlobalValue *globalvalue) 15 | 16 | for (unsigned int ei = 0; ei < vtblStruct->getNumOperands(); ++ei) 17 | { 18 | - const ConstantArray *vtbl = SVFUtil::dyn_cast(vtblStruct->getOperand(ei)); 19 | + Constant *operand = vtblStruct->getOperand(ei); 20 | + // Sometimes ASAN adds padding to vtable by embedding them in structs 21 | + // so we should check and unpack them 22 | + if (!SVFUtil::isa(operand)) { 23 | + ConstantStruct *opStruct = SVFUtil::dyn_cast(operand); 24 | + if(!opStruct) { 25 | + // We should skip handling the padding, in the form of 
an array 26 | + assert(SVFUtil::isa(operand->getType())); 27 | + continue; 28 | + } 29 | + operand = opStruct->getOperand(0); 30 | + } 31 | + const ConstantArray *vtbl = SVFUtil::dyn_cast(operand); 32 | assert(vtbl && "Element of initializer not an array?"); 33 | for (u32_t i = 0; i < vtbl->getNumOperands(); ++i) 34 | { 35 | @@ -434,8 +446,20 @@ void CHGraph::analyzeVTables(const Module &M) 36 | 37 | for (unsigned int ei = 0; ei < vtblStruct->getNumOperands(); ++ei) 38 | { 39 | + Constant *operand = vtblStruct->getOperand(ei); 40 | + // Sometimes ASAN adds padding to vtable by embedding them in structs 41 | + // so we should check and unpack them 42 | + if (!SVFUtil::isa(operand)) { 43 | + ConstantStruct *opStruct = SVFUtil::dyn_cast(operand); 44 | + if(!opStruct) { 45 | + // We should skip handling the padding, in the form of an array 46 | + assert(SVFUtil::isa(operand->getType())); 47 | + continue; 48 | + } 49 | + operand = opStruct->getOperand(0); 50 | + } 51 | const ConstantArray *vtbl = 52 | - SVFUtil::dyn_cast(vtblStruct->getOperand(ei)); 53 | + SVFUtil::dyn_cast(operand); 54 | assert(vtbl && "Element of initializer not an array?"); 55 | 56 | /* 57 | -- 58 | 2.17.1 59 | 60 | -------------------------------------------------------------------------------- /aflpp-link-safe.c: -------------------------------------------------------------------------------- 1 | __attribute__((weak)) unsigned int * __afl_fuzz_len; 2 | __attribute__((weak)) unsigned char *__afl_fuzz_ptr; 3 | __attribute__((weak)) int __afl_persistent_loop(unsigned int x) { return 0; } 4 | __attribute__((weak)) void __afl_manual_init() {} 5 | -------------------------------------------------------------------------------- /bin/wrap-gclang: -------------------------------------------------------------------------------- 1 | wrap_gclang.py -------------------------------------------------------------------------------- /bin/wrap-gclang++: 
class ILockException(Exception):
    """Raised when an ILock cannot be taken (refused re-entry or timeout)."""
    pass


class ILock(object):
    """File-based lock built on ``portalocker``.

    The lock file lives in ``lock_directory`` (the system temp directory by
    default) and its name is derived from the SHA-256 of ``name``, so every
    process using the same ``name`` and directory contends for the same file.

    Parameters:
        name: string identifying the lock.
        timeout: seconds to keep retrying before giving up; ``None`` means
            10 ** 8 seconds.
        check_interval: seconds to sleep between two attempts.
        reentrant: when true, nested ``lock()`` calls on the same object just
            bump a counter; otherwise they raise ``ILockException``.
        lock_directory: directory holding the lock file.

    Usable as a context manager (``with ILock(...)``).
    """

    def __init__(self, name, timeout=None, check_interval=0.25, reentrant=False, lock_directory=None):
        self._timeout = timeout if timeout is not None else 10 ** 8
        self._check_interval = check_interval

        lock_directory = gettempdir() if lock_directory is None else lock_directory
        unique_token = sha256(name.encode()).hexdigest()
        self._filepath = os.path.join(lock_directory, 'ilock-' + unique_token + '.lock')

        self._reentrant = reentrant

        self._enter_count = 0
        self._lockfile = None

    def lock(self):
        """Acquire the lock, retrying until the timeout expires.

        Returns ``self``. Raises ``ILockException`` on timeout or when a
        non-reentrant lock is entered twice.
        """
        # Imported lazily so the wrapper only needs portalocker when locking
        # is actually requested.
        import portalocker

        if self._enter_count > 0:
            if self._reentrant:
                self._enter_count += 1
                return self
            raise ILockException('Trying re-enter a non-reentrant lock')

        flags = portalocker.constants.LOCK_NB | portalocker.constants.LOCK_EX
        current_time = call_time = time()
        while call_time + self._timeout >= current_time:
            lockfile = open(self._filepath, 'w')
            try:
                portalocker.lock(lockfile, flags)
            except portalocker.exceptions.LockException:
                # Somebody else holds the lock: release this attempt's handle
                # before retrying, otherwise every retry leaks a descriptor.
                lockfile.close()
            except BaseException:
                lockfile.close()
                raise
            else:
                self._lockfile = lockfile
                self._enter_count = 1
                return self

            current_time = time()
            check_interval = self._check_interval if self._timeout > self._check_interval else self._timeout
            sleep(check_interval)

        raise ILockException('Timeout was reached')

    def __enter__(self):
        return self.lock()

    def unlock(self):
        """Release one level of the lock; the outermost release removes the file."""
        self._enter_count -= 1

        if self._enter_count > 0:
            return

        if sys.platform.startswith('linux'):
            # In Linux you can delete a locked file
            os.unlink(self._filepath)

        self._lockfile.close()

        if sys.platform == 'win32':
            # In Windows you need to unlock a file before deletion
            try:
                os.remove(self._filepath)
            except OSError as e:
                # Mute exception in case an access was already acquired (EACCES)
                # and in more rare case when it was even already released and file was deleted (ENOENT)
                if e.errno not in [errno.EACCES, errno.ENOENT]:
                    raise

    def __exit__(self, exc_type, exc_val, exc_tb):
        return self.unlock()
def get_stats(filename):
    """Run the ``func-stats`` pass on ``filename`` and parse its counters.

    The ``opt`` binary is taken from ``OPT_PATH`` when set, otherwise from
    ``compiler_path`` (``LLVM_COMPILER_PATH``), otherwise plain ``opt`` from
    ``PATH``. The pass plugin is loaded from ``script_dir``.

    Returns a ``(num_functions, num_bbs, afl_edges)`` tuple of ints.
    Raises ``subprocess.CalledProcessError`` if ``opt`` fails and
    ``ValueError`` if an expected counter is missing or malformed.
    """
    if os.getenv("OPT_PATH"):
        opt_name = os.environ["OPT_PATH"]
    elif compiler_path is not None:
        opt_name = os.path.join(compiler_path, "opt")
    else:
        opt_name = "opt"

    # Pass an argv list (as opt_exec does) instead of a shell string, so the
    # file name is always one argument even with spaces or shell
    # metacharacters in it.
    argv = [opt_name, '-load=%s/func-stats.so' % script_dir, '-func-stats',
            filename, '-o', '/dev/null']
    out = subprocess.check_output(argv).decode()

    def field(label):
        # Take the integer following "<label> :" on the first line mentioning
        # the label; any amount of whitespace around the colon is accepted.
        _, found, rest = out.partition(label)
        head, colon, value = rest.partition(':')
        if not found or not colon or head.strip():
            raise ValueError('func-stats output lacks %r' % label)
        return int(value.split('\n', 1)[0])

    return field('Num functions'), field('Num BBs'), field('AFL edges')
# gclang does not forward optimization flags to the linking step, so
# -fsanitize=object-size will lead to a warning on missing optimizations when
# compiling. This is usually safe, but makes some ./configure scripts fail.
def filter_objsan(args):
    """Remove the ``object-size`` check from every ``-fsanitize=`` flag.

    ``args`` is modified in place (callers rely on that) and nothing is
    returned. A flag whose only checks were ``object-size`` is dropped
    entirely; every other argument is left untouched and keeps its position.
    """
    prefix = '-fsanitize='
    kept = []
    for arg in args:
        if arg.startswith(prefix):
            # Filter whole comma-separated tokens rather than substrings, so
            # names that merely contain "object-size" are never mangled.
            checks = [c for c in arg[len(prefix):].split(',') if c != 'object-size']
            if not checks:
                # The flag enabled nothing but object-size: drop it.
                continue
            arg = prefix + ','.join(checks)
        kept.append(arg)
    # Slice assignment keeps the caller's list object.
    args[:] = kept
def extract_exec(args, capture_output=False, check_ret=True):
    """Invoke ``llvm-extract`` with ``args`` and return the completed process.

    The executable comes from ``EXTRACT_PATH`` if set, else from
    ``compiler_path``, else it is looked up on ``PATH``. With
    ``capture_output`` both stdout and stderr are piped; with ``check_ret``
    a non-zero exit status trips an assertion.
    """
    tool = os.getenv("EXTRACT_PATH")
    if not tool:
        if compiler_path is None:
            tool = "llvm-extract"
        else:
            tool = os.path.join(compiler_path, "llvm-extract")

    command = [tool] + args
    if is_debug:
        print(" ".join(command), file=sys.stderr)

    pipes = {}
    if capture_output:
        pipes = {'stdout': subprocess.PIPE, 'stderr': subprocess.PIPE}
    result = subprocess.run(command, **pipes)

    if check_ret:
        assert(result.returncode == 0)
    return result
def get_bc(filename, bc_filename=None, strict_mode=False, capture_output=False):
    """Run ``get-bc`` on ``filename`` and return the completed process.

    Parameters:
        filename: file handed to ``get-bc``.
        bc_filename: output path given to ``-o``; defaults to
            ``filename + '.bc'``.
        strict_mode: when true, ``-S`` is added to the command line.
        capture_output: when true, stdout and stderr are piped instead of
            inherited.

    The executable is taken from the ``GETBC_PATH`` environment variable when
    it is set and non-empty, otherwise ``get-bc`` is looked up on ``PATH``.
    The exit status is not checked here; callers inspect ``returncode``.
    """
    if bc_filename is None:
        bc_filename = filename + '.bc'

    # Honour the override: the executable name used to be hard-coded in argv,
    # which silently ignored GETBC_PATH.
    tool_name = os.getenv("GETBC_PATH") or "get-bc"

    argv = [tool_name, '-b', '-o', bc_filename]
    if strict_mode:
        argv.append('-S')
    argv.append(filename)
    if is_debug:
        print(" ".join(argv), file=sys.stderr)
    if capture_output:
        return subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    else:
        return subprocess.run(argv)
def get_cache_size(index):
    """Return the size in bytes of cpu0's cache ``index`` as reported by sysfs.

    Reads ``/sys/devices/system/cpu/cpu0/cache/index<index>/size``. A trailing
    ``K``/``k`` or ``M``/``m`` multiplies the number by 1024 or 1024 * 1024;
    a bare decimal number is returned as is.

    Raises ``OSError`` if the file cannot be read and ``ValueError`` if its
    content is empty or not in one of the formats above.
    """
    path = '/sys/devices/system/cpu/cpu0/cache/index{}/size'.format(index)
    # Close the handle deterministically instead of leaving it to the GC.
    with open(path) as size_file:
        cache_size = size_file.read().strip()

    multipliers = {'k': 1024, 'm': 1024 * 1024}
    # Slicing (not indexing) so an empty file yields '' instead of IndexError.
    suffix = cache_size[-1:].lower()
    if suffix in multipliers:
        return int(cache_size[:-1]) * multipliers[suffix]

    # Do not expect Gb sized caches in the near future :(
    if not cache_size.isdecimal():
        raise ValueError('unexpected cache size %r in %s' % (cache_size, path))
    return int(cache_size)
args.insert(last_group, args.pop(idx)) 379 | args.insert(last_group+1, args.pop(idx+1)) 380 | last_group += 2 381 | else: 382 | idx += 1 383 | last_group = end_group + 1 384 | 385 | except ValueError: 386 | return 387 | 388 | def ld_mode(): 389 | old_args = sys.argv[1:] 390 | filter_objsan(old_args) 391 | 392 | args = common_opts() + ['-Wl,--allow-multiple-definition'] 393 | linker_args = common_opts() + [os.path.join(script_dir, 'aflpp-link-safe.o')]#, '-lrt', '-pthread'] 394 | 395 | outname = None 396 | 397 | have_o = False 398 | opt_level = None 399 | have_std = [] 400 | filtereds = [] 401 | i = 0 402 | while i < len(old_args): 403 | if old_args[i].startswith('-O'): 404 | have_o = True 405 | opt_level = old_args[i] 406 | if old_args[i].startswith('-std='): 407 | have_std = [old_args[i]] 408 | if not old_args[i] == '-ffunction-sections': 409 | linker_args.append(old_args[i]) 410 | if old_args[i] == '-o': 411 | outname = old_args[i + 1] 412 | linker_args.append(outname) 413 | args += [outname + '.final.bc.o', '-o', outname] 414 | i += 1 415 | elif not old_args[i].endswith(FILTER_EXTENSIONS): 416 | args.append(old_args[i]) 417 | else: 418 | filtereds.append(old_args[i]) 419 | i += 1 420 | if not have_o: 421 | args = ['-O3'] + args 422 | linker_args = ['-O3'] + linker_args 423 | opt_level = '-O3' 424 | 425 | if outname is None: 426 | outname = 'a.out' 427 | args += [outname + '.final.bc.o', '-o', outname] 428 | 429 | if len(fuzz_programs) > 0 and os.path.basename(outname) not in fuzz_programs: 430 | assert(gclang_exec(old_args + [os.path.join(script_dir, 'aflpp-link-safe.o')]).returncode == 0) 431 | return 432 | 433 | fix_linker_groups(linker_args) 434 | assert(gclang_exec(linker_args).returncode == 0) 435 | 436 | log_msg(benchmark, "start") 437 | assert(get_bc(outname, capture_output=True).returncode == 0) 438 | 439 | for fname in filtereds: 440 | orig_fname = fname 441 | if fname.startswith("-l:"): 442 | fname = fname[3:] 443 | if fname.endswith('.o') and 
get_bc(fname, strict_mode=True, capture_output=True).returncode != 0: 444 | args += [orig_fname] # reinclude in the link command 445 | # reinclude also libs/libsz.a needed by libs like `libhdf5` (in matio_matio_fuzzer it links the system libhdf5 for which bitcode is unavailable) 446 | elif fname.endswith('.a') and (get_bc(fname, strict_mode=True, capture_output=True).returncode != 0 or 'libs/libsz.a' in fname): 447 | args += [orig_fname] # reinclude in the link command 448 | 449 | # strip and log original size 450 | strip_exec(['--strip-all-gnu', outname]) 451 | log_msg(benchmark, "orig_size: %d" % get_filesize(outname)) 452 | 453 | ilock = None 454 | if os.getenv("WRAP_GCLANG_LOCK") is not None: 455 | ilock = ILock(os.getenv("WRAP_GCLANG_LOCK")) 456 | ilock.lock() 457 | 458 | input_fname = outname + '.bc' 459 | log_stats(input_fname) 460 | if os.getenv("NO_PASSES") is None: 461 | if os.getenv("NO_INTERNALIZE") is None and os.getenv("NO_INTERNALIZE1") is None: 462 | opt_exec(['-load=%s/dump-call-tree.so' % script_dir, '-dump-call-tree', '-call-tree-start=main', '-dump-tree-file=call-tree.log', 463 | '-o', '/dev/null', outname + '.bc']) 464 | add_afl_symbols("call-tree.log") 465 | opt_exec(['-internalize', '-internalize-public-api-file=call-tree.log', 466 | '-globaldce', '-o', outname + '.internalized.bc', input_fname]) 467 | input_fname = outname + '.internalized.bc' 468 | log_stats(input_fname) 469 | if os.getenv("NO_EXTRACT") is None: 470 | whitelist = gen_whitelist() 471 | opt_exec(['-load=%s/dump-extlib.so' % script_dir, '-dump-extlib', '-dumpext-whitelist=%s' % whitelist, 472 | '-dumpext-blacklist=third_party,third-party', '-dumpext-out=funcs.log', 473 | '-o', input_fname, input_fname]) 474 | functions_to_extract = open('funcs.log').read().strip() 475 | if len(functions_to_extract) > 0: 476 | # solidity has too many functions to extract, fix it 477 | if 'solidity' in benchmark: 478 | fl = functions_to_extract.split(' ') 479 | functions_to_extract1 = 
fl[:len(fl)//2] 480 | functions_to_extract2 = fl[len(fl)//2:] 481 | extract_exec(functions_to_extract1 + ['-o', 'lib1.bc', input_fname]) 482 | extract_exec(functions_to_extract2 + ['-o', 'lib2.bc', input_fname]) 483 | link_exec(['-o', 'lib.bc', 'lib1.bc', 'lib2.bc']) 484 | extract_exec(functions_to_extract1 + [ '--delete', '-o', outname + '.extracted1.bc', input_fname]) 485 | extract_exec(functions_to_extract2 + [ '--delete', '-o', outname + '.extracted.bc', outname + '.extracted1.bc']) 486 | else: 487 | extract_exec(functions_to_extract.split(' ') + ['-o', 'lib.bc', input_fname]) 488 | extract_exec(functions_to_extract.split(' ') + [ '--delete', '-o', outname + '.extracted.bc', input_fname]) 489 | opt_exec([opt_level, '-loop-unroll', '-o', 'lib.bc', 'lib.bc']) 490 | input_fname = outname + '.extracted.bc' 491 | log_stats(input_fname) 492 | if os.getenv("FORCE_ICP"): 493 | opt_exec(['-load=%s/icp.so' % script_dir, '-icp', '-icp-fallback', '-icp-type', '-icp-type-opaque-ptrs=0', 494 | '-icp-alias', '-stat=0', '-ander', '-modelConsts', '-o', outname + '.icp.bc', input_fname]) 495 | input_fname = outname + '.icp.bc' 496 | log_stats(input_fname) 497 | if os.getenv("NO_CGC") is None: 498 | cgc_strategy = os.getenv("CGC_STRATEGY") if os.getenv("CGC_STRATEGY") is not None else 'dataflow' 499 | cgc_fill = "0" if os.getenv("CGC_NOFILL") else "1" 500 | scalarize = [] 501 | sea_dependencies = [] 502 | vectorize = [] 503 | # split the passes into two opt invocations: this seems to avoid a crash with sqlite3 and sea-dsa that happens under mysterious conditions (only in docker, not under valgrind) 504 | opt_exec([opt_level, '-loop-unroll'] + scalarize + [ 505 | '-load=%s/cgc-planner.so' % script_dir] + sea_dependencies + ['-o', outname + '.temp.bc', input_fname], save_output=True) 506 | input_fname = outname + '.temp.bc' 507 | log_stats(input_fname) 508 | 509 | if os.getenv("CGC_ONLY_PTR_EVAL") is not None: 510 | def ptr_eval(strategy): 511 | ofile = '%s.txt' % strategy 512 |
opt_exec(['-load=%s/ptr-eval.so' % script_dir, '-ptr-eval', '-ptr-strategy=%s' % strategy, 513 | '-ptr-out=%s' % ofile, '-stat=0', '-modelConsts', 514 | '-o', '/dev/null', input_fname], check_ret=False, 515 | wrapper_cmd=['/usr/bin/time', "-f", "%M", '-o', 'time_stats.txt']) 516 | if os.path.exists('time_stats.txt'): 517 | with open('time_stats.txt') as f: 518 | max_mem = f.read().strip().replace('\n', ' ') 519 | os.remove('time_stats.txt') 520 | else: 521 | max_mem = '-1' 522 | 523 | if os.path.exists(ofile): 524 | with open(ofile) as f: 525 | res = f.read() 526 | os.remove(ofile) 527 | return res + '|' + max_mem 528 | 529 | else: 530 | return ("%s: -1|-1|-1" % strategy) + '|' + max_mem 531 | 532 | log_msg(benchmark, ptr_eval('params')) 533 | log_msg(benchmark, ptr_eval('dataflowSea')) 534 | log_msg(benchmark, ptr_eval('dataflow')) 535 | 536 | if os.getenv("CGC_ONLY_CGC_EVAL") is not None: 537 | ret = opt_exec(['-load=%s/func-stats.so' % script_dir, '-func-stats', '-dump-graph', 538 | '-o', '/dev/null', input_fname], check_ret=True, capture_output=True) 539 | with open('cgc.txt', 'w') as f: 540 | f.write(ret.stdout.decode(errors='ignore')) 541 | 542 | out = subprocess.check_output(['python3', '%s/cgc.py' % script_dir, 'cgc.txt']) 543 | log_msg(benchmark, out.strip().decode(errors='ignore')) 544 | return 545 | 546 | opt_exec(['-load=%s/cgc-planner.so' % script_dir, '-cgc-planner', '-cgc-strategy=%s' % cgc_strategy, '-cgc-funcs=^main$', '-cgc-calls-treshold=50', '-stat=0', '-modelConsts'] + vectorize + 547 | ['-o', outname + '.lto.bc', input_fname]) 548 | input_fname = outname + '.lto.bc' 549 | log_stats(input_fname) 550 | 551 | max_aflmap = get_map_limit() 552 | # if the libs have been extracted, set the max accordingly 553 | if os.getenv("NO_EXTRACT") is None and len(functions_to_extract) > 0: 554 | _, _, lib_edges = get_stats('lib.bc') 555 | _, _, cur_edges = get_stats(input_fname) 556 | max_aflmap -= lib_edges 557 | while cur_edges >= max_aflmap: 558 | 
max_aflmap += get_map_limit() 559 | 560 | opt_exec(['-load=%s/cgc.so' % script_dir, '-cgc', '-cgc-clone-prefix=', '-cgc-max-aflmap=%d' % max_aflmap, '-cgc-fill=%s' % cgc_fill, 561 | '-load=%s/dump-call-tree.so' % script_dir, '-dump-call-tree', '-call-tree-start=main', '-dump-tree-file=call-tree.log', 562 | '-o', outname + '.cgc.bc', input_fname]) 563 | input_fname = outname + '.cgc.bc' 564 | log_stats(input_fname) 565 | if os.getenv("FORCE_INTERNALIZE") is not None: 566 | add_afl_symbols("call-tree.log") 567 | opt_exec(['-internalize', '-internalize-public-api-file=call-tree.log', 568 | '-globaldce', '-o', outname + '.cgc.internalized.bc', input_fname]) 569 | input_fname = outname + '.cgc.internalized.bc' 570 | log_stats(input_fname) 571 | 572 | if os.getenv("CGC_LOG_CALLS") is not None: 573 | opt_exec(['-load=%s/dump-calls.so' % script_dir, '-dump-calls', 574 | '-o', outname + '.log.bc', input_fname]) 575 | input_fname = outname + '.log.bc' 576 | log_stats(input_fname) 577 | 578 | if os.getenv("NO_PASSES") is None and os.getenv("NO_EXTRACT") is None and len(functions_to_extract) > 0: 579 | link_exec(['-o', outname + '.linked.bc', input_fname, 'lib.bc']) 580 | input_fname = outname + '.linked.bc' 581 | log_stats(input_fname) 582 | 583 | shutil.copy(input_fname, outname + '.final.bc') 584 | log_stats(outname + '.final.bc') 585 | 586 | assert(cc_exec(common_opts() + have_std + [opt_level] + [outname + '.final.bc', '-c', '-o', outname + '.final.bc.o']).returncode == 0) 587 | 588 | #if fuzz_target is not None and 'grok' in fuzz_target: 589 | # if '-std=c++11' in args: args.remove('-std=c++11') 590 | # args = ['-std=gnu++2a'] + args 591 | 592 | # this fixes a bug at the linking stage for exiv2: `__sancov_pcs has both ordered [...] and unordered [...] 
sections` 593 | # see https://github.com/rust-lang/rust/issues/53945 and https://github.com/google/oss-fuzz/pull/6288 for details 594 | if 'exiv2' in benchmark: 595 | args += ['-fuse-ld=gold'] 596 | 597 | assert(cc_exec(args).returncode == 0) 598 | 599 | if not keep_symbols: 600 | # strip and log final size 601 | strip_exec(['--strip-all-gnu', outname]) 602 | log_msg(benchmark, "final_size: %d" % get_filesize(outname)) 603 | log_msg(benchmark, "end") 604 | 605 | # ugly docker debug 606 | if os.path.exists('/host_tmp'): 607 | os.system("cp %s /host_tmp" % (outname + '.final.bc')) 608 | os.system("cp %s /host_tmp" % outname) 609 | 610 | if ilock is not None: 611 | ilock.unlock() 612 | 613 | 614 | def is_ld_mode(): 615 | return not ("--version" in sys.argv or "--target-help" in sys.argv or 616 | "-c" in sys.argv or "-E" in sys.argv or "-S" in sys.argv or 617 | "-shared" in sys.argv) 618 | 619 | 620 | if len(sys.argv) <= 1: 621 | cc_exec([]) 622 | elif is_ld_mode() and not configure_only: 623 | ld_mode() 624 | else: 625 | cc_mode() 626 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ./install_svf.sh || exit 1 # stop here if SVF could not be prepared 4 | make -C passes || exit 1 # do not build the link helper on top of broken passes 5 | 6 | cd bin && clang -c ../aflpp-link-safe.c 7 | -------------------------------------------------------------------------------- /clean_remake.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | ROOT_DIR="." 7 | 8 | # setup llvm env variables 9 | if [ -z "${LLVM_DIR}" ]; then 10 | 11 | echo "[ ] retrieving the LLVM directory..." 12 | 13 | if [ -z "${LLVM_CONFIG}" ]; then 14 | export LLVM_CONFIG='llvm-config' 15 | fi 16 | 17 | export LLVM_VER="$($LLVM_CONFIG --version 2>/dev/null | sed 's/git//')" 18 | if [ "$LLVM_VER" = "" ]; then 19 | echo "[!] llvm-config not found!"
20 | exit 1 21 | fi 22 | 23 | echo "[+] using LLVM $LLVM_VER" 24 | 25 | export PATH="$($LLVM_CONFIG --bindir):$SVF_HOME/Debug-build/bin:$PATH" # --bindir already names the bin directory 26 | export LLVM_DIR="$($LLVM_CONFIG --prefix)" 27 | 28 | else 29 | 30 | export PATH="$LLVM_DIR/bin:$SVF_HOME/Debug-build/bin:$PATH" 31 | 32 | fi 33 | 34 | echo "[+] the LLVM directory is $LLVM_DIR" 35 | export LLVM_COMPILER_PATH="$LLVM_DIR/bin" 36 | 37 | DIR="$(pwd)" 38 | cd "$ROOT_DIR/passes" 39 | make clean install || exit 1 40 | cd "$DIR" 41 | -------------------------------------------------------------------------------- /install_svf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | export MAKEFLAGS="-j $(grep -c ^processor /proc/cpuinfo)" 5 | 6 | if [ -z "${LLVM_DIR}" ]; then 7 | 8 | echo "[ ] retrieving the LLVM directory..." 9 | 10 | if [ -z "${LLVM_CONFIG}" ]; then 11 | export LLVM_CONFIG='llvm-config' 12 | fi 13 | 14 | export LLVM_VER="$($LLVM_CONFIG --version 2>/dev/null | sed 's/git//')" 15 | if [ "$LLVM_VER" = "" ]; then 16 | echo "[!] llvm-config not found!" 17 | exit 1 18 | fi 19 | 20 | echo "[+] using LLVM $LLVM_VER" 21 | 22 | export PATH="$($LLVM_CONFIG --bindir):$SVF_HOME/Debug-build/bin:$PATH" # --bindir already names the bin directory 23 | export LLVM_DIR="$($LLVM_CONFIG --prefix)" 24 | 25 | else 26 | 27 | export PATH="$LLVM_DIR/bin:$SVF_HOME/Debug-build/bin:$PATH" 28 | 29 | fi 30 | 31 | echo "[+] the LLVM directory is $LLVM_DIR" 32 | 33 | # 34 | # SVF 35 | # 36 | echo "[ ] preparing SVF..." 37 | 38 | if [[ -d SVF ]]; then 39 | echo "[!] the SVF directory already exists" 40 | cd SVF 41 | else 42 | git clone https://github.com/SVF-tools/SVF.git SVF 43 | cd SVF 44 | git checkout SVF-2.1 45 | git am -3 -k ../SVF-all.patch 46 | 47 | git clone https://github.com/SVF-tools/Test-Suite.git 48 | cd Test-Suite 49 | git checkout 72c679a49b943abb229fcb1844f68dff9cc7d522 50 | cd .. 51 | fi 52 | 53 | echo "[+] SVF ready" 54 | 55 | echo "[ ] compiling SVF..."
56 | source ./build.sh debug 57 | 58 | echo "[+] all done, goodbye!" 59 | -------------------------------------------------------------------------------- /passes/Makefile: -------------------------------------------------------------------------------- 1 | DIRS := cgc func-stats dump-call-tree icp cgc-planner dump-extlib dump-calls 2 | 3 | all: $(patsubst %,build_%,$(DIRS)) 4 | 5 | $(patsubst %,build_%,$(DIRS)): DIR = $(subst build_,,$@) 6 | $(patsubst %,build_%,$(DIRS)): 7 | @echo Building LLVM $(DIR)... 8 | $(MAKE) -C $(DIR) install 9 | 10 | clean: 11 | $(foreach DIR, $(DIRS), $(MAKE) -C $(DIR) clean;) 12 | 13 | install: all 14 | -------------------------------------------------------------------------------- /passes/Makefile.inc: -------------------------------------------------------------------------------- 1 | include $(ROOT)/Makefile.inc 2 | 3 | INSTALL_DIR = $(ROOT)/bin 4 | 5 | ARCH ?= native 6 | 7 | ifeq "$(LLVM_DIR)" "" 8 | LLVM_CONFIG ?= llvm-config 9 | LLVM_PREFIX = $(shell $(LLVM_CONFIG) --prefix) 10 | else 11 | LLVM_PREFIX=$(shell readlink -f $(LLVM_DIR)) 12 | LLVM_CONFIG = $(LLVM_PREFIX)/bin/llvm-config 13 | endif 14 | 15 | $(info [+] LLVM prefix at $(LLVM_PREFIX)) 16 | $(info [+] LLVM config at $(LLVM_CONFIG)) 17 | 18 | LLVMVER = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/git//' | sed 's/svn//' ) 19 | ifeq "$(LLVMVER)" "" 20 | $(warning [!] llvm-config not found!) 
21 | endif 22 | 23 | LLVM_BINDIR = $(shell $(LLVM_CONFIG) --bindir 2>/dev/null) 24 | LLVM_LIBDIR = $(shell $(LLVM_CONFIG) --libdir 2>/dev/null) 25 | LLVM_INCDIR = $(shell $(LLVM_CONFIG) --includedir 2>/dev/null) 26 | LLVM_STDCXX = gnu++11 27 | 28 | LLVM_NEW_API = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[0-9]' && echo 1 || echo 0 ) 29 | ifeq "$(LLVM_NEW_API)" "1" 30 | $(info [+] detected llvm 10+, enabling c++14) 31 | LLVM_STDCXX = c++14 32 | endif 33 | 34 | CXX = $(LLVM_BINDIR)/clang++ 35 | 36 | CXXFLAGS += `$(LLVM_CONFIG) --cxxflags` -g -fPIC -Wno-long-long -Wall -W -Wno-unused-parameter -Wwrite-strings -Wno-unknown-warning-option -DHAVE_EXCEPTIONS=0 -march=$(ARCH) 37 | LDFLAGS += `$(LLVM_CONFIG) --ldflags` -L$(LLVM_LIBDIR) 38 | 39 | $(info [+] CXX = $(CXX)) 40 | 41 | PASSLIBNAME =$(PASSNAME).so 42 | 43 | HEADERS += $(wildcard ../include/common/*.h) $(wildcard ../include/$(PASSNAME)/*.h) 44 | 45 | INCLUDES += -I../include/common -I../include/$(PASSNAME) 46 | 47 | all: $(PASSLIBNAME) 48 | 49 | $(PASSLIBNAME): $(OBJS) 50 | $(QUIET) $(ECHO) " [LINK] $@" 51 | $(CXX) -std=$(LLVM_STDCXX) $(CXXFLAGS) -shared -o $@ $(CPPS) $(OBJS) $(LDFLAGS) $(LIBS) 52 | 53 | %.o: %.cpp $(HEADERS) 54 | $(QUIET) $(ECHO) " [C++] $<" 55 | $(CXX) -std=$(LLVM_STDCXX) $(CXXFLAGS) $(INCLUDES) -c -o $@ $< 56 | 57 | install: $(INSTALL_DIR)/$(PASSLIBNAME) 58 | 59 | $(INSTALL_DIR)/$(PASSLIBNAME): $(PASSLIBNAME) 60 | $(QUIET) $(ECHO) " [INSTALL] $< -> $@" 61 | install -c -D -m 744 $? 
$@ 62 | 63 | clean: 64 | $(QUIET) $(ECHO) " [RM] $(OBJS) $(PASSLIBNAME)" 65 | rm -f $(OBJS) $(PASSLIBNAME) $(INSTALL_DIR)/$(PASSLIBNAME) 66 | -------------------------------------------------------------------------------- /passes/Makefile.svf.inc: -------------------------------------------------------------------------------- 1 | SVF_HOME=$(ROOT)/SVF 2 | SVF_HEADER=$(SVF_HOME)/include 3 | SVF_LIB=$(SVF_HOME)/Debug-build/lib 4 | 5 | LDFLAGS += $(SVF_LIB)/libSvf.a $(SVF_LIB)/CUDD/libCudd.a 6 | CFLAGS += -fno-rtti -Wno-overloaded-virtual -Wno-ignored-qualifiers -Wno-reorder 7 | INCLUDES += -I $(SVF_HEADER) 8 | -------------------------------------------------------------------------------- /passes/add-sanitize-attr/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for the add-sanitize-attr pass 2 | ROOT=../.. 3 | 4 | PASSNAME := add-sanitize-attr 5 | OBJS := add-sanitize-attr.o 6 | 7 | include ../Makefile.inc 8 | 9 | -------------------------------------------------------------------------------- /passes/add-sanitize-attr/add-sanitize-attr.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | using namespace llvm; 5 | 6 | #define DEBUG_TYPE "AddSanitizeAttr" 7 | #define AddSanitizeAttrPassLog(M) LLVM_DEBUG(dbgs() << "AddSanitizeAttrPass: " << M << "\n") 8 | #define oprint(s) outs() << s << "\n" 9 | 10 | namespace { 11 | 12 | class AddSanitizeAttrPass : public ModulePass { 13 | 14 | public: 15 | static char ID; 16 | AddSanitizeAttrPass() : ModulePass(ID) {} 17 | 18 | virtual bool runOnModule(Module &M) { 19 | for (auto &F : M.getFunctionList()) { 20 | if (F.isDeclaration()) 21 | continue; 22 | 23 | // if(!F.hasFnAttribute(Attribute::NoSanitize)) 24 | F.addFnAttr(Attribute::SanitizeAddress); 25 | } 26 | 27 | return true; 28 | } 29 | }; 30 | 31 | } 32 | 33 | char AddSanitizeAttrPass::ID = 0; 34 | RegisterPass MP("add-sanitize-attr", "AddSanitizeAttr Pass"); 
35 | 36 | -------------------------------------------------------------------------------- /passes/cgc-planner/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for the cgc-planner pass 2 | ROOT=../.. 3 | 4 | PASSNAME := cgc-planner 5 | OBJS := cgc-planner.o 6 | 7 | include ../Makefile.inc 8 | include ../Makefile.svf.inc 9 | -------------------------------------------------------------------------------- /passes/cgc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for the cgc pass 2 | ROOT=../.. 3 | 4 | PASSNAME := cgc 5 | OBJS := cgc.o 6 | 7 | include ../Makefile.inc 8 | 9 | -------------------------------------------------------------------------------- /passes/cgc/cgc.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include "llvm/Transforms/Utils/CallPromotionUtils.h" 5 | #include "llvm/Transforms/Utils/Cloning.h" 6 | #include 7 | #include "llvm/Analysis/CFG.h" 8 | #include "llvm/Analysis/CallGraph.h" 9 | #include "llvm/Analysis/CallGraphSCCPass.h" 10 | #include "llvm/ADT/SCCIterator.h" 11 | #include "llvm/ADT/SmallVector.h" 12 | #include "llvm/IR/CFG.h" 13 | #include 14 | #include 15 | 16 | using namespace llvm; 17 | 18 | #define DEBUG_TYPE "cgc" 19 | #define cgcPassLog(M) LLVM_DEBUG(dbgs() << "CallgraphClonePass: " << M << "\n") 20 | #define oprint(s) LLVM_DEBUG(dbgs() << s << "\n") 21 | 22 | static cl::list 23 | HardenFunctions("cgc-harden-funcs", 24 | cl::desc("Specify all the comma-separated function regexes to harden against optimizer [default: main, LLVMFuzzerTestOneInput]"), 25 | cl::ZeroOrMore, cl::CommaSeparated, cl::NotHidden); 26 | 27 | static cl::opt 28 | ClonePrefix("cgc-clone-prefix", 29 | cl::desc("Specify the clone name prefix"), 30 | cl::init("__cgc_"), cl::NotHidden); 31 | 32 | static cl::opt 33 | CGCFill("cgc-fill", 34 | cl::init(true), cl::NotHidden, 35 | 
cl::desc("If true will clone all the other calls once the planned ones have been completed")); 36 | 37 | // Fill 256Kb by default, an average size of L2 cache 38 | static cl::opt 39 | MaxSize("cgc-max-aflmap", 40 | cl::init(256*1024), cl::NotHidden, 41 | cl::desc("The maximum acceptable size for the AFL++ edge map")); 42 | 43 | namespace { 44 | // This pass clones function calls based on decisions taken by CGC Planner on which 45 | // subgraph portion of the callgraph should be cloned 46 | class CallgraphClonePass : public ModulePass { 47 | 48 | // Keep track of all the functions belonging to strongly connected components 49 | std::set SCCFunctions; 50 | 51 | std::map> FunctionToSCC; 52 | std::map> FunctionToCallBases; 53 | std::map FunctionToAFLMapSize; 54 | 55 | // Keep track of cloned functions 56 | std::set FunctionClones; 57 | 58 | // Return the priority of the CallBase (0 if it carries none); a higher priority means the CallBase 59 | // should be cloned earlier 60 | static long getPriority(CallBase *CB) { 61 | MDNode* N; 62 | assert(CB); 63 | N = CB->getMetadata(CGC_CLONE_PRIORITY); 64 | if (N == NULL) return 0; 65 | Constant *val = dyn_cast(N->getOperand(0))->getValue(); 66 | assert(val); 67 | long prio = cast(val)->getSExtValue(); 68 | return prio; 69 | } 70 | 71 | // Save the priority value for a function that has been cloned 72 | static void setFunctionPriority(Function *F, long prio) { 73 | LLVMContext& C = F->getContext(); 74 | MDNode* N = MDNode::get(C, ConstantAsMetadata::get(ConstantInt::get(C, APInt(sizeof(unsigned long)*8, prio, true)))); 75 | F->setMetadata(CGC_CLONE_PRIORITY, N); 76 | } 77 | 78 | // Return the priority stored on the Function by setFunctionPriority (0 if none is recorded) 79 | static long getFunctionPriority(Function *F) { 80 | MDNode* N; 81 | assert(F); 82 | N = F->getMetadata(CGC_CLONE_PRIORITY); 83 | if (N == NULL) return 0; 84 | Constant *val = dyn_cast(N->getOperand(0))->getValue(); 85 | if(!val) return 0; 86 | long prio = cast(val)->getSExtValue(); 87 | return prio; 88 | }
89 | 90 | // Compare the priority of two CallBases; a higher priority means the CallBase 91 | // should be cloned earlier 92 | struct ComparePriority { 93 | bool operator()(CallBase *c1, CallBase *c2) { 94 | long prio1 = getPriority(c1); 95 | long prio2 = getPriority(c2); 96 | return prio1 < prio2; 97 | } 98 | }; 99 | 100 | // A priority queue for the CallBases, ordered by priority 101 | using CallBaseQueue = std::priority_queue, ComparePriority>; 102 | 103 | public: 104 | static char ID; 105 | unsigned long unique_id = 0; 106 | unsigned long nclones = 0; 107 | unsigned long aflmap_size = 0; 108 | CallgraphClonePass() : ModulePass(ID) {} 109 | 110 | unsigned long getUniqueID() { 111 | return ++unique_id; 112 | } 113 | 114 | // Taken from: https://github.com/AFLplusplus 115 | // True if block has successors and it dominates all of them. 116 | bool isFullDominator(const BasicBlock *BB, const DominatorTree *DT) { 117 | if (succ_begin(BB) == succ_end(BB)) return false; 118 | for (const BasicBlock *SUCC : make_range(succ_begin(BB), succ_end(BB))) { 119 | // if the edge is critical it will be split, so skip it here 120 | if (isCriticalEdge(BB->getTerminator(), SUCC)) continue; 121 | if (!DT->dominates(BB, SUCC)) return false; 122 | } 123 | return true; 124 | } 125 | 126 | // Taken from: https://github.com/AFLplusplus 127 | // True if block has predecessors and it postdominates all of them. 128 | bool isFullPostDominator(const BasicBlock * BB, 129 | const PostDominatorTree *PDT) { 130 | if (pred_begin(BB) == pred_end(BB)) return false; 131 | for (const BasicBlock *PRED : make_range(pred_begin(BB), pred_end(BB))) { 132 | // if the edge is critical it will be split, so skip it here 133 | if (isCriticalEdge(PRED->getTerminator(), BB)) continue; 134 | if (!PDT->dominates(BB, PRED)) return false; 135 | } 136 | return true; 137 | } 138 | 139 | // Given a function, try to estimate the number of edges in the function that 140 | // will be instrumented by AFLplusplus.
141 | // It instruments edges by breaking all critical edges with a block in the middle 142 | // and avoiding instrumenting blocks which are full dominators, or full 143 | // post-dominators with multiple predecessors. 144 | unsigned long estimateAFLEdges(Function *F) { 145 | DominatorTree *DT = &getAnalysis(*F).getDomTree(); 146 | PostDominatorTree *PDT = &getAnalysis(*F).getPostDomTree(); 147 | unsigned edges = 0; 148 | for (BasicBlock &BB: *F) { 149 | // Do not instrument full dominators, or full post-dominators with multiple 150 | // predecessors. 151 | bool shouldInstrumentBlock = (&F->getEntryBlock() == &BB) || (!isFullDominator(&BB, DT) && 152 | !(isFullPostDominator(&BB, PDT) 153 | && !BB.getSinglePredecessor())); 154 | if (shouldInstrumentBlock) ++edges; 155 | 156 | Instruction *TI = BB.getTerminator(); 157 | if (TI->getNumSuccessors() > 1 && !isa(TI)) 158 | for (unsigned succ = 0, end = TI->getNumSuccessors(); succ != end; ++succ) { 159 | if (isCriticalEdge(TI, succ)) 160 | ++edges; 161 | } 162 | } 163 | return edges; 164 | } 165 | 166 | // Return true if `F` has been marked as a root from which to start cloning 167 | // by CGC Planner. 168 | bool isCGCRoot(Function &F) { 169 | MDNode* N; 170 | N = F.getMetadata(CGC_ROOT_ATTR); 171 | if (N == NULL) return false; 172 | return true; 173 | } 174 | 175 | // Return true if `CB` has been planned to be cloned by CGC Planner 176 | bool isPlannedClone(CallBase &CB) { 177 | MDNode* N; 178 | N = CB.getMetadata(CGC_CLONE_CALL_ATTR); 179 | if (N == NULL) return false; 180 | return true; 181 | } 182 | 183 | // Return true if `F` has an available_externally linkage (i.e.
equivalent to a declaration) 184 | bool isAvailableExternally(Function &F) { 185 | GlobalValue::LinkageTypes L = F.getLinkage(); 186 | return GlobalValue::isAvailableExternallyLinkage(L); 187 | } 188 | 189 | // Substitute all the trailings .x.y.z that llvm creates when having two functions 190 | // with the same name, with some uniqueIDs to avoid long names 191 | std::string compressName(std::string name) { 192 | // find the last .num 193 | std::string newName = name; 194 | std::string::size_type idx = newName.rfind('.'); 195 | if (idx == std::string::npos || idx == newName.length()) { 196 | return newName; 197 | } 198 | // ensure it is actually a number 199 | int random = atoi(newName.substr(idx+1).c_str()); 200 | 201 | while (random) { 202 | newName = newName.substr(0, idx); 203 | idx = newName.rfind('.'); 204 | if (idx == std::string::npos || idx == newName.length()) { 205 | return newName + "." + std::to_string(getUniqueID()); 206 | } 207 | random = atoi(newName.substr(idx+1).c_str()); 208 | } 209 | return newName + "." + std::to_string(getUniqueID()); 210 | } 211 | 212 | void setCloneName(Function *F) { 213 | // if the function name already contains the prefix do not add it 214 | if (F->getName().find(ClonePrefix) == std::string::npos) 215 | F->setName(ClonePrefix + F->getName()); 216 | // Compress the clone name to avoid .1452.3394.9208.13831.27566... 
217 | // at the end 218 | F->setName(compressName(F->getName().str())); 219 | } 220 | 221 | // Replace all the dots in the name that llvm may insert with underscores 222 | void normalizeName(Function *F) { 223 | std::string newName = F->getName().str(); 224 | std::replace(newName.begin(), newName.end(), '.', '_'); 225 | F->setName(newName); 226 | } 227 | 228 | // Mark the function so that it can be recognized as a clone 229 | void markClone(Function *F) { 230 | LLVMContext& C = F->getContext(); 231 | MDNode* N = MDNode::get(C, ConstantAsMetadata::get(ConstantInt::get(C, APInt(sizeof(unsigned long)*8, 1, true)))); 232 | F->setMetadata(CGC_CLONE_MARK, N); 233 | 234 | // NOTICE: A bit risky to change all names 235 | std::string FName = F->getName().str(); 236 | F->setName(CGC_CLONE_MARK + std::to_string(getFunctionPriority(F)) + "_" + FName); 237 | } 238 | 239 | // Visit a Constant AST to find and replace oldV with newV, returning a new constant 240 | Constant *replaceConstant(Constant *C, Constant *newV, Constant *oldV) { 241 | if (ConstantStruct *S = dyn_cast(C)) { 242 | SmallVector Ops; 243 | for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i) { 244 | Constant *op = S->getOperand(i); 245 | if (op == oldV) 246 | Ops.push_back(newV); 247 | else 248 | Ops.push_back(replaceConstant(op, newV, oldV)); 249 | } 250 | 251 | Constant* res = ConstantStruct::getAnon(Ops, true); 252 | return res; 253 | 254 | } else if (ConstantExpr *E = dyn_cast(C)) { 255 | SmallVector Ops; 256 | for (unsigned i = 0, e = E->getNumOperands(); i != e; ++i) { 257 | Constant *op = E->getOperand(i); 258 | if (op == oldV) 259 | Ops.push_back(newV); 260 | else 261 | Ops.push_back(replaceConstant(op, newV, oldV)); 262 | } 263 | 264 | Constant *res = E->getWithOperands(Ops); 265 | return res; 266 | 267 | } else { 268 | return C; 269 | } 270 | } 271 | 272 | // Fix the prologue of newF, by substituting the occurrences of oldF.
273 | // This allows us to clone functions without corrupting the prologue, that is 274 | // left untouched by cloneFunction. -fsanitize=function uses prologues 275 | void fixPrologue(Function *newF, Function *oldF) { 276 | if (!newF->hasPrologueData()) return; 277 | 278 | Constant *prologue = replaceConstant(newF->getPrologueData(), newF, oldF); 279 | newF->setPrologueData(prologue); 280 | } 281 | 282 | // `dest` is a clone of `source`, with the instructions mapped 1to1 in the `VMap`. 283 | // Update the `FunctionToCallBases` struct to keep track of the CallBases in 284 | // `dest` that represent the clone CallBases of `source`. 285 | // Update the `FunctionToAFLMapSize` to keep track of the estimation for the 286 | // new clone. 287 | void updateMetadata(Function *dest, Function *source, ValueToValueMapTy &VMap) { 288 | assert(FunctionToCallBases.find(source) != FunctionToCallBases.end()); 289 | FunctionToCallBases[dest]; 290 | for (CallBase *CB: FunctionToCallBases[source]) { 291 | CallBase *mappedCB = dyn_cast(VMap[CB]); 292 | assert(mappedCB); 293 | FunctionToCallBases[dest].insert(mappedCB); 294 | } 295 | 296 | assert(FunctionToAFLMapSize.find(source) != FunctionToAFLMapSize.end()); 297 | FunctionToAFLMapSize[dest] = FunctionToAFLMapSize[source]; 298 | } 299 | 300 | // Gather all the calls to `F`, starting from `I` and visiting recursively all 301 | // the users of `I`, to collect all the eventual calls to `F` originated by `I` 302 | // e.g. 
call bitcast F, with I being the bitcast 303 | void gatherEventualCallsTo(Function *F, Value *V, std::set &callsToF) { 304 | // If it is a call, just check if `F` is called 305 | if (CallBase * CB = dyn_cast(V)) { 306 | // check that the function is called and not passed as param 307 | if (CB->getCalledOperand()->stripPointerCasts() == F) { 308 | callsToF.insert(CB); 309 | } 310 | // If it is a bitcast, visit all the users recursively 311 | } else if (BitCastOperator * BO = dyn_cast(V)) { 312 | for (User* user: BO->users()) { 313 | gatherEventualCallsTo(F, user, callsToF); 314 | } 315 | } 316 | } 317 | 318 | // Return true if `F` has multiple call sites so it makes sense to clone it 319 | bool shouldCloneFunction(Function *F) { 320 | // Do not clone LLVMFuzzerTestOneInput itself 321 | if (F->getName().equals("LLVMFuzzerTestOneInput")) return false; 322 | 323 | unsigned int numCallsToF = 0; 324 | std::set callsToF; 325 | // Gather all the calls to the function `F` 326 | for (User* user: F->users()) { 327 | gatherEventualCallsTo(F, user, callsToF); 328 | 329 | // No need to visit all the users, bailout if already true 330 | if (callsToF.size() > 1) return true; 331 | } 332 | 333 | numCallsToF = callsToF.size(); 334 | 335 | // oprint(F->getName().str() << " - " << numCallsToF); 336 | // We should clone the function only if it is called more than once 337 | return numCallsToF > 1; 338 | } 339 | 340 | // Return true if cloning `F` would not exceed the size limit. 
341 | bool allowedToClone(Function *F) { 342 | unsigned long additional_edges = 0; 343 | // If `F` is in a SCC we will clone the whole SCC while cloning `F` 344 | if (isInSCC(F)) { 345 | assert(FunctionToSCC.find(F) != FunctionToSCC.end()); 346 | std::set SCC = FunctionToSCC[F]; 347 | for (Function *F: SCC) { 348 | assert(FunctionToAFLMapSize.find(F) != FunctionToAFLMapSize.end()); 349 | additional_edges += FunctionToAFLMapSize[F]; 350 | } 351 | // Otherwise just count `F` 352 | } else { 353 | assert(FunctionToAFLMapSize.find(F) != FunctionToAFLMapSize.end()); 354 | additional_edges += FunctionToAFLMapSize[F]; 355 | } 356 | // More readable mf 357 | if (aflmap_size + additional_edges > MaxSize) return false; 358 | else return true; 359 | } 360 | 361 | // Return true if the `SCC` has multiple call sites so it makes sense to clone it 362 | bool shouldCloneSCC(std::set &SCC) { 363 | unsigned int numCallsToSCC = 0; 364 | std::set callsToSCC; 365 | // Gather all the calls to each function in the `SCC` 366 | for (Function *F: SCC) { 367 | for (User* user: F->users()) { 368 | gatherEventualCallsTo(F, user, callsToSCC); 369 | } 370 | } 371 | 372 | // Count only the calls from outside the `SCC` 373 | for (Instruction *call: callsToSCC) { 374 | Function* callerF = call->getParent()->getParent(); 375 | if (SCC.find(callerF) == SCC.end()) 376 | ++numCallsToSCC; 377 | } 378 | 379 | // for (Function *F: SCC) 380 | // oprint(F->getName().str() << " - " << numCallsToSCC); 381 | // We should clone the function only if it is called more than once 382 | return numCallsToSCC > 1; 383 | } 384 | 385 | // Add all the callbases in the function to the priority queue 386 | void updateCallBaseQueue(CallBaseQueue &cgcCallBaseQueue, Function *F) { 387 | for (CallBase *CB: FunctionToCallBases[F]) { 388 | cgcCallBaseQueue.push(CB); 389 | } 390 | } 391 | 392 | // Update the metadata on SCC clones 393 | void updateSCCMetadata(Function *SCCclone, std::set &SCCClones) { 394 | 
assert(FunctionToSCC.find(SCCclone) == FunctionToSCC.end()); 395 | FunctionToSCC[SCCclone] = SCCClones; 396 | assert(SCCFunctions.find(SCCclone) == SCCFunctions.end()); 397 | SCCFunctions.insert(SCCclone); 398 | } 399 | 400 | // Visit the Strongly Connected Component where `F` belongs, to clone it as 401 | // a single node. Update `cgcCallBaseQueue` accordingly to continue the visit. 402 | Function* addSCCClone(CallBaseQueue &cgcCallBaseQueue, Function* F, long prio) { 403 | std::map FtoClones; 404 | std::set SCCClones; 405 | 406 | assert(FunctionToSCC.find(F) != FunctionToSCC.end()); 407 | std::set SCC = FunctionToSCC[F]; 408 | 409 | // Clone all the functions in the SCC 410 | bool should_clone = shouldCloneSCC(SCC); 411 | for (Function *SCCfunc: SCC) { 412 | // Clone original function if required 413 | if (should_clone) { 414 | ValueToValueMapTy VMap; 415 | Function *clone = CloneFunction(SCCfunc, VMap); 416 | assert(clone); 417 | updateMetadata(clone, SCCfunc, VMap); 418 | trackClone(clone, cgcCallBaseQueue); 419 | setCloneName(clone); 420 | fixPrologue(clone, SCCfunc); 421 | FtoClones[SCCfunc] = clone; 422 | SCCClones.insert(clone); 423 | // Add the priority to the clone to keep track of it 424 | setFunctionPriority(clone, prio); 425 | } else { 426 | // Set the original function as a clone without updating the number of clones 427 | trackClone(SCCfunc, cgcCallBaseQueue, /*update=*/false); 428 | FtoClones[SCCfunc] = SCCfunc; 429 | SCCClones.insert(SCCfunc); 430 | // Add the priority to the original function (tracked as a clone) to keep track of it 431 | setFunctionPriority(SCCfunc, prio); 432 | } 433 | } 434 | 435 | // Register SCC metadata for every member not already present in `FunctionToSCC` 436 | for (Function *SCCclone: SCCClones) { 437 | if (FunctionToSCC.find(SCCclone) == FunctionToSCC.end()) 438 | updateSCCMetadata(SCCclone, SCCClones); 439 | } 440 | 441 | // Now rewire the calls inside the SCC clones 442 | for (Function *SCCclone: SCCClones) { 443 | assert(FunctionToCallBases.find(SCCclone) != FunctionToCallBases.end()); 444 | for 
(CallBase *CB: FunctionToCallBases[SCCclone]) { 445 | 446 | // For direct calls, simply redirect target to new clone 447 | Function *C = dyn_cast(CB->getCalledOperand()->stripPointerCasts()); 448 | if (C) { 449 | Function *clone; 450 | // If the called function is in the SCC use the clone we generated 451 | if (FtoClones.find(C) != FtoClones.end()) { 452 | clone = FtoClones[C]; 453 | if (clone->getFunctionType() != CB->getCalledOperand()->getType()->getPointerElementType()) 454 | CB->setCalledFunction(CB->getFunctionType(), CastInst::CreatePointerCast(clone, CB->getCalledOperand()->getType(), "", CB)); 455 | else 456 | CB->setCalledFunction(clone); 457 | // Otherwise queue the call base so that its target is visited later 458 | } else { 459 | // clone only if it is a planned clone, otherwise leave as is 460 | // NB: this assumes that all calls to `C` from `SCC` 461 | // have been planned equally to be cloned or not, otherwise 462 | // calls to `C` will not be consistent inside `SCC` 463 | if (isPlannedClone(*CB) == false) continue; 464 | cgcCallBaseQueue.push(CB); 465 | } 466 | } 467 | } 468 | } 469 | return FtoClones[F]; 470 | } 471 | 472 | // Clone the function `F`, and update the `cgcCallBaseQueue` to continue 473 | // the visit 474 | Function* addFunctionClone(CallBaseQueue &cgcCallBaseQueue, Function *F, long prio) { 475 | 476 | // The assertion is valid only if we visit the graph in BFS mode, i.e. 
477 | // starting from a single root, in the general case we may revisit a function 478 | // that has been cloned, that now has two callers since its parent is cloned 479 | // assert(!isClone(F)); 480 | 481 | // bail out if blacklisted 482 | if (isBlacklisted(F)) 483 | return F; 484 | 485 | // bail out if cloning `F` would exceed the max size 486 | if (!allowedToClone(F)) 487 | return F; 488 | 489 | if (isInSCC(F)) 490 | return addSCCClone(cgcCallBaseQueue, F, prio); 491 | 492 | // Clone original function if required 493 | if (shouldCloneFunction(F)) { 494 | ValueToValueMapTy VMap; 495 | Function *clone = CloneFunction(F, VMap); 496 | assert(clone); 497 | updateMetadata(clone, F, VMap); 498 | trackClone(clone, cgcCallBaseQueue); 499 | 500 | setCloneName(clone); 501 | fixPrologue(clone, F); 502 | 503 | // Queue the call bases of the clone for processing. 504 | updateCallBaseQueue(cgcCallBaseQueue, clone); 505 | 506 | // Add the priority to the clone to keep track of it 507 | setFunctionPriority(clone, prio); 508 | 509 | return clone; 510 | } else { 511 | // Set the original function as a clone without updating the number of clones 512 | trackClone(F, cgcCallBaseQueue, /*update=*/false); 513 | // Add the target to the functions to process. 
514 | updateCallBaseQueue(cgcCallBaseQueue, F); 515 | 516 | // Add the priority to the original function (tracked as a clone) to keep track of it 517 | setFunctionPriority(F, prio); 518 | 519 | return F; 520 | } 521 | } 522 | 523 | // Visit the call base `CB` to clone its target 524 | void cgc(CallBase *CB, CallBaseQueue &cgcCallBaseQueue) { 525 | Function *F = CB->getFunction(); 526 | 527 | // bail out if the caller is blacklisted 528 | if (isBlacklisted(F)) 529 | return; 530 | 531 | // For direct calls, simply redirect target to new clone 532 | Function *C = dyn_cast(CB->getCalledOperand()->stripPointerCasts()); 533 | // this should not be an edge between two functions in the same SCC 534 | assert(!isInSCC(F) || !isInSCC(C) || (FunctionToSCC[F].find(C) == FunctionToSCC[F].end() 535 | && FunctionToSCC[C].find(F) == FunctionToSCC[C].end())); 536 | if (C) { 537 | // clone only if it is a planned clone, otherwise leave as is 538 | if (isPlannedClone(*CB) == false) return; 539 | long prio = getPriority(CB); 540 | Function *clone = addFunctionClone(cgcCallBaseQueue, C, prio); 541 | if (clone->getFunctionType() != CB->getCalledOperand()->getType()->getPointerElementType()) 542 | CB->setCalledFunction(CB->getFunctionType(), CastInst::CreatePointerCast(clone, CB->getCalledOperand()->getType(), "", CB)); 543 | else 544 | CB->setCalledFunction(clone); 545 | } 546 | } 547 | 548 | // Return true if `F` directly calls itself 549 | bool isSimplyRecursive(Function *F) { 550 | for (auto &BB : *F) 551 | for (auto &I : BB.instructionsWithoutDebug()) 552 | if (auto *CB = dyn_cast(&I)) { 553 | Function *Callee = dyn_cast(CB->getCalledOperand()->stripPointerCasts()); 554 | 555 | // Function calls itself 556 | if (Callee == F) { 557 | return true; 558 | } 559 | } 560 | return false; 561 | } 562 | 563 | // Visit the `SCC` to gather the information needed in `FunctionToSCC` and 564 | // `SCCFunctions` 565 | void collectSCC(CallGraphSCC &SCC) { 566 | std::set Functions; 567 | for (CallGraphNode *I : SCC) { 568 | 
Functions.insert(I->getFunction()); 569 | } 570 | 571 | // If the SCC contains multiple nodes we know there is recursion. 572 | if (Functions.size() != 1) { 573 | for (Function *F : Functions) { 574 | SCCFunctions.insert(F); 575 | assert(!F->doesNotRecurse()); 576 | 577 | // A function should belong to a single SCC 578 | assert(FunctionToSCC.find(F) == FunctionToSCC.end()); 579 | FunctionToSCC[F] = Functions; 580 | } 581 | // Take into account simply recursive functions (a single node that calls itself) 582 | } else { 583 | Function *F = *Functions.begin(); 584 | if (F && isSimplyRecursive(F)) { 585 | SCCFunctions.insert(F); 586 | assert(!F->doesNotRecurse()); 587 | 588 | assert(FunctionToSCC.find(F) == FunctionToSCC.end()); 589 | FunctionToSCC[F] = Functions; 590 | } 591 | } 592 | } 593 | 594 | // Return true if `F` is blacklisted, i.e. it carries the `CGC_CLONE_NEVER` metadata 595 | bool isBlacklisted(Function *F) { 596 | MDNode* N; 597 | N = F->getMetadata(CGC_CLONE_NEVER); 598 | if (N == NULL) return false; 599 | return true; 600 | } 601 | 602 | // Return true if `F` is part of an SCC 603 | bool isInSCC(Function *F) { 604 | return SCCFunctions.find(F) != SCCFunctions.end(); 605 | } 606 | 607 | // Return true if `F` is tracked in `FunctionClones` 608 | bool isClone(Function *F) { 609 | return FunctionClones.find(F) != FunctionClones.end(); 610 | } 611 | 612 | // Add `F` to the function clones we keep track of, and update stats when `update` is true 613 | void trackClone(Function *F, CallBaseQueue& cgcCallBaseQueue, bool update=true) { 614 | FunctionClones.insert(F); 615 | if (update) { 616 | ++nclones; 617 | aflmap_size += FunctionToAFLMapSize[F]; 618 | } 619 | LLVM_DEBUG(dbgs() << "\r" << nclones << " - " << aflmap_size << " "); 620 | } 621 | 622 | // Sometimes LLVM builds the CallGraph without taking into consideration calls 623 | // that pass through a `bitcast` operation. 
We fix this here, revisiting the 624 | // functions and updating the CallGraph 625 | void fixCallGraph(Module &M, CallGraph *CG) { 626 | for (auto &F : M.getFunctionList()) { 627 | if (F.isDeclaration() || isAvailableExternally(F)) 628 | continue; 629 | for(auto &BB: F) { 630 | for (auto &I : BB) { 631 | if (CallBase * CB = dyn_cast(&I)) { 632 | if (CB->isInlineAsm()) continue; 633 | 634 | Function *Called = dyn_cast(CB->getCalledOperand()->stripPointerCasts()); 635 | if (!Called || Called->isDeclaration() || isAvailableExternally(*Called)|| Called->isIntrinsic()) continue; 636 | 637 | // If `Called` actually points to a function, but getCalledFunction 638 | // returns null then we have spotted a missing call-graph edge 639 | if (CB->getCalledFunction() == nullptr) { 640 | CallGraphNode *Node = CG->getOrInsertFunction(&F); 641 | Node->addCalledFunction(CB, CG->getOrInsertFunction(Called)); 642 | } 643 | } 644 | } 645 | } 646 | } 647 | } 648 | 649 | // Initialize the `FunctionToCallBases` structure with all the existing CallBases in `F` 650 | void gatherCallBases(Function *F) { 651 | // Initialize the set in case no call is present in the function 652 | FunctionToCallBases[F]; 653 | for (BasicBlock &BB: *F) { 654 | for (Instruction &I : BB) { 655 | // Gather all call bases 656 | if (CallBase * CB = dyn_cast(&I)) { 657 | 658 | // Only if they represent direct calls to defined, non-intrinsic functions 659 | if (CB->isInlineAsm()) continue; 660 | Function *Called = dyn_cast(CB->getCalledOperand()->stripPointerCasts()); 661 | if (!Called || Called->isDeclaration() || isAvailableExternally(*Called) || Called->isIntrinsic()) continue; 662 | 663 | // Insert into the map 664 | FunctionToCallBases[F].insert(CB); 665 | } 666 | } 667 | } 668 | } 669 | 670 | // The optimizer may decide to inline functions and simplify them, or directly simplify 671 | // static/internal ones. 
Try to persuade it to avoid simplifying functions we want as is, 672 | // by setting all the functions `F` calls to not static and not inlinable. 673 | void hardenFunction(Function *F) { 674 | for (BasicBlock &BB: *F) { 675 | for (Instruction &I : BB) { 676 | // Visit all call bases 677 | if (CallBase * CB = dyn_cast(&I)) { 678 | 679 | // Only if they represent direct calls to defined, non-intrinsic functions 680 | if (CB->isInlineAsm()) continue; 681 | Function *Called = dyn_cast(CB->getCalledOperand()->stripPointerCasts()); 682 | if (!Called || Called->isDeclaration() || isAvailableExternally(*Called) || Called->isIntrinsic()) continue; 683 | 684 | // Harden against inlining: drop the inline attributes and mark the callee NoInline 685 | if (Called->hasFnAttribute(Attribute::InlineHint)) 686 | Called->removeFnAttr(Attribute::InlineHint); 687 | if (Called->hasFnAttribute(Attribute::AlwaysInline)) 688 | Called->removeFnAttr(Attribute::AlwaysInline); 689 | Called->addFnAttr(Attribute::NoInline); 690 | 691 | // Harden against static/internal-driven simplifications: force default visibility and external linkage 692 | GlobalValue *GVF = dyn_cast(Called); 693 | GVF->setVisibility(GlobalValue::DefaultVisibility); 694 | GVF->setLinkage(GlobalValue::ExternalLinkage); 695 | } 696 | } 697 | } 698 | } 699 | 700 | virtual bool runOnModule(Module &M) override { 701 | cgcPassLog("Running..."); 702 | 703 | // Initialize regular expressions for functions to harden against optimizer 704 | std::vector HardenFunctionRegexes; 705 | if (HardenFunctions.empty()) { 706 | HardenFunctions.push_back("main"); 707 | HardenFunctions.push_back("LLVMFuzzerTestOneInput"); 708 | } 709 | passListRegexInit(HardenFunctionRegexes, HardenFunctions); 710 | 711 | CallGraph *CG = &getAnalysis().getCallGraph(); 712 | 713 | // LLVM does not consider edges like `call (bitcast (func))` so insert them. 714 | // The default CallGraph misses these edges. 715 | fixCallGraph(M, CG); 716 | 717 | // Walk the callgraph in bottom-up SCC order. 
718 | scc_iterator CGI = scc_begin(CG); 719 | 720 | CallGraphSCC CurSCC(*CG, &CGI); 721 | while (!CGI.isAtEnd()) { 722 | // Copy the current SCC and increment past it so that the pass can hack 723 | // on the SCC if it wants to without invalidating our iterator. 724 | const std::vector &NodeVec = *CGI; 725 | CurSCC.initialize(NodeVec); 726 | ++CGI; 727 | 728 | collectSCC(CurSCC); 729 | } 730 | 731 | // Collect all functions in the module and add root function clones. 732 | CallBaseQueue cgcCallBaseQueue; 733 | std::set HardenFunctionsSet; 734 | std::list skippedFuncs; 735 | for (auto &F : M.getFunctionList()) { 736 | if (F.isDeclaration() || isAvailableExternally(F)) 737 | continue; 738 | 739 | // gather all the call bases in the function 740 | gatherCallBases(&F); 741 | 742 | // gather the estimate for the AFL map size 743 | FunctionToAFLMapSize[&F] = estimateAFLEdges(&F); 744 | // update the current size 745 | aflmap_size += FunctionToAFLMapSize[&F]; 746 | 747 | const std::string &FName = F.getName().str(); 748 | if (passListRegexMatch(HardenFunctionRegexes, FName)) { 749 | HardenFunctionsSet.insert(&F); 750 | } 751 | if (!isCGCRoot(F)) { 752 | // keep track of the skipped functions that are not in an SCC 753 | if (!isInSCC(&F)) skippedFuncs.push_back(&F); 754 | // BUG: if an SCC is a root SCC, the call sites of the root SCC that go outside the SCC will never be cloned 755 | continue; 756 | } 757 | assert(!isInSCC(&F) && 758 | "Cannot set a function belonging to an SCC as a root function to be cloned"); 759 | updateCallBaseQueue(cgcCallBaseQueue, &F); 760 | } 761 | 762 | // Harden each function against the optimizer 763 | for (Function *F: HardenFunctionsSet) 764 | hardenFunction(F); 765 | 766 | // if the map size is already at the max, just return 767 | if (aflmap_size >= MaxSize) return true; 768 | 769 | // Start from root function clones and iteratively clone the callgraph. 
770 | while (!cgcCallBaseQueue.empty()) { 771 | CallBase *CB = cgcCallBaseQueue.top(); 772 | cgcCallBaseQueue.pop(); 773 | cgc(CB, cgcCallBaseQueue); 774 | // `cgc` should never clone past the limit 775 | assert (aflmap_size <= MaxSize); 776 | } 777 | 778 | // now clone all the other calls if we still have budget 779 | if (CGCFill && aflmap_size < MaxSize) { 780 | oprint("Finished planned clones, still continuing to clone"); 781 | for (Function* F: skippedFuncs) { 782 | updateCallBaseQueue(cgcCallBaseQueue, F); 783 | } 784 | // restart the visit to clone all the remaining calls 785 | while (!cgcCallBaseQueue.empty()) { 786 | CallBase *CB = cgcCallBaseQueue.top(); 787 | cgcCallBaseQueue.pop(); 788 | cgc(CB, cgcCallBaseQueue); 789 | // `cgc` should never clone past the limit 790 | assert (aflmap_size <= MaxSize); 791 | } 792 | } 793 | 794 | // normalize names and mark all the clones 795 | for (Function *F: FunctionClones) { 796 | if (F->isDeclaration() || isAvailableExternally(*F)) 797 | continue; 798 | normalizeName(F); 799 | markClone(F); 800 | } 801 | oprint("\nTotal Clones: " << nclones); 802 | return true; 803 | } 804 | 805 | void getAnalysisUsage(AnalysisUsage &AU) const override { 806 | AU.addRequired(); 807 | AU.addRequired(); 808 | AU.addRequired(); 809 | } 810 | }; 811 | 812 | } 813 | 814 | char CallgraphClonePass::ID = 0; 815 | RegisterPass MP("cgc", "CallgraphClone Pass"); 816 | -------------------------------------------------------------------------------- /passes/cgc/cgc_old.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include "llvm/Transforms/Utils/CallPromotionUtils.h" 5 | #include "llvm/Transforms/Utils/Cloning.h" 6 | #include 7 | #include "llvm/Analysis/CFG.h" 8 | #include "llvm/Analysis/CallGraph.h" 9 | #include "llvm/Analysis/CallGraphSCCPass.h" 10 | #include "llvm/ADT/SCCIterator.h" 11 | #include "llvm/ADT/SmallVector.h" 12 | #include "llvm/IR/CFG.h" 13 | #include 14 | 15 | 
using namespace llvm; 16 | 17 | #define DEBUG_TYPE "cgc" 18 | #define cgcPassLog(M) LLVM_DEBUG(dbgs() << "CallgraphClonePass: " << M << "\n") 19 | #define oprint(s) LLVM_DEBUG(dbgs() << s << "\n") 20 | 21 | static cl::list 22 | HardenFunctions("cgc-harden-funcs", 23 | cl::desc("Specify all the comma-separated function regexes to harden against optimizer [default: main, LLVMFuzzerTestOneInput]"), 24 | cl::ZeroOrMore, cl::CommaSeparated, cl::NotHidden); 25 | 26 | static cl::opt 27 | ClonePrefix("cgc-clone-prefix", 28 | cl::desc("Specify the clone name prefix"), 29 | cl::init("__cgc_"), cl::NotHidden); 30 | 31 | // Fill 256 KiB (256*1024) by default, an average size of an L2 cache 32 | static cl::opt 33 | MaxSize("cgc-max-aflmap", 34 | cl::init(256*1024), cl::NotHidden, 35 | cl::desc("The maximum acceptable size for the AFL++ edge map")); 36 | 37 | static cl::opt 38 | CallsThreshold("cgc-calls-treshold", 39 | cl::init(0), cl::NotHidden, 40 | cl::desc("The threshold of incoming calls for which a function is considered an error function and not cloned\n\t[default: 0 -> set to treshold_factor*initial_number_of_funcs]")); 41 | 42 | static cl::opt 43 | CallsThresholdFactor("cgc-calls-treshold-factor", 44 | cl::init(0.25), cl::NotHidden, 45 | cl::desc("The threshold factor on which cgc-calls-treshold is computed if initialized to 0")); 46 | 47 | namespace { 48 | // This pass clones function calls based on decisions taken by CGC Planner on which 49 | // subgraph portion of the callgraph should be cloned 50 | class CallgraphClonePass : public ModulePass { 51 | 52 | // Keep track of all the functions belonging to strongly connected components 53 | std::set SCCFunctions; 54 | 55 | std::map> FunctionToSCC; 56 | std::map> FunctionToCallBases; 57 | std::map FunctionToAFLMapSize; 58 | 59 | // Keep track of cloned functions 60 | std::set FunctionClones; 61 | 62 | // All the functions that should not be cloned 63 | std::set FunctionBlacklist; 64 | 65 | // The number of times a function is originally 
called 66 | std::map CallsToFunction; 67 | 68 | public: 69 | static char ID; 70 | unsigned long unique_id = 0; 71 | unsigned long nclones = 0; 72 | unsigned long aflmap_size = 0; 73 | CallgraphClonePass() : ModulePass(ID) {} 74 | 75 | unsigned long getUniqueID() { 76 | return ++unique_id; 77 | } 78 | 79 | // Taken from: https://github.com/AFLplusplus 80 | // True if block has successors and it dominates all of them. 81 | bool isFullDominator(const BasicBlock *BB, const DominatorTree *DT) { 82 | if (succ_begin(BB) == succ_end(BB)) return false; 83 | for (const BasicBlock *SUCC : make_range(succ_begin(BB), succ_end(BB))) { 84 | // if the edge is critical it will be split 85 | if (isCriticalEdge(BB->getTerminator(), SUCC)) continue; 86 | if (!DT->dominates(BB, SUCC)) return false; 87 | } 88 | return true; 89 | } 90 | 91 | // Taken from: https://github.com/AFLplusplus 92 | // True if block has predecessors and it postdominates all of them. 93 | bool isFullPostDominator(const BasicBlock * BB, 94 | const PostDominatorTree *PDT) { 95 | if (pred_begin(BB) == pred_end(BB)) return false; 96 | for (const BasicBlock *PRED : make_range(pred_begin(BB), pred_end(BB))) { 97 | // if the edge is critical it will be split 98 | if (isCriticalEdge(PRED->getTerminator(), BB)) continue; 99 | if (!PDT->dominates(BB, PRED)) return false; 100 | } 101 | return true; 102 | } 103 | 104 | // Given a function, try to estimate the number of edges in the function that 105 | // will be instrumented by AFLplusplus. 106 | // It instruments edges by breaking all critical edges with a block in the middle 107 | // and avoiding instrumenting blocks which are full dominators, or full 108 | // post-dominators with multiple predecessors. 
109 | unsigned long estimateAFLEdges(Function *F) { 110 | DominatorTree *DT = &getAnalysis(*F).getDomTree(); 111 | PostDominatorTree *PDT = &getAnalysis(*F).getPostDomTree(); 112 | unsigned edges = 0; 113 | for (BasicBlock &BB: *F) { 114 | // Always count the entry block; otherwise do not count full dominators, or 115 | // full post-dominators with multiple predecessors. 116 | bool shouldInstrumentBlock = (&F->getEntryBlock() == &BB) || (!isFullDominator(&BB, DT) && 117 | !(isFullPostDominator(&BB, PDT) 118 | && !BB.getSinglePredecessor())); 119 | if (shouldInstrumentBlock) ++edges; 120 | 121 | Instruction *TI = BB.getTerminator(); 122 | if (TI->getNumSuccessors() > 1 && !isa(TI)) 123 | for (unsigned succ = 0, end = TI->getNumSuccessors(); succ != end; ++succ) { 124 | if (isCriticalEdge(TI, succ)) 125 | ++edges; 126 | } 127 | } 128 | return edges; 129 | } 130 | 131 | // Return true if `F` has been marked as a root from which to start cloning 132 | // by CGC Planner. 133 | bool isCGCRoot(Function &F) { 134 | MDNode* N; 135 | N = F.getMetadata(CGC_ROOT_ATTR); 136 | if (N == NULL) return false; 137 | return true; 138 | } 139 | 140 | // Return true if `CB` has been planned to be cloned by CGC Planner 141 | bool isPlannedClone(CallBase &CB) { 142 | MDNode* N; 143 | N = CB.getMetadata(CGC_CLONE_CALL_ATTR); 144 | if (N == NULL) return false; 145 | return true; 146 | } 147 | 148 | // Substitute all the trailing .x.y.z suffixes that llvm creates when having two functions 149 | // with the same name, with a unique ID to avoid long names 150 | std::string compressName(std::string name) { 151 | // find the last .num 152 | std::string newName = name; 153 | std::string::size_type idx = newName.rfind('.'); 154 | if (idx == std::string::npos || idx == newName.length()) { 155 | std::replace(newName.begin(), newName.end(), '.', '_'); 156 | return newName; 157 | } 158 | // ensure it is actually a number 159 | int random = atoi(newName.substr(idx+1).c_str()); 160 | 161 | while (random) { 162 | newName = newName.substr(0, 
idx); 163 | idx = newName.rfind('.'); 164 | if (idx == std::string::npos || idx == newName.length()) { 165 | std::replace(newName.begin(), newName.end(), '.', '_'); 166 | return newName + "_" + std::to_string(getUniqueID()); 167 | } 168 | random = atoi(newName.substr(idx+1).c_str()); 169 | } 170 | std::replace(newName.begin(), newName.end(), '.', '_'); 171 | return newName + "_" + std::to_string(getUniqueID()); 172 | } 173 | 174 | void setCloneName(Function *F) { 175 | // if the function name already contains the prefix do not add it 176 | if (F->getName().find(ClonePrefix) == std::string::npos) 177 | F->setName(ClonePrefix + F->getName()); 178 | // Compress the clone name to avoid .1452.3394.9208.13831.27566... 179 | // at the end 180 | F->setName(compressName(F->getName().str())); 181 | } 182 | 183 | // Visit a Constant AST to find and replace oldV with newV, returning a new constant (only ConstantStruct and ConstantExpr are traversed, any other constant is returned as is) 184 | Constant *replaceConstant(Constant *C, Constant *newV, Constant *oldV) { 185 | if (ConstantStruct *S = dyn_cast(C)) { 186 | SmallVector Ops; 187 | for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i) { 188 | Constant *op = S->getOperand(i); 189 | if (op == oldV) 190 | Ops.push_back(newV); 191 | else 192 | Ops.push_back(replaceConstant(op, newV, oldV)); 193 | } 194 | 195 | Constant* res = ConstantStruct::getAnon(Ops, true); 196 | return res; 197 | 198 | } else if (ConstantExpr *E = dyn_cast(C)) { 199 | SmallVector Ops; 200 | for (unsigned i = 0, e = E->getNumOperands(); i != e; ++i) { 201 | Constant *op = E->getOperand(i); 202 | if (op == oldV) 203 | Ops.push_back(newV); 204 | else 205 | Ops.push_back(replaceConstant(op, newV, oldV)); 206 | } 207 | 208 | Constant *res = E->getWithOperands(Ops); 209 | return res; 210 | 211 | } else { 212 | return C; 213 | } 214 | } 215 | 216 | // Fix the prologue of newF, by substituting the occurrences of oldF. 217 | // This allows us to clone functions without corrupting the prologue, which is 218 | // left untouched by cloneFunction. 
-fsanitize=function uses prologues 219 | void fixPrologue(Function *newF, Function *oldF) { 220 | if (!newF->hasPrologueData()) return; 221 | 222 | Constant *prologue = replaceConstant(newF->getPrologueData(), newF, oldF); 223 | newF->setPrologueData(prologue); 224 | } 225 | 226 | // `dest` is a clone of `source`, with the instructions mapped one-to-one in the `VMap`. 227 | // Update the `FunctionToCallBases` map to keep track of the CallBases in 228 | // `dest` that represent the clone CallBases of `source`. 229 | // Update the `FunctionToAFLMapSize` to keep track of the estimate for the 230 | // new clone (copied from `source`). 231 | void updateMetadata(Function *dest, Function *source, ValueToValueMapTy &VMap) { 232 | assert(FunctionToCallBases.find(source) != FunctionToCallBases.end()); 233 | FunctionToCallBases[dest]; 234 | for (CallBase *CB: FunctionToCallBases[source]) { 235 | CallBase *mappedCB = dyn_cast(VMap[CB]); 236 | assert(mappedCB); 237 | FunctionToCallBases[dest].insert(mappedCB); 238 | } 239 | 240 | assert(FunctionToAFLMapSize.find(source) != FunctionToAFLMapSize.end()); 241 | FunctionToAFLMapSize[dest] = FunctionToAFLMapSize[source]; 242 | } 243 | 244 | // Gather all the calls to `F`, starting from `I` and visiting recursively all 245 | // the users of `I`, to collect all the eventual calls to `F` originated by `I` 246 | // e.g. 
call bitcast F, with I being the bitcast 247 | void gatherEventualCallsTo(Function *F, Value *V, std::set &callsToF) { 248 | // If it is a call, just check if `F` is called 249 | if (CallBase * CB = dyn_cast(V)) { 250 | // check that the function is called and not passed as a parameter 251 | if (CB->getCalledOperand()->stripPointerCasts() == F) { 252 | callsToF.insert(CB); 253 | } 254 | // If it is a bitcast, visit all its users recursively 255 | } else if (BitCastOperator * BO = dyn_cast(V)) { 256 | for (User* user: BO->users()) { 257 | gatherEventualCallsTo(F, user, callsToF); 258 | } 259 | } 260 | } 261 | 262 | // Return true if `F` has multiple call sites so it makes sense to clone it 263 | bool shouldCloneFunction(Function *F) { 264 | unsigned int numCallsToF = 0; 265 | std::set callsToF; 266 | // Gather all the calls to the function `F` 267 | for (User* user: F->users()) { 268 | gatherEventualCallsTo(F, user, callsToF); 269 | } 270 | 271 | numCallsToF = callsToF.size(); 272 | 273 | // oprint(F->getName().str() << " - " << numCallsToF); 274 | // We should clone the function only if it is called more than once 275 | return numCallsToF > 1; 276 | } 277 | 278 | // Return true if cloning `F` would not exceed the size limit. 
279 | bool allowedToClone(Function *F) { 280 | unsigned long additional_edges = 0; 281 | // If `F` is in an SCC we will clone the whole SCC while cloning `F` 282 | if (isInSCC(F)) { 283 | assert(FunctionToSCC.find(F) != FunctionToSCC.end()); 284 | std::set SCC = FunctionToSCC[F]; 285 | for (Function *F: SCC) { 286 | assert(FunctionToAFLMapSize.find(F) != FunctionToAFLMapSize.end()); 287 | additional_edges += FunctionToAFLMapSize[F]; 288 | // For every function `F` in the SCC we will end up cloning also all the 289 | // functions called by `F` 290 | assert(FunctionToCallBases.find(F) != FunctionToCallBases.end()); 291 | for (CallBase *CB: FunctionToCallBases[F]) { 292 | Function *C = dyn_cast(CB->getCalledOperand()->stripPointerCasts()); 293 | if (C) { 294 | // If the called function is in the SCC do not count it here 295 | if (SCC.find(C) != SCC.end()) { 296 | continue; 297 | // Otherwise count it 298 | } else { 299 | // exclude from the count if it would not be cloned 300 | if (isPlannedClone(*CB) == false) continue; 301 | assert(FunctionToAFLMapSize.find(C) != FunctionToAFLMapSize.end()); 302 | additional_edges += FunctionToAFLMapSize[C]; 303 | } 304 | } 305 | } 306 | } 307 | // Otherwise just count `F` 308 | } else { 309 | assert(FunctionToAFLMapSize.find(F) != FunctionToAFLMapSize.end()); 310 | additional_edges += FunctionToAFLMapSize[F]; 311 | } 312 | // Reject the clone when the estimated map size would exceed `MaxSize` (explicit if/else kept for readability) 313 | if (aflmap_size + additional_edges > MaxSize) return false; 314 | else return true; 315 | } 316 | 317 | // Return true if the `SCC` has multiple call sites so it makes sense to clone it 318 | bool shouldCloneSCC(std::set &SCC) { 319 | unsigned int numCallsToSCC = 0; 320 | std::set callsToSCC; 321 | // Gather all the calls to each function in the `SCC` 322 | for (Function *F: SCC) { 323 | for (User* user: F->users()) { 324 | gatherEventualCallsTo(F, user, callsToSCC); 325 | } 326 | } 327 | 328 | // Count only the calls from outside the `SCC` 329 | for (Instruction *call: callsToSCC) { 330 | 
Function* callerF = call->getParent()->getParent(); 331 | if (SCC.find(callerF) == SCC.end()) 332 | ++numCallsToSCC; 333 | } 334 | 335 | // for (Function *F: SCC) 336 | // oprint(F->getName().str() << " - " << numCallsToSCC); 337 | // We should clone the SCC only if it is called more than once from outside the SCC 338 | return numCallsToSCC > 1; 339 | } 340 | 341 | // Visit the Strongly Connected Component where `F` belongs, to clone it as 342 | // a single node. Update `cgcFunctionQueue` accordingly to continue the visit. 343 | Function* addSCCClone(std::list &cgcFunctionQueue, Function* F) { 344 | std::map FtoClones; 345 | std::set SCCClones; 346 | 347 | assert(FunctionToSCC.find(F) != FunctionToSCC.end()); 348 | std::set SCC = FunctionToSCC[F]; 349 | 350 | // Clone all the functions in the SCC 351 | bool should_clone = shouldCloneSCC(SCC); 352 | for (Function *SCCfunc: SCC) { 353 | // Clone original function if required 354 | if (should_clone) { 355 | ValueToValueMapTy VMap; 356 | Function *clone = CloneFunction(SCCfunc, VMap); 357 | assert(clone); 358 | updateMetadata(clone, SCCfunc, VMap); 359 | trackClone(clone, cgcFunctionQueue); 360 | setCloneName(clone); 361 | fixPrologue(clone, SCCfunc); 362 | FtoClones[SCCfunc] = clone; 363 | SCCClones.insert(clone); 364 | } else { 365 | // Set the original function as a clone without updating the number of clones 366 | trackClone(SCCfunc, cgcFunctionQueue, /*update=*/false); 367 | FtoClones[SCCfunc] = SCCfunc; 368 | SCCClones.insert(SCCfunc); 369 | } 370 | } 371 | 372 | // Now rewire the calls inside the SCC clones 373 | for (Function *SCCclone: SCCClones) { 374 | assert(FunctionToCallBases.find(SCCclone) != FunctionToCallBases.end()); 375 | for (CallBase *CB: FunctionToCallBases[SCCclone]) { 376 | 377 | // For direct calls, simply redirect target to new clone 378 | Function *C = dyn_cast(CB->getCalledOperand()->stripPointerCasts()); 379 | if (C) { 380 | Function *clone; 381 | // If the called function is in the SCC use the clone we 
generated 382 | if (FtoClones.find(C) != FtoClones.end()) { 383 | clone = FtoClones[C]; 384 | // Otherwise generate a clone 385 | } else { 386 | // clone only if it is a planned clone, otherwise leave as is 387 | // NB: this assumes that all calls to `C` from `SCC` 388 | // have been planned equally to be cloned or not, otherwise 389 | // calls to `C` will not be consistent inside `SCC` 390 | if (isPlannedClone(*CB) == false) continue; 391 | clone = addFunctionClone(cgcFunctionQueue, C); 392 | FtoClones[C] = clone; 393 | } 394 | if (clone->getFunctionType() != CB->getCalledOperand()->getType()) 395 | CB->setCalledFunction(CB->getFunctionType(), CastInst::CreatePointerCast(clone, CB->getCalledOperand()->getType(), "", CB)); 396 | else 397 | CB->setCalledFunction(clone); 398 | } 399 | } 400 | } 401 | return FtoClones[F]; 402 | } 403 | 404 | // Clone the function `F`, and insert it in the `cgcFunctionQueue` to continue 405 | // the visit 406 | Function* addFunctionClone(std::list &cgcFunctionQueue, Function *F) { 407 | 408 | assert(!isClone(F)); 409 | 410 | // bail out if blacklisted 411 | if (isBlacklisted(F)) 412 | return F; 413 | 414 | // bail out if cloning `F` would exceed the max size 415 | if (!allowedToClone(F)) 416 | return F; 417 | 418 | if (isInSCC(F)) 419 | return addSCCClone(cgcFunctionQueue, F); 420 | 421 | // Clone the original function if required 422 | if (shouldCloneFunction(F)) { 423 | ValueToValueMapTy VMap; 424 | Function *clone = CloneFunction(F, VMap); 425 | assert(clone); 426 | updateMetadata(clone, F, VMap); 427 | trackClone(clone, cgcFunctionQueue); 428 | 429 | setCloneName(clone); 430 | fixPrologue(clone, F); 431 | 432 | // Add the clone to the functions to process. 433 | cgcFunctionQueue.push_back(clone); 434 | 435 | return clone; 436 | } else { 437 | // Set the original function as a clone without updating the number of clones 438 | trackClone(F, cgcFunctionQueue, /*update=*/false); 439 | // Add the target to the functions to process. 
440 | cgcFunctionQueue.push_back(F); 441 | return F; 442 | } 443 | } 444 | 445 | // Visit funciton `F` to clone all the functions it calls, uniqely per CallBase 446 | void cgc(Function *F, std::list &cgcFunctionQueue) { 447 | assert(!isInSCC(F)); 448 | assert(FunctionToCallBases.find(F) != FunctionToCallBases.end()); 449 | 450 | // bail out if blacklisted 451 | if (isBlacklisted(F)) 452 | return; 453 | 454 | // For each call in the given function clone: 455 | for (CallBase *CB: FunctionToCallBases[F]) { 456 | 457 | // For direct calls, simply redirect target to new clone 458 | Function *C = dyn_cast(CB->getCalledOperand()->stripPointerCasts()); 459 | if (C) { 460 | // clone only if is a planned clone, otherwise leave as is 461 | if (isPlannedClone(*CB) == false) continue; 462 | Function *clone = addFunctionClone(cgcFunctionQueue, C); 463 | if (clone->getFunctionType() != CB->getCalledOperand()->getType()) 464 | CB->setCalledFunction(CB->getFunctionType(), CastInst::CreatePointerCast(clone, CB->getCalledOperand()->getType(), "", CB)); 465 | else 466 | CB->setCalledFunction(clone); 467 | continue; 468 | } 469 | } 470 | } 471 | 472 | // Check if `F` just calls himself 473 | bool isSimplyRecursive(Function *F) { 474 | for (auto &BB : *F) 475 | for (auto &I : BB.instructionsWithoutDebug()) 476 | if (auto *CB = dyn_cast(&I)) { 477 | Function *Callee = dyn_cast(CB->getCalledOperand()->stripPointerCasts()); 478 | 479 | // Function calls itself 480 | if (Callee == F) { 481 | return true; 482 | } 483 | } 484 | return false; 485 | } 486 | 487 | // Visit the `SCC` to gather the informations needed in `FunctionToSCC` and 488 | // `SCCFunctions` 489 | void collectSCC(CallGraphSCC &SCC) { 490 | std::set Functions; 491 | for (CallGraphNode *I : SCC) { 492 | Functions.insert(I->getFunction()); 493 | } 494 | 495 | // If the SCC contains multiple nodes we know there is recursion. 
496 | if (Functions.size() != 1) { 497 | for (Function *F : Functions) { 498 | SCCFunctions.insert(F); 499 | assert(!F->doesNotRecurse()); 500 | 501 | // A function should belong to a single SCC 502 | assert(FunctionToSCC.find(F) == FunctionToSCC.end()); 503 | FunctionToSCC[F] = Functions; 504 | } 505 | // Take into account simple recursive functions 506 | } else { 507 | Function *F = *Functions.begin(); 508 | if (F && isSimplyRecursive(F)) { 509 | SCCFunctions.insert(F); 510 | assert(!F->doesNotRecurse()); 511 | 512 | assert(FunctionToSCC.find(F) == FunctionToSCC.end()); 513 | FunctionToSCC[F] = Functions; 514 | } 515 | } 516 | } 517 | 518 | // Return true if `F` is blacklisted 519 | bool isBlacklisted(Function *F) { 520 | return FunctionBlacklist.find(F) != FunctionBlacklist.end(); 521 | } 522 | 523 | // Return true if `F` is part of a SCC 524 | bool isInSCC(Function *F) { 525 | return SCCFunctions.find(F) != SCCFunctions.end(); 526 | } 527 | 528 | // Return true if `F` is a clone of a function 529 | bool isClone(Function *F) { 530 | return FunctionClones.find(F) != FunctionClones.end(); 531 | } 532 | 533 | // Add `F` to the function clones we keep track of, and update stats 534 | void trackClone(Function *F, std::list &cgcFunctionQueue, bool update=true) { 535 | FunctionClones.insert(F); 536 | if (update) { 537 | ++nclones; 538 | aflmap_size += FunctionToAFLMapSize[F]; 539 | } 540 | LLVM_DEBUG(dbgs() << "\r" << nclones << " - " << aflmap_size << " "); 541 | } 542 | 543 | // Sometimes LLVM build the CallGraph withouth taking into considerations calls 544 | // that pass through a `bitcast` operation. 
We fix this here, revisiting the 545 | // functions and updating the CallGraph 546 | void fixCallGraph(Module &M, CallGraph *CG) { 547 | for (auto &F : M.getFunctionList()) { 548 | if (F.isDeclaration()) 549 | continue; 550 | for(auto &BB: F) { 551 | for (auto &I : BB) { 552 | if (CallBase * CB = dyn_cast(&I)) { 553 | if (CB->isInlineAsm()) continue; 554 | 555 | Function *Called = dyn_cast(CB->getCalledOperand()->stripPointerCasts()); 556 | if (!Called || Called->isDeclaration() || Called->isIntrinsic()) continue; 557 | 558 | // If `Called` actually points to a function, but getCalledFunction 559 | // returns null then we have spotted a missing function 560 | if (CB->getCalledFunction() == nullptr) { 561 | CallGraphNode *Node = CG->getOrInsertFunction(&F); 562 | Node->addCalledFunction(CB, CG->getOrInsertFunction(Called)); 563 | } 564 | } 565 | } 566 | } 567 | } 568 | } 569 | 570 | // Initialize the `FunctionToCallBases` structure with all the existing CallBases in `F` 571 | // and update info on the functions called 572 | void gatherCallBases(Function *F) { 573 | // Initialize the set in case no call is present in the function 574 | FunctionToCallBases[F]; 575 | for (BasicBlock &BB: *F) { 576 | for (Instruction &I : BB) { 577 | // Gather all call bases 578 | if (CallBase * CB = dyn_cast(&I)) { 579 | 580 | // Only if they represent direct calls to functions 581 | if (CB->isInlineAsm()) continue; 582 | Function *Called = dyn_cast(CB->getCalledOperand()->stripPointerCasts()); 583 | if (!Called || Called->isDeclaration() || Called->isIntrinsic()) continue; 584 | 585 | // Insert into the map 586 | FunctionToCallBases[F].insert(CB); 587 | 588 | // Update the info on number of times a function is called 589 | CallsToFunction[Called]++; 590 | } 591 | } 592 | } 593 | } 594 | 595 | // The optimizer may decide to inline functions and simplify them. Or directly simplify 596 | // static/internal ones. 
Try to persuade it to avoid simplifying functions we want as is, 597 | // by setting all the functions `F` calls to not static and not inlinable. 598 | void hardenFunction(Function *F) { 599 | for (BasicBlock &BB: *F) { 600 | for (Instruction &I : BB) { 601 | // Gather all call bases 602 | if (CallBase * CB = dyn_cast(&I)) { 603 | 604 | // Only if they represent direct calls to functions 605 | if (CB->isInlineAsm()) continue; 606 | Function *Called = dyn_cast(CB->getCalledOperand()->stripPointerCasts()); 607 | if (!Called || Called->isDeclaration() || Called->isIntrinsic()) continue; 608 | 609 | // Harden from inlining 610 | if (Called->hasFnAttribute(Attribute::InlineHint)) 611 | Called->removeFnAttr(Attribute::InlineHint); 612 | if (Called->hasFnAttribute(Attribute::AlwaysInline)) 613 | Called->removeFnAttr(Attribute::AlwaysInline); 614 | Called->addFnAttr(Attribute::NoInline); 615 | 616 | // Harden from static/internal-driven simplifications 617 | GlobalValue *GVF = dyn_cast(Called); 618 | GVF->setVisibility(GlobalValue::DefaultVisibility); 619 | GVF->setLinkage(GlobalValue::ExternalLinkage); 620 | } 621 | } 622 | } 623 | } 624 | 625 | // Add the function `F` to the blacklist if the number of calls to it is higher 626 | // than the user threshold. 
If `F` belong to a SCC, add the SCC to the blacklist 627 | void maybeAddToBlacklist(Function *F) { 628 | 629 | // if already in the blacklist bail out 630 | if (isBlacklisted(F)) 631 | return; 632 | 633 | // get the number of times `F` is called 634 | unsigned long numCalls = CallsToFunction[F]; 635 | 636 | if (numCalls > CallsThreshold) { 637 | 638 | // if the function was in a SCC add all the functions 639 | if (isInSCC(F)) { 640 | assert(FunctionToSCC.find(F) != FunctionToSCC.end()); 641 | for (Function *sccF: FunctionToSCC[F]) { 642 | oprint("[-] excluding " << sccF->getName().str() << " due to " << F->getName().str() << " with " << numCalls << " calls"); 643 | FunctionBlacklist.insert(sccF); 644 | } 645 | // otherwise add just the function 646 | } else { 647 | oprint("[-] excluding " << F->getName().str() << " with " << numCalls << " calls"); 648 | FunctionBlacklist.insert(F); 649 | } 650 | } 651 | } 652 | 653 | // Visit `F` and all the functions called by `F`, adding them to `visitedFuncs` 654 | void visitCalledFunctions(Function* F, std::set &visitedFuncs) { 655 | // bail out if already visited 656 | if (visitedFuncs.find(F) != visitedFuncs.end()) return; 657 | 658 | // insert into the visited functions 659 | visitedFuncs.insert(F); 660 | 661 | for (auto &BB : *F) 662 | for (auto &I : BB) { 663 | if (CallBase * CB = dyn_cast(&I)) { 664 | if (CB->isInlineAsm()) continue; 665 | 666 | Function *C = dyn_cast(CB->getCalledOperand()->stripPointerCasts()); 667 | if (C) { 668 | if (C->isDeclaration() || C->isIntrinsic()) 669 | continue; 670 | 671 | visitCalledFunctions(C, visitedFuncs); 672 | } 673 | } 674 | } 675 | } 676 | 677 | virtual bool runOnModule(Module &M) override { 678 | cgcPassLog("Running..."); 679 | 680 | // Initialize regular expressions for functions to harden against optimizer 681 | std::vector HardenFunctionRegexes; 682 | if (HardenFunctions.empty()) { 683 | HardenFunctions.push_back("main"); 684 | 
HardenFunctions.push_back("LLVMFuzzerTestOneInput"); 685 | } 686 | passListRegexInit(HardenFunctionRegexes, HardenFunctions); 687 | 688 | // Visit the strongly connected components to identify recursive functions 689 | CallGraph *CG = &getAnalysis().getCallGraph(); 690 | 691 | // LLVM does not consider edges like `call (bitcast (func))` so insert them. 692 | // really llvm?? 693 | fixCallGraph(M, CG); 694 | 695 | // Walk the callgraph in bottom-up SCC order. 696 | scc_iterator CGI = scc_begin(CG); 697 | 698 | CallGraphSCC CurSCC(*CG, &CGI); 699 | while (!CGI.isAtEnd()) { 700 | // Copy the current SCC and increment past it so that the pass can hack 701 | // on the SCC if it wants to without invalidating our iterator. 702 | const std::vector &NodeVec = *CGI; 703 | CurSCC.initialize(NodeVec); 704 | ++CGI; 705 | 706 | collectSCC(CurSCC); 707 | } 708 | 709 | std::set visitedFuncs; 710 | 711 | // Collect all functions in the module and add root function clones. 712 | std::list cgcFunctionQueue; 713 | std::set HardenFunctionsSet; 714 | for (auto &F : M.getFunctionList()) { 715 | if (F.isDeclaration()) 716 | continue; 717 | 718 | // gather all the call bases in the function 719 | gatherCallBases(&F); 720 | 721 | // gather the estimation for the AFL map size 722 | FunctionToAFLMapSize[&F] = estimateAFLEdges(&F); 723 | // update the current size 724 | aflmap_size += FunctionToAFLMapSize[&F]; 725 | 726 | const std::string &FName = F.getName().str(); 727 | if (passListRegexMatch(HardenFunctionRegexes, FName)) { 728 | HardenFunctionsSet.insert(&F); 729 | } 730 | if (!isCGCRoot(F)) 731 | continue; 732 | cgcFunctionQueue.push_back(&F); 733 | 734 | // visit the path starting from F and count called functions 735 | visitCalledFunctions(&F, visitedFuncs); 736 | } 737 | 738 | // need to order roots based on BFS id if multiple roots 739 | assert(cgcFunctionQueue.size() == 1 && "only single root supported"); 740 | 741 | // Keep track of the initial number of functions used in the call 
path 742 | unsigned long initialNfuncs = visitedFuncs.size(); 743 | 744 | // if CallsThreshold==0 automatically tune based on the number of functions 745 | if (CallsThreshold == 0) { 746 | CallsThreshold = CallsThresholdFactor * initialNfuncs; 747 | oprint("Threshold for error functions: " << CallsThreshold); 748 | } 749 | 750 | // Now revisit all the functions to fill the blacklist 751 | for (auto &F : M.getFunctionList()) { 752 | if (F.isDeclaration()) 753 | continue; 754 | 755 | // fill the function black list if we detect it as an error function 756 | maybeAddToBlacklist(&F); 757 | } 758 | 759 | // Harden each function against the optimizer 760 | for (Function *F: HardenFunctionsSet) 761 | hardenFunction(F); 762 | 763 | // Start from root function clones and iteratively clone the callgraph. 764 | while (!cgcFunctionQueue.empty()) { 765 | Function *F = *cgcFunctionQueue.begin(); 766 | cgcFunctionQueue.erase(cgcFunctionQueue.begin()); 767 | cgc(F, cgcFunctionQueue); 768 | // `cgc` should never clone past the limit 769 | assert (aflmap_size <= MaxSize); 770 | } 771 | oprint("\nTotal Clones: " << nclones); 772 | return true; 773 | } 774 | 775 | void getAnalysisUsage(AnalysisUsage &AU) const override { 776 | AU.addRequired(); 777 | AU.addRequired(); 778 | AU.addRequired(); 779 | } 780 | }; 781 | 782 | } 783 | 784 | char CallgraphClonePass::ID = 0; 785 | RegisterPass MP("cgc", "CallgraphClone Pass"); 786 | -------------------------------------------------------------------------------- /passes/dump-call-tree/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for the dump-call-tree pass 2 | ROOT=../.. 
3 | 4 | PASSNAME := dump-call-tree 5 | OBJS := dump-call-tree.o 6 | 7 | include ../Makefile.inc 8 | 9 | -------------------------------------------------------------------------------- /passes/dump-call-tree/dump-call-tree.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace llvm; 8 | 9 | #define DEBUG_TYPE "DumpCallTree" 10 | #define DumpCallTreePassLog(M) LLVM_DEBUG(dbgs() << "DumpCallTreePass: " << M << "\n") 11 | #define oprint(s) outs() << s << "\n" 12 | 13 | static cl::opt 14 | CallTreeStart("call-tree-start", 15 | cl::desc("Specify the function from where to start the visit of the call tree to dump"), 16 | cl::ZeroOrMore, cl::CommaSeparated, cl::NotHidden); 17 | 18 | static cl::opt 19 | OutFilename("dump-tree-file", 20 | cl::desc("The file where to dump the called tree"), 21 | cl::init("call-tree.log"), cl::NotHidden); 22 | 23 | namespace { 24 | 25 | // Dump the subtree of the CFG functions starting from `call-tree-start` 26 | class DumpCallTreePass : public ModulePass { 27 | 28 | std::set CalledSet; 29 | std::set ToVisit; 30 | 31 | public: 32 | static char ID; 33 | DumpCallTreePass() : ModulePass(ID) {} 34 | 35 | void visit(Function* F) { 36 | CalledSet.insert(F->getName().str()); 37 | // For each call in the given function: 38 | for (auto &BB : *F) 39 | for (auto &I : BB) { 40 | if (CallBase * CB = dyn_cast(&I)) { 41 | if (CB->isInlineAsm()) continue; 42 | 43 | Function *C = dyn_cast(CB->getCalledOperand()->stripPointerCasts()); 44 | if (C) { 45 | if (C->isDeclaration() || C->isIntrinsic()) 46 | continue; 47 | 48 | // If never saw the function add to the visit 49 | if (CalledSet.find(C->getName().str()) == CalledSet.end()) 50 | ToVisit.insert(C); 51 | } 52 | } 53 | } 54 | } 55 | 56 | virtual bool runOnModule(Module &M) { 57 | for (auto &F : M.getFunctionList()) { 58 | if (F.isDeclaration()) 59 | continue; 60 | if 
(!F.getName().equals(CallTreeStart)) 61 | continue; 62 | ToVisit.insert(&F); 63 | break; 64 | } 65 | 66 | // Start from root function and iteratively visit the callgraph. 67 | while (!ToVisit.empty()) { 68 | Function *F = *ToVisit.begin(); 69 | ToVisit.erase(ToVisit.begin()); 70 | visit(F); 71 | } 72 | 73 | std::ofstream ofile; 74 | ofile.open(OutFilename, std::ios::out | std::ios::trunc); 75 | assert(ofile.is_open()); 76 | 77 | for (auto s: CalledSet) { 78 | ofile << s << std::endl; 79 | } 80 | ofile.flush(); 81 | ofile.close(); 82 | 83 | return false; 84 | } 85 | }; 86 | 87 | } 88 | 89 | char DumpCallTreePass::ID = 0; 90 | RegisterPass MP("dump-call-tree", "DumpCallTree Pass"); 91 | 92 | -------------------------------------------------------------------------------- /passes/dump-calls/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for the dump-calls pass 2 | ROOT=../.. 3 | 4 | PASSNAME := dump-calls 5 | OBJS := dump-calls.o 6 | 7 | include ../Makefile.inc 8 | 9 | -------------------------------------------------------------------------------- /passes/dump-calls/dump-calls.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "llvm/IR/IRBuilder.h" 8 | 9 | using namespace llvm; 10 | 11 | #define DEBUG_TYPE "DumpCalls" 12 | #define DumpCallsPassLog(M) LLVM_DEBUG(dbgs() << "DumpCallsPass: " << M << "\n") 13 | #define oprint(s) outs() << s << "\n" 14 | 15 | namespace { 16 | 17 | // Dump the subtree of the CFG functions starting from `call-tree-start` 18 | class DumpCallsPass : public ModulePass { 19 | 20 | public: 21 | static char ID; 22 | DumpCallsPass() : ModulePass(ID) {} 23 | 24 | // Return true if `F` has an available_externally linkage (i.e. 
equivalent to a declaration) 25 | bool isAvailableExternally(Function &F) { 26 | GlobalValue::LinkageTypes L = F.getLinkage(); 27 | return GlobalValue::isAvailableExternallyLinkage(L); 28 | } 29 | 30 | // Return whether the function has been marked as a clone 31 | static bool hasCloneMark(Function *F) { 32 | MDNode* N; 33 | assert(F); 34 | N = F->getMetadata(CGC_CLONE_MARK); 35 | if (N == NULL) return false; 36 | return true; 37 | } 38 | 39 | void createPrintCall(Module &M, Function &F, const std::string &to_print, const std::string &prefix, const std::string &suffix, IRBuilder<> &builder) { 40 | auto &CTX = M.getContext(); 41 | PointerType *PrintfArgTy = PointerType::getUnqual(Type::getInt8Ty(CTX)); 42 | 43 | // STEP 1: Inject the declaration of printf 44 | // ---------------------------------------- 45 | // Create (or _get_ in cases where it's already available) the following 46 | // declaration in the IR module: 47 | // declare i32 @printf(i8*, ...) 48 | // It corresponds to the following C declaration: 49 | // int printf(char *, ...) 
50 | FunctionType *PrintfTy = FunctionType::get( 51 | IntegerType::getInt32Ty(CTX), 52 | PrintfArgTy, 53 | /*IsVarArgs=*/true); 54 | 55 | FunctionCallee Printf = M.getOrInsertFunction("printf", PrintfTy); 56 | 57 | // Set attributes as per inferLibFuncAttributes in BuildLibCalls.cpp 58 | Function *PrintfF = dyn_cast(Printf.getCallee()); 59 | PrintfF->setDoesNotThrow(); 60 | PrintfF->addParamAttr(0, Attribute::NoCapture); 61 | PrintfF->addParamAttr(0, Attribute::ReadOnly); 62 | 63 | // STEP 2: Inject a global variable that will hold the printf format string 64 | // ------------------------------------------------------------------------ 65 | llvm::Constant *PrintfFormatStr = llvm::ConstantDataArray::getString( 66 | CTX, prefix + to_print + suffix); 67 | 68 | Constant *PrintfFormatStrVar = 69 | M.getOrInsertGlobal(to_print, PrintfFormatStr->getType()); 70 | dyn_cast(PrintfFormatStrVar)->setInitializer(PrintfFormatStr); 71 | 72 | // Printf requires i8*, but PrintfFormatStrVar is an array: [n x i8]. 
Add 73 | // a cast: [n x i8] -> i8* 74 | llvm::Value *FormatStrPtr = 75 | builder.CreatePointerCast(PrintfFormatStrVar, PrintfArgTy, "formatStr"); 76 | 77 | // Finally, inject a call to printf 78 | builder.CreateCall( 79 | Printf, {FormatStrPtr}); 80 | } 81 | 82 | void visit(Function* F) { 83 | for (auto &BB : *F) 84 | for (auto &I : BB.instructionsWithoutDebug()) { 85 | if (CallBase * CB = dyn_cast(&I)) { 86 | if (CB->isInlineAsm()) continue; 87 | 88 | Function *Called = dyn_cast(CB->getCalledOperand()->stripPointerCasts()); 89 | if (!Called || Called->isDeclaration() || isAvailableExternally(*Called)|| Called->isIntrinsic()) continue; 90 | 91 | // add the logging call 92 | IRBuilder<> IBuilder(&I); 93 | const std::string &to_print = Called->getName().str(); 94 | if (hasCloneMark(Called)) 95 | createPrintCall(*F->getParent(), *F, CGC_CLONE_MARK + to_print, ">>> |", "\n", IBuilder); 96 | else 97 | createPrintCall(*F->getParent(), *F, to_print, ">>> |", "\n", IBuilder); 98 | } 99 | } 100 | } 101 | 102 | virtual bool runOnModule(Module &M) { 103 | for (auto &F : M.getFunctionList()) { 104 | if (F.isDeclaration() || isAvailableExternally(F)) continue; 105 | visit(&F); 106 | } 107 | 108 | return true; 109 | } 110 | }; 111 | 112 | } 113 | 114 | char DumpCallsPass::ID = 0; 115 | RegisterPass MP("dump-calls", "DumpCalls Pass"); 116 | 117 | -------------------------------------------------------------------------------- /passes/dump-extlib/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for the dump-extlib pass 2 | ROOT=../.. 
3 | 4 | PASSNAME := dump-extlib 5 | OBJS := dump-extlib.o 6 | 7 | include ../Makefile.inc 8 | 9 | -------------------------------------------------------------------------------- /passes/dump-extlib/dump-extlib.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | using namespace llvm; 7 | 8 | #define DEBUG_TYPE "DumpExtlib" 9 | #define DumpExtlibPassLog(M) LLVM_DEBUG(dbgs() << "DumpExtlibPass: " << M << "\n") 10 | #define oprint(s) (outs() << s << "\n") 11 | 12 | static cl::list 13 | Whitelist("dumpext-whitelist", 14 | cl::desc("Specify the comma-separated path regexes for the whitelist"), 15 | cl::OneOrMore, cl::CommaSeparated, cl::NotHidden); 16 | 17 | static cl::list 18 | Blacklist("dumpext-blacklist", 19 | cl::desc("Specify the comma-separated path regexes for the blacklist"), 20 | cl::OneOrMore, cl::CommaSeparated, cl::NotHidden); 21 | 22 | static cl::opt 23 | OutFilename("dumpext-out", 24 | cl::desc("Specify the name of the file where the function list will be saved [- for stdout]"), 25 | cl::init("-"), cl::NotHidden); 26 | 27 | static cl::opt 28 | Dbg("dumpext-dbg", cl::desc("Debug Mode"), 29 | cl::init(false)); 30 | 31 | namespace { 32 | // This pass tries to find in the module all the function that are identified 33 | // being part of linked static libraries. 34 | // It uses a really simple euristic where it takes a whitelist and assumes 35 | // a function being a library one if the DebugInfo of that function points to 36 | // a path not containing any token in the whitelist. 37 | // 38 | // e.g. 
whitelist: curl 39 | // path: /src/curl/lib/ -> ok 40 | // path: /src/nghttp2/lib/ -> lib function 41 | // 42 | // The pass writes a function list to be passed to llvm-extract 43 | class DumpExtlibPass : public ModulePass { 44 | 45 | public: 46 | static char ID; 47 | DumpExtlibPass() : ModulePass(ID) {} 48 | 49 | std::string dirnameOf(const std::string& fname) 50 | { 51 | size_t pos = fname.find_last_of("/"); 52 | return (std::string::npos == pos) 53 | ? fname 54 | : fname.substr(0, pos); 55 | } 56 | 57 | std::string getFileDirectory(Function &F) { 58 | if (DISubprogram *Loc = F.getSubprogram()) { 59 | // The path from the CWD to the source file, while building 60 | StringRef File = Loc->getFilename(); 61 | // CWD while building 62 | StringRef Directory = Loc->getDirectory(); 63 | 64 | std::string Path = Directory.str() + "/" + File.str(); 65 | return dirnameOf(Path); 66 | } else { 67 | // oprint(F.getName()); 68 | // assert(false); 69 | // No location metadata available 70 | return ""; 71 | } 72 | } 73 | 74 | std::string getCompilationDirectory(Function &F) { 75 | if (DISubprogram *Loc = F.getSubprogram()) { 76 | // The path from the CWD to the source file, while building 77 | // StringRef File = Loc->getFilename(); 78 | // CWD while building 79 | StringRef Directory = Loc->getDirectory(); 80 | return Directory.str(); 81 | } else { 82 | // oprint(F.getName()); 83 | // assert(false); 84 | // No location metadata available 85 | return ""; 86 | } 87 | } 88 | 89 | virtual bool runOnModule(Module &M) { 90 | 91 | // Initialize regular expressions for whitelist 92 | std::vector WhitelistRegexes; 93 | assert (!Whitelist.empty()); 94 | passListRegexInit(WhitelistRegexes, Whitelist); 95 | 96 | // Initialize regular expressions for blacklist 97 | std::vector BlacklistRegexes; 98 | if (Blacklist.empty()) { 99 | Blacklist.push_back("EMPTY_BLACKLIST_SHOULD_NOT_MATCH_ANYTHING"); 100 | } 101 | passListRegexInit(BlacklistRegexes, Blacklist); 102 | 103 | std::vector ToExtract; 104 
| std::map callsToFunc; 105 | 106 | // first remove all the aliases, since once we extract the functions we may invalidate some 107 | std::set aliasesToRemove; 108 | for (GlobalAlias &A: M.getAliasList()) { 109 | A.replaceAllUsesWith(A.getAliasee()); 110 | aliasesToRemove.insert(&A); 111 | } 112 | for (GlobalAlias *A: aliasesToRemove) A->eraseFromParent(); 113 | 114 | for (auto &F : M.getFunctionList()) { 115 | if (F.isDeclaration()) 116 | continue; 117 | 118 | const std::string &DirName = getFileDirectory(F); 119 | const std::string &CompilationDir = getCompilationDirectory(F); 120 | 121 | // if the function does not have any debug info stay safe and assume 122 | // that it belongs to the original program 123 | if (DirName == "") continue; 124 | 125 | // If either the directory of the source file of the function or 126 | // the compilation directory matches the whitelist then keep the function 127 | if (passListRegexMatch(WhitelistRegexes, DirName) || passListRegexMatch(WhitelistRegexes, CompilationDir)) { 128 | if (Dbg) { 129 | oprint("Keep " << F.getName().str() << ": " << DirName); 130 | } 131 | 132 | // only if the blacklist does not match then skip extraction and leave it in the bitcode 133 | if (!passListRegexMatch(BlacklistRegexes, DirName) && !passListRegexMatch(BlacklistRegexes, CompilationDir)) { 134 | // continue and skip the extraction 135 | continue; 136 | } 137 | } 138 | 139 | ToExtract.push_back(&F); 140 | if (Dbg) { 141 | oprint("Remove " << F.getName().str() << ": " << DirName); 142 | } 143 | } 144 | 145 | std::string result = ""; 146 | for (Function *F: ToExtract) { 147 | result.append(" -func="); 148 | // result.append("^"); 149 | result.append(F->getName().str()); 150 | // result.append("$|"); 151 | } 152 | // result.replace(result.rfind("|"), 1, ")"); 153 | 154 | if (OutFilename == "-") { 155 | outs() << result << "\n"; 156 | } else { 157 | std::ofstream ofile; 158 | ofile.open(OutFilename, std::ios::out | std::ios::trunc); 159 | 
assert(ofile.is_open()); 160 | 161 | ofile << result; 162 | ofile.flush(); 163 | ofile.close(); 164 | } 165 | return true; 166 | } 167 | }; 168 | 169 | } 170 | 171 | char DumpExtlibPass::ID = 0; 172 | RegisterPass MP("dump-extlib", "DumpExtlib Pass"); 173 | 174 | -------------------------------------------------------------------------------- /passes/func-stats/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for the func-stats pass 2 | ROOT=../.. 3 | 4 | PASSNAME := func-stats 5 | OBJS := func-stats.o 6 | 7 | include ../Makefile.inc 8 | 9 | -------------------------------------------------------------------------------- /passes/func-stats/func-stats.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include "llvm/Analysis/CFG.h" 5 | 6 | using namespace llvm; 7 | 8 | #define DEBUG_TYPE "FuncStats" 9 | #define FuncStatsPassLog(M) LLVM_DEBUG(dbgs() << "FuncStatsPass: " << M << "\n") 10 | #define oprint(s) outs() << s << "\n" 11 | 12 | static cl::opt 13 | DumpCalls("dump-calls", 14 | cl::desc("Dump all non unique calls"), 15 | cl::init(false), cl::NotHidden); 16 | 17 | static cl::opt 18 | DumpGraph("dump-graph", 19 | cl::desc("Dump the Call Graph"), 20 | cl::init(false), cl::NotHidden); 21 | 22 | static cl::opt 23 | DumpWeights("dump-weights", 24 | cl::desc("Dump the CGC weights"), 25 | cl::init(false), cl::NotHidden); 26 | 27 | static cl::opt 28 | RootFunction("dump-weights-root", 29 | cl::desc("Specify the root functions where to start dumping weights"), 30 | cl::init(""), cl::NotHidden); 31 | 32 | namespace { 33 | 34 | class FuncStatsPass : public ModulePass { 35 | 36 | public: 37 | static char ID; 38 | FuncStatsPass() : ModulePass(ID) {} 39 | 40 | // Return true if `F` has an available_externally linkage (i.e. 
equivalent to a declaration) 41 | bool isAvailableExternally(Function &F) { 42 | GlobalValue::LinkageTypes L = F.getLinkage(); 43 | return GlobalValue::isAvailableExternallyLinkage(L); 44 | } 45 | 46 | // Taken from: https://github.com/AFLplusplus 47 | // True if block has successors and it dominates all of them. 48 | bool isFullDominator(const BasicBlock *BB, const DominatorTree *DT) { 49 | if (succ_begin(BB) == succ_end(BB)) return false; 50 | for (const BasicBlock *SUCC : make_range(succ_begin(BB), succ_end(BB))) { 51 | // if the edge is critical it will be splitted 52 | if (isCriticalEdge(BB->getTerminator(), SUCC)) continue; 53 | if (!DT->dominates(BB, SUCC)) return false; 54 | } 55 | return true; 56 | } 57 | 58 | // Taken from: https://github.com/AFLplusplus 59 | // True if block has predecessors and it postdominates all of them. 60 | bool isFullPostDominator(const BasicBlock * BB, 61 | const PostDominatorTree *PDT) { 62 | if (pred_begin(BB) == pred_end(BB)) return false; 63 | for (const BasicBlock *PRED : make_range(pred_begin(BB), pred_end(BB))) { 64 | // if the edge is critical it will be splitted 65 | if (isCriticalEdge(PRED->getTerminator(), BB)) continue; 66 | if (!PDT->dominates(BB, PRED)) return false; 67 | } 68 | return true; 69 | } 70 | 71 | // Given a function, try to estimate the number of edges in the function that 72 | // will be instrumented by AFLplusplus. 73 | // It instruments edges by breaking all critial edges with a block in the middle 74 | // and avoiding instrumenting blocks which are full dominators, or full 75 | // post-dominators with multiple predecessors. 76 | unsigned long estimateAFLEdges(Function *F) { 77 | DominatorTree *DT = &getAnalysis(*F).getDomTree(); 78 | PostDominatorTree *PDT = &getAnalysis(*F).getPostDomTree(); 79 | unsigned edges = 0; 80 | for (BasicBlock &BB: *F) { 81 | // Do not instrument full dominators, or full post-dominators with multiple 82 | // predecessors. 
83 | bool shouldInstrumentBlock = (&F->getEntryBlock() == &BB) || (!isFullDominator(&BB, DT) && 84 | !(isFullPostDominator(&BB, PDT) 85 | && !BB.getSinglePredecessor())); 86 | if (shouldInstrumentBlock) ++edges; 87 | 88 | Instruction *TI = BB.getTerminator(); 89 | if (TI->getNumSuccessors() > 1 && !isa(TI)) 90 | for (unsigned succ = 0, end = TI->getNumSuccessors(); succ != end; ++succ) { 91 | if (isCriticalEdge(TI, succ)) 92 | ++edges; 93 | } 94 | } 95 | return edges; 96 | } 97 | 98 | // Return the priority of the CallBase, an higher priority means the CallBase 99 | // should be cloned earlier 100 | static long getPriority(CallBase *CB) { 101 | MDNode* N; 102 | assert(CB); 103 | N = CB->getMetadata(CGC_CLONE_PRIORITY); 104 | if (N == NULL) return 0; 105 | Constant *val = dyn_cast(N->getOperand(0))->getValue(); 106 | assert(val); 107 | long prio = cast(val)->getSExtValue(); 108 | return prio; 109 | } 110 | 111 | void dumpWeights(Function *F, int level, std::set &visited) { 112 | if (visited.find(F) != visited.end()) return; 113 | visited.insert(F); 114 | 115 | for (BasicBlock &BB: *F) { 116 | for (Instruction &I: BB) { 117 | // Search all call bases 118 | if (CallBase * CB = dyn_cast(&I)) { 119 | 120 | // Only if they represent direct calls to functions 121 | if (CB->isInlineAsm()) continue; 122 | Function *Called = dyn_cast(CB->getCalledOperand()->stripPointerCasts()); 123 | if (!Called || Called->isDeclaration() || Called->isIntrinsic()) continue; 124 | 125 | oprint(std::string(level, '\t') << "|-> [" << getPriority(CB) << "] " << Called->getName()); 126 | dumpWeights(Called, level+1, visited); 127 | } 128 | } 129 | } 130 | } 131 | 132 | virtual bool runOnModule(Module &M) override { 133 | unsigned int num_funcs = 0; 134 | unsigned int total_BB = 0; 135 | unsigned int total_edges = 0; 136 | std::map callsToFunc; 137 | for (auto &F : M.getFunctionList()) { 138 | if (F.isDeclaration()) 139 | continue; 140 | ++num_funcs; 141 | if (DumpGraph) { 142 | oprint("Call 
graph node for function: '" << F.getName() << "'"); 143 | } 144 | total_edges += estimateAFLEdges(&F); 145 | for(auto &BB: F) { 146 | ++total_BB; 147 | if (DumpCalls || DumpGraph) { 148 | for (auto &I : BB) { 149 | if (CallBase * CB = dyn_cast(&I)) { 150 | if (CB->isInlineAsm()) continue; 151 | 152 | Function *Called = dyn_cast(CB->getCalledOperand()->stripPointerCasts()); 153 | if (!Called || Called->isDeclaration() || Called->isIntrinsic() || isAvailableExternally(*Called)) continue; 154 | callsToFunc[Called]+=1; 155 | if (DumpGraph) { 156 | oprint(" " << F.getName() << " calls function '" << Called->getName() << "'"); 157 | } 158 | } 159 | } 160 | } 161 | } 162 | } 163 | 164 | oprint("Num functions: " << num_funcs); 165 | oprint("Num BBs : " << total_BB); 166 | oprint("AFL edges : " << total_edges); 167 | 168 | if (DumpCalls) { 169 | for (auto elem: callsToFunc) { 170 | Function* F = elem.first; 171 | int calls = elem.second; 172 | if (calls > 1) oprint(F->getName().str() << ": " << calls); 173 | } 174 | } 175 | 176 | if (DumpWeights) { 177 | for (Function &F: M) { 178 | if (F.isDeclaration()) 179 | continue; 180 | 181 | // start from root 182 | const std::string &FName = F.getName().str(); 183 | std::set visited; 184 | if (FName == RootFunction) { 185 | oprint(F.getName()); 186 | dumpWeights(&F, 0, visited); 187 | } 188 | } 189 | } 190 | 191 | return false; 192 | } 193 | 194 | void getAnalysisUsage(AnalysisUsage &AU) const override { 195 | AU.addRequired(); 196 | AU.addRequired(); 197 | } 198 | }; 199 | 200 | } 201 | 202 | char FuncStatsPass::ID = 0; 203 | RegisterPass MP("func-stats", "FuncStats Pass"); 204 | 205 | -------------------------------------------------------------------------------- /passes/icp/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for the ICP pass 2 | ROOT=../.. 
3 | 4 | PASSNAME := icp 5 | OBJS := icp.o 6 | 7 | include ../Makefile.inc 8 | include ../Makefile.svf.inc 9 | -------------------------------------------------------------------------------- /passes/icp/icp.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include "WPA/WPAPass.h" 4 | #include "llvm/Transforms/Utils/CallPromotionUtils.h" 5 | #include "llvm/IR/Intrinsics.h" 6 | #include "llvm/IR/CFG.h" 7 | #include "llvm/IR/IRBuilder.h" 8 | 9 | using namespace llvm; 10 | using namespace SVF; 11 | 12 | #define DEBUG_TYPE "icp" 13 | #define icpPassLog(M) LLVM_DEBUG(dbgs() << "ICPPass: " << M << "\n") 14 | #define oprint(s) (dbgs() << s << "\n") 15 | #define print(s) (errs() << s << "\n") 16 | 17 | static cl::list 18 | Functions("icp-funcs", 19 | cl::desc("Specify all the comma-separated function regexes to icp"), 20 | cl::ZeroOrMore, cl::CommaSeparated, cl::NotHidden); 21 | 22 | static cl::opt 23 | VarArgOnly("icp-vararg-only", 24 | cl::desc("ICP only variadic calls"), 25 | cl::init(false), cl::NotHidden); 26 | 27 | static cl::opt 28 | Fallback("icp-fallback", 29 | cl::desc("Leave a fallback indirect call behind"), 30 | cl::init(false), cl::NotHidden); 31 | 32 | static cl::opt 33 | Abort("icp-abort", 34 | cl::desc("Leave an abort call for the default case"), 35 | cl::init(false), cl::NotHidden); 36 | 37 | static cl::opt 38 | TypeAnalysis("icp-type", 39 | cl::desc("Use faster type-based points-to analysis."), 40 | cl::init(false), cl::NotHidden); 41 | 42 | static cl::opt 43 | TypeAnalysisOpaquePtrs("icp-type-opaque-ptrs", 44 | cl::desc("Allow arbitrary ptr casts in type-based points-to analysis."), 45 | cl::init(true), cl::NotHidden); 46 | 47 | static cl::opt 48 | StrictSignature("icp-type-strict-signature", 49 | cl::desc("Only allow for exact function signature matches"), 50 | cl::init(true), cl::NotHidden); 51 | 52 | static cl::opt 53 | AliasSVFAnalysis("icp-alias", 54 | cl::desc("Use slower alias-based 
points-to analysis."), 55 | cl::init(false), cl::NotHidden); 56 | 57 | static cl::opt 58 | NoPromote("icp-no-promote", 59 | cl::desc("Don't promote indirect call, analyse only possible targets"), 60 | cl::init(false), cl::NotHidden); 61 | 62 | namespace { 63 | 64 | class ICPPass : public ModulePass { 65 | 66 | public: 67 | static char ID; 68 | ICPPass() : ModulePass(ID) {} 69 | 70 | bool isCompatibleType(Type *T1, Type *T2) { 71 | // Check if 2 types are the same, tolerating void* (i8*) pointer casts. 72 | if (T1 == T2) 73 | return true; 74 | if (!T1->isPointerTy() || !T2->isPointerTy()) 75 | return false; 76 | // If requested, be even more conservative (any pointer cast will do). 77 | if (TypeAnalysisOpaquePtrs) 78 | return true; 79 | return false; 80 | } 81 | 82 | bool csTypeAlias(CallSite &CS, Function *F) { 83 | // avoid stripping pointer casts, since we want the final called ptr type 84 | Value *V = CS.getCalledValue(); 85 | FunctionType *FT= F->getFunctionType(); 86 | FunctionType *CT= cast(V->getType()->getContainedType(0)); 87 | 88 | // Fast path: perfect type match. 89 | if (FT == CT) 90 | return true; 91 | 92 | // Return types have to match, unless the callsite doesn't care. 93 | if (!CT->getReturnType()->isVoidTy() 94 | && !isCompatibleType(CT->getReturnType(), FT->getReturnType())) 95 | return false; 96 | 97 | // Match #arguments and #parameters (account for variadic functions). 98 | if (CS.arg_size() < FT->getNumParams()) 99 | return false; 100 | // Accept the case when the CallSite has more params than the function if not strict 101 | if (StrictSignature) 102 | if (CS.arg_size() > FT->getNumParams() && !F->isVarArg()) 103 | return false; 104 | 105 | unsigned int max_args = StrictSignature ? CS.arg_size() : FT->getNumParams(); 106 | 107 | // Make sure each argument has compatible type with corresponding param. 108 | for (unsigned i=0; igetNumParams() ? 
FT->getParamType(i) : NULL; 110 | if (!PT) 111 | break; 112 | if (!isCompatibleType(PT, CS.getArgument(i)->getType())) 113 | return false; 114 | } 115 | 116 | return true; 117 | } 118 | 119 | // Check if the signature of the CallSite is compatible with calling the function F 120 | bool isSignatureCompatible(CallSite &CS, Function *F) { 121 | // avoid stripping pointer casts, since we want the final called ptr type 122 | Value *V = CS.getCalledValue(); 123 | FunctionType *FT= F->getFunctionType(); 124 | FunctionType *CT= cast(V->getType()->getContainedType(0)); 125 | 126 | // Fast path: perfect type match. 127 | if (FT == CT) 128 | return true; 129 | 130 | // Return types have to match, unless the callsite doesn't care. 131 | if (!CT->getReturnType()->isVoidTy() 132 | && !isCompatibleType(CT->getReturnType(), FT->getReturnType())) 133 | return false; 134 | 135 | // Match #arguments and #parameters 136 | if (CS.arg_size() < FT->getNumParams()) 137 | return false; 138 | 139 | // Accept the case when the CallSite has more params than the function 140 | return true; 141 | } 142 | 143 | void getIndirectCallees(Module *M, CallSite &CS, std::vector &callees, WPAPass *wpa) { 144 | // Grab functions that may alias value at the callsite 145 | Value *V = CS.getCalledValue()->stripPointerCasts(); 146 | for (auto &F : M->getFunctionList()) { 147 | if (!F.hasAddressTaken()) 148 | continue; 149 | if (VarArgOnly && Fallback && !F.isVarArg()) 150 | continue; 151 | 152 | if (AliasSVFAnalysis && TypeAnalysis) { 153 | if (csTypeAlias(CS, &F) && wpa->alias(V, &F)) 154 | callees.push_back(&F); 155 | continue; 156 | } 157 | 158 | // Use points-to analysis if requested 159 | if (!TypeAnalysis) { 160 | if (isSignatureCompatible(CS, &F) && wpa->alias(V, &F)) 161 | callees.push_back(&F); 162 | continue; 163 | } 164 | 165 | // Or faster callsite type-based analysis otherwise 166 | if (csTypeAlias(CS, &F)) 167 | callees.push_back(&F); 168 | } 169 | } 170 | 171 | Instruction 
*wrapPromoteCallWithIfThenElse(llvm::CallSite CS, llvm::Function *Callee, llvm::MDNode *BranchWeights = (llvm::MDNode *)nullptr) { 172 | FunctionType *FT= Callee->getFunctionType(); 173 | Instruction * newI = promoteCallWithIfThenElse(CS, Callee); 174 | assert(newI); 175 | CallBase *newCI = dyn_cast(newI); 176 | assert(newCI); 177 | 178 | // If the new function accepts less arguments than the callsite trim them 179 | if (newCI->arg_size() > FT->getNumParams()) { 180 | std::vector args; 181 | for (auto &arg: newCI->args()) { 182 | if (args.size() >= FT->getNumParams()) break; 183 | args.push_back(arg); 184 | } 185 | CallInst *fixedCI = CallInst::Create(newCI->getCalledValue()->stripPointerCasts(), args, "", newCI); 186 | fixedCI->setDebugLoc(newCI->getDebugLoc()); 187 | newCI->replaceAllUsesWith(fixedCI); 188 | newCI->eraseFromParent(); 189 | return fixedCI; 190 | } 191 | 192 | return newI; 193 | } 194 | 195 | void promoteIndirectCall(Function *F, Instruction *I, WPAPass *wpa) { 196 | Module* M = F->getParent(); 197 | LLVMContext& C = M->getContext(); 198 | 199 | // retrieve the errx function 200 | std::vector args; 201 | args.push_back(Type::getInt32Ty(C)); 202 | args.push_back(Type::getInt8PtrTy(C)); 203 | FunctionType *FT = FunctionType::get(Type::getVoidTy(C), args, true); 204 | FunctionCallee _errx = M->getOrInsertFunction("errx", FT); 205 | assert(_errx); 206 | Function *ErrxF = dyn_cast(_errx.getCallee()); 207 | assert(ErrxF); 208 | 209 | oprint("Promoting indirect call: " << *I << " in " << F->getName().str()); 210 | // Get indirect callees 211 | CallSite CS(I); 212 | std::vector callees; 213 | getIndirectCallees(F->getParent(), CS, callees, wpa); 214 | if (callees.empty()) { 215 | // For now we fail if we are not using the type analysis, since we may 216 | // are using SVF wrongly: 217 | // https://github.com/SVF-tools/SVF/issues/280 218 | if (Abort) { 219 | // insert an abort call in place of the indirect default call 220 | Instruction *OldCall = 
CS.getInstruction(); 221 | BasicBlock* ThisBB = CS.getInstruction()->getParent(); 222 | 223 | // replace the return value of the call with undefined 224 | OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType())); 225 | 226 | // add the call to the errx function 227 | std::vector args; 228 | args.push_back( ConstantInt::get(Type::getInt32Ty(C), 0)); 229 | std::string str = "ICP UNREACHABLE"; 230 | llvm::IRBuilder<> builder(ThisBB); 231 | static Value* error_string = builder.CreateGlobalStringPtr(StringRef(str)); 232 | args.push_back(error_string); 233 | CallInst *CI = CallInst::Create(ErrxF, args, "",OldCall); 234 | CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoReturn); 235 | 236 | // remove the old call and the branch to leave unreachable instr 237 | OldCall->eraseFromParent(); 238 | } 239 | oprint("No callees available"); 240 | return; 241 | } 242 | oprint(callees.size() << " callees possible"); 243 | for (auto Callee : callees) { 244 | oprint("possible callee: " << Callee->getName().str()); 245 | } 246 | if (NoPromote) return; 247 | 248 | // Check if we should only promote indirect calls to variadic functions. 249 | if (VarArgOnly) { 250 | bool hasVarArgCallee = false; 251 | for (auto Callee : callees) { 252 | if (Callee->isVarArg()) 253 | hasVarArgCallee = true; 254 | } 255 | if (!hasVarArgCallee) 256 | return; 257 | } 258 | 259 | // Promote with or without indirect call fallback. 
260 | Function *lastCallee = NULL; 261 | for (auto Callee : callees) { 262 | if (lastCallee) 263 | wrapPromoteCallWithIfThenElse(CS, lastCallee); 264 | lastCallee = Callee; 265 | } 266 | if (Fallback) { 267 | wrapPromoteCallWithIfThenElse(CS, lastCallee); 268 | CS.addAttribute(AttributeList::FunctionIndex, Attribute::NoRecurse); 269 | } 270 | else if (Abort) { 271 | // create the last branch with the remaining indirect call 272 | wrapPromoteCallWithIfThenElse(CS, lastCallee); 273 | 274 | // insert an abort call in place of the indirect default call 275 | Instruction *OldCall = CS.getInstruction(); 276 | BasicBlock* ThisBB = CS.getInstruction()->getParent(); 277 | Instruction* LastI = ThisBB->getTerminator(); 278 | UnreachableInst* UI = new UnreachableInst(C, LastI); 279 | 280 | // remove the values coming from the phi nodes of the successors 281 | for (BasicBlock* SuccBB: successors(ThisBB)) { 282 | for (PHINode &Phi: SuccBB->phis()) { 283 | Phi.removeIncomingValue(ThisBB); 284 | } 285 | } 286 | 287 | // replace the return value of the call with undefined 288 | OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType())); 289 | 290 | // add the call to the errx function 291 | std::vector args; 292 | args.push_back( ConstantInt::get(Type::getInt32Ty(C), 0)); 293 | std::string str = "ICP UNREACHABLE"; 294 | llvm::IRBuilder<> builder(ThisBB); 295 | static Value* error_string = builder.CreateGlobalStringPtr(StringRef(str)); 296 | args.push_back(error_string); 297 | CallInst *CI = CallInst::Create(ErrxF, args, "",OldCall); 298 | CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoReturn); 299 | 300 | // remove the old call and the branch to leave unreachable instr 301 | OldCall->eraseFromParent(); 302 | LastI->eraseFromParent(); 303 | assert(ThisBB->getTerminator() == UI); 304 | } else { 305 | promoteCall(CS, lastCallee); 306 | } 307 | } 308 | 309 | std::string getLocation(Instruction &I) { 310 | 311 | if (DILocation *Loc = I.getDebugLoc()) { 312 | unsigned 
Line = Loc->getLine(); 313 | unsigned Col = Loc->getColumn(); 314 | StringRef File = Loc->getFilename(); 315 | DILocation *InlineLoc = Loc->getInlinedAt(); 316 | DILocalScope *Scope = Loc->getScope(); 317 | // not worth 318 | if (Line == 0 && Col == 0 && !InlineLoc) {print(*Scope); assert(false); return "";} 319 | if (!InlineLoc) 320 | return "file: " + File.str() + ", line: " + std::to_string(Line) + ", col:" + std::to_string(Col); 321 | else { 322 | unsigned InLine = InlineLoc->getLine(); 323 | unsigned InCol = InlineLoc->getColumn(); 324 | StringRef InFile = InlineLoc->getFilename(); 325 | return "file: " + File.str() + ", line: " + std::to_string(Line) + ", col:" + std::to_string(Col) + 326 | ", inlined at: " + InFile.str() + ", line: " + std::to_string(InLine) + ", col:" + std::to_string(InCol); 327 | } 328 | } else { 329 | assert(false); 330 | // No location metadata available 331 | return ""; 332 | } 333 | } 334 | 335 | void dumpICFG(Function *F, WPAPass *wpa) { 336 | print("- function: " << F->getName()); 337 | print(F->getSection()); 338 | print(F->getSectionPrefix()); 339 | for (BasicBlock &BB: *F) { 340 | for (Instruction &I : BB) { 341 | // Gather all call bases 342 | if (CallBase * CB = dyn_cast(&I)) { 343 | 344 | // Only if they represent indirect calls to functions 345 | if (CB->isInlineAsm()) continue; 346 | Function *Called = dyn_cast(CB->getCalledOperand()->stripPointerCasts()); 347 | if (Called) continue; 348 | 349 | CallSite CS(&I); 350 | std::vector callees; 351 | getIndirectCallees(F->getParent(), CS, callees, wpa); 352 | print(" - " << I); 353 | print(" - " << getLocation(I)); 354 | for(Function *callee: callees) { 355 | print(" - " << callee->getName()); 356 | } 357 | } 358 | } 359 | } 360 | } 361 | 362 | void icp(Function *F, WPAPass *wpa) { 363 | std::vector indirectCalls; 364 | // dumpICFG(F, wpa); 365 | 366 | // Collect indirect calls. 
367 | for (auto &BB : *F) 368 | for (auto &I : BB) { 369 | CallSite CS(&I); 370 | if (!CS.getInstruction() || CS.isInlineAsm()) 371 | continue; 372 | if (isa(CS.getCalledValue()->stripPointerCasts())) 373 | continue; 374 | indirectCalls.push_back(&I); 375 | } 376 | 377 | // Promote. 378 | for (auto I : indirectCalls) { 379 | promoteIndirectCall(F, I, wpa); 380 | } 381 | } 382 | 383 | virtual bool runOnModule(Module &M) { 384 | icpPassLog("Running..."); 385 | assert(!(Abort && Fallback) && 386 | "Only a mode between icp-unreachable and icp-fallback can be selected"); 387 | SVFModule* svfModule = LLVMModuleSet::getLLVMModuleSet()->buildSVFModule(M); 388 | WPAPass *wpa = NULL; 389 | assert(AliasSVFAnalysis || TypeAnalysis); 390 | if (AliasSVFAnalysis) { 391 | wpa = new WPAPass(); 392 | wpa->runOnModule(svfModule); 393 | } 394 | 395 | std::vector FunctionRegexes; 396 | if (Functions.empty()) 397 | Functions.push_back(".*"); 398 | passListRegexInit(FunctionRegexes, Functions); 399 | 400 | // ICP all the functions in the module. 
401 | for (auto &F : M.getFunctionList()) { 402 | if (F.isDeclaration()) 403 | continue; 404 | const std::string &FName = F.getName(); 405 | if (!passListRegexMatch(FunctionRegexes, FName)) 406 | continue; 407 | icp(&F, wpa); 408 | } 409 | 410 | return true; 411 | } 412 | }; 413 | 414 | } 415 | 416 | char ICPPass::ID = 0; 417 | RegisterPass MP("icp", "ICP Pass"); 418 | -------------------------------------------------------------------------------- /passes/include/common/cgc_magics.h: -------------------------------------------------------------------------------- 1 | #ifndef _CGC_MAGICS_H 2 | #define _CGC_MAGICS_H 3 | 4 | #define CGC_ROOT_ATTR "cgc_root" 5 | #define CGC_CLONE_CALL_ATTR "cgc_clone_call" 6 | #define CGC_CLONE_PRIORITY "cgc_clone_priority" 7 | #define CGC_CLONE_NEVER "cgc_clone_never" 8 | #define CGC_CLONE_MARK "__cgc_clone_" 9 | 10 | #endif /* _CGC_MAGICS_H */ 11 | -------------------------------------------------------------------------------- /passes/include/common/pass.h: -------------------------------------------------------------------------------- 1 | #ifndef _PASS_H 2 | #define _PASS_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | using namespace llvm; 29 | 30 | static inline void passListRegexInit(std::vector ®exes, const std::vector &strings) 31 | { 32 | for (auto &s : strings) 33 | regexes.push_back(new Regex(s, 0)); 34 | } 35 | 36 | static inline bool passListRegexMatch(const std::vector ®exes, const std::string &string) 37 | { 38 | for (auto ®ex : regexes) { 39 | if (regex->match(string)) 40 | return true; 41 | } 42 | 43 | return false; 44 | } 45 | 46 | #endif /* _PASS_H */ 47 | -------------------------------------------------------------------------------- /passes/include/sdag/sdag-print.h: 
-------------------------------------------------------------------------------- 1 | #ifndef SDAG_PRINT_H 2 | #define SDAG_PRINT_H 3 | 4 | #include "sdag.h" 5 | 6 | #include 7 | 8 | #include "llvm/Support/GraphWriter.h" 9 | 10 | namespace llvm { 11 | 12 | template<> 13 | struct DOTGraphTraits : public DefaultDOTGraphTraits { 14 | 15 | DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} 16 | 17 | static std::string getGraphName(const SDAG *sdag) { 18 | return "SDAG for '" + sdag->getFunction()->getName().str() + "' function"; 19 | } 20 | 21 | std::string getNodeLabel(const SDAGNode *Node, 22 | const SDAG *Graph) { 23 | return Node->getLabel(!isSimple()); 24 | } 25 | 26 | std::string getNodeAttributes(const SDAGNode *Node, 27 | const SDAG *Graph) { 28 | std::string str; 29 | if (!Node->isSpecial()) 30 | return str; 31 | raw_string_ostream OS(str); 32 | OS << "color=\"red\""; 33 | return OS.str(); 34 | } 35 | 36 | }; 37 | } // End llvm namespace 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /passes/include/sdag/sdag.h: -------------------------------------------------------------------------------- 1 | #ifndef SDAG_H 2 | #define SDAG_H 3 | 4 | #include "llvm/ADT/GraphTraits.h" 5 | #include "llvm/ADT/iterator.h" 6 | #include "llvm/ADT/iterator_range.h" 7 | #include "llvm/IR/BasicBlock.h" 8 | #include "llvm/IR/Function.h" 9 | #include "llvm/IR/InstrTypes.h" 10 | #include "llvm/IR/Value.h" 11 | #include "llvm/Support/Casting.h" 12 | #include "llvm/Support/type_traits.h" 13 | #include "llvm/Analysis/MemorySSA.h" 14 | #include "llvm/IR/IntrinsicInst.h" 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace llvm { 21 | 22 | class SDAGNode; 23 | 24 | class SDAG { 25 | private: 26 | static std::map objMap; 27 | static MemorySSA *MSSA; 28 | static AAResults *AA; 29 | 30 | Function *function; 31 | SDAGNode *root; 32 | std::set nodes; 33 | std::map nodeMap; 34 | SDAG(Function *F) 
{ this->function = F; } 35 | 36 | void build(); 37 | bool buildFromNode(SDAGNode* node); 38 | void reachingMemDefs(Instruction *I, std::vector &reachingDefs); 39 | SDAGNode* newSuccNode(SDAGNode *parent, Value *V); 40 | public: 41 | static SDAG* get(Function *F, MemorySSA *MSSA, AAResults *AA); 42 | 43 | void print(raw_ostream &OS, SDAGNode *node, bool verbFmt=false) const; 44 | void print(raw_ostream &OS, bool verbFmt=false) const { print(OS, root, verbFmt); } 45 | SDAGNode *getRoot() const { return root; } 46 | Function *getFunction() const { return function; } 47 | const std::set& getNodes() const { return nodes; } 48 | void foldNodesByOpcode(unsigned opcode); 49 | }; 50 | 51 | class SDAGNode { 52 | protected: 53 | SDAG *sdag; 54 | Value *value; 55 | std::vector successors; 56 | std::vector parents; 57 | 58 | public: 59 | SDAGNode(SDAG *sdag, Value *value) { 60 | this->sdag = sdag; 61 | this->value = value; 62 | } 63 | void addSuccessor(SDAGNode *node); 64 | void delSuccessor(SDAGNode *node); 65 | void fold(); 66 | 67 | std::string getLabel(bool verbFmt=false) const; 68 | bool isSpecial() const; 69 | Value *getValue() const { return value; } 70 | SDAG *getSDAG() const { return sdag; } 71 | Function *getFunction() const { return sdag->getFunction(); } 72 | unsigned getNumSuccessors() const { return successors.size(); }; 73 | unsigned getNumParents() const { return getParents().size(); }; 74 | const std::vector& getSuccessors() const { return successors; }; 75 | const std::vector& getParents() const { return parents; }; 76 | }; 77 | 78 | //===----------------------------------------------------------------------===// 79 | // SDAGNode succ_iterator helpers 80 | //===----------------------------------------------------------------------===// 81 | 82 | template 83 | class SuccIteratorx 84 | : public iterator_facade_base, 85 | std::random_access_iterator_tag, SuccNodeT, int, 86 | SuccNodeT *, SuccNodeT *> { 87 | public: 88 | using difference_type = int; 89 | using 
pointer = SuccNodeT *; 90 | using reference = SuccNodeT *; 91 | 92 | private: 93 | NodeT *Node; 94 | int Idx; 95 | using Self = SuccIteratorx; 96 | 97 | inline bool index_is_valid(int Idx) { 98 | return Idx >= 0 && Idx <= (int)Node->getNumSuccessors(); 99 | } 100 | 101 | /// Proxy object to allow write access in operator[] 102 | class SuccessorProxy { 103 | Self It; 104 | 105 | public: 106 | explicit SuccessorProxy(const Self &It) : It(It) {} 107 | 108 | SuccessorProxy(const SuccessorProxy &) = default; 109 | 110 | SuccessorProxy &operator=(SuccessorProxy RHS) { 111 | *this = reference(RHS); 112 | return *this; 113 | } 114 | 115 | SuccessorProxy &operator=(reference RHS) { 116 | It.Node->setSuccessor(It.Idx, RHS); 117 | return *this; 118 | } 119 | 120 | operator reference() const { return *It; } 121 | }; 122 | 123 | public: 124 | // begin iterator 125 | explicit inline SuccIteratorx(NodeT *Node) : Node(Node), Idx(0) {} 126 | // end iterator 127 | inline SuccIteratorx(NodeT *Node, bool) : Node(Node) { 128 | Idx = Node->getNumSuccessors(); 129 | } 130 | 131 | /// This is used to interface between code that wants to 132 | /// operate on terminator instructions directly. 
133 | int getSuccessorIndex() const { return Idx; } 134 | 135 | inline bool operator==(const Self &x) const { return Idx == x.Idx; } 136 | 137 | inline SuccNodeT *operator*() const { return Node->getSuccessors()[Idx]; } 138 | 139 | inline SuccNodeT *operator->() const { return operator*(); } 140 | 141 | inline bool operator<(const Self &RHS) const { 142 | assert(Node == RHS.Node && "Cannot compare iterators of different nodes!"); 143 | return Idx < RHS.Idx; 144 | } 145 | 146 | int operator-(const Self &RHS) const { 147 | assert(Node == RHS.Node && "Cannot compare iterators of different nodes!"); 148 | return Idx - RHS.Idx; 149 | } 150 | 151 | inline Self &operator+=(int RHS) { 152 | int NewIdx = Idx + RHS; 153 | assert(index_is_valid(NewIdx) && "Iterator index out of bound"); 154 | Idx = NewIdx; 155 | return *this; 156 | } 157 | 158 | inline Self &operator-=(int RHS) { return operator+=(-RHS); } 159 | 160 | // Specially implement the [] operation using a proxy object to support 161 | // assignment. 162 | inline SuccessorProxy operator[](int Offset) { 163 | Self TmpIt = *this; 164 | TmpIt += Offset; 165 | return SuccessorProxy(TmpIt); 166 | } 167 | 168 | /// Get the source NodeT of this iterator. 
169 | inline SuccNodeT *getSource() { 170 | return Node; 171 | } 172 | }; 173 | 174 | //===----------------------------------------------------------------------===// 175 | // SDAGNode succ_iterator helpers 176 | //===----------------------------------------------------------------------===// 177 | 178 | using sdagn_succ_iterator = 179 | SuccIteratorx; 180 | using sdagn_succ_const_iterator = 181 | SuccIteratorx; 182 | 183 | inline sdagn_succ_iterator sdagn_succ_begin(SDAGNode *N) { 184 | return sdagn_succ_iterator(N); 185 | } 186 | inline sdagn_succ_const_iterator sdagn_succ_begin(const SDAGNode *N) { 187 | return sdagn_succ_const_iterator(N); 188 | } 189 | inline sdagn_succ_iterator sdagn_succ_end(SDAGNode *N) { 190 | return sdagn_succ_iterator(N, true); 191 | } 192 | inline sdagn_succ_const_iterator sdagn_succ_end(const SDAGNode *N) { 193 | return sdagn_succ_const_iterator(N, true); 194 | } 195 | 196 | //===--------------------------------------------------------------------===// 197 | // GraphTraits specializations for SDAGs 198 | //===--------------------------------------------------------------------===// 199 | 200 | // Provide specializations of GraphTraits to be able to treat a function as a 201 | // graph of SDAG Nodes... 
202 | 203 | template <> struct GraphTraits { 204 | using NodeRef = SDAGNode *; 205 | using ChildIteratorType = sdagn_succ_iterator; 206 | 207 | static NodeRef getEntryNode(NodeRef N) { return N; } 208 | static ChildIteratorType child_begin(NodeRef N) { return sdagn_succ_begin(N); } 209 | static ChildIteratorType child_end(NodeRef N) { return sdagn_succ_end(N); } 210 | }; 211 | 212 | template <> struct GraphTraits { 213 | using NodeRef = const SDAGNode *; 214 | using ChildIteratorType = sdagn_succ_const_iterator; 215 | 216 | static NodeRef getEntryNode(const NodeRef N) { return N; } 217 | 218 | static ChildIteratorType child_begin(NodeRef N) { return sdagn_succ_begin(N); } 219 | static ChildIteratorType child_end(NodeRef N) { return sdagn_succ_end(N); } 220 | }; 221 | 222 | //===--------------------------------------------------------------------===// 223 | // GraphTraits specializations for function SDAGs 224 | //===--------------------------------------------------------------------===// 225 | 226 | // Provide specializations of GraphTraits to be able to treat a SDAG as a 227 | // graph of SDAG nodes... 
228 | // 229 | template <> struct GraphTraits : public GraphTraits { 230 | static NodeRef getEntryNode(SDAG *sdag) { return sdag->getRoot(); } 231 | 232 | // nodes_iterator/begin/end - Allow iteration over all nodes in the graph 233 | using nodes_iterator = std::set::iterator; 234 | 235 | static nodes_iterator nodes_begin(SDAG *sdag) { 236 | return nodes_iterator(sdag->getNodes().begin()); 237 | } 238 | 239 | static nodes_iterator nodes_end(SDAG *sdag) { 240 | return nodes_iterator(sdag->getNodes().end()); 241 | } 242 | 243 | static size_t size(SDAG *sdag) { return sdag->getNodes().size(); } 244 | }; 245 | 246 | template <> struct GraphTraits : 247 | public GraphTraits { 248 | static NodeRef getEntryNode(const SDAG *sdag) { return sdag->getRoot(); } 249 | 250 | // nodes_iterator/begin/end - Allow iteration over all nodes in the graph 251 | using nodes_iterator = std::set::iterator; 252 | 253 | static nodes_iterator nodes_begin(const SDAG *sdag) { 254 | return nodes_iterator(sdag->getNodes().begin()); 255 | } 256 | 257 | static nodes_iterator nodes_end(const SDAG *sdag) { 258 | return nodes_iterator(sdag->getNodes().end()); 259 | } 260 | 261 | static size_t size(const SDAG *sdag) { return sdag->getNodes().size(); } 262 | }; 263 | 264 | class SDAGWrapperPass : public FunctionPass { 265 | public: 266 | static char ID; 267 | SDAGWrapperPass() : FunctionPass(ID) {} 268 | 269 | bool runOnFunction(Function &F) override { 270 | auto MSSA = &getAnalysis().getMSSA(); 271 | auto AAResults = &getAnalysis().getAAResults(); 272 | sdag = SDAG::get(&F, MSSA, AAResults); 273 | return false; 274 | } 275 | void print(raw_ostream &OS, const Module* = nullptr) const override {} 276 | SDAG *getSDAG() const { return sdag; } 277 | 278 | void getAnalysisUsage(AnalysisUsage &AU) const override { 279 | AU.addRequired(); 280 | AU.addRequired(); 281 | AU.setPreservesAll(); 282 | } 283 | private: 284 | SDAG *sdag; 285 | }; 286 | 287 | } // end namespace llvm 288 | 289 | #endif // SDAG_H 290 | 
-------------------------------------------------------------------------------- /passes/include/svfa/SVFAPass.h: -------------------------------------------------------------------------------- 1 | //===- SVFAPass.h -- Whole program analysis------------------------------------// 2 | // 3 | // SVF: Static Value-Flow Analysis 4 | // 5 | // Copyright (C) <2013-2017> 6 | // 7 | 8 | // This program is free software: you can redistribute it and/or modify 9 | // it under the terms of the GNU General Public License as published by 10 | // the Free Software Foundation, either version 3 of the License, or 11 | // (at your option) any later version. 12 | 13 | // This program is distributed in the hope that it will be useful, 14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | // GNU General Public License for more details. 17 | 18 | // You should have received a copy of the GNU General Public License 19 | // along with this program. If not, see . 20 | // 21 | //===----------------------------------------------------------------------===// 22 | 23 | 24 | /* 25 | * @file: SVFA.h 26 | * @author: yesen 27 | * @date: 10/06/2014 28 | * @version: 1.0 29 | * 30 | * @section LICENSE 31 | * 32 | * @section DESCRIPTION 33 | * 34 | */ 35 | 36 | 37 | #ifndef SVFA_H_ 38 | #define SVFA_H_ 39 | 40 | #include "MemoryModel/PointerAnalysis.h" 41 | 42 | class SVFModule; 43 | class SVFG; 44 | 45 | /*! 46 | * Whole program pointer analysis. 47 | * This class performs various pointer analysis on the given module. 48 | */ 49 | // excised ", public llvm::AliasAnalysis" as that has a very light interface 50 | // and I want to see what breaks. 
51 | class SVFAPass: public ModulePass { 52 | typedef std::vector PTAVector; 53 | 54 | public: 55 | /// Pass ID 56 | static char ID; 57 | 58 | enum AliasCheckRule { 59 | Conservative, ///< return MayAlias if any pta says alias 60 | Veto, ///< return NoAlias if any pta says no alias 61 | Precise ///< return alias result by the most precise pta 62 | }; 63 | 64 | /// Constructor needs TargetLibraryInfo to be passed to the AliasAnalysis 65 | SVFAPass() : ModulePass(ID) { 66 | 67 | } 68 | 69 | /// Destructor 70 | ~SVFAPass(); 71 | 72 | /// LLVM analysis usage 73 | virtual inline void getAnalysisUsage(AnalysisUsage &au) const { 74 | // declare your dependencies here. 75 | /// do not intend to change the IR in this pass, 76 | au.setPreservesAll(); 77 | } 78 | 79 | /// Get adjusted analysis for alias analysis 80 | virtual inline void* getAdjustedAnalysisPointer(AnalysisID id) { 81 | return this; 82 | } 83 | 84 | /// Interface expose to users of our pointer analysis, given Location infos 85 | virtual inline AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) { 86 | return alias(LocA.Ptr, LocB.Ptr); 87 | } 88 | 89 | /// Interface expose to users of our pointer analysis, given Value infos 90 | virtual AliasResult alias(const Value* V1, const Value* V2); 91 | 92 | /// Print all alias pairs 93 | virtual void PrintAliasPairs(PointerAnalysis* pta); 94 | 95 | /// Interface of mod-ref analysis to determine whether a CallSite instruction can mod or ref any memory location 96 | virtual ModRefInfo getModRefInfo(const CallInst* callInst); 97 | 98 | /// Interface of mod-ref analysis to determine whether a CallSite instruction can mod or ref a specific memory location, given Location infos 99 | virtual inline ModRefInfo getModRefInfo(const CallInst* callInst, const MemoryLocation& Loc) { 100 | return getModRefInfo(callInst, Loc.Ptr); 101 | } 102 | 103 | /// Interface of mod-ref analysis to determine whether a CallSite instruction can mod or ref a specific memory 
location, given Value infos 104 | virtual ModRefInfo getModRefInfo(const CallInst* callInst, const Value* V); 105 | 106 | /// Interface of mod-ref analysis between two CallSite instructions 107 | virtual ModRefInfo getModRefInfo(const CallInst* callInst1, const CallInst* callInst2); 108 | 109 | /// We start from here 110 | virtual bool runOnModule(llvm::Module& module) { 111 | SVFModule svfModule(module); 112 | runOnModule(svfModule); 113 | return false; 114 | } 115 | 116 | /// Run pointer analysis on SVFModule 117 | void runOnModule(SVFModule svfModule); 118 | 119 | void dumpCalleeStats(llvm::Module *M); 120 | unsigned getCaleeCount(llvm::Module *M, llvm::CallSite &CS); 121 | unsigned getCaleeTBCount(llvm::Module *M, llvm::CallSite &CS); 122 | bool hasAddressTaken(const llvm::Function *F); 123 | 124 | /// PTA name 125 | virtual inline StringRef getPassName() const { 126 | return "SVFAPass"; 127 | } 128 | 129 | private: 130 | /// Create pointer analysis according to specified kind and analyze the module. 131 | void runPointerAnalysis(SVFModule svfModule, u32_t kind); 132 | 133 | PTAVector ptaVector; ///< all pointer analysis to be executed. 134 | PointerAnalysis* _pta; ///< pointer analysis to be executed. 135 | SVFG* _svfg; ///< svfg generated through -ander pointer analysis 136 | }; 137 | 138 | 139 | #endif /* SVFA_H_ */ 140 | -------------------------------------------------------------------------------- /passes/set-norecurse-ext/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for the remove-unreachable pass 2 | ROOT=../.. 
3 | 4 | PASSNAME := set-norecurse-ext 5 | OBJS := set-norecurse-ext.o 6 | 7 | include ../Makefile.inc 8 | 9 | -------------------------------------------------------------------------------- /passes/set-norecurse-ext/set-norecurse-ext.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include "llvm/IR/Module.h" 5 | #include "llvm/IR/IRBuilder.h" 6 | #include "llvm/IR/Intrinsics.h" 7 | #include "llvm/IR/IntrinsicInst.h" 8 | #include "llvm/Analysis/CallGraph.h" 9 | #include "llvm/Analysis/CallGraphSCCPass.h" 10 | #include "llvm/ADT/SCCIterator.h" 11 | #include "llvm/IR/CFG.h" 12 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" 13 | #include "llvm/Analysis/AssumptionCache.h" 14 | #include "llvm/IR/Dominators.h" 15 | #include "llvm/Transforms/Utils/CodeExtractor.h" 16 | #include "llvm/Analysis/LoopPass.h" 17 | #include "llvm/Analysis/LoopInfo.h" 18 | #include 19 | using namespace llvm; 20 | 21 | #define DEBUG_TYPE "set-norecurse-ext" 22 | #define setNoRecursExtPassLog(M) LLVM_DEBUG(dbgs() << "setNoRecursExtPass: " << M << "\n") 23 | 24 | #define oprint setNoRecursExtPassLog 25 | 26 | typedef long imd_t; 27 | 28 | // This pass sets the norecurse attribute on all the external functions that we 29 | // can guess do not recurse back into the program in any way. 
30 | // Since we are calling an external function, the only way it could recurse back 31 | // into the module, or call a recursive function in it, is by 32 | // being handed a callback pointer, so that is what gets checked. 33 | namespace { 34 | 35 | class SetNoRecursExtPass : public ModulePass { 36 | 37 | public: 38 | static char ID; 39 | SetNoRecursExtPass() : ModulePass(ID) { 40 | } 41 | 42 | bool isFunctionPointerType(Type *type){ 43 | // Peel pointer levels recursively until a non-pointer type is reached 44 | if(PointerType *pointerType=dyn_cast(type)){ 45 | return isFunctionPointerType(pointerType->getElementType()); 46 | } 47 | // Base case: a function type was reached. NOTE(review): function pointers nested inside structs or arrays are not detected by this check. 48 | else if(type->isFunctionTy()){ 49 | return true; 50 | } 51 | return false; 52 | } 53 | 54 | void setNoRecursExt(CallSite &CS, Function *F) { 55 | oprint("Checking " << *CS.getInstruction()); 56 | 57 | // Bail out if any declared parameter of the callee is a function pointer 58 | for (auto &arg: F->args()) { 59 | Type* argT = arg.getType(); 60 | oprint(" " << *argT); 61 | if (isFunctionPointerType(argT)) { 62 | oprint(" [-] not adding attr norecurse"); 63 | return; 64 | } 65 | } 66 | oprint(" Callsite " << F->getName().str()); 67 | // Also check the types of the actual arguments passed at this call site 68 | for (auto &arg: CS.args()) { 69 | Type* argT = (*arg).getType(); 70 | oprint(" " << *argT); 71 | if (isFunctionPointerType(argT)) { 72 | oprint(" [-] not adding attr norecurse"); 73 | return; 74 | } 75 | } 76 | oprint(" [+] adding attr norecurse"); 77 | // All checks passed: mark both the call site and the callee itself norecurse (unless already marked) 78 | if (!CS.hasFnAttr(Attribute::NoRecurse)) 79 | CS.addAttribute(AttributeList::FunctionIndex, Attribute::NoRecurse); 80 | if (!F->hasFnAttribute(Attribute::NoRecurse)) 81 | F->addFnAttr(Attribute::NoRecurse); 82 | } 83 | 84 | 85 | static bool addNoRecurseAttrs(CallGraphSCC &SCC) { 86 | SmallVector Functions; 87 | for (CallGraphNode *I : SCC) { 88 | Functions.push_back(I->getFunction()); 89 | } 90 | 91 | // If the SCC contains multiple nodes we know for sure there is recursion. 
92 | if (Functions.size() != 1) 93 | return false; 94 | 95 | Function *F = *Functions.begin(); 96 | if (!F || !F->hasExactDefinition() || F->doesNotRecurse()) 97 | return false; 98 | 99 | // If all of the calls in F are identifiable and are to norecurse functions, F 100 | // is norecurse. This check also detects self-recursion as F is not currently 101 | // marked norecurse, so any call from F to F will not be marked norecurse. 102 | for (auto &BB : *F) 103 | for (auto &I : BB.instructionsWithoutDebug()) 104 | if (auto *CB = dyn_cast(&I)) { 105 | Function *Callee = dyn_cast(CB->getCalledValue()->stripPointerCasts()); 106 | if (!Callee || Callee == F || !Callee->doesNotRecurse()) { 107 | // Function calls a potentially recursive function. 108 | 109 | // NOTE(review): the original comment says "check if the callsite has no recurse information", but the code below appears to skip every call without a statically known callee without inspecting any attribute - confirm this is intended. 110 | CallSite CS(&I); 111 | if (!Callee && CS) continue; 112 | 113 | return false; 114 | } 115 | } 116 | 117 | // Every call was to a non-recursive function other than this function, and 118 | // we have no indirect recursion as the SCC size is one. This function cannot 119 | // recurse. 
120 | F->setDoesNotRecurse(); 121 | return true; 122 | } 123 | 124 | virtual bool runOnModule(Module &M) override { 125 | setNoRecursExtPassLog("Running..."); 126 | 127 | /* Phase 1: in every defined function, try to mark direct calls to external (declaration-only) callees as norecurse */ 128 | for (auto &F : M.getFunctionList()) { 129 | if (F.isDeclaration()) 130 | continue; 131 | for (auto &BB: F) { 132 | for (auto &I: BB) { 133 | CallSite CS(&I); 134 | if (!CS.getInstruction() || CS.isInlineAsm()) 135 | continue; // not a call, or inline asm 136 | Function *Callee = dyn_cast(CS.getCalledValue()->stripPointerCasts()); 137 | if (!Callee) 138 | continue; // not a direct call 139 | 140 | // if the callee is an external function, try to set the norecurse attr 141 | if (Callee->isDeclaration()) 142 | setNoRecursExt(CS, Callee); 143 | } 144 | } 145 | } 146 | 147 | // Phase 2: now visit the whole call graph in post order to derive norecurse attributes 148 | CallGraph *CG = &getAnalysis().getCallGraph(); 149 | // Walk the callgraph in bottom-up SCC order. 150 | scc_iterator CGI = scc_begin(CG); 151 | 152 | CallGraphSCC CurSCC(*CG, &CGI); 153 | while (!CGI.isAtEnd()) { 154 | // Copy the current SCC and increment past it so that the pass can hack 155 | // on the SCC if it wants to without invalidating our iterator. 156 | const std::vector &NodeVec = *CGI; 157 | CurSCC.initialize(NodeVec); 158 | ++CGI; 159 | 160 | addNoRecurseAttrs(CurSCC); 161 | } 162 | return true; // the module is always reported as modified 163 | } 164 | 165 | void getAnalysisUsage(AnalysisUsage &AU) const override { 166 | AU.addRequired(); 167 | } 168 | }; 169 | 170 | } 171 | 172 | char SetNoRecursExtPass::ID = 0; 173 | RegisterPass MP("set-norecurse-ext", "Set NoRecurse Attr to external functions Pass"); 174 | -------------------------------------------------------------------------------- /remake.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | ROOT_DIR="." 
7 | # Rebuilds and installs the LLVM passes under $ROOT_DIR/passes, using the LLVM toolchain selected by LLVM_DIR or, failing that, by LLVM_CONFIG (default: llvm-config). 8 | # setup llvm env variables 9 | if [ -z "${LLVM_DIR}" ]; then 10 | 11 | echo "[ ] retrieving the LLVM directory..." 12 | 13 | if [ -z "${LLVM_CONFIG}" ]; then 14 | export LLVM_CONFIG='llvm-config' 15 | fi 16 | # An empty version string means llvm-config could not be run (its stderr is discarded on purpose). 17 | export LLVM_VER="$($LLVM_CONFIG --version 2>/dev/null | sed 's/git//')" 18 | if [ "$LLVM_VER" = "" ]; then 19 | echo "[!] llvm-config not found!" 20 | exit 1 21 | fi 22 | 23 | echo "[+] using LLVM $LLVM_VER" 24 | # llvm-config --bindir already prints the directory that holds the LLVM tools, so no /bin suffix is appended. 25 | export PATH="$($LLVM_CONFIG --bindir):$SVF_HOME/Debug-build/bin:$PATH" 26 | export LLVM_DIR="$($LLVM_CONFIG --prefix)" 27 | 28 | else 29 | 30 | export PATH="$LLVM_DIR/bin:$SVF_HOME/Debug-build/bin:$PATH" 31 | 32 | fi 33 | 34 | echo "[+] the LLVM directory is $LLVM_DIR" 35 | export LLVM_COMPILER_PATH=$LLVM_DIR/bin 36 | # Build from the passes directory, then return to the starting directory (paths quoted so spaces are safe). 37 | DIR=$(pwd) 38 | cd "$ROOT_DIR/passes" 39 | make install || exit 1 40 | cd "$DIR" 41 | -------------------------------------------------------------------------------- /tests/driver.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int LLVMFuzzerTestOneInput(const __uint8_t* data, size_t size); 5 | 6 | int main(int argc, char *argv[]) { 7 | __uint8_t data[1024*500]; 8 | int ret = read(0, data, sizeof data); 9 | if (ret < 0) return 1; /* read() failed: a negative count must not be converted to a huge size_t */ 10 | LLVMFuzzerTestOneInput(data, ret); 11 | } -------------------------------------------------------------------------------- /tests/driver.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" int LLVMFuzzerTestOneInput(const __uint8_t* data, size_t size); 4 | 5 | int main(int argc, char *argv[]) { 6 | __uint8_t data[1024*500]; 7 | int ret = read(0, data, sizeof data); 8 | if (ret < 0) return 1; // read() failed: a negative count must not be converted to a huge size_t 9 | LLVMFuzzerTestOneInput(data, ret); 10 | } -------------------------------------------------------------------------------- /tests/opt: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Wrapper around LLVM opt: every -NAME argument that has a matching $PASSES_DIR/NAME.so plugin is expanded to "-load=$PASSES_DIR/NAME.so -NAME" before opt is invoked. 4 | PASSES_DIR="../../bin" 5 | 6 | # setup llvm env variables 7 | if [ 
-z "${LLVM_DIR}" ]; then 8 | 9 | if [ -z "${LLVM_CONFIG}" ]; then 10 | export LLVM_CONFIG='llvm-config' 11 | fi 12 | 13 | export LLVM_VER="$($LLVM_CONFIG --version 2>/dev/null | sed 's/git//')" 14 | if [ "$LLVM_VER" = "" ]; then 15 | echo "[!] llvm-config not found!" 16 | exit 1 17 | fi 18 | 19 | export OPT="$($LLVM_CONFIG --bindir)/opt" 20 | 21 | else 22 | 23 | export OPT="$LLVM_DIR/bin/opt" 24 | 25 | fi 26 | 27 | # Rewrite the positional parameters in place (POSIX sh has no arrays): each 28 | # iteration drops the oldest parameter and appends its processed form, so after 29 | # the loop "$@" holds the final opt command line with every argument kept as one word. 30 | for arg in "$@" 31 | do 32 | shift 33 | case "$arg" in 34 | -*) 35 | pass="${arg#-}" 36 | if [ -f "$PASSES_DIR/$pass.so" ]; then 37 | set -- "$@" "-load=$PASSES_DIR/$pass.so" "-$pass" 38 | continue 39 | fi 40 | ;; 41 | esac 42 | set -- "$@" "$arg" 43 | done 44 | 45 | echo "$OPT" "$@" 46 | "$OPT" "$@" 47 | -------------------------------------------------------------------------------- /tests/test/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Builds the passes, compiles target.c to bitcode, runs it through the call-tree / internalize / cgc pipeline via ../opt, and links the result with ../driver.cc. 3 | set -e 4 | set -x 5 | 6 | ROOT_DIR="../.." 7 | 8 | # setup llvm env variables 9 | if [ -z "${LLVM_DIR}" ]; then 10 | 11 | echo "[ ] retrieving the LLVM directory..." 12 | 13 | if [ -z "${LLVM_CONFIG}" ]; then 14 | export LLVM_CONFIG='llvm-config' 15 | fi 16 | 17 | export LLVM_VER="$($LLVM_CONFIG --version 2>/dev/null | sed 's/git//')" 18 | if [ "$LLVM_VER" = "" ]; then 19 | echo "[!] llvm-config not found!" 
20 | exit 1 21 | fi 22 | 23 | echo "[+] using LLVM $LLVM_VER" 24 | # llvm-config --bindir already prints the directory that holds the LLVM tools, so no /bin suffix is appended. 25 | export PATH="$($LLVM_CONFIG --bindir):$SVF_HOME/Debug-build/bin:$PATH" 26 | export LLVM_DIR="$($LLVM_CONFIG --prefix)" 27 | 28 | else 29 | 30 | export PATH="$LLVM_DIR/bin:$SVF_HOME/Debug-build/bin:$PATH" 31 | 32 | fi 33 | 34 | echo "[+] the LLVM directory is $LLVM_DIR" 35 | export LLVM_COMPILER_PATH=$LLVM_DIR/bin 36 | # Build the passes first, then come back to this test directory (paths quoted so spaces are safe). 37 | DIR=$(pwd) 38 | cd "$ROOT_DIR/passes" 39 | make install || exit 1 40 | cd "$DIR" 41 | 42 | export LLVM_BITCODE_GENERATION_FLAGS="-flto" 43 | BENCH="target" 44 | 45 | "$LLVM_COMPILER_PATH/clang" -O1 -flto -g -c -o $BENCH.base.bc $BENCH.c 46 | "$LLVM_COMPILER_PATH/llvm-link" -o $BENCH.linked.bc $BENCH.base.bc 47 | ../opt -dump-call-tree -call-tree-start="main" -dump-tree-file='call-tree.log' -o /dev/null $BENCH.linked.bc 48 | ../opt -internalize -internalize-public-api-file='call-tree.log' -globaldce -o $BENCH.linked_int.bc $BENCH.linked.bc 49 | ../opt -cgc-planner -cgc-strategy=params -cgc-funcs='main' -stat=0 -cgc-calls-treshold=1000000 -func-stats -dump-weights -dump-weights-root='main' -cgc -cgc-clone-prefix='' -cgc-fill=1 -dump-call-tree -call-tree-start="main" -dump-tree-file='call-tree.log' -o $BENCH.cgc0.bc $BENCH.linked_int.bc 50 | ../opt -internalize -internalize-public-api-file='call-tree.log' -globaldce -o $BENCH.cgc.bc $BENCH.cgc0.bc 51 | # ../opt -func-stats $BENCH.linked_int.bc -o /dev/null 52 | # ../opt -func-stats $BENCH.cgc.bc -o /dev/null 53 | 54 | "$LLVM_COMPILER_PATH/clang++" -O1 ../driver.cc $BENCH.cgc.bc -o $BENCH.out 55 | -------------------------------------------------------------------------------- /tests/test/target.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int glob1[8] = {0}; 4 | int glob2[8] = {0}; 5 | 6 | __attribute_noinline__ int foo(int* ptr) { 7 | return ptr[2] + 1; 8 | } 9 | 10 | __attribute_noinline__ int func1(int* ptr) { 11 | return foo(ptr); 12 | } 13 | 14 | 
__attribute_noinline__ int func2(int* ptr) { /* second wrapper marked __attribute_noinline__: forwards ptr to foo() */ 15 | return foo(ptr); 16 | } 17 | 18 | __attribute_noinline__ int func3(int* ptr) { /* third wrapper marked __attribute_noinline__: forwards ptr to foo() */ 19 | return foo(ptr); 20 | } 21 | 22 | int main(int argc, char** argv) { /* reaches foo() through three wrappers: func1 on glob1, func2 and func3 on glob2; presumably gives the cgc passes several distinct call paths to the same callee - confirm */ 23 | return func1(glob1) + func2(glob2) + func3(glob2); 24 | } --------------------------------------------------------------------------------