├── .gitignore
├── Makefile.inc
├── README.md
├── SVF-all.patch
├── SVF-contextdda-fix.patch
├── SVF-node-allocator.patch
├── SVF-padded-vtables.patch
├── aflpp-link-safe.c
├── bin
    ├── wrap-gclang
    ├── wrap-gclang++
    └── wrap_gclang.py
├── build.sh
├── clean_remake.sh
├── install_svf.sh
├── passes
    ├── Makefile
    ├── Makefile.inc
    ├── Makefile.svf.inc
    ├── add-sanitize-attr
    │   ├── Makefile
    │   └── add-sanitize-attr.cpp
    ├── cgc-planner
    │   ├── Makefile
    │   └── cgc-planner.cpp
    ├── cgc
    │   ├── Makefile
    │   ├── cgc.cpp
    │   └── cgc_old.cpp
    ├── dump-call-tree
    │   ├── Makefile
    │   └── dump-call-tree.cpp
    ├── dump-calls
    │   ├── Makefile
    │   └── dump-calls.cpp
    ├── dump-extlib
    │   ├── Makefile
    │   └── dump-extlib.cpp
    ├── func-stats
    │   ├── Makefile
    │   └── func-stats.cpp
    ├── icp
    │   ├── Makefile
    │   └── icp.cpp
    ├── include
    │   ├── common
    │   │   ├── cgc_magics.h
    │   │   └── pass.h
    │   ├── sdag
    │   │   ├── sdag-print.h
    │   │   └── sdag.h
    │   └── svfa
    │   │   └── SVFAPass.h
    └── set-norecurse-ext
    │   ├── Makefile
    │   └── set-norecurse-ext.cpp
├── remake.sh
└── tests
    ├── driver.c
    ├── driver.cc
    ├── opt
    └── test
        ├── build.sh
        └── target.c


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.bc
 2 | *.bcc
 3 | *.so
 4 | *.pdf
 5 | *.ll
 6 | *.o
 7 | *.resolution.txt
 8 | *.S
 9 | binutils*/
10 | *.csv
11 | *.png
12 | *.out
13 | /setup.sh
14 | SVF/
15 | SVF2/
16 | llvm-9/
17 | *.taint
18 | dft.log
19 | dfsan_abilist.txt
20 | *.dot
21 | callgrind.*
22 | cachegrind.*
23 | *.function.list
24 | *.color
25 | *.txt
26 | *.indent
27 | *.log
28 | .vscode/
29 | benchmarks/*
30 | *.svg
31 | .DS_Store
32 | 


--------------------------------------------------------------------------------
/Makefile.inc:
--------------------------------------------------------------------------------
 1 | V?=0
 2 | ifneq ($V,0)
 3 | QUIET=
 4 | ECHO:=@\#
 5 | QMAKE=VERBOSE=1 make
 6 | else
 7 | QUIET= @
 8 | ECHO= echo
 9 | QMAKE=make -s
10 | endif
11 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Predictive Context-sensitive Fuzzing
 2 | 
 3 | This repository hosts the code for the paper [Predictive Context-sensitive Fuzzing](https://www.ndss-symposium.org/ndss-paper/predictive-context-sensitive-fuzzing/), which appeared at NDSS 2024.
 4 | 
 5 | ### Getting started
 6 | 
 7 | Install the dependencies with:
 8 | ```bash
 9 | # install the dependencies
10 | $ apt-get update && \
11 | apt-get install -y wget libstdc++-5-dev libtool-bin automake flex bison \
12 |                    libglib2.0-dev libpixman-1-dev python3-setuptools unzip \
13 |                    apt-utils apt-transport-https ca-certificates \
14 |                    binutils
15 | 
16 | # install llvm-10
17 | $ apt install -y lsb-release wget software-properties-common && wget https://apt.llvm.org/llvm.sh && chmod +x llvm.sh && ./llvm.sh 10
18 | 
19 | # Download and install the latest stable Go (for gllvm)
20 | $ wget https://storage.googleapis.com/golang/getgo/installer_linux && \
21 |     chmod +x ./installer_linux && \
22 |     ./installer_linux
23 | $ export PATH=$PATH:$HOME/.go/bin:/go/bin
24 | 
25 | # Download and compile afl++ of 08/2020.
26 | $ git clone https://github.com/AFLplusplus/AFLplusplus.git ./afl && \
27 |     cd ./afl && \
28 |     git checkout 2e15661f184c77ac1fbb6f868c894e946cbb7f17
29 | 
30 | # Build without Python support as we don't need it.
31 | # Set AFL_NO_X86 to skip flaky tests. (The previous command already left us inside ./afl.)
32 | $ unset CFLAGS && unset CXXFLAGS && \
33 |     export CC=clang && export AFL_NO_X86=1 && \
34 |     PYTHON_INCLUDE=/ make LLVM_CONFIG=llvm-config-10 && make install
35 |     
36 | # Build the AFL wrapper with gclang
37 | wget https://raw.githubusercontent.com/llvm/llvm-project/5feb80e748924606531ba28c97fe65145c65372e/compiler-rt/lib/fuzzer/afl/afl_driver.cpp -O afl_driver.cpp
38 | clang++-10 -std=c++11 -O2 -c afl_driver.cpp
39 | ar r libAFLDriver.a afl_driver.o
40 | gclang++ -std=c++11 -O2 -c afl_driver.cpp -o afl_driver_gclang.o
41 | ar r libAFLDriverGclang.a afl_driver_gclang.o
42 | ```
43 | 
44 | Build the function cloning passes with:
45 | ```bash
46 | $ export LLVM_DIR="/usr/lib/llvm-10" # or the llvm-10 path
47 | $ ./build.sh
48 | ```
49 | 
50 | Then compile a harness with the drop-in wrappers that we provide in the `bin` folder: `wrap-gclang` and `wrap-gclang++` (both pointing to `wrap_gclang.py`) automatically run the needed passes.
51 | 
52 | To set up a correct env for the build process, do the following steps (`OUT` is your build output directory, we follow the [FuzzBench envs](https://google.github.io/fuzzbench/getting-started/adding-a-new-fuzzer/#what-is-fuzzer_lib)):
53 | 
54 | ```bash
55 | export CC=./afl/afl-clang-fast
56 | export CXX=./afl/afl-clang-fast++
57 | export FUZZER_LIB=./afl/libAFLDriverGclang.a
58 | 
59 | export AFL_LLVM_DICT2FILE=$OUT/afl++.dict
60 | 
61 | export AFL_QUIET=1
62 | export AFL_MAP_SIZE=2621440
63 | 
64 | export REAL_CC_PATH=$CC
65 | export REAL_CXX_PATH=$CXX
66 | export CC=./bin/wrap-gclang
67 | export CXX=./bin/wrap-gclang++
68 | 
69 | export LLVM_BITCODE_GENERATION_FLAGS=-flto
70 | export WLLVM_OUTPUT_LEVEL=ERROR
71 | ```
72 | 
73 | You can set the environment variable `CGC_STRATEGY` to change the prioritization strategy (the default is dataflow) and `CGC_MAXMAP` to enlarge the maximum map size.
74 | 
75 | Now you can compile your target simply using CC/CXX and link with:
76 | 
77 | ```bash
78 | $CXX yourfiles.o[...] $FUZZER_LIB -o youroutput.bin
79 | ```
80 | 
81 | If you want sanitization, we suggest adding `-O1 -fsanitize=address -fsanitize=array-bounds,bool,builtin,enum,float-divide-by-zero,function,integer-divide-by-zero,null,object-size,return,returns-nonnull-attribute,shift,signed-integer-overflow,unreachable,vla-bound,vptr`.
82 | 
83 | The last step is just to [fuzz with AFL++](https://github.com/AFLplusplus/AFLplusplus/blob/stable/docs/fuzzing_in_depth.md#a-running-afl-fuzz); we suggest additionally using a CmpLog-instrumented binary.
84 | 
85 | ### Cite
86 | ```
87 | @inproceedings{pred-ctx-fuzz,
88 |     author = {Borrello, Pietro and Fioraldi, Andrea and D'Elia, Daniele Cono and Balzarotti, Davide and Querzoni, Leonardo and Giuffrida, Cristiano},
89 |     title = {Predictive Context-sensitive Fuzzing},
90 |     year = {2024},
91 |     booktitle = {Network and Distributed System Security Symposium (NDSS)}
92 | }
93 | ```
94 | 


--------------------------------------------------------------------------------
/SVF-all.patch:
--------------------------------------------------------------------------------
  1 | From 12ede5e903bd806c984217b0fc37f873f9718248 Mon Sep 17 00:00:00 2001
  2 | From: Pietro Borrello <borrello@diag.uniroma1.it>
  3 | Date: Fri, 28 May 2021 13:44:45 +0200
  4 | Subject: Fix handling of padded vtables (common in asan builds)
  5 | 
  6 | ---
  7 |  lib/SVF-FE/CHG.cpp | 28 ++++++++++++++++++++++++++--
  8 |  1 file changed, 26 insertions(+), 2 deletions(-)
  9 | 
 10 | diff --git a/lib/SVF-FE/CHG.cpp b/lib/SVF-FE/CHG.cpp
 11 | index 00bfbcf..fef1902 100644
 12 | --- a/lib/SVF-FE/CHG.cpp
 13 | +++ b/lib/SVF-FE/CHG.cpp
 14 | @@ -128,7 +128,19 @@ void CHGraph::buildCHGNodes(const GlobalValue *globalvalue)
 15 |  
 16 |          for (unsigned int ei = 0; ei < vtblStruct->getNumOperands(); ++ei)
 17 |          {
 18 | -            const ConstantArray *vtbl = SVFUtil::dyn_cast<ConstantArray>(vtblStruct->getOperand(ei));
 19 | +            Constant *operand = vtblStruct->getOperand(ei);
 20 | +            // Sometimes ASAN adds padding to vtable by embedding them in structs
 21 | +            // so we should check and unpack them
 22 | +            if (!SVFUtil::isa<ConstantArray>(operand)) {
 23 | +                ConstantStruct *opStruct = SVFUtil::dyn_cast<ConstantStruct>(operand);
 24 | +                if(!opStruct) {
 25 | +                    // We should skip handling the padding, in the form of an array
 26 | +                    assert(SVFUtil::isa<ArrayType>(operand->getType()));
 27 | +                    continue;
 28 | +                }
 29 | +                operand = opStruct->getOperand(0);
 30 | +            }
 31 | +            const ConstantArray *vtbl = SVFUtil::dyn_cast<ConstantArray>(operand);
 32 |              assert(vtbl && "Element of initializer not an array?");
 33 |              for (u32_t i = 0; i < vtbl->getNumOperands(); ++i)
 34 |              {
 35 | @@ -434,8 +446,20 @@ void CHGraph::analyzeVTables(const Module &M)
 36 |  
 37 |              for (unsigned int ei = 0; ei < vtblStruct->getNumOperands(); ++ei)
 38 |              {
 39 | +                Constant *operand = vtblStruct->getOperand(ei);
 40 | +                // Sometimes ASAN adds padding to vtable by embedding them in structs
 41 | +                // so we should check and unpack them
 42 | +                if (!SVFUtil::isa<ConstantArray>(operand)) {
 43 | +                    ConstantStruct *opStruct = SVFUtil::dyn_cast<ConstantStruct>(operand);
 44 | +                    if(!opStruct) {
 45 | +                        // We should skip handling the padding, in the form of an array
 46 | +                        assert(SVFUtil::isa<ArrayType>(operand->getType()));
 47 | +                        continue;
 48 | +                    }
 49 | +                    operand = opStruct->getOperand(0);
 50 | +                }
 51 |                  const ConstantArray *vtbl =
 52 | -                    SVFUtil::dyn_cast<ConstantArray>(vtblStruct->getOperand(ei));
 53 | +                    SVFUtil::dyn_cast<ConstantArray>(operand);
 54 |                  assert(vtbl && "Element of initializer not an array?");
 55 |  
 56 |                  /*
 57 | -- 
 58 | 2.17.1
 59 | 
 60 | 
 61 | From eebe3d824cb29455732e9d7dac9911bc9711efde Mon Sep 17 00:00:00 2001
 62 | From: Pietro Borrello <borrello@diag.uniroma1.it>
 63 | Date: Fri, 28 May 2021 13:49:33 +0200
 64 | Subject: contextDDA: add check on NULL refVal in isHeapCondMemObj
 65 | 
 66 | ---
 67 |  lib/DDA/ContextDDA.cpp | 12 +++++++++++-
 68 |  1 file changed, 11 insertions(+), 1 deletion(-)
 69 | 
 70 | diff --git a/lib/DDA/ContextDDA.cpp b/lib/DDA/ContextDDA.cpp
 71 | index 6b37821..b53d1e0 100644
 72 | --- a/lib/DDA/ContextDDA.cpp
 73 | +++ b/lib/DDA/ContextDDA.cpp
 74 | @@ -316,7 +316,17 @@ bool ContextDDA::isHeapCondMemObj(const CxtVar& var, const StoreSVFGNode*)
 75 |      assert(mem && "memory object is null??");
 76 |      if(mem->isHeap())
 77 |      {
 78 | -        if(const Instruction* mallocSite = SVFUtil::dyn_cast<Instruction>(mem->getRefVal()))
 79 | +        if (!mem->getRefVal()) {
 80 | +            PAGNode *pnode = _pag->getPAGNode(getPtrNodeID(var));
 81 | +            if(GepObjPN* gepobj = SVFUtil::dyn_cast<GepObjPN>(pnode)) {
 82 | +                assert(SVFUtil::isa<DummyObjPN>(_pag->getPAGNode(gepobj->getBaseNode())) && "emtpy refVal in a gep object whose base is a non-dummy object");
 83 | +            }
 84 | +            else {
 85 | +                assert((SVFUtil::isa<DummyObjPN>(pnode) || SVFUtil::isa<DummyValPN>(pnode)) && "empty refVal in non-dummy object");
 86 | +            }
 87 | +            return true;
 88 | +        }
 89 | +        else if(const Instruction* mallocSite = SVFUtil::dyn_cast<Instruction>(mem->getRefVal()))
 90 |          {
 91 |              const Function* fun = mallocSite->getFunction();
 92 |              const SVFFunction* svfFun = LLVMModuleSet::getLLVMModuleSet()->getSVFFunction(fun);
 93 | -- 
 94 | 2.17.1
 95 | 
 96 | 
 97 | From c5be7f023f4456eaacd917df9c44f58956feb516 Mon Sep 17 00:00:00 2001
 98 | From: Pietro Borrello <borrello@diag.uniroma1.it>
 99 | Date: Fri, 28 May 2021 13:57:46 +0200
100 | Subject: NodeIDAllocator: set Strategy::SEQ as the default
101 | 
102 | ---
103 |  lib/Util/Options.cpp | 2 +-
104 |  1 file changed, 1 insertion(+), 1 deletion(-)
105 | 
106 | diff --git a/lib/Util/Options.cpp b/lib/Util/Options.cpp
107 | index ac71de5..495a317 100644
108 | --- a/lib/Util/Options.cpp
109 | +++ b/lib/Util/Options.cpp
110 | @@ -14,7 +14,7 @@ namespace SVF
111 |  
112 |      const llvm::cl::opt<NodeIDAllocator::Strategy> Options::NodeAllocStrat(
113 |          "node-alloc-strat",
114 | -        llvm::cl::init(NodeIDAllocator::Strategy::DEBUG),
115 | +        llvm::cl::init(NodeIDAllocator::Strategy::SEQ),
116 |          llvm::cl::desc("Method of allocating (LLVM) values and memory objects as node IDs"),
117 |          llvm::cl::values(
118 |              clEnumValN(NodeIDAllocator::Strategy::DENSE, "dense", "allocate objects together and values together, separately (default)"),
119 | -- 
120 | 2.17.1
121 | 
122 | 


--------------------------------------------------------------------------------
/SVF-contextdda-fix.patch:
--------------------------------------------------------------------------------
 1 | From eebe3d824cb29455732e9d7dac9911bc9711efde Mon Sep 17 00:00:00 2001
 2 | From: Pietro Borrello <borrello@diag.uniroma1.it>
 3 | Date: Fri, 28 May 2021 13:49:33 +0200
 4 | Subject: contextDDA: add check on NULL refVal in isHeapCondMemObj
 5 | 
 6 | ---
 7 |  lib/DDA/ContextDDA.cpp | 12 +++++++++++-
 8 |  1 file changed, 11 insertions(+), 1 deletion(-)
 9 | 
10 | diff --git a/lib/DDA/ContextDDA.cpp b/lib/DDA/ContextDDA.cpp
11 | index 6b37821..b53d1e0 100644
12 | --- a/lib/DDA/ContextDDA.cpp
13 | +++ b/lib/DDA/ContextDDA.cpp
14 | @@ -316,7 +316,17 @@ bool ContextDDA::isHeapCondMemObj(const CxtVar& var, const StoreSVFGNode*)
15 |      assert(mem && "memory object is null??");
16 |      if(mem->isHeap())
17 |      {
18 | -        if(const Instruction* mallocSite = SVFUtil::dyn_cast<Instruction>(mem->getRefVal()))
19 | +        if (!mem->getRefVal()) {
20 | +            PAGNode *pnode = _pag->getPAGNode(getPtrNodeID(var));
21 | +            if(GepObjPN* gepobj = SVFUtil::dyn_cast<GepObjPN>(pnode)) {
22 | +                assert(SVFUtil::isa<DummyObjPN>(_pag->getPAGNode(gepobj->getBaseNode())) && "emtpy refVal in a gep object whose base is a non-dummy object");
23 | +            }
24 | +            else {
25 | +                assert((SVFUtil::isa<DummyObjPN>(pnode) || SVFUtil::isa<DummyValPN>(pnode)) && "empty refVal in non-dummy object");
26 | +            }
27 | +            return true;
28 | +        }
29 | +        else if(const Instruction* mallocSite = SVFUtil::dyn_cast<Instruction>(mem->getRefVal()))
30 |          {
31 |              const Function* fun = mallocSite->getFunction();
32 |              const SVFFunction* svfFun = LLVMModuleSet::getLLVMModuleSet()->getSVFFunction(fun);
33 | -- 
34 | 2.17.1
35 | 
36 | 


--------------------------------------------------------------------------------
/SVF-node-allocator.patch:
--------------------------------------------------------------------------------
 1 | From c5be7f023f4456eaacd917df9c44f58956feb516 Mon Sep 17 00:00:00 2001
 2 | From: Pietro Borrello <borrello@diag.uniroma1.it>
 3 | Date: Fri, 28 May 2021 13:57:46 +0200
 4 | Subject: NodeIDAllocator: set Strategy::SEQ as the default
 5 | 
 6 | ---
 7 |  lib/Util/Options.cpp | 2 +-
 8 |  1 file changed, 1 insertion(+), 1 deletion(-)
 9 | 
10 | diff --git a/lib/Util/Options.cpp b/lib/Util/Options.cpp
11 | index ac71de5..495a317 100644
12 | --- a/lib/Util/Options.cpp
13 | +++ b/lib/Util/Options.cpp
14 | @@ -14,7 +14,7 @@ namespace SVF
15 |  
16 |      const llvm::cl::opt<NodeIDAllocator::Strategy> Options::NodeAllocStrat(
17 |          "node-alloc-strat",
18 | -        llvm::cl::init(NodeIDAllocator::Strategy::DEBUG),
19 | +        llvm::cl::init(NodeIDAllocator::Strategy::SEQ),
20 |          llvm::cl::desc("Method of allocating (LLVM) values and memory objects as node IDs"),
21 |          llvm::cl::values(
22 |              clEnumValN(NodeIDAllocator::Strategy::DENSE, "dense", "allocate objects together and values together, separately (default)"),
23 | -- 
24 | 2.17.1
25 | 
26 | 


--------------------------------------------------------------------------------
/SVF-padded-vtables.patch:
--------------------------------------------------------------------------------
 1 | From 12ede5e903bd806c984217b0fc37f873f9718248 Mon Sep 17 00:00:00 2001
 2 | From: Pietro Borrello <borrello@diag.uniroma1.it>
 3 | Date: Fri, 28 May 2021 13:44:45 +0200
 4 | Subject: Fix handling of padded vtables (common in asan builds)
 5 | 
 6 | ---
 7 |  lib/SVF-FE/CHG.cpp | 28 ++++++++++++++++++++++++++--
 8 |  1 file changed, 26 insertions(+), 2 deletions(-)
 9 | 
10 | diff --git a/lib/SVF-FE/CHG.cpp b/lib/SVF-FE/CHG.cpp
11 | index 00bfbcf..fef1902 100644
12 | --- a/lib/SVF-FE/CHG.cpp
13 | +++ b/lib/SVF-FE/CHG.cpp
14 | @@ -128,7 +128,19 @@ void CHGraph::buildCHGNodes(const GlobalValue *globalvalue)
15 |  
16 |          for (unsigned int ei = 0; ei < vtblStruct->getNumOperands(); ++ei)
17 |          {
18 | -            const ConstantArray *vtbl = SVFUtil::dyn_cast<ConstantArray>(vtblStruct->getOperand(ei));
19 | +            Constant *operand = vtblStruct->getOperand(ei);
20 | +            // Sometimes ASAN adds padding to vtable by embedding them in structs
21 | +            // so we should check and unpack them
22 | +            if (!SVFUtil::isa<ConstantArray>(operand)) {
23 | +                ConstantStruct *opStruct = SVFUtil::dyn_cast<ConstantStruct>(operand);
24 | +                if(!opStruct) {
25 | +                    // We should skip handling the padding, in the form of an array
26 | +                    assert(SVFUtil::isa<ArrayType>(operand->getType()));
27 | +                    continue;
28 | +                }
29 | +                operand = opStruct->getOperand(0);
30 | +            }
31 | +            const ConstantArray *vtbl = SVFUtil::dyn_cast<ConstantArray>(operand);
32 |              assert(vtbl && "Element of initializer not an array?");
33 |              for (u32_t i = 0; i < vtbl->getNumOperands(); ++i)
34 |              {
35 | @@ -434,8 +446,20 @@ void CHGraph::analyzeVTables(const Module &M)
36 |  
37 |              for (unsigned int ei = 0; ei < vtblStruct->getNumOperands(); ++ei)
38 |              {
39 | +                Constant *operand = vtblStruct->getOperand(ei);
40 | +                // Sometimes ASAN adds padding to vtable by embedding them in structs
41 | +                // so we should check and unpack them
42 | +                if (!SVFUtil::isa<ConstantArray>(operand)) {
43 | +                    ConstantStruct *opStruct = SVFUtil::dyn_cast<ConstantStruct>(operand);
44 | +                    if(!opStruct) {
45 | +                        // We should skip handling the padding, in the form of an array
46 | +                        assert(SVFUtil::isa<ArrayType>(operand->getType()));
47 | +                        continue;
48 | +                    }
49 | +                    operand = opStruct->getOperand(0);
50 | +                }
51 |                  const ConstantArray *vtbl =
52 | -                    SVFUtil::dyn_cast<ConstantArray>(vtblStruct->getOperand(ei));
53 | +                    SVFUtil::dyn_cast<ConstantArray>(operand);
54 |                  assert(vtbl && "Element of initializer not an array?");
55 |  
56 |                  /*
57 | -- 
58 | 2.17.1
59 | 
60 | 


--------------------------------------------------------------------------------
/aflpp-link-safe.c:
--------------------------------------------------------------------------------
1 | __attribute__((weak)) unsigned int * __afl_fuzz_len;  // weak definition: any strong definition of the same symbol takes precedence at link time
2 | __attribute__((weak)) unsigned char *__afl_fuzz_ptr;  // weak definition, same override rule as above
3 | __attribute__((weak)) int __afl_persistent_loop(unsigned int x) { return 0; }  // weak fallback: ignores x and always returns 0
4 | __attribute__((weak)) void __afl_manual_init() {}  // weak fallback: does nothing; presumably lets links succeed without the AFL++ runtime -- confirm
5 | 


--------------------------------------------------------------------------------
/bin/wrap-gclang:
--------------------------------------------------------------------------------
1 | wrap_gclang.py


--------------------------------------------------------------------------------
/bin/wrap-gclang++:
--------------------------------------------------------------------------------
1 | wrap_gclang.py


--------------------------------------------------------------------------------
/bin/wrap_gclang.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import subprocess
  4 | import shutil
  5 | import json
  6 | import sys
  7 | import os
  8 | 
  9 | import errno
 10 | from hashlib import sha256
 11 | from tempfile import gettempdir
 12 | from time import time, sleep
 13 | 
 14 | class ILockException(Exception):  # raised by ILock.lock() on timeout or on re-entering a non-reentrant lock
 15 |     pass
 16 | 
 17 | class ILock(object):
 18 |     def __init__(self, name, timeout=None, check_interval=0.25, reentrant=False, lock_directory=None):
 19 |         self._timeout = timeout if timeout is not None else 10 ** 8
 20 |         self._check_interval = check_interval
 21 | 
 22 |         lock_directory = gettempdir() if lock_directory is None else lock_directory
 23 |         unique_token = sha256(name.encode()).hexdigest()
 24 |         self._filepath = os.path.join(lock_directory, 'ilock-' + unique_token + '.lock')
 25 | 
 26 |         self._reentrant = reentrant
 27 | 
 28 |         self._enter_count = 0
 29 | 
 30 |     def lock(self):
 31 |         import portalocker
 32 |   
 33 |         if self._enter_count > 0:
 34 |             if self._reentrant:
 35 |                 self._enter_count += 1
 36 |                 return self
 37 |             raise ILockException('Trying re-enter a non-reentrant lock')
 38 | 
 39 |         current_time = call_time = time()
 40 |         while call_time + self._timeout >= current_time:
 41 |             self._lockfile = open(self._filepath, 'w')
 42 |             try:
 43 |                 portalocker.lock(self._lockfile, portalocker.constants.LOCK_NB | portalocker.constants.LOCK_EX)
 44 |                 self._enter_count = 1
 45 |                 return self
 46 |             except portalocker.exceptions.LockException:
 47 |                 pass
 48 | 
 49 |             current_time = time()
 50 |             check_interval = self._check_interval if self._timeout > self._check_interval else self._timeout
 51 |             sleep(check_interval)
 52 | 
 53 |         raise ILockException('Timeout was reached')
 54 | 
 55 |     def __enter__(self):
 56 |         return self.lock()
 57 | 
 58 |     def unlock(self):
 59 |         self._enter_count -= 1
 60 | 
 61 |         if self._enter_count > 0:
 62 |             return
 63 | 
 64 |         if sys.platform.startswith('linux'):
 65 |             # In Linux you can delete a locked file
 66 |             os.unlink(self._filepath)
 67 | 
 68 |         self._lockfile.close()
 69 | 
 70 |         if sys.platform == 'win32':
 71 |             # In Windows you need to unlock a file before deletion
 72 |             try:
 73 |                 os.remove(self._filepath)
 74 |             except WindowsError as e:
 75 |                 # Mute exception in case an access was already acquired (EACCES)
 76 |                 #  and in more rare case when it was even already released and file was deleted (ENOENT)
 77 |                 if e.errno not in [errno.EACCES, errno.ENOENT]:
 78 |                     raise
 79 | 
 80 |     def __exit__(self, exc_type, exc_val, exc_tb):
 81 |         return self.unlock()
 82 | 
 83 | SOURCE_EXTENSIONS = ('.c', '.cc', '.cpp', '.h',
 84 |                      '.hpp')
 85 | FILTER_EXTENSIONS = ('.c', '.cc', '.cpp', '.h',
 86 |                      '.hpp', '.o', '.obj', '.a', '.la')
 87 | 
 88 | script_dir = os.path.dirname(os.path.realpath(os.path.abspath(__file__)))  # directory of this script, symlinks resolved
 89 | 
 90 | is_cxx = "++" in sys.argv[0]  # C++ mode when invoked through a name containing "++"
 91 | 
 92 | is_debug = os.getenv("WRAP_GCLANG_DEBUG") is not None  # when set, the *_exec helpers print each command to stderr
 93 | keep_symbols = os.getenv("CGC_KEEP_SYMBOLS") is not None
 94 | compiler_path = os.getenv("LLVM_COMPILER_PATH")  # optional directory used to locate clang / opt / llvm-* tools
 95 | benchmark = os.getenv("BENCHMARK")
 96 | fuzzer = os.getenv("FUZZER")
 97 | experiment = os.getenv("EXPERIMENT", 'noexp')
 98 | 
 99 | fuzz_programs = []  # filled below from FUZZ_TARGET (basename) and FUZZ_PROGRAMS (comma-separated list)
100 | fuzz_target = os.getenv("FUZZ_TARGET")
101 | # ffmpeg_ffmpeg_demuxer_fuzzer first compiles `tools/target_dem_fuzzer` and then moves it to /out/ffmpeg_DEMUXER_fuzzer
102 | if fuzz_target is not None and benchmark == 'ffmpeg_ffmpeg_demuxer_fuzzer' and  'ffmpeg_DEMUXER_fuzzer' in fuzz_target:
103 |     fuzz_target = fuzz_target.replace('ffmpeg_DEMUXER_fuzzer', 'target_dem_fuzzer')
104 | 
105 | if fuzz_target is not None:
106 |     fuzz_programs.append(os.path.basename(fuzz_target))
107 | if os.getenv("FUZZ_PROGRAMS") is not None:
108 |     fuzz_programs += list(map(lambda x: x.strip(), os.getenv("FUZZ_PROGRAMS").split(",")))  # entries are whitespace-stripped
109 | 
110 | configure_only = os.getenv('WLLVM_CONFIGURE_ONLY')
111 | 
112 | def get_string(s):  # build a string from an iterable of integers, mapping each value v to chr(v - 1)
113 |     res = ''
114 |     for ss in s:
115 |         res += chr(ss - 1)  # every stored value is the character code shifted up by one
116 |     return res
117 | 
118 | def get_stats(filename):
119 |     if os.getenv("OPT_PATH"):
120 |         opt_name = os.environ["OPT_PATH"]
121 |     elif compiler_path is not None:
122 |         opt_name = os.path.join(compiler_path, "opt")
123 |     else:
124 |         opt_name = "opt"
125 |     out = subprocess.check_output("%s -load=%s/func-stats.so -func-stats %s -o /dev/null" % (opt_name, script_dir, filename), shell=True).decode()
126 |     assert('Num functions: ' in out and 'Num BBs      : ' in out and 'AFL edges    : ' in out)
127 |     num_funcs = int(out.split('Num functions: ')[1].split('\n')[0])
128 |     num_bb    = int(out.split('Num BBs      : ')[1].split('\n')[0])
129 |     afl_edges = int(out.split('AFL edges    : ')[1].split('\n')[0])
130 |     return num_funcs, num_bb, afl_edges
131 | 
132 | def get_filesize(file):  # size of `file` in bytes, or 0 when os.path.getsize raises OSError
133 |     try:
134 |         return os.path.getsize(file)
135 |     except OSError:
136 |         return 0
137 | 
138 | def log_stats(filename):  # collect function/BB/edge counts and file size for `filename` and format them as one record
139 |     strategy = os.getenv("CGC_STRATEGY")
140 |     type = "icp" if os.getenv("FORCE_ICP") else "noicp"  # note: shadows the builtin `type` inside this function
141 |     bc = os.path.basename(filename)
142 |     num_funcs, num_bb, afl_edges = get_stats(filename)  # runs opt on the file and asserts on its output
143 |     filesize = get_filesize(filename)
144 |     data = 'stats,type=%s experiment="%s",benchmark="%s",fuzzer="%s",bc="%s",strategy="%s",num_functions=%di,num_bb=%di,afl_edges=%di,size=%di' % (type, experiment, benchmark, fuzzer, bc, strategy, num_funcs, num_bb, afl_edges, filesize)  # NOTE(review): `data` is built but neither returned nor sent anywhere here -- confirm this is intended
145 | 
146 | def log_msg(filename, msg):  # format a message record about `filename`
147 |     strategy = os.getenv("CGC_STRATEGY")
148 |     type = "icp" if os.getenv("FORCE_ICP") else "noicp"  # note: shadows the builtin `type` inside this function
149 |     bc = os.path.basename(filename)
150 |     data = 'msgs,type=%s experiment="%s",benchmark="%s",fuzzer="%s",file="%s",strategy="%s",msg="%s"' % (type, experiment, benchmark, fuzzer, bc, strategy, msg)  # NOTE(review): `data` is built but neither returned nor sent anywhere here -- confirm this is intended
151 | 
152 | # gclang does not forward optimization flags to the linking step, so -fsanitize=object-size
153 | # triggers a warning about missing optimizations when compiling.
154 | # This is usually harmless, but it makes some ./configure scripts fail.
155 | def filter_objsan(args):  # remove the object-size sanitizer from `args`; the list is modified in place, nothing is returned
156 |     for i, arg in enumerate(args):
157 |         if arg.startswith('-fsanitize='):
158 |             args[i] = args[i].replace('object-size,', '') # if not last
159 |             args[i] = args[i].replace(',object-size', '') # if last
160 |     if '-fsanitize=object-size' in args: args.remove('-fsanitize=object-size') # if alone; NOTE(review): list.remove drops only the first such flag
161 | 
162 | def gclang_exec(args, capture_output=False):  # run gclang (or gclang++ in C++ mode) with `args`; returns the subprocess.run result
163 |     if os.getenv("GCLANG_PATH"):
164 |         cc_name = os.environ["GCLANG_PATH"]
165 |     else:
166 |         cc_name = "gclang"
167 |     if is_cxx:  # in C++ mode the C choice above is always replaced
168 |         if os.getenv("GCLANGXX_PATH"):
169 |             cc_name = os.environ["GCLANGXX_PATH"]
170 |         else:
171 |             cc_name = "gclang++"
172 |     argv = [cc_name] + args
173 |     if is_debug:
174 |         print(" ".join(argv), file=sys.stderr)
175 |     if capture_output:  # when requested, stdout and stderr are captured into the returned object
176 |         return subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
177 |     else:
178 |         return subprocess.run(argv)  # the exit status is not checked here; callers inspect .returncode
179 | 
180 | 
181 | def cc_exec(args, capture_output=False):  # run the real compiler (clang, or clang++ in C++ mode) with `args`; returns the subprocess.run result
182 |     if os.getenv("REAL_CC_PATH"):  # lookup order: REAL_CC_PATH, then LLVM_COMPILER_PATH/clang, then "clang" from PATH
183 |         cc_name = os.environ["REAL_CC_PATH"]
184 |     elif compiler_path is not None:
185 |         cc_name = os.path.join(compiler_path, "clang")
186 |     else:
187 |         cc_name = "clang"
188 |     if is_cxx:  # in C++ mode the C choice above is always replaced, using the analogous lookup order
189 |         if os.getenv("REAL_CXX_PATH"):
190 |             cc_name = os.environ["REAL_CXX_PATH"]
191 |         elif compiler_path is not None:
192 |             cc_name = os.path.join(compiler_path, "clang++")
193 |         else:
194 |             cc_name = "clang++"
195 |     argv = [cc_name] + args
196 |     if is_debug:
197 |         print(" ".join(argv), file=sys.stderr)
198 |     if capture_output:  # when requested, stdout and stderr are captured into the returned object
199 |         return subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
200 |     else:
201 |         return subprocess.run(argv)  # the exit status is not checked here; callers inspect .returncode
202 | 
203 | 
204 | def opt_exec(args, capture_output=False, check_ret=True, wrapper_cmd=None, save_output=False, save_input=False):  # run LLVM opt with `args`; returns the subprocess.run result
205 |     if os.getenv("OPT_PATH"):  # lookup order: OPT_PATH, then LLVM_COMPILER_PATH/opt, then "opt" from PATH
206 |         cc_name = os.environ["OPT_PATH"]
207 |     elif compiler_path is not None:
208 |         cc_name = os.path.join(compiler_path, "opt")
209 |     else:
210 |         cc_name = "opt"
211 |     argv = [cc_name] + args
212 |     if wrapper_cmd is not None:
213 |         argv = wrapper_cmd + argv  # wrapper_cmd is a list of words placed in front of the opt command
214 |     if is_debug:
215 |         print(" ".join(argv), file=sys.stderr)
216 |     # ugly docker debug
217 |     if os.path.exists('/host_tmp'):
218 |         os.system("cp %s /host_tmp" % args[-1])  # presumably the last argument is the input file -- verify against callers
219 |     if save_input:
220 |         os.system('cp %s %s' % (argv[-1], os.getenv('OUT', '/tmp/')))  # copy destination is $OUT, or /tmp/ when unset
221 |     if capture_output:
222 |         ret = subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
223 |     else:
224 |         ret = subprocess.run(argv)
225 |     if check_ret:
226 |         assert(ret.returncode == 0)  # AssertionError on failure; asserts are skipped when Python runs with -O
227 |     # ugly docker debug
228 |     if ret.returncode == 0 and os.path.exists('/host_tmp'):
229 |         os.system("cp %s /host_tmp" % args[-2])  # presumably the second-to-last argument is the output file -- verify against callers
230 |     if save_output:
231 |         os.system('cp %s %s' % (argv[-2], os.getenv('OUT', '/tmp/')))
232 |     return ret
233 | 
234 | def extract_exec(args, capture_output=False, check_ret=True):  # run llvm-extract with `args`; returns the subprocess.run result
235 |     if os.getenv("EXTRACT_PATH"):  # lookup order: EXTRACT_PATH, then LLVM_COMPILER_PATH/llvm-extract, then PATH
236 |         ext_name = os.environ["EXTRACT_PATH"]
237 |     elif compiler_path is not None:
238 |         ext_name = os.path.join(compiler_path, "llvm-extract")
239 |     else:
240 |         ext_name = "llvm-extract"
241 |     argv = [ext_name] + args
242 |     if is_debug:
243 |         print(" ".join(argv), file=sys.stderr)
244 |     if capture_output:  # when requested, stdout and stderr are captured into the returned object
245 |         ret = subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
246 |     else:
247 |         ret = subprocess.run(argv)
248 |     if check_ret:
249 |         assert(ret.returncode == 0)  # AssertionError on failure; asserts are skipped when Python runs with -O
250 |     return ret
251 | 
252 | def link_exec(args, capture_output=False, check_ret=True):  # run llvm-link with `args`; returns the subprocess.run result
253 |     if os.getenv("LINK_PATH"):  # lookup order: LINK_PATH, then LLVM_COMPILER_PATH/llvm-link, then PATH
254 |         tool_name = os.environ["LINK_PATH"]
255 |     elif compiler_path is not None:
256 |         tool_name = os.path.join(compiler_path, "llvm-link")
257 |     else:
258 |         tool_name = "llvm-link"
259 |     argv = [tool_name] + args
260 |     if is_debug:
261 |         print(" ".join(argv), file=sys.stderr)
262 |     if capture_output:  # when requested, stdout and stderr are captured into the returned object
263 |         ret = subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
264 |     else:
265 |         ret = subprocess.run(argv)
266 |     if check_ret:
267 |         assert(ret.returncode == 0)  # AssertionError on failure; asserts are skipped when Python runs with -O
268 |     return ret
269 | 
270 | def strip_exec(args):  # run llvm-strip with `args`, discarding its output; raises CalledProcessError on a non-zero exit
271 |     if os.getenv("STRIP_PATH"):  # lookup order: STRIP_PATH, then LLVM_COMPILER_PATH/llvm-strip, then PATH
272 |         tool_name = os.environ["STRIP_PATH"]
273 |     elif compiler_path is not None:
274 |         tool_name = os.path.join(compiler_path, "llvm-strip")
275 |     else:
276 |         tool_name = "llvm-strip"
277 |     argv = [tool_name] + args
278 |     subprocess.check_call(argv, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
279 | 
def get_bc(filename, bc_filename=None, strict_mode=False, capture_output=False):
    """Run ``get-bc`` on ``filename`` and return the completed process.

    filename: the binary/object/archive to pull bitcode from.
    bc_filename: output path passed via ``-o``; defaults to ``filename + '.bc'``.
    strict_mode: when true, ``-S`` is added to the command line.
    capture_output: when true, stdout and stderr are piped into the result.

    The executable is taken from the ``GETBC_PATH`` environment variable when
    it is set and non-empty, otherwise ``get-bc`` is looked up on PATH. The
    exit status is NOT checked here; callers inspect ``returncode``.
    """
    if bc_filename is None:
        bc_filename = filename + '.bc'
    # Previously the override was read into a variable that was never used and
    # the command always started with a hard-coded 'get-bc'.
    cc_name = os.getenv("GETBC_PATH") or "get-bc"
    argv = [cc_name, '-b', '-o', bc_filename]
    if strict_mode:
        argv.append('-S')
    argv.append(filename)
    if is_debug:
        print(" ".join(argv), file=sys.stderr)
    if capture_output:
        return subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    else:
        return subprocess.run(argv)
297 | 
298 | 
def common_opts():
    """Return a fresh list with the flags added to every compiler invocation."""
    opts = []
    # BEWARE: debug info must be emitted so that libraries can later be
    # extracted based on the file path
    opts.append("-g")
    opts.extend((
        "-fno-function-sections",
        '-fno-unique-section-names',
        "-funroll-loops",
    ))
    # "-fno-discard-value-names" is currently disabled
    return opts
309 | 
310 | 
def cc_mode():
    """Compile-only path: forward the command line to ``gclang_exec``.

    The user arguments (``sys.argv[1:]``) are first passed through
    ``filter_objsan``; ``-ffunction-sections`` is dropped, the common options
    are put in front, and ``-O3`` is prepended when no ``-O*`` flag was given.
    A non-zero exit status of the compiler trips an assertion.
    """
    user_args = sys.argv[1:]
    filter_objsan(user_args)

    forwarded = [a for a in user_args if a != '-ffunction-sections']
    args = common_opts() + forwarded
    if not any(a.startswith('-O') for a in user_args):
        args.insert(0, '-O3')

    assert(gclang_exec(args).returncode == 0)
326 | 
def add_afl_symbols(outfile):
    """Append the AFL++ runtime symbol names to ``outfile``, one per line.

    The file is created if it does not exist. Writing is done directly from
    Python rather than through ``echo ... >> file`` in a shell, so paths that
    contain spaces or shell metacharacters are handled correctly and no
    subprocesses are spawned.
    """
    afl_symbols = (
        '__afl_persistent_loop',
        '__afl_manual_init',
        '__afl_fuzz_len',
        '__afl_fuzz_ptr',
    )
    with open(outfile, 'a') as f:
        for symbol in afl_symbols:
            f.write(symbol + '\n')
332 | 
def gen_whitelist():
    """Build the whitelist string from the ``BENCHMARK`` and ``SRC`` env vars.

    The result is ``"<project>,^<SRC>$"`` where ``<project>`` is the part of
    ``BENCHMARK`` before the first ``_`` and, within that, before the first
    ``-``. Both variables must be set and non-empty (asserted).
    """
    bench_name = os.getenv("BENCHMARK")
    assert bench_name
    # The project name is the leading component of the benchmark name
    before_underscore = bench_name.partition("_")[0]
    proj_name = before_underscore.partition("-")[0]
    fuzzbench_src = os.getenv("SRC")
    assert fuzzbench_src
    # first entry: any path containing the project name;
    # second entry: exactly the SRC directory
    return "%s,^%s$" % (proj_name, fuzzbench_src)
342 | 
def get_cache_size(index):
    """Return the size in bytes of CPU0's cache ``index`` as reported by sysfs.

    index: value substituted into
        ``/sys/devices/system/cpu/cpu0/cache/index<index>/size``.

    The file content is either a plain decimal number or a number followed by
    a ``K``/``M`` suffix (either case), interpreted as multiples of 1024 and
    1024*1024. Anything else (including an empty file) trips an assertion.
    Raises OSError if the sysfs file cannot be opened.
    """
    path = '/sys/devices/system/cpu/cpu0/cache/index{}/size'.format(index)
    # use a context manager so the handle is closed deterministically
    with open(path) as f:
        cache_size = f.read().strip()

    multipliers = {'K': 1024, 'M': 1024 * 1024}
    # slicing (instead of indexing) keeps an empty string from raising IndexError
    suffix = cache_size[-1:].upper()
    if suffix in multipliers:
        return int(cache_size[:-1]) * multipliers[suffix]

    # Do not expect Gb sized caches in the near future :(
    assert cache_size.isdecimal(), "unexpected cache size %r" % cache_size
    return int(cache_size)
353 | 
def get_map_limit():
    """Return the map size limit in bytes.

    ``CGC_MAXMAP``, when set, is parsed as an integer and returned. Otherwise
    the limit is the size of the CPU cache whose index is given by
    ``CGC_CACHEDMAP`` (default ``"2"``).
    """
    explicit_limit = os.getenv("CGC_MAXMAP")
    if explicit_limit is not None:
        return int(explicit_limit)
    return get_cache_size(os.getenv("CGC_CACHEDMAP", "2"))
359 | 
# gclang fails to extract the bitcode for source files that sit inside a linker
# group, so hoist them out of the group; `-o output` is hoisted as well when it
# appears inside a group
def fix_linker_groups(args):
    """Move source files and ``-o <file>`` out of every linker group, in place.

    For each ``-Wl,--start-group`` ... ``-Wl,--end-group`` range in ``args``,
    every argument ending with one of ``SOURCE_EXTENSIONS`` and every
    ``-o`` together with its parameter is relocated to just before the
    ``-Wl,--start-group`` marker, preserving relative order. The list is
    mutated; nothing is returned.
    """
    search_from = 0
    while True:
        # a missing marker (ValueError from list.index) ends the search
        try:
            group_start = args.index('-Wl,--start-group', search_from)
            group_end = args.index('-Wl,--end-group', group_start)
        except ValueError:
            return

        cursor = group_start + 1
        while cursor < group_end:
            current = args[cursor]
            if current.endswith(SOURCE_EXTENSIONS):
                # pop + insert before the group keeps group_end's index stable
                args.insert(group_start, args.pop(cursor))
                group_start += 1
            elif current == '-o':
                # relocate both the `-o` and its parameter
                args.insert(group_start, args.pop(cursor))
                args.insert(group_start + 1, args.pop(cursor + 1))
                group_start += 2
            else:
                cursor += 1
        search_from = group_end + 1
387 | 
def ld_mode():
    """Link-step path: link normally, then rebuild the binary from its bitcode.

    Outline of what the body does:
      1. Parse ``sys.argv[1:]`` into two command lines: ``linker_args`` for the
         initial ``gclang_exec`` link and ``args`` for the final ``cc_exec``
         relink of ``<out>.final.bc.o``.
      2. If ``fuzz_programs`` is non-empty and the output is not one of them,
         just link with ``gclang_exec`` and return.
      3. Link, extract the whole-program bitcode with ``get_bc``, and log sizes.
      4. Unless ``NO_PASSES`` is set, run the ``opt`` pipeline; each stage is
         gated by an environment variable (``NO_INTERNALIZE``/``NO_INTERNALIZE1``,
         ``NO_EXTRACT``, ``FORCE_ICP``, ``NO_CGC``, ``FORCE_INTERNALIZE``) and
         ``input_fname`` always names the most recent bitcode file.
      5. Optionally run the call-logging pass (``CGC_LOG_CALLS``), link the
         extracted library bitcode back in, compile the final bitcode to an
         object and relink.

    NOTE(review): the ``CGC_ONLY_CGC_EVAL`` branch returns while ``ilock`` (if
    taken) is still locked; ``ilock.unlock()`` only runs at the very end.
    """
    old_args = sys.argv[1:]
    filter_objsan(old_args)

    # args: command line of the final relink; linker_args: command line of the
    # first (gclang) link
    args = common_opts() + ['-Wl,--allow-multiple-definition']
    linker_args = common_opts() + [os.path.join(script_dir, 'aflpp-link-safe.o')]#, '-lrt', '-pthread']

    outname = None

    have_o = False
    opt_level = None
    have_std = []
    filtereds = []
    i = 0
    while i < len(old_args):
        if old_args[i].startswith('-O'):
            have_o = True
            opt_level = old_args[i]
        if old_args[i].startswith('-std='):
            have_std = [old_args[i]]
        if not old_args[i] == '-ffunction-sections':
            linker_args.append(old_args[i])
        if old_args[i] == '-o':
            # NOTE(review): a trailing `-o` without a parameter raises IndexError here
            outname = old_args[i + 1]
            linker_args.append(outname)
            # the final link consumes the object compiled from the final bitcode
            args += [outname + '.final.bc.o', '-o', outname]
            i += 1
        elif not old_args[i].endswith(FILTER_EXTENSIONS):
            args.append(old_args[i])
        else:
            # kept aside: re-added to `args` below only if no bitcode can be extracted
            filtereds.append(old_args[i])
        i += 1
    if not have_o:
        args = ['-O3'] + args
        linker_args = ['-O3'] + linker_args
        opt_level = '-O3'

    if outname is None:
        outname = 'a.out'
        args += [outname + '.final.bc.o', '-o', outname]

    # not one of the selected fuzz programs: plain link, no bitcode pipeline
    if len(fuzz_programs) > 0 and os.path.basename(outname) not in fuzz_programs:
        assert(gclang_exec(old_args + [os.path.join(script_dir, 'aflpp-link-safe.o')]).returncode == 0)
        return

    fix_linker_groups(linker_args)
    assert(gclang_exec(linker_args).returncode == 0)

    log_msg(benchmark, "start")
    # writes <outname>.bc (get_bc's default output name)
    assert(get_bc(outname, capture_output=True).returncode == 0)

    for fname in filtereds:
        orig_fname = fname
        if fname.startswith("-l:"):
            fname = fname[3:]
        if fname.endswith('.o') and get_bc(fname, strict_mode=True, capture_output=True).returncode != 0:
            args += [orig_fname] # reinclude in the link command
        # reinclude also libs/libsz.a needed by libs like `libhdf5` (in matio_matio_fuzzer it links the system libhdf5 for which bitcode is unavailable)
        elif fname.endswith('.a') and (get_bc(fname, strict_mode=True, capture_output=True).returncode != 0 or 'libs/libsz.a' in fname):
            args += [orig_fname] # reinclude in the link command

    # strip and log original size
    strip_exec(['--strip-all-gnu', outname])
    log_msg(benchmark, "orig_size: %d" % get_filesize(outname))

    # optional lock, taken only when WRAP_GCLANG_LOCK is set; released at the end
    ilock = None
    if os.getenv("WRAP_GCLANG_LOCK") is not None:
        ilock = ILock(os.getenv("WRAP_GCLANG_LOCK"))
        ilock.lock()

    # input_fname tracks the output of the last stage that ran
    input_fname = outname + '.bc'
    log_stats(input_fname)
    if os.getenv("NO_PASSES") is None:
        if os.getenv("NO_INTERNALIZE") is None and os.getenv("NO_INTERNALIZE1") is None:
            # dump the call tree rooted at main, then internalize everything not listed in it
            opt_exec(['-load=%s/dump-call-tree.so' % script_dir, '-dump-call-tree', '-call-tree-start=main', '-dump-tree-file=call-tree.log',
                      '-o', '/dev/null', outname + '.bc'])
            add_afl_symbols("call-tree.log")
            opt_exec(['-internalize', '-internalize-public-api-file=call-tree.log',
                      '-globaldce', '-o', outname + '.internalized.bc', input_fname])
            input_fname = outname + '.internalized.bc'
            log_stats(input_fname)
        if os.getenv("NO_EXTRACT") is None:
            whitelist = gen_whitelist()
            # the pass rewrites input_fname in place and lists the functions to extract in funcs.log
            opt_exec(['-load=%s/dump-extlib.so' % script_dir, '-dump-extlib', '-dumpext-whitelist=%s' % whitelist,
                      '-dumpext-blacklist=third_party,third-party', '-dumpext-out=funcs.log',
                      '-o', input_fname, input_fname])
            functions_to_extract = open('funcs.log').read().strip()
            if len(functions_to_extract) > 0:
                # solidity has too many functions to extract, fix it
                if 'solidity' in benchmark:
                    # split the argument list in two halves and run llvm-extract twice
                    fl = functions_to_extract.split(' ')
                    functions_to_extract1 = fl[:len(fl)//2]
                    functions_to_extract2 = fl[len(fl)//2:]
                    extract_exec(functions_to_extract1 + ['-o', 'lib1.bc', input_fname])
                    extract_exec(functions_to_extract2 + ['-o', 'lib2.bc', input_fname])
                    link_exec(['-o', 'lib.bc', 'lib1.bc', 'lib2.bc'])
                    extract_exec(functions_to_extract1 + [ '--delete', '-o', outname + '.extracted1.bc', input_fname])
                    extract_exec(functions_to_extract2 + [ '--delete', '-o', outname + '.extracted.bc', outname + '.extracted1.bc'])
                else:
                    # lib.bc gets the listed functions, <out>.extracted.bc gets the rest
                    extract_exec(functions_to_extract.split(' ') + ['-o', 'lib.bc', input_fname])
                    extract_exec(functions_to_extract.split(' ') + [ '--delete', '-o', outname + '.extracted.bc', input_fname])
                opt_exec([opt_level, '-loop-unroll', '-o', 'lib.bc', 'lib.bc'])
                input_fname = outname + '.extracted.bc'
                log_stats(input_fname)
        if os.getenv("FORCE_ICP"):
            opt_exec(['-load=%s/icp.so' % script_dir, '-icp', '-icp-fallback', '-icp-type', '-icp-type-opaque-ptrs=0',
                      '-icp-alias', '-stat=0', '-ander', '-modelConsts', '-o', outname + '.icp.bc', input_fname])
            input_fname = outname + '.icp.bc'
            log_stats(input_fname)
        if os.getenv("NO_CGC") is None:
            cgc_strategy = os.getenv("CGC_STRATEGY") if os.getenv("CGC_STRATEGY") is not None else 'dataflow'
            cgc_fill = "0" if os.getenv("CGC_NOFILL") else "1"
            # extra pass lists, currently all empty
            scalarize = []
            sea_dependencies = []
            vectorize = []
            # split passes in two invocations, since it seems to avoid a crash with sqlite3 and sea-dsa which happens in mysterious conditions (only docker, no valgrind)
            opt_exec([opt_level, '-loop-unroll'] + scalarize + [
                      '-load=%s/cgc-planner.so' % script_dir] + sea_dependencies + ['-o', outname + '.temp.bc', input_fname], save_output=True)
            input_fname = outname + '.temp.bc'
            log_stats(input_fname)

            if os.getenv("CGC_ONLY_PTR_EVAL") is not None:
                # Run the ptr-eval pass with one strategy and return
                # "<pass output>|<content of time_stats.txt>", using -1 placeholders
                # for whichever file was not produced.
                def ptr_eval(strategy):
                    ofile = '%s.txt' % strategy
                    opt_exec(['-load=%s/ptr-eval.so' % script_dir, '-ptr-eval', '-ptr-strategy=%s' % strategy, 
                        '-ptr-out=%s' % ofile, '-stat=0', '-modelConsts',
                        '-o', '/dev/null', input_fname], check_ret=False, 
                        wrapper_cmd=['/usr/bin/time', "-f", "%M", '-o', 'time_stats.txt'])
                    if os.path.exists('time_stats.txt'):
                        with open('time_stats.txt') as f:
                            max_mem = f.read().strip().replace('\n', ' ')
                            os.remove('time_stats.txt')
                    else:
                        max_mem = '-1'

                    if os.path.exists(ofile):
                        with open(ofile) as f:
                            res = f.read()
                        os.remove(ofile)
                        return res + '|' + max_mem

                    else:
                        return ("%s: -1|-1|-1" % strategy) + '|' + max_mem

                log_msg(benchmark, ptr_eval('params'))
                log_msg(benchmark, ptr_eval('dataflowSea'))
                log_msg(benchmark, ptr_eval('dataflow'))

            if os.getenv("CGC_ONLY_CGC_EVAL") is not None:
                ret = opt_exec(['-load=%s/func-stats.so' % script_dir, '-func-stats', '-dump-graph',
                    '-o', '/dev/null', input_fname], check_ret=True, capture_output=True)
                with open('cgc.txt', 'w') as f:
                    f.write(ret.stdout.decode(errors='ignore'))

                out = subprocess.check_output(['python3', '%s/cgc.py' % script_dir, 'cgc.txt'])
                log_msg(benchmark, out.strip().decode(errors='ignore'))
                # NOTE(review): early exit; no final binary is produced and ilock is not released
                return

            opt_exec(['-load=%s/cgc-planner.so' % script_dir, '-cgc-planner', '-cgc-strategy=%s' % cgc_strategy, '-cgc-funcs=^main$', '-cgc-calls-treshold=50', '-stat=0', '-modelConsts'] + vectorize + 
                      ['-o', outname + '.lto.bc', input_fname])
            input_fname = outname + '.lto.bc'
            log_stats(input_fname)

            max_aflmap = get_map_limit()
            # if the libs have been extracted, set the max accordingly
            # (`functions_to_extract` only exists when NO_EXTRACT is unset, hence the order of the test)
            if os.getenv("NO_EXTRACT") is None and len(functions_to_extract) > 0:
                _, _, lib_edges = get_stats('lib.bc')
                _, _, cur_edges = get_stats(input_fname)
                max_aflmap -= lib_edges
                # grow the budget by whole map limits until the current module fits
                while cur_edges >= max_aflmap:
                    max_aflmap += get_map_limit()

            opt_exec(['-load=%s/cgc.so' % script_dir, '-cgc', '-cgc-clone-prefix=', '-cgc-max-aflmap=%d' % max_aflmap, '-cgc-fill=%s' % cgc_fill,
                      '-load=%s/dump-call-tree.so' % script_dir, '-dump-call-tree', '-call-tree-start=main', '-dump-tree-file=call-tree.log',
                      '-o', outname + '.cgc.bc', input_fname])
            input_fname = outname + '.cgc.bc'
            log_stats(input_fname)
        if os.getenv("FORCE_INTERNALIZE") is not None:
            add_afl_symbols("call-tree.log")
            opt_exec(['-internalize', '-internalize-public-api-file=call-tree.log', 
                      '-globaldce', '-o', outname + '.cgc.internalized.bc', input_fname])
            input_fname = outname + '.cgc.internalized.bc'
            log_stats(input_fname)

    if os.getenv("CGC_LOG_CALLS") is not None:
        opt_exec(['-load=%s/dump-calls.so' % script_dir, '-dump-calls', 
                '-o', outname + '.log.bc', input_fname])
        input_fname = outname + '.log.bc'
        log_stats(input_fname)

    # link the previously extracted library functions back in
    if os.getenv("NO_PASSES") is None and os.getenv("NO_EXTRACT") is None and len(functions_to_extract) > 0:
        link_exec(['-o', outname + '.linked.bc', input_fname, 'lib.bc'])
        input_fname = outname + '.linked.bc'
        log_stats(input_fname)

    shutil.copy(input_fname, outname + '.final.bc')
    log_stats(outname + '.final.bc')

    # compile the final bitcode to the object file that `args` links
    assert(cc_exec(common_opts() + have_std + [opt_level] + [outname + '.final.bc', '-c', '-o', outname + '.final.bc.o']).returncode == 0)

    #if fuzz_target is not None and 'grok' in fuzz_target:
    #    if '-std=c++11' in args: args.remove('-std=c++11')
    #    args = ['-std=gnu++2a'] + args

    # this fixes a bug at the linking stage for exiv2: `__sancov_pcs has both ordered [...] and unordered [...] sections`
    # see https://github.com/rust-lang/rust/issues/53945 and https://github.com/google/oss-fuzz/pull/6288 for details
    if 'exiv2' in benchmark:
        args += ['-fuse-ld=gold']

    assert(cc_exec(args).returncode == 0)

    if not keep_symbols:
        # strip and log final size
        strip_exec(['--strip-all-gnu', outname])
    log_msg(benchmark, "final_size: %d" % get_filesize(outname))
    log_msg(benchmark, "end")

    # ugly docker debug
    if os.path.exists('/host_tmp'):
        os.system("cp %s /host_tmp" % (outname + '.final.bc'))
        os.system("cp %s /host_tmp" % outname)

    if ilock is not None:
        ilock.unlock()
612 | 
613 | 
def is_ld_mode():
    """Tell whether this invocation is a link step.

    Returns False as soon as ``sys.argv`` contains any flag that rules out
    linking an executable; True otherwise.
    """
    non_link_flags = ("--version", "--target-help", "-c", "-E", "-S", "-shared")
    for flag in non_link_flags:
        if flag in sys.argv:
            return False
    return True
618 | 
619 | 
# Script entry point: choose the wrapper mode from the command line.
if len(sys.argv) <= 1:
    # no arguments at all: call cc_exec with an empty argument list
    cc_exec([])
elif is_ld_mode() and not configure_only:
    # a link step outside of a configure-only run: full bitcode pipeline
    ld_mode()
else:
    # everything else is forwarded as a plain compile
    cc_mode()
626 | 


--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Build SVF, the LLVM passes, and the AFL++ link-safety object (into bin/).

# Stop at the first failing step instead of building on top of a broken one
set -e

./install_svf.sh
make -C passes

# compile next to the wrapper scripts so the object ends up in bin/
cd bin && clang -c ../aflpp-link-safe.c
7 | 


--------------------------------------------------------------------------------
/clean_remake.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | set -x
 5 | 
 6 | ROOT_DIR="."
 7 | 
 8 | # setup llvm env variables
 9 | if [ -z "${LLVM_DIR}" ]; then 
10 | 
11 |   echo "[ ] retrieving the LLVM directory..."
12 | 
13 |   if [ -z "${LLVM_CONFIG}" ]; then 
14 |       export LLVM_CONFIG='llvm-config'
15 |   fi
16 | 
17 |   export LLVM_VER="$($LLVM_CONFIG --version 2>/dev/null | sed 's/git//')"
18 |   if [ "$LLVM_VER" = "" ]; then
19 |     echo "[!] llvm-config not found!"
20 |     exit 1
21 |   fi
22 | 
23 |   echo "[+] using LLVM $LLVM_VER"
24 | 
25 |   export PATH="$($LLVM_CONFIG --bindir)/bin:$SVF_HOME/Debug-build/bin:$PATH"
26 |   export LLVM_DIR="$($LLVM_CONFIG --prefix)"
27 | 
28 | else
29 | 
30 |   export PATH="$LLVM_DIR/bin:$SVF_HOME/Debug-build/bin:$PATH"
31 | 
32 | fi
33 | 
34 | echo "[+] the LLVM directory is $LLVM_DIR"
35 | export LLVM_COMPILER_PATH=$LLVM_DIR/bin
36 | 
37 | DIR=`pwd`
38 | cd $ROOT_DIR/passes
39 | make clean install || exit 1
40 | cd $DIR
41 | 


--------------------------------------------------------------------------------
/install_svf.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | export MAKEFLAGS="-j $(grep -c ^processor /proc/cpuinfo)"
 5 | 
 6 | if [ -z "${LLVM_DIR}" ]; then 
 7 | 
 8 |   echo "[ ] retrieving the LLVM directory..."
 9 | 
10 |   if [ -z "${LLVM_CONFIG}" ]; then 
11 |       export LLVM_CONFIG='llvm-config'
12 |   fi
13 | 
14 |   export LLVM_VER="$($LLVM_CONFIG --version 2>/dev/null | sed 's/git//')"
15 |   if [ "$LLVM_VER" = "" ]; then
16 |     echo "[!] llvm-config not found!"
17 |     exit 1
18 |   fi
19 | 
20 |   echo "[+] using LLVM $LLVM_VER"
21 | 
22 |   export PATH="$($LLVM_CONFIG --bindir)/bin:$SVF_HOME/Debug-build/bin:$PATH"
23 |   export LLVM_DIR="$($LLVM_CONFIG --prefix)"
24 | 
25 | else
26 | 
27 |   export PATH="$LLVM_DIR/bin:$SVF_HOME/Debug-build/bin:$PATH"
28 | 
29 | fi
30 | 
31 | echo "[+] the LLVM directory is $LLVM_DIR"
32 | 
33 | #
34 | # SVF
35 | #
36 | echo "[ ] preparing SVF..."
37 | 
38 | if [[ -d SVF ]]; then
39 |   echo "[!] the SVF directory already exists"
40 |   cd SVF
41 | else
42 |   git clone https://github.com/SVF-tools/SVF.git SVF
43 |   cd SVF
44 |   git checkout SVF-2.1
45 |   git am -3 -k ../SVF-all.patch
46 | 
47 |   git clone https://github.com/SVF-tools/Test-Suite.git
48 |   cd Test-Suite
49 |   git checkout 72c679a49b943abb229fcb1844f68dff9cc7d522
50 |   cd ..
51 | fi
52 | 
53 | echo "[+] SVF ready"
54 | 
55 | echo "[ ] compiling SVF..."
56 | source ./build.sh debug 
57 | 
58 | echo "[+] all done, goodbye!"
59 | 


--------------------------------------------------------------------------------
/passes/Makefile:
--------------------------------------------------------------------------------
 1 | DIRS := cgc func-stats dump-call-tree icp cgc-planner dump-extlib dump-calls
 2 | 
 3 | all: $(patsubst %,build_%,$(DIRS))
 4 | 
 5 | $(patsubst %,build_%,$(DIRS)): DIR = $(subst build_,,$@)
 6 | $(patsubst %,build_%,$(DIRS)):
 7 | 	@echo Building LLVM $(DIR)...
 8 | 	$(MAKE) -C $(DIR) install
 9 | 
10 | clean:
11 | 	$(foreach DIR, $(DIRS), $(MAKE) -C $(DIR) clean;)
12 | 
13 | install: all
14 | 


--------------------------------------------------------------------------------
/passes/Makefile.inc:
--------------------------------------------------------------------------------
 1 | include $(ROOT)/Makefile.inc
 2 | 
 3 | INSTALL_DIR = $(ROOT)/bin
 4 | 
 5 | ARCH ?= native
 6 | 
 7 | ifeq "$(LLVM_DIR)" ""
 8 |   LLVM_CONFIG ?= llvm-config
 9 |   LLVM_PREFIX = $(shell $(LLVM_CONFIG) --prefix)
10 | else
11 |   LLVM_PREFIX=$(shell readlink -f $(LLVM_DIR))
12 |   LLVM_CONFIG = $(LLVM_PREFIX)/bin/llvm-config
13 | endif
14 | 
15 | $(info [+] LLVM prefix at $(LLVM_PREFIX))
16 | $(info [+] LLVM config at $(LLVM_CONFIG))
17 | 
18 | LLVMVER  = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/git//' | sed 's/svn//' )
19 | ifeq "$(LLVMVER)" ""
20 |   $(warning [!] llvm-config not found!)
21 | endif
22 | 
23 | LLVM_BINDIR = $(shell $(LLVM_CONFIG) --bindir 2>/dev/null)
24 | LLVM_LIBDIR = $(shell $(LLVM_CONFIG) --libdir 2>/dev/null)
25 | LLVM_INCDIR = $(shell $(LLVM_CONFIG) --includedir 2>/dev/null)
26 | LLVM_STDCXX = gnu++11
27 | 
28 | LLVM_NEW_API = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[0-9]' && echo 1 || echo 0 )
29 | ifeq "$(LLVM_NEW_API)" "1"
30 |   $(info [+] detected llvm 10+, enabling c++14)
31 |   LLVM_STDCXX = c++14
32 | endif
33 | 
34 | CXX        = $(LLVM_BINDIR)/clang++
35 | 
36 | CXXFLAGS   += `$(LLVM_CONFIG) --cxxflags` -g -fPIC -Wno-long-long -Wall -W -Wno-unused-parameter -Wwrite-strings -Wno-unknown-warning-option -DHAVE_EXCEPTIONS=0 -march=$(ARCH)
37 | LDFLAGS    += `$(LLVM_CONFIG) --ldflags` -L$(LLVM_LIBDIR)
38 | 
39 | $(info [+] CXX = $(CXX))
40 | 
41 | PASSLIBNAME =$(PASSNAME).so
42 | 
43 | HEADERS += $(wildcard ../include/common/*.h) $(wildcard ../include/$(PASSNAME)/*.h)
44 | 
45 | INCLUDES += -I../include/common -I../include/$(PASSNAME)
46 | 
47 | all: $(PASSLIBNAME)
48 | 
49 | $(PASSLIBNAME): $(OBJS)
50 | 	$(QUIET) $(ECHO) "  [LINK] $@"
51 | 	$(CXX) -std=$(LLVM_STDCXX) $(CXXFLAGS) -shared -o $@ $(CPPS) $(OBJS) $(LDFLAGS) $(LIBS)
52 | 
53 | %.o: %.cpp $(HEADERS)
54 | 	$(QUIET) $(ECHO)  "  [C++] $<"
55 | 	$(CXX) -std=$(LLVM_STDCXX) $(CXXFLAGS) $(INCLUDES) -c -o $@ $<
56 | 
57 | install: $(INSTALL_DIR)/$(PASSLIBNAME)
58 | 
59 | $(INSTALL_DIR)/$(PASSLIBNAME): $(PASSLIBNAME)
60 | 	$(QUIET) $(ECHO) "  [INSTALL] $< -> $@"
61 | 	install -c -D -m 744 $? $@
62 | 
63 | clean:
64 | 	$(QUIET) $(ECHO) "  [RM] $(OBJS) $(PASSLIBNAME)"
65 | 	rm -f $(OBJS) $(PASSLIBNAME) $(INSTALL_DIR)/$(PASSLIBNAME)
66 | 


--------------------------------------------------------------------------------
/passes/Makefile.svf.inc:
--------------------------------------------------------------------------------
# Extra settings for passes that link against SVF (include after Makefile.inc).
# SVF is expected in $(ROOT)/SVF with its libraries under Debug-build/lib.
SVF_HOME=$(ROOT)/SVF
SVF_HEADER=$(SVF_HOME)/include
SVF_LIB=$(SVF_HOME)/Debug-build/lib

# static SVF and CUDD archives are appended to the link line
LDFLAGS += $(SVF_LIB)/libSvf.a $(SVF_LIB)/CUDD/libCudd.a
# NOTE(review): the compile/link rules in passes/Makefile.inc use CXXFLAGS, not
# CFLAGS, so these flags look unused unless the root Makefile.inc forwards
# CFLAGS somewhere - confirm, and switch to CXXFLAGS if they are meant to apply
CFLAGS += -fno-rtti -Wno-overloaded-virtual -Wno-ignored-qualifiers -Wno-reorder
INCLUDES += -I $(SVF_HEADER)
8 | 


--------------------------------------------------------------------------------
/passes/add-sanitize-attr/Makefile:
--------------------------------------------------------------------------------
# Makefile for the add-sanitize-attr pass
# ROOT is the repository root as seen from this directory
ROOT=../..

# name of the pass (the library is $(PASSNAME).so) and the objects it is built from
PASSNAME := add-sanitize-attr
OBJS := add-sanitize-attr.o

# shared build/install/clean rules
include ../Makefile.inc
8 | 
9 | 


--------------------------------------------------------------------------------
/passes/add-sanitize-attr/add-sanitize-attr.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <pass.h>
 3 | 
 4 | using namespace llvm;
 5 | 
 6 | #define DEBUG_TYPE "AddSanitizeAttr"
 7 | #define AddSanitizeAttrPassLog(M) LLVM_DEBUG(dbgs() << "AddSanitizeAttrPass: " << M << "\n")
 8 | #define oprint(s) outs() << s << "\n"
 9 | 
10 | namespace {
11 | 
12 |   class AddSanitizeAttrPass : public ModulePass {
13 | 
14 |   public:
15 |     static char ID;
16 |     AddSanitizeAttrPass() : ModulePass(ID) {}
17 | 
18 |     virtual bool runOnModule(Module &M) {
19 |         for (auto &F : M.getFunctionList()) {
20 |             if (F.isDeclaration())
21 |                 continue;
22 | 
23 |             // if(!F.hasFnAttribute(Attribute::NoSanitize))
24 |             F.addFnAttr(Attribute::SanitizeAddress);
25 |         }
26 | 
27 |       return true;
28 |     }
29 |   };
30 | 
31 | }
32 | 
// Definition of the pass ID and registration of the pass under the
// command-line name "add-sanitize-attr"
char AddSanitizeAttrPass::ID = 0;
RegisterPass<AddSanitizeAttrPass> MP("add-sanitize-attr", "AddSanitizeAttr Pass");
35 | 
36 | 


--------------------------------------------------------------------------------
/passes/cgc-planner/Makefile:
--------------------------------------------------------------------------------
# Makefile for the cgc-planner pass
# ROOT is the repository root as seen from this directory
ROOT=../..

# name of the pass (the library is $(PASSNAME).so) and the objects it is built from
PASSNAME := cgc-planner
OBJS := cgc-planner.o

# shared build/install/clean rules, then the SVF-specific additions
include ../Makefile.inc
include ../Makefile.svf.inc
9 | 


--------------------------------------------------------------------------------
/passes/cgc/Makefile:
--------------------------------------------------------------------------------
# Makefile for the cgc pass
# ROOT is the repository root as seen from this directory
ROOT=../..

# name of the pass (the library is $(PASSNAME).so) and the objects it is built from
PASSNAME := cgc
OBJS := cgc.o

# shared build/install/clean rules
include ../Makefile.inc
8 | 
9 | 


--------------------------------------------------------------------------------
/passes/cgc/cgc.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <pass.h>
  3 | #include <cgc_magics.h>
  4 | #include "llvm/Transforms/Utils/CallPromotionUtils.h"
  5 | #include "llvm/Transforms/Utils/Cloning.h"
  6 | #include <llvm/Transforms/Utils/BasicBlockUtils.h>
  7 | #include "llvm/Analysis/CFG.h"
  8 | #include "llvm/Analysis/CallGraph.h"
  9 | #include "llvm/Analysis/CallGraphSCCPass.h"
 10 | #include "llvm/ADT/SCCIterator.h"
 11 | #include "llvm/ADT/SmallVector.h"
 12 | #include "llvm/IR/CFG.h"
 13 | #include <algorithm>
 14 | #include <queue>
 15 | 
 16 | using namespace llvm;
 17 | 
 18 | #define DEBUG_TYPE "cgc"
 19 | #define cgcPassLog(M) LLVM_DEBUG(dbgs() << "CallgraphClonePass: " << M << "\n")
 20 | #define oprint(s) LLVM_DEBUG(dbgs() << s << "\n")
 21 | 
// -cgc-harden-funcs: comma-separated list of function regexes, may be given
// zero or more times
static cl::list<std::string>
HardenFunctions("cgc-harden-funcs",
    cl::desc("Specify all the comma-separated function regexes to harden against optimizer [default: main, LLVMFuzzerTestOneInput]"),
    cl::ZeroOrMore, cl::CommaSeparated, cl::NotHidden);

// -cgc-clone-prefix: string option, "__cgc_" unless overridden
static cl::opt<std::string>
ClonePrefix("cgc-clone-prefix",
    cl::desc("Specify the clone name prefix"),
    cl::init("__cgc_"), cl::NotHidden);

// -cgc-fill: boolean option, enabled by default
static cl::opt<bool>
CGCFill("cgc-fill", 
cl::init(true), cl::NotHidden,
cl::desc("If true will clone all the other calls once the planned ones have been completed"));

// -cgc-max-aflmap: defaults to 256*1024 (256 KiB), about the size of an
// average L2 cache
static cl::opt<unsigned>
MaxSize("cgc-max-aflmap", 
cl::init(256*1024), cl::NotHidden,
cl::desc("The maximum acceptable size for the AFL++ edge map"));
 42 | 
 43 | namespace {
 44 |   // This pass clones function calls based on decisions taken by CGC Planner on which
 45 |   // subgraph portion of the callgraph should be cloned
 46 |   class CallgraphClonePass : public ModulePass {
 47 | 
 48 |     // Keep track of all the functions belonging to strongly connected components
 49 |     std::set<Function*> SCCFunctions;
 50 | 
 51 |     std::map<Function*, std::set<Function*>> FunctionToSCC;
 52 |     std::map<Function*, std::set<CallBase*>> FunctionToCallBases;
 53 |     std::map<Function*, unsigned long>       FunctionToAFLMapSize;
 54 | 
 55 |     // Keep track of cloned functions
 56 |     std::set<Function*> FunctionClones;
 57 | 
 58 |     // Return the priority of the CallBase, an higher priority means the CallBase
 59 |     // should be cloned earlier
 60 |     static long getPriority(CallBase *CB) {
 61 |         MDNode* N;
 62 |         assert(CB);
 63 |         N = CB->getMetadata(CGC_CLONE_PRIORITY);
 64 |         if (N == NULL) return 0;
 65 |         Constant *val = dyn_cast<ConstantAsMetadata>(N->getOperand(0))->getValue();
 66 |         assert(val);
 67 |         long prio = cast<ConstantInt>(val)->getSExtValue();
 68 |         return prio;
 69 |     }
 70 | 
 71 |     // Save the priority value for a function that has been cloned
 72 |     static void setFunctionPriority(Function *F, long prio) {
 73 |         LLVMContext& C = F->getContext();
 74 |         MDNode* N = MDNode::get(C, ConstantAsMetadata::get(ConstantInt::get(C, APInt(sizeof(unsigned long)*8, prio, true))));
 75 |         F->setMetadata(CGC_CLONE_PRIORITY, N);
 76 |     }
 77 | 
 78 |     // Return the priority of the Function that has been cloned with
 79 |     static long getFunctionPriority(Function *F) {
 80 |         MDNode* N;
 81 |         assert(F);
 82 |         N = F->getMetadata(CGC_CLONE_PRIORITY);
 83 |         if (N == NULL) return 0;
 84 |         Constant *val = dyn_cast<ConstantAsMetadata>(N->getOperand(0))->getValue();
 85 |         if(!val) return 0;
 86 |         long prio = cast<ConstantInt>(val)->getSExtValue();
 87 |         return prio;
 88 |     }
 89 | 
 90 |     // Compare the priority of two CallBases, an higher priority means the CallBase
 91 |     // should be cloned earlier
 92 |     struct ComparePriority {
 93 |         bool operator()(CallBase *c1, CallBase *c2) {
 94 |             long prio1 = getPriority(c1);
 95 |             long prio2 = getPriority(c2);
 96 |             return prio1 < prio2;
 97 |         }
 98 |     };
 99 | 
100 |     // A priority queue for the CallBases, ordered by priority
101 |     using CallBaseQueue = std::priority_queue<CallBase*, std::vector<CallBase*>, ComparePriority>;
102 | 
103 |   public:
    // Pass ID object handed to the ModulePass constructor below
    static char ID;
    // Counter behind getUniqueID(); incremented on every call
    unsigned long unique_id = 0;
    // presumably the number of clones created so far - confirm against the users
    unsigned long nclones = 0;
    // presumably the running estimate of the AFL++ map size - confirm against the users
    unsigned long aflmap_size = 0;
    CallgraphClonePass() : ModulePass(ID) {}
109 | 
110 |     unsigned long getUniqueID() {
111 |         return ++unique_id;
112 |     }
113 | 
114 |     // Taken from: https://github.com/AFLplusplus
115 |     // True if block has successors and it dominates all of them.
116 |     bool isFullDominator(const BasicBlock *BB, const DominatorTree *DT) {
117 |         if (succ_begin(BB) == succ_end(BB)) return false;
118 |         for (const BasicBlock *SUCC : make_range(succ_begin(BB), succ_end(BB))) {
119 |             // if the edge is critical it will be splitted
120 |             if (isCriticalEdge(BB->getTerminator(), SUCC)) continue;
121 |             if (!DT->dominates(BB, SUCC)) return false;
122 |         }
123 |         return true;
124 |     }
125 | 
126 |     // Taken from: https://github.com/AFLplusplus
127 |     // True if block has predecessors and it postdominates all of them.
128 |     bool isFullPostDominator(const BasicBlock *       BB,
129 |                                     const PostDominatorTree *PDT) {
130 |         if (pred_begin(BB) == pred_end(BB)) return false;
131 |         for (const BasicBlock *PRED : make_range(pred_begin(BB), pred_end(BB))) {
132 |             // if the edge is critical it will be splitted
133 |             if (isCriticalEdge(PRED->getTerminator(), BB)) continue;
134 |             if (!PDT->dominates(BB, PRED)) return false;
135 |         }
136 |         return true;
137 |     }
138 | 
139 |     // Given a function, try to estimate the number of edges in the function that
140 |     // will be instrumented by AFLplusplus.
141 |     // It instruments edges by breaking all critial edges with a block in the middle
142 |     // and avoiding instrumenting blocks which are full dominators, or full 
143 |     // post-dominators with multiple predecessors.
144 |     unsigned long estimateAFLEdges(Function *F) {
145 |         DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>(*F).getDomTree();
146 |         PostDominatorTree *PDT = &getAnalysis<PostDominatorTreeWrapperPass>(*F).getPostDomTree();
147 |         unsigned edges = 0;
148 |         for (BasicBlock &BB: *F) {
149 |             // Do not instrument full dominators, or full post-dominators with multiple
150 |             // predecessors.
151 |             bool shouldInstrumentBlock = (&F->getEntryBlock() == &BB) || (!isFullDominator(&BB, DT) && 
152 |                                             !(isFullPostDominator(&BB, PDT) 
153 |                                             && !BB.getSinglePredecessor()));
154 |             if (shouldInstrumentBlock) ++edges;
155 | 
156 |             Instruction *TI = BB.getTerminator();
157 |             if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
158 |                 for (unsigned succ = 0, end = TI->getNumSuccessors(); succ != end; ++succ) {
159 |                     if (isCriticalEdge(TI, succ))
160 |                         ++edges;
161 |                 }
162 |         }
163 |         return edges;
164 |     }
165 | 
166 |     // Return true if `F` has been marked as a root from which to start cloning 
167 |     // by CGC Planner.
168 |     bool isCGCRoot(Function &F) {
169 |         MDNode* N;
170 |         N = F.getMetadata(CGC_ROOT_ATTR);
171 |         if (N == NULL) return false;
172 |         return true;
173 |     }
174 | 
175 |     // Return true if `CB` has been planned to be cloned by CGC Planner
176 |     bool isPlannedClone(CallBase &CB) {
177 |         MDNode* N;
178 |         N = CB.getMetadata(CGC_CLONE_CALL_ATTR);
179 |         if (N == NULL) return false;
180 |         return true;
181 |     }
182 | 
183 |     // Return true if `F` has an available_externally linkage (i.e. equivalent to a declaration)
184 |     bool isAvailableExternally(Function &F) {
185 |         GlobalValue::LinkageTypes L = F.getLinkage();
186 |         return GlobalValue::isAvailableExternallyLinkage(L);
187 |     }
188 | 
189 |     // Substitute all the trailings .x.y.z that llvm creates when having two functions
190 |     // with the same name, with some uniqueIDs to avoid long names
191 |     std::string compressName(std::string name) {
192 |         // find the last .num
193 |         std::string newName = name;
194 |         std::string::size_type idx = newName.rfind('.');
195 |         if (idx == std::string::npos || idx == newName.length()) {
196 |             return newName;
197 |         }
198 |         // ensure it is actually a number
199 |         int random = atoi(newName.substr(idx+1).c_str());
200 | 
201 |         while (random) {
202 |             newName = newName.substr(0, idx);
203 |             idx = newName.rfind('.');
204 |             if (idx == std::string::npos || idx == newName.length()) {
205 |                 return newName + "." + std::to_string(getUniqueID());
206 |             }
207 |             random = atoi(newName.substr(idx+1).c_str());
208 |         }
209 |         return newName + "." + std::to_string(getUniqueID());
210 |     }
211 | 
212 |     void setCloneName(Function *F) {
213 |         // if the function name already contains the prefix do not add it
214 |         if (F->getName().find(ClonePrefix) == std::string::npos)
215 |             F->setName(ClonePrefix + F->getName());
216 |         // Compress the clone name to avoid .1452.3394.9208.13831.27566...
217 |         // at the end
218 |         F->setName(compressName(F->getName().str()));
219 |     }
220 | 
221 |     // Replace all the dots in the name that llvm may insert with underscores
222 |     void normalizeName(Function *F) {
223 |         std::string newName = F->getName().str();
224 |         std::replace(newName.begin(), newName.end(), '.', '_');
225 |         F->setName(newName);
226 |     }
227 | 
228 |     // Mark the function so that it can be recognized as a clone
229 |     void markClone(Function *F) {
230 |         LLVMContext& C = F->getContext();
231 |         MDNode* N = MDNode::get(C, ConstantAsMetadata::get(ConstantInt::get(C, APInt(sizeof(unsigned long)*8, 1, true))));
232 |         F->setMetadata(CGC_CLONE_MARK, N);
233 | 
234 |         // NOTICE: A bit risky to change all names
235 |         std::string FName = F->getName().str();
236 |         F->setName(CGC_CLONE_MARK + std::to_string(getFunctionPriority(F)) + "_" + FName);
237 |     }
238 | 
239 |     // Visit a Constant AST to find and replace oldV with newV, returning a new constant
240 |     Constant *replaceConstant(Constant *C, Constant *newV, Constant *oldV) {
241 |         if (ConstantStruct *S = dyn_cast<ConstantStruct>(C)) {
242 |             SmallVector<Constant*, 8> Ops;
243 |             for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i) {
244 |                 Constant *op = S->getOperand(i);
245 |                 if (op == oldV)
246 |                     Ops.push_back(newV);
247 |                 else
248 |                     Ops.push_back(replaceConstant(op, newV, oldV));
249 |             }
250 | 
251 |             Constant* res = ConstantStruct::getAnon(Ops, true);
252 |             return res;
253 |         
254 |         } else if (ConstantExpr *E = dyn_cast<ConstantExpr>(C)) {
255 |             SmallVector<Constant*, 8> Ops;
256 |             for (unsigned i = 0, e = E->getNumOperands(); i != e; ++i) {
257 |                 Constant *op = E->getOperand(i);
258 |                 if (op == oldV)
259 |                     Ops.push_back(newV);
260 |                 else
261 |                     Ops.push_back(replaceConstant(op, newV, oldV));
262 |             }
263 | 
264 |             Constant *res = E->getWithOperands(Ops);
265 |             return res;
266 |             
267 |         } else {
268 |             return C;
269 |         }
270 |     }
271 | 
272 |     // Fix the prologue of newF, by substituting the occurencies of oldF.
273 |     // This allows us to clone functions without corrupting the prologue, that is
274 |     // left untouched by cloneFunction. -fsanitize=function uses prologues
275 |     void fixPrologue(Function *newF, Function *oldF) {
276 |         if (!newF->hasPrologueData()) return;
277 | 
278 |         Constant *prologue = replaceConstant(newF->getPrologueData(), newF, oldF);
279 |         newF->setPrologueData(prologue);
280 |     }
281 | 
282 |     // `dest` is a clone of `source`, with the instructions mapped 1to1 in the `VMap`.
283 |     // Update the `FunctionToCallBases` struct to keep track of the CallBases in
284 |     // `dest` that represent the clone CallBases of `source`.
285 |     // Update the `FunctionToAFLMapSize` to keep track of the estimation for the
286 |     // new clone.
287 |     void updateMetadata(Function *dest, Function *source, ValueToValueMapTy &VMap) {
288 |         assert(FunctionToCallBases.find(source) != FunctionToCallBases.end());
289 |         FunctionToCallBases[dest];
290 |         for (CallBase *CB: FunctionToCallBases[source]) {
291 |             CallBase *mappedCB = dyn_cast<CallBase>(VMap[CB]);
292 |             assert(mappedCB);
293 |             FunctionToCallBases[dest].insert(mappedCB);
294 |         }
295 | 
296 |         assert(FunctionToAFLMapSize.find(source) != FunctionToAFLMapSize.end());
297 |         FunctionToAFLMapSize[dest] = FunctionToAFLMapSize[source];
298 |     }
299 | 
300 |     // Gather all the calls to `F`, starting from `I` and visiting recursively all
301 |     // the users of `I`, to collect all the eventual calls to `F` originated by `I`
302 |     // e.g. call bitcast F, with I being the bitcast
303 |     void gatherEventualCallsTo(Function *F, Value *V, std::set<Instruction*> &callsToF) {
304 |         // If it is a call, just check if `F` is called
305 |         if (CallBase * CB = dyn_cast<CallBase>(V)) {
306 |             // check that the function is called and not passed as param
307 |             if (CB->getCalledOperand()->stripPointerCasts() == F) {
308 |                 callsToF.insert(CB);
309 |             }
310 |         // If it is a bitcast, visit all the users recursively
311 |         } else if (BitCastOperator * BO = dyn_cast<BitCastOperator>(V)) {
312 |             for (User* user: BO->users()) {
313 |                 gatherEventualCallsTo(F, user, callsToF);
314 |             }
315 |         }
316 |     }
317 | 
318 |     // Return true if `F` has multiple call sites so it makes sense to clone it
319 |     bool shouldCloneFunction(Function *F) {
320 |         // Do not clone LLVMFuzzerTestOneInput itself
321 |         if (F->getName().equals("LLVMFuzzerTestOneInput")) return false;
322 | 
323 |         unsigned int numCallsToF = 0;
324 |         std::set<Instruction*> callsToF;
325 |         // Gather all the calls to the function `F`
326 |         for (User* user: F->users()) {
327 |             gatherEventualCallsTo(F, user, callsToF);
328 | 
329 |             // No need to visit all the users, bailout if already true
330 |             if (callsToF.size() > 1) return true;
331 |         }
332 | 
333 |         numCallsToF = callsToF.size();
334 | 
335 |         // oprint(F->getName().str() << " - " << numCallsToF);
336 |         // We should clone the function only if it is called more than once
337 |         return numCallsToF > 1;
338 |     }
339 | 
340 |     // Return true if cloning `F` would not exceed the size limit.
341 |     bool allowedToClone(Function *F) {
342 |         unsigned long additional_edges = 0;
343 |         // If `F` is in a SCC we will clone the whole SCC while cloning `F`
344 |         if (isInSCC(F)) {
345 |             assert(FunctionToSCC.find(F) != FunctionToSCC.end());
346 |             std::set<Function*> SCC = FunctionToSCC[F];
347 |             for (Function *F: SCC) {
348 |                 assert(FunctionToAFLMapSize.find(F) != FunctionToAFLMapSize.end());
349 |                 additional_edges += FunctionToAFLMapSize[F];
350 |             }
351 |         // Otherwise just count `F`
352 |         } else {
353 |             assert(FunctionToAFLMapSize.find(F) != FunctionToAFLMapSize.end());
354 |             additional_edges += FunctionToAFLMapSize[F];
355 |         }
356 |         // More readable mf
357 |         if (aflmap_size + additional_edges > MaxSize) return false;
358 |         else return true;
359 |     }
360 | 
361 |     // Return true if the `SCC` has multiple call sites so it makes sense to clone it
362 |     bool shouldCloneSCC(std::set<Function*> &SCC) {
363 |         unsigned int numCallsToSCC = 0;
364 |         std::set<Instruction*> callsToSCC;
365 |         // Gather all the calls to each function in the `SCC`
366 |         for (Function *F: SCC) {
367 |             for (User* user: F->users()) {
368 |                 gatherEventualCallsTo(F, user, callsToSCC);
369 |             }
370 |         }
371 | 
372 |         // Count only the calls from outside the `SCC`
373 |         for (Instruction *call: callsToSCC) {
374 |             Function* callerF = call->getParent()->getParent();
375 |             if (SCC.find(callerF) == SCC.end())
376 |                 ++numCallsToSCC;
377 |         }
378 | 
379 |         // for (Function *F: SCC)
380 |         //     oprint(F->getName().str() << " - " << numCallsToSCC);
381 |         // We should clone the function only if it is called more than once
382 |         return numCallsToSCC > 1;
383 |     }
384 | 
385 |     // Add all the callbases in the function to the priority queue
386 |     void updateCallBaseQueue(CallBaseQueue &cgcCallBaseQueue, Function *F) {
387 |         for (CallBase *CB: FunctionToCallBases[F]) {
388 |             cgcCallBaseQueue.push(CB);
389 |         }
390 |     }
391 | 
392 |     // Update the metadata on SCC clones
393 |     void updateSCCMetadata(Function *SCCclone, std::set<Function*> &SCCClones) {
394 |         assert(FunctionToSCC.find(SCCclone) == FunctionToSCC.end());
395 |         FunctionToSCC[SCCclone] = SCCClones;
396 |         assert(SCCFunctions.find(SCCclone) == SCCFunctions.end());
397 |         SCCFunctions.insert(SCCclone);
398 |     }
399 | 
    // Visit the Strongly Connected Component where `F` belongs, to clone it as
    // a single node. Update `cgcCallBaseQueue` accordingly to continue the visit.
    //
    // The SCC is cloned as a whole only when `shouldCloneSCC` says so;
    // otherwise the original functions are tracked as if they were the clones
    // (without updating the clone statistics). In both cases every function
    // involved gets the priority `prio`.
    //
    // Returns the function that stands for `F` after the visit: its clone, or
    // `F` itself when the SCC has not been cloned.
    Function* addSCCClone(CallBaseQueue &cgcCallBaseQueue, Function* F, long prio) {
        // original SCC function -> function that replaces it (clone or itself)
        std::map<Function*, Function*> FtoClones;
        // the functions that make up the resulting SCC
        std::set<Function*> SCCClones;

        assert(FunctionToSCC.find(F) != FunctionToSCC.end());
        // Work on a copy: `FunctionToSCC` gets new entries further below
        std::set<Function*> SCC = FunctionToSCC[F];

        // Clone all the functions in the SCC
        bool should_clone = shouldCloneSCC(SCC);
        for (Function *SCCfunc: SCC) {
            // Clone original function if required
            if (should_clone) {
                ValueToValueMapTy VMap;
                Function *clone = CloneFunction(SCCfunc, VMap);
                assert(clone);
                updateMetadata(clone, SCCfunc, VMap);
                trackClone(clone, cgcCallBaseQueue);
                setCloneName(clone);
                fixPrologue(clone, SCCfunc);
                FtoClones[SCCfunc] = clone;
                SCCClones.insert(clone);
                // Add the priority to the clone to keep track of it
                setFunctionPriority(clone, prio);
            } else {
                // Set the original function as a clone without updating the number of clones
                trackClone(SCCfunc, cgcCallBaseQueue, /*update=*/false);
                FtoClones[SCCfunc] = SCCfunc;
                SCCClones.insert(SCCfunc);
                // Add the priority to the clone to keep track of it
                setFunctionPriority(SCCfunc, prio);
            }
        }

        // update metadata for SCC
        // (functions already registered in `FunctionToSCC`, i.e. the originals
        // when nothing has been cloned, are skipped)
        for (Function *SCCclone: SCCClones) {
            if (FunctionToSCC.find(SCCclone) == FunctionToSCC.end())
                updateSCCMetadata(SCCclone, SCCClones);
        }

        // Now rewire the functions in the SCC clones
        for (Function *SCCclone: SCCClones) {
            assert(FunctionToCallBases.find(SCCclone) != FunctionToCallBases.end());
            for (CallBase *CB: FunctionToCallBases[SCCclone]) {

                // For direct calls, simply redirect target to new clone
                Function *C = dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
                if (C) {
                    Function *clone;
                    // If the called function is in the SCC use the clone we generated
                    if (FtoClones.find(C) != FtoClones.end()) {
                        clone = FtoClones[C];
                        // When the call site uses a different function type
                        // (call through a cast), cast the clone to the type of
                        // the original called operand
                        if (clone->getFunctionType() != CB->getCalledOperand()->getType()->getPointerElementType())
                            CB->setCalledFunction(CB->getFunctionType(), CastInst::CreatePointerCast(clone, CB->getCalledOperand()->getType(), "", CB));
                        else
                            CB->setCalledFunction(clone);
                    // Otherwise plan a clone
                    } else {
                        // clone only if is a planned clone, otherwise leave as is
                        // NB: this assumes that all calls to `C` from `SCC`
                        // have been planned equally to be cloned or not, otherwise
                        // calls to `C` will not be consistent inside `SCC`
                        if (isPlannedClone(*CB) == false) continue;
                        cgcCallBaseQueue.push(CB);
                    }
                }
            }
        }
        return FtoClones[F];
    }
471 | 
472 |     // Clone the function `F`, and update the `cgcCallBaseQueue` to continue
473 |     // the visit
474 |     Function* addFunctionClone(CallBaseQueue &cgcCallBaseQueue, Function *F, long prio) {
475 | 
476 |         // The assertion is valid only if we visit the graph in BFS mode, i.e.
477 |         // starting from a single root, in the general case we may revisit a function
478 |         // that has been cloned, that now has two callers since his parent is cloned
479 |         // assert(!isClone(F));
480 | 
481 |         // bail out if blacklisted
482 |         if (isBlacklisted(F))
483 |             return F;
484 | 
485 |         // bail out if cloning `F` would exceed the max size
486 |         if (!allowedToClone(F))
487 |             return F;
488 | 
489 |         if (isInSCC(F))
490 |             return addSCCClone(cgcCallBaseQueue, F, prio);
491 | 
492 |         // Clone original function if required
493 |         if (shouldCloneFunction(F)) {
494 |             ValueToValueMapTy VMap;
495 |             Function *clone = CloneFunction(F, VMap);
496 |             assert(clone);
497 |             updateMetadata(clone, F, VMap);
498 |             trackClone(clone, cgcCallBaseQueue);
499 | 
500 |             setCloneName(clone);
501 |             fixPrologue(clone, F);
502 | 
503 |             // Add the target to the functions to process.
504 |             updateCallBaseQueue(cgcCallBaseQueue, clone);
505 | 
506 |             // Add the priority to the clone to keep track of it
507 |             setFunctionPriority(clone, prio);
508 | 
509 |             return clone;
510 |         } else {
511 |             // Set the original function as a clone without updating the number of clones
512 |             trackClone(F, cgcCallBaseQueue, /*update=*/false);
513 |             // Add the target to the functions to process.
514 |             updateCallBaseQueue(cgcCallBaseQueue, F);
515 | 
516 |             // Add the priority to the clone to keep track of it
517 |             setFunctionPriority(F, prio);
518 | 
519 |             return F;
520 |         }
521 |     }
522 | 
523 |     // Visit the call base `CB` to clone its target
524 |     void cgc(CallBase *CB, CallBaseQueue &cgcCallBaseQueue) {
525 |         Function *F = CB->getFunction();
526 | 
527 |         // bail out if blacklisted
528 |         if (isBlacklisted(F))
529 |             return;
530 | 
531 |         // For direct calls, simply redirect target to new clone
532 |         Function *C = dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
533 |         // this should not be an edge between two functions in the same SCC
534 |         assert(!isInSCC(F) || !isInSCC(C) || (FunctionToSCC[F].find(C) == FunctionToSCC[F].end() 
535 |             && FunctionToSCC[C].find(F) == FunctionToSCC[C].end()));
536 |         if (C) {
537 |             // clone only if is a planned clone, otherwise leave as is
538 |             if (isPlannedClone(*CB) == false) return;
539 |             long prio = getPriority(CB);
540 |             Function *clone = addFunctionClone(cgcCallBaseQueue, C, prio);
541 |             if (clone->getFunctionType() != CB->getCalledOperand()->getType()->getPointerElementType())
542 |                 CB->setCalledFunction(CB->getFunctionType(), CastInst::CreatePointerCast(clone, CB->getCalledOperand()->getType(), "", CB));
543 |             else
544 |                 CB->setCalledFunction(clone);
545 |         }
546 |     }
547 | 
548 |     // Check if `F` just calls himself
549 |     bool isSimplyRecursive(Function *F) {
550 |         for (auto &BB : *F)
551 |         for (auto &I : BB.instructionsWithoutDebug())
552 |             if (auto *CB = dyn_cast<CallBase>(&I)) {
553 |                 Function *Callee = dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
554 |                 
555 |                 // Function calls itself
556 |                 if (Callee == F) {
557 |                     return true;
558 |                 }
559 |             }
560 |         return false;
561 |     }
562 | 
563 |     // Visit the `SCC` to gather the informations needed in `FunctionToSCC` and
564 |     // `SCCFunctions`
565 |     void collectSCC(CallGraphSCC &SCC) {
566 |         std::set<Function *> Functions;
567 |         for (CallGraphNode *I : SCC) {
568 |             Functions.insert(I->getFunction());
569 |         }
570 | 
571 |         // If the SCC contains multiple nodes we know there is recursion.
572 |         if (Functions.size() != 1) {
573 |             for (Function *F : Functions) {
574 |                 SCCFunctions.insert(F);
575 |                 assert(!F->doesNotRecurse());
576 | 
577 |                 // A function should belong to a single SCC
578 |                 assert(FunctionToSCC.find(F) == FunctionToSCC.end());
579 |                 FunctionToSCC[F] = Functions;
580 |             }
581 |         // Take into account simple recursive functions
582 |         } else {
583 |             Function *F = *Functions.begin();
584 |             if (F && isSimplyRecursive(F)) {
585 |                 SCCFunctions.insert(F);
586 |                 assert(!F->doesNotRecurse());
587 | 
588 |                 assert(FunctionToSCC.find(F) == FunctionToSCC.end());
589 |                 FunctionToSCC[F] = Functions;
590 |             }
591 |         }
592 |     }
593 | 
594 |     // Return true if `F` is blacklisted
595 |     bool isBlacklisted(Function *F) {
596 |         MDNode* N;
597 |         N = F->getMetadata(CGC_CLONE_NEVER);
598 |         if (N == NULL) return false;
599 |         return true;
600 |     }
601 | 
602 |     // Return true if `F` is part of a SCC
603 |     bool isInSCC(Function *F) {
604 |         return SCCFunctions.find(F) != SCCFunctions.end();
605 |     }
606 |     
607 |     // Return true if `F` is a clone of a function
608 |     bool isClone(Function *F) {
609 |         return FunctionClones.find(F) != FunctionClones.end();
610 |     }
611 | 
612 |     // Add `F` to the function clones we keep track of, and update stats
613 |     void trackClone(Function *F, CallBaseQueue& cgcCallBaseQueue, bool update=true) {
614 |         FunctionClones.insert(F);
615 |         if (update) {
616 |             ++nclones;
617 |             aflmap_size += FunctionToAFLMapSize[F];
618 |         }
619 |         LLVM_DEBUG(dbgs() << "\r"  << nclones << " - " << aflmap_size << "              ");
620 |     }
621 | 
622 |     // Sometimes LLVM build the CallGraph withouth taking into considerations calls
623 |     // that pass through a `bitcast` operation. We fix this here, revisiting the
624 |     // functions and updating the CallGraph
625 |     void fixCallGraph(Module &M, CallGraph *CG) {
626 |         for (auto &F : M.getFunctionList()) {
627 |             if (F.isDeclaration() || isAvailableExternally(F))
628 |                 continue;
629 |             for(auto &BB: F) {
630 |                 for (auto &I : BB) {
631 |                     if (CallBase * CB = dyn_cast<CallBase>(&I)) {
632 |                         if (CB->isInlineAsm()) continue;
633 | 
634 |                         Function *Called = dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
635 |                         if (!Called || Called->isDeclaration()  || isAvailableExternally(*Called)|| Called->isIntrinsic()) continue;
636 | 
637 |                         // If `Called` actually points to a function, but getCalledFunction
638 |                         // returns null then we have spotted a missing function
639 |                         if (CB->getCalledFunction() == nullptr) {
640 |                             CallGraphNode *Node = CG->getOrInsertFunction(&F);
641 |                             Node->addCalledFunction(CB, CG->getOrInsertFunction(Called));
642 |                         }
643 |                     }
644 |                 }
645 |             }
646 |         }
647 |     }
648 | 
649 |     // Initialize the `FunctionToCallBases` structure with all the existing CallBases in `F`
650 |     void gatherCallBases(Function *F) {
651 |         // Initialize the set in case no call is present in the function
652 |         FunctionToCallBases[F];
653 |         for (BasicBlock &BB: *F) {
654 |             for (Instruction &I : BB) {
655 |                 // Gather all call bases
656 |                 if (CallBase * CB = dyn_cast<CallBase>(&I)) {
657 | 
658 |                     // Only if they represent direct calls to functions
659 |                     if (CB->isInlineAsm()) continue;
660 |                     Function *Called = dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
661 |                     if (!Called || Called->isDeclaration() || isAvailableExternally(*Called) || Called->isIntrinsic()) continue;
662 | 
663 |                     // Insert into the map
664 |                     FunctionToCallBases[F].insert(CB);
665 |                 }
666 |             }
667 |         }
668 |     }
669 | 
670 |     // The optimizer may decide to inline functions and simplify them. Or directly simplify
671 |     // static/internal ones. Try to persuade it to avoid simplifying functions we want as is,
672 |     // by setting all the functions `F` calls to not static and not inlinable.
673 |     void hardenFunction(Function *F) {
674 |         for (BasicBlock &BB: *F) {
675 |             for (Instruction &I : BB) {
676 |                 // Gather all call bases
677 |                 if (CallBase * CB = dyn_cast<CallBase>(&I)) {
678 | 
679 |                     // Only if they represent direct calls to functions
680 |                     if (CB->isInlineAsm()) continue;
681 |                     Function *Called = dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
682 |                     if (!Called || Called->isDeclaration() || isAvailableExternally(*Called) || Called->isIntrinsic()) continue;
683 |                     
684 |                     // Harden from inlining
685 |                     if (Called->hasFnAttribute(Attribute::InlineHint))
686 |                         Called->removeFnAttr(Attribute::InlineHint);
687 |                     if (Called->hasFnAttribute(Attribute::AlwaysInline))
688 |                         Called->removeFnAttr(Attribute::AlwaysInline);
689 |                     Called->addFnAttr(Attribute::NoInline);
690 | 
691 |                     // Harden from static/internal-driven simplifications
692 |                     GlobalValue *GVF = dyn_cast<GlobalValue>(Called);
693 |                     GVF->setVisibility(GlobalValue::DefaultVisibility);
694 |                     GVF->setLinkage(GlobalValue::ExternalLinkage);
695 |                 }
696 |             }
697 |         }
698 |     }
699 | 
    // Entry point of the pass. In order:
    //  1. set up the regexes of the functions to harden (defaults: `main` and
    //     `LLVMFuzzerTestOneInput`);
    //  2. fix the call graph and collect the SCCs;
    //  3. for every defined function gather its call bases and its estimated
    //     AFL map size, and seed the queue with the call bases of the roots;
    //  4. harden the selected functions;
    //  5. drain the queue, cloning the planned call targets; when `CGCFill`
    //     is set and there is still budget, restart from the skipped functions;
    //  6. normalize the names of all the tracked clones and mark them.
    // Always returns true.
    virtual bool runOnModule(Module &M) override {
        cgcPassLog("Running...");

        // Initialize regular expressions for functions to harden against optimizer
        std::vector<Regex*> HardenFunctionRegexes;
        if (HardenFunctions.empty()) {
            HardenFunctions.push_back("main");
            HardenFunctions.push_back("LLVMFuzzerTestOneInput");
        }
        passListRegexInit(HardenFunctionRegexes, HardenFunctions);

        CallGraph *CG = &getAnalysis<CallGraphWrapperPass>().getCallGraph();

        // LLVM does not consider edges like `call (bitcast (func))` so insert them.
        // really llvm??
        fixCallGraph(M, CG);

        // Walk the callgraph in bottom-up SCC order.
        scc_iterator<CallGraph*> CGI = scc_begin(CG);
        
        CallGraphSCC CurSCC(*CG, &CGI);
        while (!CGI.isAtEnd()) {
            // Copy the current SCC and increment past it so that the pass can hack
            // on the SCC if it wants to without invalidating our iterator.
            const std::vector<CallGraphNode *> &NodeVec = *CGI;
            CurSCC.initialize(NodeVec);
            ++CGI;
        
            collectSCC(CurSCC);
        }

        // Collect all functions in the module and add root function clones.
        CallBaseQueue cgcCallBaseQueue;
        std::set<Function*> HardenFunctionsSet;
        // Non-root functions outside of any SCC, revisited later if `CGCFill` is set
        std::list<Function*> skippedFuncs;
        for (auto &F : M.getFunctionList()) {
            if (F.isDeclaration() || isAvailableExternally(F))
                continue;

            // gather all the call bases in the function
            gatherCallBases(&F);

            // gather the estimation for the AFL map size
            FunctionToAFLMapSize[&F] = estimateAFLEdges(&F);
            // update the current size
            aflmap_size += FunctionToAFLMapSize[&F];

            const std::string &FName = F.getName().str();
            if (passListRegexMatch(HardenFunctionRegexes, FName)) {
                HardenFunctionsSet.insert(&F);
            }
            if (!isCGCRoot(F)) {
                // keep track of the functions skipped
                if (!isInSCC(&F)) skippedFuncs.push_back(&F);
                // BUG: if the SCC is a root SCC, the call sites of the root SCC
                // that go outside of the SCC will never be cloned
                continue;
            }
            assert(!isInSCC(&F) && 
                "Cannot set a function belonging to an SCC as a root function to be cloned");
            updateCallBaseQueue(cgcCallBaseQueue, &F);
        }

        // Harden each function against the optimizer
        for (Function *F: HardenFunctionsSet)
            hardenFunction(F);
        
        // if the map size is already at the max, just return
        if (aflmap_size >= MaxSize) return true;

        // Start from root function clones and iteratively clone the callgraph.
        while (!cgcCallBaseQueue.empty()) {
            CallBase *CB = cgcCallBaseQueue.top();
            cgcCallBaseQueue.pop();
            cgc(CB, cgcCallBaseQueue);
            // `cgc` should never clone past the limit
            assert (aflmap_size <= MaxSize);
        }
        
        // now clone all the other calls if still have budget
        if (CGCFill && aflmap_size < MaxSize) {
            oprint("Finished planned clones, still continuing to clone");
            for (Function* F: skippedFuncs) {
                updateCallBaseQueue(cgcCallBaseQueue, F);
            }
            // restart the visit to clone all the remaining calls
            while (!cgcCallBaseQueue.empty()) {
                CallBase *CB = cgcCallBaseQueue.top();
                cgcCallBaseQueue.pop();
                cgc(CB, cgcCallBaseQueue);
                // `cgc` should never clone past the limit
                assert (aflmap_size <= MaxSize);
            }
        }

        // normalize names and mark all the clones
        for (Function *F: FunctionClones) {
            if (F->isDeclaration() || isAvailableExternally(*F))
                continue;
            normalizeName(F);
            markClone(F);
        }
        oprint("\nTotal Clones: " << nclones);
        return true;
    }
804 | 
805 |     void getAnalysisUsage(AnalysisUsage &AU) const override {  // declares the analyses this pass requires
806 |         AU.addRequired<CallGraphWrapperPass>();          // call graph
807 |         AU.addRequired<DominatorTreeWrapperPass>();      // dominator tree
808 |         AU.addRequired<PostDominatorTreeWrapperPass>();  // post-dominator tree
809 |     }
810 |   };
811 | 
812 | }
813 | 
814 | char CallgraphClonePass::ID = 0;  // definition of the static pass ID member
815 | RegisterPass<CallgraphClonePass> MP("cgc", "CallgraphClone Pass");  // registers the pass under the "cgc" argument
816 | 


--------------------------------------------------------------------------------
/passes/cgc/cgc_old.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <pass.h>
  3 | #include <cgc_magics.h>
  4 | #include "llvm/Transforms/Utils/CallPromotionUtils.h"
  5 | #include "llvm/Transforms/Utils/Cloning.h"
  6 | #include <llvm/Transforms/Utils/BasicBlockUtils.h>
  7 | #include "llvm/Analysis/CFG.h"
  8 | #include "llvm/Analysis/CallGraph.h"
  9 | #include "llvm/Analysis/CallGraphSCCPass.h"
 10 | #include "llvm/ADT/SCCIterator.h"
 11 | #include "llvm/ADT/SmallVector.h"
 12 | #include "llvm/IR/CFG.h"
 13 | #include <algorithm>
 14 | 
 15 | using namespace llvm;
 16 | 
 17 | #define DEBUG_TYPE "cgc"
 18 | #define cgcPassLog(M) LLVM_DEBUG(dbgs() << "CallgraphClonePass: " << M << "\n")
 19 | #define oprint(s) LLVM_DEBUG(dbgs() << s << "\n")
 20 | 
 21 | static cl::list<std::string>
 22 | HardenFunctions("cgc-harden-funcs",  // may be given zero or more times; values are split on commas
 23 |     cl::desc("Specify all the comma-separated function regexes to harden against optimizer [default: main, LLVMFuzzerTestOneInput]"),
 24 |     cl::ZeroOrMore, cl::CommaSeparated, cl::NotHidden);
 25 | 
 26 | static cl::opt<std::string>
 27 | ClonePrefix("cgc-clone-prefix",  // prefix put in front of clone names, "__cgc_" unless overridden
 28 |     cl::desc("Specify the clone name prefix"),
 29 |     cl::init("__cgc_"), cl::NotHidden);
 30 | 
 31 | // Upper bound for the estimated AFL++ edge map: 256*1024 by default (256Kb, an average size of L2 cache)
 32 | static cl::opt<unsigned>
 33 | MaxSize("cgc-max-aflmap", 
 34 | cl::init(256*1024), cl::NotHidden,
 35 | cl::desc("The maximum acceptable size for the AFL++ edge map"));
 36 | 
 37 | static cl::opt<unsigned>
 38 | CallsThreshold("cgc-calls-treshold",  // NOTE: the option name is spelled "treshold"; 0 means "derive from the factor below"
 39 | cl::init(0), cl::NotHidden,
 40 | cl::desc("The threshold of incoming calls for which a function is considered an error function and not cloned\n\t[default: 0 -> set to treshold_factor*initial_number_of_funcs]"));
 41 | 
 42 | static cl::opt<float>
 43 | CallsThresholdFactor("cgc-calls-treshold-factor",  // defaults to 0.25
 44 | cl::init(0.25), cl::NotHidden,
 45 | cl::desc("The threshold factor on which cgc-calls-treshold is computed if initialized to 0"));
 46 | 
 47 | namespace {
 48 |   // This pass clones function calls based on decisions taken by CGC Planner on which
 49 |   // subgraph portion of the callgraph should be cloned
 50 |   class CallgraphClonePass : public ModulePass {
 51 | 
 52 |     // Keep track of all the functions belonging to strongly connected components
 53 |     std::set<Function*> SCCFunctions;
 54 | 
 55 |     std::map<Function*, std::set<Function*>> FunctionToSCC;
 56 |     std::map<Function*, std::set<CallBase*>> FunctionToCallBases;
 57 |     std::map<Function*, unsigned long>       FunctionToAFLMapSize;
 58 | 
 59 |     // Keep track of cloned functions
 60 |     std::set<Function*> FunctionClones;
 61 | 
 62 |     // All the functions that should not be cloned
 63 |     std::set<Function*> FunctionBlacklist;
 64 | 
 65 |     // The number of times a function is originally called
 66 |     std::map<Function*, unsigned long> CallsToFunction;
 67 | 
 68 |   public:
 69 |     static char ID;
 70 |     unsigned long unique_id = 0;
 71 |     unsigned long nclones = 0;
 72 |     unsigned long aflmap_size = 0;
 73 |     CallgraphClonePass() : ModulePass(ID) {}
 74 | 
 75 |     unsigned long getUniqueID() {
 76 |         return unique_id += 1;  // bump the counter first, then hand out the new value (the first ID is 1)
 77 |     }
 78 | 
 79 |     // Taken from: https://github.com/AFLplusplus
 80 |     // True if block has successors and it dominates all of them.
 81 |     bool isFullDominator(const BasicBlock *BB, const DominatorTree *DT) {
 82 |         auto succFirst = succ_begin(BB), succLast = succ_end(BB);
 83 |         if (succFirst == succLast) return false;  // a block without successors never qualifies
 84 |         for (auto it = succFirst; it != succLast; ++it) {
 85 |             // critical edges will be split, so only the other successors must be dominated
 86 |             if (!isCriticalEdge(BB->getTerminator(), *it) && !DT->dominates(BB, *it)) return false;
 87 |         }
 88 |         return true;
 89 |     }
 90 | 
 91 |     // Taken from: https://github.com/AFLplusplus
 92 |     // True if block has predecessors and it postdominates all of them.
 93 |     bool isFullPostDominator(const BasicBlock *       BB,
 94 |                                     const PostDominatorTree *PDT) {
 95 |         auto predFirst = pred_begin(BB), predLast = pred_end(BB);
 96 |         if (predFirst == predLast) return false;  // a block without predecessors never qualifies
 97 |         for (auto it = predFirst; it != predLast; ++it) {
 98 |             // critical edges will be split, so only the other predecessors must be post-dominated
 99 |             if (!isCriticalEdge((*it)->getTerminator(), BB) && !PDT->dominates(BB, *it)) return false;
100 |         }
101 |         return true;
102 |     }
103 | 
104 |     // Given a function, try to estimate the number of edges in the function that
105 |     // will be instrumented by AFLplusplus.
106 |     // It instruments edges by breaking all critical edges with a block in the middle
107 |     // and avoiding instrumenting blocks which are full dominators, or full 
108 |     // post-dominators with multiple predecessors.
109 |     unsigned long estimateAFLEdges(Function *F) {
110 |         DominatorTree &DomTree = getAnalysis<DominatorTreeWrapperPass>(*F).getDomTree();
111 |         PostDominatorTree &PostDomTree = getAnalysis<PostDominatorTreeWrapperPass>(*F).getPostDomTree();
112 |         unsigned edges = 0;
113 |         for (BasicBlock &BB: *F) {
114 |             // The entry block always counts. Any other block is left out when it is a full
115 |             // dominator, or a full post-dominator that lacks a single predecessor.
116 |             bool skipBlock = false;
117 |             if (&BB != &F->getEntryBlock())
118 |                 skipBlock = isFullDominator(&BB, &DomTree) || (isFullPostDominator(&BB, &PostDomTree) && !BB.getSinglePredecessor());
119 |             if (!skipBlock) ++edges;
120 | 
121 |             // Each critical edge leaving a multi-way terminator (indirectbr excluded)
122 |             // adds one more counted location.
123 |             Instruction *Term = BB.getTerminator();
124 |             if (Term->getNumSuccessors() <= 1 || isa<IndirectBrInst>(Term)) continue;
125 |             for (unsigned idx = 0, numSucc = Term->getNumSuccessors(); idx != numSucc; ++idx)
126 |                 if (isCriticalEdge(Term, idx)) ++edges;
127 |         }
128 |         return edges;
129 |     }
130 | 
131 |     // Return true if `F` has been marked as a root from which to start cloning 
132 |     // by CGC Planner.
133 |     bool isCGCRoot(Function &F) {
134 |         // A root is recognised by the mere presence of CGC_ROOT_ATTR metadata
135 |         // on the function; the content of the node is not inspected.
136 |         const MDNode *rootTag = F.getMetadata(CGC_ROOT_ATTR);
137 |         return rootTag != nullptr;
138 |     }
139 | 
140 |     // Return true if `CB` has been planned to be cloned by CGC Planner
141 |     bool isPlannedClone(CallBase &CB) {
142 |         // A planned clone is recognised by the mere presence of CGC_CLONE_CALL_ATTR
143 |         // metadata on the call; the content of the node is not inspected.
144 |         const MDNode *cloneTag = CB.getMetadata(CGC_CLONE_CALL_ATTR);
145 |         return cloneTag != nullptr;
146 |     }
147 | 
148 |     // Substitute all the trailings .x.y.z that llvm creates when having two functions
149 |     // with the same name, with some uniqueIDs to avoid long names
150 |     std::string compressName(std::string name) {
151 |         // A name without any '.' carries no suffix to compress: return it untouched
152 |         // (this is the only path that does not append a unique ID).
153 |         if (name.find('.') == std::string::npos)
154 |             return name;
155 | 
156 |         // Peel off every trailing ".<digits>" group. The suffix is checked digit by
157 |         // digit: atoi() would take ".0" for "not a number", accept text that merely
158 |         // starts with digits (".12abc"), and is undefined on out-of-range values.
159 |         std::string newName = name;
160 |         std::string::size_type idx = newName.rfind('.');
161 |         while (idx != std::string::npos) {
162 |             std::string suffix = newName.substr(idx + 1);
163 |             bool numeric = !suffix.empty() &&
164 |                 suffix.find_first_not_of("0123456789") == std::string::npos;
165 |             if (!numeric) break;
166 |             newName.erase(idx);
167 |             idx = newName.rfind('.');
168 |         }
169 |         // Flatten the remaining dots and make the result unique with a fresh ID
170 |         std::replace(newName.begin(), newName.end(), '.', '_');
171 |         return newName + "_" + std::to_string(getUniqueID());
172 |     }
173 | 
174 |     void setCloneName(Function *F) {
175 |         // Prepend the clone prefix unless the name already contains it somewhere
176 |         const bool alreadyPrefixed = F->getName().find(ClonePrefix) != std::string::npos;
177 |         if (!alreadyPrefixed)
178 |             F->setName(ClonePrefix + F->getName());
179 |         // Shorten the .1452.3394.9208... chains that pile up at the end of the name
180 |         F->setName(compressName(F->getName().str()));
181 |     }
182 | 
183 |     // Visit a Constant AST to find and replace oldV with newV, returning a new constant
184 |     Constant *replaceConstant(Constant *C, Constant *newV, Constant *oldV) {
185 |         // Returns a constant equal to `C` in which every occurrence of `oldV` is
186 |         // swapped for `newV`. A new constant is built; `C` itself is not modified.
187 |         //   C    - constant tree to visit
188 |         //   newV - replacement value; expected to have the same type as `oldV`,
189 |         //          because rebuilt structs keep their original type
190 |         //   oldV - value to look for
191 | 
192 |         // The root itself may be the value to replace
193 |         if (C == oldV)
194 |             return newV;
195 | 
196 |         // Only structs and constant expressions are descended into; any other
197 |         // constant is handed back as is
198 |         if (!isa<ConstantStruct>(C) && !isa<ConstantExpr>(C))
199 |             return C;
200 | 
201 |         // Rebuild the operand list; the recursion takes care of both direct and
202 |         // nested occurrences
203 |         SmallVector<Constant*, 8> Ops;
204 |         for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
205 |             Ops.push_back(replaceConstant(cast<Constant>(C->getOperand(i)), newV, oldV));
206 | 
207 |         // Recreate a struct with its own type: forcing an anonymous packed struct
208 |         // would alter the layout of a struct that is not packed
209 |         if (ConstantStruct *S = dyn_cast<ConstantStruct>(C))
210 |             return ConstantStruct::get(S->getType(), Ops);
211 | 
212 |         // Constant expression: same expression, new operands
213 |         return cast<ConstantExpr>(C)->getWithOperands(Ops);
214 |     }
215 | 
216 |     // Fix the prologue of newF, by substituting the occurrences of oldF.
217 |     // This allows us to clone functions without corrupting the prologue, that is
218 |     // left untouched by cloneFunction. -fsanitize=function uses prologues
219 |     void fixPrologue(Function *newF, Function *oldF) {
220 |         // A clone without prologue data needs no patching
221 |         if (newF->hasPrologueData()) {
222 |             // Swap the references to oldF found in the prologue constant for newF
223 |             newF->setPrologueData(replaceConstant(newF->getPrologueData(), newF, oldF));
224 |         }
225 | 
226 |     // `dest` is a clone of `source`, with the instructions mapped 1to1 in the `VMap`.
227 |     // Update the `FunctionToCallBases` struct to keep track of the CallBases in
228 |     // `dest` that represent the clone CallBases of `source`.
229 |     // Update the `FunctionToAFLMapSize` to keep track of the estimation for the
230 |     // new clone.
231 |     void updateMetadata(Function *dest, Function *source, ValueToValueMapTy &VMap) {
232 |         assert(FunctionToCallBases.find(source) != FunctionToCallBases.end());
233 |         std::set<CallBase*> &destCalls = FunctionToCallBases[dest];  // created even if `source` tracks no call
234 |         for (CallBase *origCall: FunctionToCallBases[source]) {
235 |             CallBase *cloneCall = dyn_cast<CallBase>(VMap[origCall]);
236 |             assert(cloneCall);
237 |             destCalls.insert(cloneCall);
238 |         }
239 |         // The clone starts with the same edge estimate as its source
240 |         assert(FunctionToAFLMapSize.find(source) != FunctionToAFLMapSize.end());
241 |         FunctionToAFLMapSize[dest] = FunctionToAFLMapSize[source];
242 |     }
243 | 
244 |     // Gather all the calls to `F`, starting from `V` and recursively visiting all
245 |     // the users of `V`, to collect all the eventual calls to `F` originating from `V`
246 |     // e.g. call bitcast F, with V being the bitcast
247 |     void gatherEventualCallsTo(Function *F, Value *V, std::set<Instruction*> &callsToF) {
248 |         // A call site counts only when `F` is the callee, not merely an argument
249 |         if (CallBase * CB = dyn_cast<CallBase>(V)) {
250 |             if (CB->getCalledOperand()->stripPointerCasts() == F)
251 |                 callsToF.insert(CB);
252 |             return;
253 |         }
254 |         // A bitcast may itself feed a call, so all of its users are followed;
255 |         // every other kind of user is ignored
256 |         BitCastOperator *BCO = dyn_cast<BitCastOperator>(V);
257 |         if (!BCO) return;
258 |         for (User* castUser: BCO->users())
259 |             gatherEventualCallsTo(F, castUser, callsToF);
260 |     }
261 | 
262 |     // Return true if `F` has multiple call sites so it makes sense to clone it
263 |     bool shouldCloneFunction(Function *F) {
264 |         // Collect every distinct call site that ends up calling `F`,
265 |         // including the ones reached through bitcasts of `F`
266 |         std::set<Instruction*> callSites;
267 |         for (User* U: F->users())
268 |             gatherEventualCallsTo(F, U, callSites);
269 | 
270 |         // Cloning only makes sense for a function called more than once:
271 |         // with at most one call site the answer is no
272 |         const unsigned int numCallSites = callSites.size();
273 |         if (numCallSites <= 1)
274 |             return false;
275 |         return true;
276 |     }
277 | 
278 |     // Return true if cloning `F` would not exceed the size limit.
279 |     bool allowedToClone(Function *F) {
280 |         // Estimated edges that cloning `F` would add to the map
281 |         unsigned long extraEdges = 0;
282 | 
283 |         if (!isInSCC(F)) {
284 |             // Plain function: only its own edges are added
285 |             assert(FunctionToAFLMapSize.find(F) != FunctionToAFLMapSize.end());
286 |             extraEdges += FunctionToAFLMapSize[F];
287 |             return aflmap_size + extraEdges <= MaxSize;
288 |         }
289 | 
290 |         // `F` sits in an SCC, and cloning it means cloning the whole SCC
291 |         assert(FunctionToSCC.find(F) != FunctionToSCC.end());
292 |         const std::set<Function*> &SCC = FunctionToSCC[F];
293 |         for (Function *member: SCC) {
294 |             assert(FunctionToAFLMapSize.find(member) != FunctionToAFLMapSize.end());
295 |             extraEdges += FunctionToAFLMapSize[member];
296 | 
297 |             // Cloning `member` also clones the functions it calls
298 |             assert(FunctionToCallBases.find(member) != FunctionToCallBases.end());
299 |             for (CallBase *CB: FunctionToCallBases[member]) {
300 |                 Function *callee = dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
301 |                 // Nothing to count when the callee is not a function
302 |                 if (!callee) continue;
303 |                 // Callees inside the SCC are already counted by the outer loop
304 |                 if (SCC.find(callee) != SCC.end()) continue;
305 |                 // Calls that are not planned clones leave the callee uncloned
306 |                 if (!isPlannedClone(*CB)) continue;
307 |                 assert(FunctionToAFLMapSize.find(callee) != FunctionToAFLMapSize.end());
308 |                 extraEdges += FunctionToAFLMapSize[callee];
309 |             }
310 |         }
311 | 
312 |         // Allowed only while the map stays within the configured maximum
313 |         // (reaching exactly the maximum is still accepted)
314 |         return aflmap_size + extraEdges <= MaxSize;
315 |     }
316 | 
317 |     // Return true if the `SCC` has multiple call sites so it makes sense to clone it
318 |     bool shouldCloneSCC(std::set<Function*> &SCC) {
319 |         // Every call site that targets some member of the `SCC`
320 |         std::set<Instruction*> callSites;
321 |         for (Function *member: SCC)
322 |             for (User* U: member->users())
323 |                 gatherEventualCallsTo(member, U, callSites);
324 | 
325 |         // Calls issued by the members themselves are internal to the SCC,
326 |         // so only the call sites located in outside functions are counted
327 |         unsigned int externalCalls = 0;
328 |         for (Instruction *site: callSites) {
329 |             Function* caller = site->getFunction();
330 |             bool fromOutside = SCC.find(caller) == SCC.end();
331 |             if (fromOutside) ++externalCalls;
332 |         }
333 | 
334 |         // Cloning only makes sense when the SCC is called from outside more
335 |         // than once: with at most one such call site the answer is no
336 |         if (externalCalls <= 1)
337 |             return false;
338 |         return true;
339 |     }
340 | 
341 |     // Visit the Strongly Connected Component where `F` belongs, to clone it as 
342 |     // a single node. Update `cgcFunctionQueue` accordingly to continue the visit.
343 |     Function* addSCCClone(std::list<Function*> &cgcFunctionQueue, Function* F) {
344 |         // Clones (or adopts) every function of the SCC containing `F` as one unit and
345 |         // rewires the direct calls made from inside it. Returns the function standing
346 |         // in for `F`: its clone, or `F` itself when the SCC is not worth cloning.
347 |         std::map<Function*, Function*> FtoClones;
348 |         std::set<Function*> SCCClones;
349 | 
350 |         assert(FunctionToSCC.find(F) != FunctionToSCC.end());
351 |         std::set<Function*> SCC = FunctionToSCC[F];
352 | 
353 |         // Clone all the functions in the SCC, or keep the originals when
354 |         // shouldCloneSCC says cloning is pointless
355 |         bool should_clone = shouldCloneSCC(SCC);
356 |         for (Function *SCCfunc: SCC) {
357 |             if (should_clone) {
358 |                 ValueToValueMapTy VMap;
359 |                 Function *clone = CloneFunction(SCCfunc, VMap);
360 |                 assert(clone);
361 |                 updateMetadata(clone, SCCfunc, VMap);
362 |                 trackClone(clone, cgcFunctionQueue);
363 |                 setCloneName(clone);
364 |                 fixPrologue(clone, SCCfunc);
365 |                 FtoClones[SCCfunc] = clone;
366 |                 SCCClones.insert(clone);
367 |             } else {
368 |                 // Set the original function as a clone without updating the number of clones
369 |                 trackClone(SCCfunc, cgcFunctionQueue, /*update=*/false);
370 |                 FtoClones[SCCfunc] = SCCfunc;
371 |                 SCCClones.insert(SCCfunc);
372 |             }
373 |         }
374 | 
375 |         // Now rewire the direct calls made by the functions in the SCC clones
376 |         for (Function *SCCclone: SCCClones) {
377 |             assert(FunctionToCallBases.find(SCCclone) != FunctionToCallBases.end());
378 |             for (CallBase *CB: FunctionToCallBases[SCCclone]) {
379 |                 Function *C = dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
380 |                 if (!C) continue;
381 |                 Function *clone;
382 |                 if (FtoClones.find(C) != FtoClones.end()) {
383 |                     // Callee already has a stand-in (SCC member or earlier callee): reuse it
384 |                     clone = FtoClones[C];
385 |                 } else {
386 |                     // clone only if is a planned clone, otherwise leave as is
387 |                     // NB: this assumes that all calls to `C` from `SCC` have been planned
388 |                     // equally, otherwise calls to `C` will not be consistent inside `SCC`
389 |                     if (isPlannedClone(*CB) == false) continue;
390 |                     clone = addFunctionClone(cgcFunctionQueue, C);
391 |                     FtoClones[C] = clone;
392 |                 }
393 |                 // Call directly when the signatures agree and cast only on a real mismatch:
394 |                 // a FunctionType never equals the operand's pointer type, so comparing
395 |                 // those two would send every call through a cast.
396 |                 if (clone->getFunctionType() != CB->getFunctionType())
397 |                     CB->setCalledFunction(CB->getFunctionType(), CastInst::CreatePointerCast(clone, CB->getCalledOperand()->getType(), "", CB));
398 |                 else
399 |                     CB->setCalledFunction(clone);
400 |             }
401 |         }
402 |         return FtoClones[F]; }
403 | 
404 |     // Clone the function `F`, and insert it in the `cgcFunctionQueue` to continue
405 |     // the visit
406 |     Function* addFunctionClone(std::list<Function*> &cgcFunctionQueue, Function *F) {
407 | 
408 |         // `F` must not be registered as a clone already
409 |         assert(!isClone(F));
410 | 
411 |         // Blacklisted functions are never cloned, and neither are functions whose
412 |         // clone would push the map past the size limit: in both cases the caller
413 |         // gets `F` back and nothing is queued
414 |         if (isBlacklisted(F) || !allowedToClone(F))
415 |             return F;
416 | 
417 |         // Functions in an SCC are handled as a unit together with their SCC
418 |         if (isInSCC(F))
419 |             return addSCCClone(cgcFunctionQueue, F);
420 | 
421 |         // Decide which function will serve the call site
422 |         Function *target = F;
423 |         if (!shouldCloneFunction(F)) {
424 |             // Not called more than once: the original stands in for the clone and
425 |             // is registered without touching the clone statistics
426 |             trackClone(F, cgcFunctionQueue, /*update=*/false);
427 |         } else {
428 |             // Several call sites: make a dedicated copy and set up its bookkeeping
429 |             ValueToValueMapTy VMap;
430 |             target = CloneFunction(F, VMap);
431 |             assert(target);
432 |             updateMetadata(target, F, VMap);
433 |             trackClone(target, cgcFunctionQueue);
434 | 
435 |             // Give the copy its clone name and repair its prologue data
436 |             setCloneName(target);
437 |             fixPrologue(target, F);
438 |         }
439 | 
440 |         // Queue the chosen function so that the calls it makes are visited too
441 |         cgcFunctionQueue.push_back(target);
442 |         return target;
443 |     }
444 | 
445 |     // Visit function `F` to clone all the functions it calls, uniquely per CallBase
446 |     void cgc(Function *F, std::list<Function*> &cgcFunctionQueue) {
447 |         // Visits `F` and, for each direct call marked as a planned clone, redirects
448 |         // the call to the function returned by addFunctionClone for the callee.
449 |         // `F` must not belong to an SCC and must have its call bases gathered.
450 |         assert(!isInSCC(F));
451 |         assert(FunctionToCallBases.find(F) != FunctionToCallBases.end());
452 | 
453 |         // bail out if blacklisted
454 |         if (isBlacklisted(F))
455 |             return;
456 | 
457 |         for (CallBase *CB: FunctionToCallBases[F]) {
458 |             // Only direct calls that were planned for cloning are touched
459 |             Function *C = dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
460 |             if (!C || !isPlannedClone(*CB)) continue;
461 |             Function *clone = addFunctionClone(cgcFunctionQueue, C);
462 |             // Call directly when the signatures agree and cast only on a real mismatch:
463 |             // a FunctionType never equals the operand's pointer type, so comparing
464 |             // those two would send every call through a cast.
465 |             if (clone->getFunctionType() != CB->getFunctionType())
466 |                 CB->setCalledFunction(CB->getFunctionType(), CastInst::CreatePointerCast(clone, CB->getCalledOperand()->getType(), "", CB));
467 |             else
468 |                 CB->setCalledFunction(clone);
469 |         }
470 |     }
471 | 
472 |     // Check if `F` directly calls itself
473 |     bool isSimplyRecursive(Function *F) {
474 |         for (BasicBlock &BB : *F) {
475 |             for (Instruction &I : BB.instructionsWithoutDebug()) {
476 |                 // Only call-like instructions are of interest
477 |                 CallBase *CB = dyn_cast<CallBase>(&I);
478 |                 if (!CB) continue;
479 |                 // Look through pointer casts; a self-call is a callee equal to `F`
480 |                 Value *target = CB->getCalledOperand()->stripPointerCasts();
481 |                 if (dyn_cast<Function>(target) == F) return true;
482 |             }
483 |         }
484 |         return false;
485 |     }
486 | 
487 |     // Visit the `SCC` to gather the information needed in `FunctionToSCC` and
488 |     // `SCCFunctions`
489 |     void collectSCC(CallGraphSCC &SCC) {
490 |         std::set<Function *> members;
491 |         for (CallGraphNode *node : SCC)
492 |             members.insert(node->getFunction());
493 | 
494 |         // Registers `fn` as a recursive function whose SCC is `members`
495 |         auto recordMember = [&](Function *fn) {
496 |             SCCFunctions.insert(fn);
497 |             assert(!fn->doesNotRecurse());
498 | 
499 |             // A function should belong to a single SCC
500 |             assert(FunctionToSCC.find(fn) == FunctionToSCC.end());
501 |             FunctionToSCC[fn] = members;
502 |         };
503 | 
504 |         if (members.size() != 1) {
505 |             // More than one node in the SCC means there is recursion,
506 |             // so all of its functions are registered
507 |             for (Function *fn : members)
508 |                 recordMember(fn);
509 |         } else {
510 |             // A single node is registered only when the function calls itself;
511 |             // a null function pointer is skipped
512 |             Function *only = *members.begin();
513 |             if (only && isSimplyRecursive(only))
514 |                 recordMember(only);
515 |         }
516 |     }
517 | 
518 |     // Return true if `F` is blacklisted
519 |     bool isBlacklisted(Function *F) {
520 |         return FunctionBlacklist.count(F) != 0;  // membership test on the blacklist set
521 |     }
522 | 
523 |     // Return true if `F` is part of a SCC
524 |     bool isInSCC(Function *F) {
525 |         return SCCFunctions.count(F) != 0;  // membership test on the set of SCC functions
526 |     }
527 |     
528 |     // Return true if `F` is a clone of a function
529 |     bool isClone(Function *F) {
530 |         return FunctionClones.count(F) != 0;  // membership test on the set of tracked clones
531 |     }
532 | 
533 |     // Add `F` to the function clones we keep track of, and update stats
534 |     void trackClone(Function *F, std::list<Function*> &cgcFunctionQueue, bool update=true) {
535 |         // Register `F`; `cgcFunctionQueue` is not used by this function
536 |         FunctionClones.insert(F);
537 |         // The statistics change only when `update` is set
538 |         if (update) { nclones += 1; aflmap_size += FunctionToAFLMapSize[F]; }
539 |         // Progress line; it starts with a carriage return, presumably to overwrite the previous one
540 |         LLVM_DEBUG(dbgs() << "\r"  << nclones << " - " << aflmap_size << "              ");
541 |     }
542 | 
543 |     // Sometimes LLVM builds the CallGraph without taking into consideration calls
544 |     // that pass through a `bitcast` operation. We fix this here, revisiting the
545 |     // functions and updating the CallGraph
546 |     void fixCallGraph(Module &M, CallGraph *CG) {
547 |         for (Function &Caller : M.getFunctionList()) {
548 |             // Declarations have no body to scan
549 |             if (Caller.isDeclaration())
550 |                 continue;
551 |             for (BasicBlock &BB: Caller) {
552 |                 for (Instruction &I : BB) {
553 |                     CallBase *CB = dyn_cast<CallBase>(&I);
554 |                     if (!CB || CB->isInlineAsm()) continue;
555 | 
556 |                     // Resolve the callee looking through pointer casts; only
557 |                     // non-intrinsic functions that are not declarations matter
558 |                     Function *Called = dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
559 |                     if (!Called || Called->isDeclaration() || Called->isIntrinsic()) continue;
560 |                     // A callee that getCalledFunction does not report is an edge
561 |                     // missing from the CallGraph: add it by hand
562 |                     if (CB->getCalledFunction() != nullptr) continue;
563 |                     CallGraphNode *Node = CG->getOrInsertFunction(&Caller);
564 |                     Node->addCalledFunction(CB, CG->getOrInsertFunction(Called));
565 |                 }
566 |             }
567 |         }
568 |     }
569 | 
570 |     // Initialize the `FunctionToCallBases` structure with all the existing CallBases in `F`
571 |     // and update info on the functions called
572 |     void gatherCallBases(Function *F) {
573 |         // Make sure `F` owns an entry even when it contains no tracked call
574 |         std::set<CallBase*> &tracked = FunctionToCallBases[F];
575 |         for (BasicBlock &BB: *F) {
576 |             for (Instruction &I : BB) {
577 |                 CallBase *CB = dyn_cast<CallBase>(&I);
578 |                 // Non-call instructions and inline asm are not tracked
579 |                 if (!CB || CB->isInlineAsm()) continue;
580 | 
581 |                 // Keep only direct calls (looking through pointer casts) whose
582 |                 // callee is neither a declaration nor an intrinsic
583 |                 Function *Called = dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
584 |                 if (!Called || Called->isDeclaration() || Called->isIntrinsic()) continue;
585 | 
586 |                 // Remember the call site for `F`
587 |                 tracked.insert(CB);
588 | 
589 |                 // One more recorded call to `Called`
590 |                 CallsToFunction[Called]++;
591 |             }
592 |         }
593 |     }
594 | 
595 |     // The optimizer may decide to inline functions and simplify them. Or directly simplify
596 |     // static/internal ones. Try to persuade it to avoid simplifying functions we want as is,
597 |     // by setting all the functions `F` calls to not static and not inlinable.
598 |     void hardenFunction(Function *F) {
599 |         for (BasicBlock &BB: *F) {
600 |             for (Instruction &I : BB) {
601 |                 // Only call-like instructions that are not inline asm are considered
602 |                 CallBase *CB = dyn_cast<CallBase>(&I);
603 |                 if (!CB || CB->isInlineAsm()) continue;
604 | 
605 |                 // Only direct calls (looking through pointer casts) to functions that
606 |                 // are neither declarations nor intrinsics are hardened
607 |                 Function *Called = dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
608 |                 if (!Called || Called->isDeclaration() || Called->isIntrinsic()) continue;
609 | 
610 |                 // Inlining: drop the attributes that invite it, then forbid it
611 |                 for (Attribute::AttrKind kind: {Attribute::InlineHint, Attribute::AlwaysInline})
612 |                     if (Called->hasFnAttribute(kind))
613 |                         Called->removeFnAttr(kind);
614 |                 Called->addFnAttr(Attribute::NoInline);
615 | 
616 |                 // Static/internal-driven simplifications: give the callee
617 |                 // default visibility and external linkage
618 |                 GlobalValue *GVF = dyn_cast<GlobalValue>(Called);
619 |                 GVF->setVisibility(GlobalValue::DefaultVisibility);
620 |                 GVF->setLinkage(GlobalValue::ExternalLinkage);
621 |             }
622 |         }
623 |     }
624 | 
625 |     // Add the function `F` to the blacklist if the number of calls to it is higher
626 |     // than the user threshold. If `F` belong to a SCC, add the SCC to the blacklist
627 |     void maybeAddToBlacklist(Function *F) {
628 |         // if already in the blacklist bail out
629 |         if (isBlacklisted(F))
630 |             return;
631 | 
632 |         // Number of recorded calls to `F`; nothing happens unless it is
633 |         // strictly above the threshold
634 |         unsigned long numCalls = CallsToFunction[F];
635 |         if (numCalls <= CallsThreshold)
636 |             return;
637 | 
638 |         if (!isInSCC(F)) {
639 |             // a function outside any SCC is excluded on its own
640 |             oprint("[-] excluding " << F->getName().str() << " with " << numCalls << " calls");
641 |             FunctionBlacklist.insert(F);
642 |             return;
643 |         }
644 | 
645 |         // a function in an SCC drags the whole SCC into the blacklist
646 |         assert(FunctionToSCC.find(F) != FunctionToSCC.end());
647 |         for (Function *sccF: FunctionToSCC[F]) {
648 |             oprint("[-] excluding " << sccF->getName().str() << " due to " << F->getName().str() << " with " << numCalls << " calls");
649 |             FunctionBlacklist.insert(sccF);
650 |         }
651 |     }
652 | 
653 |     // Visit `F` and all the functions called by `F`, adding them to `visitedFuncs`
654 |     void visitCalledFunctions(Function* F, std::set<Function*> &visitedFuncs) {
655 |         // bail out if already visited
656 |         if (visitedFuncs.find(F) != visitedFuncs.end()) return;
657 | 
658 |         // insert into the visited functions
659 |         visitedFuncs.insert(F);
660 | 
661 |         for (auto &BB : *F)
662 |         for (auto &I : BB) {
663 |             if (CallBase * CB = dyn_cast<CallBase>(&I)) {
664 |                 if (CB->isInlineAsm()) continue;
665 | 
666 |                 Function *C = dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
667 |                 if (C) {
668 |                     if (C->isDeclaration() || C->isIntrinsic())
669 |                     continue;
670 |                     
671 |                     visitCalledFunctions(C, visitedFuncs);
672 |                 }
673 |             }
674 |         }
675 |     }
676 | 
677 |     virtual bool runOnModule(Module &M) override {
678 |         cgcPassLog("Running...");
679 | 
680 |         // Initialize regular expressions for functions to harden against optimizer
681 |         std::vector<Regex*> HardenFunctionRegexes;
682 |         if (HardenFunctions.empty()) {
683 |             HardenFunctions.push_back("main");
684 |             HardenFunctions.push_back("LLVMFuzzerTestOneInput");
685 |         }
686 |         passListRegexInit(HardenFunctionRegexes, HardenFunctions);
687 | 
688 |         // Visit the strongly connected components to identify recursive functions
689 |          CallGraph *CG = &getAnalysis<CallGraphWrapperPass>().getCallGraph();
690 | 
691 |         // LLVM does not consider edges like `call (bitcast (func))` so insert them.
692 |         // really llvm??
693 |         fixCallGraph(M, CG);
694 | 
695 |         // Walk the callgraph in bottom-up SCC order.
696 |         scc_iterator<CallGraph*> CGI = scc_begin(CG);
697 |         
698 |         CallGraphSCC CurSCC(*CG, &CGI);
699 |         while (!CGI.isAtEnd()) {
700 |             // Copy the current SCC and increment past it so that the pass can hack
701 |             // on the SCC if it wants to without invalidating our iterator.
702 |             const std::vector<CallGraphNode *> &NodeVec = *CGI;
703 |             CurSCC.initialize(NodeVec);
704 |             ++CGI;
705 |         
706 |             collectSCC(CurSCC);
707 |         }
708 | 
709 |         std::set<Function*> visitedFuncs;
710 | 
711 |         // Collect all functions in the module and add root function clones.
712 |         std::list<Function*> cgcFunctionQueue;
713 |         std::set<Function*> HardenFunctionsSet;
714 |         for (auto &F : M.getFunctionList()) {
715 |             if (F.isDeclaration())
716 |                 continue;
717 | 
718 |             // gather all the call bases in the function
719 |             gatherCallBases(&F);
720 | 
721 |             // gather the estimation for the AFL map size
722 |             FunctionToAFLMapSize[&F] = estimateAFLEdges(&F);
723 |             // update the current size
724 |             aflmap_size += FunctionToAFLMapSize[&F];
725 | 
726 |             const std::string &FName = F.getName().str();
727 |             if (passListRegexMatch(HardenFunctionRegexes, FName)) {
728 |                 HardenFunctionsSet.insert(&F);
729 |             }
730 |             if (!isCGCRoot(F))
731 |                 continue;
732 |             cgcFunctionQueue.push_back(&F);
733 | 
734 |             // visit the path starting from F and count called functions
735 |             visitCalledFunctions(&F, visitedFuncs);
736 |         }
737 | 
738 |         // need to order roots based on BFS id if multiple roots
739 |         assert(cgcFunctionQueue.size() == 1 && "only single root supported");
740 | 
741 |         // Keep track of the initial number of functions used in the call path
742 |         unsigned long initialNfuncs = visitedFuncs.size();
743 | 
744 |         // if CallsThreshold==0 automatically tune based on the number of functions
745 |         if (CallsThreshold == 0) {
746 |             CallsThreshold = CallsThresholdFactor * initialNfuncs;
747 |             oprint("Threshold for error functions: " << CallsThreshold);
748 |         }
749 | 
750 |         // Now revisit all the functions to fill the blacklist
751 |         for (auto &F : M.getFunctionList()) {
752 |             if (F.isDeclaration())
753 |                 continue;
754 |         
755 |             // fill the function black list if we detect it as an error function
756 |             maybeAddToBlacklist(&F);
757 |         }
758 | 
759 |         // Harden each function against the optimizer
760 |         for (Function *F: HardenFunctionsSet)
761 |             hardenFunction(F);
762 | 
763 |         // Start from root function clones and iteratively clone the callgraph.
764 |         while (!cgcFunctionQueue.empty()) {
765 |             Function *F = *cgcFunctionQueue.begin();
766 |             cgcFunctionQueue.erase(cgcFunctionQueue.begin());
767 |             cgc(F, cgcFunctionQueue);
768 |             // `cgc` should never clone past the limit
769 |             assert (aflmap_size <= MaxSize);
770 |         }
771 |         oprint("\nTotal Clones: " << nclones);
772 |         return true;
773 |     }
774 | 
775 |     void getAnalysisUsage(AnalysisUsage &AU) const override {
776 |         AU.addRequired<CallGraphWrapperPass>();
777 |         AU.addRequired<DominatorTreeWrapperPass>();
778 |         AU.addRequired<PostDominatorTreeWrapperPass>();
779 |     }
780 |   };
781 | 
782 | }
783 | 
784 | char CallgraphClonePass::ID = 0;
785 | RegisterPass<CallgraphClonePass> MP("cgc", "CallgraphClone Pass");
786 | 


--------------------------------------------------------------------------------
/passes/dump-call-tree/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for the dump-call-tree pass
2 | ROOT=../..
3 | 
4 | PASSNAME := dump-call-tree
5 | OBJS := dump-call-tree.o
6 | 
7 | include ../Makefile.inc
8 | 
9 | 


--------------------------------------------------------------------------------
/passes/dump-call-tree/dump-call-tree.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <pass.h>
 3 | #include <iostream>
 4 | #include <fstream>
 5 | #include <unistd.h>
 6 | 
 7 | using namespace llvm;
 8 | 
 9 | #define DEBUG_TYPE "DumpCallTree"
10 | #define DumpCallTreePassLog(M) LLVM_DEBUG(dbgs() << "DumpCallTreePass: " << M << "\n")
11 | #define oprint(s) outs() << s << "\n"
12 | 
13 | static cl::opt<std::string>
14 | CallTreeStart("call-tree-start",
15 |     cl::desc("Specify the function from where to start the visit of the call tree to dump"),
16 |     cl::ZeroOrMore, cl::CommaSeparated, cl::NotHidden);
17 | 
18 | static cl::opt<std::string>
19 | OutFilename("dump-tree-file",
20 |     cl::desc("The file where to dump the called tree"),
21 |     cl::init("call-tree.log"), cl::NotHidden);
22 | 
23 | namespace {
24 | 
25 |   // Dump the subtree of the CFG functions starting from `call-tree-start`
26 |   class DumpCallTreePass : public ModulePass {
27 | 
28 |   std::set<std::string> CalledSet;
29 |   std::set<Function*> ToVisit;
30 | 
31 |   public:
32 |     static char ID;
33 |     DumpCallTreePass() : ModulePass(ID) {}
34 | 
35 |     void visit(Function* F) {
36 |       CalledSet.insert(F->getName().str());
37 |       // For each call in the given function:
38 |       for (auto &BB : *F)
39 |       for (auto &I : BB) {
40 |         if (CallBase * CB = dyn_cast<CallBase>(&I)) {
41 |           if (CB->isInlineAsm()) continue;
42 | 
43 |           Function *C = dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
44 |           if (C) {
45 |             if (C->isDeclaration() || C->isIntrinsic())
46 |               continue;
47 |             
48 |             // If never saw the function add to the visit
49 |             if (CalledSet.find(C->getName().str()) == CalledSet.end())
50 |               ToVisit.insert(C);
51 |           }
52 |         }
53 |       }
54 |     }
55 | 
56 |     virtual bool runOnModule(Module &M) {
57 |       for (auto &F : M.getFunctionList()) {
58 |         if (F.isDeclaration())
59 |           continue;
60 |         if (!F.getName().equals(CallTreeStart))
61 |           continue;
62 |         ToVisit.insert(&F);
63 |         break;
64 |       }
65 | 
66 |       // Start from root function and iteratively visit the callgraph.
67 |       while (!ToVisit.empty()) {
68 |           Function *F = *ToVisit.begin();
69 |           ToVisit.erase(ToVisit.begin());
70 |           visit(F);
71 |       }
72 | 
73 |       std::ofstream ofile;
74 |       ofile.open(OutFilename, std::ios::out | std::ios::trunc);
75 |       assert(ofile.is_open());
76 | 
77 |       for (auto s: CalledSet) {
78 |         ofile << s << std::endl;
79 |       }
80 |       ofile.flush();
81 |       ofile.close();
82 | 
83 |       return false;
84 |     }
85 |   };
86 | 
87 | }
88 | 
89 | char DumpCallTreePass::ID = 0;
90 | RegisterPass<DumpCallTreePass> MP("dump-call-tree", "DumpCallTree Pass");
91 | 
92 | 


--------------------------------------------------------------------------------
/passes/dump-calls/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for the dump-calls pass
2 | ROOT=../..
3 | 
4 | PASSNAME := dump-calls
5 | OBJS := dump-calls.o
6 | 
7 | include ../Makefile.inc
8 | 
9 | 


--------------------------------------------------------------------------------
/passes/dump-calls/dump-calls.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <pass.h>
  3 | #include <cgc_magics.h>
  4 | #include <iostream>
  5 | #include <fstream>
  6 | #include <unistd.h>
  7 | #include "llvm/IR/IRBuilder.h"
  8 | 
  9 | using namespace llvm;
 10 | 
 11 | #define DEBUG_TYPE "DumpCalls"
 12 | #define DumpCallsPassLog(M) LLVM_DEBUG(dbgs() << "DumpCallsPass: " << M << "\n")
 13 | #define oprint(s) outs() << s << "\n"
 14 | 
 15 | namespace {
 16 | 
 17 |   // Dump the subtree of the CFG functions starting from `call-tree-start`
 18 |   class DumpCallsPass : public ModulePass {
 19 | 
 20 |   public:
 21 |     static char ID;
 22 |     DumpCallsPass() : ModulePass(ID) {}
 23 | 
 24 |     // Return true if `F` has an available_externally linkage (i.e. equivalent to a declaration)
 25 |     bool isAvailableExternally(Function &F) {
 26 |         GlobalValue::LinkageTypes L = F.getLinkage();
 27 |         return GlobalValue::isAvailableExternallyLinkage(L);
 28 |     }
 29 | 
 30 |     // Return whether the function has been marked as a clone
 31 |     static bool hasCloneMark(Function *F) {
 32 |         MDNode* N;
 33 |         assert(F);
 34 |         N = F->getMetadata(CGC_CLONE_MARK);
 35 |         if (N == NULL) return false;
 36 |         return true;
 37 |     }
 38 | 
 39 |     void createPrintCall(Module &M, Function &F, const std::string &to_print, const std::string &prefix, const std::string &suffix, IRBuilder<> &builder) {
 40 |         auto &CTX = M.getContext();
 41 |         PointerType *PrintfArgTy = PointerType::getUnqual(Type::getInt8Ty(CTX));
 42 | 
 43 |         // STEP 1: Inject the declaration of printf
 44 |         // ----------------------------------------
 45 |         // Create (or _get_ in cases where it's already available) the following
 46 |         // declaration in the IR module:
 47 |         //    declare i32 @printf(i8*, ...)
 48 |         // It corresponds to the following C declaration:
 49 |         //    int printf(char *, ...)
 50 |         FunctionType *PrintfTy = FunctionType::get(
 51 |             IntegerType::getInt32Ty(CTX),
 52 |             PrintfArgTy,
 53 |             /*IsVarArgs=*/true);
 54 | 
 55 |         FunctionCallee Printf = M.getOrInsertFunction("printf", PrintfTy);
 56 | 
 57 |         // Set attributes as per inferLibFuncAttributes in BuildLibCalls.cpp
 58 |         Function *PrintfF = dyn_cast<Function>(Printf.getCallee());
 59 |         PrintfF->setDoesNotThrow();
 60 |         PrintfF->addParamAttr(0, Attribute::NoCapture);
 61 |         PrintfF->addParamAttr(0, Attribute::ReadOnly);
 62 | 
 63 |         // STEP 2: Inject a global variable that will hold the printf format string
 64 |         // ------------------------------------------------------------------------
 65 |         llvm::Constant *PrintfFormatStr = llvm::ConstantDataArray::getString(
 66 |             CTX, prefix + to_print + suffix);
 67 | 
 68 |         Constant *PrintfFormatStrVar =
 69 |             M.getOrInsertGlobal(to_print, PrintfFormatStr->getType());
 70 |         dyn_cast<GlobalVariable>(PrintfFormatStrVar)->setInitializer(PrintfFormatStr);
 71 | 
 72 |         // Printf requires i8*, but PrintfFormatStrVar is an array: [n x i8]. Add
 73 |         // a cast: [n x i8] -> i8*
 74 |         llvm::Value *FormatStrPtr =
 75 |             builder.CreatePointerCast(PrintfFormatStrVar, PrintfArgTy, "formatStr");
 76 | 
 77 |         // Finally, inject a call to printf
 78 |         builder.CreateCall(
 79 |             Printf, {FormatStrPtr});
 80 |     }
 81 | 
 82 |     void visit(Function* F) {
 83 |       for (auto &BB : *F)
 84 |         for (auto &I : BB.instructionsWithoutDebug()) {
 85 |             if (CallBase * CB = dyn_cast<CallBase>(&I)) {
 86 |                 if (CB->isInlineAsm()) continue;
 87 | 
 88 |                 Function *Called = dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
 89 |                 if (!Called || Called->isDeclaration()  || isAvailableExternally(*Called)|| Called->isIntrinsic()) continue;
 90 | 
 91 |                 // add the logging call
 92 |                 IRBuilder<> IBuilder(&I);
 93 |                 const std::string &to_print = Called->getName().str();
 94 |                 if (hasCloneMark(Called))
 95 |                     createPrintCall(*F->getParent(), *F, CGC_CLONE_MARK + to_print, ">>> |", "\n", IBuilder);
 96 |                 else
 97 |                     createPrintCall(*F->getParent(), *F, to_print, ">>> |", "\n", IBuilder);
 98 |             }
 99 |         }
100 |     }
101 | 
102 |     virtual bool runOnModule(Module &M) {
103 |         for (auto &F : M.getFunctionList()) {
104 |             if (F.isDeclaration() || isAvailableExternally(F)) continue;
105 |             visit(&F);
106 |         }
107 | 
108 |         return true;
109 |     }
110 |   };
111 | 
112 | }
113 | 
114 | char DumpCallsPass::ID = 0;
115 | RegisterPass<DumpCallsPass> MP("dump-calls", "DumpCalls Pass");
116 | 
117 | 


--------------------------------------------------------------------------------
/passes/dump-extlib/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for the dump-extlib pass
2 | ROOT=../..
3 | 
4 | PASSNAME := dump-extlib
5 | OBJS := dump-extlib.o
6 | 
7 | include ../Makefile.inc
8 | 
9 | 


--------------------------------------------------------------------------------
/passes/dump-extlib/dump-extlib.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <pass.h>
  3 | #include <iostream>
  4 | #include <fstream>
  5 | 
  6 | using namespace llvm;
  7 | 
  8 | #define DEBUG_TYPE "DumpExtlib"
  9 | #define DumpExtlibPassLog(M) LLVM_DEBUG(dbgs() << "DumpExtlibPass: " << M << "\n")
 10 | #define oprint(s) (outs() << s << "\n")
 11 | 
 12 | static cl::list<std::string>
 13 | Whitelist("dumpext-whitelist",
 14 |     cl::desc("Specify the comma-separated path regexes for the whitelist"),
 15 |     cl::OneOrMore, cl::CommaSeparated, cl::NotHidden);
 16 | 
 17 | static cl::list<std::string>
 18 | Blacklist("dumpext-blacklist",
 19 |     cl::desc("Specify the comma-separated path regexes for the blacklist"),
 20 |     cl::OneOrMore, cl::CommaSeparated, cl::NotHidden);
 21 | 
 22 | static cl::opt<std::string>
 23 | OutFilename("dumpext-out",
 24 |     cl::desc("Specify the name of the file where the function list will be saved [- for stdout]"),
 25 |     cl::init("-"), cl::NotHidden);
 26 | 
 27 | static cl::opt<bool>
 28 | Dbg("dumpext-dbg", cl::desc("Debug Mode"),
 29 |     cl::init(false));
 30 | 
 31 | namespace {
 32 |   // This pass tries to find in the module all the function that are identified
 33 |   // being part of linked static libraries.
 34 |   // It uses a really simple euristic where it takes a whitelist and assumes
 35 |   // a function being a library one if the DebugInfo of that function points to 
 36 |   // a path not containing any token in the whitelist.
 37 |   //
 38 |   // e.g. whitelist: curl
 39 |   // path: /src/curl/lib/    -> ok
 40 |   // path: /src/nghttp2/lib/ -> lib function
 41 |   //
 42 |   // The pass writes a function list to be passed to llvm-extract
 43 |   class DumpExtlibPass : public ModulePass {
 44 | 
 45 |   public:
 46 |     static char ID;
 47 |     DumpExtlibPass() : ModulePass(ID) {}
 48 | 
 49 |     std::string dirnameOf(const std::string& fname)
 50 |     {
 51 |         size_t pos = fname.find_last_of("/");
 52 |         return (std::string::npos == pos)
 53 |             ? fname
 54 |             : fname.substr(0, pos);
 55 |     }
 56 | 
 57 |     std::string getFileDirectory(Function &F) {
 58 |         if (DISubprogram *Loc = F.getSubprogram()) {
 59 |             // The path from the CWD to the source file, while building
 60 |             StringRef File = Loc->getFilename();
 61 |             // CWD while building
 62 |             StringRef Directory = Loc->getDirectory();
 63 | 
 64 |             std::string Path = Directory.str() + "/" + File.str();
 65 |             return dirnameOf(Path);
 66 |         } else {
 67 |             // oprint(F.getName());
 68 |             // assert(false);
 69 |             // No location metadata available
 70 |             return "";
 71 |         }
 72 |     }
 73 | 
 74 |     std::string getCompilationDirectory(Function &F) {
 75 |         if (DISubprogram *Loc = F.getSubprogram()) {
 76 |             // The path from the CWD to the source file, while building
 77 |             // StringRef File = Loc->getFilename();
 78 |             // CWD while building
 79 |             StringRef Directory = Loc->getDirectory();
 80 |             return Directory.str();
 81 |         } else {
 82 |             // oprint(F.getName());
 83 |             // assert(false);
 84 |             // No location metadata available
 85 |             return "";
 86 |         }
 87 |     }
 88 | 
 89 |     virtual bool runOnModule(Module &M) {
 90 | 
 91 |         // Initialize regular expressions for whitelist
 92 |         std::vector<Regex*> WhitelistRegexes;
 93 |         assert (!Whitelist.empty());
 94 |         passListRegexInit(WhitelistRegexes, Whitelist);
 95 | 
 96 |         // Initialize regular expressions for blacklist
 97 |         std::vector<Regex*> BlacklistRegexes;
 98 |         if (Blacklist.empty()) {
 99 |             Blacklist.push_back("EMPTY_BLACKLIST_SHOULD_NOT_MATCH_ANYTHING");
100 |         }
101 |         passListRegexInit(BlacklistRegexes, Blacklist);
102 | 
103 |         std::vector<Function*> ToExtract;
104 |         std::map<Function*, int> callsToFunc;
105 | 
106 |         // first remove all the aliases, since once we extract the functions we may invalidate some
107 |         std::set<GlobalAlias*> aliasesToRemove;
108 |         for (GlobalAlias &A: M.getAliasList()) {
109 |             A.replaceAllUsesWith(A.getAliasee());
110 |             aliasesToRemove.insert(&A);
111 |         }
112 |         for (GlobalAlias *A: aliasesToRemove) A->eraseFromParent();
113 |         
114 |         for (auto &F : M.getFunctionList()) {
115 |             if (F.isDeclaration())
116 |                 continue;
117 | 
118 |             const std::string &DirName = getFileDirectory(F);
119 |             const std::string &CompilationDir = getCompilationDirectory(F);
120 | 
121 |             // if the function does not have any debug info stay safe and assume
122 |             // that it belongs to the original program
123 |             if (DirName == "") continue;
124 | 
125 |             // If either the directory of the source file of the function or 
126 |             // the compilation directory matches the whitelist then keep the function
127 |             if (passListRegexMatch(WhitelistRegexes, DirName) || passListRegexMatch(WhitelistRegexes, CompilationDir)) {
128 |                 if (Dbg) {
129 |                     oprint("Keep " << F.getName().str() << ": " << DirName);
130 |                 }
131 | 
132 |                 // only if the blacklist does not match then skip extraction and leave it in the bitcode
133 |                 if (!passListRegexMatch(BlacklistRegexes, DirName) && !passListRegexMatch(BlacklistRegexes, CompilationDir)) {
134 |                     // continue and skip the extraction
135 |                     continue;
136 |                 }
137 |             }
138 | 
139 |             ToExtract.push_back(&F);
140 |             if (Dbg) {
141 |                 oprint("Remove " << F.getName().str() << ": " << DirName);
142 |             }
143 |         }
144 | 
145 |         std::string result = "";
146 |         for (Function *F: ToExtract) {
147 |             result.append(" -func=");
148 |             // result.append("^");
149 |             result.append(F->getName().str());
150 |             // result.append("$|");
151 |         }
152 |         // result.replace(result.rfind("|"), 1, ")");
153 | 
154 |         if (OutFilename == "-") {
155 |             outs() << result << "\n";
156 |         } else {
157 |             std::ofstream ofile;
158 |             ofile.open(OutFilename, std::ios::out | std::ios::trunc);
159 |             assert(ofile.is_open());
160 | 
161 |             ofile << result;
162 |             ofile.flush();
163 |             ofile.close();
164 |         }
165 |         return true;
166 |     }
167 |   };
168 | 
169 | }
170 | 
171 | char DumpExtlibPass::ID = 0;
172 | RegisterPass<DumpExtlibPass> MP("dump-extlib", "DumpExtlib Pass");
173 | 
174 | 


--------------------------------------------------------------------------------
/passes/func-stats/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for the func-stats pass
2 | ROOT=../..
3 | 
4 | PASSNAME := func-stats
5 | OBJS := func-stats.o
6 | 
7 | include ../Makefile.inc
8 | 
9 | 


--------------------------------------------------------------------------------
/passes/func-stats/func-stats.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <pass.h>
  3 | #include <cgc_magics.h>
  4 | #include "llvm/Analysis/CFG.h"
  5 | 
  6 | using namespace llvm;
  7 | 
  8 | #define DEBUG_TYPE "FuncStats"
  9 | #define FuncStatsPassLog(M) LLVM_DEBUG(dbgs() << "FuncStatsPass: " << M << "\n")
 10 | #define oprint(s) outs() << s << "\n"
 11 | 
 12 | static cl::opt<bool>
 13 | DumpCalls("dump-calls",
 14 |     cl::desc("Dump all non unique calls"),
 15 |     cl::init(false), cl::NotHidden);
 16 | 
 17 | static cl::opt<bool>
 18 | DumpGraph("dump-graph",
 19 |     cl::desc("Dump the Call Graph"),
 20 |     cl::init(false), cl::NotHidden);
 21 | 
 22 | static cl::opt<bool>
 23 | DumpWeights("dump-weights",
 24 |     cl::desc("Dump the CGC weights"),
 25 |     cl::init(false), cl::NotHidden);
 26 | 
 27 | static cl::opt<std::string>
 28 | RootFunction("dump-weights-root",
 29 |     cl::desc("Specify the root functions where to start dumping weights"),
 30 |     cl::init(""), cl::NotHidden);
 31 | 
 32 | namespace {
 33 | 
 34 |   class FuncStatsPass : public ModulePass {
 35 | 
 36 |   public:
 37 |     static char ID;
 38 |     FuncStatsPass() : ModulePass(ID) {}
 39 | 
 40 |     // Return true if `F` has an available_externally linkage (i.e. equivalent to a declaration)
 41 |     bool isAvailableExternally(Function &F) {
 42 |         GlobalValue::LinkageTypes L = F.getLinkage();
 43 |         return GlobalValue::isAvailableExternallyLinkage(L);
 44 |     }
 45 | 
 46 |     // Taken from: https://github.com/AFLplusplus
 47 |     // True if block has successors and it dominates all of them.
 48 |     bool isFullDominator(const BasicBlock *BB, const DominatorTree *DT) {
 49 |         if (succ_begin(BB) == succ_end(BB)) return false;
 50 |         for (const BasicBlock *SUCC : make_range(succ_begin(BB), succ_end(BB))) {
 51 |             // if the edge is critical it will be splitted
 52 |             if (isCriticalEdge(BB->getTerminator(), SUCC)) continue;
 53 |             if (!DT->dominates(BB, SUCC)) return false;
 54 |         }
 55 |         return true;
 56 |     }
 57 | 
 58 |     // Taken from: https://github.com/AFLplusplus
 59 |     // True if block has predecessors and it postdominates all of them.
 60 |     bool isFullPostDominator(const BasicBlock *       BB,
 61 |                                     const PostDominatorTree *PDT) {
 62 |         if (pred_begin(BB) == pred_end(BB)) return false;
 63 |         for (const BasicBlock *PRED : make_range(pred_begin(BB), pred_end(BB))) {
 64 |             // if the edge is critical it will be splitted
 65 |             if (isCriticalEdge(PRED->getTerminator(), BB)) continue;
 66 |             if (!PDT->dominates(BB, PRED)) return false;
 67 |         }
 68 |         return true;
 69 |     }
 70 | 
 71 |     // Given a function, try to estimate the number of edges in the function that
 72 |     // will be instrumented by AFLplusplus.
 73 |     // It instruments edges by breaking all critial edges with a block in the middle
 74 |     // and avoiding instrumenting blocks which are full dominators, or full 
 75 |     // post-dominators with multiple predecessors.
 76 |     unsigned long estimateAFLEdges(Function *F) {
 77 |         DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>(*F).getDomTree();
 78 |         PostDominatorTree *PDT = &getAnalysis<PostDominatorTreeWrapperPass>(*F).getPostDomTree();
 79 |         unsigned edges = 0;
 80 |         for (BasicBlock &BB: *F) {
 81 |             // Do not instrument full dominators, or full post-dominators with multiple
 82 |             // predecessors.
 83 |             bool shouldInstrumentBlock = (&F->getEntryBlock() == &BB) || (!isFullDominator(&BB, DT) && 
 84 |                                             !(isFullPostDominator(&BB, PDT) 
 85 |                                             && !BB.getSinglePredecessor()));
 86 |             if (shouldInstrumentBlock) ++edges;
 87 | 
 88 |             Instruction *TI = BB.getTerminator();
 89 |             if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
 90 |                 for (unsigned succ = 0, end = TI->getNumSuccessors(); succ != end; ++succ) {
 91 |                     if (isCriticalEdge(TI, succ))
 92 |                         ++edges;
 93 |                 }
 94 |         }
 95 |         return edges;
 96 |     }
 97 | 
 98 |     // Return the priority of the CallBase, an higher priority means the CallBase
 99 |     // should be cloned earlier
100 |     static long getPriority(CallBase *CB) {
101 |         MDNode* N;
102 |         assert(CB);
103 |         N = CB->getMetadata(CGC_CLONE_PRIORITY);
104 |         if (N == NULL) return 0;
105 |         Constant *val = dyn_cast<ConstantAsMetadata>(N->getOperand(0))->getValue();
106 |         assert(val);
107 |         long prio = cast<ConstantInt>(val)->getSExtValue();
108 |         return prio;
109 |     }
110 | 
111 |     void dumpWeights(Function *F, int level, std::set<Function*> &visited) {
112 |       if (visited.find(F) != visited.end()) return;
113 |       visited.insert(F);
114 | 
115 |       for (BasicBlock &BB: *F) {
116 |         for (Instruction &I: BB) {
117 |           // Search all call bases
118 |           if (CallBase * CB = dyn_cast<CallBase>(&I)) {
119 | 
120 |             // Only if they represent direct calls to functions
121 |             if (CB->isInlineAsm()) continue;
122 |             Function *Called = dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
123 |             if (!Called || Called->isDeclaration() || Called->isIntrinsic()) continue;
124 | 
125 |             oprint(std::string(level, '\t') << "|-> [" << getPriority(CB) << "] " << Called->getName());
126 |             dumpWeights(Called, level+1, visited);
127 |           }
128 |         }
129 |       }
130 |     }
131 | 
132 |     virtual bool runOnModule(Module &M) override {
133 |       unsigned int num_funcs = 0;
134 |       unsigned int total_BB = 0;
135 |       unsigned int total_edges = 0;
136 |       std::map<Function*, int> callsToFunc;
137 |       for (auto &F : M.getFunctionList()) {
138 |         if (F.isDeclaration())
139 |           continue;
140 |         ++num_funcs;
141 |         if (DumpGraph) {
142 |           oprint("Call graph node for function: '" << F.getName() << "'");
143 |         }
144 |         total_edges += estimateAFLEdges(&F);
145 |         for(auto &BB: F) {
146 |           ++total_BB;
147 |           if (DumpCalls || DumpGraph) {
148 |             for (auto &I : BB) {
149 |               if (CallBase * CB = dyn_cast<CallBase>(&I)) {
150 |                 if (CB->isInlineAsm()) continue;
151 | 
152 |                 Function *Called = dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
153 |                 if (!Called || Called->isDeclaration() || Called->isIntrinsic() || isAvailableExternally(*Called)) continue;
154 |                 callsToFunc[Called]+=1;
155 |                 if (DumpGraph) {
156 |                   oprint("    " << F.getName() << " calls function '" << Called->getName() << "'");
157 |                 }
158 |               }
159 |             }
160 |           }
161 |         }
162 |       }
163 | 
164 |       oprint("Num functions: " << num_funcs);
165 |       oprint("Num BBs      : " << total_BB);
166 |       oprint("AFL edges    : " << total_edges);
167 | 
168 |       if (DumpCalls) {
169 |         for (auto elem: callsToFunc) {
170 |           Function* F = elem.first;
171 |           int calls   = elem.second;
172 |           if (calls > 1) oprint(F->getName().str() << ": " << calls);
173 |         }
174 |       }
175 | 
176 |       if (DumpWeights) {
177 |         for (Function &F: M) {
178 |             if (F.isDeclaration())
179 |               continue;
180 | 
181 |             // start from root
182 |             const std::string &FName = F.getName().str();
183 |             std::set<Function*> visited;
184 |             if (FName == RootFunction) {
185 |               oprint(F.getName());
186 |               dumpWeights(&F, 0, visited);
187 |             }
188 |         }
189 |       }
190 | 
191 |       return false;
192 |     }
193 | 
194 |     void getAnalysisUsage(AnalysisUsage &AU) const override {
195 |         AU.addRequired<DominatorTreeWrapperPass>();
196 |         AU.addRequired<PostDominatorTreeWrapperPass>();
197 |     }
198 |   };
199 | 
200 | }
201 | 
202 | char FuncStatsPass::ID = 0;
203 | RegisterPass<FuncStatsPass> MP("func-stats", "FuncStats Pass");
204 | 
205 | 


--------------------------------------------------------------------------------
/passes/icp/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for the ICP pass
2 | ROOT=../..
3 | 
4 | PASSNAME := icp
5 | OBJS := icp.o
6 | 
7 | include ../Makefile.inc
8 | include ../Makefile.svf.inc
9 | 


--------------------------------------------------------------------------------
/passes/icp/icp.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <pass.h>
  3 | #include "WPA/WPAPass.h"
  4 | #include "llvm/Transforms/Utils/CallPromotionUtils.h"
  5 | #include "llvm/IR/Intrinsics.h"
  6 | #include "llvm/IR/CFG.h"
  7 | #include "llvm/IR/IRBuilder.h"
  8 | 
  9 | using namespace llvm;
 10 | using namespace SVF;
 11 | 
 12 | #define DEBUG_TYPE "icp"
 13 | #define icpPassLog(M) LLVM_DEBUG(dbgs() << "ICPPass: " << M << "\n")
 14 | #define oprint(s) (dbgs() << s << "\n")
 15 | #define print(s) (errs() << s << "\n")
 16 | 
// Command-line options of the ICP pass.

// -icp-funcs: regexes selecting the functions to process. When the list is
// left empty, runOnModule() substitutes ".*" (every function).
static cl::list<std::string>
Functions("icp-funcs",
    cl::desc("Specify all the comma-separated function regexes to icp"),
    cl::ZeroOrMore, cl::CommaSeparated, cl::NotHidden);

// -icp-vararg-only: only promote call sites that have at least one variadic
// candidate callee.
static cl::opt<bool>
VarArgOnly("icp-vararg-only",
    cl::desc("ICP only variadic calls"),
    cl::init(false), cl::NotHidden);

// -icp-fallback: keep the original indirect call as the default case.
// Mutually exclusive with -icp-abort (checked in runOnModule()).
static cl::opt<bool>
Fallback("icp-fallback",
    cl::desc("Leave a fallback indirect call behind"),
    cl::init(false), cl::NotHidden);

// -icp-abort: replace the default case with a call to errx().
static cl::opt<bool>
Abort("icp-abort",
    cl::desc("Leave an abort call for the default case"),
    cl::init(false), cl::NotHidden);

// -icp-type: select candidate callees by comparing signatures. At least one
// of -icp-type / -icp-alias has to be given (checked in runOnModule()).
static cl::opt<bool>
TypeAnalysis("icp-type",
    cl::desc("Use faster type-based points-to analysis."),
    cl::init(false), cl::NotHidden);

// -icp-type-opaque-ptrs (default: true): treat any two pointer types as
// compatible when comparing signatures.
static cl::opt<bool>
TypeAnalysisOpaquePtrs("icp-type-opaque-ptrs",
    cl::desc("Allow arbitrary ptr casts in type-based points-to analysis."),
    cl::init(true), cl::NotHidden);

// -icp-type-strict-signature (default: true): reject non-variadic callees
// that declare fewer parameters than the call site passes arguments.
static cl::opt<bool>
StrictSignature("icp-type-strict-signature",
    cl::desc("Only allow for exact function signature matches"),
    cl::init(true), cl::NotHidden);

// -icp-alias: select candidate callees through the SVF WPAPass alias query.
static cl::opt<bool>
AliasSVFAnalysis("icp-alias",
    cl::desc("Use slower alias-based points-to analysis."),
    cl::init(false), cl::NotHidden);

// -icp-no-promote: list the possible callees but leave the call untouched
// once candidates have been found.
static cl::opt<bool>
NoPromote("icp-no-promote",
    cl::desc("Don't promote indirect call, analyse only possible targets"),
    cl::init(false), cl::NotHidden);
 61 | 
 62 | namespace {
 63 | 
 64 |   class ICPPass : public ModulePass {
 65 | 
 66 |   public:
 67 |     static char ID;
 68 |     ICPPass() : ModulePass(ID) {}
 69 | 
 70 |     bool isCompatibleType(Type *T1, Type *T2) {
 71 |         // Check if 2 types are the same, tolerating void* (i8*) pointer casts.
 72 |         if (T1 == T2)
 73 |             return true;
 74 |         if (!T1->isPointerTy() || !T2->isPointerTy())
 75 |             return false;
 76 |         // If requested, be even more conservative (any pointer cast will do).
 77 |         if (TypeAnalysisOpaquePtrs)
 78 |             return true;
 79 |         return false;
 80 |     }
 81 | 
 82 |     bool csTypeAlias(CallSite &CS, Function *F) {
 83 |         // avoid stripping pointer casts, since we want the final called ptr type
 84 |         Value *V = CS.getCalledValue();
 85 |         FunctionType *FT= F->getFunctionType();
 86 |         FunctionType *CT= cast<FunctionType>(V->getType()->getContainedType(0));
 87 | 
 88 |         // Fast path: perfect type match.
 89 |         if (FT == CT)
 90 |             return true;
 91 | 
 92 |         // Return types have to match, unless the callsite doesn't care.
 93 |         if (!CT->getReturnType()->isVoidTy()
 94 |             && !isCompatibleType(CT->getReturnType(), FT->getReturnType()))
 95 |             return false;
 96 | 
 97 |         // Match #arguments and #parameters (account for variadic functions).
 98 |         if (CS.arg_size() < FT->getNumParams())
 99 |             return false;
100 |         // Accept the case when the CallSite has more params than the function if not strict
101 |         if (StrictSignature)
102 |             if (CS.arg_size() > FT->getNumParams() && !F->isVarArg())
103 |                 return false;
104 | 
105 |         unsigned int max_args = StrictSignature ? CS.arg_size() : FT->getNumParams();
106 | 
107 |         // Make sure each argument has compatible type with corresponding param.
108 |         for (unsigned i=0; i<max_args; i++) {
109 |             Type *PT = i < FT->getNumParams() ? FT->getParamType(i) : NULL;
110 |             if (!PT)
111 |                 break;
112 |             if (!isCompatibleType(PT, CS.getArgument(i)->getType()))
113 |                 return false;
114 |         }
115 | 
116 |         return true;
117 |     }
118 | 
119 |     // Check if the signature of the CallSite is compatible with calling the function F
120 |     bool isSignatureCompatible(CallSite &CS, Function *F) {
121 |         // avoid stripping pointer casts, since we want the final called ptr type
122 |         Value *V = CS.getCalledValue();
123 |         FunctionType *FT= F->getFunctionType();
124 |         FunctionType *CT= cast<FunctionType>(V->getType()->getContainedType(0));
125 | 
126 |         // Fast path: perfect type match.
127 |         if (FT == CT)
128 |             return true;
129 | 
130 |         // Return types have to match, unless the callsite doesn't care.
131 |         if (!CT->getReturnType()->isVoidTy()
132 |             && !isCompatibleType(CT->getReturnType(), FT->getReturnType()))
133 |             return false;
134 | 
135 |         // Match #arguments and #parameters
136 |         if (CS.arg_size() < FT->getNumParams())
137 |             return false;
138 |         
139 |         // Accept the case when the CallSite has more params than the function
140 |         return true;
141 |     }
142 | 
143 |     void getIndirectCallees(Module *M, CallSite &CS, std::vector<Function*> &callees, WPAPass *wpa) {
144 |         // Grab functions that may alias value at the callsite
145 |         Value *V = CS.getCalledValue()->stripPointerCasts();
146 |         for (auto &F : M->getFunctionList()) {
147 |             if (!F.hasAddressTaken())
148 |                 continue;
149 |             if (VarArgOnly && Fallback && !F.isVarArg())
150 |                 continue;
151 | 
152 |             if (AliasSVFAnalysis && TypeAnalysis) {
153 |                 if (csTypeAlias(CS, &F) && wpa->alias(V, &F))
154 |                     callees.push_back(&F);
155 |                 continue;
156 |             }
157 | 
158 |             // Use points-to analysis if requested
159 |             if (!TypeAnalysis) {
160 |                 if (isSignatureCompatible(CS, &F) && wpa->alias(V, &F))
161 |                     callees.push_back(&F);
162 |                 continue;
163 |             }
164 | 
165 |             // Or faster callsite type-based analysis otherwise
166 |             if (csTypeAlias(CS, &F))
167 |                 callees.push_back(&F);
168 |         }
169 |     }
170 | 
171 |     Instruction *wrapPromoteCallWithIfThenElse(llvm::CallSite CS, llvm::Function *Callee, llvm::MDNode *BranchWeights = (llvm::MDNode *)nullptr) {
172 |         FunctionType *FT= Callee->getFunctionType();
173 |         Instruction * newI = promoteCallWithIfThenElse(CS, Callee);
174 |         assert(newI);
175 |         CallBase *newCI = dyn_cast<CallBase>(newI);
176 |         assert(newCI);
177 | 
178 |         // If the new function accepts less arguments than the callsite trim them
179 |         if (newCI->arg_size() > FT->getNumParams()) {
180 |             std::vector<Value*> args;
181 |             for (auto &arg: newCI->args()) {
182 |                 if (args.size() >= FT->getNumParams()) break;
183 |                 args.push_back(arg);
184 |             }
185 |             CallInst *fixedCI = CallInst::Create(newCI->getCalledValue()->stripPointerCasts(), args, "", newCI);
186 |             fixedCI->setDebugLoc(newCI->getDebugLoc());
187 |             newCI->replaceAllUsesWith(fixedCI);
188 |             newCI->eraseFromParent();
189 |             return fixedCI;
190 |         }
191 | 
192 |         return newI;
193 |     }
194 | 
195 |     void promoteIndirectCall(Function *F, Instruction *I, WPAPass *wpa) {
196 |         Module* M = F->getParent();
197 |         LLVMContext& C = M->getContext();
198 | 
199 |         // retrieve the errx function
200 |         std::vector<Type *> args;
201 |         args.push_back(Type::getInt32Ty(C));
202 |         args.push_back(Type::getInt8PtrTy(C));
203 |         FunctionType *FT = FunctionType::get(Type::getVoidTy(C), args, true);
204 |         FunctionCallee _errx = M->getOrInsertFunction("errx", FT);
205 |         assert(_errx);
206 |         Function *ErrxF = dyn_cast<Function>(_errx.getCallee());
207 |         assert(ErrxF);
208 | 
209 |         oprint("Promoting indirect call: " << *I << " in " << F->getName().str());
210 |         // Get indirect callees
211 |         CallSite CS(I);
212 |         std::vector<Function*> callees;
213 |         getIndirectCallees(F->getParent(), CS, callees, wpa);
214 |         if (callees.empty()) {
215 |             // For now we fail if we are not using the type analysis, since we may
216 |             // are using SVF wrongly:
217 |             // https://github.com/SVF-tools/SVF/issues/280
218 |             if (Abort) {
219 |                 // insert an abort call in place of the indirect default call
220 |                 Instruction *OldCall = CS.getInstruction();
221 |                 BasicBlock* ThisBB = CS.getInstruction()->getParent();
222 | 
223 |                 // replace the return value of the call with undefined
224 |                 OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
225 | 
226 |                 // add the call to the errx function
227 |                 std::vector<Value*> args;
228 |                 args.push_back( ConstantInt::get(Type::getInt32Ty(C), 0));
229 |                 std::string str = "ICP UNREACHABLE";
230 |                 llvm::IRBuilder<> builder(ThisBB);
231 |                 static Value* error_string = builder.CreateGlobalStringPtr(StringRef(str));
232 |                 args.push_back(error_string);
233 |                 CallInst *CI = CallInst::Create(ErrxF, args, "",OldCall);
234 |                 CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoReturn);
235 | 
236 |                 // remove the old call and the branch to leave unreachable instr
237 |                 OldCall->eraseFromParent();
238 |             }
239 |             oprint("No callees available");
240 |             return;
241 |         }
242 |         oprint(callees.size() << " callees possible");
243 |         for (auto Callee : callees) {
244 |             oprint("possible callee: " << Callee->getName().str());
245 |         }
246 |         if (NoPromote) return;
247 | 
248 |         // Check if we should only promote indirect calls to variadic functions.
249 |         if (VarArgOnly) {
250 |             bool hasVarArgCallee = false;
251 |             for (auto Callee : callees) {
252 |                 if (Callee->isVarArg())
253 |                     hasVarArgCallee = true;
254 |             }
255 |             if (!hasVarArgCallee)
256 |                 return;
257 |         }
258 | 
259 |         // Promote with or without indirect call fallback.
260 |         Function *lastCallee = NULL;
261 |         for (auto Callee : callees) {
262 |             if (lastCallee)
263 |                 wrapPromoteCallWithIfThenElse(CS, lastCallee);
264 |             lastCallee = Callee;
265 |         }
266 |         if (Fallback) {
267 |             wrapPromoteCallWithIfThenElse(CS, lastCallee);
268 |             CS.addAttribute(AttributeList::FunctionIndex, Attribute::NoRecurse);
269 |         }
270 |         else if (Abort) {
271 |             // create the last branch with the remaining indirect call
272 |             wrapPromoteCallWithIfThenElse(CS, lastCallee);
273 | 
274 |             // insert an abort call in place of the indirect default call
275 |             Instruction *OldCall = CS.getInstruction();
276 |             BasicBlock* ThisBB = CS.getInstruction()->getParent();
277 |             Instruction* LastI = ThisBB->getTerminator();
278 |             UnreachableInst* UI = new UnreachableInst(C, LastI);
279 | 
280 |             // remove the values coming from the phi nodes of the successors
281 |             for (BasicBlock* SuccBB: successors(ThisBB)) {
282 |                 for (PHINode &Phi: SuccBB->phis()) {
283 |                     Phi.removeIncomingValue(ThisBB);
284 |                 }
285 |             }
286 | 
287 |             // replace the return value of the call with undefined
288 |             OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
289 | 
290 |             // add the call to the errx function
291 |             std::vector<Value*> args;
292 |             args.push_back( ConstantInt::get(Type::getInt32Ty(C), 0));
293 |             std::string str = "ICP UNREACHABLE";
294 |             llvm::IRBuilder<> builder(ThisBB);
295 |             static Value* error_string = builder.CreateGlobalStringPtr(StringRef(str));
296 |             args.push_back(error_string);
297 |             CallInst *CI = CallInst::Create(ErrxF, args, "",OldCall);
298 |             CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoReturn);
299 | 
300 |             // remove the old call and the branch to leave unreachable instr
301 |             OldCall->eraseFromParent();
302 |             LastI->eraseFromParent();
303 |             assert(ThisBB->getTerminator() == UI);
304 |         } else {
305 |             promoteCall(CS, lastCallee);
306 |         }
307 |     }
308 | 
309 |     std::string getLocation(Instruction &I) {
310 | 
311 |         if (DILocation *Loc = I.getDebugLoc()) {
312 |             unsigned Line = Loc->getLine();
313 |             unsigned Col  = Loc->getColumn();
314 |             StringRef File = Loc->getFilename();
315 |             DILocation *InlineLoc = Loc->getInlinedAt();
316 |             DILocalScope *Scope = Loc->getScope();
317 |             // not worth
318 |             if (Line == 0 && Col == 0 && !InlineLoc) {print(*Scope); assert(false); return "";}
319 |             if (!InlineLoc)
320 |                 return "file: " + File.str() + ", line: " + std::to_string(Line) + ", col:" + std::to_string(Col);
321 |             else {
322 |                 unsigned InLine = InlineLoc->getLine();
323 |                 unsigned InCol  = InlineLoc->getColumn();
324 |                 StringRef InFile = InlineLoc->getFilename();
325 |                 return "file: " + File.str() + ", line: " + std::to_string(Line) + ", col:" + std::to_string(Col) +
326 |                     ", inlined at: " + InFile.str() + ", line: " + std::to_string(InLine) + ", col:" + std::to_string(InCol);
327 |             }
328 |         } else {
329 |             assert(false);
330 |             // No location metadata available
331 |             return "";
332 |         }
333 |     }
334 | 
335 |     void dumpICFG(Function *F, WPAPass *wpa) {
336 |         print("- function: " <<  F->getName());
337 |         print(F->getSection());
338 |         print(F->getSectionPrefix());
339 |         for (BasicBlock &BB: *F) {
340 |             for (Instruction &I : BB) {
341 |                 // Gather all call bases
342 |                 if (CallBase * CB = dyn_cast<CallBase>(&I)) {
343 | 
344 |                     // Only if they represent indirect calls to functions
345 |                     if (CB->isInlineAsm()) continue;
346 |                     Function *Called = dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
347 |                     if (Called) continue;
348 |                     
349 |                     CallSite CS(&I);
350 |                     std::vector<Function*> callees;
351 |                     getIndirectCallees(F->getParent(), CS, callees, wpa);
352 |                     print("    - " << I);
353 |                     print("    - " << getLocation(I));
354 |                     for(Function *callee: callees) {
355 |                         print("        - " << callee->getName());
356 |                     }
357 |                 }
358 |             }
359 |         }
360 |     }
361 | 
362 |     void icp(Function *F, WPAPass *wpa) {
363 |         std::vector<Instruction *> indirectCalls;
364 |         // dumpICFG(F, wpa);
365 | 
366 |         // Collect indirect calls.
367 |         for (auto &BB : *F)
368 |         for (auto &I : BB) {
369 |             CallSite CS(&I);
370 |             if (!CS.getInstruction() || CS.isInlineAsm())
371 |                 continue;
372 |             if (isa<Function>(CS.getCalledValue()->stripPointerCasts()))
373 |                 continue;
374 |             indirectCalls.push_back(&I);
375 |         }
376 | 
377 |         // Promote.
378 |         for (auto I : indirectCalls) {
379 |             promoteIndirectCall(F, I, wpa);
380 |         }
381 |     }
382 | 
383 |     virtual bool runOnModule(Module &M) {
384 |         icpPassLog("Running...");
385 |         assert(!(Abort && Fallback) && 
386 |             "Only a mode between icp-unreachable and icp-fallback can be selected");
387 |         SVFModule* svfModule = LLVMModuleSet::getLLVMModuleSet()->buildSVFModule(M);
388 |         WPAPass *wpa = NULL;
389 |         assert(AliasSVFAnalysis || TypeAnalysis);
390 |         if (AliasSVFAnalysis) {
391 |             wpa = new WPAPass();
392 |             wpa->runOnModule(svfModule);
393 |         }
394 | 
395 |         std::vector<Regex*> FunctionRegexes;
396 |         if (Functions.empty())
397 |             Functions.push_back(".*");
398 |         passListRegexInit(FunctionRegexes, Functions);
399 | 
400 |         // ICP all the functions in the module.
401 |         for (auto &F : M.getFunctionList()) {
402 |             if (F.isDeclaration())
403 |                 continue;
404 |             const std::string &FName = F.getName();
405 |             if (!passListRegexMatch(FunctionRegexes, FName))
406 |                 continue;
407 |             icp(&F, wpa);
408 |         }
409 | 
410 |         return true;
411 |     }
412 |   };
413 | 
414 | }
415 | 
// Storage for the pass identifier, and static registration of the pass under
// the command-line name "icp".
char ICPPass::ID = 0;
RegisterPass<ICPPass> MP("icp", "ICP Pass");
418 | 


--------------------------------------------------------------------------------
/passes/include/common/cgc_magics.h:
--------------------------------------------------------------------------------
 1 | #ifndef _CGC_MAGICS_H
 2 | #define _CGC_MAGICS_H
 3 | 
 4 | #define CGC_ROOT_ATTR "cgc_root"
 5 | #define CGC_CLONE_CALL_ATTR "cgc_clone_call"
 6 | #define CGC_CLONE_PRIORITY "cgc_clone_priority"
 7 | #define CGC_CLONE_NEVER "cgc_clone_never"
 8 | #define CGC_CLONE_MARK "__cgc_clone_"
 9 | 
10 | #endif /* _CGC_MAGICS_H */
11 | 


--------------------------------------------------------------------------------
/passes/include/common/pass.h:
--------------------------------------------------------------------------------
 1 | #ifndef _PASS_H
 2 | #define _PASS_H
 3 | 
 4 | #include <llvm/Pass.h>
 5 | #include <llvm/Support/raw_ostream.h>
 6 | #include <llvm/Analysis/AliasAnalysis.h>
 7 | 
 8 | #include <llvm/Support/Debug.h>
 9 | #include <llvm/Transforms/Utils/Cloning.h>
10 | #include <llvm/ADT/Statistic.h>
11 | 
12 | #include <llvm/Support/Regex.h>
13 | #include <llvm/Support/CommandLine.h>
14 | #include <llvm/Analysis/LoopInfo.h>
15 | 
16 | #include <llvm/Transforms/Utils/Local.h>
17 | #include <llvm/Transforms/Scalar.h>
18 | 
19 | #include <llvm/IR/InstIterator.h>
20 | #include <llvm/IR/DebugInfoMetadata.h>
21 | 
22 | #include <cstdio>
23 | #include <cstdlib>
24 | #include <iostream>
25 | #include <sstream>
26 | #include <string>
27 | 
28 | using namespace llvm;
29 | 
30 | static inline void passListRegexInit(std::vector<Regex*> &regexes, const std::vector<std::string> &strings)
31 | {
32 |     for (auto &s : strings)
33 |         regexes.push_back(new Regex(s, 0));
34 | }
35 | 
36 | static inline bool passListRegexMatch(const std::vector<Regex*> &regexes, const std::string &string)
37 | {
38 |     for (auto &regex : regexes) {
39 |     	  if (regex->match(string))
40 |     	  		  return true;
41 |     }
42 |     
43 |     return false;
44 | }
45 | 
46 | #endif /* _PASS_H */
47 | 


--------------------------------------------------------------------------------
/passes/include/sdag/sdag-print.h:
--------------------------------------------------------------------------------
 1 | #ifndef SDAG_PRINT_H
 2 | #define SDAG_PRINT_H
 3 | 
 4 | #include "sdag.h"
 5 | 
 6 | #include <pass.h>
 7 | 
 8 | #include "llvm/Support/GraphWriter.h"
 9 | 
10 | namespace llvm {
11 | 
12 | template<>
13 | struct DOTGraphTraits<const SDAG*> : public DefaultDOTGraphTraits {
14 | 
15 |   DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
16 | 
17 |   static std::string getGraphName(const SDAG *sdag) {
18 |     return "SDAG for '" + sdag->getFunction()->getName().str() + "' function";
19 |   }
20 | 
21 |   std::string getNodeLabel(const SDAGNode *Node,
22 |                            const SDAG *Graph) {
23 | 	return Node->getLabel(!isSimple());
24 |   }
25 | 
26 |   std::string getNodeAttributes(const SDAGNode *Node,
27 |                            const SDAG *Graph) {
28 |     std::string str;
29 |     if (!Node->isSpecial())
30 |     	return str;
31 |     raw_string_ostream OS(str);
32 |     OS << "color=\"red\"";
33 |     return OS.str();
34 |   }
35 | 
36 | };
37 | } // End llvm namespace
38 | 
39 | #endif
40 | 


--------------------------------------------------------------------------------
/passes/include/sdag/sdag.h:
--------------------------------------------------------------------------------
  1 | #ifndef SDAG_H
  2 | #define SDAG_H
  3 | 
  4 | #include "llvm/ADT/GraphTraits.h"
  5 | #include "llvm/ADT/iterator.h"
  6 | #include "llvm/ADT/iterator_range.h"
  7 | #include "llvm/IR/BasicBlock.h"
  8 | #include "llvm/IR/Function.h"
  9 | #include "llvm/IR/InstrTypes.h"
 10 | #include "llvm/IR/Value.h"
 11 | #include "llvm/Support/Casting.h"
 12 | #include "llvm/Support/type_traits.h"
 13 | #include "llvm/Analysis/MemorySSA.h"
 14 | #include "llvm/IR/IntrinsicInst.h"
 15 | #include <cassert>
 16 | #include <cstddef>
 17 | #include <iterator>
 18 | #include <set>
 19 | 
 20 | namespace llvm {
 21 | 
 22 | class SDAGNode;
 23 | 
 24 | class SDAG {
 25 | private:
 26 | 	static std::map<Function*, SDAG*> objMap;
 27 | 	static MemorySSA *MSSA;
 28 | 	static AAResults *AA;
 29 | 	
 30 | 	Function *function;
 31 | 	SDAGNode *root;
 32 | 	std::set<SDAGNode*> nodes;
 33 | 	std::map<Value*, SDAGNode*> nodeMap;
 34 | 	SDAG(Function *F) { this->function = F; }
 35 | 	
 36 | 	void build();
 37 | 	bool buildFromNode(SDAGNode* node);
 38 | 	void reachingMemDefs(Instruction *I, std::vector<Value*> &reachingDefs);
 39 | 	SDAGNode* newSuccNode(SDAGNode *parent, Value *V);
 40 | public:
 41 | 	static SDAG* get(Function *F, MemorySSA *MSSA, AAResults *AA);
 42 | 	
 43 | 	void print(raw_ostream &OS, SDAGNode *node, bool verbFmt=false) const;
 44 | 	void print(raw_ostream &OS, bool verbFmt=false) const { print(OS, root, verbFmt); }
 45 | 	SDAGNode *getRoot() const { return root; }
 46 | 	Function *getFunction() const { return function; }
 47 | 	const std::set<SDAGNode*>& getNodes() const { return nodes; }
 48 | 	void foldNodesByOpcode(unsigned opcode);
 49 | };
 50 | 
 51 | class SDAGNode {
 52 | protected:
 53 | 	SDAG *sdag;
 54 | 	Value *value;
 55 | 	std::vector<SDAGNode*> successors;
 56 | 	std::vector<SDAGNode*> parents;
 57 | 
 58 | public:
 59 | 	SDAGNode(SDAG *sdag, Value *value) {
 60 | 		this->sdag = sdag;
 61 | 		this->value = value;
 62 | 	}
 63 | 	void addSuccessor(SDAGNode *node);
 64 | 	void delSuccessor(SDAGNode *node);
 65 | 	void fold();
 66 | 
 67 | 	std::string getLabel(bool verbFmt=false) const;
 68 | 	bool isSpecial() const;
 69 | 	Value *getValue() const { return value; }
 70 | 	SDAG *getSDAG() const { return sdag; }
 71 | 	Function *getFunction() const { return sdag->getFunction(); }
 72 | 	unsigned getNumSuccessors() const { return successors.size(); };
 73 | 	unsigned getNumParents() const { return getParents().size(); };
 74 | 	const std::vector<SDAGNode*>& getSuccessors() const { return successors; };
 75 | 	const std::vector<SDAGNode*>& getParents() const { return parents; };
 76 | };
 77 | 
 78 | //===----------------------------------------------------------------------===//
 79 | // SDAGNode succ_iterator helpers
 80 | //===----------------------------------------------------------------------===//
 81 |  
// Random-access iterator over the successors of a node. The position is an
// index into the vector returned by Node->getSuccessors(); dereferencing
// yields a SuccNodeT pointer.
template <class NodeT, class SuccNodeT>
class SuccIteratorx
    : public iterator_facade_base<SuccIteratorx<NodeT, SuccNodeT>,
                                  std::random_access_iterator_tag, SuccNodeT, int,
                                  SuccNodeT *, SuccNodeT *> {
public:
  using difference_type = int;
  using pointer = SuccNodeT *;
  using reference = SuccNodeT *;

private:
  NodeT *Node;
  int Idx;
  using Self = SuccIteratorx<NodeT, SuccNodeT>;

  // Valid positions range from 0 to the number of successors inclusive (the
  // latter being the end position).
  inline bool index_is_valid(int Idx) {
    return Idx >= 0 && Idx <= (int)Node->getNumSuccessors();
  }

  /// Proxy object to allow write access in operator[]
  /// NOTE(review): assignment goes through Node->setSuccessor(), which the
  /// SDAGNode class declared in this header does not provide -- assigning
  /// through operator[] would presumably fail to compile; confirm.
  class SuccessorProxy {
    Self It;

  public:
    explicit SuccessorProxy(const Self &It) : It(It) {}

    SuccessorProxy(const SuccessorProxy &) = default;

    SuccessorProxy &operator=(SuccessorProxy RHS) {
      *this = reference(RHS);
      return *this;
    }

    SuccessorProxy &operator=(reference RHS) {
      It.Node->setSuccessor(It.Idx, RHS);
      return *this;
    }

    operator reference() const { return *It; }
  };

public:
  // begin iterator
  explicit inline SuccIteratorx(NodeT *Node) : Node(Node), Idx(0) {}
  // end iterator
  inline SuccIteratorx(NodeT *Node, bool) : Node(Node) {
    Idx = Node->getNumSuccessors();
  }

  /// Index of the successor this iterator currently designates.
  int getSuccessorIndex() const { return Idx; }

  // Equality compares the index only, not the node the iterator belongs to.
  inline bool operator==(const Self &x) const { return Idx == x.Idx; }

  inline SuccNodeT *operator*() const { return Node->getSuccessors()[Idx]; }

  inline SuccNodeT *operator->() const { return operator*(); }

  inline bool operator<(const Self &RHS) const {
    assert(Node == RHS.Node && "Cannot compare iterators of different nodes!");
    return Idx < RHS.Idx;
  }

  int operator-(const Self &RHS) const {
    assert(Node == RHS.Node && "Cannot compare iterators of different nodes!");
    return Idx - RHS.Idx;
  }

  inline Self &operator+=(int RHS) {
    int NewIdx = Idx + RHS;
    assert(index_is_valid(NewIdx) && "Iterator index out of bound");
    Idx = NewIdx;
    return *this;
  }

  inline Self &operator-=(int RHS) { return operator+=(-RHS); }

  // Specially implement the [] operation using a proxy object to support
  // assignment.
  inline SuccessorProxy operator[](int Offset) {
    Self TmpIt = *this;
    TmpIt += Offset;
    return SuccessorProxy(TmpIt);
  }

  /// Get the node whose successors this iterator walks.
  inline SuccNodeT *getSource() {
    return Node;
  }
};
173 | 
174 | //===----------------------------------------------------------------------===//
175 | // SDAGNode succ_iterator helpers
176 | //===----------------------------------------------------------------------===//
177 | 
// Mutable and const successor iterators over SDAGNode.
using sdagn_succ_iterator =
    SuccIteratorx<SDAGNode, SDAGNode>;
using sdagn_succ_const_iterator =
    SuccIteratorx<const SDAGNode, const SDAGNode>;

// Iterator to the first successor of N.
inline sdagn_succ_iterator sdagn_succ_begin(SDAGNode *N) {
  return sdagn_succ_iterator(N);
}
inline sdagn_succ_const_iterator sdagn_succ_begin(const SDAGNode *N) {
  return sdagn_succ_const_iterator(N);
}
// Past-the-end iterator of N's successors.
inline sdagn_succ_iterator sdagn_succ_end(SDAGNode *N) {
  return sdagn_succ_iterator(N, true);
}
inline sdagn_succ_const_iterator sdagn_succ_end(const SDAGNode *N) {
  return sdagn_succ_const_iterator(N, true);
}
195 | 
196 | //===--------------------------------------------------------------------===//
197 | // GraphTraits specializations for SDAGs
198 | //===--------------------------------------------------------------------===//
199 | 
200 | // Provide specializations of GraphTraits to be able to treat a function as a
201 | // graph of SDAG Nodes...
202 | 
// Graph view rooted at a single SDAGNode: the children of a node are its
// successors.
template <> struct GraphTraits<SDAGNode*> {
  using NodeRef = SDAGNode *;
  using ChildIteratorType = sdagn_succ_iterator;

  static NodeRef getEntryNode(NodeRef N) { return N; }
  static ChildIteratorType child_begin(NodeRef N) { return sdagn_succ_begin(N); }
  static ChildIteratorType child_end(NodeRef N) { return sdagn_succ_end(N); }
};
211 | 
// Const counterpart of the SDAGNode graph view: the children of a node are
// its successors, reached through const iterators.
template <> struct GraphTraits<const SDAGNode*> {
  using NodeRef = const SDAGNode *;
  using ChildIteratorType = sdagn_succ_const_iterator;

  static NodeRef getEntryNode(const NodeRef N) { return N; }

  static ChildIteratorType child_begin(NodeRef N) { return sdagn_succ_begin(N); }
  static ChildIteratorType child_end(NodeRef N) { return sdagn_succ_end(N); }
};
221 | 
222 | //===--------------------------------------------------------------------===//
223 | // GraphTraits specializations for function SDAGs
224 | //===--------------------------------------------------------------------===//
225 | 
226 | // Provide specializations of GraphTraits to be able to treat a SDAG as a
227 | // graph of SDAG nodes...
228 | //
// Whole-graph view of an SDAG: the entry node is the SDAG's root and the node
// list is the set returned by SDAG::getNodes().
template <> struct GraphTraits<SDAG*> : public GraphTraits<SDAGNode*> {
  static NodeRef getEntryNode(SDAG *sdag) { return sdag->getRoot(); }

  // nodes_iterator/begin/end - Allow iteration over all nodes in the graph
  using nodes_iterator = std::set<SDAGNode*>::iterator;

  static nodes_iterator nodes_begin(SDAG *sdag) {
    return nodes_iterator(sdag->getNodes().begin());
  }

  static nodes_iterator nodes_end(SDAG *sdag) {
    return nodes_iterator(sdag->getNodes().end());
  }

  // Number of nodes in the graph.
  static size_t size(SDAG *sdag) { return sdag->getNodes().size(); }
};
245 | 
// Const whole-graph view of an SDAG: the entry node is the SDAG's root and
// the node list is the set returned by SDAG::getNodes().
template <> struct GraphTraits<const SDAG*> :
  public GraphTraits<const SDAGNode*> {
  static NodeRef getEntryNode(const SDAG *sdag) { return sdag->getRoot(); }

  // nodes_iterator/begin/end - Allow iteration over all nodes in the graph
  using nodes_iterator = std::set<SDAGNode*>::iterator;

  static nodes_iterator nodes_begin(const SDAG *sdag) {
    return nodes_iterator(sdag->getNodes().begin());
  }

  static nodes_iterator nodes_end(const SDAG *sdag) {
    return nodes_iterator(sdag->getNodes().end());
  }

  // Number of nodes in the graph.
  static size_t size(const SDAG *sdag) { return sdag->getNodes().size(); }
};
263 | 
264 | class SDAGWrapperPass : public FunctionPass { // legacy pass-manager wrapper that exposes a function's SDAG
265 | public:
266 |   static char ID;
267 |   SDAGWrapperPass() : FunctionPass(ID) {}
268 |   // Builds the SDAG of F from MemorySSA and the alias-analysis results; always reports the IR as unchanged.
269 |   bool runOnFunction(Function &F) override {
270 |     auto MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
271 |     auto AAResults = &getAnalysis<AAResultsWrapperPass>().getAAResults();
272 |     sdag = SDAG::get(&F, MSSA, AAResults);
273 |     return false;
274 |   }
275 |   void print(raw_ostream &OS, const Module* = nullptr) const override {} // intentionally prints nothing
276 |   SDAG *getSDAG() const { return sdag; } // nullptr until runOnFunction has produced a result
277 | 
278 |   void getAnalysisUsage(AnalysisUsage &AU) const override {
279 |     AU.addRequired<MemorySSAWrapperPass>();
280 |     AU.addRequired<AAResultsWrapperPass>();
281 |     AU.setPreservesAll();
282 |   }
283 | private:
284 |   SDAG *sdag = nullptr; // result of the most recent runOnFunction; initialized so early getSDAG() calls are well-defined
285 | };
286 | 
287 | } // end namespace llvm
288 | 
289 | #endif // SDAG_H
290 | 


--------------------------------------------------------------------------------
/passes/include/svfa/SVFAPass.h:
--------------------------------------------------------------------------------
  1 | //===- SVFAPass.h -- Whole program analysis------------------------------------//
  2 | //
  3 | //                     SVF: Static Value-Flow Analysis
  4 | //
  5 | // Copyright (C) <2013-2017>  <Yulei Sui>
  6 | //
  7 | 
  8 | // This program is free software: you can redistribute it and/or modify
  9 | // it under the terms of the GNU General Public License as published by
 10 | // the Free Software Foundation, either version 3 of the License, or
 11 | // (at your option) any later version.
 12 | 
 13 | // This program is distributed in the hope that it will be useful,
 14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | // GNU General Public License for more details.
 17 | 
 18 | // You should have received a copy of the GNU General Public License
 19 | // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 20 | //
 21 | //===----------------------------------------------------------------------===//
 22 | 
 23 | 
 24 | /*
 25 |  * @file: SVFAPass.h
 26 |  * @author: yesen
 27 |  * @date: 10/06/2014
 28 |  * @version: 1.0
 29 |  *
 30 |  * @section LICENSE
 31 |  *
 32 |  * @section DESCRIPTION
 33 |  *
 34 |  */
 35 | 
 36 | 
 37 | #ifndef SVFA_H_
 38 | #define SVFA_H_
 39 | 
 40 | #include "MemoryModel/PointerAnalysis.h"
 41 | 
 42 | class SVFModule;
 43 | class SVFG;
 44 | 
 45 | /*!
 46 |  * Whole program pointer analysis.
 47 |  * This class performs various pointer analysis on the given module.
 48 |  */
 49 | // excised ", public llvm::AliasAnalysis" as that has a very light interface
 50 | // and I want to see what breaks.
 51 | class SVFAPass: public ModulePass {
 52 |     typedef std::vector<PointerAnalysis*> PTAVector;
 53 | 
 54 | public:
 55 |     /// Pass ID
 56 |     static char ID;
 57 | 
 58 |     enum AliasCheckRule {
 59 |         Conservative,   ///< return MayAlias if any pta says alias
 60 |         Veto,           ///< return NoAlias if any pta says no alias
 61 |         Precise         ///< return alias result by the most precise pta
 62 |     };
 63 | 
 64 |     /// Constructor; the analysis pointers start out null instead of indeterminate
 65 |     SVFAPass() : ModulePass(ID), _pta(nullptr), _svfg(nullptr) {
 66 | 
 67 |     }
 68 | 
 69 |     /// Destructor
 70 |     ~SVFAPass();
 71 | 
 72 |     /// LLVM analysis usage
 73 |     virtual inline void getAnalysisUsage(AnalysisUsage &au) const {
 74 |         // declare your dependencies here.
 75 |         /// this pass does not intend to change the IR, so everything is preserved
 76 |         au.setPreservesAll();
 77 |     }
 78 | 
 79 |     /// Get adjusted analysis for alias analysis (always this object)
 80 |     virtual inline void* getAdjustedAnalysisPointer(AnalysisID id) {
 81 |         return this;
 82 |     }
 83 | 
 84 |     /// Interface exposed to users of our pointer analysis, given Location infos; forwards the two Ptr values
 85 |     virtual inline AliasResult alias(const MemoryLocation  &LocA, const MemoryLocation  &LocB) {
 86 |         return alias(LocA.Ptr, LocB.Ptr);
 87 |     }
 88 | 
 89 |     /// Interface exposed to users of our pointer analysis, given Value infos
 90 |     virtual AliasResult alias(const Value* V1, const Value* V2);
 91 | 
 92 |     /// Print all alias pairs
 93 |     virtual void PrintAliasPairs(PointerAnalysis* pta);
 94 | 
 95 |     /// Interface of mod-ref analysis to determine whether a CallSite instruction can mod or ref any memory location
 96 |     virtual ModRefInfo getModRefInfo(const CallInst* callInst);
 97 | 
 98 |     /// Interface of mod-ref analysis to determine whether a CallSite instruction can mod or ref a specific memory location, given Location infos
 99 |     virtual inline ModRefInfo getModRefInfo(const CallInst* callInst, const MemoryLocation& Loc) {
100 |         return getModRefInfo(callInst, Loc.Ptr);
101 |     }
102 | 
103 |     /// Interface of mod-ref analysis to determine whether a CallSite instruction can mod or ref a specific memory location, given Value infos
104 |     virtual ModRefInfo getModRefInfo(const CallInst* callInst, const Value* V);
105 | 
106 |     /// Interface of mod-ref analysis between two CallSite instructions
107 |     virtual ModRefInfo getModRefInfo(const CallInst* callInst1, const CallInst* callInst2);
108 | 
109 |     /// We start from here: wrap the LLVM module in an SVFModule and analyze it; the IR is reported unchanged
110 |     virtual bool runOnModule(llvm::Module& module) {
111 |         SVFModule svfModule(module);
112 |         runOnModule(svfModule);
113 |         return false;
114 |     }
115 | 
116 |     /// Run pointer analysis on SVFModule
117 |     void runOnModule(SVFModule svfModule);
118 | 
119 |     void dumpCalleeStats(llvm::Module *M);
120 |     unsigned getCaleeCount(llvm::Module *M, llvm::CallSite &CS);
121 |     unsigned getCaleeTBCount(llvm::Module *M, llvm::CallSite &CS);
122 |     bool hasAddressTaken(const llvm::Function *F);
123 | 
124 |     /// PTA name
125 |     virtual inline StringRef getPassName() const {
126 |         return "SVFAPass";
127 |     }
128 | 
129 | private:
130 |     /// Create pointer analysis according to specified kind and analyze the module.
131 |     void runPointerAnalysis(SVFModule svfModule, u32_t kind);
132 | 
133 |     PTAVector ptaVector;    ///< all pointer analysis to be executed.
134 |     PointerAnalysis* _pta;  ///< pointer analysis to be executed (null until assigned).
135 |     SVFG* _svfg;  ///< svfg generated through -ander pointer analysis (null until assigned)
136 | };
137 | 
138 | 
139 | #endif /* SVFA_H_ */
140 | 


--------------------------------------------------------------------------------
/passes/set-norecurse-ext/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for the set-norecurse-ext pass
2 | ROOT=../..
3 | 
4 | PASSNAME := set-norecurse-ext
5 | OBJS := set-norecurse-ext.o
6 | 
7 | include ../Makefile.inc
8 | 
9 | 


--------------------------------------------------------------------------------
/passes/set-norecurse-ext/set-norecurse-ext.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <pass.h>
  3 | 
  4 | #include "llvm/IR/Module.h"
  5 | #include "llvm/IR/IRBuilder.h"
  6 | #include "llvm/IR/Intrinsics.h"
  7 | #include "llvm/IR/IntrinsicInst.h"
  8 | #include "llvm/Analysis/CallGraph.h"
  9 | #include "llvm/Analysis/CallGraphSCCPass.h"
 10 | #include "llvm/ADT/SCCIterator.h"
 11 | #include "llvm/IR/CFG.h"
 12 | #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 13 | #include "llvm/Analysis/AssumptionCache.h"
 14 | #include "llvm/IR/Dominators.h"
 15 | #include "llvm/Transforms/Utils/CodeExtractor.h"
 16 | #include "llvm/Analysis/LoopPass.h"
 17 | #include "llvm/Analysis/LoopInfo.h"
 18 | #include <set>
 19 | using namespace llvm;
 20 | 
 21 | #define DEBUG_TYPE "set-norecurse-ext"
 22 | #define setNoRecursExtPassLog(M) LLVM_DEBUG(dbgs() << "setNoRecursExtPass: " << M << "\n")
 23 | // Short alias for the prefixed debug logger defined above.
 24 | #define oprint setNoRecursExtPassLog
 25 | // NOTE(review): imd_t is not referenced anywhere else in this file.
 26 | typedef long imd_t;
 27 | 
 28 | // This pass sets the norecurse attribute on every external function that we
 29 | // can assume does not recurse back into the program in any way.
 30 | // Since we are calling an external function, the only way it could recurse back
 31 | // into the module, or call a recursive function in it, is through a callback
 32 | // pointer passed to it, so that is what we check for.
 33 | namespace {
 34 | 
 35 |   class SetNoRecursExtPass : public ModulePass {
 36 | 
 37 |   public:
 38 |     static char ID;
 39 |     SetNoRecursExtPass() : ModulePass(ID) {
 40 |     }
 41 | 
 42 |     // Returns true when `type` is a function type or a pointer (of any
 43 |     // depth) to a function type.
 44 |     bool isFunctionPointerType(Type *type) {
 45 |         // Peel one pointer level per recursive step.
 46 |         if (PointerType *pointerType = dyn_cast<PointerType>(type))
 47 |             return isFunctionPointerType(pointerType->getElementType());
 48 |         // Exit condition
 49 |         return type->isFunctionTy();
 50 |     }
 51 | 
 52 |     // Marks the external callee F and the call site CS norecurse, unless a
 53 |     // function pointer appears among F's parameters or CS's arguments.
 54 |     void setNoRecursExt(CallSite &CS, Function *F) {
 55 |         oprint("Checking " << *CS.getInstruction());
 56 | 
 57 |         // Check no parameter is a function pointer
 58 |         for (auto &arg: F->args()) {
 59 |             Type* argT = arg.getType();
 60 |             oprint("  " << *argT);
 61 |             if (isFunctionPointerType(argT)) {
 62 |                 oprint("  [-] not adding attr norecurse");
 63 |                 return;
 64 |             }
 65 |         }
 66 |         oprint(" Callsite " << F->getName().str());
 67 |         // Check also the callsite (covers actual arguments, e.g. of variadic calls)
 68 |         for (auto &arg: CS.args()) {
 69 |             Type* argT = (*arg).getType();
 70 |             oprint("  " << *argT);
 71 |             if (isFunctionPointerType(argT)) {
 72 |                 oprint("  [-] not adding attr norecurse");
 73 |                 return;
 74 |             }
 75 |         }
 76 |         oprint("  [+] adding attr norecurse");
 77 |         // if check ok set the norecurse attrs
 78 |         if (!CS.hasFnAttr(Attribute::NoRecurse))
 79 |             CS.addAttribute(AttributeList::FunctionIndex, Attribute::NoRecurse);
 80 |         if (!F->hasFnAttribute(Attribute::NoRecurse))
 81 |             F->addFnAttr(Attribute::NoRecurse);
 82 |     }
 83 | 
 84 |     // Marks the lone function of a singleton SCC norecurse if none of its calls can recurse; returns true iff it did.
 85 |     static bool addNoRecurseAttrs(CallGraphSCC &SCC) {
 86 |         SmallVector<Function *, 8> Functions;
 87 |         for (CallGraphNode *I : SCC) {
 88 |             Functions.push_back(I->getFunction());
 89 |         }
 90 | 
 91 |         // If the SCC contains multiple nodes we know for sure there is recursion.
 92 |         if (Functions.size() != 1)
 93 |             return false;
 94 | 
 95 |         Function *F = *Functions.begin();
 96 |         if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
 97 |             return false;
 98 | 
 99 |         // If every call in F targets a norecurse function, or is an indirect call
100 |         // whose call site is norecurse, F is norecurse. Self-recursion is caught
101 |         // too: F is not yet marked norecurse, so a call from F to F fails the check.
102 |         for (auto &BB : *F)
103 |         for (auto &I : BB.instructionsWithoutDebug())
104 |             if (auto *CB = dyn_cast<CallBase>(&I)) {
105 |                 Function *Callee = dyn_cast<Function>(CB->getCalledValue()->stripPointerCasts());
106 |                 if (!Callee || Callee == F || !Callee->doesNotRecurse()) {
107 |                     // Function calls a potentially recursive function.
108 |                     //
109 |                     // An unresolved (indirect) call is tolerated only when its
110 |                     // call site already carries the norecurse attribute.
111 |                     CallSite CS(&I);
112 |                     if (!Callee && CS.hasFnAttr(Attribute::NoRecurse)) continue;
113 |                     return false;
114 |                 }
115 |             }
116 | 
117 |         // Every call was to a non-recursive function other than this function, and
118 |         // we have no indirect recursion as the SCC size is one. This function cannot
119 |         // recurse.
120 |         F->setDoesNotRecurse();
121 |         return true;
122 |     }
123 | 
124 |     virtual bool runOnModule(Module &M) override {
125 |         setNoRecursExtPassLog("Running...");
126 | 
127 |         /* Iterate all functions in the module */
128 |         for (auto &F : M.getFunctionList()) {
129 |             if (F.isDeclaration())
130 |                 continue;
131 |             for (auto &BB: F) {
132 |                 for (auto &I: BB) {
133 |                     CallSite CS(&I);
134 |                     if (!CS.getInstruction() || CS.isInlineAsm())
135 |                         continue; // not a call
136 |                     Function *Callee = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
137 |                     if (!Callee)
138 |                         continue; // not a direct call
139 | 
140 |                     // if external function try to set the norecurse attr
141 |                     if (Callee->isDeclaration())
142 |                         setNoRecursExt(CS, Callee);
143 |                 }
144 |             }
145 |         }
146 | 
147 |         // Now visit the whole call graph in post order to derive norecurse attributes
148 |         CallGraph *CG = &getAnalysis<CallGraphWrapperPass>().getCallGraph();
149 |         // Walk the callgraph in bottom-up SCC order.
150 |         scc_iterator<CallGraph*> CGI = scc_begin(CG);
151 | 
152 |         CallGraphSCC CurSCC(*CG, &CGI);
153 |         while (!CGI.isAtEnd()) {
154 |             // Copy the current SCC and increment past it so that the pass can hack
155 |             // on the SCC if it wants to without invalidating our iterator.
156 |             const std::vector<CallGraphNode *> &NodeVec = *CGI;
157 |             CurSCC.initialize(NodeVec);
158 |             ++CGI;
159 | 
160 |             addNoRecurseAttrs(CurSCC);
161 |         }
162 |         return true; // conservatively report the module as modified
163 |     }
164 | 
165 |    void getAnalysisUsage(AnalysisUsage &AU) const override {
166 |         AU.addRequired<CallGraphWrapperPass>();
167 |     }
168 |   };
169 | 
170 | }
171 | 
172 | char SetNoRecursExtPass::ID = 0;
173 | RegisterPass<SetNoRecursExtPass> MP("set-norecurse-ext", "Set NoRecurse Attr to external functions Pass");
174 | 


--------------------------------------------------------------------------------
/remake.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Rebuilds and installs the LLVM passes. ROOT_DIR is relative, so run this from the repository root.
 3 | set -e
 4 | set -x
 5 | 
 6 | ROOT_DIR="."
 7 | 
 8 | # setup llvm env variables
 9 | if [ -z "${LLVM_DIR}" ]; then 
10 | 
11 |   echo "[ ] retrieving the LLVM directory..."
12 | 
13 |   if [ -z "${LLVM_CONFIG}" ]; then 
14 |       export LLVM_CONFIG='llvm-config'
15 |   fi
16 | 
17 |   export LLVM_VER="$($LLVM_CONFIG --version 2>/dev/null | sed 's/git//')"
18 |   if [ "$LLVM_VER" = "" ]; then
19 |     echo "[!] llvm-config not found!"
20 |     exit 1
21 |   fi
22 | 
23 |   echo "[+] using LLVM $LLVM_VER"
24 |   # --bindir already names the directory holding the tools; appending /bin would point at <bindir>/bin
25 |   export PATH="$($LLVM_CONFIG --bindir):$SVF_HOME/Debug-build/bin:$PATH"
26 |   export LLVM_DIR="$($LLVM_CONFIG --prefix)"
27 | 
28 | else
29 | 
30 |   export PATH="$LLVM_DIR/bin:$SVF_HOME/Debug-build/bin:$PATH"
31 | 
32 | fi
33 | 
34 | echo "[+] the LLVM directory is $LLVM_DIR"
35 | export LLVM_COMPILER_PATH="$LLVM_DIR/bin"
36 | # Build in passes/ and come back; paths are quoted so directories with spaces work.
37 | DIR="$(pwd)"
38 | cd "$ROOT_DIR/passes"
39 | make install || exit 1
40 | cd "$DIR"
41 | 


--------------------------------------------------------------------------------
/tests/driver.c:
--------------------------------------------------------------------------------
 1 | #include <unistd.h>
 2 | #include <stdio.h>
 3 | 
 4 | int LLVMFuzzerTestOneInput(const __uint8_t* data, size_t size);
 5 | 
 6 | /* Reads stdin (up to the buffer size) and hands the bytes to the fuzz target once. */
 7 | int main(int argc, char *argv[]) {
 8 |     __uint8_t data[1024*500];
 9 |     size_t len = 0;
10 | 
11 |     (void)argc;
12 |     (void)argv;
13 | 
14 |     /* read() may return short counts (e.g. on pipes): loop until EOF or a full buffer. */
15 |     while (len < sizeof data) {
16 |         ssize_t got = read(0, data + len, sizeof data - len);
17 |         if (got < 0)
18 |             return 1; /* never forward -1 to the target as a huge size_t length */
19 |         if (got == 0)
20 |             break;    /* end of input */
21 |         len += (size_t)got;
22 |     }
23 | 
24 |     LLVMFuzzerTestOneInput(data, len);
25 |     return 0;
26 | }


--------------------------------------------------------------------------------
/tests/driver.cc:
--------------------------------------------------------------------------------
1 | #include <unistd.h>
2 | 
3 | extern "C" int LLVMFuzzerTestOneInput(const __uint8_t* data, size_t size);
4 | 
5 | int main(int argc, char *argv[]) {
6 |     __uint8_t data[1024*500];
7 |     int ret = read(0, data, sizeof data);
8 |     LLVMFuzzerTestOneInput(data, ret);
9 | }


--------------------------------------------------------------------------------
/tests/opt:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | #
 3 | # Wrapper around LLVM's opt. For every argument "-<name>" such that
 4 | # $PASSES_DIR/<name>.so exists, "-load=$PASSES_DIR/<name>.so" is inserted right
 5 | # before it; all other arguments are forwarded untouched.
 6 | 
 7 | PASSES_DIR="../../bin"
 8 | 
 9 | # setup llvm env variables
10 | if [ -z "${LLVM_DIR}" ]; then
11 | 
12 |   if [ -z "${LLVM_CONFIG}" ]; then
13 |       export LLVM_CONFIG='llvm-config'
14 |   fi
15 | 
16 |   export LLVM_VER="$($LLVM_CONFIG --version 2>/dev/null | sed 's/git//')"
17 |   if [ "$LLVM_VER" = "" ]; then
18 |     echo "[!] llvm-config not found!"
19 |     exit 1
20 |   fi
21 | 
22 |   export OPT="$($LLVM_CONFIG --bindir)/opt"
23 | 
24 | else
25 | 
26 |   export OPT="$LLVM_DIR/bin/opt"
27 | 
28 | fi
29 | 
30 | # Rebuild the positional parameters in place: take each original argument off
31 | # the front and append it (preceded by its -load option when it names a local
32 | # pass) to the back. Unlike an unquoted $*, this keeps arguments that contain
33 | # whitespace or glob characters intact.
34 | remaining=$#
35 | while [ "$remaining" -gt 0 ]; do
36 | 	arg="$1"
37 | 	shift
38 | 	remaining=$((remaining - 1))
39 | 	case "$arg" in
40 | 	-*)
41 | 		pass="${arg#-}"
42 | 		if [ -f "$PASSES_DIR/$pass.so" ]; then
43 | 			set -- "$@" "-load=$PASSES_DIR/$pass.so"
44 | 		fi
45 | 		;;
46 | 	esac
47 | 	set -- "$@" "$arg"
48 | done
49 | 
50 | echo "$OPT" "$@"
51 | "$OPT" "$@"
52 | 


--------------------------------------------------------------------------------
/tests/test/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Rebuilds the passes, then compiles target.c through the call-graph cloning pipeline. Run from tests/test.
 3 | set -e
 4 | set -x
 5 | 
 6 | ROOT_DIR="../.."
 7 | 
 8 | # setup llvm env variables
 9 | if [ -z "${LLVM_DIR}" ]; then 
10 | 
11 |   echo "[ ] retrieving the LLVM directory..."
12 | 
13 |   if [ -z "${LLVM_CONFIG}" ]; then 
14 |       export LLVM_CONFIG='llvm-config'
15 |   fi
16 | 
17 |   export LLVM_VER="$($LLVM_CONFIG --version 2>/dev/null | sed 's/git//')"
18 |   if [ "$LLVM_VER" = "" ]; then
19 |     echo "[!] llvm-config not found!"
20 |     exit 1
21 |   fi
22 | 
23 |   echo "[+] using LLVM $LLVM_VER"
24 |   # --bindir already names the directory holding the tools; appending /bin would point at <bindir>/bin
25 |   export PATH="$($LLVM_CONFIG --bindir):$SVF_HOME/Debug-build/bin:$PATH"
26 |   export LLVM_DIR="$($LLVM_CONFIG --prefix)"
27 | 
28 | else
29 | 
30 |   export PATH="$LLVM_DIR/bin:$SVF_HOME/Debug-build/bin:$PATH"
31 | 
32 | fi
33 | 
34 | echo "[+] the LLVM directory is $LLVM_DIR"
35 | export LLVM_COMPILER_PATH="$LLVM_DIR/bin"
36 | # Build in passes/ and come back; paths are quoted so directories with spaces work.
37 | DIR="$(pwd)"
38 | cd "$ROOT_DIR/passes"
39 | make install || exit 1
40 | cd "$DIR"
41 | 
42 | export LLVM_BITCODE_GENERATION_FLAGS="-flto"
43 | BENCH="target"
44 | # Pipeline: compile -> link -> internalize around main's call tree -> plan and apply cgc -> internalize again -> link driver.
45 | "$LLVM_COMPILER_PATH/clang" -O1 -flto -g -c -o $BENCH.base.bc $BENCH.c
46 | "$LLVM_COMPILER_PATH/llvm-link" -o $BENCH.linked.bc $BENCH.base.bc
47 | ../opt -dump-call-tree -call-tree-start="main" -dump-tree-file='call-tree.log' -o /dev/null $BENCH.linked.bc
48 | ../opt -internalize -internalize-public-api-file='call-tree.log' -globaldce -o $BENCH.linked_int.bc $BENCH.linked.bc
49 | ../opt -cgc-planner -cgc-strategy=params -cgc-funcs='main' -stat=0 -cgc-calls-treshold=1000000 -func-stats -dump-weights -dump-weights-root='main' -cgc -cgc-clone-prefix='' -cgc-fill=1 -dump-call-tree -call-tree-start="main" -dump-tree-file='call-tree.log' -o $BENCH.cgc0.bc $BENCH.linked_int.bc
50 | ../opt -internalize -internalize-public-api-file='call-tree.log' -globaldce -o $BENCH.cgc.bc $BENCH.cgc0.bc
51 | # ../opt -func-stats $BENCH.linked_int.bc -o /dev/null
52 | # ../opt -func-stats $BENCH.cgc.bc -o /dev/null
53 | 
54 | "$LLVM_COMPILER_PATH/clang++" -O1 ../driver.cc $BENCH.cgc.bc -o $BENCH.out
55 | 


--------------------------------------------------------------------------------
/tests/test/target.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | int glob1[8] = {0};
 4 | int glob2[8] = {0};
 5 | 
 6 | __attribute_noinline__ int foo(int* ptr) { /* noinline leaf called by func1, func2 and func3 */
 7 |     return ptr[2] + 1; /* third element plus one; ptr must point to at least 3 ints */
 8 | }
 9 | 
10 | __attribute_noinline__ int func1(int* ptr) { /* noinline wrapper: forwards ptr to foo */
11 |     return foo(ptr);
12 | }
13 | 
14 | __attribute_noinline__ int func2(int* ptr) { /* noinline wrapper: forwards ptr to foo */
15 |     return foo(ptr);
16 | }
17 | 
18 | __attribute_noinline__ int func3(int* ptr) { /* noinline wrapper: forwards ptr to foo */
19 |     return foo(ptr);
20 | }
21 | 
22 | int main(int argc, char** argv) { /* argc/argv are unused */
23 |     return func1(glob1) + func2(glob2) + func3(glob2); /* glob1 reaches foo via func1, glob2 via func2 and func3 */
24 | }


--------------------------------------------------------------------------------