├── .gitignore ├── CMakeLists.txt ├── include ├── AndersonSolver.h ├── NodeFactory.h ├── PointerAnalysis.h ├── SteensgardSolver.h └── UnionSet.h ├── lib ├── AndersonSolver.cpp ├── ConstraintCollect.cpp ├── Main.cpp ├── NodeFactory.cpp ├── PointerAnalysis.cpp └── SteensgardSolver.cpp ├── note.md ├── readme.md ├── run.sh ├── sample-output ├── ptg.dot └── ptg.png ├── sh └── compile.sh └── test ├── test00.c ├── test01.c ├── test02.c ├── test03.c ├── test04.c └── test05.c /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | 3 | bc/ 4 | build/ 5 | release/ 6 | 7 | 8 | .bc 9 | .ll 10 | 11 | .dot 12 | .png 13 | output/ -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.1.0) 2 | # ${LLVM_DIR} 3 | project(assign2) 4 | find_package(LLVM REQUIRED CONFIG HINTS /usr/local/llvm10d /usr/local/llvm10d/lib/cmake/llvm 5 | NO_DEFAULT_PATH) 6 | 7 | include_directories(${LLVM_INCLUDE_DIRS} ${CLANG_INCLUDE_DIRS} SYSTEM) 8 | link_directories(${LLVM_LIBRARY_DIRS}) 9 | message(STATUS "LLVM_LIB DIR : ${LLVM_LIBRARY_DIRS}") 10 | set(LLVM_LINK_COMPONENTS 11 | LLVMCore 12 | LLVMIRReader 13 | LLVMPasses 14 | ) 15 | 16 | message(STATUS "LLVM LIBS : ${LLVM_LINK_COMPONENTS}") 17 | # Support plugins. 
18 | 19 | #set(SOURCES 20 | # lib/Main.cpp 21 | # lib/NodeFactory.cpp 22 | # lib/ConstraintCollect.cpp 23 | # lib/ConstraintSolve.cpp 24 | #) 25 | file(GLOB SOURCES "lib/*.cpp") 26 | 27 | add_executable(anderson ${SOURCES}) 28 | 29 | target_link_libraries(anderson 30 | ${LLVM_LINK_COMPONENTS} 31 | ) 32 | target_include_directories(anderson 33 | PRIVATE 34 | ${PROJECT_SOURCE_DIR}/include 35 | ) -------------------------------------------------------------------------------- /include/AndersonSolver.h: -------------------------------------------------------------------------------- 1 | #include "PointerAnalysis.h" 2 | #include 3 | 4 | using namespace std; 5 | /* 6 | Constraints solving. 7 | 8 | graph defnition: 9 | - insertEdge 10 | - propagate points-to info 11 | */ 12 | 13 | typedef set NodeSet; 14 | class PointsToNode; 15 | 16 | class AndersonPTG { 17 | vector graph; 18 | // typedef pair WLItem; 19 | typedef NodeIdx WLItem; 20 | queue worklist; 21 | public: 22 | AndersonPTG(unsigned n, vector& constraints); 23 | 24 | void solve(); 25 | 26 | const vector& getGraph() const; 27 | private: 28 | void initGraph(vector& constraints); 29 | void insertEdge(NodeIdx src, NodeIdx dest); 30 | void propagate(NodeIdx dest, const NodeSet& st); 31 | void propagate(NodeIdx dest, NodeIdx src); 32 | 33 | public: 34 | void dumpGraph(PAPass& pass); 35 | }; 36 | 37 | typedef set NodeSet; 38 | /// dump node to name, we may need this 39 | class PointsToNode { 40 | /// when this node's pts-set changes, we needd to propagate changes to 41 | /// other nodes through "copy", "load" & "store" 42 | /// copy is "static", while the other 2 are "dynamic" 43 | NodeSet successors; 44 | /// load & store are both indirect! 45 | NodeSet loadTo; // other <- *me 46 | NodeSet storeFrom; // *me <- other 47 | // NodeIdx idx; /// ? 
do we need this 48 | 49 | NodeSet ptsSet; 50 | public: 51 | void addSuccessor(NodeIdx succ) { successors.insert(succ); } 52 | void addLoad(NodeIdx dest) { loadTo.insert(dest); } 53 | void addStore(NodeIdx src) { storeFrom.insert(src); } 54 | 55 | bool addPointee(NodeIdx pte) { 56 | if(ptsSet.count(pte)) return false; 57 | ptsSet.insert(pte); 58 | return true; 59 | } 60 | bool addPointee(const NodeSet& src) { 61 | bool changed = false; 62 | for(NodeIdx idx:src) { 63 | if(!ptsSet.count(idx)) { 64 | changed = true; 65 | break; 66 | } 67 | } 68 | if(changed) ptsSet.insert(src.begin(), src.end()); 69 | return changed; 70 | } 71 | 72 | /// getter 73 | const NodeSet& getSuccessors()const { return successors; } 74 | const NodeSet& getLoads()const { return loadTo; } 75 | const NodeSet& getStores()const { return storeFrom; } 76 | const NodeSet& getPtsSet()const { return ptsSet; } 77 | // const NodeIdx getIdx()const { return idx; } 78 | 79 | bool hasSuccessor(NodeIdx succ) { return successors.count(succ); } 80 | }; -------------------------------------------------------------------------------- /include/NodeFactory.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace llvm; 10 | using namespace std; 11 | 12 | /// Anderson Node definition 13 | typedef unsigned NodeIdx; 14 | class LocationNode { 15 | public: 16 | enum NodeType { 17 | ValueNode, // kind of like pointer? 
18 | ObjectNode, // alloc site 19 | }; 20 | 21 | private: 22 | int idx; // node id 23 | const Value* value; // correspinding inst 24 | NodeType nodeType; 25 | friend class NodeFactory; 26 | public: 27 | LocationNode(int idx, const Value* value, NodeType nodeType) 28 | :idx(idx), value(value), nodeType(nodeType) {} 29 | const Value* getValue() const { return value; } 30 | }; 31 | 32 | class NodeFactory { 33 | vector nodes; 34 | map objNodes; 35 | map valNodes; 36 | map retNodes; 37 | public: 38 | NodeIdx createValNode(const Value* value); 39 | NodeIdx createObjNode(const Value* value); 40 | NodeIdx createRetNode(const Value* value); 41 | NodeIdx getValNode(const Value* value)const; 42 | NodeIdx getObjNode(const Value* value)const; 43 | NodeIdx getRetNode(const Value* value)const; 44 | 45 | // get the "llvm::Value*"(maybe a inst, func) by node index 46 | const Value* getValueByNodeIdx(NodeIdx idx)const; 47 | bool isValueNode(NodeIdx idx)const; 48 | unsigned getNumNode() { return nodes.size(); } 49 | }; -------------------------------------------------------------------------------- /include/PointerAnalysis.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | #include 20 | 21 | #include 22 | 23 | #include "NodeFactory.h" 24 | 25 | using namespace llvm; 26 | using namespace std; 27 | 28 | class PointsToNode; 29 | struct PAConstraint { 30 | enum ConstraintType { 31 | Copy, AddressOf, Load, Store, 32 | }; 33 | PAConstraint(NodeIdx dest, NodeIdx src, ConstraintType type) 34 | :dest(dest), src(src), type(type) {} 35 | NodeIdx getDest() { return dest; } 36 | NodeIdx getSrc() { return src; } 37 | ConstraintType getTy() { return type; } 38 | private: 39 | NodeIdx dest, src; 40 | ConstraintType type; 41 | }; 42 | 43 | ///processed by 
mem2reg before this pass. 44 | /// Pointer Analysis Pass 45 | struct PAPass : public ModulePass { 46 | static char ID; // Pass identification, replacement for typeid 47 | PAPass() : ModulePass(ID) {} 48 | 49 | NodeFactory nodeFactory; 50 | vector constraints; 51 | 52 | bool runOnModule(Module &M) override; 53 | string idx2str(NodeIdx idx, bool visualize=true); 54 | private: 55 | /* 56 | ** currently only add return node for each function, as a context-insensitive 57 | ** approach to handle function call. This return node kind of serves as a 58 | ** "core" node which (may) links multiple call sites & return sites 59 | */ 60 | void collectConstraintsForGlobal(Module &M); 61 | void collectConstraintsForFunction(const Function *f); 62 | void collectConstraintsForInstruction(const Instruction* inst); 63 | /// constraints: ret & call, parameter passing 64 | void addConstraintsForCall(ImmutableCallSite cs); 65 | void addArgConstraints(ImmutableCallSite cs, const Function* f); 66 | 67 | void solveConstraints(); 68 | 69 | /// dump 70 | void dumpConstraints(); 71 | void dumpPtsSet(const vector& graph); 72 | }; 73 | -------------------------------------------------------------------------------- /include/SteensgardSolver.h: -------------------------------------------------------------------------------- 1 | #include "PointerAnalysis.h" 2 | #include "UnionSet.h" 3 | 4 | #include 5 | #include 6 | 7 | typedef set PointsToSet; 8 | 9 | class SteensgardPTG { 10 | UnionSet uset; 11 | // pointer analysis as type inference 12 | map type; 13 | 14 | USetIdx createDeferenceNode(USetIdx deferencedIdx); 15 | void run(vector &constraints); 16 | USetIdx join(USetIdx x, USetIdx y); 17 | void joinPts(USetIdx ptr); 18 | void insert(USetIdx dest, USetIdx loc); 19 | void handleEqual(USetIdx x, USetIdx y); 20 | public: 21 | SteensgardPTG(int locationCnt, vector &constraints):uset(locationCnt){ 22 | run(constraints); 23 | } 24 | void solve(); 25 | void dumpGraph(PAPass& pass); 26 | }; 
-------------------------------------------------------------------------------- /include/UnionSet.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | using namespace std; 4 | 5 | /// Union set with dynamic size(only increasing) 6 | typedef unsigned USetIdx; 7 | class UnionSet { 8 | private: 9 | /// when parent[i] < 0, means it is a root 10 | /// and its tree size == -parent[i] 11 | vector parent; 12 | public: 13 | USetIdx createNode() { 14 | USetIdx index = parent.size(); 15 | parent.push_back(-1); 16 | return index; 17 | } 18 | 19 | UnionSet(int sz=0) { 20 | for(int i=0; i= 0); 26 | int par = parent[from]; 27 | if(par < 0) return from; 28 | return parent[from] = find(par); 29 | } 30 | 31 | int merge(int x, int y) { 32 | USetIdx px = find(x); 33 | USetIdx py = find(y); 34 | 35 | // subtree with px as root is bigger 36 | if(parent[px] < parent[py]) { 37 | parent[px] += parent[py]; 38 | parent[py] = px; 39 | return px; 40 | } else { 41 | parent[py] += parent[px]; 42 | parent[px] = py; 43 | return py; 44 | } 45 | } 46 | 47 | map> getClasses() { 48 | map> ret; 49 | for(USetIdx i=0; i 4 | 5 | AndersonPTG::AndersonPTG(unsigned n, vector& constraints) 6 | : graph(n) { initGraph(constraints); } 7 | 8 | void AndersonPTG::solve() { 9 | while(!worklist.empty()) { 10 | NodeIdx idx = worklist.front(); 11 | worklist.pop(); 12 | auto& node = graph[idx]; 13 | for(NodeIdx succ:node.getSuccessors()) propagate(succ, node.getPtsSet()); 14 | 15 | for(NodeIdx pointee:node.getPtsSet()) { 16 | for(NodeIdx load:node.getLoads()) insertEdge(pointee, load); 17 | for(NodeIdx store:node.getStores()) insertEdge(store, pointee); 18 | } 19 | } 20 | } 21 | 22 | const vector& AndersonPTG::getGraph() const { 23 | return graph; 24 | } 25 | void AndersonPTG::initGraph(vector& constraints) { 26 | // llvm::errs() << "init points-to graph\n"; 27 | for(auto& cons:constraints) { 28 | switch (cons.getTy()) 29 | { 30 | case PAConstraint::Copy : 31 | 
/// Wrap `s` in double quotes so it can be emitted as a DOT identifier.
///
/// A double quote inside `s` is escaped as \" so that it cannot terminate the
/// quoted string early; PAPass::idx2str() can return names that contain
/// quotes (getValueName() wraps printed values in them). Strings without
/// quotes produce exactly the same output as before.
static string quote(string s) {
    string quoted = "\"";
    for(char c : s) {
        if(c == '"') quoted += '\\';
        quoted += c;
    }
    quoted += '"';
    return quoted;
}
"PointerAnalysis.h" 2 | 3 | #include "llvm/IR/InstIterator.h" 4 | #include "llvm/IR/Instructions.h" 5 | 6 | 7 | cl::opt DumpDebugInfo("debug-info", 8 | cl::desc("Dump debug info into out"), 9 | cl::init(false), cl::Hidden); 10 | cl::opt DumpTrace("trace", 11 | cl::desc("Dump trace into err"), 12 | cl::init(false), cl::Hidden); 13 | cl::opt DumpInst("dump-inst", 14 | cl::desc("Dump instructions"), 15 | cl::init(false), cl::Hidden); 16 | cl::opt Node2Name("node2name", 17 | cl::desc("Dump node by index or name"), 18 | cl::init(false), cl::Hidden); 19 | 20 | void PAPass::collectConstraintsForGlobal(Module &M) { 21 | for(Function & f:M) { 22 | if(f.isIntrinsic() || f.isDeclaration()) continue; 23 | if(f.getType()->isPointerTy()) 24 | nodeFactory.createRetNode(&f); 25 | } 26 | } 27 | void PAPass::collectConstraintsForFunction(const Function *f) { 28 | for (const_inst_iterator itr = inst_begin(f), ite = inst_end(f); itr != ite; 29 | ++itr) { 30 | auto inst = &*itr.getInstructionIterator(); 31 | if(inst->getType()->isPointerTy()) 32 | nodeFactory.createValNode(inst); 33 | } 34 | for (const_inst_iterator itr = inst_begin(f), ite = inst_end(f); itr != ite; 35 | ++itr) { 36 | auto inst = &*itr.getInstructionIterator(); 37 | collectConstraintsForInstruction(inst); 38 | } 39 | } 40 | 41 | void PAPass::collectConstraintsForInstruction(const Instruction* inst) { 42 | if(DumpInst) inst->dump(); 43 | switch (inst->getOpcode()) 44 | { 45 | case Instruction::Alloca: { 46 | assert(inst->getType()->isPointerTy()); 47 | NodeIdx src = nodeFactory.createObjNode(inst); 48 | NodeIdx dest = nodeFactory.getValNode(inst); 49 | constraints.emplace_back(dest, src, PAConstraint::AddressOf); 50 | break; 51 | } 52 | case Instruction::Load: 53 | if(inst->getType()->isPointerTy()) { 54 | NodeIdx dest = nodeFactory.getValNode(inst); 55 | NodeIdx src = nodeFactory.getValNode(inst->getOperand(0)); 56 | constraints.emplace_back(dest, src, PAConstraint::Load); 57 | } 58 | break; 59 | 60 | case 
Instruction::Store: 61 | // type of store instruction is "void" 62 | if(inst->getOperand(0)->getType()->isPointerTy()) { 63 | NodeIdx src = nodeFactory.getValNode(inst->getOperand(0)); 64 | NodeIdx dest = nodeFactory.getValNode(inst->getOperand(1)); 65 | constraints.emplace_back(dest, src, PAConstraint::Store); 66 | } 67 | break; 68 | case Instruction::PHI: 69 | if(inst->getType()->isPointerTy()) { 70 | const PHINode *phiNode = cast(inst); 71 | NodeIdx dest = nodeFactory.getValNode(inst); 72 | for(unsigned i=0; igetNumIncomingValues(); i++) { 73 | NodeIdx src = nodeFactory.getValNode(phiNode->getIncomingValue(i)); 74 | constraints.emplace_back(dest, src, PAConstraint::Copy); 75 | } 76 | } 77 | break; 78 | case Instruction::Call: 79 | case Instruction::Invoke: { 80 | ImmutableCallSite cs(inst); 81 | assert(cs && "wrong callsite?"); 82 | addConstraintsForCall(cs); 83 | break; 84 | } 85 | break; 86 | case Instruction::Ret: 87 | // do not handle pointer args 88 | if(inst->getNumOperands()>0 && inst->getOperand(0)->getType()->isPointerTy()) { 89 | NodeIdx dest = nodeFactory.getRetNode(inst->getParent()->getParent()); 90 | NodeIdx src = nodeFactory.getValNode(inst->getOperand(0)); 91 | constraints.emplace_back(dest, src, PAConstraint::Copy); 92 | } 93 | break; 94 | case Instruction::GetElementPtr: { 95 | /// field-insensitive 96 | NodeIdx dest = nodeFactory.getValNode(inst); 97 | NodeIdx src = nodeFactory.getValNode(inst->getOperand(0)); 98 | constraints.emplace_back(dest, src, PAConstraint::Copy); 99 | } 100 | default: 101 | break; 102 | } 103 | } 104 | 105 | /// constraints: ret & call, parameter passing 106 | void PAPass::addConstraintsForCall(ImmutableCallSite cs) { 107 | /// direct call 108 | if(const Function* f = cs.getCalledFunction()) { 109 | if(f->isIntrinsic() || f->isDeclaration()) { 110 | if(DumpDebugInfo) llvm::outs() << "external call: " << f->getName() << "\n"; 111 | return; 112 | } else { 113 | // 114 | NodeIdx dest = 
nodeFactory.getValNode(cs.getInstruction()); 115 | NodeIdx src = nodeFactory.getRetNode(f); 116 | constraints.emplace_back(dest, src, PAConstraint::Copy); 117 | addArgConstraints(cs, f); 118 | } 119 | 120 | } else { 121 | // TODO 122 | assert("Not implemented yet"); 123 | } 124 | } 125 | 126 | void PAPass::addArgConstraints(ImmutableCallSite cs, const Function* f) { 127 | auto argIt = cs.arg_begin(); 128 | auto parIt = f->arg_begin(); 129 | 130 | while(argIt != cs.arg_end() && parIt != f->arg_end()) { 131 | const Value* arg = *argIt; 132 | const Value* par = &*parIt; 133 | if(arg->getType()->isPointerTy() && par->getType()->isPointerTy()) { 134 | NodeIdx dest = nodeFactory.getValNode(par); 135 | NodeIdx src = nodeFactory.getValNode(arg); 136 | constraints.emplace_back(dest, src, PAConstraint::Copy); 137 | } 138 | argIt++; 139 | parIt++; 140 | } 141 | } 142 | 143 | void PAPass::dumpConstraints() { 144 | llvm::errs() << "Constraints " << constraints.size() << "\n"; 145 | for(auto &item: constraints) { 146 | auto srcStr = idx2str(item.getSrc()); 147 | auto destStr = idx2str(item.getDest()); 148 | // auto srcStr = item.getSrc(); 149 | // auto destStr = item.getDest(); 150 | switch(item.getTy()) { 151 | case PAConstraint::AddressOf: 152 | llvm::errs() << destStr << " <- &" << srcStr << "\n"; 153 | break; 154 | case PAConstraint::Copy: 155 | llvm::errs() << destStr << " <- " << srcStr << "\n"; 156 | break; 157 | case PAConstraint::Load: 158 | llvm::errs() << destStr << " <- *" << srcStr << "\n"; 159 | break; 160 | case PAConstraint::Store: 161 | llvm::errs() << "*" << destStr << " <- " << srcStr << "\n"; 162 | break; 163 | } 164 | } 165 | } 166 | 167 | 168 | // code from https://github.com/jarulraj/llvm/ , the find name is too trivial... 
169 | static std::string getValueName (const Value *v) { 170 | // If we can get name directly 171 | if (v->getName().str().length() > 0) { 172 | return v->getName().str(); 173 | } else if (isa(v)) { 174 | std::string s = ""; 175 | raw_string_ostream *strm = new raw_string_ostream(s); 176 | v->print(*strm); 177 | std::string inst = strm->str(); 178 | size_t idx1 = inst.find("%"); 179 | size_t idx2 = inst.find(" ", idx1); 180 | if (idx1 != std::string::npos && idx2 != std::string::npos && idx1 == 2) { 181 | return inst.substr(idx1, idx2 - idx1); 182 | } else { 183 | // nothing match 184 | return ""; 185 | } 186 | } else if (const ConstantInt *cint = dyn_cast(v)) { 187 | std::string s = ""; 188 | raw_string_ostream *strm = new raw_string_ostream(s); 189 | cint->getValue().print(*strm, true); 190 | return strm->str(); 191 | } else { 192 | std::string s = ""; 193 | raw_string_ostream *strm = new raw_string_ostream(s); 194 | v->print(*strm); 195 | std::string inst = strm->str(); 196 | return "\"" + inst + "\""; 197 | } 198 | } 199 | string PAPass::idx2str(NodeIdx idx, bool visualize) { 200 | if(Node2Name || visualize) { 201 | string suffix = nodeFactory.isValueNode(idx)? "":"(obj)"; 202 | auto value = nodeFactory.getValueByNodeIdx(idx); 203 | // return string(value->getName()); 204 | return getValueName(value) + suffix; 205 | } else return to_string(idx); 206 | } -------------------------------------------------------------------------------- /lib/Main.cpp: -------------------------------------------------------------------------------- 1 | //===- Hello.cpp - Example code from "Writing an LLVM Pass" ---------------===// 2 | // 3 | // The LLVM Compiler Infrastructure 4 | // 5 | // This file is distributed under the University of Illinois Open Source 6 | // License. See LICENSE.TXT for details. 
7 | // 8 | //===----------------------------------------------------------------------===// 9 | // 10 | // This file implements two versions of the LLVM "Hello World" pass described 11 | // in docs/WritingAnLLVMPass.html 12 | // 13 | //===----------------------------------------------------------------------===// 14 | 15 | #include "PointerAnalysis.h" 16 | 17 | #include 18 | 19 | using namespace std; 20 | using namespace llvm; 21 | static ManagedStatic GlobalContext; 22 | static LLVMContext &getGlobalContext() { return *GlobalContext; } 23 | /* In LLVM 5.0, when -O0 passed to clang , the functions generated with clang will 24 | * have optnone attribute which would lead to some transform passes disabled, like mem2reg. 25 | */ 26 | struct EnableFunctionOptPass: public FunctionPass { 27 | static char ID; 28 | EnableFunctionOptPass():FunctionPass(ID){} 29 | bool runOnFunction(Function & F) override{ 30 | if(F.hasFnAttribute(Attribute::OptimizeNone)) 31 | { 32 | F.removeFnAttr(Attribute::OptimizeNone); 33 | } 34 | return true; 35 | } 36 | }; 37 | 38 | char EnableFunctionOptPass::ID=0; 39 | 40 | cl::opt DumpCons("dump-cons", 41 | cl::desc("Dump constraints"), 42 | cl::init(false), cl::Hidden); 43 | cl::opt DumpModuleInfo("dump-module", 44 | cl::desc("Dump Module info into stderr"), 45 | cl::init(false), cl::Hidden); 46 | 47 | bool PAPass::runOnModule(Module &M) { 48 | if(DumpModuleInfo) { 49 | M.dump(); 50 | errs()<<"------------------------------\n"; 51 | } 52 | collectConstraintsForGlobal(M); 53 | for(Function& f:M) { 54 | collectConstraintsForFunction(&f); 55 | } 56 | if(DumpCons) dumpConstraints(); 57 | /// solve 58 | solveConstraints(); 59 | return false; 60 | } 61 | 62 | 63 | char PAPass::ID = 0; 64 | static RegisterPass X("my-anderson", "My Anderson implementation"); 65 | 66 | static cl::opt 67 | InputFilename(cl::Positional, 68 | cl::desc(".bc"), 69 | cl::init("")); 70 | 71 | 72 | int main(int argc, char **argv) { 73 | LLVMContext &Context = getGlobalContext(); 
74 | SMDiagnostic Err; 75 | // Parse the command line to read the Inputfilename 76 | cl::ParseCommandLineOptions(argc, argv, 77 | "PAPass \n My first LLVM too which does not do much.\n"); 78 | 79 | 80 | // Load the input module 81 | std::unique_ptr M = parseIRFile(InputFilename, Err, Context); 82 | if (!M) { 83 | Err.print(argv[0], errs()); 84 | return 1; 85 | } 86 | 87 | llvm::legacy::PassManager Passes; 88 | 89 | ///Remove functions' optnone attribute in LLVM5.0 90 | Passes.add(new EnableFunctionOptPass()); 91 | ///Transform it to SSA 92 | Passes.add(llvm::createPromoteMemoryToRegisterPass()); 93 | 94 | /// Your pass to print Function and Call Instructions 95 | Passes.add(new PAPass()); 96 | Passes.run(*M.get()); 97 | } 98 | 99 | -------------------------------------------------------------------------------- /lib/NodeFactory.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "NodeFactory.h" 4 | 5 | NodeIdx NodeFactory::createValNode(const Value* value) { 6 | int idx = nodes.size(); 7 | nodes.emplace_back(idx, value, LocationNode::ValueNode); 8 | assert(valNodes.find(value) == valNodes.end() && "Node already exist"); 9 | valNodes.emplace(value, idx); 10 | return idx; 11 | } 12 | 13 | NodeIdx NodeFactory::createObjNode(const Value* value) { 14 | int idx = nodes.size(); 15 | nodes.emplace_back(idx, value, LocationNode::ObjectNode); 16 | assert(objNodes.find(value) == objNodes.end() && "Node already exist"); 17 | objNodes.emplace(value, idx); 18 | return idx; 19 | } 20 | 21 | NodeIdx NodeFactory::createRetNode(const Value* value) { 22 | int idx = nodes.size(); 23 | nodes.emplace_back(idx, value, LocationNode::ValueNode); 24 | assert(retNodes.find(value) == retNodes.end() && "Node already exist"); 25 | retNodes.emplace(value, idx); 26 | return idx; 27 | } 28 | 29 | NodeIdx NodeFactory::getValNode(const Value* value)const { 30 | if(valNodes.find(value) == valNodes.end()) 31 | value->dump(); 32 | 
assert(valNodes.find(value) != valNodes.end() && "Node dose NOT exist"); 33 | return valNodes.at(value); 34 | } 35 | 36 | NodeIdx NodeFactory::getObjNode(const Value* value)const { 37 | assert(objNodes.find(value) != objNodes.end() && "Node dose NOT exist"); 38 | return objNodes.at(value); 39 | } 40 | 41 | NodeIdx NodeFactory::getRetNode(const Value* value)const { 42 | assert(retNodes.find(value) != retNodes.end() && "Node dose NOT exist"); 43 | return retNodes.at(value); 44 | } 45 | 46 | const Value* NodeFactory::getValueByNodeIdx(NodeIdx idx)const { 47 | assert(idx < nodes.size() && "node idx out of bound"); 48 | return nodes[idx].getValue(); 49 | } 50 | 51 | bool NodeFactory::isValueNode(NodeIdx idx) const { 52 | return nodes[idx].nodeType == LocationNode::ValueNode; 53 | } -------------------------------------------------------------------------------- /lib/PointerAnalysis.cpp: -------------------------------------------------------------------------------- 1 | #include "PointerAnalysis.h" 2 | #include "AndersonSolver.h" 3 | #include "SteensgardSolver.h" 4 | 5 | // cl::list Argv(cl::ConsumeAfter, cl::desc("...")); 6 | cl::opt Steensgard("steen", cl::desc("Steensgrad/Unification pointer analysis"), 7 | cl::init(false), cl::Hidden); 8 | 9 | enum PA_TYPE { 10 | INCLUSION, 11 | UNIFICATION, 12 | }; 13 | static string STR_ANDERSON = "ander"; 14 | static string STR_STEENSGARD = "steen"; 15 | static string STR_UNIFICATION = "unification"; 16 | static string STR_INCLUSION = "inclusion"; 17 | 18 | void PAPass::solveConstraints() { 19 | PA_TYPE type = Steensgard? 
/// Collapse the pointee set recorded for `ptr` to a single element.
///
/// If `type[ptr]` holds two or more locations, their union-find classes are
/// joined pairwise, and the set is then replaced by the one index returned by
/// the last join(). Sets with fewer than two elements are left untouched.
/// Note that `type[ptr]` creates an empty entry when `ptr` had none.
///
/// NOTE(review): `pts` and `it` refer into the `type` map while join() runs,
/// and join() erases and extends entries of `type` and calls joinPts() again.
/// If a join ever touches the entry of `ptr` itself, `pts`/`it` would be
/// invalidated — confirm that this cannot happen for the constraints produced.
void SteensgardPTG::joinPts(USetIdx ptr) {
    auto &pts = type[ptr];
    if(pts.size() < 2) return;
    auto it = pts.begin();
    auto x = *it;
    // px: representative of everything joined so far
    auto px = uset.find(x);
    for(++it; it!= pts.end(); ++it) {
        auto py = uset.find(*it);
        px = join(px, py);
    }
    // keep only the index returned by the last join()
    pts.clear();
    pts.insert(px);
}
| USetIdx SteensgardPTG::join(USetIdx x, USetIdx y) { 45 | x = uset.find(x); 46 | y = uset.find(y); 47 | if(x == y) return x; 48 | USetIdx ret = uset.merge(x, y); 49 | if(ret != x && type.count(x)) { 50 | merge(type[ret], type[x]); 51 | type.erase(x); 52 | } 53 | if(ret != y && type.count(y)) { 54 | merge(type[ret], type[y]); 55 | type.erase(y); 56 | } 57 | joinPts(ret); 58 | return ret; 59 | } 60 | 61 | void SteensgardPTG::insert(USetIdx dest, USetIdx loc) { 62 | type[dest].insert(loc); 63 | joinPts(dest); 64 | } 65 | 66 | void SteensgardPTG::run(vector &constraints) { 67 | 68 | for(auto &cons:constraints) { 69 | if(PAConstraint::AddressOf == cons.getTy()) { 70 | /// dest = &src 71 | // llvm::errs() << cons.getDest() << " <- &" << cons.getSrc() << "\n"; 72 | insert(cons.getDest(), cons.getSrc()); 73 | } else if(PAConstraint::Copy == cons.getTy()) { 74 | /// dest = src 75 | // llvm::errs() << cons.getDest() << " <- " << cons.getSrc() << "\n"; 76 | handleEqual(cons.getDest(), cons.getSrc()); 77 | } 78 | } 79 | 80 | for(auto &cons:constraints) { 81 | if(PAConstraint::Load == cons.getTy()) { 82 | /// dest = *src 83 | auto srcPar = uset.find(cons.getSrc()); 84 | auto &pstSrc = type[srcPar]; 85 | assert(pstSrc.size() == 1 && "pts.size() should be 1"); 86 | handleEqual(cons.getDest(), *pstSrc.begin()); 87 | } else if(PAConstraint::Store == cons.getTy()) { 88 | /// *dest = src 89 | // llvm::errs() << "*" << cons.getDest() << " <- " << cons.getSrc() << "\n"; 90 | auto destPar = uset.find(cons.getDest()); 91 | auto &pstDest = type[destPar]; 92 | assert(pstDest.size() == 1 && "pts.size() should be 1"); 93 | handleEqual(*pstDest.begin(), cons.getSrc()); 94 | } 95 | } 96 | } 97 | 98 | void SteensgardPTG::solve() { 99 | 100 | } 101 | 102 | static string set2str(const set & pst, PAPass& pass) { 103 | string s = "{"; 104 | for(auto x:pst) s += pass.idx2str(x) + ", "; 105 | s += "}"; 106 | return s; 107 | } 108 | 109 | static string tabAndNewLine(string s) { 110 | return "\t" + s 
/// Wrap `s` in double quotes so it can be emitted as a DOT identifier.
///
/// A double quote inside `s` is escaped as \" so that it cannot terminate the
/// quoted string early; the names come from PAPass::idx2str(), which can
/// return text that contains quotes. Strings without quotes produce exactly
/// the same output as before.
static string quote(string s) {
    string quoted = "\"";
    for(char c : s) {
        if(c == '"') quoted += '\\';
        quoted += c;
    }
    quoted += '"';
    return quoted;
}
-------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | ## pointer analysis 2 | 3 | A simple prototype of pointer analysis which tries to be as simple as possible, so that the basic algorithms are easy to learn. It currently includes: 4 | 5 | - anderson/inclusion pointer analysis 6 | - steensgard/unification pointer analysis 7 | 8 | The default algorithm is `anderson`; you can switch to `steensgard` by adding the command-line argument `-steen`. 9 | 10 | ### Build & run 11 | 12 | Hint: You may need to change the **hard-coded llvm path** in `CMakeLists.txt`! 13 | 14 | ```shell 15 | mkdir release 16 | cd release 17 | cmake .. 18 | cd .. 19 | # run a simple test 20 | chmod +x run.sh 21 | ./run.sh bc/test00.bc 22 | # for steensgard/unification pointer analysis, type 23 | ./run.sh bc/test00.bc -steen 24 | ``` 25 | 26 | The points-to graph is written as a ".png" file if you have `graphviz` installed. The sample figure (steensgard with `test00.c`): 27 | 28 |

29 | 30 | other command line arguments: 31 | 32 | - `-dump-module`: dump module 33 | - `-dump-cons`: dump constraints 34 | 35 | ### reference 36 | 37 | https://github.com/grievejia/andersen : A really good anderson implementation for study, but it's more complicated. 38 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | source sh/compile.sh 2 | 3 | BUILD_DIR="release" 4 | OUTPUT_DIR="output" 5 | 6 | rm -rf $OUTPUT_DIR 7 | mkdir $OUTPUT_DIR 8 | cd $BUILD_DIR 9 | make 10 | cd .. 11 | $BUILD_DIR/anderson $* 12 | 13 | dot -Tpng $OUTPUT_DIR/ptg.dot -o $OUTPUT_DIR/ptg.png 14 | code $OUTPUT_DIR/*.png -------------------------------------------------------------------------------- /sample-output/ptg.dot: -------------------------------------------------------------------------------- 1 | digraph unification_ptg { 2 | graph [label="Steensgard Pointer Analysis",labelloc=t,fontsize=20]; 3 | node [color=blue]; 4 | "{f, }" -> "{p(obj), q(obj), }"; 5 | "{a.addr, }" -> "{a.addr(obj), b.addr(obj), c(obj), }"; 6 | "{b.addr, }" -> "{a.addr(obj), b.addr(obj), c(obj), }"; 7 | "{c, }" -> "{a.addr(obj), b.addr(obj), c(obj), }"; 8 | "{p, }" -> "{p(obj), q(obj), }"; 9 | "{q, }" -> "{p(obj), q(obj), }"; 10 | "{w.0, }" -> "{p(obj), q(obj), }"; 11 | "{p(obj), q(obj), }" -> "{a.addr(obj), b.addr(obj), c(obj), }"; 12 | } -------------------------------------------------------------------------------- /sample-output/ptg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/enochii/simple-pointer-analysis/df7fe919ce9f13e34f3e0b6be51f5ad78d983f99/sample-output/ptg.png -------------------------------------------------------------------------------- /sh/compile.sh: -------------------------------------------------------------------------------- 1 | mkdir bc 2 | file_dir="test" 3 | cd $file_dir 4 | cfiles=$(ls 
.) 5 | for file in $cfiles; do 6 | prefix=${file:0:6} 7 | bc_file="$prefix.bc" 8 | clang -emit-llvm -c -O0 -g3 $file -o $bc_file 9 | # opt -mem2reg $bc_file -o $bc_file 10 | # clang -S -emit-llvm $file -o /dev/stdout | opt -S -mem2reg -o example1-opt.ll 11 | done 12 | cd .. 13 | mv $file_dir/*.bc bc 14 | # mv $file_dir/*.ll bc -------------------------------------------------------------------------------- /test/test00.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // int plus(int op1, int op2) { return op1 + op2; } 4 | // int minus(int op1, int op2) { return op1 - op2; } 5 | 6 | // typedef int(Binary)(int, int); 7 | int** f(int op, int a, int b) { 8 | int c; 9 | int *p=&a, *q=&b; 10 | int **w; 11 | if(op < 2) { 12 | w = &p; 13 | } else { 14 | w = &q; 15 | } 16 | *w = &c; 17 | return w; 18 | } 19 | 20 | // int main() { 21 | // int **ptr = f(1, 2, 3); 22 | // printf("%p\n",ptr); 23 | // return 0; 24 | // } -------------------------------------------------------------------------------- /test/test01.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | struct A { 4 | int x; 5 | int y; 6 | }; 7 | 8 | int f(int u, int *q, int *p) { 9 | struct A a; 10 | a.x = 1; 11 | a.y = 2; 12 | int b; 13 | p = &b; 14 | *p = 1; 15 | q = p; 16 | int c = 1; 17 | if(1 < u) { 18 | c = 2; 19 | q = &c; 20 | } 21 | p = q; 22 | if(1 >= u) { 23 | p = &a.x; 24 | } 25 | return a.x + *p; 26 | } -------------------------------------------------------------------------------- /test/test02.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | struct test02 { 4 | int a; 5 | }; 6 | 7 | 8 | int f() { 9 | struct test02 t; 10 | t.a = 0; 11 | return t.a; 12 | } -------------------------------------------------------------------------------- /test/test03.c: -------------------------------------------------------------------------------- 1 | typedef 
struct { 2 | int a; 3 | int* p; 4 | } test03; 5 | 6 | int* f(int op) { 7 | test03 s; 8 | int a, b; 9 | if(op < 1) { 10 | s.p = &a; 11 | } else { 12 | s.p = &b; 13 | } 14 | return s.p; 15 | } 16 | 17 | // struct with no pointer? -------------------------------------------------------------------------------- /test/test04.c: -------------------------------------------------------------------------------- 1 | typedef struct { 2 | int a; 3 | int* p; 4 | } test03; 5 | 6 | /// TODO: handle return a struct containing a pointer 7 | test03 f(int op) { 8 | test03 s; 9 | int a, b; 10 | if(op < 1) { 11 | s.p = &a; 12 | } else { 13 | s.p = &b; 14 | } 15 | return s; 16 | } 17 | 18 | // struct with no pointer? -------------------------------------------------------------------------------- /test/test05.c: -------------------------------------------------------------------------------- 1 | int f(int op) { 2 | int x, z; 3 | int *y, *w; 4 | int** b = &y; 5 | if(1 < op) { 6 | y = &x; 7 | b = &w; 8 | } 9 | else 10 | y = &z; 11 | return **b;; 12 | } --------------------------------------------------------------------------------