├── .gitignore ├── Clang+LLVM_ The amazing compiler infrastructure.pdf ├── LICENSE ├── README.md ├── examples ├── simple.c └── simple_log.c └── llvm_passes ├── CMakeLists.txt ├── InstrumentationPasses ├── CMakeLists.txt └── LogMemAccess │ ├── CMakeLists.txt │ └── src │ └── LogMemAccess.cpp ├── README.md └── StaticAnalysisPasses ├── CMakeLists.txt ├── FunctionIdentifier ├── CMakeLists.txt └── src │ └── FunctionIdentifierPass.cpp ├── GetLoopExitingBBs ├── CMakeLists.txt └── src │ └── GetLoopExitingBBs.cpp └── StructAccessIdentifier ├── CMakeLists.txt └── src └── StructAccessIdentifier.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | -------------------------------------------------------------------------------- /Clang+LLVM_ The amazing compiler infrastructure.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Machiry/understanding-clang-llvm/a4a98a0740b82da3384e7f87afc9f6bdae29d662/Clang+LLVM_ The amazing compiler infrastructure.pdf -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2019, Machiry Aravind Kumar 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. 
Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # understanding-clang-llvm 2 | Repository for understanding and writing LLVM passes. 3 | 4 | ## Building and installing Clang/LLVM 5 | > This needs ~40GB disk space and compilation time of ~90min. 
6 | 7 | 1) Download [LLVM-8.0.0](http://llvm.org/releases/8.0.0/llvm-8.0.0.src.tar.xz), [clang-8.0.0](http://llvm.org/releases/8.0.0/cfe-8.0.0.src.tar.xz) 8 | 9 | 2) Unzip the LLVM and Clang source files 10 | ``` 11 | tar xf llvm-8.0.0.src.tar.xz 12 | tar xf cfe-8.0.0.src.tar.xz 13 | mv cfe-8.0.0.src llvm-8.0.0.src/tools/clang 14 | ``` 15 | 16 | 3) Create your target build folder and make 17 | ``` 18 | mkdir llvm-8.0.0.obj 19 | cd llvm-8.0.0.obj 20 | cmake -DCMAKE_BUILD_TYPE=Debug ../llvm-8.0.0.src (or use "-DCMAKE_BUILD_TYPE:STRING=Release" for a release build) 21 | make -j8 22 | ``` 23 | 24 | 4) Add paths for LLVM and Clang 25 | ``` 26 | export LLVM_SRC=your_path_to_llvm-8.0.0.src 27 | export LLVM_OBJ=your_path_to_llvm-8.0.0.obj 28 | export LLVM_DIR=your_path_to_llvm-8.0.0.obj 29 | export PATH=$LLVM_DIR/bin:$PATH 30 | ``` 31 | 32 | ## Converting a `.c` file to a bitcode file 33 | The folder [examples](https://github.com/Machiry/understanding-clang-llvm/tree/master/examples) contains a couple of example `.c` files which you can play with. 34 | 35 | LLVM passes run on bitcode files. Here, we explain how to convert a `C` file to a bitcode file. 36 | ``` 37 | clang -c -emit-llvm <input_c_file> -o <output_bitcode_file> 38 | ``` 39 | Example: 40 | ``` 41 | cd examples 42 | clang -c -emit-llvm simple.c -o simple.bc 43 | ``` 44 | 45 | ## LLVM Passes 46 | The folder [llvm_passes](https://github.com/Machiry/understanding-clang-llvm/tree/master/llvm_passes) contains various sample LLVM passes. 
47 | -------------------------------------------------------------------------------- /examples/simple.c: -------------------------------------------------------------------------------- 1 | #include 2 | int func2() { 3 | return printf("In func2\n"); 4 | } 5 | int func1() { 6 | return printf("In func1\n"); 7 | } 8 | 9 | int main() { 10 | unsigned i; 11 | scanf("%u", &i); 12 | func1(); 13 | while(i > 0) { 14 | func2(); 15 | } 16 | return 0; 17 | } 18 | -------------------------------------------------------------------------------- /examples/simple_log.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | typedef struct { 6 | char *ptr; 7 | unsigned len; 8 | } foo; 9 | 10 | void log_mem_access(void *addr, int value, int flag) { 11 | if(flag == 0) { 12 | printf("Reading value 0x%x from %p\n", value, addr); 13 | } 14 | if(flag == 1) { 15 | printf("Writing value 0x%x to %p\n", value, addr); 16 | } 17 | } 18 | 19 | unsigned count_alpha_chars(char *str) { 20 | unsigned c = 0; 21 | if(str) { 22 | while(*str) { 23 | if((*str >= 'A' && *str <= 'Z') || (*str >= 'a' && *str <= 'z')) { 24 | c++; 25 | } 26 | str++; 27 | } 28 | } 29 | return c; 30 | } 31 | 32 | int main(int argc, char **argv) { 33 | foo obj = {}; 34 | char buff[512]; 35 | // read len 36 | scanf("%u", &(obj.len)); 37 | // allocate buff 38 | obj.ptr = (char*)malloc(obj.len); 39 | if(obj.ptr) { 40 | // read buff 41 | scanf("%512s", buff); 42 | strncpy(obj.ptr, buff, obj.len); 43 | // output buff 44 | printf("Content:%s, Alpha:%u\n", obj.ptr, count_alpha_chars(obj.ptr)); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /llvm_passes/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.4.3) 2 | if(POLICY CMP0074) 3 | cmake_policy(SET CMP0074 OLD) 4 | endif() 5 | 6 | # To support both in- and out-of-source builds, 
7 | # we check for the presence of the add_llvm_loadable_module command. 8 | # - if this command is not present, we are building out-of-source 9 | if(NOT COMMAND add_llvm_loadable_module) 10 | if (DEFINED ENV{LLVM_DIR}) 11 | # We need to match the build environment for LLVM: 12 | # In particular, we need C++11 and the -fno-rtti flag 13 | set(CMAKE_CXX_STANDARD 11) 14 | if(CMAKE_BUILD_TYPE MATCHES "Debug") 15 | set(CMAKE_CXX_FLAGS "-std=gnu++11 -O0 -fno-rtti") 16 | else() 17 | set(CMAKE_CXX_FLAGS "-std=gnu++11 -O3 -fno-rtti") 18 | endif() 19 | 20 | find_package(LLVM REQUIRED CONFIG) 21 | 22 | list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}") 23 | include(AddLLVM) 24 | 25 | add_definitions(${LLVM_DEFINITIONS}) 26 | include_directories(${LLVM_INCLUDE_DIRS}) 27 | 28 | else() 29 | message(FATAL_ERROR "\ 30 | WARNING: The LLVM_DIR var was not set (required for an out-of-source build)!\n\ 31 | Please set this to environment variable to point to the LLVM build directory\ 32 | (e.g. on linux: export LLVM_DIR=/path/to/llvm/build/dir)") 33 | endif() 34 | else() 35 | set(IN_SOURCE_BUILD 1) 36 | endif() 37 | 38 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include 39 | ${CMAKE_CURRENT_BINARY_DIR}/include) 40 | 41 | add_subdirectory(InstrumentationPasses) 42 | add_subdirectory(StaticAnalysisPasses) 43 | -------------------------------------------------------------------------------- /llvm_passes/InstrumentationPasses/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(LogMemAccess) 2 | -------------------------------------------------------------------------------- /llvm_passes/InstrumentationPasses/LogMemAccess/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(LogMemAccess MODULE src/LogMemAccess.cpp) 2 | 3 | include_directories(include) 4 | 5 | target_compile_features(LogMemAccess PRIVATE cxx_range_for cxx_auto_type) 6 | 7 | 
set_target_properties(LogMemAccess PROPERTIES 8 | COMPILE_FLAGS "-g" 9 | ) 10 | 11 | if(APPLE) 12 | set_target_properties(LogMemAccess PROPERTIES 13 | LINK_FLAGS "-undefined dynamic_lookup" 14 | ) 15 | endif(APPLE) 16 | -------------------------------------------------------------------------------- /llvm_passes/InstrumentationPasses/LogMemAccess/src/LogMemAccess.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by machiry at the beginning of time. 3 | // 4 | // This pass logs all the memory access. 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | 25 | using namespace llvm; 26 | 27 | #define LOG_FUNC_NAME "log_mem_access" 28 | 29 | namespace UMD { 30 | 31 | 32 | /*** 33 | * The main pass. 34 | */ 35 | struct LogMemAccessPass : public ModulePass { 36 | public: 37 | static char ID; 38 | Function *logFunction; 39 | 40 | LogMemAccessPass() : ModulePass(ID) { 41 | this->logFunction = nullptr; 42 | } 43 | 44 | ~LogMemAccessPass() { 45 | } 46 | 47 | 48 | Value *createPointerToVoidPtrCast(IRBuilder<> &targetBuilder, Value *pointerOp) { 49 | return targetBuilder.CreatePointerCast(pointerOp, IntegerType::getInt8PtrTy(pointerOp->getContext())); 50 | } 51 | 52 | Value *createValueToUnsignedIntCast(IRBuilder<> &targetBuilder, Value *valueOp) { 53 | if (valueOp->getType()->isPointerTy()) { 54 | valueOp = targetBuilder.CreatePtrToInt(valueOp, IntegerType::getInt32Ty(valueOp->getContext())); 55 | } 56 | return targetBuilder.CreateIntCast(valueOp, IntegerType::getInt32Ty(valueOp->getContext()), true); 57 | } 58 | 59 | 60 | Function *getLogFunction(Module &m) { 61 | if (this->logFunction == nullptr) { 62 | // void*, int, int 63 | Type *parameterTypes[] = {IntegerType::getInt8PtrTy(m.getContext()), 64 | 
IntegerType::getInt32Ty(m.getContext()), 65 | IntegerType::getInt32Ty(m.getContext())}; 66 | // void (void*, int, int) 67 | FunctionType *log_function_type = FunctionType::get(IntegerType::getVoidTy(m.getContext()), parameterTypes, 68 | false); 69 | 70 | // get the reference to function 71 | Function *func = cast(m.getOrInsertFunction(LOG_FUNC_NAME, log_function_type)); 72 | 73 | this->logFunction = func; 74 | } 75 | return this->logFunction; 76 | } 77 | 78 | bool instrumentLoad(LoadInst *targetInstr) { 79 | bool retVal = true; 80 | 81 | try { 82 | dbgs() << "Instrumenting:" << *targetInstr << "\n"; 83 | // set the insertion point to be after the load instruction. 84 | auto targetInsertPoint = targetInstr->getIterator(); 85 | targetInsertPoint++; 86 | IRBuilder<> builder(&(*targetInsertPoint)); 87 | 88 | // get the log function 89 | Function *targetLogFunction = this->getLogFunction(*targetInstr->getModule()); 90 | 91 | // get the arguments for the function. 92 | Value *address = targetInstr->getPointerOperand(); 93 | Value *targetValue = targetInstr; 94 | 95 | address = this->createPointerToVoidPtrCast(builder, address); 96 | targetValue = this->createValueToUnsignedIntCast(builder, targetValue); 97 | 98 | ConstantInt *readFlag = ConstantInt::get(IntegerType::getInt32Ty(targetInstr->getContext()), 0); 99 | // prepare arguments. 100 | Value* arguments[] = {address, targetValue, readFlag}; 101 | builder.CreateCall(targetLogFunction, arguments); 102 | } catch (const std::exception &e) { 103 | dbgs() << "[?] Error occurred while trying to instrument load instruction:" << e.what() << "\n"; 104 | retVal = false; 105 | } 106 | return retVal; 107 | } 108 | 109 | bool instrumentStore(StoreInst *targetInstr) { 110 | bool retVal = true; 111 | try { 112 | dbgs() << "Instrumenting:" << *targetInstr << "\n"; 113 | // set the insertion point to be before the store instruction. 
114 | auto targetInsertPoint = targetInstr->getIterator(); 115 | IRBuilder<> builder(&(*targetInsertPoint)); 116 | 117 | // get the log function 118 | Function *targetLogFunction = this->getLogFunction(*targetInstr->getModule()); 119 | 120 | // get the arguments for the function. 121 | Value *address = targetInstr->getPointerOperand(); 122 | Value *targetValue = targetInstr->getValueOperand(); 123 | 124 | address = this->createPointerToVoidPtrCast(builder, address); 125 | targetValue = this->createValueToUnsignedIntCast(builder, targetValue); 126 | 127 | ConstantInt *writeFlag = ConstantInt::get(IntegerType::getInt32Ty(targetInstr->getContext()), 1); 128 | 129 | // prepare arguments. 130 | Value* arguments[] = {address, targetValue, writeFlag}; 131 | 132 | builder.CreateCall(targetLogFunction, arguments); 133 | } catch (const std::exception &e) { 134 | dbgs() << "[?] Error occurred while trying to instrument store instruction:" << e.what() << "\n"; 135 | retVal = false; 136 | } 137 | return retVal; 138 | 139 | } 140 | 141 | 142 | bool runOnModule(Module &m) override { 143 | bool edited = false; 144 | // module is a collection of functions. 145 | for (auto &currFunc: m) { 146 | if(currFunc.getName() == LOG_FUNC_NAME) { 147 | continue; 148 | } 149 | // function is a collection of basic blocks. 150 | for (auto &currBB: currFunc) { 151 | // basic block is a collection of instructions. 152 | for (auto &currIns: currBB) { 153 | Instruction *currInstrPtr = &currIns; 154 | // is this a load instruction? 155 | if (LoadInst *LD = dyn_cast(currInstrPtr)) { 156 | edited = instrumentLoad(LD); 157 | 158 | } 159 | //is this a store instruction. 160 | if (StoreInst *SI = dyn_cast(currInstrPtr)) { 161 | edited = instrumentStore(SI); 162 | } 163 | } 164 | } 165 | } 166 | // true indicates that changes have been made to the module. 
167 | return edited; 168 | } 169 | 170 | }; 171 | 172 | char LogMemAccessPass::ID = 0; 173 | // pass arg, pass desc, cfg_only, analysis only 174 | static RegisterPass x("logm", "Log all memory accesses.", false, false); 175 | } -------------------------------------------------------------------------------- /llvm_passes/README.md: -------------------------------------------------------------------------------- 1 | # LLVM Passes 2 | This folder contains various llvm passes. 3 | ## Building 4 | Building all the passes. 5 | 6 | mkdir obj 7 | cmake .. 8 | make -j4 9 | 10 | All the shared objects for the passes will be present inside the `obj` folder. 11 | 12 | ## Passes 13 | * [Instrumentation Passes](https://github.com/Machiry/understanding-clang-llvm/tree/master/llvm_passes/InstrumentationPasses): 14 | 15 | These passes modify the program by adding instrumentation code. 16 | * [Log memory access](https://github.com/Machiry/understanding-clang-llvm/tree/master/llvm_passes/InstrumentationPasses/LogMemAccess): This pass logs all the memory reads and writes by inserting call to the function: `log_mem_access`, 17 | with address and value being written and read. (Try this on [simple_log.c](https://github.com/Machiry/understanding-clang-llvm/blob/master/examples/simple_log.c)) 18 | 19 | > Usage: 20 | 21 | cd obj/InstrumentationPasses/LogMemAccess 22 | opt -load ./libLogMemAccess.so -logm -o 23 | 24 | **Note:** 25 | To use this pass, make sure that you define the following function in the input source file: 26 | ``` 27 | void log_mem_access(void *addr, int value, int flag) { 28 | if(flag == 0) { 29 | printf("Reading value 0x%x from %p\n", value, addr); 30 | } 31 | if(flag == 1) { 32 | printf("Writing value 0x%x to %p\n", value, addr); 33 | } 34 | } 35 | * [Static Analysis Passes](https://github.com/Machiry/understanding-clang-llvm/tree/master/llvm_passes/StaticAnalysisPasses): 36 | 37 | These passes perform static analysis on the provided bitcode. 
38 | * [Loop Exiting BB finder](https://github.com/Machiry/understanding-clang-llvm/tree/master/llvm_passes/StaticAnalysisPasses/GetLoopExitingBBs): This pass identifies all the basic blocks that control the exit from a loop. 39 | > Usage: 40 | ``` 41 | cd obj/StaticAnalysisPasses/GetLoopExitingBBs 42 | opt -load ./libGetLoopExitingBBs.so -loopbbs <input_bc_file> 43 | ``` 44 | 45 | * [Function Identifier Pass](https://github.com/Machiry/understanding-clang-llvm/tree/master/llvm_passes/StaticAnalysisPasses/FunctionIdentifier): This pass identifies all the functions in the module and prints their names. 46 | > Usage: 47 | ``` 48 | cd obj/StaticAnalysisPasses/FunctionIdentifier 49 | opt -load ./libFunctionIdentifier.so -identfunc <input_bc_file> 50 | ``` 51 | * [StructAccess Identifier Pass](https://github.com/Machiry/understanding-clang-llvm/tree/master/llvm_passes/StaticAnalysisPasses/StructAccessIdentifier): This pass identifies all accesses to structure elements in all functions in the module and prints the corresponding information. 
52 | > Usage: 53 | ``` 54 | cd obj/StaticAnalysisPasses/StructAccessIdentifier 55 | opt -load ./libStructAccessIdentifier.so -staccess 56 | ``` 57 | -------------------------------------------------------------------------------- /llvm_passes/StaticAnalysisPasses/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(FunctionIdentifier) 2 | add_subdirectory(GetLoopExitingBBs) 3 | add_subdirectory(StructAccessIdentifier) 4 | -------------------------------------------------------------------------------- /llvm_passes/StaticAnalysisPasses/FunctionIdentifier/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(FunctionIdentifier MODULE src/FunctionIdentifierPass.cpp) 2 | 3 | include_directories(include) 4 | 5 | target_compile_features(FunctionIdentifier PRIVATE cxx_range_for cxx_auto_type) 6 | 7 | set_target_properties(FunctionIdentifier PROPERTIES 8 | COMPILE_FLAGS "-g" 9 | ) 10 | 11 | if(APPLE) 12 | set_target_properties(FunctionIdentifier PROPERTIES 13 | LINK_FLAGS "-undefined dynamic_lookup" 14 | ) 15 | endif(APPLE) 16 | -------------------------------------------------------------------------------- /llvm_passes/StaticAnalysisPasses/FunctionIdentifier/src/FunctionIdentifierPass.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by machiry at the beginning of time. 3 | // 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | 16 | using namespace llvm; 17 | 18 | namespace UMD { 19 | 20 | 21 | /*** 22 | * This pass lists all the function in the provided module. 
23 | */ 24 | struct FunctionIdentifierPass : public ModulePass { 25 | public: 26 | static char ID; 27 | 28 | FunctionIdentifierPass() : ModulePass(ID) { 29 | } 30 | 31 | ~FunctionIdentifierPass() { 32 | } 33 | 34 | 35 | bool runOnModule(Module &m) override { 36 | // iterate through all the functions. 37 | for (auto &currFunc: m) { 38 | if(currFunc.hasName()) { 39 | dbgs() << "[+] Function Name:" << currFunc.getName() << ", Has Body:" << !currFunc.isDeclaration() << "\n"; 40 | } 41 | } 42 | // this is just an analysis pass, 43 | // we do not change the module. 44 | return false; 45 | } 46 | 47 | }; 48 | 49 | char FunctionIdentifierPass::ID = 0; 50 | // pass arg, pass desc, cfg_only, analysis only 51 | static RegisterPass x("identfunc", "Identify all the functions in the module.", false, true); 52 | } -------------------------------------------------------------------------------- /llvm_passes/StaticAnalysisPasses/GetLoopExitingBBs/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(GetLoopExitingBBs MODULE src/GetLoopExitingBBs.cpp) 2 | 3 | include_directories(include) 4 | 5 | target_compile_features(GetLoopExitingBBs PRIVATE cxx_range_for cxx_auto_type) 6 | 7 | set_target_properties(GetLoopExitingBBs PROPERTIES 8 | COMPILE_FLAGS "-g" 9 | ) 10 | 11 | if(APPLE) 12 | set_target_properties(GetLoopExitingBBs PROPERTIES 13 | LINK_FLAGS "-undefined dynamic_lookup" 14 | ) 15 | endif(APPLE) 16 | -------------------------------------------------------------------------------- /llvm_passes/StaticAnalysisPasses/GetLoopExitingBBs/src/GetLoopExitingBBs.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by machiry at the beginning of time. 
3 | // 4 | // This pass gets the information about basic-blocks that control 5 | // the exit from a loop 6 | // 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | 25 | using namespace llvm; 26 | 27 | namespace UMD { 28 | 29 | 30 | /*** 31 | * The main pass. 32 | */ 33 | struct GetLoopExitingBBs : public ModulePass { 34 | public: 35 | static char ID; 36 | 37 | GetLoopExitingBBs() : ModulePass(ID) { 38 | } 39 | 40 | ~GetLoopExitingBBs() { 41 | } 42 | 43 | 44 | bool runOnModule(Module &m) override { 45 | // iterate through all the functions. 46 | for (auto &currFunc: m) { 47 | // does this function have body? 48 | if(!currFunc.isDeclaration()) { 49 | // get information about loops. 50 | auto &loopAnalysis = getAnalysis(currFunc); 51 | LoopInfo &loopInfo = loopAnalysis.getLoopInfo(); 52 | // iterate over all the loops 53 | for (auto *lobj: loopInfo.getLoopsInPreorder()) { 54 | SmallVector exitBBs; 55 | exitBBs.clear(); 56 | // get the exit basic blocks. 57 | lobj->getExitingBlocks(exitBBs); 58 | 59 | // get the loop exit condition. 60 | for (auto *bb: exitBBs) { 61 | Instruction *exitInstr = bb->getTerminator(); 62 | const DebugLoc &DL = exitInstr->getDebugLoc(); 63 | DILocation *di = DL.get(); 64 | 65 | dbgs() << *exitInstr << " at " << DL.getLine() << " " << di->getFilename() << "\n"; 66 | } 67 | 68 | } 69 | } 70 | } 71 | 72 | for (auto &currFunc: m) { 73 | // get the function type. 74 | FunctionType *currFuncType = currFunc.getFunctionType(); 75 | // check the type of each parameter. 
76 | for(unsigned i=0; i< currFuncType->getNumParams(); i++) { 77 | if(currFuncType->getParamType(i)->isPointerTy()) { 78 | dbgs() << "Function:" << currFunc.getName() << " has a pointer parameter.\n"; 79 | continue; 80 | } 81 | } 82 | } 83 | // this is just an analysis pass, 84 | // we do not change the module. 85 | return false; 86 | } 87 | 88 | void getAnalysisUsage(AnalysisUsage &AU) const override { 89 | AU.addRequired(); 90 | } 91 | 92 | }; 93 | 94 | char GetLoopExitingBBs::ID = 0; 95 | // pass arg, pass desc, cfg_only, analysis only 96 | static RegisterPass x("loopbbs", "Static analysis pass to get the instructions " 97 | "that control the exit from a loop.", false, true); 98 | } -------------------------------------------------------------------------------- /llvm_passes/StaticAnalysisPasses/StructAccessIdentifier/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(StructAccessIdentifier MODULE src/StructAccessIdentifier.cpp) 2 | 3 | include_directories(include) 4 | 5 | target_compile_features(StructAccessIdentifier PRIVATE cxx_range_for cxx_auto_type) 6 | 7 | set_target_properties(StructAccessIdentifier PROPERTIES 8 | COMPILE_FLAGS "-g" 9 | ) 10 | 11 | if(APPLE) 12 | set_target_properties(StructAccessIdentifier PROPERTIES 13 | LINK_FLAGS "-undefined dynamic_lookup" 14 | ) 15 | endif(APPLE) 16 | -------------------------------------------------------------------------------- /llvm_passes/StaticAnalysisPasses/StructAccessIdentifier/src/StructAccessIdentifier.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by machiry at the beginning of time. 3 | // 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | 16 | using namespace llvm; 17 | 18 | namespace UMD { 19 | 20 | 21 | /*** 22 | * This pass detects all direct accesses to struct members. 
23 | */ 24 | struct StructAccessIdentifierPass : public ModulePass { 25 | public: 26 | static char ID; 27 | 28 | StructAccessIdentifierPass() : ModulePass(ID) { 29 | } 30 | 31 | ~StructAccessIdentifierPass() { 32 | } 33 | 34 | // get the structure type embedded in the current type. 35 | Type *getStructureAccessType(Type *currType) { 36 | if(currType->isPointerTy()) { 37 | PointerType *currPtrType = dyn_cast(currType); 38 | return this->getStructureAccessType(currPtrType->getPointerElementType()); 39 | } 40 | return currType; 41 | } 42 | 43 | // get the operand that represents the field in the provided 44 | // GEP instruction. 45 | // getelementptr inbounds %struct.foo, %struct.foo* %obj, i32 0, i32 0 46 | Value* getFieldOperand(GetElementPtrInst *gepI) { 47 | unsigned numOp = gepI->getNumOperands(); 48 | // the last operand is the field number. 49 | return gepI->getOperand(numOp-1); 50 | } 51 | 52 | 53 | bool checkFunction(Function &currFunc) { 54 | bool hasAccesses = false; 55 | std::string funcName = "NONAME"; 56 | if(currFunc.hasName()) { 57 | funcName = currFunc.getName(); 58 | } 59 | dbgs() << "[+] Checking struct accesses in function:" << funcName << "\n"; 60 | for(auto &currBB: currFunc) { 61 | for(auto &currIns: currBB) { 62 | Instruction *currInstrPtr = &currIns; 63 | // check if this is a GEP instruction. 64 | if(GetElementPtrInst *targetAccess = dyn_cast(currInstrPtr)) { 65 | Type *accessedType = targetAccess->getPointerOperandType(); 66 | Type *targetAccType = this->getStructureAccessType(accessedType); 67 | // check we are accessing a struct? 68 | if(targetAccType->isStructTy()) { 69 | hasAccesses = true; 70 | StructType *stType = dyn_cast(targetAccType); 71 | std::string stName = "NONAME"; 72 | // get the name of the struct, if it has one, 73 | if(stType->hasName()) { 74 | stName = stType->getName(); 75 | } 76 | // get the field number. 
77 | Value *fieldOp = getFieldOperand(targetAccess); 78 | if(ConstantInt *CNum = dyn_cast(fieldOp)) { 79 | unsigned fieldNum = CNum->getValue().getZExtValue(); 80 | // get the type of the field. 81 | Type *elementType = stType->getStructElementType(fieldNum); 82 | dbgs() << "[*] Accessing field:" << fieldNum << " of structure:" << stName << " and its a "; 83 | // if this is a pointer type? 84 | if(elementType->isPointerTy()) { 85 | dbgs() << "pointer type\n"; 86 | } else { 87 | dbgs() << "scalar type\n"; 88 | } 89 | } 90 | } 91 | 92 | } 93 | } 94 | } 95 | dbgs() << "[+] Finished Checking struct accesses in function:" << funcName << "\n"; 96 | return hasAccesses; 97 | } 98 | 99 | 100 | bool runOnModule(Module &m) override { 101 | // iterate through all the functions. 102 | for (auto &currFunc: m) { 103 | // if this is not a declaration. 104 | if(!currFunc.isDeclaration()) { 105 | checkFunction(currFunc); 106 | } 107 | } 108 | // this is just an analysis pass, 109 | // we do not change the module. 110 | return false; 111 | } 112 | 113 | }; 114 | 115 | char StructAccessIdentifierPass::ID = 0; 116 | // pass arg, pass desc, cfg_only, analysis only 117 | static RegisterPass x("staccess", 118 | "Identify all the direct access to struct members.", 119 | false, 120 | true); 121 | } --------------------------------------------------------------------------------