├── IRDumper ├── .gitignore ├── src │ ├── lib │ │ ├── CMakeLists.txt │ │ ├── IRDumper.h │ │ └── IRDumper.cpp │ └── CMakeLists.txt └── Makefile ├── src ├── lib │ ├── Config.cc │ ├── CMakeLists.txt │ ├── Config.h │ ├── CallGraph.h │ ├── Analyzer.h │ ├── MLTA.h │ ├── Analyzer.cc │ ├── Common.h │ ├── CallGraph.cc │ ├── Common.cc │ └── MLTA.cc └── CMakeLists.txt ├── .gitignore ├── docs ├── fig3-mlta-result.png └── review-kallgraph.md ├── TODO ├── InstrumentPTW ├── Makefile ├── InstrumentPTW.h └── InstrumentPTW.cpp ├── Makefile ├── irgen.sh ├── LICENSE └── README.md /IRDumper/.gitignore: -------------------------------------------------------------------------------- 1 | IRDumper.so 2 | IRDumper.o 3 | build 4 | -------------------------------------------------------------------------------- /src/lib/Config.cc: -------------------------------------------------------------------------------- 1 | #include"Config.h" 2 | 3 | int ENABLE_MLTA = 2; 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | log 3 | processLinuxTrace.py 4 | tags 5 | llvm-project 6 | *.swp 7 | -------------------------------------------------------------------------------- /docs/fig3-mlta-result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/umnsec/mlta/HEAD/docs/fig3-mlta-result.png -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | 1. Handling missing names of structs 2 | 2. Handling downcasting in -O2 3 | 3. 
Reducing type escapes and caps 4 | 5 | -------------------------------------------------------------------------------- /IRDumper/src/lib/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set (DumperSourceCodes 2 | IRDumper.h 3 | IRDumper.cpp 4 | ) 5 | 6 | add_library (DumperObj OBJECT ${DumperSourceCodes}) 7 | add_library (Dumper SHARED $) 8 | add_library (DumperStatic STATIC $) 9 | 10 | set (EXECUTABLE_OUTPUT_PATH ${UNISAN_BINARY_DIR}) 11 | link_directories (${UNISAN_BINARY_DIR}/lib) 12 | -------------------------------------------------------------------------------- /IRDumper/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5.1) 2 | project(DUMPER) 3 | 4 | find_package(LLVM REQUIRED CONFIG) 5 | 6 | message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") 7 | message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") 8 | 9 | include(CheckCXXCompilerFlag) 10 | 11 | include_directories(${LLVM_INCLUDE_DIRS}) 12 | add_definitions(${LLVM_DEFINITIONS}) 13 | 14 | add_subdirectory(lib) 15 | -------------------------------------------------------------------------------- /InstrumentPTW/Makefile: -------------------------------------------------------------------------------- 1 | INSTALL_DIR=/path/to/clang-6-install 2 | INSTALL_BIN=$(INSTALL_DIR)/bin 3 | INSTALL_LIB=$(INSTALL_DIR)/lib 4 | INSTALL_INC=$(INSTALL_DIR)/include 5 | 6 | CC = $(INSTALL_BIN)/clang 7 | CFLAGS = -I $(INSTALL_INC) -I . 
-Wall -fno-rtti -Wfatal-errors 8 | LDFLAGS = -L $(INSTALL_LIB) 9 | 10 | all: InstrumentPTW IRDumper 11 | 12 | InstrumentPTW: InstrumentPTW.cpp 13 | $(CC) $(CFLAGS) -c InstrumentPTW.cpp -O3 -fPIC -o InstrumentPTW.o 14 | $(CC) -shared -fPIC -rdynamic -o $(INSTALL_LIB)/InstrumentPTW.so InstrumentPTW.o $(LDFLAGS) 15 | 16 | clean: 17 | rm -f *.o *.so 18 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5.1) 2 | project(KANALYZER) 3 | 4 | find_package(LLVM REQUIRED CONFIG) 5 | 6 | message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") 7 | message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") 8 | 9 | # Set your project compile flags. 10 | # E.g. if using the C++ header files 11 | # you will need to enable C++14 support 12 | # for your compiler. 13 | # Check for C++14 support and set the compilation flag 14 | include(CheckCXXCompilerFlag) 15 | 16 | include_directories(${LLVM_INCLUDE_DIRS}) 17 | add_definitions(${LLVM_DEFINITIONS}) 18 | 19 | add_subdirectory (lib) 20 | -------------------------------------------------------------------------------- /IRDumper/Makefile: -------------------------------------------------------------------------------- 1 | CUR_DIR = $(shell pwd) 2 | LLVM_BUILD=${CUR_DIR}/../llvm-project/prefix 3 | SRC_DIR := ${CUR_DIR}/src 4 | SRC_BUILD := ${CUR_DIR}/build 5 | 6 | NPROC := ${shell sysctl -n hw.ncpu} 7 | 8 | build_src_func = \ 9 | (mkdir -p ${2} \ 10 | && cd ${2} \ 11 | && PATH=${LLVM_BUILD}/bin:${PATH}\ 12 | LLVM_ROOT_DIR=${LLVM_BUILD}/bin \ 13 | LLVM_LIBRARY_DIRS=${LLVM_BUILD}/lib \ 14 | LLVM_INCLUDE_DIRS=${LLVM_BUILD}/include \ 15 | CC=clang CXX=clang++ \ 16 | cmake ${1} \ 17 | -DCMAKE_BUILD_TYPE=Release \ 18 | -DCMAKE_CXX_FLAGS_RELEASE="-std=c++14 -fno-rtti -fpic -O3 -v" \ 19 | && make -j${NPROC}) 20 | 21 | all: dumper 22 | 23 | dumper: 24 | $(call build_src_func, 
${SRC_DIR}, ${SRC_BUILD}) 25 | 26 | clean: 27 | rm -rf ${SRC_BUILD} 28 | -------------------------------------------------------------------------------- /src/lib/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set (AnalyzerSourceCodes 2 | Config.h 3 | Config.cc 4 | Common.h 5 | Common.cc 6 | Analyzer.h 7 | Analyzer.cc 8 | CallGraph.h 9 | CallGraph.cc 10 | MLTA.h 11 | MLTA.cc 12 | ) 13 | 14 | set(CMAKE_MACOSX_RPATH 0) 15 | 16 | # Build libraries. 17 | add_library (AnalyzerObj OBJECT ${AnalyzerSourceCodes}) 18 | add_library (Analyzer SHARED $) 19 | add_library (AnalyzerStatic STATIC $) 20 | 21 | # Build executable. 22 | set (EXECUTABLE_OUTPUT_PATH ${ANALYZER_BINARY_DIR}) 23 | link_directories (${ANALYZER_BINARY_DIR}/lib) 24 | add_executable(kanalyzer ${AnalyzerSourceCodes}) 25 | target_link_libraries(kanalyzer 26 | LLVMAsmParser 27 | LLVMSupport 28 | LLVMCore 29 | LLVMAnalysis 30 | LLVMIRReader 31 | AnalyzerStatic 32 | ) 33 | -------------------------------------------------------------------------------- /src/lib/Config.h: -------------------------------------------------------------------------------- 1 | #ifndef _KACONFIG_H 2 | #define _KACONFIG_H 3 | 4 | 5 | #include "llvm/Support/FileSystem.h" 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "Common.h" 13 | 14 | using namespace std; 15 | using namespace llvm; 16 | 17 | // 18 | // Configurations 19 | // 20 | 21 | //#define DEBUG_MLTA 22 | 23 | extern int ENABLE_MLTA; 24 | #define SOUND_MODE 1 25 | #define MAX_TYPE_LAYER 10 26 | 27 | #define MAP_CALLER_TO_CALLEE 1 28 | #define UNROLL_LOOP_ONCE 1 29 | #define MAP_DECLARATION_FUNCTION 30 | #define PRINT_ICALL_TARGET 31 | // Path to source code 32 | #define SOURCE_CODE_PATH "/home/kjlu/projects/kernels/linux-5.1" 33 | //#define PRINT_SOURCE_LINE 34 | //#define MLTA_FIELD_INSENSITIVE 35 | 36 | 37 | #endif 38 | 
-------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CUR_DIR = $(shell pwd) 2 | LLVM_BUILD := ${CUR_DIR}/llvm-project/prefix 3 | ANALYZER_DIR := ${CUR_DIR}/src 4 | ANALYZER_BUILD := ${CUR_DIR}/build 5 | 6 | 7 | UNAME := $(shell uname) 8 | ifeq ($(UNAME), Linux) 9 | NPROC := ${shell nproc} 10 | else 11 | NPROC := ${shell sysctl -n hw.ncpu} 12 | endif 13 | 14 | build_analyzer_func = \ 15 | (mkdir -p ${2} \ 16 | && cd ${2} \ 17 | && PATH=${LLVM_BUILD}/bin:${PATH} \ 18 | LLVM_TOOLS_BINARY_DIR=${LLVM_BUILD}/bin \ 19 | LLVM_LIBRARY_DIRS=${LLVM_BUILD}/lib \ 20 | LLVM_INCLUDE_DIRS=${LLVM_BUILD}/include \ 21 | CC=clang CXX=clang++ \ 22 | cmake ${1} \ 23 | -DCMAKE_BUILD_TYPE=Build \ 24 | -DLLVM_ENABLE_ASSERTIONS=ON \ 25 | -DCMAKE_CXX_FLAGS_BUILD="-std=c++14 -fpic -fno-rtti -g" \ 26 | && make -j${NPROC}) 27 | 28 | 29 | all: kanalyzer 30 | 31 | kanalyzer: 32 | $(call build_analyzer_func, ${ANALYZER_DIR}, ${ANALYZER_BUILD}) 33 | 34 | clean: 35 | rm -rf ${ANALYZER_BUILD} 36 | -------------------------------------------------------------------------------- /src/lib/CallGraph.h: -------------------------------------------------------------------------------- 1 | #ifndef _CALL_GRAPH_H 2 | #define _CALL_GRAPH_H 3 | 4 | #include "Analyzer.h" 5 | #include "MLTA.h" 6 | #include "Config.h" 7 | 8 | class CallGraphPass : 9 | public virtual IterativeModulePass, public virtual MLTA { 10 | 11 | private: 12 | 13 | // 14 | // Variables 15 | // 16 | 17 | // Index of the module 18 | int MIdx; 19 | 20 | setCallSet; 21 | setICallSet; 22 | setMatchedICallSet; 23 | 24 | 25 | // 26 | // Methods 27 | // 28 | void doMLTA(Function *F); 29 | 30 | 31 | public: 32 | static int AnalysisPhase; 33 | 34 | CallGraphPass(GlobalContext *Ctx_) 35 | : IterativeModulePass(Ctx_, "CallGraph"), 36 | MLTA(Ctx_) { 37 | 38 | LoadElementsStructNameMap(Ctx->Modules); 39 | MIdx = 0; 40 | } 41 | 42 | 
virtual bool doInitialization(llvm::Module *); 43 | virtual bool doFinalization(llvm::Module *); 44 | virtual bool doModulePass(llvm::Module *); 45 | 46 | }; 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /irgen.sh: -------------------------------------------------------------------------------- 1 | # Configurations 2 | 3 | KERNEL_SRC="$(pwd)/../kernels/linux" 4 | IRDUMPER="$(pwd)/IRDumper/build/lib/libDumper.so" 5 | CLANG="$(pwd)/llvm-project/prefix/bin/clang" 6 | CONFIG="defconfig" 7 | #CONFIG="allyesconfig" 8 | 9 | # Use -Wno-error to avoid turning warnings into errors 10 | NEW_CMD="\n\n\ 11 | KBUILD_USERCFLAGS += -Wno-error -g -Xclang -no-opaque-pointers -Xclang -flegacy-pass-manager -Xclang -load -Xclang $IRDUMPER\nKBUILD_CFLAGS += -Wno-error -g -Xclang -no-opaque-pointers -Xclang -flegacy-pass-manager -Xclang -load -Xclang $IRDUMPER" 12 | 13 | # Back up Linux Makefile 14 | #cp $KERNEL_SRC/Makefile $KERNEL_SRC/Makefile.bak 15 | 16 | if [ ! 
-f "$KERNEL_SRC/Makefile.bak" ]; then 17 | echo "Back up Linux Makefile first" 18 | exit -1 19 | fi 20 | 21 | # The new flags better follow "# Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments" 22 | echo -e $NEW_CMD >$KERNEL_SRC/IRDumper.cmd 23 | cat $KERNEL_SRC/Makefile.bak $KERNEL_SRC/IRDumper.cmd >$KERNEL_SRC/Makefile 24 | 25 | cd $KERNEL_SRC && make $CONFIG 26 | echo $CLANG 27 | echo $NEW_CMD 28 | make CC=$CLANG -j`nproc` -k -i 29 | 30 | -------------------------------------------------------------------------------- /InstrumentPTW/InstrumentPTW.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "llvm/IR/CallSite.h" 10 | #include "llvm/IR/Constants.h" 11 | #include "llvm/IR/Function.h" 12 | #include "llvm/IR/GlobalVariable.h" 13 | #include "llvm/IR/IRBuilder.h" 14 | #include "llvm/IR/InlineAsm.h" 15 | #include "llvm/IR/Instruction.h" 16 | #include "llvm/IR/Instructions.h" 17 | #include "llvm/IR/LegacyPassManager.h" 18 | #include "llvm/IR/Module.h" 19 | #include "llvm/IR/Type.h" 20 | #include "llvm/IR/TypeBuilder.h" 21 | #include "llvm/IRReader/IRReader.h" 22 | #include "llvm/Pass.h" 23 | #include "llvm/Support/CommandLine.h" 24 | #include "llvm/Support/SourceMgr.h" 25 | #include "llvm/Support/raw_ostream.h" 26 | #include "llvm/Transforms/IPO/PassManagerBuilder.h" 27 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" 28 | 29 | using namespace llvm; 30 | using namespace std; 31 | 32 | class InstrumentPTW : public ModulePass { 33 | 34 | public: 35 | static char ID; 36 | 37 | InstrumentPTW() : ModulePass(ID) {} 38 | 39 | virtual bool runOnModule(Module &M); 40 | }; 41 | 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 The Systems Security Group 
at University of Minnesota 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /InstrumentPTW/InstrumentPTW.cpp: -------------------------------------------------------------------------------- 1 | #include "InstrumentPTW.h" 2 | 3 | bool InstrumentPTW::runOnModule(Module &M) { 4 | ////errs() << "hello from InstrumentPTW::runOnModule\n"; 5 | 6 | vector workSet; 7 | 8 | for (auto & F : M) 9 | for (auto & BB : F) { 10 | for (auto & I : BB) { 11 | if (CallInst * CI = dyn_cast(&I)) { 12 | 13 | Function * calledF = CI->getCalledFunction(); 14 | if (calledF == nullptr) 15 | workSet.push_back(CI); 16 | 17 | } else if (InvokeInst * II = dyn_cast(&I)) { 18 | 19 | Function * calledF = II->getCalledFunction(); 20 | if (calledF == nullptr) 21 | workSet.push_back(II); 22 | 23 | } 24 | } 25 | } 26 | 27 | IRBuilder<> builder(M.getContext()); 28 | for (auto I : workSet) { 29 | 30 | CallSite CS(I); 31 | 32 | Value * calledV = CS.getCalledValue(); 33 | 34 | if (isa(calledV)) continue; 35 | 36 | builder.SetInsertPoint(I); 37 | 38 | InlineAsm *Asm = InlineAsm::get( 39 | FunctionType::get(builder.getVoidTy(), {calledV->getType()}, false), 40 | "ptwriteq $0", "r,~{dirflag},~{fpsr},~{flags}", 41 | true); 42 | builder.CreateCall(Asm, calledV); 43 | } 44 | 45 | return true; 46 | } 47 | 48 | char InstrumentPTW::ID = 0; 49 | static RegisterPass X("InstrumentPTW", "InstrumentPTW pass", false, false); 50 | 51 | static void register_pass(const PassManagerBuilder &PMB, 52 | legacy::PassManagerBase &PM) { 53 | PM.add(new InstrumentPTW()); 54 | } 55 | 56 | static RegisterStandardPasses RegisterPass( 57 | PassManagerBuilder::EP_OptimizerLast, register_pass); 58 | static RegisterStandardPasses RegisterPass1( 59 | PassManagerBuilder::EP_EnabledOnOptLevel0, register_pass); 60 | -------------------------------------------------------------------------------- /IRDumper/src/lib/IRDumper.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 
4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "llvm/IR/Constants.h" 10 | #include "llvm/IR/Function.h" 11 | #include "llvm/IR/GlobalVariable.h" 12 | #include "llvm/IR/IRBuilder.h" 13 | #include "llvm/IR/InlineAsm.h" 14 | #include "llvm/IR/Instruction.h" 15 | #include "llvm/IR/Instructions.h" 16 | #include "llvm/IR/LegacyPassManager.h" 17 | #include "llvm/IR/Module.h" 18 | #include "llvm/IR/Type.h" 19 | //#include "llvm/IR/TypeBuilder.h" //NOTE: If I include this line, compilation will fail. 20 | #include "llvm/IRReader/IRReader.h" 21 | #include "llvm/Pass.h" 22 | #include "llvm/Support/CommandLine.h" 23 | #include "llvm/Support/SourceMgr.h" 24 | #include "llvm/Support/raw_ostream.h" 25 | #include "llvm/Transforms/IPO/PassManagerBuilder.h" 26 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" 27 | #include "llvm/Support/raw_ostream.h" 28 | #include "llvm/Support/FileSystem.h" 29 | #include "llvm/Bitcode/BitcodeWriter.h" 30 | #include "llvm/Passes/PassBuilder.h" 31 | #include "llvm/Passes/PassPlugin.h" 32 | 33 | using namespace llvm; 34 | using namespace std; 35 | 36 | // This is legacy pass manager; must provide clang flag '-flegacy-pass-manager' 37 | class LegacyIRDumper : public ModulePass { 38 | 39 | public: 40 | static char ID; 41 | 42 | LegacyIRDumper() : ModulePass(ID) {} 43 | 44 | virtual bool runOnModule(Module &M); 45 | }; 46 | 47 | // FIXME: the following does not work with the new pass manager. 
48 | // Refer to https://llvm.org/docs/NewPassManager.html 49 | class IRDumper : public PassInfoMixin { 50 | 51 | public: 52 | virtual PreservedAnalyses run(Module &M, ModuleAnalysisManager &); 53 | }; 54 | 55 | -------------------------------------------------------------------------------- /IRDumper/src/lib/IRDumper.cpp: -------------------------------------------------------------------------------- 1 | #include "IRDumper.h" 2 | 3 | using namespace llvm; 4 | 5 | 6 | void saveModule(Module &M, Twine filename) 7 | { 8 | //int ll_fd; 9 | //sys::fs::openFileForWrite(filename + "_pt.ll", ll_fd, 10 | // sys::fs::F_RW | sys::fs::F_Text); 11 | //raw_fd_ostream ll_file(ll_fd, true, true); 12 | //M.print(ll_file, nullptr); 13 | 14 | int bc_fd; 15 | StringRef FN = filename.getSingleStringRef(); 16 | sys::fs::openFileForWrite( 17 | FN.take_front(FN.size() - 2) + ".bc", bc_fd); 18 | raw_fd_ostream bc_file(bc_fd, true, true); 19 | WriteBitcodeToFile(M, bc_file); 20 | } 21 | 22 | bool LegacyIRDumper::runOnModule(Module &M) { 23 | 24 | saveModule(M, M.getName()); 25 | 26 | return false; 27 | } 28 | 29 | char LegacyIRDumper::ID = 0; 30 | static RegisterPass X("IRDumper", "IRDumper pass", false, false); 31 | 32 | static void register_pass(const PassManagerBuilder &PMB, 33 | legacy::PassManagerBase &PM) { 34 | PM.add(new LegacyIRDumper()); 35 | } 36 | 37 | /* Legacy PM Registration */ 38 | static RegisterStandardPasses RegisterIRDumperPass( 39 | PassManagerBuilder::EP_OptimizerLast, register_pass); 40 | static RegisterStandardPasses RegisterRDumperPassL0( 41 | PassManagerBuilder::EP_EnabledOnOptLevel0, register_pass); 42 | 43 | 44 | PreservedAnalyses IRDumper::run(Module &M, ModuleAnalysisManager &) { 45 | saveModule(M, M.getName()); 46 | return PreservedAnalyses::all(); 47 | } 48 | 49 | /* New PM Registration */ 50 | llvm::PassPluginLibraryInfo getIRDumperPluginInfo() { 51 | return {LLVM_PLUGIN_API_VERSION, "IRDumper", LLVM_VERSION_STRING, 52 | [](PassBuilder &PB) { 53 | 
PB.registerPipelineParsingCallback( 54 | [](StringRef Name, llvm::ModulePassManager &PM, 55 | ArrayRef) { 56 | if (Name == "IRDumper") { 57 | PM.addPass(IRDumper()); 58 | return true; 59 | } 60 | return false; 61 | }); 62 | }}; 63 | } 64 | 65 | #ifndef LLVM_BYE_LINK_INTO_TOOLS 66 | extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo 67 | llvmGetPassPluginInfo() { 68 | return getIRDumperPluginInfo(); 69 | } 70 | #endif 71 | -------------------------------------------------------------------------------- /src/lib/Analyzer.h: -------------------------------------------------------------------------------- 1 | #ifndef _ANALYZER_GLOBAL_H 2 | #define _ANALYZER_GLOBAL_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "llvm/Support/CommandLine.h" 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #include "Common.h" 24 | 25 | 26 | // 27 | // typedefs 28 | // 29 | typedef std::vector< std::pair > ModuleList; 30 | // Mapping module to its file name. 31 | typedef std::unordered_map ModuleNameMap; 32 | // The set of all functions. 33 | typedef llvm::SmallPtrSet FuncSet; 34 | typedef llvm::SmallPtrSet CallInstSet; 35 | typedef DenseMap CallerMap; 36 | typedef DenseMap CalleeMap; 37 | 38 | struct GlobalContext { 39 | 40 | GlobalContext() {} 41 | 42 | // Statistics 43 | unsigned NumFunctions = 0; 44 | unsigned NumFirstLayerTypeCalls = 0; 45 | unsigned NumSecondLayerTypeCalls = 0; 46 | unsigned NumSecondLayerTargets = 0; 47 | unsigned NumValidIndirectCalls = 0; 48 | unsigned NumIndirectCallTargets = 0; 49 | unsigned NumFirstLayerTargets = 0; 50 | 51 | // Global variables 52 | DenseMapGlobals; 53 | 54 | // Map global function GUID (uint64_t) to its actual function with body. 55 | map GlobalFuncMap; 56 | 57 | // Functions whose addresses are taken. 
58 | FuncSet AddressTakenFuncs; 59 | 60 | // Map a callsite to all potential callee functions. 61 | CalleeMap Callees; 62 | 63 | // Map a function to all potential caller instructions. 64 | CallerMap Callers; 65 | 66 | // Map function signature to functions 67 | DenseMapsigFuncsMap; 68 | 69 | // Indirect call instructions. 70 | std::vectorIndirectCallInsts; 71 | 72 | // Modules. 73 | ModuleList Modules; 74 | ModuleNameMap ModuleMaps; 75 | std::set InvolvedModules; 76 | 77 | }; 78 | 79 | class IterativeModulePass { 80 | protected: 81 | const char * ID; 82 | public: 83 | IterativeModulePass(GlobalContext *Ctx_, const char *ID_) 84 | : ID(ID_) { } 85 | 86 | // Run on each module before iterative pass. 87 | virtual bool doInitialization(llvm::Module *M) 88 | { return true; } 89 | 90 | // Run on each module after iterative pass. 91 | virtual bool doFinalization(llvm::Module *M) 92 | { return true; } 93 | 94 | // Iterative pass. 95 | virtual bool doModulePass(llvm::Module *M) 96 | { return false; } 97 | 98 | virtual void run(ModuleList &modules); 99 | }; 100 | 101 | #endif 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TypeDive: Multi-Layer Type Analysis (MLTA) for Refining Indirect-Call Targets 2 | 3 | 4 | 📢 **[July 2025]** We have posted a detailed response to the claims made in the KallGraph paper. [**Read the full review here →**](docs/review-kallgraph.md) 5 | 6 | 7 | ## Intro 8 | This project includes a prototype implementation (TypeDive) of MLTA. 9 | MLTA relies on an observation that function pointers are commonly 10 | stored into objects whose types have a multi-layer type hierarchy; 11 | before indirect calls, function pointers will be loaded from objects 12 | with the same type hierarchy layer by layer. 
By matching the 13 | multi-layer types of function pointers and functions, MLTA can 14 | dramatically refine indirect-call targets. MLTA's approach is highly 15 | scalable (e.g., finishing the analysis of the Linux kernel within 16 | minutes) and does not have false negatives in principle. 17 | 18 | 19 | TypeDive has been tested with LLVM 15.0, O0 and O2 optimization 20 | levels, and the Linux kernel. The final results of TypeDive may 21 | have a few false negatives. Observed causes include hacky code in 22 | Linux (mainly the out-of-bounds access from `container_of`), compiler 23 | bugs, and false negatives from the baseline (function-type matching). 24 | 25 | 26 | ## How to use TypeDive 27 | **For TypeDive to work with LLVM-15, g++-10 is required for compilation** 28 | ### Build LLVM 29 | ```sh 30 | $ ./build-llvm.sh 31 | # The tested LLVM is of commit e758b77161a7 32 | ``` 33 | 34 | ### Build TypeDive 35 | ```sh 36 | # Build the analysis pass 37 | # First update Makefile to make sure the path to the built LLVM is correct 38 | $ make 39 | # Now, you can find the executable, `kanalyzer`, in `build/lib/` 40 | ``` 41 | 42 | ### Prepare LLVM bitcode files of OS kernels 43 | 44 | * First build IRDumper. Before running `make`, make sure the path to LLVM in 45 | `IRDumper/Makefile` is correct. It must use the same LLVM used 46 | for building TypeDive. 47 | * See `irgen.sh` for details on how to generate bitcode/IR 48 | 49 | ### Run TypeDive 50 | ```sh 51 | # To analyze a list of bitcode files, put the absolute paths of the bitcode files in a file, say "bc.list", then run: 52 | $ ./build/lib/kanalyzer @bc.list 53 | # Results will be printed out, or you can get the results in map `Ctx->Callees`.
54 | ``` 55 | 56 | ### Configurations 57 | 58 | * Config options can be found in `Config.h` 59 | ```sh 60 | # If precision is the priority, you can comment out `SOUND_MODE` 61 | # `SOURCE_CODE_PATH` should point to the source code 62 | ``` 63 | 64 | 65 | ## More details 66 | * [The MLTA paper (CCS'19)](https://www-users.cse.umn.edu/~kjlu/papers/mlta.pdf) 67 | ```sh 68 | @inproceedings{mlta-ccs19, 69 | title = {{Where Does It Go? Refining Indirect-Call Targets with Multi-Layer Type Analysis}}, 70 | author = {Kangjie Lu and Hong Hu}, 71 | booktitle = {Proceedings of the 26th ACM Conference on Computer and Communications Security (CCS)}, 72 | month = November, 73 | year = 2019, 74 | address = {London, UK}, 75 | } 76 | ``` 77 | -------------------------------------------------------------------------------- /src/lib/MLTA.h: -------------------------------------------------------------------------------- 1 | #ifndef _MULTI_LAYER_TYPE_ANALYSIS_H 2 | #define _MULTI_LAYER_TYPE_ANALYSIS_H 3 | 4 | #include "Analyzer.h" 5 | #include "Config.h" 6 | #include "llvm/IR/Operator.h" 7 | 8 | typedef pair typeidx_t; 9 | pair typeidx_c(Type *Ty, int Idx); 10 | typedef pair hashidx_t; 11 | pair hashidx_c(size_t Hash, int Idx); 12 | 13 | class MLTA { 14 | 15 | protected: 16 | 17 | // 18 | // Variables 19 | // 20 | 21 | GlobalContext *Ctx; 22 | 23 | 24 | //////////////////////////////////////////////////////////////// 25 | // Important data structures for type confinement, propagation, 26 | // and escapes. 27 | //////////////////////////////////////////////////////////////// 28 | DenseMap>typeIdxFuncsMap; 29 | map>>typeIdxPropMap; 30 | settypeEscapeSet; 31 | // Cap type: We cannot know where the type can be futher 32 | // propagated to. 
Do not include idx in the hash 33 | settypeCapSet; 34 | 35 | 36 | //////////////////////////////////////////////////////////////// 37 | // Other data structures 38 | //////////////////////////////////////////////////////////////// 39 | // Cache matched functions for CallInst 40 | DenseMapMatchedFuncsMap; 41 | DenseMapVTableFuncsMap; 42 | 43 | setsrcLnHashSet; 44 | setaddrTakenFuncHashSet; 45 | 46 | map>calleesSrcMap; 47 | map>L1CalleesSrcMap; 48 | 49 | // Matched icall types -- to avoid repeatation 50 | DenseMap MatchedICallTypeMap; 51 | 52 | // Set of target types 53 | setTTySet; 54 | 55 | // Functions that are actually stored to variables 56 | FuncSet StoredFuncs; 57 | // Special functions like syscalls 58 | FuncSet OutScopeFuncs; 59 | 60 | // Alias struct pointer of a general pointer 61 | map>AliasStructPtrMap; 62 | 63 | 64 | 65 | // 66 | // Methods 67 | // 68 | 69 | //////////////////////////////////////////////////////////////// 70 | // Type-related basic functions 71 | //////////////////////////////////////////////////////////////// 72 | bool fuzzyTypeMatch(Type *Ty1, Type *Ty2, Module *M1, Module *M2); 73 | 74 | void escapeType(Value *V); 75 | void propagateType(Value *ToV, Type *FromTy, int Idx = -1); 76 | 77 | Type *getBaseType(Value *V, set &Visited); 78 | Type *_getPhiBaseType(PHINode *PN, set &Visited); 79 | Function *getBaseFunction(Value *V); 80 | bool nextLayerBaseType(Value *V, list &TyList, 81 | Value * &NextV, set &Visited); 82 | bool nextLayerBaseTypeWL(Value *V, list &TyList, 83 | Value * &NextV); 84 | bool getGEPLayerTypes(GEPOperator *GEP, list &TyList); 85 | bool getBaseTypeChain(list &Chain, Value *V, 86 | bool &Complete); 87 | bool getDependentTypes(Type *Ty, int Idx, set &PropSet); 88 | 89 | 90 | //////////////////////////////////////////////////////////////// 91 | // Target-related basic functions 92 | //////////////////////////////////////////////////////////////// 93 | void confineTargetFunction(Value *V, Function *F); 94 | void 
intersectFuncSets(FuncSet &FS1, FuncSet &FS2, 95 | FuncSet &FS); 96 | bool typeConfineInInitializer(GlobalVariable *GV); 97 | bool typeConfineInFunction(Function *F); 98 | bool typePropInFunction(Function *F); 99 | void collectAliasStructPtr(Function *F); 100 | 101 | // deprecated 102 | //bool typeConfineInStore(StoreInst *SI); 103 | //bool typePropWithCast(User *Cast); 104 | Value *getVTable(Value *V); 105 | 106 | 107 | //////////////////////////////////////////////////////////////// 108 | // API functions 109 | //////////////////////////////////////////////////////////////// 110 | // Use type-based analysis to find targets of indirect calls 111 | void findCalleesWithType(CallInst*, FuncSet&); 112 | bool findCalleesWithMLTA(CallInst *CI, FuncSet &FS); 113 | bool getTargetsWithLayerType(size_t TyHash, int Idx, 114 | FuncSet &FS); 115 | 116 | 117 | //////////////////////////////////////////////////////////////// 118 | // Util functions 119 | //////////////////////////////////////////////////////////////// 120 | bool isCompositeType(Type *Ty); 121 | Type *getFuncPtrType(Value *V); 122 | Value *recoverBaseType(Value *V); 123 | void unrollLoops(Function *F); 124 | void saveCalleesInfo(CallInst *CI, FuncSet &FS, bool mlta); 125 | void printTargets(FuncSet &FS, CallInst *CI = NULL); 126 | void printTypeChain(list &Chain); 127 | 128 | 129 | public: 130 | 131 | // General pointer types like char * and void * 132 | mapInt8PtrTy; 133 | // long interger type 134 | mapIntPtrTy; 135 | mapDLMap; 136 | 137 | MLTA(GlobalContext *Ctx_) { 138 | Ctx = Ctx_; 139 | } 140 | 141 | }; 142 | 143 | #endif 144 | -------------------------------------------------------------------------------- /src/lib/Analyzer.cc: -------------------------------------------------------------------------------- 1 | //===-- Analyzer.cc - the kernel-analysis framework-------------===// 2 | // 3 | // It constructs a global call-graph based on multi-layer type 4 | // analysis. 
5 | // 6 | //===-----------------------------------------------------------===// 7 | 8 | #include "llvm/IR/LLVMContext.h" 9 | #include "llvm/IR/PassManager.h" 10 | #include "llvm/IR/Module.h" 11 | #include "llvm/IR/Verifier.h" 12 | #include "llvm/Bitcode/BitcodeReader.h" 13 | #include "llvm/Bitcode/BitcodeWriter.h" 14 | #include "llvm/Support/ManagedStatic.h" 15 | #include "llvm/Support/PrettyStackTrace.h" 16 | #include "llvm/Support/ToolOutputFile.h" 17 | #include "llvm/Support/SystemUtils.h" 18 | #include "llvm/Support/FileSystem.h" 19 | #include "llvm/IRReader/IRReader.h" 20 | #include "llvm/Support/SourceMgr.h" 21 | #include "llvm/Support/Signals.h" 22 | #include "llvm/Support/Path.h" 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #include "Analyzer.h" 31 | #include "CallGraph.h" 32 | #include "Config.h" 33 | 34 | using namespace llvm; 35 | 36 | // Command line parameters. 37 | cl::list InputFilenames( 38 | cl::Positional, cl::OneOrMore, cl::desc("")); 39 | 40 | cl::opt VerboseLevel( 41 | "verbose-level", cl::desc("Print information at which verbose level"), 42 | cl::init(0)); 43 | 44 | cl::opt MLTA( 45 | "mlta", 46 | cl::desc("Multi-layer type analysis for refining indirect-call \ 47 | targets"), 48 | cl::NotHidden, cl::init(2)); 49 | 50 | GlobalContext GlobalCtx; 51 | 52 | 53 | void IterativeModulePass::run(ModuleList &modules) { 54 | 55 | ModuleList::iterator i, e; 56 | OP << "[" << ID << "] Initializing " << modules.size() << " modules "; 57 | bool again = true; 58 | while (again) { 59 | again = false; 60 | for (i = modules.begin(), e = modules.end(); i != e; ++i) { 61 | again |= doInitialization(i->first); 62 | OP << "."; 63 | } 64 | } 65 | OP << "\n"; 66 | 67 | unsigned iter = 0, changed = 1; 68 | while (changed) { 69 | ++iter; 70 | changed = 0; 71 | unsigned counter_modules = 0; 72 | unsigned total_modules = modules.size(); 73 | for (i = modules.begin(), e = modules.end(); i != e; ++i) { 74 | OP << "[" << ID << " / " 
<< iter << "] "; 75 | OP << "[" << ++counter_modules << " / " << total_modules << "] "; 76 | OP << "[" << i->second << "]\n"; 77 | 78 | bool ret = doModulePass(i->first); 79 | if (ret) { 80 | ++changed; 81 | OP << "\t [CHANGED]\n"; 82 | } else 83 | OP << "\n"; 84 | } 85 | OP << "[" << ID << "] Updated in " << changed << " modules.\n"; 86 | } 87 | 88 | OP << "[" << ID << "] Postprocessing ...\n"; 89 | again = true; 90 | while (again) { 91 | again = false; 92 | for (i = modules.begin(), e = modules.end(); i != e; ++i) { 93 | // TODO: Dump the results. 94 | again |= doFinalization(i->first); 95 | } 96 | } 97 | 98 | OP << "[" << ID << "] Done!\n\n"; 99 | } 100 | 101 | void PrintResults(GlobalContext *GCtx) { 102 | 103 | int TotalTargets = 0; 104 | for (auto IC : GCtx->IndirectCallInsts) { 105 | TotalTargets += GCtx->Callees[IC].size(); 106 | } 107 | float AveIndirectTargets = 0.0; 108 | if (GCtx->NumValidIndirectCalls) 109 | AveIndirectTargets = 110 | (float)GCtx->NumIndirectCallTargets/GCtx->IndirectCallInsts.size(); 111 | 112 | int totalsize = 0; 113 | for (auto &curEle: GCtx->Callees) { 114 | if (curEle.first->isIndirectCall()) { 115 | totalsize += curEle.second.size(); 116 | } 117 | } 118 | OP << "\n@@ Total number of final callees: " << totalsize << ".\n"; 119 | 120 | OP<<"############## Result Statistics ##############\n"; 121 | //cout<<"# Ave. 
Number of indirect-call targets: \t"<IndirectCallInsts.size()<<"\n"; 123 | OP<<"# Number of indirect calls with targets: \t"<NumValidIndirectCalls<<"\n"; 124 | OP<<"# Number of indirect-call targets: \t\t"<NumIndirectCallTargets<<"\n"; 125 | OP<<"# Number of address-taken functions: \t\t"<AddressTakenFuncs.size()<<"\n"; 126 | OP<<"# Number of multi-layer calls: \t\t\t"<NumSecondLayerTypeCalls<<"\n"; 127 | OP<<"# Number of multi-layer targets: \t\t"<NumSecondLayerTargets<<"\n"; 128 | OP<<"# Number of one-layer calls: \t\t\t"<NumFirstLayerTypeCalls<<"\n"; 129 | OP<<"# Number of one-layer targets: \t\t\t"<NumFirstLayerTargets<<"\n"; 130 | 131 | } 132 | 133 | int main(int argc, char **argv) { 134 | 135 | // Print a stack trace if we signal out. 136 | sys::PrintStackTraceOnErrorSignal(argv[0]); 137 | PrettyStackTraceProgram X(argc, argv); 138 | 139 | llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. 140 | 141 | cl::ParseCommandLineOptions(argc, argv, "global analysis\n"); 142 | SMDiagnostic Err; 143 | 144 | // Loading modules 145 | OP << "Total " << InputFilenames.size() << " file(s)\n"; 146 | 147 | for (unsigned i = 0; i < InputFilenames.size(); ++i) { 148 | 149 | LLVMContext *LLVMCtx = new LLVMContext(); 150 | std::unique_ptr M = parseIRFile(InputFilenames[i], Err, *LLVMCtx); 151 | 152 | if (M == NULL) { 153 | OP << argv[0] << ": error loading file '" 154 | << InputFilenames[i] << "'\n"; 155 | continue; 156 | } 157 | 158 | Module *Module = M.release(); 159 | StringRef MName = StringRef(strdup(InputFilenames[i].data())); 160 | GlobalCtx.Modules.push_back(std::make_pair(Module, MName)); 161 | GlobalCtx.ModuleMaps[Module] = InputFilenames[i]; 162 | } 163 | 164 | // 165 | // Main workflow 166 | // 167 | 168 | ENABLE_MLTA = MLTA; 169 | 170 | // Build global callgraph. 
171 | CallGraphPass CGPass(&GlobalCtx); 172 | CGPass.run(GlobalCtx.Modules); 173 | 174 | // Print final results 175 | PrintResults(&GlobalCtx); 176 | 177 | return 0; 178 | } 179 | 180 | -------------------------------------------------------------------------------- /src/lib/Common.h: -------------------------------------------------------------------------------- 1 | #ifndef _COMMON_H_ 2 | #define _COMMON_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | 16 | #define Z3_ENABLED 0 17 | 18 | #if Z3_ENABLED 19 | #include 20 | #endif 21 | 22 | using namespace llvm; 23 | using namespace std; 24 | 25 | #define LOG(lv, stmt) \ 26 | do { \ 27 | if (VerboseLevel >= lv) \ 28 | errs() << stmt; \ 29 | } while(0) 30 | 31 | 32 | #define OP llvm::errs() 33 | 34 | #ifdef DEBUG_MLTA 35 | #define DBG OP 36 | #else 37 | #define DBG if (false) OP 38 | #endif 39 | 40 | #define debug_print(fmt, ...) \ 41 | do { if (DEBUG) fprintf(stderr, fmt, __VA_ARGS__); } while (0) 42 | 43 | #define WARN(stmt) LOG(1, "\n[WARN] " << stmt); 44 | 45 | #define ERR(stmt) \ 46 | do { \ 47 | errs() << "ERROR (" << __FUNCTION__ << "@" << __LINE__ << ")"; \ 48 | errs() << ": " << stmt; \ 49 | exit(-1); \ 50 | } while(0) 51 | 52 | /// Different colors for output 53 | #define KNRM "\x1B[0m" /* Normal */ 54 | #define KRED "\x1B[31m" /* Red */ 55 | #define KGRN "\x1B[32m" /* Green */ 56 | #define KYEL "\x1B[33m" /* Yellow */ 57 | #define KBLU "\x1B[34m" /* Blue */ 58 | #define KMAG "\x1B[35m" /* Magenta */ 59 | #define KCYN "\x1B[36m" /* Cyan */ 60 | #define KWHT "\x1B[37m" /* White */ 61 | 62 | 63 | extern cl::opt VerboseLevel; 64 | 65 | // 66 | // Common functions 67 | // 68 | 69 | string getFileName(DILocation *Loc, 70 | DISubprogram *SP=NULL); 71 | 72 | bool isConstant(Value *V); 73 | 74 | string getSourceLine(string fn_str, unsigned lineno); 75 | 76 | string getSourceFuncName(Instruction *I); 77 | 78 | StringRef 
getCalledFuncName(CallInst *CI); 79 | 80 | string extractMacro(string, Instruction* I); 81 | 82 | DILocation *getSourceLocation(Instruction *I); 83 | 84 | void printSourceCodeInfo(Value *V, string Tag = "VALUE"); 85 | void printSourceCodeInfo(Function *F, string Tag = "FUNC"); 86 | string getMacroInfo(Value *V); 87 | 88 | void getSourceCodeInfo(Value *V, string &file, 89 | unsigned &line); 90 | 91 | int8_t getArgNoInCall(CallInst *CI, Value *Arg); 92 | Argument *getParamByArgNo(Function *F, int8_t ArgNo); 93 | 94 | size_t funcHash(Function *F, bool withName = false); 95 | size_t callHash(CallInst *CI); 96 | void structTypeHash(StructType *STy, set &HSet); 97 | size_t typeHash(Type *Ty); 98 | size_t typeIdxHash(Type *Ty, int Idx = -1); 99 | size_t hashIdxHash(size_t Hs, int Idx = -1); 100 | size_t strIntHash(string str, int i); 101 | string structTyStr(StructType *STy); 102 | bool trimPathSlash(string &path, int slash); 103 | int64_t getGEPOffset(const Value *V, const DataLayout *DL); 104 | void LoadElementsStructNameMap( 105 | vector> &Modules); 106 | 107 | // 108 | // Common data structures 109 | // 110 | class ModuleOracle { 111 | public: 112 | ModuleOracle(Module &m) : 113 | dl(m.getDataLayout()), 114 | tli(TargetLibraryInfoImpl(Triple(m.getTargetTriple()))) 115 | {} 116 | 117 | ~ModuleOracle() {} 118 | 119 | // Getter 120 | const DataLayout &getDataLayout() { 121 | return dl; 122 | } 123 | 124 | TargetLibraryInfo &getTargetLibraryInfo() { 125 | return tli; 126 | } 127 | 128 | // Data layout 129 | uint64_t getBits() { 130 | return Bits; 131 | } 132 | 133 | uint64_t getPointerWidth() { 134 | return dl.getPointerSizeInBits(); 135 | } 136 | 137 | uint64_t getPointerSize() { 138 | return dl.getPointerSize(); 139 | } 140 | 141 | uint64_t getTypeSize(Type *ty) { 142 | return dl.getTypeAllocSize(ty); 143 | } 144 | 145 | uint64_t getTypeWidth(Type *ty) { 146 | return dl.getTypeSizeInBits(ty); 147 | } 148 | 149 | uint64_t getTypeOffset(Type *type, unsigned idx) { 150 | 
assert(isa(type)); 151 | return dl.getStructLayout(cast(type)) 152 | ->getElementOffset(idx); 153 | } 154 | 155 | bool isReintPointerType(Type *ty) { 156 | return (ty->isPointerTy() || 157 | (ty->isIntegerTy() && 158 | ty->getIntegerBitWidth() == getPointerWidth())); 159 | } 160 | 161 | protected: 162 | // Info provide 163 | const DataLayout &dl; 164 | TargetLibraryInfo tli; 165 | 166 | // Consts 167 | const uint64_t Bits = 8; 168 | }; 169 | 170 | class Helper { 171 | public: 172 | // LLVM value 173 | static string getValueName(Value *v) { 174 | if (!v->hasName()) { 175 | return to_string(reinterpret_cast(v)); 176 | } else { 177 | return v->getName().str(); 178 | } 179 | } 180 | 181 | static string getValueType(Value *v) { 182 | if (Instruction *inst = dyn_cast(v)) { 183 | return string(inst->getOpcodeName()); 184 | } else { 185 | return string("value " + to_string(v->getValueID())); 186 | } 187 | } 188 | 189 | static string getValueRepr(Value *v) { 190 | string str; 191 | raw_string_ostream stm(str); 192 | 193 | v->print(stm); 194 | stm.flush(); 195 | 196 | return str; 197 | } 198 | 199 | #if Z3_ENABLED 200 | // Z3 expr 201 | static string getExprType(Z3_context ctxt, Z3_ast ast) { 202 | return string(Z3_sort_to_string(ctxt, Z3_get_sort(ctxt, ast))); 203 | } 204 | 205 | static string getExprRepr(Z3_context ctxt, Z3_ast ast) { 206 | return string(Z3_ast_to_string(ctxt, ast)); 207 | } 208 | #endif 209 | 210 | // String conversion 211 | static void convertDotInName(string &name) { 212 | replace(name.begin(), name.end(), '.', '_'); 213 | } 214 | }; 215 | 216 | class Dumper { 217 | public: 218 | Dumper() {} 219 | ~Dumper() {} 220 | 221 | // LLVM value 222 | void valueName(Value *val) { 223 | errs() << Helper::getValueName(val) << "\n"; 224 | } 225 | 226 | void typedValue(Value *val) { 227 | errs() << "[" << Helper::getValueType(val) << "]" 228 | << Helper::getValueRepr(val) 229 | << "\n"; 230 | } 231 | 232 | #if Z3_ENABLED 233 | // Z3 expr 234 | void 
typedExpr(Z3_context ctxt, Z3_ast ast) { 235 | errs() << "[" << Helper::getExprType(ctxt, ast) << "]" 236 | << Helper::getExprRepr(ctxt, ast) 237 | << "\n"; 238 | } 239 | #endif 240 | 241 | }; 242 | 243 | extern Dumper DUMP; 244 | 245 | #endif 246 | -------------------------------------------------------------------------------- /src/lib/CallGraph.cc: -------------------------------------------------------------------------------- 1 | //===-- CallGraph.cc - Build global call-graph------------------===// 2 | // 3 | // This pass builds a global call-graph. The targets of an indirect 4 | // call are identified based on type-analysis, i.e., matching the 5 | // number and type of function parameters. 6 | // 7 | //===-----------------------------------------------------------===// 8 | 9 | #include "llvm/Pass.h" 10 | #include "llvm/IR/Module.h" 11 | #include "llvm/IR/Function.h" 12 | #include "llvm/IR/BasicBlock.h" 13 | #include "llvm/IR/Instruction.h" 14 | #include "llvm/IR/Instructions.h" 15 | #include "llvm/Support/Debug.h" 16 | #include "llvm/IR/DebugInfo.h" 17 | #include "llvm/IR/InstIterator.h" 18 | #include "llvm/IR/Constants.h" 19 | #include "llvm/ADT/StringExtras.h" 20 | #include "llvm/Analysis/CallGraph.h" 21 | #include "llvm/Support/raw_ostream.h" 22 | #include "llvm/IR/InstrTypes.h" 23 | #include "llvm/Analysis/LoopInfo.h" 24 | #include "llvm/Analysis/LoopPass.h" 25 | #include "llvm/IR/LegacyPassManager.h" 26 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" 27 | #include "llvm/IR/IRBuilder.h" 28 | #include "llvm/IR/CFG.h" 29 | 30 | #include "Common.h" 31 | #include "CallGraph.h" 32 | 33 | #include 34 | #include 35 | 36 | 37 | using namespace llvm; 38 | 39 | // 40 | // Implementation 41 | // 42 | 43 | void CallGraphPass::doMLTA(Function *F) { 44 | 45 | // Unroll loops 46 | #ifdef UNROLL_LOOP_ONCE 47 | unrollLoops(F); 48 | #endif 49 | 50 | // Collect callers and callees 51 | for (inst_iterator i = inst_begin(F), e = inst_end(F); 52 | i != e; ++i) { 53 | // 
Map callsite to possible callees. 54 | if (CallInst *CI = dyn_cast(&*i)) { 55 | 56 | CallSet.insert(CI); 57 | 58 | FuncSet *FS = &Ctx->Callees[CI]; 59 | Value *CV = CI->getCalledOperand(); 60 | Function *CF = dyn_cast(CV); 61 | 62 | // Indirect call 63 | if (CI->isIndirectCall()) { 64 | 65 | // Multi-layer type matching 66 | if (ENABLE_MLTA > 1) { 67 | findCalleesWithMLTA(CI, *FS); 68 | } 69 | // Fuzzy type matching 70 | else if (ENABLE_MLTA == 0) { 71 | size_t CIH = callHash(CI); 72 | if (MatchedICallTypeMap.find(CIH) 73 | != MatchedICallTypeMap.end()) 74 | *FS = MatchedICallTypeMap[CIH]; 75 | else { 76 | findCalleesWithType(CI, *FS); 77 | MatchedICallTypeMap[CIH] = *FS; 78 | } 79 | } 80 | // One-layer type matching 81 | else { 82 | *FS = Ctx->sigFuncsMap[callHash(CI)]; 83 | } 84 | 85 | #ifdef MAP_CALLER_TO_CALLEE 86 | for (Function *Callee : *FS) { 87 | Ctx->Callers[Callee].insert(CI); 88 | } 89 | #endif 90 | // Save called values for future uses. 91 | Ctx->IndirectCallInsts.push_back(CI); 92 | 93 | ICallSet.insert(CI); 94 | if (!FS->empty()) { 95 | MatchedICallSet.insert(CI); 96 | Ctx->NumIndirectCallTargets += FS->size(); 97 | Ctx->NumValidIndirectCalls++; 98 | } 99 | } 100 | // Direct call 101 | else { 102 | // not InlineAsm 103 | if (CF) { 104 | // Call external functions 105 | if (CF->isDeclaration()) { 106 | if (Function *GF = Ctx->GlobalFuncMap[CF->getGUID()]) 107 | CF = GF; 108 | } 109 | 110 | FS->insert(CF); 111 | 112 | #ifdef MAP_CALLER_TO_CALLEE 113 | Ctx->Callers[CF].insert(CI); 114 | #endif 115 | } 116 | // InlineAsm 117 | else { 118 | // TODO: handle InlineAsm functions 119 | } 120 | } 121 | 122 | if (ENABLE_MLTA > 1) { 123 | if (CI->isIndirectCall()) { 124 | 125 | #ifdef PRINT_ICALL_TARGET 126 | printSourceCodeInfo(CI, "RESOLVING"); 127 | #endif 128 | 129 | //FuncSet FSBase = Ctx->sigFuncsMap[callHash(CI)]; 130 | //if (LayerNo > 0) { 131 | for (auto F : Ctx->sigFuncsMap[callHash(CI)]) { 132 | if (FS->find(F) == FS->end()) { 133 | #ifdef 
PRINT_ICALL_TARGET 134 | if ((OutScopeFuncs.find(F) == OutScopeFuncs.end()) 135 | && (StoredFuncs.find(F) != StoredFuncs.end())) { 136 | printSourceCodeInfo(F, "REMOVED"); 137 | } 138 | else { 139 | } 140 | #endif 141 | } 142 | } 143 | #ifdef PRINT_ICALL_TARGET 144 | printTargets(*FS, CI); 145 | #endif 146 | } 147 | } 148 | } 149 | } 150 | } 151 | 152 | bool CallGraphPass::doInitialization(Module *M) { 153 | 154 | OP<<"#"<getName()<<"\n"; 155 | 156 | ++ MIdx; 157 | 158 | DLMap[M] = &(M->getDataLayout()); 159 | Int8PtrTy[M] = Type::getInt8PtrTy(M->getContext()); 160 | IntPtrTy[M] = DLMap[M]->getIntPtrType(M->getContext()); 161 | 162 | setCastSet; 163 | 164 | // 165 | // Iterate and process globals 166 | // 167 | for (Module::global_iterator gi = M->global_begin(); 168 | gi != M->global_end(); ++gi) { 169 | 170 | GlobalVariable* GV = &*gi; 171 | if (GV->hasInitializer()) { 172 | 173 | Type *ITy = GV->getInitializer()->getType(); 174 | if (!ITy->isPointerTy() && !isCompositeType(ITy)) 175 | continue; 176 | 177 | Ctx->Globals[GV->getGUID()] = GV; 178 | 179 | typeConfineInInitializer(GV); 180 | } 181 | } 182 | 183 | // Iterate functions and instructions 184 | for (Function &F : *M) { 185 | 186 | // Collect address-taken functions. 187 | // NOTE: declaration functions can also have address taken 188 | if (F.hasAddressTaken()) { 189 | Ctx->AddressTakenFuncs.insert(&F); 190 | size_t FuncHash = funcHash(&F, false); 191 | Ctx->sigFuncsMap[FuncHash].insert(&F); 192 | StringRef FName = F.getName(); 193 | if (FName.startswith("__x64") || 194 | FName.startswith("__ia32")) { 195 | OutScopeFuncs.insert(&F); 196 | } 197 | } 198 | 199 | // The following only considers actual functions with body 200 | if (F.isDeclaration()) { 201 | continue; 202 | } 203 | 204 | collectAliasStructPtr(&F); 205 | typeConfineInFunction(&F); 206 | typePropInFunction(&F); 207 | 208 | // Collect global function definitions. 
209 | if (F.hasExternalLinkage()) { 210 | Ctx->GlobalFuncMap[F.getGUID()] = &F; 211 | } 212 | } 213 | 214 | // Do something at the end of last module 215 | if (Ctx->Modules.size() == MIdx) { 216 | 217 | // Map the declaration functions to actual ones 218 | // NOTE: to delete an item, must iterate by reference 219 | for (auto &SF : Ctx->sigFuncsMap) { 220 | for (auto F : SF.second) { 221 | if (!F) 222 | continue; 223 | if (F->isDeclaration()) { 224 | SF.second.erase(F); 225 | if (Function *AF = Ctx->GlobalFuncMap[F->getGUID()]) { 226 | SF.second.insert(AF); 227 | } 228 | } 229 | } 230 | } 231 | 232 | for (auto &TF : typeIdxFuncsMap) { 233 | for (auto &IF : TF.second) { 234 | for (auto F : IF.second) { 235 | if (F->isDeclaration()) { 236 | IF.second.erase(F); 237 | if (Function *AF = Ctx->GlobalFuncMap[F->getGUID()]) { 238 | IF.second.insert(AF); 239 | } 240 | } 241 | } 242 | } 243 | } 244 | 245 | MIdx = 0; 246 | } 247 | 248 | return false; 249 | } 250 | 251 | bool CallGraphPass::doFinalization(Module *M) { 252 | 253 | ++ MIdx; 254 | if (Ctx->Modules.size() == MIdx) { 255 | // Finally map declaration functions to actual functions 256 | OP<<"Mapping declaration functions to actual ones...\n"; 257 | Ctx->NumIndirectCallTargets = 0; 258 | for (auto CI : CallSet) { 259 | FuncSet FS; 260 | for (auto F : Ctx->Callees[CI]) { 261 | if (F->isDeclaration()) { 262 | F = Ctx->GlobalFuncMap[F->getGUID()]; 263 | if (F) { 264 | FS.insert(F); 265 | } 266 | } 267 | else 268 | FS.insert(F); 269 | } 270 | Ctx->Callees[CI] = FS; 271 | 272 | if (CI->isIndirectCall()) { 273 | Ctx->NumIndirectCallTargets += FS.size(); 274 | //printTargets(Ctx->Callees[CI], CI); 275 | } 276 | } 277 | 278 | } 279 | return false; 280 | } 281 | 282 | bool CallGraphPass::doModulePass(Module *M) { 283 | 284 | ++ MIdx; 285 | 286 | // 287 | // Iterate and process globals 288 | // 289 | for (Module::global_iterator gi = M->global_begin(); 290 | gi != M->global_end(); ++gi) { 291 | 292 | GlobalVariable* GV = &*gi; 
293 | //if (GV->user_empty()) 294 | // continue; 295 | 296 | Type *GTy = GV->getType(); 297 | assert(GTy->isPointerTy()); 298 | 299 | } 300 | if (MIdx == Ctx->Modules.size()) { 301 | } 302 | 303 | // 304 | // Process functions 305 | // 306 | for (Module::iterator f = M->begin(), fe = M->end(); 307 | f != fe; ++f) { 308 | 309 | Function *F = &*f; 310 | 311 | if (F->isDeclaration()) 312 | continue; 313 | 314 | doMLTA(F); 315 | } 316 | 317 | return false; 318 | } 319 | 320 | -------------------------------------------------------------------------------- /docs/review-kallgraph.md: -------------------------------------------------------------------------------- 1 | # A Critical Review of “Redefining Indirect Call Analysis with KallGraph” [1] 2 | 3 | 4 | The paper *KallGraph* [1] introduces a hybrid analysis technique that integrates point-to analysis into the MLTA [4] framework to enhance precision in resolving indirect calls. Central to its contribution is the claim of identifying “fundamental design flaws” in MLTA. 5 | 6 | We initially shared a version of this review with the authors of *KallGraph* on **June 16, 2025**. They responded to parts of our evaluation, and we have taken their feedback into account in this updated review. This revised version reflects our continued efforts to ensure the technical accuracy of the discussion and to clarify misconceptions related to MLTA and its successors. 7 | 8 | --- 9 | 10 | ## TL;DR Summary 11 | 12 | - Most major claims of *KallGraph* are not supported by evidence, and its experimental results appear significantly off. 13 | - MLTA does not exhibit the claimed type confinement issues; its design and experiments confirm sound handling of such cases. 14 | - All example cases used to demonstrate MLTA’s “unsoundness” are incorrect; MLTA does not miss the reported targets. These include examples in Figure 3 and Section 7.2.1 of the *KallGraph* paper. 
15 | - Of 66 false negatives reported in *KallGraph*’s Table 6, only 17 are verifiable; none are due to design flaws. 16 | - *KallGraph* itself shows a substantially higher FN rate than reported—missing 1,782 of 6,500 traced indirect calls. Precision claims become pointless with such a high FN rate. 17 | - Table 2 states *KallGraph* completes Linux kernel analysis in 4.5 hours, which is misleading. In fact, it requires 270 CPU hours; MLTA takes only 0.5 CPU hour. 18 | 19 | --- 20 | 21 | ## 1. KallGraph’s Soundness Claims Against MLTA 22 | 23 | *KallGraph* presents two main claims of unsoundness in MLTA, both attributed to its design. 24 | 25 | ### 1.1 Alleged Unsound Type Confinement Rule 26 | 27 | *KallGraph* claims that MLTA’s “Type Confinement Rule” is unsound because it cannot track layered struct types across function boundaries. However, this is incorrect---MLTA includes escape analysis and propagation policy to address exactly this case. 28 | 29 | From Section 4.1.3 of the MLTA paper: 30 | 31 | > One thing to note is that, when we cannot decide if a composite type would be stored or cast to an unsupported type, e.g., a pointer of an object of the composite type is passed to or from other functions, and we cannot decide how this pointer is used in those functions, we will also treat the composite type as escaping. 32 | 33 | This mechanism ensures soundness for interprocedural ambiguity, but *KallGraph* does not account for it in its analysis. 34 | 35 | #### Supportive experiments 36 | 37 | *KallGraph* uses Figure 3 to argue that MLTA misses the target function `b_read` for `icall2`. In fact, MLTA captures this target correctly. See the screenshot below: 38 | 39 | ![MLTA resolving b_read correctly (Figure 3)](fig3-mlta-result.png) 40 | 41 | 42 | 43 | *KallGraph* also claims 66 of its 100 FNs are due to this issue. Our reanalysis finds that: 44 | 45 | - 61 are not false negatives at all. 
46 | - The remaining 5 are caused by an implementation error (see Section 2). 47 | 48 | **Update:** 49 | The authors responded to this correction: they acknowledged that their example is incorrect and tried to provide new examples. It turns out the new examples are unrelated to the claimed unsound confinement design; instead, they arise because, when collecting address-taken functions, some special statements (`ReturnInst` and `Arguments`) were not yet supported. These FNs already exist in the function-type matching and are not introduced by MLTA's design. We have patches to quickly fix them. 50 | 51 | --- 52 | 53 | ### 1.2 Alleged Unsound Type Propagation Rule 54 | 55 | The paper also raises questions about MLTA’s “Type Propagation Rule” and claims it is a design flaw, which we disagree with. 56 | We would like to clarify that MLTA deliberately does not adopt bi-directional propagation for typecasts, for a clear reason: 57 | The scenario, in practice, does NOT exist. In our experiments, we’ve never encountered such a case. The `unsound_cast` example 58 | provided in Figure 3 of the paper is unrealistic. There is no reason to introduce an intermediate object `f` of essentially the 59 | same type (exact same fields) when both source and use are for `e`. Interestingly, none of the 15 false negatives mentioned in 60 | Table 6 are due to this issue; instead they are caused by broken types (see details in Section 2) from the LLVM compiler. 61 | 62 | Again, by running its example in Figure 3, our experiment shows that MLTA does not miss the target `f_read()`. 63 | 64 | 65 | --- 66 | 67 | ### 1.3 Discrepancy in Reported FN Rate 68 | 69 | *KallGraph* claims to have only 9 FNs among 2,937 targets (937 traced + 2,000 sampled). Using full tracing on Linux 5.18, we observed 6,461 ground-truth indirect call targets. 70 | 71 | Findings: 72 | 73 | - *KallGraph* missed 1,812 targets (1,782 with `mem2reg`). 74 | - The authors have not responded to this discrepancy. 
75 | - MLTA initially had 342 FNs (mostly from improper handling of syscall pointer arrays), and only 33 FNs after the fix. 76 | - TFA [2] had just 2 FNs under the same conditions. 77 | 78 | 📄 **[50_samples_of_FN_of_KallGraph_in_Linux_5.18.txt](docs/50_samples_of_FN_of_KallGraph_in_Linux_5.18.txt)** contains our FN samples of KallGraph. 79 | 80 | --- 81 | 82 | ## 2. Detailed Re-evaluation of the Reported False Negatives 83 | 84 | *KallGraph* lists 66 MLTA FNs in Table 6. Our reevaluation using MLTA shows: 85 | 86 | - Only 17 are verifiable FNs. 87 | - 49 are resolved correctly. 88 | - Both examples in Section 7.2.1 (`link->doit()`, `ia32_sys_call_table[unr]`) are correctly handled by MLTA. 89 | 90 | *KallGraph* attributes the 17 FNs to: 91 | 92 | - 5: “unsound type confinement” 93 | - 7: “unsound typecast handling” 94 | - 5: “weak implementation” 95 | 96 | Our investigation shows they stem from LLVM type issues and implementation artifacts---not from MLTA’s design. 97 | 98 | 📄 **[FN Reevaluation Results (docs/re-evaluation-kallgraph.txt)](https://docs.google.com/spreadsheets/d/e/2PACX-1vQ0ud5xfwK6V2nNQ6aq9af7bMIcxfMExOsMeymDKXa3lHwk1BeayaTqDCqAF9Ux7QA6oAdADx4GTKFX/pubhtml?gid=0&single=true)** provides a case-by-case breakdown. Note that in MLTA's paper and experiments, we never used `mem2reg`, which introduces many `PHINode` instructions that are not yet fully supported in MLTA's current implementation. 99 | 100 | --- 101 | 102 | ### 2.1 Root Causes of False Negatives: A Correction 103 | 104 | We categorize the 17 confirmed FNs into two root causes: 105 | 106 | #### 2.1.1 Known LLVM Type System Issues 107 | 108 | LLVM IR may represent the same C struct inconsistently across modules. 
109 | 110 | Example (`io_uring/io_uring.c:1867`): 111 | 112 | - In `io_uring/io_uring.bc`: 113 | ```llvm 114 | %struct.io_issue_def = type { i16, i32 (%struct.io_kiocb*, i32)*, i32 (%struct.io_kiocb*, %struct.io_uring_sqe*)* }
```
115 | - In `io_uring/opdef.bc` (the type name is missing, and the struct layout differs as well): 116 | ```llvm 117 | { i8, i8, i32 (%struct.io_kiocb*, i32)*, i32 (%struct.io_kiocb*, %struct.io_uring_sqe*)*}
```
118 | 119 | MLTA uses name-based type comparison, which fails here. *KallGraph* refers to this as “unsound typecast handling,” but it is a well-known limitation of LLVM IR and has been discussed in prior work [2, 3, 5]. MLTA’s successors address this issue through structural matching. 120 | 121 | --- 122 | 123 | #### 2.1.2 Implementation Artifacts 124 | 125 | A specific issue involves the use of `Ctx->GlobalFuncMap` inside `typeConfineInFunction()`. This map was not fully populated at the time of analysis, which prevented MLTA from triggering its fallback behavior for indirect calls. 126 | 127 | Reordering the LLVM pass to ensure the map is populated beforehand resolves the issue. 128 | 129 | These are **implementation limitations**, not **design flaws**. 130 | 131 | --- 132 | 133 | ## 3. Other Major Comments 134 | 135 | ### 3.1 On MLTA’s Relationship to KallGraph 136 | 137 | *KallGraph* repeatedly claims MLTA is an “ad hoc” version of itself. We respectfully disagree. 138 | 139 | MLTA is a principled type-based analysis that complements points-to analysis. While *KallGraph* combines both, this does not imply that either individual technique is ad hoc. 140 | 141 | MLTA’s primary contribution lies in **structural type reasoning**, not in its **minimized data-flow** analysis. 142 | 143 | --- 144 | 145 | ### 3.2 On KallGraph’s Fixed-Point Optimization 146 | 147 | *KallGraph* introduces a reachability-based fixed-point optimization to reduce iterations. However, this optimization assumes one of two things: 148 | 149 | 1. 
A sound call graph is available **before** the analysis begins—which *KallGraph* doesn't have initially, or 150 | 2. The call graph must be computed **iteratively**—which negates the benefit of the proposed optimization. 151 | 152 | This circular dependency undermines the theoretical soundness and practical effectiveness of the optimization. 153 | 154 | **Update**: The authors responded by claiming that *KallGraph* uses flow-insensitive and context-insensitive analysis and therefore does not suffer from this issue. 155 | However, this answer does not address the concern. 156 | 157 | --- 158 | 159 | ## References 160 | 161 | [1] Guoren Li, Manu Sridharan, and Zhiyun Qian. *Redefining Indirect Call Analysis with KallGraph*. IEEE S&P 2025, pp. 2734–2752. 162 | [2] Dinghao Liu, Shouling Ji, Kangjie Lu, and Qinming He. *Improving Indirect-Call Analysis in LLVM with Type and Data-Flow Co-Analysis*. USENIX Security 2024. 163 | [3] Kangjie Lu. *Practical Program Modularization with Type-Based Dependence Analysis*. IEEE S&P 2023. 164 | [4] Kangjie Lu and Hong Hu. *Where Does It Go? Refining Indirect-Call Targets with Multi-Layer Type Analysis*. ACM CCS 2019. 165 | [5] Tianrou Xia, Hong Hu, and Dinghao Wu. *DeepType: Refining Indirect Call Targets with Strong Multi-Layer Type Analysis*. USENIX Security 2024. 
166 | -------------------------------------------------------------------------------- /src/lib/Common.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "Common.h" 8 | #include "Config.h" 9 | 10 | 11 | // Map from struct elements to its name 12 | static map>elementsStructNameMap; 13 | 14 | bool trimPathSlash(string &path, int slash) { 15 | while (slash > 0) { 16 | path = path.substr(path.find('/') + 1); 17 | --slash; 18 | } 19 | 20 | return true; 21 | } 22 | 23 | string getFileName(DILocation *Loc, DISubprogram *SP) { 24 | string FN; 25 | if (Loc) 26 | FN = Loc->getFilename().str(); 27 | else if (SP) 28 | FN = SP->getFilename().str(); 29 | else 30 | return ""; 31 | 32 | int slashToTrim = 2; 33 | char *user = getlogin(); 34 | if (strstr(user, "kjlu")) { 35 | slashToTrim = 0; 36 | trimPathSlash(FN, slashToTrim); 37 | FN = string(SOURCE_CODE_PATH) + "/" + FN; 38 | } 39 | else { 40 | OP << "== Warning: please specify the path of linux source."; 41 | } 42 | return FN; 43 | } 44 | 45 | /// Check if the value is a constant. 46 | bool isConstant(Value *V) { 47 | // Invalid input. 48 | if (!V) 49 | return false; 50 | 51 | // The value is a constant. 
52 | Constant *Ct = dyn_cast(V); 53 | if (Ct) 54 | return true; 55 | 56 | return false; 57 | } 58 | 59 | /// Get the source code line 60 | string getSourceLine(string fn_str, unsigned lineno) { 61 | std::ifstream sourcefile(fn_str); 62 | string line; 63 | sourcefile.seekg(ios::beg); 64 | 65 | for(int n = 0; n < lineno - 1; ++n){ 66 | sourcefile.ignore(std::numeric_limits::max(), '\n'); 67 | } 68 | getline(sourcefile, line); 69 | 70 | return line; 71 | } 72 | 73 | string getSourceFuncName(Instruction *I) { 74 | 75 | DILocation *Loc = getSourceLocation(I); 76 | if (!Loc) 77 | return ""; 78 | unsigned lineno = Loc->getLine(); 79 | std::string fn_str = getFileName(Loc); 80 | string line = getSourceLine(fn_str, lineno); 81 | 82 | while(line[0] == ' ' || line[0] == '\t') 83 | line.erase(line.begin()); 84 | line = line.substr(0, line.find('(')); 85 | return line; 86 | } 87 | 88 | string extractMacro(string line, Instruction *I) { 89 | string macro, word, FnName; 90 | std::regex caps("[^\\(][_A-Z][_A-Z0-9]+[\\);,]+"); 91 | smatch match; 92 | 93 | // detect function macros 94 | if (CallInst *CI = dyn_cast(I)) { 95 | FnName = getCalledFuncName(CI).str(); 96 | caps = "[_A-Z][_A-Z0-9]{2,}"; 97 | std::regex keywords("(\\s*)(for|if|while)(\\s*)(\\()"); 98 | 99 | if (regex_search(line, match, keywords)) 100 | line = line.substr(match[0].length()); 101 | 102 | if (line.find(FnName) != std::string::npos) { 103 | if (regex_search(FnName, match, caps)) 104 | return FnName; 105 | 106 | } else { 107 | //identify non matching functions as macros 108 | //std::count(line.begin(), line.end(), '"') > 0 109 | std::size_t eq_pos = line.find_last_of("="); 110 | if (eq_pos == std::string::npos) 111 | eq_pos = 0; 112 | else 113 | ++eq_pos; 114 | 115 | std::size_t paren = line.find('(', eq_pos); 116 | return line.substr(eq_pos, paren-eq_pos); 117 | } 118 | 119 | } else { 120 | // detect macro constant variables 121 | std::size_t lhs = -1; 122 | stringstream iss(line.substr(lhs+1)); 123 | 124 | 
while (iss >> word) { 125 | if (regex_search(word, match, caps)) { 126 | macro = word; 127 | return macro; 128 | } 129 | } 130 | } 131 | 132 | return ""; 133 | } 134 | 135 | /// Get called function name of V. 136 | StringRef getCalledFuncName(CallInst *CI) { 137 | 138 | Value *V; 139 | V = CI->getCalledOperand(); 140 | assert(V); 141 | 142 | InlineAsm *IA = dyn_cast(V); 143 | if (IA) 144 | return StringRef(IA->getAsmString()); 145 | 146 | User *UV = dyn_cast(V); 147 | if (UV) { 148 | if (UV->getNumOperands() > 0) { 149 | Value *VUV = UV->getOperand(0); 150 | return VUV->getName(); 151 | } 152 | } 153 | 154 | return V->getName(); 155 | } 156 | 157 | DILocation *getSourceLocation(Instruction *I) { 158 | if (!I) 159 | return NULL; 160 | 161 | MDNode *N = I->getMetadata("dbg"); 162 | if (!N) 163 | return NULL; 164 | 165 | DILocation *Loc = dyn_cast(N); 166 | if (!Loc || Loc->getLine() < 1) 167 | return NULL; 168 | 169 | return Loc; 170 | } 171 | 172 | /// Print out source code information to facilitate manual analyses. 
173 | void printSourceCodeInfo(Value *V, string Tag) { 174 | Instruction *I = dyn_cast(V); 175 | if (!I) 176 | return; 177 | 178 | DILocation *Loc = getSourceLocation(I); 179 | if (!Loc) 180 | return; 181 | 182 | unsigned LineNo = Loc->getLine(); 183 | std::string FN = getFileName(Loc); 184 | string line = getSourceLine(FN, LineNo); 185 | FN = Loc->getFilename().str(); 186 | //FN = FN.substr(FN.find('/') + 1); 187 | //FN = FN.substr(FN.find('/') + 1); 188 | 189 | while(line[0] == ' ' || line[0] == '\t') 190 | line.erase(line.begin()); 191 | OP << " [" 192 | << "\033[34m" << Tag << "\033[0m" << "] " 193 | << FN 194 | << " +" << LineNo 195 | #ifdef PRINT_SOURCE_LINE 196 | << " " 197 | << "\033[35m" << line << "\033[0m" <<'\n'; 198 | OP<<*I 199 | #endif 200 | <<"\n"; 201 | } 202 | 203 | 204 | void printSourceCodeInfo(Function *F, string Tag) { 205 | 206 | DISubprogram *SP = F->getSubprogram(); 207 | 208 | if (SP) { 209 | string FN = getFileName(NULL, SP); 210 | string line = getSourceLine(FN, SP->getLine()); 211 | while(line[0] == ' ' || line[0] == '\t') 212 | line.erase(line.begin()); 213 | 214 | FN = SP->getFilename().str(); 215 | //FN = FN.substr(FN.find('/') + 1); 216 | //FN = FN.substr(FN.find('/') + 1); 217 | 218 | OP << " [" 219 | << "\033[34m" << Tag << "\033[0m" << "] " 220 | << FN 221 | << " +" << SP->getLine() 222 | #ifdef PRINT_SOURCE_LINE 223 | << " " 224 | << "\033[35m" << line << "\033[0m" 225 | #endif 226 | <<'\n'; 227 | } 228 | #ifdef PRINT_SOURCE_LINE 229 | else { 230 | OP << " [" 231 | << "\033[34m" << "??" 
<< "\033[0m" << "] " 232 | << F->getParent()->getName()<<": "<getName()<<'\n'; 233 | } 234 | #endif 235 | } 236 | 237 | string getMacroInfo(Value *V) { 238 | 239 | Instruction *I = dyn_cast(V); 240 | if (!I) return ""; 241 | 242 | DILocation *Loc = getSourceLocation(I); 243 | if (!Loc) return ""; 244 | 245 | unsigned LineNo = Loc->getLine(); 246 | std::string FN = getFileName(Loc); 247 | string line = getSourceLine(FN, LineNo); 248 | FN = Loc->getFilename().str(); 249 | const char *filename = FN.c_str(); 250 | filename = strchr(filename, '/') + 1; 251 | filename = strchr(filename, '/') + 1; 252 | int idx = filename - FN.c_str(); 253 | 254 | while(line[0] == ' ' || line[0] == '\t') 255 | line.erase(line.begin()); 256 | 257 | string macro = extractMacro(line, I); 258 | 259 | //clean up the ending and whitespaces 260 | macro.erase(std::remove (macro.begin(), macro.end(),' '), macro.end()); 261 | unsigned length = 0; 262 | for (auto it = macro.begin(), e = macro.end(); it != e; ++it) 263 | if (*it == ')' || *it == ';' || *it == ',') { 264 | macro = macro.substr(0, length); 265 | break; 266 | } else { 267 | ++length; 268 | } 269 | 270 | return macro; 271 | } 272 | 273 | /// Get source code information of this value 274 | void getSourceCodeInfo(Value *V, string &file, 275 | unsigned &line) { 276 | file = ""; 277 | line = 0; 278 | 279 | auto I = dyn_cast(V); 280 | if (!I) 281 | return; 282 | 283 | MDNode *N = I->getMetadata("dbg"); 284 | if (!N) 285 | return; 286 | 287 | DILocation *Loc = dyn_cast(N); 288 | if (!Loc || Loc->getLine() < 1) 289 | return; 290 | 291 | file = Loc->getFilename().str(); 292 | line = Loc->getLine(); 293 | } 294 | 295 | int8_t getArgNoInCall(CallInst *CI, Value *Arg) { 296 | 297 | int8_t Idx = 0; 298 | for (auto AI = CI->arg_begin(), E = CI->arg_end(); 299 | AI != E; ++AI) { 300 | if (*AI == Arg) { 301 | return Idx; 302 | } 303 | ++Idx; 304 | } 305 | return -1; 306 | } 307 | 308 | Argument *getParamByArgNo(Function *F, int8_t ArgNo) { 309 | 310 | 
if (ArgNo >= F->arg_size()) 311 | return NULL; 312 | 313 | int8_t idx = 0; 314 | Function::arg_iterator ai = F->arg_begin(); 315 | while (idx != ArgNo) { 316 | ++ai; 317 | ++idx; 318 | } 319 | return ai; 320 | } 321 | 322 | void LoadElementsStructNameMap( 323 | vector> &Modules) { 324 | 325 | for (auto M : Modules) { 326 | for (auto STy : M.first->getIdentifiedStructTypes()) { 327 | assert(STy->hasName()); 328 | if (STy->isOpaque()) 329 | continue; 330 | 331 | string strSTy = structTyStr(STy); 332 | elementsStructNameMap[strSTy].insert(STy->getName()); 333 | } 334 | } 335 | } 336 | 337 | void cleanString(string &str) { 338 | // process string 339 | // remove c++ class type added by compiler 340 | size_t pos = str.find("(%class."); 341 | if (pos != string::npos) { 342 | //regex pattern1("\\(\\%class\\.[_A-Za-z0-9]+\\*,?"); 343 | regex pattern("^[_A-Za-z0-9]+\\*,?"); 344 | smatch match; 345 | string str_sub = str.substr(pos + 8); 346 | if (regex_search(str_sub, match, pattern)) { 347 | str.replace(pos + 1, 7 + match[0].length(), ""); 348 | } 349 | } 350 | string::iterator end_pos = remove(str.begin(), str.end(), ' '); 351 | str.erase(end_pos, str.end()); 352 | } 353 | 354 | string funcTypeString(FunctionType *FTy) { 355 | 356 | string output; 357 | for (FunctionType::param_iterator pi = FTy->param_begin(); 358 | pi != FTy->param_end(); ++ pi) { 359 | Type *PTy = *pi; 360 | string sig; 361 | raw_string_ostream rso(sig); 362 | PTy->print(rso); 363 | output += rso.str(); 364 | //output += to_string(PTy->getTypeID()); 365 | //output += ","; 366 | } 367 | return output; 368 | } 369 | 370 | size_t funcHash(Function *F, bool withName) { 371 | 372 | hash str_hash; 373 | string output; 374 | 375 | #ifdef HASH_SOURCE_INFO 376 | DISubprogram *SP = F->getSubprogram(); 377 | 378 | if (SP) { 379 | output = SP->getFilename(); 380 | output = output + to_string(uint_hash(SP->getLine())); 381 | } 382 | else { 383 | #endif 384 | string sig; 385 | raw_string_ostream rso(sig); 386 | 
FunctionType *FTy = F->getFunctionType(); 387 | FTy->print(rso); 388 | output = rso.str(); 389 | //output = funcTypeString(FTy); 390 | 391 | if (withName) 392 | output += F->getName(); 393 | #ifdef HASH_SOURCE_INFO 394 | } 395 | #endif 396 | // process string 397 | cleanString(output); 398 | 399 | return str_hash(output); 400 | } 401 | 402 | size_t callHash(CallInst *CI) { 403 | 404 | CallBase *CB = dyn_cast(CI); 405 | //Value *CO = CI->getCalledOperand(); 406 | //if (CO) { 407 | // Function *CF = dyn_cast(CO); 408 | // if (CF) 409 | // return funcHash(CF); 410 | //} 411 | hash str_hash; 412 | string sig; 413 | raw_string_ostream rso(sig); 414 | FunctionType *FTy = CB->getFunctionType(); 415 | FTy->print(rso); 416 | string strip_str = rso.str(); 417 | //string strip_str = funcTypeString(FTy); 418 | cleanString(strip_str); 419 | 420 | return str_hash(strip_str); 421 | } 422 | 423 | string structTyStr(StructType *STy) { 424 | string ty_str; 425 | string sig; 426 | for (auto Ty : STy->elements()) { 427 | ty_str += to_string(Ty->getTypeID()); 428 | } 429 | return ty_str; 430 | } 431 | 432 | void structTypeHash(StructType *STy, set &HSet) { 433 | hash str_hash; 434 | string sig; 435 | string ty_str; 436 | 437 | // TODO: Use more but reliable information 438 | // FIXME: A few cases may not even have a name 439 | if (STy->hasName()) { 440 | ty_str = STy->getName().str(); 441 | HSet.insert(str_hash(ty_str)); 442 | } 443 | else { 444 | string sstr = structTyStr(STy); 445 | if (elementsStructNameMap.find(sstr) 446 | != elementsStructNameMap.end()) { 447 | for (auto SStr : elementsStructNameMap[sstr]) { 448 | ty_str = SStr.str(); 449 | HSet.insert(str_hash(ty_str)); 450 | } 451 | } 452 | } 453 | } 454 | 455 | size_t typeHash(Type *Ty) { 456 | hash str_hash; 457 | string sig; 458 | string ty_str; 459 | 460 | if (StructType *STy = dyn_cast(Ty)) { 461 | // TODO: Use more but reliable information 462 | // FIXME: A few cases may not even have a name 463 | if (STy->hasName()) { 464 
| ty_str = STy->getName().str(); 465 | } 466 | else { 467 | string sstr = structTyStr(STy); 468 | if (elementsStructNameMap.find(sstr) 469 | != elementsStructNameMap.end()) { 470 | ty_str = elementsStructNameMap[sstr].begin()->str(); 471 | } 472 | } 473 | } 474 | #ifdef SOUND_MODE 475 | else if (ArrayType *ATy = dyn_cast(Ty)) { 476 | 477 | // Compiler sometimes fails recoginize size of array (compiler 478 | // bug?), so let's just use the element type 479 | 480 | Ty = ATy->getElementType(); 481 | raw_string_ostream rso(sig); 482 | Ty->print(rso); 483 | ty_str = rso.str() + "[array]"; 484 | string::iterator end_pos = remove(ty_str.begin(), ty_str.end(), ' '); 485 | ty_str.erase(end_pos, ty_str.end()); 486 | } 487 | #endif 488 | else { 489 | raw_string_ostream rso(sig); 490 | Ty->print(rso); 491 | ty_str = rso.str(); 492 | string::iterator end_pos = remove(ty_str.begin(), ty_str.end(), ' '); 493 | ty_str.erase(end_pos, ty_str.end()); 494 | } 495 | return str_hash(ty_str); 496 | } 497 | 498 | size_t hashIdxHash(size_t Hs, int Idx) { 499 | hash str_hash; 500 | return Hs + str_hash(to_string(Idx)); 501 | } 502 | 503 | size_t typeIdxHash(Type *Ty, int Idx) { 504 | return hashIdxHash(typeHash(Ty), Idx); 505 | } 506 | 507 | size_t strIntHash(string str, int i) { 508 | hash str_hash; 509 | // FIXME: remove pos 510 | size_t pos = str.rfind("/"); 511 | return str_hash(str.substr(0, pos) + to_string(i)); 512 | } 513 | 514 | int64_t getGEPOffset(const Value *V, const DataLayout *DL) { 515 | 516 | const GEPOperator *GEP = dyn_cast(V); 517 | 518 | int64_t offset = 0; 519 | const Value *baseValue = GEP->getPointerOperand()->stripPointerCasts(); 520 | if (const ConstantExpr *cexp = dyn_cast(baseValue)) 521 | if (cexp->getOpcode() == Instruction::GetElementPtr) 522 | { 523 | // FIXME: this looks incorrect 524 | offset += getGEPOffset(cexp, DL); 525 | } 526 | Type *ptrTy = GEP->getSourceElementType(); 527 | 528 | SmallVector indexOps(GEP->op_begin() + 1, GEP->op_end()); 529 | // Make 
sure all indices are constants 530 | for (unsigned i = 0, e = indexOps.size(); i != e; ++i) 531 | { 532 | if (!isa(indexOps[i])) 533 | indexOps[i] = ConstantInt::get(Type::getInt32Ty(ptrTy->getContext()), 0); 534 | } 535 | offset += DL->getIndexedOffsetInType(ptrTy, indexOps); 536 | return offset; 537 | } 538 | 539 | -------------------------------------------------------------------------------- /src/lib/MLTA.cc: -------------------------------------------------------------------------------- 1 | //===-- CallGraph.cc - Build global call-graph------------------===// 2 | // 3 | // This pass builds a global call-graph. The targets of an indirect 4 | // call are identified based on various type-based analyses. 5 | // 6 | //===-----------------------------------------------------------===// 7 | 8 | #include "llvm/Pass.h" 9 | #include "llvm/IR/Module.h" 10 | #include "llvm/IR/Function.h" 11 | #include "llvm/IR/BasicBlock.h" 12 | #include "llvm/IR/Instruction.h" 13 | #include "llvm/IR/Instructions.h" 14 | #include "llvm/Support/Debug.h" 15 | #include "llvm/IR/DebugInfo.h" 16 | #include "llvm/IR/InstIterator.h" 17 | #include "llvm/IR/Constants.h" 18 | #include "llvm/ADT/StringExtras.h" 19 | #include "llvm/Analysis/CallGraph.h" 20 | #include "llvm/Support/raw_ostream.h" 21 | #include "llvm/IR/InstrTypes.h" 22 | #include "llvm/Analysis/LoopInfo.h" 23 | #include "llvm/Analysis/LoopPass.h" 24 | #include "llvm/IR/LegacyPassManager.h" 25 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" 26 | #include "llvm/IR/IRBuilder.h" 27 | #include "llvm/IR/CFG.h" 28 | 29 | #include "Common.h" 30 | #include "MLTA.h" 31 | 32 | #include 33 | #include 34 | 35 | 36 | using namespace llvm; 37 | 38 | 39 | // 40 | // Implementation 41 | // 42 | pair typeidx_c(Type *Ty, int Idx) { 43 | return make_pair(Ty, Idx); 44 | } 45 | pair hashidx_c(size_t Hash, int Idx) { 46 | return make_pair(Hash, Idx); 47 | } 48 | 49 | bool MLTA::fuzzyTypeMatch(Type *Ty1, Type *Ty2, 50 | Module *M1, Module *M2) { 51 | 
52 | if (Ty1 == Ty2) 53 | return true; 54 | 55 | while (Ty1->isPointerTy() && Ty2->isPointerTy()) { 56 | Ty1 = Ty1->getPointerElementType(); 57 | Ty2 = Ty2->getPointerElementType(); 58 | } 59 | 60 | if (Ty1->isStructTy() && Ty2->isStructTy() && 61 | (Ty1->getStructName().equals(Ty2->getStructName()))) 62 | return true; 63 | if (Ty1->isIntegerTy() && Ty2->isIntegerTy() && 64 | Ty1->getIntegerBitWidth() == Ty2->getIntegerBitWidth()) 65 | return true; 66 | // TODO: more types to be supported. 67 | 68 | // Make the type analysis conservative: assume general 69 | // pointers, i.e., "void *" and "char *", are equivalent to 70 | // any pointer type and integer type. 71 | if ( 72 | (Ty1 == Int8PtrTy[M1] && 73 | (Ty2->isPointerTy() || Ty2 == IntPtrTy[M2])) 74 | || 75 | (Ty2 == Int8PtrTy[M1] && 76 | (Ty1->isPointerTy() || Ty1 == IntPtrTy[M2])) 77 | ) 78 | return true; 79 | 80 | return false; 81 | } 82 | 83 | 84 | // Find targets of indirect calls based on function-type analysis: as 85 | // long as the number and type of parameters of a function matches 86 | // with the ones of the callsite, we say the function is a possible 87 | // target of this call. 88 | void MLTA::findCalleesWithType(CallInst *CI, FuncSet &S) { 89 | 90 | if (CI->isInlineAsm()) 91 | return; 92 | 93 | // 94 | // Performance improvement: cache results for types 95 | // 96 | size_t CIH = callHash(CI); 97 | if (MatchedFuncsMap.find(CIH) != MatchedFuncsMap.end()) { 98 | if (!MatchedFuncsMap[CIH].empty()) 99 | S.insert(MatchedFuncsMap[CIH].begin(), 100 | MatchedFuncsMap[CIH].end()); 101 | return; 102 | } 103 | 104 | CallBase *CB = dyn_cast(CI); 105 | for (Function *F : Ctx->AddressTakenFuncs) { 106 | // VarArg 107 | if (F->getFunctionType()->isVarArg()) { 108 | // Compare only known args in VarArg. 109 | } 110 | // otherwise, the numbers of args should be equal. 
111 | else if (F->arg_size() != CB->arg_size()) { 112 | continue; 113 | } 114 | 115 | if (F->isIntrinsic()) { 116 | continue; 117 | } 118 | 119 | // Types completely match 120 | if (callHash(CI) == funcHash(F)) { 121 | S.insert(F); 122 | continue; 123 | } 124 | 125 | Module *CalleeM = F->getParent(); 126 | Module *CallerM = CI->getFunction()->getParent(); 127 | 128 | // Type matching on args. 129 | bool Matched = true; 130 | User::op_iterator AI = CB->arg_begin(); 131 | for (Function::arg_iterator FI = F->arg_begin(), 132 | FE = F->arg_end(); 133 | FI != FE; ++FI, ++AI) { 134 | // Check type mis-matches. 135 | // Get defined type on callee side. 136 | Type *DefinedTy = FI->getType(); 137 | // Get actual type on caller side. 138 | Type *ActualTy = (*AI)->getType(); 139 | 140 | if (!fuzzyTypeMatch(DefinedTy, ActualTy, CalleeM, CallerM)) { 141 | Matched = false; 142 | break; 143 | } 144 | } 145 | 146 | // If args are matched, further check return types 147 | if (Matched) { 148 | Type *RTy1 = F->getReturnType(); 149 | Type *RTy2 = CI->getType(); 150 | if (!fuzzyTypeMatch(RTy1, RTy2, CalleeM, CallerM)) { 151 | Matched = false; 152 | } 153 | } 154 | 155 | if (Matched) { 156 | S.insert(F); 157 | } 158 | } 159 | MatchedFuncsMap[CIH] = S; 160 | } 161 | 162 | 163 | void MLTA::unrollLoops(Function *F) { 164 | 165 | if (F->isDeclaration()) 166 | return; 167 | 168 | DominatorTree DT = DominatorTree(); 169 | DT.recalculate(*F); 170 | LoopInfo *LI = new LoopInfo(); 171 | LI->releaseMemory(); 172 | LI->analyze(DT); 173 | 174 | // Collect all loops in the function 175 | set LPSet; 176 | for (LoopInfo::iterator i = LI->begin(), e = LI->end(); i!=e; ++i) { 177 | 178 | Loop *LP = *i; 179 | LPSet.insert(LP); 180 | 181 | list LPL; 182 | 183 | LPL.push_back(LP); 184 | while (!LPL.empty()) { 185 | LP = LPL.front(); 186 | LPL.pop_front(); 187 | vector SubLPs = LP->getSubLoops(); 188 | for (auto SubLP : SubLPs) { 189 | LPSet.insert(SubLP); 190 | LPL.push_back(SubLP); 191 | } 192 | } 193 | } 
194 | 195 | for (Loop *LP : LPSet) { 196 | 197 | // Get the header,latch block, exiting block of every loop 198 | BasicBlock *HeaderB = LP->getHeader(); 199 | 200 | unsigned NumBE = LP->getNumBackEdges(); 201 | SmallVector LatchBS; 202 | 203 | LP->getLoopLatches(LatchBS); 204 | 205 | for (BasicBlock *LatchB : LatchBS) { 206 | if (!HeaderB || !LatchB) { 207 | OP<<"ERROR: Cannot find Header Block or Latch Block\n"; 208 | continue; 209 | } 210 | // Two cases: 211 | // 1. Latch Block has only one successor: 212 | // for loop or while loop; 213 | // In this case: set the Successor of Latch Block to the 214 | // successor block (out of loop one) of Header block 215 | // 2. Latch Block has two successor: 216 | // do-while loop: 217 | // In this case: set the Successor of Latch Block to the 218 | // another successor block of Latch block 219 | 220 | // get the last instruction in the Latch block 221 | Instruction *TI = LatchB->getTerminator(); 222 | // Case 1: 223 | if (LatchB->getSingleSuccessor() != NULL) { 224 | for (succ_iterator sit = succ_begin(HeaderB); 225 | sit != succ_end(HeaderB); ++sit) { 226 | 227 | BasicBlock *SuccB = *sit; 228 | BasicBlockEdge BBE = BasicBlockEdge(HeaderB, SuccB); 229 | // Header block has two successor, 230 | // one edge dominate Latch block; 231 | // another does not. 
232 | if (DT.dominates(BBE, LatchB)) 233 | continue; 234 | else { 235 | TI->setSuccessor(0, SuccB); 236 | } 237 | } 238 | } 239 | // Case 2: 240 | else { 241 | for (succ_iterator sit = succ_begin(LatchB); 242 | sit != succ_end(LatchB); ++sit) { 243 | 244 | BasicBlock *SuccB = *sit; 245 | // There will be two successor blocks, one is header 246 | // we need successor to be another 247 | if (SuccB == HeaderB) 248 | continue; 249 | else{ 250 | TI->setSuccessor(0, SuccB); 251 | } 252 | } 253 | } 254 | } 255 | } 256 | } 257 | 258 | bool MLTA::isCompositeType(Type *Ty) { 259 | if (Ty->isStructTy() 260 | || Ty->isArrayTy() 261 | || Ty->isVectorTy()) 262 | return true; 263 | else 264 | return false; 265 | } 266 | 267 | Type *MLTA::getFuncPtrType(Value *V) { 268 | Type *Ty = V->getType(); 269 | if (PointerType *PTy = dyn_cast(Ty)) { 270 | Type *ETy = PTy->getPointerElementType(); 271 | if (ETy->isFunctionTy()) 272 | return ETy; 273 | } 274 | 275 | return NULL; 276 | } 277 | 278 | Value *MLTA::recoverBaseType(Value *V) { 279 | if (Instruction *I = dyn_cast(V)) { 280 | map &AliasMap 281 | = AliasStructPtrMap[I->getFunction()]; 282 | if (AliasMap.find(V) != AliasMap.end()) { 283 | return AliasMap[V]; 284 | } 285 | } 286 | return NULL; 287 | } 288 | 289 | // This function analyzes globals to collect information about which 290 | // types functions have been assigned to. 291 | // The analysis is field sensitive. 
292 | bool MLTA::typeConfineInInitializer(GlobalVariable *GV) { 293 | 294 | Constant *Ini = GV->getInitializer(); 295 | if (!isa(Ini)) 296 | return false; 297 | 298 | list>NestedInit; 299 | map>ContainersMap; 300 | setFuncOperands; 301 | listLU; 302 | setVisited; 303 | LU.push_back(Ini); 304 | 305 | while (!LU.empty()) { 306 | User *U = LU.front(); 307 | LU.pop_front(); 308 | if (Visited.find(U) != Visited.end()) { 309 | continue; 310 | } 311 | Visited.insert(U); 312 | 313 | Type *UTy = U->getType(); 314 | assert(!UTy->isFunctionTy()); 315 | 316 | if (StructType *STy = dyn_cast(U->getType())) { 317 | if (U->getNumOperands() > 0) 318 | assert(STy->getNumElements() == U->getNumOperands()); 319 | else 320 | continue; 321 | } 322 | 323 | for (auto oi = U->op_begin(), oe = U->op_end(); 324 | oi != oe; ++oi) { 325 | 326 | Value *O = *oi; 327 | Type *OTy = O->getType(); 328 | 329 | ContainersMap[O] = make_pair(U, oi->getOperandNo()); 330 | 331 | Function *FoundF = NULL; 332 | // Case 1: function address is assigned to a type 333 | if (Function *F = dyn_cast(O)) { 334 | FoundF = F; 335 | } 336 | // Case 2: a composite-type object (value) is assigned to a 337 | // field of another composite-type object 338 | else if (isCompositeType(OTy)) { 339 | // confine composite types 340 | Type *ITy = U->getType(); 341 | int ONo = oi->getOperandNo(); 342 | 343 | // recognize nested composite types 344 | User *OU = dyn_cast(O); 345 | LU.push_back(OU); 346 | } 347 | else if (PtrToIntOperator *PIO = dyn_cast(O)) { 348 | 349 | Function *F = dyn_cast(PIO->getOperand(0)); 350 | if (F) 351 | FoundF = F; 352 | else { 353 | User *OU = dyn_cast(PIO->getOperand(0)); 354 | LU.push_back(OU); 355 | } 356 | } 357 | // now consider if it is a bitcast from a function 358 | // address 359 | else if (BitCastOperator *CO = dyn_cast(O)) { 360 | // Virtual functions will always be cast by 361 | // inserting the first parameter 362 | Function *CF = dyn_cast(CO->getOperand(0)); 363 | if (CF) { 364 | Type 
*ITy = U->getType(); 365 | // FIXME: Assume this is VTable 366 | if (!ITy->isStructTy()) { 367 | VTableFuncsMap[GV].insert(CF); 368 | } 369 | 370 | FoundF = CF; 371 | } 372 | else { 373 | User *OU = dyn_cast(CO->getOperand(0)); 374 | LU.push_back(OU); 375 | } 376 | } 377 | // Case 3: a reference (i.e., pointer) of a composite-type 378 | // object is assigned to a field of another composite-type 379 | // object 380 | else if (PointerType *POTy = dyn_cast(OTy)) { 381 | if (isa(O)) 382 | continue; 383 | // if the pointer points a composite type, conservatively 384 | // treat it as a type cap (we cannot get the next-layer type 385 | // if the type is a cap) 386 | User *OU = dyn_cast(O); 387 | LU.push_back(OU); 388 | if (GlobalVariable *GO = dyn_cast(OU)) { 389 | Type *Ty = POTy->getPointerElementType(); 390 | // FIXME: take it as a confinement instead of a cap 391 | if (Ty->isStructTy()) 392 | typeCapSet.insert(typeHash(Ty)); 393 | } 394 | } 395 | else { 396 | // TODO: Type escaping? 397 | } 398 | 399 | // Found a function 400 | if (FoundF && !FoundF->isIntrinsic()) { 401 | 402 | // "llvm.compiler.used" indicates that the linker may touch 403 | // it, so do not apply MLTA against them 404 | if (GV->getName() != "llvm.compiler.used") 405 | StoredFuncs.insert(FoundF); 406 | 407 | // Add the function type to all containers 408 | Value *CV = O; 409 | setVisited; // to avoid loop 410 | while (ContainersMap.find(CV) != ContainersMap.end()) { 411 | auto Container = ContainersMap[CV]; 412 | 413 | Type *CTy = Container.first->getType(); 414 | set TyHS; 415 | if (StructType *STy = dyn_cast(CTy)) { 416 | structTypeHash(STy, TyHS); 417 | } 418 | else 419 | TyHS.insert(typeHash(CTy)); 420 | 421 | DBG<<"[INSERT-INIT] Container type: "<<*CTy 422 | <<"; Idx: "< FUNC: "<getName()<<"; Module: " 424 | <getParent()->getName()<<"\n"; 425 | 426 | for (auto TyH : TyHS) { 427 | #ifdef MLTA_FIELD_INSENSITIVE 428 | typeIdxFuncsMap[TyH][0].insert(FoundF); 429 | #else 430 | 
typeIdxFuncsMap[TyH][Container.second].insert(FoundF); 431 | #endif 432 | DBG<<"[HASH] "<(I)) { 460 | Value *PO = SI->getPointerOperand(); 461 | Value *VO = SI->getValueOperand(); 462 | 463 | Function *CF = getBaseFunction(VO->stripPointerCasts()); 464 | if (!CF) 465 | continue; 466 | if (F->isIntrinsic()) 467 | continue; 468 | 469 | confineTargetFunction(PO, CF); 470 | } 471 | else if (CallInst *CI = dyn_cast(I)) { 472 | for (User::op_iterator OI = I->op_begin(), 473 | OE = I->op_end(); 474 | OI != OE; ++OI) { 475 | if (Function *F = dyn_cast(*OI)) { 476 | if (F->isIntrinsic()) 477 | continue; 478 | if (CI->isIndirectCall()) { 479 | confineTargetFunction(*OI, F); 480 | continue; 481 | } 482 | Value *CV = CI->getCalledOperand(); 483 | Function *CF = dyn_cast(CV); 484 | if (!CF) 485 | continue; 486 | if (CF->isDeclaration()) 487 | CF = Ctx->GlobalFuncMap[CF->getGUID()]; 488 | if (!CF) 489 | continue; 490 | if (Argument *Arg = getParamByArgNo(CF, OI->getOperandNo())) { 491 | for (auto U : Arg->users()) { 492 | confineTargetFunction(U, F); 493 | } 494 | } 495 | // TODO: track into the callee to avoid marking the 496 | // function type as a cap 497 | } 498 | } 499 | } 500 | else if (ReturnInst *RI = dyn_cast(I)) { 501 | Value* RV = RI->getReturnValue(); 502 | if (!RV) 503 | continue; 504 | Function *CF = dyn_cast(RV); 505 | if (!CF) 506 | continue; 507 | if (F->isIntrinsic()) 508 | continue; 509 | confineTargetFunction(RI, CF); 510 | } 511 | } 512 | 513 | return true; 514 | } 515 | 516 | bool MLTA::typePropInFunction(Function *F) { 517 | 518 | // Two cases for propagation: store and cast. 
519 | // For store, LLVM may use memcpy 520 | setCastSet; 521 | for (inst_iterator i = inst_begin(F), e = inst_end(F); 522 | i != e; ++i) { 523 | 524 | Instruction *I = &*i; 525 | 526 | Value *PO = NULL, *VO = NULL; 527 | if (StoreInst *SI = dyn_cast(I)) { 528 | PO = SI->getPointerOperand(); 529 | VO = SI->getValueOperand(); 530 | } 531 | else if (CallInst *CI = dyn_cast(I)) { 532 | Value *CV = CI->getCalledOperand(); 533 | Function *CF = dyn_cast(CV); 534 | if (CF) { 535 | // LLVM may optimize struct assignment into a call to 536 | // intrinsic memcpy 537 | if (CF->getName() == "llvm.memcpy.p0i8.p0i8.i64") { 538 | PO = CI->getOperand(0); 539 | VO = CI->getOperand(1); 540 | } 541 | } 542 | } 543 | 544 | if (PO && VO) { 545 | // 546 | // TODO: if VO is a global with an initializer, this should be 547 | // taken as a confinement instead of propagation, which can 548 | // improve the precision 549 | // 550 | if (isa(VO) || isa(VO)) 551 | continue; 552 | 553 | listTyList; 554 | Value *NextV = NULL; 555 | set Visited; 556 | nextLayerBaseType(VO, TyList, NextV, Visited); 557 | if (!TyList.empty()) { 558 | for (auto TyIdx : TyList) { 559 | propagateType(PO, TyIdx.first, TyIdx.second); 560 | } 561 | continue; 562 | } 563 | 564 | Visited.clear(); 565 | Type *BTy = getBaseType(VO, Visited); 566 | // Composite type 567 | if (BTy) { 568 | propagateType(PO, BTy); 569 | continue; 570 | } 571 | 572 | Type *FTy = getFuncPtrType(VO->stripPointerCasts()); 573 | // Function-pointer type 574 | if (FTy) { 575 | if (!getBaseFunction(VO)) { 576 | propagateType(PO, FTy); 577 | continue; 578 | } 579 | else 580 | continue; 581 | } 582 | 583 | if (!VO->getType()->isPointerTy()) 584 | continue; 585 | else { 586 | // General-pointer type for escaping 587 | escapeType(PO); 588 | } 589 | 590 | } 591 | 592 | 593 | // Handle casts 594 | if (CastInst *CastI = dyn_cast(I)) { 595 | // Record the cast, handle later 596 | CastSet.insert(CastI); 597 | } 598 | 599 | // Operands of instructions can be 
BitCastOperator 600 | for (User::op_iterator OI = I->op_begin(), 601 | OE = I->op_end(); 602 | OI != OE; ++OI) { 603 | if (BitCastOperator *CO = dyn_cast(*OI)) { 604 | CastSet.insert(CO); 605 | } 606 | } 607 | } 608 | 609 | for (auto Cast : CastSet) { 610 | 611 | // TODO: we may not need to handle casts as casts are already 612 | // stripped out in confinement and propagation analysis. Also for 613 | // a function pointer to propagate, it is supposed to be stored 614 | // in memory. 615 | 616 | // The conservative escaping policy can be optimized 617 | Type *FromTy = Cast->getOperand(0)->getType(); 618 | Type *ToTy = Cast->getType(); 619 | if (FromTy->isPointerTy() && ToTy->isPointerTy()) { 620 | Type *EFromTy = FromTy->getPointerElementType(); 621 | Type *EToTy = ToTy->getPointerElementType(); 622 | if (EFromTy->isStructTy() && EToTy->isStructTy()) { 623 | //propagateType(Cast, EFromTy, -1); 624 | } 625 | } 626 | } 627 | 628 | return true; 629 | } 630 | 631 | // This function precisely collect alias types for general pointers 632 | void MLTA::collectAliasStructPtr(Function *F) { 633 | 634 | map &AliasMap = AliasStructPtrMap[F]; 635 | setToErase; 636 | for (inst_iterator i = inst_begin(F), e = inst_end(F); 637 | i != e; ++i) { 638 | 639 | Instruction *I = &*i; 640 | 641 | if (CastInst *CI = dyn_cast(I)) { 642 | Value *FromV = CI->getOperand(0); 643 | // TODO: we only consider calls for now 644 | if (!isa(FromV)) 645 | continue; 646 | 647 | Type *FromTy = FromV->getType(); 648 | Type *ToTy = CI->getType(); 649 | if (Int8PtrTy[F->getParent()] != FromTy) 650 | continue; 651 | 652 | if (!ToTy->isPointerTy()) 653 | continue; 654 | 655 | if (!isCompositeType(ToTy->getPointerElementType())) 656 | continue; 657 | 658 | if (AliasMap.find(FromV) != AliasMap.end()) { 659 | ToErase.insert(FromV); 660 | continue; 661 | } 662 | AliasMap[FromV] = CI; 663 | } 664 | } 665 | for (auto Erase : ToErase) 666 | AliasMap.erase(Erase); 667 | } 668 | 669 | 670 | void MLTA::escapeType(Value 
*V) { 671 | 672 | list TyChain; 673 | bool Complete = true; 674 | getBaseTypeChain(TyChain, V, Complete); 675 | for (auto T : TyChain) { 676 | DBG<<"[Escape] Type: "<<*(T.first)<<"; Idx: "<isIntrinsic()) 683 | return; 684 | 685 | StoredFuncs.insert(F); 686 | 687 | list TyChain; 688 | bool Complete = true; 689 | getBaseTypeChain(TyChain, V, Complete); 690 | for (auto TI : TyChain) { 691 | DBG<<"[INSERT-FUNC] Container type: "<<*(TI.first)<<"; Idex: "< FUNC: "<getName()<<"; Module: " 693 | <getParent()->getName()<<"\n"; 694 | DBG<<"[HASH] "< TyChain; 708 | bool Complete = true; 709 | getBaseTypeChain(TyChain, ToV, Complete); 710 | for (auto T : TyChain) { 711 | 712 | if (typeHash(T.first) == typeHash(FromTy) && T.second == Idx) 713 | continue; 714 | 715 | typeIdxPropMap[typeHash(T.first)] 716 | [T.second].insert(hashidx_c(typeHash(FromTy), Idx)); 717 | DBG<<"[PROP] "<<*(FromTy)<<": "< "<<*(T.first)<<" "<(V)) { 734 | return getVTable(BCO->getOperand(0)); 735 | } 736 | else if (GEPOperator *GEP = dyn_cast(V)) { 737 | return getVTable(GEP->getPointerOperand()); 738 | } 739 | else if (VTableFuncsMap.find(V) != VTableFuncsMap.end()) 740 | return V; 741 | else 742 | return NULL; 743 | } 744 | 745 | 746 | void MLTA::saveCalleesInfo(CallInst *CI, FuncSet &FS, 747 | bool mlta) { 748 | 749 | DISubprogram *SP = CI->getParent()->getParent()->getSubprogram(); 750 | string CallerFN = SP->getFilename().str(); 751 | #ifdef EVAL_FN_FIRFOX 752 | size_t pos = CallerFN.find("gecko-dev"); 753 | if (pos != string::npos) { 754 | CallerFN = CallerFN.substr(pos + 10); 755 | } 756 | #else 757 | trimPathSlash(CallerFN, 2); 758 | #endif 759 | DILocation *Loc = getSourceLocation(CI); 760 | if (!Loc) 761 | return; 762 | int CallerLn = Loc->getLine(); 763 | size_t callerhash = strIntHash(CallerFN, CallerLn); 764 | 765 | for (auto F : FS) { 766 | DISubprogram *CalleeSP = F->getSubprogram(); 767 | string CalleeFN = CalleeSP->getFilename().str(); 768 | #ifdef EVAL_FN_FIRFOX 769 | pos = 
CalleeFN.find("gecko-dev"); 770 | if (pos != string::npos) { 771 | CalleeFN = CalleeFN.substr(pos + 10); 772 | } 773 | #else 774 | trimPathSlash(CalleeFN, 2); 775 | #endif 776 | int CalleeLn = CalleeSP->getLine(); 777 | size_t calleehash = strIntHash(CalleeFN, CalleeLn); 778 | srcLnHashSet.insert(calleehash); 779 | // adapt to the inaccracy in reports 780 | for (int i = CalleeLn - 2; i < CalleeLn + 5; ++i) { 781 | if (mlta) 782 | calleesSrcMap[callerhash].insert(strIntHash(CalleeFN, i)); 783 | else 784 | L1CalleesSrcMap[callerhash].insert(strIntHash(CalleeFN, i)); 785 | } 786 | } 787 | } 788 | 789 | void MLTA::printTypeChain(list &Chain) { 790 | if (Chain.empty()) 791 | return; 792 | 793 | for (list::iterator it = Chain.begin(); 794 | it != Chain.end(); ++it) { 795 | typeidx_t TI = *it; 796 | OP<<"--<"<<*(TI.first)<<", "<"; 797 | } 798 | OP<<"\n"; 799 | } 800 | 801 | void MLTA::printTargets(FuncSet &FS, CallInst *CI) { 802 | 803 | if (CI) { 804 | #ifdef PRINT_SOURCE_LINE 805 | OP<<"[CallGraph] Indirect call: "<<*CI<<"\n"; 806 | OP<getModule()->getName()<<"\n"; 807 | #endif 808 | printSourceCodeInfo(CI, "CALLER"); 809 | //WriteSourceInfoIntoFile(CI, "IcallInfo.txt"); 810 | } 811 | OP<<"\n\t Indirect-call targets: ("<isDeclaration()) { 814 | OP<<"ERROR: print declaration function: "<getName()<<"\n"; 815 | continue; 816 | } 817 | printSourceCodeInfo(F, "TARGET"); 818 | } 819 | OP<<"\n"; 820 | 821 | #if 0 822 | std::ofstream oFile; 823 | oFile.open("IcallInfo.txt", std::ios::out | std::ios::app); 824 | oFile<<"\n"; 825 | oFile.close(); 826 | #endif 827 | } 828 | 829 | // Get the chain of base types for V 830 | // Complete: whether the chain's end is not escaping---it won't 831 | // propagate further 832 | bool MLTA::getBaseTypeChain(list &Chain, Value *V, 833 | bool &Complete) { 834 | 835 | Complete = true; 836 | Value *CV = V, *NextV = NULL; 837 | list TyList; 838 | setVisited; 839 | 840 | Type *BTy = getBaseType(V, Visited); 841 | if (BTy) { 842 | // 0 vs. -1? 
843 | Chain.push_back(typeidx_c(BTy, 0)); 844 | } 845 | Visited.clear(); 846 | 847 | while (nextLayerBaseType(CV, TyList, NextV, Visited)) { 848 | CV = NextV; 849 | } 850 | for (auto TyIdx : TyList) { 851 | Chain.push_back(typeidx_c(TyIdx.first, TyIdx.second)); 852 | } 853 | 854 | // Checking completeness 855 | if (!NextV) { 856 | Complete = false; 857 | } 858 | else if (isa(NextV) && NextV->getType()->isPointerTy()) { 859 | Complete = false; 860 | } 861 | else { 862 | for (auto U : NextV->users()) { 863 | if (StoreInst *SI = dyn_cast(U)) { 864 | if (NextV == SI->getPointerOperand()) { 865 | Complete = false; 866 | break; 867 | } 868 | } 869 | } 870 | // TODO: other cases like store? 871 | } 872 | 873 | if (!Chain.empty() && !Complete) { 874 | typeCapSet.insert(typeHash(Chain.back().first)); 875 | } 876 | 877 | return true; 878 | } 879 | 880 | // This function is to get the base type in the current layer. 881 | // To get the type of next layer (with GEP and Load), use 882 | // nextLayerBaseType() instead. 
883 | Type *MLTA::getBaseType(Value *V, set &Visited) { 884 | 885 | if (!V) 886 | return NULL; 887 | 888 | if (Visited.find(V) != Visited.end()) 889 | return NULL; 890 | Visited.insert(V); 891 | 892 | Type *Ty = V->getType(); 893 | 894 | if (isCompositeType(Ty)) { 895 | return Ty; 896 | } 897 | // The value itself is a pointer to a composite type 898 | else if (Ty->isPointerTy()) { 899 | 900 | Type *ETy = Ty->getPointerElementType(); 901 | if (isCompositeType(ETy)) { 902 | return ETy; 903 | } 904 | else if (Value *BV = recoverBaseType(V)) 905 | return BV->getType()->getPointerElementType(); 906 | } 907 | 908 | if (BitCastOperator *BCO = 909 | dyn_cast(V)) { 910 | return getBaseType(BCO->getOperand(0), Visited); 911 | } 912 | else if (SelectInst *SelI = dyn_cast(V)) { 913 | // Assuming both operands have same type, so pick the first 914 | // operand 915 | return getBaseType(SelI->getTrueValue(), Visited); 916 | } 917 | else if (PHINode *PN = dyn_cast(V)) { 918 | // TODO: tracking incoming values 919 | return _getPhiBaseType(PN, Visited); 920 | } 921 | else if (LoadInst *LI = dyn_cast(V)) { 922 | return getBaseType(LI->getPointerOperand(), Visited); 923 | } 924 | else if (Type *PTy = dyn_cast(Ty)) { 925 | // ?? 
926 | } 927 | else { 928 | } 929 | 930 | return NULL; 931 | } 932 | 933 | Type *MLTA::_getPhiBaseType(PHINode *PN, set &Visited) { 934 | 935 | for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 936 | Value *IV = PN->getIncomingValue(i); 937 | 938 | Type *BTy = getBaseType(IV, Visited); 939 | if (BTy) 940 | return BTy; 941 | } 942 | 943 | return NULL; 944 | } 945 | 946 | bool MLTA::getGEPLayerTypes(GEPOperator *GEP, list &TyList) { 947 | 948 | Value *PO = GEP->getPointerOperand(); 949 | Type *ETy = GEP->getSourceElementType(); 950 | 951 | vector Indices; 952 | list TmpTyList; 953 | // FIXME: handle downcasting: the GEP may get a field outside the 954 | // base type 955 | // Or use O0 to avoid this issue 956 | ConstantInt *ConstI = 957 | dyn_cast(GEP->idx_begin()->get()); 958 | if (ConstI && ConstI->getSExtValue() != 0) { 959 | 960 | // 961 | // FIXME: The following is an attempt to handle the intentional 962 | // out-of-bound access; however, it is not fully working, so I 963 | // skip it for now 964 | // 965 | Instruction *I = dyn_cast(PO); 966 | Value *BV = recoverBaseType(PO); 967 | if (BV) { 968 | ETy = BV->getType()->getPointerElementType(); 969 | APInt Offset (ConstI->getBitWidth(), 970 | ConstI->getZExtValue()); 971 | Type *BaseTy = ETy; 972 | SmallVectorIndiceV = DLMap[I->getModule()] 973 | ->getGEPIndicesForOffset(BaseTy, Offset); 974 | for (auto Idx : IndiceV) { 975 | Indices.push_back(*Idx.getRawData()); 976 | } 977 | } 978 | else if (StructType *STy = dyn_cast(ETy)) { 979 | 980 | bool OptGEP = false; 981 | for (auto User : GEP->users()) { 982 | if (BitCastOperator *BCO = 983 | dyn_cast(User)) { 984 | OptGEP = true; 985 | #ifdef SOUND_MODE 986 | // TODO: This conservative decision results may cases 987 | // disqualifying MLTA. 
Need an analysis to recover the base 988 | // types, or use O0 to avoid the optimization 989 | return false; 990 | #endif 991 | } 992 | } 993 | } 994 | } 995 | 996 | if (Indices.empty()) { 997 | for (auto it = GEP->idx_begin(); it != GEP->idx_end(); it++) { 998 | ConstantInt *ConstI = dyn_cast(it->get()); 999 | if (ConstI) 1000 | Indices.push_back(ConstI->getSExtValue()); 1001 | else 1002 | Indices.push_back(-1); 1003 | } 1004 | } 1005 | 1006 | 1007 | for (auto it = Indices.begin() + 1; it != Indices.end(); it++) { 1008 | 1009 | int Idx = *it; 1010 | #ifdef MLTA_FIELD_INSENSITIVE 1011 | TmpTyList.push_front(typeidx_c(ETy, 0)); 1012 | #else 1013 | TmpTyList.push_front(typeidx_c(ETy, Idx)); 1014 | #endif 1015 | 1016 | // Continue to parse subty 1017 | Type* SubTy = NULL; 1018 | if (StructType *STy = dyn_cast(ETy)) { 1019 | SubTy = STy->getElementType(Idx); 1020 | } 1021 | else if (ArrayType *ATy = dyn_cast(ETy)) { 1022 | SubTy = ATy->getElementType(); 1023 | } 1024 | else if (VectorType *VTy = dyn_cast(ETy)) { 1025 | SubTy = VTy->getElementType(); 1026 | } 1027 | assert(SubTy); 1028 | 1029 | ETy = SubTy; 1030 | } 1031 | // This is a trouble caused by compiler optimization that 1032 | // eliminates the access path when the index of a field is 0. 
1033 | // Conservatively assume a base-struct pointer can serve as a 1034 | // pointer to its first field 1035 | StructType *STy = dyn_cast(ETy); 1036 | if (STy && STy->getNumElements() > 0) { 1037 | // Get the type of its first field 1038 | Type *Ty0 = STy->getElementType(0); 1039 | for (auto U : GEP->users()) { 1040 | if (BitCastOperator *BCO = dyn_cast(U)) { 1041 | if (PointerType *PTy 1042 | = dyn_cast(BCO->getType())) { 1043 | 1044 | Type *ToTy = PTy->getPointerElementType(); 1045 | if (Ty0 == ToTy) 1046 | TmpTyList.push_front(typeidx_c(ETy, 0)); 1047 | } 1048 | } 1049 | } 1050 | } 1051 | 1052 | if (!TmpTyList.empty()) { 1053 | // Reorder 1054 | for (auto TyIdx : TmpTyList) { 1055 | TyList.push_back(TyIdx); 1056 | } 1057 | return true; 1058 | } 1059 | else 1060 | return false; 1061 | } 1062 | 1063 | bool MLTA::nextLayerBaseTypeWL(Value *V, list &TyList, 1064 | Value * &NextV) { 1065 | 1066 | list VL; 1067 | setVisited; 1068 | VL.push_back(V); 1069 | 1070 | while (!VL.empty()) { 1071 | 1072 | Value *CV = VL.front(); 1073 | VL.pop_front(); 1074 | if (Visited.find(CV) != Visited.end()) { 1075 | NextV = CV; 1076 | continue; 1077 | } 1078 | Visited.insert(CV); 1079 | 1080 | if (!CV || isa(CV)) { 1081 | NextV = CV; 1082 | continue; 1083 | } 1084 | 1085 | // The only way to get the next layer type: GetElementPtrInst or 1086 | // GEPOperator 1087 | if (GEPOperator *GEP = dyn_cast(V)) { 1088 | 1089 | NextV = GEP->getPointerOperand(); 1090 | getGEPLayerTypes(GEP, TyList); 1091 | continue; 1092 | } 1093 | else if (LoadInst *LI = dyn_cast(V)) { 1094 | 1095 | NextV = LI->getPointerOperand(); 1096 | VL.push_back(LI->getOperand(0)); 1097 | } 1098 | else if (BitCastOperator *BCO = 1099 | dyn_cast(V)) { 1100 | 1101 | NextV = BCO->getOperand(0); 1102 | VL.push_back(BCO->getOperand(0)); 1103 | } 1104 | // Phi and Select 1105 | else if (PHINode *PN = dyn_cast(V)) { 1106 | // FIXME: tracking incoming values 1107 | Value * PV = PN->getIncomingValue(PN->getNumIncomingValues() - 1); 
1108 | NextV = PV; 1109 | VL.push_back(PV); 1110 | } 1111 | else if (SelectInst *SelI = dyn_cast(V)) { 1112 | // Assuming both operands have same type, so just pick the 1113 | // first operand 1114 | NextV = SelI->getTrueValue(); 1115 | VL.push_back(SelI->getTrueValue()); 1116 | } 1117 | // Other unary instructions 1118 | // FIXME: may introduce false positives 1119 | else if (UnaryOperator *UO = dyn_cast(V)) { 1120 | 1121 | NextV = UO->getOperand(0); 1122 | VL.push_back(UO->getOperand(0)); 1123 | } 1124 | } 1125 | return (V != NextV); 1126 | } 1127 | 1128 | // Get the composite type of the lower layer. Layers are split by 1129 | // memory loads or GEP 1130 | bool MLTA::nextLayerBaseType(Value *V, list &TyList, 1131 | Value * &NextV, set &Visited) { 1132 | 1133 | if (!V || isa(V)) { 1134 | NextV = V; 1135 | return false; 1136 | } 1137 | 1138 | if (Visited.find(V) != Visited.end()) { 1139 | NextV = V; 1140 | return false; 1141 | } 1142 | Visited.insert(V); 1143 | 1144 | // The only way to get the next layer type: GetElementPtrInst or 1145 | // GEPOperator 1146 | if (GEPOperator *GEP = dyn_cast(V)) { 1147 | 1148 | NextV = GEP->getPointerOperand(); 1149 | bool ret = getGEPLayerTypes(GEP, TyList); 1150 | if (!ret) 1151 | NextV = NULL; 1152 | return ret; 1153 | } 1154 | else if (LoadInst *LI = dyn_cast(V)) { 1155 | 1156 | NextV = LI->getPointerOperand(); 1157 | return nextLayerBaseType(LI->getOperand(0), TyList, NextV, Visited); 1158 | } 1159 | else if (BitCastOperator *BCO = 1160 | dyn_cast(V)) { 1161 | 1162 | NextV = BCO->getOperand(0); 1163 | return nextLayerBaseType(BCO->getOperand(0), TyList, NextV, Visited); 1164 | } 1165 | // Phi and Select 1166 | else if (PHINode *PN = dyn_cast(V)) { 1167 | // FIXME: tracking incoming values 1168 | bool ret = false; 1169 | set NVisited; 1170 | list NTyList; 1171 | for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 1172 | Value *IV = PN->getIncomingValue(i); 1173 | NextV = IV; 1174 | NVisited = Visited; 1175 | 
NTyList = TyList; 1176 | ret = nextLayerBaseType(IV, NTyList, NextV, NVisited); 1177 | if (NTyList.size() > TyList.size()) { 1178 | break; 1179 | } 1180 | } 1181 | TyList = NTyList; 1182 | Visited = NVisited; 1183 | return ret; 1184 | } 1185 | else if (SelectInst *SelI = dyn_cast(V)) { 1186 | // Assuming both operands have same type, so pick the first 1187 | // operand 1188 | NextV = SelI->getTrueValue(); 1189 | return nextLayerBaseType(SelI->getTrueValue(), TyList, NextV, Visited); 1190 | } 1191 | // Other unary instructions 1192 | // FIXME: may introduce false positives 1193 | else if (UnaryOperator *UO = dyn_cast(V)) { 1194 | 1195 | NextV = UO->getOperand(0); 1196 | return nextLayerBaseType(UO->getOperand(0), TyList, NextV, Visited); 1197 | } 1198 | 1199 | NextV = NULL; 1200 | return false; 1201 | } 1202 | 1203 | bool MLTA::getDependentTypes(Type *Ty, int Idx, 1204 | set &PropSet) { 1205 | 1206 | listLT; 1207 | LT.push_back(hashidx_c(typeHash(Ty), Idx)); 1208 | setVisited; 1209 | 1210 | while (!LT.empty()) { 1211 | hashidx_t TI = LT.front(); 1212 | LT.pop_front(); 1213 | if (Visited.find(TI) != Visited.end()) { 1214 | continue; 1215 | } 1216 | Visited.insert(TI); 1217 | 1218 | for (auto Prop : typeIdxPropMap[TI.first][TI.second]) { 1219 | PropSet.insert(Prop); 1220 | LT.push_back(Prop); 1221 | } 1222 | for (auto Prop : typeIdxPropMap[TI.first][-1]) { 1223 | PropSet.insert(Prop); 1224 | LT.push_back(Prop); 1225 | } 1226 | } 1227 | return true; 1228 | } 1229 | 1230 | 1231 | Function *MLTA::getBaseFunction(Value *V) { 1232 | 1233 | if (Function *F = dyn_cast(V)) 1234 | if (!F->isIntrinsic()) 1235 | return F; 1236 | 1237 | Value *CV = V; 1238 | while (BitCastOperator *BCO 1239 | = dyn_cast(CV)) { 1240 | Value *O = BCO->getOperand(0); 1241 | if (Function *F = dyn_cast(O)) 1242 | if (!F->isIntrinsic()) 1243 | return F; 1244 | CV = O; 1245 | } 1246 | return NULL; 1247 | } 1248 | 1249 | // Get all possible targets of the given type 1250 | bool 
MLTA::getTargetsWithLayerType(size_t TyHash, int Idx, 1251 | FuncSet &FS) { 1252 | 1253 | // Get the direct funcset in the current layer, which 1254 | // will be further unioned with other targets from type 1255 | // casting 1256 | if (Idx == -1) { 1257 | for (auto FSet : typeIdxFuncsMap[TyHash]) { 1258 | FS.insert(FSet.second.begin(), FSet.second.end()); 1259 | } 1260 | } 1261 | else { 1262 | FS = typeIdxFuncsMap[TyHash][Idx]; 1263 | FS.insert(typeIdxFuncsMap[TyHash][-1].begin(), 1264 | typeIdxFuncsMap[TyHash][-1].end()); 1265 | } 1266 | 1267 | return true; 1268 | } 1269 | 1270 | // The API for MLTA: it returns functions for an indirect call 1271 | bool MLTA::findCalleesWithMLTA(CallInst *CI, 1272 | FuncSet &FS) { 1273 | 1274 | // Initial set: first-layer results 1275 | // TODO: handling virtual functions 1276 | FS = Ctx->sigFuncsMap[callHash(CI)]; 1277 | 1278 | if (FS.empty()) { 1279 | // No need to go through MLTA if the first layer is empty 1280 | return false; 1281 | } 1282 | 1283 | FuncSet FS1, FS2; 1284 | Type *PrevLayerTy = (dyn_cast(CI))->getFunctionType(); 1285 | int PrevIdx = -1; 1286 | Value *CV = CI->getCalledOperand(); 1287 | Value *NextV = NULL; 1288 | int LayerNo = 1; 1289 | 1290 | // Get the next-layer type 1291 | list TyList; 1292 | bool ContinueNextLayer = true; 1293 | while (ContinueNextLayer) { 1294 | 1295 | // Check conditions 1296 | if (LayerNo >= MAX_TYPE_LAYER) 1297 | break; 1298 | 1299 | #ifdef SOUND_MODE 1300 | if (typeCapSet.find(typeHash(PrevLayerTy)) != typeCapSet.end()) { 1301 | break; 1302 | } 1303 | #endif 1304 | 1305 | set Visited; 1306 | nextLayerBaseType(CV, TyList, NextV, Visited); 1307 | if (TyList.empty()) { 1308 | if (LayerNo == 1) { 1309 | //printSourceCodeInfo(CI, "NOBASE"); 1310 | } 1311 | break; 1312 | } 1313 | 1314 | for (auto TyIdx : TyList) { 1315 | 1316 | if (LayerNo >= MAX_TYPE_LAYER) 1317 | break; 1318 | ++LayerNo; 1319 | 1320 | DBG<<"[CONTAINER] Type: "<<*(TyIdx.first) 1321 | <<"; Idx: "< PropSet; 1369 | 
getDependentTypes(TyIdx.first, TyIdx.second, PropSet); 1370 | for (auto Prop : PropSet) { 1371 | getTargetsWithLayerType(Prop.first, Prop.second, FS2); 1372 | FS1.insert(FS2.begin(), FS2.end()); 1373 | } 1374 | MatchedFuncsMap[TyIdxHash] = FS1; 1375 | } 1376 | 1377 | // Next layer may not always have a subset of the previous layer 1378 | // because of casting, so let's do intersection 1379 | intersectFuncSets(FS1, FS, FS2); 1380 | FS = FS2; 1381 | 1382 | CV = NextV; 1383 | 1384 | #ifdef SOUND_MODE 1385 | if (typeCapSet.find(typeHash(TyIdx.first)) != typeCapSet.end()) { 1386 | ContinueNextLayer = false; 1387 | break; 1388 | } 1389 | #endif 1390 | 1391 | PrevLayerTy = TyIdx.first; 1392 | PrevIdx = TyIdx.second; 1393 | } 1394 | TyList.clear(); 1395 | } 1396 | 1397 | if (LayerNo > 1) { 1398 | Ctx->NumSecondLayerTypeCalls++; 1399 | Ctx->NumSecondLayerTargets += FS.size(); 1400 | } 1401 | else { 1402 | Ctx->NumFirstLayerTargets += Ctx->sigFuncsMap[callHash(CI)].size(); 1403 | Ctx->NumFirstLayerTypeCalls += 1; 1404 | } 1405 | 1406 | #if 0 1407 | FuncSet FSBase = Ctx->sigFuncsMap[callHash(CI)]; 1408 | saveCalleesInfo(CI, FSBase, false); 1409 | saveCalleesInfo(CI, FSBase, true); 1410 | #endif 1411 | 1412 | return true; 1413 | } 1414 | 1415 | 1416 | 1417 | 1418 | 1419 | //////////////////////////////////////////////////////////////// 1420 | // Deprecated code 1421 | //////////////////////////////////////////////////////////////// 1422 | #if 0 1423 | listLV; 1424 | LV.push_back(V); 1425 | 1426 | while (!LV.empty()) { 1427 | Value *CV = LV.front(); 1428 | LV.pop_front(); 1429 | 1430 | if (GEPOperator *GEP = dyn_cast(CV)) { 1431 | int Idx; 1432 | if (Type *BTy = getBaseType(CV, Idx)) { 1433 | // Add the tyep to the chain 1434 | Chain.push_back(typeidx_c(BTy, Idx)); 1435 | LV.push_back(GEP->getPointerOperand()); 1436 | } 1437 | else 1438 | continue; 1439 | } 1440 | else if (BitCastOperator *BCO = 1441 | dyn_cast(CV)) { 1442 | int Idx; 1443 | if (Type *BTy = getBaseType(CV, Idx)) 
{ 1444 | // Add the tyep to the chain 1445 | Chain.push_back(typeidx_c(BTy, Idx)); 1446 | } 1447 | else 1448 | continue; 1449 | } 1450 | else if (LoadInst *LI = dyn_cast(CV)) { 1451 | LV.push_back(LI->getPointerOperand()); 1452 | } 1453 | // Rcognizing escaping cases 1454 | else if (isa(CV) && CV->getType()->isPointerTy()){ 1455 | 1456 | Complete = false; 1457 | } 1458 | else { 1459 | for (auto U : CV->users()) { 1460 | if (StoreInst *SI = dyn_cast(U)) { 1461 | if (CV == SI->getPointerOperand()) 1462 | Complete = false; 1463 | } 1464 | } 1465 | // TODO: other cases like store? 1466 | } 1467 | } 1468 | 1469 | #endif 1470 | 1471 | 1472 | #if 0 1473 | listLV; 1474 | LV.push_back(V); 1475 | 1476 | while (!LV.empty()) { 1477 | Value *CV = LV.front(); 1478 | LV.pop_front(); 1479 | 1480 | if (GEPOperator *GEP = dyn_cast(CV)) { 1481 | int Idx; 1482 | if (Type *BTy = getBaseType(CV, Idx)) { 1483 | typeFuncsMap[typeIdxHash(BTy, Idx)].insert(F); 1484 | LV.push_back(GEP->getPointerOperand()); 1485 | } 1486 | else 1487 | continue; 1488 | } 1489 | else if (LoadInst *LI = dyn_cast(CV)) { 1490 | LV.push_back(LI->getPointerOperand()); 1491 | } 1492 | } 1493 | 1494 | bool MLTA::typeConfineInStore(StoreInst *SI) { 1495 | 1496 | Value *PO = SI->getPointerOperand(); 1497 | Value *VO = SI->getValueOperand(); 1498 | 1499 | #if 1 1500 | // 1501 | // Special handling for storing VTable pointers 1502 | // 1503 | if (BitCastOperator *BCO = dyn_cast(VO)) { 1504 | if (GEPOperator *GEP = 1505 | dyn_cast(BCO->getOperand(0))) { 1506 | if (Value *VT = 1507 | getVTable(GEP->getPointerOperand())) { 1508 | 1509 | int Idx; Value *NextV; 1510 | if (Type *BTy = nextLayerBaseType(PO, Idx, NextV)) { 1511 | FuncSet FS = VTableFuncsMap[VT]; 1512 | typeIdxFuncsMap[typeHash(BTy)][0].insert(FS.begin(), 1513 | FS.end()); 1514 | } 1515 | } 1516 | } 1517 | } 1518 | 1519 | #endif 1520 | 1521 | /////////////////////////////////////////////////// 1522 | 1523 | int IdxP; 1524 | Value *NextV; 1525 | Type *PBTy = 
nextLayerBaseType(PO, IdxP, NextV); 1526 | // Not targeting a composite-type, skip 1527 | if (!PBTy) 1528 | return false; 1529 | 1530 | 1531 | // Case 1: The value operand is a function 1532 | Function *F = dyn_cast(VO); 1533 | if (F) { 1534 | typeIdxFuncsMap[typeHash(PBTy)][IdxP].insert(F); 1535 | confineTargetFunction(PO, F); 1536 | return true; 1537 | } 1538 | 1539 | if (isa(VO)) 1540 | return false; 1541 | 1542 | Type *VTy = VO->getType(); 1543 | // Cast 2: value-based store 1544 | // A composite-type object is stored 1545 | // The target set will be expanded to include the ones from the 1546 | // value operaond 1547 | if (isCompositeType(VTy)) { 1548 | propagateType(PO, VTy); 1549 | return true; 1550 | } 1551 | // Case 3: reference (i.e., pointer)-based store 1552 | // Store something to a field of a composite-type object 1553 | else if (VTy->isPointerTy()) { 1554 | 1555 | int IdxV; 1556 | // The value operand is a pointer to a composite-type object 1557 | // This case confines the targets through another layer 1558 | //if (Type *VBTy = nextLayerBaseType(VO, IdxV, NextV)) { 1559 | if (Type *VBTy = getBaseType(VO, IdxV)) { 1560 | 1561 | //typeConfineMap[typeIdxHash(PBTy, 1562 | // IdxP)].insert(typeHash(VBTy)); 1563 | propagateType(PO, VBTy); 1564 | 1565 | return true; 1566 | } 1567 | else { 1568 | if (isa(VO) && !isa(VO)) { 1569 | Value * FV = 1570 | dyn_cast(VO)->getOperand(0); 1571 | if (Function *F = dyn_cast(FV)) { 1572 | typeIdxFuncsMap[typeHash(PBTy)][IdxP].insert(F); 1573 | confineTargetFunction(PO, F); 1574 | return true; 1575 | } 1576 | } 1577 | // TODO: The type is escaping? 
1578 | // Example: mm/mempool.c +188: pool->free = free_fn; 1579 | // free_fn is a function pointer from an function 1580 | // argument 1581 | escapeType(PBTy, IdxP); 1582 | return false; 1583 | } 1584 | } 1585 | else { 1586 | // Unrecognized cases 1587 | assert(1); 1588 | } 1589 | 1590 | return false; 1591 | } 1592 | 1593 | #if 0 1594 | if (PointerType *PTy = dyn_cast(UTy)) { 1595 | 1596 | Type *ETy = PTy->getPointerElementType(); 1597 | if (ETy->isFunctionTy()) { 1598 | FuncOperands.insert(U); 1599 | continue; 1600 | } 1601 | } 1602 | 1603 | for (auto oi = U->op_begin(), oe = U->op_end(); 1604 | oi != oe; ++oi) { 1605 | 1606 | Value *O = *oi; 1607 | Type *OTy = O->getType(); 1608 | 1609 | if (PointerType *POTy = dyn_cast(OTy)) { 1610 | 1611 | if (isa(O)) 1612 | continue; 1613 | 1614 | Type *ETy = POTy->getPointerElementType(); 1615 | 1616 | if (ETy->isFunctionTy()) { 1617 | FuncOperands.insert(O); 1618 | continue; 1619 | } 1620 | 1621 | else if (BitCastOperator *CO = 1622 | dyn_cast(O)) { 1623 | 1624 | User *OU = dyn_cast(CO->getOperand(0)); 1625 | LU.push_back(OU); 1626 | continue; 1627 | } 1628 | else if (GEPOperator *GO = 1629 | dyn_cast(O)){ 1630 | 1631 | User *OU = dyn_cast(GO->getOperand(0)); 1632 | LU.push_back(OU); 1633 | continue; 1634 | } 1635 | else if (GlobalVariable *GO = dyn_cast(O)) { 1636 | // TODO 1637 | } 1638 | else { 1639 | // ? 
1640 | } 1641 | } 1642 | else { 1643 | User *OU = dyn_cast(O); 1644 | if (OU) 1645 | LU.push_back(OU); 1646 | } 1647 | } 1648 | 1649 | #endif 1650 | #if 0 // Handling VTable pointers in C++ 1651 | FunctionType *FTy = 1652 | dyn_cast(PETy->getPointerElementType()); 1653 | if (!FTy) 1654 | return NULL; 1655 | 1656 | if (FTy->getNumParams() == 0) 1657 | return NULL; 1658 | // the first parameter should be the object itself 1659 | Type *ParamTy = FTy->getParamType(0); 1660 | if (!ParamTy->isPointerTy()) 1661 | return NULL; 1662 | 1663 | StructType *STy = 1664 | dyn_cast(ParamTy->getPointerElementType()); 1665 | // "class" is treated as a struct 1666 | if (STy && STy->getName().startswith("class.")) { 1667 | User::op_iterator ie = GEP->idx_end(); 1668 | ConstantInt *ConstI = dyn_cast((--ie)->get()); 1669 | //Idx = ConstI->getSExtValue(); 1670 | // assume the idx is always 0 1671 | Idx = 0; 1672 | return STy; 1673 | } 1674 | #endif 1675 | 1676 | #if 0 1677 | bool MLTA::typePropWithCast(User *Cast) { 1678 | 1679 | // If a function address is ever cast to another type and stored 1680 | // to a composite type, the escaping analysis will capture the 1681 | // composite type and discard it 1682 | 1683 | Value *From = Cast->getOperand(0); 1684 | Value *To = Cast; 1685 | 1686 | //int IdxTo; 1687 | //Value *NextV; 1688 | //Type *ToBTy = nextLayerBaseType(To, IdxTo, NextV); 1689 | //Type *ToBTy = getBaseType(To, IdxTo); 1690 | Type *ToBTy = To->getType(); 1691 | 1692 | // Not targeting a composite-type, skip 1693 | if (!isCompositeType(ToBTy)) { 1694 | 1695 | setVisited; 1696 | Type *FromBTy = getBaseType(From, Visited); 1697 | if (FromBTy) { 1698 | // Conservatively say the type is escaping 1699 | for (User *U : To->users()) { 1700 | if (CallInst *CI = dyn_cast(U)) { 1701 | // FIXME: use getCalledOperand instead 1702 | Function *F = CI->getCalledFunction(); 1703 | if (F && !F->onlyReadsMemory()) 1704 | escapeType(FromBTy); 1705 | } 1706 | } 1707 | } 1708 | 1709 | return false; 
1710 | } 1711 | 1712 | Type *FromTy = From->getType(); 1713 | if (isCompositeType(FromTy)) { 1714 | propagateType(To, FromTy); 1715 | return true; 1716 | } 1717 | else if (FromTy->isPointerTy()) { 1718 | 1719 | setVisited; 1720 | Type *FromBTy = getBaseType(From, Visited); 1721 | // Expand 1722 | if (FromBTy) { 1723 | propagateType(To, FromBTy); 1724 | return true; 1725 | } 1726 | else { 1727 | // "newed" object will always be cast to the class type 1728 | // from a general type like i8*, so do not take it as an 1729 | // escaping case 1730 | // A tricky analysis to identify "new" call 1731 | if (CallInst *CI = dyn_cast(From)) { 1732 | // FIXME: use getCalledOperand instead 1733 | Function *F = CI->getCalledFunction(); 1734 | if (F && F->getName() == "_Znwm") 1735 | return true; 1736 | } 1737 | 1738 | // Escape 1739 | escapeType(ToBTy); 1740 | return false; 1741 | } 1742 | } 1743 | else { 1744 | assert(1); 1745 | } 1746 | 1747 | return false; 1748 | } 1749 | #endif 1750 | #if 0 1751 | //Type *ETy = dyn_cast(Ty)->getPointerElementType(); 1752 | 1753 | // Possible cases: (1) pointer to a composite type, (2) GEP 1754 | // Case 1: GetElementPtrInst or GEPOperator 1755 | if (GEPOperator *GEP = dyn_cast(V)) { 1756 | 1757 | Type *PTy = GEP->getPointerOperand()->getType(); 1758 | Type *PETy = PTy->getPointerElementType(); 1759 | if (isCompositeType(PETy) && GEP->hasAllConstantIndices()) { 1760 | User::op_iterator ie = GEP->idx_end(); 1761 | ConstantInt *ConstI = dyn_cast((--ie)->get()); 1762 | Idx = ConstI->getSExtValue(); 1763 | 1764 | return PETy; 1765 | } 1766 | // In case the pointer points to an "array" of function 1767 | // pointers---likely vtable pointer 1768 | // TODO: requires a reliable recognition 1769 | else if (PETy->isPointerTy() && GEP->hasAllConstantIndices()) { 1770 | 1771 | FunctionType *FTy = 1772 | dyn_cast(PETy->getPointerElementType()); 1773 | if (!FTy) 1774 | return NULL; 1775 | 1776 | if (FTy->getNumParams() == 0) 1777 | return NULL; 1778 | // the 
first parameter should be the object itself 1779 | Type *ParamTy = FTy->getParamType(0); 1780 | if (!ParamTy->isPointerTy()) 1781 | return NULL; 1782 | 1783 | StructType *STy = 1784 | dyn_cast(ParamTy->getPointerElementType()); 1785 | // "class" is treated as a struct 1786 | if (STy && STy->getName().startswith("class.")) { 1787 | User::op_iterator ie = GEP->idx_end(); 1788 | ConstantInt *ConstI = dyn_cast((--ie)->get()); 1789 | //Idx = ConstI->getSExtValue(); 1790 | // assume the idx is always 0 1791 | Idx = 0; 1792 | return STy; 1793 | } 1794 | return NULL; 1795 | } 1796 | else { 1797 | 1798 | return NULL; 1799 | } 1800 | } 1801 | #endif 1802 | #endif 1803 | --------------------------------------------------------------------------------