├── Chapter_01_code ├── Readme.txt ├── add.bc ├── add.c ├── add.ll ├── main.bc ├── main.c ├── output.bc └── output.s ├── Chapter_02_code ├── Global.cpp ├── README ├── arith.cpp ├── funcArg.cpp ├── function.cpp ├── functionBlock.cpp ├── ifelse.cpp ├── loop.cpp ├── module.cpp ├── return.cpp ├── toy.cpp └── toy1.cpp ├── Chapter_03_code ├── README ├── extract.cpp ├── gep.cpp ├── insert.cpp ├── load.cpp └── store.cpp ├── Chapter_04_code ├── CMakeLists.txt ├── FnNamePrint │ ├── CMakeLists.txt │ ├── FnNamePrint.cpp │ ├── FnNamePrint.exports │ └── Makefile ├── InstCombineAndOrXor.cpp ├── LLVMFnNamePrint.so ├── Makefile ├── Readme.txt ├── instcombineoutput.ll ├── instcombinetc.ll ├── opt ├── test.ll ├── test_out_O1.ll └── test_out_O2.ll ├── Chapter_05_code ├── README ├── Readme.txt ├── hadd.ll ├── intrinsic.cpp ├── intrinsic.ll ├── intrinsic1.ll ├── licm.bc ├── licm.ll ├── licm_opt.ll ├── llvm-dis └── scalev1.ll ├── Chapter_06_code ├── Ch6_code_suyog │ ├── README │ ├── hadd.ll │ └── llc.tar.gz └── Chapter_06_code_Mayur │ ├── README │ ├── hadd.ll │ ├── outfile │ ├── test.ll │ ├── test.s │ ├── test1.s │ └── test2.s └── Chapter_07_code └── llvm-toy.tar.gz /Chapter_01_code/Readme.txt: -------------------------------------------------------------------------------- 1 | Converting C code to LLVM IR 2 | 3 | The test code is in add.c: 4 | $ cat add.c 5 | 6 | 7 | Use the clang frontend with the following options to convert it to LLVM IR: 8 | 9 | $ clang -emit-llvm -c -S add.c 10 | 11 | 12 | ************************************************************************* 13 | 14 | Using LLVM tools 15 | LLVM assembler 16 | 17 | $ llvm-as add.ll -o add.bc 18 | 19 | To view the content of this bitcode file, a tool like hexdump can be used. 20 | 21 | $ hexdump -c add.bc 22 | 23 | LLVM disassembler: 24 | $ llvm-dis add.bc -o add.ll 25 | 26 | 27 | LLVM linker: 28 | 29 | Convert the C source code to LLVM IR using the following command. 
/* Addend shared by every call to add(); initialised to 12. */
int globvar = 12;

/*
 * Return the argument plus the current value of globvar.
 */
int add(int a) {
  int sum = a + globvar;
  return sum;
}
/* printf() is declared in <stdio.h>; the header name was missing. */
#include <stdio.h>

/* Implemented in add.c and resolved when the two modules are linked. */
extern int add(int);

/*
 * Call add(2) and print the result as a decimal integer followed by a
 * newline. Always returns 0.
 */
int main(void) {
  int a = add(2);
  printf("%d\n", a);
  return 0;
}
.cfi_offset %rbp, -16 46 | movq %rsp, %rbp 47 | .Ltmp5: 48 | .cfi_def_cfa_register %rbp 49 | movl %edi, -4(%rbp) 50 | addl globvar(%rip), %edi 51 | movl %edi, %eax 52 | popq %rbp 53 | retq 54 | .Lfunc_end1: 55 | .size add, .Lfunc_end1-add 56 | .cfi_endproc 57 | 58 | .type .L.str,@object # @.str 59 | .section .rodata.str1.1,"aMS",@progbits,1 60 | .L.str: 61 | .asciz "%d\n" 62 | .size .L.str, 4 63 | 64 | .type globvar,@object # @globvar 65 | .data 66 | .globl globvar 67 | .align 4 68 | globvar: 69 | .long 12 # 0xc 70 | .size globvar, 4 71 | 72 | 73 | .ident "clang version 3.7.0 (trunk 234045)" 74 | .ident "clang version 3.7.0 (trunk 234045)" 75 | .section ".note.GNU-stack","",@progbits 76 | -------------------------------------------------------------------------------- /Chapter_02_code/Global.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/DerivedTypes.h" 2 | #include "llvm/IR/IRBuilder.h" 3 | #include "llvm/IR/LLVMContext.h" 4 | #include "llvm/IR/Module.h" 5 | #include "llvm/IR/Verifier.h" 6 | #include 7 | using namespace llvm; 8 | 9 | static LLVMContext &Context = getGlobalContext(); 10 | static Module *ModuleOb = new Module("my compiler", Context); 11 | 12 | Function *createFunc(IRBuilder<> &Builder, std::string Name) { 13 | FunctionType *funcType = llvm::FunctionType::get(Builder.getInt32Ty(), false); 14 | Function *fooFunc = llvm::Function::Create( 15 | funcType, llvm::Function::ExternalLinkage, Name, ModuleOb); 16 | return fooFunc; 17 | } 18 | 19 | BasicBlock *createBB(Function *fooFunc, std::string Name) { 20 | return BasicBlock::Create(Context, Name, fooFunc); 21 | } 22 | 23 | GlobalVariable *createGlob(IRBuilder<> &Builder, std::string Name) { 24 | ModuleOb->getOrInsertGlobal(Name, Builder.getInt32Ty()); 25 | GlobalVariable *gVar = ModuleOb->getNamedGlobal(Name); 26 | gVar->setLinkage(GlobalValue::CommonLinkage); 27 | gVar->setAlignment(4); 28 | return gVar; 29 | } 30 | 31 | int main(int argc, char 
*argv[]) { 32 | static IRBuilder<> Builder(Context); 33 | GlobalVariable *gVar = createGlob(Builder, "x"); 34 | Function *fooFunc = createFunc(Builder, "foo"); 35 | BasicBlock *entry = createBB(fooFunc, "entry"); 36 | Builder.SetInsertPoint(entry); 37 | verifyFunction(*fooFunc); 38 | ModuleOb->dump(); 39 | return 0; 40 | } 41 | -------------------------------------------------------------------------------- /Chapter_02_code/README: -------------------------------------------------------------------------------- 1 | To compile the program on linux, run the following command 2 | 3 | $ clang++ module.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core` -fno-rtti -o toy 4 | $ ./toy 5 | 6 | $ clang++ Global.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core` -fno-rtti -o toy 7 | $ ./toy 8 | 9 | $ clang++ function.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core` -fno-rtti -o toy 10 | $ ./toy 11 | 12 | $ clang++ funcArg.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core` -fno-rtti -o toy 13 | $ ./toy 14 | 15 | $ clang++ functionBlock.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core` -fno-rtti -o toy 16 | $ ./toy 17 | 18 | $ clang++ return.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core` -fno-rtti -o toy 19 | $ ./toy 20 | 21 | $ clang++ arith.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core` -fno-rtti -o toy 22 | $ ./toy 23 | 24 | $ clang++ ifelse.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core` -fno-rtti -o toy 25 | $ ./toy 26 | 27 | $ clang++ loop.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core` -fno-rtti -o toy 28 | $ ./toy 29 | 30 | $ clang++ arith.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core` -fno-rtti -o toy 31 | $ ./toy 32 | -------------------------------------------------------------------------------- /Chapter_02_code/arith.cpp: -------------------------------------------------------------------------------- 1 | 
#include "llvm/IR/IRBuilder.h" 2 | #include "llvm/IR/LLVMContext.h" 3 | #include "llvm/IR/Module.h" 4 | #include "llvm/IR/Verifier.h" 5 | #include 6 | using namespace llvm; 7 | 8 | static LLVMContext &Context = getGlobalContext(); 9 | static Module *ModuleOb = new Module("my compiler", Context); 10 | static std::vector FunArgs; 11 | 12 | Function *createFunc(IRBuilder<> &Builder, std::string Name) { 13 | std::vector Integers(FunArgs.size(), Type::getInt32Ty(Context)); 14 | FunctionType *funcType = 15 | llvm::FunctionType::get(Builder.getInt32Ty(), Integers, false); 16 | Function *fooFunc = llvm::Function::Create( 17 | funcType, llvm::Function::ExternalLinkage, Name, ModuleOb); 18 | return fooFunc; 19 | } 20 | 21 | void setFuncArgs(Function *fooFunc, std::vector FunArgs) { 22 | 23 | unsigned Idx = 0; 24 | Function::arg_iterator AI, AE; 25 | for (AI = fooFunc->arg_begin(), AE = fooFunc->arg_end(); AI != AE; 26 | ++AI, ++Idx) 27 | AI->setName(FunArgs[Idx]); 28 | } 29 | 30 | BasicBlock *createBB(Function *fooFunc, std::string Name) { 31 | return BasicBlock::Create(Context, Name, fooFunc); 32 | } 33 | 34 | GlobalVariable *createGlob(IRBuilder<> &Builder, std::string Name) { 35 | ModuleOb->getOrInsertGlobal(Name, Builder.getInt32Ty()); 36 | GlobalVariable *gVar = ModuleOb->getNamedGlobal(Name); 37 | gVar->setLinkage(GlobalValue::CommonLinkage); 38 | gVar->setAlignment(4); 39 | return gVar; 40 | } 41 | 42 | Value *createArith(IRBuilder<> &Builder, Value *L, Value *R) { 43 | return Builder.CreateMul(L, R, "multmp"); 44 | } 45 | 46 | int main(int argc, char *argv[]) { 47 | FunArgs.push_back("a"); 48 | FunArgs.push_back("b"); 49 | static IRBuilder<> Builder(Context); 50 | GlobalVariable *gVar = createGlob(Builder, "x"); 51 | Function *fooFunc = createFunc(Builder, "foo"); 52 | setFuncArgs(fooFunc, FunArgs); 53 | BasicBlock *entry = createBB(fooFunc, "entry"); 54 | Builder.SetInsertPoint(entry); 55 | Value *Arg1 = fooFunc->arg_begin(); 56 | Value *constant = 
Builder.getInt32(16); 57 | Value *val = createArith(Builder, Arg1, constant); 58 | Builder.CreateRet(val); 59 | verifyFunction(*fooFunc); 60 | ModuleOb->dump(); 61 | return 0; 62 | } 63 | -------------------------------------------------------------------------------- /Chapter_02_code/funcArg.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/IRBuilder.h" 2 | #include "llvm/IR/LLVMContext.h" 3 | #include "llvm/IR/Module.h" 4 | #include "llvm/IR/Verifier.h" 5 | #include 6 | using namespace llvm; 7 | 8 | static LLVMContext &Context = getGlobalContext(); 9 | static Module *ModuleOb = new Module("my compiler", Context); 10 | static std::vector FunArgs; 11 | 12 | Function *createFunc(IRBuilder<> &Builder, std::string Name) { 13 | std::vector Integers(FunArgs.size(), Type::getInt32Ty(Context)); 14 | FunctionType *funcType = 15 | llvm::FunctionType::get(Builder.getInt32Ty(), Integers, false); 16 | Function *fooFunc = llvm::Function::Create( 17 | funcType, llvm::Function::ExternalLinkage, Name, ModuleOb); 18 | return fooFunc; 19 | } 20 | 21 | void setFuncArgs(Function *fooFunc, std::vector FunArgs) { 22 | unsigned Idx = 0; 23 | Function::arg_iterator AI, AE; 24 | for (AI = fooFunc->arg_begin(), AE = fooFunc->arg_end(); AI != AE; 25 | ++AI, ++Idx) 26 | AI->setName(FunArgs[Idx]); 27 | } 28 | 29 | BasicBlock *createBB(Function *fooFunc, std::string Name) { 30 | return BasicBlock::Create(Context, Name, fooFunc); 31 | } 32 | 33 | GlobalVariable *createGlob(IRBuilder<> &Builder, std::string Name) { 34 | ModuleOb->getOrInsertGlobal(Name, Builder.getInt32Ty()); 35 | GlobalVariable *gVar = ModuleOb->getNamedGlobal(Name); 36 | gVar->setLinkage(GlobalValue::CommonLinkage); 37 | gVar->setAlignment(4); 38 | return gVar; 39 | } 40 | 41 | int main(int argc, char *argv[]) { 42 | FunArgs.push_back("a"); 43 | FunArgs.push_back("b"); 44 | static IRBuilder<> Builder(Context); 45 | GlobalVariable *gVar = createGlob(Builder, "x"); 46 | 
Function *fooFunc = createFunc(Builder, "foo"); 47 | setFuncArgs(fooFunc, FunArgs); 48 | BasicBlock *entry = createBB(fooFunc, "entry"); 49 | Builder.SetInsertPoint(entry); 50 | Builder.CreateRet(Builder.getInt32(0)); 51 | verifyFunction(*fooFunc); 52 | ModuleOb->dump(); 53 | return 0; 54 | } 55 | -------------------------------------------------------------------------------- /Chapter_02_code/function.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/IRBuilder.h" 2 | #include "llvm/IR/LLVMContext.h" 3 | #include "llvm/IR/Module.h" 4 | #include "llvm/IR/Verifier.h" 5 | #include 6 | using namespace llvm; 7 | 8 | static LLVMContext &Context = getGlobalContext(); 9 | static Module *ModuleOb = new Module("my compiler", Context); 10 | 11 | Function *createFunc(IRBuilder<> &Builder, std::string Name) { 12 | FunctionType *funcType = llvm::FunctionType::get(Builder.getInt32Ty(), false); 13 | Function *fooFunc = llvm::Function::Create( 14 | funcType, llvm::Function::ExternalLinkage, Name, ModuleOb); 15 | return fooFunc; 16 | } 17 | 18 | int main(int argc, char *argv[]) { 19 | static IRBuilder<> Builder(Context); 20 | Function *fooFunc = createFunc(Builder, "foo"); 21 | verifyFunction(*fooFunc); 22 | ModuleOb->dump(); 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /Chapter_02_code/functionBlock.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/IRBuilder.h" 2 | #include "llvm/IR/LLVMContext.h" 3 | #include "llvm/IR/Module.h" 4 | #include "llvm/IR/Verifier.h" 5 | #include 6 | using namespace llvm; 7 | 8 | static LLVMContext &Context = getGlobalContext(); 9 | static Module *ModuleOb = new Module("my compiler", Context); 10 | 11 | Function *createFunc(IRBuilder<> &Builder, std::string Name) { 12 | FunctionType *funcType = llvm::FunctionType::get(Builder.getInt32Ty(), false); 13 | Function *fooFunc = 
llvm::Function::Create( 14 | funcType, llvm::Function::ExternalLinkage, Name, ModuleOb); 15 | return fooFunc; 16 | } 17 | 18 | BasicBlock *createBB(Function *fooFunc, std::string Name) { 19 | return BasicBlock::Create(Context, Name, fooFunc); 20 | } 21 | 22 | int main(int argc, char *argv[]) { 23 | static IRBuilder<> Builder(Context); 24 | Function *fooFunc = createFunc(Builder, "foo"); 25 | BasicBlock *entry = createBB(fooFunc, "entry"); 26 | Builder.SetInsertPoint(entry); 27 | verifyFunction(*fooFunc); 28 | ModuleOb->dump(); 29 | return 0; 30 | } 31 | -------------------------------------------------------------------------------- /Chapter_02_code/ifelse.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/IRBuilder.h" 2 | #include "llvm/IR/LLVMContext.h" 3 | #include "llvm/IR/Module.h" 4 | #include "llvm/IR/Verifier.h" 5 | #include 6 | using namespace llvm; 7 | 8 | static LLVMContext &Context = getGlobalContext(); 9 | static Module *ModuleOb = new Module("my compiler", Context); 10 | static std::vector FunArgs; 11 | typedef SmallVector BBList; 12 | typedef SmallVector ValList; 13 | 14 | Function *createFunc(IRBuilder<> &Builder, std::string Name) { 15 | std::vector Integers(FunArgs.size(), Type::getInt32Ty(Context)); 16 | FunctionType *funcType = 17 | llvm::FunctionType::get(Builder.getInt32Ty(), Integers, false); 18 | Function *fooFunc = llvm::Function::Create( 19 | funcType, llvm::Function::ExternalLinkage, Name, ModuleOb); 20 | return fooFunc; 21 | } 22 | 23 | void setFuncArgs(Function *fooFunc, std::vector FunArgs) { 24 | 25 | unsigned Idx = 0; 26 | Function::arg_iterator AI, AE; 27 | for (AI = fooFunc->arg_begin(), AE = fooFunc->arg_end(); AI != AE; 28 | ++AI, ++Idx) 29 | AI->setName(FunArgs[Idx]); 30 | } 31 | 32 | BasicBlock *createBB(Function *fooFunc, std::string Name) { 33 | return BasicBlock::Create(Context, Name, fooFunc); 34 | } 35 | 36 | GlobalVariable *createGlob(IRBuilder<> &Builder, 
std::string Name) { 37 | ModuleOb->getOrInsertGlobal(Name, Builder.getInt32Ty()); 38 | GlobalVariable *gVar = ModuleOb->getNamedGlobal(Name); 39 | gVar->setLinkage(GlobalValue::CommonLinkage); 40 | gVar->setAlignment(4); 41 | return gVar; 42 | } 43 | 44 | Value *createArith(IRBuilder<> &Builder, Value *L, Value *R) { 45 | return Builder.CreateMul(L, R, "multmp"); 46 | } 47 | 48 | Value *createIfElse(IRBuilder<> &Builder, BBList List, ValList VL) { 49 | Value *Condtn = VL[0]; 50 | Value *Arg1 = VL[1]; 51 | BasicBlock *ThenBB = List[0]; 52 | BasicBlock *ElseBB = List[1]; 53 | BasicBlock *MergeBB = List[2]; 54 | Builder.CreateCondBr(Condtn, ThenBB, ElseBB); 55 | 56 | Builder.SetInsertPoint(ThenBB); 57 | Value *ThenVal = Builder.CreateAdd(Arg1, Builder.getInt32(1), "thenaddtmp"); 58 | Builder.CreateBr(MergeBB); 59 | 60 | Builder.SetInsertPoint(ElseBB); 61 | Value *ElseVal = Builder.CreateAdd(Arg1, Builder.getInt32(2), "elseaddtmp"); 62 | Builder.CreateBr(MergeBB); 63 | 64 | unsigned PhiBBSize = List.size() - 1; 65 | Builder.SetInsertPoint(MergeBB); 66 | PHINode *Phi = Builder.CreatePHI(Type::getInt32Ty(getGlobalContext()), 67 | PhiBBSize, "iftmp"); 68 | Phi->addIncoming(ThenVal, ThenBB); 69 | Phi->addIncoming(ElseVal, ElseBB); 70 | 71 | return Phi; 72 | } 73 | 74 | int main(int argc, char *argv[]) { 75 | FunArgs.push_back("a"); 76 | FunArgs.push_back("b"); 77 | static IRBuilder<> Builder(Context); 78 | GlobalVariable *gVar = createGlob(Builder, "x"); 79 | Function *fooFunc = createFunc(Builder, "foo"); 80 | setFuncArgs(fooFunc, FunArgs); 81 | BasicBlock *entry = createBB(fooFunc, "entry"); 82 | Builder.SetInsertPoint(entry); 83 | Function::arg_iterator Args = fooFunc->arg_begin(); 84 | Value *Arg1 = &*Args++; 85 | Value *constant = Builder.getInt32(16); 86 | Value *val = createArith(Builder, Arg1, constant); 87 | 88 | Value *val2 = Builder.getInt32(100); 89 | Value *Compare = Builder.CreateICmpULT(val, val2, "cmptmp"); 90 | //Value *Condtn = 
Builder.CreateICmpNE(Compare, Builder.getInt32(0), "ifcond"); 91 | 92 | ValList VL; 93 | VL.push_back(Compare); 94 | VL.push_back(Arg1); 95 | 96 | BasicBlock *ThenBB = createBB(fooFunc, "then"); 97 | BasicBlock *ElseBB = createBB(fooFunc, "else"); 98 | BasicBlock *MergeBB = createBB(fooFunc, "ifcont"); 99 | BBList List; 100 | List.push_back(ThenBB); 101 | List.push_back(ElseBB); 102 | List.push_back(MergeBB); 103 | 104 | Value *v = createIfElse(Builder, List, VL); 105 | 106 | Builder.CreateRet(v); 107 | verifyFunction(*fooFunc); 108 | ModuleOb->dump(); 109 | return 0; 110 | } 111 | -------------------------------------------------------------------------------- /Chapter_02_code/loop.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/IRBuilder.h" 2 | #include "llvm/IR/LLVMContext.h" 3 | #include "llvm/IR/Module.h" 4 | #include "llvm/IR/Verifier.h" 5 | #include 6 | using namespace llvm; 7 | 8 | typedef SmallVector BBList; 9 | typedef SmallVector ValList; 10 | 11 | static LLVMContext &Context = getGlobalContext(); 12 | static Module *ModuleOb = new Module("my compiler", Context); 13 | static std::vector FunArgs; 14 | 15 | Function *createFunc(IRBuilder<> &Builder, std::string Name) { 16 | std::vector Integers(FunArgs.size(), Type::getInt32Ty(Context)); 17 | FunctionType *funcType = 18 | llvm::FunctionType::get(Builder.getInt32Ty(), Integers, false); 19 | Function *fooFunc = llvm::Function::Create( 20 | funcType, llvm::Function::ExternalLinkage, Name, ModuleOb); 21 | return fooFunc; 22 | } 23 | 24 | void setFuncArgs(Function *fooFunc, std::vector FunArgs) { 25 | 26 | unsigned Idx = 0; 27 | Function::arg_iterator AI, AE; 28 | for (AI = fooFunc->arg_begin(), AE = fooFunc->arg_end(); AI != AE; 29 | ++AI, ++Idx) 30 | AI->setName(FunArgs[Idx]); 31 | } 32 | 33 | BasicBlock *createBB(Function *fooFunc, std::string Name) { 34 | return BasicBlock::Create(Context, Name, fooFunc); 35 | } 36 | 37 | GlobalVariable 
*createGlob(IRBuilder<> &Builder, std::string Name) { 38 | ModuleOb->getOrInsertGlobal(Name, Builder.getInt32Ty()); 39 | GlobalVariable *gVar = ModuleOb->getNamedGlobal(Name); 40 | gVar->setLinkage(GlobalValue::CommonLinkage); 41 | gVar->setAlignment(4); 42 | return gVar; 43 | } 44 | 45 | Value *createArith(IRBuilder<> &Builder, Value *L, Value *R) { 46 | return Builder.CreateMul(L, R, "multmp"); 47 | } 48 | 49 | Value *createLoop(IRBuilder<> &Builder, BBList List, ValList VL, 50 | Value *StartVal, Value *EndVal) { 51 | BasicBlock *PreheaderBB = Builder.GetInsertBlock(); 52 | Value *val = VL[0]; 53 | BasicBlock *LoopBB = List[0]; 54 | Builder.CreateBr(LoopBB); 55 | Builder.SetInsertPoint(LoopBB); 56 | PHINode *IndVar = Builder.CreatePHI(Type::getInt32Ty(Context), 2, "i"); 57 | IndVar->addIncoming(StartVal, PreheaderBB); 58 | Value *Add = Builder.CreateAdd(val, Builder.getInt32(5), "addtmp"); 59 | Value *StepVal = Builder.getInt32(1); 60 | Value *NextVal = Builder.CreateAdd(IndVar, StepVal, "nextval"); 61 | Value *EndCond = Builder.CreateICmpULT(IndVar, EndVal, "endcond"); 62 | EndCond = Builder.CreateICmpNE(EndCond, Builder.getInt32(0), "loopcond"); 63 | BasicBlock *LoopEndBB = Builder.GetInsertBlock(); 64 | BasicBlock *AfterBB = List[1]; 65 | Builder.CreateCondBr(EndCond, LoopBB, AfterBB); 66 | Builder.SetInsertPoint(AfterBB); 67 | IndVar->addIncoming(NextVal, LoopEndBB); 68 | return Add; 69 | } 70 | 71 | int main(int argc, char *argv[]) { 72 | FunArgs.push_back("a"); 73 | FunArgs.push_back("b"); 74 | static IRBuilder<> Builder(Context); 75 | GlobalVariable *gVar = createGlob(Builder, "x"); 76 | Function *fooFunc = createFunc(Builder, "foo"); 77 | setFuncArgs(fooFunc, FunArgs); 78 | BasicBlock *entry = createBB(fooFunc, "entry"); 79 | Builder.SetInsertPoint(entry); 80 | Function::arg_iterator AI = fooFunc->arg_begin(); 81 | Value *Arg1 = AI++; 82 | Value *Arg2 = AI; 83 | Value *constant = Builder.getInt32(16); 84 | Value *val = createArith(Builder, Arg1, 
constant); 85 | 86 | ValList VL; 87 | VL.push_back(Arg1); 88 | 89 | BBList List; 90 | 91 | BasicBlock *LoopBB = createBB(fooFunc, "loop"); 92 | BasicBlock *AfterBB = createBB(fooFunc, "afterloop"); 93 | List.push_back(LoopBB); 94 | List.push_back(AfterBB); 95 | Value *StartVal = Builder.getInt32(1); 96 | Value *Res = createLoop(Builder, List, VL, StartVal, Arg2); 97 | 98 | Builder.CreateRet(Res); 99 | verifyFunction(*fooFunc); 100 | ModuleOb->dump(); 101 | return 0; 102 | } 103 | -------------------------------------------------------------------------------- /Chapter_02_code/module.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/LLVMContext.h" 2 | #include "llvm/IR/Module.h" 3 | using namespace llvm; 4 | static LLVMContext &Context = getGlobalContext(); 5 | static Module *ModuleOb = new Module("my compiler", Context); 6 | 7 | int main(int argc, char *argv[]) { 8 | ModuleOb->dump(); 9 | return 0; 10 | } 11 | -------------------------------------------------------------------------------- /Chapter_02_code/return.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/IRBuilder.h" 2 | #include "llvm/IR/LLVMContext.h" 3 | #include "llvm/IR/Module.h" 4 | #include "llvm/IR/Verifier.h" 5 | #include 6 | using namespace llvm; 7 | 8 | static LLVMContext &Context = getGlobalContext(); 9 | static Module *ModuleOb = new Module("my compiler", Context); 10 | 11 | Function *createFunc(IRBuilder<> &Builder, std::string Name) { 12 | FunctionType *funcType = llvm::FunctionType::get(Builder.getInt32Ty(), false); 13 | Function *fooFunc = llvm::Function::Create( 14 | funcType, llvm::Function::ExternalLinkage, Name, ModuleOb); 15 | return fooFunc; 16 | } 17 | 18 | BasicBlock *createBB(Function *fooFunc, std::string Name) { 19 | return BasicBlock::Create(Context, Name, fooFunc); 20 | } 21 | 22 | GlobalVariable *createGlob(IRBuilder<> &Builder, std::string Name) { 23 | 
ModuleOb->getOrInsertGlobal(Name, Builder.getInt32Ty()); 24 | GlobalVariable *gVar = ModuleOb->getNamedGlobal(Name); 25 | gVar->setLinkage(GlobalValue::CommonLinkage); 26 | gVar->setAlignment(4); 27 | return gVar; 28 | } 29 | 30 | int main(int argc, char *argv[]) { 31 | static IRBuilder<> Builder(Context); 32 | GlobalVariable *gVar = createGlob(Builder, "x"); 33 | Function *fooFunc = createFunc(Builder, "foo"); 34 | BasicBlock *entry = createBB(fooFunc, "entry"); 35 | Builder.SetInsertPoint(entry); 36 | Builder.CreateRet(Builder.getInt32(0)); 37 | verifyFunction(*fooFunc); 38 | ModuleOb->dump(); 39 | return 0; 40 | } 41 | -------------------------------------------------------------------------------- /Chapter_02_code/toy.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/ADT/STLExtras.h" 2 | #include "llvm/Analysis/Passes.h" 3 | #include "llvm/IR/DataLayout.h" 4 | #include "llvm/IR/DerivedTypes.h" 5 | #include "llvm/IR/IRBuilder.h" 6 | #include "llvm/IR/LLVMContext.h" 7 | #include "llvm/IR/LegacyPassManager.h" 8 | #include "llvm/IR/Module.h" 9 | #include "llvm/IR/Verifier.h" 10 | #include "llvm/Support/TargetSelect.h" 11 | #include "llvm/Transforms/Scalar.h" 12 | #include 13 | using namespace llvm; 14 | int main(int argc, char* argv[]) { 15 | LLVMContext &Context = getGlobalContext(); 16 | Module* Module_Ob = new Module("my compiler", Context); 17 | static IRBuilder<> builder(Context); 18 | Module_Ob->getOrInsertGlobal("x", builder.getInt32Ty()); 19 | GlobalVariable* gVar = Module_Ob->getNamedGlobal("x"); 20 | gVar->setLinkage(GlobalValue::CommonLinkage); 21 | gVar->setAlignment(4); 22 | 23 | std::vector Args; 24 | Args.push_back("a"); 25 | Args.push_back("b"); 26 | 27 | std::vector Integers(Args.size(),Type::getInt32Ty(Context)); 28 | FunctionType *funcType = llvm::FunctionType::get(builder.getInt32Ty(), Integers, false); 29 | Function *fooFunc = llvm::Function::Create(funcType, 
llvm::Function::ExternalLinkage, "foo", Module_Ob); 30 | 31 | unsigned Idx = 0; 32 | Function::arg_iterator AI, AE; 33 | for (AI = fooFunc->arg_begin(), AE = fooFunc->arg_end(); AI != AE; 34 | ++AI, ++Idx) 35 | AI->setName(Args[Idx]); 36 | 37 | BasicBlock *entry = llvm::BasicBlock::Create(Context, "entry", fooFunc); 38 | builder.SetInsertPoint(entry); 39 | 40 | Value *L = fooFunc->arg_begin(); 41 | Value *R = ConstantInt::get(Type::getInt32Ty(Context), 16); 42 | builder.CreateMul(L, R, "multmp"); 43 | R = ConstantInt::get(Type::getInt32Ty(Context),100); 44 | L = builder.CreateICmpULT(L, R, "cmptmp"); 45 | //Value *Condtn = builder.CreateZExt(L, Type::getInt32Ty(Context),"booltmp"); 46 | 47 | Value *Condtn = builder.CreateICmpNE(L, builder.getInt32(0), "ifcond"); 48 | BasicBlock *ThenBB = BasicBlock::Create(Context, "then", fooFunc); 49 | BasicBlock *ElseBB = BasicBlock::Create(Context,"else"); 50 | BasicBlock *MergeBB = BasicBlock::Create(Context,"ifcont"); 51 | builder.CreateCondBr(Condtn, ThenBB, ElseBB); 52 | builder.SetInsertPoint(ThenBB); 53 | Value *ThenVal = builder.CreateAdd(gVar,ConstantInt::get(Type::getInt32Ty(Context),1),"thenaddtmp"); 54 | builder.CreateBr(MergeBB); 55 | //ThenBB = builder.GetInsertBlock(); 56 | fooFunc->getBasicBlockList().push_back(ElseBB); 57 | builder.SetInsertPoint(ElseBB); 58 | Value *ElseVal = builder.CreateAdd(gVar,ConstantInt::get(Type::getInt32Ty(Context),2),"elseaddtmp"); 59 | builder.CreateBr(MergeBB); 60 | //ElseBB = builder.GetInsertBlock(); 61 | fooFunc->getBasicBlockList().push_back(ThenBB); 62 | fooFunc->getBasicBlockList().push_back(ElseBB); 63 | fooFunc->getBasicBlockList().push_back(MergeBB); 64 | builder.SetInsertPoint(MergeBB); 65 | PHINode *Phi = builder.CreatePHI(Type::getInt32Ty(getGlobalContext()), 2, "iftmp"); 66 | Phi->addIncoming(ThenVal, ThenBB); 67 | Phi->addIncoming(ElseVal, ElseBB); 68 | 69 | 70 | builder.CreateRet(ConstantInt::get(Type::getInt32Ty(getGlobalContext()),0)); 71 | verifyFunction(*fooFunc); 
72 | Module_Ob->dump(); 73 | return 0; 74 | } 75 | 76 | -------------------------------------------------------------------------------- /Chapter_02_code/toy1.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/ADT/STLExtras.h" 2 | #include "llvm/Analysis/Passes.h" 3 | #include "llvm/IR/DataLayout.h" 4 | #include "llvm/IR/DerivedTypes.h" 5 | #include "llvm/IR/IRBuilder.h" 6 | #include "llvm/IR/LLVMContext.h" 7 | #include "llvm/IR/LegacyPassManager.h" 8 | #include "llvm/IR/Module.h" 9 | #include "llvm/IR/Verifier.h" 10 | #include "llvm/Support/TargetSelect.h" 11 | #include "llvm/Transforms/Scalar.h" 12 | #include 13 | using namespace llvm; 14 | int main(int argc, char* argv[]) { 15 | LLVMContext &Context = getGlobalContext(); 16 | Module* Module_Ob = new Module("my compiler", Context); 17 | static IRBuilder<> builder(Context); 18 | Module_Ob->getOrInsertGlobal("x", builder.getInt32Ty()); 19 | GlobalVariable* gVar = Module_Ob->getNamedGlobal("x"); 20 | gVar->setLinkage(GlobalValue::CommonLinkage); 21 | gVar->setAlignment(4); 22 | 23 | std::vector Args; 24 | Args.push_back("a"); 25 | Args.push_back("b"); 26 | 27 | std::vector Integers(Args.size(),Type::getInt32Ty(Context)); 28 | FunctionType *funcType = llvm::FunctionType::get(builder.getInt32Ty(), Integers, false); 29 | Function *fooFunc = llvm::Function::Create(funcType, llvm::Function::ExternalLinkage, "foo", Module_Ob); 30 | 31 | unsigned Idx = 0; 32 | Function::arg_iterator AI; 33 | for (AI = fooFunc->arg_begin(); Idx != Args.size(); 34 | ++AI, ++Idx) 35 | AI->setName(Args[Idx]); 36 | 37 | BasicBlock *entry = llvm::BasicBlock::Create(Context, "entry", fooFunc); 38 | builder.SetInsertPoint(entry); 39 | 40 | Value *L = fooFunc->arg_begin(); 41 | Value *R = ConstantInt::get(Type::getInt32Ty(Context), 16); 42 | builder.CreateMul(L, R, "multmp"); 43 | 44 | Value *StartVal = ConstantInt::get(Type::getInt32Ty(Context),1); 45 | BasicBlock *PreheaderBB = 
builder.GetInsertBlock(); 46 | BasicBlock *LoopBB = BasicBlock::Create(Context, "loop", fooFunc); 47 | builder.CreateBr(LoopBB); 48 | builder.SetInsertPoint(LoopBB); 49 | PHINode *Variable = builder.CreatePHI(Type::getInt32Ty(Context), 2, "i"); 50 | Variable->addIncoming(StartVal, PreheaderBB); 51 | 52 | 53 | Value *StepVal = ConstantInt::get(Type::getInt32Ty(Context), 1); 54 | Value *NextVar = builder.CreateAdd(Variable, StepVal,"nextvar"); 55 | Value *EndCond = builder.CreateICmpULT(Variable, fooFunc->arg_begin(), "endcond"); 56 | EndCond = builder.CreateICmpNE(EndCond, ConstantInt::get(Type::getInt32Ty(Context), 0), "loopcond"); 57 | BasicBlock *LoopEndBB = builder.GetInsertBlock(); 58 | BasicBlock *AfterBB = BasicBlock::Create(Context, "afterloop",fooFunc); 59 | builder.CreateCondBr(EndCond, LoopBB, AfterBB); 60 | builder.SetInsertPoint(AfterBB); 61 | Variable->addIncoming(NextVar, LoopEndBB); 62 | 63 | builder.CreateRet(ConstantInt::get(Type::getInt32Ty(getGlobalContext()),0)); 64 | Module_Ob->dump(); 65 | return 0; 66 | } 67 | 68 | -------------------------------------------------------------------------------- /Chapter_03_code/README: -------------------------------------------------------------------------------- 1 | To compile the program on linux, run the following command 2 | 3 | $ clang++ gep.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core` -fno-rtti -o toy 4 | $ ./toy 5 | 6 | $ clang++ load.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core` -fno-rtti -o toy 7 | $ ./toy 8 | 9 | $ clang++ store.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core` -fno-rtti -o toy 10 | $ ./toy 11 | 12 | $ clang++ insert.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core` -fno-rtti -o toy 13 | $ ./toy 14 | 15 | $ clang++ extract.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core` -fno-rtti -o toy 16 | $ ./toy 17 | 18 | 19 | -------------------------------------------------------------------------------- 
/Chapter_03_code/extract.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/IRBuilder.h" 2 | #include "llvm/IR/LLVMContext.h" 3 | #include "llvm/IR/Module.h" 4 | #include "llvm/IR/Verifier.h" 5 | #include 6 | using namespace llvm; 7 | 8 | static LLVMContext &Context = getGlobalContext(); 9 | static Module *ModuleOb = new Module("my compiler", Context); 10 | static std::vector FunArgs; 11 | 12 | Function *createFunc(IRBuilder<> &Builder, std::string Name) { 13 | Type *u32Ty = Type::getInt32Ty(Context); 14 | Type *vecTy = VectorType::get(u32Ty, 4); 15 | FunctionType *funcType = 16 | FunctionType::get(Builder.getInt32Ty(), vecTy, false); 17 | Function *fooFunc = 18 | Function::Create(funcType, Function::ExternalLinkage, Name, ModuleOb); 19 | return fooFunc; 20 | } 21 | 22 | void setFuncArgs(Function *fooFunc, std::vector FunArgs) { 23 | unsigned Idx = 0; 24 | Function::arg_iterator AI, AE; 25 | for (AI = fooFunc->arg_begin(), AE = fooFunc->arg_end(); AI != AE; 26 | ++AI, ++Idx) 27 | AI->setName(FunArgs[Idx]); 28 | } 29 | 30 | BasicBlock *createBB(Function *fooFunc, std::string Name) { 31 | return BasicBlock::Create(Context, Name, fooFunc); 32 | } 33 | 34 | Value *createArith(IRBuilder<> &Builder, Value *L, Value *R) { 35 | return Builder.CreateAdd(L, R, "add"); 36 | } 37 | 38 | Value *getExtractElement(IRBuilder<> &Builder, Value *Vec, Value *Index) { 39 | return Builder.CreateExtractElement(Vec, Index); 40 | } 41 | 42 | int main(int argc, char *argv[]) { 43 | FunArgs.push_back("a"); 44 | static IRBuilder<> Builder(Context); 45 | Function *fooFunc = createFunc(Builder, "foo"); 46 | setFuncArgs(fooFunc, FunArgs); 47 | 48 | BasicBlock *entry = createBB(fooFunc, "entry"); 49 | Builder.SetInsertPoint(entry); 50 | 51 | Value *Vec = fooFunc->arg_begin(); 52 | SmallVector V; 53 | for (unsigned int i = 0; i < 4; i++) 54 | V[i] = getExtractElement(Builder, Vec, Builder.getInt32(i)); 55 | 56 | Value *add1 = 
createArith(Builder, V[0], V[1]); 57 | Value *add2 = createArith(Builder, add1, V[2]); 58 | Value *add = createArith(Builder, add2, V[3]); 59 | 60 | Builder.CreateRet(add); 61 | verifyFunction(*fooFunc); 62 | ModuleOb->dump(); 63 | return 0; 64 | } 65 | -------------------------------------------------------------------------------- /Chapter_03_code/gep.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/IRBuilder.h" 2 | #include "llvm/IR/LLVMContext.h" 3 | #include "llvm/IR/Module.h" 4 | #include "llvm/IR/Verifier.h" 5 | #include 6 | using namespace llvm; 7 | 8 | static LLVMContext &Context = getGlobalContext(); 9 | static Module *ModuleOb = new Module("my compiler", Context); 10 | static std::vector FunArgs; 11 | 12 | Function *createFunc(IRBuilder<> &Builder, std::string Name) { 13 | Type *u32Ty = Type::getInt32Ty(Context); 14 | Type *vecTy = VectorType::get(u32Ty, 2); 15 | Type *ptrTy = vecTy->getPointerTo(0); 16 | FunctionType *funcType = 17 | FunctionType::get(Builder.getInt32Ty(), ptrTy, false); 18 | Function *fooFunc = 19 | Function::Create(funcType, Function::ExternalLinkage, Name, ModuleOb); 20 | return fooFunc; 21 | } 22 | 23 | void setFuncArgs(Function *fooFunc, std::vector FunArgs) { 24 | unsigned Idx = 0; 25 | Function::arg_iterator AI, AE; 26 | for (AI = fooFunc->arg_begin(), AE = fooFunc->arg_end(); AI != AE; 27 | ++AI, ++Idx) 28 | AI->setName(FunArgs[Idx]); 29 | } 30 | 31 | BasicBlock *createBB(Function *fooFunc, std::string Name) { 32 | return BasicBlock::Create(Context, Name, fooFunc); 33 | } 34 | 35 | Value *getGEP(IRBuilder<> &Builder, Value *Base, Value *Offset) { 36 | return Builder.CreateGEP(Builder.getInt32Ty(), Base, Offset, "a1"); 37 | } 38 | 39 | int main(int argc, char *argv[]) { 40 | FunArgs.push_back("a"); 41 | static IRBuilder<> Builder(Context); 42 | Function *fooFunc = createFunc(Builder, "foo"); 43 | setFuncArgs(fooFunc, FunArgs); 44 | Value *Base = fooFunc->arg_begin(); 45 | 
BasicBlock *entry = createBB(fooFunc, "entry"); 46 | Builder.SetInsertPoint(entry); 47 | Value *gep = getGEP(Builder, Base, Builder.getInt32(1)); 48 | Builder.CreateRet(Builder.getInt32(0)); 49 | verifyFunction(*fooFunc); 50 | ModuleOb->dump(); 51 | return 0; 52 | } 53 | -------------------------------------------------------------------------------- /Chapter_03_code/insert.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/IRBuilder.h" 2 | #include "llvm/IR/LLVMContext.h" 3 | #include "llvm/IR/Module.h" 4 | #include "llvm/IR/Verifier.h" 5 | #include 6 | using namespace llvm; 7 | 8 | static LLVMContext &Context = getGlobalContext(); 9 | static Module *ModuleOb = new Module("my compiler", Context); 10 | static std::vector FunArgs; 11 | 12 | Function *createFunc(IRBuilder<> &Builder, std::string Name) { 13 | Type *u32Ty = Type::getInt32Ty(Context); 14 | Type *vecTy = VectorType::get(u32Ty, 4); 15 | FunctionType *funcType = 16 | FunctionType::get(Builder.getInt32Ty(), vecTy, false); 17 | Function *fooFunc = 18 | Function::Create(funcType, Function::ExternalLinkage, Name, ModuleOb); 19 | return fooFunc; 20 | } 21 | 22 | void setFuncArgs(Function *fooFunc, std::vector FunArgs) { 23 | unsigned Idx = 0; 24 | Function::arg_iterator AI, AE; 25 | for (AI = fooFunc->arg_begin(), AE = fooFunc->arg_end(); AI != AE; 26 | ++AI, ++Idx) 27 | AI->setName(FunArgs[Idx]); 28 | } 29 | 30 | BasicBlock *createBB(Function *fooFunc, std::string Name) { 31 | return BasicBlock::Create(Context, Name, fooFunc); 32 | } 33 | 34 | Value *getInsertElement(IRBuilder<> &Builder, Value *Vec, Value *Val, 35 | Value *Index) { 36 | return Builder.CreateInsertElement(Vec, Val, Index); 37 | } 38 | 39 | int main(int argc, char *argv[]) { 40 | FunArgs.push_back("a"); 41 | static IRBuilder<> Builder(Context); 42 | Function *fooFunc = createFunc(Builder, "foo"); 43 | setFuncArgs(fooFunc, FunArgs); 44 | 45 | BasicBlock *entry = createBB(fooFunc, "entry"); 
46 | Builder.SetInsertPoint(entry); 47 | 48 | Value *Vec = fooFunc->arg_begin(); 49 | for (unsigned int i = 0; i < 4; i++) 50 | Value *V = getInsertElement(Builder, Vec, Builder.getInt32((i + 1) * 10), 51 | Builder.getInt32(i)); 52 | 53 | Builder.CreateRet(Builder.getInt32(0)); 54 | verifyFunction(*fooFunc); 55 | ModuleOb->dump(); 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /Chapter_03_code/load.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/IRBuilder.h" 2 | #include "llvm/IR/LLVMContext.h" 3 | #include "llvm/IR/Module.h" 4 | #include "llvm/IR/Verifier.h" 5 | #include 6 | using namespace llvm; 7 | 8 | static LLVMContext &Context = getGlobalContext(); 9 | static Module *ModuleOb = new Module("my compiler", Context); 10 | static std::vector FunArgs; 11 | 12 | Function *createFunc(IRBuilder<> &Builder, std::string Name) { 13 | Type *u32Ty = Type::getInt32Ty(Context); 14 | Type *vecTy = VectorType::get(u32Ty, 2); 15 | Type *ptrTy = vecTy->getPointerTo(0); 16 | FunctionType *funcType = 17 | FunctionType::get(Builder.getInt32Ty(), ptrTy, false); 18 | Function *fooFunc = 19 | Function::Create(funcType, Function::ExternalLinkage, Name, ModuleOb); 20 | return fooFunc; 21 | } 22 | 23 | void setFuncArgs(Function *fooFunc, std::vector FunArgs) { 24 | unsigned Idx = 0; 25 | Function::arg_iterator AI, AE; 26 | for (AI = fooFunc->arg_begin(), AE = fooFunc->arg_end(); AI != AE; 27 | ++AI, ++Idx) 28 | AI->setName(FunArgs[Idx]); 29 | } 30 | 31 | BasicBlock *createBB(Function *fooFunc, std::string Name) { 32 | return BasicBlock::Create(Context, Name, fooFunc); 33 | } 34 | 35 | Value *getGEP(IRBuilder<> &Builder, Value *Base, Value *Offset) { 36 | return Builder.CreateGEP(Builder.getInt32Ty(), Base, Offset, "a1"); 37 | } 38 | 39 | Value *getLoad(IRBuilder<> &Builder, Value *Address) { 40 | return Builder.CreateLoad(Address, "load"); 41 | } 42 | 43 | int main(int 
argc, char *argv[]) { 44 | FunArgs.push_back("a"); 45 | static IRBuilder<> Builder(Context); 46 | Function *fooFunc = createFunc(Builder, "foo"); 47 | setFuncArgs(fooFunc, FunArgs); 48 | Value *Base = fooFunc->arg_begin(); 49 | BasicBlock *entry = createBB(fooFunc, "entry"); 50 | Builder.SetInsertPoint(entry); 51 | Value *gep = getGEP(Builder, Base, Builder.getInt32(1)); 52 | Value *load = getLoad(Builder, gep); 53 | Builder.CreateRet(load); 54 | verifyFunction(*fooFunc); 55 | ModuleOb->dump(); 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /Chapter_03_code/store.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/IR/IRBuilder.h" 2 | #include "llvm/IR/LLVMContext.h" 3 | #include "llvm/IR/Module.h" 4 | #include "llvm/IR/Verifier.h" 5 | #include 6 | using namespace llvm; 7 | 8 | static LLVMContext &Context = getGlobalContext(); 9 | static Module *ModuleOb = new Module("my compiler", Context); 10 | static std::vector FunArgs; 11 | 12 | Function *createFunc(IRBuilder<> &Builder, std::string Name) { 13 | Type *u32Ty = Type::getInt32Ty(Context); 14 | Type *vecTy = VectorType::get(u32Ty, 2); 15 | Type *ptrTy = vecTy->getPointerTo(0); 16 | FunctionType *funcType = 17 | FunctionType::get(Builder.getInt32Ty(), ptrTy, false); 18 | Function *fooFunc = 19 | Function::Create(funcType, Function::ExternalLinkage, Name, ModuleOb); 20 | return fooFunc; 21 | } 22 | 23 | void setFuncArgs(Function *fooFunc, std::vector FunArgs) { 24 | unsigned Idx = 0; 25 | Function::arg_iterator AI, AE; 26 | for (AI = fooFunc->arg_begin(), AE = fooFunc->arg_end(); AI != AE; 27 | ++AI, ++Idx) 28 | AI->setName(FunArgs[Idx]); 29 | } 30 | 31 | BasicBlock *createBB(Function *fooFunc, std::string Name) { 32 | return BasicBlock::Create(Context, Name, fooFunc); 33 | } 34 | 35 | Value *createArith(IRBuilder<> &Builder, Value *L, Value *R) { 36 | return Builder.CreateMul(L, R, "multmp"); 37 | } 38 | 
39 | Value *getGEP(IRBuilder<> &Builder, Value *Base, Value *Offset) { 40 | return Builder.CreateGEP(Builder.getInt32Ty(), Base, Offset, "a1"); 41 | } 42 | 43 | Value *getLoad(IRBuilder<> &Builder, Value *Address) { 44 | return Builder.CreateLoad(Address, "load"); 45 | } 46 | 47 | void getStore(IRBuilder<> &Builder, Value *Address, Value *V) { 48 | Builder.CreateStore(V, Address); 49 | } 50 | 51 | int main(int argc, char *argv[]) { 52 | FunArgs.push_back("a"); 53 | static IRBuilder<> Builder(Context); 54 | Function *fooFunc = createFunc(Builder, "foo"); 55 | setFuncArgs(fooFunc, FunArgs); 56 | Value *Base = fooFunc->arg_begin(); 57 | BasicBlock *entry = createBB(fooFunc, "entry"); 58 | Builder.SetInsertPoint(entry); 59 | Value *gep = getGEP(Builder, Base, Builder.getInt32(1)); 60 | Value *load = getLoad(Builder, gep); 61 | Value *constant = Builder.getInt32(16); 62 | Value *val = createArith(Builder, load, constant); 63 | getStore(Builder, gep, val); 64 | Builder.CreateRet(val); 65 | verifyFunction(*fooFunc); 66 | ModuleOb->dump(); 67 | return 0; 68 | } 69 | -------------------------------------------------------------------------------- /Chapter_04_code/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(Utils) 2 | add_subdirectory(Instrumentation) 3 | add_subdirectory(InstCombine) 4 | add_subdirectory(Scalar) 5 | add_subdirectory(IPO) 6 | add_subdirectory(Vectorize) 7 | add_subdirectory(Hello) 8 | add_subdirectory(ObjCARC) 9 | add_subdirectory(FnNamePrint) 10 | -------------------------------------------------------------------------------- /Chapter_04_code/FnNamePrint/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # If we don't need RTTI or EH, there's no reason to export anything 2 | # from the FnNamePrint plugin. 
3 | if( NOT LLVM_REQUIRES_RTTI ) 4 | if( NOT LLVM_REQUIRES_EH ) 5 | set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/FnNamePrint.exports) 6 | endif() 7 | endif() 8 | 9 | if(WIN32 OR CYGWIN) 10 | set(LLVM_LINK_COMPONENTS Core Support) 11 | endif() 12 | 13 | add_llvm_loadable_module( LLVMFnNamePrint 14 | FnNamePrint.cpp 15 | 16 | DEPENDS 17 | intrinsics_gen 18 | ) 19 | -------------------------------------------------------------------------------- /Chapter_04_code/FnNamePrint/FnNamePrint.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/Pass.h" 2 | #include "llvm/IR/Function.h" 3 | #include "llvm/Support/raw_ostream.h" 4 | 5 | using namespace llvm; 6 | 7 | namespace { 8 | struct FnNamePrint: public FunctionPass { 9 | static char ID; 10 | FnNamePrint () : FunctionPass(ID) {} 11 | bool runOnFunction(Function &F) override { 12 | errs() << "Function " << F.getName() << '\n'; 13 | return false; 14 | } 15 | }; 16 | } 17 | 18 | char FnNamePrint::ID = 0; 19 | static RegisterPass< FnNamePrint > X("funcnameprint", 20 | "Function Name Print", false, false); 21 | -------------------------------------------------------------------------------- /Chapter_04_code/FnNamePrint/FnNamePrint.exports: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elongbug/llvm-essentials-book/15df77ca2796e4077157698254d30c688b5138e8/Chapter_04_code/FnNamePrint/FnNamePrint.exports -------------------------------------------------------------------------------- /Chapter_04_code/FnNamePrint/Makefile: -------------------------------------------------------------------------------- 1 | ##===- lib/Transforms/FnNamePrint/Makefile ----------------*- Makefile -*-===## 2 | # 3 | # The LLVM Compiler Infrastructure 4 | # 5 | # This file is distributed under the University of Illinois Open Source 6 | # License. See LICENSE.TXT for details. 
7 | # 8 | ##===----------------------------------------------------------------------===## 9 | 10 | LEVEL = ../../.. 11 | LIBRARYNAME = LLVMFnNamePrint 12 | LOADABLE_MODULE = 1 13 | USEDLIBS = 14 | 15 | # If we don't need RTTI or EH, there's no reason to export anything 16 | # from the hello plugin. 17 | ifneq ($(REQUIRES_RTTI), 1) 18 | ifneq ($(REQUIRES_EH), 1) 19 | EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/FnNamePrint.exports 20 | endif 21 | endif 22 | 23 | include $(LEVEL)/Makefile.common 24 | 25 | -------------------------------------------------------------------------------- /Chapter_04_code/InstCombineAndOrXor.cpp: -------------------------------------------------------------------------------- 1 | //===- InstCombineAndOrXor.cpp --------------------------------------------===// 2 | // 3 | // The LLVM Compiler Infrastructure 4 | // 5 | // This file is distributed under the University of Illinois Open Source 6 | // License. See LICENSE.TXT for details. 7 | // 8 | //===----------------------------------------------------------------------===// 9 | // 10 | // This file implements the visitAnd, visitOr, and visitXor functions. 11 | // 12 | //===----------------------------------------------------------------------===// 13 | 14 | #include "InstCombineInternal.h" 15 | #include "llvm/Analysis/InstructionSimplify.h" 16 | #include "llvm/IR/ConstantRange.h" 17 | #include "llvm/IR/Intrinsics.h" 18 | #include "llvm/IR/PatternMatch.h" 19 | #include "llvm/Transforms/Utils/CmpInstAnalysis.h" 20 | using namespace llvm; 21 | using namespace PatternMatch; 22 | 23 | #define DEBUG_TYPE "instcombine" 24 | 25 | static inline Value *dyn_castNotVal(Value *V) { 26 | // If this is not(not(x)) don't return that this is a not: we want the two 27 | // not's to be folded first. 
28 | if (BinaryOperator::isNot(V)) { 29 | Value *Operand = BinaryOperator::getNotArgument(V); 30 | if (!IsFreeToInvert(Operand, Operand->hasOneUse())) 31 | return Operand; 32 | } 33 | 34 | // Constants can be considered to be not'ed values... 35 | if (ConstantInt *C = dyn_cast(V)) 36 | return ConstantInt::get(C->getType(), ~C->getValue()); 37 | return nullptr; 38 | } 39 | 40 | /// Similar to getICmpCode but for FCmpInst. This encodes a fcmp predicate into 41 | /// a three bit mask. It also returns whether it is an ordered predicate by 42 | /// reference. 43 | static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) { 44 | isOrdered = false; 45 | switch (CC) { 46 | case FCmpInst::FCMP_ORD: isOrdered = true; return 0; // 000 47 | case FCmpInst::FCMP_UNO: return 0; // 000 48 | case FCmpInst::FCMP_OGT: isOrdered = true; return 1; // 001 49 | case FCmpInst::FCMP_UGT: return 1; // 001 50 | case FCmpInst::FCMP_OEQ: isOrdered = true; return 2; // 010 51 | case FCmpInst::FCMP_UEQ: return 2; // 010 52 | case FCmpInst::FCMP_OGE: isOrdered = true; return 3; // 011 53 | case FCmpInst::FCMP_UGE: return 3; // 011 54 | case FCmpInst::FCMP_OLT: isOrdered = true; return 4; // 100 55 | case FCmpInst::FCMP_ULT: return 4; // 100 56 | case FCmpInst::FCMP_ONE: isOrdered = true; return 5; // 101 57 | case FCmpInst::FCMP_UNE: return 5; // 101 58 | case FCmpInst::FCMP_OLE: isOrdered = true; return 6; // 110 59 | case FCmpInst::FCMP_ULE: return 6; // 110 60 | // True -> 7 61 | default: 62 | // Not expecting FCMP_FALSE and FCMP_TRUE; 63 | llvm_unreachable("Unexpected FCmp predicate!"); 64 | } 65 | } 66 | 67 | /// This is the complement of getICmpCode, which turns an opcode and two 68 | /// operands into either a constant true or false, or a brand new ICmp 69 | /// instruction. The sign is passed in to determine which kind of predicate to 70 | /// use in the new icmp instruction. 
71 | static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS, 72 | InstCombiner::BuilderTy *Builder) { 73 | ICmpInst::Predicate NewPred; 74 | if (Value *NewConstant = getICmpValue(Sign, Code, LHS, RHS, NewPred)) 75 | return NewConstant; 76 | return Builder->CreateICmp(NewPred, LHS, RHS); 77 | } 78 | 79 | /// This is the complement of getFCmpCode, which turns an opcode and two 80 | /// operands into either a FCmp instruction. isordered is passed in to determine 81 | /// which kind of predicate to use in the new fcmp instruction. 82 | static Value *getFCmpValue(bool isordered, unsigned code, 83 | Value *LHS, Value *RHS, 84 | InstCombiner::BuilderTy *Builder) { 85 | CmpInst::Predicate Pred; 86 | switch (code) { 87 | default: llvm_unreachable("Illegal FCmp code!"); 88 | case 0: Pred = isordered ? FCmpInst::FCMP_ORD : FCmpInst::FCMP_UNO; break; 89 | case 1: Pred = isordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT; break; 90 | case 2: Pred = isordered ? FCmpInst::FCMP_OEQ : FCmpInst::FCMP_UEQ; break; 91 | case 3: Pred = isordered ? FCmpInst::FCMP_OGE : FCmpInst::FCMP_UGE; break; 92 | case 4: Pred = isordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; break; 93 | case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break; 94 | case 6: Pred = isordered ? FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break; 95 | case 7: 96 | if (!isordered) 97 | return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1); 98 | Pred = FCmpInst::FCMP_ORD; break; 99 | } 100 | return Builder->CreateFCmp(Pred, LHS, RHS); 101 | } 102 | 103 | /// \brief Transform BITWISE_OP(BSWAP(A),BSWAP(B)) to BSWAP(BITWISE_OP(A, B)) 104 | /// \param I Binary operator to transform. 105 | /// \return Pointer to node that must replace the original binary operator, or 106 | /// null pointer if no transformation was made. 107 | Value *InstCombiner::SimplifyBSwap(BinaryOperator &I) { 108 | IntegerType *ITy = dyn_cast(I.getType()); 109 | 110 | // Can't do vectors. 
111 | if (I.getType()->isVectorTy()) return nullptr; 112 | 113 | // Can only do bitwise ops. 114 | unsigned Op = I.getOpcode(); 115 | if (Op != Instruction::And && Op != Instruction::Or && 116 | Op != Instruction::Xor) 117 | return nullptr; 118 | 119 | Value *OldLHS = I.getOperand(0); 120 | Value *OldRHS = I.getOperand(1); 121 | ConstantInt *ConstLHS = dyn_cast(OldLHS); 122 | ConstantInt *ConstRHS = dyn_cast(OldRHS); 123 | IntrinsicInst *IntrLHS = dyn_cast(OldLHS); 124 | IntrinsicInst *IntrRHS = dyn_cast(OldRHS); 125 | bool IsBswapLHS = (IntrLHS && IntrLHS->getIntrinsicID() == Intrinsic::bswap); 126 | bool IsBswapRHS = (IntrRHS && IntrRHS->getIntrinsicID() == Intrinsic::bswap); 127 | 128 | if (!IsBswapLHS && !IsBswapRHS) 129 | return nullptr; 130 | 131 | if (!IsBswapLHS && !ConstLHS) 132 | return nullptr; 133 | 134 | if (!IsBswapRHS && !ConstRHS) 135 | return nullptr; 136 | 137 | /// OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) ) 138 | /// OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) ) 139 | Value *NewLHS = IsBswapLHS ? IntrLHS->getOperand(0) : 140 | Builder->getInt(ConstLHS->getValue().byteSwap()); 141 | 142 | Value *NewRHS = IsBswapRHS ? IntrRHS->getOperand(0) : 143 | Builder->getInt(ConstRHS->getValue().byteSwap()); 144 | 145 | Value *BinOp = nullptr; 146 | if (Op == Instruction::And) 147 | BinOp = Builder->CreateAnd(NewLHS, NewRHS); 148 | else if (Op == Instruction::Or) 149 | BinOp = Builder->CreateOr(NewLHS, NewRHS); 150 | else //if (Op == Instruction::Xor) 151 | BinOp = Builder->CreateXor(NewLHS, NewRHS); 152 | 153 | Module *M = I.getParent()->getParent()->getParent(); 154 | Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy); 155 | return Builder->CreateCall(F, BinOp); 156 | } 157 | 158 | /// This handles expressions of the form ((val OP C1) & C2). Where 159 | /// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is 160 | /// guaranteed to be a binary operator. 
161 | Instruction *InstCombiner::OptAndOp(Instruction *Op, 162 | ConstantInt *OpRHS, 163 | ConstantInt *AndRHS, 164 | BinaryOperator &TheAnd) { 165 | Value *X = Op->getOperand(0); 166 | Constant *Together = nullptr; 167 | if (!Op->isShift()) 168 | Together = ConstantExpr::getAnd(AndRHS, OpRHS); 169 | 170 | switch (Op->getOpcode()) { 171 | case Instruction::Xor: 172 | if (Op->hasOneUse()) { 173 | // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2) 174 | Value *And = Builder->CreateAnd(X, AndRHS); 175 | And->takeName(Op); 176 | return BinaryOperator::CreateXor(And, Together); 177 | } 178 | break; 179 | case Instruction::Or: 180 | if (Op->hasOneUse()){ 181 | if (Together != OpRHS) { 182 | // (X | C1) & C2 --> (X | (C1&C2)) & C2 183 | Value *Or = Builder->CreateOr(X, Together); 184 | Or->takeName(Op); 185 | return BinaryOperator::CreateAnd(Or, AndRHS); 186 | } 187 | 188 | ConstantInt *TogetherCI = dyn_cast(Together); 189 | if (TogetherCI && !TogetherCI->isZero()){ 190 | // (X | C1) & C2 --> (X & (C2^(C1&C2))) | C1 191 | // NOTE: This reduces the number of bits set in the & mask, which 192 | // can expose opportunities for store narrowing. 193 | Together = ConstantExpr::getXor(AndRHS, Together); 194 | Value *And = Builder->CreateAnd(X, Together); 195 | And->takeName(Op); 196 | return BinaryOperator::CreateOr(And, OpRHS); 197 | } 198 | } 199 | 200 | break; 201 | case Instruction::Add: 202 | if (Op->hasOneUse()) { 203 | // Adding a one to a single bit bit-field should be turned into an XOR 204 | // of the bit. First thing to check is to see if this AND is with a 205 | // single bit constant. 206 | const APInt &AndRHSV = AndRHS->getValue(); 207 | 208 | // If there is only one bit set. 209 | if (AndRHSV.isPowerOf2()) { 210 | // Ok, at this point, we know that we are masking the result of the 211 | // ADD down to exactly one bit. If the constant we are adding has 212 | // no bits set below this bit, then we can eliminate the ADD. 
213 | const APInt& AddRHS = OpRHS->getValue(); 214 | 215 | // Check to see if any bits below the one bit set in AndRHSV are set. 216 | if ((AddRHS & (AndRHSV-1)) == 0) { 217 | // If not, the only thing that can effect the output of the AND is 218 | // the bit specified by AndRHSV. If that bit is set, the effect of 219 | // the XOR is to toggle the bit. If it is clear, then the ADD has 220 | // no effect. 221 | if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop 222 | TheAnd.setOperand(0, X); 223 | return &TheAnd; 224 | } else { 225 | // Pull the XOR out of the AND. 226 | Value *NewAnd = Builder->CreateAnd(X, AndRHS); 227 | NewAnd->takeName(Op); 228 | return BinaryOperator::CreateXor(NewAnd, AndRHS); 229 | } 230 | } 231 | } 232 | } 233 | break; 234 | 235 | case Instruction::Shl: { 236 | // We know that the AND will not produce any of the bits shifted in, so if 237 | // the anded constant includes them, clear them now! 238 | // 239 | uint32_t BitWidth = AndRHS->getType()->getBitWidth(); 240 | uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); 241 | APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal)); 242 | ConstantInt *CI = Builder->getInt(AndRHS->getValue() & ShlMask); 243 | 244 | if (CI->getValue() == ShlMask) 245 | // Masking out bits that the shift already masks. 246 | return ReplaceInstUsesWith(TheAnd, Op); // No need for the and. 247 | 248 | if (CI != AndRHS) { // Reducing bits set in and. 249 | TheAnd.setOperand(1, CI); 250 | return &TheAnd; 251 | } 252 | break; 253 | } 254 | case Instruction::LShr: { 255 | // We know that the AND will not produce any of the bits shifted in, so if 256 | // the anded constant includes them, clear them now! This only applies to 257 | // unsigned shifts, because a signed shr may bring in set bits! 
258 | // 259 | uint32_t BitWidth = AndRHS->getType()->getBitWidth(); 260 | uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); 261 | APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); 262 | ConstantInt *CI = Builder->getInt(AndRHS->getValue() & ShrMask); 263 | 264 | if (CI->getValue() == ShrMask) 265 | // Masking out bits that the shift already masks. 266 | return ReplaceInstUsesWith(TheAnd, Op); 267 | 268 | if (CI != AndRHS) { 269 | TheAnd.setOperand(1, CI); // Reduce bits set in and cst. 270 | return &TheAnd; 271 | } 272 | break; 273 | } 274 | case Instruction::AShr: 275 | // Signed shr. 276 | // See if this is shifting in some sign extension, then masking it out 277 | // with an and. 278 | if (Op->hasOneUse()) { 279 | uint32_t BitWidth = AndRHS->getType()->getBitWidth(); 280 | uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); 281 | APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); 282 | Constant *C = Builder->getInt(AndRHS->getValue() & ShrMask); 283 | if (C == AndRHS) { // Masking out bits shifted in. 284 | // (Val ashr C1) & C2 -> (Val lshr C1) & C2 285 | // Make the argument unsigned. 286 | Value *ShVal = Op->getOperand(0); 287 | ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName()); 288 | return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName()); 289 | } 290 | } 291 | break; 292 | } 293 | return nullptr; 294 | } 295 | 296 | /// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise 297 | /// (V < Lo || V >= Hi). In practice, we emit the more efficient 298 | /// (V-Lo) \(ConstantExpr::getICmp((isSigned ? 304 | ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() && 305 | "Lo is not <= Hi in range emission code!"); 306 | 307 | if (Inside) { 308 | if (Lo == Hi) // Trivially false. 309 | return Builder->getFalse(); 310 | 311 | // V >= Min && V < Hi --> V < Hi 312 | if (cast(Lo)->isMinValue(isSigned)) { 313 | ICmpInst::Predicate pred = (isSigned ? 
314 | ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT); 315 | return Builder->CreateICmp(pred, V, Hi); 316 | } 317 | 318 | // Emit V-Lo CreateAdd(V, NegLo, V->getName()+".off"); 321 | Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi); 322 | return Builder->CreateICmpULT(Add, UpperBound); 323 | } 324 | 325 | if (Lo == Hi) // Trivially true. 326 | return Builder->getTrue(); 327 | 328 | // V < Min || V >= Hi -> V > Hi-1 329 | Hi = SubOne(cast(Hi)); 330 | if (cast(Lo)->isMinValue(isSigned)) { 331 | ICmpInst::Predicate pred = (isSigned ? 332 | ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT); 333 | return Builder->CreateICmp(pred, V, Hi); 334 | } 335 | 336 | // Emit V-Lo >u Hi-1-Lo 337 | // Note that Hi has already had one subtracted from it, above. 338 | ConstantInt *NegLo = cast(ConstantExpr::getNeg(Lo)); 339 | Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); 340 | Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi); 341 | return Builder->CreateICmpUGT(Add, LowerBound); 342 | } 343 | 344 | /// Returns true iff Val consists of one contiguous run of 1s with any number 345 | /// of 0s on either side. The 1s are allowed to wrap from LSB to MSB, 346 | /// so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is 347 | /// not, since all 1s are not contiguous. 348 | static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) { 349 | const APInt& V = Val->getValue(); 350 | uint32_t BitWidth = Val->getType()->getBitWidth(); 351 | if (!APIntOps::isShiftedMask(BitWidth, V)) return false; 352 | 353 | // look for the first zero bit after the run of ones 354 | MB = BitWidth - ((V - 1) ^ V).countLeadingZeros(); 355 | // look for the first non-zero bit 356 | ME = V.getActiveBits(); 357 | return true; 358 | } 359 | 360 | /// This is part of an expression (LHS +/- RHS) & Mask, where isSub determines 361 | /// whether the operator is a sub. 
If we can fold one of the following xforms: 362 | /// 363 | /// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask 364 | /// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 365 | /// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 366 | /// 367 | /// return (A +/- B). 368 | /// 369 | Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, 370 | ConstantInt *Mask, bool isSub, 371 | Instruction &I) { 372 | Instruction *LHSI = dyn_cast(LHS); 373 | if (!LHSI || LHSI->getNumOperands() != 2 || 374 | !isa(LHSI->getOperand(1))) return nullptr; 375 | 376 | ConstantInt *N = cast(LHSI->getOperand(1)); 377 | 378 | switch (LHSI->getOpcode()) { 379 | default: return nullptr; 380 | case Instruction::And: 381 | if (ConstantExpr::getAnd(N, Mask) == Mask) { 382 | // If the AndRHS is a power of two minus one (0+1+), this is simple. 383 | if ((Mask->getValue().countLeadingZeros() + 384 | Mask->getValue().countPopulation()) == 385 | Mask->getValue().getBitWidth()) 386 | break; 387 | 388 | // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+ 389 | // part, we don't need any explicit masks to take them out of A. If that 390 | // is all N is, ignore it. 
391 | uint32_t MB = 0, ME = 0; 392 | if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive 393 | uint32_t BitWidth = cast(RHS->getType())->getBitWidth(); 394 | APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1)); 395 | if (MaskedValueIsZero(RHS, Mask, 0, &I)) 396 | break; 397 | } 398 | } 399 | return nullptr; 400 | case Instruction::Or: 401 | case Instruction::Xor: 402 | // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0 403 | if ((Mask->getValue().countLeadingZeros() + 404 | Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth() 405 | && ConstantExpr::getAnd(N, Mask)->isNullValue()) 406 | break; 407 | return nullptr; 408 | } 409 | 410 | if (isSub) 411 | return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold"); 412 | return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold"); 413 | } 414 | 415 | /// enum for classifying (icmp eq (A & B), C) and (icmp ne (A & B), C) 416 | /// One of A and B is considered the mask, the other the value. This is 417 | /// described as the "AMask" or "BMask" part of the enum. If the enum 418 | /// contains only "Mask", then both A and B can be considered masks. 419 | /// If A is the mask, then it was proven, that (A & C) == C. This 420 | /// is trivial if C == A, or C == 0. If both A and C are constants, this 421 | /// proof is also easy. 422 | /// For the following explanations we assume that A is the mask. 423 | /// The part "AllOnes" declares, that the comparison is true only 424 | /// if (A & B) == A, or all bits of A are set in B. 425 | /// Example: (icmp eq (A & 3), 3) -> FoldMskICmp_AMask_AllOnes 426 | /// The part "AllZeroes" declares, that the comparison is true only 427 | /// if (A & B) == 0, or all bits of A are cleared in B. 428 | /// Example: (icmp eq (A & 3), 0) -> FoldMskICmp_Mask_AllZeroes 429 | /// The part "Mixed" declares, that (A & B) == C and C might or might not 430 | /// contain any number of one bits and zero bits. 
431 | /// Example: (icmp eq (A & 3), 1) -> FoldMskICmp_AMask_Mixed 432 | /// The Part "Not" means, that in above descriptions "==" should be replaced 433 | /// by "!=". 434 | /// Example: (icmp ne (A & 3), 3) -> FoldMskICmp_AMask_NotAllOnes 435 | /// If the mask A contains a single bit, then the following is equivalent: 436 | /// (icmp eq (A & B), A) equals (icmp ne (A & B), 0) 437 | /// (icmp ne (A & B), A) equals (icmp eq (A & B), 0) 438 | enum MaskedICmpType { 439 | FoldMskICmp_AMask_AllOnes = 1, 440 | FoldMskICmp_AMask_NotAllOnes = 2, 441 | FoldMskICmp_BMask_AllOnes = 4, 442 | FoldMskICmp_BMask_NotAllOnes = 8, 443 | FoldMskICmp_Mask_AllZeroes = 16, 444 | FoldMskICmp_Mask_NotAllZeroes = 32, 445 | FoldMskICmp_AMask_Mixed = 64, 446 | FoldMskICmp_AMask_NotMixed = 128, 447 | FoldMskICmp_BMask_Mixed = 256, 448 | FoldMskICmp_BMask_NotMixed = 512 449 | }; 450 | 451 | /// Return the set of pattern classes (from MaskedICmpType) 452 | /// that (icmp SCC (A & B), C) satisfies. 453 | static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, 454 | ICmpInst::Predicate SCC) 455 | { 456 | ConstantInt *ACst = dyn_cast(A); 457 | ConstantInt *BCst = dyn_cast(B); 458 | ConstantInt *CCst = dyn_cast(C); 459 | bool icmp_eq = (SCC == ICmpInst::ICMP_EQ); 460 | bool icmp_abit = (ACst && !ACst->isZero() && 461 | ACst->getValue().isPowerOf2()); 462 | bool icmp_bbit = (BCst && !BCst->isZero() && 463 | BCst->getValue().isPowerOf2()); 464 | unsigned result = 0; 465 | if (CCst && CCst->isZero()) { 466 | // if C is zero, then both A and B qualify as mask 467 | result |= (icmp_eq ? (FoldMskICmp_Mask_AllZeroes | 468 | FoldMskICmp_Mask_AllZeroes | 469 | FoldMskICmp_AMask_Mixed | 470 | FoldMskICmp_BMask_Mixed) 471 | : (FoldMskICmp_Mask_NotAllZeroes | 472 | FoldMskICmp_Mask_NotAllZeroes | 473 | FoldMskICmp_AMask_NotMixed | 474 | FoldMskICmp_BMask_NotMixed)); 475 | if (icmp_abit) 476 | result |= (icmp_eq ? 
(FoldMskICmp_AMask_NotAllOnes | 477 | FoldMskICmp_AMask_NotMixed) 478 | : (FoldMskICmp_AMask_AllOnes | 479 | FoldMskICmp_AMask_Mixed)); 480 | if (icmp_bbit) 481 | result |= (icmp_eq ? (FoldMskICmp_BMask_NotAllOnes | 482 | FoldMskICmp_BMask_NotMixed) 483 | : (FoldMskICmp_BMask_AllOnes | 484 | FoldMskICmp_BMask_Mixed)); 485 | return result; 486 | } 487 | if (A == C) { 488 | result |= (icmp_eq ? (FoldMskICmp_AMask_AllOnes | 489 | FoldMskICmp_AMask_Mixed) 490 | : (FoldMskICmp_AMask_NotAllOnes | 491 | FoldMskICmp_AMask_NotMixed)); 492 | if (icmp_abit) 493 | result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes | 494 | FoldMskICmp_AMask_NotMixed) 495 | : (FoldMskICmp_Mask_AllZeroes | 496 | FoldMskICmp_AMask_Mixed)); 497 | } else if (ACst && CCst && 498 | ConstantExpr::getAnd(ACst, CCst) == CCst) { 499 | result |= (icmp_eq ? FoldMskICmp_AMask_Mixed 500 | : FoldMskICmp_AMask_NotMixed); 501 | } 502 | if (B == C) { 503 | result |= (icmp_eq ? (FoldMskICmp_BMask_AllOnes | 504 | FoldMskICmp_BMask_Mixed) 505 | : (FoldMskICmp_BMask_NotAllOnes | 506 | FoldMskICmp_BMask_NotMixed)); 507 | if (icmp_bbit) 508 | result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes | 509 | FoldMskICmp_BMask_NotMixed) 510 | : (FoldMskICmp_Mask_AllZeroes | 511 | FoldMskICmp_BMask_Mixed)); 512 | } else if (BCst && CCst && 513 | ConstantExpr::getAnd(BCst, CCst) == CCst) { 514 | result |= (icmp_eq ? FoldMskICmp_BMask_Mixed 515 | : FoldMskICmp_BMask_NotMixed); 516 | } 517 | return result; 518 | } 519 | 520 | /// Convert an analysis of a masked ICmp into its equivalent if all boolean 521 | /// operations had the opposite sense. Since each "NotXXX" flag (recording !=) 522 | /// is adjacent to the corresponding normal flag (recording ==), this just 523 | /// involves swapping those bits over. 
524 | static unsigned conjugateICmpMask(unsigned Mask) { 525 | unsigned NewMask; 526 | NewMask = (Mask & (FoldMskICmp_AMask_AllOnes | FoldMskICmp_BMask_AllOnes | 527 | FoldMskICmp_Mask_AllZeroes | FoldMskICmp_AMask_Mixed | 528 | FoldMskICmp_BMask_Mixed)) 529 | << 1; 530 | 531 | NewMask |= 532 | (Mask & (FoldMskICmp_AMask_NotAllOnes | FoldMskICmp_BMask_NotAllOnes | 533 | FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_AMask_NotMixed | 534 | FoldMskICmp_BMask_NotMixed)) 535 | >> 1; 536 | 537 | return NewMask; 538 | } 539 | 540 | /// Decompose an icmp into the form ((X & Y) pred Z) if possible. 541 | /// The returned predicate is either == or !=. Returns false if 542 | /// decomposition fails. 543 | static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred, 544 | Value *&X, Value *&Y, Value *&Z) { 545 | ConstantInt *C = dyn_cast(I->getOperand(1)); 546 | if (!C) 547 | return false; 548 | 549 | switch (I->getPredicate()) { 550 | default: 551 | return false; 552 | case ICmpInst::ICMP_SLT: 553 | // X < 0 is equivalent to (X & SignBit) != 0. 554 | if (!C->isZero()) 555 | return false; 556 | Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth())); 557 | Pred = ICmpInst::ICMP_NE; 558 | break; 559 | case ICmpInst::ICMP_SGT: 560 | // X > -1 is equivalent to (X & SignBit) == 0. 561 | if (!C->isAllOnesValue()) 562 | return false; 563 | Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth())); 564 | Pred = ICmpInst::ICMP_EQ; 565 | break; 566 | case ICmpInst::ICMP_ULT: 567 | // X getValue().isPowerOf2()) 569 | return false; 570 | Y = ConstantInt::get(I->getContext(), -C->getValue()); 571 | Pred = ICmpInst::ICMP_EQ; 572 | break; 573 | case ICmpInst::ICMP_UGT: 574 | // X >u 2^n-1 is equivalent to (X & ~(2^n-1)) != 0. 
575 | if (!(C->getValue() + 1).isPowerOf2()) 576 | return false; 577 | Y = ConstantInt::get(I->getContext(), ~C->getValue()); 578 | Pred = ICmpInst::ICMP_NE; 579 | break; 580 | } 581 | 582 | X = I->getOperand(0); 583 | Z = ConstantInt::getNullValue(C->getType()); 584 | return true; 585 | } 586 | 587 | /// Handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) 588 | /// Return the set of pattern classes (from MaskedICmpType) 589 | /// that both LHS and RHS satisfy. 590 | static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, 591 | Value*& B, Value*& C, 592 | Value*& D, Value*& E, 593 | ICmpInst *LHS, ICmpInst *RHS, 594 | ICmpInst::Predicate &LHSCC, 595 | ICmpInst::Predicate &RHSCC) { 596 | if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) return 0; 597 | // vectors are not (yet?) supported 598 | if (LHS->getOperand(0)->getType()->isVectorTy()) return 0; 599 | 600 | // Here comes the tricky part: 601 | // LHS might be of the form L11 & L12 == X, X == L21 & L22, 602 | // and L11 & L12 == L21 & L22. The same goes for RHS. 603 | // Now we must find those components L** and R**, that are equal, so 604 | // that we can extract the parameters A, B, C, D, and E for the canonical 605 | // above. 606 | Value *L1 = LHS->getOperand(0); 607 | Value *L2 = LHS->getOperand(1); 608 | Value *L11,*L12,*L21,*L22; 609 | // Check whether the icmp can be decomposed into a bit test. 610 | if (decomposeBitTestICmp(LHS, LHSCC, L11, L12, L2)) { 611 | L21 = L22 = L1 = nullptr; 612 | } else { 613 | // Look for ANDs in the LHS icmp. 614 | if (!L1->getType()->isIntegerTy()) { 615 | // You can icmp pointers, for example. They really aren't masks. 616 | L11 = L12 = nullptr; 617 | } else if (!match(L1, m_And(m_Value(L11), m_Value(L12)))) { 618 | // Any icmp can be viewed as being trivially masked; if it allows us to 619 | // remove one, it's worth it. 
620 | L11 = L1; 621 | L12 = Constant::getAllOnesValue(L1->getType()); 622 | } 623 | 624 | if (!L2->getType()->isIntegerTy()) { 625 | // You can icmp pointers, for example. They really aren't masks. 626 | L21 = L22 = nullptr; 627 | } else if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) { 628 | L21 = L2; 629 | L22 = Constant::getAllOnesValue(L2->getType()); 630 | } 631 | } 632 | 633 | // Bail if LHS was a icmp that can't be decomposed into an equality. 634 | if (!ICmpInst::isEquality(LHSCC)) 635 | return 0; 636 | 637 | Value *R1 = RHS->getOperand(0); 638 | Value *R2 = RHS->getOperand(1); 639 | Value *R11,*R12; 640 | bool ok = false; 641 | if (decomposeBitTestICmp(RHS, RHSCC, R11, R12, R2)) { 642 | if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { 643 | A = R11; D = R12; 644 | } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { 645 | A = R12; D = R11; 646 | } else { 647 | return 0; 648 | } 649 | E = R2; R1 = nullptr; ok = true; 650 | } else if (R1->getType()->isIntegerTy()) { 651 | if (!match(R1, m_And(m_Value(R11), m_Value(R12)))) { 652 | // As before, model no mask as a trivial mask if it'll let us do an 653 | // optimization. 654 | R11 = R1; 655 | R12 = Constant::getAllOnesValue(R1->getType()); 656 | } 657 | 658 | if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { 659 | A = R11; D = R12; E = R2; ok = true; 660 | } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { 661 | A = R12; D = R11; E = R2; ok = true; 662 | } 663 | } 664 | 665 | // Bail if RHS was a icmp that can't be decomposed into an equality. 666 | if (!ICmpInst::isEquality(RHSCC)) 667 | return 0; 668 | 669 | // Look for ANDs in on the right side of the RHS icmp. 
670 | if (!ok && R2->getType()->isIntegerTy()) { 671 | if (!match(R2, m_And(m_Value(R11), m_Value(R12)))) { 672 | R11 = R2; 673 | R12 = Constant::getAllOnesValue(R2->getType()); 674 | } 675 | 676 | if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { 677 | A = R11; D = R12; E = R1; ok = true; 678 | } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { 679 | A = R12; D = R11; E = R1; ok = true; 680 | } else { 681 | return 0; 682 | } 683 | } 684 | if (!ok) 685 | return 0; 686 | 687 | if (L11 == A) { 688 | B = L12; C = L2; 689 | } else if (L12 == A) { 690 | B = L11; C = L2; 691 | } else if (L21 == A) { 692 | B = L22; C = L1; 693 | } else if (L22 == A) { 694 | B = L21; C = L1; 695 | } 696 | 697 | unsigned left_type = getTypeOfMaskedICmp(A, B, C, LHSCC); 698 | unsigned right_type = getTypeOfMaskedICmp(A, D, E, RHSCC); 699 | return left_type & right_type; 700 | } 701 | 702 | /// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) 703 | /// into a single (icmp(A & X) ==/!= Y). 704 | static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, 705 | llvm::InstCombiner::BuilderTy *Builder) { 706 | Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr; 707 | ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); 708 | unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS, 709 | LHSCC, RHSCC); 710 | if (mask == 0) return nullptr; 711 | assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) && 712 | "foldLogOpOfMaskedICmpsHelper must return an equality predicate."); 713 | 714 | // In full generality: 715 | // (icmp (A & B) Op C) | (icmp (A & D) Op E) 716 | // == ![ (icmp (A & B) !Op C) & (icmp (A & D) !Op E) ] 717 | // 718 | // If the latter can be converted into (icmp (A & X) Op Y) then the former is 719 | // equivalent to (icmp (A & X) !Op Y). 
720 | // 721 | // Therefore, we can pretend for the rest of this function that we're dealing 722 | // with the conjunction, provided we flip the sense of any comparisons (both 723 | // input and output). 724 | 725 | // In most cases we're going to produce an EQ for the "&&" case. 726 | ICmpInst::Predicate NEWCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE; 727 | if (!IsAnd) { 728 | // Convert the masking analysis into its equivalent with negated 729 | // comparisons. 730 | mask = conjugateICmpMask(mask); 731 | } 732 | 733 | if (mask & FoldMskICmp_Mask_AllZeroes) { 734 | // (icmp eq (A & B), 0) & (icmp eq (A & D), 0) 735 | // -> (icmp eq (A & (B|D)), 0) 736 | Value *newOr = Builder->CreateOr(B, D); 737 | Value *newAnd = Builder->CreateAnd(A, newOr); 738 | // we can't use C as zero, because we might actually handle 739 | // (icmp ne (A & B), B) & (icmp ne (A & D), D) 740 | // with B and D, having a single bit set 741 | Value *zero = Constant::getNullValue(A->getType()); 742 | return Builder->CreateICmp(NEWCC, newAnd, zero); 743 | } 744 | if (mask & FoldMskICmp_BMask_AllOnes) { 745 | // (icmp eq (A & B), B) & (icmp eq (A & D), D) 746 | // -> (icmp eq (A & (B|D)), (B|D)) 747 | Value *newOr = Builder->CreateOr(B, D); 748 | Value *newAnd = Builder->CreateAnd(A, newOr); 749 | return Builder->CreateICmp(NEWCC, newAnd, newOr); 750 | } 751 | if (mask & FoldMskICmp_AMask_AllOnes) { 752 | // (icmp eq (A & B), A) & (icmp eq (A & D), A) 753 | // -> (icmp eq (A & (B&D)), A) 754 | Value *newAnd1 = Builder->CreateAnd(B, D); 755 | Value *newAnd = Builder->CreateAnd(A, newAnd1); 756 | return Builder->CreateICmp(NEWCC, newAnd, A); 757 | } 758 | 759 | // Remaining cases assume at least that B and D are constant, and depend on 760 | // their actual values. This isn't strictly, necessary, just a "handle the 761 | // easy cases for now" decision. 
762 | ConstantInt *BCst = dyn_cast(B); 763 | if (!BCst) return nullptr; 764 | ConstantInt *DCst = dyn_cast(D); 765 | if (!DCst) return nullptr; 766 | 767 | if (mask & (FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_BMask_NotAllOnes)) { 768 | // (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and 769 | // (icmp ne (A & B), B) & (icmp ne (A & D), D) 770 | // -> (icmp ne (A & B), 0) or (icmp ne (A & D), 0) 771 | // Only valid if one of the masks is a superset of the other (check "B&D" is 772 | // the same as either B or D). 773 | APInt NewMask = BCst->getValue() & DCst->getValue(); 774 | 775 | if (NewMask == BCst->getValue()) 776 | return LHS; 777 | else if (NewMask == DCst->getValue()) 778 | return RHS; 779 | } 780 | if (mask & FoldMskICmp_AMask_NotAllOnes) { 781 | // (icmp ne (A & B), B) & (icmp ne (A & D), D) 782 | // -> (icmp ne (A & B), A) or (icmp ne (A & D), A) 783 | // Only valid if one of the masks is a superset of the other (check "B|D" is 784 | // the same as either B or D). 785 | APInt NewMask = BCst->getValue() | DCst->getValue(); 786 | 787 | if (NewMask == BCst->getValue()) 788 | return LHS; 789 | else if (NewMask == DCst->getValue()) 790 | return RHS; 791 | } 792 | if (mask & FoldMskICmp_BMask_Mixed) { 793 | // (icmp eq (A & B), C) & (icmp eq (A & D), E) 794 | // We already know that B & C == C && D & E == E. 795 | // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of 796 | // C and E, which are shared by both the mask B and the mask D, don't 797 | // contradict, then we can transform to 798 | // -> (icmp eq (A & (B|D)), (C|E)) 799 | // Currently, we only handle the case of B, C, D, and E being constant. 
800 | // we can't simply use C and E, because we might actually handle 801 | // (icmp ne (A & B), B) & (icmp eq (A & D), D) 802 | // with B and D, having a single bit set 803 | ConstantInt *CCst = dyn_cast(C); 804 | if (!CCst) return nullptr; 805 | ConstantInt *ECst = dyn_cast(E); 806 | if (!ECst) return nullptr; 807 | if (LHSCC != NEWCC) 808 | CCst = cast(ConstantExpr::getXor(BCst, CCst)); 809 | if (RHSCC != NEWCC) 810 | ECst = cast(ConstantExpr::getXor(DCst, ECst)); 811 | // if there is a conflict we should actually return a false for the 812 | // whole construct 813 | if (((BCst->getValue() & DCst->getValue()) & 814 | (CCst->getValue() ^ ECst->getValue())) != 0) 815 | return ConstantInt::get(LHS->getType(), !IsAnd); 816 | Value *newOr1 = Builder->CreateOr(B, D); 817 | Value *newOr2 = ConstantExpr::getOr(CCst, ECst); 818 | Value *newAnd = Builder->CreateAnd(A, newOr1); 819 | return Builder->CreateICmp(NEWCC, newAnd, newOr2); 820 | } 821 | return nullptr; 822 | } 823 | 824 | /// Try to fold a signed range checked with lower bound 0 to an unsigned icmp. 825 | /// Example: (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n 826 | /// If \p Inverted is true then the check is for the inverted range, e.g. 827 | /// (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n 828 | Value *InstCombiner::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, 829 | bool Inverted) { 830 | // Check the lower range comparison, e.g. x >= 0 831 | // InstCombine already ensured that if there is a constant it's on the RHS. 832 | ConstantInt *RangeStart = dyn_cast(Cmp0->getOperand(1)); 833 | if (!RangeStart) 834 | return nullptr; 835 | 836 | ICmpInst::Predicate Pred0 = (Inverted ? Cmp0->getInversePredicate() : 837 | Cmp0->getPredicate()); 838 | 839 | // Accept x > -1 or x >= 0 (after potentially inverting the predicate). 
840 | if (!((Pred0 == ICmpInst::ICMP_SGT && RangeStart->isMinusOne()) || 841 | (Pred0 == ICmpInst::ICMP_SGE && RangeStart->isZero()))) 842 | return nullptr; 843 | 844 | ICmpInst::Predicate Pred1 = (Inverted ? Cmp1->getInversePredicate() : 845 | Cmp1->getPredicate()); 846 | 847 | Value *Input = Cmp0->getOperand(0); 848 | Value *RangeEnd; 849 | if (Cmp1->getOperand(0) == Input) { 850 | // For the upper range compare we have: icmp x, n 851 | RangeEnd = Cmp1->getOperand(1); 852 | } else if (Cmp1->getOperand(1) == Input) { 853 | // For the upper range compare we have: icmp n, x 854 | RangeEnd = Cmp1->getOperand(0); 855 | Pred1 = ICmpInst::getSwappedPredicate(Pred1); 856 | } else { 857 | return nullptr; 858 | } 859 | 860 | // Check the upper range comparison, e.g. x < n 861 | ICmpInst::Predicate NewPred; 862 | switch (Pred1) { 863 | case ICmpInst::ICMP_SLT: NewPred = ICmpInst::ICMP_ULT; break; 864 | case ICmpInst::ICMP_SLE: NewPred = ICmpInst::ICMP_ULE; break; 865 | default: return nullptr; 866 | } 867 | 868 | // This simplification is only valid if the upper range is not negative. 869 | bool IsNegative, IsNotNegative; 870 | ComputeSignBit(RangeEnd, IsNotNegative, IsNegative, /*Depth=*/0, Cmp1); 871 | if (!IsNotNegative) 872 | return nullptr; 873 | 874 | if (Inverted) 875 | NewPred = ICmpInst::getInversePredicate(NewPred); 876 | 877 | return Builder->CreateICmp(NewPred, Input, RangeEnd); 878 | } 879 | 880 | /// Fold (icmp)&(icmp) if possible. 
881 | Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { 882 | ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); 883 | 884 | // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) 885 | if (PredicatesFoldable(LHSCC, RHSCC)) { 886 | if (LHS->getOperand(0) == RHS->getOperand(1) && 887 | LHS->getOperand(1) == RHS->getOperand(0)) 888 | LHS->swapOperands(); 889 | if (LHS->getOperand(0) == RHS->getOperand(0) && 890 | LHS->getOperand(1) == RHS->getOperand(1)) { 891 | Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); 892 | unsigned Code = getICmpCode(LHS) & getICmpCode(RHS); 893 | bool isSigned = LHS->isSigned() || RHS->isSigned(); 894 | return getNewICmpValue(isSigned, Code, Op0, Op1, Builder); 895 | } 896 | } 897 | 898 | // handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E) 899 | if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, true, Builder)) 900 | return V; 901 | 902 | // E.g. (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n 903 | if (Value *V = simplifyRangeCheck(LHS, RHS, /*Inverted=*/false)) 904 | return V; 905 | 906 | // E.g. (icmp slt x, n) & (icmp sge x, 0) --> icmp ult x, n 907 | if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/false)) 908 | return V; 909 | 910 | // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). 
911 | Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); 912 | ConstantInt *LHSCst = dyn_cast(LHS->getOperand(1)); 913 | ConstantInt *RHSCst = dyn_cast(RHS->getOperand(1)); 914 | if (!LHSCst || !RHSCst) return nullptr; 915 | 916 | if (LHSCst == RHSCst && LHSCC == RHSCC) { 917 | // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C) 918 | // where C is a power of 2 919 | if (LHSCC == ICmpInst::ICMP_ULT && 920 | LHSCst->getValue().isPowerOf2()) { 921 | Value *NewOr = Builder->CreateOr(Val, Val2); 922 | return Builder->CreateICmp(LHSCC, NewOr, LHSCst); 923 | } 924 | 925 | // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) 926 | if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) { 927 | Value *NewOr = Builder->CreateOr(Val, Val2); 928 | return Builder->CreateICmp(LHSCC, NewOr, LHSCst); 929 | } 930 | } 931 | 932 | // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2 933 | // where CMAX is the all ones value for the truncated type, 934 | // iff the lower bits of C2 and CA are zero. 935 | if (LHSCC == ICmpInst::ICMP_EQ && LHSCC == RHSCC && 936 | LHS->hasOneUse() && RHS->hasOneUse()) { 937 | Value *V; 938 | ConstantInt *AndCst, *SmallCst = nullptr, *BigCst = nullptr; 939 | 940 | // (trunc x) == C1 & (and x, CA) == C2 941 | // (and x, CA) == C2 & (trunc x) == C1 942 | if (match(Val2, m_Trunc(m_Value(V))) && 943 | match(Val, m_And(m_Specific(V), m_ConstantInt(AndCst)))) { 944 | SmallCst = RHSCst; 945 | BigCst = LHSCst; 946 | } else if (match(Val, m_Trunc(m_Value(V))) && 947 | match(Val2, m_And(m_Specific(V), m_ConstantInt(AndCst)))) { 948 | SmallCst = LHSCst; 949 | BigCst = RHSCst; 950 | } 951 | 952 | if (SmallCst && BigCst) { 953 | unsigned BigBitSize = BigCst->getType()->getBitWidth(); 954 | unsigned SmallBitSize = SmallCst->getType()->getBitWidth(); 955 | 956 | // Check that the low bits are zero. 
957 | APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize); 958 | if ((Low & AndCst->getValue()) == 0 && (Low & BigCst->getValue()) == 0) { 959 | Value *NewAnd = Builder->CreateAnd(V, Low | AndCst->getValue()); 960 | APInt N = SmallCst->getValue().zext(BigBitSize) | BigCst->getValue(); 961 | Value *NewVal = ConstantInt::get(AndCst->getType()->getContext(), N); 962 | return Builder->CreateICmp(LHSCC, NewAnd, NewVal); 963 | } 964 | } 965 | } 966 | 967 | // From here on, we only handle: 968 | // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler. 969 | if (Val != Val2) return nullptr; 970 | 971 | // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. 972 | if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || 973 | RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || 974 | LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || 975 | RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) 976 | return nullptr; 977 | 978 | // Make a constant range that's the intersection of the two icmp ranges. 979 | // If the intersection is empty, we know that the result is false. 980 | ConstantRange LHSRange = 981 | ConstantRange::makeAllowedICmpRegion(LHSCC, LHSCst->getValue()); 982 | ConstantRange RHSRange = 983 | ConstantRange::makeAllowedICmpRegion(RHSCC, RHSCst->getValue()); 984 | 985 | if (LHSRange.intersectWith(RHSRange).isEmptySet()) 986 | return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); 987 | 988 | // We can't fold (ugt x, C) & (sgt x, C2). 989 | if (!PredicatesFoldable(LHSCC, RHSCC)) 990 | return nullptr; 991 | 992 | // Ensure that the larger constant is on the RHS. 
993 | bool ShouldSwap; 994 | if (CmpInst::isSigned(LHSCC) || 995 | (ICmpInst::isEquality(LHSCC) && 996 | CmpInst::isSigned(RHSCC))) 997 | ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue()); 998 | else 999 | ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue()); 1000 | 1001 | if (ShouldSwap) { 1002 | std::swap(LHS, RHS); 1003 | std::swap(LHSCst, RHSCst); 1004 | std::swap(LHSCC, RHSCC); 1005 | } 1006 | 1007 | // At this point, we know we have two icmp instructions 1008 | // comparing a value against two constants and and'ing the result 1009 | // together. Because of the above check, we know that we only have 1010 | // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. We also know 1011 | // (from the icmp folding check above), that the two constants 1012 | // are not equal and that the larger constant is on the RHS 1013 | assert(LHSCst != RHSCst && "Compares not folded above?"); 1014 | 1015 | switch (LHSCC) { 1016 | default: llvm_unreachable("Unknown integer condition code!"); 1017 | case ICmpInst::ICMP_EQ: 1018 | switch (RHSCC) { 1019 | default: llvm_unreachable("Unknown integer condition code!"); 1020 | case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 1021 | case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 1022 | case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 1023 | return LHS; 1024 | } 1025 | case ICmpInst::ICMP_NE: 1026 | switch (RHSCC) { 1027 | default: llvm_unreachable("Unknown integer condition code!"); 1028 | case ICmpInst::ICMP_ULT: 1029 | if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13 1030 | return Builder->CreateICmpULT(Val, LHSCst); 1031 | if (LHSCst->isNullValue()) // (X != 0 & X u< 14) -> X-1 u< 13 1032 | return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, false, true); 1033 | break; // (X != 13 & X u< 15) -> no change 1034 | case ICmpInst::ICMP_SLT: 1035 | if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13 1036 | return Builder->CreateICmpSLT(Val, LHSCst); 1037 | break; // (X != 13 & X 
s< 15) -> no change 1038 | case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15 1039 | case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15 1040 | case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15 1041 | return RHS; 1042 | case ICmpInst::ICMP_NE: 1043 | // Special case to get the ordering right when the values wrap around 1044 | // zero. 1045 | if (LHSCst->getValue() == 0 && RHSCst->getValue().isAllOnesValue()) 1046 | std::swap(LHSCst, RHSCst); 1047 | if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1 1048 | Constant *AddCST = ConstantExpr::getNeg(LHSCst); 1049 | Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); 1050 | return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1), 1051 | Val->getName()+".cmp"); 1052 | } 1053 | break; // (X != 13 & X != 15) -> no change 1054 | } 1055 | break; 1056 | case ICmpInst::ICMP_ULT: 1057 | switch (RHSCC) { 1058 | default: llvm_unreachable("Unknown integer condition code!"); 1059 | case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false 1060 | case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false 1061 | return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); 1062 | case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change 1063 | break; 1064 | case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13 1065 | case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13 1066 | return LHS; 1067 | case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change 1068 | break; 1069 | } 1070 | break; 1071 | case ICmpInst::ICMP_SLT: 1072 | switch (RHSCC) { 1073 | default: llvm_unreachable("Unknown integer condition code!"); 1074 | case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change 1075 | break; 1076 | case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 1077 | case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13 1078 | return LHS; 1079 | case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change 1080 | break; 1081 | } 1082 | break; 1083 
| case ICmpInst::ICMP_UGT: 1084 | switch (RHSCC) { 1085 | default: llvm_unreachable("Unknown integer condition code!"); 1086 | case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15 1087 | case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15 1088 | return RHS; 1089 | case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change 1090 | break; 1091 | case ICmpInst::ICMP_NE: 1092 | if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14 1093 | return Builder->CreateICmp(LHSCC, Val, RHSCst); 1094 | break; // (X u> 13 & X != 15) -> no change 1095 | case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) 13 & X s< 15) -> no change 1098 | break; 1099 | } 1100 | break; 1101 | case ICmpInst::ICMP_SGT: 1102 | switch (RHSCC) { 1103 | default: llvm_unreachable("Unknown integer condition code!"); 1104 | case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15 1105 | case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15 1106 | return RHS; 1107 | case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change 1108 | break; 1109 | case ICmpInst::ICMP_NE: 1110 | if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14 1111 | return Builder->CreateICmp(LHSCC, Val, RHSCst); 1112 | break; // (X s> 13 & X != 15) -> no change 1113 | case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1 1114 | return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, true, true); 1115 | case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change 1116 | break; 1117 | } 1118 | break; 1119 | } 1120 | 1121 | return nullptr; 1122 | } 1123 | 1124 | /// Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of instcombine, this returns 1125 | /// a Value which should already be inserted into the function. 
1126 | Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { 1127 | if (LHS->getPredicate() == FCmpInst::FCMP_ORD && 1128 | RHS->getPredicate() == FCmpInst::FCMP_ORD) { 1129 | if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) 1130 | return nullptr; 1131 | 1132 | // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) 1133 | if (ConstantFP *LHSC = dyn_cast(LHS->getOperand(1))) 1134 | if (ConstantFP *RHSC = dyn_cast(RHS->getOperand(1))) { 1135 | // If either of the constants are nans, then the whole thing returns 1136 | // false. 1137 | if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) 1138 | return Builder->getFalse(); 1139 | return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); 1140 | } 1141 | 1142 | // Handle vector zeros. This occurs because the canonical form of 1143 | // "fcmp ord x,x" is "fcmp ord x, 0". 1144 | if (isa(LHS->getOperand(1)) && 1145 | isa(RHS->getOperand(1))) 1146 | return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); 1147 | return nullptr; 1148 | } 1149 | 1150 | Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); 1151 | Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); 1152 | FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); 1153 | 1154 | 1155 | if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { 1156 | // Swap RHS operands to match LHS. 1157 | Op1CC = FCmpInst::getSwappedPredicate(Op1CC); 1158 | std::swap(Op1LHS, Op1RHS); 1159 | } 1160 | 1161 | if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { 1162 | // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). 
1163 | if (Op0CC == Op1CC) 1164 | return Builder->CreateFCmp((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); 1165 | if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE) 1166 | return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); 1167 | if (Op0CC == FCmpInst::FCMP_TRUE) 1168 | return RHS; 1169 | if (Op1CC == FCmpInst::FCMP_TRUE) 1170 | return LHS; 1171 | 1172 | bool Op0Ordered; 1173 | bool Op1Ordered; 1174 | unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); 1175 | unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); 1176 | // uno && ord -> false 1177 | if (Op0Pred == 0 && Op1Pred == 0 && Op0Ordered != Op1Ordered) 1178 | return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); 1179 | if (Op1Pred == 0) { 1180 | std::swap(LHS, RHS); 1181 | std::swap(Op0Pred, Op1Pred); 1182 | std::swap(Op0Ordered, Op1Ordered); 1183 | } 1184 | if (Op0Pred == 0) { 1185 | // uno && ueq -> uno && (uno || eq) -> uno 1186 | // ord && olt -> ord && (ord && lt) -> olt 1187 | if (!Op0Ordered && (Op0Ordered == Op1Ordered)) 1188 | return LHS; 1189 | if (Op0Ordered && (Op0Ordered == Op1Ordered)) 1190 | return RHS; 1191 | 1192 | // uno && oeq -> uno && (ord && eq) -> false 1193 | if (!Op0Ordered) 1194 | return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); 1195 | // ord && ueq -> ord && (uno || eq) -> oeq 1196 | return getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS, Builder); 1197 | } 1198 | } 1199 | 1200 | return nullptr; 1201 | } 1202 | 1203 | /// Match De Morgan's Laws: 1204 | /// (~A & ~B) == (~(A | B)) 1205 | /// (~A | ~B) == (~(A & B)) 1206 | static Instruction *matchDeMorgansLaws(BinaryOperator &I, 1207 | InstCombiner::BuilderTy *Builder) { 1208 | auto Opcode = I.getOpcode(); 1209 | assert((Opcode == Instruction::And || Opcode == Instruction::Or) && 1210 | "Trying to match De Morgan's Laws with something other than and/or"); 1211 | // Flip the logic operation. 
1212 | if (Opcode == Instruction::And) 1213 | Opcode = Instruction::Or; 1214 | else 1215 | Opcode = Instruction::And; 1216 | 1217 | Value *Op0 = I.getOperand(0); 1218 | Value *Op1 = I.getOperand(1); 1219 | // TODO: Use pattern matchers instead of dyn_cast. 1220 | if (Value *Op0NotVal = dyn_castNotVal(Op0)) 1221 | if (Value *Op1NotVal = dyn_castNotVal(Op1)) 1222 | if (Op0->hasOneUse() && Op1->hasOneUse()) { 1223 | Value *LogicOp = Builder->CreateBinOp(Opcode, Op0NotVal, Op1NotVal, 1224 | I.getName() + ".demorgan"); 1225 | return BinaryOperator::CreateNot(LogicOp); 1226 | } 1227 | 1228 | // De Morgan's Law in disguise: 1229 | // (zext(bool A) ^ 1) & (zext(bool B) ^ 1) -> zext(~(A | B)) 1230 | // (zext(bool A) ^ 1) | (zext(bool B) ^ 1) -> zext(~(A & B)) 1231 | Value *A = nullptr; 1232 | Value *B = nullptr; 1233 | ConstantInt *C1 = nullptr; 1234 | if (match(Op0, m_OneUse(m_Xor(m_ZExt(m_Value(A)), m_ConstantInt(C1)))) && 1235 | match(Op1, m_OneUse(m_Xor(m_ZExt(m_Value(B)), m_Specific(C1))))) { 1236 | // TODO: This check could be loosened to handle different type sizes. 1237 | // Alternatively, we could fix the definition of m_Not to recognize a not 1238 | // operation hidden by a zext? 
1239 | if (A->getType()->isIntegerTy(1) && B->getType()->isIntegerTy(1) && 1240 | C1->isOne()) { 1241 | Value *LogicOp = Builder->CreateBinOp(Opcode, A, B, 1242 | I.getName() + ".demorgan"); 1243 | Value *Not = Builder->CreateNot(LogicOp); 1244 | return CastInst::CreateZExtOrBitCast(Not, I.getType()); 1245 | } 1246 | } 1247 | 1248 | return nullptr; 1249 | } 1250 | 1251 | Instruction *InstCombiner::visitAnd(BinaryOperator &I) { 1252 | bool Changed = SimplifyAssociativeOrCommutative(I); 1253 | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 1254 | 1255 | if (Value *V = SimplifyVectorOp(I)) 1256 | return ReplaceInstUsesWith(I, V); 1257 | 1258 | if (Value *V = SimplifyAndInst(Op0, Op1, DL, TLI, DT, AC)) 1259 | return ReplaceInstUsesWith(I, V); 1260 | 1261 | // (A|B)&(A|C) -> A|(B&C) etc 1262 | if (Value *V = SimplifyUsingDistributiveLaws(I)) 1263 | return ReplaceInstUsesWith(I, V); 1264 | 1265 | // See if we can simplify any instructions used by the instruction whose sole 1266 | // purpose is to compute bits we don't care about. 1267 | if (SimplifyDemandedInstructionBits(I)) 1268 | return &I; 1269 | 1270 | if (Value *V = SimplifyBSwap(I)) 1271 | return ReplaceInstUsesWith(I, V); 1272 | 1273 | if (ConstantInt *AndRHS = dyn_cast(Op1)) { 1274 | const APInt &AndRHSMask = AndRHS->getValue(); 1275 | 1276 | // Optimize a variety of ((val OP C1) & C2) combinations... 1277 | if (BinaryOperator *Op0I = dyn_cast(Op0)) { 1278 | Value *Op0LHS = Op0I->getOperand(0); 1279 | Value *Op0RHS = Op0I->getOperand(1); 1280 | switch (Op0I->getOpcode()) { 1281 | default: break; 1282 | case Instruction::Xor: 1283 | case Instruction::Or: { 1284 | // If the mask is only needed on one incoming arm, push it up. 1285 | if (!Op0I->hasOneUse()) break; 1286 | 1287 | APInt NotAndRHS(~AndRHSMask); 1288 | if (MaskedValueIsZero(Op0LHS, NotAndRHS, 0, &I)) { 1289 | // Not masking anything out for the LHS, move to RHS. 
1290 | Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS, 1291 | Op0RHS->getName()+".masked"); 1292 | return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS); 1293 | } 1294 | if (!isa(Op0RHS) && 1295 | MaskedValueIsZero(Op0RHS, NotAndRHS, 0, &I)) { 1296 | // Not masking anything out for the RHS, move to LHS. 1297 | Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS, 1298 | Op0LHS->getName()+".masked"); 1299 | return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS); 1300 | } 1301 | 1302 | break; 1303 | } 1304 | case Instruction::Add: 1305 | // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS. 1306 | // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 1307 | // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 1308 | if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I)) 1309 | return BinaryOperator::CreateAnd(V, AndRHS); 1310 | if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I)) 1311 | return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes 1312 | break; 1313 | 1314 | case Instruction::Sub: 1315 | // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS. 1316 | // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 1317 | // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 1318 | if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I)) 1319 | return BinaryOperator::CreateAnd(V, AndRHS); 1320 | 1321 | // -x & 1 -> x & 1 1322 | if (AndRHSMask == 1 && match(Op0LHS, m_Zero())) 1323 | return BinaryOperator::CreateAnd(Op0RHS, AndRHS); 1324 | 1325 | // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS 1326 | // has 1's for all bits that the subtraction with A might affect. 
1327 | if (Op0I->hasOneUse() && !match(Op0LHS, m_Zero())) { 1328 | uint32_t BitWidth = AndRHSMask.getBitWidth(); 1329 | uint32_t Zeros = AndRHSMask.countLeadingZeros(); 1330 | APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros); 1331 | 1332 | if (MaskedValueIsZero(Op0LHS, Mask, 0, &I)) { 1333 | Value *NewNeg = Builder->CreateNeg(Op0RHS); 1334 | return BinaryOperator::CreateAnd(NewNeg, AndRHS); 1335 | } 1336 | } 1337 | break; 1338 | 1339 | case Instruction::Shl: 1340 | case Instruction::LShr: 1341 | // (1 << x) & 1 --> zext(x == 0) 1342 | // (1 >> x) & 1 --> zext(x == 0) 1343 | if (AndRHSMask == 1 && Op0LHS == AndRHS) { 1344 | Value *NewICmp = 1345 | Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); 1346 | return new ZExtInst(NewICmp, I.getType()); 1347 | } 1348 | break; 1349 | } 1350 | 1351 | if (ConstantInt *Op0CI = dyn_cast(Op0I->getOperand(1))) 1352 | if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I)) 1353 | return Res; 1354 | } 1355 | 1356 | // If this is an integer truncation, and if the source is an 'and' with 1357 | // immediate, transform it. This frequently occurs for bitfield accesses. 1358 | { 1359 | Value *X = nullptr; ConstantInt *YC = nullptr; 1360 | if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) { 1361 | // Change: and (trunc (and X, YC) to T), C2 1362 | // into : and (trunc X to T), trunc(YC) & C2 1363 | // This will fold the two constants together, which may allow 1364 | // other simplifications. 1365 | Value *NewCast = Builder->CreateTrunc(X, I.getType(), "and.shrunk"); 1366 | Constant *C3 = ConstantExpr::getTrunc(YC, I.getType()); 1367 | C3 = ConstantExpr::getAnd(C3, AndRHS); 1368 | return BinaryOperator::CreateAnd(NewCast, C3); 1369 | } 1370 | } 1371 | 1372 | // Try to fold constant and into select arguments. 
1373 | if (SelectInst *SI = dyn_cast(Op0)) 1374 | if (Instruction *R = FoldOpIntoSelect(I, SI)) 1375 | return R; 1376 | if (isa(Op0)) 1377 | if (Instruction *NV = FoldOpIntoPhi(I)) 1378 | return NV; 1379 | } 1380 | 1381 | if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder)) 1382 | return DeMorgan; 1383 | 1384 | { 1385 | Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; 1386 | // (A|B) & ~(A&B) -> A^B 1387 | if (match(Op0, m_Or(m_Value(A), m_Value(B))) && 1388 | match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) && 1389 | ((A == C && B == D) || (A == D && B == C))) 1390 | return BinaryOperator::CreateXor(A, B); 1391 | 1392 | // ~(A&B) & (A|B) -> A^B 1393 | if (match(Op1, m_Or(m_Value(A), m_Value(B))) && 1394 | match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) && 1395 | ((A == C && B == D) || (A == D && B == C))) 1396 | return BinaryOperator::CreateXor(A, B); 1397 | 1398 | // A&(A^B) => A & ~B 1399 | { 1400 | Value *tmpOp0 = Op0; 1401 | Value *tmpOp1 = Op1; 1402 | if (Op0->hasOneUse() && 1403 | match(Op0, m_Xor(m_Value(A), m_Value(B)))) { 1404 | if (A == Op1 || B == Op1 ) { 1405 | tmpOp1 = Op0; 1406 | tmpOp0 = Op1; 1407 | // Simplify below 1408 | } 1409 | } 1410 | 1411 | if (tmpOp1->hasOneUse() && 1412 | match(tmpOp1, m_Xor(m_Value(A), m_Value(B)))) { 1413 | if (B == tmpOp0) { 1414 | std::swap(A, B); 1415 | } 1416 | // Notice that the patten (A&(~B)) is actually (A&(-1^B)), so if 1417 | // A is originally -1 (or a vector of -1 and undefs), then we enter 1418 | // an endless loop. By checking that A is non-constant we ensure that 1419 | // we will never get to the loop. 
1420 | if (A == tmpOp0 && !isa(A)) // A&(A^B) -> A & ~B 1421 | return BinaryOperator::CreateAnd(A, Builder->CreateNot(B)); 1422 | } 1423 | } 1424 | 1425 | // (A&((~A)|B)) -> A&B 1426 | if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) || 1427 | match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1))))) 1428 | return BinaryOperator::CreateAnd(A, Op1); 1429 | if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) || 1430 | match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0))))) 1431 | return BinaryOperator::CreateAnd(A, Op0); 1432 | 1433 | // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C 1434 | if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) 1435 | if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A)))) 1436 | if (Op1->hasOneUse() || cast(Op1)->hasOneUse()) 1437 | return BinaryOperator::CreateAnd(Op0, Builder->CreateNot(C)); 1438 | 1439 | // ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C 1440 | if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B)))) 1441 | if (match(Op1, m_Xor(m_Specific(B), m_Specific(A)))) 1442 | if (Op0->hasOneUse() || cast(Op0)->hasOneUse()) 1443 | return BinaryOperator::CreateAnd(Op1, Builder->CreateNot(C)); 1444 | 1445 | // (A | B) & ((~A) ^ B) -> (A & B) 1446 | if (match(Op0, m_Or(m_Value(A), m_Value(B))) && 1447 | match(Op1, m_Xor(m_Not(m_Specific(A)), m_Specific(B)))) 1448 | return BinaryOperator::CreateAnd(A, B); 1449 | 1450 | // ((~A) ^ B) & (A | B) -> (A & B) 1451 | if (match(Op0, m_Xor(m_Not(m_Value(A)), m_Value(B))) && 1452 | match(Op1, m_Or(m_Specific(A), m_Specific(B)))) 1453 | return BinaryOperator::CreateAnd(A, B); 1454 | } 1455 | 1456 | { 1457 | ICmpInst *LHS = dyn_cast(Op0); 1458 | ICmpInst *RHS = dyn_cast(Op1); 1459 | if (LHS && RHS) 1460 | if (Value *Res = FoldAndOfICmps(LHS, RHS)) 1461 | return ReplaceInstUsesWith(I, Res); 1462 | 1463 | // TODO: Make this recursive; it's a little tricky because an arbitrary 1464 | // number of 'and' instructions might have to be created. 
1465 | Value *X, *Y; 1466 | if (LHS && match(Op1, m_OneUse(m_And(m_Value(X), m_Value(Y))))) { 1467 | if (auto *Cmp = dyn_cast(X)) 1468 | if (Value *Res = FoldAndOfICmps(LHS, Cmp)) 1469 | return ReplaceInstUsesWith(I, Builder->CreateAnd(Res, Y)); 1470 | if (auto *Cmp = dyn_cast(Y)) 1471 | if (Value *Res = FoldAndOfICmps(LHS, Cmp)) 1472 | return ReplaceInstUsesWith(I, Builder->CreateAnd(Res, X)); 1473 | } 1474 | if (RHS && match(Op0, m_OneUse(m_And(m_Value(X), m_Value(Y))))) { 1475 | if (auto *Cmp = dyn_cast(X)) 1476 | if (Value *Res = FoldAndOfICmps(Cmp, RHS)) 1477 | return ReplaceInstUsesWith(I, Builder->CreateAnd(Res, Y)); 1478 | if (auto *Cmp = dyn_cast(Y)) 1479 | if (Value *Res = FoldAndOfICmps(Cmp, RHS)) 1480 | return ReplaceInstUsesWith(I, Builder->CreateAnd(Res, X)); 1481 | } 1482 | } 1483 | 1484 | // If and'ing two fcmp, try combine them into one. 1485 | if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) 1486 | if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) 1487 | if (Value *Res = FoldAndOfFCmps(LHS, RHS)) 1488 | return ReplaceInstUsesWith(I, Res); 1489 | 1490 | 1491 | if (CastInst *Op0C = dyn_cast(Op0)) { 1492 | Value *Op0COp = Op0C->getOperand(0); 1493 | Type *SrcTy = Op0COp->getType(); 1494 | // fold (and (cast A), (cast B)) -> (cast (and A, B)) 1495 | if (CastInst *Op1C = dyn_cast(Op1)) { 1496 | if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ? 1497 | SrcTy == Op1C->getOperand(0)->getType() && 1498 | SrcTy->isIntOrIntVectorTy()) { 1499 | Value *Op1COp = Op1C->getOperand(0); 1500 | 1501 | // Only do this if the casts both really cause code to be generated. 
1502 | if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && 1503 | ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { 1504 | Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp, I.getName()); 1505 | return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); 1506 | } 1507 | 1508 | // If this is and(cast(icmp), cast(icmp)), try to fold this even if the 1509 | // cast is otherwise not optimizable. This happens for vector sexts. 1510 | if (ICmpInst *RHS = dyn_cast(Op1COp)) 1511 | if (ICmpInst *LHS = dyn_cast(Op0COp)) 1512 | if (Value *Res = FoldAndOfICmps(LHS, RHS)) 1513 | return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); 1514 | 1515 | // If this is and(cast(fcmp), cast(fcmp)), try to fold this even if the 1516 | // cast is otherwise not optimizable. This happens for vector sexts. 1517 | if (FCmpInst *RHS = dyn_cast(Op1COp)) 1518 | if (FCmpInst *LHS = dyn_cast(Op0COp)) 1519 | if (Value *Res = FoldAndOfFCmps(LHS, RHS)) 1520 | return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); 1521 | } 1522 | } 1523 | 1524 | // If we are masking off the sign bit of a floating-point value, convert 1525 | // this to the canonical fabs intrinsic call and cast back to integer. 1526 | // The backend should know how to optimize fabs(). 1527 | // TODO: This transform should also apply to vectors. 
1528 | ConstantInt *CI; 1529 | if (isa(Op0C) && SrcTy->isFloatingPointTy() && 1530 | match(Op1, m_ConstantInt(CI)) && CI->isMaxValue(true)) { 1531 | Module *M = I.getParent()->getParent()->getParent(); 1532 | Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, SrcTy); 1533 | Value *Call = Builder->CreateCall(Fabs, Op0COp, "fabs"); 1534 | return CastInst::CreateBitOrPointerCast(Call, I.getType()); 1535 | } 1536 | } 1537 | 1538 | { 1539 | Value *X = nullptr; 1540 | bool OpsSwapped = false; 1541 | // Canonicalize SExt or Not to the LHS 1542 | if (match(Op1, m_SExt(m_Value())) || 1543 | match(Op1, m_Not(m_Value()))) { 1544 | std::swap(Op0, Op1); 1545 | OpsSwapped = true; 1546 | } 1547 | 1548 | // Fold (and (sext bool to A), B) --> (select bool, B, 0) 1549 | if (match(Op0, m_SExt(m_Value(X))) && 1550 | X->getType()->getScalarType()->isIntegerTy(1)) { 1551 | Value *Zero = Constant::getNullValue(Op1->getType()); 1552 | return SelectInst::Create(X, Op1, Zero); 1553 | } 1554 | 1555 | // Fold (and ~(sext bool to A), B) --> (select bool, 0, B) 1556 | if (match(Op0, m_Not(m_SExt(m_Value(X)))) && 1557 | X->getType()->getScalarType()->isIntegerTy(1)) { 1558 | Value *Zero = Constant::getNullValue(Op0->getType()); 1559 | return SelectInst::Create(X, Zero, Op1); 1560 | } 1561 | 1562 | if (OpsSwapped) 1563 | std::swap(Op0, Op1); 1564 | } 1565 | 1566 | return Changed ? &I : nullptr; 1567 | } 1568 | 1569 | /// Analyze the specified subexpression and see if it is capable of providing 1570 | /// pieces of a bswap. The subexpression provides pieces of a bswap if it is 1571 | /// proven that each of the non-zero bytes in the output of the expression came 1572 | /// from the corresponding "byte swapped" byte in some other value. 1573 | /// For example, if the current subexpression is "(shl i32 %X, 24)" then 1574 | /// we know that the expression deposits the low byte of %X into the high byte 1575 | /// of the bswap result and that all other bytes are zero. 
This expression is 1576 | /// accepted, the high byte of ByteValues is set to X to indicate a correct 1577 | /// match. 1578 | /// 1579 | /// This function returns true if the match was unsuccessful and false if so. 1580 | /// On entry to the function the "OverallLeftShift" is a signed integer value 1581 | /// indicating the number of bytes that the subexpression is later shifted. For 1582 | /// example, if the expression is later right shifted by 16 bits, the 1583 | /// OverallLeftShift value would be -2 on entry. This is used to specify which 1584 | /// byte of ByteValues is actually being set. 1585 | /// 1586 | /// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding 1587 | /// byte is masked to zero by a user. For example, in (X & 255), X will be 1588 | /// processed with a bytemask of 1. Because bytemask is 32-bits, this limits 1589 | /// this function to working on up to 32-byte (256 bit) values. ByteMask is 1590 | /// always in the local (OverallLeftShift) coordinate space. 1591 | /// 1592 | static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask, 1593 | SmallVectorImpl &ByteValues) { 1594 | if (Instruction *I = dyn_cast(V)) { 1595 | // If this is an or instruction, it may be an inner node of the bswap. 1596 | if (I->getOpcode() == Instruction::Or) { 1597 | return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, 1598 | ByteValues) || 1599 | CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask, 1600 | ByteValues); 1601 | } 1602 | 1603 | // If this is a logical shift by a constant multiple of 8, recurse with 1604 | // OverallLeftShift and ByteMask adjusted. 1605 | if (I->isLogicalShift() && isa(I->getOperand(1))) { 1606 | unsigned ShAmt = 1607 | cast(I->getOperand(1))->getLimitedValue(~0U); 1608 | // Ensure the shift amount is defined and of a byte value. 
1609 | if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size())) 1610 | return true; 1611 | 1612 | unsigned ByteShift = ShAmt >> 3; 1613 | if (I->getOpcode() == Instruction::Shl) { 1614 | // X << 2 -> collect(X, +2) 1615 | OverallLeftShift += ByteShift; 1616 | ByteMask >>= ByteShift; 1617 | } else { 1618 | // X >>u 2 -> collect(X, -2) 1619 | OverallLeftShift -= ByteShift; 1620 | ByteMask <<= ByteShift; 1621 | ByteMask &= (~0U >> (32-ByteValues.size())); 1622 | } 1623 | 1624 | if (OverallLeftShift >= (int)ByteValues.size()) return true; 1625 | if (OverallLeftShift <= -(int)ByteValues.size()) return true; 1626 | 1627 | return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, 1628 | ByteValues); 1629 | } 1630 | 1631 | // If this is a logical 'and' with a mask that clears bytes, clear the 1632 | // corresponding bytes in ByteMask. 1633 | if (I->getOpcode() == Instruction::And && 1634 | isa(I->getOperand(1))) { 1635 | // Scan every byte of the and mask, seeing if the byte is either 0 or 255. 1636 | unsigned NumBytes = ByteValues.size(); 1637 | APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255); 1638 | const APInt &AndMask = cast(I->getOperand(1))->getValue(); 1639 | 1640 | for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) { 1641 | // If this byte is masked out by a later operation, we don't care what 1642 | // the and mask is. 1643 | if ((ByteMask & (1 << i)) == 0) 1644 | continue; 1645 | 1646 | // If the AndMask is all zeros for this byte, clear the bit. 1647 | APInt MaskB = AndMask & Byte; 1648 | if (MaskB == 0) { 1649 | ByteMask &= ~(1U << i); 1650 | continue; 1651 | } 1652 | 1653 | // If the AndMask is not all ones for this byte, it's not a bytezap. 1654 | if (MaskB != Byte) 1655 | return true; 1656 | 1657 | // Otherwise, this byte is kept. 1658 | } 1659 | 1660 | return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, 1661 | ByteValues); 1662 | } 1663 | } 1664 | 1665 | // Okay, we got to something that isn't a shift, 'or' or 'and'. 
This must be 1666 | // the input value to the bswap. Some observations: 1) if more than one byte 1667 | // is demanded from this input, then it could not be successfully assembled 1668 | // into a byteswap. At least one of the two bytes would not be aligned with 1669 | // their ultimate destination. 1670 | if (!isPowerOf2_32(ByteMask)) return true; 1671 | unsigned InputByteNo = countTrailingZeros(ByteMask); 1672 | 1673 | // 2) The input and ultimate destinations must line up: if byte 3 of an i32 1674 | // is demanded, it needs to go into byte 0 of the result. This means that the 1675 | // byte needs to be shifted until it lands in the right byte bucket. The 1676 | // shift amount depends on the position: if the byte is coming from the high 1677 | // part of the value (e.g. byte 3) then it must be shifted right. If from the 1678 | // low part, it must be shifted left. 1679 | unsigned DestByteNo = InputByteNo + OverallLeftShift; 1680 | if (ByteValues.size()-1-DestByteNo != InputByteNo) 1681 | return true; 1682 | 1683 | // If the destination byte value is already defined, the values are or'd 1684 | // together, which isn't a bswap (unless it's an or of the same bits). 1685 | if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V) 1686 | return true; 1687 | ByteValues[DestByteNo] = V; 1688 | return false; 1689 | } 1690 | 1691 | /// Given an OR instruction, check to see if this is a bswap idiom. 1692 | /// If so, insert the new bswap intrinsic and return it. 1693 | Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { 1694 | IntegerType *ITy = dyn_cast(I.getType()); 1695 | if (!ITy || ITy->getBitWidth() % 16 || 1696 | // ByteMask only allows up to 32-byte values. 1697 | ITy->getBitWidth() > 32*8) 1698 | return nullptr; // Can only bswap pairs of bytes. Can't do vectors. 1699 | 1700 | /// ByteValues - For each byte of the result, we keep track of which value 1701 | /// defines each byte. 
1702 | SmallVector ByteValues; 1703 | ByteValues.resize(ITy->getBitWidth()/8); 1704 | 1705 | // Try to find all the pieces corresponding to the bswap. 1706 | uint32_t ByteMask = ~0U >> (32-ByteValues.size()); 1707 | if (CollectBSwapParts(&I, 0, ByteMask, ByteValues)) 1708 | return nullptr; 1709 | 1710 | // Check to see if all of the bytes come from the same value. 1711 | Value *V = ByteValues[0]; 1712 | if (!V) return nullptr; // Didn't find a byte? Must be zero. 1713 | 1714 | // Check to make sure that all of the bytes come from the same value. 1715 | for (unsigned i = 1, e = ByteValues.size(); i != e; ++i) 1716 | if (ByteValues[i] != V) 1717 | return nullptr; 1718 | Module *M = I.getParent()->getParent()->getParent(); 1719 | Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy); 1720 | return CallInst::Create(F, V); 1721 | } 1722 | 1723 | /// We have an expression of the form (A&C)|(B&D). Check if A is (cond?-1:0) 1724 | /// and either B or D is ~(cond?-1,0) or (cond?0,-1), then we can simplify this 1725 | /// expression to "cond ? C : D or B". 1726 | static Instruction *MatchSelectFromAndOr(Value *A, Value *B, 1727 | Value *C, Value *D) { 1728 | // If A is not a select of -1/0, this cannot match. 1729 | Value *Cond = nullptr; 1730 | if (!match(A, m_SExt(m_Value(Cond))) || 1731 | !Cond->getType()->isIntegerTy(1)) 1732 | return nullptr; 1733 | 1734 | // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B. 1735 | if (match(D, m_Not(m_SExt(m_Specific(Cond))))) 1736 | return SelectInst::Create(Cond, C, B); 1737 | if (match(D, m_SExt(m_Not(m_Specific(Cond))))) 1738 | return SelectInst::Create(Cond, C, B); 1739 | 1740 | // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D. 1741 | if (match(B, m_Not(m_SExt(m_Specific(Cond))))) 1742 | return SelectInst::Create(Cond, C, D); 1743 | if (match(B, m_SExt(m_Not(m_Specific(Cond))))) 1744 | return SelectInst::Create(Cond, C, D); 1745 | return nullptr; 1746 | } 1747 | 1748 | /// Fold (icmp)|(icmp) if possible. 
1749 | Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, 1750 | Instruction *CxtI) { 1751 | ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); 1752 | 1753 | // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2) 1754 | // if K1 and K2 are a one-bit mask. 1755 | ConstantInt *LHSCst = dyn_cast(LHS->getOperand(1)); 1756 | ConstantInt *RHSCst = dyn_cast(RHS->getOperand(1)); 1757 | 1758 | if (LHS->getPredicate() == ICmpInst::ICMP_EQ && LHSCst && LHSCst->isZero() && 1759 | RHS->getPredicate() == ICmpInst::ICMP_EQ && RHSCst && RHSCst->isZero()) { 1760 | 1761 | BinaryOperator *LAnd = dyn_cast(LHS->getOperand(0)); 1762 | BinaryOperator *RAnd = dyn_cast(RHS->getOperand(0)); 1763 | if (LAnd && RAnd && LAnd->hasOneUse() && RHS->hasOneUse() && 1764 | LAnd->getOpcode() == Instruction::And && 1765 | RAnd->getOpcode() == Instruction::And) { 1766 | 1767 | Value *Mask = nullptr; 1768 | Value *Masked = nullptr; 1769 | if (LAnd->getOperand(0) == RAnd->getOperand(0) && 1770 | isKnownToBeAPowerOfTwo(LAnd->getOperand(1), DL, false, 0, AC, CxtI, 1771 | DT) && 1772 | isKnownToBeAPowerOfTwo(RAnd->getOperand(1), DL, false, 0, AC, CxtI, 1773 | DT)) { 1774 | Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1)); 1775 | Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask); 1776 | } else if (LAnd->getOperand(1) == RAnd->getOperand(1) && 1777 | isKnownToBeAPowerOfTwo(LAnd->getOperand(0), DL, false, 0, AC, 1778 | CxtI, DT) && 1779 | isKnownToBeAPowerOfTwo(RAnd->getOperand(0), DL, false, 0, AC, 1780 | CxtI, DT)) { 1781 | Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0)); 1782 | Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask); 1783 | } 1784 | 1785 | if (Masked) 1786 | return Builder->CreateICmp(ICmpInst::ICMP_NE, Masked, Mask); 1787 | } 1788 | } 1789 | 1790 | // Fold (icmp ult/ule (A + C1), C3) | (icmp ult/ule (A + C2), C3) 1791 | // --> (icmp ult/ule ((A & ~(C1 ^ C2)) + max(C1, C2)), C3) 1792 | // 
The original condition actually refers to the following two ranges: 1793 | // [MAX_UINT-C1+1, MAX_UINT-C1+1+C3] and [MAX_UINT-C2+1, MAX_UINT-C2+1+C3] 1794 | // We can fold these two ranges if: 1795 | // 1) C1 and C2 is unsigned greater than C3. 1796 | // 2) The two ranges are separated. 1797 | // 3) C1 ^ C2 is one-bit mask. 1798 | // 4) LowRange1 ^ LowRange2 and HighRange1 ^ HighRange2 are one-bit mask. 1799 | // This implies all values in the two ranges differ by exactly one bit. 1800 | 1801 | if ((LHSCC == ICmpInst::ICMP_ULT || LHSCC == ICmpInst::ICMP_ULE) && 1802 | LHSCC == RHSCC && LHSCst && RHSCst && LHS->hasOneUse() && 1803 | RHS->hasOneUse() && LHSCst->getType() == RHSCst->getType() && 1804 | LHSCst->getValue() == (RHSCst->getValue())) { 1805 | 1806 | Value *LAdd = LHS->getOperand(0); 1807 | Value *RAdd = RHS->getOperand(0); 1808 | 1809 | Value *LAddOpnd, *RAddOpnd; 1810 | ConstantInt *LAddCst, *RAddCst; 1811 | if (match(LAdd, m_Add(m_Value(LAddOpnd), m_ConstantInt(LAddCst))) && 1812 | match(RAdd, m_Add(m_Value(RAddOpnd), m_ConstantInt(RAddCst))) && 1813 | LAddCst->getValue().ugt(LHSCst->getValue()) && 1814 | RAddCst->getValue().ugt(LHSCst->getValue())) { 1815 | 1816 | APInt DiffCst = LAddCst->getValue() ^ RAddCst->getValue(); 1817 | if (LAddOpnd == RAddOpnd && DiffCst.isPowerOf2()) { 1818 | ConstantInt *MaxAddCst = nullptr; 1819 | if (LAddCst->getValue().ult(RAddCst->getValue())) 1820 | MaxAddCst = RAddCst; 1821 | else 1822 | MaxAddCst = LAddCst; 1823 | 1824 | APInt RRangeLow = -RAddCst->getValue(); 1825 | APInt RRangeHigh = RRangeLow + LHSCst->getValue(); 1826 | APInt LRangeLow = -LAddCst->getValue(); 1827 | APInt LRangeHigh = LRangeLow + LHSCst->getValue(); 1828 | APInt LowRangeDiff = RRangeLow ^ LRangeLow; 1829 | APInt HighRangeDiff = RRangeHigh ^ LRangeHigh; 1830 | APInt RangeDiff = LRangeLow.sgt(RRangeLow) ? 
LRangeLow - RRangeLow 1831 | : RRangeLow - LRangeLow; 1832 | 1833 | if (LowRangeDiff.isPowerOf2() && LowRangeDiff == HighRangeDiff && 1834 | RangeDiff.ugt(LHSCst->getValue())) { 1835 | Value *MaskCst = ConstantInt::get(LAddCst->getType(), ~DiffCst); 1836 | 1837 | Value *NewAnd = Builder->CreateAnd(LAddOpnd, MaskCst); 1838 | Value *NewAdd = Builder->CreateAdd(NewAnd, MaxAddCst); 1839 | return (Builder->CreateICmp(LHS->getPredicate(), NewAdd, LHSCst)); 1840 | } 1841 | } 1842 | } 1843 | } 1844 | 1845 | // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) 1846 | if (PredicatesFoldable(LHSCC, RHSCC)) { 1847 | if (LHS->getOperand(0) == RHS->getOperand(1) && 1848 | LHS->getOperand(1) == RHS->getOperand(0)) 1849 | LHS->swapOperands(); 1850 | if (LHS->getOperand(0) == RHS->getOperand(0) && 1851 | LHS->getOperand(1) == RHS->getOperand(1)) { 1852 | Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); 1853 | unsigned Code = getICmpCode(LHS) | getICmpCode(RHS); 1854 | bool isSigned = LHS->isSigned() || RHS->isSigned(); 1855 | return getNewICmpValue(isSigned, Code, Op0, Op1, Builder); 1856 | } 1857 | } 1858 | 1859 | // handle (roughly): 1860 | // (icmp ne (A & B), C) | (icmp ne (A & D), E) 1861 | if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, false, Builder)) 1862 | return V; 1863 | 1864 | Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); 1865 | if (LHS->hasOneUse() || RHS->hasOneUse()) { 1866 | // (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1) 1867 | // (icmp eq B, 0) | (icmp ugt B, A) -> (icmp ule A, B-1) 1868 | Value *A = nullptr, *B = nullptr; 1869 | if (LHSCC == ICmpInst::ICMP_EQ && LHSCst && LHSCst->isZero()) { 1870 | B = Val; 1871 | if (RHSCC == ICmpInst::ICMP_ULT && Val == RHS->getOperand(1)) 1872 | A = Val2; 1873 | else if (RHSCC == ICmpInst::ICMP_UGT && Val == Val2) 1874 | A = RHS->getOperand(1); 1875 | } 1876 | // (icmp ult A, B) | (icmp eq B, 0) -> (icmp ule A, B-1) 1877 | // (icmp ugt B, A) | (icmp eq B, 0) -> (icmp ule A, B-1) 1878 | else if 
(RHSCC == ICmpInst::ICMP_EQ && RHSCst && RHSCst->isZero()) { 1879 | B = Val2; 1880 | if (LHSCC == ICmpInst::ICMP_ULT && Val2 == LHS->getOperand(1)) 1881 | A = Val; 1882 | else if (LHSCC == ICmpInst::ICMP_UGT && Val2 == Val) 1883 | A = LHS->getOperand(1); 1884 | } 1885 | if (A && B) 1886 | return Builder->CreateICmp( 1887 | ICmpInst::ICMP_UGE, 1888 | Builder->CreateAdd(B, ConstantInt::getSigned(B->getType(), -1)), A); 1889 | } 1890 | 1891 | // E.g. (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n 1892 | if (Value *V = simplifyRangeCheck(LHS, RHS, /*Inverted=*/true)) 1893 | return V; 1894 | 1895 | // E.g. (icmp sgt x, n) | (icmp slt x, 0) --> icmp ugt x, n 1896 | if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/true)) 1897 | return V; 1898 | 1899 | // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). 1900 | if (!LHSCst || !RHSCst) return nullptr; 1901 | 1902 | if (LHSCst == RHSCst && LHSCC == RHSCC) { 1903 | // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) 1904 | if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { 1905 | Value *NewOr = Builder->CreateOr(Val, Val2); 1906 | return Builder->CreateICmp(LHSCC, NewOr, LHSCst); 1907 | } 1908 | } 1909 | 1910 | // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1) 1911 | // iff C2 + CA == C1. 1912 | if (LHSCC == ICmpInst::ICMP_ULT && RHSCC == ICmpInst::ICMP_EQ) { 1913 | ConstantInt *AddCst; 1914 | if (match(Val, m_Add(m_Specific(Val2), m_ConstantInt(AddCst)))) 1915 | if (RHSCst->getValue() + AddCst->getValue() == LHSCst->getValue()) 1916 | return Builder->CreateICmpULE(Val, LHSCst); 1917 | } 1918 | 1919 | // From here on, we only handle: 1920 | // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler. 1921 | if (Val != Val2) return nullptr; 1922 | 1923 | // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. 
1924 | if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || 1925 | RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || 1926 | LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || 1927 | RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) 1928 | return nullptr; 1929 | 1930 | // We can't fold (ugt x, C) | (sgt x, C2). 1931 | if (!PredicatesFoldable(LHSCC, RHSCC)) 1932 | return nullptr; 1933 | 1934 | // Ensure that the larger constant is on the RHS. 1935 | bool ShouldSwap; 1936 | if (CmpInst::isSigned(LHSCC) || 1937 | (ICmpInst::isEquality(LHSCC) && 1938 | CmpInst::isSigned(RHSCC))) 1939 | ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue()); 1940 | else 1941 | ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue()); 1942 | 1943 | if (ShouldSwap) { 1944 | std::swap(LHS, RHS); 1945 | std::swap(LHSCst, RHSCst); 1946 | std::swap(LHSCC, RHSCC); 1947 | } 1948 | 1949 | // At this point, we know we have two icmp instructions 1950 | // comparing a value against two constants and or'ing the result 1951 | // together. Because of the above check, we know that we only have 1952 | // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the 1953 | // icmp folding check above), that the two constants are not 1954 | // equal. 
1955 | assert(LHSCst != RHSCst && "Compares not folded above?"); 1956 | 1957 | switch (LHSCC) { 1958 | default: llvm_unreachable("Unknown integer condition code!"); 1959 | case ICmpInst::ICMP_EQ: 1960 | switch (RHSCC) { 1961 | default: llvm_unreachable("Unknown integer condition code!"); 1962 | case ICmpInst::ICMP_EQ: 1963 | if (LHS->getOperand(0) == RHS->getOperand(0)) { 1964 | // if LHSCst and RHSCst differ only by one bit: 1965 | // (A == C1 || A == C2) -> (A & ~(C1 ^ C2)) == C1 1966 | assert(LHSCst->getValue().ule(LHSCst->getValue())); 1967 | 1968 | APInt Xor = LHSCst->getValue() ^ RHSCst->getValue(); 1969 | if (Xor.isPowerOf2()) { 1970 | Value *NegCst = Builder->getInt(~Xor); 1971 | Value *And = Builder->CreateAnd(LHS->getOperand(0), NegCst); 1972 | return Builder->CreateICmp(ICmpInst::ICMP_EQ, And, LHSCst); 1973 | } 1974 | } 1975 | 1976 | if (LHSCst == SubOne(RHSCst)) { 1977 | // (X == 13 | X == 14) -> X-13 CreateAdd(Val, AddCST, Val->getName()+".off"); 1980 | AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); 1981 | return Builder->CreateICmpULT(Add, AddCST); 1982 | } 1983 | 1984 | break; // (X == 13 | X == 15) -> no change 1985 | case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change 1986 | case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change 1987 | break; 1988 | case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15 1989 | case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15 1990 | case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15 1991 | return RHS; 1992 | } 1993 | break; 1994 | case ICmpInst::ICMP_NE: 1995 | switch (RHSCC) { 1996 | default: llvm_unreachable("Unknown integer condition code!"); 1997 | case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13 1998 | case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13 1999 | case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13 2000 | return LHS; 2001 | case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true 2002 | case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) 
-> true 2003 | case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true 2004 | return Builder->getTrue(); 2005 | } 2006 | case ICmpInst::ICMP_ULT: 2007 | switch (RHSCC) { 2008 | default: llvm_unreachable("Unknown integer condition code!"); 2009 | case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change 2010 | break; 2011 | case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2 2012 | // If RHSCst is [us]MAXINT, it is always false. Not handling 2013 | // this can cause overflow. 2014 | if (RHSCst->isMaxValue(false)) 2015 | return LHS; 2016 | return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), false, false); 2017 | case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change 2018 | break; 2019 | case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15 2020 | case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15 2021 | return RHS; 2022 | case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change 2023 | break; 2024 | } 2025 | break; 2026 | case ICmpInst::ICMP_SLT: 2027 | switch (RHSCC) { 2028 | default: llvm_unreachable("Unknown integer condition code!"); 2029 | case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change 2030 | break; 2031 | case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2 2032 | // If RHSCst is [us]MAXINT, it is always false. Not handling 2033 | // this can cause overflow. 
2034 | if (RHSCst->isMaxValue(true)) 2035 | return LHS; 2036 | return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), true, false); 2037 | case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change 2038 | break; 2039 | case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15 2040 | case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15 2041 | return RHS; 2042 | case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change 2043 | break; 2044 | } 2045 | break; 2046 | case ICmpInst::ICMP_UGT: 2047 | switch (RHSCC) { 2048 | default: llvm_unreachable("Unknown integer condition code!"); 2049 | case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13 2050 | case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13 2051 | return LHS; 2052 | case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change 2053 | break; 2054 | case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true 2055 | case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true 2056 | return Builder->getTrue(); 2057 | case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change 2058 | break; 2059 | } 2060 | break; 2061 | case ICmpInst::ICMP_SGT: 2062 | switch (RHSCC) { 2063 | default: llvm_unreachable("Unknown integer condition code!"); 2064 | case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13 2065 | case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13 2066 | return LHS; 2067 | case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change 2068 | break; 2069 | case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true 2070 | case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true 2071 | return Builder->getTrue(); 2072 | case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change 2073 | break; 2074 | } 2075 | break; 2076 | } 2077 | return nullptr; 2078 | } 2079 | 2080 | /// Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of instcombine, this returns 2081 | /// a Value which should already be inserted into the function. 
2082 | Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { 2083 | if (LHS->getPredicate() == FCmpInst::FCMP_UNO && 2084 | RHS->getPredicate() == FCmpInst::FCMP_UNO && 2085 | LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) { 2086 | if (ConstantFP *LHSC = dyn_cast(LHS->getOperand(1))) 2087 | if (ConstantFP *RHSC = dyn_cast(RHS->getOperand(1))) { 2088 | // If either of the constants are nans, then the whole thing returns 2089 | // true. 2090 | if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) 2091 | return Builder->getTrue(); 2092 | 2093 | // Otherwise, no need to compare the two constants, compare the 2094 | // rest. 2095 | return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); 2096 | } 2097 | 2098 | // Handle vector zeros. This occurs because the canonical form of 2099 | // "fcmp uno x,x" is "fcmp uno x, 0". 2100 | if (isa(LHS->getOperand(1)) && 2101 | isa(RHS->getOperand(1))) 2102 | return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); 2103 | 2104 | return nullptr; 2105 | } 2106 | 2107 | Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); 2108 | Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); 2109 | FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); 2110 | 2111 | if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { 2112 | // Swap RHS operands to match LHS. 2113 | Op1CC = FCmpInst::getSwappedPredicate(Op1CC); 2114 | std::swap(Op1LHS, Op1RHS); 2115 | } 2116 | if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { 2117 | // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y). 
2118 | if (Op0CC == Op1CC) 2119 | return Builder->CreateFCmp((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); 2120 | if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE) 2121 | return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1); 2122 | if (Op0CC == FCmpInst::FCMP_FALSE) 2123 | return RHS; 2124 | if (Op1CC == FCmpInst::FCMP_FALSE) 2125 | return LHS; 2126 | bool Op0Ordered; 2127 | bool Op1Ordered; 2128 | unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); 2129 | unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); 2130 | if (Op0Ordered == Op1Ordered) { 2131 | // If both are ordered or unordered, return a new fcmp with 2132 | // or'ed predicates. 2133 | return getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS, Builder); 2134 | } 2135 | } 2136 | return nullptr; 2137 | } 2138 | 2139 | /// This helper function folds: 2140 | /// 2141 | /// ((A | B) & C1) | (B & C2) 2142 | /// 2143 | /// into: 2144 | /// 2145 | /// (A & C1) | B 2146 | /// 2147 | /// when the XOR of the two constants is "all ones" (-1). 2148 | Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, 2149 | Value *A, Value *B, Value *C) { 2150 | ConstantInt *CI1 = dyn_cast(C); 2151 | if (!CI1) return nullptr; 2152 | 2153 | Value *V1 = nullptr; 2154 | ConstantInt *CI2 = nullptr; 2155 | if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return nullptr; 2156 | 2157 | APInt Xor = CI1->getValue() ^ CI2->getValue(); 2158 | if (!Xor.isAllOnesValue()) return nullptr; 2159 | 2160 | if (V1 == A || V1 == B) { 2161 | Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1); 2162 | return BinaryOperator::CreateOr(NewOp, V1); 2163 | } 2164 | 2165 | return nullptr; 2166 | } 2167 | 2168 | /// \brief This helper function folds: 2169 | /// 2170 | /// ((A | B) & C1) ^ (B & C2) 2171 | /// 2172 | /// into: 2173 | /// 2174 | /// (A & C1) ^ B 2175 | /// 2176 | /// when the XOR of the two constants is "all ones" (-1). 
2177 | Instruction *InstCombiner::FoldXorWithConstants(BinaryOperator &I, Value *Op, 2178 | Value *A, Value *B, Value *C) { 2179 | ConstantInt *CI1 = dyn_cast(C); 2180 | if (!CI1) 2181 | return nullptr; 2182 | 2183 | Value *V1 = nullptr; 2184 | ConstantInt *CI2 = nullptr; 2185 | if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) 2186 | return nullptr; 2187 | 2188 | APInt Xor = CI1->getValue() ^ CI2->getValue(); 2189 | if (!Xor.isAllOnesValue()) 2190 | return nullptr; 2191 | 2192 | if (V1 == A || V1 == B) { 2193 | Value *NewOp = Builder->CreateAnd(V1 == A ? B : A, CI1); 2194 | return BinaryOperator::CreateXor(NewOp, V1); 2195 | } 2196 | 2197 | return nullptr; 2198 | } 2199 | 2200 | Instruction *InstCombiner::visitOr(BinaryOperator &I) { 2201 | bool Changed = SimplifyAssociativeOrCommutative(I); 2202 | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 2203 | 2204 | if (Value *V = SimplifyVectorOp(I)) 2205 | return ReplaceInstUsesWith(I, V); 2206 | 2207 | if (Value *V = SimplifyOrInst(Op0, Op1, DL, TLI, DT, AC)) 2208 | return ReplaceInstUsesWith(I, V); 2209 | 2210 | // (A&B)|(A&C) -> A&(B|C) etc 2211 | if (Value *V = SimplifyUsingDistributiveLaws(I)) 2212 | return ReplaceInstUsesWith(I, V); 2213 | 2214 | // See if we can simplify any instructions used by the instruction whose sole 2215 | // purpose is to compute bits we don't care about. 2216 | if (SimplifyDemandedInstructionBits(I)) 2217 | return &I; 2218 | 2219 | if (Value *V = SimplifyBSwap(I)) 2220 | return ReplaceInstUsesWith(I, V); 2221 | 2222 | if (ConstantInt *RHS = dyn_cast(Op1)) { 2223 | ConstantInt *C1 = nullptr; Value *X = nullptr; 2224 | // (X & C1) | C2 --> (X | C2) & (C1|C2) 2225 | // iff (C1 & C2) == 0. 
2226 | if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && 2227 | (RHS->getValue() & C1->getValue()) != 0 && 2228 | Op0->hasOneUse()) { 2229 | Value *Or = Builder->CreateOr(X, RHS); 2230 | Or->takeName(Op0); 2231 | return BinaryOperator::CreateAnd(Or, 2232 | Builder->getInt(RHS->getValue() | C1->getValue())); 2233 | } 2234 | 2235 | // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2) 2236 | if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && 2237 | Op0->hasOneUse()) { 2238 | Value *Or = Builder->CreateOr(X, RHS); 2239 | Or->takeName(Op0); 2240 | return BinaryOperator::CreateXor(Or, 2241 | Builder->getInt(C1->getValue() & ~RHS->getValue())); 2242 | } 2243 | 2244 | // Try to fold constant and into select arguments. 2245 | if (SelectInst *SI = dyn_cast(Op0)) 2246 | if (Instruction *R = FoldOpIntoSelect(I, SI)) 2247 | return R; 2248 | 2249 | if (isa(Op0)) 2250 | if (Instruction *NV = FoldOpIntoPhi(I)) 2251 | return NV; 2252 | } 2253 | 2254 | Value *A = nullptr, *B = nullptr; 2255 | ConstantInt *C1 = nullptr, *C2 = nullptr; 2256 | 2257 | // (A | B) | C and A | (B | C) -> bswap if possible. 2258 | bool OrOfOrs = match(Op0, m_Or(m_Value(), m_Value())) || 2259 | match(Op1, m_Or(m_Value(), m_Value())); 2260 | // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. 2261 | bool OrOfShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) && 2262 | match(Op1, m_LogicalShift(m_Value(), m_Value())); 2263 | // (A & B) | (C & D) -> bswap if possible. 
2264 | bool OrOfAnds = match(Op0, m_And(m_Value(), m_Value())) && 2265 | match(Op1, m_And(m_Value(), m_Value())); 2266 | 2267 | if (OrOfOrs || OrOfShifts || OrOfAnds) 2268 | if (Instruction *BSwap = MatchBSwap(I)) 2269 | return BSwap; 2270 | 2271 | // (X^C)|Y -> (X|Y)^C iff Y&C == 0 2272 | if (Op0->hasOneUse() && 2273 | match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) && 2274 | MaskedValueIsZero(Op1, C1->getValue(), 0, &I)) { 2275 | Value *NOr = Builder->CreateOr(A, Op1); 2276 | NOr->takeName(Op0); 2277 | return BinaryOperator::CreateXor(NOr, C1); 2278 | } 2279 | 2280 | // Y|(X^C) -> (X|Y)^C iff Y&C == 0 2281 | if (Op1->hasOneUse() && 2282 | match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) && 2283 | MaskedValueIsZero(Op0, C1->getValue(), 0, &I)) { 2284 | Value *NOr = Builder->CreateOr(A, Op0); 2285 | NOr->takeName(Op0); 2286 | return BinaryOperator::CreateXor(NOr, C1); 2287 | } 2288 | 2289 | // ((~A & B) | A) -> (A | B) 2290 | if (match(Op0, m_And(m_Not(m_Value(A)), m_Value(B))) && 2291 | match(Op1, m_Specific(A))) 2292 | return BinaryOperator::CreateOr(A, B); 2293 | 2294 | // ((A & B) | ~A) -> (~A | B) 2295 | if (match(Op0, m_And(m_Value(A), m_Value(B))) && 2296 | match(Op1, m_Not(m_Specific(A)))) 2297 | return BinaryOperator::CreateOr(Builder->CreateNot(A), B); 2298 | 2299 | // (A & (~B)) | (A ^ B) -> (A ^ B) 2300 | if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) && 2301 | match(Op1, m_Xor(m_Specific(A), m_Specific(B)))) 2302 | return BinaryOperator::CreateXor(A, B); 2303 | 2304 | // (A ^ B) | ( A & (~B)) -> (A ^ B) 2305 | if (match(Op0, m_Xor(m_Value(A), m_Value(B))) && 2306 | match(Op1, m_And(m_Specific(A), m_Not(m_Specific(B))))) 2307 | return BinaryOperator::CreateXor(A, B); 2308 | 2309 | // (A & C)|(B & D) 2310 | Value *C = nullptr, *D = nullptr; 2311 | if (match(Op0, m_And(m_Value(A), m_Value(C))) && 2312 | match(Op1, m_And(m_Value(B), m_Value(D)))) { 2313 | Value *V1 = nullptr, *V2 = nullptr; 2314 | C1 = dyn_cast(C); 2315 | C2 = dyn_cast(D); 2316 | if 
(C1 && C2) { // (A & C1)|(B & C2) 2317 | if ((C1->getValue() & C2->getValue()) == 0) { 2318 | // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2) 2319 | // iff (C1&C2) == 0 and (N&~C1) == 0 2320 | if (match(A, m_Or(m_Value(V1), m_Value(V2))) && 2321 | ((V1 == B && 2322 | MaskedValueIsZero(V2, ~C1->getValue(), 0, &I)) || // (V|N) 2323 | (V2 == B && 2324 | MaskedValueIsZero(V1, ~C1->getValue(), 0, &I)))) // (N|V) 2325 | return BinaryOperator::CreateAnd(A, 2326 | Builder->getInt(C1->getValue()|C2->getValue())); 2327 | // Or commutes, try both ways. 2328 | if (match(B, m_Or(m_Value(V1), m_Value(V2))) && 2329 | ((V1 == A && 2330 | MaskedValueIsZero(V2, ~C2->getValue(), 0, &I)) || // (V|N) 2331 | (V2 == A && 2332 | MaskedValueIsZero(V1, ~C2->getValue(), 0, &I)))) // (N|V) 2333 | return BinaryOperator::CreateAnd(B, 2334 | Builder->getInt(C1->getValue()|C2->getValue())); 2335 | 2336 | // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2) 2337 | // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0. 2338 | ConstantInt *C3 = nullptr, *C4 = nullptr; 2339 | if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) && 2340 | (C3->getValue() & ~C1->getValue()) == 0 && 2341 | match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) && 2342 | (C4->getValue() & ~C2->getValue()) == 0) { 2343 | V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield"); 2344 | return BinaryOperator::CreateAnd(V2, 2345 | Builder->getInt(C1->getValue()|C2->getValue())); 2346 | } 2347 | } 2348 | } 2349 | 2350 | // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants. 2351 | // Don't do this for vector select idioms, the code generator doesn't handle 2352 | // them well yet. 
2353 | if (!I.getType()->isVectorTy()) { 2354 | if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D)) 2355 | return Match; 2356 | if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C)) 2357 | return Match; 2358 | if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D)) 2359 | return Match; 2360 | if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C)) 2361 | return Match; 2362 | } 2363 | 2364 | // ((A&~B)|(~A&B)) -> A^B 2365 | if ((match(C, m_Not(m_Specific(D))) && 2366 | match(B, m_Not(m_Specific(A))))) 2367 | return BinaryOperator::CreateXor(A, D); 2368 | // ((~B&A)|(~A&B)) -> A^B 2369 | if ((match(A, m_Not(m_Specific(D))) && 2370 | match(B, m_Not(m_Specific(C))))) 2371 | return BinaryOperator::CreateXor(C, D); 2372 | // ((A&~B)|(B&~A)) -> A^B 2373 | if ((match(C, m_Not(m_Specific(B))) && 2374 | match(D, m_Not(m_Specific(A))))) 2375 | return BinaryOperator::CreateXor(A, B); 2376 | // ((~B&A)|(B&~A)) -> A^B 2377 | if ((match(A, m_Not(m_Specific(B))) && 2378 | match(D, m_Not(m_Specific(C))))) 2379 | return BinaryOperator::CreateXor(C, B); 2380 | 2381 | // ((A|B)&1)|(B&-2) -> (A&1) | B 2382 | if (match(A, m_Or(m_Value(V1), m_Specific(B))) || 2383 | match(A, m_Or(m_Specific(B), m_Value(V1)))) { 2384 | Instruction *Ret = FoldOrWithConstants(I, Op1, V1, B, C); 2385 | if (Ret) return Ret; 2386 | } 2387 | // (B&-2)|((A|B)&1) -> (A&1) | B 2388 | if (match(B, m_Or(m_Specific(A), m_Value(V1))) || 2389 | match(B, m_Or(m_Value(V1), m_Specific(A)))) { 2390 | Instruction *Ret = FoldOrWithConstants(I, Op0, A, V1, D); 2391 | if (Ret) return Ret; 2392 | } 2393 | // ((A^B)&1)|(B&-2) -> (A&1) ^ B 2394 | if (match(A, m_Xor(m_Value(V1), m_Specific(B))) || 2395 | match(A, m_Xor(m_Specific(B), m_Value(V1)))) { 2396 | Instruction *Ret = FoldXorWithConstants(I, Op1, V1, B, C); 2397 | if (Ret) return Ret; 2398 | } 2399 | // (B&-2)|((A^B)&1) -> (A&1) ^ B 2400 | if (match(B, m_Xor(m_Specific(A), m_Value(V1))) || 2401 | match(B, m_Xor(m_Value(V1), m_Specific(A)))) { 2402 | 
Instruction *Ret = FoldXorWithConstants(I, Op0, A, V1, D); 2403 | if (Ret) return Ret; 2404 | } 2405 | } 2406 | 2407 | // (A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C 2408 | if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) 2409 | if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A)))) 2410 | if (Op1->hasOneUse() || cast(Op1)->hasOneUse()) 2411 | return BinaryOperator::CreateOr(Op0, C); 2412 | 2413 | // ((A ^ C) ^ B) | (B ^ A) -> (B ^ A) | C 2414 | if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B)))) 2415 | if (match(Op1, m_Xor(m_Specific(B), m_Specific(A)))) 2416 | if (Op0->hasOneUse() || cast(Op0)->hasOneUse()) 2417 | return BinaryOperator::CreateOr(Op1, C); 2418 | 2419 | // ((B | C) & A) | B -> B | (A & C) 2420 | if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A)))) 2421 | return BinaryOperator::CreateOr(Op1, Builder->CreateAnd(A, C)); 2422 | 2423 | if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder)) 2424 | return DeMorgan; 2425 | 2426 | // Canonicalize xor to the RHS. 
2427 | bool SwappedForXor = false; 2428 | if (match(Op0, m_Xor(m_Value(), m_Value()))) { 2429 | std::swap(Op0, Op1); 2430 | SwappedForXor = true; 2431 | } 2432 | 2433 | // A | ( A ^ B) -> A | B 2434 | // A | (~A ^ B) -> A | ~B 2435 | // (A & B) | (A ^ B) 2436 | if (match(Op1, m_Xor(m_Value(A), m_Value(B)))) { 2437 | if (Op0 == A || Op0 == B) 2438 | return BinaryOperator::CreateOr(A, B); 2439 | 2440 | if (match(Op0, m_And(m_Specific(A), m_Specific(B))) || 2441 | match(Op0, m_And(m_Specific(B), m_Specific(A)))) 2442 | return BinaryOperator::CreateOr(A, B); 2443 | 2444 | if (Op1->hasOneUse() && match(A, m_Not(m_Specific(Op0)))) { 2445 | Value *Not = Builder->CreateNot(B, B->getName()+".not"); 2446 | return BinaryOperator::CreateOr(Not, Op0); 2447 | } 2448 | if (Op1->hasOneUse() && match(B, m_Not(m_Specific(Op0)))) { 2449 | Value *Not = Builder->CreateNot(A, A->getName()+".not"); 2450 | return BinaryOperator::CreateOr(Not, Op0); 2451 | } 2452 | } 2453 | 2454 | // A | ~(A | B) -> A | ~B 2455 | // A | ~(A ^ B) -> A | ~B 2456 | if (match(Op1, m_Not(m_Value(A)))) 2457 | if (BinaryOperator *B = dyn_cast(A)) 2458 | if ((Op0 == B->getOperand(0) || Op0 == B->getOperand(1)) && 2459 | Op1->hasOneUse() && (B->getOpcode() == Instruction::Or || 2460 | B->getOpcode() == Instruction::Xor)) { 2461 | Value *NotOp = Op0 == B->getOperand(0) ? 
B->getOperand(1) : 2462 | B->getOperand(0); 2463 | Value *Not = Builder->CreateNot(NotOp, NotOp->getName()+".not"); 2464 | return BinaryOperator::CreateOr(Not, Op0); 2465 | } 2466 | 2467 | // (A & B) | ((~A) ^ B) -> (~A ^ B) 2468 | if (match(Op0, m_And(m_Value(A), m_Value(B))) && 2469 | match(Op1, m_Xor(m_Not(m_Specific(A)), m_Specific(B)))) 2470 | return BinaryOperator::CreateXor(Builder->CreateNot(A), B); 2471 | 2472 | // ((~A) ^ B) | (A & B) -> (~A ^ B) 2473 | if (match(Op0, m_Xor(m_Not(m_Value(A)), m_Value(B))) && 2474 | match(Op1, m_And(m_Specific(A), m_Specific(B)))) 2475 | return BinaryOperator::CreateXor(Builder->CreateNot(A), B); 2476 | 2477 | if (SwappedForXor) 2478 | std::swap(Op0, Op1); 2479 | 2480 | { 2481 | ICmpInst *LHS = dyn_cast(Op0); 2482 | ICmpInst *RHS = dyn_cast(Op1); 2483 | if (LHS && RHS) 2484 | if (Value *Res = FoldOrOfICmps(LHS, RHS, &I)) 2485 | return ReplaceInstUsesWith(I, Res); 2486 | 2487 | // TODO: Make this recursive; it's a little tricky because an arbitrary 2488 | // number of 'or' instructions might have to be created. 
2489 | Value *X, *Y; 2490 | if (LHS && match(Op1, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { 2491 | if (auto *Cmp = dyn_cast(X)) 2492 | if (Value *Res = FoldOrOfICmps(LHS, Cmp, &I)) 2493 | return ReplaceInstUsesWith(I, Builder->CreateOr(Res, Y)); 2494 | if (auto *Cmp = dyn_cast(Y)) 2495 | if (Value *Res = FoldOrOfICmps(LHS, Cmp, &I)) 2496 | return ReplaceInstUsesWith(I, Builder->CreateOr(Res, X)); 2497 | } 2498 | if (RHS && match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { 2499 | if (auto *Cmp = dyn_cast(X)) 2500 | if (Value *Res = FoldOrOfICmps(Cmp, RHS, &I)) 2501 | return ReplaceInstUsesWith(I, Builder->CreateOr(Res, Y)); 2502 | if (auto *Cmp = dyn_cast(Y)) 2503 | if (Value *Res = FoldOrOfICmps(Cmp, RHS, &I)) 2504 | return ReplaceInstUsesWith(I, Builder->CreateOr(Res, X)); 2505 | } 2506 | } 2507 | 2508 | // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) 2509 | if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) 2510 | if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) 2511 | if (Value *Res = FoldOrOfFCmps(LHS, RHS)) 2512 | return ReplaceInstUsesWith(I, Res); 2513 | 2514 | // fold (or (cast A), (cast B)) -> (cast (or A, B)) 2515 | if (CastInst *Op0C = dyn_cast(Op0)) { 2516 | CastInst *Op1C = dyn_cast(Op1); 2517 | if (Op1C && Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? 2518 | Type *SrcTy = Op0C->getOperand(0)->getType(); 2519 | if (SrcTy == Op1C->getOperand(0)->getType() && 2520 | SrcTy->isIntOrIntVectorTy()) { 2521 | Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0); 2522 | 2523 | if ((!isa(Op0COp) || !isa(Op1COp)) && 2524 | // Only do this if the casts both really cause code to be 2525 | // generated. 
2526 | ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && 2527 | ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { 2528 | Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName()); 2529 | return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); 2530 | } 2531 | 2532 | // If this is or(cast(icmp), cast(icmp)), try to fold this even if the 2533 | // cast is otherwise not optimizable. This happens for vector sexts. 2534 | if (ICmpInst *RHS = dyn_cast(Op1COp)) 2535 | if (ICmpInst *LHS = dyn_cast(Op0COp)) 2536 | if (Value *Res = FoldOrOfICmps(LHS, RHS, &I)) 2537 | return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); 2538 | 2539 | // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the 2540 | // cast is otherwise not optimizable. This happens for vector sexts. 2541 | if (FCmpInst *RHS = dyn_cast(Op1COp)) 2542 | if (FCmpInst *LHS = dyn_cast(Op0COp)) 2543 | if (Value *Res = FoldOrOfFCmps(LHS, RHS)) 2544 | return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); 2545 | } 2546 | } 2547 | } 2548 | 2549 | // or(sext(A), B) -> A ? -1 : B where A is an i1 2550 | // or(A, sext(B)) -> B ? -1 : A where B is an i1 2551 | if (match(Op0, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1)) 2552 | return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1); 2553 | if (match(Op1, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1)) 2554 | return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0); 2555 | 2556 | // Note: If we've gotten to the point of visiting the outer OR, then the 2557 | // inner one couldn't be simplified. If it was a constant, then it won't 2558 | // be simplified by a later pass either, so we try swapping the inner/outer 2559 | // ORs in the hopes that we'll be able to simplify it this way. 
2560 | // (X|C) | V --> (X|V) | C 2561 | if (Op0->hasOneUse() && !isa(Op1) && 2562 | match(Op0, m_Or(m_Value(A), m_ConstantInt(C1)))) { 2563 | Value *Inner = Builder->CreateOr(A, Op1); 2564 | Inner->takeName(Op0); 2565 | return BinaryOperator::CreateOr(Inner, C1); 2566 | } 2567 | 2568 | // Change (or (bool?A:B),(bool?C:D)) --> (bool?(or A,C):(or B,D)) 2569 | // Since this OR statement hasn't been optimized further yet, we hope 2570 | // that this transformation will allow the new ORs to be optimized. 2571 | { 2572 | Value *X = nullptr, *Y = nullptr; 2573 | if (Op0->hasOneUse() && Op1->hasOneUse() && 2574 | match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) && 2575 | match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) { 2576 | Value *orTrue = Builder->CreateOr(A, C); 2577 | Value *orFalse = Builder->CreateOr(B, D); 2578 | return SelectInst::Create(X, orTrue, orFalse); 2579 | } 2580 | } 2581 | 2582 | return Changed ? &I : nullptr; 2583 | } 2584 | 2585 | Instruction *InstCombiner::visitXor(BinaryOperator &I) { 2586 | bool Changed = SimplifyAssociativeOrCommutative(I); 2587 | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 2588 | 2589 | if (Value *V = SimplifyVectorOp(I)) 2590 | return ReplaceInstUsesWith(I, V); 2591 | 2592 | if (Value *V = SimplifyXorInst(Op0, Op1, DL, TLI, DT, AC)) 2593 | return ReplaceInstUsesWith(I, V); 2594 | 2595 | // (A&B)^(A&C) -> A&(B^C) etc 2596 | if (Value *V = SimplifyUsingDistributiveLaws(I)) 2597 | return ReplaceInstUsesWith(I, V); 2598 | 2599 | // See if we can simplify any instructions used by the instruction whose sole 2600 | // purpose is to compute bits we don't care about. 2601 | if (SimplifyDemandedInstructionBits(I)) 2602 | return &I; 2603 | 2604 | if (Value *V = SimplifyBSwap(I)) 2605 | return ReplaceInstUsesWith(I, V); 2606 | 2607 | // Is this a ~ operation? 
2608 | if (Value *NotOp = dyn_castNotVal(&I)) { 2609 | if (BinaryOperator *Op0I = dyn_cast(NotOp)) { 2610 | if (Op0I->getOpcode() == Instruction::And || 2611 | Op0I->getOpcode() == Instruction::Or) { 2612 | // ~(~X & Y) --> (X | ~Y) - De Morgan's Law 2613 | // ~(~X | Y) === (X & ~Y) - De Morgan's Law 2614 | if (dyn_castNotVal(Op0I->getOperand(1))) 2615 | Op0I->swapOperands(); 2616 | if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { 2617 | Value *NotY = 2618 | Builder->CreateNot(Op0I->getOperand(1), 2619 | Op0I->getOperand(1)->getName()+".not"); 2620 | if (Op0I->getOpcode() == Instruction::And) 2621 | return BinaryOperator::CreateOr(Op0NotVal, NotY); 2622 | return BinaryOperator::CreateAnd(Op0NotVal, NotY); 2623 | } 2624 | 2625 | // ~(X & Y) --> (~X | ~Y) - De Morgan's Law 2626 | // ~(X | Y) === (~X & ~Y) - De Morgan's Law 2627 | if (IsFreeToInvert(Op0I->getOperand(0), 2628 | Op0I->getOperand(0)->hasOneUse()) && 2629 | IsFreeToInvert(Op0I->getOperand(1), 2630 | Op0I->getOperand(1)->hasOneUse())) { 2631 | Value *NotX = 2632 | Builder->CreateNot(Op0I->getOperand(0), "notlhs"); 2633 | Value *NotY = 2634 | Builder->CreateNot(Op0I->getOperand(1), "notrhs"); 2635 | if (Op0I->getOpcode() == Instruction::And) 2636 | return BinaryOperator::CreateOr(NotX, NotY); 2637 | return BinaryOperator::CreateAnd(NotX, NotY); 2638 | } 2639 | 2640 | } else if (Op0I->getOpcode() == Instruction::AShr) { 2641 | // ~(~X >>s Y) --> (X >>s Y) 2642 | if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) 2643 | return BinaryOperator::CreateAShr(Op0NotVal, Op0I->getOperand(1)); 2644 | } 2645 | } 2646 | } 2647 | 2648 | if (Constant *RHS = dyn_cast(Op1)) { 2649 | if (RHS->isAllOnesValue() && Op0->hasOneUse()) 2650 | // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B 2651 | if (CmpInst *CI = dyn_cast(Op0)) 2652 | return CmpInst::Create(CI->getOpcode(), 2653 | CI->getInversePredicate(), 2654 | CI->getOperand(0), CI->getOperand(1)); 2655 | } 2656 | 2657 | if (ConstantInt *RHS = 
dyn_cast(Op1)) { 2658 | // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp). 2659 | if (CastInst *Op0C = dyn_cast(Op0)) { 2660 | if (CmpInst *CI = dyn_cast(Op0C->getOperand(0))) { 2661 | if (CI->hasOneUse() && Op0C->hasOneUse()) { 2662 | Instruction::CastOps Opcode = Op0C->getOpcode(); 2663 | if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && 2664 | (RHS == ConstantExpr::getCast(Opcode, Builder->getTrue(), 2665 | Op0C->getDestTy()))) { 2666 | CI->setPredicate(CI->getInversePredicate()); 2667 | return CastInst::Create(Opcode, CI, Op0C->getType()); 2668 | } 2669 | } 2670 | } 2671 | } 2672 | 2673 | if (BinaryOperator *Op0I = dyn_cast(Op0)) { 2674 | // ~(c-X) == X-c-1 == X+(-c-1) 2675 | if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue()) 2676 | if (Constant *Op0I0C = dyn_cast(Op0I->getOperand(0))) { 2677 | Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C); 2678 | Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C, 2679 | ConstantInt::get(I.getType(), 1)); 2680 | return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS); 2681 | } 2682 | 2683 | if (ConstantInt *Op0CI = dyn_cast(Op0I->getOperand(1))) { 2684 | if (Op0I->getOpcode() == Instruction::Add) { 2685 | // ~(X-c) --> (-c-1)-X 2686 | if (RHS->isAllOnesValue()) { 2687 | Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI); 2688 | return BinaryOperator::CreateSub( 2689 | ConstantExpr::getSub(NegOp0CI, 2690 | ConstantInt::get(I.getType(), 1)), 2691 | Op0I->getOperand(0)); 2692 | } else if (RHS->getValue().isSignBit()) { 2693 | // (X + C) ^ signbit -> (X + C + signbit) 2694 | Constant *C = Builder->getInt(RHS->getValue() + Op0CI->getValue()); 2695 | return BinaryOperator::CreateAdd(Op0I->getOperand(0), C); 2696 | 2697 | } 2698 | } else if (Op0I->getOpcode() == Instruction::Or) { 2699 | // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0 2700 | if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue(), 2701 | 0, &I)) { 2702 | Constant *NewRHS = ConstantExpr::getOr(Op0CI, 
RHS); 2703 | // Anything in both C1 and C2 is known to be zero, remove it from 2704 | // NewRHS. 2705 | Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS); 2706 | NewRHS = ConstantExpr::getAnd(NewRHS, 2707 | ConstantExpr::getNot(CommonBits)); 2708 | Worklist.Add(Op0I); 2709 | I.setOperand(0, Op0I->getOperand(0)); 2710 | I.setOperand(1, NewRHS); 2711 | return &I; 2712 | } 2713 | } else if (Op0I->getOpcode() == Instruction::LShr) { 2714 | // ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3) 2715 | // E1 = "X ^ C1" 2716 | BinaryOperator *E1; 2717 | ConstantInt *C1; 2718 | if (Op0I->hasOneUse() && 2719 | (E1 = dyn_cast(Op0I->getOperand(0))) && 2720 | E1->getOpcode() == Instruction::Xor && 2721 | (C1 = dyn_cast(E1->getOperand(1)))) { 2722 | // fold (C1 >> C2) ^ C3 2723 | ConstantInt *C2 = Op0CI, *C3 = RHS; 2724 | APInt FoldConst = C1->getValue().lshr(C2->getValue()); 2725 | FoldConst ^= C3->getValue(); 2726 | // Prepare the two operands. 2727 | Value *Opnd0 = Builder->CreateLShr(E1->getOperand(0), C2); 2728 | Opnd0->takeName(Op0I); 2729 | cast(Opnd0)->setDebugLoc(I.getDebugLoc()); 2730 | Value *FoldVal = ConstantInt::get(Opnd0->getType(), FoldConst); 2731 | 2732 | return BinaryOperator::CreateXor(Opnd0, FoldVal); 2733 | } 2734 | } 2735 | } 2736 | } 2737 | 2738 | // Try to fold constant and into select arguments. 2739 | if (SelectInst *SI = dyn_cast(Op0)) 2740 | if (Instruction *R = FoldOpIntoSelect(I, SI)) 2741 | return R; 2742 | if (isa(Op0)) 2743 | if (Instruction *NV = FoldOpIntoPhi(I)) 2744 | return NV; 2745 | } 2746 | 2747 | BinaryOperator *Op1I = dyn_cast(Op1); 2748 | if (Op1I) { 2749 | Value *A, *B; 2750 | if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) { 2751 | if (A == Op0) { // B^(B|A) == (A|B)^B 2752 | Op1I->swapOperands(); 2753 | I.swapOperands(); 2754 | std::swap(Op0, Op1); 2755 | } else if (B == Op0) { // B^(A|B) == (A|B)^B 2756 | I.swapOperands(); // Simplified below. 
2757 | std::swap(Op0, Op1); 2758 | } 2759 | } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && 2760 | Op1I->hasOneUse()){ 2761 | if (A == Op0) { // A^(A&B) -> A^(B&A) 2762 | Op1I->swapOperands(); 2763 | std::swap(A, B); 2764 | } 2765 | if (B == Op0) { // A^(B&A) -> (B&A)^A 2766 | I.swapOperands(); // Simplified below. 2767 | std::swap(Op0, Op1); 2768 | } 2769 | } 2770 | } 2771 | 2772 | BinaryOperator *Op0I = dyn_cast(Op0); 2773 | if (Op0I) { 2774 | Value *A, *B; 2775 | if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && 2776 | Op0I->hasOneUse()) { 2777 | if (A == Op1) // (B|A)^B == (A|B)^B 2778 | std::swap(A, B); 2779 | if (B == Op1) // (A|B)^B == A & ~B 2780 | return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1)); 2781 | } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && 2782 | Op0I->hasOneUse()){ 2783 | if (A == Op1) // (A&B)^A -> (B&A)^A 2784 | std::swap(A, B); 2785 | if (B == Op1 && // (B&A)^A == ~B & A 2786 | !isa(Op1)) { // Canonical form is (B&C)^C 2787 | return BinaryOperator::CreateAnd(Builder->CreateNot(A), Op1); 2788 | } 2789 | } 2790 | } 2791 | 2792 | if (Op0I && Op1I) { 2793 | Value *A, *B, *C, *D; 2794 | // (A & B)^(A | B) -> A ^ B 2795 | if (match(Op0I, m_And(m_Value(A), m_Value(B))) && 2796 | match(Op1I, m_Or(m_Value(C), m_Value(D)))) { 2797 | if ((A == C && B == D) || (A == D && B == C)) 2798 | return BinaryOperator::CreateXor(A, B); 2799 | } 2800 | /*** Added code here for transformation (A | (B ^ C)) ^ ((A ^ C) ^ B) -> (A & (B ^ C)) ***/ 2801 | 2802 | if (match(Op0I, m_Or(m_Xor(m_Value(B), m_Value(C)), m_Value(A))) 2803 | && match(Op1I, m_Xor( m_Xor(m_Specific(A), m_Specific(C)), m_Specific(B)))) { 2804 | return BinaryOperator::CreateAnd(A, Builder->CreateXor(B,C)); 2805 | } 2806 | 2807 | /**** End ***/ 2808 | 2809 | // (A | B)^(A & B) -> A ^ B 2810 | if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && 2811 | match(Op1I, m_And(m_Value(C), m_Value(D)))) { 2812 | if ((A == C && B == D) || (A == D && B == C)) 2813 | return 
BinaryOperator::CreateXor(A, B); 2814 | } 2815 | // (A | ~B) ^ (~A | B) -> A ^ B 2816 | if (match(Op0I, m_Or(m_Value(A), m_Not(m_Value(B)))) && 2817 | match(Op1I, m_Or(m_Not(m_Specific(A)), m_Specific(B)))) { 2818 | return BinaryOperator::CreateXor(A, B); 2819 | } 2820 | // (~A | B) ^ (A | ~B) -> A ^ B 2821 | if (match(Op0I, m_Or(m_Not(m_Value(A)), m_Value(B))) && 2822 | match(Op1I, m_Or(m_Specific(A), m_Not(m_Specific(B))))) { 2823 | return BinaryOperator::CreateXor(A, B); 2824 | } 2825 | // (A & ~B) ^ (~A & B) -> A ^ B 2826 | if (match(Op0I, m_And(m_Value(A), m_Not(m_Value(B)))) && 2827 | match(Op1I, m_And(m_Not(m_Specific(A)), m_Specific(B)))) { 2828 | return BinaryOperator::CreateXor(A, B); 2829 | } 2830 | // (~A & B) ^ (A & ~B) -> A ^ B 2831 | if (match(Op0I, m_And(m_Not(m_Value(A)), m_Value(B))) && 2832 | match(Op1I, m_And(m_Specific(A), m_Not(m_Specific(B))))) { 2833 | return BinaryOperator::CreateXor(A, B); 2834 | } 2835 | // (A ^ C)^(A | B) -> ((~A) & B) ^ C 2836 | if (match(Op0I, m_Xor(m_Value(D), m_Value(C))) && 2837 | match(Op1I, m_Or(m_Value(A), m_Value(B)))) { 2838 | if (D == A) 2839 | return BinaryOperator::CreateXor( 2840 | Builder->CreateAnd(Builder->CreateNot(A), B), C); 2841 | if (D == B) 2842 | return BinaryOperator::CreateXor( 2843 | Builder->CreateAnd(Builder->CreateNot(B), A), C); 2844 | } 2845 | // (A | B)^(A ^ C) -> ((~A) & B) ^ C 2846 | if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && 2847 | match(Op1I, m_Xor(m_Value(D), m_Value(C)))) { 2848 | if (D == A) 2849 | return BinaryOperator::CreateXor( 2850 | Builder->CreateAnd(Builder->CreateNot(A), B), C); 2851 | if (D == B) 2852 | return BinaryOperator::CreateXor( 2853 | Builder->CreateAnd(Builder->CreateNot(B), A), C); 2854 | } 2855 | // (A & B) ^ (A ^ B) -> (A | B) 2856 | if (match(Op0I, m_And(m_Value(A), m_Value(B))) && 2857 | match(Op1I, m_Xor(m_Specific(A), m_Specific(B)))) 2858 | return BinaryOperator::CreateOr(A, B); 2859 | // (A ^ B) ^ (A & B) -> (A | B) 2860 | if (match(Op0I, 
m_Xor(m_Value(A), m_Value(B))) && 2861 | match(Op1I, m_And(m_Specific(A), m_Specific(B)))) 2862 | return BinaryOperator::CreateOr(A, B); 2863 | } 2864 | 2865 | Value *A = nullptr, *B = nullptr; 2866 | // (A & ~B) ^ (~A) -> ~(A & B) 2867 | if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) && 2868 | match(Op1, m_Not(m_Specific(A)))) 2869 | return BinaryOperator::CreateNot(Builder->CreateAnd(A, B)); 2870 | 2871 | // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B) 2872 | if (ICmpInst *RHS = dyn_cast(I.getOperand(1))) 2873 | if (ICmpInst *LHS = dyn_cast(I.getOperand(0))) 2874 | if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) { 2875 | if (LHS->getOperand(0) == RHS->getOperand(1) && 2876 | LHS->getOperand(1) == RHS->getOperand(0)) 2877 | LHS->swapOperands(); 2878 | if (LHS->getOperand(0) == RHS->getOperand(0) && 2879 | LHS->getOperand(1) == RHS->getOperand(1)) { 2880 | Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); 2881 | unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS); 2882 | bool isSigned = LHS->isSigned() || RHS->isSigned(); 2883 | return ReplaceInstUsesWith(I, 2884 | getNewICmpValue(isSigned, Code, Op0, Op1, 2885 | Builder)); 2886 | } 2887 | } 2888 | 2889 | // fold (xor (cast A), (cast B)) -> (cast (xor A, B)) 2890 | if (CastInst *Op0C = dyn_cast(Op0)) { 2891 | if (CastInst *Op1C = dyn_cast(Op1)) 2892 | if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind? 2893 | Type *SrcTy = Op0C->getOperand(0)->getType(); 2894 | if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntegerTy() && 2895 | // Only do this if the casts both really cause code to be generated. 
2896 | ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0), 2897 | I.getType()) && 2898 | ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0), 2899 | I.getType())) { 2900 | Value *NewOp = Builder->CreateXor(Op0C->getOperand(0), 2901 | Op1C->getOperand(0), I.getName()); 2902 | return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); 2903 | } 2904 | } 2905 | } 2906 | 2907 | return Changed ? &I : nullptr; 2908 | } 2909 | -------------------------------------------------------------------------------- /Chapter_04_code/LLVMFnNamePrint.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elongbug/llvm-essentials-book/15df77ca2796e4077157698254d30c688b5138e8/Chapter_04_code/LLVMFnNamePrint.so -------------------------------------------------------------------------------- /Chapter_04_code/Makefile: -------------------------------------------------------------------------------- 1 | ##===- lib/Transforms/Makefile -----------------------------*- Makefile -*-===## 2 | # 3 | # The LLVM Compiler Infrastructure 4 | # 5 | # This file is distributed under the University of Illinois Open Source 6 | # License. See LICENSE.TXT for details. 7 | # 8 | ##===----------------------------------------------------------------------===## 9 | 10 | LEVEL = ../.. 
11 | PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Vectorize Hello ObjCARC FnNamePrint 12 | 13 | include $(LEVEL)/Makefile.config 14 | 15 | # No support for plugins on windows targets 16 | ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW Minix)) 17 | PARALLEL_DIRS := $(filter-out Hello, $(PARALLEL_DIRS)) 18 | endif 19 | 20 | include $(LEVEL)/Makefile.common 21 | -------------------------------------------------------------------------------- /Chapter_04_code/Readme.txt: -------------------------------------------------------------------------------- 1 | Using opt tool: 2 | 3 | Use the opt tool in the code bundle to compile the testcase test.ll in code bundle 4 | with various optimization levels: 5 | 6 | 7 | $ opt -O1 -S test.ll > test_out_O1.ll 8 | $ opt -O2 -S test.ll > test_out_O2.ll 9 | 10 | *************************************************** 11 | 12 | Pass and Pass Manager 13 | 14 | 15 | For the pass to work, place the FnNamePrint folder in llvm/lib/Transforms folder and also replace the CMakeLists.txt and Makefile with the given CMakeLists.txt and Makefile in the code bundle. 16 | It will generate LLVMFnNamePrint.so in llvm/build-folder/lib; opt is also built. 17 | 18 | To run it on the testfile use the command with opt tool present in the code bundle. 19 | Use the .so file present in the code bundle: 20 | 21 | $ opt -load LLVMFnNamePrint.so -funcnameprint test.ll 22 | 23 | 24 | *************************************************** 25 | Use -debug-pass=Structure option of opt tool: 26 | 27 | $ opt -O2 -S test.ll -debug-pass=Structure 28 | 29 | 30 | *************************************************** 31 | Instruction Combining example 32 | 33 | Replace the InstCombineAndOrXor.cpp file in llvm/lib/Transforms/InstCombine folder 34 | with the one present in the code bundle and build LLVM. You will get the new opt 35 | tool with this optimization enabled. This opt tool is present in the code bundle.
Use the following command to see instruction combining at work: 36 | 37 | $ opt -S -instcombine instcombinetc.ll -o instcombineoutput.ll 38 | 39 | $ cat instcombineoutput.ll 40 | 41 | *************************************************** 42 | -------------------------------------------------------------------------------- /Chapter_04_code/instcombineoutput.ll: -------------------------------------------------------------------------------- 1 | ; ModuleID = 'instcombineoutput.ll' 2 | 3 | define i32 @0(i32 %x, i32 %y, i32 %z) { 4 | %1 = xor i32 %y, %z 5 | %res = and i32 %1, %x 6 | ret i32 %res 7 | } 8 | -------------------------------------------------------------------------------- /Chapter_04_code/instcombinetc.ll: -------------------------------------------------------------------------------- 1 | define i32 @testfunc(i32 %x, i32 %y, i32 %z) { 2 | %xor1 = xor i32 %y, %z 3 | %or = or i32 %x, %xor1 4 | %xor2 = xor i32 %x, %z 5 | %xor3 = xor i32 %xor2, %y 6 | %res = xor i32 %or, %xor3 7 | ret i32 %res 8 | } 9 | -------------------------------------------------------------------------------- /Chapter_04_code/opt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elongbug/llvm-essentials-book/15df77ca2796e4077157698254d30c688b5138e8/Chapter_04_code/opt -------------------------------------------------------------------------------- /Chapter_04_code/test.ll: -------------------------------------------------------------------------------- 1 | 2 | define internal i32 @test(i32* %X, i32* %Y) { 3 | %A = load i32, i32* %X 4 | %B = load i32, i32* %Y 5 | %C = add i32 %A, %B 6 | ret i32 %C 7 | } 8 | 9 | define internal i32 @caller(i32* %B) { 10 | %A = alloca i32 11 | store i32 1, i32* %A 12 | %C = call i32 @test(i32* %A, i32* %B) 13 | ret i32 %C 14 | } 15 | 16 | define i32 @callercaller() { 17 | %B = alloca i32 18 | store i32 2, i32* %B 19 | %X = call i32 @caller(i32* %B) 20 | ret i32 %X 21 | } 22 | 23 | 
-------------------------------------------------------------------------------- /Chapter_04_code/test_out_O1.ll: -------------------------------------------------------------------------------- 1 | ; ModuleID = 'argprom1.ll' 2 | 3 | ; Function Attrs: nounwind readonly 4 | define internal fastcc i32 @test(i32* nocapture readonly %X, i32* nocapture readonly %Y) #0 { 5 | %A = load i32, i32* %X, align 4 6 | %B = load i32, i32* %Y, align 4 7 | %C = add i32 %B, %A 8 | ret i32 %C 9 | } 10 | 11 | ; Function Attrs: nounwind readonly 12 | define internal fastcc i32 @caller(i32* nocapture readonly %B) #0 { 13 | %A = alloca i32, align 4 14 | store i32 1, i32* %A, align 4 15 | %C = call fastcc i32 @test(i32* nonnull %A, i32* %B) 16 | ret i32 %C 17 | } 18 | 19 | ; Function Attrs: nounwind readonly 20 | define i32 @callercaller() #0 { 21 | %B = alloca i32, align 4 22 | store i32 2, i32* %B, align 4 23 | %X = call fastcc i32 @caller(i32* nonnull %B) 24 | ret i32 %X 25 | } 26 | 27 | attributes #0 = { nounwind readonly } 28 | -------------------------------------------------------------------------------- /Chapter_04_code/test_out_O2.ll: -------------------------------------------------------------------------------- 1 | ; ModuleID = 'argprom1.ll' 2 | 3 | ; Function Attrs: nounwind readnone 4 | define i32 @callercaller() #0 { 5 | ret i32 3 6 | } 7 | 8 | attributes #0 = { nounwind readnone } 9 | -------------------------------------------------------------------------------- /Chapter_05_code/README: -------------------------------------------------------------------------------- 1 | To execute the sample LLVM IR with opt binary on Linux Machine 2 | 1. Unzip the zip file 3 | 2. Add permissions to opt binary 4 | $ chmod 777 opt 5 | 3. 
Run the opt tool on hadd.ll 6 | $ ./opt -S -basicaa -slp-vectorizer -mtriple=aarch64-unknown-linux-gnu -mcpu=cortex-a57 hadd.ll 7 | -------------------------------------------------------------------------------- /Chapter_05_code/Readme.txt: -------------------------------------------------------------------------------- 1 | LICM Testcase 2 | 3 | $ cat licm.ll 4 | define void @func(i32 %i) { 5 | br label %Loop 6 | Loop: 7 | %j = phi i32 [ 0, %0 ], [ %Val, %Loop ] 8 | %i2 = mul i32 %i, 17 9 | %Val = add i32 %j, %i2 10 | %cond = icmp eq i32 %Val, 0 11 | br i1 %cond, label %Exit, label %Loop 12 | Exit: 13 | ret void 14 | } 15 | 16 | Run the licm pass using opt: 17 | $ opt -licm licm.ll -o licm.bc 18 | 19 | Convert .bc file to readable IR format: 20 | $ llvm-dis licm.bc -o licm_opt.ll 21 | 22 | View the output: 23 | $ cat licm_opt.ll 24 | ; ModuleID = 'licm.bc' 25 | 26 | define void @func(i32 %i) { 27 | %i2 = mul i32 %i, 17 28 | br label %Loop 29 | 30 | Loop: ; preds = %Loop, %0 31 | %j = phi i32 [ 0, %0 ], [ %Val, %Loop ] 32 | %Val = add i32 %j, %i2 33 | %cond = icmp eq i32 %Val, 0 34 | br i1 %cond, label %Exit, label %Loop 35 | 36 | Exit: ; preds = %Loop 37 | ret void 38 | } 39 | 40 | 41 | **************************************************************************** 42 | 43 | 44 | Scalar Evolution: 45 | 46 | Testcase file scalev1.ll 47 | 48 | $ cat scalev1.ll 49 | define void @fun() { 50 | entry: 51 | br label %header 52 | header: 53 | %i = phi i32 [ 1, %entry ], [ %i.next, %body ] 54 | %cond = icmp eq i32 %i, 10 55 | br i1 %cond, label %exit, label %body 56 | body: 57 | %a = mul i32 %i, 5 58 | %b = or i32 %a, 1 59 | %i.next = add i32 %i, 1 60 | br label %header 61 | exit: 62 | ret void 63 | } 64 | 65 | 66 | Run scalar evolution pass on testcase: 67 | $ opt -analyze -scalar-evolution scalev1.ll 68 | Printing analysis 'Scalar Evolution Analysis' for function 'fun': 69 | Classifying expressions for: @fun 70 | %i = phi i32 [ 1, %entry ], [ %i.next, %body ] 71 | --> {1,+,1}<%header>
U: [1,11) S: [1,11) Exits: 10 72 | %a = mul i32 %i, 5 73 | --> {5,+,5}<%header> U: [5,51) S: [5,51) Exits: 50 74 | %b = or i32 %a, 1 75 | --> %b U: [1,0) S: full-set Exits: 51 76 | %i.next = add i32 %i, 1 77 | --> {2,+,1}<%header> U: [2,12) S: [2,12) Exits: 11 78 | Determining loop execution counts for: @fun 79 | Loop %header: backedge-taken count is 9 80 | Loop %header: max backedge-taken count is 9 81 | 82 | 83 | ********************************************************************** 84 | 85 | 86 | LLVM Intrinsic Example: 87 | 88 | Testcode in intrinsic.cpp file: 89 | $ cat intrinsic.cpp 90 | int func() 91 | { 92 | int a[5]; 93 | 94 | for (int i = 0; i != 5; ++i) 95 | a[i] = 0; 96 | 97 | return a[0]; 98 | } 99 | 100 | 101 | convert testcode to .ll form 102 | $ clang -emit-llvm -S intrinsic.cpp 103 | 104 | view the testcode in readable IR form. 105 | $ cat intrinsic.ll 106 | ; ModuleID = 'intrinsic.cpp' 107 | target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" 108 | target triple = "x86_64-unknown-linux-gnu" 109 | 110 | ; Function Attrs: nounwind uwtable 111 | define i32 @_Z4funcv() #0 { 112 | %a = alloca [5 x i32], align 16 113 | %i = alloca i32, align 4 114 | store i32 0, i32* %i, align 4 115 | br label %1 116 | 117 | ;