├── .clang-format ├── .dockerignore ├── .github ├── scripts │ └── abort_previous_workflows.py └── workflows │ └── build.yml ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── CMakePresets.json ├── Dockerfile ├── LICENSE ├── README.md ├── anvillConfig.cmake.in ├── bin ├── CMakeLists.txt └── Decompile │ ├── CMakeLists.txt │ ├── Main.cpp │ ├── README.md │ └── tests │ ├── CMakeLists.txt │ ├── broken_cases │ ├── assert.c │ ├── binja_var_none_type.c │ ├── branch.c │ ├── cast.c │ ├── funcptr.c │ ├── loop.c │ ├── nested_struct.c │ └── sum.c │ ├── cases │ ├── array_swap.c │ ├── binops.c │ ├── bitops.c │ ├── init_list.c │ ├── inttoptr.c │ ├── nullptr.c │ ├── ret0.c │ ├── struct.c │ ├── struct_swap.c │ ├── trunc.c │ ├── zeroinit.c │ └── zext.c │ └── scripts │ └── roundtrip.py ├── ci ├── BinaryNinja-headless.zip.gpg ├── angha_1k_test_settings.json ├── challenge_bins_test_settings.json ├── install_binja.sh ├── install_clang.sh ├── license.txt.gpg └── switcher.py ├── cmake ├── ccache.cmake ├── git_watcher.cmake ├── modules │ ├── FindXED.cmake │ ├── Findgflags.cmake │ ├── Findglog.cmake │ ├── Findremill.cmake │ └── utils.cmake ├── options.cmake ├── packaging.cmake └── system.cmake ├── data_specifications └── specification.proto ├── docs ├── CodingStyle.md ├── SpecificationFormat.md └── TypeEncoding.md ├── include └── anvill │ ├── ABI.h │ ├── AnvillFunction.h │ ├── CrossReferenceFolder.h │ ├── CrossReferenceResolver.h │ ├── Declarations.h │ ├── Lifters.h │ ├── Optimize.h │ ├── Passes │ ├── BranchAnalysis.h │ ├── BranchHintPass.h │ ├── BranchRecovery.h │ ├── CodeQualityStatCollector.h │ ├── CombineAdjacentShifts.h │ ├── ConstraintExtractor.h │ ├── Constraints.h │ ├── ConvertAddressesToEntityUses.h │ ├── ConvertMasksToCasts.h │ ├── ConvertSymbolicReturnAddressToConcreteReturnAddress.h │ ├── ConvertXorsToCmps.h │ ├── HoistUsersOfSelectsAndPhis.h │ ├── IndirectJumpPass.h │ ├── IntrinsicPass.h │ ├── JumpTableAnalysis.h │ ├── LowerRemillMemoryAccessIntrinsics.h │ ├── 
LowerRemillUndefinedIntrinsics.h │ ├── LowerSwitchIntrinsics.h │ ├── LowerTypeHintIntrinsics.h │ ├── RecoverBasicStackFrame.h │ ├── RemoveCompilerBarriers.h │ ├── RemoveDelaySlotIntrinsics.h │ ├── RemoveErrorIntrinsics.h │ ├── RemoveRemillFunctionReturns.h │ ├── RemoveStackPointerCExprs.h │ ├── RemoveTrivialPhisAndSelects.h │ ├── RemoveUnusedBranchHints.h │ ├── RemoveUnusedFPClassificationCalls.h │ ├── SinkSelectionsIntoBranchTargets.h │ ├── SliceInterpreter.h │ ├── SliceManager.h │ ├── SplitStackFrameAtReturnAddress.h │ ├── SpreadPCMetadata.h │ └── TransformRemillJumpIntrinsics.h │ ├── Providers.h │ ├── Result.h │ ├── Specification.h │ ├── Transforms.h │ ├── Type.h │ ├── Utils.h │ └── Version.h ├── lib ├── ABI.cpp ├── Arch │ ├── AArch32_C.cpp │ ├── AArch64_C.cpp │ ├── AllocationState.cpp │ ├── AllocationState.h │ ├── Arch.cpp │ ├── Arch.h │ ├── SPARC32_C.cpp │ ├── SPARC64_C.cpp │ ├── X86_64_SysV.cpp │ ├── X86_C.cpp │ ├── X86_FastCall.cpp │ ├── X86_StdCall.cpp │ └── X86_ThisCall.cpp ├── CMakeLists.txt ├── CrossReferenceFolder.cpp ├── CrossReferenceResolver.cpp ├── Declarations.cpp ├── Lifters │ ├── DataLifter.cpp │ ├── DataLifter.h │ ├── EntityLifter.cpp │ ├── EntityLifter.h │ ├── FunctionLifter.cpp │ ├── FunctionLifter.h │ ├── Options.cpp │ ├── ValueLifter.cpp │ └── ValueLifter.h ├── Optimize.cpp ├── Passes │ ├── BranchAnalysis.cpp │ ├── BranchRecovery.cpp │ ├── CodeQualityStatCollector.cpp │ ├── CombineAdjacentShifts.cpp │ ├── Constraints.cpp │ ├── ConvertAddressesToEntityUses.cpp │ ├── ConvertMasksToCasts.cpp │ ├── ConvertSymbolicReturnAddressToConcreteReturnAddress.cpp │ ├── ConvertXorsToCmps.cpp │ ├── HoistUsersOfSelectsAndPhis.cpp │ ├── JumpTableAnalysis.cpp │ ├── LowerRemillMemoryAccessIntrinsics.cpp │ ├── LowerRemillUndefinedIntrinsics.cpp │ ├── LowerSwitchIntrinsics.cpp │ ├── LowerTypeHintIntrinsics.cpp │ ├── RecoverBasicStackFrame.cpp │ ├── RemoveCompilerBarriers.cpp │ ├── RemoveDelaySlotIntrinsics.cpp │ ├── RemoveErrorIntrinsics.cpp │ ├── 
RemoveRemillFunctionReturns.cpp │ ├── RemoveStackPointerCExprs.cpp │ ├── RemoveStackPointerCExprs.h │ ├── RemoveTrivialPhisAndSelects.cpp │ ├── RemoveUnusedBranchHints.cpp │ ├── RemoveUnusedFPClassificationCalls.cpp │ ├── SimplifyStackArithFlags.cpp │ ├── SimplifyStackArithFlags.h │ ├── SinkSelectionsIntoBranchTargets.cpp │ ├── SliceInterpreter.cpp │ ├── SliceManager.cpp │ ├── SlicerVisitor.cpp │ ├── SlicerVisitor.h │ ├── SplitStackFrameAtReturnAddress.cpp │ ├── SpreadPCMetadata.cpp │ ├── TransformRemillJumpIntrinsics.cpp │ ├── Utils.cpp │ └── Utils.h ├── Protobuf.cpp ├── Protobuf.h ├── Providers │ ├── ControlFlowProvider.cpp │ ├── MemoryProvider.cpp │ └── TypeProvider.cpp ├── Specification.cpp ├── Specification.h ├── Transforms.cpp ├── Type.cpp ├── Utils.cpp └── Version.cpp.in ├── libraries ├── CMakeLists.txt └── version │ └── CMakeLists.txt ├── packaging ├── README.md ├── cmake │ ├── dispatcher.cmake │ └── system │ │ ├── linux │ │ └── generators │ │ │ ├── deb.cmake │ │ │ ├── rpm.cmake │ │ │ └── tgz.cmake │ │ └── macos │ │ └── generators │ │ └── tgz.cmake └── main.cmake ├── parse_cspec ├── Pipfile ├── Pipfile.lock ├── example_output │ ├── x86-64gcc.json │ └── x86gcc.json └── src │ └── main.py ├── plugins └── ida │ └── anvill.py ├── python ├── CMakeLists.txt ├── anvill │ ├── __init__.py │ ├── __main__.py │ ├── arch.py │ ├── binja │ │ ├── __init__.py │ │ ├── bnfunction.py │ │ ├── bninstruction.py │ │ ├── bnprogram.py │ │ ├── bnvariable.py │ │ ├── callingconvention.py │ │ ├── table.py │ │ ├── typecache.py │ │ └── xreftype.py │ ├── call.py │ ├── exc.py │ ├── function.py │ ├── ida │ │ ├── __init__.py │ │ ├── idafunction.py │ │ ├── idaprogram.py │ │ ├── idavariable.py │ │ └── utils.py │ ├── imageparser │ │ ├── __init__.py │ │ └── elfparser.py │ ├── loc.py │ ├── logging.ini │ ├── mem.py │ ├── os.py │ ├── program.py │ ├── type.py │ ├── util.py │ └── var.py └── install.cmake ├── scripts ├── build-preset.sh ├── build.sh ├── docker-spec-entrypoint.sh ├── emit-artifact.sh ├── 
format-added-files ├── format-files ├── generate_changelog.sh ├── run-on-anghabench.sh ├── test-amp-challenge-bins.sh └── test-angha-1k.sh ├── setup.py └── tests ├── CMakeLists.txt ├── anvill_passes ├── CMakeLists.txt ├── data │ ├── BaseFunctionPass.ll │ ├── BranchRecoveryAarch64.ll │ ├── InstructionFolderPass.ll │ ├── RecoverStackFrameInformation.ll │ ├── RecoverSubBranch.ll │ ├── RegressionRecoverStack.ll │ ├── SinkSelectionsIntoBranchTargets.ll │ ├── SplitStackFrameAtReturnAddress.ll │ ├── SwitchLoweringLarge.ll │ ├── SwitchLoweringNeg.ll │ ├── TestingUnresolvedEntity.ll │ ├── TransformRemillJumpData0.ll │ ├── TransformRemillJumpData1.ll │ ├── TransformRemillJumpDataARM32_0.ll │ ├── TransformRemillJumpDataARM32_1.ll │ ├── UnrecoverableBranch.ll │ ├── chall2.ll │ ├── gep_add.ll │ ├── jmp0.ll │ ├── loop_test.ll │ ├── maybe_proof.smt │ ├── multiple_bitcast.ll │ ├── proof_result.smt │ ├── ret0.ll │ ├── rx_message.ll │ ├── test_array_swap_rt.ll │ ├── test_binja_var_none_type_rt.ll │ ├── test_binops_rt.ll │ ├── test_bitops_rt.ll │ ├── test_cast_rt.ll │ ├── test_init_list_rt.ll │ ├── test_inttoptr_rt.ll │ ├── test_nullptr_rt.ll │ ├── test_ret0_rt.ll │ ├── test_rx.ll │ ├── test_struct_rt.ll │ ├── test_struct_swap_rt.ll │ ├── test_trunc_rt.ll │ ├── test_zeroinit_rt.ll │ ├── test_zext_rt.ll │ ├── xor_conversion.ll │ ├── xor_conversion_nochange.ll │ ├── xor_removal.ll │ └── xor_removal_noremove.ll └── src │ ├── BranchRecoveryPass.cpp │ ├── BrightenPointers.cpp │ ├── InstructionFolderPass.cpp │ ├── RecoverEntityUses.cpp │ ├── RecoverStackFrameInformation.cpp │ ├── RemoveStackPointerCExprs.cpp │ ├── SinkSelectionsIntoBranchTargets.cpp │ ├── SplitStackFrameAtReturnAddress.cpp │ ├── SwitchLoweringPass.cpp │ ├── TransformRemillJump.cpp │ ├── Utils.cpp │ ├── Utils.h │ ├── XorConversionPass.cpp │ └── main.cpp └── tools ├── CMakeLists.txt └── src ├── Result.cpp ├── TypeSpecification.cpp └── main.cpp /.clang-format: 
-------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: Google 3 | AccessModifierOffset: '-1' 4 | AlignAfterOpenBracket: Align 5 | AlignConsecutiveMacros: 'false' 6 | AlignConsecutiveAssignments: 'false' 7 | AlignConsecutiveDeclarations: 'false' 8 | AlignEscapedNewlines: DontAlign 9 | AlignOperands: 'true' 10 | AlignTrailingComments: 'false' 11 | AllowAllArgumentsOnNextLine: 'true' 12 | AllowAllConstructorInitializersOnNextLine: 'false' 13 | AllowAllParametersOfDeclarationOnNextLine: 'true' 14 | AllowShortBlocksOnASingleLine: 'false' 15 | AllowShortCaseLabelsOnASingleLine: 'true' 16 | AllowShortFunctionsOnASingleLine: Empty 17 | AllowShortIfStatementsOnASingleLine: Never 18 | AllowShortLambdasOnASingleLine: All 19 | AllowShortLoopsOnASingleLine: 'false' 20 | AlwaysBreakAfterReturnType: None 21 | AlwaysBreakTemplateDeclarations: 'Yes' 22 | BinPackParameters: 'true' 23 | BreakBeforeBinaryOperators: None 24 | BreakBeforeBraces: Custom 25 | BreakConstructorInitializers: BeforeColon 26 | BreakInheritanceList: BeforeColon 27 | BreakStringLiterals: 'false' 28 | ColumnLimit: '80' 29 | CompactNamespaces: 'false' 30 | ConstructorInitializerAllOnOneLineOrOnePerLine: 'true' 31 | ConstructorInitializerIndentWidth: '4' 32 | ContinuationIndentWidth: '4' 33 | Cpp11BracedListStyle: 'true' 34 | DerivePointerAlignment: 'false' 35 | FixNamespaceComments: 'true' 36 | IncludeBlocks: Regroup 37 | IndentCaseLabels: 'true' 38 | IndentPPDirectives: AfterHash 39 | IndentWidth: '2' 40 | IndentWrappedFunctionNames: 'false' 41 | KeepEmptyLinesAtTheStartOfBlocks: 'true' 42 | Language: Cpp 43 | MaxEmptyLinesToKeep: '2' 44 | NamespaceIndentation: None 45 | PointerAlignment: Right 46 | ReflowComments: 'false' 47 | SortIncludes: 'true' 48 | SortUsingDeclarations: 'true' 49 | SpaceAfterCStyleCast: 'true' 50 | SpaceAfterLogicalNot: 'false' 51 | SpaceAfterTemplateKeyword: 'true' 52 | SpaceBeforeAssignmentOperators: 'true' 53 | 
SpaceBeforeCtorInitializerColon: 'true' 54 | SpaceBeforeInheritanceColon: 'true' 55 | SpaceBeforeParens: ControlStatements 56 | SpaceBeforeRangeBasedForLoopColon: 'true' 57 | SpaceInEmptyParentheses: 'false' 58 | SpacesBeforeTrailingComments: '2' 59 | SpacesInAngles: 'false' 60 | SpacesInCStyleCastParentheses: 'false' 61 | SpacesInContainerLiterals: 'false' 62 | SpacesInParentheses: 'false' 63 | SpacesInSquareBrackets: 'false' 64 | Standard: Cpp11 65 | TabWidth: '2' 66 | UseTab: Never 67 | PenaltyReturnTypeOnItsOwnLine: '40' 68 | 69 | ... 70 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | build 2 | Dockerfile* 3 | .travis.yml 4 | .github* 5 | LICENSE 6 | README.md 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | 34 | # Python 35 | .idea 36 | *.pyc 37 | anvill.egg-info/* 38 | dist/* 39 | build/* 40 | anvill/python/anvill.egg-info/* 41 | anvill/python/build/* 42 | anvill/python/dist/* 43 | python/anvill.egg-info/* 44 | python/dist/* 45 | python/build/* 46 | 47 | #ignore vscode dir 48 | .vscode 49 | 50 | #ignore generated version files 51 | VERSION 52 | 53 | #do not commit decrypted binja/license 54 | ci/license.txt 55 | ci/BinaryNinja-headless.zip 56 | 57 | # Ignore z3 run info 58 | .z3-trace 59 | 60 | #build logs 61 | anvill-build.log 62 | anvill-configure.log 63 | 64 | # build artifacts 65 | 
anvill-build 66 | .cache 67 | 68 | # compile commands 69 | compile_commands.json -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "libraries/lifting-tools-ci"] 2 | path = libraries/lifting-tools-ci 3 | url = https://github.com/lifting-bits/lifting-tools-ci.git 4 | branch = master 5 | [submodule "remill"] 6 | path = remill 7 | url = https://github.com/lifting-bits/remill.git 8 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019-present, Trail of Bits, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed in accordance with the terms specified in 6 | # the LICENSE file found in the root directory of this source tree. 7 | # 8 | 9 | cmake_minimum_required(VERSION 3.19) 10 | 11 | include("cmake/options.cmake") 12 | include("cmake/ccache.cmake") 13 | 14 | project("anvill") 15 | 16 | include(GNUInstallDirs) 17 | include("cmake/system.cmake") 18 | 19 | if(ANVILL_ENABLE_INSTALL) 20 | include("cmake/packaging.cmake") 21 | endif(ANVILL_ENABLE_INSTALL) 22 | 23 | add_subdirectory("libraries") 24 | 25 | find_package(XED CONFIG REQUIRED) 26 | find_package(gflags CONFIG REQUIRED) 27 | find_package(glog CONFIG REQUIRED) 28 | find_package(Z3 CONFIG REQUIRED) 29 | find_package(doctest CONFIG REQUIRED) 30 | find_package(LLVM CONFIG REQUIRED) 31 | llvm_map_components_to_libnames(llvm_libs support core irreader bitreader bitwriter) 32 | 33 | find_package(sleigh CONFIG) 34 | find_package(remill CONFIG REQUIRED) 35 | 36 | if(ANVILL_ENABLE_INSTALL) 37 | export(PACKAGE "${PROJECT_NAME}") 38 | 39 | set(cmake_install_dir "lib/cmake/${PROJECT_NAME}") 40 | 41 | include(CMakePackageConfigHelpers) 42 | configure_package_config_file("${PROJECT_NAME}Config.cmake.in" 43 | 
"${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake" 44 | INSTALL_DESTINATION "${cmake_install_dir}" 45 | ) 46 | 47 | install( 48 | FILES 49 | "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake" 50 | DESTINATION "${cmake_install_dir}" 51 | ) 52 | install(EXPORT "${PROJECT_NAME}Targets" 53 | DESTINATION "${cmake_install_dir}" 54 | NAMESPACE "${PROJECT_NAME}::" 55 | ) 56 | endif(ANVILL_ENABLE_INSTALL) 57 | 58 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON) 59 | set(CMAKE_CXX_STANDARD 17) 60 | set(CMAKE_CXX_EXTENSIONS OFF) 61 | 62 | if(ANVILL_ENABLE_TESTS) 63 | message(STATUS "anvill: Tests have been enabled") 64 | enable_testing() 65 | else() 66 | message(STATUS "anvill: Tests are not enabled") 67 | endif(ANVILL_ENABLE_TESTS) 68 | 69 | add_subdirectory("lib") 70 | add_subdirectory("bin") 71 | 72 | if(ANVILL_ENABLE_TESTS) 73 | add_subdirectory("tests") 74 | endif() 75 | -------------------------------------------------------------------------------- /anvillConfig.cmake.in: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019-present, Trail of Bits, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed in accordance with the terms specified in 6 | # the LICENSE file found in the root directory of this source tree. 7 | # 8 | 9 | @PACKAGE_INIT@ 10 | 11 | cmake_minimum_required(VERSION 3.2) 12 | 13 | if(NOT TARGET anvill) 14 | 15 | include(CMakeFindDependencyMacro) 16 | find_dependency(remill) 17 | 18 | # Exported Targets 19 | include("${CMAKE_CURRENT_LIST_DIR}/anvillTargets.cmake") 20 | 21 | endif() 22 | -------------------------------------------------------------------------------- /bin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019-present, Trail of Bits, Inc. 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed in accordance with the terms specified in 6 | # the LICENSE file found in the root directory of this source tree. 7 | # 8 | 9 | add_subdirectory("Decompile") 10 | -------------------------------------------------------------------------------- /bin/Decompile/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019-present, Trail of Bits, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed in accordance with the terms specified in 6 | # the LICENSE file found in the root directory of this source tree. 7 | # 8 | 9 | add_executable(anvill-decompile-spec 10 | "Main.cpp" 11 | ) 12 | 13 | set_target_properties(anvill-decompile-spec 14 | PROPERTIES 15 | LINKER_LANGUAGE CXX 16 | ) 17 | 18 | target_link_libraries(anvill-decompile-spec PRIVATE 19 | anvill 20 | ) 21 | 22 | # if(ANVILL_ENABLE_TESTS) 23 | # add_subdirectory("tests") 24 | # endif(ANVILL_ENABLE_TESTS) 25 | if(ANVILL_ENABLE_INSTALL) 26 | install( 27 | TARGETS 28 | anvill-decompile-spec 29 | 30 | EXPORT 31 | anvillTargets 32 | 33 | RUNTIME DESTINATION 34 | bin 35 | ) 36 | endif(ANVILL_ENABLE_INSTALL) 37 | -------------------------------------------------------------------------------- /bin/Decompile/README.md: -------------------------------------------------------------------------------- 1 | # anvill-decompile-spec 2 | 3 | ## TODO 4 | 5 | Insert tool description here 6 | -------------------------------------------------------------------------------- /bin/Decompile/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019-present, Trail of Bits, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed in accordance with the terms specified in 6 | # the LICENSE file found in the root directory of this source tree. 
7 | # 8 | 9 | message(STATUS "anvill: TBD: Ghidra-generated specification tests") 10 | -------------------------------------------------------------------------------- /bin/Decompile/tests/broken_cases/assert.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | unsigned long a = 1; 4 | 5 | int main(void) { 6 | assert(a % 3); 7 | assert(a % 7); 8 | assert(a % 15); 9 | 10 | return 0; 11 | } -------------------------------------------------------------------------------- /bin/Decompile/tests/broken_cases/binja_var_none_type.c: -------------------------------------------------------------------------------- 1 | /* 2 | * This test causes BinaryNinja to produce Variables with None type 3 | * This should be handled in extract_types 4 | */ 5 | void xor_swap(unsigned char buf[]) { 6 | buf[0] = buf[0] ^ buf[1]; 7 | buf[1] = buf[0] ^ buf[1]; 8 | buf[0] = buf[0] ^ buf[1]; 9 | } 10 | //Simple atoi 11 | unsigned char atoi(const char * s) { 12 | return s[0] - '0'; 13 | } 14 | int main(int argc, const char *argv[]) { 15 | unsigned char buf[3] = {1, 0, 11}; 16 | int buff_size = sizeof(buf)/sizeof(buf[0]); 17 | for(int i = 1; i < argc && i < buff_size; i++) { 18 | buf[i - 1] = (unsigned char)atoi(argv[i]); 19 | } 20 | xor_swap(buf); 21 | return buf[0]; 22 | } 23 | -------------------------------------------------------------------------------- /bin/Decompile/tests/broken_cases/branch.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | unsigned a = 0; 4 | unsigned c = 1; 5 | 6 | int main(void) 7 | { 8 | long b = (long)&a; 9 | long d = (long)&c; 10 | 11 | if (c) { 12 | printf("Global variable 'a' of value %u is at ", a); 13 | if (b % 2 == 0) 14 | printf("even "); 15 | else 16 | printf("odd "); 17 | } else { 18 | printf("Global variable 'c' of value %u is at ", c); 19 | if (d % 2 == 0) 20 | printf("even "); 21 | else 22 | printf("odd "); 23 | } 24 | 25 | printf("address.\n"); 26 | 
27 | return 0; 28 | } -------------------------------------------------------------------------------- /bin/Decompile/tests/broken_cases/cast.c: -------------------------------------------------------------------------------- 1 | int a = 0; 2 | int main(void) { 3 | return (long long) &a; 4 | } -------------------------------------------------------------------------------- /bin/Decompile/tests/broken_cases/funcptr.c: -------------------------------------------------------------------------------- 1 | int add(int a, int b) { return a + b; } 2 | 3 | int sub(int a, int b) { return a - b; } 4 | 5 | int x = 1; 6 | 7 | int main(void) { 8 | int (*func)(int, int); 9 | if (x) { 10 | func = add; 11 | } else { 12 | func = sub; 13 | } 14 | return func(2, 2); 15 | } -------------------------------------------------------------------------------- /bin/Decompile/tests/broken_cases/loop.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(void) 4 | { 5 | for (unsigned b = 0; b != 10; ++b) { 6 | printf("Variable at %d is ", b); 7 | if (b % 2 == 0) 8 | printf("even.\n"); 9 | else 10 | printf("odd.\n"); 11 | } 12 | 13 | return 0; 14 | } -------------------------------------------------------------------------------- /bin/Decompile/tests/broken_cases/nested_struct.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | struct _pair { 4 | int first; 5 | int second; 6 | }; 7 | 8 | struct _person { 9 | const char *name; 10 | char age; 11 | }; 12 | 13 | struct _record { 14 | int a; 15 | struct _pair b; 16 | struct _person c; 17 | }; 18 | 19 | struct _record r1 = {14, {33, 42}, {"Bob", 66}}; 20 | 21 | int main(void) { 22 | printf("Name: %s", r1.c.name); 23 | return r1.b.second; 24 | } -------------------------------------------------------------------------------- /bin/Decompile/tests/broken_cases/sum.c: 
-------------------------------------------------------------------------------- 1 | unsigned int foo(unsigned int a, unsigned int b) { 2 | unsigned int sum = 0; 3 | for (unsigned int i = 0; i != 42; i++) { 4 | sum += a; 5 | sum %= b; 6 | } 7 | return sum; 8 | } 9 | 10 | int main() { 11 | return foo(1, 200); 12 | } -------------------------------------------------------------------------------- /bin/Decompile/tests/cases/array_swap.c: -------------------------------------------------------------------------------- 1 | int a[2] = {0, 42}; 2 | 3 | int main(void) { 4 | int b = a[0]; 5 | a[0] = a[1]; 6 | a[1] = b; 7 | return a[0]; 8 | } -------------------------------------------------------------------------------- /bin/Decompile/tests/cases/binops.c: -------------------------------------------------------------------------------- 1 | unsigned int target(unsigned int n) { 2 | unsigned int mod = n % 4; 3 | unsigned int result = 0; 4 | 5 | if (mod == 0) { 6 | result = (n | 0xbaaad0bf) * (2 ^ n); 7 | } else if (mod == 1) { 8 | result = (n & 0xbaaad0bf) * (3 + n); 9 | } else if (mod == 2) { 10 | result = (n ^ 0xbaaad0bf) * (4 | n); 11 | } else { 12 | result = (n + 0xbaaad0bf) * (5 & n); 13 | } 14 | 15 | return result; 16 | } 17 | 18 | int main(void) { 19 | return target(0xdeadbeef); 20 | } 21 | -------------------------------------------------------------------------------- /bin/Decompile/tests/cases/bitops.c: -------------------------------------------------------------------------------- 1 | int a = 0xFF; 2 | 3 | int main(void) { 4 | a = (unsigned int)a >> 7; 5 | return a & 1; 6 | } -------------------------------------------------------------------------------- /bin/Decompile/tests/cases/init_list.c: -------------------------------------------------------------------------------- 1 | unsigned a[5] = {0, 1, 2, 3, 4}; 2 | 3 | int main(void) { return a[1]; } -------------------------------------------------------------------------------- 
/bin/Decompile/tests/cases/inttoptr.c: -------------------------------------------------------------------------------- 1 | #include 2 | unsigned long a = 0xDEADBEEF; 3 | int main(void) { 4 | int *b = (int *)a; 5 | if (b != NULL) { 6 | return 42; 7 | } else { 8 | return 0; 9 | } 10 | } -------------------------------------------------------------------------------- /bin/Decompile/tests/cases/nullptr.c: -------------------------------------------------------------------------------- 1 | int *ptr = ((void*)0); 2 | 3 | int main(void) { 4 | if (ptr == ((void*)0)) { 5 | return 0; 6 | } else { 7 | return 1; 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /bin/Decompile/tests/cases/ret0.c: -------------------------------------------------------------------------------- 1 | int f(void) { 2 | return 1; 3 | } 4 | int main(void) { 5 | return f(); 6 | } -------------------------------------------------------------------------------- /bin/Decompile/tests/cases/struct.c: -------------------------------------------------------------------------------- 1 | struct _pair { 2 | int first; 3 | int second; 4 | }; 5 | 6 | struct _pair a = {0, 42}; 7 | 8 | int main(void) { 9 | if (a.first) { 10 | return a.first; 11 | } else { 12 | return a.second; 13 | } 14 | } -------------------------------------------------------------------------------- /bin/Decompile/tests/cases/struct_swap.c: -------------------------------------------------------------------------------- 1 | struct _pair { 2 | int first; 3 | int second; 4 | }; 5 | 6 | struct _pair a = {0, 42}; 7 | 8 | int main(void) { 9 | int b = a.first; 10 | a.first = a.second; 11 | a.second = b; 12 | return a.first; 13 | } -------------------------------------------------------------------------------- /bin/Decompile/tests/cases/trunc.c: -------------------------------------------------------------------------------- 1 | unsigned long long a = -1; 2 | int main(void) { 3 | return a; 4 | } 
-------------------------------------------------------------------------------- /bin/Decompile/tests/cases/zeroinit.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | struct _pair { 4 | int first; 5 | int second; 6 | }; 7 | 8 | struct _person { 9 | const char *name; 10 | char age; 11 | }; 12 | 13 | struct _record { 14 | int a; 15 | struct _pair b; 16 | struct _person c; 17 | }; 18 | 19 | struct _record r1 = {}; 20 | long long a1[256] = {}; 21 | 22 | int main(void) { 23 | if (r1.b.first == 0) 24 | r1.b.first = 1; 25 | else 26 | r1.b.first = 3; 27 | 28 | if (a1[42] == 0) 29 | a1[42] = 2; 30 | else 31 | a1[42] = 4; 32 | 33 | return r1.b.first + a1[42]; 34 | } -------------------------------------------------------------------------------- /bin/Decompile/tests/cases/zext.c: -------------------------------------------------------------------------------- 1 | unsigned char a = 1; 2 | int main(void) { 3 | return (unsigned int)a; 4 | } -------------------------------------------------------------------------------- /ci/BinaryNinja-headless.zip.gpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lifting-bits/anvill/9948d26cd993952d6010a59f27a198cbe3c79c1d/ci/BinaryNinja-headless.zip.gpg -------------------------------------------------------------------------------- /ci/angha_1k_test_settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "tests.ignore": [ 3 | "x86/SoftEtherVPN/src/See/extr_memory_t.h_SW_LONG_AT", 4 | "amd64/SoftEtherVPN/src/See/extr_memory_t.h_SW_LONG_AT", 5 | "armv7/SoftEtherVPN/src/See/extr_memory_t.h_SW_LONG_AT", 6 | "amd64/python/success/reactos/drivers/filesystems/ext2/src/extr_linux.c_wait_queue_create.elf/output.json", 7 | "x86/python/success/reactos/drivers/filesystems/ext2/src/extr_linux.c_wait_queue_create.elf/output.json", 8 | 
# Decrypt a CI secret in place.
#
# Argument:
#   $1 - path of the plaintext file to produce; the encrypted input is
#        expected next to it as "$1.gpg".
#
# Reads the passphrase from the BINJA_DECODE_KEY environment variable.
# Returns 0 when the plaintext already exists (nothing to do), 1 when the
# encrypted input is missing, and otherwise the exit status of gpg.
function decrypt {
  local plain_path="${1}"
  local cipher_path="${plain_path}.gpg"

  # Nothing to do when a previous run already produced the plaintext.
  if [[ -f "${plain_path}" ]]; then
    echo "Skipping ${plain_path}; already decrypted"
    return 0
  fi

  # Without the encrypted input there is nothing we can decrypt.
  if [[ ! -f "${cipher_path}" ]]; then
    echo "Could not find file: ${cipher_path}"
    return 1
  fi

  echo "Decrypting file: ${cipher_path}"
  gpg --quiet --batch --yes --decrypt \
    --passphrase="${BINJA_DECODE_KEY}" \
    --output "${plain_path}" "${cipher_path}"
}
# Fallback installer used when the distribution's own clang-${V} package
# cannot be installed: fetch the llvm.sh helper from apt.llvm.org and run
# it for major version ${V} (set by the case statement earlier in this
# script). The downloaded helper is removed afterwards.
function install_from_llvm() {
  local helper="llvm.sh"

  echo "Could not install default clang-${V}"
  echo "Attempting to install it from LLVM apt repo"

  # Tools the helper script relies on; output is intentionally discarded.
  apt-get install -qqy lsb-release wget software-properties-common &>/dev/null

  # wget saves the script under its remote name in the current directory.
  wget https://apt.llvm.org/llvm.sh
  chmod +x "./${helper}"
  "./${helper}" ${V}
  rm -f "${helper}"
}
def main():
    """Switch the local Binary Ninja installation to a channel/version.

    Command line::

        switcher.py [--version_string VERSION] C

    ``C`` must be the name of one of the available ``UpdateChannel``
    entries; otherwise the script exits with an error message listing the
    valid names.

    * Without ``--version_string`` the channel is updated to its latest
      version.
    * With ``--version_string`` auto-updates are disabled and the first
      version of the channel whose version text contains ``VERSION`` is
      installed. If nothing matches, a message is written to stderr and
      nothing is changed.
    """
    prs = argparse.ArgumentParser("Binja Version Switcher")
    prs.add_argument('--version_string', type=str)
    prs.add_argument('channel_string', metavar='C', type=str)
    args = prs.parse_args()

    channel_names = {chan.name for chan in UpdateChannel}
    if args.channel_string not in channel_names:
        # sorted() keeps the message stable from run to run.
        sys.exit(
            f"Invalid channel name: {args.channel_string}, "
            f"options are {sorted(channel_names)}"
        )

    channel = UpdateChannel[args.channel_string]

    # The optional argument is the version, not the channel: the channel is
    # a required positional and has already been validated above, so testing
    # it for None could never select this branch.
    if args.version_string is None:
        channel.update_to_latest()
        return

    set_auto_updates_enabled(False)
    print(channel.versions)
    for candidate in channel.versions:
        # Substring match; assumes `version` is a str -- TODO confirm
        # against the Binary Ninja API.
        if args.version_string in candidate.version:
            print("Updating...")
            candidate.update()
            if is_update_installation_pending():
                install_pending_update()
            return

    # Make a non-matching request visible instead of silently doing nothing.
    print(
        f"No version matching {args.version_string!r} found in channel "
        f"{args.channel_string}",
        file=sys.stderr,
    )
7 | # 8 | 9 | if(PLATFORM_LINUX OR PLATFORM_MACOS) 10 | find_program(ccache_executable "ccache") 11 | if(NOT ccache_executable STREQUAL "ccache_executable-NOTFOUND") 12 | message(STATUS "${PROJECT_NAME}: Enabling ccache support (${ccache_executable})") 13 | 14 | set(CMAKE_CXX_COMPILER_LAUNCHER "${ccache_executable}" CACHE FILEPATH "ccache") 15 | set(CMAKE_C_COMPILER_LAUNCHER "${ccache_executable}" CACHE FILEPATH "ccache") 16 | 17 | else() 18 | message(STATUS "${PROJECT_NAME}: No ccache executable found") 19 | endif() 20 | endif() 21 | -------------------------------------------------------------------------------- /cmake/modules/FindXED.cmake: -------------------------------------------------------------------------------- 1 | include("${CMAKE_CURRENT_LIST_DIR}/utils.cmake") 2 | 3 | set(ANVILL_XED_LOCATION "/usr" CACHE FILEPATH "XED install directory") 4 | 5 | set(xed_library_list 6 | "xed" 7 | "xed-ild" 8 | ) 9 | 10 | message(STATUS "Attempting to locate: XED (hints: ANVILL_XED_LOCATION=\"${ANVILL_XED_LOCATION}\")") 11 | 12 | locateLibrary( 13 | NAME "xed" 14 | HINT "${ANVILL_XED_LOCATION}" 15 | LIBRARIES ${xed_library_list} 16 | MAIN_INCLUDE "xed/xed-decode.h" 17 | ) 18 | -------------------------------------------------------------------------------- /cmake/modules/Findgflags.cmake: -------------------------------------------------------------------------------- 1 | include("${CMAKE_CURRENT_LIST_DIR}/utils.cmake") 2 | 3 | set(ANVILL_GFLAGS_LOCATION "/usr" CACHE FILEPATH "gflags install directory") 4 | 5 | set(gflags_library_list 6 | "gflags" 7 | ) 8 | 9 | message(STATUS "Attempting to locate: gflags (hints: ANVILL_GFLAGS_LOCATION=\"${ANVILL_GFLAGS_LOCATION}\")") 10 | 11 | locateLibrary( 12 | NAME "gflags" 13 | HINT "${ANVILL_GFLAGS_LOCATION}" 14 | LIBRARIES ${gflags_library_list} 15 | MAIN_INCLUDE "gflags/gflags.h" 16 | ) 17 | -------------------------------------------------------------------------------- /cmake/modules/Findglog.cmake: 
-------------------------------------------------------------------------------- 1 | include("${CMAKE_CURRENT_LIST_DIR}/utils.cmake") 2 | 3 | set(ANVILL_GLOG_LOCATION "/usr" CACHE FILEPATH "glog install directory") 4 | 5 | set(glog_library_list 6 | "glog" 7 | ) 8 | 9 | message(STATUS "Attempting to locate: glog (hints: ANVILL_GLOG_LOCATION=\"${ANVILL_GLOG_LOCATION}\")") 10 | 11 | locateLibrary( 12 | NAME "glog" 13 | HINT "${ANVILL_GLOG_LOCATION}" 14 | LIBRARIES ${glog_library_list} 15 | MAIN_INCLUDE "glog/logging.h" 16 | ) 17 | -------------------------------------------------------------------------------- /cmake/modules/Findremill.cmake: -------------------------------------------------------------------------------- 1 | include("${CMAKE_CURRENT_LIST_DIR}/utils.cmake") 2 | 3 | set(ANVILL_REMILL_LOCATION "/usr" CACHE FILEPATH "remill install directory") 4 | 5 | set(remill_library_list 6 | "remill_arch" 7 | "remill_arch_aarch64" 8 | "remill_arch_sparc64" 9 | "remill_bc" 10 | "remill_version" 11 | "remill_arch_aarch32" 12 | "remill_arch_sparc32" 13 | "remill_arch_x86" 14 | "remill_os" 15 | ) 16 | 17 | message(STATUS "Attempting to locate: remill (hints: ANVILL_REMILL_LOCATION=\"${ANVILL_REMILL_LOCATION}\")") 18 | 19 | locateLibrary( 20 | NAME "remill" 21 | HINT "${ANVILL_REMILL_LOCATION}" 22 | LIBRARIES ${remill_library_list} 23 | MAIN_INCLUDE "remill/Version/Version.h" 24 | ) 25 | 26 | if(NOT DEFINED LLVM_VERSION_MAJOR) 27 | message(FATAL_ERROR "The LLVM_VERSION_MAJOR variable is not set") 28 | endif() 29 | 30 | set(REMILL_LLVM_VERSION "${LLVM_VERSION_MAJOR}") 31 | 32 | # anvill relies on inheriting all the libraries from remill 33 | # so we have to attach them there 34 | add_library(remill_settings INTERFACE) 35 | 36 | target_link_libraries(remill_settings INTERFACE 37 | thirdparty_llvm 38 | xed 39 | gflags 40 | glog 41 | ) 42 | 43 | target_link_libraries(remill INTERFACE 44 | remill_settings 45 | ) 46 | 47 | target_compile_features(remill_settings INTERFACE 
# locateLibrary(NAME <name> HINT <dir> LIBRARIES <lib>... MAIN_INCLUDE <header>)
#
# Creates an INTERFACE target called <name>. For every entry in LIBRARIES an
# imported target "<name>_<lib>" is created and linked to <name>; the
# directory in which MAIN_INCLUDE is found is added to <name> as an interface
# include directory. <dir> is passed to the searches as an extra location,
# with the "lib" and "include" suffixes respectively.
#
# Configuration stops with a FATAL_ERROR as soon as a library or the header
# cannot be located. On success <name>_FOUND is set to true in the caller's
# scope.
function(locateLibrary)
  cmake_parse_arguments(
    PARSE_ARGV
    0
    "LOCATELIBRARY"
    ""
    "NAME;HINT"
    "LIBRARIES;MAIN_INCLUDE"
  )

  add_library("${LOCATELIBRARY_NAME}" INTERFACE)

  # Import the (sub)libraries
  foreach(library ${LOCATELIBRARY_LIBRARIES})
    set(target_name "${LOCATELIBRARY_NAME}_${library}")

    set(location_name "${target_name}_lib_location")
    find_library("${location_name}"
      NAMES "${library}"
      PATHS "${LOCATELIBRARY_HINT}"
      PATH_SUFFIXES "lib"
    )

    if("${${location_name}}" STREQUAL "${location_name}-NOTFOUND")
      message(FATAL_ERROR "Failed to locate the following library: ${library}")
    endif()

    add_library("${target_name}" UNKNOWN IMPORTED GLOBAL)
    set_target_properties("${target_name}" PROPERTIES
      IMPORTED_LOCATION "${${location_name}}"
    )

    target_link_libraries("${LOCATELIBRARY_NAME}" INTERFACE
      "${target_name}"
    )

    message(STATUS "Found: ${${location_name}}")
  endforeach()

  # Locate the include header. The result variable is derived from the
  # package name; it used to be built from `target_name`, which only held a
  # value because it leaked out of the last loop iteration above.
  set(location_name "${LOCATELIBRARY_NAME}_header_location")
  find_path("${location_name}"
    NAMES "${LOCATELIBRARY_MAIN_INCLUDE}"
    PATHS "${LOCATELIBRARY_HINT}"
    PATH_SUFFIXES "include"
  )

  if("${${location_name}}" STREQUAL "${location_name}-NOTFOUND")
    # Report the header that was searched for; the foreach() variable
    # `library` is no longer in scope here, so the old message named nothing.
    message(FATAL_ERROR "Failed to locate the following header file: ${LOCATELIBRARY_MAIN_INCLUDE}")
  endif()

  message(STATUS "Found: ${${location_name}}")

  target_include_directories("${LOCATELIBRARY_NAME}" INTERFACE
    "${${location_name}}"
  )

  set("${LOCATELIBRARY_NAME}_FOUND" true PARENT_SCOPE)
endfunction()
/cmake/options.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019-present, Trail of Bits, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed in accordance with the terms specified in 6 | # the LICENSE file found in the root directory of this source tree. 7 | # 8 | 9 | include(CMakeDependentOption) 10 | 11 | if(CMAKE_SYSTEM_NAME STREQUAL "Windows") 12 | set(default_build_type "Release") 13 | else() 14 | set(default_build_type "RelWithDebInfo") 15 | endif() 16 | 17 | set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE STRING "Build type") 18 | 19 | option(ANVILL_ENABLE_INSTALL "Set to ON to enable the install directives. This installs both the native and python components" TRUE) 20 | option(ANVILL_ENABLE_PYTHON3_LIBS "Build Python 3 libraries" TRUE) 21 | cmake_dependent_option(ANVILL_INSTALL_PYTHON3_LIBS "Install Python 3 libraries to the **local machine** at build time. Mostly used for local development, not required for packaging" FALSE 22 | ANVILL_ENABLE_INSTALL FALSE) 23 | option(ANVILL_ENABLE_TESTS "Set to ON to enable the tests" TRUE) 24 | option(ANVILL_ENABLE_SANITIZERS "Set to ON to enable sanitizers. May not work with VCPKG") 25 | 26 | if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) 27 | set(CMAKE_INSTALL_PREFIX "/usr" CACHE PATH "Install prefix (forced)" FORCE) 28 | endif() 29 | -------------------------------------------------------------------------------- /cmake/packaging.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2021-present, Trail of Bits, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed in accordance with the terms specified in 6 | # the LICENSE file found in the root directory of this source tree. 
7 | # 8 | 9 | # Common settings 10 | set(CPACK_PACKAGE_DESCRIPTION "Anvill") 11 | set(CPACK_PACKAGE_NAME "Anvill") 12 | set(CPACK_PACKAGE_VERSION "1.0.0") 13 | set(CPACK_PACKAGE_VENDOR "Trail of Bits") 14 | set(CPACK_PACKAGE_CONTACT "peter@trailofbits.com") 15 | set(CPACK_PACKAGE_HOMEPAGE_URL "https://www.trailofbits.com") 16 | 17 | # DEB settings 18 | set(CPACK_DEBIAN_PACKAGE_PRIORITY "extra") 19 | set(CPACK_DEBIAN_PACKAGE_SECTION "default") 20 | set(CPACK_DEBIAN_PACKAGE_HOMEPAGE "${CPACK_PACKAGE_HOMEPAGE_URL}") 21 | 22 | # RPM settings 23 | set(CPACK_RPM_PACKAGE_RELEASE "${CPACK_PACKAGE_VERSION}") 24 | set(CPACK_RPM_FILE_NAME "RPM-DEFAULT") 25 | set(CPACK_RPM_PACKAGE_DESCRIPTION "${CPACK_PACKAGE_DESCRIPTION}") 26 | set(CPACK_RPM_PACKAGE_GROUP "default") 27 | set(CPACK_RPM_PACKAGE_LICENSE "AGPL 3") 28 | 29 | # ZIP settings 30 | if("${CPACK_GENERATOR}" STREQUAL "ZIP") 31 | set(CPACK_SET_DESTDIR ON) 32 | endif() 33 | 34 | if (NOT CPack_CMake_INCLUDED) 35 | include("CPack") 36 | endif() 37 | -------------------------------------------------------------------------------- /cmake/system.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2021-present, Trail of Bits, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed in accordance with the terms specified in 6 | # the LICENSE file found in the root directory of this source tree. 
7 | # 8 | 9 | if(CMAKE_SYSTEM_NAME STREQUAL "Linux") 10 | set(PLATFORM_LINUX true) 11 | 12 | elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") 13 | set(PLATFORM_MACOS true) 14 | 15 | else() 16 | message(FATAL_ERROR "Unsupported platform") 17 | endif() 18 | -------------------------------------------------------------------------------- /include/anvill/AnvillFunction.h: -------------------------------------------------------------------------------- 1 | /* 2 | * An AnvillFunction 3 | */ 4 | class AnvillFunction {}; -------------------------------------------------------------------------------- /include/anvill/CrossReferenceResolver.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace llvm { 16 | class Constant; 17 | class Type; 18 | } // namespace llvm 19 | namespace anvill { 20 | 21 | class EntityLifter; 22 | class EntityCrossReferenceResolverImpl; 23 | 24 | // Cross-reference resolver interface. 25 | class CrossReferenceResolver { 26 | public: 27 | virtual ~CrossReferenceResolver(void) = default; 28 | 29 | virtual std::optional AddressOfEntity( 30 | llvm::Constant *ent) const = 0; 31 | 32 | // `value_type` represents the type of the data stored at `addr`. If it's 33 | // not a `nullptr`, then the return value's `getType()` will be an 34 | // `llvm::PointerType` whose element type is `value_type`. 35 | virtual llvm::Constant *EntityAtAddress( 36 | std::uint64_t addr, llvm::Type *value_type=nullptr, 37 | unsigned address_space=0u) const = 0; 38 | }; 39 | 40 | // Default cross-reference resolver. Never resolves anything. 
41 | class NullCrossReferenceResolver : public CrossReferenceResolver { 42 | public: 43 | virtual ~NullCrossReferenceResolver(void) = default; 44 | 45 | std::optional AddressOfEntity( 46 | llvm::Constant *ent) const override; 47 | 48 | llvm::Constant *EntityAtAddress( 49 | std::uint64_t addr, llvm::Type *value_type, 50 | unsigned address_space) const override; 51 | }; 52 | 53 | // Resolve cross-references with an entity lifter. 54 | class EntityCrossReferenceResolver : public CrossReferenceResolver { 55 | protected: 56 | std::unique_ptr impl; 57 | 58 | public: 59 | virtual ~EntityCrossReferenceResolver(void); 60 | explicit EntityCrossReferenceResolver(const EntityLifter &entity_lifter_); 61 | 62 | std::optional AddressOfEntity( 63 | llvm::Constant *ent) const override; 64 | 65 | llvm::Constant *EntityAtAddress( 66 | std::uint64_t addr, llvm::Type *value_type, 67 | unsigned address_space) const override; 68 | }; 69 | 70 | } // namespace anvill 71 | -------------------------------------------------------------------------------- /include/anvill/Optimize.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace llvm { 12 | class Module; 13 | } // namespace llvm 14 | namespace remill { 15 | class Arch; 16 | } // namespace remill 17 | 18 | namespace anvill { 19 | 20 | class EntityLifter; 21 | 22 | // Optimize a module. This can be a module with semantics code, lifted 23 | // code, etc. 
24 | void OptimizeModule(const EntityLifter &lifter_context, 25 | llvm::Module &module); 26 | 27 | } // namespace anvill 28 | -------------------------------------------------------------------------------- /include/anvill/Passes/BranchHintPass.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 Trail of Bits, Inc. 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU Affero General Public License as 6 | * published by the Free Software Foundation, either version 3 of the 7 | * License, or (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU Affero General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU Affero General Public License 15 | * along with this program. If not, see . 
16 | */ 17 | 18 | #pragma once 19 | 20 | #include 21 | 22 | namespace anvill { 23 | 24 | 25 | static constexpr auto kFlagIntrinsicPrefix = "__remill_flag_computation"; 26 | static constexpr auto kCompareInstrinsicPrefix = "__remill_compare"; 27 | static constexpr auto kCompareExchangePrefix = "__remill_compare_exchange"; 28 | 29 | template 30 | class BranchHintPass : public IntrinsicPass { 31 | public: 32 | static bool isTargetInstrinsic(const llvm::CallInst *callinsn) { 33 | if (const auto *callee = callinsn->getCalledFunction()) { 34 | return callee->getName().startswith(kCompareInstrinsicPrefix) && !callee->getName().startswith(kCompareExchangePrefix); 35 | } 36 | 37 | return false; 38 | } 39 | }; 40 | } // namespace anvill 41 | -------------------------------------------------------------------------------- /include/anvill/Passes/BranchRecovery.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 Trail of Bits, Inc. 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU Affero General Public License as 6 | * published by the Free Software Foundation, either version 3 of the 7 | * License, or (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU Affero General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU Affero General Public License 15 | * along with this program. If not, see . 
16 | */ 17 | 18 | #pragma once 19 | 20 | #include 21 | #include 22 | 23 | 24 | namespace anvill { 25 | 26 | // This pass consumes the analysis from BranchAnalysis and replaces the compare intrinsic 27 | // with an icmp of the form icmp compare compared.0 compared.1 which was proven equivalent to the flag 28 | // computation. 29 | 30 | class BranchRecovery 31 | : public BranchHintPass, 32 | public llvm::PassInfoMixin { 33 | public: 34 | // Maps CallInst to anvill_compare prims to the result 35 | using Result = llvm::PreservedAnalyses; 36 | 37 | static Result INIT_RES; 38 | 39 | 40 | Result runOnIntrinsic(llvm::CallInst *indirectJump, 41 | llvm::FunctionAnalysisManager &am, Result agg); 42 | 43 | 44 | static llvm::StringRef name(); 45 | }; 46 | } // namespace anvill -------------------------------------------------------------------------------- /include/anvill/Passes/CodeQualityStatCollector.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // This pass collects additional stats which are useful for measuring code quality. 
4 | 5 | 6 | namespace anvill { 7 | class CodeQualityStatCollector 8 | : public llvm::PassInfoMixin { 9 | public: 10 | llvm::PreservedAnalyses run(llvm::Function &function, 11 | llvm::FunctionAnalysisManager &analysisManager); 12 | 13 | static llvm::StringRef name(void); 14 | }; 15 | } // namespace anvill -------------------------------------------------------------------------------- /include/anvill/Passes/CombineAdjacentShifts.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // Identify `(ashr (shl V, A), B)` and try to convert to 4 | // 5 | // V_short = trunc V to iA 6 | // V_signed = sext V_short 7 | // res = shl V_signed, A - B 8 | namespace anvill { 9 | class CombineAdjacentShifts final 10 | : public llvm::PassInfoMixin { 11 | public: 12 | static llvm::StringRef name(void); 13 | 14 | llvm::PreservedAnalyses run(llvm::Function &func, 15 | llvm::FunctionAnalysisManager &fam); 16 | }; 17 | 18 | } // namespace anvill 19 | -------------------------------------------------------------------------------- /include/anvill/Passes/ConvertAddressesToEntityUses.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace llvm { 17 | class Use; 18 | } // namespace llvm 19 | namespace anvill { 20 | 21 | class TypeProvider; 22 | 23 | // Describes an instruction that appears to reference some entity. 
24 | struct EntityUse final { 25 | inline explicit EntityUse(llvm::Use *use_, ResolvedCrossReference xref_) 26 | : use(use_), 27 | xref(xref_) {} 28 | 29 | // An operand inside of a particular instruction, where `use->getUser()` 30 | // is an `llvm::Instruction`, and `use->get()` is a value related to the 31 | // stack pointer. 32 | llvm::Use *const use; 33 | 34 | // Resolved cross-reference. 35 | const ResolvedCrossReference xref; 36 | }; 37 | 38 | // Contains a list of instruction operand uses that could feasibly be 39 | // entity references. 40 | using EntityUsages = std::vector; 41 | 42 | // This function pass recovers stack information by analyzing the usage 43 | // of the `__anvill_sp` symbol 44 | class ConvertAddressesToEntityUses final 45 | : public llvm::PassInfoMixin { 46 | private: 47 | 48 | // Resolve addresses to entities and vice versa. 49 | const CrossReferenceResolver &xref_resolver; 50 | 51 | // The metadata ID to annotation recovered entities with. 52 | const std::optional pc_metadata_id; 53 | 54 | public: 55 | 56 | // Function pass entry point 57 | llvm::PreservedAnalyses run(llvm::Function &function, 58 | llvm::FunctionAnalysisManager &fam); 59 | 60 | // Returns the pass name 61 | static llvm::StringRef name(void); 62 | 63 | // Enumerates some of the possible entity usages that are isolated to 64 | // specific instruction operand uses. 65 | EntityUsages EnumeratePossibleEntityUsages(llvm::Function &function); 66 | 67 | ConvertAddressesToEntityUses( 68 | const CrossReferenceResolver &xref_resolver_, 69 | std::optional pc_metadata_id_=std::nullopt); 70 | }; 71 | 72 | } // namespace anvill 73 | -------------------------------------------------------------------------------- /include/anvill/Passes/ConvertMasksToCasts.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 
4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | namespace anvill { 15 | 16 | // Looks for the following patterns that can be converted into casts, where 17 | // we focus on high-level casting patterns, i.e. truncations, zero-extensions, 18 | // and sign-extensions. 19 | // 20 | // and i64 %val, 0xff -> %down_casted_val = trunc %val to i8 21 | // %new_val = zext %down_casted_val to i64 22 | // and i64 %val, 0xffff -> %down_casted_val = trunc %val to i16 23 | // %new_val = zext %down_casted_val to i64 24 | // and i64 %val, 0xffffffff -> %down_casted_val = trunc %val to i32 25 | // %new_val = zext %down_casted_val to i64 26 | // 27 | // We also look for patterns of the form: 28 | // 29 | // %low_val = shl i64 %val, 32 30 | // %signed_val = ashr i64 %low_val, 32 31 | // 32 | // And convert it into: 33 | // 34 | // %low_val = trunc i64 %val to i32 35 | // %signed_val = sext i32 %low_val to i64 36 | // 37 | // In general, these types of patterns are easier to lift into a combination 38 | // of one down cast, followed by one implicit upcast in decompiled code, and 39 | // thus look simpler than the shifting/masking variants. 40 | // 41 | // In the latter case with shifting/masking, this type of 32-bit shifting/ 42 | // masking pattern can negatively affect offset/displacement analysis, e.g. 43 | // for PC- and SP-relative displacements. For example: 44 | // 45 | // %255 = sub i64 %252, zext (i32 ... @__anvill_sp ... 
to i64) 46 | // %256 = shl i64 %255, 32 47 | // %257 = ashr exact i64 %256, 32, !pc !70 48 | class ConvertMasksToCasts final 49 | : public llvm::PassInfoMixin { 50 | public: 51 | llvm::PreservedAnalyses run(llvm::Function &F, 52 | llvm::FunctionAnalysisManager &AM); 53 | 54 | static llvm::StringRef name(void); 55 | }; 56 | } // namespace anvill 57 | -------------------------------------------------------------------------------- /include/anvill/Passes/ConvertSymbolicReturnAddressToConcreteReturnAddress.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | namespace anvill { 15 | 16 | // Look for uses of the `(ptrtoint __remill_ra)` constant expression 17 | // representing uses of the return address, and translate them to concrete uses 18 | // of the return address. 19 | class ConvertSymbolicReturnAddressToConcreteReturnAddress final : 20 | public llvm::PassInfoMixin { 21 | public: 22 | llvm::PreservedAnalyses run(llvm::Function &func, 23 | llvm::FunctionAnalysisManager &fam); 24 | 25 | static llvm::StringRef name(void); 26 | }; 27 | 28 | } // namespace anvill 29 | -------------------------------------------------------------------------------- /include/anvill/Passes/ConvertXorsToCmps.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | namespace anvill { 15 | 16 | // Finds values in the form of: 17 | // 18 | // %cmp = icmp eq val1, val2 19 | // %n = xor %cmp, 1 20 | // 21 | // %br %cmp, d1, d2 (optional) 22 | // 23 | // and converts it to: 24 | // 25 | // %cmp = icmp ne val1, val2 26 | // %n = %cmp 27 | // %br %cmp, d2, d1 28 | // 29 | // This happens often enough in lifted code due to bit shift ops, and the code 30 | // with xors is more difficult to analyze and for a human to read. This pass 31 | // should only work on boolean values, and handle when those are used in 32 | // branches and selects. 33 | class ConvertXorsToCmps final : public llvm::PassInfoMixin { 34 | public: 35 | llvm::PreservedAnalyses run(llvm::Function &F, 36 | llvm::FunctionAnalysisManager &AM); 37 | 38 | static llvm::StringRef name(void); 39 | }; 40 | } // namespace anvill 41 | -------------------------------------------------------------------------------- /include/anvill/Passes/HoistUsersOfSelectsAndPhis.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | namespace llvm { 15 | class BasicBlock; 16 | class Function; 17 | class Instruction; 18 | class Value; 19 | } // namespace llvm 20 | namespace anvill { 21 | 22 | // This function pass will attempt to hoist uses of `select` and `phi` through 23 | // the `select` and `phi`s themselves. 
For example, if there is: 24 | // 25 | // %b = select %cond, %x, %y 26 | // %a = add %b, %c 27 | // 28 | // Then this pass produces the following: 29 | // 30 | // %x_b = add %x, %c 31 | // %y_b = add %y, %c 32 | // %a = select %cond, %x_b, %y_b 33 | // 34 | // The idea is that we want to be able to make things like address calculations 35 | // unconditional. 36 | class HoistUsersOfSelectsAndPhis final 37 | : public llvm::PassInfoMixin { 38 | public: 39 | using InstructionList = std::vector; 40 | 41 | llvm::PreservedAnalyses run(llvm::Function &function, 42 | llvm::FunctionAnalysisManager &fam); 43 | 44 | static llvm::StringRef name(void); 45 | 46 | class PassFunctionState; 47 | }; 48 | 49 | } // namespace anvill 50 | -------------------------------------------------------------------------------- /include/anvill/Passes/IndirectJumpPass.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | namespace anvill { 18 | namespace { 19 | 20 | // NOTE(ian): The jump table analysis could also be targeted towards 21 | // incomplete switch intrinsics. 22 | // 23 | // It is always safe to run this analysis because the bounds on the index are 24 | // conservative. That being said if the intrinsic is truly incomplete when we 25 | // attempt to lower the switch there will be missing labels in the PC binding 26 | // mapping, therefore it is unlikely the switch lowering pass should be run 27 | // against the incomplete switches. Perhaps the best solution here is to run 28 | // the jump table analysis on its own against incomplete switches and allow it 29 | // to call back into the lifter for more code. 
30 | static bool isTargetInstrinsic(const llvm::CallInst *callinsn) { 31 | if (const auto *callee = callinsn->getCalledFunction()) { 32 | return callee->getName().equals(kAnvillSwitchCompleteFunc); 33 | } 34 | 35 | return false; 36 | } 37 | 38 | static inline std::vector 39 | getTargetCalls(llvm::Function &fromFunction) { 40 | std::vector calls; 41 | for (auto &insn : llvm::instructions(fromFunction)) { 42 | llvm::Instruction *new_insn = &insn; 43 | if (llvm::CallInst *call_insn = llvm::dyn_cast(new_insn)) { 44 | if (isTargetInstrinsic(call_insn)) { 45 | calls.push_back(call_insn); 46 | } 47 | } 48 | } 49 | return calls; 50 | } 51 | } // namespace 52 | 53 | // NOTE(ian): Unfortunately pretty sure CRTP is the only way to do this without 54 | // running into issues with pass IDs 55 | template 56 | class IndirectJumpPass { 57 | public: 58 | IndirectJumpPass(void) {} 59 | 60 | Result run(llvm::Function &F, llvm::FunctionAnalysisManager &am); 61 | }; 62 | 63 | 64 | template 65 | Result IndirectJumpPass::run( 66 | llvm::Function &F, llvm::FunctionAnalysisManager &am) { 67 | auto &function_pass = *static_cast(this); 68 | Result total = UserFunctionPass::BuildInitialResult(); 69 | for (auto targetCall : getTargetCalls(F)) { 70 | total = function_pass.runOnIndirectJump(targetCall, am, std::move(total)); 71 | } 72 | 73 | return total; 74 | } 75 | 76 | } // namespace anvill -------------------------------------------------------------------------------- /include/anvill/Passes/IntrinsicPass.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 Trail of Bits, Inc. 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU Affero General Public License as 6 | * published by the Free Software Foundation, either version 3 of the 7 | * License, or (at your option) any later version. 
8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU Affero General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU Affero General Public License 15 | * along with this program. If not, see . 16 | */ 17 | 18 | #pragma once 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | namespace anvill { 27 | 28 | template 29 | class IntrinsicPass { 30 | 31 | private: 32 | static std::vector 33 | getTargetCalls(llvm::Function &fromFunction) { 34 | std::vector calls; 35 | for (auto &insn : llvm::instructions(fromFunction)) { 36 | llvm::Instruction *new_insn = &insn; 37 | if (llvm::CallInst *call_insn = 38 | llvm::dyn_cast(new_insn)) { 39 | if (UserFunctionPass::isTargetInstrinsic(call_insn)) { 40 | calls.push_back(call_insn); 41 | } 42 | } 43 | } 44 | return calls; 45 | } 46 | 47 | public: 48 | IntrinsicPass(void) {} 49 | 50 | Result run(llvm::Function &F, llvm::FunctionAnalysisManager &am); 51 | }; 52 | 53 | 54 | template 55 | Result IntrinsicPass::run( 56 | llvm::Function &F, llvm::FunctionAnalysisManager &am) { 57 | auto &function_pass = *static_cast(this); 58 | Result total = function_pass.INIT_RES; 59 | for (auto targetCall : getTargetCalls(F)) { 60 | total = function_pass.runOnIntrinsic(targetCall, am, std::move(total)); 61 | } 62 | 63 | return total; 64 | } 65 | } // namespace anvill -------------------------------------------------------------------------------- /include/anvill/Passes/JumpTableAnalysis.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace anvill { 21 | 22 | // A slice that represents the computation of the program counter, given a loaded value from a jump table. 23 | // The slice has one unknown argument which is the loaded value. The slice argument and return value are integers. 24 | class PcRel { 25 | private: 26 | SliceID slice; 27 | 28 | public: 29 | PcRel(SliceID slice) : slice(slice) {} 30 | 31 | // Interprets the slice, providing loadedVal as the argument. 32 | llvm::APInt apply(SliceInterpreter &interp, llvm::APInt loadedVal) const; 33 | 34 | llvm::IntegerType *getExpectedType(const InterpreterBuilder &) const; 35 | }; 36 | 37 | // A slice that represents the computation from an index (some non-constant 38 | // value) to a loaded address. The slice is linear and constant except for the 39 | // index, resulting in one integer argument for the slice. 40 | class IndexRel { 41 | private: 42 | SliceID slice; 43 | llvm::Value *index; 44 | 45 | public: 46 | llvm::Value *getIndex() const; 47 | 48 | // Interprets the slice, substituting indexValue for the index, retrieving a 49 | // jump table address. 
50 | llvm::APInt apply( SliceInterpreter &, llvm::APInt indexValue) const; 51 | 52 | IndexRel(SliceID slice, llvm::Value *index) : slice(slice), index(index) {} 53 | }; 54 | 55 | struct Bound { 56 | llvm::APInt lower; 57 | llvm::APInt upper; 58 | bool isSigned; 59 | 60 | bool lessThanOrEqual(llvm::APInt lhs, llvm::APInt rhs) const { 61 | if (isSigned) { 62 | return lhs.sle(rhs); 63 | } else { 64 | return lhs.ule(rhs); 65 | } 66 | } 67 | }; 68 | 69 | struct JumpTableResult { 70 | PcRel pcRel; 71 | IndexRel indexRel; 72 | Bound bounds; 73 | llvm::BasicBlock *defaultOut; 74 | InterpreterBuilder interp; 75 | }; 76 | 77 | class JumpTableAnalysis 78 | : public IndirectJumpPass< 79 | JumpTableAnalysis, llvm::DenseMap>, 80 | public llvm::AnalysisInfoMixin { 81 | 82 | private: 83 | const EntityLifter &ent_lifter; 84 | friend llvm::AnalysisInfoMixin; 85 | static llvm::AnalysisKey Key; 86 | 87 | public: 88 | JumpTableAnalysis(const EntityLifter &ent_lifter) 89 | : IndirectJumpPass(), ent_lifter(ent_lifter) {} 90 | 91 | static llvm::StringRef name(void); 92 | 93 | using Result = llvm::DenseMap; 94 | 95 | static Result BuildInitialResult(); 96 | 97 | Result runOnIndirectJump(llvm::CallInst *indirectJump, 98 | llvm::FunctionAnalysisManager &am, Result agg); 99 | }; 100 | } // namespace anvill 101 | -------------------------------------------------------------------------------- /include/anvill/Passes/LowerRemillMemoryAccessIntrinsics.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace anvill { 14 | 15 | class LowerRemillMemoryAccessIntrinsics final 16 | : public llvm::PassInfoMixin { 17 | public: 18 | LowerRemillMemoryAccessIntrinsics(void) {} 19 | 20 | static llvm::StringRef name(void); 21 | 22 | llvm::PreservedAnalyses run(llvm::Function &F, 23 | llvm::FunctionAnalysisManager &AM); 24 | }; 25 | } // namespace anvill 26 | -------------------------------------------------------------------------------- /include/anvill/Passes/LowerRemillUndefinedIntrinsics.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace anvill { 14 | class LowerRemillUndefinedIntrinsics final 15 | : public llvm::PassInfoMixin { 16 | public: 17 | LowerRemillUndefinedIntrinsics(void) {} 18 | 19 | static llvm::StringRef name(void); 20 | 21 | llvm::PreservedAnalyses run(llvm::Function &F, 22 | llvm::FunctionAnalysisManager &AM); 23 | }; 24 | } // namespace anvill 25 | -------------------------------------------------------------------------------- /include/anvill/Passes/LowerSwitchIntrinsics.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | // The goal here is to lower anvill_complete_switch to an llvm switch when we 19 | // can recover the cases. 
This analysis must be sound but 20 | // `anvill_complete_switch` may be used for any complete set of indirect targets 21 | // so cases may not even exist. 22 | // 23 | // The analysis has to prove to us that this transformation is semantically 24 | // preserving. 25 | // 26 | // This pass focuses on lowering switch statements where a jump table does exist. 27 | 28 | namespace anvill { 29 | 30 | class LowerSwitchIntrinsics 31 | : public IndirectJumpPass, 32 | public llvm::PassInfoMixin { 33 | 34 | private: 35 | const MemoryProvider &memProv; 36 | 37 | public: 38 | LowerSwitchIntrinsics(const MemoryProvider &memProv) 39 | : memProv(memProv) {} 40 | 41 | static llvm::StringRef name(void); 42 | 43 | llvm::PreservedAnalyses runOnIndirectJump(llvm::CallInst *indirectJump, 44 | llvm::FunctionAnalysisManager &am, 45 | llvm::PreservedAnalyses); 46 | 47 | 48 | static llvm::PreservedAnalyses BuildInitialResult(); 49 | }; 50 | } // namespace anvill 51 | -------------------------------------------------------------------------------- /include/anvill/Passes/LowerTypeHintIntrinsics.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree.
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace anvill { 14 | 15 | class LowerTypeHintIntrinsics final 16 | : public llvm::PassInfoMixin { 17 | public: 18 | static llvm::StringRef name(void); 19 | 20 | llvm::PreservedAnalyses run(llvm::Function &F, 21 | llvm::FunctionAnalysisManager &AM); 22 | }; 23 | 24 | } // namespace anvill 25 | -------------------------------------------------------------------------------- /include/anvill/Passes/RecoverBasicStackFrame.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | namespace anvill { 15 | 16 | class StackFrameRecoveryOptions; 17 | 18 | // This function pass recovers stack information by analyzing the usage 19 | // of the `__anvill_sp` symbol 20 | class RecoverBasicStackFrame final 21 | : public llvm::PassInfoMixin { 22 | 23 | // Lifting options 24 | const StackFrameRecoveryOptions &options; 25 | 26 | public: 27 | 28 | // Function pass entry point 29 | llvm::PreservedAnalyses run(llvm::Function &func, 30 | llvm::FunctionAnalysisManager &fam); 31 | 32 | // Returns the pass name 33 | static llvm::StringRef name(void); 34 | 35 | inline explicit RecoverBasicStackFrame( 36 | const StackFrameRecoveryOptions &options_) 37 | : options(options_) {} 38 | }; 39 | 40 | } // namespace anvill 41 | -------------------------------------------------------------------------------- /include/anvill/Passes/RemoveCompilerBarriers.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 
4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace anvill { 14 | 15 | class RemoveCompilerBarriers final 16 | : public llvm::PassInfoMixin { 17 | public: 18 | llvm::PreservedAnalyses run(llvm::Function &F, 19 | llvm::FunctionAnalysisManager &AM); 20 | 21 | 22 | static llvm::StringRef name(void); 23 | }; 24 | 25 | } // namespace anvill 26 | -------------------------------------------------------------------------------- /include/anvill/Passes/RemoveDelaySlotIntrinsics.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace anvill { 14 | 15 | class RemoveDelaySlotIntrinsics final 16 | : public llvm::PassInfoMixin { 17 | public: 18 | static llvm::StringRef name(void); 19 | 20 | llvm::PreservedAnalyses run(llvm::Function &F, 21 | llvm::FunctionAnalysisManager &AM); 22 | }; 23 | 24 | } // namespace anvill 25 | -------------------------------------------------------------------------------- /include/anvill/Passes/RemoveErrorIntrinsics.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace anvill { 14 | 15 | class RemoveErrorIntrinsics final 16 | : public llvm::PassInfoMixin { 17 | public: 18 | static llvm::StringRef name(void); 19 | 20 | llvm::PreservedAnalyses run(llvm::Function &F, 21 | llvm::FunctionAnalysisManager &AM); 22 | }; 23 | 24 | } // namespace anvill 25 | -------------------------------------------------------------------------------- /include/anvill/Passes/RemoveRemillFunctionReturns.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace anvill { 14 | 15 | class CrossReferenceFolder; 16 | class CrossReferenceResolver; 17 | class StackPointerResolver; 18 | 19 | enum ReturnAddressResult { 20 | 21 | // We've found a case where a value returned by `llvm.returnaddress`, or 22 | // casted from `__anvill_ra`, reaches into the `pc` argument of the 23 | // `__remill_function_return` intrinsic. This is the ideal case that we 24 | // want to handle. 25 | kFoundReturnAddress, 26 | 27 | // We've found a case where a load from something derived from 28 | // `__anvill_sp`, our "symbolic stack pointer", reaches into the `pc` 29 | // argument of `__remill_function_return`. This suggests that stack frame 30 | // recovery has not happened yet, and thus we haven't really given stack 31 | // frame recovery or stack frame splitting a chance to work. 32 | kFoundSymbolicStackPointerLoad, 33 | 34 | // We've found a `load` or something else.
This is probably a sign that 35 | // stack frame recovery has happened, and that the actual return address 36 | // is not necessarily the expected value, and so we need to try to swap 37 | // out the return address with whatever we loaded. 38 | kUnclassifiableReturnAddress 39 | }; 40 | 41 | class RemoveRemillFunctionReturns final 42 | : public llvm::PassInfoMixin { 43 | private: 44 | const CrossReferenceResolver &xref_resolver; 45 | 46 | public: 47 | inline explicit RemoveRemillFunctionReturns( 48 | const CrossReferenceResolver &xref_resolver_) 49 | : xref_resolver(xref_resolver_) {} 50 | 51 | static llvm::StringRef name(void); 52 | 53 | llvm::PreservedAnalyses run(llvm::Function &F, 54 | llvm::FunctionAnalysisManager &AM); 55 | 56 | private: 57 | ReturnAddressResult QueryReturnAddress( 58 | const CrossReferenceFolder &xref_folder, 59 | const StackPointerResolver &sp_resolver, 60 | llvm::Module *module, 61 | llvm::Value *val) const; 62 | }; 63 | } // namespace anvill 64 | -------------------------------------------------------------------------------- /include/anvill/Passes/RemoveStackPointerCExprs.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | namespace anvill { 15 | 16 | // This pass unrolls constant expressions that involve the stack pointer 17 | // into instructions so that RecoverStackInformation can replace the stack 18 | // pointer with its stack representation. The pass strips away portions of the 19 | // constant expression that can't be resolved to a stack reference so that 20 | // hopefully they will be resolved later.
21 | // 22 | // define i1 @slice() local_unnamed_addr #2 { 23 | // %1 = call zeroext i1 (i1, ...) @__remill_flag_computation_sign( 24 | // i1 zeroext icmp slt 25 | // (i32 add (i32 ptrtoint (i8* @__anvill_sp to i32), i32 -12), i32 0), 26 | // i32 add (i32 ptrtoint (i8* @__anvill_sp to i32), i32 -12)) #5 27 | // ret i1 %1 28 | // } 29 | // 30 | // Becomes: 31 | // define i1 @slice() local_unnamed_addr { 32 | // %1 = icmp slt i32 add (i32 ptrtoint (i8* @__anvill_sp to i32), i32 -12), 0 33 | // %2 = call zeroext i1 (i1, ...) @__remill_flag_computation_sign(i1 zeroext %1, i32 add (i32 ptrtoint (i8* @__anvill_sp to i32), i32 -12)) 34 | // ret i1 %2 35 | // } 36 | // 37 | 38 | class StackFrameRecoveryOptions; 39 | 40 | class RemoveStackPointerCExprs final 41 | : public llvm::PassInfoMixin { 42 | private: 43 | const StackFrameRecoveryOptions &options; 44 | public: 45 | 46 | inline explicit RemoveStackPointerCExprs( 47 | const StackFrameRecoveryOptions &options_) 48 | : options(options_) {} 49 | 50 | static llvm::StringRef name(void); 51 | llvm::PreservedAnalyses run(llvm::Function &F, 52 | llvm::FunctionAnalysisManager &AM); 53 | }; 54 | 55 | } // namespace anvill 56 | -------------------------------------------------------------------------------- /include/anvill/Passes/RemoveTrivialPhisAndSelects.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree.
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace anvill { 14 | 15 | class RemoveTrivialPhisAndSelects final 16 | : public llvm::PassInfoMixin { 17 | public: 18 | static llvm::StringRef name(void); 19 | 20 | llvm::PreservedAnalyses run(llvm::Function &F, 21 | llvm::FunctionAnalysisManager &AM); 22 | }; 23 | 24 | } // namespace anvill 25 | -------------------------------------------------------------------------------- /include/anvill/Passes/RemoveUnusedBranchHints.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | namespace anvill { 7 | 8 | // This pass consumes the analysis from BranchAnalysis and replaces the compare intrinsic 9 | // with an icmp of the form icmp compare compared.0 compared.1 which was proven equivalent to the flag 10 | // computation. NOTE(review): this text matches what BranchRecovery::runOnIntrinsic does; this pass also targets flag intrinsics (see isTargetInstrinsic), so confirm the description applies here. 11 | 12 | class RemoveUnusedBranchHints 13 | : public IntrinsicPass, 14 | public llvm::PassInfoMixin { 15 | public: 16 | // The aggregated result of this pass is the set of preserved analyses. 17 | using Result = llvm::PreservedAnalyses; 18 | 19 | static Result INIT_RES; 20 | 21 | 22 | static bool isTargetInstrinsic(const llvm::CallInst *callinsn) { 23 | if (const auto *callee = callinsn->getCalledFunction()) { 24 | return callee->getName().startswith(kCompareInstrinsicPrefix) || 25 | callee->getName().startswith(kFlagIntrinsicPrefix); 26 | } 27 | 28 | return false; 29 | } 30 | 31 | 32 | Result runOnIntrinsic(llvm::CallInst *indirectJump, 33 | llvm::FunctionAnalysisManager &am, Result agg); 34 | 35 | 36 | static llvm::StringRef name(); 37 | }; 38 | } // namespace anvill -------------------------------------------------------------------------------- /include/anvill/Passes/RemoveUnusedFPClassificationCalls.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved.
4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | namespace anvill { 15 | 16 | class RemoveUnusedFPClassificationCalls final 17 | : public llvm::PassInfoMixin { 18 | public: 19 | static llvm::StringRef name(void); 20 | 21 | llvm::PreservedAnalyses run(llvm::Function &function, 22 | llvm::FunctionAnalysisManager &analysisManager); 23 | }; 24 | } // namespace anvill 25 | -------------------------------------------------------------------------------- /include/anvill/Passes/SinkSelectionsIntoBranchTargets.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | namespace anvill { 18 | 19 | struct FunctionAnalysis final { 20 | struct Replacement final { 21 | llvm::Use *use_to_replace{nullptr}; 22 | llvm::Value *replace_with{nullptr}; 23 | }; 24 | 25 | using ReplacementList = std::vector; 26 | using DisposableInstructionList = std::unordered_set; 27 | 28 | ReplacementList replacement_list; 29 | DisposableInstructionList disposable_instruction_list; 30 | }; 31 | 32 | // When lifting conditional control-flow, we end up with the following pattern: 33 | // 34 | // %25 = icmp eq i8 %24, 0 35 | // %26 = select i1 %25, i64 TAKEN_PC, i64 NOT_TAKEN_PC 36 | // br i1 %25, label %27, label %34 37 | // 38 | // 27: 39 | // ... use of %26 40 | // 41 | // 34: 42 | // ... 
use of %26 43 | // 44 | // This function pass transforms the above pattern into the following: 45 | // 46 | // %25 = icmp eq i8 %24, 0 47 | // br i1 %25, label %27, label %34 48 | // 49 | // 27: 50 | // ... use of TAKEN_PC 51 | // 52 | // 34: 53 | // ... use of NOT_TAKEN_PC 54 | // 55 | // When this happens, we're better able to fold cross-references at the targets 56 | // of conditional branches. 57 | class SinkSelectionsIntoBranchTargets final 58 | : public llvm::PassInfoMixin { 59 | public: 60 | 61 | // Function pass entry point 62 | llvm::PreservedAnalyses run(llvm::Function &function, 63 | llvm::FunctionAnalysisManager &fam); 64 | 65 | // Returns the pass name 66 | static llvm::StringRef name(void); 67 | 68 | static FunctionAnalysis AnalyzeFunction(const llvm::DominatorTreeAnalysis::Result &dt,llvm::Function &function); 69 | }; 70 | 71 | 72 | 73 | } // namespace anvill 74 | -------------------------------------------------------------------------------- /include/anvill/Passes/SliceInterpreter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace llvm { 16 | class ExecutionEngine; 17 | class Module; 18 | } // namespace llvm 19 | namespace anvill { 20 | 21 | class SliceID; 22 | 23 | class SliceInterpreter { 24 | private: 25 | std::unique_ptr execEngine; 26 | 27 | SliceInterpreter(void) = delete; 28 | 29 | public: 30 | ~SliceInterpreter(void); 31 | explicit SliceInterpreter(const llvm::Module &module); 32 | 33 | llvm::GenericValue executeSlice(SliceID sliceId, 34 | llvm::ArrayRef ArgValue); 35 | }; 36 | 37 | } // namespace anvill 38 | -------------------------------------------------------------------------------- /include/anvill/Passes/SplitStackFrameAtReturnAddress.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | // 10 | // The main goal of this pass is to isolate the return address value 11 | // saved on the stack frame in its own structure, so that additional 12 | // function passes can more easily clean up the code and 13 | // eliminate unneeded structures from the stack frame. 14 | // 15 | // The following is an example scenario: 16 | // 17 | // int add(int *a, int *b) { 18 | // return *a + *b; 19 | // } 20 | // 21 | // int main(int argc, char **) { 22 | // int x = argc; 23 | // return add(&x, &x); 24 | // } 25 | // 26 | // The stack frame may look like this: 27 | // 28 | // struct StackFrame final { 29 | // int argc; 30 | // void *ret_addr_of_main; 31 | // int x; 32 | // }; 33 | // 34 | // Passing the `x` pointer to the call causes LLVM to not be able to rule 35 | // out the possibility that the `add` function may in fact decide to access 36 | // other members in the stack frame (like StackFrame::argc).
This prevents 37 | // further optimizations to simplify the code. 38 | // 39 | // In order to fix this problem, this function pass splits the StackFrame 40 | // type while also updating all its usages throughout the code. 41 | // 42 | // Here's how this example scenario is handled: 43 | // 44 | // struct StackFrame_part0 final { 45 | // int argc; 46 | // }; 47 | // 48 | // struct StackFrame_part1 final { 49 | // void *ret_addr_of_main; 50 | // }; 51 | // 52 | // struct StackFrame_part2 final { 53 | // int x; 54 | // }; 55 | // 56 | 57 | #pragma once 58 | 59 | #include 60 | #include 61 | 62 | namespace anvill { 63 | 64 | class StackFrameRecoveryOptions; 65 | 66 | // Splits the stack frame type of the given function, isolating the 67 | // llvm.returnaddress (if present) in its own StructType to allow for 68 | // further optimization passes to better simplify/eliminate stack 69 | // accesses. 70 | class SplitStackFrameAtReturnAddress final 71 | : public llvm::PassInfoMixin { 72 | private: 73 | const StackFrameRecoveryOptions &options; 74 | public: 75 | 76 | inline explicit SplitStackFrameAtReturnAddress( 77 | const StackFrameRecoveryOptions &options_) 78 | : options(options_) {} 79 | 80 | // Function pass entry point 81 | llvm::PreservedAnalyses run(llvm::Function &function, 82 | llvm::FunctionAnalysisManager &fam); 83 | 84 | 85 | static llvm::StringRef name(void); 86 | }; 87 | 88 | } // namespace anvill 89 | -------------------------------------------------------------------------------- /include/anvill/Passes/SpreadPCMetadata.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | namespace anvill { 15 | 16 | // Looks for instructions missing the program counter-specific metadata, and 17 | // spreads nearby program counter-annotated metadata to those instructions. 18 | class SpreadPCMetadata final 19 | : public llvm::PassInfoMixin { 20 | private: 21 | const char * const pc_metadata_name; 22 | public: 23 | inline explicit SpreadPCMetadata(const char *pc_metadata_name_) 24 | : pc_metadata_name(pc_metadata_name_) {} 25 | 26 | llvm::PreservedAnalyses run(llvm::Function &F, 27 | llvm::FunctionAnalysisManager &AM); 28 | 29 | static llvm::StringRef name(void); 30 | }; 31 | } // namespace anvill 32 | -------------------------------------------------------------------------------- /include/anvill/Passes/TransformRemillJumpIntrinsics.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace anvill { 17 | 18 | class CrossReferenceFolder; 19 | class CrossReferenceResolver; 20 | // NOTE(review): an unscoped `enum ReturnAddressResult` with different enumerators is also defined in RemoveRemillFunctionReturns.h in this namespace; including both headers in one translation unit would be a redefinition. 21 | enum ReturnAddressResult { 22 | 23 | // This is a case where a value returned by `llvm.returnaddress`, or 24 | // casted from `__anvill_ra`, reaches into the `pc` argument of the 25 | // `__remill_jump` intrinsic. This is the ideal case, in which we want to 26 | // replace it with `__remill_function_return`. 27 | kReturnAddressProgramCounter, 28 | 29 | // This is a case where a value returned by `llvm.returnaddress`, or casted 30 | // from `__anvill_ra`, does not reach the `pc` argument, so it 31 | // should not get transformed to `__remill_function_return`.
32 | kUnclassifiableProgramCounter 33 | }; 34 | 35 | class TransformRemillJumpIntrinsics final 36 | : public llvm::PassInfoMixin { 37 | private: 38 | const CrossReferenceResolver &xref_resolver; 39 | 40 | 41 | ReturnAddressResult QueryReturnAddress( 42 | const CrossReferenceFolder &xref_folder, llvm::Module *module, 43 | llvm::Value *val) const; 44 | 45 | bool TransformJumpIntrinsic(llvm::CallBase *call); 46 | 47 | public: 48 | inline TransformRemillJumpIntrinsics( 49 | const CrossReferenceResolver &xref_resolver_) 50 | : xref_resolver(xref_resolver_) {} 51 | 52 | static llvm::StringRef name(void); 53 | 54 | llvm::PreservedAnalyses run(llvm::Function &F, 55 | llvm::FunctionAnalysisManager &AM); 56 | }; 57 | } // namespace anvill 58 | -------------------------------------------------------------------------------- /include/anvill/Version.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2020 Trail of Bits 2 | // Based on: https://github.com/andrew-hardin/cmake-git-version-tracking/blob/master/better-example/git.h 3 | // Which is (C) 2020 Andrew Hardin 4 | // 5 | // MIT License 6 | // Copyright (c) 2020 Andrew Hardin 7 | // 8 | // Permission is hereby granted, free of charge, to any person obtaining a copy 9 | // of this software and associated documentation files (the "Software"), to deal 10 | // in the Software without restriction, including without limitation the rights 11 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | // copies of the Software, and to permit persons to whom the Software is 13 | // furnished to do so, subject to the following conditions: 14 | // 15 | // The above copyright notice and this permission notice shall be included in all 16 | // copies or substantial portions of the Software. 
17 | // 18 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | // SOFTWARE. 25 | 26 | #pragma once 27 | 28 | #include 29 | 30 | namespace anvill { 31 | namespace version { 32 | 33 | bool HasVersionData(void); 34 | bool HasUncommittedChanges(void); 35 | std::string_view GetAuthorName(void); 36 | std::string_view GetAuthorEmail(void); 37 | std::string_view GetCommitHash(void); 38 | std::string_view GetCommitDate(void); 39 | std::string_view GetCommitSubject(void); 40 | std::string_view GetCommitBody(void); 41 | std::string_view GetVersionString(void); 42 | 43 | } // namespace version 44 | } // namespace anvill 45 | -------------------------------------------------------------------------------- /lib/Arch/AllocationState.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | #include "Arch.h" 17 | 18 | namespace remill { 19 | 20 | class Arch; 21 | 22 | } // namespace remill 23 | namespace anvill { 24 | 25 | class CallingConvention; 26 | 27 | struct AllocationConfig { 28 | bool can_pack_multiple_values_together{false}; 29 | llvm::Type *(*type_splitter)(llvm::Type *) = nullptr; 30 | }; 31 | 32 | // Captures the state of allocation for registers, including which registers 33 | // are taken and the available space left in each register. The AllocationState 34 | // needs a reference to the working architecture so that it can lookup 35 | // registers. 36 | struct AllocationState { 37 | public: 38 | ~AllocationState(void); 39 | 40 | AllocationState(const std::vector &_constraints, 41 | const remill::Arch *_arch, const CallingConvention *_conv); 42 | 43 | SizeAndType AssignSizeAndType(llvm::Type &type); 44 | 45 | llvm::Optional> TryRegisterAllocate(llvm::Type &type); 46 | 47 | llvm::Optional> 48 | TryCompositeRegisterAllocate(llvm::Type &type); 49 | 50 | llvm::Optional> 51 | TryBasicRegisterAllocate(llvm::Type &type, llvm::Optional hint); 52 | 53 | llvm::Optional> 54 | TryVectorRegisterAllocate(llvm::FixedVectorType &type); 55 | 56 | bool IsFilled(size_t i); 57 | 58 | uint64_t RemainingSpace(size_t i); 59 | 60 | llvm::Optional> 61 | ProcessIntVecX86_64SysV(llvm::Type *elem_type, unsigned int vec_size, 62 | unsigned int bit_width); 63 | 64 | llvm::Error CoalescePacking(const std::vector &vector, 65 | std::vector &packed_values); 66 | 67 | const std::vector &constraints; 68 | const remill::Arch *arch; 69 | std::vector reserved; 70 | std::vector fill; 71 | const CallingConvention *conv; 72 | AllocationConfig config; 73 | const SizeConstraint ptr_size_constraint; 74 | }; 75 | 76 | } // namespace anvill 77 | -------------------------------------------------------------------------------- /lib/Lifters/DataLifter.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | namespace llvm { 17 | class Constant; 18 | class GlobalAlias; 19 | class LLVMContext; 20 | class Module; 21 | class Type; 22 | class Value; 23 | } // namespace llvm 24 | namespace anvill { 25 | 26 | class EntityLifterImpl; 27 | class LifterOptions; 28 | class MemoryProvider; 29 | class TypeProvider; 30 | 31 | struct VariableDecl; 32 | 33 | // Orchestrates lifting of data, i.e. the variables described by `VariableDecl`s. 34 | class DataLifter { 35 | public: 36 | ~DataLifter(void); 37 | 38 | DataLifter(const LifterOptions &options_); 39 | 40 | // Lift a variable. Will return `nullptr` if the memory is not accessible. 41 | llvm::Constant *LiftData(const VariableDecl &decl, 42 | EntityLifterImpl &lifter_context); 43 | 44 | // Declare a lifted variable. Will not return `nullptr`. 45 | llvm::Constant *GetOrDeclareData(const VariableDecl &decl, 46 | EntityLifterImpl &lifter_context); 47 | 48 | private: 49 | friend class FunctionLifter; 50 | 51 | const LifterOptions &options; 52 | const MemoryProvider &memory_provider; 53 | const TypeProvider &type_provider; 54 | const TypeTranslator type_specifier; 55 | 56 | // Context associated with `module`. 57 | llvm::LLVMContext &context; 58 | }; 59 | 60 | } // namespace anvill 61 | -------------------------------------------------------------------------------- /lib/Passes/BranchRecovery.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 Trail of Bits, Inc.
3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU Affero General Public License as 6 | * published by the Free Software Foundation, either version 3 of the 7 | * License, or (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU Affero General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU Affero General Public License 15 | * along with this program. If not, see . 16 | */ 17 | 18 | 19 | #include 20 | 21 | #include 22 | 23 | namespace anvill { 24 | 25 | void AddBranchRecovery(llvm::FunctionPassManager &fpm) { 26 | fpm.addPass(BranchRecovery()); 27 | } 28 | 29 | BranchRecovery::Result BranchRecovery::INIT_RES = 30 | llvm::PreservedAnalyses::all(); 31 | 32 | llvm::StringRef BranchRecovery::name() { 33 | return "BranchRecovery"; 34 | } 35 | 36 | BranchRecovery::Result 37 | BranchRecovery::runOnIntrinsic(llvm::CallInst *brcond, 38 | llvm::FunctionAnalysisManager &am, Result agg) { 39 | auto res = am.getResult(*brcond->getFunction()); 40 | auto brres = res.find(brcond); 41 | if (brres != res.end()) { 42 | auto ba = brres->second; 43 | llvm::ReplaceInstWithInst( 44 | brcond, 45 | new llvm::ICmpInst(ba.compare, ba.compared.first, ba.compared.second)); 46 | 47 | agg.intersect(llvm::PreservedAnalyses::none()); 48 | } 49 | 50 | return agg; 51 | } 52 | } // namespace anvill -------------------------------------------------------------------------------- /lib/Passes/CodeQualityStatCollector.cpp: -------------------------------------------------------------------------------- 1 | 2 | #define DEBUG_TYPE "code_quality" 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | namespace anvill { 14 | STATISTIC( 
15 | ConditionalComplexity, 16 | "A factor that approximates the complexity of the condition in branch instructions"); 17 | STATISTIC(NumberOfInstructions, "Total number of instructions"); 18 | STATISTIC(AbruptControlFlow, "Indirect control flow instructions"); 19 | STATISTIC(IntToPointerCasts, "Integer to pointer casts"); 20 | STATISTIC(PointerToIntCasts, "Pointer to integer casts"); 21 | 22 | 23 | namespace { 24 | // The idea here is that we count the number of boolean expressions involved in this branch which should be an indicator of its complexity 25 | class ConditionalComplexityVisitor 26 | : public llvm::InstVisitor { 27 | 28 | public: 29 | void tryVisit(llvm::Value *v) { 30 | if (auto *insn = llvm::dyn_cast(v)) { 31 | this->visit(insn); 32 | } 33 | } 34 | 35 | void visitBinaryOperator(llvm::BinaryOperator &I) { 36 | if (auto *inttype = llvm::dyn_cast(I.getType())) { 37 | if (inttype->getBitWidth() == 1) { 38 | ConditionalComplexity++; 39 | this->tryVisit(I.getOperand(0)); 40 | this->tryVisit(I.getOperand(1)); 41 | } 42 | } 43 | } 44 | 45 | void visitCmpInst(llvm::CmpInst &I) { 46 | ConditionalComplexity++; 47 | } 48 | 49 | void visitUnaryOperator(llvm::UnaryOperator &I) { 50 | if (auto *inttype = llvm::dyn_cast(I.getType())) { 51 | ConditionalComplexity++; 52 | this->tryVisit(I.getOperand(0)); 53 | } 54 | } 55 | }; 56 | } // namespace 57 | 58 | 59 | llvm::PreservedAnalyses 60 | CodeQualityStatCollector::run(llvm::Function &function, 61 | llvm::FunctionAnalysisManager &analysisManager) { 62 | ConditionalComplexityVisitor complexity_visitor; 63 | for (auto &i : llvm::instructions(function)) { 64 | if (auto *int_to_ptr = llvm::dyn_cast(&i)) { 65 | IntToPointerCasts++; 66 | } 67 | 68 | if (auto *int_to_ptr = llvm::dyn_cast(&i)) { 69 | PointerToIntCasts++; 70 | } 71 | 72 | 73 | NumberOfInstructions++; 74 | if (auto *branch = llvm::dyn_cast(&i)) { 75 | if (branch->isConditional()) { 76 | complexity_visitor.tryVisit(branch->getCondition()); 77 | } 78 | } 79 | 80 | 
if (auto *cb = llvm::dyn_cast(&i)) { 81 | auto target = cb->getCalledFunction(); 82 | if (target != nullptr) { 83 | if (target->getName() == kAnvillSwitchCompleteFunc || 84 | target->getName() == kAnvillSwitchIncompleteFunc) { 85 | AbruptControlFlow++; 86 | } 87 | } 88 | } 89 | } 90 | return llvm::PreservedAnalyses::all(); 91 | } 92 | 93 | llvm::StringRef CodeQualityStatCollector::name(void) { 94 | return "CodeQualityStatCollector"; 95 | } 96 | 97 | } // namespace anvill -------------------------------------------------------------------------------- /lib/Passes/LowerRemillUndefinedIntrinsics.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include 19 | 20 | #include "Utils.h" 21 | 22 | namespace anvill { 23 | 24 | llvm::StringRef LowerRemillUndefinedIntrinsics::name(void) { 25 | return "LowerRemillUndefinedIntrinsics"; 26 | } 27 | 28 | llvm::PreservedAnalyses 29 | LowerRemillUndefinedIntrinsics::run(llvm::Function &func, 30 | llvm::FunctionAnalysisManager &AM) { 31 | std::vector calls; 32 | 33 | for (auto &inst : llvm::instructions(func)) { 34 | if (auto call = llvm::dyn_cast(&inst)) { 35 | if (auto callee = call->getCalledFunction(); 36 | callee && callee->getName().startswith("__remill_undefined_")) { 37 | calls.push_back(call); 38 | } 39 | } 40 | } 41 | 42 | auto changed = false; 43 | for (auto call : calls) { 44 | auto *undef_val = llvm::UndefValue::get(call->getType()); 45 | CopyMetadataTo(call, undef_val); 46 | call->replaceAllUsesWith(undef_val); 47 | call->eraseFromParent(); 48 | changed = true; 49 | } 50 | 51 | return ConvertBoolToPreserved(changed); 52 | } 
53 | 54 | // Some machine code instructions explicitly introduce undefined values / 55 | // behavior. Often, this is a result of different CPU steppings of 56 | // an ISA producing different results for specific registers. For example, 57 | // some instructions leave the value of specific arithmetic flags 58 | // in an undefined state. 59 | // 60 | // Remill models these situations using opaque function calls, i.e. an 61 | // undefined value is produced via a call to something like 62 | // `__remill_undefined_8`, which represents an 8-bit undefined value. We want 63 | // to lower these to `undef` values in LLVM; however, we don't want to do this 64 | // too early, otherwise the "undefinedness" can spread and possibly get out 65 | // of control. 66 | // 67 | // This pass exists to do the lowering to `undef` values, and should be run 68 | // as late as possible. 69 | void AddLowerRemillUndefinedIntrinsics(llvm::FunctionPassManager &fpm) { 70 | fpm.addPass(LowerRemillUndefinedIntrinsics()); 71 | } 72 | 73 | } // namespace anvill 74 | -------------------------------------------------------------------------------- /lib/Passes/LowerTypeHintIntrinsics.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree.
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include 19 | 20 | #include "Utils.h" 21 | 22 | namespace anvill { 23 | 24 | llvm::StringRef LowerTypeHintIntrinsics::name(void) { 25 | return "LowerTypeHintIntrinsics"; 26 | } 27 | 28 | llvm::PreservedAnalyses 29 | LowerTypeHintIntrinsics::run(llvm::Function &func, 30 | llvm::FunctionAnalysisManager &AM) { 31 | std::vector calls; 32 | 33 | for (auto &inst : llvm::instructions(func)) { 34 | if (auto call = llvm::dyn_cast(&inst)) { 35 | if (auto callee = call->getCalledFunction(); 36 | callee && callee->getName().startswith(kTypeHintFunctionPrefix)) { 37 | calls.push_back(call); 38 | } 39 | } 40 | } 41 | 42 | auto changed = false; 43 | for (auto call : calls) { 44 | auto val = call->getArgOperand(0)->stripPointerCasts(); 45 | llvm::IRBuilder<> ir(call); 46 | auto *cast_val = ir.CreateBitOrPointerCast(val, call->getType()); 47 | CopyMetadataTo(call, cast_val); 48 | call->replaceAllUsesWith(cast_val); 49 | changed = true; 50 | } 51 | 52 | for (auto call : calls) { 53 | if (call->use_empty()) { 54 | call->eraseFromParent(); 55 | changed = true; 56 | } 57 | } 58 | 59 | return ConvertBoolToPreserved(changed); 60 | } 61 | 62 | void AddLowerTypeHintIntrinsics(llvm::FunctionPassManager &fpm) { 63 | fpm.addPass(LowerTypeHintIntrinsics()); 64 | } 65 | 66 | } // namespace anvill 67 | -------------------------------------------------------------------------------- /lib/Passes/RemoveCompilerBarriers.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "Utils.h" 19 | 20 | namespace anvill { 21 | 22 | llvm::StringRef RemoveCompilerBarriers::name(void) { 23 | return "RemoveCompilerBarriers"; 24 | } 25 | 26 | // Erase compiler barriers that are redundant because they directly follow another barrier or directly precede a call to a function with external linkage. 27 | llvm::PreservedAnalyses 28 | RemoveCompilerBarriers::run(llvm::Function &func, 29 | llvm::FunctionAnalysisManager &AM) { 30 | std::vector to_remove; 31 | 32 | for (llvm::BasicBlock &block : func) { 33 | auto prev_is_compiler_barrier = false; 34 | llvm::CallBase *prev_barrier = nullptr; 35 | for (auto &inst : block) { 36 | if (auto call = llvm::dyn_cast(&inst)) { 37 | const auto called_val = call->getCalledOperand(); 38 | const auto inline_asm = llvm::dyn_cast(called_val); 39 | if (inline_asm) { 40 | if (inline_asm->hasSideEffects() && call->getType()->isVoidTy() && 41 | inline_asm->getAsmString().empty()) { 42 | // A side-effecting, `void`, empty inline asm call is treated as a compiler barrier. 43 | if (prev_is_compiler_barrier) { 44 | to_remove.push_back(call); 45 | } else { 46 | prev_barrier = call; 47 | } 48 | prev_is_compiler_barrier = true; 49 | 50 | } else { 51 | prev_is_compiler_barrier = false; 52 | prev_barrier = nullptr; 53 | } 54 | 55 | } else if (auto target_func = call->getCalledFunction()) { 56 | if (target_func->hasExternalLinkage()) { 57 | if (prev_is_compiler_barrier && prev_barrier) { 58 | to_remove.push_back(prev_barrier); 59 | } 60 | prev_is_compiler_barrier = true; 61 | } else { 62 | prev_is_compiler_barrier = false; 63 | } 64 | 65 | prev_barrier = nullptr; 66 | 67 | } else { 68 | prev_is_compiler_barrier = false; 69 | prev_barrier = nullptr; 70 | } 71 | } else { 72 | prev_is_compiler_barrier = false; 73 | prev_barrier = nullptr; 74 | } 75 | } 76 | } 77 | // Erase only the collected barriers that have no remaining uses. 78 | auto removed = false; 79 | for (auto call_inst : to_remove) { 80 | if (call_inst->use_empty()) { 81 | call_inst->eraseFromParent(); 82 | removed = true; 83 | } 84 | } 85 | 86 | return ConvertBoolToPreserved(removed); 87 | } 88 | 89 |
// Remill semantics sometimes contain compiler barriers (empty inline assembly 90 | // statements), especially related to floating point code (i.e. preventing 91 | // re-ordering of floating point operations so that we can capture the flags). 92 | // This pass eliminates those empty inline assembly statements. 93 | void AddRemoveCompilerBarriers(llvm::FunctionPassManager &fpm) { 94 | fpm.addPass(RemoveCompilerBarriers()); 95 | } 96 | } // namespace anvill 97 | -------------------------------------------------------------------------------- /lib/Passes/RemoveDelaySlotIntrinsics.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include "Utils.h" 20 | 21 | namespace anvill { 22 | 23 | llvm::StringRef RemoveDelaySlotIntrinsics::name(void) { 24 | return "RemoveDelaySlotIntrinsics"; 25 | } 26 | 27 | // Replace each call to `__remill_delay_slot_begin` / `__remill_delay_slot_end` with its first argument, then erase the call.
28 | llvm::PreservedAnalyses 29 | RemoveDelaySlotIntrinsics::run(llvm::Function &func, 30 | llvm::FunctionAnalysisManager &AM) { 31 | auto module = func.getParent(); 32 | auto begin = module->getFunction("__remill_delay_slot_begin"); 33 | auto end = module->getFunction("__remill_delay_slot_end"); 34 | 35 | if (!begin && !end) { 36 | return llvm::PreservedAnalyses::all(); 37 | } 38 | 39 | auto calls = FindFunctionCalls(func, [=](llvm::CallBase *call) -> bool { 40 | const auto func = call->getCalledFunction(); 41 | return func == begin || func == end; 42 | }); 43 | 44 | for (llvm::CallBase *call : calls) { 45 | auto mem_ptr = call->getArgOperand(0); 46 | CopyMetadataTo(call, mem_ptr); 47 | call->replaceAllUsesWith(mem_ptr); 48 | call->eraseFromParent(); 49 | } 50 | 51 | return ConvertBoolToPreserved(!calls.empty()); 52 | } 53 | 54 | // Removes calls to `__remill_delay_slot_begin` and `__remill_delay_slot_end`. 55 | void AddRemoveDelaySlotIntrinsics(llvm::FunctionPassManager &fpm) { 56 | fpm.addPass(RemoveDelaySlotIntrinsics()); 57 | } 58 | } // namespace anvill 59 | -------------------------------------------------------------------------------- /lib/Passes/RemoveStackPointerCExprs.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 Trail of Bits, Inc. 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU Affero General Public License as 6 | * published by the Free Software Foundation, either version 3 of the 7 | * License, or (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU Affero General Public License for more details. 
13 | * 14 | * You should have received a copy of the GNU Affero General Public License 15 | * along with this program. If not, see . 16 | */ 17 | 18 | 19 | #pragma once 20 | 21 | #include 22 | #include 23 | 24 | namespace anvill { 25 | 26 | // This pass unrolls constant expressions that involve the stack pointer into instructions so that 27 | // RecoverStackInformation can replace the stack pointer with its stack representation. 28 | // The pass strips away portions of the constant expression that cannot be resolved to a stack reference so that hopefully they 29 | // will be resolved later. 30 | // For example: 31 | // define i1 @slice() local_unnamed_addr #2 { 32 | // %1 = call zeroext i1 (i1, ...) @__remill_flag_computation_sign(i1 zeroext icmp slt (i32 add (i32 ptrtoint (i8* @__anvill_sp to i32), i32 -12), i32 0), i32 add (i32 ptrtoint (i8* @__anvill_sp to i32), i32 -12)) #5 33 | // ret i1 %1 34 | // } 35 | 36 | // Becomes: 37 | // define i1 @slice() local_unnamed_addr { 38 | // %1 = icmp slt i32 add (i32 ptrtoint (i8* @__anvill_sp to i32), i32 -12), 0 39 | // %2 = call zeroext i1 (i1, ...)
@__remill_flag_computation_sign(i1 zeroext %1, i32 add (i32 ptrtoint (i8* @__anvill_sp to i32), i32 -12)) 40 | // ret i1 %2 41 | // } 42 | // } 43 | 44 | class RemoveStackPointerCExprs final 45 | : public llvm::PassInfoMixin { 46 | public: 47 | RemoveStackPointerCExprs(void) {} 48 | 49 | llvm::PreservedAnalyses run(llvm::Function &F, 50 | llvm::FunctionAnalysisManager &AM); 51 | }; 52 | 53 | } // namespace anvill 54 | -------------------------------------------------------------------------------- /lib/Passes/RemoveUnusedBranchHints.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | namespace anvill { 3 | 4 | 5 | RemoveUnusedBranchHints::Result 6 | RemoveUnusedBranchHints::runOnIntrinsic(llvm::CallInst *indirectJump, 7 | llvm::FunctionAnalysisManager &am, 8 | 9 | RemoveUnusedBranchHints::Result agg) { 10 | auto real_res = indirectJump->getArgOperand(0); 11 | indirectJump->replaceAllUsesWith(real_res); 12 | indirectJump->eraseFromParent(); 13 | return llvm::PreservedAnalyses::none(); 14 | } 15 | 16 | RemoveUnusedBranchHints::Result RemoveUnusedBranchHints::INIT_RES = 17 | llvm::PreservedAnalyses::all(); 18 | 19 | 20 | void AddRemoveFailedBranchHints(llvm::FunctionPassManager &fpm) { 21 | fpm.addPass(RemoveUnusedBranchHints()); 22 | } 23 | 24 | llvm::StringRef RemoveUnusedBranchHints::name() { 25 | return "RemoveUnusedBranchHints"; 26 | } 27 | 28 | } // namespace anvill -------------------------------------------------------------------------------- /lib/Passes/RemoveUnusedFPClassificationCalls.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include "Utils.h" 20 | 21 | namespace anvill { 22 | // Try to remove unused floating point classification function calls. 23 | llvm::PreservedAnalyses RemoveUnusedFPClassificationCalls::run( 24 | llvm::Function &func, llvm::FunctionAnalysisManager &analysisManager) { 25 | auto calls = FindFunctionCalls(func, [](llvm::CallBase *call) -> bool { 26 | const auto func = call->getCalledFunction(); 27 | if (!func) { 28 | return false; 29 | } 30 | 31 | const auto name = func->getName(); 32 | return name == "fpclassify" || name == "__fpclassifyd" || 33 | name == "__fpclassifyf" || name == "__fpclassifyld"; 34 | }); 35 | 36 | auto ret = false; 37 | for (llvm::CallBase *call : calls) { 38 | if (call->use_empty()) { 39 | call->eraseFromParent(); 40 | ret = true; 41 | } 42 | } 43 | 44 | return ConvertBoolToPreserved(ret); 45 | } 46 | 47 | llvm::StringRef RemoveUnusedFPClassificationCalls::name(void) { 48 | return "RemoveUnusedFPClassificationCalls"; 49 | } 50 | 51 | // Remove unused calls to floating point classification functions. Calls to 52 | // these functions are present in a bunch of FPU-related instruction semantics 53 | // functions. It's frequently the case that instructions don't actually care 54 | // about the FPU state, though. In these cases, we won't observe the return 55 | // values of these classification functions being used. However, LLVM can't 56 | // eliminate the calls to these functions on its own because they are not 57 | // "pure" functions. 58 | // 59 | // NOTE(pag): This pass must be applied before any kind of renaming of lifted 60 | // functions is performed, so that we don't accidentally remove 61 | // calls to classification functions present in the target binary. 
62 | void AddRemoveUnusedFPClassificationCalls(llvm::FunctionPassManager &fpm) { 63 | fpm.addPass(RemoveUnusedFPClassificationCalls()); 64 | } 65 | 66 | 67 | } // namespace anvill 68 | -------------------------------------------------------------------------------- /lib/Passes/SimplifyStackArithFlags.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 Trail of Bits, Inc. 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU Affero General Public License as 6 | * published by the Free Software Foundation, either version 3 of the 7 | * License, or (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU Affero General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU Affero General Public License 15 | * along with this program. If not, see . 
16 | */ 17 | 18 | #include "SimplifyStackArithFlags.h" 19 | 20 | #include 21 | #include 22 | namespace anvill { 23 | 24 | 25 | llvm::PreservedAnalyses SimplifyStackArithFlags::INIT_RES = 26 | llvm::PreservedAnalyses::all(); 27 | 28 | 29 | bool SimplifyStackArithFlags::isTargetInstrinsic( 30 | const llvm::CallInst *callinsn) { 31 | return ParseFlagIntrinsic(callinsn).has_value(); 32 | } 33 | 34 | llvm::PreservedAnalyses 35 | SimplifyStackArithFlags::runOnIntrinsic(llvm::CallInst *call, 36 | llvm::FunctionAnalysisManager &am, 37 | llvm::PreservedAnalyses agg) { 38 | auto maybeflag = ParseFlagIntrinsic(call); 39 | if (maybeflag.has_value() && 40 | IsRelatedToStackPointer(call->getModule(), maybeflag->over) && 41 | llvm::isa(maybeflag->over)) { 42 | llvm::Value *newValue = nullptr; 43 | if (this->constant_flags.find(maybeflag->flg) == 44 | this->constant_flags.end()) { 45 | newValue = llvm::UndefValue::get(call->getType()); 46 | } else { 47 | newValue = llvm::ConstantInt::getBool( 48 | call->getType(), this->constant_flags[maybeflag->flg]); 49 | } 50 | call->replaceAllUsesWith(newValue); 51 | call->eraseFromParent(); 52 | agg.intersect(llvm::PreservedAnalyses::none()); 53 | } 54 | 55 | return agg; 56 | } 57 | 58 | llvm::StringRef SimplifyStackArithFlags::name(void) { 59 | return "SimplifyStackArithFlags"; 60 | } 61 | 62 | 63 | void AddSimplifyStackArithFlags(llvm::FunctionPassManager &fpm, 64 | bool stack_pointer_is_signed) { 65 | fpm.addPass(SimplifyStackArithFlags(stack_pointer_is_signed)); 66 | } 67 | } // namespace anvill -------------------------------------------------------------------------------- /lib/Passes/SimplifyStackArithFlags.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | /* 4 | * Copyright (c) 2021 Trail of Bits, Inc. 
5 | * 6 | * This program is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU Affero General Public License as 8 | * published by the Free Software Foundation, either version 3 of the 9 | * License, or (at your option) any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU Affero General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Affero General Public License 17 | * along with this program. If not, see . 18 | */ 19 | 20 | #pragma once 21 | #include 22 | #include 23 | 24 | #include 25 | namespace anvill { 26 | 27 | // This pass attempts to remove flag computations over the stack variable, hinted by "__remill_flag_computation_*". 28 | // The pass checks for arithmetic flags that should be constant for a constant arithmetic expression over a stack variable. 29 | // The sign flag is configurable in the lifter options to support non user mode code. 
30 | class SimplifyStackArithFlags 31 | : public IntrinsicPass, 32 | llvm::PassInfoMixin { 33 | 34 | 35 | private: 36 | // Flags that can be treated as a constant boolean. The `SIGN` entry is only a default; the constructor overwrites it. 37 | std::map constant_flags = {{ArithFlags::OF, false}, 38 | {ArithFlags::ZF, false}, 39 | {ArithFlags::SIGN, true}}; 40 | 41 | public: 42 | SimplifyStackArithFlags(bool stack_pointer_is_signed) { 43 | this->constant_flags[ArithFlags::SIGN] = stack_pointer_is_signed; // Assign, not `insert`: `insert` leaves the already-present `SIGN` entry unchanged, ignoring the argument. 44 | } 45 | 46 | llvm::PreservedAnalyses runOnIntrinsic(llvm::CallInst *indirectJump, 47 | llvm::FunctionAnalysisManager &am, 48 | llvm::PreservedAnalyses); 49 | 50 | 51 | static llvm::PreservedAnalyses INIT_RES; 52 | 53 | 54 | static bool isTargetInstrinsic(const llvm::CallInst *callinsn); 55 | static llvm::StringRef name(); 56 | }; 57 | 58 | } // namespace anvill -------------------------------------------------------------------------------- /lib/Passes/SliceInterpreter.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree.
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace anvill { 16 | 17 | SliceInterpreter::~SliceInterpreter(void) {} 18 | 19 | SliceInterpreter::SliceInterpreter(const llvm::Module &module) { 20 | auto builder = llvm::EngineBuilder(llvm::CloneModule(module)); 21 | this->execEngine.reset( 22 | builder.setEngineKind(llvm::EngineKind::Interpreter).create()); 23 | } 24 | 25 | llvm::GenericValue 26 | SliceInterpreter::executeSlice(SliceID sliceId, 27 | llvm::ArrayRef ArgValue) { 28 | auto f = this->execEngine->FindFunctionNamed( 29 | SliceManager::getFunctionName(sliceId)); 30 | 31 | assert(f != nullptr); 32 | return this->execEngine->runFunction(f, ArgValue); 33 | } 34 | 35 | } // namespace anvill 36 | -------------------------------------------------------------------------------- /lib/Passes/SlicerVisitor.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 
7 | */ 8 | 9 | #include "SlicerVisitor.h" 10 | 11 | namespace anvill { 12 | llvm::Value *Slicer::checkInstruction(llvm::Value *targetValue) { 13 | if (auto *insn = llvm::dyn_cast(targetValue)) { 14 | return this->visit(insn); 15 | } 16 | 17 | return targetValue; 18 | } 19 | 20 | 21 | llvm::SmallVector Slicer::getSlice() { 22 | llvm::SmallVector res; 23 | std::reverse_copy(this->resultingSlice.begin(), this->resultingSlice.end(), 24 | std::back_inserter(res)); 25 | return res; 26 | } 27 | 28 | // just assume we are non linear 29 | llvm::Value *Slicer::visitInstruction(llvm::Instruction &I) { 30 | return &I; 31 | } 32 | 33 | llvm::Value *Slicer::visitCastInst(llvm::CastInst &I) { 34 | assert(I.getNumOperands() == 1); 35 | this->resultingSlice.push_back(&I); 36 | return this->checkInstruction(I.getOperand(0)); 37 | } 38 | 39 | 40 | // same with unary ops 41 | llvm::Value *Slicer::visitUnaryOperator(llvm::UnaryOperator &I) { 42 | assert(I.getNumOperands() == 1); 43 | this->resultingSlice.push_back(&I); 44 | return this->checkInstruction(I.getOperand(0)); 45 | } 46 | 47 | std::optional Slicer::getConstantCast(llvm::Value *v) { 48 | 49 | if (auto cst = llvm::dyn_cast(v)) { 50 | if (llvm::isa(cst->getOperand(0))) { 51 | return {cst}; 52 | } 53 | } 54 | 55 | return std::nullopt; 56 | } 57 | 58 | 59 | // if RHS is constant then continue, otherwise stop. 
60 | llvm::Value *Slicer::visitBinaryOperator(llvm::BinaryOperator &I) { 61 | assert(I.getNumOperands() == 2); 62 | 63 | if (auto constant_cst = this->getConstantCast(I.getOperand(1))) { 64 | this->resultingSlice.push_back(&I); 65 | this->resultingSlice.push_back(*constant_cst); 66 | return this->checkInstruction(I.getOperand(0)); 67 | } 68 | if (!llvm::isa(I.getOperand(1))) { 69 | return &I; 70 | } 71 | 72 | this->resultingSlice.push_back(&I); 73 | return this->checkInstruction(I.getOperand(0)); 74 | } 75 | 76 | 77 | } // namespace anvill 78 | -------------------------------------------------------------------------------- /lib/Passes/SlicerVisitor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | namespace anvill { 16 | 17 | // Produces a complete linear defining slice, stopping when the slice becomes non linear (phi, non constant binop, load, argument) 18 | // This slicer is expected to run after instcombiner. 
19 | // Return the value that is non linear (could be argument) 20 | 21 | // If the expression is entirely constant then a constant will be returned as the stop value 22 | class Slicer : public llvm::InstVisitor { 23 | private: 24 | llvm::SmallVector resultingSlice; 25 | 26 | 27 | std::optional getConstantCast(llvm::Value *v); 28 | 29 | public: 30 | llvm::Value *checkInstruction(llvm::Value *targetValue); 31 | llvm::SmallVector getSlice(); 32 | 33 | // default case is stop condition 34 | llvm::Value *visitInstruction(llvm::Instruction &I); 35 | 36 | 37 | llvm::Value *visitCastInst(llvm::CastInst &I); 38 | 39 | // same with unary ops 40 | llvm::Value *visitUnaryOperator(llvm::UnaryOperator &I); 41 | 42 | 43 | // if RHS is constant then continue, otherwise stop. 44 | llvm::Value *visitBinaryOperator(llvm::BinaryOperator &I); 45 | }; 46 | } // namespace anvill 47 | -------------------------------------------------------------------------------- /lib/Passes/SpreadPCMetadata.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "Utils.h" 17 | 18 | namespace anvill { 19 | 20 | llvm::PreservedAnalyses SpreadPCMetadata::run( 21 | llvm::Function &func, llvm::FunctionAnalysisManager &fam) { 22 | 23 | llvm::LLVMContext &context = func.getContext(); 24 | auto md_id = context.getMDKindID(pc_metadata_name); 25 | auto changed = false; 26 | 27 | for (llvm::BasicBlock &block : func) { 28 | 29 | auto has_missing = false; 30 | llvm::MDNode *last_md = nullptr; 31 | for (llvm::Instruction &inst : block) { 32 | auto md = inst.getMetadata(md_id); 33 | if (!md) { 34 | has_missing = true; 35 | continue; 36 | } 37 | 38 | last_md = md; 39 | 40 | // Propagate the pc metadata of an instruction back to its uses, if those 41 | // uses are in the same block. 42 | for (llvm::Use &op : inst.operands()) { 43 | if (auto inst_arg = llvm::dyn_cast(op.get()); 44 | inst_arg && inst_arg->getParent() == &block) { 45 | if (!inst_arg->getMetadata(md_id)) { 46 | inst_arg->setMetadata(md_id, md); 47 | changed = true; 48 | } 49 | } 50 | } 51 | } 52 | 53 | if (!has_missing) { 54 | continue; 55 | } 56 | 57 | // Go backward through the basic block and apply any known metadata IDs to 58 | // instructions. 59 | auto rit = block.rbegin(); 60 | auto rend = block.rend(); 61 | for (; rit != rend; ++rit) { 62 | llvm::Instruction &inst = *rit; 63 | if (auto md = inst.getMetadata(md_id)) { 64 | last_md = md; 65 | } else if (last_md) { 66 | inst.setMetadata(md_id, last_md); 67 | changed = true; 68 | } 69 | } 70 | } 71 | 72 | return ConvertBoolToPreserved(changed); 73 | } 74 | 75 | llvm::StringRef SpreadPCMetadata::name(void) { 76 | return "SpreadPCMetadata"; 77 | } 78 | 79 | // Looks for instructions missing the program counter-specific metadata, and 80 | // spreads nearby program counter-annotated metadata to those instructions. 
81 | void AddSpreadPCMetadata(llvm::FunctionPassManager &fpm, 82 | const LifterOptions &options) { 83 | if (options.pc_metadata_name) { 84 | fpm.addPass(SpreadPCMetadata(options.pc_metadata_name)); 85 | } 86 | } 87 | 88 | } // namespace anvill 89 | -------------------------------------------------------------------------------- /lib/Passes/Utils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | 19 | #include 20 | 21 | namespace llvm { 22 | class CallBase; 23 | class Function; 24 | class Instruction; 25 | class IRBuilderBase; 26 | class PointerType; 27 | class Value; 28 | } // namespace llvm 29 | namespace anvill { 30 | namespace { 31 | 32 | template 33 | static std::vector SelectInstructions( 34 | llvm::Function &function) { 35 | std::vector output; 36 | 37 | for (auto &instruction : llvm::instructions(function)) { 38 | bool selected = (llvm::dyn_cast(&instruction) || ...); 39 | if (selected) { 40 | output.push_back(&instruction); 41 | } 42 | } 43 | 44 | return output; 45 | } 46 | 47 | 48 | } // namespace 49 | 50 | // Returns `true` if it seems like a basic block is sane. 51 | bool BasicBlockIsSane(llvm::BasicBlock *block); 52 | 53 | inline static bool BasicBlockIsSane(llvm::Instruction *inst) { 54 | return BasicBlockIsSane(inst->getParent()); 55 | } 56 | 57 | // Find all function calls in `func` such that `pred(call)` returns `true`. 58 | std::vector 59 | FindFunctionCalls(llvm::Function &func, 60 | std::function pred); 61 | 62 | // Convert the constant `val` to have the pointer type `dest_ptr_ty`. 
63 | llvm::Value *ConvertToPointer(llvm::Instruction *usage_site, 64 | llvm::Value *val_to_convert, 65 | llvm::PointerType *dest_ptr_ty); 66 | 67 | // Returns the function's IR 68 | std::string GetFunctionIR(llvm::Function &func); 69 | 70 | // Returns the module's IR 71 | std::string GetModuleIR(llvm::Module &module); 72 | 73 | llvm::PreservedAnalyses ConvertBoolToPreserved(bool); 74 | 75 | // Returns the pointer to the function that lets us overwrite the return 76 | // address. This is not available on all architectures / OSes. 77 | llvm::Function *AddressOfReturnAddressFunction(llvm::Module *module); 78 | 79 | } // namespace anvill 80 | -------------------------------------------------------------------------------- /lib/Providers/ControlFlowProvider.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | #include 11 | 12 | #include "Specification.h" 13 | 14 | namespace anvill { 15 | 16 | ControlFlowOverride NullControlFlowProvider::GetControlFlowOverride(uint64_t addr) const { 17 | return {}; 18 | } 19 | 20 | 21 | SpecificationControlFlowProvider::~SpecificationControlFlowProvider(void) {} 22 | 23 | SpecificationControlFlowProvider::SpecificationControlFlowProvider( 24 | const Specification &spec) 25 | : impl(spec.impl) {} 26 | 27 | ControlFlowOverride SpecificationControlFlowProvider::GetControlFlowOverride(uint64_t addr) const { 28 | return impl->control_flow_overrides[addr]; 29 | } 30 | 31 | } // namespace anvill 32 | -------------------------------------------------------------------------------- /lib/Providers/MemoryProvider.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include "Specification.h" 12 | 13 | namespace anvill { 14 | 15 | std::tuple 16 | NullMemoryProvider::Query(uint64_t address) const { 17 | return {0, ByteAvailability::kUnknown, BytePermission::kUnknown}; 18 | } 19 | 20 | MemoryProvider::~MemoryProvider(void) {} 21 | 22 | 23 | SpecificationMemoryProvider::~SpecificationMemoryProvider(void) {} 24 | 25 | SpecificationMemoryProvider::SpecificationMemoryProvider( 26 | const Specification &spec) 27 | : impl(spec.impl) {} 28 | 29 | std::tuple 30 | SpecificationMemoryProvider::Query(uint64_t address) const { 31 | auto byte_it = impl->memory.find(address); 32 | 33 | // TODO(pag): ANVILL specs don't communicate the structure of the address 34 | // space, just the contents of a subset of the memory of the 35 | // address space. 
36 | if (byte_it == impl->memory.end()) { 37 | return {{}, anvill::ByteAvailability::kUnknown, 38 | anvill::BytePermission::kUnknown}; 39 | } else { 40 | return {byte_it->second.first, anvill::ByteAvailability::kAvailable, 41 | byte_it->second.second}; 42 | } 43 | } 44 | 45 | 46 | } // namespace anvill 47 | -------------------------------------------------------------------------------- /lib/Specification.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | #include 20 | 21 | namespace llvm { 22 | class LLVMContext; 23 | } // namespace llvm 24 | namespace anvill { 25 | 26 | enum class BytePermission : std::uint8_t; 27 | 28 | class SpecificationImpl 29 | : public std::enable_shared_from_this { 30 | private: 31 | friend class Specification; 32 | 33 | SpecificationImpl(void) = delete; 34 | SpecificationImpl(std::unique_ptr arch_); 35 | 36 | Result, std::string> 37 | ParseSpecification(const ::specification::Specification &obj); 38 | 39 | public: 40 | ~SpecificationImpl(void); 41 | 42 | // Architecture used by all of the function and global variable declarations. 43 | const std::unique_ptr arch; 44 | 45 | const TypeDictionary type_dictionary; 46 | const TypeTranslator type_translator; 47 | 48 | using VariableDeclPtr = std::unique_ptr; 49 | using FunctionDeclPtr = std::unique_ptr; 50 | using CallSiteDeclPtr = std::unique_ptr; 51 | 52 | // Sorted list of functions, variables, and call sites. 53 | std::vector variables; 54 | std::vector functions; 55 | std::vector call_sites; 56 | 57 | // List of functions that have been parsed from the JSON spec. 
58 | std::unordered_map address_to_function; 59 | 60 | // Inverted mapping of byte addresses to the variables containing those 61 | // addresses. 62 | std::unordered_map address_to_var; 63 | 64 | 65 | // NOTE(pag): We used ordered containers so that any type of round-tripping 66 | // to/from JSON ends up getting a consistent order of information. 67 | 68 | // Mapping of addresses to one or more names. 69 | std::multimap symbols; 70 | 71 | // Mapping of byte addresses to the byte values and their permissions. 72 | std::map> memory; 73 | 74 | // Control-flow redirections. 75 | std::map redirections; 76 | 77 | // Call-site specific target information. 78 | std::map, const CallSiteDecl *> 79 | loc_to_call_site; 80 | 81 | // Sorted list of jumps, calls, returns and other control flow overrides 82 | std::vector jumps; 83 | std::vector calls; 84 | std::vector returns; 85 | std::vector misc_overrides; 86 | 87 | std::unordered_map control_flow_overrides; 88 | }; 89 | 90 | } // namespace anvill 91 | -------------------------------------------------------------------------------- /lib/Transforms.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 
// Destructor. Asserts via `CHECK` that `error_list` is empty at the time the
// manager is destroyed.
// NOTE(review): `CHECK` is presumably the glog macro, which aborts the
// process on failure -- the include that provides it is not visible in this
// copy of the source; confirm.
TransformationErrorManager::~TransformationErrorManager(void) {
  CHECK(error_list.empty());
}
namespace anvill {
namespace version {

// NOTE(review): every `@NAME@` token below is a template placeholder. This
// file is `Version.cpp.in`, so the tokens are presumably substituted by the
// build system before compilation (the header comment credits
// cmake-git-version-tracking) -- confirm against the CMake scripts.

// Returns the value substituted for `@GIT_RETRIEVED_STATE@`.
bool HasVersionData(void) {
  return @GIT_RETRIEVED_STATE@;
}

// Returns the value substituted for `@GIT_IS_DIRTY@`.
bool HasUncommittedChanges(void) {
  return @GIT_IS_DIRTY@;
}

// Returns the string substituted for `@GIT_AUTHOR_NAME@`.
std::string_view GetAuthorName(void) {
  return "@GIT_AUTHOR_NAME@";
}

// Returns the string substituted for `@GIT_AUTHOR_EMAIL@`.
std::string_view GetAuthorEmail(void) {
  return "@GIT_AUTHOR_EMAIL@";
}

// Returns the string substituted for `@GIT_HEAD_SHA1@`.
std::string_view GetCommitHash(void) {
  return "@GIT_HEAD_SHA1@";
}

// Returns the string substituted for `@GIT_COMMIT_DATE_ISO8601@`.
std::string_view GetCommitDate(void) {
  return "@GIT_COMMIT_DATE_ISO8601@";
}

// Returns the string substituted for `@VERSION_STRING@`.
std::string_view GetVersionString(void) {
  return "@VERSION_STRING@";
}

}  // namespace version
}  // namespace anvill
Run the packaging script, passing the **DESTDIR** folder 8 | 9 | Example: 10 | 11 | ```sh 12 | anvill_version=$(git describe --always) 13 | 14 | cpack -D ANVILL_DATA_PATH="/path/to/install/directory" \ 15 | -R ${anvill_version} \ 16 | --config "packaging/main.cmake" 17 | ``` 18 | -------------------------------------------------------------------------------- /packaging/cmake/dispatcher.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019-present, Trail of Bits, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed in accordance with the terms specified in 6 | # the LICENSE file found in the root directory of this source tree. 7 | # 8 | 9 | set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}-${CMAKE_SYSTEM_PROCESSOR}") 10 | set(CPACK_INSTALLED_DIRECTORIES "${ANVILL_DATA_PATH};.") 11 | 12 | string(TOLOWER "${CMAKE_SYSTEM_NAME}" system_name) 13 | if(system_name STREQUAL "darwin") 14 | set(system_name "macos") 15 | endif() 16 | 17 | set(common_include "${CMAKE_CURRENT_LIST_DIR}/system/${system_name}/common.cmake") 18 | if(EXISTS "${common_include}") 19 | include("${common_include}") 20 | endif() 21 | 22 | string(TOLOWER "${CPACK_GENERATOR}" cpack_generator) 23 | include("${CMAKE_CURRENT_LIST_DIR}/system/${system_name}/generators/${cpack_generator}.cmake") 24 | -------------------------------------------------------------------------------- /packaging/cmake/system/linux/generators/deb.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019-present, Trail of Bits, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed in accordance with the terms specified in 6 | # the LICENSE file found in the root directory of this source tree. 
7 | # 8 | 9 | set(CPACK_DEBIAN_PACKAGE_PRIORITY "extra") 10 | set(CPACK_DEBIAN_PACKAGE_SECTION "default") 11 | set(CPACK_DEBIAN_PACKAGE_HOMEPAGE "${CPACK_PACKAGE_HOMEPAGE_URL}") 12 | 13 | -------------------------------------------------------------------------------- /packaging/cmake/system/linux/generators/rpm.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019-present, Trail of Bits, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed in accordance with the terms specified in 6 | # the LICENSE file found in the root directory of this source tree. 7 | # 8 | 9 | set(CPACK_RPM_PACKAGE_RELEASE "${CPACK_PACKAGE_VERSION}") 10 | set(CPACK_RPM_FILE_NAME "RPM-DEFAULT") 11 | set(CPACK_RPM_PACKAGE_DESCRIPTION "${CPACK_PACKAGE_DESCRIPTION}") 12 | set(CPACK_RPM_PACKAGE_GROUP "default") 13 | set(CPACK_RPM_PACKAGE_LICENSE "GNU Affero General Public License v3.0") 14 | -------------------------------------------------------------------------------- /packaging/cmake/system/linux/generators/tgz.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019-present, Trail of Bits, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed in accordance with the terms specified in 6 | # the LICENSE file found in the root directory of this source tree. 7 | # 8 | 9 | set(CPACK_SET_DESTDIR ON) 10 | set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY 0) 11 | -------------------------------------------------------------------------------- /packaging/cmake/system/macos/generators/tgz.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019-present, Trail of Bits, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed in accordance with the terms specified in 6 | # the LICENSE file found in the root directory of this source tree. 
7 | # 8 | 9 | set(CPACK_SET_DESTDIR ON) 10 | set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY 0) 11 | -------------------------------------------------------------------------------- /packaging/main.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019-present, Trail of Bits, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed in accordance with the terms specified in 6 | # the LICENSE file found in the root directory of this source tree. 7 | # 8 | 9 | if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux") 10 | set(CPACK_GENERATOR "TGZ;DEB;RPM") 11 | 12 | elseif("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin") 13 | set(CPACK_GENERATOR "TGZ") 14 | endif() 15 | 16 | if(ANVILL_DATA_PATH STREQUAL "") 17 | message(FATAL_ERROR "The ANVILL_DATA_PATH variable was not set") 18 | endif() 19 | 20 | if(ANVILL_PACKAGE_VERSION STREQUAL "") 21 | message(FATAL_ERROR "The ANVILL_PACKAGE_VERSION variable was not set") 22 | endif() 23 | 24 | set(CPACK_PROJECT_CONFIG_FILE "${CMAKE_CURRENT_LIST_DIR}/cmake/dispatcher.cmake") 25 | 26 | set(CPACK_PACKAGE_DESCRIPTION "anvill forges beautiful LLVM bitcode out of raw machine code") 27 | set(CPACK_PACKAGE_NAME "anvill") 28 | set(CPACK_PACKAGE_VENDOR "Trail of Bits") 29 | set(CPACK_PACKAGE_CONTACT "opensource@trailofbits.com") 30 | set(CPACK_PACKAGE_HOMEPAGE_URL "https://github.com/lifting-bits/anvill") 31 | -------------------------------------------------------------------------------- /parse_cspec/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | beautifulsoup4 = "*" 8 | lxml = "*" 9 | 10 | [dev-packages] 11 | 12 | [requires] 13 | python_version = "3.9" 14 | -------------------------------------------------------------------------------- /parse_cspec/example_output/x86-64gcc.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "calling_convention": 0, 3 | "is_noreturn": false, 4 | "is_variadic": false, 5 | "parameters": [ 6 | { 7 | "register": "XMM0", 8 | "type": "F" 9 | }, 10 | { 11 | "register": "XMM1", 12 | "type": "F" 13 | }, 14 | { 15 | "register": "XMM2", 16 | "type": "F" 17 | }, 18 | { 19 | "register": "XMM3", 20 | "type": "F" 21 | }, 22 | { 23 | "register": "XMM4", 24 | "type": "F" 25 | }, 26 | { 27 | "register": "XMM5", 28 | "type": "F" 29 | }, 30 | { 31 | "register": "XMM6", 32 | "type": "F" 33 | }, 34 | { 35 | "register": "XMM7", 36 | "type": "F" 37 | }, 38 | { 39 | "register": "RDI", 40 | "type": "l" 41 | }, 42 | { 43 | "register": "RSI", 44 | "type": "l" 45 | }, 46 | { 47 | "register": "RDX", 48 | "type": "l" 49 | }, 50 | { 51 | "register": "RCX", 52 | "type": "l" 53 | }, 54 | { 55 | "register": "R8", 56 | "type": "l" 57 | }, 58 | { 59 | "register": "R9", 60 | "type": "l" 61 | } 62 | ], 63 | "return_address": { 64 | "memory": { 65 | "offset": 0, 66 | "register": "RSP" 67 | }, 68 | "type": "l" 69 | }, 70 | "return_stack_pointer": { 71 | "offset": 0, 72 | "register": "RSP", 73 | "type": "l" 74 | }, 75 | "return_values": [ 76 | { 77 | "register": "XMM0", 78 | "type": "F" 79 | }, 80 | { 81 | "register": "RAX", 82 | "type": "l" 83 | } 84 | ] 85 | } -------------------------------------------------------------------------------- /parse_cspec/example_output/x86gcc.json: -------------------------------------------------------------------------------- 1 | { 2 | "calling_convention": 0, 3 | "is_noreturn": false, 4 | "is_variadic": false, 5 | "parameters": [], 6 | "return_address": { 7 | "memory": { 8 | "offset": 0, 9 | "register": "ESP" 10 | }, 11 | "type": "i" 12 | }, 13 | "return_stack_pointer": { 14 | "offset": 0, 15 | "register": "ESP", 16 | "type": "i" 17 | }, 18 | "return_values": [ 19 | { 20 | "register": "ST0", 21 | "type": "d" 22 | }, 23 | { 24 | "register": "EAX", 25 | "type": "i" 26 | 
class generate_anvill_spec_t(ida_kernwin.action_handler_t):
    """IDA action that exports an anvill specification to a JSON file."""

    def activate(self, ctx):
        """Ask what to export, build the specification, and save it.

        The user chooses between exporting every function and exporting only
        the function under the cursor, then picks the output path. Always
        returns 1, whether the export completed, failed, or was cancelled.
        """
        user_input = ida_kernwin.ask_yn(
            ida_kernwin.ASKBTN_YES, "Would you like to export all functions?"
        )
        if user_input == ida_kernwin.ASKBTN_CANCEL:
            return 1

        p = anvill.get_program()

        if user_input == ida_kernwin.ASKBTN_NO:
            # Export only the function containing the cursor.
            screen_cursor = ida_kernwin.get_screen_ea()
            function_name = ida_funcs.get_func_name(screen_cursor)
            if function_name is None:
                print("anvill: The cursor is not located inside a function")
                return 1

            output_file_name_hint = function_name + ".json"

            try:
                p.add_function_definition(screen_cursor)
            except Exception as error:
                # Report the cause instead of discarding it. `Exception`
                # (not a bare `except`) lets KeyboardInterrupt/SystemExit
                # propagate.
                print(
                    "anvill: Failed to process the function at address "
                    "{0:x}: {1}".format(screen_cursor, error)
                )
                return 1

        else:
            # Export everything; one bad function must not abort the rest.
            for function_address in idautils.Functions():
                try:
                    p.add_function_definition(function_address)
                except Exception as error:
                    print(
                        "anvill: Failed to process the function at address "
                        "{0:x}: {1}".format(function_address, error)
                    )

            output_file_name_hint = "program.json"

        output_path = ida_kernwin.ask_file(
            True, output_file_name_hint, "Select where to save the spec file"
        )
        if not output_path:
            return 1

        output = json.dumps(p.proto(), sort_keys=False, indent=2)

        print("anvill: Saving the spec file to {}".format(output_path))
        with open(output_path, "w") as f:
            f.write(output)

        # Every other exit from this handler returns 1; the success path used
        # to fall off the end and return None.
        return 1

    def update(self, ctx):
        """Enable the action only while a disassembly widget is active."""
        if ctx.widget_type == ida_kernwin.BWN_DISASM:
            return ida_kernwin.AST_ENABLE_FOR_WIDGET

        return ida_kernwin.AST_DISABLE_FOR_WIDGET
Do **NOT** use this to create " 25 | "packages, ANVILL_ENABLE_INSTALL already covers this!") 26 | 27 | set(frontend_source_list 28 | "${setup_file_path}" 29 | anvill/__init__.py 30 | anvill/__main__.py 31 | anvill/arch.py 32 | anvill/binja/__init__.py 33 | anvill/binja/bnprogram.py 34 | anvill/binja/bnvariable.py 35 | anvill/binja/bnfunction.py 36 | anvill/binja/bninstruction.py 37 | anvill/binja/callingconvention.py 38 | anvill/binja/table.py 39 | anvill/binja/typecache.py 40 | anvill/binja/xreftype.py 41 | anvill/exc.py 42 | anvill/function.py 43 | anvill/ida/__init__.py 44 | anvill/ida/idafunction.py 45 | anvill/ida/idaprogram.py 46 | anvill/ida/idavariable.py 47 | anvill/ida/utils.py 48 | anvill/loc.py 49 | anvill/mem.py 50 | anvill/os.py 51 | anvill/program.py 52 | anvill/type.py 53 | anvill/var.py 54 | ) 55 | 56 | # TODO(ian): If we are using this as a development setting maybe this should be installed as a development package. 57 | if(NOT DEFINED ENV{VIRTUAL_ENV}) 58 | # NOT a venv install, specify --user and --prefix 59 | set(extra_install_flags --force --user --prefix=) 60 | else() 61 | # virtual env; install normally 62 | set(extra_install_flags "") 63 | endif() 64 | 65 | find_package(Python3 3.8 COMPONENTS Interpreter REQUIRED) 66 | 67 | set(installer_log "${CMAKE_CURRENT_BINARY_DIR}/frontend_installer_log.txt") 68 | 69 | add_custom_command( 70 | OUTPUT "${installer_log}" 71 | COMMAND "${Python3_EXECUTABLE}" "${setup_file_path}" install ${extra_install_flags} > "${installer_log}" 2>&1 72 | DEPENDS ${frontend_source_list} 73 | WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" 74 | COMMENT "Installing Anvill Python 3 API **to the local machine**. 
Log: ${installer_log}" 75 | VERBATIM 76 | ) 77 | 78 | add_custom_target(frontend_installer 79 | DEPENDS "${installer_log}" 80 | ) 81 | 82 | add_dependencies(anvill 83 | frontend_installer 84 | ) 85 | endif() 86 | -------------------------------------------------------------------------------- /python/anvill/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019-present, Trail of Bits, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed in accordance with the terms specified in 6 | # the LICENSE file found in the root directory of this source tree. 7 | # 8 | 9 | from .arch import * 10 | import os 11 | 12 | try: 13 | import ida_idp 14 | from .ida import * 15 | except ImportError as e: 16 | try: 17 | import binaryninja 18 | from .binja import * 19 | 20 | except ImportError as e: 21 | raise NotImplementedError("Could not find either IDA or Binary Ninja APIs") 22 | -------------------------------------------------------------------------------- /python/anvill/binja/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019-present, Trail of Bits, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed in accordance with the terms specified in 6 | # the LICENSE file found in the root directory of this source tree. 
def get_program(
    binary_path: Optional[str] = None,
    binary_view: Optional[bn.BinaryView] = None,
    base_address: Optional[int] = None,
) -> Optional[Specification]:
    """Build a `BNSpecification` from a file path or an open `BinaryView`.

    Args:
        binary_path: Path of the binary to open. When it is a string it takes
            precedence over `binary_view`.
        binary_view: An already opened view; consulted only when
            `binary_path` is not a string.
        base_address: Image base to force when opening `binary_path`. Must be
            `None` when a `binary_view` is supplied.

    Returns:
        A `BNSpecification` wrapping the view, or `None` when no view could
        be obtained.
    """
    if isinstance(binary_path, str):
        if isinstance(base_address, int):
            # Force the new image base address; according to the
            # documentation, we will not inherit any of the default load
            # options that we get when calling the get_view_of_file method.
            binary_view = bn.BinaryViewType.get_view_of_file_with_options(
                binary_path, options={"loader.imageBase": base_address}
            )
        else:
            # Use the auto-generated load options.
            binary_view = bn.BinaryViewType.get_view_of_file(binary_path)
    elif isinstance(binary_view, bn.BinaryView):
        try:
            binary_path = binary_view.file.filename
        except Exception:
            # Best effort: keep going without a path. `Exception` rather than
            # a bare `except` so KeyboardInterrupt/SystemExit still propagate.
            pass
        # NOTE: `assert` is stripped under `python -O`; kept as an assertion
        # so callers still see an AssertionError.
        assert base_address is None, (
            "base_address cannot be applied to an existing BinaryView"
        )

    if binary_view is None:
        DEBUG("Failed to create the BinaryView")
        return None

    DEBUG("Recovering program {}".format(binary_path))
    return BNSpecification(binary_view, binary_path)
7 | # 8 | 9 | import binaryninja as bn 10 | 11 | 12 | from ..var import * 13 | from ..type import * 14 | 15 | 16 | class BNVariable(Variable): 17 | def __init__(self, bn_var, arch, address, type_): 18 | super(BNVariable, self).__init__(arch, address, type_) 19 | self._bn_var = bn_var 20 | 21 | def visit(self, program, is_definition, add_refs_as_defs, ignore_no_refs): 22 | if not is_definition: 23 | return 24 | 25 | # type could be None if type class not handled 26 | if self._type is None: 27 | return 28 | 29 | if isinstance(self._type, VoidType): 30 | return 31 | 32 | bv = program.bv 33 | br = bn.BinaryReader(bv) 34 | mem = program.memory 35 | begin = self._address 36 | end = begin + self._type.size(self._arch) 37 | 38 | for ea in range(begin, end): 39 | br.seek(ea) 40 | seg = bv.get_segment_at(ea) 41 | # _elf_header is getting recovered as variable 42 | # ignore null pointer reference 43 | if ea == 0: 44 | continue 45 | 46 | # ignore data variables with no references 47 | if ignore_no_refs: 48 | var = bv.data_vars.get(ea) 49 | if var is not None and next(var.code_refs, None) is None and next(var.data_refs, None) is None: 50 | continue 51 | 52 | #NOTE(artem): This is a workaround for binary ninja's fake 53 | # .externs section, which is (correctly) mapped as 54 | # not readable, not writable, and not executable, 55 | # because it is a fictional creation of the disassembler. 56 | # 57 | # However, when we do control flow targeting to thunks, 58 | # we will sanity check that the target goes to an executable 59 | # location. If we are lying about the target being readable, 60 | # then we may as well lie about it being executable. 
# Registers used for floating point arguments, keyed by the architecture name
# reported by the calling convention. Treated as read-only: consumers must
# copy a list before popping registers from it.
_FLOAT_ARGS_REGS = {
    "x86": ["st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7"],
    "x86_64": ["xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"],
    # AAPCS uses integer register for passing floating point arguments
    "armv7": ["r0", "r1", "r2", "r3"],
    # AAPCS_VFP can use s0-15 registers for passing floating point arguments
    "thumb2": ["s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7"],
    # Floating point registers with quad size
    "aarch64": ["v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"],
}


class CallingConvention:
    """Per-function view of a calling convention's argument/return registers.

    The `next_*_arg_reg` properties consume registers from private copies of
    the argument register lists, so one instance tracks the allocation state
    of a single function without affecting any other instance.
    """

    def __init__(self, arch, bn_func: "bn.Function", cc):
        """Capture the registers of `cc` for `bn_func`.

        Args:
            arch: Architecture object, stored as-is.
            bn_func: The function this convention belongs to.
            cc: The calling convention object, or `None` to fall back to the
                first calling convention of `bn_func.arch`.
        """
        # Resolve the fallback BEFORE touching any attribute of the
        # convention; previously a `None` convention crashed with an
        # AttributeError before the fallback could run.
        # NOTE(review): the fallback indexes `calling_conventions` with 0, as
        # the original code did -- confirm that this container supports
        # integer indexing in the targeted Binary Ninja API version.
        if cc is None:
            cc = bn_func.arch.calling_conventions[0]

        self._cc = cc
        self._arch = arch
        self._bn_func = bn_func

        # Copy the lists: `next_*_arg_reg` removes entries, and that must not
        # leak into the convention object or the module-level table.
        self._int_arg_regs = list(cc.int_arg_regs)
        self._float_arg_regs = list(cc.float_arg_regs)

        self._int_return_reg = cc.int_return_reg
        self._high_int_return_reg = cc.high_int_return_reg
        self._float_return_reg = cc.float_return_reg

        # set the float_arg_regs for default calling convention (cdecl)
        # for both x86 and arm architectures
        if cc.name in ("cdecl", "sysv"):
            try:
                self._float_arg_regs = list(_FLOAT_ARGS_REGS[cc.arch.name])
            except KeyError:
                WARN(f"Unsupported architecture: {self._cc.arch}")

    def is_sysv(self):
        """Return True if the convention is named "sysv"."""
        return self._cc.name == "sysv"

    def is_cdecl(self):
        """Return True if the convention is named "cdecl"."""
        return self._cc.name == "cdecl"

    @property
    def int_arg_reg(self):
        """Integer argument registers that have not been consumed yet."""
        return self._int_arg_regs

    @property
    def float_arg_reg(self):
        """Floating point argument registers that have not been consumed yet."""
        return self._float_arg_regs

    @property
    def int_return_reg(self):
        """Integer return register of the convention."""
        return self._int_return_reg

    @property
    def high_int_return_reg(self):
        """High integer return register of the convention."""
        return self._high_int_return_reg

    @property
    def float_return_reg(self):
        """Floating point return register of the convention."""
        return self._float_return_reg

    @property
    def next_int_arg_reg(self):
        """Consume and return the next integer argument register.

        Returns `None` once every integer argument register has been used.
        """
        if not self._int_arg_regs:
            return None
        return self._int_arg_regs.pop(0)

    @property
    def next_float_arg_reg(self):
        """Consume and return the next floating point argument register.

        Raises:
            IndexError: if no floating point argument register is left
                (unchanged from the original behavior).
        """
        return self._float_arg_regs.pop(0)

    @property
    def return_regs(self):
        """Yield the return registers of the function, if it is a bn.Function."""
        if isinstance(self._bn_func, bn.Function):
            for reg in self._bn_func.return_regs:
                yield reg
# Exception hierarchy of the anvill Python package. Every error type derives
# from `AnvillException`, so callers can catch that single base class.
# NOTE(review): the one-line descriptions below follow the class names; the
# raise sites are not visible here.


class AnvillException(Exception):
    """Base class of all anvill-specific errors."""


class UnhandledArchitectureType(AnvillException):
    """Error category for architecture types that are not handled."""


class UnhandledTypeException(AnvillException):
    """Error category for types that are not handled.

    The offending type, if supplied, is kept in the `type` attribute.
    """

    def __init__(self, msg, ty=None):
        super().__init__(msg)
        self.type = ty


class UnhandledOSException(AnvillException):
    """Error category for operating systems that are not handled."""


class InvalidFunctionException(AnvillException):
    """Error category for invalid functions."""


class InvalidParameterException(AnvillException):
    """Error category for invalid parameters."""


class InvalidVariableException(AnvillException):
    """Error category for invalid variables."""


class InvalidLocationException(AnvillException):
    """Error category for invalid locations."""


class ParseException(AnvillException):
    """Error category for parse failures."""
def is_imported_table_seg(seg):
    """Report whether `seg` looks like an import-table segment.

    A falsy `seg` yields False. Otherwise the segment's name is looked up
    through `idc.get_segm_name(seg.start_ea)` and checked for any of the
    substrings ".idata", ".plt" or ".got", i.e. the segments that typically
    hold cross-reference pointers into an external segment.
    """
    if not seg:
        return False

    name = idc.get_segm_name(seg.start_ea)
    # Same left-to-right, short-circuiting order as a chained `or`.
    for marker in (".idata", ".plt", ".got"):
        if marker in name:
            return True
    return False
class ImageParser(ABC):
    """This is the interface for image parser classes.

    The class derives from `ABC` so that the `@abstractmethod` markers below
    are actually enforced: the decorator is inert on a class whose metaclass
    is not `ABCMeta`. Concrete parsers must override both methods before
    they can be instantiated.
    """

    def __init__(self):
        pass

    @abstractmethod
    def get_function_thunk_list(self) -> "List[ImageFunctionThunk]":
        """Returns a list of function thunks found in the image"""

    @abstractmethod
    def get_image_bitness(self) -> int:
        """Returns the bitness of this image (i.e. 16, 32, 64)"""
class Memory(object):
    """Byte-addressed memory image that serialises into contiguous ranges.

    Bytes are recorded one address at a time with `map_byte`; `proto` merges
    neighbouring bytes that share permissions into hex-encoded ranges.
    """

    def __init__(self):
        # address -> (byte value, is_writeable, is_executable)
        self._bytes: Dict[int, Tuple[int, bool, bool]] = {}

    def map_byte(self, ea: int, val: int, can_write: bool, can_exec: bool):
        """Record (or overwrite) the byte at address `ea`.

        Only the low 8 bits of `val` are kept.
        """
        self._bytes[ea] = (int(val & 0xFF), can_write, can_exec)

    def proto(self) -> List[Dict[str, Any]]:
        """Return the mapped bytes as a list of range dictionaries.

        Each dictionary has the keys "address", "is_executable",
        "is_writeable" and "data" (lower-case hex, two digits per byte).
        Ranges are emitted in ascending address order; a new range starts
        whenever the address is not the byte right after the previous one or
        either permission flag changes. An empty memory yields `[]`.
        """
        proto: List[Dict[str, Any]] = []
        digits: List[str] = []  # hex digits of the range currently being built
        next_ea = None  # address that would extend the current range

        for ea in sorted(self._bytes):
            val, can_write, can_exec = self._bytes[ea]
            # Contiguity is tracked with integers only. Deriving it from
            # `address + len(data) / 2` goes through a float and silently
            # loses precision for addresses above 2**53.
            if not proto or \
               proto[-1]["is_writeable"] != can_write or \
               proto[-1]["is_executable"] != can_exec or \
               next_ea != ea:
                if proto:
                    proto[-1]["data"] = "".join(digits)
                digits = []
                proto.append({
                    "address": ea,
                    "is_executable": can_exec,
                    "is_writeable": can_write,
                    "data": ""
                })
            digits.append("{:02x}".format(val & 0xFF))
            next_ea = ea + 1

        if proto:
            proto[-1]["data"] = "".join(digits)

        return proto
30 | 31 | 32 | class LinuxOS(OS): 33 | def name(self) -> str: 34 | return "linux" 35 | 36 | def default_calling_convention(self, arch: Arch) -> CC: 37 | arch_name = arch.name() 38 | if arch_name == "x86": 39 | return cast(CC, 0) # cdecl 40 | elif arch_name == "amd64": 41 | return cast(CC, 78) # X86_64_SysV 42 | else: 43 | return cast(CC, 0) # cdecl 44 | 45 | 46 | class MacOS(OS): 47 | def name(self) -> str: 48 | return "macos" 49 | 50 | def default_calling_convention(self, arch: Arch) -> CC: 51 | arch_name = arch.name() 52 | if arch_name == "x86": 53 | return cast(CC, 0) # cdecl 54 | elif arch_name == "amd64": 55 | return cast(CC, 78) # X86_64_SysV 56 | else: 57 | return cast(CC, 0) # cdecl 58 | 59 | 60 | class WindowsOS(OS): 61 | def name(self) -> str: 62 | return "windows" 63 | 64 | def default_calling_convention(self, arch: Arch) -> CC: 65 | arch_name = arch.name() 66 | if arch_name == "x86": 67 | return cast(CC, 64) # stdcall 68 | elif arch_name == "amd64": 69 | return cast(CC, 79) # Win64 70 | else: 71 | return cast(CC, 0) # cdecl 72 | 73 | 74 | class SolarisOS(OS): 75 | def name(self) -> str: 76 | return "solaris" 77 | 78 | def default_calling_convention(self, arch: Arch) -> CC: 79 | return cast(CC, 0) 80 | -------------------------------------------------------------------------------- /python/anvill/util.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019-present, Trail of Bits, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed in accordance with the terms specified in 6 | # the LICENSE file found in the root directory of this source tree. 
# Configure the root logger once, at import time.
try:
    # `import logging` alone does not load the `logging.config` submodule, so
    # it is imported here; otherwise the `fileConfig` call below fails with
    # AttributeError and logging.ini is never applied.
    import logging.config

    # If the config file is available, configure logger
    config_file = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "logging.ini"
    )
    # Keep loggers that were created before this module was imported (for
    # example by a host application) enabled; the default would disable them.
    logging.config.fileConfig(config_file, disable_existing_loggers=False)

except Exception:
    # logging.ini is missing from the package (or could not be parsed).
    # Set up the basic configuration for the root logger instead.
    try:
        stream_handler = logging.StreamHandler(sys.stderr)
        logging.basicConfig(
            level=logging.ERROR,
            format="%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d]: %(message)s",
            handlers=[stream_handler],
        )

    except Exception as e2:
        print(f"Fail to configure root logger {e2}")

# debug log
DEBUG = logging.getLogger().debug

# info log
INFO = logging.getLogger().info

# warning log
WARN = logging.getLogger().warning

# error log
ERROR = logging.getLogger().error

# fatal log
FATAL = logging.getLogger().critical
class Variable(object):
    """Represents a generic global variable."""

    __slots__ = ("_arch", "_address", "_type")

    def __init__(self, arch, address, type_):
        self._arch = arch
        self._address = address
        self._type = type_

    def address(self) -> int:
        """Return the address this variable was created with."""
        return self._address

    def type(self) -> "Type":
        """Return the type this variable was created with (may be None)."""
        return self._type

    # NOTE: this class does not derive from ABC, so the marker below documents
    # intent but is not enforced when the class is instantiated.
    @abstractmethod
    def visit(self, program: "Specification", is_definition: bool, add_refs_as_defs: bool, ignore_no_refs: bool):
        ...

    def proto(self) -> Dict[str, Any]:
        """Return a dictionary describing this variable.

        The result always has an "address" entry. A "type" entry, produced by
        the type's own `proto(arch)`, is added only when a type is present.
        """
        proto: Dict[str, Any] = {"address": self.address()}
        type_ = self.type()
        # Identity test: `!= None` would dispatch to the type object's own
        # comparison operators and could wrongly drop the "type" entry.
        if type_ is not None:
            proto["type"] = type_.proto(self._arch)

        return proto
4 | # 5 | # This source code is licensed in accordance with the terms specified in 6 | # the LICENSE file found in the root directory of this source tree. 7 | # 8 | 9 | find_package(Python3 COMPONENTS Interpreter REQUIRED) 10 | 11 | set(pure_lib_dest "${CMAKE_INSTALL_PREFIX}/../lib/python3/dist-packages") 12 | 13 | # Redirect the whole installation if DESTDIR is specified 14 | if(DEFINED ENV{DESTDIR}) 15 | set(root_parameter "--root=$ENV{DESTDIR}") 16 | set(single_version "--single-version-externally-managed") 17 | endif() 18 | 19 | # and if we *are* in a virtualenv, default to normal install to install into venv 20 | # otherwise, if *NO VENV* or we have a DESTDIR, set up the prefixes 21 | if((NOT DEFINED ENV{VIRTUAL_ENV}) OR (DEFINED ENV{DESTDIR})) 22 | set(purelib_arg "--install-purelib=${pure_lib_dest}") 23 | set(prefix_arg "--prefix=${CMAKE_INSTALL_PREFIX}") 24 | endif() 25 | 26 | execute_process( 27 | COMMAND 28 | "${Python3_EXECUTABLE}" 29 | "${CMAKE_CURRENT_LIST_DIR}/../setup.py" 30 | install 31 | ${purelib_arg} 32 | ${prefix_arg} 33 | ${root_parameter} 34 | ${single_version} 35 | RESULT_VARIABLE setup_py_result 36 | OUTPUT_VARIABLE setup_py_stdout 37 | ERROR_VARIABLE setup_py_stderr 38 | ) 39 | 40 | message(STATUS "${setup_py_stdout}") 41 | 42 | if(NOT ${setup_py_result} EQUAL 0) 43 | message(FATAL_ERROR "result: ${setup_py_result}\n\n" 44 | "stderr: ${setup_py_stderr}" 45 | ) 46 | endif() 47 | -------------------------------------------------------------------------------- /scripts/docker-spec-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Copyright (c) 2019-present, Trail of Bits, Inc. 5 | # All rights reserved. 6 | # 7 | # This source code is licensed in accordance with the terms specified in 8 | # the LICENSE file found in the root directory of this source tree. 
#!/usr/bin/env bash

#
# Copyright (c) 2019-present, Trail of Bits, Inc.
# All rights reserved.
#
# This source code is licensed in accordance with the terms specified in
# the LICENSE file found in the root directory of this source tree.
#

# Runs the `format-files` wrapper around clang-format on the staged C++
# header and source files, then re-stages them. Works both when run from
# `scripts/` and when installed into `.git/hooks`.

# Get the list of changed C++ header and source files. The pattern is
# anchored so that only names ending in `.h` or `.cpp` match; the names are
# kept in an array so paths containing whitespace survive intact.
STAGED_FILES=()
while IFS= read -r staged_file ; do
  STAGED_FILES+=( "${staged_file}" )
done < <( git diff --cached --name-only --diff-filter=ACM | grep -E '\.(h|cpp)$' )

if [[ ${#STAGED_FILES[@]} -eq 0 ]]; then
  exit 0
fi

# `printf` is used for the messages because plain `echo` in bash prints
# `\t` and `\033` literally.
if ! PYTHON3_EXE=$( command -v python3 ) ; then
  printf "\t\033[41mPlease install python3\033[0m\n"
  exit 1
fi

if ! CLANG_FORMAT_EXE=$( command -v clang-format ) ; then
  printf "\t\033[41mPlease install clang-format\033[0m\n"
  exit 1
fi

THIS_FILE_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
NAME_OF_THIS_FILE_DIR=$( basename "${THIS_FILE_DIR}" )

# This script being executed is located in the `scripts/` directory.
if [[ "${NAME_OF_THIS_FILE_DIR}" == "scripts" ]] ; then
  SCRIPTS_DIR="${THIS_FILE_DIR}"
  SOURCE_DIR=$( cd "$( dirname "${SCRIPTS_DIR}" )" && pwd )

# This script being executed is located in the `.git/hooks` directory.
elif [[ "${NAME_OF_THIS_FILE_DIR}" == "hooks" ]] ; then
  # `.git/hooks` -> `.git` -> source root. This must start from the
  # directory of this file; `SCRIPTS_DIR` is not known yet at this point.
  GIT_DIR=$( cd "$( dirname "${THIS_FILE_DIR}" )" && pwd )
  SOURCE_DIR=$( cd "$( dirname "${GIT_DIR}" )" && pwd )
  SCRIPTS_DIR="${SOURCE_DIR}/scripts"

# Not sure where this script is.
else
  printf "\t\033[41mScript '%s' is in an unexpected location\033[0m\n" "${BASH_SOURCE[0]}"
  exit 1
fi

# Run our wrapper of `clang-format` on the files that have changed, and
# re-stage them only when the wrapper succeeds.
if "${PYTHON3_EXE}" "${SCRIPTS_DIR}/format-files" \
    --source_dir "${SOURCE_DIR}" \
    --format_exe "${CLANG_FORMAT_EXE}" \
    --files "${STAGED_FILES[@]}" \
    --check ; then
  for staged_file in "${STAGED_FILES[@]}" ; do
    git add "${staged_file}"
  done
else
  exit 1
fi
31 | -------------------------------------------------------------------------------- /scripts/run-on-anghabench.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #These are filled in by the CI system 4 | export ANVILL_BRANCH=__ANVILL_BRANCH__ 5 | export RUN_SIZE=__RUN_SIZE__ 6 | export BINJA_DECODE_KEY=__BINJA_DECODE_KEY__ 7 | export BINJA_CHANNEL=__BINJA_CHANNEL__ 8 | export BINJA_VERSION=__BINJA_VERSION__ 9 | 10 | export LLVM_VERSION=14 11 | export CC=clang-13 CXX=clang++-13 12 | 13 | dpkg --add-architecture i386 14 | apt-get update 15 | apt-get install -yqq s3cmd pixz curl git python3 python3-venv python3-pip xz-utils cmake ninja-build clang-13 g++-multilib unzip 16 | apt-get install -yqq libc6-dev:i386 libstdc++-*-dev:i386 17 | python3 -m pip install requests 18 | 19 | #install new cmake 20 | curl -LO https://github.com/Kitware/CMake/releases/download/v3.22.1/cmake-3.22.1-linux-x86_64.sh 21 | sh ./cmake-3.22.1-linux-x86_64.sh --skip-license --prefix=/usr 22 | 23 | git clone --recursive --shallow-submodules --depth=1 -b ${ANVILL_BRANCH} https://github.com/lifting-bits/anvill anvill 24 | # CI Branch is defined by the CI system 25 | git clone --recursive --shallow-submodules --depth=1 -b ${CI_BRANCH} https://github.com/lifting-bits/lifting-tools-ci ci 26 | 27 | python3 -m venv anvill-venv 28 | source anvill-venv/bin/activate 29 | 30 | pushd anvill 31 | # build us an anvill (and remill) 32 | scripts/build.sh \ 33 | --install \ 34 | --llvm-version ${LLVM_VERSION} \ 35 | --extra-cmake-args "-DCMAKE_BUILD_TYPE=Release" 36 | 37 | # Should be installed by build.sh --install, but sometimes the script accidentally breaks venv installs 38 | python3 setup.py install 39 | 40 | # install binja 41 | ci/install_binja.sh 42 | python3 ci/switcher.py --version ${BINJA_VERSION} ${BINJA_CHANNEL} 43 | popd 44 | 45 | pushd ci 46 | 47 | # Install extra requirements if needed 48 | if [[ -f requirements.txt ]] 49 | then 50 | 
python3 -m pip install -r requirements.txt 51 | fi 52 | 53 | mkdir -p $(pwd)/anvill_bitcode 54 | 55 | # default to 1k 56 | if [[ "${RUN_SIZE,,}" = "__run_size__" ]] 57 | then 58 | RUN_SIZE=1k 59 | fi 60 | 61 | datasets/fetch_anghabench.sh --clang ${LLVM_VERSION} --binaries --run-size ${RUN_SIZE} 62 | 63 | for i in *.tar.xz 64 | do 65 | tar -xJf $i 66 | done 67 | 68 | # Run the benchmark 69 | tool_run_scripts/anvill.py \ 70 | --run-name "[${RUN_NAME}] [size: ${RUN_SIZE}] [anvill: ${ANVILL_BRANCH}]" \ 71 | --input-dir $(pwd)/binaries \ 72 | --output-dir $(pwd)/anvill_bitcode \ 73 | --anvill-decompile /usr/local/bin/anvill-decompile-spec \ 74 | --slack-notify 75 | 76 | # AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY passed in from original invocation environment 77 | if [[ "${AWS_ACCESS_KEY_ID,,}" != "" ]] 78 | then 79 | datenow=$(date +'%F-%H-%M') 80 | url_base="https://tob-amp-ci-results.nyc3.digitaloceanspaces.com" 81 | tar -Ipixz -cf anvill-ci-${datenow}.tar.xz anvill_bitcode 82 | 83 | s3cmd -c /dev/null \ 84 | '--host-bucket=%(bucket)s.nyc3.digitaloceanspaces.com' \ 85 | --acl-public \ 86 | put \ 87 | anvill-ci-${datenow}.tar.xz \ 88 | s3://tob-amp-ci-results/anvill/ 89 | 90 | tool_run_scripts/slack.py \ 91 | --msg "Uploaded Anvill lifting results to ${url_base}/anvill/anvill-ci-${datenow}.tar.xz" 92 | fi 93 | 94 | # exit hook called here 95 | exit 0 96 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # 4 | # Copyright (c) 2019-present, Trail of Bits, Inc. 5 | # All rights reserved. 6 | # 7 | # This source code is licensed in accordance with the terms specified in 8 | # the LICENSE file found in the root directory of this source tree. 
9 | # 10 | 11 | import setuptools 12 | import sys 13 | import os 14 | 15 | os.chdir(os.path.join(os.path.dirname(__file__), "python")) 16 | 17 | setuptools.setup( 18 | name="anvill", 19 | version="1.0.2", 20 | description="Specification-based decompilation library", 21 | author="Peter Goodman", 22 | author_email="peter@trailofbits.com", 23 | url="https://github.com/lifting-bits/anvill", 24 | license="AGPL 3", 25 | data_files=[('anvill', ['anvill/logging.ini'])], 26 | py_modules=[ 27 | "anvill.__init__", "anvill.__main__", "anvill.arch", "anvill.binja.__init__", 28 | "anvill.binja.bnfunction", "anvill.binja.bninstruction", "anvill.binja.bnprogram", 29 | "anvill.binja.bnvariable", "anvill.binja.callingconvention", "anvill.binja.typecache", 30 | "anvill.binja.xreftype", "anvill.binja.table", "anvill.call", "anvill.exc", "anvill.function", 31 | "anvill.ida.__init__", "anvill.ida.idafunction", "anvill.ida.idaprogram", 32 | "anvill.ida.idavariable", "anvill.ida.utils", "anvill.imageparser.__init__", 33 | "anvill.imageparser.elfparser", "anvill.loc", "anvill.mem", "anvill.os", "anvill.program", 34 | "anvill.type", "anvill.var", "anvill.util"]) 35 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019-present, Trail of Bits, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed in accordance with the terms specified in 6 | # the LICENSE file found in the root directory of this source tree. 7 | # 8 | 9 | add_subdirectory("tools") 10 | add_subdirectory("anvill_passes") -------------------------------------------------------------------------------- /tests/anvill_passes/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019-present, Trail of Bits, Inc. 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed in accordance with the terms specified in 6 | # the LICENSE file found in the root directory of this source tree. 7 | # 8 | 9 | add_executable(test_anvill_passes 10 | src/main.cpp 11 | 12 | src/Utils.h 13 | src/Utils.cpp 14 | 15 | src/RecoverStackFrameInformation.cpp 16 | src/SinkSelectionsIntoBranchTargets.cpp 17 | src/SplitStackFrameAtReturnAddress.cpp 18 | src/InstructionFolderPass.cpp 19 | src/BrightenPointers.cpp 20 | src/TransformRemillJump.cpp 21 | src/SwitchLoweringPass.cpp 22 | src/XorConversionPass.cpp 23 | src/BranchRecoveryPass.cpp 24 | src/RemoveStackPointerCExprs.cpp 25 | src/RecoverEntityUses.cpp 26 | ) 27 | 28 | target_link_libraries(test_anvill_passes PRIVATE 29 | remill_settings 30 | remill 31 | anvill 32 | doctest::doctest 33 | ) 34 | 35 | target_compile_definitions(test_anvill_passes PRIVATE 36 | ANVILL_TEST_DATA_PATH=\"${CMAKE_CURRENT_SOURCE_DIR}/data\" 37 | ) 38 | 39 | target_include_directories(test_anvill_passes PRIVATE 40 | "${CMAKE_CURRENT_SOURCE_DIR}/../src" 41 | "${CMAKE_CURRENT_SOURCE_DIR}/../../lib/Passes" 42 | ) 43 | 44 | add_test( 45 | NAME test_anvill_passes 46 | COMMAND "$" 47 | WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" 48 | ) 49 | -------------------------------------------------------------------------------- /tests/anvill_passes/data/BaseFunctionPass.ll: -------------------------------------------------------------------------------- 1 | ; ModuleID = 'BaseFunctionPass.ll' 2 | source_filename = "BaseFunctionPass" 3 | 4 | @__anvill_sp = external global i8 5 | 6 | define i32 @SelectInstructions() { 7 | entry: 8 | %0 = alloca i32, align 4 9 | store i32 0, ptr %0, align 4 10 | %1 = load i32, ptr %0, align 4 11 | %2 = icmp eq i32 %1, 0 12 | br i1 %2, label %first, label %second 13 | 14 | first: ; preds = %entry 15 | %3 = add i32 %1, 1 16 | br label %exit 17 | 18 | second: ; preds = %entry 19 | %4 = add i32 %1, 2 20 | br label %exit 21 | 22 | exit: ; preds = %second, %first 23 | %5 = phi 
i32 [ %3, %first ], [ %4, %second ] 24 | ret i32 %5 25 | } 26 | 27 | define i64 @InstructionReferencesStackPointer() { 28 | entry: 29 | %0 = icmp eq i1 false, false 30 | %1 = select i1 %0, i64 11, i64 22 31 | store i32 add (i32 sub (i32 ptrtoint (ptr @__anvill_sp to i32), i32 16), i32 12), ptr inttoptr (i32 add (i32 sub (i32 ptrtoint (ptr @__anvill_sp to i32), i32 16), i32 -4) to ptr), align 4 32 | ret i64 %1 33 | } 34 | -------------------------------------------------------------------------------- /tests/anvill_passes/data/RegressionRecoverStack.ll: -------------------------------------------------------------------------------- 1 | ; ModuleID = 'RegressionRecoverStack.ll' 2 | source_filename = "lifted_code" 3 | target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" 4 | target triple = "i386-pc-linux-gnu-elf" 5 | 6 | @__anvill_sp = external global i8 7 | @__anvill_ra = external global i8 8 | @__anvill_pc = external global i8 9 | 10 | declare zeroext i1 @__remill_flag_computation_sign(i1 zeroext, ...) local_unnamed_addr 11 | 12 | define i1 @slice() local_unnamed_addr { 13 | %1 = call zeroext i1 (i1, ...) 
@__remill_flag_computation_sign(i1 zeroext icmp slt (i32 add (i32 ptrtoint (ptr @__anvill_sp to i32), i32 -12), i32 0), i32 add (i32 ptrtoint (ptr @__anvill_sp to i32), i32 -12)) 14 | ret i1 %1 15 | } 16 | -------------------------------------------------------------------------------- /tests/anvill_passes/data/SinkSelectionsIntoBranchTargets.ll: -------------------------------------------------------------------------------- 1 | ; ModuleID = 'SinkSelectionsIntoBranchTargets.ll' 2 | source_filename = "SinkSelectionsIntoBranchTargets" 3 | 4 | define void @SimpleCase() { 5 | entry: 6 | %0 = alloca i64, align 8 7 | %1 = load i64, ptr %0, align 4 8 | %2 = icmp eq i64 %1, 1 9 | %3 = select i1 %2, i64 10, i64 20 10 | br i1 %2, label %4, label %6 11 | 12 | 4: ; preds = %entry 13 | %5 = add i64 %3, 10 14 | br label %8 15 | 16 | 6: ; preds = %entry 17 | %7 = add i64 %3, 20 18 | br label %8 19 | 20 | 8: ; preds = %6, %4 21 | %9 = phi i64 [ %5, %4 ], [ %7, %6 ] 22 | ret void 23 | } 24 | 25 | define void @MultipleSelects() { 26 | entry: 27 | %0 = alloca i64, align 8 28 | %1 = load i64, ptr %0, align 4 29 | %2 = icmp eq i64 %1, 1 30 | %3 = select i1 %2, i64 10, i64 20 31 | %4 = select i1 %2, i64 10, i64 20 32 | %5 = select i1 %2, i64 10, i64 20 33 | br i1 %2, label %6, label %10 34 | 35 | 6: ; preds = %entry 36 | %7 = add i64 %3, 10 37 | %8 = add i64 %4, 10 38 | %9 = add i64 %5, 10 39 | br label %14 40 | 41 | 10: ; preds = %entry 42 | %11 = add i64 %3, 20 43 | %12 = add i64 %4, 20 44 | %13 = add i64 %5, 20 45 | br label %14 46 | 47 | 14: ; preds = %10, %6 48 | %15 = phi i64 [ %7, %6 ], [ %11, %10 ] 49 | %16 = phi i64 [ %8, %6 ], [ %12, %10 ] 50 | %17 = phi i64 [ %9, %6 ], [ %13, %10 ] 51 | ret void 52 | } 53 | 54 | define void @MultipleSelectUsages() { 55 | entry: 56 | %0 = alloca i64, align 8 57 | %1 = load i64, ptr %0, align 4 58 | %2 = icmp eq i64 %1, 1 59 | %3 = select i1 %2, i64 10, i64 20 60 | br i1 %2, label %4, label %10 61 | 62 | 4: ; preds = %entry 63 | %5 = add i64 
%3, 10 64 | %6 = add i64 %3, 10 65 | %7 = add i64 %3, 10 66 | %8 = add i64 %5, %6 67 | %9 = add i64 %8, %7 68 | br label %16 69 | 70 | 10: ; preds = %entry 71 | %11 = add i64 %3, 10 72 | %12 = add i64 %3, 10 73 | %13 = add i64 %3, 10 74 | %14 = add i64 %11, %12 75 | %15 = add i64 %14, %13 76 | br label %16 77 | 78 | 16: ; preds = %10, %4 79 | %17 = phi i64 [ %9, %4 ], [ %15, %10 ] 80 | ret void 81 | } 82 | -------------------------------------------------------------------------------- /tests/anvill_passes/data/loop_test.ll: -------------------------------------------------------------------------------- 1 | ; ModuleID = 'loop_test.ll' 2 | source_filename = "llvm-link" 3 | target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" 4 | target triple = "x86_64-pc-linux-gnu-elf" 5 | 6 | ; Function Attrs: noinline 7 | define i32 @main(i32 %0, ptr %1, ptr %2) local_unnamed_addr #0 { 8 | br label %4 9 | 10 | 4: ; preds = %13, %3 11 | %.sroa.3.0 = phi i32 [ 0, %3 ], [ %14, %13 ] 12 | %.sroa.0.0 = phi i32 [ 0, %3 ], [ %15, %13 ] 13 | %5 = add i32 %.sroa.0.0, -100 14 | %6 = lshr i32 %5, 31 15 | %7 = lshr i32 %.sroa.0.0, 31 16 | %8 = xor i32 %6, %7 17 | %9 = add nuw nsw i32 %8, %7 18 | %10 = icmp eq i32 %9, 2 19 | %11 = icmp sgt i32 %5, -1 20 | %12 = xor i1 %11, %10 21 | br i1 %12, label %16, label %13 22 | 23 | 13: ; preds = %4 24 | %14 = add i32 %.sroa.3.0, 2 25 | %15 = add i32 %.sroa.0.0, 1 26 | br label %4 27 | 28 | 16: ; preds = %4 29 | ret i32 %.sroa.3.0 30 | } 31 | 32 | attributes #0 = { noinline } 33 | 34 | !llvm.ident = !{!0, !0, !0} 35 | !llvm.module.flags = !{!1, !2, !3} 36 | !llvm.dbg.cu = !{} 37 | 38 | !0 = !{!"clang version 11.0.0 (https://github.com/trailofbits/vcpkg.git 4592a93cc4ca82f1963dba08413c43639662d7ae)"} 39 | !1 = !{i32 1, !"wchar_size", i32 4} 40 | !2 = !{i32 7, !"Dwarf Version", i32 4} 41 | !3 = !{i32 2, !"Debug Info Version", i32 3} 42 | -------------------------------------------------------------------------------- 
/tests/anvill_passes/data/maybe_proof.smt: -------------------------------------------------------------------------------- 1 | (declare-fun value1 () (_ BitVec 32)) 2 | (declare-fun value0 () (_ BitVec 32)) 3 | (assert (let ((a!1 ((_ extract 31 0) 4 | (bvadd ((_ zero_extend 32) (bvxor value1 #xffffffff)) 5 | #x0000000000000001 6 | ((_ zero_extend 32) value0)))) 7 | (a!2 (bvshl (bvadd ((_ zero_extend 32) (bvxor value1 #xffffffff)) 8 | #x0000000000000001 9 | ((_ zero_extend 32) value0)) 10 | #x0000000000000020))) 11 | (let ((a!3 (distinct (bvashr a!2 #x0000000000000020) 12 | (bvadd ((_ sign_extend 32) (bvxor value1 #xffffffff)) 13 | #x0000000000000001 14 | ((_ sign_extend 32) value0))))) 15 | (let ((a!4 (and (bvsge value0 value1) 16 | (not (xor (bvslt a!1 #x00000000) a!3 (distinct #b1 #b0)))))) 17 | (or a!4 18 | (and (xor (bvslt a!1 #x00000000) a!3 (distinct #b1 #b0)) 19 | (not (bvsge value0 value1)))))))) 20 | 21 | (check-sat) -------------------------------------------------------------------------------- /tests/anvill_passes/data/proof_result.smt: -------------------------------------------------------------------------------- 1 | (declare-fun value1 () (_ BitVec 32)) 2 | (declare-fun value0 () (_ BitVec 32)) 3 | (declare-fun total_flag () Bool) 4 | (declare-fun sign_flag () Bool) 5 | (declare-fun overflow_flag () Bool) 6 | (declare-fun condition () Bool) 7 | (declare-fun value1neg () (_ BitVec 32)) 8 | 9 | (assert (= value1neg (bvxor value1 #xffffffff))) 10 | 11 | (assert (= sign_flag (bvslt ((_ extract 31 0) 12 | (bvadd ((_ zero_extend 32) value1neg) 13 | #x0000000000000001 14 | ((_ zero_extend 32) value0))) 15 | #x00000000))) 16 | 17 | 18 | (assert (= overflow_flag (distinct (bvashr (bvshl (bvadd ((_ zero_extend 32) value1neg) 19 | #x0000000000000001 20 | ((_ zero_extend 32) value0)) 21 | #x0000000000000020) 22 | #x0000000000000020) (bvadd ((_ sign_extend 32) value1neg) 23 | #x0000000000000001 24 | ((_ sign_extend 32) value0))))) 25 | 26 | (assert (= 
total_flag (= sign_flag 27 | overflow_flag))) 28 | 29 | 30 | (assert (= condition (bvsge value0 value1) )) 31 | 32 | (assert (or (and condition (not total_flag)) (and total_flag (not condition)))) 33 | 34 | (check-sat) 35 | (get-model) -------------------------------------------------------------------------------- /tests/anvill_passes/data/test_cast_rt.ll: -------------------------------------------------------------------------------- 1 | ; ModuleID = 'test_cast_rt.ll' 2 | source_filename = "lifted_code" 3 | target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" 4 | target triple = "x86_64-pc-linux-gnu-elf" 5 | 6 | @__anvill_reg_RBP = internal local_unnamed_addr global i64 0 7 | @__anvill_sp = internal global i8 0 8 | @__anvill_ra = internal global i8 0 9 | @llvm.compiler.used = appending global [3 x ptr] [ptr @sub_4003b4__All_Svl_B_0, ptr @main, ptr @__libc_start_main], section "llvm.metadata" 10 | @__anvill_stack_minus_12 = global i8 0 11 | @__anvill_stack_minus_11 = global i8 0 12 | @__anvill_stack_minus_10 = global i8 0 13 | @__anvill_stack_minus_9 = global i8 0 14 | @__anvill_stack_minus_8 = global i8 0 15 | @__anvill_stack_minus_7 = global i8 0 16 | @__anvill_stack_minus_6 = global i8 0 17 | @__anvill_stack_minus_5 = global i8 0 18 | @__anvill_stack_minus_4 = global i8 0 19 | @__anvill_stack_minus_3 = global i8 0 20 | @__anvill_stack_minus_2 = global i8 0 21 | @__anvill_stack_minus_1 = global i8 0 22 | @__anvill_stack_0 = global i8 0 23 | @__anvill_stack_plus_1 = global i8 0 24 | @__anvill_stack_plus_2 = global i8 0 25 | @__anvill_stack_plus_3 = global i8 0 26 | @__anvill_stack_plus_4 = global i8 0 27 | @__anvill_stack_plus_5 = global i8 0 28 | @__anvill_stack_plus_6 = global i8 0 29 | @__anvill_stack_plus_7 = global i8 0 30 | 31 | ; Function Attrs: noinline 32 | declare i64 @sub_4003b4__All_Svl_B_0(i64, i64, ptr) #0 33 | 34 | ; Function Attrs: noinline 35 | define i32 @main(i32 %0, ptr %1, ptr %2) #0 { 36 | ret i32 6295596 37 | } 38 | 39 | ; Function 
Attrs: noduplicate noinline nounwind optnone readnone 40 | declare dso_local ptr @__remill_write_memory_64(ptr, i64, i64) local_unnamed_addr #1 41 | 42 | ; Function Attrs: noduplicate noinline nounwind optnone readnone 43 | declare dso_local ptr @__remill_write_memory_32(ptr, i64, i32) local_unnamed_addr #1 44 | 45 | ; Function Attrs: noduplicate noinline nounwind optnone readnone 46 | declare dso_local i64 @__remill_read_memory_64(ptr, i64) local_unnamed_addr #1 47 | 48 | ; Function Attrs: noduplicate noinline nounwind optnone 49 | declare dso_local ptr @__remill_function_return(ptr nonnull align 1, i64, ptr) local_unnamed_addr #2 50 | 51 | ; Function Attrs: noinline 52 | declare x86_64_sysvcc i32 @__libc_start_main(ptr, i32, ptr, ptr, ptr, ptr, ptr) #0 53 | 54 | attributes #0 = { noinline } 55 | attributes #1 = { noduplicate noinline nounwind optnone readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-builtins" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } 56 | attributes #2 = { noduplicate noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-builtins" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } 57 | -------------------------------------------------------------------------------- /tests/anvill_passes/data/test_trunc_rt.ll: -------------------------------------------------------------------------------- 1 | ; ModuleID = 'test_trunc_rt.ll' 2 | source_filename = "lifted_code" 3 | target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" 4 | target triple = 
"x86_64-pc-linux-gnu-elf" 5 | 6 | @var_601028_l = global i64 -1 7 | @__anvill_reg_RBP = internal local_unnamed_addr global i64 0 8 | @__anvill_sp = internal global i8 0 9 | @__anvill_ra = internal global i8 0 10 | @llvm.compiler.used = appending global [4 x ptr] [ptr @sub_4003b4__All_Svl_B_0, ptr @main, ptr @__libc_start_main, ptr @var_601028_l], section "llvm.metadata" 11 | @__anvill_stack_minus_12 = global i8 0 12 | @__anvill_stack_minus_11 = global i8 0 13 | @__anvill_stack_minus_10 = global i8 0 14 | @__anvill_stack_minus_9 = global i8 0 15 | @__anvill_stack_minus_8 = global i8 0 16 | @__anvill_stack_minus_7 = global i8 0 17 | @__anvill_stack_minus_6 = global i8 0 18 | @__anvill_stack_minus_5 = global i8 0 19 | @__anvill_stack_minus_4 = global i8 0 20 | @__anvill_stack_minus_3 = global i8 0 21 | @__anvill_stack_minus_2 = global i8 0 22 | @__anvill_stack_minus_1 = global i8 0 23 | @__anvill_stack_0 = global i8 0 24 | @__anvill_stack_plus_1 = global i8 0 25 | @__anvill_stack_plus_2 = global i8 0 26 | @__anvill_stack_plus_3 = global i8 0 27 | @__anvill_stack_plus_4 = global i8 0 28 | @__anvill_stack_plus_5 = global i8 0 29 | @__anvill_stack_plus_6 = global i8 0 30 | @__anvill_stack_plus_7 = global i8 0 31 | 32 | ; Function Attrs: noinline 33 | declare i64 @sub_4003b4__All_Svl_B_0(i64, i64, ptr) #0 34 | 35 | ; Function Attrs: noinline 36 | define i32 @main(i32 %0, ptr %1, ptr %2) #0 { 37 | %4 = load i64, ptr @var_601028_l, align 8 38 | %5 = trunc i64 %4 to i32 39 | ret i32 %5 40 | } 41 | 42 | ; Function Attrs: noduplicate noinline nounwind optnone readnone 43 | declare dso_local ptr @__remill_write_memory_64(ptr, i64, i64) local_unnamed_addr #1 44 | 45 | ; Function Attrs: noduplicate noinline nounwind optnone readnone 46 | declare dso_local ptr @__remill_write_memory_32(ptr, i64, i32) local_unnamed_addr #1 47 | 48 | ; Function Attrs: noduplicate noinline nounwind optnone readnone 49 | declare dso_local i64 @__remill_read_memory_64(ptr, i64) local_unnamed_addr #1 50 
| 51 | ; Function Attrs: noduplicate noinline nounwind optnone 52 | declare dso_local ptr @__remill_function_return(ptr nonnull align 1, i64, ptr) local_unnamed_addr #2 53 | 54 | ; Function Attrs: noinline 55 | declare x86_64_sysvcc i32 @__libc_start_main(ptr, i32, ptr, ptr, ptr, ptr, ptr) #0 56 | 57 | attributes #0 = { noinline } 58 | attributes #1 = { noduplicate noinline nounwind optnone readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-builtins" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } 59 | attributes #2 = { noduplicate noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-builtins" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } 60 | -------------------------------------------------------------------------------- /tests/anvill_passes/data/xor_conversion_nochange.ll: -------------------------------------------------------------------------------- 1 | ; ModuleID = 'xor_conversion_nochange.ll' 2 | source_filename = "xor_conv_2.c" 3 | target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" 4 | target triple = "x86_64-pc-linux-gnu" 5 | 6 | ; Function Attrs: nofree norecurse nounwind uwtable 7 | define dso_local void @xor_as_not_nochange(ptr nocapture readonly %0, ptr nocapture %1) local_unnamed_addr #0 { 8 | %3 = getelementptr inbounds i8, ptr %0, i64 4 9 | %4 = load i8, ptr %3, align 1, !tbaa !2 10 | %5 = and i8 %4, 12 11 | %6 = icmp eq i8 %5, 0 12 | %7 = zext i1 %6 to i8 13 | %8 = xor i1 %6, true 14 | %9 = zext i1 %8 to i8 15 | %10 = 
getelementptr inbounds i8, ptr %1, i64 5 16 | store i8 %9, ptr %10, align 1, !tbaa !2 17 | %11 = getelementptr inbounds i8, ptr %1, i64 1 18 | store i8 %7, ptr %11, align 1, !tbaa !2 19 | %12 = zext i1 %6 to i64 20 | %13 = getelementptr inbounds i8, ptr %0, i64 %12 21 | %14 = load i8, ptr %13, align 1, !tbaa !2 22 | %15 = add i8 %14, 1 23 | store i8 %15, ptr %1, align 1, !tbaa !2 24 | %16 = getelementptr inbounds i8, ptr %1, i64 3 25 | store i8 %9, ptr %16, align 1, !tbaa !2 26 | ret void 27 | } 28 | 29 | attributes #0 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } 30 | 31 | !llvm.module.flags = !{!0} 32 | !llvm.ident = !{!1} 33 | 34 | !0 = !{i32 1, !"wchar_size", i32 4} 35 | !1 = !{!"Debian clang version 11.1.0-++20210428103904+1fdec59bffc1-1~exp1~20210428204532.8"} 36 | !2 = !{!3, !3, i64 0} 37 | !3 = !{!"omnipotent char", !4, i64 0} 38 | !4 = !{!"Simple C/C++ TBAA"} 39 | -------------------------------------------------------------------------------- /tests/anvill_passes/data/xor_removal.ll: -------------------------------------------------------------------------------- 1 | ; ModuleID = 'xor_removal.ll' 2 | source_filename = "xor_removal.c" 3 | target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" 4 | target triple = "x86_64-pc-linux-gnu" 5 | 6 | @.str = private unnamed_addr constant [6 x i8] c"true\0A\00", align 1 7 | @.str.1 = private unnamed_addr constant [7 x i8] c"false\0A\00", align 1 8 | 9 | ; Function Attrs: nofree nounwind uwtable 10 | define i32 @xor_removal(i1 zeroext %0) #0 
{ 11 | %2 = alloca i8, align 1 12 | %3 = alloca i8, align 1 13 | %4 = alloca i8, align 1 14 | %5 = zext i1 %0 to i8 15 | store i8 %5, ptr %2, align 1 16 | %6 = load i8, ptr %2, align 1 17 | %7 = trunc i8 %6 to i1 18 | %8 = zext i1 %7 to i8 19 | store i8 %8, ptr %3, align 1 20 | %9 = load i8, ptr %3, align 1 21 | %10 = trunc i8 %9 to i1 22 | %11 = xor i1 %10, true 23 | br i1 %11, label %a17, label %a19 24 | 25 | a17: ; preds = %1 26 | %12 = call i32 @puts(ptr getelementptr inbounds ([6 x i8], ptr @.str, i64 0, i64 0)) 27 | br label %a21 28 | 29 | a19: ; preds = %1 30 | %13 = call i32 @puts(ptr getelementptr inbounds ([7 x i8], ptr @.str.1, i64 0, i64 0)) 31 | br label %a21 32 | 33 | a21: ; preds = %a19, %a17 34 | ret i32 0 35 | } 36 | 37 | ; Function Attrs: nofree nounwind 38 | declare dso_local i32 @puts(ptr nocapture readonly) local_unnamed_addr #1 39 | 40 | attributes #0 = { nofree nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } 41 | attributes #1 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } 42 | 43 | !llvm.module.flags = !{!0} 44 | !llvm.ident = !{!1} 45 | 46 | !0 = !{i32 1, !"wchar_size", i32 4} 47 | !1 = !{!"Debian clang version 
11.1.0-++20210428103904+1fdec59bffc1-1~exp1~20210428204532.8"} 48 | -------------------------------------------------------------------------------- /tests/anvill_passes/src/InstructionFolderPass.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | 19 | #include "Utils.h" 20 | 21 | namespace anvill { 22 | 23 | TEST_SUITE("InstructionFolderPass") { 24 | TEST_CASE("Run the whole pass on a well-formed function") { 25 | auto context = anvill::CreateContextWithOpaquePointers(); 26 | auto module = LoadTestData(*context, "InstructionFolderPass.ll"); 27 | 28 | REQUIRE(module != nullptr); 29 | 30 | auto arch = remill::Arch::Build(context.get(), remill::GetOSName("linux"), 31 | remill::GetArchName("amd64")); 32 | 33 | REQUIRE(arch != nullptr); 34 | 35 | CHECK(RunFunctionPass(module.get(), HoistUsersOfSelectsAndPhis())); 36 | } 37 | } 38 | 39 | } // namespace anvill 40 | -------------------------------------------------------------------------------- /tests/anvill_passes/src/RecoverEntityUses.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include "Utils.h" 15 | 16 | namespace anvill { 17 | 18 | TEST_SUITE("RecoverEntityUses") { 19 | TEST_CASE("Regression test for unresolved anvill_pc") { 20 | auto llvm_context = anvill::CreateContextWithOpaquePointers(); 21 | auto module = LoadTestData(*llvm_context, "TestingUnresolvedEntity.ll"); 22 | 23 | 24 | auto arch = 25 | 
remill::Arch::Build(llvm_context.get(), remill::GetOSName("linux"), 26 | remill::GetArchName("amd64")); 27 | REQUIRE(arch != nullptr); 28 | 29 | auto ctrl_flow_provider = anvill::NullControlFlowProvider(); 30 | TypeDictionary tyDict(*llvm_context); 31 | 32 | NullTypeProvider ty_prov(tyDict); 33 | NullMemoryProvider mem_prov; 34 | 35 | // Build the lifter options exactly once. `ctrl_flow_provider` is passed with 36 | // std::move, so it must not also be moved into a second, unused 37 | // LifterOptions object before this one is constructed. 38 | anvill::LifterOptions options(arch.get(), *module, ty_prov, 39 | std::move(ctrl_flow_provider), mem_prov); 40 | 41 | // The type and memory providers are null implementations; they are only here so the lifter can be constructed. 42 | anvill::EntityLifter lifter(options); 43 | 44 | EntityCrossReferenceResolver xref(lifter); 45 | 46 | ConvertAddressesToEntityUses conv(xref); 47 | 48 | auto func = module->getFunction("sub_12b30__A_SBI_B_0.6"); 49 | 50 | REQUIRE(func != nullptr); 51 | 52 | llvm::FunctionAnalysisManager fam; 53 | 54 | conv.run(*func, fam); 55 | func->dump(); 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /tests/anvill_passes/src/RemoveStackPointerCExprs.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "Utils.h" 17 | namespace anvill { 18 | 19 | // Returns the function named `name` in `module`, or nullptr when the module 20 | // has no function with that name. The lookup goes through 21 | // llvm::Module::getFunction rather than a hand-written scan over every 22 | // function, matching how the other pass tests in this directory locate 23 | // their target functions. 24 | static llvm::Function *FindFunction(llvm::Module *module, std::string name) { 25 | return module->getFunction(name); 26 | } 27 | 28 | TEST_SUITE("RemoveStackPointerCExprs") { 29 | TEST_CASE("RegressionRecoverStack.ll") { 30 | auto llvm_context = anvill::CreateContextWithOpaquePointers(); 31 | auto mod = LoadTestData(*llvm_context, "RegressionRecoverStack.ll"); 32 | auto target_function = FindFunction(mod.get(), "slice"); 
33 | REQUIRE(target_function != nullptr); // dereferenced below: abort the test case here instead of continuing with a null pointer 34 | llvm::FunctionPassManager fpm; 35 | llvm::FunctionAnalysisManager fam; 36 | llvm::ModuleAnalysisManager mam; 37 | llvm::LoopAnalysisManager lam; 38 | llvm::CGSCCAnalysisManager cgam; 39 | 40 | llvm::PassBuilder pb; 41 | 42 | pb.registerFunctionAnalyses(fam); 43 | pb.registerModuleAnalyses(mam); 44 | pb.registerCGSCCAnalyses(cgam); 45 | pb.registerLoopAnalyses(lam); 46 | 47 | pb.crossRegisterProxies(lam, fam, cgam, mam); 48 | 49 | StackFrameRecoveryOptions opt; 50 | fpm.addPass(RemoveStackPointerCExprs(opt)); 51 | fpm.run(*target_function, fam); 52 | 53 | target_function->dump(); 54 | 55 | CHECK(VerifyModule(mod.get())); 56 | 57 | fam.clear(); 58 | cgam.clear(); 59 | lam.clear(); 60 | mam.clear(); 61 | } 62 | } 63 | } // namespace anvill 64 | -------------------------------------------------------------------------------- /tests/anvill_passes/src/SinkSelectionsIntoBranchTargets.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "Utils.h" 15 | #include 16 | 17 | namespace anvill { 18 | 19 | TEST_SUITE("SinkSelectionsIntoBranchTargets") { 20 | TEST_CASE("Run the whole pass on a well-formed function") { 21 | auto llvm_context = anvill::CreateContextWithOpaquePointers(); 22 | auto module = 23 | LoadTestData(*llvm_context, "SinkSelectionsIntoBranchTargets.ll"); 24 | 25 | REQUIRE(module.get() != nullptr); 26 | 27 | CHECK(RunFunctionPass( 28 | module.get(), SinkSelectionsIntoBranchTargets())); 29 | 30 | } 31 | 32 | TEST_CASE("SimpleCase") { 33 | auto llvm_context = anvill::CreateContextWithOpaquePointers(); 34 | auto module = 35 | LoadTestData(*llvm_context, "SinkSelectionsIntoBranchTargets.ll"); 36 | 37 | REQUIRE(module.get() != nullptr); 38 | 39 | auto function = module->getFunction("SimpleCase"); 40 | REQUIRE(function != nullptr); 41 | 42 | llvm::DominatorTreeAnalysis dt; 43 | llvm::FunctionAnalysisManager fam; 44 | 45 | auto dt_res = dt.run(*function, fam); 46 | 47 | auto analysis = SinkSelectionsIntoBranchTargets::AnalyzeFunction(dt_res, *function); 48 | 49 | CHECK(analysis.replacement_list.size() == 2U); 50 | CHECK(analysis.disposable_instruction_list.size() == 1U); 51 | } 52 | 53 | TEST_CASE("MultipleSelects") { 54 | auto llvm_context = anvill::CreateContextWithOpaquePointers(); 55 | auto module = 56 | LoadTestData(*llvm_context, "SinkSelectionsIntoBranchTargets.ll"); 57 | 58 | REQUIRE(module.get() != nullptr); 59 | 60 | auto function = module->getFunction("MultipleSelects"); 61 | REQUIRE(function != nullptr); 62 | 63 | llvm::DominatorTreeAnalysis dt; 64 | llvm::FunctionAnalysisManager fam; 65 | 66 | auto dt_res = dt.run(*function, fam); 67 | 68 | auto analysis = SinkSelectionsIntoBranchTargets::AnalyzeFunction(dt_res, *function); 69 | 70 | CHECK(analysis.replacement_list.size() == 6U); 71 | CHECK(analysis.disposable_instruction_list.size() == 3U); 72 | } 73 | 74 | 
TEST_CASE("MultipleSelectUsages") { 75 | auto llvm_context = anvill::CreateContextWithOpaquePointers(); 76 | auto module = 77 | LoadTestData(*llvm_context, "SinkSelectionsIntoBranchTargets.ll"); 78 | 79 | REQUIRE(module.get() != nullptr); 80 | 81 | auto function = module->getFunction("MultipleSelectUsages"); 82 | REQUIRE(function != nullptr); 83 | 84 | llvm::DominatorTreeAnalysis dt; 85 | llvm::FunctionAnalysisManager fam; 86 | 87 | auto dt_res = dt.run(*function, fam); 88 | 89 | auto analysis = SinkSelectionsIntoBranchTargets::AnalyzeFunction(dt_res, *function); 90 | 91 | CHECK(analysis.replacement_list.size() == 6U); 92 | CHECK(analysis.disposable_instruction_list.size() == 1U); 93 | } 94 | } 95 | 96 | } // namespace anvill 97 | -------------------------------------------------------------------------------- /tests/anvill_passes/src/SplitStackFrameAtReturnAddress.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | #include "Utils.h" 18 | 19 | 20 | namespace anvill { 21 | 22 | TEST_SUITE("SplitStackFrameAtReturnAddress") { 23 | TEST_CASE("Run the whole pass on a well-formed function") { 24 | auto llvm_context = anvill::CreateContextWithOpaquePointers(); 25 | auto module = 26 | LoadTestData(*llvm_context, "SplitStackFrameAtReturnAddress.ll"); 27 | 28 | REQUIRE(module != nullptr); 29 | StackFrameRecoveryOptions opt; 30 | CHECK(RunFunctionPass( 31 | module.get(), SplitStackFrameAtReturnAddress(opt))); 32 | 33 | } 34 | } 35 | 36 | } // namespace anvill 37 | -------------------------------------------------------------------------------- /tests/anvill_passes/src/Utils.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 
7 | */ 8 | 9 | #include "Utils.h" 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | namespace anvill { 19 | 20 | bool VerifyModule(llvm::Module *module) { 21 | std::string error_buffer; 22 | llvm::raw_string_ostream error_stream(error_buffer); 23 | 24 | if (llvm::verifyModule(*module, &error_stream) != 0) { 25 | auto module_name = module->getName().str(); 26 | 27 | std::string error_message = 28 | "Module verification failed for '" + module_name + "'"; 29 | 30 | error_stream.flush(); 31 | if (!error_buffer.empty()) { 32 | error_message += ": " + error_buffer; 33 | } 34 | 35 | std::cerr << error_message << std::endl; 36 | return false; 37 | } 38 | 39 | return true; 40 | } 41 | 42 | std::unique_ptr LoadTestData(llvm::LLVMContext &context, 43 | const std::string &data_name) { 44 | auto data_path = std::string(ANVILL_TEST_DATA_PATH) + "/" + data_name; 45 | 46 | llvm::SMDiagnostic error; 47 | auto llvm_module = std::unique_ptr( 48 | llvm::parseIRFile(data_path, error, context)); 49 | 50 | if (llvm_module == nullptr) { 51 | throw std::runtime_error( 52 | "Failed to load the anvill_passes test data named " + data_name + ": " + 53 | error.getMessage().str()); 54 | } 55 | 56 | std::string error_buffer; 57 | llvm::raw_string_ostream error_stream(error_buffer); 58 | 59 | auto succeeded = llvm::verifyModule(*llvm_module.get(), &error_stream) == 0; 60 | error_stream.flush(); 61 | 62 | if (!succeeded) { 63 | std::string error_message = 64 | "Module verification failed for '" + data_name + "'"; 65 | 66 | if (!error_buffer.empty()) { 67 | error_message += ": " + error_buffer; 68 | } 69 | 70 | std::cerr << error_message << std::endl; 71 | } 72 | 73 | return llvm_module; 74 | } 75 | 76 | 77 | const PlatformList &GetSupportedPlatforms(void) { 78 | static const PlatformList kSupportedPlatforms = {{"linux", "amd64"}}; 79 | 80 | return kSupportedPlatforms; 81 | } 82 | 83 | std::unique_ptr CreateContextWithOpaquePointers(void) { 84 | auto context = 
std::make_unique(); 85 | #if LLVM_VERSION_NUMBER < LLVM_VERSION(15, 0) 86 | context->enableOpaquePointers(); 87 | #endif 88 | return context; 89 | } 90 | 91 | } // namespace anvill 92 | -------------------------------------------------------------------------------- /tests/anvill_passes/src/Utils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | namespace anvill { 24 | 25 | bool VerifyModule(llvm::Module *module); 26 | 27 | std::unique_ptr LoadTestData(llvm::LLVMContext &context, 28 | const std::string &data_name); 29 | 30 | template 31 | bool RunFunctionPass(llvm::Module *module, PassT &&function_pass) { 32 | 33 | llvm::PassBuilder pass_builder; 34 | llvm::FunctionPassManager fpm; 35 | llvm::FunctionAnalysisManager fam; 36 | llvm::ModuleAnalysisManager mam; 37 | pass_builder.registerModuleAnalyses(mam); 38 | pass_builder.registerFunctionAnalyses(fam); 39 | 40 | fam.registerPass( 41 | [&] { return llvm::ModuleAnalysisManagerFunctionProxy(mam); }); 42 | 43 | fpm.addPass(std::forward(function_pass)); 44 | for (auto &func : *module) { 45 | fpm.run(func, fam); 46 | } 47 | 48 | return VerifyModule(module); 49 | } 50 | 51 | 52 | struct Platform final { 53 | std::string os; 54 | std::string arch; 55 | }; 56 | 57 | using PlatformList = std::vector; 58 | const PlatformList &GetSupportedPlatforms(void); 59 | 60 | std::unique_ptr CreateContextWithOpaquePointers(void); 61 | 62 | } // namespace anvill 63 | -------------------------------------------------------------------------------- /tests/anvill_passes/src/main.cpp: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN 10 | #include 11 | -------------------------------------------------------------------------------- /tests/tools/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019-present, Trail of Bits, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed in accordance with the terms specified in 6 | # the LICENSE file found in the root directory of this source tree. 7 | # 8 | 9 | add_executable(test_anvill 10 | src/main.cpp 11 | src/Result.cpp 12 | src/TypeSpecification.cpp 13 | ) 14 | 15 | target_link_libraries(test_anvill PRIVATE 16 | remill_settings 17 | remill 18 | anvill 19 | doctest::doctest 20 | ) 21 | 22 | target_include_directories(test_anvill PRIVATE 23 | "${CMAKE_CURRENT_SOURCE_DIR}/../src" 24 | ) 25 | # Register the test binary with CTest; the generator expression expands to the path of the built executable. 26 | add_test( 27 | NAME test_anvill 28 | COMMAND "$<TARGET_FILE:test_anvill>" 29 | WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" 30 | ) 31 | -------------------------------------------------------------------------------- /tests/tools/src/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-present, Trail of Bits, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed in accordance with the terms specified in 6 | * the LICENSE file found in the root directory of this source tree. 7 | */ 8 | 9 | #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN 10 | #include 11 | --------------------------------------------------------------------------------