├── CMakeLists.txt ├── LICENSE ├── README.md ├── docs └── doxygen.config ├── ida_export └── export.py ├── ida_import ├── ida_import.py └── marx.py ├── include ├── amd64.h ├── amd64_registers.h ├── base_analysis.h ├── blacklist_functions.h ├── block.h ├── block_semantics.h ├── dump_file.h ├── expression.h ├── external_functions.h ├── function.h ├── got.h ├── idata.h ├── mapped_elf.h ├── mapped_pe.h ├── memory.h ├── module_plt.h ├── new_operators.h ├── overwrite_analysis.h ├── path_builder.h ├── pe.h ├── return_value.h ├── serialization.h ├── state.h ├── translator.h ├── vcall.h ├── vcall_types.h ├── vex.h ├── vtable_file.h ├── vtable_hierarchy.h ├── vtable_update.h └── vtv_vcall_gt.h ├── paper.pdf ├── patch └── heap_allocation_patch.diff ├── scripts ├── ida_get_all_icalls.py ├── ida_get_hierarchies_through_rtti.py ├── ida_has_refs.py ├── ida_is_subvtable.py ├── ida_win_find_blacklist_functions.py └── ida_win_get_hierarchies_through_rtti.py └── src ├── base_analysis.cpp ├── blacklist_functions.cpp ├── block.cpp ├── block_semantics.cpp ├── dump_file.cpp ├── expression.cpp ├── external_functions.cpp ├── function.cpp ├── got.cpp ├── idata.cpp ├── main.cpp ├── mapped_elf.cpp ├── mapped_pe.cpp ├── module_plt.cpp ├── new_operators.cpp ├── overwrite_analysis.cpp ├── path_builder.cpp ├── return_value.cpp ├── serialization.cpp ├── state.cpp ├── translator.cpp ├── vcall.cpp ├── vex.cpp ├── vtable_file.cpp ├── vtable_hierarchy.cpp ├── vtable_update.cpp └── vtv_vcall_gt.cpp /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 2.8 FATAL_ERROR) 3 | project(marx) 4 | 5 | set(CMAKE_CXX_COMPILER "//usr/bin/clang++-3.6") 6 | 7 | add_definitions("-std=c++11") 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror -pedantic -Wextra") 9 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wshadow -Wpointer-arith -Wcast-qual") 10 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wstrict-prototypes") 11 | 
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-braces") 12 | 13 | set(CMAKE_BUILD_TYPE Release) 14 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/build") 15 | 16 | file(GLOB SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp) 17 | file(GLOB HDR_FILES ${CMAKE_CURRENT_SOURCE_DIR}/include/*.h) 18 | 19 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) 20 | 21 | add_library(lib_vex STATIC IMPORTED GLOBAL) 22 | set_property(TARGET lib_vex PROPERTY 23 | IMPORTED_LOCATION /usr/local/lib/valgrind/libvex-amd64-linux.a) 24 | 25 | add_executable(marx ${SRC_FILES} ${HDR_FILES}) 26 | target_link_libraries(marx lib_vex pthread) 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Andre Pawlowski (sqall) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Marx 2 | Uncovering Class Hierarchies in C++ Programs 3 | 4 | This repository holds the programs used for the NDSS 2017 paper [MARX: Uncovering Class Hierarchies in C++ Programs](https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/marx-uncovering-class-hierarchies-c-programs/). 5 | 6 | 7 | ## Evaluation Data 8 | 9 | The data used to evaluate Marx is available at [zenodo.org](https://zenodo.org/record/238686). 10 | 11 | 12 | ## Organization 13 | 14 | Folder structure as follows: 15 | - `include` contains the header files, 16 | - `src` contains the source code itself, 17 | - `patch` contains a patch to enable manual memory management for VEX, 18 | - `ida_export` contains an IDA script creating a `.dmp` file, 19 | - `scripts` contains helper scripts. 20 | 21 | 22 | ## Development Setup 23 | 24 | Requires CMake, at least version 2.8. As for IDEs, QtCreator works pretty well 25 | for C++ development and contains a useful debugger. 26 | 27 | When using QtCreator, simply click on "Open Project" and select 28 | `CMakeLists.txt`. It makes sense to let the build directory point to a 29 | directory called `build` inside the project's root directory (in case your IDE 30 | does not honor CMake's `RUNTIME_OUTPUT_DIRECTORY` variable). 31 | 32 | Debug builds are considerably slower but are necessary for proper debugging 33 | behavior. Make sure to set `CMAKE_BUILD_TYPE` accordingly in `CMakeLists.txt` 34 | (you can do so from within QtCreator). In desperate cases, try the option "Run 35 | CMake" from the context menu.
36 | 37 | When developing from the command line, issue the following commands from the 38 | project's root directory: 39 | ``` 40 | mkdir build && cd build 41 | cmake .. 42 | make -j{CPU_COUNT} 43 | ``` 44 | 45 | The project requires a patched version of _Valgrind_. To be more exact, only 46 | the _VEX_ sub-project is actually used and patched. 47 | 48 | Download Valgrind from [the official project page](http://valgrind.org/). We 49 | recommend checking out the subversion repository. Revision 3203 of VEX is known 50 | to work: 51 | ``` 52 | svn co svn://svn.valgrind.org/valgrind/trunk@15732 valgrind 53 | cd valgrind/VEX/ 54 | svn update -r 3203 55 | ``` 56 | 57 | Configure the project as per its installation instructions. Switch to the `VEX` 58 | directory and apply the patch found in folder `patch`: 59 | ``` 60 | cd VEX 61 | patch -p0 < ../marx/patch/heap_allocation_patch.diff 62 | ``` 63 | 64 | First configure Valgrind by issuing `./autogen.sh` and `./configure`. 65 | Then issue `make` and `make install` inside the `VEX` directory to install the 66 | VEX components. The CMake project tries to include the library 67 | `/usr/local/lib/valgrind/libvex-amd64-linux.a`. Make sure it exists. 68 | 69 | 70 | ## Usage 71 | 72 | When developing on a new binary, the first step is to export data from an IDA 73 | database. The IDAPython script found 74 | in `ida_export` creates a dump file `{BINARY_NAME}.dmp` and exports all 75 | necessary data used for the analysis in the folder the 76 | binary lies in. Remember to set the pure_virtual_addr in the IDAPython script 77 | before executing it. In case of Windows, the function is called `_purecall`. 78 | In Linux, it is called `__cxa_pure_virtual`. 79 | 80 | After exporting all data, a config file for Marx has to be created manually. 
81 | A config file looks like the following: 82 | ``` 83 | MODULENAME filezilla 84 | TARGETDIR ../tests/filezilla/ 85 | FORMAT ELF64 86 | NEWOPERATORS 2 431F80 432C00 87 | EXTERNALMODULES 8 ../tests/libwx_gtk2u_aui/libwx_gtk2u_aui-3.1.so.0.0.0 ../tests/libwx_gtk2u_xrc/libwx_gtk2u_xrc-3.1.so.0.0.0 ../tests/libwx_gtk2u_adv/libwx_gtk2u_adv-3.1.so.0.0.0 ../tests/libwx_gtk2u_core/libwx_gtk2u_core-3.1.so.0.0.0 ../tests/libwx_baseu_net/libwx_baseu_net-3.1.so.0.0.0 ../tests/libwx_baseu/libwx_baseu-3.1.so.0.0.0 ../tests/libwx_gtk2u_html/libwx_gtk2u_html-3.1.so.0.0.0 ../tests/libwx_baseu_xml/libwx_baseu_xml-3.1.so.0.0.0 88 | ``` 89 | 90 | Further examples of config files can be seen in the evaluation data at [zenodo.org](https://zenodo.org/record/238686). 91 | 92 | When the config file is created, Marx can be executed by issuing the following command: 93 | ``` 94 | ./marx ../tests/filezilla/config.cfg 95 | ``` 96 | 97 | Afterwards, the IDAPython script found in `ida_import` can be used to import the analyzed data back to IDA. 98 | 99 | NOTE: Windows binaries have to be loaded at base address 0x0 (or rebased) 100 | in IDA before exporting them. Also, the IDAPython script only supports Windows 101 | binaries which are compiled with RTTI. Furthermore, specific functions 102 | have to be blacklisted in Windows binaries 103 | (because of compiler optimizations which would cause a lot of false-positives 104 | during the analysis) that are in multiple vtables but do not belong together. 105 | This is the case, for example, for short functions that just zero a 106 | register and do nothing more. For further details, see the helper script 107 | `ida_win_find_blacklist_functions.py`.
108 | -------------------------------------------------------------------------------- /ida_import/ida_import.py: -------------------------------------------------------------------------------- 1 | 2 | import marx 3 | import os 4 | import re 5 | from sys import stdout 6 | 7 | # IDA imports 8 | from idaapi import add_dref, dr_O, Form 9 | from idc import MakeComm, MakeQword, Comment 10 | # from idautils import DataRefsFrom 11 | # from idautils import Modules as ida_Modules 12 | 13 | # Number of bytes of an address 14 | WORD_BYTE_COUNT = 8 15 | 16 | 17 | class MarxIDAImportForm(Form): 18 | def __init__(self): 19 | Form.__init__(self, r"""Marx IDA Import Script 20 | 21 | <#Select a hierarchy file to open# Hierarchy File:{iHierarchyFileOpen}> 22 | <#Select a new operators file to open#New Operators File:{iNewOpFileOpen}> 23 | <#Select a vcalls file to open# Vcalls File:{iVcallFileOpen}> 24 | <#Select a Vtables file to open# Vtables File:{iVTablesFileOpen}> 25 | Class hierarchies {cHierarchies}> 26 | """, { 27 | 'iHierarchyFileOpen': Form.FileInput(open=True, value="*.hierarchy"), 28 | 'iNewOpFileOpen': Form.FileInput(open=True, value="*.new_operators"), 29 | 'iVcallFileOpen': Form.FileInput(open=True, value="*.vcalls_extended"), 30 | 'iVTablesFileOpen': Form.FileInput(open=True, value="*_vtables.txt"), 31 | 'cHierarchies': Form.ChkGroupControl(("rAllowSingleClassHierarchies",)) 32 | }) 33 | 34 | 35 | def vtable_hierarchy_to_ida_db(marx_module): 36 | for hierarchy in marx_module.class_hierarchies: 37 | for vtable in hierarchy.vtables: 38 | new_comment = "Begin of vtable - Class_{:X}, part of ClassHierarchy_{:d}".format(vtable.address, hierarchy.number) 39 | comment = Comment(vtable.address) or "" 40 | # Check if there is already a comment (with the same content) 41 | if new_comment not in comment: 42 | MakeComm(vtable.address, new_comment + comment) 43 | MakeQword(vtable.address) 44 | 45 | 46 | def new_operators_to_ida_db(marx_module): 47 | for new_op in 
marx_module.new_operators.itervalues(): 48 | comment = Comment(new_op.address) 49 | if new_op.class_hierarchy: 50 | # Check if there is already a comment, do nothing if there is already a comment 51 | if not comment: 52 | MakeComm(new_op.address, 53 | "New operator - Size: {:d}, ".format(new_op.size) + 54 | "ClassHierarchy_{:d}".format( 55 | new_op.class_hierarchy.number)) 56 | 57 | # For each vtable of an object which could be constructed by this new operator 58 | for vtable in new_op.class_hierarchy.vtables: 59 | # Add references from new operator address to vtable address 60 | add_dref(new_op.address, vtable.address, dr_O) 61 | else: 62 | # Check if there is already a comment, do nothing if there is already a comment 63 | if not comment: 64 | MakeComm(new_op.address, "New operator - Size: {:d}, no class info available".format(new_op.size)) 65 | 66 | 67 | def vcalls_extended_to_ida_db(marx_module): 68 | target_addresses = set() 69 | for vcall in marx_module.vcalls.itervalues(): 70 | comment = Comment(vcall.address) 71 | if vcall.class_hierarchy: 72 | # For each vtable of an object which is possible at this vcall 73 | for vtable in vcall.class_hierarchy.vtables: 74 | 75 | # Add reference from vcall address to target function address (resolves icall) 76 | target_function = vtable.functions.get(vcall.index, None) 77 | if target_function: 78 | add_dref(vcall.address, target_function.address, dr_O) 79 | target_addresses.add(target_function.address) 80 | 81 | # Check if there is already a comment, do nothing if there is already a comment 82 | if not comment: 83 | MakeComm(vcall.address, 84 | "Vcall - vtable index: {:d}, ".format(vcall.index) + 85 | "ClassHierarchy_{:d}\n".format( 86 | vcall.class_hierarchy.number) + 87 | "\n".join( 88 | map(lambda target_address: "Possible target: 0x{:X}".format(target_address), target_addresses))) 89 | target_addresses.clear() 90 | 91 | else: 92 | # Check if there is already a comment, do nothing if there is already a comment 93 | if 
not comment: 94 | MakeComm(vcall.address, "Vcall - vtable index: {:d}, no class info available".format(vcall.index)) 95 | 96 | 97 | def vtables_to_ida_db(marx_module): 98 | for vtable in marx_module.vtables.itervalues(): 99 | vtable_entry_address = 0 100 | for index, target_function in vtable.functions.iteritems(): 101 | vtable_entry_address = vtable.address + (index * WORD_BYTE_COUNT) 102 | MakeQword(vtable_entry_address) 103 | 104 | if target_function.address: 105 | # Add reference from vtable entry address to target function address 106 | add_dref(vtable_entry_address, target_function.address, dr_O) 107 | # else: 108 | # MakeComm("Unknown target function.") 109 | 110 | # Add comment at the end of the vtable 111 | if vtable_entry_address and vtable.class_hierarchy: 112 | MakeComm(vtable_entry_address, 113 | "End of vtable - Class_{:X}, ".format(vtable.address) + 114 | "part of ClassHierarchy_{:d}".format( 115 | vtable.class_hierarchy.number)) 116 | 117 | 118 | def ida_main(): 119 | # # Get IDA's module representation 120 | # ida_modules_dict = {module.name : module for module in ida_Modules()} 121 | 122 | # Create form object 123 | form = MarxIDAImportForm() 124 | # Compile (in order to populate the controls) 125 | form.Compile() 126 | 127 | # Execute the form 128 | if form.Execute() == 1: 129 | # Get file paths set in form 130 | hierarchy_file_path = form.iHierarchyFileOpen.value 131 | new_operators_file_path = form.iNewOpFileOpen.value 132 | vcalls_extended_file_path = form.iVcallFileOpen.value 133 | vtables_file_path = form.iVTablesFileOpen.value 134 | marx_module = None 135 | 136 | try: 137 | # Parsing hierarchy file 138 | with open(hierarchy_file_path, "r") as f: 139 | marx_module = marx.parse_hierarchy(f) 140 | except IOError: 141 | print "Could not open hierarchy file: {:s}".format(hierarchy_file_path) 142 | 143 | try: 144 | # Parsing new_operators file 145 | with open(new_operators_file_path, "r") as f: 146 | marx_module = marx.parse_new_operators(f) 147 
| except IOError: 148 | print "Could not open new_operators file: {:s}".format(new_operators_file_path) 149 | 150 | try: 151 | # Parsing vcalls_extended file 152 | with open(vcalls_extended_file_path, "r") as f: 153 | marx_module = marx.parse_vcalls_extended(f) 154 | except IOError: 155 | print "Could not open vcalls_extended file: {:s}".format(vcalls_extended_file_path) 156 | 157 | try: 158 | # Parsing vtables file 159 | with open(vtables_file_path, "r") as f: 160 | marx_module = marx.parse_vtables(f) 161 | except IOError: 162 | print "Could not open vtables file: {:s}".format(vtables_file_path) 163 | 164 | # Toggle allow_false_positives 165 | marx.allow_single_class_hierarchies = bool(form.cHierarchies.value) 166 | 167 | # Add comments to vtables 168 | vtable_hierarchy_to_ida_db(marx_module) 169 | # Add data references and comments to new operators 170 | new_operators_to_ida_db(marx_module) 171 | # Add data references and comments to vcalls and vtables 172 | vcalls_extended_to_ida_db(marx_module) 173 | # Add data references to vtables 174 | vtables_to_ida_db(marx_module) 175 | 176 | 177 | ida_main() 178 | -------------------------------------------------------------------------------- /include/amd64.h: -------------------------------------------------------------------------------- 1 | #ifndef FOO_AMD64_H 2 | #define FOO_AMD64_H 3 | 4 | #include "expression.h" 5 | #include "amd64_registers.h" 6 | 7 | #include 8 | 9 | static const auto register_rip = std::make_shared(OFFB_RIP); 10 | static const auto register_rsp = std::make_shared(OFFB_RSP); 11 | 12 | static const auto register_rax = std::make_shared(OFFB_RAX); 13 | static const auto register_rbx = std::make_shared(OFFB_RBX); 14 | static const auto register_rcx = std::make_shared(OFFB_RCX); 15 | static const auto register_rdx = std::make_shared(OFFB_RDX); 16 | 17 | static const auto register_rbp = std::make_shared(OFFB_RBP); 18 | static const auto register_rsi = std::make_shared(OFFB_RSI); 19 | static const auto 
register_rdi = std::make_shared(OFFB_RDI); 20 | 21 | static const auto register_r8 = std::make_shared(OFFB_R8); 22 | static const auto register_r9 = std::make_shared(OFFB_R9); 23 | static const auto register_r10 = std::make_shared(OFFB_R10); 24 | static const auto register_r11 = std::make_shared(OFFB_R11); 25 | static const auto register_r12 = std::make_shared(OFFB_R12); 26 | static const auto register_r13 = std::make_shared(OFFB_R13); 27 | static const auto register_r14 = std::make_shared(OFFB_R14); 28 | static const auto register_r15 = std::make_shared(OFFB_R15); 29 | 30 | static const std::shared_ptr system_v_arguments[] = { 31 | register_rdi, 32 | register_rsi, 33 | register_rdx, 34 | register_rcx, 35 | register_r8, 36 | register_r9, 37 | }; 38 | 39 | static const std::shared_ptr system_v_scratch[] = { 40 | register_rdi, 41 | register_rsi, 42 | register_rdx, 43 | register_rcx, 44 | register_r8, 45 | register_r9, 46 | register_r10, 47 | register_r11, 48 | }; 49 | 50 | static const std::shared_ptr system_v_preserved[] = { 51 | register_rbx, 52 | register_rsp, 53 | register_rbp, 54 | register_r12, 55 | register_r13, 56 | register_r14, 57 | register_r15, 58 | }; 59 | 60 | static const std::shared_ptr msvc_arguments[] = { 61 | register_rcx, 62 | register_rdx, 63 | register_r8, 64 | register_r9, 65 | }; 66 | 67 | static const std::shared_ptr msvc_scratch[] = { 68 | register_rcx, 69 | register_rdx, 70 | register_r8, 71 | register_r9, 72 | register_r10, 73 | register_r11, 74 | }; 75 | 76 | static const std::shared_ptr msvc_preserved[] = { 77 | register_rbx, 78 | register_rsp, 79 | register_rbp, 80 | register_rdi, 81 | register_rsi, 82 | register_r12, 83 | register_r13, 84 | register_r14, 85 | register_r15, 86 | }; 87 | 88 | #endif // FOO_AMD64_H 89 | -------------------------------------------------------------------------------- /include/amd64_registers.h: -------------------------------------------------------------------------------- 1 | #ifndef AMD64_REGISTERS_H 2 
| #define AMD64_REGISTERS_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | extern "C" { 9 | #include 10 | #include 11 | } 12 | 13 | #define OFFB_RAX offsetof(VexGuestAMD64State, guest_RAX) 14 | #define OFFB_RBX offsetof(VexGuestAMD64State, guest_RBX) 15 | #define OFFB_RCX offsetof(VexGuestAMD64State, guest_RCX) 16 | #define OFFB_RDX offsetof(VexGuestAMD64State, guest_RDX) 17 | #define OFFB_RSP offsetof(VexGuestAMD64State, guest_RSP) 18 | #define OFFB_RBP offsetof(VexGuestAMD64State, guest_RBP) 19 | #define OFFB_RSI offsetof(VexGuestAMD64State, guest_RSI) 20 | #define OFFB_RDI offsetof(VexGuestAMD64State, guest_RDI) 21 | #define OFFB_R8 offsetof(VexGuestAMD64State, guest_R8) 22 | #define OFFB_R9 offsetof(VexGuestAMD64State, guest_R9) 23 | #define OFFB_R10 offsetof(VexGuestAMD64State, guest_R10) 24 | #define OFFB_R11 offsetof(VexGuestAMD64State, guest_R11) 25 | #define OFFB_R12 offsetof(VexGuestAMD64State, guest_R12) 26 | #define OFFB_R13 offsetof(VexGuestAMD64State, guest_R13) 27 | #define OFFB_R14 offsetof(VexGuestAMD64State, guest_R14) 28 | #define OFFB_R15 offsetof(VexGuestAMD64State, guest_R15) 29 | 30 | #define OFFB_RIP offsetof(VexGuestAMD64State, guest_RIP) 31 | #define OFFB_RSP offsetof(VexGuestAMD64State, guest_RSP) 32 | 33 | //! Register offsets used to encode x86_64 registers by VEX. 
34 | static const unsigned int AMD64_REGISTERS[] = { 35 | OFFB_RAX, OFFB_RBX, OFFB_RCX, OFFB_RDX, OFFB_RSP, OFFB_RBP, OFFB_RSI, 36 | OFFB_RDI, OFFB_R8, OFFB_R9, OFFB_R10, OFFB_R11, OFFB_R12, OFFB_R13, 37 | OFFB_R14, OFFB_R15 38 | }; 39 | 40 | static std::map AMD64_DISPLAY_REGISTERS = []{ 41 | std::map result; 42 | 43 | result[OFFB_RAX] = "rax"; 44 | result[OFFB_RBX] = "rbx"; 45 | result[OFFB_RCX] = "rcx"; 46 | result[OFFB_RDX] = "rdx"; 47 | result[OFFB_RSP] = "rsp"; 48 | result[OFFB_RBP] = "rbp"; 49 | result[OFFB_RSI] = "rsi"; 50 | result[OFFB_RDI] = "rdi"; 51 | result[OFFB_R8] = "r8"; 52 | result[OFFB_R9] = "r9"; 53 | result[OFFB_R10] = "r10"; 54 | result[OFFB_R11] = "r11"; 55 | result[OFFB_R12] = "r12"; 56 | result[OFFB_R13] = "r13"; 57 | result[OFFB_R14] = "r14"; 58 | result[OFFB_R15] = "r15"; 59 | result[OFFB_RIP] = "rip"; 60 | 61 | return result; 62 | }(); 63 | 64 | #endif // AMD64_REGISTERS_H 65 | -------------------------------------------------------------------------------- /include/base_analysis.h: -------------------------------------------------------------------------------- 1 | #ifndef BASE_ANALYSIS_H 2 | #define BASE_ANALYSIS_H 3 | 4 | #include "function.h" 5 | #include "block.h" 6 | #include "memory.h" 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | typedef std::map PathStates; 13 | typedef std::vector States; 14 | 15 | /*! 16 | * \brief (Abstract) base class for analyses run on a function. 17 | */ 18 | class BaseAnalysis { 19 | protected: 20 | const Function &_function; 21 | 22 | const FileFormatType _file_format; 23 | 24 | PathStates _states; 25 | PathStates _side_effects; 26 | 27 | State _initial_state; 28 | States _semantics; 29 | 30 | /*! Set by `BaseAnalysis::on_traversal` if the current block's 31 | * terminator is a `call`; `nullptr` otherwise. If set, contains the 32 | * symbol corresponding to the (unique) formatted return value. 
33 | */ 34 | std::shared_ptr _current_return_value; 35 | 36 | public: 37 | BaseAnalysis(const Function &function, 38 | FileFormatType file_format); 39 | BaseAnalysis(const Function &function, 40 | const State &initial_state, 41 | FileFormatType file_format); 42 | 43 | BaseAnalysis(const BaseAnalysis&) = delete; 44 | void operator=(const BaseAnalysis&) = delete; 45 | 46 | bool obtain(); 47 | 48 | protected: 49 | /*! Pure virtual (implemented by sub-class). This function is called before 50 | * the function is actually traversed and may be used for initialization 51 | * work. 52 | */ 53 | virtual void pre_traversal() = 0; 54 | 55 | /*! Pure virtual (implemented by sub-class). This function is called after 56 | * the traversal and may be used for post-processing of the collected 57 | * results. */ 58 | virtual void post_traversal() = 0; 59 | 60 | /*! Pure virtual (implemented by sub-class). This function is called on each 61 | * basic block on the given path. Accumulates the analysis results. 62 | */ 63 | virtual bool in_traversal(const Path&, const Block&, State&) = 0; 64 | 65 | /*! Pure virtual (implemented by sub-class). This function is called on each 66 | * basic block during path construction (i.e., if the lightweight path 67 | * policy is active). Determines whether the given block is "interesting" 68 | * and should be traversed by the generated paths. 69 | */ 70 | virtual bool block_predicate(const Block&) = 0; 71 | 72 | /*! Pure virtual (implemented by sub-class). This function is called after 73 | * a single path has been fully traversed. 
74 | */ 75 | virtual void path_traversed(const Path&) = 0; 76 | 77 | private: 78 | bool on_traversal(const Path &path, const Block &block); 79 | }; 80 | 81 | #endif // BASE_ANALYSIS_H 82 | -------------------------------------------------------------------------------- /include/blacklist_functions.h: -------------------------------------------------------------------------------- 1 | #ifndef BLACKLIST_FUNCTIONS_H 2 | #define BLACKLIST_FUNCTIONS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | typedef std::set BlacklistFuncsSet; 13 | 14 | const BlacklistFuncsSet import_blacklist_funcs(const std::string &target_file); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /include/block.h: -------------------------------------------------------------------------------- 1 | #ifndef BLOCK_H 2 | #define BLOCK_H 3 | 4 | #include 5 | #include 6 | 7 | #include "state.h" 8 | 9 | extern "C" { 10 | #include 11 | } 12 | 13 | /*! 14 | * \brief Enumerates all possible types of instructions terminating a basic 15 | * block (so called _terminators_). 16 | * 17 | * A terminator is _unresolved_ if its target cannot be determined statically. 18 | */ 19 | enum TerminatorType { 20 | //! Terminator is an instruction whose target could not be resolved 21 | //! (currently only set for unresolvable conditional or unconditional 22 | //! jumps). 23 | TerminatorUnresolved = 0, 24 | 25 | //! The next instruction is reached using a fall-through edge. 26 | TerminatorFallthrough, 27 | 28 | //! Terminator is a `ret` instruction. 29 | TerminatorReturn, 30 | 31 | //! Terminator is a `jmp` instruction. 32 | TerminatorJump, 33 | 34 | //! Terminator is a `call` instruction. 35 | TerminatorCall, 36 | 37 | //! Terminator is a `call` but its target cannot be resolved statically. 38 | //! Used separately to distinguish from unresolved jumps. 39 | TerminatorCallUnresolved, 40 | 41 | //! 
Terminator is a resolved conditional jump. 42 | TerminatorJcc, 43 | 44 | //! Terminator points to a non-returning target (such as `exit`). 45 | TerminatorNoReturn, 46 | }; 47 | 48 | /*! 49 | * \brief Structure to describe a terminating instruction. 50 | */ 51 | struct Terminator { 52 | //! Type of the terminator. \see `TerminatorType` 53 | TerminatorType type; 54 | 55 | //! Fall-through address of the terminator. This value is set for calls, 56 | //! conditional jumps and fall-throughs. `nullptr`, if not set. 57 | uintptr_t fall_through; 58 | 59 | //! Target address of the terminator. Set for resolved jumps and calls, 60 | //! else `nullptr`. 61 | uintptr_t target; 62 | 63 | //! Boolean value indicating whether the given (resolvable) jump is a tail 64 | //! jump inlining another function. `false` for any other type. 65 | //! \see `Translator::detect_tail_jumps` 66 | bool is_tail; 67 | }; 68 | 69 | /*! 70 | * \brief Class tying together the underlying VEX block and additional 71 | * information such as block address and terminator. 72 | */ 73 | class Block { 74 | private: 75 | uintptr_t _address; 76 | IRSB *_vex_block; 77 | Terminator _terminator; 78 | 79 | public: 80 | Block(uintptr_t address, IRSB *block, const Terminator &terminator); 81 | 82 | /*! 83 | * \brief get_address 84 | * \return Returns the block's virtual address. 85 | */ 86 | uintptr_t get_address() const { 87 | return _address; 88 | } 89 | 90 | /*! 91 | * \brief get_last_address 92 | * \return Returns the block's last virtual address 93 | * or 0 in case of an error. 94 | */ 95 | uint64_t get_last_address() const; 96 | 97 | /*! 98 | * \brief get_terminator 99 | * \return Returns information about the terminator. 100 | */ 101 | const Terminator &get_terminator() const { 102 | return _terminator; 103 | } 104 | 105 | /*! 106 | * \brief get_vex_block 107 | * \return Returns a (read-only) reference to the underlying VEX block.
108 | */ 109 | const IRSB &get_vex_block() const { 110 | return *_vex_block; 111 | } 112 | 113 | void retrieve_semantics(State &state) const; 114 | 115 | private: 116 | }; 117 | 118 | using BlockPredicate = std::function; 119 | 120 | #endif // BLOCK_H 121 | -------------------------------------------------------------------------------- /include/block_semantics.h: -------------------------------------------------------------------------------- 1 | #ifndef BLOCK_SEMANTICS_H 2 | #define BLOCK_SEMANTICS_H 3 | 4 | #include "expression.h" 5 | #include "state.h" 6 | 7 | extern "C" { 8 | #include 9 | #include 10 | } 11 | 12 | #include 13 | #include 14 | 15 | #define arg_out 16 | class BlockSemantics; 17 | 18 | typedef ExpressionPtr (BlockSemantics::*ExpressionParser)(const IRExpr&); 19 | typedef bool (BlockSemantics::*StatementHandler)(const IRStmt&); 20 | 21 | // TODO: Handle calls, calling conventions (System-V for now). 22 | // FIXME: This class is infected with shared_ptr:s, consider boost::variant. 23 | 24 | class Block; 25 | 26 | /*! 27 | * \brief Class computing the effective semantics of a given `Block`. 28 | * 29 | * \todo Allow sub-classes of this class at every point where this class is 30 | * currently used. This enables a user to implement custom semantics. 31 | */ 32 | class BlockSemantics { 33 | private: 34 | State &_state; 35 | const Block &_block; 36 | std::shared_ptr _unknown; 37 | 38 | static const std::map _expression_parser; 39 | static const std::map _statement_handler; 40 | 41 | public: 42 | BlockSemantics() = delete; 43 | BlockSemantics(const BlockSemantics&) = delete; 44 | void operator=(const BlockSemantics&) = delete; 45 | 46 | BlockSemantics(const Block &block, State &initial_state); 47 | 48 | /*! 49 | * \brief Getter to access the computed state. 50 | * \return Returns the computed semantics in form of a `State` reference.
51 | */ 52 | const State &get_state() const { 53 | return _state; 54 | } 55 | 56 | private: 57 | bool extract_semantics(const IRSB &block); 58 | 59 | ExpressionPtr parse_expression(const IRExpr &expression); 60 | bool handle_statement(const IRStmt &statement); 61 | 62 | uint64_t get_mask(uint8_t size) const; 63 | bool get_size(const IRType &type, arg_out uint8_t &size) const; 64 | bool parse_type(const IRType &type, arg_out uint8_t &size, 65 | arg_out uint64_t &mask) const; 66 | 67 | // Expression parsers. 68 | ExpressionPtr parse_get(const IRExpr &expression); 69 | ExpressionPtr parse_geti(const IRExpr &expression); 70 | 71 | ExpressionPtr parse_rdtmp(const IRExpr &expression); 72 | 73 | ExpressionPtr parse_qop(const IRExpr &expression); 74 | ExpressionPtr parse_triop(const IRExpr &expression); 75 | ExpressionPtr parse_binop(const IRExpr &expression); 76 | ExpressionPtr parse_unop(const IRExpr &expression); 77 | 78 | ExpressionPtr parse_load(const IRExpr &expression); 79 | ExpressionPtr parse_const(const IRExpr &expression); 80 | 81 | ExpressionPtr parse_ccall(const IRExpr &expression); 82 | ExpressionPtr parse_ite(const IRExpr &expression); 83 | 84 | // Statement handlers. 85 | bool handle_noop(const IRStmt &statement); 86 | bool handle_put(const IRStmt &statement); 87 | bool handle_puti(const IRStmt &statement); 88 | bool handle_wrtmp(const IRStmt &statement); 89 | bool handle_store(const IRStmt &statement); 90 | bool handle_storeg(const IRStmt &statement); 91 | bool handle_loadg(const IRStmt &statement); 92 | bool handle_abi_hint(const IRStmt &statement); 93 | }; 94 | 95 | #endif // BLOCK_SEMANTICS_H 96 | -------------------------------------------------------------------------------- /include/dump_file.h: -------------------------------------------------------------------------------- 1 | #ifndef DUMP_FILE_H 2 | #define DUMP_FILE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | /*! 
10 | * \brief Structure containing information about a serialized block in the 11 | * `.dmp` file. 12 | */ 13 | struct BlockDescriptor { 14 | uintptr_t block_start; 15 | uintptr_t block_end; 16 | uintptr_t instruction_count; 17 | }; 18 | 19 | typedef std::vector FunctionBlocks; 20 | typedef std::map ParsedFunctions; 21 | typedef std::set NonReturningFunctions; 22 | 23 | /*! 24 | * \brief Class collecting the information that was produced by the IDA 25 | * exporting script. 26 | * 27 | * For a given `.dmp` file (produced by the exporter), an optional 28 | * `.dmp.no-return` file is supported which contains information about 29 | * non-returning functions in the processed binary. 30 | * 31 | * \todo This can be handled in a better manner. 32 | */ 33 | class DumpFile { 34 | private: 35 | ParsedFunctions _functions; 36 | NonReturningFunctions _functions_no_return; 37 | 38 | public: 39 | DumpFile(const std::string &dump_file); 40 | 41 | /*! 42 | * \brief Returns all known functions. 43 | * \return Returns a `map` with all known functions (address as key, 44 | * `Function` object as value). 45 | */ 46 | const ParsedFunctions &get_functions() const { 47 | return _functions; 48 | } 49 | 50 | /*! 51 | * \brief Returns known, non-returning functions. 52 | * \return Returns a `set` containing the addresses of all known, 53 | * non-returning functions. 
54 | */ 55 | const NonReturningFunctions &get_non_returning() const { 56 | return _functions_no_return; 57 | } 58 | 59 | private: 60 | bool parse(const std::string &dump_file); 61 | bool parse_no_return(const std::string &no_return_file); 62 | }; 63 | 64 | #endif // DUMP_FILE_H 65 | -------------------------------------------------------------------------------- /include/external_functions.h: -------------------------------------------------------------------------------- 1 | #ifndef EXTERNAL_FUNCTIONS_H 2 | #define EXTERNAL_FUNCTIONS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | 13 | struct ExternalFunction { 14 | uint32_t index; 15 | uint64_t addr; 16 | std::string name; 17 | std::string module_name; 18 | }; 19 | 20 | 21 | typedef std::vector ExternalFunctionVector; 22 | typedef std::map ExternalFunctionMap; 23 | 24 | 25 | 26 | class ExternalFunctions { 27 | private: 28 | ExternalFunctionVector _external_functions; 29 | ExternalFunctionMap _external_functions_map; 30 | uint32_t _index = 0; 31 | 32 | bool _is_finalized = false; 33 | 34 | public: 35 | 36 | /*! 37 | * \brief Returns `true` if the external functions structure is finalized. 38 | * \return Returns `true` if the external functions structure is finalized. 39 | */ 40 | bool is_finalized() const; 41 | 42 | 43 | /*! 44 | * \brief Parses a given functions file and builds internal 45 | * functions structure. 46 | */ 47 | bool parse(const std::string &funcs_file); 48 | 49 | 50 | /*! 51 | * \brief Finalizes the external functions structures. 52 | * 53 | * This function finalizes the external functions structures. It can only 54 | * be used once all external functions files are imported via the `parse` 55 | * function. After `finalize` was executed, no changes to the 56 | * external functions structures are possible. 57 | */ 58 | void finalize(); 59 | 60 | 61 | /*! 62 | * \brief Returns a pointer to the external function given by the name. 
63 | * \return Returns a pointer to the external function given by the name 64 | * or null if it was not found. 65 | */ 66 | const ExternalFunction* get_external_function( 67 | const std::string &name) const; 68 | 69 | 70 | /*! 71 | * \brief Returns a pointer to the external function given by the module 72 | * name and address. 73 | * \return Returns a pointer to the external function given by the module 74 | * name and address, or null if it was not found. 75 | */ 76 | const ExternalFunction* get_external_function( 77 | const std::string &module_name, 78 | uint64_t func_addr) const; 79 | }; 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | #endif // EXTERNAL_FUNCTIONS_H 97 | -------------------------------------------------------------------------------- /include/function.h: -------------------------------------------------------------------------------- 1 | #ifndef FUNCTION_H 2 | #define FUNCTION_H 3 | 4 | #include "block.h" 5 | #include "expression.h" 6 | #include "block_semantics.h" 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | extern "C" { 15 | #include 16 | } 17 | 18 | /*! 19 | * \brief A path describing how we reached a basic block in the function. 20 | * 21 | * A path is merely a vector of `bool`s. Each entry denotes how control flow 22 | * changed at each terminator/basic block, starting at the beginning of the 23 | * traversal (most commonly the function's entry point). Following a 24 | * fall-through or an unconditional jump is recorded using `true`, whereas 25 | * the target of a conditional jump is recorded as `false`. 26 | */ 27 | typedef std::vector Path; 28 | typedef std::map> BlockMap; 29 | 30 | /*! 31 | * \brief A function called on each visited basic block in a traversal. 32 | * 33 | * \see `Function::traverse` 34 | */ 35 | typedef std::function 36 | TraversalCallback; 37 | 38 | /*! 39 | * \brief A function called after a path has been fully traversed. 
40 | * 41 | * \see `Function::traverse` 42 | */ 43 | typedef std::function PathCallback; 44 | 45 | class Translator; 46 | const uint8_t BRANCH_THRESHOLD = 0; 47 | 48 | /*! 49 | * \brief Class representing a function translated to VEX. 50 | * 51 | * Objects of this class are to be instantiated by the `Translator` class (hence 52 | * the `friend` relationship). 53 | */ 54 | class Function { 55 | private: 56 | uintptr_t _entry; 57 | uint8_t _branch_threshold = BRANCH_THRESHOLD; 58 | BlockMap _function_blocks; 59 | 60 | public: 61 | Function() = default; 62 | Function(uintptr_t entry, uint8_t branch_threshold=BRANCH_THRESHOLD); 63 | 64 | /*! 65 | * \brief Returns the function's entry address. 66 | * \return Returns the first virtual address in the function. 67 | */ 68 | uintptr_t get_entry() const { 69 | return _entry; 70 | } 71 | 72 | bool can_be_fully_traversed() const; 73 | 74 | // FIXME: Cache this. 75 | /*! 76 | * \brief Returns the addresses of all known blocks. 77 | * \return Returns a vector of addresses. 78 | */ 79 | std::vector get_block_addresses() const { 80 | std::vector result; 81 | for(const auto &kv : _function_blocks) { 82 | result.push_back(kv.first); 83 | } 84 | 85 | return result; 86 | } 87 | 88 | // FIXME: Cache this. 89 | /*! 90 | * \brief Returns the addresses of blocks returning from the function 91 | * (i.e., those with a terminator of type `TerminatorReturn`). 92 | * \return Returns a vector of addresses. 93 | */ 94 | std::vector get_return_block_addresses() const { 95 | std::vector result; 96 | for(const auto &kv : _function_blocks) { 97 | if(kv.second->get_terminator().type == TerminatorReturn) { 98 | result.push_back(kv.first); 99 | } 100 | } 101 | 102 | return result; 103 | } 104 | 105 | /*! 106 | * \brief Returns all blocks. 107 | * \return Returns a map containing all blocks of the function (key is the 108 | * block's address). 
109 | */ 110 | const BlockMap &get_blocks() const { 111 | return _function_blocks; 112 | } 113 | 114 | bool traverse(const TraversalCallback &block_callback, 115 | const BlockPredicate &block_predicate, 116 | const PathCallback &path_callback, 117 | void *user_defined=nullptr) const; 118 | 119 | private: 120 | bool traverser(const TraversalCallback &callback, 121 | void *user_defined=nullptr) const; 122 | 123 | void add_block(uintptr_t address, IRSB *block, 124 | const Terminator &terminator); 125 | 126 | friend class Translator; 127 | }; 128 | 129 | #endif // FUNCTION_H 130 | -------------------------------------------------------------------------------- /include/got.h: -------------------------------------------------------------------------------- 1 | #ifndef GOT_H 2 | #define GOT_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | typedef std::map GotMap; 11 | 12 | GotMap import_got(const std::string &target_file); 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /include/idata.h: -------------------------------------------------------------------------------- 1 | #ifndef IDATA_H 2 | #define IDATA_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | typedef std::map IDataMap; 12 | 13 | IDataMap import_idata(const std::string &target_file); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /include/mapped_elf.h: -------------------------------------------------------------------------------- 1 | #ifndef MAPPED_ELF_H 2 | #define MAPPED_ELF_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | #include "memory.h" 12 | 13 | /*! 14 | * \brief Class holding information about a memory-mapped ELF file. 
15 | */ 16 | class MappedElf : public Memory { 17 | private: 18 | std::vector _buffer; 19 | 20 | ElfW(Ehdr) *_e_header = nullptr; 21 | ElfW(Phdr) *_p_header = nullptr; 22 | 23 | uintptr_t _base = 0; 24 | size_t _size = 0; 25 | 26 | public: 27 | MappedElf(const MappedElf&) = delete; 28 | virtual void operator=(const MappedElf&) = delete; 29 | 30 | MappedElf(const std::string &elf_file); 31 | virtual const uint8_t *operator[](const uintptr_t index) const; 32 | 33 | /*! 34 | * \brief Returns the begin of the executable `LOAD` segment. 35 | * \return Returns a pointer to the segment's begin. 36 | */ 37 | virtual uintptr_t get_load_begin() const { 38 | return _base; 39 | } 40 | 41 | /*! 42 | * \brief Returns the end of the executable `LOAD` segment. 43 | * \return Returns a pointer to the segment's end. 44 | */ 45 | virtual uintptr_t get_load_end() const { 46 | return _base + _size; 47 | } 48 | }; 49 | 50 | #endif // MAPPED_ELF_H 51 | -------------------------------------------------------------------------------- /include/mapped_pe.h: -------------------------------------------------------------------------------- 1 | #ifndef MAPPED_PE_H 2 | #define MAPPED_PE_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "pe.h" 9 | 10 | #include "memory.h" 11 | 12 | /*! 13 | * \brief Class holding information about a memory-mapped PE file. 
14 | */ 15 | class MappedPe : public Memory { 16 | private: 17 | std::vector _buffer; 18 | 19 | mz_hdr *_mz_header = nullptr; 20 | pe_hdr *_pe_header = nullptr; 21 | 22 | pe32_opt_hdr *_pe32_opt_header = nullptr; 23 | pe32plus_opt_hdr *_pe32_plus_opt_header = nullptr; 24 | 25 | section_header *_text_section_header = nullptr; 26 | 27 | uintptr_t _base = 0; 28 | size_t _size = 0; 29 | uintptr_t _file_addr = 0; 30 | size_t _file_size = 0; 31 | 32 | public: 33 | MappedPe(const MappedPe&) = delete; 34 | virtual void operator=(const MappedPe&) = delete; 35 | 36 | MappedPe(const std::string &pe_file); 37 | virtual const uint8_t *operator[](const uintptr_t index) const; 38 | 39 | /*! 40 | * \brief Returns the begin of the executable `LOAD` segment. 41 | * \return Returns a pointer to the segment's begin. 42 | */ 43 | virtual uintptr_t get_load_begin() const { 44 | return _base; 45 | } 46 | 47 | /*! 48 | * \brief Returns the end of the executable `LOAD` segment. 49 | * \return Returns a pointer to the segment's end. 50 | */ 51 | virtual uintptr_t get_load_end() const { 52 | return _base + _size; 53 | } 54 | }; 55 | 56 | #endif // MAPPED_PE_H 57 | -------------------------------------------------------------------------------- /include/memory.h: -------------------------------------------------------------------------------- 1 | #ifndef MEMORY_H 2 | #define MEMORY_H 3 | 4 | /*! 5 | * \brief Enumerates all supported `File Format` types. 
6 | */ 7 | enum FileFormatType { 8 | FileFormatELF64 = 0, 9 | FileFormatPE64 = 1, 10 | FileFormatCount 11 | }; 12 | 13 | 14 | class Memory { 15 | 16 | public: 17 | 18 | Memory() {}; 19 | 20 | Memory(const Memory&) = delete; 21 | virtual void operator=(const Memory&) = delete; 22 | 23 | virtual const uint8_t *operator[](const uintptr_t index) const = 0; 24 | 25 | virtual uintptr_t get_load_begin() const = 0; 26 | 27 | virtual uintptr_t get_load_end() const = 0; 28 | }; 29 | 30 | #endif // MEMORY_H 31 | -------------------------------------------------------------------------------- /include/module_plt.h: -------------------------------------------------------------------------------- 1 | #ifndef MODULE_PLT_H 2 | #define MODULE_PLT_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | struct PltEntry { 11 | uint64_t addr; 12 | std::string func_name; 13 | }; 14 | 15 | 16 | typedef std::map PltMap; 17 | 18 | 19 | class ModulePlt { 20 | 21 | const std::string &_module_name; 22 | PltMap _plt_entries; 23 | 24 | private: 25 | 26 | public: 27 | ModulePlt(const std::string &module_name); 28 | 29 | 30 | /*! 31 | * \brief Parses the .plt entries file for the given module. 32 | */ 33 | bool parse(const std::string &plt_file); 34 | 35 | 36 | /*! 37 | * \brief Returns a pointer to the plt entry given by the address. 38 | * \return Returns a pointer to the plt entry given by the address 39 | * or null if it was not found. 40 | */ 41 | const PltEntry* get_plt_entry(uint64_t addr) const; 42 | 43 | 44 | /*! 45 | * \brief Returns a pointer to the plt entry given by the function name. 46 | * \return Returns a pointer to the plt entry given by the function name 47 | * or null if it was not found. 
48 | */ 49 | const PltEntry* get_plt_entry(const std::string func_name) const; 50 | 51 | }; 52 | 53 | #endif // MODULE_PLT_H 54 | -------------------------------------------------------------------------------- /include/new_operators.h: -------------------------------------------------------------------------------- 1 | #ifndef NEW_OPERATORS_H 2 | #define NEW_OPERATORS_H 3 | 4 | #include 5 | #include 6 | 7 | #include "expression.h" 8 | #include "vtable_file.h" 9 | #include "vtable_hierarchy.h" 10 | 11 | struct NewOperator { 12 | uint64_t addr; 13 | uint64_t size; 14 | ExpressionPtr expr; 15 | std::unordered_set vtbl_idxs; 16 | }; 17 | 18 | 19 | typedef std::map OperatorNewAddrMap; 20 | 21 | 22 | class NewOperators { 23 | private: 24 | 25 | const std::string &_module_name; 26 | const VTableFile &_vtable_file; 27 | const VTableHierarchies &_vtable_hierarchies; 28 | 29 | OperatorNewAddrMap _op_new_candidates; 30 | 31 | public: 32 | 33 | NewOperators(const std::string &module_name, 34 | const VTableFile &vtable_file, 35 | const VTableHierarchies &vtable_hierarchies); 36 | 37 | 38 | void add_op_new_candidate(const NewOperator &new_op_candidate); 39 | 40 | 41 | void export_new_operators(const std::string &target_dir); 42 | 43 | 44 | const OperatorNewAddrMap& get_new_operators() const; 45 | 46 | 47 | void copy_new_operators(const OperatorNewAddrMap &new_ops); 48 | }; 49 | 50 | #endif //NEW_OPERATORS_H 51 | -------------------------------------------------------------------------------- /include/path_builder.h: -------------------------------------------------------------------------------- 1 | #ifndef PATH_BUILDER_H 2 | #define PATH_BUILDER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "block.h" 12 | #include "function.h" 13 | 14 | //! A concrete path that contains the full addresses of basic blocks to visit. 15 | using ConcretePath = std::deque; 16 | 17 | //! A map relating a node to a concrete path. 
18 | using PathsByNode = std::map; 19 | 20 | const uint8_t NODE_THRESHOLD = 20; 21 | 22 | //! 23 | //! \brief Class calculating viable paths through a given `Function` (the 24 | //! "lightweight" policy used as a fallback in `Function::traverse`). 25 | //! 26 | class PathBuilder { 27 | private: 28 | const Function &_function; 29 | void *_user_defined; 30 | const uint8_t _node_threshold; 31 | 32 | public: 33 | PathBuilder(const Function &function, void *user_defined=nullptr, 34 | uint8_t node_threshold=NODE_THRESHOLD); 35 | std::set build_paths(BlockPredicate predicate) const; 36 | 37 | private: 38 | PathsByNode breadth_first(const BlockMap &blocks, uintptr_t root, 39 | BlockPredicate predicate, 40 | bool terminate_on_match=false) const; 41 | }; 42 | 43 | #endif // PATH_BUILDER_H 44 | -------------------------------------------------------------------------------- /include/return_value.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef RETURN_VALUE_H 3 | #define RETURN_VALUE_H 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include "serialization.h" 10 | #include "external_functions.h" 11 | #include "module_plt.h" 12 | #include "vtable_file.h" 13 | #include "function.h" 14 | 15 | 16 | struct VTableActive { 17 | Path path; 18 | ExpressionPtr vtbl_ptr_loc; 19 | uint32_t index; // Index of vtable. 20 | bool from_caller; 21 | bool from_callee; 22 | }; 23 | 24 | 25 | struct ReturnValue { 26 | Path path; 27 | uint64_t func_addr; // Set to 0 if function not in the current module. 28 | ExpressionPtr content; 29 | }; 30 | 31 | 32 | struct FctReturnValues { 33 | uint64_t func_addr; // Set to 0 if function not in the current module. 
34 | std::vector return_values; 35 | std::vector active_vtables; 36 | }; 37 | 38 | 39 | struct ExternalFctReturnValues { 40 | const ExternalFunction *ext_func; 41 | FctReturnValues func_return_values; 42 | }; 43 | 44 | 45 | typedef std::map FctReturnValuesMap; 46 | typedef std::map FctReturnValuesPtrMap; 47 | typedef std::vector ReturnValues; 48 | typedef std::vector ExtReturnValues; 49 | 50 | 51 | class FctReturnValuesFile { 52 | private: 53 | 54 | const std::string &_module_name; 55 | const VTableFile &_vtable_file; 56 | const ModulePlt &_module_plt; 57 | const ExternalFunctions &_external_funcs; 58 | 59 | FctReturnValuesMap _return_values_map; 60 | 61 | FctReturnValuesPtrMap _plt_return_values_ptr_map; 62 | 63 | ExtReturnValues _ext_return_values; 64 | 65 | mutable std::mutex _mtx; 66 | 67 | bool _is_finalized = false; 68 | 69 | public: 70 | 71 | FctReturnValuesFile(const std::string &module_name, 72 | const VTableFile &vtable_file, 73 | const ModulePlt &module_plt, 74 | const ExternalFunctions &external_funcs); 75 | 76 | 77 | void add_return_value(uint64_t func_addr, 78 | const ReturnValue &return_value); 79 | 80 | 81 | void add_active_vtable(uint64_t func_addr, 82 | const VTableActive &active_vtable); 83 | 84 | 85 | void export_return_values(const std::string &target_dir); 86 | 87 | 88 | void import_ext_return_values(const std::string &module_file); 89 | 90 | 91 | /*! 92 | * \brief Returns a function return values object given by .plt address. 93 | * \return Returns a function return values object pointer 94 | * or nullptr if object does not exist. 95 | */ 96 | const FctReturnValues* get_plt_return_values_ptr(uint64_t addr) const; 97 | 98 | 99 | /*! 100 | * \brief Returns a function return values object given by module name and 101 | * function address. 102 | * \return Returns a function return values object pointer 103 | * or nullptr if object does not exist. 
104 | */ 105 | const FctReturnValues* get_ext_return_values_ptr( 106 | const std::string &module_name, 107 | uint64_t func_addr) const; 108 | 109 | 110 | /*! 111 | * \brief Returns a copy of all return values objects. 112 | * \return Returns a copy of all return values objects. 113 | */ 114 | ExtReturnValues get_return_values() const; 115 | 116 | 117 | /*! 118 | * \brief Finalizes the external return values structures. 119 | * 120 | * This function finalizes the external return values structures. 121 | * It can only be used 122 | * once all external return values files are imported via the 123 | * `import_ext_return_values` function. 124 | * After `finalize` was executed, no changes to the external return values 125 | * structures are possible. 126 | */ 127 | void finalize_ext_return_values(); 128 | 129 | 130 | /*! 131 | * \brief Returns `true` if the external return values structure is 132 | * finalized. 133 | * \return Returns `true` if the external return values structure is 134 | * finalized. 
135 | */ 136 | bool is_finalized_ext_return_values() const; 137 | 138 | }; 139 | 140 | 141 | 142 | #endif 143 | -------------------------------------------------------------------------------- /include/serialization.h: -------------------------------------------------------------------------------- 1 | #ifndef SERIALIZATION_H 2 | #define SERIALIZATION_H 3 | 4 | #include "expression.h" 5 | #include "iostream" 6 | 7 | void serialize(ExpressionPtr exp, std::ostream &output); 8 | ExpressionPtr unserialize(std::istream &input); 9 | 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /include/state.h: -------------------------------------------------------------------------------- 1 | #ifndef STATE_H 2 | #define STATE_H 3 | 4 | #include "expression.h" 5 | #include "amd64.h" 6 | #include "memory.h" 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #define arg_out 17 | 18 | /*! 19 | * \brief Type that specifies how `std::shared_ptr` instances 20 | * shall be compared. 21 | * 22 | * As `State` uses an STL map to track the state of expressions, we want to 23 | * compare the stored objects by value in order to provide sane updates. 24 | */ 25 | struct ExpressionPtrComparison { 26 | bool operator()(const ExpressionPtr &lhs, const ExpressionPtr &rhs) const { 27 | return *lhs == *rhs; 28 | } 29 | }; 30 | 31 | using InternalState = std::unordered_map, ExpressionPtrComparison>; 33 | 34 | using InitialValues = std::map>; 35 | using Expressions = std::vector>; 36 | 37 | using kill_results = std::unordered_set, 38 | ExpressionPtrComparison>; 39 | 40 | /*! 41 | * \brief Class that represents a CPU state. 42 | * 43 | * Effectively, this class represents the side effects of a computation by 44 | * keeping track of the various entities modified during a symbolic run. 
It 45 | * basically wraps around an STL map and provides some convenience functions for 46 | * modifying the state. 47 | * 48 | * Keys in the state are destinations (such as temporaries, registers and memory 49 | * indirections), whereas values are the (abstract) values that are written to 50 | * said destination. An assignment of the form `key -> value` is commonly called 51 | * a _binding_ (binding the value to the key expression). 52 | * 53 | * \see `InternalState` 54 | */ 55 | class State { 56 | private: 57 | static InitialValues _initial_values; 58 | InternalState _state; 59 | 60 | std::shared_ptr _unknown; 61 | 62 | public: 63 | using iterator = InternalState::iterator; 64 | using const_iterator = InternalState::const_iterator; 65 | 66 | State(bool initialize=true); 67 | State(const State&) = default; 68 | 69 | /*! 70 | * \brief Static function that returns the initial register assignment. 71 | * \return Returns a (read-only) map, with keys being register offsets and 72 | * values the corresponding `ExpressionPtr`s. 
73 | * 74 | * \see `AMD64_REGISTERS` 75 | */ 76 | static const InitialValues &initial_values() { 77 | return _initial_values; 78 | } 79 | 80 | void set_initial_state(); 81 | void purge_scratch_registers(FileFormatType file_format); 82 | 83 | void merge(const State &other); 84 | void optimize(bool do_purge_unchanged=false); 85 | 86 | const Expressions get_memory_accesses() const; 87 | 88 | friend std::ostream &operator<<(std::ostream &stream, const State &state); 89 | static const std::string format_return_value(uintptr_t address); 90 | 91 | InternalState::iterator erase(const InternalState::iterator &iterator); 92 | size_t erase(const InternalState::key_type &key); 93 | 94 | bool find(const InternalState::key_type &key, 95 | arg_out InternalState::iterator &iterator); 96 | bool find(const InternalState::key_type &key, 97 | arg_out InternalState::const_iterator &iterator) const; 98 | 99 | void update(const InternalState::key_type &key, 100 | const InternalState::mapped_type &value); 101 | 102 | private: 103 | static const std::string format_initial_value(size_t offset); 104 | 105 | bool optimizer(bool do_purge_unchanged=false); 106 | void optimize_entries(); 107 | 108 | bool propagate(); 109 | 110 | bool purge_unchanged(); 111 | bool purge_uninteresting(); 112 | 113 | kill_results kill_helper(const ExpressionPtr &key, 114 | const ExpressionPtr &value); 115 | void kill(const ExpressionPtr &key, const ExpressionPtr &value); 116 | }; 117 | 118 | #endif // STATE_H 119 | -------------------------------------------------------------------------------- /include/translator.h: -------------------------------------------------------------------------------- 1 | #ifndef TRANSLATOR_H 2 | #define TRANSLATOR_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "vex.h" 12 | #include "function.h" 13 | #include "dump_file.h" 14 | #include "mapped_elf.h" 15 | #include "mapped_pe.h" 16 | #include "memory.h" 17 | 18 | extern "C" { 19 | 
#include 20 | } 21 | 22 | 23 | /*! 24 | * \brief Class responsible for translating functions into VEX blocks. 25 | * 26 | * This class takes CFG descriptions as generated by the IDAPython exporter 27 | * script and information about non-returning functions and uses both to 28 | * generate `Function` instances. Basic blocks are mapped to VEX basic 29 | * blocks of type IRSB. 30 | */ 31 | class Translator { 32 | private: 33 | /* Only one VEX instance should be present (libVEX seems to fail if 34 | * initialized multiple times), hence we do not want to be the sole 35 | * owner. */ 36 | Vex &_vex; 37 | 38 | DumpFile _dump_file; 39 | const Memory *_memory; 40 | 41 | std::set _seen_blocks; 42 | std::map _blocks; 43 | 44 | std::map _functions; 45 | 46 | FileFormatType _file_format; 47 | 48 | mutable std::mutex _mutex; 49 | 50 | public: 51 | Translator(Vex &vex, const std::string &file, FileFormatType file_format, 52 | bool parse_on_demand=true); 53 | 54 | const Function &get_function(const uintptr_t address); 55 | const Function *maybe_get_function(const uintptr_t address); 56 | 57 | /*! 58 | * \brief Returns all functions known to the `Translator`. 59 | * \return Returns a map with the function's address as key and `Function` 60 | * object as value. 61 | */ 62 | const std::map &get_functions() const { 63 | std::lock_guard _(_mutex); 64 | 65 | return _functions; 66 | } 67 | 68 | /*! 69 | * \brief Returns the view on memory as given by mapping the ELF/PE file. 70 | * 71 | * This is mostly used for queries on the binary (such as known memory 72 | * ranges). 73 | * 74 | * \return A reference of type `Memory`. 75 | */ 76 | const Memory &get_memory() const { 77 | std::lock_guard _(_mutex); 78 | 79 | // Since memory is only once initialized in the constructor and 80 | // otherwise never changed, we assume that the pointer is always set. 81 | return *_memory; 82 | } 83 | 84 | 85 | /*! 86 | * \brief Returns the format of the file (ELF/PE => Linux/Windows). 
87 | * 88 | * \return File format type. 89 | */ 90 | FileFormatType get_file_format() const { 91 | return _file_format; 92 | } 93 | 94 | private: 95 | bool process_block(Function &function, const BlockDescriptor &block); 96 | void finalize_block(Function &function, const BlockDescriptor &block, 97 | IRSB *block_pointer); 98 | 99 | void parse_known_functions(); 100 | void detect_tail_jumps(Function &function); 101 | 102 | Function *maybe_translate_function(const uintptr_t address); 103 | Function *translate_function(const std::pair&); 104 | 105 | Terminator get_terminator(const IRSB &block) const; 106 | }; 107 | 108 | #endif // TRANSLATOR_H 109 | -------------------------------------------------------------------------------- /include/vcall.h: -------------------------------------------------------------------------------- 1 | #ifndef VCALL_H 2 | #define VCALL_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "vcall_types.h" 13 | #include "vtable_hierarchy.h" 14 | #include "vtable_file.h" 15 | 16 | class VCallFile { 17 | private: 18 | VCalls _vcalls; 19 | PossibleVCalls _possible_vcalls; 20 | 21 | const std::string &_module_name; 22 | 23 | const VTableHierarchies &_vtable_hierarchies; 24 | const VTableFile &_vtable_file; 25 | 26 | mutable std::mutex _mtx; 27 | 28 | public: 29 | 30 | VCallFile(const std::string &module_name, 31 | const VTableHierarchies &vtable_hierarchies, 32 | const VTableFile &vtable_file); 33 | 34 | /*! 35 | * \brief Returns the found virtual callsites. 36 | * \return Returns the found virtual callsites. 
37 | */ 38 | const VCalls& get_vcalls() const; 39 | 40 | 41 | void add_vcall(uint64_t addr, uint32_t index, size_t entry_index); 42 | 43 | 44 | void add_possible_vcall(uint64_t addr); 45 | 46 | 47 | void export_vcalls(const std::string &target_dir); 48 | }; 49 | 50 | 51 | 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /include/vcall_types.h: -------------------------------------------------------------------------------- 1 | #ifndef VCALL_TYPES_H 2 | #define VCALL_TYPES_H 3 | 4 | #include 5 | #include 6 | 7 | struct VCall { 8 | uint64_t addr; 9 | std::unordered_set indexes; 10 | size_t entry_index; 11 | }; 12 | 13 | typedef std::vector VCalls; 14 | typedef std::unordered_set PossibleVCalls; 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /include/vex.h: -------------------------------------------------------------------------------- 1 | #ifndef VEX_H 2 | #define VEX_H 3 | 4 | extern "C" { 5 | #include 6 | #include 7 | } 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | enum CONFIGURATION : size_t { 14 | MAX_INSTRUCTIONS = 100 15 | }; 16 | 17 | #define arg_out 18 | 19 | /*! 20 | * \brief Main class which acts as an interface to the linked VEX library. 21 | * 22 | * A singleton class used to interface the (patched) library libVEX. This is 23 | * done as the library does not seem to support multiple initializations. 24 | * 25 | * \todo This class merely supports x86_64 for now. 26 | */ 27 | class Vex { 28 | private: 29 | VexAbiInfo _abi_info; 30 | VexArchInfo _arch_info; 31 | VexGuestExtents _guest_extents; 32 | 33 | VexControl _control; 34 | VexTranslateArgs _args; 35 | 36 | IRSB *_block; 37 | 38 | // Consider std::set (tree size?). 39 | std::vector _allocations; 40 | 41 | public: 42 | /*! 43 | * \brief `get_instance` returns the only instance of this singleton class. 44 | * \return An instance of class `Vex`. 
45 | */ 46 | static Vex &get_instance() { 47 | static Vex singleton; 48 | return singleton; 49 | } 50 | 51 | Vex(const Vex&) = delete; 52 | void operator=(const Vex&) = delete; 53 | 54 | ~Vex(); 55 | 56 | const IRSB &translate(const uint8_t *bytes, uintptr_t guest_address, 57 | size_t instruction_count=MAX_INSTRUCTIONS, 58 | arg_out uintptr_t *vex_block_end=nullptr); 59 | 60 | private: 61 | Vex(); 62 | 63 | void initialize(); 64 | void initialize_amd64(); 65 | 66 | static void __attribute__((noreturn)) failure_exit() { 67 | throw std::string("Fatal exit from libVEX."); 68 | } 69 | 70 | static void *dispatch() { 71 | return nullptr; 72 | } 73 | 74 | static unsigned int needs_self_check(void*, VexRegisterUpdates*, 75 | const VexGuestExtents*) { 76 | return 0; 77 | } 78 | 79 | static unsigned char chase_into_ok(void*, Addr) { 80 | return false; 81 | } 82 | 83 | static void log_bytes(const char *bytes, size_t number_bytes); 84 | static IRSB *instrument(void *callback_opaque, IRSB *block, 85 | const VexGuestLayout*, const VexGuestExtents*, 86 | const VexArchInfo*, IRType, IRType); 87 | 88 | void manage_allocation(void *allocation); 89 | static void incoming_allocation(void *user, void *allocation); 90 | }; 91 | 92 | #endif // VEX_H 93 | -------------------------------------------------------------------------------- /include/vtable_file.h: -------------------------------------------------------------------------------- 1 | #ifndef VTABLE_FILE_H 2 | #define VTABLE_FILE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | /*! 16 | * \brief Structure containing information about a vtable stored in the 17 | * `_vtables.txt` file. 
18 | */ 19 | struct VTable { 20 | uint32_t index; 21 | uint64_t addr; 22 | int offset_to_top; 23 | std::vector entries; 24 | std::string module_name; 25 | }; 26 | 27 | 28 | typedef std::map VTableMap; 29 | typedef std::vector VTableVector; 30 | typedef std::map VTableModulesMap; 31 | typedef std::vector VTableModulesVector; 32 | 33 | 34 | /*! 35 | * \brief Class collecting the information that was produced by the IDA 36 | * exporting script. 37 | * 38 | * The vtable structures are built from `_vtables.txt` files (produced by the exporter) that are imported via `parse`. 39 | */ 40 | class VTableFile { 41 | private: 42 | VTableVector _vtables; 43 | VTableModulesVector _module_vtables; 44 | VTableModulesMap _module_vtables_map; 45 | std::set _managed_modules; 46 | uint32_t _index; 47 | 48 | std::string _this_module_name; 49 | bool _is_finalized = false; 50 | 51 | public: 52 | VTableFile(const std::string &this_module_name); 53 | 54 | 55 | /*! 56 | * \brief Returns all known vtables for this module. 57 | * \return Returns a `map` with all known vtables (address as key, 58 | * `VTable` struct as value) for the current module. 59 | */ 60 | const VTableMap& get_this_vtables() const; 61 | 62 | 63 | /*! 64 | * \brief Returns all known vtables for the given module. 65 | * \return Returns a `map` with all known vtables (address as key, 66 | * `VTable` struct as value) for the given module. 67 | */ 68 | const VTableMap& get_vtables(const std::string &module_name) const; 69 | 70 | 71 | /*! 72 | * \brief Returns all known vtables. 73 | * \return Returns a `vector` with all known vtables. 74 | */ 75 | const VTableVector& get_all_vtables() const; 76 | 77 | 78 | /*! 79 | * \brief Parses a given vtable file and builds the internal vtable structure. 80 | */ 81 | bool parse(const std::string &vtables_file); 82 | 83 | 84 | /*! 85 | * \brief Finalizes the vtable structures. 86 | * 87 | * This function finalizes the vtable structures. It can only be used 88 | * once all vtable files are imported via the `parse` function.
89 | * After `finalize` has been executed, no changes to the vtable structures 90 | * are possible. 91 | */ 92 | void finalize(); 93 | 94 | 95 | /*! 96 | * \brief Returns `true` if the vtable structure is finalized. 97 | * \return Returns `true` if the vtable structure is finalized. 98 | */ 99 | bool is_finalized() const; 100 | 101 | 102 | /*! 103 | * \brief Returns a vtable object given by module name and address. 104 | * \return Returns a vtable object. 105 | */ 106 | const VTable& get_vtable(const std::string &module_name, uint64_t addr) 107 | const; 108 | 109 | 110 | /*! 111 | * \brief Returns a vtable object given by its index. 112 | * \return Returns a vtable object. 113 | */ 114 | const VTable& get_vtable(uint32_t index) const; 115 | 116 | 117 | /*! 118 | * \brief Returns a pointer to the vtable object given by module name and address. 119 | * \return Returns a vtable object pointer or nullptr if object does 120 | * not exist. 121 | */ 122 | const VTable* get_vtable_ptr(const std::string &module_name, 123 | uint64_t addr) const; 124 | 125 | }; 126 | 127 | #endif // VTABLE_FILE_H 128 | -------------------------------------------------------------------------------- /include/vtable_hierarchy.h: -------------------------------------------------------------------------------- 1 | #ifndef VTABLE_HIERARCHY_H 2 | #define VTABLE_HIERARCHY_H 3 | 4 | #include "vtable_file.h" 5 | #include "vtable_update.h" 6 | #include "external_functions.h" 7 | #include "module_plt.h" 8 | #include "state.h" 9 | #include "vcall_types.h" 10 | #include "blacklist_functions.h" 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | 19 | typedef std::set DependentVTables; 20 | typedef std::vector HierarchiesVTable; 21 | 22 | 23 | #define DEBUG_WRITE_HIERARCHY_STEPS 0 24 | #define DEBUG_PRINT_DEPENDENCIES 0 25 | #define DEBUG_SEARCH_MERGING_REASON 0 26 | #define DEBUG_SEARCH_MERGING_VTABLE_ADDR_1 0xdb3148 27 | #define DEBUG_SEARCH_MERGING_VTABLE_ADDR_2 0xf0a000 28 | 29 | 30 | /*!
31 | * \brief Class holding the information about the extracted hierarchy. 32 | * 33 | * Holding the internal structure of the extracted hierarchy. Can import 34 | * already found hierarchies and add it to its structure (makes it possible 35 | * to analyze binaries in an iterative manner). Found hierarchy can be 36 | * exported into a `.hierarchy` file for further usage. 37 | */ 38 | class VTableHierarchies { 39 | private: 40 | HierarchiesVTable _hierarchies; 41 | const FileFormatType _file_format; 42 | const VTableFile &_vtable_file; 43 | const VTableMap &_this_vtables; 44 | const std::string &_module_name; 45 | /* NOTE: the `&` members are references; the referenced objects must outlive this instance. */ 46 | const ExternalFunctions &_external_funcs; 47 | const ModulePlt &_module_plt; 48 | 49 | const BlacklistFuncsSet &_funcs_blacklist; 50 | 51 | // Only needed for debugging. 52 | const int _thread_id; 53 | 54 | #if DEBUG_WRITE_HIERARCHY_STEPS 55 | std::ofstream hierarchy_steps_file; 56 | #endif 57 | 58 | void merge_hierarchies_priv(); 59 | 60 | bool get_vtable_dependencies(const VTableUpdates &vtable_updates, 61 | const ExpressionPtr &base_base, 62 | uint32_t base_index, 63 | size_t base_offset); 64 | 65 | bool get_sub_vtable_dependencies(const VTableUpdates &vtable_updates, 66 | const ExpressionPtr &sub_base, 67 | uint32_t sub_index, 68 | size_t sub_offset); 69 | 70 | bool extract_vtable_dependencies(const VTableUpdates &vtable_updates); 71 | 72 | bool add_to_hierarchy(uint32_t vtable_1_idx, 73 | uint32_t vtable_2_idx); 74 | 75 | void update_hierarchy_priv(uint32_t vtable_1_idx, 76 | uint32_t vtable_2_idx, 77 | bool merge_hierarchy); 78 | 79 | public: 80 | VTableHierarchies(const FileFormatType file_format, 81 | const VTableFile &vtable_file, 82 | const std::string &module_name, 83 | const ExternalFunctions &external_funcs, 84 | const ModulePlt &module_plt, 85 | const BlacklistFuncsSet &funcs_blacklist, 86 | const int thread_id); 87 | 88 | 89 | /*! 90 | * \brief Merges the existing hierarchies if they can be merged.
91 | */ 92 | void merge_hierarchies(); 93 | 94 | 95 | /*! 96 | * \brief Returns the current extracted hierarchy structure. 97 | * \return Returns the current extracted hierarchy structure. 98 | */ 99 | const HierarchiesVTable& get_hierarchies() const; 100 | 101 | 102 | /*! 103 | * \brief Updates the hierarchy structure with the vtable updates of an analyzed function. 104 | * 105 | * This function uses the extracted vtable updates to update 106 | * the hierarchy structure. Note that it also uses the information 107 | * about which function in which module was analyzed to obtain 108 | * the vtable updates. 109 | */ 110 | void update_hierarchy(const VTableUpdates &vtable_updates, 111 | const std::string &module_name, 112 | uint64_t func_addr, 113 | bool merge_hierarchy=true); 114 | 115 | 116 | /*! 117 | * \brief Updates the hierarchy structure with two dependent vtables given by index. 118 | * 119 | * This function adds both vtables given by their index into a hierarchy 120 | * (either in a new one or existing one if a dependency is already known). 121 | */ 122 | void update_hierarchy(uint32_t vtable_1_idx, 123 | uint32_t vtable_2_idx, 124 | bool merge_hierarchy=true); 125 | 126 | 127 | /*! 128 | * \brief Updates the hierarchy structure with already extracted vtable hierarchies. 129 | * 130 | * This function adds all vtable hierarchies into the existing hierarchies 131 | * (either in a new one or existing one if a dependency is already known). 132 | */ 133 | void update_hierarchy(const HierarchiesVTable& vtable_hierarchies, 134 | bool merge_hierarchy=true); 135 | 136 | 137 | /*! 138 | * \brief Exports the current hierarchy structure into a file. 139 | */ 140 | void export_hierarchy(const std::string &target_dir); 141 | 142 | 143 | /*! 144 | * \brief Imports a hierarchy from file, adds it to the current hierarchy. 145 | */ 146 | void import_hierarchy(const std::string &target_file); 147 | 148 | 149 | /*! 150 | * \brief Inter-modular check if the same function is at the same position.
151 | * 152 | * This function checks if an entry in a vtable of this module also exists 153 | * in a vtable of another module. If it does, the vtables are considered 154 | * dependent. 155 | */ 156 | void entry_heuristic_inter(); 157 | 158 | /*! NOTE(review): undocumented; presumably refines the hierarchies using the given vcalls - confirm against the implementation. */ 159 | void vcall_analysis(const VCalls &vcalls); 160 | 161 | 162 | }; 163 | 164 | #endif // VTABLE_HIERARCHY_H 165 | -------------------------------------------------------------------------------- /include/vtable_update.h: -------------------------------------------------------------------------------- 1 | #ifndef VTABLE_UPDATE_H 2 | #define VTABLE_UPDATE_H 3 | 4 | #include "expression.h" 5 | #include "state.h" 6 | #include "vtable_file.h" 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #define arg_out 17 | 18 | /*! 19 | * \brief Structure containing information about a vtable overwrite 20 | * found during the analysis. 21 | */ 22 | struct VTableUpdate { 23 | size_t offset; 24 | ExpressionPtr base; 25 | uint32_t index; 26 | }; 27 | 28 | 29 | typedef std::vector VTableUpdates; 30 | typedef std::map VTableUpdatesMap; 31 | typedef std::map VTableUpdatesModuleMap; 32 | 33 | 34 | class FctVTableUpdates { 35 | private: 36 | 37 | // VTable updates made by functions of this module.
38 | VTableUpdatesMap _this_vtable_updates; 39 | VTableUpdatesModuleMap _external_vtable_updates; 40 | /* NOTE: the two `&` members below are references; the referenced objects must outlive this instance. */ 41 | VTableFile &_vtable_file; 42 | const std::string &_module_name; 43 | /* NOTE(review): presumably guards the update maps against concurrent access - confirm in the implementation. */ 44 | mutable std::mutex _mtx; 45 | /* Initial expressions of RDI, RSI, RDX, RCX, R8 and R9 as provided by State::initial_values(). */ 46 | ExpressionPtr _rdi = State::initial_values().at(OFFB_RDI); 47 | ExpressionPtr _rsi = State::initial_values().at(OFFB_RSI); 48 | ExpressionPtr _rdx = State::initial_values().at(OFFB_RDX); 49 | ExpressionPtr _rcx = State::initial_values().at(OFFB_RCX); 50 | ExpressionPtr _r8 = State::initial_values().at(OFFB_R8); 51 | ExpressionPtr _r9 = State::initial_values().at(OFFB_R9); 52 | /* NOTE(review): judging by their names, the two helpers below convert a base expression to/from its string form; `arg_out` marks the output parameter. The meaning of the bool result is not visible here. */ 53 | bool convert_expression_str(ExpressionPtr base, 54 | arg_out std::string &base_str); 55 | 56 | bool convert_str_expression(const std::string &base_str, 57 | arg_out ExpressionPtr &base); 58 | 59 | public: 60 | 61 | FctVTableUpdates(VTableFile &vtable_file, 62 | const std::string &module_name); 63 | 64 | 65 | /*! 66 | * \brief Adds vtable updates for the given function. 67 | */ 68 | void add_vtable_updates(uint64_t fct_addr, 69 | const VTableUpdates &vtable_updates); 70 | 71 | 72 | /*! 73 | * \brief Exports the vtable updates that are done by this module. 74 | */ 75 | void export_vtable_updates(const std::string &target_dir); 76 | 77 | 78 | /*! 79 | * \brief Returns all vtable updates made by a function of a 80 | * specific module. 81 | * \return Returns a pointer to all vtable updates made by a function of a 82 | * specific module (NOTE(review): presumably nullptr if none are known - confirm). 83 | */ 84 | const VTableUpdates* get_vtable_updates(const std::string &module_name, 85 | uint64_t fct_addr) const; 86 | 87 | 88 | /*! 89 | * \brief Imports vtable updates from a file and 90 | * adds them to the current vtable updates.
91 | */ 92 | void import_updates(const std::string &target_file); 93 | 94 | }; 95 | 96 | #endif // VTABLE_UPDATE_H 97 | -------------------------------------------------------------------------------- /include/vtv_vcall_gt.h: -------------------------------------------------------------------------------- 1 | #ifndef VTV_VCALL_GT_H 2 | #define VTV_VCALL_GT_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "expression.h" 11 | 12 | struct VTVVcall { 13 | uint64_t addr_verify_call; 14 | ExpressionPtr vtbl_obj; 15 | std::unordered_set addr_vcalls; 16 | }; 17 | 18 | typedef std::map VTVVcalls; 19 | 20 | 21 | class VTVVcallsFile { 22 | private: 23 | 24 | const std::string &_module_name; 25 | 26 | VTVVcalls _vtv_vcalls; 27 | 28 | public: 29 | 30 | VTVVcallsFile(const std::string &module_name); 31 | 32 | void add_vtv_vcalls(const VTVVcalls &vtv_vcalls); 33 | 34 | void export_vtv_vcalls(const std::string &target_dir); 35 | 36 | /*! 37 | * \brief Returns the found vtv vcalls. 38 | * \return Returns the found vtv vcalls.
39 | */ 40 | const VTVVcalls& get_vtv_vcalls() const; 41 | }; 42 | 43 | #endif // VTV_VCALL_GT_H 44 | -------------------------------------------------------------------------------- /paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUB-SysSec/Marx/3bd6f21da8defd7746ed803e008490ea4ecadc62/paper.pdf -------------------------------------------------------------------------------- /patch/heap_allocation_patch.diff: -------------------------------------------------------------------------------- 1 | Index: priv/ir_defs.c 2 | =================================================================== 3 | --- priv/ir_defs.c (revision 3203) 4 | +++ priv/ir_defs.c (working copy) 5 | @@ -1669,7 +1669,9 @@ 6 | /*--- Constructors ---*/ 7 | /*---------------------------------------------------------------*/ 8 | 9 | +Bool _allocate_on_heap = 0; 10 | 11 | + 12 | /* Constructors -- IRConst */ 13 | 14 | IRConst* IRConst_U1 ( Bool bit ) 15 | @@ -2446,6 +2448,18 @@ 16 | return dst; 17 | } 18 | 19 | +IRSB* deepCopyIRSB_Heap ( const IRSB* bb ) 20 | +{ 21 | + IRSB *bb2; 22 | + Bool previous = _allocate_on_heap; 23 | + 24 | + _allocate_on_heap = 1; 25 | + bb2 = deepCopyIRSB(bb); 26 | + 27 | + _allocate_on_heap = previous; 28 | + return bb2; 29 | +} 30 | + 31 | IRSB* deepCopyIRSB ( const IRSB* bb ) 32 | { 33 | Int i; 34 | Index: priv/main_util.c 35 | =================================================================== 36 | --- priv/main_util.c (revision 3203) 37 | +++ priv/main_util.c (working copy) 38 | @@ -39,7 +39,75 @@ 39 | #include "main_globals.h" 40 | #include "main_util.h" 41 | 42 | +#include 43 | 44 | +extern Bool _allocate_on_heap; 45 | + 46 | +typedef void (*AllocationListener)( void*, void* ); 47 | + 48 | +void vexInitialListener ( void*, void* ); 49 | +void vexInitialListener( void *userdata, void *data ) 50 | +{ 51 | + return; 52 | +} 53 | + 54 | +void * _global_listener_data = NULL; 55 | 
+AllocationListener _global_listener_func = &vexInitialListener; 56 | + 57 | +void LibVEX_registerAllocationListener ( void *userdata, AllocationListener listener ) 58 | +{ 59 | + _global_listener_data = userdata; 60 | + _global_listener_func = listener; 61 | +} 62 | + 63 | +void* LibVEX_Alloc_inline ( SizeT nbytes ) 64 | +{ 65 | + struct align { 66 | + char c; 67 | + union { 68 | + char c; 69 | + short s; 70 | + int i; 71 | + long l; 72 | + long long ll; 73 | + float f; 74 | + double d; 75 | + /* long double is currently not used and would increase alignment 76 | + unnecessarily. */ 77 | + /* long double ld; */ 78 | + void *pto; 79 | + void (*ptf)(void); 80 | + } x; 81 | + }; 82 | + 83 | + /* Make sure the compiler does no surprise us */ 84 | + vassert(offsetof(struct align,x) <= REQ_ALIGN); 85 | + 86 | + if(_allocate_on_heap) { 87 | + void *data = malloc(nbytes); 88 | + 89 | + _global_listener_func(_global_listener_data, data); 90 | + return data; 91 | + } 92 | + 93 | +#if 0 94 | + /* Nasty debugging hack, do not use. 
*/ 95 | + return malloc(nbytes); 96 | +#else 97 | + HChar* curr; 98 | + HChar* next; 99 | + SizeT ALIGN; 100 | + ALIGN = offsetof(struct align,x) - 1; 101 | + nbytes = (nbytes + ALIGN) & ~ALIGN; 102 | + curr = private_LibVEX_alloc_curr; 103 | + next = curr + nbytes; 104 | + if (next >= private_LibVEX_alloc_last) 105 | + private_LibVEX_alloc_OOM(); 106 | + private_LibVEX_alloc_curr = next; 107 | + return curr; 108 | +#endif 109 | +} 110 | + 111 | /*---------------------------------------------------------*/ 112 | /*--- Storage ---*/ 113 | /*---------------------------------------------------------*/ 114 | @@ -75,6 +143,7 @@ 115 | 116 | static VexAllocMode mode = VexAllocModeTEMP; 117 | 118 | + 119 | void vexAllocSanityCheck ( void ) 120 | { 121 | vassert(temporary_first == &temporary[0]); 122 | Index: priv/main_util.h 123 | =================================================================== 124 | --- priv/main_util.h (revision 3203) 125 | +++ priv/main_util.h (working copy) 126 | @@ -122,47 +122,8 @@ 127 | boundary. */ 128 | #define REQ_ALIGN 8 129 | 130 | -static inline void* LibVEX_Alloc_inline ( SizeT nbytes ) 131 | -{ 132 | - struct align { 133 | - char c; 134 | - union { 135 | - char c; 136 | - short s; 137 | - int i; 138 | - long l; 139 | - long long ll; 140 | - float f; 141 | - double d; 142 | - /* long double is currently not used and would increase alignment 143 | - unnecessarily. */ 144 | - /* long double ld; */ 145 | - void *pto; 146 | - void (*ptf)(void); 147 | - } x; 148 | - }; 149 | +void* LibVEX_Alloc_inline ( SizeT ); 150 | 151 | - /* Make sure the compiler does no surprise us */ 152 | - vassert(offsetof(struct align,x) <= REQ_ALIGN); 153 | - 154 | -#if 0 155 | - /* Nasty debugging hack, do not use. 
*/ 156 | - return malloc(nbytes); 157 | -#else 158 | - HChar* curr; 159 | - HChar* next; 160 | - SizeT ALIGN; 161 | - ALIGN = offsetof(struct align,x) - 1; 162 | - nbytes = (nbytes + ALIGN) & ~ALIGN; 163 | - curr = private_LibVEX_alloc_curr; 164 | - next = curr + nbytes; 165 | - if (next >= private_LibVEX_alloc_last) 166 | - private_LibVEX_alloc_OOM(); 167 | - private_LibVEX_alloc_curr = next; 168 | - return curr; 169 | -#endif 170 | -} 171 | - 172 | /* Misaligned memory access support. */ 173 | 174 | extern UInt read_misaligned_UInt_LE ( void* addr ); 175 | Index: pub/libvex_ir.h 176 | =================================================================== 177 | --- pub/libvex_ir.h (revision 3203) 178 | +++ pub/libvex_ir.h (working copy) 179 | @@ -2992,6 +2992,12 @@ 180 | /* Deep-copy an IRSB */ 181 | extern IRSB* deepCopyIRSB ( const IRSB* ); 182 | 183 | +/* Deep-copy an IRSB onto the heap */ 184 | +extern IRSB* deepCopyIRSB_Heap ( const IRSB* ); 185 | + 186 | +typedef void (*AllocationListener)( void*, void* ); 187 | +extern void LibVEX_registerAllocationListener ( void*, AllocationListener ); 188 | + 189 | /* Deep-copy an IRSB, except for the statements list, which set to be 190 | a new, empty, list of statements. */ 191 | extern IRSB* deepCopyIRSBExceptStmts ( const IRSB* ); 192 | -------------------------------------------------------------------------------- /scripts/ida_get_all_icalls.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2.7 2 | 3 | import sys 4 | 5 | from idc import * 6 | from idaapi import * 7 | from idautils import * 8 | 9 | ''' 10 | Generate a file with all indirect calls. 11 | ''' 12 | 13 | counter = 0 14 | segments = list(Segments()) 15 | icalls_set = set() 16 | 17 | for segment in segments: 18 | permissions = getseg(segment).perm 19 | if not permissions & SEGPERM_EXEC: 20 | continue 21 | 22 | print('\nProcessing segment %s.' 
% SegName(segment)) 23 | 24 | ea = SegStart(segment) 25 | end = SegEnd(segment) 26 | while ea < end: 27 | 28 | # Return values of GetOpType 29 | # https://www.hex-rays.com/products/ida/support/idadoc/276.shtml 30 | if (GetMnem(ea) == "call" 31 | and GetOpType(ea, 0) >= 1 32 | and GetOpType(ea, 0) <= 4): 33 | #print "0x%x - call %s" % (ea, GetOpnd(ea, 0)) 34 | icalls_set.add(ea) 35 | counter += 1 36 | 37 | ea = idc.NextHead(ea) 38 | 39 | print "Number of icalls found: %d" % counter 40 | 41 | target_file = GetInputFile() + ".icalls" 42 | with open(target_file, 'w') as fp: 43 | for icall_addr in icalls_set: 44 | fp.write("%x\n" % icall_addr) 45 | 46 | print "File written to: %s" % target_file -------------------------------------------------------------------------------- /scripts/ida_get_hierarchies_through_rtti.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | 3 | import sys 4 | from idc import * 5 | from idaapi import * 6 | from idautils import * 7 | 8 | ''' 9 | Generate ground truth from RTTI values. 10 | ''' 11 | 12 | vtable_section_names = [".rodata", ".data.rel.ro", ".data.rel.ro.local"] 13 | 14 | 15 | vtables = [ ] 16 | 17 | 18 | with open(GetInputFile() + '.gt_vtables', 'r') as fp: 19 | for line in fp: 20 | vtables.append(int(line.split(" ")[0], 16)) 21 | 22 | 23 | 24 | #vtables = [0x790810, 0x7a6df0, 0x7a9570, 0x7a9590, 0x7b9930, 0x7c76f0] 25 | 26 | DEBUG = True 27 | 28 | class ClassObject: 29 | 30 | def __init__(self, name): 31 | self.name = name 32 | self.base_classes = list() 33 | 34 | 35 | def add_base_class(self, base_class): 36 | self.base_classes.append(base_class) 37 | 38 | 39 | def parse_typeinfo(rtti_ptr): 40 | 41 | in_vtable_section = False 42 | for vtable_sec in vtable_sections: 43 | if SegStart(vtable_sec) <= rtti_ptr <= SegEnd(vtable_sec): 44 | in_vtable_section = True 45 | break 46 | 47 | # Check if type info resides in extern. 
48 | if not in_vtable_section: 49 | name = Name(rtti_ptr) 50 | if name == "": 51 | print "Error for type info: 0x%x" % rtti_ptr 52 | print "No name found for extern symbol." 53 | return None 54 | class_obj = ClassObject(name) 55 | return class_obj 56 | 57 | name_ptr = Qword(rtti_ptr + 0x8) 58 | name = GetString(name_ptr) 59 | 60 | if not name: 61 | print "Error for type info: 0x%x" % rtti_ptr 62 | print "No name found." 63 | return None 64 | ''' 65 | # Try to demangle name to: 66 | # `typeinfo name for'ClassName 67 | if not Demangle("__ZTS" + name, 0): 68 | print "Error for type info: 0x%x" % rtti_ptr 69 | print "Not able to demangle name: %s." % name 70 | return None 71 | ''' 72 | 73 | # Remove the number in the beginning of the name 74 | # (part of the mangled name). 75 | skip = 0 76 | for i in range(len(name)): 77 | if 48 <= ord(name[i]) <= 57: 78 | continue 79 | skip = i 80 | break 81 | name = name[skip:] 82 | 83 | 84 | class_obj = ClassObject(name) 85 | 86 | 87 | # Upper base ptr is 0 if we have multi-inheritance 88 | # (because then we have the number of base classes given in 89 | # the lower base ptr) 90 | upper_base_ptr = Dword(rtti_ptr + 0x10) 91 | if upper_base_ptr < 0x50: # multi-inheritance 92 | number_bases = Dword(rtti_ptr + 0x14) 93 | 94 | if DEBUG: 95 | print "multi" 96 | 97 | if number_bases > 100: 98 | print "Error for type info: 0x%x" % rtti_ptr 99 | print "Detected multi-inheritance but with over 100 base classes." 
100 | return None 101 | 102 | for i in range(number_bases): 103 | base_ptr = Qword(rtti_ptr + 0x18 + (i*0x10)) 104 | 105 | if DEBUG: 106 | print "multi 0x%x" % base_ptr 107 | 108 | base_class = parse_typeinfo(base_ptr) 109 | if base_class: 110 | class_obj.add_base_class(base_class) 111 | 112 | else: # single-inheritance or base-class 113 | base_ptr = Qword(rtti_ptr + 0x10) 114 | 115 | is_ptr = False 116 | for vtable_sec in vtable_sections: 117 | if SegStart(vtable_sec) <= base_ptr <= SegEnd(vtable_sec): 118 | is_ptr = True 119 | break 120 | 121 | is_extern = False 122 | if SegStart(extern_section) <= base_ptr <= SegEnd(extern_section): 123 | is_extern = True 124 | 125 | if is_ptr: # single-inheritance 126 | 127 | if DEBUG: 128 | print "single" 129 | print "0x%x" % base_ptr 130 | 131 | base_class = parse_typeinfo(base_ptr) 132 | if base_class: 133 | class_obj.add_base_class(base_class) 134 | 135 | elif is_extern: # has inheritance to external module 136 | 137 | if DEBUG: 138 | print "external" 139 | print "0x%x" % base_ptr 140 | 141 | name = Name(base_ptr) 142 | if name == "": 143 | print "Error for external type info: 0x%x" % base_ptr 144 | print "No name found for extern symbol." 
145 | return None 146 | if (Demangle(name, 0) and 147 | (name[:4] == "_ZTI" or name[:5] == "__ZTI")): 148 | temp = ClassObject(name) 149 | if temp: 150 | class_obj.add_base_class(temp) 151 | 152 | else: # base-class 153 | if DEBUG: 154 | print "base" 155 | print "0x%x" % base_ptr 156 | 157 | return class_obj 158 | 159 | 160 | def print_class_hierarchy(class_obj): 161 | 162 | def pretty_print(class_obj, depth): 163 | print " "*depth, 164 | print class_obj.name 165 | for base_class in class_obj.base_classes: 166 | pretty_print(base_class, depth+1) 167 | 168 | pretty_print(class_obj, 0) 169 | 170 | 171 | def convert_to_set(class_obj): 172 | hierarchy_set = set() 173 | hierarchy_set.add(class_obj.name) 174 | for base_obj in class_obj.base_classes: 175 | hierarchy_set |= convert_to_set(base_obj) 176 | return hierarchy_set 177 | 178 | 179 | segments = list(Segments()) 180 | vtable_sections = set() 181 | extern_section = None 182 | for segment in segments: 183 | if SegName(segment) in vtable_section_names: 184 | vtable_sections.add(segment) 185 | 186 | if SegName(segment) == "extern": 187 | extern_section = segment 188 | 189 | 190 | 191 | hierarchy_list = list() 192 | vtable_mapping = dict() 193 | vtable_addr_error = set() 194 | 195 | for vtable_addr in vtables: 196 | 197 | print "Processing vtable: 0x%x" % vtable_addr 198 | 199 | 200 | rtti_ptr = Qword(vtable_addr - 0x8) 201 | if rtti_ptr == 0: 202 | print "Error for vtable: 0x%x" % vtable_addr 203 | print "RTTI pointer 0. Seems not to be a vtable." 204 | vtable_addr_error.add(vtable_addr) 205 | continue 206 | 207 | class_obj = parse_typeinfo(rtti_ptr) 208 | if class_obj is None: 209 | print "Error for vtable: 0x%x" % vtable_addr 210 | print "Seems not to be a vtable." 
211 | vtable_addr_error.add(vtable_addr) 212 | continue 213 | vtable_mapping[vtable_addr] = class_obj 214 | 215 | if DEBUG: 216 | print_class_hierarchy(class_obj) 217 | 218 | # Convert to hierarchy set and merge into hierarchies 219 | hierarchy_set = convert_to_set(class_obj) 220 | is_merged = False 221 | i = 0 222 | while i < len(hierarchy_list): 223 | if hierarchy_list[i].isdisjoint(hierarchy_set): 224 | i += 1 225 | continue 226 | 227 | hierarchy_list[i] |= hierarchy_set 228 | is_merged = True 229 | break 230 | if not is_merged: 231 | hierarchy_list.append(hierarchy_set) 232 | 233 | 234 | # Replace vtable names with vtable addresses. 235 | for hierarchy_set in hierarchy_list: 236 | for name in list(hierarchy_set): 237 | was_added = False 238 | for k,v in vtable_mapping.iteritems(): 239 | if name == v.name: 240 | was_added = True 241 | hierarchy_set.add(k) 242 | if was_added: 243 | hierarchy_set.remove(name) 244 | ''' 245 | for hierarchy_set in hierarchy_list: 246 | for k,v in vtable_mapping.iteritems(): 247 | if v.name in hierarchy_set: 248 | hierarchy_set.remove(v.name) 249 | hierarchy_set.add(k) 250 | ''' 251 | 252 | # Merge hierarchies 253 | i = 0 254 | while i < len(hierarchy_list): 255 | 256 | is_merged = False 257 | j = i + 1 258 | while j < len(hierarchy_list): 259 | 260 | if hierarchy_list[i].isdisjoint(hierarchy_list[j]): 261 | j += 1 262 | continue 263 | 264 | hierarchy_list[j] |= hierarchy_list[i] 265 | is_merged = True 266 | break 267 | 268 | if is_merged: 269 | hierarchy_list.remove(hierarchy_list[i]) 270 | else: 271 | i += 1 272 | 273 | # Sanity check if all vtable addresses are in the hierarchy. 274 | for k,v in vtable_mapping.iteritems(): 275 | found = False 276 | for hierarchy_set in hierarchy_list: 277 | if k in hierarchy_set: 278 | found = True 279 | break 280 | if not found: 281 | print "Error: Can not find vtable address 0x%x in hierarchies." 
% k 282 | 283 | ''' 284 | DEBUG 285 | print hierarchy_list 286 | sys.exit(0) 287 | #''' 288 | 289 | not_complete_hierarchies = list() 290 | with open(GetInputFile() + '.gt_hierarchy', 'w') as fp: 291 | for hierarchy_set in hierarchy_list: 292 | has_written = False 293 | is_complete = True 294 | for vtable in hierarchy_set: 295 | if isinstance(vtable, int): 296 | fp.write("%x " % vtable) 297 | has_written = True 298 | else: 299 | 300 | ''' 301 | temp = Demangle(vtable, 0) 302 | if temp: 303 | fp.write("%s " % temp) 304 | else: 305 | fp.write("%s " % vtable) 306 | ''' 307 | 308 | is_complete = False 309 | if has_written: 310 | fp.write("\n") 311 | if not is_complete: 312 | not_complete_hierarchies.append(hierarchy_set) 313 | 314 | 315 | 316 | 317 | if vtable_addr_error: 318 | print "The following vtable addresses created errors:" 319 | for vtable_addr in vtable_addr_error: 320 | print "0x%x" % vtable_addr 321 | else: 322 | print "No vtable errors." 323 | 324 | 325 | if not_complete_hierarchies: 326 | print "The following hierarchies are not complete:" 327 | for hierarchy_set in not_complete_hierarchies: 328 | print "Hierarchy:" 329 | for vtable in hierarchy_set: 330 | if isinstance(vtable, int): 331 | print "0x%x" % vtable 332 | else: 333 | temp = Demangle(vtable, 0) 334 | if temp: 335 | print temp 336 | else: 337 | print vtable 338 | print "" 339 | else: 340 | print "All hierarchies complete." 
341 | 342 | -------------------------------------------------------------------------------- /scripts/ida_has_refs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | 3 | from idc import * 4 | from idaapi import * 5 | from idautils import * 6 | 7 | missing = [0x7dfa30] 8 | 9 | def xrefs(m): 10 | return [x.frm for x in XrefsTo(m)] 11 | 12 | print "" 13 | 14 | for i, m in enumerate(missing): 15 | a = xrefs(m) 16 | print('%03i %08x %d %s' \ 17 | % (i, m, len(a), ' '.join('%08x' % x for x in a))) 18 | sum([int(len(xrefs(m)) == 0) for m in missing]) -------------------------------------------------------------------------------- /scripts/ida_win_find_blacklist_functions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2.7 2 | 3 | import sys 4 | 5 | from idc import * 6 | from idaapi import * 7 | from idautils import * 8 | 9 | from struct import pack 10 | from ctypes import c_uint32, c_uint64 11 | import subprocess 12 | 13 | base = get_imagebase() 14 | plt_start, plt_end = 0, 0 15 | segments = list(Segments()) 16 | 17 | # C++ configuration 18 | dump_vtables = True 19 | vtable_section_names = [".rodata", 20 | ".data.rel.ro", 21 | ".data.rel.ro.local", 22 | ".rdata"] 23 | 24 | # global variables that are needed for multiple C++ algorithms 25 | if dump_vtables: 26 | extern_seg = None 27 | extern_start = 0 28 | extern_end = 0 29 | text_seg = None 30 | text_start = 0 31 | text_end = 0 32 | plt_seg = None 33 | plt_start = 0 34 | plt_end = 0 35 | got_seg = None 36 | got_start = 0 37 | got_end = 0 38 | idata_seg = None 39 | idata_start = 0 40 | idata_end = 0 41 | vtable_sections = list() 42 | for segment in segments: 43 | if SegName(segment) == "extern": 44 | extern_seg = segment 45 | extern_start = SegStart(extern_seg) 46 | extern_end = SegEnd(extern_seg) 47 | elif SegName(segment) == ".text": 48 | text_seg = segment 49 | text_start = SegStart(text_seg) 50 | 
text_end = SegEnd(text_seg) 51 | elif SegName(segment) == ".plt": 52 | plt_seg = segment 53 | plt_start = SegStart(plt_seg) 54 | plt_end = SegEnd(plt_seg) 55 | elif SegName(segment) == ".got": 56 | got_seg = segment 57 | got_start = SegStart(got_seg) 58 | got_end = SegEnd(got_seg) 59 | elif SegName(segment) == ".idata": 60 | idata_seg = segment 61 | idata_start = SegStart(idata_seg) 62 | idata_end = SegEnd(idata_seg) 63 | elif SegName(segment) in vtable_section_names: 64 | vtable_sections.append(segment) 65 | 66 | def main(): 67 | 68 | for func in Functions(): 69 | 70 | flow = list(FlowChart(get_func(func))) 71 | if len(flow) == 1: 72 | block = flow[0] 73 | block_start = block.startEA 74 | block_end = block.endEA 75 | 76 | address = block_start 77 | counter = 0 78 | is_zero_xor = False 79 | has_ret = False 80 | has_mov = False 81 | while address != BADADDR and address < block_end: 82 | 83 | mnem = GetMnem(address) 84 | if mnem == "xor": 85 | if GetOpnd(address, 0) == GetOpnd(address, 1): 86 | is_zero_xor = True 87 | elif mnem == "retn": 88 | has_ret = True 89 | elif mnem == "mov": 90 | # Check if second is constant 91 | if GetOpType(address, 1) == 5: 92 | has_mov = True 93 | 94 | counter += 1 95 | address = NextHead(address) 96 | 97 | if counter == 2 and is_zero_xor and has_ret: 98 | print "%x Ignore XOR func" % func 99 | 100 | elif counter == 1 and has_ret: 101 | print "%x Ignore RETN func" % func 102 | 103 | elif counter == 2 and has_mov and has_ret: 104 | print "%x Ignore MOV func" % func 105 | 106 | if __name__ == '__main__': 107 | main() -------------------------------------------------------------------------------- /scripts/ida_win_get_hierarchies_through_rtti.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | 3 | import sys 4 | from idc import * 5 | from idaapi import * 6 | from idautils import * 7 | 8 | ''' 9 | Generate ground truth from RTTI values. 
10 | ''' 11 | 12 | vtable_section_names = [".rdata"] 13 | 14 | # Get all vtables through the symbols. 15 | vtable_symbols = [] 16 | for name_tuple in Names(): 17 | temp = Demangle(name_tuple[1], 8) 18 | if not temp: 19 | continue 20 | if "vftable" in temp: 21 | vtable_symbols.append(name_tuple) 22 | 23 | vtables = [] 24 | for vtable_tuple in vtable_symbols: 25 | vtables.append(vtable_tuple[0]) 26 | 27 | with open(GetInputFile() + '.gt_vtables', 'w') as fp: 28 | for vtable_tuple in vtable_symbols: 29 | fp.write("%x %s\n" % (vtable_tuple[0], Demangle(vtable_tuple[1], 8))) 30 | 31 | #vtables = [0xDD0148, 0x0DD0108] 32 | 33 | DEBUG = False 34 | 35 | class ClassObject: 36 | 37 | def __init__(self, name): 38 | self.name = name 39 | self.base_classes = list() 40 | 41 | 42 | def add_base_class(self, base_class): 43 | self.base_classes.append(base_class) 44 | 45 | 46 | def parse_typeinfo(rtti_ptr): 47 | 48 | def get_name_type_descr(type_descr): 49 | # Extract name of vtable (skip *pVFTable, void *). 50 | name_ptr = type_descr + 0x10 51 | name = GetString(name_ptr) 52 | return name 53 | 54 | # Skip signature, offset, cdOffset (each 4 bytes). 55 | # NOTE: This only works if the idb is rebased to 0x0 as image base. 56 | type_descr_ptr = rtti_ptr + 0xc 57 | class_hier_descr_ptr = rtti_ptr + 0x10 58 | 59 | type_descr = Dword(type_descr_ptr) 60 | class_hier_descr = Dword(class_hier_descr_ptr) 61 | 62 | name = get_name_type_descr(type_descr) 63 | class_obj = ClassObject(name) 64 | 65 | num_bases_ptr = class_hier_descr + 0x8 66 | num_bases = Dword(num_bases_ptr) 67 | 68 | if num_bases > 100: 69 | print "Error? Class %s has more than 100 base classes." 
% name 70 | return None 71 | 72 | elif num_bases > 0: 73 | base_array_ptr = class_hier_descr + 0xc 74 | base_array = Dword(base_array_ptr) 75 | 76 | temp_ptr = base_array 77 | for i in range(num_bases): 78 | base_descr = Dword(temp_ptr) 79 | base_type_descr = Dword(base_descr) 80 | base_name = get_name_type_descr(base_type_descr) 81 | 82 | if base_name != name: 83 | base_class_obj = ClassObject(base_name) 84 | class_obj.add_base_class(base_class_obj) 85 | temp_ptr += 0x4 86 | 87 | return class_obj 88 | 89 | 90 | def print_class_hierarchy(class_obj): 91 | 92 | def pretty_print(class_obj, depth): 93 | print " "*depth, 94 | print class_obj.name 95 | for base_class in class_obj.base_classes: 96 | pretty_print(base_class, depth+1) 97 | 98 | pretty_print(class_obj, 0) 99 | 100 | 101 | def convert_to_set(class_obj): 102 | hierarchy_set = set() 103 | hierarchy_set.add(class_obj.name) 104 | for base_obj in class_obj.base_classes: 105 | hierarchy_set |= convert_to_set(base_obj) 106 | return hierarchy_set 107 | 108 | 109 | # Abort if image base is not 0 110 | if get_imagebase() != 0x0: 111 | print "Image base has to be 0x0." 112 | 113 | else: 114 | hierarchy_list = list() 115 | vtable_mapping = dict() 116 | vtable_addr_error = set() 117 | 118 | for vtable_addr in vtables: 119 | 120 | print "Processing vtable: 0x%x" % vtable_addr 121 | 122 | # We assume that RTTI is always available 123 | # since MSVC reuses this field otherwise if it is not added. 124 | rtti_ptr = Qword(vtable_addr - 0x8) 125 | 126 | class_obj = parse_typeinfo(rtti_ptr) 127 | if class_obj is None: 128 | print "Error for vtable: 0x%x" % vtable_addr 129 | print "Seems not to be a vtable." 
130 | vtable_addr_error.add(vtable_addr) 131 | continue 132 | vtable_mapping[vtable_addr] = class_obj 133 | 134 | if DEBUG: 135 | print_class_hierarchy(class_obj) 136 | 137 | # Convert to hierarchy set and merge into hierarchies 138 | hierarchy_set = convert_to_set(class_obj) 139 | is_merged = False 140 | i = 0 141 | while i < len(hierarchy_list): 142 | if hierarchy_list[i].isdisjoint(hierarchy_set): 143 | i += 1 144 | continue 145 | 146 | hierarchy_list[i] |= hierarchy_set 147 | is_merged = True 148 | break 149 | if not is_merged: 150 | hierarchy_list.append(hierarchy_set) 151 | 152 | # Replace vtable names with vtable addresses. 153 | for hierarchy_set in hierarchy_list: 154 | for name in list(hierarchy_set): 155 | was_added = False 156 | for k,v in vtable_mapping.iteritems(): 157 | if name == v.name: 158 | was_added = True 159 | hierarchy_set.add(k) 160 | if was_added: 161 | hierarchy_set.remove(name) 162 | 163 | # Merge hierarchies 164 | i = 0 165 | while i < len(hierarchy_list): 166 | 167 | is_merged = False 168 | j = i + 1 169 | while j < len(hierarchy_list): 170 | 171 | if hierarchy_list[i].isdisjoint(hierarchy_list[j]): 172 | j += 1 173 | continue 174 | 175 | hierarchy_list[j] |= hierarchy_list[i] 176 | is_merged = True 177 | break 178 | 179 | if is_merged: 180 | hierarchy_list.remove(hierarchy_list[i]) 181 | else: 182 | i += 1 183 | 184 | # Sanity check if all vtable addresses are in the hierarchy. 185 | for k,v in vtable_mapping.iteritems(): 186 | found = False 187 | for hierarchy_set in hierarchy_list: 188 | if k in hierarchy_set: 189 | found = True 190 | break 191 | if not found: 192 | print "Error: Can not find vtable address 0x%x in hierarchies." 
% k 193 | 194 | ''' 195 | DEBUG 196 | print hierarchy_list 197 | sys.exit(0) 198 | #''' 199 | 200 | not_complete_hierarchies = list() 201 | with open(GetInputFile() + '.gt_hierarchy', 'w') as fp: 202 | for hierarchy_set in hierarchy_list: 203 | has_written = False 204 | is_complete = True 205 | for vtable in hierarchy_set: 206 | if isinstance(vtable, int) or isinstance(vtable, long): 207 | fp.write("%x " % vtable) 208 | has_written = True 209 | else: 210 | 211 | ''' 212 | temp = Demangle(vtable, 0) 213 | if temp: 214 | fp.write("%s " % temp) 215 | else: 216 | fp.write("%s " % vtable) 217 | ''' 218 | 219 | is_complete = False 220 | if has_written: 221 | fp.write("\n") 222 | if not is_complete: 223 | not_complete_hierarchies.append(hierarchy_set) 224 | 225 | if vtable_addr_error: 226 | print "The following vtable addresses created errors:" 227 | for vtable_addr in vtable_addr_error: 228 | print "0x%x" % vtable_addr 229 | else: 230 | print "No vtable errors." 231 | 232 | if not_complete_hierarchies: 233 | print "The following hierarchies are not complete:" 234 | for hierarchy_set in not_complete_hierarchies: 235 | print "Hierarchy:" 236 | for vtable in hierarchy_set: 237 | if isinstance(vtable, int) or isinstance(vtable, long): 238 | print "0x%x" % vtable 239 | else: 240 | temp = Demangle(vtable, 8) 241 | if temp: 242 | print temp 243 | else: 244 | print vtable 245 | print "" 246 | else: 247 | print "All hierarchies complete." -------------------------------------------------------------------------------- /src/base_analysis.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "base_analysis.h" 3 | #include "amd64.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | 13 | /*! 14 | * \brief Constructs a new analysis on function `function`. 15 | * 16 | * The state at function entry is initialized to default values. 
17 | * 18 | * \param function The function on which the analysis is run. 19 | */ 20 | BaseAnalysis::BaseAnalysis(const Function &function, 21 | FileFormatType file_format) 22 | : _function(function), 23 | _file_format(file_format), 24 | _current_return_value(nullptr) { 25 | 26 | _initial_state.set_initial_state(); 27 | } 28 | 29 | /*! 30 | * \brief Constructs a new analysis on function `function`, setting the state 31 | * at function entry to the specified state `initial_state`. 32 | * 33 | * \param function The function on which the analysis is run. 34 | * \param initial_state The state that should be set on function entry. 35 | */ 36 | BaseAnalysis::BaseAnalysis(const Function &function, 37 | const State &initial_state, 38 | FileFormatType file_format) 39 | : BaseAnalysis(function, file_format) { 40 | _initial_state = initial_state; 41 | } 42 | 43 | /*! 44 | * \brief Runs the analysis. 45 | * 46 | * The traversal callback `BaseAnalysis::on_traversal` also handles 47 | * updates on the state across function calls. Currently, System V is assumed 48 | * per default. 49 | * 50 | * \see `BaseAnalysis::on_traversal` 51 | * 52 | * \return Always returns `true`. 53 | * \todo Better use for return value? Also generalize calling convention. 
54 | */ 55 | bool BaseAnalysis::obtain() { 56 | auto block_callback = [&](void *self_pointer, const Path &path, 57 | const Block &block) -> bool { 58 | 59 | BaseAnalysis &self = *reinterpret_cast(self_pointer); 60 | return self.on_traversal(path, block); 61 | }; 62 | 63 | auto block_predicate = [&](void *self_pointer, const Block &block) -> bool { 64 | BaseAnalysis &self = *reinterpret_cast(self_pointer); 65 | return self.block_predicate(block); 66 | }; 67 | 68 | auto path_callback = [&](void *self_pointer, const Path &path) { 69 | BaseAnalysis &self = *reinterpret_cast(self_pointer); 70 | self.path_traversed(path); 71 | }; 72 | 73 | pre_traversal(); 74 | auto result = _function.traverse(block_callback, block_predicate, 75 | path_callback, this); 76 | post_traversal(); 77 | 78 | _states.clear(); 79 | return result; 80 | } 81 | 82 | bool BaseAnalysis::on_traversal(const Path &path, const Block &block) { 83 | State new_state; 84 | 85 | // Get hold of the previous state set on the path. 86 | if(path.empty()) { 87 | new_state = _initial_state; 88 | } else { 89 | Path preceding_path(path.cbegin(), path.cend() - 1); 90 | const auto &preceding_state = _states[preceding_path]; 91 | new_state = preceding_state; 92 | 93 | // Handle side-effects as caused by the calling convention used. 94 | const auto &side_effect = _side_effects.find(preceding_path); 95 | if(side_effect != _side_effects.cend()) { 96 | new_state.purge_scratch_registers(_file_format); 97 | new_state.merge(side_effect->second); 98 | } 99 | } 100 | 101 | bool is_call = false; 102 | switch(block.get_terminator().type) { 103 | case TerminatorCall: 104 | case TerminatorCallUnresolved: { 105 | 106 | auto formatted = State::format_return_value(block.get_address()); 107 | _current_return_value = make_shared(formatted); 108 | 109 | is_call = true; 110 | break; 111 | } 112 | 113 | default: 114 | _current_return_value = nullptr; 115 | break; 116 | } 117 | 118 | // Actually compute the new semantics. 
119 | block.retrieve_semantics(new_state); 120 | bool continue_path = in_traversal(path, block, new_state); 121 | 122 | // Handle calls specially as they introduce side-effects. 123 | if(is_call) { 124 | State::iterator needle; 125 | if(new_state.find(register_rip, needle)) { 126 | // Construct an empty state which will contain side-effects only. 127 | State side_effects(false); 128 | 129 | side_effects.update(register_rax, _current_return_value); 130 | _side_effects[path] = side_effects; 131 | } 132 | } 133 | 134 | // Keep the state when hitting either a return instruction or a tail jump. 135 | const auto &terminator = block.get_terminator(); 136 | 137 | if(terminator.is_tail || terminator.type == TerminatorReturn) { 138 | new_state.erase(register_rip); 139 | new_state.purge_scratch_registers(_file_format); 140 | 141 | _semantics.push_back(new_state); 142 | } else { 143 | _states[path] = new_state; 144 | } 145 | 146 | return continue_path; 147 | } 148 | -------------------------------------------------------------------------------- /src/blacklist_functions.cpp: -------------------------------------------------------------------------------- 1 | #include "blacklist_functions.h" 2 | 3 | using namespace std; 4 | 5 | const BlacklistFuncsSet import_blacklist_funcs(const string &target_file) { 6 | 7 | ifstream file(target_file + "_funcs_blacklist.txt"); 8 | if(!file) { 9 | throw runtime_error("Opening function blacklist file failed."); 10 | } 11 | 12 | string line; 13 | 14 | // Parse first line manually. 15 | getline(file, line); 16 | istringstream header_parser(line); 17 | 18 | // First entry of file is always the module name. 
19 | string import_module_name; 20 | header_parser >> import_module_name; 21 | if(header_parser.fail()) { 22 | throw runtime_error("Parsing function blacklist file failed."); 23 | } 24 | 25 | BlacklistFuncsSet blacklist_set; 26 | 27 | while(getline(file, line)) { 28 | istringstream parser(line); 29 | uint64_t func_addr = 0; 30 | 31 | parser >> hex >> func_addr; 32 | if(parser.fail()) { 33 | throw runtime_error("Parsing function blacklist file failed."); 34 | } 35 | 36 | blacklist_set.insert(func_addr); 37 | } 38 | 39 | return blacklist_set; 40 | } 41 | -------------------------------------------------------------------------------- /src/block.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "block.h" 3 | #include "block_semantics.h" 4 | 5 | /*! 6 | * \brief Constructs a `Block` object. 7 | * \param address Virtual address the block lies at. 8 | * \param block Pointer to an `IRSB` VEX block. 9 | * \param terminator Description of the block's terminator. 10 | */ 11 | Block::Block(uintptr_t address, IRSB *block, const Terminator &terminator) 12 | : _address(address), _vex_block(block), _terminator(terminator) { 13 | } 14 | 15 | /*! 16 | * \brief Retrieves the block's semantics using an instance of `BlockSemantics`. 17 | * 18 | * \todo For now, there is no easy way to sub-class how the semantics are 19 | * retrieved, this should change by allowing custom semantic extractors. 20 | * 21 | * \param[in,out] state Initial state as used when computing the semantics. This 22 | * is updated with the resulting state which reflects the block's semantics. 23 | */ 24 | void Block::retrieve_semantics(State &state) const { 25 | BlockSemantics semantics(*this, state); 26 | state = semantics.get_state(); 27 | } 28 | 29 | /*! 30 | * \brief get_last_address 31 | * \return Returns the block's last virtual address 32 | * or 0 in case of an error. 
33 | */ 34 | uint64_t Block::get_last_address() const { 35 | for(int i = _vex_block->stmts_used - 1; i >= 0; --i) { 36 | const auto ¤t = *_vex_block->stmts[i]; 37 | if(current.tag == Ist_IMark) { 38 | const auto &temp = current.Ist.IMark; 39 | return temp.addr; 40 | } 41 | } 42 | 43 | return 0; 44 | } 45 | -------------------------------------------------------------------------------- /src/dump_file.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "dump_file.h" 3 | 4 | #include 5 | #include 6 | 7 | using namespace std; 8 | 9 | /*! 10 | * \brief Constructs a new `DumpFile` object. 11 | * \param dump_file The filename of the `.dmp` file (as produced by the exporter 12 | * script). 13 | * 14 | * Optionally tries to parse `.dmp.no-return` as well. 15 | */ 16 | DumpFile::DumpFile(const string &dump_file) { 17 | if(!parse(dump_file)) { 18 | throw runtime_error("Cannot parse function dump file " + dump_file + 19 | "."); 20 | } 21 | 22 | parse_no_return(dump_file + ".no-return"); 23 | } 24 | 25 | bool DumpFile::parse(const string &dump_file) { 26 | // FIXME: Comment on dump file structure. 
27 | _functions.clear(); 28 | 29 | ifstream file(dump_file.c_str(), ios::binary); 30 | if(!file) { 31 | return false; 32 | } 33 | 34 | uint64_t image_base = 0; 35 | if(!file.read(reinterpret_cast(&image_base), sizeof(image_base))) { 36 | return false; 37 | } 38 | 39 | uint32_t function_count = 0; 40 | if(!file.read(reinterpret_cast(&function_count), 41 | sizeof(function_count))) { 42 | return false; 43 | } 44 | 45 | for(auto i = 0u; i < function_count; ++i) { 46 | uint32_t function_rva = 0; 47 | if(!file.read(reinterpret_cast(&function_rva), 48 | sizeof(function_rva))) { 49 | return false; 50 | } 51 | 52 | uint16_t block_count = 0; 53 | if(!file.read(reinterpret_cast(&block_count), 54 | sizeof(block_count))) { 55 | return false; 56 | } 57 | 58 | uint64_t function_base = image_base + function_rva; 59 | _functions[function_base] = FunctionBlocks(); 60 | 61 | FunctionBlocks &blocks = _functions[function_base]; 62 | for(auto j = 0u; j < block_count; ++j) { 63 | 64 | uint32_t block_rva = 0; 65 | if(!file.read(reinterpret_cast(&block_rva), 66 | sizeof(block_rva))) { 67 | return false; 68 | } 69 | 70 | uint32_t block_size = 0; 71 | if(!file.read(reinterpret_cast(&block_size), 72 | sizeof(block_size))) { 73 | return false; 74 | } 75 | 76 | uint16_t instruction_count = 0; 77 | if(!file.read(reinterpret_cast(&instruction_count), 78 | sizeof(instruction_count))) { 79 | return false; 80 | } 81 | 82 | BlockDescriptor block; 83 | block.block_start = image_base + block_rva; 84 | block.block_end = block.block_start + block_size; 85 | block.instruction_count = instruction_count; 86 | 87 | blocks.push_back(block); 88 | } 89 | } 90 | 91 | return true; 92 | } 93 | 94 | bool DumpFile::parse_no_return(const string &no_return_file) { 95 | _functions_no_return.clear(); 96 | 97 | ifstream file(no_return_file.c_str()); 98 | if(!file) { 99 | return false; 100 | } 101 | 102 | string line; 103 | while(getline(file, line)) { 104 | uintptr_t current; 105 | istringstream parser(line); 106 | 107 | 
parser >> hex >> current; 108 | if(parser.fail()) { 109 | return false; 110 | } 111 | 112 | _functions_no_return.insert(current); 113 | } 114 | 115 | return true; 116 | } 117 | -------------------------------------------------------------------------------- /src/expression.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "expression.h" 3 | 4 | using namespace std; 5 | 6 | bool Operation::optimizer() { 7 | /* Operations are the only expressions that could possibly be 8 | * ambiguous. We need to make sure to sanitize and optimize them so that 9 | * the state updating logic stays reasonable. */ 10 | 11 | /* This is done in-place (so as to avoid having to return a new shared_ptr 12 | * every time). If an operation can be reduced to one argument (e.g., 1+2), 13 | * it should be transformed into (result + 0), such that it can be 14 | * pruned later (e.g., 3+0). 15 | * 16 | * Make sure that all destructive updates on sub-expressions are immutable, 17 | * i.e., yield new objects that reflect the changes. This is done in order 18 | * not to propagate changes to other expressions referencing the modified 19 | * expression. 20 | * 21 | * TODO: Verify that there are not any destructive expression updates on 22 | * anything not covered here (optimizer?). Expression::propagate 23 | * should not be affected. 24 | * TODO: Consider shared_ptr.reset. 25 | * TODO: Arithmetic simplifications do not care for signedness or 26 | * operand size. 27 | */ 28 | 29 | /* TODO: Do we want to allow basic sanitization first (i.e., without 30 | * optimization)? 31 | */ 32 | _changed = has_changed(); 33 | if(!_changed) { 34 | return false; 35 | } 36 | 37 | bool dirty = false; 38 | sanitize(); 39 | 40 | if(_operation == OperationSub) { 41 | // (X - X) = (0 + 0). 
42 | if(*_lhs == *_rhs) { 43 | _lhs = std::make_shared(0); 44 | _rhs = std::make_shared(0); 45 | 46 | _operation = OperationAdd; 47 | dirty = true; 48 | } 49 | 50 | // (const_a - const_b) = (const_c - 0). 51 | else if(_lhs->type() == _rhs->type() && 52 | _lhs->type() == ExpressionConstant) { 53 | 54 | auto &lhs = static_cast(*_lhs); 55 | auto &rhs = static_cast(*_rhs); 56 | 57 | if(rhs.value()) { 58 | _lhs = make_shared(lhs.value() - rhs.value()); 59 | _rhs = make_shared(0); 60 | 61 | dirty = true; 62 | } 63 | } 64 | } else if(_operation == OperationAdd) { 65 | // (const_a + const_b) = (const_c + 0). 66 | 67 | // TODO: Generalize this for other operators. 68 | if(_lhs->type() == _rhs->type() && 69 | _lhs->type() == ExpressionConstant) { 70 | 71 | auto &lhs = static_cast(*_lhs); 72 | auto &rhs = static_cast(*_rhs); 73 | 74 | if(lhs.value() && rhs.value()) { 75 | _lhs = make_shared(lhs.value() + rhs.value()); 76 | _rhs = make_shared(0); 77 | 78 | dirty = true; 79 | } 80 | } 81 | } 82 | 83 | auto is_add_or_sub = [&](const OperationType &op) { 84 | return op == OperationAdd || op == OperationSub; 85 | }; 86 | 87 | if(is_add_or_sub(_operation)) { 88 | // (X +- const_a), const_a > UINT64_MAX / 2 + 1 = (X -+ (-const_a)). 89 | if(_rhs->type() == ExpressionConstant && 90 | static_cast(*_rhs).value() > 91 | UINT64_MAX / 2 + 1) { 92 | 93 | switch(_operation) { 94 | case OperationAdd: 95 | _operation = OperationSub; 96 | break; 97 | 98 | case OperationSub: 99 | _operation = OperationAdd; 100 | break; 101 | 102 | default: 103 | __builtin_unreachable(); 104 | } 105 | 106 | auto &rhs = static_cast(*_rhs); 107 | _rhs = make_shared(-rhs.value()); 108 | 109 | dirty = true; 110 | } 111 | 112 | // ((X +- const_1) +- const_2) = (X +- const_3). 
113 | if(_lhs->type() == ExpressionOperation && 114 | _rhs->type() == ExpressionConstant) { 115 | 116 | const auto &lhs = static_cast(*_lhs); 117 | auto &rhs = static_cast(*_rhs); 118 | 119 | /* Folding the two constants is only valid if the inner operation is an addition or subtraction as well. */ if(is_add_or_sub(lhs._operation) && 120 | lhs._rhs->type() == ExpressionConstant) { 121 | /* Read everything needed from the inner operation first: `lhs` refers to the object owned by `_lhs`, which may be destroyed once `_lhs` is reassigned below (if `_lhs` held the last reference). */ auto value = static_cast(*lhs._rhs).value(); 122 | 123 | bool inner = lhs._operation == OperationAdd; 124 | bool outer = _operation == OperationAdd; 125 | 126 | /* Differing signs subtract the outer constant, equal signs add it. */ if(inner != outer) { 127 | value -= rhs.value(); 128 | } else { 129 | value += rhs.value(); 130 | } 131 | 132 | _operation = lhs._operation; 133 | /* Keep the inner left-hand side alive in a local before replacing `_lhs`; `lhs` must not be used after this point. */ auto inner_lhs = lhs._lhs; // TODO: Copy here? 134 | _lhs = inner_lhs; 135 | _rhs = make_shared(value); 136 | 137 | dirty = true; 138 | } 139 | } 140 | } 141 | 142 | _changed = dirty; 143 | 144 | sanitize(); 145 | return dirty; 146 | } 147 | 148 | void Operation::sanitize() { 149 | _lhs->optimize(); 150 | _rhs->optimize(); 151 | 152 | switch(_operation) { 153 | case OperationSub: 154 | // Non-commutative, nothing we can do about that. 155 | return; 156 | 157 | default: 158 | break; 159 | } 160 | 161 | // Highest precedence on LHS. 162 | if(_lhs->type() < _rhs->type()) { 163 | _lhs.swap(_rhs); 164 | } 165 | 166 | // On equal types, decide using operator<. 167 | if(_lhs->type() == _rhs->type()) { 168 | if(*_lhs < *_rhs) { 169 | _lhs.swap(_rhs); 170 | } 171 | } 172 | } 173 | 174 | bool Expression::operation_equal(const Expression &other) const { 175 | /* All we do here is assert that (x +- 0) == x remains true. 176 | * FIXME: Integrate this better. 
*/ 175 | 176 | const auto &operation = static_cast(*this); 177 | return operation.equals_inner(other); 178 | } 179 | -------------------------------------------------------------------------------- /src/external_functions.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "external_functions.h" 3 | 4 | 5 | using namespace std; 6 | 7 | 8 | bool ExternalFunctions::is_finalized() const { 9 | return _is_finalized; 10 | } 11 | 12 | 13 | bool ExternalFunctions::parse(const string &funcs_file) { 14 | 15 | // Make sure that we parse files only if object was not finalized yet. 16 | if(_is_finalized) { 17 | throw runtime_error("Parse attempt after ExternalFunctions object was"\ 18 | " finalized."); 19 | } 20 | 21 | ifstream file(funcs_file + "_funcs.txt"); 22 | if(!file) { 23 | return false; 24 | } 25 | 26 | string line; 27 | 28 | // Parse first line manually. 29 | getline(file, line); 30 | istringstream header_parser(line); 31 | 32 | // First entry of file is always the module name. 33 | string module_name; 34 | header_parser >> module_name; 35 | if(header_parser.fail()) { 36 | return false; 37 | } 38 | 39 | while(getline(file, line)) { 40 | istringstream parser(line); 41 | uint64_t func_addr = 0; 42 | string func_name; 43 | 44 | parser >> hex >> func_addr; 45 | if(parser.fail()) { 46 | return false; 47 | } 48 | 49 | parser >> func_name; 50 | if(parser.fail()) { 51 | return false; 52 | } 53 | 54 | ExternalFunction func; 55 | func.addr = func_addr; 56 | func.name = func_name; 57 | func.module_name = module_name; 58 | 59 | // NOTE: Index is a unique identifier for all functions in all 60 | // external modules. 
61 | func.index = _index; 62 | 63 | _external_functions.push_back(func); 64 | assert(_external_functions[_index].module_name == func.module_name 65 | && _external_functions[_index].addr == func.addr 66 | && _external_functions[_index].name == func.name 67 | && _external_functions[_index].index == func.index 68 | && "Index of function and index in vector are not the same."); 69 | 70 | _index++; 71 | } 72 | 73 | return true; 74 | } 75 | 76 | 77 | void ExternalFunctions::finalize() { 78 | 79 | // Make sure that we only finalize this object once. 80 | if(_is_finalized) { 81 | throw runtime_error("ExternalFunctions object was already finalized."); 82 | } 83 | _is_finalized = true; 84 | 85 | // Build external functions map for this module. The map stores pointers to the vector's elements; `parse` refuses to run once the object is finalized, so it cannot grow the vector afterwards. 86 | for(auto &it : _external_functions) { 87 | _external_functions_map[it.name] = &it; 88 | } 89 | 90 | return; 91 | } 92 | 93 | 94 | const ExternalFunction* ExternalFunctions::get_external_function( 95 | const string &name) const { 96 | 97 | // Make sure that the object is finalized. 98 | if(!_is_finalized) { 99 | throw runtime_error("ExternalFunctions object was not finalized."); 100 | } 101 | 102 | // Single lookup: unknown names yield nullptr. 103 | const auto needle = _external_functions_map.find(name); 104 | if(needle == _external_functions_map.cend()) { 105 | return nullptr; 106 | } 107 | return needle->second; 108 | } 109 | 110 | 111 | const ExternalFunction* ExternalFunctions::get_external_function( 112 | const std::string &module_name, 113 | uint64_t func_addr) const { 114 | 115 | // Make sure that the object is finalized. 
114 | if(!_is_finalized) { 115 | throw runtime_error("ExternalFunctions object was not finalized."); 116 | } 117 | 118 | for(const auto &it : _external_functions) { 119 | if(it.module_name == module_name 120 | && it.addr == func_addr) { 121 | return &it; 122 | } 123 | } 124 | return nullptr; 125 | } 126 | -------------------------------------------------------------------------------- /src/function.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "function.h" 3 | #include "path_builder.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | using namespace std; 15 | 16 | typedef set SeenBlocks; 17 | typedef map PathBlocks; 18 | 19 | /*! 20 | * \brief Creates a new instance of the class, explicitly setting its entry 21 | * address. 22 | * \param entry The (virtual) address where the function starts originally. 23 | * \param branch_threshold The number of branches inside a function that 24 | * trigger a switch to a more lightweight traversal method. Defaults to at 25 | * least 15. 26 | */ 27 | Function::Function(uintptr_t entry, uint8_t branch_threshold) 28 | : _entry(entry), _branch_threshold(branch_threshold) { 29 | } 30 | 31 | /*! 32 | * \brief Initial policy that checks feasibility of traversing all paths. 33 | * 34 | * This (initial) policy simply counts the blocks that end in a 35 | * `TerminatorJcc` terminator, which gives a rough estimate of the number of 36 | * paths through the function. 37 | * 38 | * \return `true`, if the function contains fewer such branches than the 39 | * branch threshold passed to the constructor; `false` otherwise. 40 | */ 41 | bool Function::can_be_fully_traversed() const { 42 | auto branches = 0; 43 | for(const auto &kv : _function_blocks) { 44 | if(kv.second->get_terminator().type == TerminatorJcc) { 45 | branches++; 46 | } 47 | } 48 | 49 | return branches < _branch_threshold; 50 | } 51 | 52 | /*! 53 | * \brief Traverses all paths through the function. 
53 | * 54 | * Traverses all possible paths through the function and calls the supplied 55 | * callback on each encountered basic block. If it is infeasible to traverse 56 | * all possible paths (as determined by `can_be_fully_traversed`), logic 57 | * switches to a lightweight path generation algorithm. For this to work 58 | * properly, `block_predicate` has to be set. 59 | * 60 | * The traversal callback is passed several parameters: 61 | * 62 | * 1. a user-defined parameter (which can be used, e.g., to pass an additional 63 | * structure with data associated with the traversal, like a this pointer), 64 | * 2. the path describing the position of the currently visited basic block, 65 | * 3. the currently visited basic block itself, a `Block` reference. 66 | * 67 | * \param callback The function that is to be called on each basic block visit. 68 | * \param block_predicate A callback which decides whether a basic block is 69 | * deemed "interesting" for the current analysis and should be visited during 70 | * the traversal. 71 | * \param user_defined A user-defined parameter that is passed to the callback. 72 | * \return Always `true`. 73 | * 74 | * \todo Decide if the return type still makes sense in the current setup. 75 | */ 76 | bool Function::traverse(const TraversalCallback &block_callback, 77 | const BlockPredicate &block_predicate, 78 | const PathCallback &path_callback, 79 | void *user_defined) 80 | const { 81 | if(can_be_fully_traversed()) { 82 | throw runtime_error("Path callbacks are not yet implemented for full" 83 | " traversals."); 84 | return traverser(block_callback, user_defined); 85 | } 86 | 87 | if(!block_predicate) { 88 | throw runtime_error("Cannot switch to lightweight policy without a " 89 | "valid block predicate."); 90 | } 91 | 92 | PathBuilder builder(*this, user_defined); 93 | const auto paths = builder.build_paths(block_predicate); 94 | 95 | // FIXME: This duplicates code from below. 
96 | for(const auto &path : paths) { 97 | 98 | Path current_path; 99 | const Terminator *previous_terminator = nullptr; 100 | 101 | for(const auto &block : path) { 102 | const auto &needle = _function_blocks.find(block); 103 | if(needle == _function_blocks.cend()) { 104 | break; 105 | } 106 | 107 | if(previous_terminator) { 108 | bool annotation = false; 109 | 110 | const auto &terminator = *previous_terminator; 111 | switch(terminator.type) { 112 | case TerminatorJump: 113 | annotation = true; 114 | break; 115 | 116 | case TerminatorJcc: { 117 | const auto current = needle->second->get_address(); 118 | if(terminator.target == current) { 119 | annotation = false; 120 | break; 121 | } 122 | 123 | assert(terminator.fall_through == current && 124 | "Cannot reconstruct annotation."); 125 | annotation = true; 126 | } 127 | 128 | case TerminatorFallthrough: 129 | case TerminatorCallUnresolved: 130 | case TerminatorCall: 131 | annotation = true; 132 | break; 133 | 134 | default: 135 | throw runtime_error("Lightweight policy: This should not" 136 | " happen."); 137 | break; 138 | } 139 | 140 | current_path.push_back(annotation); 141 | } 142 | 143 | previous_terminator = &needle->second->get_terminator(); 144 | if(!block_callback(user_defined, current_path, *needle->second)) { 145 | /* The callback has decided not to follow this path any 146 | * further. 
*/ 147 | break; 148 | } 149 | } 150 | 151 | if(path_callback) { 152 | path_callback(user_defined, current_path); 153 | } 154 | } 155 | 156 | return true; 157 | } 158 | 159 | bool Function::traverser(const TraversalCallback &callback, 160 | void *user_defined) const { 161 | 162 | deque> work_list; 163 | 164 | PathBlocks path_seen_blocks; 165 | work_list.push_back(make_pair(_entry, Path())); 166 | 167 | while(!work_list.empty()) { 168 | const auto pair = work_list.back(); 169 | work_list.pop_back(); 170 | 171 | uintptr_t current_address = pair.first; 172 | const Path &path = pair.second; 173 | 174 | SeenBlocks &seen_blocks = path_seen_blocks[path]; 175 | if(seen_blocks.find(current_address) != seen_blocks.cend()) { 176 | continue; 177 | } 178 | 179 | const auto &needle = _function_blocks.find(current_address); 180 | if(needle == _function_blocks.cend()) { 181 | /* We cannot find a block with the given address that lies within 182 | * the current function. This is most likely the case due to the 183 | * invocation of a non-returning call. We must not follow these 184 | * anyway. */ 185 | continue; 186 | } 187 | 188 | seen_blocks.insert(current_address); 189 | if(!callback(user_defined, path, *needle->second)) { 190 | /* The callback has decided not to follow this path any further. */ 191 | continue; 192 | } 193 | 194 | // The current path may be extended by a true or false annotation. 
195 | Path path_false = path, path_true = path; 196 | 197 | path_false.push_back(false); 198 | path_true.push_back(true); 199 | 200 | const Terminator &terminator = needle->second->get_terminator(); 201 | 202 | switch(terminator.type) { 203 | case TerminatorJump: 204 | work_list.push_back(make_pair(terminator.target, path_true)); 205 | path_seen_blocks[path_true] = seen_blocks; 206 | break; 207 | 208 | case TerminatorJcc: 209 | work_list.push_back(make_pair(terminator.target, path_false)); 210 | path_seen_blocks[path_false] = seen_blocks; 211 | 212 | case TerminatorFallthrough: 213 | case TerminatorCallUnresolved: 214 | case TerminatorCall: 215 | work_list.push_back(make_pair(terminator.fall_through, path_true)); 216 | path_seen_blocks[path_true] = seen_blocks; 217 | break; 218 | 219 | default: 220 | break; 221 | } 222 | } 223 | 224 | return true; 225 | } 226 | 227 | void Function::add_block(uintptr_t address, IRSB *block, 228 | const Terminator &terminator) { 229 | _function_blocks[address] = make_shared(address, block, terminator); 230 | } 231 | -------------------------------------------------------------------------------- /src/got.cpp: -------------------------------------------------------------------------------- 1 | #include "got.h" 2 | 3 | using namespace std; 4 | 5 | GotMap import_got(const string &target_file) { 6 | 7 | ifstream file(target_file + "_got.txt"); 8 | if(!file) { 9 | throw runtime_error("Opening .got file failed."); 10 | } 11 | 12 | string line; 13 | 14 | // Parse first line manually. 15 | getline(file, line); 16 | istringstream header_parser(line); 17 | 18 | // First entry of file is always the module name. 
19 | string import_module_name; 20 | header_parser >> import_module_name; 21 | if(header_parser.fail()) { 22 | throw runtime_error("Parsing .got file failed."); 23 | } 24 | 25 | GotMap got_map; 26 | 27 | while(getline(file, line)) { 28 | istringstream parser(line); 29 | uint64_t got_entry_addr = 0; 30 | uint64_t got_entry_content = 0; 31 | 32 | parser >> hex >> got_entry_addr; 33 | if(parser.fail()) { 34 | throw runtime_error("Parsing .got file failed."); 35 | } 36 | 37 | parser >> hex >> got_entry_content; 38 | if(parser.fail()) { 39 | throw runtime_error("Parsing .got file failed."); 40 | } 41 | 42 | got_map[got_entry_addr] = got_entry_content; 43 | } 44 | 45 | return got_map; 46 | } 47 | -------------------------------------------------------------------------------- /src/idata.cpp: -------------------------------------------------------------------------------- 1 | #include "idata.h" 2 | 3 | using namespace std; 4 | 5 | IDataMap import_idata(const string &target_file) { 6 | 7 | ifstream file(target_file + "_idata.txt"); 8 | if(!file) { 9 | throw runtime_error("Opening .idata file failed."); 10 | } 11 | 12 | string line; 13 | 14 | // Parse first line manually. 15 | getline(file, line); 16 | istringstream header_parser(line); 17 | 18 | // First entry of file is always the module name. 
19 | string import_module_name; 20 | header_parser >> import_module_name; 21 | if(header_parser.fail()) { 22 | throw runtime_error("Parsing .idata file failed."); 23 | } 24 | 25 | IDataMap idata_map; 26 | 27 | while(getline(file, line)) { 28 | istringstream parser(line); 29 | uint64_t idata_entry_addr = 0; 30 | string idata_entry_content; 31 | 32 | parser >> hex >> idata_entry_addr; 33 | if(parser.fail()) { 34 | throw runtime_error("Parsing .idata file failed."); 35 | } 36 | 37 | parser >> idata_entry_content; 38 | if(parser.fail()) { 39 | throw runtime_error("Parsing .idata file failed."); 40 | } 41 | 42 | idata_map[idata_entry_addr] = idata_entry_content; 43 | } 44 | 45 | return idata_map; 46 | } 47 | -------------------------------------------------------------------------------- /src/mapped_elf.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "mapped_elf.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | /*! 11 | * \brief Constructs a new `MappedElf` instance from a given ELF file. 12 | * \param elf_file The path to the ELF file which is to be mapped. 13 | * 14 | * If the file cannot be found or seems to be malformed, a `runtime_error` 15 | * exception is thrown. 16 | */ 17 | MappedElf::MappedElf(const string &elf_file) { 18 | ifstream file(elf_file.c_str(), ios::binary); 19 | if(!file) { 20 | throw runtime_error("Cannot open file " + elf_file + "."); 21 | } 22 | 23 | _buffer = vector(istreambuf_iterator(file), 24 | istreambuf_iterator()); 25 | _e_header = reinterpret_cast(_buffer.data()); 26 | _p_header = reinterpret_cast(_buffer.data() + 27 | _e_header->e_phoff); 28 | 29 | // FIXME: We rely on compilers a bit here, this can be generalized. 
30 | for(auto i = 0; i < _e_header->e_phnum; ++i) { 31 | const auto ¤t = _p_header[i]; 32 | if(current.p_type == PT_LOAD && current.p_flags & PF_X) { 33 | _base = current.p_vaddr; 34 | _size = current.p_memsz; 35 | break; 36 | } 37 | } 38 | 39 | if(!_size) { 40 | throw runtime_error("Malformed input file " + elf_file + "."); 41 | } 42 | } 43 | 44 | /*! 45 | * \brief Implements indexing access, effectively accessing the memory lieing 46 | * at the given virtual address. 47 | * \param address (Virtual) address of memory to access. 48 | * \return A pointer to the memory requested, if it lies at the given virtual 49 | * address. `nullptr` else. 50 | */ 51 | const uint8_t *MappedElf::operator[](const uintptr_t address) const { 52 | if(address < _base || address > _base + _size) { 53 | return nullptr; 54 | } 55 | 56 | const uint8_t *data = reinterpret_cast(_buffer.data()); 57 | return data + address - _base; 58 | } 59 | -------------------------------------------------------------------------------- /src/mapped_pe.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "mapped_pe.h" 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace std; 10 | 11 | /*! 12 | * \brief Constructs a new `MappedPe` instance from a given PE file. 13 | * \param pe_file The path to the PE file which is to be mapped. 14 | * 15 | * If the file cannot be found or seems to be malformed, a `runtime_error` 16 | * exception is thrown. 
17 | */ 18 | MappedPe::MappedPe(const string &pe_file) { 19 | ifstream file(pe_file.c_str(), ios::binary); 20 | if(!file) { 21 | throw runtime_error("Cannot open file " + pe_file + "."); 22 | } 23 | 24 | _buffer = vector(istreambuf_iterator(file), 25 | istreambuf_iterator()); 26 | 27 | 28 | _mz_header = reinterpret_cast(_buffer.data()); 29 | if(_mz_header->magic != MZ_MAGIC) { 30 | throw runtime_error("Malformed input file " + pe_file + "."); 31 | } 32 | 33 | _pe_header = reinterpret_cast(_buffer.data() + _mz_header->peaddr); 34 | if(_pe_header->magic != PE_MAGIC) { 35 | throw runtime_error("Malformed input file " + pe_file + "."); 36 | } 37 | 38 | // Magic value for optional header lies directly behind PE header. 39 | uint16_t *opt_hdr_magic = reinterpret_cast(_buffer.data() 40 | + _mz_header->peaddr 41 | + sizeof(pe_hdr)); 42 | 43 | if(*opt_hdr_magic == IMAGE_FILE_OPT_PE32_MAGIC) { 44 | _pe32_opt_header = reinterpret_cast(_buffer.data() 45 | + _mz_header->peaddr 46 | + sizeof(pe_hdr)); 47 | } 48 | else if(*opt_hdr_magic == IMAGE_FILE_OPT_PE32_PLUS_MAGIC) { 49 | _pe32_plus_opt_header = reinterpret_cast( 50 | _buffer.data() 51 | + _mz_header->peaddr 52 | + sizeof(pe_hdr)); 53 | } 54 | else { 55 | throw runtime_error("Malformed input file " + pe_file + "."); 56 | } 57 | 58 | for(uint32_t i = 0; i < _pe_header->sections; i++) { 59 | _text_section_header = reinterpret_cast( 60 | _buffer.data() 61 | + _mz_header->peaddr 62 | + sizeof(pe_hdr) 63 | + _pe_header->opt_hdr_size 64 | + (i*sizeof(section_header))); 65 | 66 | // FIXME: We rely on compilers a bit here, this can be generalized. 
67 | if(strcmp(_text_section_header->name, ".text") == 0) { 68 | _base = _text_section_header->virtual_address; 69 | _size = _text_section_header->virtual_size; 70 | _file_addr = _text_section_header->data_addr; 71 | _file_size = _text_section_header->raw_data_size; 72 | break; 73 | } 74 | } 75 | 76 | if(!_size) { 77 | throw runtime_error("Malformed input file " + pe_file + "."); 78 | } 79 | } 80 | 81 | 82 | /*! 83 | * \brief Implements indexing access, effectively accessing the memory lieing 84 | * at the given virtual address. 85 | * \param address (Virtual) address of memory to access. 86 | * \return A pointer to the memory requested, if it lies at the given virtual 87 | * address. `nullptr` else. 88 | */ 89 | const uint8_t *MappedPe::operator[](const uintptr_t address) const { 90 | if(address < _base || address > _base + _size) { 91 | return nullptr; 92 | } 93 | 94 | const uint8_t *data = reinterpret_cast(_buffer.data()); 95 | 96 | return data + address - _base + _file_addr; 97 | } 98 | -------------------------------------------------------------------------------- /src/module_plt.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "module_plt.h" 3 | 4 | 5 | using namespace std; 6 | 7 | 8 | ModulePlt::ModulePlt(const string &module_name) 9 | : _module_name(module_name) { 10 | 11 | } 12 | 13 | 14 | bool ModulePlt::parse(const string &plt_file) { 15 | 16 | ifstream file(plt_file + "_plt.txt"); 17 | if(!file) { 18 | return false; 19 | } 20 | 21 | string line; 22 | 23 | // Parse first line manually. 24 | getline(file, line); 25 | istringstream header_parser(line); 26 | 27 | // First entry of file is always the module name. 28 | string module_name; 29 | header_parser >> module_name; 30 | if(header_parser.fail()) { 31 | return false; 32 | } 33 | 34 | // Only allow to parse a .plt file for this module. 
35 | if(_module_name != module_name) { 36 | return false; 37 | } 38 | 39 | while(getline(file, line)) { 40 | istringstream parser(line); 41 | uint64_t func_addr = 0; 42 | string func_name; 43 | 44 | parser >> hex >> func_addr; 45 | if(parser.fail()) { 46 | return false; 47 | } 48 | 49 | parser >> func_name; 50 | if(parser.fail()) { 51 | return false; 52 | } 53 | 54 | PltEntry plt_entry; 55 | plt_entry.addr = func_addr; 56 | plt_entry.func_name = func_name; 57 | 58 | _plt_entries[func_addr] = plt_entry; 59 | } 60 | 61 | return true; 62 | } 63 | 64 | 65 | const PltEntry* ModulePlt::get_plt_entry(uint64_t addr) const { 66 | if(_plt_entries.find(addr) == _plt_entries.cend()) { 67 | return nullptr; 68 | } 69 | return &(_plt_entries.at(addr)); 70 | } 71 | 72 | 73 | const PltEntry* ModulePlt::get_plt_entry(const string func_name) const { 74 | for(const auto &kv : _plt_entries) { 75 | if(kv.second.func_name == func_name) { 76 | return &(kv.second); 77 | } 78 | } 79 | return nullptr; 80 | } 81 | -------------------------------------------------------------------------------- /src/new_operators.cpp: -------------------------------------------------------------------------------- 1 | #include "new_operators.h" 2 | 3 | using namespace std; 4 | 5 | 6 | NewOperators::NewOperators(const string &module_name, 7 | const VTableFile &vtable_file, 8 | const VTableHierarchies &vtable_hierarchies) 9 | : _module_name(module_name), 10 | _vtable_file(vtable_file), 11 | _vtable_hierarchies(vtable_hierarchies) {} 12 | 13 | 14 | void NewOperators::add_op_new_candidate(const NewOperator &new_op_candidate) { 15 | if(_op_new_candidates.find(new_op_candidate.addr) 16 | == _op_new_candidates.cend()) { 17 | 18 | _op_new_candidates[new_op_candidate.addr] = new_op_candidate; 19 | } 20 | else { 21 | for(uint32_t idx : new_op_candidate.vtbl_idxs) { 22 | _op_new_candidates[new_op_candidate.addr].vtbl_idxs.insert(idx); 23 | } 24 | } 25 | } 26 | 27 | 28 | void NewOperators::export_new_operators(const 
string &target_dir) { 29 | 30 | stringstream temp_str; 31 | temp_str << target_dir << "/" << _module_name << ".new_operators"; 32 | string target_file = temp_str.str(); 33 | 34 | ofstream new_op_file; 35 | new_op_file.open(target_file); 36 | 37 | new_op_file << _module_name << endl; 38 | 39 | const HierarchiesVTable &vtbl_hierarchies = 40 | _vtable_hierarchies.get_hierarchies(); 41 | 42 | for(const auto &new_op : _op_new_candidates) { 43 | unordered_set possible_vtables; 44 | for(uint32_t idx : new_op.second.vtbl_idxs) { 45 | 46 | // Copy also the whole vtable hierarchy into the possible 47 | // vtable set. 48 | if(possible_vtables.find(idx) == possible_vtables.cend()) { 49 | for(const auto &dep_vtables : vtbl_hierarchies) { 50 | if(dep_vtables.find(idx) != dep_vtables.cend()) { 51 | for(uint32_t dep_vtbl_idx : dep_vtables) { 52 | possible_vtables.insert(dep_vtbl_idx); 53 | } 54 | break; 55 | } 56 | } 57 | } 58 | possible_vtables.insert(idx); 59 | } 60 | 61 | new_op_file << hex << new_op.second.addr 62 | << " " 63 | << hex << new_op.second.size 64 | << " "; 65 | 66 | for(uint32_t idx : possible_vtables) { 67 | const auto &temp = _vtable_file.get_vtable(idx); 68 | new_op_file << temp.module_name 69 | << ":" 70 | << hex << temp.addr 71 | << " "; 72 | } 73 | 74 | new_op_file << endl; 75 | } 76 | new_op_file.close(); 77 | } 78 | 79 | 80 | const OperatorNewAddrMap& NewOperators::get_new_operators() const { 81 | return _op_new_candidates; 82 | } 83 | 84 | 85 | void NewOperators::copy_new_operators(const OperatorNewAddrMap &new_ops) { 86 | for(const auto &new_op : new_ops) { 87 | add_op_new_candidate(new_op.second); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/path_builder.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "path_builder.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | /*! 
11 | * \brief Creates a new instance of the class. 12 | * \param function The function for which paths should be constructed. 13 | * \param user_defined A user-defined parameter that is passed to the block 14 | * predicate. Defaults to `nullptr`. 15 | * \param node_threshold The number of interesting nodes a function has to 16 | * exceed such that simpler paths are generated. Defaults to 20. 17 | * \see `PathBuilder::build_paths` 18 | */ 19 | PathBuilder::PathBuilder(const Function &function, void *user_defined, 20 | uint8_t node_threshold) 21 | : _function(function), _user_defined(user_defined), 22 | _node_threshold(node_threshold) { 23 | } 24 | 25 | using Successors = array; 26 | 27 | Successors get_successors(const Block &block) { 28 | Successors result = { 0, 0 }; 29 | 30 | const Terminator &terminator = block.get_terminator(); 31 | switch(terminator.type) { 32 | case TerminatorJump: 33 | result[0] = terminator.target; 34 | break; 35 | 36 | case TerminatorJcc: 37 | result[0] = terminator.target; 38 | 39 | case TerminatorFallthrough: 40 | case TerminatorCallUnresolved: 41 | case TerminatorCall: 42 | result[1] = terminator.fall_through; 43 | break; 44 | 45 | default: 46 | break; 47 | } 48 | 49 | return result; 50 | } 51 | 52 | bool is_exit_block(void*, const Block &block) { 53 | const auto &terminator = block.get_terminator(); 54 | return terminator.is_tail || terminator.type == TerminatorReturn; 55 | } 56 | 57 | template 58 | bool contains_duplicates(const deque &container) { 59 | set witness(container.cbegin(), container.cend()); 60 | return witness.size() != container.size(); 61 | } 62 | 63 | deque paths(const PathsByNode &p) { 64 | deque collect; 65 | for(const auto &kv : p) { 66 | collect.push_back(kv.second); 67 | } 68 | 69 | return collect; 70 | } 71 | 72 | //! 73 | //! \brief Constructs the paths. 74 | //! \param predicate A predicate which decides whether the given basic block is 75 | //! deemed "interesting" and should be visited by the generated paths. 
76 | //! \return A set of distinct concrete paths through the function which try to 77 | //! visit as much of the interesting nodes as possible. 78 | //! 79 | //! The algorithms determines sub-paths from the root node to an interesting 80 | //! block, from any interesting block to another and from an interesting block 81 | //! to a return block. Then, it tries to combine them such that the number of 82 | //! interesting blocks visited by the constructed path is maximized. 83 | //! 84 | //! If the number of interesting basic blocks exceeds `_node_threshold`, a 85 | //! simpler algorithm is used. The algorithm falls back to merely yielding 86 | //! paths that visit _one_ interesting block (being optimistic about other 87 | //! interesting blocks lying on that very same path). 88 | //! 89 | set PathBuilder::build_paths(BlockPredicate predicate) const { 90 | const auto blocks = _function.get_blocks(); 91 | const auto root = _function.get_entry(); 92 | 93 | // Get paths from root to interesting nodes. 94 | auto root_to_interesting = breadth_first(blocks, root, predicate); 95 | 96 | // Get paths from interesting node to exit. 97 | map> interesting_to_exit; 98 | 99 | for(const auto &kv : root_to_interesting) { 100 | auto to_exit = breadth_first(blocks, kv.first, &is_exit_block); 101 | interesting_to_exit[kv.first] = paths(to_exit); 102 | } 103 | 104 | bool safety_threshold = root_to_interesting.size() > _node_threshold; 105 | 106 | /* Get paths from one interesting node to another (distinct) node; done 107 | * only if the safe threshold is not exceeded. 
108 | */ 109 | map interesting_to_interesting; 110 | if(!safety_threshold) { 111 | for(const auto &kv : root_to_interesting) { 112 | const auto source = kv.first; 113 | 114 | for(const auto &kv_dst : root_to_interesting) { 115 | const auto destination = kv_dst.first; 116 | if(source == destination) { 117 | continue; 118 | } 119 | 120 | auto to_other = breadth_first(blocks, source, 121 | [&](void*, const Block &block) -> bool { 122 | return block.get_address() == destination; 123 | }, true); 124 | 125 | interesting_to_interesting[source] = to_other; 126 | } 127 | } 128 | } 129 | 130 | // Stitch together possible paths. 131 | set paths; 132 | 133 | /* TODO: Constraint the number of interesting nodes to chain on a 134 | * single path. */ 135 | struct Entry { 136 | ConcretePath path; 137 | set visited; 138 | }; 139 | 140 | queue work; 141 | for(const auto &kv : root_to_interesting) { 142 | Entry entry; 143 | entry.path = kv.second; 144 | entry.visited.insert(kv.first); 145 | 146 | work.push(entry); 147 | } 148 | 149 | while(!work.empty()) { 150 | auto current = work.front(); 151 | work.pop(); 152 | 153 | auto tails = interesting_to_exit[current.path.back()]; 154 | for(const auto &tail : tails) { 155 | deque head = current.path; 156 | head.pop_back(); 157 | 158 | for(const auto &t : tail) { 159 | head.push_back(t); 160 | } 161 | 162 | paths.insert(head); 163 | } 164 | 165 | /* Safety threshold: Only visit one interesting node and hope that 166 | * the others happen to lie on the same path. 
167 | */ 168 | if(safety_threshold) { 169 | continue; 170 | } 171 | 172 | for(const auto &kv : interesting_to_interesting[current.path.back()]) { 173 | const auto &next_node = kv.first; 174 | 175 | auto needle = current.visited.find(next_node); 176 | if(needle != current.visited.cend()) { 177 | continue; 178 | } 179 | 180 | Entry next; 181 | const auto &path_to_next = kv.second; 182 | 183 | next.path = current.path; 184 | next.path.pop_back(); 185 | 186 | for(const auto &p : path_to_next) { 187 | next.path.push_back(p); 188 | } 189 | 190 | next.visited = current.visited; 191 | next.visited.insert(next_node); 192 | 193 | work.push(next); 194 | } 195 | } 196 | 197 | /* If there are no interesting blocks, collect all paths from the root 198 | * node to any exit block. 199 | */ 200 | if(paths.empty()) { 201 | auto root_to_exit = breadth_first(blocks, root, &is_exit_block); 202 | for(const auto &kv : root_to_exit) { 203 | if(!contains_duplicates(kv.second)) { 204 | paths.insert(kv.second); 205 | } 206 | } 207 | } 208 | 209 | return paths; 210 | } 211 | 212 | struct Node { 213 | uintptr_t address; 214 | shared_ptr block; 215 | uint16_t distance; 216 | 217 | Node *parent; 218 | }; 219 | 220 | PathsByNode PathBuilder::breadth_first(const BlockMap &blocks, 221 | uintptr_t root, 222 | BlockPredicate predicate, 223 | bool terminate_on_match) const { 224 | map nodes; 225 | PathsByNode result; 226 | 227 | for(const auto &kv : blocks) { 228 | Node current; 229 | current.address = kv.first; 230 | current.block = kv.second; 231 | current.distance = static_cast(-1); 232 | current.parent = nullptr; 233 | 234 | nodes[kv.first] = current; 235 | } 236 | 237 | nodes[root].distance = 0; 238 | queue q; 239 | 240 | // Explicitly check if the root node is interesting as well. 
241 | if(predicate(_user_defined, *nodes[root].block)) { 242 | auto &path = result[root]; 243 | path.push_front(root); 244 | } 245 | 246 | q.push(&nodes[root]); 247 | while(!q.empty()) { 248 | Node ¤t = *q.front(); 249 | q.pop(); 250 | 251 | const auto &adjacent = get_successors(*current.block); 252 | for(const auto &neighbor : adjacent) { 253 | if(!neighbor) { 254 | continue; 255 | } 256 | 257 | auto &n = nodes[neighbor]; 258 | if(n.distance == static_cast(-1)) { 259 | n.distance = current.distance + 1; 260 | n.parent = ¤t; 261 | 262 | if(predicate(_user_defined, *n.block)) { 263 | auto &path = result[n.address]; 264 | path.push_front(n.address); 265 | 266 | auto *parent = n.parent; 267 | while(parent) { 268 | path.push_front(parent->address); 269 | parent = parent->parent; 270 | } 271 | 272 | if(terminate_on_match) { 273 | return result; 274 | } 275 | } 276 | 277 | q.push(&n); 278 | } 279 | } 280 | } 281 | 282 | return result; 283 | } 284 | -------------------------------------------------------------------------------- /src/return_value.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "return_value.h" 3 | 4 | 5 | using namespace std; 6 | 7 | 8 | FctReturnValuesFile::FctReturnValuesFile(const string &module_name, 9 | const VTableFile &vtable_file, 10 | const ModulePlt &module_plt, 11 | const ExternalFunctions &external_funcs) 12 | : _module_name(module_name), 13 | _vtable_file(vtable_file), 14 | _module_plt(module_plt), 15 | _external_funcs(external_funcs) { 16 | 17 | } 18 | 19 | 20 | void FctReturnValuesFile::add_return_value(uint64_t func_addr, 21 | const ReturnValue &return_value) { 22 | lock_guard _(_mtx); 23 | 24 | if(_return_values_map.find(func_addr) == _return_values_map.cend()) { 25 | FctReturnValues temp; 26 | temp.func_addr = func_addr; 27 | temp.return_values.push_back(return_value); 28 | _return_values_map[func_addr] = temp; 29 | } 30 | 31 | else { 32 | FctReturnValues &temp = 
_return_values_map[func_addr]; 33 | 34 | for(const auto &it : temp.return_values) { 35 | if(it.func_addr == return_value.func_addr 36 | && *(it.content) == *(return_value.content)) { 37 | return; 38 | } 39 | } 40 | 41 | // TODO 42 | // Check if return value does already exist. 43 | temp.return_values.push_back(return_value); 44 | } 45 | } 46 | 47 | 48 | void FctReturnValuesFile::add_active_vtable(uint64_t func_addr, 49 | const VTableActive &active_vtable) { 50 | lock_guard _(_mtx); 51 | 52 | if(_return_values_map.find(func_addr) == _return_values_map.cend()) { 53 | FctReturnValues temp; 54 | temp.func_addr = func_addr; 55 | temp.active_vtables.push_back(active_vtable); 56 | _return_values_map[func_addr] = temp; 57 | } 58 | 59 | else { 60 | FctReturnValues &temp = _return_values_map[func_addr]; 61 | 62 | // TODO 63 | // Check if active vtable does already exist. 64 | temp.active_vtables.push_back(active_vtable); 65 | } 66 | } 67 | 68 | 69 | void FctReturnValuesFile::export_return_values(const string &target_dir) { 70 | lock_guard _(_mtx); 71 | 72 | stringstream temp_str; 73 | temp_str << target_dir << "/" << _module_name << ".ret_values"; 74 | string target_file = temp_str.str(); 75 | 76 | ofstream ret_file; 77 | ret_file.open(target_file, ios::out|ios::binary); 78 | 79 | // First entry of file is always the module name. 80 | ret_file.write(_module_name.c_str(), _module_name.length() + 1); 81 | 82 | for(const auto &kv : _return_values_map) { 83 | // Write function address. 
84 | ret_file.write(reinterpret_cast(&kv.first), 85 | sizeof(kv.first)); 86 | 87 | uint32_t number = kv.second.return_values.size(); 88 | ret_file.write(reinterpret_cast(&number), 89 | sizeof(number)); 90 | for(const auto &it : kv.second.return_values) { 91 | serialize(it.content, ret_file); 92 | } 93 | 94 | number = kv.second.active_vtables.size(); 95 | ret_file.write(reinterpret_cast(&number), 96 | sizeof(number)); 97 | for(const auto &it : kv.second.active_vtables) { 98 | 99 | serialize(it.vtbl_ptr_loc, ret_file); 100 | 101 | const VTable &vtable = _vtable_file.get_vtable(it.index); 102 | 103 | // Write actual vtable representation to file. 104 | // Length + 1 to have \0 at the end. 105 | ret_file.write(vtable.module_name.c_str(), 106 | vtable.module_name.length() + 1); 107 | ret_file.write(reinterpret_cast(&vtable.addr), 108 | sizeof(vtable.addr)); 109 | } 110 | } 111 | 112 | ret_file.close(); 113 | } 114 | 115 | 116 | void FctReturnValuesFile::import_ext_return_values(const string &module_file) { 117 | lock_guard _(_mtx); 118 | 119 | // Make sure that the object is finalized. 120 | if(_is_finalized) { 121 | throw runtime_error("FctReturnValuesFile object is finalized."); 122 | } 123 | 124 | ifstream ret_file(module_file + ".ret_values", ios::in|ios::binary); 125 | if(!ret_file) { 126 | throw runtime_error("Could not open return values file."); 127 | } 128 | 129 | // First entry of file is always the module name. 130 | string import_module_name; 131 | // Read C-like string. 132 | getline(ret_file, import_module_name, '\0'); 133 | 134 | while(!ret_file.eof()) { 135 | 136 | uint64_t func_addr; 137 | ret_file.read(reinterpret_cast(&func_addr), 138 | sizeof(func_addr)); 139 | 140 | // EOF is only present after first read instruction that does 141 | // reach it. 
142 | if(ret_file.eof()) { 143 | break; 144 | } 145 | 146 | FctReturnValues func_ret_values; 147 | 148 | uint32_t number; 149 | ret_file.read(reinterpret_cast(&number), 150 | sizeof(number)); 151 | 152 | for(uint32_t i = 0; i < number; i++) { 153 | ReturnValue ret_value; 154 | ret_value.content = unserialize(ret_file); 155 | ret_value.func_addr = 0; 156 | func_ret_values.return_values.push_back(ret_value); 157 | } 158 | 159 | ret_file.read(reinterpret_cast(&number), 160 | sizeof(number)); 161 | 162 | for(uint32_t i = 0; i < number; i++) { 163 | 164 | VTableActive act_vtable; 165 | act_vtable.from_callee = true; 166 | act_vtable.from_caller = false; 167 | act_vtable.vtbl_ptr_loc = unserialize(ret_file); 168 | 169 | string vtbl_module_name; 170 | // Read C-like string. 171 | getline(ret_file, vtbl_module_name, '\0'); 172 | 173 | uint64_t vtable_addr; 174 | ret_file.read(reinterpret_cast(&vtable_addr), 175 | sizeof(vtable_addr)); 176 | const VTable &vtable = _vtable_file.get_vtable(vtbl_module_name, 177 | vtable_addr); 178 | act_vtable.index = vtable.index; 179 | func_ret_values.active_vtables.push_back(act_vtable); 180 | } 181 | 182 | // Get corresponding function of return value. 183 | const ExternalFunction *ext_func; 184 | ext_func = _external_funcs.get_external_function(import_module_name, 185 | func_addr); 186 | if(ext_func == nullptr) { 187 | throw runtime_error("Imported return value does not belong "\ 188 | "to a function."); 189 | } 190 | 191 | // Add external return value. 192 | ExternalFctReturnValues ext_ret_value; 193 | ext_ret_value.func_return_values = func_ret_values; 194 | ext_ret_value.ext_func = ext_func; 195 | _ext_return_values.push_back(ext_ret_value); 196 | } 197 | 198 | ret_file.close(); 199 | } 200 | 201 | 202 | const FctReturnValues* FctReturnValuesFile::get_plt_return_values_ptr( 203 | uint64_t addr) const { 204 | lock_guard _(_mtx); 205 | // Make sure that the object is finalized. 
206 | if(!_is_finalized) { 207 | throw runtime_error("FctReturnValuesFile object was not finalized."); 208 | } 209 | 210 | if(_plt_return_values_ptr_map.find(addr) 211 | != _plt_return_values_ptr_map.cend()) { 212 | return _plt_return_values_ptr_map.at(addr); 213 | } 214 | return nullptr; 215 | } 216 | 217 | 218 | const FctReturnValues* FctReturnValuesFile::get_ext_return_values_ptr( 219 | const string &module_name, 220 | uint64_t func_addr) const { 221 | lock_guard _(_mtx); 222 | // Make sure that the object is finalized. 223 | if(!_is_finalized) { 224 | throw runtime_error("FctReturnValuesFile object was not finalized."); 225 | } 226 | 227 | for(const auto &it : _ext_return_values) { 228 | if(it.ext_func->addr == func_addr 229 | && it.ext_func->module_name == module_name) { 230 | return &(it.func_return_values); 231 | } 232 | } 233 | return nullptr; 234 | } 235 | 236 | 237 | ExtReturnValues FctReturnValuesFile::get_return_values() const { 238 | lock_guard _(_mtx); 239 | 240 | // Make sure that the object is finalized. 241 | if(!_is_finalized) { 242 | throw runtime_error("FctReturnValuesFile object was not finalized."); 243 | } 244 | 245 | return _ext_return_values; 246 | } 247 | 248 | 249 | bool FctReturnValuesFile::is_finalized_ext_return_values() const { 250 | lock_guard _(_mtx); 251 | return _is_finalized; 252 | } 253 | 254 | 255 | void FctReturnValuesFile::finalize_ext_return_values() { 256 | lock_guard _(_mtx); 257 | 258 | // Make sure that the object is finalized. 
259 | if(_is_finalized) { 260 | throw runtime_error("FctReturnValuesFile object is finalized."); 261 | } 262 | 263 | // Set up a map that contains only pointer to .plt entry return values 264 | for(uint32_t i = 0; i < _ext_return_values.size(); i++) { 265 | ExternalFctReturnValues &ext_ret_value = _ext_return_values[i]; 266 | 267 | const PltEntry *plt_entry; 268 | plt_entry = _module_plt.get_plt_entry(ext_ret_value.ext_func->name); 269 | if(plt_entry == nullptr) { 270 | continue; 271 | } 272 | 273 | // Use the plt address as function address for the return value. 274 | ext_ret_value.func_return_values.func_addr = plt_entry->addr; 275 | 276 | // Set all function addresses of the return values to the plt entry. 277 | for(auto &it : ext_ret_value.func_return_values.return_values) { 278 | it.func_addr = plt_entry->addr; 279 | } 280 | 281 | _plt_return_values_ptr_map[plt_entry->addr] = 282 | &(ext_ret_value.func_return_values); 283 | } 284 | 285 | _is_finalized = true; 286 | } 287 | -------------------------------------------------------------------------------- /src/serialization.cpp: -------------------------------------------------------------------------------- 1 | #include "serialization.h" 2 | 3 | 4 | using namespace std; 5 | 6 | 7 | void serialize(ExpressionPtr exp, ostream &output) { 8 | 9 | switch(exp->type()) { 10 | 11 | case ExpressionUnknown: { 12 | output.put(ExpressionUnknown); 13 | break; 14 | } 15 | 16 | case ExpressionConstant: { 17 | output.put(ExpressionConstant); 18 | Constant &temp = static_cast(*exp); 19 | uint64_t value = temp.value(); 20 | output.write(reinterpret_cast(&value), 21 | sizeof(value)); 22 | break; 23 | } 24 | 25 | case ExpressionSymbolic: { 26 | output.put(ExpressionSymbolic); 27 | Symbolic &temp = static_cast(*exp); 28 | // Length + 1 to have \0 at the end. 
29 | output.write(temp.name().c_str(), 30 | temp.name().length() + 1); 31 | break; 32 | } 33 | 34 | case ExpressionTemporary: { 35 | output.put(ExpressionTemporary); 36 | Temporary &temp = static_cast(*exp); 37 | uint32_t id = temp.id(); 38 | output.write(reinterpret_cast(&id), 39 | sizeof(id)); 40 | break; 41 | } 42 | 43 | case ExpressionRegister: { 44 | output.put(ExpressionRegister); 45 | Register &temp = static_cast(*exp); 46 | uint32_t offset = temp.offset(); 47 | output.write(reinterpret_cast(&offset), 48 | sizeof(offset)); 49 | break; 50 | } 51 | 52 | case ExpressionIndirection: { 53 | output.put(ExpressionIndirection); 54 | Indirection &temp = static_cast(*exp); 55 | serialize(temp.address(), output); 56 | break; 57 | } 58 | 59 | case ExpressionOperation: { 60 | output.put(ExpressionOperation); 61 | Operation &temp = static_cast(*exp); 62 | output.put(temp.operation()); 63 | serialize(temp.lhs(), output); 64 | serialize(temp.rhs(), output); 65 | break; 66 | } 67 | 68 | default: 69 | throw runtime_error("Do not know how to serialize "\ 70 | "expression type."); 71 | } 72 | } 73 | 74 | 75 | ExpressionPtr unserialize(istream &input) { 76 | 77 | switch(input.get()) { 78 | 79 | case ExpressionUnknown: { 80 | Unknown temp; 81 | return make_shared(temp); 82 | break; 83 | } 84 | 85 | case ExpressionConstant: { 86 | uint64_t value; 87 | input.read(reinterpret_cast(&value), 88 | sizeof(value)); 89 | Constant temp(value); 90 | return make_shared(temp); 91 | } 92 | 93 | case ExpressionSymbolic: { 94 | string name; 95 | // Read C-like string. 
96 | getline(input, name, '\0'); 97 | Symbolic temp(name); 98 | return make_shared(temp); 99 | } 100 | 101 | case ExpressionTemporary: { 102 | uint32_t id; 103 | input.read(reinterpret_cast(&id), 104 | sizeof(id)); 105 | Temporary temp(id); 106 | return make_shared(temp); 107 | } 108 | 109 | case ExpressionRegister: { 110 | uint32_t offset; 111 | input.read(reinterpret_cast(&offset), 112 | sizeof(offset)); 113 | Register temp(offset); 114 | return make_shared(temp); 115 | } 116 | 117 | case ExpressionIndirection: { 118 | Indirection temp(unserialize(input)); 119 | return make_shared(temp); 120 | break; 121 | } 122 | 123 | case ExpressionOperation: { 124 | OperationType op_type = static_cast(input.get()); 125 | Operation temp(unserialize(input), 126 | op_type, 127 | unserialize(input)); 128 | return make_shared(temp); 129 | break; 130 | } 131 | 132 | default: 133 | break; 134 | } 135 | 136 | throw runtime_error("Do not know how to unserialize "\ 137 | "expression type."); 138 | } 139 | -------------------------------------------------------------------------------- /src/state.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "state.h" 3 | 4 | #include 5 | #include 6 | 7 | using namespace std; 8 | 9 | map> State::_initial_values = [] { 10 | map> result; 11 | 12 | for(const auto &r : AMD64_REGISTERS) { 13 | const auto &initial = make_shared(format_initial_value(r)); 14 | result[r] = initial; 15 | } 16 | 17 | return result; 18 | }(); 19 | 20 | /*! 21 | * \brief Constructs a new `State` and initializes it (unless specified 22 | * otherwise). 23 | * 24 | * \param initialize `True` per default. Whether the state should be fully 25 | * initialized. \see `set_initial_state` 26 | * 27 | * \todo This is specific to x86_64. Possibly better to provide a generic 28 | * base class first. 
29 | */ 30 | State::State(bool initialize) 31 | : _unknown(make_shared()) { 32 | if(initialize) { 33 | set_initial_state(); 34 | } 35 | } 36 | 37 | /*! 38 | * \brief Prints the state to the given output stream. 39 | * \param stream The output stream to which the state is printed. 40 | * \param state The `State` itself. 41 | * \return The (modified) output stream `stream`. 42 | */ 43 | ostream &operator<<(ostream &stream, const State &state) { 44 | for(const auto &kv : state._state) { 45 | stream << *kv.first << " -> " << *kv.second << endl; 46 | } 47 | 48 | return stream; 49 | } 50 | 51 | const string State::format_initial_value(size_t offset) { 52 | // TODO: Precompute this. 53 | const auto &needle = AMD64_DISPLAY_REGISTERS.find(offset); 54 | if(needle != AMD64_DISPLAY_REGISTERS.cend()) { 55 | return "init_" + needle->second; 56 | } 57 | 58 | stringstream stream; 59 | string symbolic_value; 60 | 61 | stream << "init_r" << dec << offset; 62 | stream >> symbolic_value; 63 | return symbolic_value; 64 | } 65 | 66 | /*! 67 | * \brief Formats a return value of a call at the given address. 68 | * \param address The address of the call whose return value shall be formatted. 69 | * \return A unique string to be used for a symbol describing the return value 70 | * of the call. 71 | */ 72 | const string State::format_return_value(uintptr_t address) { 73 | stringstream stream; 74 | stream << "return_" << hex << address; 75 | 76 | return stream.str(); 77 | } 78 | 79 | /*! 80 | * \brief Initializes the state in respect to x86_64 registers. 81 | * 82 | * Each register gets assigned a symbol depicting its initial value. 83 | * 84 | * \see `State::format_initial_value` 85 | */ 86 | void State::set_initial_state() { 87 | for(const auto &r : AMD64_REGISTERS) { 88 | // Copy necessary here? 89 | const auto &dst = make_shared(r); 90 | const auto &src = make_shared(format_initial_value(r)); 91 | 92 | _state[dst] = src; 93 | } 94 | } 95 | 96 | /*! 
97 | * \brief Removes all System V scratch registers from the state. 98 | * 99 | * \todo Support different calling conventions. 100 | * \see `system_v_scratch` 101 | */ 102 | void State::purge_scratch_registers(FileFormatType file_format) { 103 | switch(file_format) { 104 | case FileFormatELF64: 105 | for(const auto &scratch : system_v_scratch) { 106 | _state.erase(scratch); 107 | } 108 | break; 109 | case FileFormatPE64: 110 | for(const auto &scratch : msvc_scratch) { 111 | _state.erase(scratch); 112 | } 113 | break; 114 | default: 115 | throw runtime_error("Do not know how to "\ 116 | "handle file format."); 117 | } 118 | } 119 | 120 | /*! 121 | * \brief Merge another `State` into this one. 122 | * 123 | * Already existing entries are overwritten with the values of the other state. 124 | * 125 | * \param other The state that is merged into this. 126 | */ 127 | void State::merge(const State &other) { 128 | for(const auto &kv : other._state) { 129 | _state[kv.first] = kv.second; 130 | } 131 | } 132 | 133 | /*! 134 | * \brief Helper function to return all memory indirections recorded in the 135 | * state. 136 | * 137 | * The first entry of the pair denotes the memory address, whereas the second 138 | * entry denotes the value that is to be written. 139 | * 140 | * \return A vector of key/value pairs describing a memory access. 141 | */ 142 | const Expressions State::get_memory_accesses() const { 143 | Expressions result; 144 | for(const auto &kv : _state) { 145 | if(kv.first->type() == ExpressionIndirection) { 146 | result.push_back(kv); 147 | } 148 | } 149 | 150 | return result; 151 | } 152 | 153 | /*! 154 | * \brief Helper function to optimize the representation of the state. 155 | * \param do_purge_unchanged `true`, if unchanged registers (those still set to 156 | * their initial value) shall be removed from state. 157 | * 158 | * \todo Purging unchanged registers may lead to issues regarding the binding 159 | * of `rsp`. It is assumed to be set in some cases. 
We should rather check for 160 | * existence and throw `runtime_error` on mismatch. 161 | */ 162 | void State::optimize(bool do_purge_unchanged) { 163 | // Transitively kill expressions affected by a self-reference. 164 | for(const auto &kv: _state) { 165 | if(kv.second->contains(*kv.first)) { 166 | kill(kv.first, kv.second); 167 | } 168 | } 169 | 170 | /* Purging unchanged registers will fail when propagating states (e.g., 171 | * an AbiHint requires rsp to be defined and cannot implement calling 172 | * conventions properly if it has been purged. Disabled by default. 173 | */ 174 | if(optimizer() && do_purge_unchanged) { 175 | purge_unchanged(); 176 | } 177 | } 178 | 179 | bool State::optimizer(bool do_purge_unchanged) { 180 | bool dirty = false; 181 | if(do_purge_unchanged) { 182 | dirty |= purge_unchanged(); 183 | } 184 | 185 | dirty |= purge_uninteresting(); 186 | 187 | optimize_entries(); 188 | dirty |= propagate(); 189 | 190 | return dirty; 191 | } 192 | 193 | bool State::propagate() { 194 | bool dirty = false; 195 | 196 | // Propagate any values which are also keys in the same state. 197 | for(const auto &kv : _state) { 198 | const auto &value = kv.second; 199 | 200 | const auto &needle = _state.find(value); 201 | if(needle != _state.cend()) { 202 | _state[kv.first] = needle->second; 203 | dirty = true; 204 | } 205 | } 206 | 207 | // Propagate sub-expressions. 208 | for(const auto &kv : _state) { 209 | for(const auto &p : _state) { 210 | dirty |= p.first->propagate(kv.first, kv.second); 211 | dirty |= p.second->propagate(kv.first, kv.second); 212 | } 213 | } 214 | 215 | return dirty; 216 | } 217 | 218 | void State::optimize_entries() { 219 | for(auto i = _state.begin(); i != _state.end(); ++i) { 220 | i->first->optimize(); 221 | i->second->optimize(); 222 | } 223 | } 224 | 225 | /* Set explicitly to Unknown instead of deleting and keep Unknown:s? As not to 226 | * mess up logic trying to get a value regardless. Need to think about this. 
227 | */ 228 | bool State::purge_uninteresting() { 229 | bool dirty = false; 230 | 231 | for(auto i = _state.begin(); i != _state.end();) { 232 | Expression &key = *i->first; 233 | Expression &value = *i->second; 234 | 235 | if(key.type() == ExpressionTemporary) { 236 | i = _state.erase(i); 237 | dirty = true; 238 | continue; 239 | } 240 | 241 | // We want to keep Unknown:s for register values only. 242 | if(value.type() == ExpressionUnknown && 243 | key.type() != ExpressionRegister) { 244 | i = _state.erase(i); 245 | dirty = true; 246 | continue; 247 | } 248 | 249 | if(key.type() == ExpressionRegister) { 250 | auto reg = static_cast(key); 251 | if(reg.offset() > OFFB_R15 && reg.offset() != OFFB_RIP) { 252 | 253 | i = _state.erase(i); 254 | dirty = true; 255 | continue; 256 | } 257 | } 258 | 259 | ++i; 260 | } 261 | 262 | return dirty; 263 | } 264 | 265 | bool State::purge_unchanged() { 266 | bool dirty = false; 267 | 268 | for(auto i = _state.begin(); i != _state.end();) { 269 | Expression &key = *i->first; 270 | Expression &value = *i->second; 271 | 272 | if(key.type() == ExpressionRegister) { 273 | auto offset = static_cast(key).offset(); 274 | const auto &initial = _initial_values.find(offset); 275 | 276 | if(initial != _initial_values.cend()) { 277 | if(*initial->second == value) { 278 | i = _state.erase(i); 279 | dirty = true; 280 | continue; 281 | } 282 | } 283 | } 284 | 285 | ++i; 286 | } 287 | 288 | return dirty; 289 | } 290 | 291 | InternalState::iterator State::erase(const InternalState::iterator &iterator) { 292 | return _state.erase(iterator); 293 | } 294 | 295 | size_t State::erase(const InternalState::key_type &key) { 296 | return _state.erase(key); 297 | } 298 | 299 | bool State::find(const InternalState::key_type &key, 300 | arg_out InternalState::iterator &iterator) { 301 | InternalState::iterator needle = _state.find(key); 302 | if(needle == _state.end()) { 303 | return false; 304 | } 305 | 306 | iterator = needle; 307 | return true; 308 | } 309 
| 310 | bool State::find(const InternalState::key_type &key, 311 | arg_out InternalState::const_iterator &iterator) const { 312 | InternalState::const_iterator needle = _state.find(key); 313 | if(needle == _state.cend()) { 314 | return false; 315 | } 316 | 317 | iterator = needle; 318 | return true; 319 | } 320 | 321 | kill_results State::kill_helper(const ExpressionPtr &key, 322 | const ExpressionPtr &value) { 323 | kill_results affected; 324 | 325 | for(auto i = _state.begin(), e = _state.end(); i != e; ++i) { 326 | if(i->second->contains(*key) || i->second->contains(*value)) { 327 | if(i->second->type() != ExpressionUnknown) { 328 | affected.insert(i->first); 329 | i->second = _unknown; 330 | } 331 | } 332 | } 333 | 334 | return affected; 335 | } 336 | 337 | void State::kill(const ExpressionPtr &key, const ExpressionPtr &value) { 338 | _state[key] = _unknown; 339 | 340 | kill_results affected = { key }; 341 | while(!affected.empty()) { 342 | kill_results work_list; 343 | 344 | for(const auto &a : affected) { 345 | const auto &killed = kill_helper(a, value); 346 | 347 | for(const auto &k : killed) { 348 | work_list.insert(k); 349 | } 350 | } 351 | 352 | affected = work_list; 353 | } 354 | } 355 | 356 | void State::update(const InternalState::key_type &key, 357 | const InternalState::mapped_type &value) { 358 | _state[key] = value; 359 | } 360 | -------------------------------------------------------------------------------- /src/vcall.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "vcall.h" 3 | #include "expression.h" 4 | 5 | using namespace std; 6 | 7 | 8 | VCallFile::VCallFile(const string &module_name, 9 | const VTableHierarchies &vtable_hierarchies, 10 | const VTableFile &vtable_file) 11 | : _module_name(module_name), 12 | _vtable_hierarchies(vtable_hierarchies), 13 | _vtable_file(vtable_file) {} 14 | 15 | 16 | const VCalls &VCallFile::get_vcalls() const { 17 | lock_guard _(_mtx); 18 | 19 | return 
_vcalls; 20 | } 21 | 22 | 23 | void VCallFile::add_possible_vcall(uint64_t addr) { 24 | lock_guard _(_mtx); 25 | 26 | _possible_vcalls.insert(addr); 27 | } 28 | 29 | 30 | void VCallFile::add_vcall(uint64_t addr, uint32_t index, size_t entry_index) { 31 | lock_guard _(_mtx); 32 | 33 | // Check if virtual callsite is already known. 34 | for(auto &it : _vcalls) { 35 | if(it.addr == addr) { 36 | it.indexes.insert(index); 37 | 38 | // Do a sanity check that the entry indexes have not changed. 39 | // (Intuition: Can never be different for the same vcall). 40 | if(it.entry_index != entry_index) { 41 | cerr << "Different entry index at vcall 0x" 42 | << hex << addr << endl; 43 | cerr << "Old entry index: " 44 | << dec << it.entry_index << endl; 45 | cerr << "New entry index: " 46 | << dec << entry_index << endl; 47 | throw runtime_error("Different vtable entry indexes "\ 48 | "for same vcall."); 49 | } 50 | 51 | return; 52 | } 53 | } 54 | 55 | VCall vcall; 56 | vcall.indexes.insert(index); 57 | vcall.addr = addr; 58 | vcall.entry_index = entry_index; 59 | _vcalls.push_back(vcall); 60 | } 61 | 62 | 63 | void VCallFile::export_vcalls(const string &target_dir) { 64 | lock_guard _(_mtx); 65 | 66 | stringstream temp_str; 67 | temp_str << target_dir << "/" << _module_name << ".vcalls"; 68 | string target_file = temp_str.str(); 69 | 70 | ofstream vcall_file; 71 | vcall_file.open(target_file); 72 | 73 | stringstream temp_str_ext; 74 | temp_str_ext << target_dir << "/" << _module_name << ".vcalls_extended"; 75 | string target_file_ext = temp_str_ext.str(); 76 | 77 | ofstream vcall_file_ext; 78 | vcall_file_ext.open(target_file_ext); 79 | 80 | vcall_file << _module_name << endl; 81 | vcall_file_ext << _module_name << endl; 82 | 83 | const HierarchiesVTable &hierarchies = 84 | _vtable_hierarchies.get_hierarchies(); 85 | for(const auto &it : _vcalls) { 86 | 87 | // Do not consider all vtables used in this vcall as in one hierarchy. 
88 | unordered_set allowed_vtables; 89 | for(const auto idx : it.indexes) { 90 | for(const auto dependent_vtbls : hierarchies) { 91 | if(dependent_vtbls.find(idx) != dependent_vtbls.cend()) { 92 | for(uint32_t hier_idx : dependent_vtbls) { 93 | allowed_vtables.insert(hier_idx); 94 | } 95 | } 96 | } 97 | 98 | // Add vtable index manually afterwards in order to also export 99 | // vtables that do not belong to a hierarchy. 100 | allowed_vtables.insert(idx); 101 | } 102 | 103 | // Address of vcall in module. 104 | vcall_file << hex << it.addr; 105 | vcall_file_ext << hex << it.addr; 106 | 107 | // Index into vtable that is used by vcall. 108 | vcall_file_ext << " " << hex << it.entry_index; 109 | 110 | // Export the hierarchy in the following format: 111 | // 112 | for(const auto idx : allowed_vtables) { 113 | const VTable& temp = _vtable_file.get_vtable(idx); 114 | 115 | // Export vtable address. 116 | vcall_file << " " 117 | << temp.module_name 118 | << ":" 119 | << hex << temp.addr; 120 | vcall_file_ext << " " 121 | << temp.module_name 122 | << ":" 123 | << hex << temp.addr; 124 | 125 | // Export target function address. 126 | uint64_t target_func = 0; 127 | if(temp.entries.size() > it.entry_index) { 128 | target_func = temp.entries.at(it.entry_index); 129 | } 130 | vcall_file_ext << " " 131 | << temp.module_name 132 | << ":" 133 | << hex << target_func; 134 | } 135 | 136 | vcall_file << endl; 137 | vcall_file_ext << endl; 138 | } 139 | 140 | vcall_file.close(); 141 | vcall_file_ext.close(); 142 | 143 | stringstream temp_str_poss; 144 | temp_str_poss << target_dir << "/" << _module_name << ".vcalls_possible"; 145 | string target_file_poss = temp_str_poss.str(); 146 | 147 | ofstream vcall_file_poss; 148 | vcall_file_poss.open(target_file_poss); 149 | 150 | vcall_file_poss << _module_name << endl; 151 | 152 | for(const auto &it : _possible_vcalls) { 153 | 154 | // Address of possible vcall in module. 
155 | vcall_file_poss << hex << it << endl; 156 | } 157 | 158 | vcall_file_poss.close(); 159 | } 160 | -------------------------------------------------------------------------------- /src/vex.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "vex.h" 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | 12 | Vex::Vex() 13 | : _block(nullptr) { 14 | 15 | AllocationListener listener = &Vex::incoming_allocation; 16 | LibVEX_registerAllocationListener(this, listener); 17 | 18 | LibVEX_default_VexControl(&_control); 19 | 20 | _control.iropt_level = 2; 21 | _control.iropt_verbosity = 0; 22 | _control.iropt_unroll_thresh = 0; 23 | 24 | _control.guest_chase_thresh = 0; 25 | _control.guest_max_insns = MAX_INSTRUCTIONS; 26 | 27 | LibVEX_Init(&Vex::failure_exit, &Vex::log_bytes, 0, &_control); 28 | } 29 | 30 | Vex::~Vex() { 31 | for(auto allocation : _allocations) { 32 | free(allocation); 33 | } 34 | } 35 | 36 | void Vex::incoming_allocation(void *user, void *allocation) { 37 | auto self = reinterpret_cast(user); 38 | self->manage_allocation(allocation); 39 | } 40 | 41 | void Vex::manage_allocation(void *allocation) { 42 | _allocations.push_back(allocation); 43 | } 44 | 45 | // FIXME: This is specific to AMD-64. 
46 | void Vex::initialize() { 47 | memset(&_args, 0, sizeof(_args)); 48 | memset(&_abi_info, 0, sizeof(_abi_info)); 49 | memset(&_arch_info, 0, sizeof(_arch_info)); 50 | 51 | LibVEX_default_VexAbiInfo(&_abi_info); 52 | LibVEX_default_VexArchInfo(&_arch_info); 53 | 54 | _abi_info.guest_amd64_assume_fs_is_const = true; 55 | _abi_info.guest_amd64_assume_gs_is_const = true; 56 | 57 | _args.callback_opaque = this; 58 | 59 | _args.instrument1 = &Vex::instrument; 60 | _args.chase_into_ok = &Vex::chase_into_ok; 61 | _args.needs_self_check = &Vex::needs_self_check; 62 | 63 | const auto dispatch = reinterpret_cast(&Vex::dispatch); 64 | _args.disp_cp_chain_me_to_fastEP = dispatch; 65 | _args.disp_cp_chain_me_to_slowEP = dispatch; 66 | _args.disp_cp_xassisted = dispatch; 67 | _args.disp_cp_xindir = dispatch; 68 | 69 | _args.guest_extents = &_guest_extents; 70 | } 71 | 72 | void Vex::initialize_amd64() { 73 | initialize(); 74 | 75 | _arch_info.endness = VexEndnessLE; 76 | _arch_info.hwcaps = VEX_HWCAPS_AMD64_SSE3 | 77 | VEX_HWCAPS_AMD64_CX16 | 78 | VEX_HWCAPS_AMD64_LZCNT | 79 | VEX_HWCAPS_AMD64_AVX | 80 | VEX_HWCAPS_AMD64_RDTSCP | 81 | VEX_HWCAPS_AMD64_BMI | 82 | VEX_HWCAPS_AMD64_AVX2; 83 | 84 | _abi_info.guest_stack_redzone_size = 128; 85 | 86 | _args.arch_host = VexArchAMD64; 87 | _args.arch_guest = VexArchAMD64; 88 | 89 | _args.archinfo_host = _arch_info; 90 | _args.archinfo_guest = _arch_info; 91 | 92 | _args.abiinfo_both = _abi_info; 93 | } 94 | 95 | void Vex::log_bytes(const char *bytes, size_t number_bytes) { 96 | for(auto i = 0u; i < number_bytes; ++i) { 97 | printf("%c", bytes[i]); 98 | } 99 | } 100 | 101 | IRSB *Vex::instrument(void *callback_opaque, IRSB *block, 102 | const VexGuestLayout*, const VexGuestExtents*, 103 | const VexArchInfo*, IRType, IRType) { 104 | 105 | Vex &self = *static_cast(callback_opaque); 106 | self._block = deepCopyIRSB(block); 107 | 108 | return block; 109 | } 110 | 111 | /*! 
112 | * \brief Translates bytes at a certain address into a VEX block of type IRSB. 113 | * 114 | * Translates the bytes given by array `bytes` which is assumed to lie at 115 | * virtual address `guest_address`. Outputs the virtual address of the end of 116 | * the translated block in parameter `vex_block_end`. 117 | * 118 | * \param bytes The bytes that are to be processed. 119 | * \param guest_address The virtual address the bytes originally lie at. 120 | * \param instruction_count The number of instructions VEX shall translate. 121 | * \param[out] vex_block_end The virtual address of the end of the translated 122 | * block. 123 | * \return A reference to the translated VEX block (of type IRSB). Due to the 124 | * way VEX works internally, this reference lives as long as no further 125 | * translation request is made and hence should be deep-copied immediately. 126 | * 127 | * \todo VEX may not respect `instruction_count` properly. This should be 128 | * handled by the `Translator` class though. 129 | */ 130 | const IRSB &Vex::translate(const uint8_t *bytes, uintptr_t guest_address, 131 | size_t instruction_count, 132 | arg_out uintptr_t *vex_block_end) { 133 | initialize_amd64(); 134 | _control.guest_max_insns = instruction_count; 135 | 136 | _args.guest_bytes = bytes; 137 | _args.guest_bytes_addr = guest_address; 138 | 139 | const auto result = LibVEX_Translate(&_args); 140 | if(!(result.status & result.VexTransOK)) { 141 | stringstream stream; 142 | stream << "Cannot translate code at address " 143 | << hex << reinterpret_cast(bytes) 144 | << " (guest address " << hex << guest_address << ")."; 145 | 146 | throw runtime_error(stream.str()); 147 | } 148 | 149 | if(vex_block_end) { 150 | // FIXME: Assert only one guest extent was used. 
151 | *vex_block_end = guest_address + _args.guest_extents->len[0]; 152 | } 153 | 154 | return *_block; 155 | } 156 | -------------------------------------------------------------------------------- /src/vtable_file.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "vtable_file.h" 3 | 4 | using namespace std; 5 | 6 | /*! 7 | * \brief Constructs a new `VtableFile` object. 8 | * \param vtable_file The filename of the `_vtables.txt` file (as produced 9 | * by the exporter script). 10 | */ 11 | VTableFile::VTableFile(const string &this_module_name) { 12 | _this_module_name = this_module_name; 13 | _vtables.clear(); 14 | _index = 0; 15 | } 16 | 17 | bool VTableFile::parse(const string &vtables_file) { 18 | 19 | // Make sure that we parse files only if object was not finalized yet. 20 | if(_is_finalized) { 21 | throw runtime_error("Parse attempt after VTableFile object was"\ 22 | " finalized."); 23 | } 24 | 25 | ifstream file(vtables_file + "_vtables.txt"); 26 | if(!file) { 27 | return false; 28 | } 29 | 30 | string line; 31 | 32 | // Parse first line manually. 33 | getline(file, line); 34 | istringstream header_parser(line); 35 | 36 | // First entry of file is always the module name. 37 | string module_name; 38 | header_parser >> module_name; 39 | if(header_parser.fail()) { 40 | return false; 41 | } 42 | 43 | // Check if we already parsed a vtables file for this module. 
44 | if(_managed_modules.find(module_name) != _managed_modules.cend()) { 45 | throw runtime_error("A vtables file for this module was already "\ 46 | "parsed."); 47 | } 48 | 49 | bool has_vtables = false; 50 | while(getline(file, line)) { 51 | has_vtables = true; 52 | istringstream parser(line); 53 | uint64_t vtable_addr = 0; 54 | uint64_t vtable_entry = 0; 55 | int offset_to_top = 0; 56 | 57 | parser >> hex >> vtable_addr; 58 | if(parser.fail()) { 59 | return false; 60 | } 61 | 62 | parser >> dec >> offset_to_top; 63 | if(parser.fail()) { 64 | return false; 65 | } 66 | 67 | VTable vtable; 68 | vtable.addr = vtable_addr; 69 | vtable.offset_to_top = offset_to_top; 70 | vtable.module_name = module_name; 71 | 72 | // NOTE: Index is a unique identifier for all vtables in all modules. 73 | vtable.index = _index; 74 | 75 | while(parser >> hex >> vtable_entry) { 76 | if(parser.fail()) { 77 | return false; 78 | } 79 | 80 | vtable.entries.push_back(vtable_entry); 81 | } 82 | 83 | _vtables.push_back(vtable); 84 | assert(_vtables[_index].module_name == vtable.module_name 85 | && _vtables[_index].addr == vtable.addr 86 | && _vtables[_index].index == vtable.index 87 | && "Index of vtable and index in vector are not the same."); 88 | 89 | _index++; 90 | } 91 | 92 | // Only add module to managed modules if it has at least one vtable. 93 | if(has_vtables) { 94 | _managed_modules.insert(module_name); 95 | } 96 | 97 | return true; 98 | } 99 | 100 | 101 | const VTableMap& VTableFile::get_this_vtables() const { 102 | 103 | // Make sure that the object is finalized. 104 | if(!_is_finalized) { 105 | throw runtime_error("VTableFile object was not finalized."); 106 | } 107 | 108 | return *(_module_vtables_map.at(_this_module_name)); 109 | } 110 | 111 | 112 | const VTableMap& VTableFile::get_vtables(const string &module_name) const { 113 | 114 | // Make sure that the object is finalized. 
115 | if(!_is_finalized) { 116 | throw runtime_error("VTableFile object was not finalized."); 117 | } 118 | 119 | if(_module_vtables_map.find(module_name) == _module_vtables_map.cend()) { 120 | throw runtime_error("VTableFile object does not know module name."); 121 | } 122 | 123 | return *(_module_vtables_map.at(module_name)); 124 | } 125 | 126 | 127 | const VTableVector& VTableFile::get_all_vtables() const { 128 | 129 | // Make sure that the object is finalized. 130 | if(!_is_finalized) { 131 | throw runtime_error("VTableFile object was not finalized."); 132 | } 133 | 134 | return _vtables; 135 | } 136 | 137 | 138 | void VTableFile::finalize() { 139 | 140 | // Make sure that we only finalize this object once. 141 | if(_is_finalized) { 142 | throw runtime_error("VTableFile object was already finalized."); 143 | } 144 | _is_finalized = true; 145 | 146 | if(_managed_modules.find(_this_module_name) == _managed_modules.cend()) { 147 | throw runtime_error("VTableFile object has no data for the "\ 148 | "module to analyze."); 149 | } 150 | 151 | // Build up a vector that contains a mapping for each module 152 | // that maps from vtable address to vtable object. 153 | uint32_t idx = 0; 154 | for(auto &module_it : _managed_modules) { 155 | for(auto &vtbl_it : _vtables) { 156 | if(vtbl_it.module_name != module_it) { 157 | continue; 158 | } 159 | 160 | if(_module_vtables.size() <= idx) { 161 | VTableMap temp; 162 | temp[vtbl_it.addr] = &vtbl_it; 163 | _module_vtables.push_back(temp); 164 | } 165 | else { 166 | _module_vtables[idx][vtbl_it.addr] = &vtbl_it; 167 | } 168 | } 169 | idx++; 170 | } 171 | 172 | // Build up a mapping that maps a module name to its vtable address 173 | // to vtable object map. 
174 | idx = 0; 175 | for(auto &module_it : _managed_modules) { 176 | _module_vtables_map[module_it] = &_module_vtables[idx]; 177 | idx++; 178 | } 179 | 180 | // Sanity check if module mapping is completely correct 181 | // (Added for now to exclude this as error source) 182 | for(auto &module_it : _managed_modules) { 183 | const auto &vtable_map = *(_module_vtables_map.at(module_it)); 184 | for(const auto &vtbl_kv : vtable_map) { 185 | if(vtbl_kv.second->module_name != module_it) { 186 | throw runtime_error("Error while finalizing vtable mapping."); 187 | } 188 | } 189 | } 190 | 191 | return; 192 | } 193 | 194 | 195 | bool VTableFile::is_finalized() const { 196 | return _is_finalized; 197 | } 198 | 199 | 200 | const VTable* VTableFile::get_vtable_ptr(const std::string &module_name, 201 | uint64_t addr) const { 202 | 203 | // Make sure that the object is finalized. 204 | if(!_is_finalized) { 205 | throw runtime_error("VTableFile object was not finalized."); 206 | } 207 | 208 | if(_module_vtables_map.at(module_name)->find(addr) 209 | != _module_vtables_map.at(module_name)->cend()) { 210 | 211 | return (_module_vtables_map.at(module_name)->at(addr)); 212 | } 213 | return nullptr; 214 | } 215 | 216 | 217 | const VTable& VTableFile::get_vtable(const std::string &module_name, 218 | uint64_t addr) const { 219 | 220 | // Make sure that the object is finalized. 221 | if(!_is_finalized) { 222 | throw runtime_error("VTableFile object was not finalized."); 223 | } 224 | 225 | return *(_module_vtables_map.at(module_name)->at(addr)); 226 | 227 | } 228 | 229 | 230 | const VTable& VTableFile::get_vtable(uint32_t index) const { 231 | 232 | // Make sure that the object is finalized. 
233 | if(!_is_finalized) { 234 | throw runtime_error("VTableFile object was not finalized."); 235 | } 236 | 237 | if(_vtables.size() <= index) { 238 | throw runtime_error("Vtable index is out of range."); 239 | } 240 | 241 | return _vtables[index]; 242 | } 243 | -------------------------------------------------------------------------------- /src/vtable_update.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "vtable_update.h" 3 | 4 | 5 | using namespace std; 6 | 7 | #define DEBUG_PRINT_UPDATES 0 8 | 9 | FctVTableUpdates::FctVTableUpdates(VTableFile &vtable_file, 10 | const string &module_name) 11 | : _vtable_file(vtable_file), 12 | _module_name(module_name) { 13 | 14 | } 15 | 16 | 17 | void FctVTableUpdates::add_vtable_updates(uint64_t fct_addr, 18 | const VTableUpdates &vtable_updates) { 19 | lock_guard _(_mtx); 20 | 21 | if(_this_vtable_updates.find(fct_addr) == _this_vtable_updates.cend()) { 22 | _this_vtable_updates[fct_addr] = vtable_updates; 23 | } 24 | else { 25 | for(const auto &it : vtable_updates) { 26 | _this_vtable_updates[fct_addr].push_back(it); 27 | } 28 | } 29 | } 30 | 31 | 32 | void FctVTableUpdates::export_vtable_updates(const string &target_dir) { 33 | lock_guard _(_mtx); 34 | 35 | stringstream temp_str; 36 | temp_str << target_dir << "/" << _module_name << ".vtableupdates"; 37 | string target_file = temp_str.str(); 38 | 39 | ofstream update_file; 40 | update_file.open(target_file); 41 | 42 | update_file << _module_name << endl; 43 | 44 | for(const auto &it : _this_vtable_updates) { 45 | uint64_t fct_addr = it.first; 46 | 47 | // Get all vtable updates that can be exported. 48 | VTableUpdates exportable_vtable_updates; 49 | for(const auto &vtable_update : it.second) { 50 | 51 | // Convert base expression ptr to string for export. 
52 | ExpressionPtr base = vtable_update.base; 53 | string base_str; 54 | if(!convert_expression_str(base, base_str)) { 55 | continue; 56 | } 57 | exportable_vtable_updates.push_back(vtable_update); 58 | } 59 | 60 | // Ignore functions that do not have any vtable updates. 61 | if(exportable_vtable_updates.size() == 0) { 62 | continue; 63 | } 64 | 65 | update_file << hex << fct_addr 66 | << " "; 67 | 68 | // Export all vtable updates for this function. 69 | for(const auto &vtable_update : exportable_vtable_updates) { 70 | 71 | // Convert base expression ptr to string for export. 72 | ExpressionPtr base = vtable_update.base; 73 | string base_str; 74 | if(!convert_expression_str(base, base_str)) { 75 | throw runtime_error("Not able to convert vtable update base"\ 76 | "to string."); 77 | } 78 | 79 | const VTable &vtable = _vtable_file.get_vtable(vtable_update.index); 80 | uint64_t vtable_addr = vtable.addr; 81 | const string module_name = vtable.module_name; 82 | size_t offset = vtable_update.offset; 83 | 84 | update_file << module_name 85 | << ":" 86 | << hex << vtable_addr 87 | << ":" 88 | << base_str 89 | << ":" 90 | << dec << offset 91 | << " "; 92 | 93 | #if DEBUG_PRINT_UPDATES 94 | cout << "Fct Addr: 0x" << hex << fct_addr << endl; 95 | cout << "Module Name: " << module_name << endl; 96 | cout << "VTable Addr: 0x" << hex << vtable_addr << endl; 97 | cout << "Base: " << base_str << endl; 98 | cout << "Offset: 0x" << hex << offset << endl; 99 | #endif 100 | 101 | } 102 | update_file << endl; 103 | } 104 | update_file.close(); 105 | } 106 | 107 | 108 | // Convert expression to string (only consider System V argument register 109 | // for now). 
110 | bool FctVTableUpdates::convert_expression_str(ExpressionPtr base, 111 | string &base_str) { 112 | 113 | if(*_rdi == *base) { 114 | base_str = "RDI"; 115 | } 116 | else if(*_rsi == *base) { 117 | base_str = "RSI"; 118 | } 119 | else if(*_rdx == *base) { 120 | base_str = "RDX"; 121 | } 122 | else if(*_rcx == *base) { 123 | base_str = "RCX"; 124 | } 125 | else if(*_r8 == *base) { 126 | base_str = "R8"; 127 | } 128 | else if(*_r9 == *base) { 129 | base_str = "R9"; 130 | } 131 | else { 132 | return false; 133 | } 134 | 135 | return true; 136 | } 137 | 138 | 139 | // Convert string to expression (only consider System V argument register 140 | // for now). 141 | bool FctVTableUpdates::convert_str_expression(const string &base_str, 142 | ExpressionPtr &base) { 143 | 144 | if("RDI" == base_str) { 145 | base = _rdi; 146 | } 147 | else if("RSI" == base_str) { 148 | base = _rsi; 149 | } 150 | else if("RDX" == base_str) { 151 | base = _rdx; 152 | } 153 | else if("RCX" == base_str) { 154 | base = _rcx; 155 | } 156 | else if("R8" == base_str) { 157 | base = _r8; 158 | } 159 | else if("R9" == base_str) { 160 | base = _r9; 161 | } 162 | else { 163 | return false; 164 | } 165 | 166 | return true; 167 | } 168 | 169 | 170 | const VTableUpdates* FctVTableUpdates::get_vtable_updates( 171 | const string &module_name, 172 | uint64_t fct_addr) const { 173 | lock_guard _(_mtx); 174 | 175 | // Differentiate between the module that is currently analyzed and 176 | // the imported modules. 
177 | if(_module_name == module_name) { 178 | if(_this_vtable_updates.find(fct_addr) == _this_vtable_updates.cend()) { 179 | return nullptr; 180 | } 181 | const VTableUpdates *temp = &(_this_vtable_updates.at(fct_addr)); 182 | return temp; 183 | } 184 | 185 | if(_external_vtable_updates.find(module_name) == 186 | _external_vtable_updates.cend()) { 187 | return nullptr; 188 | } 189 | const VTableUpdatesMap &vtable_updates_map = 190 | _external_vtable_updates.at(module_name); 191 | 192 | if(vtable_updates_map.find(fct_addr) == vtable_updates_map.cend()) { 193 | return nullptr; 194 | } 195 | const VTableUpdates *temp = &(vtable_updates_map.at(fct_addr)); 196 | return temp; 197 | } 198 | 199 | 200 | void FctVTableUpdates::import_updates(const string &target_file) { 201 | lock_guard _(_mtx); 202 | 203 | ifstream file(target_file + ".vtableupdates"); 204 | if(!file) { 205 | throw runtime_error("Opening vtable update file failed."); 206 | } 207 | 208 | VTableUpdatesMap vtable_updates_map; 209 | string line; 210 | 211 | // Parse first line manually. 212 | getline(file, line); 213 | istringstream header_parser(line); 214 | 215 | // First entry of file is always the module name. 216 | string import_module_name; 217 | header_parser >> import_module_name; 218 | if(header_parser.fail()) { 219 | throw runtime_error("Parsing vtable update file failed."); 220 | } 221 | 222 | // Parse each vtable update line which is given in the following form: 223 | // ... 
224 | while(getline(file, line)) { 225 | istringstream parser(line); 226 | string update_entry; 227 | VTableUpdates imported_updates; 228 | 229 | uint64_t fct_addr; 230 | parser >> hex >> fct_addr; 231 | if(parser.fail()) { 232 | throw runtime_error("Parsing vtable update file failed."); 233 | } 234 | 235 | // Parse each vtable update entry which is given in the following form: 236 | // ::: 237 | while(parser >> update_entry) { 238 | if(parser.fail()) { 239 | throw runtime_error("Parsing vtable update file failed."); 240 | } 241 | 242 | string module_name; 243 | string vtable_addr_str; 244 | string arg_reg_str; 245 | string offset_str; 246 | uint64_t vtable_addr; 247 | size_t offset; 248 | 249 | istringstream parser_entry(update_entry); 250 | if(parser_entry.fail()) { 251 | throw runtime_error("Parsing vtable update file failed."); 252 | } 253 | getline(parser_entry, module_name, ':'); 254 | getline(parser_entry, vtable_addr_str, ':'); 255 | getline(parser_entry, arg_reg_str, ':'); 256 | getline(parser_entry, offset_str, ':'); 257 | 258 | istringstream parser_vtable_addr(vtable_addr_str); 259 | if(parser_vtable_addr.fail()) { 260 | throw runtime_error("Parsing vtable update file failed."); 261 | } 262 | parser_vtable_addr >> hex >> vtable_addr; 263 | 264 | istringstream parser_offset(offset_str); 265 | if(parser_offset.fail()) { 266 | throw runtime_error("Parsing vtable update file failed."); 267 | } 268 | parser_offset >> dec >> offset; 269 | 270 | // Convert read data into the local data structure. 
271 | const VTable &vtable = _vtable_file.get_vtable(module_name, 272 | vtable_addr); 273 | 274 | ExpressionPtr base; 275 | if(!convert_str_expression(arg_reg_str, base)) { 276 | throw runtime_error("Parsing vtable update file failed."); 277 | } 278 | 279 | VTableUpdate vtable_update; 280 | vtable_update.index = vtable.index; 281 | vtable_update.offset = offset; 282 | vtable_update.base = base; 283 | imported_updates.push_back(vtable_update); 284 | } 285 | vtable_updates_map[fct_addr] = imported_updates; 286 | } 287 | 288 | _external_vtable_updates[import_module_name] = vtable_updates_map; 289 | } 290 | -------------------------------------------------------------------------------- /src/vtv_vcall_gt.cpp: -------------------------------------------------------------------------------- 1 | #include "vtv_vcall_gt.h" 2 | 3 | using namespace std; 4 | 5 | 6 | VTVVcallsFile::VTVVcallsFile(const string &module_name) 7 | : _module_name(module_name) {} 8 | 9 | 10 | void VTVVcallsFile::add_vtv_vcalls(const VTVVcalls &vtv_vcalls) { 11 | 12 | for(const auto &it : vtv_vcalls) { 13 | uint64_t verify_addr = it.second.addr_verify_call; 14 | if(_vtv_vcalls.find(verify_addr) != _vtv_vcalls.cend()) { 15 | for(uint64_t it_addr : it.second.addr_vcalls) { 16 | _vtv_vcalls[verify_addr].addr_vcalls.insert(it_addr); 17 | } 18 | } 19 | else { 20 | VTVVcall temp; 21 | temp.addr_verify_call = verify_addr; 22 | temp.vtbl_obj = nullptr; 23 | temp.addr_vcalls = it.second.addr_vcalls; 24 | _vtv_vcalls[verify_addr] = temp; 25 | } 26 | } 27 | } 28 | 29 | 30 | void VTVVcallsFile::export_vtv_vcalls(const string &target_dir) { 31 | 32 | stringstream temp_str; 33 | temp_str << target_dir << "/" << _module_name << ".vtv_vcalls"; 34 | string target_file = temp_str.str(); 35 | 36 | ofstream vtv_file; 37 | vtv_file.open(target_file); 38 | 39 | vtv_file << _module_name << endl; 40 | 41 | for(const auto &it_vtv : _vtv_vcalls) { 42 | vtv_file << hex << it_vtv.second.addr_verify_call; 43 | 44 | for(uint64_t 
vcall_addr : it_vtv.second.addr_vcalls) { 45 | vtv_file << " " << hex << vcall_addr; 46 | } 47 | vtv_file << endl; 48 | } 49 | 50 | vtv_file.close(); 51 | } 52 | 53 | 54 | const VTVVcalls& VTVVcallsFile::get_vtv_vcalls() const { 55 | return _vtv_vcalls; 56 | } 57 | --------------------------------------------------------------------------------