├── CMakeLists.txt
├── LICENSE
├── README.md
├── docs
    └── doxygen.config
├── ida_export
    └── export.py
├── ida_import
    ├── ida_import.py
    └── marx.py
├── include
    ├── amd64.h
    ├── amd64_registers.h
    ├── base_analysis.h
    ├── blacklist_functions.h
    ├── block.h
    ├── block_semantics.h
    ├── dump_file.h
    ├── expression.h
    ├── external_functions.h
    ├── function.h
    ├── got.h
    ├── idata.h
    ├── mapped_elf.h
    ├── mapped_pe.h
    ├── memory.h
    ├── module_plt.h
    ├── new_operators.h
    ├── overwrite_analysis.h
    ├── path_builder.h
    ├── pe.h
    ├── return_value.h
    ├── serialization.h
    ├── state.h
    ├── translator.h
    ├── vcall.h
    ├── vcall_types.h
    ├── vex.h
    ├── vtable_file.h
    ├── vtable_hierarchy.h
    ├── vtable_update.h
    └── vtv_vcall_gt.h
├── paper.pdf
├── patch
    └── heap_allocation_patch.diff
├── scripts
    ├── ida_get_all_icalls.py
    ├── ida_get_hierarchies_through_rtti.py
    ├── ida_has_refs.py
    ├── ida_is_subvtable.py
    ├── ida_win_find_blacklist_functions.py
    └── ida_win_get_hierarchies_through_rtti.py
└── src
    ├── base_analysis.cpp
    ├── blacklist_functions.cpp
    ├── block.cpp
    ├── block_semantics.cpp
    ├── dump_file.cpp
    ├── expression.cpp
    ├── external_functions.cpp
    ├── function.cpp
    ├── got.cpp
    ├── idata.cpp
    ├── main.cpp
    ├── mapped_elf.cpp
    ├── mapped_pe.cpp
    ├── module_plt.cpp
    ├── new_operators.cpp
    ├── overwrite_analysis.cpp
    ├── path_builder.cpp
    ├── return_value.cpp
    ├── serialization.cpp
    ├── state.cpp
    ├── translator.cpp
    ├── vcall.cpp
    ├── vex.cpp
    ├── vtable_file.cpp
    ├── vtable_hierarchy.cpp
    ├── vtable_update.cpp
    └── vtv_vcall_gt.cpp


/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
 3 | project(marx)
 4 | 
 5 | set(CMAKE_CXX_COMPILER "//usr/bin/clang++-3.6")
 6 | 
 7 | add_definitions("-std=c++11")
 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror -pedantic -Wextra")
 9 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wshadow -Wpointer-arith -Wcast-qual")
10 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wstrict-prototypes")
11 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-braces")
12 | 
13 | set(CMAKE_BUILD_TYPE Release)
14 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/build")
15 | 
16 | file(GLOB SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp)
17 | file(GLOB HDR_FILES ${CMAKE_CURRENT_SOURCE_DIR}/include/*.h)
18 | 
19 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
20 | 
21 | add_library(lib_vex STATIC IMPORTED GLOBAL)
22 | set_property(TARGET lib_vex PROPERTY
23 |              IMPORTED_LOCATION /usr/local/lib/valgrind/libvex-amd64-linux.a)
24 | 
25 | add_executable(marx ${SRC_FILES} ${HDR_FILES})
26 | target_link_libraries(marx lib_vex pthread)
27 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Andre Pawlowski (sqall)
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Marx
  2 | Uncovering Class Hierarchies in C++ Programs
  3 | 
  4 | This repository holds the programs used for the NDSS 2017 paper [MARX: Uncovering Class Hierarchies in C++ Programs](https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/marx-uncovering-class-hierarchies-c-programs/).
  5 | 
  6 | 
  7 | ## Evaluation Data
  8 | 
  9 | The data used to evaluate Marx is available at [zenodo.org](https://zenodo.org/record/238686).
 10 | 
 11 | 
 12 | ## Organization
 13 | 
 14 | The folder structure is as follows:
 15 | - `include` contains the header files,
 16 | - `src` contains the source code itself,
 17 | - `patch` contains a patch to enable manual memory management for VEX,
 18 | - `ida_export` contains an IDA script creating a `.dmp` file,
 19 | - `scripts` contains helper scripts.
 20 | 
 21 | 
 22 | ## Development Setup
 23 | 
 24 | Requires CMake, at least version 2.8. As for IDEs, QtCreator works pretty well
 25 | for C++ development and contains a useful debugger.
 26 | 
 27 | When using QtCreator, simply click on "Open Project" and select
 28 | `CMakeLists.txt`. It makes sense to let the build directory point to a
 29 | directory called `build` inside the project's root directory (in case your IDE
 30 | does not honor CMake's `RUNTIME_OUTPUT_DIRECTORY` variable).
 31 | 
 32 | Debug builds are considerably slower but are necessary for proper debugging
 33 | behavior. Make sure to set `CMAKE_BUILD_TYPE` accordingly in `CMakeLists.txt`
 34 | (you can do so from within QtCreator). In desperate cases, try the option "Run
 35 | CMake" from the context menu.
 36 | 
 37 | When developing from the command line, issue the following commands from the
 38 | project's root directory:
 39 | ```
 40 | mkdir build && cd build
 41 | cmake ..
 42 | make -j{CPU_COUNT}
 43 | ```
 44 | 
 45 | The project requires a patched version of _Valgrind_. To be more exact, only
 46 | the _VEX_ sub-project is actually used and patched.
 47 | 
 48 | Download Valgrind from [the official project page](http://valgrind.org/). We
 49 | recommend checking out the subversion repository. Revision 3203 of VEX is known
 50 | to work:
 51 | ```
 52 | svn co svn://svn.valgrind.org/valgrind/trunk@15732 valgrind
 53 | cd valgrind/VEX/
 54 | svn update -r 3203
 55 | ```
 56 | 
 57 | Configure the project as per its installation instructions. Switch to the `VEX`
 58 | directory and apply the patch found in folder `patch`:
 59 | ```
 60 | cd VEX
 61 | patch -p0 < ../marx/patch/heap_allocation_patch.diff
 62 | ```
 63 | 
 64 | First configure Valgrind by issuing `./autogen.sh` and `./configure`.
 65 | Then issue `make` and `make install` inside the `VEX` directory to install the
 66 | VEX components. The CMake project tries to include the library
 67 | `/usr/local/lib/valgrind/libvex-amd64-linux.a`. Make sure it exists.
 68 | 
 69 | 
 70 | ## Usage
 71 | 
 72 | When developing on a new binary, the first step is to export data from an IDA
 73 | database. The IDAPython script found
 74 | in `ida_export` creates a dump file `{BINARY_NAME}.dmp` and exports all
 75 | necessary data used for the analysis in the folder the
 76 | binary lies in. Remember to set the pure_virtual_addr in the IDAPython script
 77 | before executing it. In case of Windows, the function is called `_purecall`.
 78 | In Linux, it is called `__cxa_pure_virtual`.
 79 | 
 80 | After exporting all data, a config file for Marx has to be created manually.
 81 | A config file looks like the following:
 82 | ```
 83 | MODULENAME filezilla
 84 | TARGETDIR ../tests/filezilla/
 85 | FORMAT ELF64
 86 | NEWOPERATORS 2 431F80 432C00
 87 | EXTERNALMODULES 8 ../tests/libwx_gtk2u_aui/libwx_gtk2u_aui-3.1.so.0.0.0 ../tests/libwx_gtk2u_xrc/libwx_gtk2u_xrc-3.1.so.0.0.0 ../tests/libwx_gtk2u_adv/libwx_gtk2u_adv-3.1.so.0.0.0 ../tests/libwx_gtk2u_core/libwx_gtk2u_core-3.1.so.0.0.0 ../tests/libwx_baseu_net/libwx_baseu_net-3.1.so.0.0.0 ../tests/libwx_baseu/libwx_baseu-3.1.so.0.0.0 ../tests/libwx_gtk2u_html/libwx_gtk2u_html-3.1.so.0.0.0 ../tests/libwx_baseu_xml/libwx_baseu_xml-3.1.so.0.0.0
 88 | ```
 89 | 
 90 | Further examples of config files can be seen in the evaluation data at [zenodo.org](https://zenodo.org/record/238686).
 91 | 
 92 | When the config file is created, Marx can be executed by issuing the following command:
 93 | ```
 94 | ./marx ../tests/filezilla/config.cfg
 95 | ```
 96 | 
 97 | Afterwards, the IDAPython script found in `ida_import` can be used to import the analyzed data back to IDA.
 98 | 
 99 | NOTE: Windows binaries have to be loaded at base address 0x0 (or rebased)
100 | in IDA before exporting them. Also, the IDAPython script only supports Windows
101 | binaries which are compiled with RTTI. Furthermore, functions that appear
102 | in multiple vtables but do not belong together have to be blacklisted in
103 | Windows binaries, because compiler optimizations would otherwise cause a lot
104 | of false positives during the analysis.
105 | This is the case, for example, for short functions that just zero a
106 | register and do nothing more. For further details, see the helper script
107 | `ida_win_find_blacklist_functions.py`.
108 | 


--------------------------------------------------------------------------------
/ida_import/ida_import.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import marx
  3 | import os
  4 | import re
  5 | from sys import stdout
  6 | 
  7 | # IDA imports
  8 | from idaapi import add_dref, dr_O, Form
  9 | from idc import MakeComm, MakeQword, Comment
 10 | # from idautils import DataRefsFrom
 11 | # from idautils import Modules as ida_Modules
 12 | 
# Number of bytes of an address. Used as the stride between consecutive
# vtable entries; each entry is turned into a qword via MakeQword.
WORD_BYTE_COUNT = 8
 15 | 
 16 | 
 17 | class MarxIDAImportForm(Form):
 18 |     def __init__(self):
 19 |         Form.__init__(self, r"""Marx IDA Import Script
 20 | 
 21 | <#Select a hierarchy file to open#    Hierarchy File:{iHierarchyFileOpen}>
 22 | <#Select a new operators file to open#New Operators File:{iNewOpFileOpen}>
 23 | <#Select a vcalls file to open#       Vcalls File:{iVcallFileOpen}>
 24 | <#Select a Vtables file to open#      Vtables File:{iVTablesFileOpen}>
 25 | Class hierarchies <Allow hierarchies with only one class:{rAllowSingleClassHierarchies}>{cHierarchies}>
 26 | """, {
 27 |             'iHierarchyFileOpen': Form.FileInput(open=True, value="*.hierarchy"),
 28 |             'iNewOpFileOpen': Form.FileInput(open=True, value="*.new_operators"),
 29 |             'iVcallFileOpen': Form.FileInput(open=True, value="*.vcalls_extended"),
 30 |             'iVTablesFileOpen': Form.FileInput(open=True, value="*_vtables.txt"),
 31 |             'cHierarchies': Form.ChkGroupControl(("rAllowSingleClassHierarchies",))
 32 |         })
 33 | 
 34 | 
 35 | def vtable_hierarchy_to_ida_db(marx_module):
 36 |     for hierarchy in marx_module.class_hierarchies:
 37 |         for vtable in hierarchy.vtables:
 38 |             new_comment = "Begin of vtable - Class_{:X}, part of ClassHierarchy_{:d}".format(vtable.address, hierarchy.number)
 39 |             comment = Comment(vtable.address) or ""
 40 |             # Check if there is already a comment (with the same content)
 41 |             if new_comment not in comment:
 42 |                 MakeComm(vtable.address, new_comment + comment)
 43 |             MakeQword(vtable.address)
 44 | 
 45 | 
 46 | def new_operators_to_ida_db(marx_module):
 47 |     for new_op in marx_module.new_operators.itervalues():
 48 |         comment = Comment(new_op.address)
 49 |         if new_op.class_hierarchy:
 50 |             # Check if there is already a comment, do nothing if there is already a comment
 51 |             if not comment:
 52 |                 MakeComm(new_op.address,
 53 |                          "New operator - Size: {:d}, ".format(new_op.size) +
 54 |                          "ClassHierarchy_{:d}".format(
 55 |                          new_op.class_hierarchy.number))
 56 | 
 57 |             # For each vtable of an object which could be constructed by this new operator
 58 |             for vtable in new_op.class_hierarchy.vtables:
 59 |                 # Add references from new operator address to vtable address
 60 |                 add_dref(new_op.address, vtable.address, dr_O)
 61 |         else:
 62 |             # Check if there is already a comment, do nothing if there is already a comment
 63 |             if not comment:
 64 |                 MakeComm(new_op.address, "New operator - Size: {:d}, no class info available".format(new_op.size))
 65 | 
 66 | 
 67 | def vcalls_extended_to_ida_db(marx_module):
 68 |     target_addresses = set()
 69 |     for vcall in marx_module.vcalls.itervalues():
 70 |         comment = Comment(vcall.address)
 71 |         if vcall.class_hierarchy:
 72 |             # For each vtable of an object which is possible at this vcall
 73 |             for vtable in vcall.class_hierarchy.vtables:
 74 | 
 75 |                 # Add reference from vcall address to target function address (resolves icall)
 76 |                 target_function = vtable.functions.get(vcall.index, None)
 77 |                 if target_function:
 78 |                     add_dref(vcall.address, target_function.address, dr_O)
 79 |                     target_addresses.add(target_function.address)
 80 | 
 81 |             # Check if there is already a comment, do nothing if there is already a comment
 82 |             if not comment:
 83 |                 MakeComm(vcall.address,
 84 |                          "Vcall - vtable index: {:d}, ".format(vcall.index) + 
 85 |                          "ClassHierarchy_{:d}\n".format(
 86 |                          vcall.class_hierarchy.number) +
 87 |                          "\n".join(
 88 |                          map(lambda target_address: "Possible target: 0x{:X}".format(target_address), target_addresses)))
 89 |                 target_addresses.clear()
 90 | 
 91 |         else:
 92 |             # Check if there is already a comment, do nothing if there is already a comment
 93 |             if not comment:
 94 |                 MakeComm(vcall.address, "Vcall - vtable index: {:d}, no class info available".format(vcall.index))
 95 | 
 96 | 
 97 | def vtables_to_ida_db(marx_module):
 98 |     for vtable in marx_module.vtables.itervalues():
 99 |         vtable_entry_address = 0
100 |         for index, target_function in vtable.functions.iteritems():
101 |             vtable_entry_address = vtable.address + (index * WORD_BYTE_COUNT)
102 |             MakeQword(vtable_entry_address)
103 | 
104 |             if target_function.address:
105 |                 # Add reference from vtable entry address to target function address
106 |                 add_dref(vtable_entry_address, target_function.address, dr_O)
107 |             # else:
108 |             #     MakeComm("Unknown target function.")
109 | 
110 |         # Add comment at the end of the vtable
111 |         if vtable_entry_address and vtable.class_hierarchy:
112 |             MakeComm(vtable_entry_address,
113 |                      "End of vtable - Class_{:X}, ".format(vtable.address) +
114 |                      "part of ClassHierarchy_{:d}".format(
115 |                      vtable.class_hierarchy.number))
116 | 
117 | 
118 | def ida_main():
119 |     # # Get IDA's module representation
120 |     # ida_modules_dict = {module.name : module for module in ida_Modules()}
121 | 
122 |     # Create form object
123 |     form = MarxIDAImportForm()
124 |     # Compile (in order to populate the controls)
125 |     form.Compile()
126 | 
127 |     # Execute the form
128 |     if form.Execute() == 1:
129 |         # Get file paths set in form
130 |         hierarchy_file_path = form.iHierarchyFileOpen.value
131 |         new_operators_file_path = form.iNewOpFileOpen.value
132 |         vcalls_extended_file_path = form.iVcallFileOpen.value
133 |         vtables_file_path = form.iVTablesFileOpen.value
134 |         marx_module = None
135 | 
136 |         try:
137 |             # Parsing hierarchy file
138 |             with open(hierarchy_file_path, "r") as f:
139 |                 marx_module = marx.parse_hierarchy(f)
140 |         except IOError:
141 |             print "Could not open hierarchy file: {:s}".format(hierarchy_file_path)
142 | 
143 |         try:
144 |             # Parsing new_operators file
145 |             with open(new_operators_file_path, "r") as f:
146 |                 marx_module = marx.parse_new_operators(f)
147 |         except IOError:
148 |             print "Could not open new_operators file: {:s}".format(new_operators_file_path)
149 | 
150 |         try:
151 |             # Parsing vcalls_extended file
152 |             with open(vcalls_extended_file_path, "r") as f:
153 |                 marx_module = marx.parse_vcalls_extended(f)
154 |         except IOError:
155 |             print "Could not open vcalls_extended file: {:s}".format(vcalls_extended_file_path)
156 | 
157 |         try:
158 |             # Parsing vtables file
159 |             with open(vtables_file_path, "r") as f:
160 |                 marx_module = marx.parse_vtables(f)
161 |         except IOError:
162 |             print "Could not open vtables file: {:s}".format(vtables_file_path)
163 | 
164 |         # Toggle allow_false_positives
165 |         marx.allow_single_class_hierarchies = bool(form.cHierarchies.value)
166 | 
167 |         # Add comments to vtables
168 |         vtable_hierarchy_to_ida_db(marx_module)
169 |         # Add data references and comments to new operators
170 |         new_operators_to_ida_db(marx_module)
171 |         # Add data references and comments to vcalls and vtables
172 |         vcalls_extended_to_ida_db(marx_module)
173 |         # Add data references to vtables
174 |         vtables_to_ida_db(marx_module)
175 | 
176 | 
177 | ida_main()
178 | 


--------------------------------------------------------------------------------
/include/amd64.h:
--------------------------------------------------------------------------------
 1 | #ifndef FOO_AMD64_H
 2 | #define FOO_AMD64_H
 3 | 
 4 | #include "expression.h"
 5 | #include "amd64_registers.h"
 6 | 
 7 | #include <memory>
 8 | 
/*
 * Shared `Register` expression objects, one per AMD64 register, each
 * constructed from the corresponding OFFB_* guest-state offset (see
 * amd64_registers.h).
 *
 * The objects are declared `static` in a header, so every translation unit
 * that includes this file gets its own set of instances.
 * NOTE(review): this matters if registers are ever compared by pointer
 * identity across translation units -- confirm how `Register` equality is
 * defined in expression.h.
 */
static const auto register_rip = std::make_shared<Register>(OFFB_RIP);
static const auto register_rsp = std::make_shared<Register>(OFFB_RSP);

static const auto register_rax = std::make_shared<Register>(OFFB_RAX);
static const auto register_rbx = std::make_shared<Register>(OFFB_RBX);
static const auto register_rcx = std::make_shared<Register>(OFFB_RCX);
static const auto register_rdx = std::make_shared<Register>(OFFB_RDX);

static const auto register_rbp = std::make_shared<Register>(OFFB_RBP);
static const auto register_rsi = std::make_shared<Register>(OFFB_RSI);
static const auto register_rdi = std::make_shared<Register>(OFFB_RDI);

static const auto register_r8 = std::make_shared<Register>(OFFB_R8);
static const auto register_r9 = std::make_shared<Register>(OFFB_R9);
static const auto register_r10 = std::make_shared<Register>(OFFB_R10);
static const auto register_r11 = std::make_shared<Register>(OFFB_R11);
static const auto register_r12 = std::make_shared<Register>(OFFB_R12);
static const auto register_r13 = std::make_shared<Register>(OFFB_R13);
static const auto register_r14 = std::make_shared<Register>(OFFB_R14);
static const auto register_r15 = std::make_shared<Register>(OFFB_R15);
29 | 
//! Registers listed as argument registers for the System V calling
//! convention: rdi, rsi, rdx, rcx, r8, r9 (in this order).
static const std::shared_ptr<Register> system_v_arguments[] = {
    register_rdi,
    register_rsi,
    register_rdx,
    register_rcx,
    register_r8,
    register_r9,
};

//! Registers treated as scratch for System V: the six argument registers
//! plus r10 and r11.
//! NOTE(review): rax is not part of this list -- presumably the return
//! value register is handled separately; confirm against the users of
//! this table.
static const std::shared_ptr<Register> system_v_scratch[] = {
    register_rdi,
    register_rsi,
    register_rdx,
    register_rcx,
    register_r8,
    register_r9,
    register_r10,
    register_r11,
};

//! Registers treated as preserved across calls for System V:
//! rbx, rsp, rbp and r12-r15.
static const std::shared_ptr<Register> system_v_preserved[] = {
    register_rbx,
    register_rsp,
    register_rbp,
    register_r12,
    register_r13,
    register_r14,
    register_r15,
};
59 | 
//! Registers listed as argument registers for the MSVC calling convention:
//! rcx, rdx, r8, r9 (in this order).
static const std::shared_ptr<Register> msvc_arguments[] = {
    register_rcx,
    register_rdx,
    register_r8,
    register_r9,
};

//! Registers treated as scratch for MSVC: the four argument registers plus
//! r10 and r11.
//! NOTE(review): rax is not part of this list -- presumably the return
//! value register is handled separately; confirm against the users of
//! this table.
static const std::shared_ptr<Register> msvc_scratch[] = {
    register_rcx,
    register_rdx,
    register_r8,
    register_r9,
    register_r10,
    register_r11,
};

//! Registers treated as preserved across calls for MSVC. Unlike the
//! System V table, rdi and rsi are included here.
static const std::shared_ptr<Register> msvc_preserved[] = {
    register_rbx,
    register_rsp,
    register_rbp,
    register_rdi,
    register_rsi,
    register_r12,
    register_r13,
    register_r14,
    register_r15,
};
87 | 
88 | #endif // FOO_AMD64_H
89 | 


--------------------------------------------------------------------------------
/include/amd64_registers.h:
--------------------------------------------------------------------------------
 1 | #ifndef AMD64_REGISTERS_H
 2 | #define AMD64_REGISTERS_H
 3 | 
 4 | #include <map>
 5 | #include <string>
 6 | #include <memory>
 7 | 
 8 | extern "C" {
 9 | #include <valgrind/libvex.h>
10 | #include <valgrind/libvex_guest_amd64.h>
11 | }
12 | 
// Byte offsets of the AMD64 registers inside VEX's guest state structure
// (`VexGuestAMD64State`). VEX IR refers to registers by these offsets.

// General-purpose registers.
#define OFFB_RAX offsetof(VexGuestAMD64State, guest_RAX)
#define OFFB_RBX offsetof(VexGuestAMD64State, guest_RBX)
#define OFFB_RCX offsetof(VexGuestAMD64State, guest_RCX)
#define OFFB_RDX offsetof(VexGuestAMD64State, guest_RDX)
#define OFFB_RSP offsetof(VexGuestAMD64State, guest_RSP)
#define OFFB_RBP offsetof(VexGuestAMD64State, guest_RBP)
#define OFFB_RSI offsetof(VexGuestAMD64State, guest_RSI)
#define OFFB_RDI offsetof(VexGuestAMD64State, guest_RDI)
#define OFFB_R8  offsetof(VexGuestAMD64State, guest_R8)
#define OFFB_R9  offsetof(VexGuestAMD64State, guest_R9)
#define OFFB_R10 offsetof(VexGuestAMD64State, guest_R10)
#define OFFB_R11 offsetof(VexGuestAMD64State, guest_R11)
#define OFFB_R12 offsetof(VexGuestAMD64State, guest_R12)
#define OFFB_R13 offsetof(VexGuestAMD64State, guest_R13)
#define OFFB_R14 offsetof(VexGuestAMD64State, guest_R14)
#define OFFB_R15 offsetof(VexGuestAMD64State, guest_R15)

// Instruction pointer. (OFFB_RSP used to be defined a second time here; the
// redundant definition was dropped.)
#define OFFB_RIP offsetof(VexGuestAMD64State, guest_RIP)
32 | 
33 | //! Register offsets used to encode x86_64 registers by VEX.
34 | static const unsigned int AMD64_REGISTERS[] = {
35 |     OFFB_RAX, OFFB_RBX, OFFB_RCX, OFFB_RDX, OFFB_RSP, OFFB_RBP, OFFB_RSI,
36 |     OFFB_RDI, OFFB_R8, OFFB_R9, OFFB_R10, OFFB_R11, OFFB_R12, OFFB_R13,
37 |     OFFB_R14, OFFB_R15
38 | };
39 | 
40 | static std::map<unsigned int, std::string> AMD64_DISPLAY_REGISTERS = []{
41 |     std::map<unsigned int, std::string> result;
42 | 
43 |     result[OFFB_RAX] = "rax";
44 |     result[OFFB_RBX] = "rbx";
45 |     result[OFFB_RCX] = "rcx";
46 |     result[OFFB_RDX] = "rdx";
47 |     result[OFFB_RSP] = "rsp";
48 |     result[OFFB_RBP] = "rbp";
49 |     result[OFFB_RSI] = "rsi";
50 |     result[OFFB_RDI] = "rdi";
51 |     result[OFFB_R8]  = "r8";
52 |     result[OFFB_R9]  = "r9";
53 |     result[OFFB_R10] = "r10";
54 |     result[OFFB_R11] = "r11";
55 |     result[OFFB_R12] = "r12";
56 |     result[OFFB_R13] = "r13";
57 |     result[OFFB_R14] = "r14";
58 |     result[OFFB_R15] = "r15";
59 |     result[OFFB_RIP] = "rip";
60 | 
61 |     return result;
62 | }();
63 | 
64 | #endif // AMD64_REGISTERS_H
65 | 


--------------------------------------------------------------------------------
/include/base_analysis.h:
--------------------------------------------------------------------------------
 1 | #ifndef BASE_ANALYSIS_H
 2 | #define BASE_ANALYSIS_H
 3 | 
 4 | #include "function.h"
 5 | #include "block.h"
 6 | #include "memory.h"
 7 | 
 8 | #include <map>
 9 | #include <set>
10 | #include <vector>
11 | 
//! Maps a `Path` to the `State` associated with it.
typedef std::map<Path, State> PathStates;
//! Ordered collection of `State` objects.
typedef std::vector<State> States;
14 | 
15 | /*!
16 |  * \brief (Abstract) base class for analyses run on a function.
17 |  */
18 | class BaseAnalysis {
19 | protected:
20 |     const Function &_function;
21 | 
22 |     const FileFormatType _file_format;
23 | 
24 |     PathStates _states;
25 |     PathStates _side_effects;
26 | 
27 |     State _initial_state;
28 |     States _semantics;
29 | 
30 |     /*! Set by `BaseAnalysis::on_traversal` if the current block's
31 |      * terminator is a `call`; `nullptr` otherwise. If set, contains the
32 |      * symbol corresponding to the (unique) formatted return value.
33 |      */
34 |     std::shared_ptr<Symbolic> _current_return_value;
35 | 
36 | public:
37 |     BaseAnalysis(const Function &function,
38 |                  FileFormatType file_format);
39 |     BaseAnalysis(const Function &function,
40 |                  const State &initial_state,
41 |                  FileFormatType file_format);
42 | 
43 |     BaseAnalysis(const BaseAnalysis&) = delete;
44 |     void operator=(const BaseAnalysis&) = delete;
45 | 
46 |     bool obtain();
47 | 
48 | protected:
49 |     /*! Pure virtual (implemented by sub-class). This function is called before
50 |      * the function is actually traversed and may be used for initialization
51 |      * work.
52 |      */
53 |     virtual void pre_traversal() = 0;
54 | 
55 |     /*! Pure virtual (implemented by sub-class). This function is called after
56 |      * the traversal and may be used for post-processing of the collected
57 |      * results. */
58 |     virtual void post_traversal() = 0;
59 | 
60 |     /*! Pure virtual (implemented by sub-class). This function is called on each
61 |      * basic block on the given path. Accumulates the analysis results.
62 |      */
63 |     virtual bool in_traversal(const Path&, const Block&, State&) = 0;
64 | 
65 |     /*! Pure virtual (implemented by sub-class). This function is called on each
66 |      * basic block during path construction (i.e., if the lightweight path
67 |      * policy is active). Determines whether the given block is "interesting"
68 |      * and should be traversed by the generated paths.
69 |      */
70 |     virtual bool block_predicate(const Block&) = 0;
71 | 
72 |     /*! Pure virtual (implemented by sub-class). This function is called after
73 |      * a single path has been fully traversed.
74 |      */
75 |     virtual void path_traversed(const Path&) = 0;
76 | 
77 | private:
78 |     bool on_traversal(const Path &path, const Block &block);
79 | };
80 | 
81 | #endif // BASE_ANALYSIS_H
82 | 


--------------------------------------------------------------------------------
/include/blacklist_functions.h:
--------------------------------------------------------------------------------
 1 | #ifndef BLACKLIST_FUNCTIONS_H
 2 | #define BLACKLIST_FUNCTIONS_H
 3 | 
 4 | #include <stdexcept>
 5 | #include <set>
 6 | #include <string>
 7 | #include <cstring>
 8 | #include <fstream>
 9 | #include <sstream>
10 | #include <iostream>
11 | 
12 | typedef std::set<uint64_t> BlacklistFuncsSet;
13 | 
14 | const BlacklistFuncsSet import_blacklist_funcs(const std::string &target_file);
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/include/block.h:
--------------------------------------------------------------------------------
  1 | #ifndef BLOCK_H
  2 | #define BLOCK_H
  3 | 
  4 | #include <cstdint>
  5 | #include <functional>
  6 | 
  7 | #include "state.h"
  8 | 
  9 | extern "C" {
 10 | #include <valgrind/libvex.h>
 11 | }
 12 | 
 13 | /*!
 14 |  * \brief Enumerates all possible types of instructions terminating a basic
 15 |  * block (so called _terminators_).
 16 |  *
 17 |  * A terminator is _unresolved_ if its target cannot be determined statically.
 18 |  */
 19 | enum TerminatorType {
 20 |     //! Terminator is an instruction whose target could not be resolved
 21 |     //! (currently only set for unresolvable conditional or unconditional
 22 |     //! jumps).
 23 |     TerminatorUnresolved = 0,
 24 | 
 25 |     //! The next instruction is reached using a fall-through edge.
 26 |     TerminatorFallthrough,
 27 | 
 28 |      //! Terminator is a `ret` instruction.
 29 |     TerminatorReturn,
 30 | 
 31 |      //! Terminator is a `jmp` instruction.
 32 |     TerminatorJump,
 33 | 
 34 |      //! Terminator is a `call` instruction.
 35 |     TerminatorCall,
 36 | 
 37 |     //! Terminator is a `call` but its target cannot be resolved statically.
 38 |     //! Used separately to distinguish from unresolved jumps.
 39 |     TerminatorCallUnresolved,
 40 | 
 41 |      //! Terminator is a resolved conditional jump.
 42 |     TerminatorJcc,
 43 | 
 44 |     //! Terminator points to a non-returning target (such as `exit`).
 45 |     TerminatorNoReturn,
 46 | };
 47 | 
/*!
 * \brief Structure to describe a terminating instruction.
 *
 * Plain aggregate without default member initializers: fields that are not
 * explicitly assigned are indeterminate unless the object is
 * value-initialized.
 */
struct Terminator {
    //! Type of the terminator. \see `TerminatorType`
    TerminatorType type;

    //! Fall-through address of the terminator. This value is set for calls,
    //! conditional jumps and fall-throughs. The field is an integer, so "not
    //! set" cannot be `nullptr`; presumably 0 is used -- confirm against the
    //! code that fills this structure.
    uintptr_t fall_through;

    //! Target address of the terminator. Set for resolved jumps and calls.
    //! As above, "not set" is presumably 0 -- confirm against the code that
    //! fills this structure.
    uintptr_t target;

    //! Boolean value indicating whether the given (resolvable) jump is a tail
    //! jump inlining another function. `false` for any other type.
    //! \see `Translator::detect_tail_jumps`
    bool is_tail;
};
 68 | 
 69 | /*!
 70 |  * \brief Class tieing together the underlying VEX block and additional
 71 |  * information such as block address and terminator.
 72 |  */
 73 | class Block {
 74 | private:
 75 |     uintptr_t _address;
 76 |     IRSB *_vex_block;
 77 |     Terminator _terminator;
 78 | 
 79 | public:
 80 |     Block(uintptr_t address, IRSB *block, const Terminator &terminator);
 81 | 
 82 |     /*!
 83 |      * \brief get_address
 84 |      * \return Returns the block's virtual address.
 85 |      */
 86 |     uintptr_t get_address() const {
 87 |         return _address;
 88 |     }
 89 | 
 90 |     /*!
 91 |      * \brief get_last_address
 92 |      * \return Returns the block's last virtual address
 93 |      * or 0 in case of an error.
 94 |      */
 95 |     uint64_t get_last_address() const;
 96 | 
 97 |     /*!
 98 |      * \brief get_terminator
 99 |      * \return Returns information about the terminator.
100 |      */
101 |     const Terminator &get_terminator() const {
102 |         return _terminator;
103 |     }
104 | 
105 |     /*!
106 |      * \brief get_vex_block
107 |      * \return Returns a (read-only) reference to the underlying VEX block.
108 |      */
109 |     const IRSB &get_vex_block() const {
110 |         return *_vex_block;
111 |     }
112 | 
113 |     void retrieve_semantics(State &state) const;
114 | 
115 | private:
116 | };
117 | 
//! Callable deciding on a `Block`: receives an opaque `void*` and the block,
//! returns `bool`. NOTE(review): the `void*` is presumably a caller-supplied
//! context pointer -- confirm at the call sites.
using BlockPredicate = std::function<bool (void*, const Block&)>;
119 | 
120 | #endif // BLOCK_H
121 | 


--------------------------------------------------------------------------------
/include/block_semantics.h:
--------------------------------------------------------------------------------
 1 | #ifndef BLOCK_SEMANTICS_H
 2 | #define BLOCK_SEMANTICS_H
 3 | 
 4 | #include "expression.h"
 5 | #include "state.h"
 6 | 
 7 | extern "C" {
 8 | #include <valgrind/libvex.h>
 9 | #include <valgrind/libvex_ir.h>
10 | }
11 | 
12 | #include <map>
13 | #include <unordered_map>
14 | 
15 | #define arg_out
16 | class BlockSemantics;
17 | 
18 | typedef ExpressionPtr (BlockSemantics::*ExpressionParser)(const IRExpr&);
19 | typedef bool (BlockSemantics::*StatementHandler)(const IRStmt&);
20 | 
21 | // TODO: Handle calls, calling conventions (System-V for now).
22 | // FIXME: This class is infected with shared_ptr:s, consider boost::variant.
23 | 
24 | class Block;
25 | 
26 | /*!
27 |  * \brief Class computing the effective semantics of a given `Block`.
28 |  *
29 |  * \todo Allow sub-classes of this class at every point where this class is
30 |  * currently used. This enables a user to implement custom semantics.
31 |  */
32 | class BlockSemantics {
33 | private:
34 |     State &_state;
35 |     const Block &_block;
36 |     std::shared_ptr<Unknown> _unknown;
37 | 
38 |     static const std::map<IRExprTag, ExpressionParser> _expression_parser;
39 |     static const std::map<IRStmtTag, StatementHandler> _statement_handler;
40 | 
41 | public:
42 |     BlockSemantics() = delete;
43 |     BlockSemantics(const BlockSemantics&) = delete;
44 |     void operator=(const BlockSemantics&) = delete;
45 | 
46 |     BlockSemantics(const Block &block, State &initial_state);
47 | 
48 |     /*!
49 |      * \brief Getter to access the computed state.
50 |      * \return Returns the computed semantics in form of a `State` reference.
51 |      */
52 |     const State &get_state() const {
53 |         return _state;
54 |     }
55 | 
56 | private:
57 |     bool extract_semantics(const IRSB &block);
58 | 
59 |     ExpressionPtr parse_expression(const IRExpr &expression);
60 |     bool handle_statement(const IRStmt &statement);
61 | 
62 |     uint64_t get_mask(uint8_t size) const;
63 |     bool get_size(const IRType &type, arg_out uint8_t &size) const;
64 |     bool parse_type(const IRType &type, arg_out uint8_t &size,
65 |                     arg_out uint64_t &mask) const;
66 | 
67 |     // Expression parsers.
68 |     ExpressionPtr parse_get(const IRExpr &expression);
69 |     ExpressionPtr parse_geti(const IRExpr &expression);
70 | 
71 |     ExpressionPtr parse_rdtmp(const IRExpr &expression);
72 | 
73 |     ExpressionPtr parse_qop(const IRExpr &expression);
74 |     ExpressionPtr parse_triop(const IRExpr &expression);
75 |     ExpressionPtr parse_binop(const IRExpr &expression);
76 |     ExpressionPtr parse_unop(const IRExpr &expression);
77 | 
78 |     ExpressionPtr parse_load(const IRExpr &expression);
79 |     ExpressionPtr parse_const(const IRExpr &expression);
80 | 
81 |     ExpressionPtr parse_ccall(const IRExpr &expression);
82 |     ExpressionPtr parse_ite(const IRExpr &expression);
83 | 
84 |     // Statement handlers.
85 |     bool handle_noop(const IRStmt &statement);
86 |     bool handle_put(const IRStmt &statement);
87 |     bool handle_puti(const IRStmt &statement);
88 |     bool handle_wrtmp(const IRStmt &statement);
89 |     bool handle_store(const IRStmt &statement);
90 |     bool handle_storeg(const IRStmt &statement);
91 |     bool handle_loadg(const IRStmt &statement);
92 |     bool handle_abi_hint(const IRStmt &statement);
93 | };
94 | 
95 | #endif // BLOCK_SEMANTICS_H
96 | 


--------------------------------------------------------------------------------
/include/dump_file.h:
--------------------------------------------------------------------------------
 1 | #ifndef DUMP_FILE_H
 2 | #define DUMP_FILE_H
 3 | 
 4 | #include <map>
 5 | #include <set>
 6 | #include <vector>
 7 | #include <string>
 8 | 
 9 | /*!
10 |  * \brief Structure containing information about a serialized block in the
11 |  * `.dmp` file.
12 |  */
13 | struct BlockDescriptor {
14 |     uintptr_t block_start;
15 |     uintptr_t block_end;
16 |     uintptr_t instruction_count;
17 | };
18 | 
19 | typedef std::vector<BlockDescriptor> FunctionBlocks;
20 | typedef std::map<uintptr_t, FunctionBlocks> ParsedFunctions;
21 | typedef std::set<uintptr_t> NonReturningFunctions;
22 | 
23 | /*!
24 |  * \brief Class collecting the information that was produced by the IDA
25 |  * exporting script.
26 |  *
27 |  * For a given `.dmp` file (produced by the exporter), an optional
28 |  * `.dmp.no-return` file is supported which contains information about
29 |  * non-returning functions in the processed binary.
30 |  *
31 |  * \todo This can be handled in a better manner.
32 |  */
33 | class DumpFile {
34 | private:
35 |     ParsedFunctions _functions;
36 |     NonReturningFunctions _functions_no_return;
37 | 
38 | public:
39 |     DumpFile(const std::string &dump_file);
40 | 
41 |     /*!
42 |      * \brief Returns all known functions.
43 |      * \return Returns a `map` with all known functions (address as key,
44 |      * `FunctionBlocks` as value).
45 |      */
46 |     const ParsedFunctions &get_functions() const {
47 |         return _functions;
48 |     }
49 | 
50 |     /*!
51 |      * \brief Returns known, non-returning functions.
52 |      * \return Returns a `set` containing the addresses of all known,
53 |      * non-returning functions.
54 |      */
55 |     const NonReturningFunctions &get_non_returning() const {
56 |         return _functions_no_return;
57 |     }
58 | 
59 | private:
60 |     bool parse(const std::string &dump_file);
61 |     bool parse_no_return(const std::string &no_return_file);
62 | };
63 | 
64 | #endif // DUMP_FILE_H
65 | 


--------------------------------------------------------------------------------
/include/external_functions.h:
--------------------------------------------------------------------------------
 1 | #ifndef EXTERNAL_FUNCTIONS_H
 2 | #define EXTERNAL_FUNCTIONS_H
 3 | 
 4 | #include <iostream>
 5 | #include <fstream>
 6 | #include <sstream>
 7 | #include <iostream>
 8 | #include <cassert>
 9 | #include <vector>
10 | #include <map>
11 | 
12 | 
13 | struct ExternalFunction {
14 |     uint32_t index;
15 |     uint64_t addr;
16 |     std::string name;
17 |     std::string module_name;
18 | };
19 | 
20 | 
21 | typedef std::vector<ExternalFunction> ExternalFunctionVector;
22 | typedef std::map<std::string, ExternalFunction*> ExternalFunctionMap;
23 | 
24 | 
25 | 
26 | class ExternalFunctions {
27 | private:
28 |     ExternalFunctionVector _external_functions;
29 |     ExternalFunctionMap _external_functions_map;
30 |     uint32_t _index = 0;
31 | 
32 |     bool _is_finalized = false;
33 | 
34 | public:
35 | 
36 |     /*!
37 |      * \brief Returns `true` if the external functions structure is finalized.
38 |      * \return Returns `true` if the external functions structure is finalized.
39 |      */
40 |     bool is_finalized() const;
41 | 
42 | 
43 |     /*!
44 |      * \brief Parses a given functions file and builds internal
45 |      * functions structure.
46 |      */
47 |     bool parse(const std::string &funcs_file);
48 | 
49 | 
50 |     /*!
51 |      * \brief Finalizes the external functions structures.
52 |      *
53 |      * This function finalizes the external functions structures. It can only
54 |      * be used once all external functions files are imported via the `parse`
55 |      * function. After `finalize` was executed, no changes to the
56 |      * external functions structures are possible.
57 |      */
58 |     void finalize();
59 | 
60 | 
61 |     /*!
62 |      * \brief Returns a pointer to the external function given by the name.
63 |      * \return Returns a pointer to the external function given by the name
64 |      * or null if it was not found.
65 |      */
66 |     const ExternalFunction* get_external_function(
67 |             const std::string &name) const;
68 | 
69 | 
70 |     /*!
71 |      * \brief Returns a pointer to the external function given by the module
72 |      * name and address.
73 |      * \return Returns a pointer to the external function given by the module
74 |      * name and address or null if it was not found.
75 |      */
76 |     const ExternalFunction* get_external_function(
77 |             const std::string &module_name,
78 |             uint64_t func_addr) const;
79 | };
80 | 
81 | 
82 | 
83 | 
84 | 
85 | 
86 | 
87 | 
88 | 
89 | 
90 | 
91 | 
92 | 
93 | 
94 | 
95 | 
96 | #endif // EXTERNAL_FUNCTIONS_H
97 | 


--------------------------------------------------------------------------------
/include/function.h:
--------------------------------------------------------------------------------
  1 | #ifndef FUNCTION_H
  2 | #define FUNCTION_H
  3 | 
  4 | #include "block.h"
  5 | #include "expression.h"
  6 | #include "block_semantics.h"
  7 | 
  8 | #include <map>
  9 | #include <set>
 10 | #include <vector>
 11 | #include <cstddef>
 12 | #include <functional>
 13 | 
 14 | extern "C" {
 15 | #include <valgrind/libvex.h>
 16 | }
 17 | 
 18 | /*!
 19 |  * \brief A path describing how we reached a basic block in the function.
 20 |  *
 21 |  * A path is merely a vector of `bool`s. Each entry denotes how control flow
 22 |  * changed at each terminator/basic block, starting at the beginning of the
 23 |  * traversal (most commonly the function's entry point). Following a
 24 |  * fall-through or an unconditional jump is recorded using `true`, whereas
 25 |  * the target of a conditional jump is recorded as `false`.
 26 |  */
 27 | typedef std::vector<bool> Path;
 28 | typedef std::map<uintptr_t, std::shared_ptr<Block>> BlockMap;
 29 | 
 30 | /*!
 31 |  * \brief A function called on each visited basic block in a traversal.
 32 |  *
 33 |  * \see `Function::traverse`
 34 |  */
 35 | typedef std::function<bool (void*, const Path&, const Block&)>
 36 |     TraversalCallback;
 37 | 
 38 | /*!
 39 |  * \brief A function called after a path has been fully traversed.
 40 |  *
 41 |  * \see `Function::traverse`
 42 |  */
 43 | typedef std::function<void (void*, const Path&)> PathCallback;
 44 | 
 45 | class Translator;
 46 | const uint8_t BRANCH_THRESHOLD = 0;
 47 | 
 48 | /*!
 49 |  * \brief Class representing a function translated to VEX.
 50 |  *
 51 |  * Objects of this class are to be instantiated by the `Translator` class (hence
 52 |  * the `friend` relationship).
 53 |  */
 54 | class Function {
 55 | private:
 56 |     uintptr_t _entry = 0; // Well-defined even after `Function() = default`.
 57 |     uint8_t _branch_threshold = BRANCH_THRESHOLD;
 58 |     BlockMap _function_blocks;
 59 | 
 60 | public:
 61 |     Function() = default;
 62 |     Function(uintptr_t entry, uint8_t branch_threshold=BRANCH_THRESHOLD);
 63 | 
 64 |     /*!
 65 |      * \brief Returns the function's entry address.
 66 |      * \return Returns the first virtual address in the function.
 67 |      */
 68 |     uintptr_t get_entry() const {
 69 |         return _entry;
 70 |     }
 71 | 
 72 |     bool can_be_fully_traversed() const;
 73 | 
 74 |     // FIXME: Cache this.
 75 |     /*!
 76 |      * \brief Returns the addresses of all known blocks.
 77 |      * \return Returns a vector of addresses (the keys of the block map).
 78 |      */
 79 |     std::vector<uintptr_t> get_block_addresses() const {
 80 |         std::vector<uintptr_t> result;
 81 |         result.reserve(_function_blocks.size());
 82 |         for(const auto &kv : _function_blocks) {
 83 |             result.push_back(kv.first);
 84 |         }
 85 |         return result;
 86 |     }
 87 | 
 88 |     // FIXME: Cache this.
 89 |     /*!
 90 |      * \brief Returns the addresses of blocks returning from the function
 91 |      * (i.e., those with a terminator of type `TerminatorReturn`).
 92 |      * \return Returns a vector of addresses.
 93 |      */
 94 |     std::vector<uintptr_t> get_return_block_addresses() const {
 95 |         std::vector<uintptr_t> result;
 96 |         for(const auto &kv : _function_blocks) {
 97 |             if(kv.second->get_terminator().type == TerminatorReturn) {
 98 |                 result.push_back(kv.first);
 99 |             }
100 |         }
101 | 
102 |         return result;
103 |     }
104 | 
105 |     /*!
106 |      * \brief Returns all blocks.
107 |      * \return Returns a map containing all blocks of the function (key is the
108 |      * block's address).
109 |      */
110 |     const BlockMap &get_blocks() const {
111 |         return _function_blocks;
112 |     }
113 | 
114 |     bool traverse(const TraversalCallback &block_callback,
115 |                   const BlockPredicate &block_predicate,
116 |                   const PathCallback &path_callback,
117 |                   void *user_defined=nullptr) const;
118 | 
119 | private:
120 |     bool traverser(const TraversalCallback &callback,
121 |                    void *user_defined=nullptr) const;
122 | 
123 |     void add_block(uintptr_t address, IRSB *block,
124 |                    const Terminator &terminator);
125 | 
126 |     friend class Translator;
127 | };
128 | 
129 | #endif // FUNCTION_H
130 | 


--------------------------------------------------------------------------------
/include/got.h:
--------------------------------------------------------------------------------
 1 | #ifndef GOT_H
 2 | #define GOT_H
 3 | 
 4 | #include <map>
 5 | #include <cstring>
 6 | #include <fstream>
 7 | #include <sstream>
 8 | #include <iostream>
 9 | 
10 | typedef std::map<uint64_t, uint64_t> GotMap;
11 | 
12 | GotMap import_got(const std::string &target_file);
13 | 
14 | #endif
15 | 


--------------------------------------------------------------------------------
/include/idata.h:
--------------------------------------------------------------------------------
 1 | #ifndef IDATA_H
 2 | #define IDATA_H
 3 | 
 4 | #include <map>
 5 | #include <string>
 6 | #include <cstring>
 7 | #include <fstream>
 8 | #include <sstream>
 9 | #include <iostream>
10 | 
11 | typedef std::map<uint64_t, std::string> IDataMap;
12 | 
13 | IDataMap import_idata(const std::string &target_file);
14 | 
15 | #endif
16 | 


--------------------------------------------------------------------------------
/include/mapped_elf.h:
--------------------------------------------------------------------------------
 1 | #ifndef MAPPED_ELF_H
 2 | #define MAPPED_ELF_H
 3 | 
 4 | #include <string>
 5 | #include <vector>
 6 | #include <cstdint>
 7 | 
 8 | #include <elf.h>
 9 | #include <link.h>
10 | 
11 | #include "memory.h"
12 | 
13 | /*!
14 |  * \brief Class holding information about a memory-mapped ELF file.
15 |  */
16 | class MappedElf : public Memory {
17 | private:
18 |     std::vector<char> _buffer;
19 | 
20 |     ElfW(Ehdr) *_e_header = nullptr;
21 |     ElfW(Phdr) *_p_header = nullptr;
22 | 
23 |     uintptr_t _base = 0;
24 |     size_t _size = 0;
25 | 
26 | public:
27 |     MappedElf(const MappedElf&) = delete;
28 |     virtual void operator=(const MappedElf&) = delete;
29 | 
30 |     MappedElf(const std::string &elf_file);
31 |     const uint8_t *operator[](const uintptr_t index) const override;
32 | 
33 |     /*!
34 |      * \brief Returns the begin of the executable `LOAD` segment.
35 |      * \return Returns the address of the segment's begin (`_base`).
36 |      */
37 |     uintptr_t get_load_begin() const override {
38 |         return _base;
39 |     }
40 | 
41 |     /*!
42 |      * \brief Returns the end of the executable `LOAD` segment.
43 |      * \return Returns the segment's end address (`_base + _size`).
44 |      */
45 |     uintptr_t get_load_end() const override {
46 |         return _base + _size;
47 |     }
48 | };
49 | 
50 | #endif // MAPPED_ELF_H
51 | 


--------------------------------------------------------------------------------
/include/mapped_pe.h:
--------------------------------------------------------------------------------
 1 | #ifndef MAPPED_PE_H
 2 | #define MAPPED_PE_H
 3 | 
 4 | #include <string>
 5 | #include <vector>
 6 | #include <cstdint>
 7 | 
 8 | #include "pe.h"
 9 | 
10 | #include "memory.h"
11 | 
12 | /*!
13 |  * \brief Class holding information about a memory-mapped PE file.
14 |  */
15 | class MappedPe : public Memory {
16 | private:
17 |     std::vector<char> _buffer;
18 | 
19 |     mz_hdr *_mz_header = nullptr;
20 |     pe_hdr *_pe_header = nullptr;
21 | 
22 |     pe32_opt_hdr *_pe32_opt_header = nullptr;
23 |     pe32plus_opt_hdr *_pe32_plus_opt_header = nullptr;
24 | 
25 |     section_header *_text_section_header = nullptr;
26 | 
27 |     uintptr_t _base = 0;
28 |     size_t _size = 0;
29 |     uintptr_t _file_addr = 0;
30 |     size_t _file_size = 0;
31 | 
32 | public:
33 |     MappedPe(const MappedPe&) = delete;
34 |     virtual void operator=(const MappedPe&) = delete;
35 | 
36 |     MappedPe(const std::string &pe_file);
37 |     const uint8_t *operator[](const uintptr_t index) const override;
38 | 
39 |     /*!
40 |      * \brief Returns the begin of the executable `LOAD` segment.
41 |      * \return Returns the address of the segment's begin (`_base`).
42 |      */
43 |     uintptr_t get_load_begin() const override {
44 |         return _base;
45 |     }
46 | 
47 |     /*!
48 |      * \brief Returns the end of the executable `LOAD` segment.
49 |      * \return Returns the segment's end address (`_base + _size`).
50 |      */
51 |     uintptr_t get_load_end() const override {
52 |         return _base + _size;
53 |     }
54 | };
55 | 
56 | #endif // MAPPED_PE_H
57 | 


--------------------------------------------------------------------------------
/include/memory.h:
--------------------------------------------------------------------------------
 1 | #ifndef MEMORY_H
 2 | #define MEMORY_H
 3 | 
 4 | /*!
 5 |  * \brief Enumerates all supported `File Format` types.
 6 |  */
 7 | enum FileFormatType {
 8 |     FileFormatELF64 = 0,
 9 |     FileFormatPE64 = 1,
10 |     FileFormatCount
11 | };
12 | 
13 | 
14 | class Memory {
15 |     // Abstract interface implemented by `MappedElf` and `MappedPe`.
16 | public:
17 |     // Virtual so that implementations can be destroyed through a `Memory*`.
18 |     virtual ~Memory() = default;
19 |     Memory() {}
20 |     Memory(const Memory&) = delete;
21 |     virtual void operator=(const Memory&) = delete;
22 |     // Access by `index`; see the implementing classes for the exact mapping.
23 |     virtual const uint8_t *operator[](const uintptr_t index) const = 0;
24 |     // Begin of the loaded region as reported by the implementation.
25 |     virtual uintptr_t get_load_begin() const = 0;
26 |     // End of the loaded region as reported by the implementation.
27 |     virtual uintptr_t get_load_end() const = 0;
28 | };
29 | 
30 | #endif // MEMORY_H
31 | 


--------------------------------------------------------------------------------
/include/module_plt.h:
--------------------------------------------------------------------------------
 1 | #ifndef MODULE_PLT_H
 2 | #define MODULE_PLT_H
 3 | 
 4 | #include <fstream>
 5 | #include <sstream>
 6 | #include <iostream>
 7 | #include <map>
 8 | 
 9 | 
10 | struct PltEntry {
11 |     uint64_t addr;
12 |     std::string func_name;
13 | };
14 | 
15 | 
16 | typedef std::map<uint64_t, PltEntry> PltMap;
17 | 
18 | 
19 | class ModulePlt {
20 |     // NOTE(review): presumably bound to the constructor argument, which must then outlive this object - confirm.
21 |     const std::string &_module_name; // Reference member: the string is not copied.
22 |     PltMap _plt_entries;
23 | 
24 | private:
25 | 
26 | public:
27 |     ModulePlt(const std::string &module_name);
28 | 
29 | 
30 |     /*!
31 |      * \brief Parses the .plt entries file for the given module.
32 |      */
33 |     bool parse(const std::string &plt_file);
34 | 
35 | 
36 |     /*!
37 |      * \brief Returns a pointer to the plt entry given by the address.
38 |      * \return Returns a pointer to the plt entry given by the address
39 |      * or null if it was not found.
40 |      */
41 |     const PltEntry* get_plt_entry(uint64_t addr) const;
42 | 
43 | 
44 |     /*!
45 |      * \brief Returns a pointer to the plt entry given by the function name.
46 |      * \return Returns a pointer to the plt entry given by the function name
47 |      * or null if it was not found.
48 |      */
49 |     const PltEntry* get_plt_entry(const std::string func_name) const;
50 | 
51 | };
52 | 
53 | #endif // MODULE_PLT_H
54 | 


--------------------------------------------------------------------------------
/include/new_operators.h:
--------------------------------------------------------------------------------
 1 | #ifndef NEW_OPERATORS_H
 2 | #define NEW_OPERATORS_H
 3 | 
 4 | #include <map>
 5 | #include <unordered_set>
 6 | 
 7 | #include "expression.h"
 8 | #include "vtable_file.h"
 9 | #include "vtable_hierarchy.h"
10 | 
11 | struct NewOperator {
12 |     uint64_t addr;
13 |     uint64_t size;
14 |     ExpressionPtr expr;
15 |     std::unordered_set<uint32_t> vtbl_idxs;
16 | };
17 | 
18 | 
19 | typedef std::map<uint64_t, NewOperator> OperatorNewAddrMap;
20 | 
21 | 
22 | class NewOperators {
23 | private:
24 | 
25 |     const std::string &_module_name;
26 |     const VTableFile &_vtable_file;
27 |     const VTableHierarchies &_vtable_hierarchies;
28 | 
29 |     OperatorNewAddrMap _op_new_candidates;
30 | 
31 | public:
32 | 
33 |     NewOperators(const std::string &module_name,
34 |                  const VTableFile &vtable_file,
35 |                  const VTableHierarchies &vtable_hierarchies);
36 | 
37 | 
38 |     void add_op_new_candidate(const NewOperator &new_op_candidate);
39 | 
40 | 
41 |     void export_new_operators(const std::string &target_dir);
42 | 
43 | 
44 |     const OperatorNewAddrMap& get_new_operators() const;
45 | 
46 | 
47 |     void copy_new_operators(const OperatorNewAddrMap &new_ops);
48 | };
49 | 
50 | #endif //NEW_OPERATORS_H
51 | 


--------------------------------------------------------------------------------
/include/path_builder.h:
--------------------------------------------------------------------------------
 1 | #ifndef PATH_BUILDER_H
 2 | #define PATH_BUILDER_H
 3 | 
 4 | #include <map>
 5 | #include <set>
 6 | #include <deque>
 7 | #include <vector>
 8 | #include <cstdint>
 9 | #include <functional>
10 | 
11 | #include "block.h"
12 | #include "function.h"
13 | 
14 | //! A concrete path that contains the full addresses of basic blocks to visit.
15 | using ConcretePath = std::deque<uintptr_t>;
16 | 
17 | //! A map relating a node to a concrete path.
18 | using PathsByNode = std::map<uintptr_t, ConcretePath>;
19 | 
20 | const uint8_t NODE_THRESHOLD = 20;
21 | 
22 | //!
23 | //! \brief Class calculating viable paths through a given `Function` (the
24 | //! "lightweight" policy used as a fallback in `Function::traverse`).
25 | //!
26 | class PathBuilder {
27 | private:
28 |     const Function &_function;
29 |     void *_user_defined;
30 |     const uint8_t _node_threshold;
31 | 
32 | public:
33 |     PathBuilder(const Function &function, void *user_defined=nullptr,
34 |                 uint8_t node_threshold=NODE_THRESHOLD);
35 |     std::set<ConcretePath> build_paths(BlockPredicate predicate) const;
36 | 
37 | private:
38 |     PathsByNode breadth_first(const BlockMap &blocks, uintptr_t root,
39 |                               BlockPredicate predicate,
40 |                               bool terminate_on_match=false) const;
41 | };
42 | 
43 | #endif // PATH_BUILDER_H
44 | 


--------------------------------------------------------------------------------
/include/return_value.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #ifndef RETURN_VALUE_H
  3 | #define RETURN_VALUE_H
  4 | 
  5 | #include <vector>
  6 | #include <string>
  7 | #include <mutex>
  8 | 
  9 | #include "serialization.h"
 10 | #include "external_functions.h"
 11 | #include "module_plt.h"
 12 | #include "vtable_file.h"
 13 | #include "function.h"
 14 | 
 15 | 
 16 | struct VTableActive {
 17 |     Path path;
 18 |     ExpressionPtr vtbl_ptr_loc;
 19 |     uint32_t index; // Index of vtable.
 20 |     bool from_caller;
 21 |     bool from_callee;
 22 | };
 23 | 
 24 | 
 25 | struct ReturnValue {
 26 |     Path path;
 27 |     uint64_t func_addr; // Set to 0 if function not in the current module.
 28 |     ExpressionPtr content;
 29 | };
 30 | 
 31 | 
 32 | struct FctReturnValues {
 33 |     uint64_t func_addr; // Set to 0 if function not in the current module.
 34 |     std::vector<ReturnValue> return_values;
 35 |     std::vector<VTableActive> active_vtables;
 36 | };
 37 | 
 38 | 
 39 | struct ExternalFctReturnValues {
 40 |     const ExternalFunction *ext_func;
 41 |     FctReturnValues func_return_values;
 42 | };
 43 | 
 44 | 
 45 | typedef std::map<uint64_t, FctReturnValues> FctReturnValuesMap;
 46 | typedef std::map<uint64_t, FctReturnValues*> FctReturnValuesPtrMap;
 47 | typedef std::vector<ReturnValue> ReturnValues;
 48 | typedef std::vector<ExternalFctReturnValues> ExtReturnValues;
 49 | 
 50 | 
 51 | class FctReturnValuesFile {
 52 | private:
 53 | 
 54 |     const std::string &_module_name;
 55 |     const VTableFile &_vtable_file;
 56 |     const ModulePlt &_module_plt;
 57 |     const ExternalFunctions &_external_funcs;
 58 | 
 59 |     FctReturnValuesMap _return_values_map;
 60 | 
 61 |     FctReturnValuesPtrMap _plt_return_values_ptr_map;
 62 | 
 63 |     ExtReturnValues _ext_return_values;
 64 | 
 65 |     mutable std::mutex _mtx;
 66 | 
 67 |     bool _is_finalized = false;
 68 | 
 69 | public:
 70 | 
 71 |     FctReturnValuesFile(const std::string &module_name,
 72 |                         const VTableFile &vtable_file,
 73 |                         const ModulePlt &module_plt,
 74 |                         const ExternalFunctions &external_funcs);
 75 | 
 76 | 
 77 |     void add_return_value(uint64_t func_addr,
 78 |                           const ReturnValue &return_value);
 79 | 
 80 | 
 81 |     void add_active_vtable(uint64_t func_addr,
 82 |                            const VTableActive &active_vtable);
 83 | 
 84 | 
 85 |     void export_return_values(const std::string &target_dir);
 86 | 
 87 | 
 88 |     void import_ext_return_values(const std::string &module_file);
 89 | 
 90 | 
 91 |     /*!
 92 |      * \brief Returns a function return values object given by .plt address.
 93 |      * \return Returns a function return values object pointer
 94 |      * or nullptr if object does not exist.
 95 |      */
 96 |     const FctReturnValues* get_plt_return_values_ptr(uint64_t addr) const;
 97 | 
 98 | 
 99 |     /*!
100 |      * \brief Returns a function return values object given by module name and
101 |      * function address.
102 |      * \return Returns a function return values object pointer
103 |      * or nullptr if object does not exist.
104 |      */
105 |     const FctReturnValues* get_ext_return_values_ptr(
106 |                                         const std::string &module_name,
107 |                                         uint64_t func_addr) const;
108 | 
109 | 
110 |     /*!
111 |      * \brief Returns a copy of all return values objects.
112 |      * \return Returns a copy of all return values objects.
113 |      */
114 |     ExtReturnValues get_return_values() const;
115 | 
116 | 
117 |     /*!
118 |      * \brief Finalizes the external return values structures.
119 |      *
120 |      * This function finalizes the external return values structures.
121 |      * It can only be used
122 |      * once all external return values files are imported via the
123 |      * `import_ext_return_values` function.
124 |      * After `finalize` was executed, no changes to the external return values
125 |      * structures are possible.
126 |      */
127 |     void finalize_ext_return_values();
128 | 
129 | 
130 |     /*!
131 |      * \brief Returns `true` if the external return values structure is
132 |      * finalized.
133 |      * \return Returns `true` if the external return values structure is
134 |      * finalized.
135 |      */
136 |     bool is_finalized_ext_return_values() const;
137 | 
138 | };
139 | 
140 | 
141 | 
142 | #endif
143 | 


--------------------------------------------------------------------------------
/include/serialization.h:
--------------------------------------------------------------------------------
 1 | #ifndef SERIALIZATION_H
 2 | #define SERIALIZATION_H
 3 | 
 4 | #include "expression.h"
 5 | #include "iostream"
 6 | 
 7 | void serialize(ExpressionPtr exp, std::ostream &output);
 8 | ExpressionPtr unserialize(std::istream &input);
 9 | 
10 | 
11 | #endif
12 | 


--------------------------------------------------------------------------------
/include/state.h:
--------------------------------------------------------------------------------
  1 | #ifndef STATE_H
  2 | #define STATE_H
  3 | 
  4 | #include "expression.h"
  5 | #include "amd64.h"
  6 | #include "memory.h"
  7 | 
  8 | #include <map>
  9 | #include <set>
 10 | #include <vector>
 11 | #include <memory>
 12 | #include <ostream>
 13 | #include <unordered_map>
 14 | #include <unordered_set>
 15 | 
 16 | #define arg_out
 17 | 
 18 | /*!
 19 |  * \brief Type that specifies how `std::shared_ptr<Expression>` instances
 20 |  * shall be compared.
 21 |  *
 22 |  * As `State` uses a STL map to track the state of expressions, we want to
 23 |  * compare the stored objects by value in order to provide sane updates.
 24 |  */
 25 | struct ExpressionPtrComparison {
 26 |     bool operator()(const ExpressionPtr &lhs, const ExpressionPtr &rhs) const {
 27 |         return *lhs == *rhs; // Both pointers are dereferenced unchecked.
 28 |     }
 29 | };
 30 | 
 31 | using InternalState = std::unordered_map<ExpressionPtr, ExpressionPtr,
 32 |         std::hash<ExpressionPtr>, ExpressionPtrComparison>;
 33 | 
 34 | using InitialValues = std::map<unsigned int, std::shared_ptr<Symbolic>>;
 35 | using Expressions = std::vector<std::pair<ExpressionPtr, ExpressionPtr>>;
 36 | 
 37 | using kill_results = std::unordered_set<ExpressionPtr, std::hash<ExpressionPtr>,
 38 |         ExpressionPtrComparison>;
 39 | 
 40 | /*!
 41 |  * \brief Class that represents a CPU state.
 42 |  *
 43 |  * Effectively, this class represents the side effects of a computation by
 44 |  * keeping track of the various entities modified during a symbolic run. It
 45 |  * basically wraps around a STL map and provides some convenience functions for
 46 |  * modifying the state.
 47 |  *
 48 |  * Keys in the state are destinations (such as temporaries, registers and memory
 49 |  * indirections), whereas values are the (abstract) values that are written to
 50 |  * said destination. An assignment of the form `key -> value` is commonly called
 51 |  * a _binding_ (binding the value to the key expression).
 52 |  *
 53 |  * \see `InternalState`
 54 |  */
 55 | class State {
 56 | private:
 57 |     static InitialValues _initial_values;
 58 |     InternalState _state;
 59 | 
 60 |     std::shared_ptr<Unknown> _unknown;
 61 | 
 62 | public:
 63 |     using iterator = InternalState::iterator;
 64 |     using const_iterator = InternalState::const_iterator;
 65 | 
 66 |     State(bool initialize=true);
 67 |     State(const State&) = default;
 68 | 
 69 |     /*!
 70 |      * \brief Static function that returns the initial register assignment.
 71 |      * \return Returns a (read-only) map, with keys being register offsets and
 72 |      * values the corresponding `ExpressionPtr`s.
 73 |      *
 74 |      * \see `AMD64_REGISTERS`
 75 |      */
 76 |     static const InitialValues &initial_values() {
 77 |         return _initial_values;
 78 |     }
 79 | 
 80 |     void set_initial_state();
 81 |     void purge_scratch_registers(FileFormatType file_format);
 82 | 
 83 |     void merge(const State &other);
 84 |     void optimize(bool do_purge_unchanged=false);
 85 | 
 86 |     const Expressions get_memory_accesses() const;
 87 | 
 88 |     friend std::ostream &operator<<(std::ostream &stream, const State &state);
 89 |     static const std::string format_return_value(uintptr_t address);
 90 | 
 91 |     InternalState::iterator erase(const InternalState::iterator &iterator);
 92 |     size_t erase(const InternalState::key_type &key);
 93 | 
 94 |     bool find(const InternalState::key_type &key,
 95 |               arg_out InternalState::iterator &iterator);
 96 |     bool find(const InternalState::key_type &key,
 97 |               arg_out InternalState::const_iterator &iterator) const;
 98 | 
 99 |     void update(const InternalState::key_type &key,
100 |                 const InternalState::mapped_type &value);
101 | 
102 | private:
103 |     static const std::string format_initial_value(size_t offset);
104 | 
105 |     bool optimizer(bool do_purge_unchanged=false);
106 |     void optimize_entries();
107 | 
108 |     bool propagate();
109 | 
110 |     bool purge_unchanged();
111 |     bool purge_uninteresting();
112 | 
113 |     kill_results kill_helper(const ExpressionPtr &key,
114 |                              const ExpressionPtr &value);
115 |     void kill(const ExpressionPtr &key, const ExpressionPtr &value);
116 | };
117 | 
118 | #endif // STATE_H
119 | 


--------------------------------------------------------------------------------
/include/translator.h:
--------------------------------------------------------------------------------
  1 | #ifndef TRANSLATOR_H
  2 | #define TRANSLATOR_H
  3 | 
  4 | #include <set>
  5 | #include <map>
  6 | #include <mutex>
  7 | #include <thread>
  8 | #include <string>
  9 | #include <cstddef>
 10 | 
 11 | #include "vex.h"
 12 | #include "function.h"
 13 | #include "dump_file.h"
 14 | #include "mapped_elf.h"
 15 | #include "mapped_pe.h"
 16 | #include "memory.h"
 17 | 
 18 | extern "C" {
 19 | #include <valgrind/libvex.h>
 20 | }
 21 | 
 22 | 
 23 | /*!
 24 |  * \brief Class responsible for translating functions into VEX blocks.
 25 |  *
 26 |  * This class takes CFG descriptions as generated by the IDAPython exporter
 27 |  * script and information about non-returning functions and uses both to
 28 |  * generate `Function` instances. Basic blocks are mapped to VEX basic
 29 |  * blocks of type IRSB.
 30 |  */
 31 | class Translator {
 32 | private:
 33 |     /* Only one VEX instance should be present (libVEX seems to fail if
 34 |      * initialized multiple times), hence we do not want to be the sole
 35 |      * owner. */
 36 |     Vex &_vex;
 37 |     // `_memory` is dereferenced by `get_memory()` without a null check.
 38 |     DumpFile _dump_file;
 39 |     const Memory *_memory;
 40 |     // NOTE(review): both containers are keyed by uintptr_t, presumably block start addresses; confirm in translator.cpp.
 41 |     std::set<uintptr_t> _seen_blocks;
 42 |     std::map<uintptr_t, const IRSB*> _blocks;
 43 |     // Keyed by function address (see `get_functions()`).
 44 |     std::map<uintptr_t, Function> _functions;
 45 | 
 46 |     FileFormatType _file_format;
 47 |     // `mutable` so that the const accessors below can lock it.
 48 |     mutable std::mutex _mutex;
 49 | 
 50 | public:
 51 |     Translator(Vex &vex, const std::string &file, FileFormatType file_format,
 52 |                bool parse_on_demand=true);
 53 |     // NOTE(review): reference vs. pointer return suggests `maybe_get_function` can yield nullptr; confirm in translator.cpp.
 54 |     const Function &get_function(const uintptr_t address);
 55 |     const Function *maybe_get_function(const uintptr_t address);
 56 | 
 57 |     /*!
 58 |      * \brief Returns all functions known to the `Translator`.
 59 |      * \return Returns a map with the function's address as key and `Function`
 60 |      * object as value.
 61 |      */
 62 |     const std::map<uintptr_t, Function> &get_functions() const {
 63 |         std::lock_guard<std::mutex> _(_mutex);
 64 |         // The guard is released on return, so the lock does not cover later reads through the returned reference.
 65 |         return _functions;
 66 |     }
 67 | 
 68 |     /*!
 69 |      * \brief Returns the view on memory as given by mapping the ELF/PE file.
 70 |      *
 71 |      * This is mostly used for queries on the binary (such as known memory
 72 |      * ranges).
 73 |      *
 74 |      * \return A reference of type `Memory`.
 75 |      */
 76 |     const Memory &get_memory() const {
 77 |         std::lock_guard<std::mutex> _(_mutex);
 78 |         // The guard only spans the dereference below; it ends when this function returns.
 79 |         // Since memory is only once initialized in the constructor and
 80 |         // otherwise never changed, we assume that the pointer is always set.
 81 |         return *_memory;
 82 |     }
 83 | 
 84 | 
 85 |     /*!
 86 |      * \brief Returns the format of the file (ELF/PE => Linux/Windows).
 87 |      *
 88 |      * \return The file format type.
 89 |      */
 90 |     FileFormatType get_file_format() const {
 91 |         return _file_format;  // read without taking `_mutex`
 92 |     }
 93 | 
 94 | private:
 95 |     bool process_block(Function &function, const BlockDescriptor &block);
 96 |     void finalize_block(Function &function, const BlockDescriptor &block,
 97 |                         IRSB *block_pointer);
 98 | 
 99 |     void parse_known_functions();
100 |     void detect_tail_jumps(Function &function);
101 |     // NOTE(review): both return a raw `Function*`; ownership and null-ness are not visible here, confirm in translator.cpp.
102 |     Function *maybe_translate_function(const uintptr_t address);
103 |     Function *translate_function(const std::pair<uintptr_t, FunctionBlocks>&);
104 | 
105 |     Terminator get_terminator(const IRSB &block) const;
106 | };
107 | 
108 | #endif // TRANSLATOR_H
109 | 


--------------------------------------------------------------------------------
/include/vcall.h:
--------------------------------------------------------------------------------
 1 | #ifndef VCALL_H
 2 | #define VCALL_H
 3 | 
 4 | #include <fstream>
 5 | #include <sstream>
 6 | #include <iostream>
 7 | #include <string>
 8 | #include <vector>
 9 | #include <unordered_set>
10 | #include <mutex>
11 | 
12 | #include "vcall_types.h"
13 | #include "vtable_hierarchy.h"
14 | #include "vtable_file.h"
15 | 
16 | class VCallFile {
17 | private:
18 |     VCalls _vcalls;
19 |     PossibleVCalls _possible_vcalls;
20 |     // The three members below are references, not copies: the referenced objects must outlive this instance.
21 |     const std::string &_module_name;
22 | 
23 |     const VTableHierarchies &_vtable_hierarchies;
24 |     const VTableFile &_vtable_file;
25 |     // `mutable` permits locking from const member functions.
26 |     mutable std::mutex _mtx;
27 | 
28 | public:
29 |     // NOTE(review): presumably binds the three reference members to these arguments; confirm in vcall.cpp.
30 |     VCallFile(const std::string &module_name,
31 |               const VTableHierarchies &vtable_hierarchies,
32 |               const VTableFile &vtable_file);
33 | 
34 |     /*!
35 |      * \brief Returns the found virtual callsites.
36 |      * \return Returns the found virtual callsites.
37 |      */
38 |     const VCalls& get_vcalls() const;
39 | 
40 |     // NOTE(review): parameter types mirror the fields of `VCall`; presumably records one callsite, confirm in vcall.cpp.
41 |     void add_vcall(uint64_t addr, uint32_t index, size_t entry_index);
42 | 
43 |     // NOTE(review): presumably inserts `addr` into `_possible_vcalls`; confirm in vcall.cpp.
44 |     void add_possible_vcall(uint64_t addr);
45 | 
46 |     // NOTE(review): presumably writes the collected callsites below `target_dir`; file name/format are defined in vcall.cpp.
47 |     void export_vcalls(const std::string &target_dir);
48 | };
49 | 
50 | 
51 | 
52 | 
53 | #endif
54 | 


--------------------------------------------------------------------------------
/include/vcall_types.h:
--------------------------------------------------------------------------------
 1 | #ifndef VCALL_TYPES_H
 2 | #define VCALL_TYPES_H
 3 | 
 4 | #include <vector>
 5 | #include <unordered_set>
 6 | 
 7 | struct VCall {  // NOTE(review): this header does not include <cstdint> itself; it relies on its includer for uint64_t/uint32_t.
 8 |     uint64_t addr;  // NOTE(review): presumably the address of the call instruction; confirm against VCallFile::add_vcall.
 9 |     std::unordered_set<uint32_t> indexes;  // NOTE(review): presumably vtable indexes (VTable::index is also uint32_t); confirm.
10 |     size_t entry_index;  // NOTE(review): presumably the slot within the vtable's `entries`; confirm.
11 | };
12 | // Container aliases: a vector of VCall records and a set of 64-bit addresses.
13 | typedef std::vector<VCall> VCalls;
14 | typedef std::unordered_set<uint64_t> PossibleVCalls;
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/include/vex.h:
--------------------------------------------------------------------------------
 1 | #ifndef VEX_H
 2 | #define VEX_H
 3 | 
 4 | extern "C" {
 5 | #include <valgrind/libvex.h>
 6 | #include <valgrind/libvex_ir.h>
 7 | }
 8 | 
 9 | #include <vector>
10 | #include <string>
11 | #include <cstdint>
12 | 
13 | enum CONFIGURATION : size_t {
14 |     MAX_INSTRUCTIONS = 100  // default value of `instruction_count` in Vex::translate
15 | };
16 | 
17 | #define arg_out
18 | 
19 | /*!
20 |  * \brief Main class which acts as an interface to the linked VEX library.
21 |  *
22 |  * A singleton class used to interface the (patched) library libVEX. This is
23 |  * done as the library does not seem to support multiple initializations.
24 |  *
25 |  * \todo This class merely supports x86_64 for now.
26 |  */
27 | class Vex {
28 | private:
29 |     VexAbiInfo _abi_info;
30 |     VexArchInfo _arch_info;
31 |     VexGuestExtents _guest_extents;
32 | 
33 |     VexControl _control;
34 |     VexTranslateArgs _args;
35 | 
36 |     IRSB *_block;
37 |     // NOTE(review): presumably the pointers handed to manage_allocation(); confirm in vex.cpp.
38 |     // Consider std::set (tree size?).
39 |     std::vector<void*> _allocations;
40 | 
41 | public:
42 |     /*!
43 |      * \brief `get_instance` returns the only instance of this singleton class.
44 |      * \return An instance of class `Vex`.
45 |      */
46 |     static Vex &get_instance() {
47 |         static Vex singleton;  // function-local static: constructed on the first call
48 |         return singleton;
49 |     }
50 |     // Copying is disabled; with the private constructor below, get_instance() is the only way to obtain a Vex.
51 |     Vex(const Vex&) = delete;
52 |     void operator=(const Vex&) = delete;
53 | 
54 |     ~Vex();
55 |     // NOTE(review): lifetime/ownership of the returned IRSB is not visible in this header; confirm in vex.cpp.
56 |     const IRSB &translate(const uint8_t *bytes, uintptr_t guest_address,
57 |                           size_t instruction_count=MAX_INSTRUCTIONS,
58 |                           arg_out uintptr_t *vex_block_end=nullptr);  // optional out-parameter, nullptr by default
59 | 
60 | private:
61 |     Vex();
62 | 
63 |     void initialize();
64 |     void initialize_amd64();
65 |     // Never returns normally: always throws a std::string.
66 |     static void __attribute__((noreturn)) failure_exit() {
67 |         throw std::string("Fatal exit from libVEX.");
68 |     }
69 |     // The three stubs below return fixed values; NOTE(review): presumably registered as libVEX callbacks, confirm in vex.cpp.
70 |     static void *dispatch() {
71 |         return nullptr;
72 |     }
73 | 
74 |     static unsigned int needs_self_check(void*, VexRegisterUpdates*,
75 |                                          const VexGuestExtents*) {
76 |         return 0;
77 |     }
78 | 
79 |     static unsigned char chase_into_ok(void*, Addr) {
80 |         return false;  // converts to 0
81 |     }
82 | 
83 |     static void log_bytes(const char *bytes, size_t number_bytes);
84 |     static IRSB *instrument(void *callback_opaque, IRSB *block,
85 |                             const VexGuestLayout*, const VexGuestExtents*,
86 |                             const VexArchInfo*, IRType, IRType);
87 |     // NOTE(review): incoming_allocation has the `void (*)(void*, void*)` shape; presumably forwards to manage_allocation(), confirm.
88 |     void manage_allocation(void *allocation);
89 |     static void incoming_allocation(void *user, void *allocation);
90 | };
91 | 
92 | #endif // VEX_H
93 | 


--------------------------------------------------------------------------------
/include/vtable_file.h:
--------------------------------------------------------------------------------
  1 | #ifndef VTABLE_FILE_H
  2 | #define VTABLE_FILE_H
  3 | 
  4 | #include <map>
  5 | #include <set>
  6 | #include <unordered_set>
  7 | #include <vector>
  8 | #include <string>
  9 | #include <memory>
 10 | #include <fstream>
 11 | #include <sstream>
 12 | #include <iostream>
 13 | #include <cassert>
 14 | 
 15 | /*!
 16 |  * \brief Structure containing information about a vtable stored in the
 17 |  * `_vtables.txt` file.
 18 |  */
 19 | struct VTable {
 20 |     uint32_t index;  // same type as the index accepted by VTableFile::get_vtable(uint32_t)
 21 |     uint64_t addr;  // same type as the key of VTableMap
 22 |     int offset_to_top;  // NOTE(review): presumably the offset-to-top value stored with the vtable; confirm against the exporter.
 23 |     std::vector<uint64_t> entries;  // NOTE(review): presumably the function addresses held by the vtable; confirm.
 24 |     std::string module_name;
 25 | };
 26 | 
 27 | // NOTE(review): the two map aliases hold raw pointers; ownership is not visible in this header, confirm in vtable_file.cpp.
 28 | typedef std::map<uint64_t, VTable*> VTableMap;
 29 | typedef std::vector<VTable> VTableVector;
 30 | typedef std::map<std::string, VTableMap*> VTableModulesMap;
 31 | typedef std::vector<VTableMap> VTableModulesVector;
 32 | 
 33 | 
 34 | /*!
 35 |  * \brief Class collecting the information that was produced by the IDA
 36 |  * exporting script.
 37 |  *
 38 |  * Vtables are read from `_vtables.txt` files (produced by the exporter) via `parse`.
 39 |  */
 40 | class VTableFile {
 41 | private:
 42 |     VTableVector _vtables;
 43 |     VTableModulesVector _module_vtables;
 44 |     VTableModulesMap _module_vtables_map;
 45 |     std::set<std::string> _managed_modules;
 46 |     uint32_t _index;  // no in-class initializer (unlike `_is_finalized`); NOTE(review): confirm the constructor sets it.
 47 | 
 48 |     std::string _this_module_name;
 49 |     bool _is_finalized = false;
 50 | 
 51 | public:
 52 |     VTableFile(const std::string &this_module_name);
 53 | 
 54 | 
 55 |     /*!
 56 |      * \brief Returns all known vtables for this module.
 57 |      * \return Returns a `map` with all known vtables (address as key,
 58 |      * pointer to a `VTable` struct as value) for the current module.
 59 |      */
 60 |     const VTableMap& get_this_vtables() const;
 61 | 
 62 | 
 63 |     /*!
 64 |      * \brief Returns all known vtables for the given module.
 65 |      * \return Returns a `map` with all known vtables (address as key,
 66 |      * pointer to a `VTable` struct as value) for the given module.
 67 |      */
 68 |     const VTableMap& get_vtables(const std::string &module_name) const;
 69 | 
 70 | 
 71 |     /*!
 72 |      * \brief Returns all known vtables.
 73 |      * \return Returns a `vector` with all known vtables.
 74 |      */
 75 |     const VTableVector& get_all_vtables() const;
 76 | 
 77 | 
 78 |     /*!
 79 |      * \brief Parses a given vtable file and builds internal vtable structure.
 80 |      * \return NOTE(review): meaning of the result is undocumented; presumably `true` on success. */
 81 |     bool parse(const std::string &vtables_file);
 82 | 
 83 | 
 84 |     /*!
 85 |      * \brief Finalizes the vtable structures.
 86 |      *
 87 |      * This function finalizes the vtable structures. It can only be used
 88 |      * once all vtable files are imported via the `parse` function.
 89 |      * After `finalize` was executed, no changes to the vtable structures
 90 |      * are possible.
 91 |      */
 92 |     void finalize();
 93 | 
 94 | 
 95 |     /*!
 96 |      * \brief Returns `true` if the vtable structure is finalized.
 97 |      * \return Returns `true` if the vtable structure is finalized.
 98 |      */
 99 |     bool is_finalized() const;
100 | 
101 | 
102 |     /*!
103 |      * \brief Returns a vtable object given by module name and address.
104 |      * \return Returns a vtable object.
105 |      */
106 |     const VTable& get_vtable(const std::string &module_name, uint64_t addr)
107 |         const;
108 |     // NOTE(review): behaviour for an unknown module/address is not documented here; see get_vtable_ptr for a nullable variant.
109 | 
110 |     /*!
111 |      * \brief Returns a vtable object given by its index.
112 |      * \return Returns a vtable object.
113 |      */
114 |     const VTable& get_vtable(uint32_t index) const;
115 | 
116 | 
117 |     /*!
118 |      * \brief Returns a vtable object given by module name and address.
119 |      * \return Returns a vtable object pointer or nullptr if object does
120 |      * not exist.
121 |      */
122 |     const VTable* get_vtable_ptr(const std::string &module_name,
123 |                                              uint64_t addr) const;
124 | 
125 | };
126 | 
127 | #endif // VTABLE_FILE_H
128 | 


--------------------------------------------------------------------------------
/include/vtable_hierarchy.h:
--------------------------------------------------------------------------------
  1 | #ifndef VTABLE_HIERARCHY_H
  2 | #define VTABLE_HIERARCHY_H
  3 | 
  4 | #include "vtable_file.h"
  5 | #include "vtable_update.h"
  6 | #include "external_functions.h"
  7 | #include "module_plt.h"
  8 | #include "state.h"
  9 | #include "vcall_types.h"
 10 | #include "blacklist_functions.h"
 11 | 
 12 | #include <set>
 13 | #include <cassert>
 14 | #include <cstring>
 15 | #include <vector>
 16 | #include <mutex>
 17 | 
 18 | 
 19 | typedef std::set<uint32_t> DependentVTables;
 20 | typedef std::vector<DependentVTables> HierarchiesVTable;
 21 | 
 22 | 
 23 | #define DEBUG_WRITE_HIERARCHY_STEPS 0
 24 | #define DEBUG_PRINT_DEPENDENCIES 0
 25 | #define DEBUG_SEARCH_MERGING_REASON 0
 26 | #define DEBUG_SEARCH_MERGING_VTABLE_ADDR_1 0xdb3148
 27 | #define DEBUG_SEARCH_MERGING_VTABLE_ADDR_2 0xf0a000
 28 | 
 29 | 
 30 | /*!
 31 |  * \brief Class holding the information about the extracted hierarchy.
 32 |  *
 33 |  * Holding the internal structure of the extracted hierarchy. Can import
 34 |  * already found hierarchies and add it to its structure (makes it possible
 35 |  * to analyze binaries in an iterative manner). Found hierarchy can be
 36 |  * exported into a `.hierarchy` file for further usage.
 37 |  */
 38 | class VTableHierarchies {
 39 | private:
 40 |     HierarchiesVTable _hierarchies;  // vector of std::set<uint32_t>; NOTE(review): presumably one set of vtable indexes per hierarchy.
 41 |     const FileFormatType _file_format;
 42 |     const VTableFile &_vtable_file;
 43 |     const VTableMap &_this_vtables;
 44 |     const std::string &_module_name;
 45 |     // All reference members of this class alias objects owned elsewhere; those objects must outlive this instance.
 46 |     const ExternalFunctions &_external_funcs;
 47 |     const ModulePlt &_module_plt;
 48 | 
 49 |     const BlacklistFuncsSet &_funcs_blacklist;
 50 | 
 51 |     // Only needed for debugging.
 52 |     const int _thread_id;
 53 |     // Member exists only when DEBUG_WRITE_HIERARCHY_STEPS is non-zero.
 54 | #if DEBUG_WRITE_HIERARCHY_STEPS
 55 |     std::ofstream hierarchy_steps_file;
 56 | #endif
 57 |     // NOTE(review): the *_priv helpers are presumably the internal workers behind the public methods of the same name; confirm.
 58 |     void merge_hierarchies_priv();
 59 | 
 60 |     bool get_vtable_dependencies(const VTableUpdates &vtable_updates,
 61 |                                  const ExpressionPtr &base_base,
 62 |                                  uint32_t base_index,
 63 |                                  size_t base_offset);
 64 | 
 65 |     bool get_sub_vtable_dependencies(const VTableUpdates &vtable_updates,
 66 |                                      const ExpressionPtr &sub_base,
 67 |                                      uint32_t sub_index,
 68 |                                      size_t sub_offset);
 69 | 
 70 |     bool extract_vtable_dependencies(const VTableUpdates &vtable_updates);
 71 | 
 72 |     bool add_to_hierarchy(uint32_t vtable_1_idx,
 73 |                           uint32_t vtable_2_idx);
 74 | 
 75 |     void update_hierarchy_priv(uint32_t vtable_1_idx,
 76 |                                uint32_t vtable_2_idx,
 77 |                                bool merge_hierarchy);
 78 | 
 79 | public:
 80 |     VTableHierarchies(const FileFormatType file_format,
 81 |                       const VTableFile &vtable_file,
 82 |                       const std::string &module_name,
 83 |                       const ExternalFunctions &external_funcs,
 84 |                       const ModulePlt &module_plt,
 85 |                       const BlacklistFuncsSet &funcs_blacklist,
 86 |                       const int thread_id);
 87 | 
 88 | 
 89 |     /*!
 90 |      * \brief Merges the existing hierarchies if they can be merged.
 91 |      */
 92 |     void merge_hierarchies();
 93 | 
 94 | 
 95 |     /*!
 96 |      * \brief Returns the current extracted hierarchy structure.
 97 |      * \return Returns the current extracted hierarchy structure.
 98 |      */
 99 |     const HierarchiesVTable& get_hierarchies() const;
100 | 
101 | 
102 |     /*!
103 |      * \brief Updates the hierarchy structure with the new given information.
104 |      *
105 |      * This function uses the extracted vtable updates to update
106 |      * the hierarchy structure. Note that it is also using the information
107 |      * which function in which module was analyzed in order to gain
108 |      * the vtable update information.
109 |      */
110 |     void update_hierarchy(const VTableUpdates &vtable_updates,
111 |                           const std::string &module_name,
112 |                           uint64_t func_addr,
113 |                           bool merge_hierarchy=true);
114 | 
115 | 
116 |     /*!
117 |      * \brief Updates the hierarchy structure with the new given information.
118 |      *
119 |      * This function adds both vtables given by their index into a hierarchy
120 |      * (either in a new one or existing one if a dependency is already known).
121 |      */
122 |     void update_hierarchy(uint32_t vtable_1_idx,
123 |                           uint32_t vtable_2_idx,
124 |                           bool merge_hierarchy=true);
125 | 
126 | 
127 |     /*!
128 |      * \brief Updates the hierarchy structure with the new given information.
129 |      *
130 |      * This function adds all vtable hierarchies into the existing hierarchies
131 |      * (either in a new one or existing one if a dependency is already known).
132 |      */
133 |     void update_hierarchy(const HierarchiesVTable& vtable_hierarchies,
134 |                           bool merge_hierarchy=true);
135 | 
136 | 
137 |     /*!
138 |      * \brief Exports the current hierarchy structure into a file.
139 |      */
140 |     void export_hierarchy(const std::string &target_dir);
141 | 
142 | 
143 |     /*!
144 |      * \brief Imports a hierarchy from file, adds it to the current hierarchy.
145 |      */
146 |     void import_hierarchy(const std::string &target_file);
147 | 
148 | 
149 |     /*!
150 |      * \brief Inter-modular check if the same function is at the same position.
151 |      *
152 |      * This function checks if an entry in a vtable of this module also exists
153 |      * in a vtable of another module. If it does the vtables are considered
154 |      * as dependent.
155 |      */
156 |     void entry_heuristic_inter();
157 | 
158 |     // NOTE(review): undocumented; presumably updates the hierarchies from the given virtual callsites, confirm in vtable_hierarchy.cpp.
159 |     void vcall_analysis(const VCalls &vcalls);
160 | 
161 | 
162 | };
163 | 
164 | #endif // VTABLE_HIERARCHY_H
165 | 


--------------------------------------------------------------------------------
/include/vtable_update.h:
--------------------------------------------------------------------------------
 1 | #ifndef VTABLE_UPDATE_H
 2 | #define VTABLE_UPDATE_H
 3 | 
 4 | #include "expression.h"
 5 | #include "state.h"
 6 | #include "vtable_file.h"
 7 | 
 8 | #include <fstream>
 9 | #include <sstream>
10 | #include <iostream>
11 | #include <string>
12 | #include <vector>
13 | #include <map>
14 | #include <mutex>
15 | 
16 | #define arg_out
17 | 
18 | /*!
19 |  * \brief Structure containing information about a vtable overwrite
20 |  * found during the analysis.
21 |  */
22 | struct VTableUpdate {
23 |     size_t offset;  // NOTE(review): presumably the offset from `base` at which the vtable pointer was written; confirm.
24 |     ExpressionPtr base;
25 |     uint32_t index;  // NOTE(review): presumably a vtable index as in VTable::index (same type); confirm.
26 | };
27 | 
28 | // Nesting of the aliases below: module name (string) -> 64-bit key -> list of VTableUpdate.
29 | typedef std::vector<VTableUpdate> VTableUpdates;
30 | typedef std::map<uint64_t, VTableUpdates> VTableUpdatesMap;  // NOTE(review): key presumably a function address; confirm.
31 | typedef std::map<std::string, VTableUpdatesMap> VTableUpdatesModuleMap;
32 | 
33 | 
34 | class FctVTableUpdates {
35 | private:
36 | 
37 |     // VTable updates made by functions of this module.
38 |     VTableUpdatesMap _this_vtable_updates;
39 |     VTableUpdatesModuleMap _external_vtable_updates;  // keyed by module name (std::string) first
40 |     // Both members below are references: the referenced objects must outlive this instance.
41 |     VTableFile &_vtable_file;
42 |     const std::string &_module_name;
43 | 
44 |     mutable std::mutex _mtx;
45 |     // Initial-state expressions of rdi, rsi, rdx, rcx, r8, r9 (the System V AMD64 integer argument registers), looked up per instance.
46 |     ExpressionPtr _rdi = State::initial_values().at(OFFB_RDI);
47 |     ExpressionPtr _rsi = State::initial_values().at(OFFB_RSI);
48 |     ExpressionPtr _rdx = State::initial_values().at(OFFB_RDX);
49 |     ExpressionPtr _rcx = State::initial_values().at(OFFB_RCX);
50 |     ExpressionPtr _r8 = State::initial_values().at(OFFB_R8);
51 |     ExpressionPtr _r9 = State::initial_values().at(OFFB_R9);
52 |     // NOTE(review): the two converters presumably map between these expressions and their textual form used on export/import; confirm.
53 |     bool convert_expression_str(ExpressionPtr base,
54 |                                 arg_out std::string &base_str);
55 | 
56 |     bool convert_str_expression(const std::string &base_str,
57 |                                 arg_out ExpressionPtr &base);
58 | 
59 | public:
60 | 
61 |     FctVTableUpdates(VTableFile &vtable_file,
62 |                      const std::string &module_name);
63 | 
64 | 
65 |     /*!
66 |      * \brief Adds vtable updates for the given function.
67 |      */
68 |     void add_vtable_updates(uint64_t fct_addr,
69 |                            const VTableUpdates &vtable_updates);
70 | 
71 | 
72 |     /*!
73 |      * \brief Exports the vtable updates that are done by this module.
74 |      */
75 |     void export_vtable_updates(const std::string &target_dir);
76 | 
77 | 
78 |     /*!
79 |      * \brief Returns all vtable updates made by a function of a
80 |      * specific module.
81 |      * \return Returns all vtable updates made by a function of a
82 |      * specific module.
83 |      */
84 |     const VTableUpdates* get_vtable_updates(const std::string &module_name,
85 |                                             uint64_t fct_addr) const;
86 |     // NOTE(review): returns a pointer, presumably nullptr when nothing is recorded for (module, function); confirm.
87 | 
88 |     /*!
89 |      * \brief Imports a vtable update from file,
90 |      * adds it to the current vtable updates.
91 |      */
92 |     void import_updates(const std::string &target_file);
93 | 
94 | };
95 | 
96 | #endif // VTABLE_UPDATE_H
97 | 


--------------------------------------------------------------------------------
/include/vtv_vcall_gt.h:
--------------------------------------------------------------------------------
 1 | #ifndef VTV_VCALL_GT_H
 2 | #define VTV_VCALL_GT_H
 3 | 
 4 | #include <map>
 5 | #include <unordered_set>
 6 | #include <fstream>
 7 | #include <sstream>
 8 | #include <iostream>
 9 | 
10 | #include "expression.h"
11 | 
12 | struct VTVVcall {
13 |     uint64_t addr_verify_call;  // NOTE(review): presumably the address of the VTV verification call; confirm.
14 |     ExpressionPtr vtbl_obj;
15 |     std::unordered_set<uint64_t> addr_vcalls;  // NOTE(review): presumably the vcall addresses tied to that verification call; confirm.
16 | };
17 | // NOTE(review): the map key is presumably `addr_verify_call` (same type); confirm in vtv_vcall_gt.cpp.
18 | typedef std::map<uint64_t, VTVVcall> VTVVcalls;
19 | 
20 | 
21 | class VTVVcallsFile {
22 | private:
23 |     // Stored as a reference: the string passed to the constructor must outlive this instance.
24 |     const std::string &_module_name;
25 | 
26 |     VTVVcalls _vtv_vcalls;
27 | 
28 | public:
29 | 
30 |     VTVVcallsFile(const std::string &module_name);
31 |     // NOTE(review): presumably merges the given entries into `_vtv_vcalls`; confirm in vtv_vcall_gt.cpp.
32 |     void add_vtv_vcalls(const VTVVcalls &vtv_vcalls);
33 |     // NOTE(review): presumably writes `_vtv_vcalls` to a file below `target_dir`; confirm name/format in vtv_vcall_gt.cpp.
34 |     void export_vtv_vcalls(const std::string &target_dir);
35 | 
36 |     /*!
37 |      * \brief Returns the found vtv vcalls.
38 |      * \return Returns the found vtv vcalls.
39 |      */
40 |     const VTVVcalls& get_vtv_vcalls() const;
41 | };
42 | 
43 | #endif // VTV_VCALL_GT_H
44 | 


--------------------------------------------------------------------------------
/paper.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RUB-SysSec/Marx/3bd6f21da8defd7746ed803e008490ea4ecadc62/paper.pdf


--------------------------------------------------------------------------------
/patch/heap_allocation_patch.diff:
--------------------------------------------------------------------------------
  1 | Index: priv/ir_defs.c
  2 | ===================================================================
  3 | --- priv/ir_defs.c	(revision 3203)
  4 | +++ priv/ir_defs.c	(working copy)
  5 | @@ -1669,7 +1669,9 @@
  6 |  /*--- Constructors                                            ---*/
  7 |  /*---------------------------------------------------------------*/
  8 |  
  9 | +Bool _allocate_on_heap = 0;
 10 |  
 11 | +
 12 |  /* Constructors -- IRConst */
 13 |  
 14 |  IRConst* IRConst_U1 ( Bool bit )
 15 | @@ -2446,6 +2448,18 @@
 16 |     return dst;
 17 |  }
 18 |  
 19 | +IRSB* deepCopyIRSB_Heap ( const IRSB* bb )
 20 | +{
 21 | +   IRSB *bb2;
 22 | +   Bool previous = _allocate_on_heap;
 23 | +
 24 | +   _allocate_on_heap = 1;
 25 | +   bb2 = deepCopyIRSB(bb);
 26 | +
 27 | +   _allocate_on_heap = previous;
 28 | +   return bb2;
 29 | +}
 30 | +
 31 |  IRSB* deepCopyIRSB ( const IRSB* bb )
 32 |  {
 33 |     Int      i;
 34 | Index: priv/main_util.c
 35 | ===================================================================
 36 | --- priv/main_util.c	(revision 3203)
 37 | +++ priv/main_util.c	(working copy)
 38 | @@ -39,7 +39,75 @@
 39 |  #include "main_globals.h"
 40 |  #include "main_util.h"
 41 |  
 42 | +#include <stdlib.h>
 43 |  
 44 | +extern Bool _allocate_on_heap;
 45 | +
 46 | +typedef void (*AllocationListener)( void*, void* );
 47 | +
 48 | +void vexInitialListener ( void*, void* );
 49 | +void vexInitialListener( void *userdata, void *data )
 50 | +{
 51 | +   return;
 52 | +}
 53 | +
 54 | +void *             _global_listener_data  = NULL;
 55 | +AllocationListener _global_listener_func  = &vexInitialListener;
 56 | +
 57 | +void LibVEX_registerAllocationListener ( void *userdata, AllocationListener listener )
 58 | +{
 59 | +   _global_listener_data = userdata;
 60 | +   _global_listener_func = listener;
 61 | +}
 62 | +
 63 | +void* LibVEX_Alloc_inline ( SizeT nbytes )
 64 | +{
 65 | +   struct align {
 66 | +      char c;
 67 | +      union {
 68 | +         char c;
 69 | +         short s;
 70 | +         int i;
 71 | +         long l;
 72 | +         long long ll;
 73 | +         float f;
 74 | +         double d;
 75 | +         /* long double is currently not used and would increase alignment
 76 | +            unnecessarily. */
 77 | +         /* long double ld; */
 78 | +         void *pto;
 79 | +         void (*ptf)(void);
 80 | +      } x;
 81 | +   };
 82 | +
 83 | +   /* Make sure the compiler does no surprise us */
 84 | +   vassert(offsetof(struct align,x) <= REQ_ALIGN);
 85 | +
 86 | +   if(_allocate_on_heap) {
 87 | +      void *data = malloc(nbytes);
 88 | +
 89 | +      _global_listener_func(_global_listener_data, data);
 90 | +      return data;
 91 | +   }
 92 | +
 93 | +#if 0
 94 | +  /* Nasty debugging hack, do not use. */
 95 | +  return malloc(nbytes);
 96 | +#else
 97 | +   HChar* curr;
 98 | +   HChar* next;
 99 | +   SizeT  ALIGN;
100 | +   ALIGN  = offsetof(struct align,x) - 1;
101 | +   nbytes = (nbytes + ALIGN) & ~ALIGN;
102 | +   curr   = private_LibVEX_alloc_curr;
103 | +   next   = curr + nbytes;
104 | +   if (next >= private_LibVEX_alloc_last)
105 | +      private_LibVEX_alloc_OOM();
106 | +   private_LibVEX_alloc_curr = next;
107 | +   return curr;
108 | +#endif
109 | +}
110 | +
111 |  /*---------------------------------------------------------*/
112 |  /*--- Storage                                           ---*/
113 |  /*---------------------------------------------------------*/
114 | @@ -75,6 +143,7 @@
115 |  
116 |  static VexAllocMode mode = VexAllocModeTEMP;
117 |  
118 | +
119 |  void vexAllocSanityCheck ( void )
120 |  {
121 |     vassert(temporary_first == &temporary[0]);
122 | Index: priv/main_util.h
123 | ===================================================================
124 | --- priv/main_util.h	(revision 3203)
125 | +++ priv/main_util.h	(working copy)
126 | @@ -122,47 +122,8 @@
127 |     boundary. */
128 |  #define REQ_ALIGN 8
129 |  
130 | -static inline void* LibVEX_Alloc_inline ( SizeT nbytes )
131 | -{
132 | -   struct align {
133 | -      char c;
134 | -      union {
135 | -         char c;
136 | -         short s;
137 | -         int i;
138 | -         long l;
139 | -         long long ll;
140 | -         float f;
141 | -         double d;
142 | -         /* long double is currently not used and would increase alignment
143 | -            unnecessarily. */
144 | -         /* long double ld; */
145 | -         void *pto;
146 | -         void (*ptf)(void);
147 | -      } x;
148 | -   };
149 | +void* LibVEX_Alloc_inline ( SizeT );
150 |  
151 | -   /* Make sure the compiler does no surprise us */
152 | -   vassert(offsetof(struct align,x) <= REQ_ALIGN);
153 | -
154 | -#if 0
155 | -  /* Nasty debugging hack, do not use. */
156 | -  return malloc(nbytes);
157 | -#else
158 | -   HChar* curr;
159 | -   HChar* next;
160 | -   SizeT  ALIGN;
161 | -   ALIGN  = offsetof(struct align,x) - 1;
162 | -   nbytes = (nbytes + ALIGN) & ~ALIGN;
163 | -   curr   = private_LibVEX_alloc_curr;
164 | -   next   = curr + nbytes;
165 | -   if (next >= private_LibVEX_alloc_last)
166 | -      private_LibVEX_alloc_OOM();
167 | -   private_LibVEX_alloc_curr = next;
168 | -   return curr;
169 | -#endif
170 | -}
171 | -
172 |  /* Misaligned memory access support. */
173 |  
174 |  extern UInt  read_misaligned_UInt_LE  ( void* addr );
175 | Index: pub/libvex_ir.h
176 | ===================================================================
177 | --- pub/libvex_ir.h	(revision 3203)
178 | +++ pub/libvex_ir.h	(working copy)
179 | @@ -2992,6 +2992,12 @@
180 |  /* Deep-copy an IRSB */
181 |  extern IRSB* deepCopyIRSB ( const IRSB* );
182 |  
183 | +/* Deep-copy an IRSB onto the heap */
184 | +extern IRSB* deepCopyIRSB_Heap ( const IRSB* );
185 | +
186 | +typedef void (*AllocationListener)( void*, void* );
187 | +extern void LibVEX_registerAllocationListener ( void*, AllocationListener );
188 | +
189 |  /* Deep-copy an IRSB, except for the statements list, which set to be
190 |     a new, empty, list of statements. */
191 |  extern IRSB* deepCopyIRSBExceptStmts ( const IRSB* );
192 | 


--------------------------------------------------------------------------------
/scripts/ida_get_all_icalls.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python2.7
 2 | 
 3 | import sys
 4 | 
 5 | from idc import *
 6 | from idaapi import *
 7 | from idautils import *
 8 | 
 9 | '''
10 | Generate a file with all indirect calls.
11 | '''
12 | 
13 | counter = 0
14 | segments = list(Segments())
15 | icalls_set = set()
16 | # Only segments with the execute permission are scanned; all others are skipped.
17 | for segment in segments:
18 |     permissions = getseg(segment).perm
19 |     if not permissions & SEGPERM_EXEC:
20 |         continue
21 | 
22 |     print('\nProcessing segment %s.' % SegName(segment))
23 | 
24 |     ea = SegStart(segment)
25 |     end = SegEnd(segment)
26 |     while ea < end:
27 |         # A `call` whose first operand has type 1..4 (o_reg, o_mem, o_phrase, o_displ in IDA) is recorded.
28 |         # Return values of GetOpType
29 |         # https://www.hex-rays.com/products/ida/support/idadoc/276.shtml
30 |         if (GetMnem(ea) == "call"
31 |             and GetOpType(ea, 0) >= 1
32 |             and GetOpType(ea, 0) <= 4):
33 |             #print "0x%x - call %s" % (ea, GetOpnd(ea, 0))
34 |             icalls_set.add(ea)
35 |             counter += 1  # counts every match; differs from len(icalls_set) if an address is seen twice
36 | 
37 |         ea = idc.NextHead(ea)
38 | 
39 | print "Number of icalls found: %d" % counter
40 | # Output file: one lowercase hex address per line (no 0x prefix), in set iteration order.
41 | target_file = GetInputFile() + ".icalls"
42 | with open(target_file, 'w') as fp:
43 |     for icall_addr in icalls_set:
44 |         fp.write("%x\n" % icall_addr)
45 | 
46 | print "File written to: %s" % target_file


--------------------------------------------------------------------------------
/scripts/ida_get_hierarchies_through_rtti.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python2
  2 | 
  3 | import sys
  4 | from idc import *
  5 | from idaapi import *
  6 | from idautils import *
  7 | 
  8 | '''
  9 | Generate ground truth from RTTI values.
 10 | '''
 11 | 
 12 | vtable_section_names = [".rodata", ".data.rel.ro", ".data.rel.ro.local"]
 13 | 
 14 | 
 15 | vtables = [  ]
 16 | 
 17 | 
 18 | with open(GetInputFile() + '.gt_vtables', 'r') as fp:
 19 |     for line in fp:
 20 |         vtables.append(int(line.split(" ")[0], 16))
 21 | 
 22 | 
 23 | 
 24 | #vtables = [0x790810, 0x7a6df0, 0x7a9570, 0x7a9590, 0x7b9930, 0x7c76f0]
 25 | 
 26 | DEBUG = True
 27 | 
 28 | class ClassObject:
 29 | 
 30 |     def __init__(self, name):
 31 |         self.name = name
 32 |         self.base_classes = list()
 33 | 
 34 | 
 35 |     def add_base_class(self, base_class):
 36 |         self.base_classes.append(base_class)
 37 | 
 38 | 
 39 | def parse_typeinfo(rtti_ptr):
 40 | 
 41 |     in_vtable_section = False
 42 |     for vtable_sec in vtable_sections:
 43 |         if SegStart(vtable_sec) <= rtti_ptr <= SegEnd(vtable_sec):
 44 |             in_vtable_section = True
 45 |             break
 46 | 
 47 |     # Check if type info resides in extern.
 48 |     if not in_vtable_section:
 49 |         name = Name(rtti_ptr)
 50 |         if name == "":
 51 |             print "Error for type info: 0x%x" % rtti_ptr
 52 |             print "No name found for extern symbol."
 53 |             return None
 54 |         class_obj = ClassObject(name)
 55 |         return class_obj
 56 | 
 57 |     name_ptr = Qword(rtti_ptr + 0x8)
 58 |     name = GetString(name_ptr)
 59 | 
 60 |     if not name:
 61 |         print "Error for type info: 0x%x" % rtti_ptr
 62 |         print "No name found."
 63 |         return None
 64 |     '''
 65 |     # Try to demangle name to:
 66 |     # `typeinfo name for'ClassName
 67 |     if not Demangle("__ZTS" + name, 0):
 68 |         print "Error for type info: 0x%x" % rtti_ptr
 69 |         print "Not able to demangle name: %s." % name
 70 |         return None
 71 |     '''
 72 | 
 73 |     # Remove the number in the beginning of the name
 74 |     # (part of the mangled name).
 75 |     skip = 0
 76 |     for i in range(len(name)):
 77 |         if 48 <= ord(name[i]) <= 57:
 78 |             continue
 79 |         skip = i
 80 |         break
 81 |     name = name[skip:]
 82 | 
 83 | 
 84 |     class_obj = ClassObject(name)
 85 | 
 86 | 
 87 |     # Upper base ptr is 0 if we have multi-inheritance
 88 |     # (because then we have the number of base classes given in
 89 |     # the lower base ptr)
 90 |     upper_base_ptr = Dword(rtti_ptr + 0x10)
 91 |     if upper_base_ptr < 0x50: # multi-inheritance
 92 |         number_bases = Dword(rtti_ptr + 0x14)
 93 | 
 94 |         if DEBUG:
 95 |             print "multi"
 96 | 
 97 |         if number_bases > 100:
 98 |             print "Error for type info: 0x%x" % rtti_ptr
 99 |             print "Detected multi-inheritance but with over 100 base classes."
100 |             return None
101 | 
102 |         for i in range(number_bases):
103 |             base_ptr = Qword(rtti_ptr + 0x18 + (i*0x10))
104 | 
105 |             if DEBUG:
106 |                 print "multi 0x%x" % base_ptr
107 | 
108 |             base_class = parse_typeinfo(base_ptr)
109 |             if base_class:
110 |                 class_obj.add_base_class(base_class)
111 | 
112 |     else: # single-inheritance or base-class
113 |         base_ptr = Qword(rtti_ptr + 0x10)
114 | 
115 |         is_ptr = False
116 |         for vtable_sec in vtable_sections:
117 |             if SegStart(vtable_sec) <= base_ptr <= SegEnd(vtable_sec):
118 |                 is_ptr = True
119 |                 break
120 | 
121 |         is_extern = False
122 |         if SegStart(extern_section) <= base_ptr <= SegEnd(extern_section):
123 |             is_extern = True
124 | 
125 |         if is_ptr: # single-inheritance
126 | 
127 |             if DEBUG:
128 |                 print "single"
129 |                 print "0x%x" % base_ptr
130 | 
131 |             base_class = parse_typeinfo(base_ptr)
132 |             if base_class:
133 |                 class_obj.add_base_class(base_class)
134 | 
135 |         elif is_extern: # has inheritance to external module
136 | 
137 |             if DEBUG:
138 |                 print "external"
139 |                 print "0x%x" % base_ptr
140 | 
141 |             name = Name(base_ptr)
142 |             if name == "":
143 |                 print "Error for external type info: 0x%x" % base_ptr
144 |                 print "No name found for extern symbol."
145 |                 return None
146 |             if (Demangle(name, 0) and
147 |                 (name[:4] == "_ZTI" or name[:5] == "__ZTI")):
148 |                 temp = ClassObject(name)
149 |                 if temp:
150 |                     class_obj.add_base_class(temp)
151 | 
152 |         else: # base-class
153 |             if DEBUG:
154 |                 print "base"
155 |                 print "0x%x" % base_ptr
156 | 
157 |     return class_obj
158 | 
159 | 
160 | def print_class_hierarchy(class_obj):
161 | 
162 |     def pretty_print(class_obj, depth):
163 |         print "   "*depth,
164 |         print class_obj.name
165 |         for base_class in class_obj.base_classes:
166 |             pretty_print(base_class, depth+1)
167 | 
168 |     pretty_print(class_obj, 0)
169 | 
170 | 
171 | def convert_to_set(class_obj):
172 |     hierarchy_set = set()
173 |     hierarchy_set.add(class_obj.name)
174 |     for base_obj in class_obj.base_classes:
175 |         hierarchy_set |= convert_to_set(base_obj)
176 |     return hierarchy_set
177 | 
178 | 
179 | segments = list(Segments())
180 | vtable_sections = set()
181 | extern_section = None
182 | for segment in segments:
183 |     if SegName(segment) in vtable_section_names:
184 |         vtable_sections.add(segment)
185 | 
186 |     if SegName(segment) == "extern":
187 |         extern_section = segment
188 | 
189 | 
190 | 
191 | hierarchy_list = list()
192 | vtable_mapping = dict()
193 | vtable_addr_error = set()
194 | 
195 | for vtable_addr in vtables:
196 | 
197 |     print "Processing vtable: 0x%x" % vtable_addr
198 | 
199 | 
200 |     rtti_ptr = Qword(vtable_addr - 0x8)
201 |     if rtti_ptr == 0:
202 |         print "Error for vtable: 0x%x" % vtable_addr
203 |         print "RTTI pointer 0. Seems not to be a vtable."
204 |         vtable_addr_error.add(vtable_addr)
205 |         continue
206 | 
207 |     class_obj = parse_typeinfo(rtti_ptr)
208 |     if class_obj is None:
209 |         print "Error for vtable: 0x%x" % vtable_addr
210 |         print "Seems not to be a vtable."
211 |         vtable_addr_error.add(vtable_addr)
212 |         continue
213 |     vtable_mapping[vtable_addr] = class_obj
214 | 
215 |     if DEBUG:
216 |         print_class_hierarchy(class_obj)
217 | 
218 |     # Convert to hierarchy set and merge into hierarchies
219 |     hierarchy_set = convert_to_set(class_obj)
220 |     is_merged = False
221 |     i = 0
222 |     while i < len(hierarchy_list):
223 |         if hierarchy_list[i].isdisjoint(hierarchy_set):
224 |             i += 1
225 |             continue
226 | 
227 |         hierarchy_list[i] |= hierarchy_set
228 |         is_merged = True
229 |         break
230 |     if not is_merged:
231 |         hierarchy_list.append(hierarchy_set)
232 | 
233 | 
234 | # Replace vtable names with vtable addresses.
235 | for hierarchy_set in hierarchy_list:
236 |     for name in list(hierarchy_set):
237 |         was_added = False
238 |         for k,v in vtable_mapping.iteritems():
239 |             if name == v.name:
240 |                 was_added = True
241 |                 hierarchy_set.add(k)
242 |         if was_added:
243 |             hierarchy_set.remove(name)
244 | '''
245 | for hierarchy_set in hierarchy_list:
246 |     for k,v in vtable_mapping.iteritems():
247 |         if v.name in hierarchy_set:
248 |             hierarchy_set.remove(v.name)
249 |             hierarchy_set.add(k)
250 | '''
251 | 
252 | # Merge hierarchies
253 | i = 0
254 | while i < len(hierarchy_list):
255 | 
256 |     is_merged = False
257 |     j = i + 1
258 |     while j < len(hierarchy_list):
259 | 
260 |         if hierarchy_list[i].isdisjoint(hierarchy_list[j]):
261 |             j += 1
262 |             continue
263 | 
264 |         hierarchy_list[j] |= hierarchy_list[i]
265 |         is_merged = True
266 |         break
267 | 
268 |     if is_merged:
269 |         hierarchy_list.remove(hierarchy_list[i])
270 |     else:
271 |         i += 1
272 | 
273 | # Sanity check if all vtable addresses are in the hierarchy.
274 | for k,v in vtable_mapping.iteritems():
275 |     found = False
276 |     for hierarchy_set in hierarchy_list:
277 |         if k in hierarchy_set:
278 |             found = True
279 |             break
280 |     if not found:
281 |         print "Error: Can not find vtable address 0x%x in hierarchies." % k
282 | 
283 | '''
284 | DEBUG
285 | print hierarchy_list
286 | sys.exit(0)
287 | #'''
288 | 
289 | not_complete_hierarchies = list()
290 | with open(GetInputFile() + '.gt_hierarchy', 'w') as fp:
291 |     for hierarchy_set in hierarchy_list:
292 |         has_written = False
293 |         is_complete = True
294 |         for vtable in hierarchy_set:
295 |             if isinstance(vtable, int):
296 |                 fp.write("%x " % vtable)
297 |                 has_written = True
298 |             else:
299 | 
300 |                 '''
301 |                 temp = Demangle(vtable, 0)
302 |                 if temp:
303 |                     fp.write("%s " % temp)
304 |                 else:
305 |                     fp.write("%s " % vtable)
306 |                 '''
307 | 
308 |                 is_complete = False
309 |         if has_written:
310 |             fp.write("\n")
311 |         if not is_complete:
312 |             not_complete_hierarchies.append(hierarchy_set)
313 | 
314 | 
315 | 
316 | 
317 | if vtable_addr_error:
318 |     print "The following vtable addresses created errors:"
319 |     for vtable_addr in vtable_addr_error:
320 |         print "0x%x" % vtable_addr
321 | else:
322 |     print "No vtable errors."
323 | 
324 | 
325 | if not_complete_hierarchies:
326 |     print "The following hierarchies are not complete:"
327 |     for hierarchy_set in not_complete_hierarchies:
328 |         print "Hierarchy:"
329 |         for vtable in hierarchy_set:
330 |             if isinstance(vtable, int):
331 |                 print "0x%x" % vtable
332 |             else:
333 |                 temp = Demangle(vtable, 0)
334 |                 if temp:
335 |                     print temp
336 |                 else:
337 |                     print vtable
338 |         print ""
339 | else:
340 |     print "All hierarchies complete."
341 | 
342 | 


--------------------------------------------------------------------------------
/scripts/ida_has_refs.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python2
 2 | 
 3 | from idc import *
 4 | from idaapi import *
 5 | from idautils import *
 6 | 
 7 | missing = [0x7dfa30]
 8 | 
 9 | def xrefs(m):
10 |     return [x.frm for x in XrefsTo(m)]
11 | 
12 | print ""
13 | 
14 | for i, m in enumerate(missing):
15 |     a = xrefs(m)
16 |     print('%03i %08x %d %s' \
17 |         % (i, m, len(a), ' '.join('%08x' % x for x in a)))
18 | sum([int(len(xrefs(m)) == 0) for m in missing])


--------------------------------------------------------------------------------
/scripts/ida_win_find_blacklist_functions.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2.7
  2 | 
  3 | import sys
  4 | 
  5 | from idc import *
  6 | from idaapi import *
  7 | from idautils import *
  8 | 
  9 | from struct import pack
 10 | from ctypes import c_uint32, c_uint64
 11 | import subprocess
 12 | 
 13 | base = get_imagebase()
 14 | plt_start, plt_end = 0, 0
 15 | segments = list(Segments())
 16 | 
 17 | # C++ configuration
 18 | dump_vtables = True
 19 | vtable_section_names = [".rodata",
 20 |     ".data.rel.ro",
 21 |     ".data.rel.ro.local",
 22 |     ".rdata"]
 23 | 
 24 | # global variables that are needed for multiple C++ algorithms
 25 | if dump_vtables:
 26 |     extern_seg = None
 27 |     extern_start = 0
 28 |     extern_end = 0
 29 |     text_seg = None
 30 |     text_start = 0
 31 |     text_end = 0
 32 |     plt_seg = None
 33 |     plt_start = 0
 34 |     plt_end = 0
 35 |     got_seg = None
 36 |     got_start = 0
 37 |     got_end = 0
 38 |     idata_seg = None
 39 |     idata_start = 0
 40 |     idata_end = 0
 41 |     vtable_sections = list()
 42 |     for segment in segments:
 43 |         if SegName(segment) == "extern":
 44 |             extern_seg = segment
 45 |             extern_start = SegStart(extern_seg)
 46 |             extern_end = SegEnd(extern_seg)
 47 |         elif SegName(segment) == ".text":
 48 |             text_seg = segment
 49 |             text_start = SegStart(text_seg)
 50 |             text_end = SegEnd(text_seg)
 51 |         elif SegName(segment) == ".plt":
 52 |             plt_seg = segment
 53 |             plt_start = SegStart(plt_seg)
 54 |             plt_end = SegEnd(plt_seg)
 55 |         elif SegName(segment) == ".got":
 56 |             got_seg = segment
 57 |             got_start = SegStart(got_seg)
 58 |             got_end = SegEnd(got_seg)
 59 |         elif SegName(segment) == ".idata":
 60 |             idata_seg = segment
 61 |             idata_start = SegStart(idata_seg)
 62 |             idata_end = SegEnd(idata_seg)
 63 |         elif SegName(segment) in vtable_section_names:
 64 |             vtable_sections.append(segment)
 65 | 
 66 | def main():
 67 | 
 68 |     for func in Functions():
 69 | 
 70 |         flow = list(FlowChart(get_func(func)))
 71 |         if len(flow) == 1:
 72 |             block = flow[0]
 73 |             block_start = block.startEA
 74 |             block_end = block.endEA
 75 | 
 76 |             address = block_start
 77 |             counter = 0
 78 |             is_zero_xor = False
 79 |             has_ret = False
 80 |             has_mov = False
 81 |             while address != BADADDR and address < block_end:
 82 | 
 83 |                 mnem = GetMnem(address)
 84 |                 if mnem == "xor":
 85 |                     if GetOpnd(address, 0) == GetOpnd(address, 1):
 86 |                         is_zero_xor = True
 87 |                 elif mnem == "retn":
 88 |                     has_ret = True
 89 |                 elif mnem == "mov":
 90 |                     # Check if second is constant
 91 |                     if GetOpType(address, 1) == 5:
 92 |                         has_mov = True
 93 | 
 94 |                 counter += 1
 95 |                 address = NextHead(address)
 96 | 
 97 |             if counter == 2 and is_zero_xor and has_ret:
 98 |                 print "%x Ignore XOR func" % func
 99 | 
100 |             elif counter == 1 and has_ret:
101 |                 print "%x Ignore RETN func" % func
102 | 
103 |             elif counter == 2 and has_mov and has_ret:
104 |                 print "%x Ignore MOV func" % func
105 | 
106 | if __name__ == '__main__':
107 |     main()


--------------------------------------------------------------------------------
/scripts/ida_win_get_hierarchies_through_rtti.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python2
  2 | 
  3 | import sys
  4 | from idc import *
  5 | from idaapi import *
  6 | from idautils import *
  7 | 
  8 | '''
  9 | Generate ground truth from RTTI values.
 10 | '''
 11 | 
 12 | vtable_section_names = [".rdata"]
 13 | 
 14 | # Get all vtables through the symbols.
 15 | vtable_symbols = []
 16 | for name_tuple in Names():
 17 |     temp = Demangle(name_tuple[1], 8)
 18 |     if not temp:
 19 |         continue
 20 |     if "vftable" in temp:
 21 |         vtable_symbols.append(name_tuple)
 22 | 
 23 | vtables = []
 24 | for vtable_tuple in vtable_symbols:
 25 |     vtables.append(vtable_tuple[0])
 26 | 
 27 | with open(GetInputFile() + '.gt_vtables', 'w') as fp:
 28 |     for vtable_tuple in vtable_symbols:
 29 |         fp.write("%x %s\n" % (vtable_tuple[0], Demangle(vtable_tuple[1], 8)))
 30 | 
 31 | #vtables = [0xDD0148, 0x0DD0108]
 32 | 
 33 | DEBUG = False
 34 | 
 35 | class ClassObject:
 36 | 
 37 |     def __init__(self, name):
 38 |         self.name = name
 39 |         self.base_classes = list()
 40 | 
 41 | 
 42 |     def add_base_class(self, base_class):
 43 |         self.base_classes.append(base_class)
 44 | 
 45 | 
 46 | def parse_typeinfo(rtti_ptr):
 47 | 
 48 |     def get_name_type_descr(type_descr):
 49 |         # Extract name of vtable (skip *pVFTable, void *).
 50 |         name_ptr = type_descr + 0x10
 51 |         name = GetString(name_ptr)
 52 |         return name
 53 | 
 54 |     # Skip signature, offset, cdOffset (each 4 bytes).
 55 |     # NOTE: This only works if the idb is rebased to 0x0 as image base.
 56 |     type_descr_ptr = rtti_ptr + 0xc
 57 |     class_hier_descr_ptr = rtti_ptr + 0x10
 58 | 
 59 |     type_descr = Dword(type_descr_ptr)
 60 |     class_hier_descr = Dword(class_hier_descr_ptr)
 61 | 
 62 |     name = get_name_type_descr(type_descr)
 63 |     class_obj = ClassObject(name)
 64 | 
 65 |     num_bases_ptr = class_hier_descr + 0x8
 66 |     num_bases = Dword(num_bases_ptr)
 67 | 
 68 |     if num_bases > 100:
 69 |         print "Error? Class %s has more than 100 base classes." % name
 70 |         return None
 71 | 
 72 |     elif num_bases > 0:
 73 |         base_array_ptr = class_hier_descr + 0xc
 74 |         base_array = Dword(base_array_ptr)
 75 | 
 76 |         temp_ptr = base_array
 77 |         for i in range(num_bases):
 78 |             base_descr = Dword(temp_ptr)
 79 |             base_type_descr = Dword(base_descr)
 80 |             base_name = get_name_type_descr(base_type_descr)
 81 | 
 82 |             if base_name != name:
 83 |                 base_class_obj = ClassObject(base_name)
 84 |                 class_obj.add_base_class(base_class_obj)
 85 |             temp_ptr += 0x4
 86 | 
 87 |     return class_obj
 88 | 
 89 | 
 90 | def print_class_hierarchy(class_obj):
 91 | 
 92 |     def pretty_print(class_obj, depth):
 93 |         print "   "*depth,
 94 |         print class_obj.name
 95 |         for base_class in class_obj.base_classes:
 96 |             pretty_print(base_class, depth+1)
 97 | 
 98 |     pretty_print(class_obj, 0)
 99 | 
100 | 
101 | def convert_to_set(class_obj):
102 |     hierarchy_set = set()
103 |     hierarchy_set.add(class_obj.name)
104 |     for base_obj in class_obj.base_classes:
105 |         hierarchy_set |= convert_to_set(base_obj)
106 |     return hierarchy_set
107 | 
108 | 
109 | # Abort if image base is not 0
110 | if get_imagebase() != 0x0:
111 |     print "Image base has to be 0x0."
112 | 
113 | else:
114 |     hierarchy_list = list()
115 |     vtable_mapping = dict()
116 |     vtable_addr_error = set()
117 | 
118 |     for vtable_addr in vtables:
119 | 
120 |         print "Processing vtable: 0x%x" % vtable_addr
121 | 
122 |         # We assume that RTTI is always available
123 |         # since MSVC reuses this field otherwise if it is not added.
124 |         rtti_ptr = Qword(vtable_addr - 0x8)
125 | 
126 |         class_obj = parse_typeinfo(rtti_ptr)
127 |         if class_obj is None:
128 |             print "Error for vtable: 0x%x" % vtable_addr
129 |             print "Seems not to be a vtable."
130 |             vtable_addr_error.add(vtable_addr)
131 |             continue
132 |         vtable_mapping[vtable_addr] = class_obj
133 | 
134 |         if DEBUG:
135 |             print_class_hierarchy(class_obj)
136 | 
137 |         # Convert to hierarchy set and merge into hierarchies
138 |         hierarchy_set = convert_to_set(class_obj)
139 |         is_merged = False
140 |         i = 0
141 |         while i < len(hierarchy_list):
142 |             if hierarchy_list[i].isdisjoint(hierarchy_set):
143 |                 i += 1
144 |                 continue
145 | 
146 |             hierarchy_list[i] |= hierarchy_set
147 |             is_merged = True
148 |             break
149 |         if not is_merged:
150 |             hierarchy_list.append(hierarchy_set)
151 | 
152 |     # Replace vtable names with vtable addresses.
153 |     for hierarchy_set in hierarchy_list:
154 |         for name in list(hierarchy_set):
155 |             was_added = False
156 |             for k,v in vtable_mapping.iteritems():
157 |                 if name == v.name:
158 |                     was_added = True
159 |                     hierarchy_set.add(k)
160 |             if was_added:
161 |                 hierarchy_set.remove(name)
162 | 
163 |     # Merge hierarchies
164 |     i = 0
165 |     while i < len(hierarchy_list):
166 | 
167 |         is_merged = False
168 |         j = i + 1
169 |         while j < len(hierarchy_list):
170 | 
171 |             if hierarchy_list[i].isdisjoint(hierarchy_list[j]):
172 |                 j += 1
173 |                 continue
174 | 
175 |             hierarchy_list[j] |= hierarchy_list[i]
176 |             is_merged = True
177 |             break
178 | 
179 |         if is_merged:
180 |             hierarchy_list.remove(hierarchy_list[i])
181 |         else:
182 |             i += 1
183 | 
184 |     # Sanity check if all vtable addresses are in the hierarchy.
185 |     for k,v in vtable_mapping.iteritems():
186 |         found = False
187 |         for hierarchy_set in hierarchy_list:
188 |             if k in hierarchy_set:
189 |                 found = True
190 |                 break
191 |         if not found:
192 |             print "Error: Can not find vtable address 0x%x in hierarchies." % k
193 | 
194 |     '''
195 |     DEBUG
196 |     print hierarchy_list
197 |     sys.exit(0)
198 |     #'''
199 | 
200 |     not_complete_hierarchies = list()
201 |     with open(GetInputFile() + '.gt_hierarchy', 'w') as fp:
202 |         for hierarchy_set in hierarchy_list:
203 |             has_written = False
204 |             is_complete = True
205 |             for vtable in hierarchy_set:
206 |                 if isinstance(vtable, int) or isinstance(vtable, long):
207 |                     fp.write("%x " % vtable)
208 |                     has_written = True
209 |                 else:
210 | 
211 |                     '''
212 |                     temp = Demangle(vtable, 0)
213 |                     if temp:
214 |                         fp.write("%s " % temp)
215 |                     else:
216 |                         fp.write("%s " % vtable)
217 |                     '''
218 | 
219 |                     is_complete = False
220 |             if has_written:
221 |                 fp.write("\n")
222 |             if not is_complete:
223 |                 not_complete_hierarchies.append(hierarchy_set)
224 | 
225 |     if vtable_addr_error:
226 |         print "The following vtable addresses created errors:"
227 |         for vtable_addr in vtable_addr_error:
228 |             print "0x%x" % vtable_addr
229 |     else:
230 |         print "No vtable errors."
231 | 
232 |     if not_complete_hierarchies:
233 |         print "The following hierarchies are not complete:"
234 |         for hierarchy_set in not_complete_hierarchies:
235 |             print "Hierarchy:"
236 |             for vtable in hierarchy_set:
237 |                 if isinstance(vtable, int) or isinstance(vtable, long):
238 |                     print "0x%x" % vtable
239 |                 else:
240 |                     temp = Demangle(vtable, 8)
241 |                     if temp:
242 |                         print temp
243 |                     else:
244 |                         print vtable
245 |             print ""
246 |     else:
247 |         print "All hierarchies complete."


--------------------------------------------------------------------------------
/src/base_analysis.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "base_analysis.h"
  3 | #include "amd64.h"
  4 | 
  5 | #include <memory>
  6 | #include <sstream>
  7 | #include <iostream>
  8 | #include <iterator>
  9 | #include <algorithm>
 10 | 
 11 | using namespace std;
 12 | 
 13 | /*!
 14 |  * \brief Constructs a new analysis on function `function`.
 15 |  *
 16 |  * The state at function entry is initialized to default values.
 17 |  *
 18 |  * \param function The function on which the analysis is run.
 19 |  * \param file_format Passed to `State::purge_scratch_registers` later on.
 20 |  */
 21 | BaseAnalysis::BaseAnalysis(const Function &function,
 22 |                            FileFormatType file_format)
 23 |     : _function(function),
 24 |       _file_format(file_format),
 25 |       _current_return_value(nullptr) {
 26 |     _initial_state.set_initial_state();
 27 | }
 28 | 
 29 | /*!
 30 |  * \brief Constructs a new analysis on function `function`, setting the state
 31 |  * at function entry to the specified state `initial_state`.
 32 |  *
 33 |  * \param function The function on which the analysis is run.
 34 |  * \param initial_state The state that should be set on function entry.
 35 |  * \param file_format Forwarded to the two-argument constructor. */
 36 | BaseAnalysis::BaseAnalysis(const Function &function,
 37 |                            const State &initial_state,
 38 |                            FileFormatType file_format)
 39 |     : BaseAnalysis(function, file_format) {
 40 |     _initial_state = initial_state;
 41 | }
 42 | 
 43 | /*!
 44 |  * \brief Runs the analysis.
 45 |  *
 46 |  * `pre_traversal` is called first, then `traverse` on the function with
 47 |  * callbacks forwarding to `on_traversal`, `block_predicate` and
 48 |  * `path_traversed`, and finally `post_traversal`. The states kept per path
 49 |  * are dropped before returning. Currently, System V is assumed per default.
 50 |  *
 51 |  * \see `BaseAnalysis::on_traversal`
 52 |  *
 53 |  * \return The result of the traversal as reported by `traverse`.
 54 |  * \todo Better use for return value? Also generalize calling convention.
 55 |  */
 56 | bool BaseAnalysis::obtain() {
 57 |     // `this` is passed to `traverse`; presumably it is handed back to the
 58 |     // callbacks as their first argument, which they cast to the analysis.
 59 |     auto visit_block = [&](void *opaque, const Path &path,
 60 |                            const Block &block) -> bool {
 61 |         return static_cast<BaseAnalysis*>(opaque)->on_traversal(path, block);
 62 |     };
 63 | 
 64 |     auto accept_block = [&](void *opaque, const Block &block) -> bool {
 65 |         return static_cast<BaseAnalysis*>(opaque)->block_predicate(block);
 66 |     };
 67 | 
 68 |     auto finish_path = [&](void *opaque, const Path &path) {
 69 |         static_cast<BaseAnalysis*>(opaque)->path_traversed(path);
 70 |     };
 71 | 
 72 |     pre_traversal();
 73 |     auto result = _function.traverse(visit_block, accept_block,
 74 |                                      finish_path, this);
 75 |     post_traversal();
 76 | 
 77 |     // The states recorded per path are not kept beyond the traversal.
 78 |     _states.clear();
 79 |     return result;
 80 | }
 81 | 
 82 | /*!
 83 |  * \brief Traversal callback: computes the state after `block` on `path`.
 84 |  *
 85 |  * The state of the preceding path (or the initial state on an empty path)
 86 |  * is run through the block's semantics. Side effects of calls are kept
 87 |  * per path and applied when a following block is processed. The resulting
 88 |  * state goes to `_semantics` for returns and tail jumps, otherwise it is
 89 |  * stored under `path` in `_states`.
 90 |  *
 91 |  * \return The value returned by `in_traversal`.
 92 |  */
 93 | bool BaseAnalysis::on_traversal(const Path &path, const Block &block) {
 94 |     State new_state;
 95 |     // Get hold of the previous state set on the path.
 96 |     if(path.empty()) {
 97 |         new_state = _initial_state;
 98 |     } else {
 99 |         Path preceding_path(path.cbegin(), path.cend() - 1);
100 |         const auto &preceding_state = _states[preceding_path];
101 |         new_state = preceding_state;
102 | 
103 |         // Handle side-effects as caused by the calling convention used.
104 |         const auto &side_effect = _side_effects.find(preceding_path);
105 |         if(side_effect != _side_effects.cend()) {
106 |             new_state.purge_scratch_registers(_file_format);
107 |             new_state.merge(side_effect->second);
108 |         }
109 |     }
110 |     // A call gets a symbolic return value built from the block's address.
111 |     bool is_call = false;
112 |     switch(block.get_terminator().type) {
113 |     case TerminatorCall:
114 |     case TerminatorCallUnresolved: {
115 |         auto formatted = State::format_return_value(block.get_address());
116 |         _current_return_value = make_shared<Symbolic>(formatted);
117 |         is_call = true;
118 |         break;
119 |     }
120 |     default:
121 |         _current_return_value = nullptr;
122 |         break;
123 |     }
124 | 
125 |     // Actually compute the new semantics.
126 |     block.retrieve_semantics(new_state);
127 |     bool continue_path = in_traversal(path, block, new_state);
128 |     // Handle calls specially as they introduce side-effects.
129 |     if(is_call) {
130 |         State::iterator needle;
131 |         if(new_state.find(register_rip, needle)) {
132 |             // Construct an empty state which will contain side-effects only.
133 |             State side_effects(false);
134 |             side_effects.update(register_rax, _current_return_value);
135 |             _side_effects[path] = side_effects;
136 |         }
137 |     }
138 | 
139 |     // Keep the state when hitting either a return instruction or a tail jump.
140 |     const auto &terminator = block.get_terminator();
141 |     if(terminator.is_tail || terminator.type == TerminatorReturn) {
142 |         new_state.erase(register_rip);
143 |         new_state.purge_scratch_registers(_file_format);
144 |         _semantics.push_back(new_state);
145 |     } else {
146 |         _states[path] = new_state;
147 |     }
148 |     return continue_path;
149 | }
148 | 


--------------------------------------------------------------------------------
/src/blacklist_functions.cpp:
--------------------------------------------------------------------------------
 1 | #include "blacklist_functions.h"
 2 | 
 3 | using namespace std;
 4 | 
 5 | /*!
 6 |  * \brief Reads the function blacklist `<target_file>_funcs_blacklist.txt`.
 7 |  *
 8 |  * The first line has to start with a module name; every further line has
 9 |  * to start with a hexadecimal function address.
10 |  *
11 |  * \param target_file Path prefix of the blacklist file.
12 |  * \return The set of all function addresses listed in the file.
13 |  * \throws runtime_error if the file cannot be opened or a line is malformed.
14 |  */
15 | const BlacklistFuncsSet import_blacklist_funcs(const string &target_file) {
16 |     ifstream input(target_file + "_funcs_blacklist.txt");
17 |     if(!input) {
18 |         throw runtime_error("Opening function blacklist file failed.");
19 |     }
20 | 
21 |     // First entry of file is always the module name.
22 |     string current_line;
23 |     getline(input, current_line);
24 |     string module_name;
25 |     istringstream header(current_line);
26 |     if(!(header >> module_name)) {
27 |         throw runtime_error("Parsing function blacklist file failed.");
28 |     }
29 | 
30 |     BlacklistFuncsSet result;
31 |     while(getline(input, current_line)) {
32 |         istringstream fields(current_line);
33 |         uint64_t address = 0;
34 |         if(!(fields >> hex >> address)) {
35 |             throw runtime_error("Parsing function blacklist file failed.");
36 |         }
37 |         result.insert(address);
38 |     }
39 |     return result;
40 | }
41 | 


--------------------------------------------------------------------------------
/src/block.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "block.h"
 3 | #include "block_semantics.h"
 4 | 
 5 | /*!
 6 |  * \brief Constructs a `Block` object.
 7 |  * \param address Virtual address the block lies at.
 8 |  * \param block Pointer to an `IRSB` VEX block; stored as is, not copied.
 9 |  * \param terminator Description of the block's terminator (copied).
10 |  */
11 | Block::Block(uintptr_t address, IRSB *block, const Terminator &terminator)
12 |     : _address(address), _vex_block(block), _terminator(terminator) {
13 | }
14 | 
15 | /*!
16 |  * \brief Computes the block's semantics by means of `BlockSemantics`.
17 |  *
18 |  * \todo For now, there is no easy way to sub-class how the semantics are
19 |  * retrieved, this should change by allowing custom semantic extractors.
20 |  *
21 |  * \param[in,out] state State the computation starts from; overwritten with
22 |  * the state that `BlockSemantics` reports for this block.
23 |  */
24 | void Block::retrieve_semantics(State &state) const {
25 |     // The temporary extractor lives until the assignment has completed.
26 |     state = BlockSemantics(*this, state).get_state();
27 | }
28 | 
29 | /*!
30 |  * \brief Finds the address of the block's last instruction marker by
31 |  * scanning the VEX statements back to front for an `Ist_IMark`.
32 |  *
33 |  * \return The `addr` of the last `IMark` statement, or 0 if there is none.
34 |  */
35 | uint64_t Block::get_last_address() const {
36 |     int index = _vex_block->stmts_used;
37 |     while(--index >= 0) {
38 |         const auto *statement = _vex_block->stmts[index];
39 |         if(statement->tag == Ist_IMark) {
40 |             return statement->Ist.IMark.addr;
41 |         }
42 |     }
43 |     return 0;
44 | }
45 | 


--------------------------------------------------------------------------------
/src/dump_file.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "dump_file.h"
  3 | 
  4 | #include <fstream>
  5 | #include <sstream>
  6 | 
  7 | using namespace std;
  8 | 
  9 | /*!
 10 |  * \brief Builds a `DumpFile` from an exported function dump.
 11 |  * \param dump_file Path to the `.dmp` file written by the exporter script.
 12 |  *
 13 |  * `<dump_file>.no-return` is parsed as well; the result of that is ignored,
 14 |  * so a missing or malformed companion file does not raise an error.
 15 |  * \throws runtime_error If `dump_file` itself cannot be parsed.
 16 |  */
 17 | DumpFile::DumpFile(const string &dump_file) {
 18 |     if(!parse(dump_file)) {
 19 |         const string reason = "Cannot parse function dump file ";
 20 |         throw runtime_error(reason + dump_file + ".");
 21 |     }
 22 |     parse_no_return(dump_file + ".no-return");
 23 | }
 24 | 
 25 | /*!
 26 |  * \brief Parses the binary function dump into `_functions`.
 27 |  *
 28 |  * Layout as consumed below (all integers are read in host byte order):
 29 |  *  - 64-bit image base, 32-bit function count;
 30 |  *  - per function: 32-bit RVA, 16-bit block count;
 31 |  *  - per block: 32-bit RVA, 32-bit size, 16-bit instruction count.
 32 |  *
 33 |  * Function and block addresses are stored as `image base + RVA`.
 34 |  *
 35 |  * \param dump_file Path of the dump file to read.
 36 |  * \return `true` on success; `false` if the file cannot be opened or ends
 37 |  * prematurely (`_functions` may be partially filled in that case).
 38 |  */
 39 | bool DumpFile::parse(const string &dump_file) {
 40 |     _functions.clear();
 41 | 
 42 |     ifstream input(dump_file.c_str(), ios::binary);
 43 |     if(!input) {
 44 |         return false;
 45 |     }
 46 | 
 47 |     // Reads `size` raw bytes into `target`; yields false if that fails.
 48 |     const auto read_raw = [&input](void *target, streamsize size) -> bool {
 49 |         input.read(static_cast<char*>(target), size);
 50 |         return !input.fail();
 51 |     };
 52 | 
 53 |     uint64_t image_base = 0;
 54 |     uint32_t function_count = 0;
 55 |     if(!read_raw(&image_base, sizeof(image_base)) ||
 56 |             !read_raw(&function_count, sizeof(function_count))) {
 57 |         return false;
 58 |     }
 59 | 
 60 |     for(auto i = 0u; i < function_count; ++i) {
 61 |         uint32_t function_rva = 0;
 62 |         uint16_t block_count = 0;
 63 |         if(!read_raw(&function_rva, sizeof(function_rva)) ||
 64 |                 !read_raw(&block_count, sizeof(block_count))) {
 65 |             return false;
 66 |         }
 67 | 
 68 |         // A repeated function entry starts over with an empty block list.
 69 |         FunctionBlocks &blocks = _functions[image_base + function_rva];
 70 |         blocks = FunctionBlocks();
 71 | 
 72 |         for(auto j = 0u; j < block_count; ++j) {
 73 |             uint32_t block_rva = 0;
 74 |             uint32_t block_size = 0;
 75 |             uint16_t instruction_count = 0;
 76 |             if(!read_raw(&block_rva, sizeof(block_rva)) ||
 77 |                     !read_raw(&block_size, sizeof(block_size)) ||
 78 |                     !read_raw(&instruction_count, sizeof(instruction_count))) {
 79 |                 return false;
 80 |             }
 81 | 
 82 |             BlockDescriptor descriptor;
 83 |             descriptor.block_start = image_base + block_rva;
 84 |             descriptor.block_end = descriptor.block_start + block_size;
 85 |             descriptor.instruction_count = instruction_count;
 86 | 
 87 |             blocks.push_back(descriptor);
 88 |         }
 89 |     }
 90 | 
 91 |     return true;
 92 | }
 93 | 
 94 | /*!
 95 |  * \brief Reads one hexadecimal address per line into `_functions_no_return`.
 96 |  * \param no_return_file Path of the text file to read.
 97 |  * \return `false` if the file cannot be opened or a line fails to parse.
 98 |  */
 99 | bool DumpFile::parse_no_return(const string &no_return_file) {
100 |     _functions_no_return.clear();
101 |     ifstream input(no_return_file.c_str());
102 |     if(!input) {
103 |         return false;
104 |     }
105 | 
106 |     for(string entry; getline(input, entry); ) {
107 |         istringstream fields(entry);
108 |         uintptr_t address = 0;
109 |         if(!(fields >> hex >> address)) {
110 |             return false;
111 |         }
112 |         _functions_no_return.insert(address);
113 |     }
114 | 
115 |     return true;
116 | }
117 | 


--------------------------------------------------------------------------------
/src/expression.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "expression.h"
  3 | 
  4 | using namespace std;
  5 | 
  6 | /*!
  7 |  * \brief Simplifies this operation in place (canonical operand order and
  8 |  * folding of constants).
  9 |  *
 10 |  * Operations are the only expressions that could possibly be ambiguous, so
 11 |  * they are sanitized and optimized here for the state updating logic to be
 12 |  * reasonable. This is done in-place (as to avoid having to return a new
 13 |  * shared_ptr every time). If an operation can be reduced to one argument
 14 |  * (e.g., 1+2), it is transformed into (result + 0), such that it can be
 15 |  * pruned later (e.g., 3+0).
 16 |  *
 17 |  * Design rule: destructive updates on sub-expressions have to be immutable,
 18 |  * i.e., yield new objects that reflect the changes, so that changes do not
 19 |  * propagate to other expressions referencing the modified expression.
 20 |  *
 21 |  * \return `true` if a rewrite was applied; `false` if `has_changed()`
 22 |  * reported no change or if nothing could be simplified.
 23 |  *
 24 |  * \todo Verify that there are not any destructive expression updates on
 25 |  *       anything not covered here (optimizer?). Expression::propagate
 26 |  *       should not be affected.
 27 |  * \todo Consider shared_ptr.reset.
 28 |  * \todo Arithmetic simplifications do not care for signedness or
 29 |  *       operand size.
 30 |  * \todo Do we want to allow basic sanitization first (i.e., without
 31 |  *       optimization)?
 32 |  */
 33 | bool Operation::optimizer() {
 34 |     _changed = has_changed();
 35 |     if(!_changed) {
 36 |         return false;
 37 |     }
 38 | 
 39 |     bool dirty = false;
 40 |     sanitize();
 41 | 
 42 |     const auto is_add_or_sub = [](const OperationType &op) {
 43 |         return op == OperationAdd || op == OperationSub;
 44 |     };
 45 | 
 46 |     if(_operation == OperationSub) {
 47 |         // (X - X) = (0 + 0).
 48 |         if(*_lhs == *_rhs) {
 49 |             _lhs = make_shared<Constant>(0);
 50 |             _rhs = make_shared<Constant>(0);
 51 | 
 52 |             _operation = OperationAdd;
 53 |             dirty = true;
 54 |         }
 55 | 
 56 |         // (const_a - const_b) = (const_c - 0).
 57 |         else if(_lhs->type() == _rhs->type() &&
 58 |                 _lhs->type() == ExpressionConstant) {
 59 | 
 60 |             const auto lhs = static_cast<const Constant&>(*_lhs).value();
 61 |             const auto rhs = static_cast<const Constant&>(*_rhs).value();
 62 | 
 63 |             // Nothing to fold if the subtrahend is already 0.
 64 |             if(rhs) {
 65 |                 _lhs = make_shared<Constant>(lhs - rhs);
 66 |                 _rhs = make_shared<Constant>(0);
 67 | 
 68 |                 dirty = true;
 69 |             }
 70 |         }
 71 |     } else if(_operation == OperationAdd) {
 72 |         // (const_a + const_b) = (const_c + 0).
 73 | 
 74 |         // TODO: Generalize this for other operators.
 75 |         if(_lhs->type() == _rhs->type() &&
 76 |                 _lhs->type() == ExpressionConstant) {
 77 | 
 78 |             const auto lhs = static_cast<const Constant&>(*_lhs).value();
 79 |             const auto rhs = static_cast<const Constant&>(*_rhs).value();
 80 | 
 81 |             // Nothing to fold if one of the summands is already 0.
 82 |             if(lhs && rhs) {
 83 |                 _lhs = make_shared<Constant>(lhs + rhs);
 84 |                 _rhs = make_shared<Constant>(0);
 85 | 
 86 |                 dirty = true;
 87 |             }
 88 |         }
 89 |     }
 90 | 
 91 |     if(is_add_or_sub(_operation)) {
 92 |         // (X +- const_a), const_a > 2^63 = (X -+ (-const_a)), i.e., such
 93 |         // constants are handled like negative numbers.
 94 |         if(_rhs->type() == ExpressionConstant &&
 95 |                 static_cast<const Constant&>(*_rhs).value() >
 96 |                 UINT64_MAX / 2 + 1) {
 97 | 
 98 |             _operation = (_operation == OperationAdd) ? OperationSub
 99 |                                                       : OperationAdd;
100 | 
101 |             const auto rhs = static_cast<const Constant&>(*_rhs).value();
102 |             _rhs = make_shared<Constant>(-rhs);
103 | 
104 |             dirty = true;
105 |         }
106 | 
107 |         // ((X +- const_1) +- const_2) = (X +- const_3).
108 |         if(_lhs->type() == ExpressionOperation &&
109 |                 _rhs->type() == ExpressionConstant) {
110 | 
111 |             /* Hold an extra reference to the inner operation: re-seating
112 |              * `_lhs` below could otherwise destroy it while its members
113 |              * are still being read. */
114 |             const auto inner_owner = _lhs;
115 |             const auto &inner = static_cast<const Operation&>(*inner_owner);
116 | 
117 |             // The identity only holds for an inner addition/subtraction.
118 |             if(is_add_or_sub(inner._operation) &&
119 |                     inner._rhs->type() == ExpressionConstant) {
120 | 
121 |                 auto value = static_cast<const Constant&>(*inner._rhs).value();
122 |                 const auto rhs = static_cast<const Constant&>(*_rhs).value();
123 | 
124 |                 // Differing operators subtract the constants, equal ones add.
125 |                 if(inner._operation != _operation) {
126 |                     value -= rhs;
127 |                 } else {
128 |                     value += rhs;
129 |                 }
130 | 
131 |                 _lhs = inner._lhs;
132 |                 _operation = inner._operation;
133 |                 _rhs = make_shared<Constant>(value);
134 | 
135 |                 dirty = true;
136 |             }
137 |         }
138 |     }
139 | 
140 |     _changed = dirty;
141 | 
142 |     sanitize();
143 |     return dirty;
144 | }
145 | 
146 | /*!
147 |  * \brief Optimizes both operands and brings them into canonical order.
148 |  *
149 |  * Subtractions keep their operand order since they are not commutative.
150 |  * For all other operations the operand with the higher `type()` goes to the
151 |  * left; operands of equal type are ordered using `operator<`.
152 |  */
153 | void Operation::sanitize() {
154 |     _lhs->optimize();
155 |     _rhs->optimize();
156 | 
157 |     if(_operation == OperationSub) {
158 |         return;
159 |     }
160 | 
161 |     // Highest precedence on LHS.
162 |     if(_lhs->type() < _rhs->type()) {
163 |         _lhs.swap(_rhs);
164 |     }
165 | 
166 |     // On equal types, operator< decides: the smaller operand goes right.
167 |     if(_lhs->type() == _rhs->type() && *_lhs < *_rhs) {
168 |         _lhs.swap(_rhs);
169 |     }
170 | }
171 | 
172 | /*! Treats `*this` as an `Operation` (unchecked downcast, so callers
173 |  * presumably only use this on operations) and delegates to
174 |  * `equals_inner`. All this does is assert that (x +- 0) == x remains true.
175 |  * FIXME: Integrate this better. */
176 | bool Expression::operation_equal(const Expression &other) const {
177 |     return static_cast<const Operation&>(*this).equals_inner(other);
178 | }
179 | 


--------------------------------------------------------------------------------
/src/external_functions.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "external_functions.h"
  3 | 
  4 | 
  5 | using namespace std;
  6 | 
  7 | 
  8 | bool ExternalFunctions::is_finalized() const {
  9 |     return _is_finalized; // Set to true by finalize().
 10 | }
 11 | 
 12 | 
 13 | /*!
 14 |  * \brief Parses `<funcs_file>_funcs.txt` and registers its functions.
 15 |  *
 16 |  * The first line has to start with the module name. Every following line
 17 |  * holds a hexadecimal function address followed by the function name.
 18 |  *
 19 |  * \param funcs_file Path prefix the `_funcs.txt` suffix is appended to.
 20 |  * \return `true` on success; `false` if the file cannot be opened or a line
 21 |  * is malformed (functions parsed up to that line stay registered).
 22 |  * \throws runtime_error If the object has already been finalized.
 23 |  */
 24 | bool ExternalFunctions::parse(const string &funcs_file) {
 25 |     // Parsing is only allowed as long as the object is not finalized.
 26 |     if(_is_finalized) {
 27 |         throw runtime_error("Parse attempt after ExternalFunctions object was"
 28 |                             " finalized.");
 29 |     }
 30 | 
 31 |     ifstream input(funcs_file + "_funcs.txt");
 32 |     if(!input) {
 33 |         return false;
 34 |     }
 35 | 
 36 |     // The header line is handled separately: it names the module.
 37 |     string current_line;
 38 |     getline(input, current_line);
 39 | 
 40 |     string module_name;
 41 |     istringstream header(current_line);
 42 |     if(!(header >> module_name)) {
 43 |         return false;
 44 |     }
 45 | 
 46 |     while(getline(input, current_line)) {
 47 |         istringstream fields(current_line);
 48 | 
 49 |         uint64_t func_addr = 0;
 50 |         string func_name;
 51 |         if(!(fields >> hex >> func_addr >> func_name)) {
 52 |             return false;
 53 |         }
 54 | 
 55 |         ExternalFunction func;
 56 |         func.addr = func_addr;
 57 |         func.name = func_name;
 58 |         func.module_name = module_name;
 59 | 
 60 |         // NOTE: The index uniquely identifies a function across all
 61 |         // external modules and equals its position in the vector.
 62 |         func.index = _index;
 63 | 
 64 |         _external_functions.push_back(func);
 65 |         assert(_external_functions[_index].module_name == func.module_name
 66 |                && _external_functions[_index].addr == func.addr
 67 |                && _external_functions[_index].name == func.name
 68 |                && _external_functions[_index].index == func.index
 69 |                && "Index of function and index in vector are not the same.");
 70 |         ++_index;
 71 |     }
 72 | 
 73 |     return true;
 74 | }
 75 | 
 76 | 
 77 | /*!
 78 |  * \brief Marks the object as finalized and maps each function name to its
 79 |  * entry in `_external_functions` (later entries win on duplicate names).
 80 |  * \throws runtime_error If the object has already been finalized.
 81 |  */
 82 | void ExternalFunctions::finalize() {
 83 |     if(_is_finalized) {
 84 |         throw runtime_error("ExternalFunctions object was already finalized.");
 85 |     }
 86 |     _is_finalized = true;
 87 | 
 88 |     for(auto &function : _external_functions) {
 89 |         _external_functions_map[function.name] = &function;
 90 |     }
 91 | }
 92 | 
 93 | 
 94 | /*!
 95 |  * \brief Looks up an external function by its name.
 96 |  * \return The function registered under `name`, or `nullptr` if unknown.
 97 |  * \throws runtime_error If the object has not been finalized yet.
 98 |  */
 99 | const ExternalFunction* ExternalFunctions::get_external_function(
100 |         const string &name) const {
101 |     if(!_is_finalized) {
102 |         throw runtime_error("ExternalFunctions object was not finalized.");
103 |     }
104 |     const auto entry = _external_functions_map.find(name);
105 |     return entry == _external_functions_map.cend() ? nullptr : entry->second;
106 | }
107 | 
108 | 
109 | /*! \brief Linear search for the first function with the given module name
110 |  * and address; returns `nullptr` if there is none.
111 |  * \throws runtime_error If the object has not been finalized yet. */
112 | const ExternalFunction* ExternalFunctions::get_external_function(
113 |         const std::string &module_name,
114 |         uint64_t func_addr) const {
115 |     if(!_is_finalized) {
116 |         throw runtime_error("ExternalFunctions object was not finalized.");
117 |     }
118 | 
119 |     for(const auto &entry : _external_functions) {
120 |         if(entry.addr == func_addr && entry.module_name == module_name) {
121 |             return &entry;
122 |         }
123 |     }
124 |     return nullptr;
125 | }
126 | 


--------------------------------------------------------------------------------
/src/function.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "function.h"
  3 | #include "path_builder.h"
  4 | 
  5 | #include <map>
  6 | #include <set>
  7 | #include <deque>
  8 | #include <sstream>
  9 | #include <cstddef>
 10 | #include <cassert>
 11 | #include <iterator>
 12 | #include <algorithm>
 13 | 
 14 | using namespace std;
 15 | 
 16 | using SeenBlocks = set<uintptr_t>;
 17 | using PathBlocks = map<Path, SeenBlocks>;
 18 | 
 19 | /*!
 20 |  * \brief Creates a new function description with the given entry address.
 21 |  * \param entry The (virtual) address where the function starts originally.
 22 |  * \param branch_threshold Number of `TerminatorJcc` blocks from which on
 23 |  * `can_be_fully_traversed` reports `false`, i.e., `traverse` uses its
 24 |  * lightweight policy. Its default is declared elsewhere (documented so far
 25 |  * as "at least 15" - to be confirmed against the header).
 26 |  */
 27 | Function::Function(uintptr_t entry, uint8_t branch_threshold)
 28 |     : _entry(entry),
 29 |       _branch_threshold(branch_threshold) {}
 30 | 
 31 | /*!
 32 |  * \brief Initial policy that checks feasibility of traversing all paths.
 33 |  *
 34 |  * Counts the blocks terminated by a `TerminatorJcc`, which gives a rough
 35 |  * estimate of the number of paths through the function.
 36 |  *
 37 |  * \return `true`, if the function contains fewer than `_branch_threshold`
 38 |  * such blocks; `false` otherwise.
 39 |  */
 40 | bool Function::can_be_fully_traversed() const {
 41 |     typedef decltype(_function_blocks)::value_type Entry;
 42 |     const auto jcc_count = count_if(
 43 |         _function_blocks.cbegin(), _function_blocks.cend(),
 44 |         [](const Entry &entry) {
 45 |             return entry.second->get_terminator().type == TerminatorJcc;
 46 |         });
 47 | 
 48 |     return jcc_count < _branch_threshold;
 49 | }
 50 | 
 51 | /*!
 52 |  * \brief Traverses paths through the function, visiting their blocks.
 53 |  *
 54 |  * Only the lightweight policy is implemented: functions for which
 55 |  * `can_be_fully_traversed` holds are rejected, as path callbacks are not
 56 |  * yet implemented for full traversals. Paths are generated by
 57 |  * `PathBuilder::build_paths` using `block_predicate`. Visiting a path stops
 58 |  * early if a block is unknown to this function or if `block_callback`
 59 |  * returns `false`.
 60 |  *
 61 |  * The block callback is passed several parameters:
 62 |  * 1. the user-defined parameter (e.g., a structure with traversal data),
 63 |  * 2. the path describing the position of the currently visited basic block,
 64 |  * 3. the currently visited basic block itself, a `Block` reference.
 65 |  *
 66 |  * \param block_callback Called on each basic block visit.
 67 |  * \param block_predicate Decides whether a basic block is deemed
 68 |  * "interesting" for the current analysis; has to be set.
 69 |  * \param path_callback If set, called once per generated path after its
 70 |  * blocks were visited, with the annotations collected for that path.
 71 |  * \param user_defined A user-defined parameter passed to the callbacks.
 72 |  * \return Always `true`.
 73 |  * \throws runtime_error If the function can be fully traversed, if
 74 |  * `block_predicate` is not set, or on an unexpected terminator type.
 75 |  * \todo Decide if the return type still makes sense in the current setup.
 76 |  */
 77 | bool Function::traverse(const TraversalCallback &block_callback,
 78 |                         const BlockPredicate &block_predicate,
 79 |                         const PathCallback &path_callback,
 80 |                         void *user_defined)
 81 |     const {
 82 |     if(can_be_fully_traversed()) {
 83 |         /* The full traversal (`traverser`) cannot serve `path_callback`
 84 |          * yet, hence such functions are rejected up front. */
 85 |         throw runtime_error("Path callbacks are not yet implemented for full"
 86 |                             " traversals.");
 87 |     }
 88 | 
 89 |     if(!block_predicate) {
 90 |         throw runtime_error("Cannot switch to lightweight policy without a "
 91 |                             "valid block predicate.");
 92 |     }
 93 | 
 94 |     PathBuilder builder(*this, user_defined);
 95 |     const auto paths = builder.build_paths(block_predicate);
 96 | 
 97 |     // FIXME: This duplicates code from below.
 98 |     for(const auto &path : paths) {
 99 |         Path current_path;
100 |         const Terminator *previous_terminator = nullptr;
101 | 
102 |         for(const auto &block : path) {
103 |             const auto needle = _function_blocks.find(block);
104 |             if(needle == _function_blocks.cend()) {
105 |                 break;
106 |             }
107 | 
108 |             if(previous_terminator) {
109 |                 /* Annotate the edge taken from the previous block, using
110 |                  * the same convention as `traverser`. */
111 |                 bool annotation = false;
112 | 
113 |                 const auto &terminator = *previous_terminator;
114 |                 switch(terminator.type) {
115 |                 case TerminatorJcc: {
116 |                     const auto current = needle->second->get_address();
117 |                     if(terminator.target == current) {
118 |                         annotation = false;
119 |                         break;
120 |                     }
121 | 
122 |                     assert(terminator.fall_through == current &&
123 |                            "Cannot reconstruct annotation.");
124 |                     annotation = true;
125 |                     break;
126 |                 }
127 | 
128 |                 case TerminatorJump:
129 |                 case TerminatorFallthrough:
130 |                 case TerminatorCallUnresolved:
131 |                 case TerminatorCall:
132 |                     annotation = true;
133 |                     break;
134 | 
135 |                 default:
136 |                     throw runtime_error("Lightweight policy: This should not"
137 |                                         " happen.");
138 |                 }
139 | 
140 |                 current_path.push_back(annotation);
141 |             }
142 | 
143 |             previous_terminator = &needle->second->get_terminator();
144 |             if(!block_callback(user_defined, current_path, *needle->second)) {
145 |                 /* The callback has decided not to follow this path any
146 |                  * further. */
147 |                 break;
148 |             }
149 |         }
150 | 
151 |         if(path_callback) {
152 |             path_callback(user_defined, current_path);
153 |         }
154 |     }
155 | 
156 |     return true;
157 | }
158 | 
159 | /*!
160 |  * \brief Depth-first visit of all paths; blocks seen on a path are skipped.
161 |  * \param callback Called per visited block; `false` ends the current path.
162 |  * \param user_defined A user-defined parameter passed to the callback.
163 |  * \return Always `true`.
164 |  */
165 | bool Function::traverser(const TraversalCallback &callback,
166 |                          void *user_defined) const {
167 |     PathBlocks blocks_seen_on_path;
168 |     deque<pair<uintptr_t, Path>> pending;
169 |     pending.push_back(make_pair(_entry, Path()));
170 | 
171 |     while(!pending.empty()) {
172 |         const auto item = pending.back();
173 |         pending.pop_back();
174 | 
175 |         const uintptr_t address = item.first;
176 |         const Path &path = item.second;
177 | 
178 |         SeenBlocks &seen = blocks_seen_on_path[path];
179 |         if(seen.count(address) != 0) {
180 |             continue;
181 |         }
182 | 
183 |         const auto needle = _function_blocks.find(address);
184 |         if(needle == _function_blocks.cend()) {
185 |             /* No block of this function lies at the address, most likely
186 |              * due to a non-returning call. These must not be followed. */
187 |             continue;
188 |         }
189 | 
190 |         seen.insert(address);
191 |         if(!callback(user_defined, path, *needle->second)) {
192 |             /* The callback has decided not to follow this path further. */
193 |             continue;
194 |         }
195 | 
196 |         // Queues `target` on `path` extended by `annotation`; the extended
197 |         // path starts out with the blocks seen so far.
198 |         const auto enqueue = [&](uintptr_t target, bool annotation) {
199 |             Path extended = path;
200 |             extended.push_back(annotation);
201 |             pending.push_back(make_pair(target, extended));
202 |             blocks_seen_on_path[extended] = seen;
203 |         };
204 | 
205 |         const Terminator &terminator = needle->second->get_terminator();
206 |         switch(terminator.type) {
207 |         case TerminatorJump:
208 |             enqueue(terminator.target, true);
209 |             break;
210 | 
211 |         case TerminatorJcc:
212 |             enqueue(terminator.target, false);
213 |             // Deliberately falls through to also follow the other edge.
214 | 
215 |         case TerminatorFallthrough:
216 |         case TerminatorCallUnresolved:
217 |         case TerminatorCall:
218 |             enqueue(terminator.fall_through, true);
219 |             break;
220 |         default:
221 |             break;
222 |         }
223 |     }
224 | 
225 |     return true;
226 | }
226 | 
227 | /*! \brief Adds the block at `address`, replacing one registered there. */
228 | void Function::add_block(uintptr_t address, IRSB *block,
229 |                          const Terminator &terminator) {
230 |     auto &slot = _function_blocks[address];
231 |     slot = make_shared<Block>(address, block, terminator);
232 | }


--------------------------------------------------------------------------------
/src/got.cpp:
--------------------------------------------------------------------------------
 1 | #include "got.h"
 2 | 
 3 | using namespace std;
 4 | 
 5 | /*!
 6 |  * \brief Imports the `.got` entries listed in `<target_file>_got.txt`.
 7 |  *
 8 |  * The first line has to start with the module name. Every following line
 9 |  * holds two hexadecimal numbers: the entry's address and its content.
10 |  *
11 |  * \param target_file Path prefix the `_got.txt` suffix is appended to.
12 |  * \return Map from entry address to entry content; if an address is listed
13 |  * more than once, the last line wins.
14 |  * \throws runtime_error If the file cannot be opened or parsed.
15 |  */
16 | GotMap import_got(const string &target_file) {
17 |     ifstream input(target_file + "_got.txt");
18 |     if(!input) {
19 |         throw runtime_error("Opening .got file failed.");
20 |     }
21 | 
22 |     // The header line only has to provide a module name.
23 |     string current_line;
24 |     getline(input, current_line);
25 | 
26 |     string module_name;
27 |     istringstream header(current_line);
28 |     if(!(header >> module_name)) {
29 |         throw runtime_error("Parsing .got file failed.");
30 |     }
31 | 
32 |     GotMap entries;
33 |     while(getline(input, current_line)) {
34 |         istringstream fields(current_line);
35 | 
36 |         // `hex` is sticky, so it applies to both numbers.
37 |         uint64_t address = 0;
38 |         uint64_t content = 0;
39 |         if(!(fields >> hex >> address >> content)) {
40 |             throw runtime_error("Parsing .got file failed.");
41 |         }
42 |         entries[address] = content;
43 |     }
44 | 
45 |     return entries;
46 | }
47 | 


--------------------------------------------------------------------------------
/src/idata.cpp:
--------------------------------------------------------------------------------
 1 | #include "idata.h"
 2 | 
 3 | using namespace std;
 4 | 
 5 | /*!
 6 |  * \brief Imports the `.idata` entries listed in `<target_file>_idata.txt`.
 7 |  *
 8 |  * The first line has to start with the module name. Every following line
 9 |  * holds a hexadecimal entry address followed by the entry's content.
10 |  *
11 |  * \param target_file Path prefix the `_idata.txt` suffix is appended to.
12 |  * \return Map from entry address to entry content; if an address is listed
13 |  * more than once, the last line wins.
14 |  * \throws runtime_error If the file cannot be opened or parsed.
15 |  */
16 | IDataMap import_idata(const string &target_file) {
17 |     ifstream input(target_file + "_idata.txt");
18 |     if(!input) {
19 |         throw runtime_error("Opening .idata file failed.");
20 |     }
21 | 
22 |     // The header line only has to provide a module name.
23 |     string current_line;
24 |     getline(input, current_line);
25 | 
26 |     string module_name;
27 |     istringstream header(current_line);
28 |     if(!(header >> module_name)) {
29 |         throw runtime_error("Parsing .idata file failed.");
30 |     }
31 | 
32 |     IDataMap entries;
33 |     while(getline(input, current_line)) {
34 |         istringstream fields(current_line);
35 | 
36 |         // The content is read as a single whitespace-delimited token.
37 |         uint64_t address = 0;
38 |         string content;
39 |         if(!(fields >> hex >> address >> content)) {
40 |             throw runtime_error("Parsing .idata file failed.");
41 |         }
42 |         entries[address] = content;
43 |     }
44 | 
45 |     return entries;
46 | }
47 | 


--------------------------------------------------------------------------------
/src/mapped_elf.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "mapped_elf.h"
 3 | 
 4 | #include <fstream>
 5 | #include <iostream>
 6 | #include <stdexcept>
 7 | 
 8 | using namespace std;
 9 | 
10 | /*!
11 |  * \brief Constructs a new `MappedElf` instance from a given ELF file.
12 |  * \param elf_file The path to the ELF file which is to be mapped.
13 |  *
14 |  * The whole file is read into memory; the first executable `PT_LOAD`
15 |  * segment provides base address and size of the mapping.
16 |  *
17 |  * \throws runtime_error If the file cannot be opened, is too small for the
18 |  * headers it announces, or no executable `PT_LOAD` segment with a size is
19 |  * found.
20 |  */
21 | MappedElf::MappedElf(const string &elf_file) {
22 |     ifstream file(elf_file.c_str(), ios::binary);
23 |     if(!file) {
24 |         throw runtime_error("Cannot open file " + elf_file + ".");
25 |     }
26 | 
27 |     _buffer = vector<char>(istreambuf_iterator<char>(file),
28 |                            istreambuf_iterator<char>());
29 | 
30 |     // The ELF header has to fit into the file before it may be inspected.
31 |     if(_buffer.size() < sizeof(ElfW(Ehdr))) {
32 |         throw runtime_error("Malformed input file " + elf_file + ".");
33 |     }
34 |     _e_header = reinterpret_cast<ElfW(Ehdr)*>(_buffer.data());
35 | 
36 |     // The same holds for the program header table the header announces.
37 |     const uint64_t table_offset = _e_header->e_phoff;
38 |     const uint64_t table_size =
39 |         static_cast<uint64_t>(_e_header->e_phnum) * sizeof(ElfW(Phdr));
40 |     if(table_offset > _buffer.size() ||
41 |             table_size > _buffer.size() - table_offset) {
42 |         throw runtime_error("Malformed input file " + elf_file + ".");
43 |     }
44 |     _p_header = reinterpret_cast<ElfW(Phdr)*>(_buffer.data() + table_offset);
45 | 
46 |     // FIXME: We rely on compilers a bit here, this can be generalized.
47 |     for(auto i = 0; i < _e_header->e_phnum; ++i) {
48 |         const auto &current = _p_header[i];
49 |         if(current.p_type == PT_LOAD && (current.p_flags & PF_X)) {
50 |             _base = current.p_vaddr;
51 |             _size = current.p_memsz;
52 |             break;
53 |         }
54 |     }
55 | 
56 |     // NOTE(review): relies on `_size` starting out as 0; its initializer is
57 |     // not visible in this file - confirm against the header.
58 |     if(!_size) {
59 |         throw runtime_error("Malformed input file " + elf_file + ".");
60 |     }
61 | }
43 | 
44 | /*!
45 |  * \brief Implements indexing access, effectively accessing the memory lying
46 |  * at the given virtual address.
47 |  *
48 |  * The file offset is computed as `address - _base`, i.e., the mapped
49 |  * segment is treated as starting at file offset 0.
50 |  *
51 |  * \param address (Virtual) address of memory to access.
52 |  * \return A pointer to the memory requested, if the address lies inside
53 |  * `[_base, _base + _size)` and is backed by file contents. `nullptr` else.
54 |  */
55 | const uint8_t *MappedElf::operator[](const uintptr_t address) const {
56 |     if(address < _base) {
57 |         return nullptr;
58 |     }
59 | 
60 |     // The end of the range is exclusive and the offset must not leave the
61 |     // buffer, otherwise the returned pointer could not be dereferenced.
62 |     const auto offset = address - _base;
63 |     if(offset >= _size || offset >= _buffer.size()) {
64 |         return nullptr;
65 |     }
66 | 
67 |     return reinterpret_cast<const uint8_t*>(_buffer.data()) + offset;
68 | }
59 | 


--------------------------------------------------------------------------------
/src/mapped_pe.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "mapped_pe.h"
 3 | 
 4 | #include <fstream>
 5 | #include <iostream>
 6 | #include <stdexcept>
 7 | #include <cstring>
 8 | 
 9 | using namespace std;
10 | 
11 | /*!
12 |  * \brief Constructs a new `MappedPe` instance from a given PE file.
13 |  * \param pe_file The path to the PE file which is to be mapped.
14 |  *
15 |  * The whole file is read into memory. After validating the MZ and PE
16 |  * magic values, the optional header (PE32 or PE32+) is located and the
17 |  * `.text` section header provides base, size and file position of the code.
18 |  *
19 |  * \throws runtime_error If the file cannot be found or seems to be
20 |  * malformed (bad magic value, headers exceeding the file, no `.text`
21 |  * section with a size).
22 |  */
23 | MappedPe::MappedPe(const string &pe_file) {
24 |     ifstream file(pe_file.c_str(), ios::binary);
25 |     if(!file) {
26 |         throw runtime_error("Cannot open file " + pe_file + ".");
27 |     }
28 | 
29 |     _buffer = vector<char>(istreambuf_iterator<char>(file),
30 |                            istreambuf_iterator<char>());
31 | 
32 |     const string malformed = "Malformed input file " + pe_file + ".";
33 | 
34 |     // Tells whether `length` bytes at file offset `offset` lie in the buffer.
35 |     const auto in_file = [this](uint64_t offset, uint64_t length) {
36 |         return offset <= _buffer.size() && length <= _buffer.size() - offset;
37 |     };
38 | 
39 |     if(!in_file(0, sizeof(mz_hdr))) {
40 |         throw runtime_error(malformed);
41 |     }
42 |     _mz_header = reinterpret_cast<mz_hdr*>(_buffer.data());
43 |     if(_mz_header->magic != MZ_MAGIC) {
44 |         throw runtime_error(malformed);
45 |     }
46 | 
47 |     // Magic value for optional header lies directly behind PE header, so
48 |     // both have to fit into the file before either of them is read.
49 |     const uint64_t pe_offset = _mz_header->peaddr;
50 |     const uint64_t opt_offset = pe_offset + sizeof(pe_hdr);
51 |     if(!in_file(pe_offset, sizeof(pe_hdr) + sizeof(uint16_t))) {
52 |         throw runtime_error(malformed);
53 |     }
54 | 
55 |     _pe_header = reinterpret_cast<pe_hdr*>(_buffer.data() + pe_offset);
56 |     if(_pe_header->magic != PE_MAGIC) {
57 |         throw runtime_error(malformed);
58 |     }
59 | 
60 |     char *opt_header = _buffer.data() + opt_offset;
61 |     const uint16_t opt_hdr_magic = *reinterpret_cast<uint16_t*>(opt_header);
62 |     if(opt_hdr_magic == IMAGE_FILE_OPT_PE32_MAGIC) {
63 |         _pe32_opt_header = reinterpret_cast<pe32_opt_hdr*>(opt_header);
64 |     }
65 |     else if(opt_hdr_magic == IMAGE_FILE_OPT_PE32_PLUS_MAGIC) {
66 |         _pe32_plus_opt_header =
67 |             reinterpret_cast<pe32plus_opt_hdr*>(opt_header);
68 |     }
69 |     else {
70 |         throw runtime_error(malformed);
71 |     }
72 | 
73 |     // The section table follows the optional header; it has to fit as well.
74 |     const uint64_t table_offset = opt_offset + _pe_header->opt_hdr_size;
75 |     const uint64_t table_size =
76 |         static_cast<uint64_t>(_pe_header->sections) * sizeof(section_header);
77 |     if(!in_file(table_offset, table_size)) {
78 |         throw runtime_error(malformed);
79 |     }
80 |     section_header *sections =
81 |         reinterpret_cast<section_header*>(_buffer.data() + table_offset);
82 | 
83 |     for(uint32_t i = 0; i < _pe_header->sections; i++) {
84 |         _text_section_header = sections + i;
85 | 
86 |         // FIXME: We rely on compilers a bit here, this can be generalized.
87 |         if(strcmp(_text_section_header->name, ".text") == 0) {
88 |             _base = _text_section_header->virtual_address;
89 |             _size = _text_section_header->virtual_size;
90 |             _file_addr = _text_section_header->data_addr;
91 |             _file_size = _text_section_header->raw_data_size;
92 |             break;
93 |         }
94 |     }
95 | 
96 |     if(!_size) {
97 |         throw runtime_error(malformed);
98 |     }
99 | }
80 | 
81 | 
82 | /*!
83 |  * \brief Implements indexing access, effectively accessing the memory lying
84 |  * at the given virtual address.
85 |  * \param address (Virtual) address of memory to access.
86 |  * \return A pointer into the file buffer for `address` if it lies inside
87 |  * `[_base, _base + _size)`; `nullptr` else.
88 |  */
89 | const uint8_t *MappedPe::operator[](const uintptr_t address) const {
90 |     // The end of the region is exclusive, hence `>=`.
91 |     if(address < _base || address >= _base + _size) {
92 |         return nullptr;
93 |     }
94 | 
95 |     const uint8_t *data = reinterpret_cast<const uint8_t*>(_buffer.data());
96 |     return data + (address - _base + _file_addr);
97 | }
98 | 


--------------------------------------------------------------------------------
/src/module_plt.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "module_plt.h"
 3 | 
 4 | 
 5 | using namespace std;
 6 | 
 7 | // Creates the table for the module `module_name`; `parse` fills it.
 8 | ModulePlt::ModulePlt(const string &module_name)
 9 |     : _module_name(module_name)
10 | {
11 | }
12 | 
13 | // Reads the file `<plt_file>_plt.txt`. Its first line has to start with the
14 | // name of this module; each further line holds a hexadecimal address
15 | // followed by a function name.
16 | // Returns false if the file cannot be opened, names a different module or
17 | // contains a line that cannot be parsed (entries read before such a line
18 | // stay stored); true otherwise.
19 | bool ModulePlt::parse(const string &plt_file) {
20 |     ifstream input(plt_file + "_plt.txt");
21 |     if(!input) {
22 |         return false;
23 |     }
24 | 
25 |     // Parse first line manually: its first token is the module name.
26 |     string current;
27 |     getline(input, current);
28 |     istringstream header(current);
29 | 
30 |     string owner;
31 |     header >> owner;
32 | 
33 |     // Only allow to parse a .plt file for this module.
34 |     if(header.fail() || _module_name != owner) {
35 |         return false;
36 |     }
37 | 
38 |     // Every remaining line describes exactly one .plt entry.
39 |     while(getline(input, current)) {
40 |         istringstream fields(current);
41 | 
42 |         uint64_t address = 0;
43 |         string name;
44 | 
45 |         // A failed address extraction makes the name extraction fail as
46 |         // well, so a single check covers both columns.
47 |         fields >> hex >> address >> name;
48 |         if(fields.fail()) {
49 |             return false;
50 |         }
51 | 
52 |         PltEntry entry;
53 |         entry.addr = address;
54 |         entry.func_name = name;
55 | 
56 |         // A repeated address replaces the entry stored before.
57 |         _plt_entries[address] = entry;
58 |     }
59 | 
60 |     // Reached the end of the file without a faulty line.
61 |     return true;
62 | }
63 | 
64 | // Returns the entry stored for address `addr`, or nullptr if there is none.
65 | const PltEntry* ModulePlt::get_plt_entry(uint64_t addr) const {
66 |     // One lookup instead of `find` followed by `at`.
67 |     const auto entry = _plt_entries.find(addr);
68 | 
69 |     return entry == _plt_entries.cend() ? nullptr : &(entry->second);
70 | }
71 | 
72 | // Returns the first entry found whose name equals `func_name`, else nullptr.
73 | const PltEntry* ModulePlt::get_plt_entry(const string func_name) const {
74 |     for(auto it = _plt_entries.cbegin(); it != _plt_entries.cend(); ++it) {
75 |         if(it->second.func_name == func_name) {
76 |             return &(it->second);
77 |         }
78 |     }
79 |     return nullptr;
80 | }
81 | 


--------------------------------------------------------------------------------
/src/new_operators.cpp:
--------------------------------------------------------------------------------
 1 | #include "new_operators.h"
 2 | 
 3 | using namespace std;
 4 | 
 5 | // Keeps the module name, the vtable file and the vtable hierarchies.
 6 | NewOperators::NewOperators(const string &module_name,
 7 |                            const VTableFile &vtable_file,
 8 |                            const VTableHierarchies &vtable_hierarchies)
 9 |     : _module_name(module_name),
10 |       _vtable_file(vtable_file), _vtable_hierarchies(vtable_hierarchies) {
11 | }
12 | 
13 | // Adds a candidate; for a known address only its vtable indexes are merged.
14 | void NewOperators::add_op_new_candidate(const NewOperator &new_op_candidate) {
15 |     // Look the address up once and reuse the iterator.
16 |     const auto known = _op_new_candidates.find(new_op_candidate.addr);
17 |     if(known == _op_new_candidates.end()) {
18 |         _op_new_candidates[new_op_candidate.addr] = new_op_candidate;
19 |         return;
20 |     }
21 | 
22 |     for(uint32_t idx : new_op_candidate.vtbl_idxs) {
23 |         known->second.vtbl_idxs.insert(idx);
24 |     }
25 | }
26 | 
27 | // Writes `<target_dir>/<module name>.new_operators`: the module name on the
28 | // first line, then one line per candidate with its address, its size and the
29 | // `module:address` pair of every vtable that may be created through it.
30 | void NewOperators::export_new_operators(const string &target_dir) {
31 |     stringstream path;
32 |     path << target_dir << "/" << _module_name << ".new_operators";
33 | 
34 |     // The stream state is never checked, i.e. write errors go unnoticed.
35 |     ofstream out;
36 |     out.open(path.str());
37 | 
38 |     // Header line.
39 |     out << _module_name << endl;
40 | 
41 |     const HierarchiesVTable &hierarchies =
42 |                                         _vtable_hierarchies.get_hierarchies();
43 | 
44 |     for(const auto &candidate : _op_new_candidates) {
45 |         const auto &op = candidate.second;
46 | 
47 |         // Each index drags in the complete hierarchy it is part of, unless
48 |         // the index was already collected through an earlier hierarchy.
49 |         unordered_set<uint32_t> reachable;
50 |         for(uint32_t idx : op.vtbl_idxs) {
51 |             if(reachable.count(idx) == 0) {
52 |                 for(const auto &hierarchy : hierarchies) {
53 |                     if(hierarchy.find(idx) == hierarchy.cend()) {
54 |                         continue;
55 |                     }
56 |                     for(uint32_t member : hierarchy) {
57 |                         reachable.insert(member);
58 |                     }
59 |                     break;
60 |                 }
61 |             }
62 |             reachable.insert(idx);
63 |         }
64 | 
65 |         out << hex << op.addr << " "
66 |             << hex << op.size << " ";
67 | 
68 |         for(uint32_t idx : reachable) {
69 |             const auto &vtable = _vtable_file.get_vtable(idx);
70 |             out << vtable.module_name << ":" << hex << vtable.addr << " ";
71 |         }
72 | 
73 |         // One candidate per line.
74 |         out << endl;
75 |     }
76 |     out.close();
77 | }
78 | 
79 | // Read-only access to all candidates collected so far, keyed by address.
80 | const OperatorNewAddrMap& NewOperators::get_new_operators() const {
81 |     return _op_new_candidates;
82 | }
83 | 
84 | // Merges every operator of `new_ops` into this collection.
85 | void NewOperators::copy_new_operators(const OperatorNewAddrMap &new_ops) {
86 |     for(auto it = new_ops.cbegin(); it != new_ops.cend(); ++it) {
87 |         add_op_new_candidate(it->second);
88 |     }
89 | }
90 | 


--------------------------------------------------------------------------------
/src/path_builder.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "path_builder.h"
  3 | 
  4 | #include <array>
  5 | #include <queue>
  6 | #include <algorithm>
  7 | 
  8 | using namespace std;
  9 | 
 10 | /*!
 11 |  * \brief Creates a new instance of the class.
 12 |  * \param function The function for which paths should be constructed.
 13 |  * \param user_defined A user-defined parameter that is handed to every
 14 |  * block predicate invocation. Defaults to `nullptr`.
 15 |  * \param node_threshold If a function has more interesting nodes than this
 16 |  * value, simpler paths are generated. Defaults to 20.
 17 |  * \see `PathBuilder::build_paths`
 18 |  */
 19 | PathBuilder::PathBuilder(const Function &function, void *user_defined,
 20 |                          uint8_t node_threshold)
 21 |     : _function(function),
 22 |       _user_defined(user_defined),
 23 |       _node_threshold(node_threshold) {}
 24 | 
 25 | //! Up to two successor addresses of a block; 0 marks an unused slot.
 26 | using Successors = array<uintptr_t, 2>;
 27 | 
 28 | //! Collects the successors of `block`: slot 0 holds the branch target of
 29 | //! (conditional) jumps, slot 1 the fall-through address of conditional
 30 | //! jumps, fall-throughs and calls. All other terminators have none.
 31 | Successors get_successors(const Block &block) {
 32 |     const Terminator &term = block.get_terminator();
 33 |     const auto kind = term.type;
 34 | 
 35 |     Successors successors = { 0, 0 };
 36 |     if(kind == TerminatorJump || kind == TerminatorJcc) {
 37 |         successors[0] = term.target;
 38 |     }
 39 | 
 40 |     // A conditional jump has both a target and a fall-through successor.
 41 |     const bool falls_through = kind == TerminatorJcc
 42 |                                || kind == TerminatorFallthrough
 43 |                                || kind == TerminatorCallUnresolved
 44 |                                || kind == TerminatorCall;
 45 |     if(falls_through) {
 46 |         successors[1] = term.fall_through;
 47 |     }
 48 | 
 49 |     return successors;
 50 | }
 51 | //! Block predicate: true for blocks flagged `is_tail` or ending in a return.
 52 | bool is_exit_block(void*, const Block &block) {
 53 |     const Terminator &last = block.get_terminator();
 54 |     return last.type == TerminatorReturn || last.is_tail;
 55 | }
 56 | //! True if at least one value occurs more than once in `container`.
 57 | template<typename T>
 58 | bool contains_duplicates(const deque<T> &container) {
 59 |     const set<T> distinct(container.cbegin(), container.cend());
 60 |     return distinct.size() < container.size();
 61 | }
 62 | //! Flattens a node-to-path map into the sequence of its paths.
 63 | deque<ConcretePath> paths(const PathsByNode &p) {
 64 |     deque<ConcretePath> flattened;
 65 | 
 66 |     for(auto it = p.cbegin(); it != p.cend(); ++it) {
 67 |         flattened.push_back(it->second);
 68 |     }
 69 |     return flattened;
 70 | }
 71 | 
 72 | //!
 73 | //! \brief Constructs the paths.
 74 | //! \param predicate A predicate which decides whether the given basic block is
 75 | //! deemed "interesting" and should be visited by the generated paths.
 76 | //! \return A set of distinct concrete paths through the function which try to
 77 | //! visit as many of the interesting nodes as possible.
 78 | //!
 79 | //! The algorithm determines sub-paths from the root node to an interesting
 80 | //! block, from any interesting block to another and from an interesting block
 81 | //! to an exit block (see `is_exit_block`). Then, it tries to combine them such
 82 | //! that the number of interesting blocks visited by a path is maximized.
 83 | //!
 84 | //! If the number of interesting basic blocks exceeds `_node_threshold`, a
 85 | //! simpler algorithm is used. The algorithm falls back to merely yielding
 86 | //! paths that visit _one_ interesting block (being optimistic about other
 87 | //! interesting blocks lying on that very same path). If no path results at
 88 | //! all, the duplicate-free paths from the root to any exit block are returned.
 89 | set<ConcretePath> PathBuilder::build_paths(BlockPredicate predicate) const {
 90 |     const auto blocks = _function.get_blocks();
 91 |     const auto root = _function.get_entry();
 92 | 
 93 |     // Get paths from root to interesting nodes.
 94 |     auto root_to_interesting = breadth_first(blocks, root, predicate);
 95 | 
 96 |     // Get paths from interesting node to exit.
 97 |     map<uintptr_t, deque<ConcretePath>> interesting_to_exit;
 98 | 
 99 |     for(const auto &kv : root_to_interesting) {
100 |         auto to_exit = breadth_first(blocks, kv.first, &is_exit_block);
101 |         interesting_to_exit[kv.first] = paths(to_exit);
102 |     }
103 | 
104 |     bool safety_threshold = root_to_interesting.size() > _node_threshold;
105 | 
106 |     /* Get paths from one interesting node to another (distinct) node; done
107 |      * only if the safe threshold is not exceeded. NOTE(review): the assignment
108 |      * in the inner loop replaces the entry of `source` on each iteration. */
109 |     map<uintptr_t, PathsByNode> interesting_to_interesting;
110 |     if(!safety_threshold) {
111 |         for(const auto &kv : root_to_interesting) {
112 |             const auto source = kv.first;
113 | 
114 |             for(const auto &kv_dst : root_to_interesting) {
115 |                 const auto destination = kv_dst.first;
116 |                 if(source == destination) {
117 |                     continue;
118 |                 }
119 | 
120 |                 auto to_other = breadth_first(blocks, source,
121 |                     [&](void*, const Block &block) -> bool {
122 |                         return block.get_address() == destination;
123 |                 }, true); // true: stop the search at the first match
124 | 
125 |                 interesting_to_interesting[source] = to_other;
126 |             }
127 |         }
128 |     }
129 | 
130 |     // Stitch together possible paths.
131 |     set<ConcretePath> paths;
132 | 
133 |     /* TODO: Constraint the number of interesting nodes to chain on a
134 |      * single path. */
135 |     struct Entry {
136 |         ConcretePath path;
137 |         set<uintptr_t> visited;
138 |     };
139 | 
140 |     queue<Entry> work;
141 |     for(const auto &kv : root_to_interesting) {
142 |         Entry entry;
143 |         entry.path = kv.second;
144 |         entry.visited.insert(kv.first);
145 | 
146 |         work.push(entry);
147 |     }
148 | 
149 |     while(!work.empty()) {
150 |         auto current = work.front();
151 |         work.pop();
152 |         // Complete the current path with every known tail to an exit block.
153 |         auto tails = interesting_to_exit[current.path.back()];
154 |         for(const auto &tail : tails) {
155 |             deque<uintptr_t> head = current.path;
156 |             head.pop_back(); // the tail starts with this very node again
157 | 
158 |             for(const auto &t : tail) {
159 |                 head.push_back(t);
160 |             }
161 | 
162 |             paths.insert(head);
163 |         }
164 | 
165 |         /* Safety threshold: Only visit one interesting node and hope that
166 |          * the others happen to lie on the same path.
167 |          */
168 |         if(safety_threshold) {
169 |             continue;
170 |         }
171 |         // Extend the path by each interesting node it has not visited yet.
172 |         for(const auto &kv : interesting_to_interesting[current.path.back()]) {
173 |             const auto &next_node = kv.first;
174 | 
175 |             auto needle = current.visited.find(next_node);
176 |             if(needle != current.visited.cend()) {
177 |                 continue;
178 |             }
179 | 
180 |             Entry next;
181 |             const auto &path_to_next = kv.second;
182 | 
183 |             next.path = current.path;
184 |             next.path.pop_back(); // re-added as first node of `path_to_next`
185 | 
186 |             for(const auto &p : path_to_next) {
187 |                 next.path.push_back(p);
188 |             }
189 | 
190 |             next.visited = current.visited;
191 |             next.visited.insert(next_node);
192 | 
193 |             work.push(next);
194 |         }
195 |     }
196 | 
197 |     /* If no path was built (e.g. there are no interesting blocks), collect
198 |      * all paths from the root node to any exit block.
199 |      */
200 |     if(paths.empty()) {
201 |         auto root_to_exit = breadth_first(blocks, root, &is_exit_block);
202 |         for(const auto &kv : root_to_exit) {
203 |             if(!contains_duplicates(kv.second)) {
204 |                 paths.insert(kv.second);
205 |             }
206 |         }
207 |     }
208 | 
209 |     return paths;
210 | }
211 | // Bookkeeping record for one basic block during the breadth-first search.
212 | struct Node {
213 |     uintptr_t address;
214 |     shared_ptr<Block> block;
215 |     uint16_t distance; // static_cast<uint16_t>(-1) marks "not visited yet"
216 | 
217 |     Node *parent; // node from which this one was reached first
218 | };
219 | //! Breadth-first search over the successor graph, starting at `root`. Every
220 | //! reached block accepted by `predicate` is mapped to the path of block
221 | //! addresses (root first) on which it was reached first. If
222 | //! `terminate_on_match` is set, the search ends with the first match behind
223 | //! the root. Unknown successors are ignored; an unknown `root` yields {}.
224 | PathsByNode PathBuilder::breadth_first(const BlockMap &blocks,
225 |                                        uintptr_t root,
226 |                                        BlockPredicate predicate,
227 |                                        bool terminate_on_match) const {
228 |     const uint16_t unvisited = static_cast<uint16_t>(-1);
229 |     map<uintptr_t, Node> nodes;
230 |     PathsByNode result;
231 |     for(const auto &kv : blocks) {
232 |         Node current;
233 |         current.address  = kv.first;
234 |         current.block    = kv.second;
235 |         current.distance = unvisited;
236 |         current.parent   = nullptr;
237 |         nodes[kv.first] = current;
238 |     }
239 | 
240 |     // Look the root up instead of creating it: a node without block must
241 |     // never be dereferenced.
242 |     const auto start = nodes.find(root);
243 |     if(start == nodes.end() || !start->second.block) {
244 |         return result;
245 |     }
246 |     start->second.distance = 0;
247 | 
248 |     // Explicitly check if the root node is interesting as well.
249 |     if(predicate(_user_defined, *start->second.block)) {
250 |         result[root].push_front(root);
251 |     }
252 | 
253 |     queue<Node*> q;
254 |     q.push(&start->second);
255 |     while(!q.empty()) {
256 |         Node &current = *q.front();
257 |         q.pop();
258 | 
259 |         for(const auto &neighbor : get_successors(*current.block)) {
260 |             // 0 marks an unused successor slot; successors outside of
261 |             // `blocks` have no node and are skipped as well.
262 |             const auto hit = neighbor ? nodes.find(neighbor) : nodes.end();
263 |             if(hit == nodes.end() || hit->second.distance != unvisited) {
264 |                 continue;
265 |             }
266 | 
267 |             Node &n = hit->second;
268 |             n.distance = current.distance + 1;
269 |             n.parent = &current;
270 |             if(predicate(_user_defined, *n.block)) {
271 |                 auto &path = result[n.address];
272 |                 for(const Node *step = &n; step; step = step->parent) {
273 |                     path.push_front(step->address);
274 |                 }
275 |                 if(terminate_on_match) {
276 |                     return result;
277 |                 }
278 |             }
279 |             q.push(&n);
280 |         }
281 |     }
282 |     return result;
283 | }
284 | 


--------------------------------------------------------------------------------
/src/return_value.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "return_value.h"
  3 | 
  4 | 
  5 | using namespace std;
  6 | 
  7 | // Stores the module name together with the vtable file, the .plt data and
  8 | // the external function data handed in; nothing is read or computed at
  9 | // construction time.
 10 | FctReturnValuesFile::FctReturnValuesFile(const string &module_name,
 11 |                                          const VTableFile &vtable_file,
 12 |                                          const ModulePlt &module_plt,
 13 |                                          const ExternalFunctions &external_funcs)
 14 |     : _module_name(module_name), _vtable_file(vtable_file),
 15 |       _module_plt(module_plt), _external_funcs(external_funcs) {
 16 |     // All work happens in the member initializer list.
 17 | }
 18 | 
 19 | // Records `return_value` for the function at `func_addr`, unless an equal
 20 | // value (same origin function, equal expression) is already stored for it.
 21 | void FctReturnValuesFile::add_return_value(uint64_t func_addr,
 22 |                                            const ReturnValue &return_value) {
 23 |     lock_guard<mutex> _(_mtx);
 24 | 
 25 |     const auto known = _return_values_map.find(func_addr);
 26 |     if(known == _return_values_map.end()) {
 27 |         // First value of this function: create its record.
 28 |         FctReturnValues fresh;
 29 |         fresh.func_addr = func_addr;
 30 |         fresh.return_values.push_back(return_value);
 31 |         _return_values_map[func_addr] = fresh;
 32 |         return;
 33 |     }
 34 | 
 35 |     // Skip values that are already known.
 36 |     auto &stored = known->second.return_values;
 37 |     for(const auto &candidate : stored) {
 38 |         const bool same_origin = candidate.func_addr == return_value.func_addr;
 39 |         if(same_origin && *(candidate.content) == *(return_value.content)) {
 40 |             return;
 41 |         }
 42 |     }
 43 | 
 44 |     stored.push_back(return_value);
 45 | }
 46 | 
 47 | 
 48 | // Appends `active_vtable` to the record of the function at `func_addr`,
 49 | // creating the record first if necessary. Duplicates are not filtered.
 50 | void FctReturnValuesFile::add_active_vtable(uint64_t func_addr,
 51 |                                             const VTableActive &active_vtable) {
 52 |     lock_guard<mutex> _(_mtx);
 53 | 
 54 |     const auto known = _return_values_map.find(func_addr);
 55 |     if(known != _return_values_map.end()) {
 56 |         // TODO
 57 |         // Check if active vtable does already exist.
 58 |         known->second.active_vtables.push_back(active_vtable);
 59 |         return;
 60 |     }
 61 | 
 62 |     FctReturnValues fresh;
 63 |     fresh.func_addr = func_addr;
 64 |     fresh.active_vtables.push_back(active_vtable);
 65 |     _return_values_map[func_addr] = fresh;
 66 | }
 67 | 
 68 | // Writes `<target_dir>/<module>.ret_values` in binary form: the
 69 | // NUL-terminated module name, then per function its address, the number of
 70 | // return values followed by their serialized expressions, and the number of
 71 | // active vtables, each as serialized location, NUL-terminated module name
 72 | // and vtable address.
 73 | void FctReturnValuesFile::export_return_values(const string &target_dir) {
 74 |     lock_guard<mutex> _(_mtx);
 75 | 
 76 |     stringstream path;
 77 |     path << target_dir << "/" << _module_name << ".ret_values";
 78 | 
 79 |     ofstream out;
 80 |     out.open(path.str(), ios::out|ios::binary);
 81 | 
 82 |     // First entry of file is always the module name (with trailing \0).
 83 |     out.write(_module_name.c_str(), _module_name.length() + 1);
 84 | 
 85 |     for(const auto &record : _return_values_map) {
 86 |         const auto &values = record.second.return_values;
 87 |         const auto &vtables = record.second.active_vtables;
 88 | 
 89 |         out.write(reinterpret_cast<const char *>(&record.first),
 90 |                   sizeof(record.first));
 91 | 
 92 |         uint32_t count = values.size();
 93 |         out.write(reinterpret_cast<const char *>(&count), sizeof(count));
 94 |         for(const auto &value : values) {
 95 |             serialize(value.content, out);
 96 |         }
 97 | 
 98 |         count = vtables.size();
 99 |         out.write(reinterpret_cast<const char *>(&count), sizeof(count));
100 |         for(const auto &active : vtables) {
101 |             serialize(active.vtbl_ptr_loc, out);
102 | 
103 |             // Actual vtable representation: module name + \0, address.
104 |             const VTable &vtable = _vtable_file.get_vtable(active.index);
105 |             out.write(vtable.module_name.c_str(),
106 |                       vtable.module_name.length() + 1);
107 |             out.write(reinterpret_cast<const char *>(&vtable.addr),
108 |                       sizeof(vtable.addr));
109 |         }
110 |     }
111 | 
112 |     out.close();
113 | }
114 | 
115 | // Reads `<module_file>.ret_values` (the layout `export_return_values`
116 | // writes) and stores its content as external return values. Throws
117 | // `runtime_error` if the object is already finalized, the file cannot be
118 | // opened or is truncated, or an entry belongs to no external function.
119 | void FctReturnValuesFile::import_ext_return_values(const string &module_file) {
120 |     lock_guard<mutex> _(_mtx);
121 | 
122 |     // Make sure that the object is not finalized yet.
123 |     if(_is_finalized) {
124 |         throw runtime_error("FctReturnValuesFile object is finalized.");
125 |     }
126 | 
127 |     ifstream ret_file(module_file + ".ret_values", ios::in|ios::binary);
128 |     if(!ret_file) {
129 |         throw runtime_error("Could not open return values file.");
130 |     }
131 | 
132 |     // Reads one 32 bit counter; a failed read must not be used as a count.
133 |     const auto read_count = [&ret_file]() -> uint32_t {
134 |         uint32_t count = 0;
135 |         ret_file.read(reinterpret_cast<char *>(&count), sizeof(count));
136 |         if(!ret_file) {
137 |             throw runtime_error("Truncated return values file.");
138 |         }
139 |         return count;
140 |     };
141 | 
142 |     // First entry of file is always the module name (a C-like string).
143 |     string import_module_name;
144 |     getline(ret_file, import_module_name, '\0');
145 | 
146 |     while(!ret_file.eof()) {
147 |         uint64_t func_addr;
148 |         ret_file.read(reinterpret_cast<char *>(&func_addr),
149 |                        sizeof(func_addr));
150 |         // EOF only shows up after the first read that runs into it.
151 |         if(ret_file.eof()) {
152 |             break;
153 |         }
154 | 
155 |         FctReturnValues func_ret_values;
156 |         const uint32_t ret_count = read_count();
157 |         for(uint32_t i = 0; i < ret_count; i++) {
158 |             ReturnValue ret_value;
159 |             ret_value.content = unserialize(ret_file);
160 |             ret_value.func_addr = 0;
161 |             func_ret_values.return_values.push_back(ret_value);
162 |         }
163 | 
164 |         const uint32_t vtbl_count = read_count();
165 |         for(uint32_t i = 0; i < vtbl_count; i++) {
166 |             VTableActive act_vtable;
167 |             act_vtable.from_callee = true;
168 |             act_vtable.from_caller = false;
169 |             act_vtable.vtbl_ptr_loc = unserialize(ret_file);
170 |             // Read C-like string.
171 |             string vtbl_module_name;
172 |             getline(ret_file, vtbl_module_name, '\0');
173 |             uint64_t vtable_addr = 0;
174 |             ret_file.read(reinterpret_cast<char *>(&vtable_addr),
175 |                            sizeof(vtable_addr));
176 |             if(!ret_file) {
177 |                 throw runtime_error("Truncated return values file.");
178 |             }
179 |             const VTable &vtable = _vtable_file.get_vtable(vtbl_module_name,
180 |                                                            vtable_addr);
181 |             act_vtable.index = vtable.index;
182 |             func_ret_values.active_vtables.push_back(act_vtable);
183 |         }
184 | 
185 |         // Get corresponding function of return value.
186 |         const ExternalFunction *ext_func;
187 |         ext_func = _external_funcs.get_external_function(import_module_name,
188 |                                                          func_addr);
189 |         if(ext_func == nullptr) {
190 |             throw runtime_error("Imported return value does not belong "\
191 |                                 "to a function.");
192 |         }
193 |         // Add external return value.
194 |         ExternalFctReturnValues ext_ret_value;
195 |         ext_ret_value.func_return_values = func_ret_values;
196 |         ext_ret_value.ext_func = ext_func;
197 |         _ext_return_values.push_back(ext_ret_value);
198 |     }
199 | }
200 | 
201 | // Returns the return values registered for the .plt entry at `addr`, or
202 | // nullptr if there are none. Throws `runtime_error` before finalization.
203 | const FctReturnValues* FctReturnValuesFile::get_plt_return_values_ptr(
204 |                                                         uint64_t addr) const {
205 |     lock_guard<mutex> _(_mtx);
206 | 
207 |     // Lookups are only valid once the object has been finalized.
208 |     if(!_is_finalized) {
209 |         throw runtime_error("FctReturnValuesFile object was not finalized.");
210 |     }
211 | 
212 |     const auto entry = _plt_return_values_ptr_map.find(addr);
213 |     return entry == _plt_return_values_ptr_map.cend() ? nullptr
214 |                                                       : entry->second;
215 | }
216 | 
217 | // Returns the imported return values of function `func_addr` in module
218 | // `module_name` (nullptr if none); throws `runtime_error` if not finalized.
219 | const FctReturnValues* FctReturnValuesFile::get_ext_return_values_ptr(
220 |                                                  const string &module_name,
221 |                                                  uint64_t func_addr) const {
222 |     lock_guard<mutex> _(_mtx);
223 |     if(!_is_finalized) {
224 |         throw runtime_error("FctReturnValuesFile object was not finalized.");
225 |     }
226 |     for(const auto &imported : _ext_return_values) {
227 |         const ExternalFunction &func = *imported.ext_func;
228 |         if(func.addr != func_addr || func.module_name != module_name) {
229 |             continue;
230 |         }
231 |         return &(imported.func_return_values);
232 |     }
233 |     return nullptr;
234 | }
235 | 
236 | 
237 | // Returns a copy of all imported external return values; throws
238 | // `runtime_error` if the object was not finalized.
239 | ExtReturnValues FctReturnValuesFile::get_return_values() const {
240 |     lock_guard<mutex> _(_mtx);
241 |     if(_is_finalized) {
242 |         return _ext_return_values;
243 |     }
244 | 
245 |     throw runtime_error("FctReturnValuesFile object was not finalized.");
246 | }
247 | 
248 | // Reports, while holding `_mtx`, whether the object has been finalized.
249 | bool FctReturnValuesFile::is_finalized_ext_return_values() const {
250 |     lock_guard<mutex> _(_mtx);
251 |     return _is_finalized;
252 | }
253 | 
254 | 
255 | // Links the imported return values to the .plt entries of this module and
256 | // marks the object as finalized. Imported functions without a .plt entry
257 | // are skipped. Throws `runtime_error` if the object is finalized already.
258 | void FctReturnValuesFile::finalize_ext_return_values() {
259 |     lock_guard<mutex> _(_mtx);
260 | 
261 |     // Finalizing twice is an error.
262 |     if(_is_finalized) {
263 |         throw runtime_error("FctReturnValuesFile object is finalized.");
264 |     }
265 | 
266 |     // Set up a map that contains only pointer to .plt entry return values
267 |     for(ExternalFctReturnValues &imported : _ext_return_values) {
268 |         const PltEntry *plt_entry =
269 |                         _module_plt.get_plt_entry(imported.ext_func->name);
270 |         if(plt_entry == nullptr) {
271 |             continue;
272 |         }
273 | 
274 |         // The .plt address replaces the function address of the record and
275 |         // of each of its return values.
276 |         FctReturnValues &values = imported.func_return_values;
277 |         values.func_addr = plt_entry->addr;
278 |         for(auto &ret_value : values.return_values) {
279 |             ret_value.func_addr = plt_entry->addr;
280 |         }
281 | 
282 |         _plt_return_values_ptr_map[plt_entry->addr] = &values;
283 |     }
284 | 
285 |     _is_finalized = true;
286 | }
287 | 


--------------------------------------------------------------------------------
/src/serialization.cpp:
--------------------------------------------------------------------------------
  1 | #include "serialization.h"
  2 | 
  3 | 
  4 | using namespace std;
  5 | 
  6 | // Writes `exp` recursively: a one-byte type tag followed by the payload of
  7 | // that type (constants, ids and offsets as raw integers, names as
  8 | // NUL-terminated strings, sub-expressions in the same format). Throws
  9 | // `runtime_error` for an expression type without a known encoding.
 10 | void serialize(ExpressionPtr exp, ostream &output) {
 11 |     // Dumps the object representation of a plain value.
 12 |     const auto raw = [&output](const void *data, streamsize size) {
 13 |         output.write(static_cast<const char *>(data), size);
 14 |     };
 15 | 
 16 |     switch(exp->type()) {
 17 | 
 18 |         case ExpressionUnknown:
 19 |             output.put(ExpressionUnknown);
 20 |             return;
 21 | 
 22 |         case ExpressionConstant: {
 23 |             output.put(ExpressionConstant);
 24 |             const uint64_t value = static_cast<Constant&>(*exp).value();
 25 |             raw(&value, sizeof(value));
 26 |             return;
 27 |         }
 28 | 
 29 |         case ExpressionSymbolic: {
 30 |             output.put(ExpressionSymbolic);
 31 |             Symbolic &symbol = static_cast<Symbolic&>(*exp);
 32 |             // Length + 1 to have \0 at the end.
 33 |             output.write(symbol.name().c_str(), symbol.name().length() + 1);
 34 |             return;
 35 |         }
 36 | 
 37 |         case ExpressionTemporary: {
 38 |             output.put(ExpressionTemporary);
 39 |             const uint32_t id = static_cast<Temporary&>(*exp).id();
 40 |             raw(&id, sizeof(id));
 41 |             return;
 42 |         }
 43 | 
 44 |         case ExpressionRegister: {
 45 |             output.put(ExpressionRegister);
 46 |             const uint32_t offset = static_cast<Register&>(*exp).offset();
 47 |             raw(&offset, sizeof(offset));
 48 |             return;
 49 |         }
 50 | 
 51 |         case ExpressionIndirection: {
 52 |             // Only the address expression follows the tag.
 53 |             output.put(ExpressionIndirection);
 54 |             serialize(static_cast<Indirection&>(*exp).address(), output);
 55 |             return;
 56 |         }
 57 | 
 58 |         case ExpressionOperation: {
 59 |             // Operator byte first, then both operands.
 60 |             output.put(ExpressionOperation);
 61 |             Operation &operation = static_cast<Operation&>(*exp);
 62 |             output.put(operation.operation());
 63 |             serialize(operation.lhs(), output);
 64 |             serialize(operation.rhs(), output);
 65 |             return;
 66 |         }
 67 | 
 68 |         default:
 69 |             throw runtime_error("Do not know how to serialize "\
 70 |                                 "expression type.");
 71 |     }
 72 | }
 73 | 
 74 | 
 75 | ExpressionPtr unserialize(istream &input) {
 76 | 
 77 |     switch(input.get()) {
 78 | 
 79 |         case ExpressionUnknown: {
 80 |             Unknown temp;
 81 |             return make_shared<Unknown>(temp);
 82 |             break;
 83 |         }
 84 | 
 85 |         case ExpressionConstant: {
 86 |             uint64_t value;
 87 |             input.read(reinterpret_cast<char *>(&value),
 88 |                        sizeof(value));
 89 |             Constant temp(value);
 90 |             return make_shared<Constant>(temp);
 91 |         }
 92 | 
 93 |         case ExpressionSymbolic: {
 94 |             string name;
 95 |             // Read C-like string.
 96 |             getline(input, name, '\0');
 97 |             Symbolic temp(name);
 98 |             return make_shared<Symbolic>(temp);
 99 |         }
100 | 
101 |         case ExpressionTemporary: {
102 |             uint32_t id;
103 |             input.read(reinterpret_cast<char *>(&id),
104 |                        sizeof(id));
105 |             Temporary temp(id);
106 |             return make_shared<Temporary>(temp);
107 |         }
108 | 
109 |         case ExpressionRegister: {
110 |             uint32_t offset;
111 |             input.read(reinterpret_cast<char *>(&offset),
112 |                        sizeof(offset));
113 |             Register temp(offset);
114 |             return make_shared<Register>(temp);
115 |         }
116 | 
117 |         case ExpressionIndirection: {
118 |             Indirection temp(unserialize(input));
119 |             return make_shared<Indirection>(temp);
120 |             break;
121 |         }
122 | 
123 |         case ExpressionOperation: {
124 |             OperationType op_type = static_cast<OperationType>(input.get());
125 |             Operation temp(unserialize(input),
126 |                            op_type,
127 |                            unserialize(input));
128 |             return make_shared<Operation>(temp);
129 |             break;
130 |         }
131 | 
132 |         default:
133 |             break;
134 |     }
135 | 
136 |     throw runtime_error("Do not know how to unserialize "\
137 |                         "expression type.");
138 | }
139 | 


--------------------------------------------------------------------------------
/src/state.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "state.h"
  3 | 
  4 | #include <memory>
  5 | #include <sstream>
  6 | 
  7 | using namespace std;
  8 | 
  9 | map<unsigned int, shared_ptr<Symbolic>> State::_initial_values = [] {
 10 |     map<unsigned int, shared_ptr<Symbolic>> result;
 11 | 
 12 |     for(const auto &r : AMD64_REGISTERS) {
 13 |         const auto &initial = make_shared<Symbolic>(format_initial_value(r));
 14 |         result[r] = initial;
 15 |     }
 16 | 
 17 |     return result;
 18 | }();
 19 | 
 20 | /*!
 21 |  * \brief Constructs a new `State` and initializes it (unless specified
 22 |  * otherwise).
 23 |  *
 24 |  * \param initialize `True` per default. Whether the state should be fully
 25 |  * initialized. \see `set_initial_state`
 26 |  *
 27 |  * \todo This is specific to x86_64. Possibly better to provide a generic
 28 |  * base class first.
 29 |  */
 30 | State::State(bool initialize)
 31 |     : _unknown(make_shared<Unknown>()) {
 32 |     if(initialize) {
 33 |         set_initial_state();
 34 |     }
 35 | }
 36 | 
 37 | /*!
 38 |  * \brief Prints the state to the given output stream.
 39 |  * \param stream The output stream to which the state is printed.
 40 |  * \param state The `State` itself.
 41 |  * \return The (modified) output stream `stream`.
 42 |  */
 43 | ostream &operator<<(ostream &stream, const State &state) {
 44 |     for(const auto &kv : state._state) {
 45 |         stream << *kv.first << " -> " << *kv.second << endl;
 46 |     }
 47 | 
 48 |     return stream;
 49 | }
 50 | 
 51 | const string State::format_initial_value(size_t offset) {
 52 |     // TODO: Precompute this.
 53 |     const auto &needle = AMD64_DISPLAY_REGISTERS.find(offset);
 54 |     if(needle != AMD64_DISPLAY_REGISTERS.cend()) {
 55 |         return "init_" + needle->second;
 56 |     }
 57 | 
 58 |     stringstream stream;
 59 |     string symbolic_value;
 60 | 
 61 |     stream << "init_r" << dec << offset;
 62 |     stream >> symbolic_value;
 63 |     return symbolic_value;
 64 | }
 65 | 
 66 | /*!
 67 |  * \brief Formats a return value of a call at the given address.
 68 |  * \param address The address of the call whose return value shall be formatted.
 69 |  * \return A unique string to be used for a symbol describing the return value
 70 |  * of the call.
 71 |  */
 72 | const string State::format_return_value(uintptr_t address) {
 73 |     stringstream stream;
 74 |     stream << "return_" << hex << address;
 75 | 
 76 |     return stream.str();
 77 | }
 78 | 
 79 | /*!
 80 |  * \brief Initializes the state in respect to x86_64 registers.
 81 |  *
 82 |  * Each register gets assigned a symbol depicting its initial value.
 83 |  *
 84 |  * \see `State::format_initial_value`
 85 |  */
 86 | void State::set_initial_state() {
 87 |     for(const auto &r : AMD64_REGISTERS) {
 88 |         // Copy necessary here?
 89 |         const auto &dst = make_shared<Register>(r);
 90 |         const auto &src = make_shared<Symbolic>(format_initial_value(r));
 91 | 
 92 |         _state[dst] = src;
 93 |     }
 94 | }
 95 | 
 96 | /*!
 97 |  * \brief Removes all System V scratch registers from the state.
 98 |  *
 99 |  * \todo Support different calling conventions.
100 |  * \see `system_v_scratch`
101 |  */
102 | void State::purge_scratch_registers(FileFormatType file_format) {
103 |     switch(file_format) {
104 |         case FileFormatELF64:
105 |             for(const auto &scratch : system_v_scratch) {
106 |                 _state.erase(scratch);
107 |             }
108 |             break;
109 |         case FileFormatPE64:
110 |             for(const auto &scratch : msvc_scratch) {
111 |                 _state.erase(scratch);
112 |             }
113 |             break;
114 |         default:
115 |             throw runtime_error("Do not know how to "\
116 |                                 "handle file format.");
117 |     }
118 | }
119 | 
120 | /*!
121 |  * \brief Merge another `State` into this one.
122 |  *
123 |  * Already existing entries are overwritten with the values of the other state.
124 |  *
125 |  * \param other The state that is merged into this.
126 |  */
127 | void State::merge(const State &other) {
128 |     for(const auto &kv : other._state) {
129 |         _state[kv.first] = kv.second;
130 |     }
131 | }
132 | 
133 | /*!
134 |  * \brief Helper function to return all memory indirections recorded in the
135 |  * state.
136 |  *
137 |  * The first entry of the pair denotes the memory address, whereas the second
138 |  * entry denotes the value that is to be written.
139 |  *
140 |  * \return A vector of key/value pairs describing a memory access.
141 |  */
142 | const Expressions State::get_memory_accesses() const {
143 |     Expressions result;
144 |     for(const auto &kv : _state) {
145 |         if(kv.first->type() == ExpressionIndirection) {
146 |             result.push_back(kv);
147 |         }
148 |     }
149 | 
150 |     return result;
151 | }
152 | 
153 | /*!
154 |  * \brief Helper function to optimize the representation of the state.
155 |  * \param do_purge_unchanged `true`, if unchanged registers (those still set to
156 |  * their initial value) shall be removed from state.
157 |  *
158 |  * \todo Purging unchanged registers may lead to issues regarding the binding
159 |  * of `rsp`. It is assumed to be set in some cases. We should rather check for
160 |  * existence and throw `runtime_error` on mismatch.
161 |  */
162 | void State::optimize(bool do_purge_unchanged) {
163 |     // Transitively kill expressions affected by a self-reference.
164 |     for(const auto &kv: _state) {
165 |         if(kv.second->contains(*kv.first)) {
166 |             kill(kv.first, kv.second);
167 |         }
168 |     }
169 | 
170 |     /* Purging unchanged registers will fail when propagating states (e.g.,
171 |      * an AbiHint requires rsp to be defined and cannot implement calling
172 |      * conventions properly if it has been purged. Disabled by default.
173 |      */
174 |     if(optimizer() && do_purge_unchanged) {
175 |         purge_unchanged();
176 |     }
177 | }
178 | 
179 | bool State::optimizer(bool do_purge_unchanged) {
180 |     bool dirty = false;
181 |     if(do_purge_unchanged) {
182 |         dirty |= purge_unchanged();
183 |     }
184 | 
185 |     dirty |= purge_uninteresting();
186 | 
187 |     optimize_entries();
188 |     dirty |= propagate();
189 | 
190 |     return dirty;
191 | }
192 | 
193 | bool State::propagate() {
194 |     bool dirty = false;
195 | 
196 |     // Propagate any values which are also keys in the same state.
197 |     for(const auto &kv : _state) {
198 |         const auto &value = kv.second;
199 | 
200 |         const auto &needle = _state.find(value);
201 |         if(needle != _state.cend()) {
202 |             _state[kv.first] = needle->second;
203 |             dirty = true;
204 |         }
205 |     }
206 | 
207 |     // Propagate sub-expressions.
208 |     for(const auto &kv : _state) {
209 |         for(const auto &p : _state) {
210 |             dirty |= p.first->propagate(kv.first, kv.second);
211 |             dirty |= p.second->propagate(kv.first, kv.second);
212 |         }
213 |     }
214 | 
215 |     return dirty;
216 | }
217 | 
218 | void State::optimize_entries() {
219 |     for(auto i = _state.begin(); i != _state.end(); ++i) {
220 |         i->first->optimize();
221 |         i->second->optimize();
222 |     }
223 | }
224 | 
225 | /* Set explicitly to Unknown instead of deleting and keep Unknown:s? As not to
226 |  * mess up logic trying to get a value regardless. Need to think about this.
227 |  */
228 | bool State::purge_uninteresting() {
229 |     bool dirty = false;
230 | 
231 |     for(auto i = _state.begin(); i != _state.end();) {
232 |         Expression &key = *i->first;
233 |         Expression &value = *i->second;
234 | 
235 |         if(key.type() == ExpressionTemporary) {
236 |             i = _state.erase(i);
237 |             dirty = true;
238 |             continue;
239 |         }
240 | 
241 |         // We want to keep Unknown:s for register values only.
242 |         if(value.type() == ExpressionUnknown &&
243 |            key.type() != ExpressionRegister) {
244 |             i = _state.erase(i);
245 |             dirty = true;
246 |             continue;
247 |         }
248 | 
249 |         if(key.type() == ExpressionRegister) {
250 |             auto reg = static_cast<const Register&>(key);
251 |             if(reg.offset() > OFFB_R15 && reg.offset() != OFFB_RIP) {
252 | 
253 |                 i = _state.erase(i);
254 |                 dirty = true;
255 |                 continue;
256 |             }
257 |         }
258 | 
259 |         ++i;
260 |     }
261 | 
262 |     return dirty;
263 | }
264 | 
265 | bool State::purge_unchanged() {
266 |     bool dirty = false;
267 | 
268 |     for(auto i = _state.begin(); i != _state.end();) {
269 |         Expression &key = *i->first;
270 |         Expression &value = *i->second;
271 | 
272 |         if(key.type() == ExpressionRegister) {
273 |             auto offset = static_cast<const Register&>(key).offset();
274 |             const auto &initial = _initial_values.find(offset);
275 | 
276 |             if(initial != _initial_values.cend()) {
277 |                 if(*initial->second == value) {
278 |                     i = _state.erase(i);
279 |                     dirty = true;
280 |                     continue;
281 |                 }
282 |             }
283 |         }
284 | 
285 |         ++i;
286 |     }
287 | 
288 |     return dirty;
289 | }
290 | 
291 | InternalState::iterator State::erase(const InternalState::iterator &iterator) {
292 |     return _state.erase(iterator);
293 | }
294 | 
295 | size_t State::erase(const InternalState::key_type &key) {
296 |     return _state.erase(key);
297 | }
298 | 
299 | bool State::find(const InternalState::key_type &key,
300 |                  arg_out InternalState::iterator &iterator) {
301 |     InternalState::iterator needle = _state.find(key);
302 |     if(needle == _state.end()) {
303 |         return false;
304 |     }
305 | 
306 |     iterator = needle;
307 |     return true;
308 | }
309 | 
310 | bool State::find(const InternalState::key_type &key,
311 |                  arg_out InternalState::const_iterator &iterator) const {
312 |     InternalState::const_iterator needle = _state.find(key);
313 |     if(needle == _state.cend()) {
314 |         return false;
315 |     }
316 | 
317 |     iterator = needle;
318 |     return true;
319 | }
320 | 
321 | kill_results State::kill_helper(const ExpressionPtr &key,
322 |                                 const ExpressionPtr &value) {
323 |     kill_results affected;
324 | 
325 |     for(auto i = _state.begin(), e = _state.end(); i != e; ++i) {
326 |         if(i->second->contains(*key) || i->second->contains(*value)) {
327 |             if(i->second->type() != ExpressionUnknown) {
328 |                 affected.insert(i->first);
329 |                 i->second = _unknown;
330 |             }
331 |         }
332 |     }
333 | 
334 |     return affected;
335 | }
336 | 
337 | void State::kill(const ExpressionPtr &key, const ExpressionPtr &value) {
338 |     _state[key] = _unknown;
339 | 
340 |     kill_results affected = { key };
341 |     while(!affected.empty()) {
342 |         kill_results work_list;
343 | 
344 |         for(const auto &a : affected) {
345 |             const auto &killed = kill_helper(a, value);
346 | 
347 |             for(const auto &k : killed) {
348 |                 work_list.insert(k);
349 |             }
350 |         }
351 | 
352 |         affected = work_list;
353 |     }
354 | }
355 | 
356 | void State::update(const InternalState::key_type &key,
357 |                    const InternalState::mapped_type &value) {
358 |     _state[key] = value;
359 | }
360 | 


--------------------------------------------------------------------------------
/src/vcall.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "vcall.h"
  3 | #include "expression.h"
  4 | 
  5 | using namespace std;
  6 | 
  7 | 
  8 | VCallFile::VCallFile(const string &module_name,
  9 |                      const VTableHierarchies &vtable_hierarchies,
 10 |                      const VTableFile &vtable_file)
 11 |     : _module_name(module_name),
 12 |       _vtable_hierarchies(vtable_hierarchies),
 13 |       _vtable_file(vtable_file) {}
 14 | 
 15 | 
 16 | const VCalls &VCallFile::get_vcalls() const {
 17 |     lock_guard<mutex> _(_mtx);
 18 | 
 19 |     return _vcalls;
 20 | }
 21 | 
 22 | 
 23 | void VCallFile::add_possible_vcall(uint64_t addr) {
 24 |     lock_guard<mutex> _(_mtx);
 25 | 
 26 |     _possible_vcalls.insert(addr);
 27 | }
 28 | 
 29 | 
 30 | void VCallFile::add_vcall(uint64_t addr, uint32_t index, size_t entry_index) {
 31 |     lock_guard<mutex> _(_mtx);
 32 | 
 33 |     // Check if virtual callsite is already known.
 34 |     for(auto &it : _vcalls) {
 35 |         if(it.addr == addr) {
 36 |             it.indexes.insert(index);
 37 | 
 38 |             // Do a sanity check that the entry indexes have not changed.
 39 |             // (Intuition: Can never be different for the same vcall).
 40 |             if(it.entry_index != entry_index) {
 41 |                 cerr << "Different entry index at vcall 0x"
 42 |                      << hex << addr << endl;
 43 |                 cerr << "Old entry index: "
 44 |                      << dec << it.entry_index << endl;
 45 |                 cerr << "New entry index: "
 46 |                      << dec << entry_index << endl;
 47 |                 throw runtime_error("Different vtable entry indexes "\
 48 |                                     "for same vcall.");
 49 |             }
 50 | 
 51 |             return;
 52 |         }
 53 |     }
 54 | 
 55 |     VCall vcall;
 56 |     vcall.indexes.insert(index);
 57 |     vcall.addr = addr;
 58 |     vcall.entry_index = entry_index;
 59 |     _vcalls.push_back(vcall);
 60 | }
 61 | 
 62 | 
 63 | void VCallFile::export_vcalls(const string &target_dir) {
 64 |     lock_guard<mutex> _(_mtx);
 65 | 
 66 |     stringstream temp_str;
 67 |     temp_str << target_dir << "/" << _module_name << ".vcalls";
 68 |     string target_file = temp_str.str();
 69 | 
 70 |     ofstream vcall_file;
 71 |     vcall_file.open(target_file);
 72 | 
 73 |     stringstream temp_str_ext;
 74 |     temp_str_ext << target_dir << "/" << _module_name << ".vcalls_extended";
 75 |     string target_file_ext = temp_str_ext.str();
 76 | 
 77 |     ofstream vcall_file_ext;
 78 |     vcall_file_ext.open(target_file_ext);
 79 | 
 80 |     vcall_file << _module_name << endl;
 81 |     vcall_file_ext << _module_name << endl;
 82 | 
 83 |     const HierarchiesVTable &hierarchies =
 84 |                             _vtable_hierarchies.get_hierarchies();
 85 |     for(const auto &it : _vcalls) {
 86 | 
 87 |         // Do not consider all vtables used in this vcall as in one hierarchy.
 88 |         unordered_set<uint32_t> allowed_vtables;
 89 |         for(const auto idx : it.indexes) {
 90 |             for(const auto dependent_vtbls : hierarchies) {
 91 |                 if(dependent_vtbls.find(idx) != dependent_vtbls.cend()) {
 92 |                     for(uint32_t hier_idx : dependent_vtbls) {
 93 |                         allowed_vtables.insert(hier_idx);
 94 |                     }
 95 |                 }
 96 |             }
 97 | 
 98 |             // Add vtable index manually afterwards in order to also export
 99 |             // vtables that do not belong to a hierarchy.
100 |             allowed_vtables.insert(idx);
101 |         }
102 | 
103 |         // Address of vcall in module.
104 |         vcall_file << hex << it.addr;
105 |         vcall_file_ext << hex << it.addr;
106 | 
107 |         // Index into vtable that is used by vcall.
108 |         vcall_file_ext << " " << hex << it.entry_index;
109 | 
110 |         // Export the hierarchy in the following format:
111 |         // <module_name:hex_addr_vtable> <module_name:hex_addr_function>
112 |         for(const auto idx : allowed_vtables) {
113 |             const VTable& temp = _vtable_file.get_vtable(idx);
114 | 
115 |             // Export vtable address.
116 |             vcall_file << " "
117 |                        << temp.module_name
118 |                        << ":"
119 |                        << hex << temp.addr;
120 |             vcall_file_ext << " "
121 |                            << temp.module_name
122 |                            << ":"
123 |                            << hex << temp.addr;
124 | 
125 |             // Export target function address.
126 |             uint64_t target_func = 0;
127 |             if(temp.entries.size() > it.entry_index) {
128 |                 target_func = temp.entries.at(it.entry_index);
129 |             }
130 |             vcall_file_ext << " "
131 |                            << temp.module_name
132 |                            << ":"
133 |                            << hex << target_func;
134 |         }
135 | 
136 |         vcall_file << endl;
137 |         vcall_file_ext << endl;
138 |     }
139 | 
140 |     vcall_file.close();
141 |     vcall_file_ext.close();
142 | 
143 |     stringstream temp_str_poss;
144 |     temp_str_poss << target_dir << "/" << _module_name << ".vcalls_possible";
145 |     string target_file_poss = temp_str_poss.str();
146 | 
147 |     ofstream vcall_file_poss;
148 |     vcall_file_poss.open(target_file_poss);
149 | 
150 |     vcall_file_poss << _module_name << endl;
151 | 
152 |     for(const auto &it : _possible_vcalls) {
153 | 
154 |         // Address of possible vcall in module.
155 |         vcall_file_poss << hex << it << endl;
156 |     }
157 | 
158 |     vcall_file_poss.close();
159 | }
160 | 


--------------------------------------------------------------------------------
/src/vex.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "vex.h"
  3 | 
  4 | #include <cstdio>
  5 | #include <cstring>
  6 | #include <sstream>
  7 | #include <ostream>
  8 | #include <stdexcept>
  9 | 
 10 | using namespace std;
 11 | 
 12 | Vex::Vex()
 13 |     : _block(nullptr) {
 14 | 
 15 |     AllocationListener listener = &Vex::incoming_allocation;
 16 |     LibVEX_registerAllocationListener(this, listener);
 17 | 
 18 |     LibVEX_default_VexControl(&_control);
 19 | 
 20 |     _control.iropt_level = 2;
 21 |     _control.iropt_verbosity = 0;
 22 |     _control.iropt_unroll_thresh = 0;
 23 | 
 24 |     _control.guest_chase_thresh = 0;
 25 |     _control.guest_max_insns = MAX_INSTRUCTIONS;
 26 | 
 27 |     LibVEX_Init(&Vex::failure_exit, &Vex::log_bytes, 0, &_control);
 28 | }
 29 | 
 30 | Vex::~Vex() {
 31 |     for(auto allocation : _allocations) {
 32 |         free(allocation);
 33 |     }
 34 | }
 35 | 
 36 | void Vex::incoming_allocation(void *user, void *allocation) {
 37 |     auto self = reinterpret_cast<Vex*>(user);
 38 |     self->manage_allocation(allocation);
 39 | }
 40 | 
 41 | void Vex::manage_allocation(void *allocation) {
 42 |     _allocations.push_back(allocation);
 43 | }
 44 | 
 45 | // FIXME: This is specific to AMD-64.
 46 | void Vex::initialize() {
 47 |     memset(&_args, 0, sizeof(_args));
 48 |     memset(&_abi_info, 0, sizeof(_abi_info));
 49 |     memset(&_arch_info, 0, sizeof(_arch_info));
 50 | 
 51 |     LibVEX_default_VexAbiInfo(&_abi_info);
 52 |     LibVEX_default_VexArchInfo(&_arch_info);
 53 | 
 54 |     _abi_info.guest_amd64_assume_fs_is_const = true;
 55 |     _abi_info.guest_amd64_assume_gs_is_const = true;
 56 | 
 57 |     _args.callback_opaque = this;
 58 | 
 59 |     _args.instrument1 = &Vex::instrument;
 60 |     _args.chase_into_ok = &Vex::chase_into_ok;
 61 |     _args.needs_self_check = &Vex::needs_self_check;
 62 | 
 63 |     const auto dispatch = reinterpret_cast<void*>(&Vex::dispatch);
 64 |     _args.disp_cp_chain_me_to_fastEP = dispatch;
 65 |     _args.disp_cp_chain_me_to_slowEP = dispatch;
 66 |     _args.disp_cp_xassisted = dispatch;
 67 |     _args.disp_cp_xindir = dispatch;
 68 | 
 69 |     _args.guest_extents = &_guest_extents;
 70 | }
 71 | 
 72 | void Vex::initialize_amd64() {
 73 |     initialize();
 74 | 
 75 |     _arch_info.endness = VexEndnessLE;
 76 |     _arch_info.hwcaps =  VEX_HWCAPS_AMD64_SSE3 |
 77 |             VEX_HWCAPS_AMD64_CX16 |
 78 |             VEX_HWCAPS_AMD64_LZCNT |
 79 |             VEX_HWCAPS_AMD64_AVX |
 80 |             VEX_HWCAPS_AMD64_RDTSCP |
 81 |             VEX_HWCAPS_AMD64_BMI |
 82 |             VEX_HWCAPS_AMD64_AVX2;
 83 | 
 84 |     _abi_info.guest_stack_redzone_size = 128;
 85 | 
 86 |     _args.arch_host = VexArchAMD64;
 87 |     _args.arch_guest = VexArchAMD64;
 88 | 
 89 |     _args.archinfo_host = _arch_info;
 90 |     _args.archinfo_guest = _arch_info;
 91 | 
 92 |     _args.abiinfo_both = _abi_info;
 93 | }
 94 | 
 95 | void Vex::log_bytes(const char *bytes, size_t number_bytes) {
 96 |     for(auto i = 0u; i < number_bytes; ++i) {
 97 |         printf("%c", bytes[i]);
 98 |     }
 99 | }
100 | 
101 | IRSB *Vex::instrument(void *callback_opaque, IRSB *block,
102 |                             const VexGuestLayout*, const VexGuestExtents*,
103 |                             const VexArchInfo*, IRType, IRType) {
104 | 
105 |     Vex &self = *static_cast<Vex*>(callback_opaque);
106 |     self._block = deepCopyIRSB(block);
107 | 
108 |     return block;
109 | }
110 | 
111 | /*!
112 |  * \brief Translates bytes at a certain address into a VEX block of type IRSB.
113 |  *
114 |  * Translates the bytes given by array `bytes` which is assumed to lie at
115 |  * virtual address `guest_address`. Outputs the virtual address of the end of
116 |  * the translated block in parameter `vex_block_end`.
117 |  *
118 |  * \param bytes The bytes that are to be processed.
119 |  * \param guest_address The virtual address the bytes originally lie at.
120 |  * \param instruction_count The number of instructions VEX shall translate.
121 |  * \param[out] vex_block_end The virtual address of the end of the translated
122 |  *  block.
123 |  * \return A reference to the translated VEX block (of type IRSB). Due to the
124 |  * way VEX works internally, this reference lives as long as no further
125 |  * translation request is made and hence should be deep-copied immediately.
126 |  *
127 |  * \todo VEX may not respect `instruction_count` properly. This should be
128 |  * handled by the `Translator` class though.
129 |  */
130 | const IRSB &Vex::translate(const uint8_t *bytes, uintptr_t guest_address,
131 |                            size_t instruction_count,
132 |                            arg_out uintptr_t *vex_block_end) {
133 |     initialize_amd64();
134 |     _control.guest_max_insns = instruction_count;
135 | 
136 |     _args.guest_bytes = bytes;
137 |     _args.guest_bytes_addr = guest_address;
138 | 
139 |     const auto result = LibVEX_Translate(&_args);
140 |     if(!(result.status & result.VexTransOK)) {
141 |         stringstream stream;
142 |         stream << "Cannot translate code at address "
143 |                << hex << reinterpret_cast<uintptr_t>(bytes)
144 |                << " (guest address " << hex << guest_address << ").";
145 | 
146 |         throw runtime_error(stream.str());
147 |     }
148 | 
149 |     if(vex_block_end) {
150 |         // FIXME: Assert only one guest extent was used.
151 |         *vex_block_end = guest_address + _args.guest_extents->len[0];
152 |     }
153 | 
154 |     return *_block;
155 | }
156 | 


--------------------------------------------------------------------------------
/src/vtable_file.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "vtable_file.h"
  3 | 
  4 | using namespace std;
  5 | 
  6 | /*!
  7 |  * \brief Constructs a new `VtableFile` object.
  8 |  * \param vtable_file The filename of the `_vtables.txt` file (as produced
  9 |  * by the exporter script).
 10 |  */
 11 | VTableFile::VTableFile(const string &this_module_name) {
 12 |     _this_module_name = this_module_name;
 13 |     _vtables.clear();
 14 |     _index = 0;
 15 | }
 16 | 
 17 | bool VTableFile::parse(const string &vtables_file) {
 18 | 
 19 |     // Make sure that we parse files only if object was not finalized yet.
 20 |     if(_is_finalized) {
 21 |         throw runtime_error("Parse attempt after VTableFile object was"\
 22 |                             " finalized.");
 23 |     }
 24 | 
 25 |     ifstream file(vtables_file + "_vtables.txt");
 26 |     if(!file) {
 27 |         return false;
 28 |     }
 29 | 
 30 |     string line;
 31 | 
 32 |     // Parse first line manually.
 33 |     getline(file, line);
 34 |     istringstream header_parser(line);
 35 | 
 36 |     // First entry of file is always the module name.
 37 |     string module_name;
 38 |     header_parser >> module_name;
 39 |     if(header_parser.fail()) {
 40 |         return false;
 41 |     }
 42 | 
 43 |     // Check if we already parsed a vtables file for this module.
 44 |     if(_managed_modules.find(module_name) != _managed_modules.cend()) {
 45 |         throw runtime_error("A vtables file for this module was already "\
 46 |                             "parsed.");
 47 |     }
 48 | 
 49 |     bool has_vtables = false;
 50 |     while(getline(file, line)) {
 51 |         has_vtables = true;
 52 |         istringstream parser(line);
 53 |         uint64_t vtable_addr = 0;
 54 |         uint64_t vtable_entry = 0;
 55 |         int offset_to_top = 0;
 56 | 
 57 |         parser >> hex >> vtable_addr;
 58 |         if(parser.fail()) {
 59 |             return false;
 60 |         }
 61 | 
 62 |         parser >> dec >> offset_to_top;
 63 |         if(parser.fail()) {
 64 |             return false;
 65 |         }
 66 | 
 67 |         VTable vtable;
 68 |         vtable.addr = vtable_addr;
 69 |         vtable.offset_to_top = offset_to_top;
 70 |         vtable.module_name = module_name;
 71 | 
 72 |         // NOTE: Index is a unique identifier for all vtables in all modules.
 73 |         vtable.index = _index;
 74 | 
 75 |         while(parser >> hex >> vtable_entry) {
 76 |             if(parser.fail()) {
 77 |                 return false;
 78 |             }
 79 | 
 80 |             vtable.entries.push_back(vtable_entry);
 81 |         }
 82 | 
 83 |         _vtables.push_back(vtable);
 84 |         assert(_vtables[_index].module_name == vtable.module_name
 85 |                && _vtables[_index].addr == vtable.addr
 86 |                && _vtables[_index].index == vtable.index
 87 |                && "Index of vtable and index in vector are not the same.");
 88 | 
 89 |         _index++;
 90 |     }
 91 | 
 92 |     // Only add module to managed modules if it has at least one vtable.
 93 |     if(has_vtables) {
 94 |         _managed_modules.insert(module_name);
 95 |     }
 96 | 
 97 |     return true;
 98 | }
 99 | 
100 | 
101 | const VTableMap& VTableFile::get_this_vtables() const {
102 | 
103 |     // Make sure that the object is finalized.
104 |     if(!_is_finalized) {
105 |         throw runtime_error("VTableFile object was not finalized.");
106 |     }
107 | 
108 |     return *(_module_vtables_map.at(_this_module_name));
109 | }
110 | 
111 | 
112 | const VTableMap& VTableFile::get_vtables(const string &module_name) const {
113 | 
114 |     // Make sure that the object is finalized.
115 |     if(!_is_finalized) {
116 |         throw runtime_error("VTableFile object was not finalized.");
117 |     }
118 | 
119 |     if(_module_vtables_map.find(module_name) == _module_vtables_map.cend()) {
120 |         throw runtime_error("VTableFile object does not know module name.");
121 |     }
122 | 
123 |     return *(_module_vtables_map.at(module_name));
124 | }
125 | 
126 | 
127 | const VTableVector& VTableFile::get_all_vtables() const {
128 | 
129 |     // Make sure that the object is finalized.
130 |     if(!_is_finalized) {
131 |         throw runtime_error("VTableFile object was not finalized.");
132 |     }
133 | 
134 |     return _vtables;
135 | }
136 | 
137 | 
138 | void VTableFile::finalize() {
139 | 
140 |     // Make sure that we only finalize this object once.
141 |     if(_is_finalized) {
142 |         throw runtime_error("VTableFile object was already finalized.");
143 |     }
144 |     _is_finalized = true;
145 | 
146 |     if(_managed_modules.find(_this_module_name) == _managed_modules.cend()) {
147 |         throw runtime_error("VTableFile object has no data for the "\
148 |                             "module to analyze.");
149 |     }
150 | 
151 |     // Build up a vector that contains a mapping for each module
152 |     // that maps from vtable address to vtable object.
153 |     uint32_t idx = 0;
154 |     for(auto &module_it : _managed_modules) {
155 |         for(auto &vtbl_it : _vtables) {
156 |             if(vtbl_it.module_name != module_it) {
157 |                 continue;
158 |             }
159 | 
160 |             if(_module_vtables.size() <= idx) {
161 |                 VTableMap temp;
162 |                 temp[vtbl_it.addr] = &vtbl_it;
163 |                 _module_vtables.push_back(temp);
164 |             }
165 |             else {
166 |                 _module_vtables[idx][vtbl_it.addr] = &vtbl_it;
167 |             }
168 |         }
169 |         idx++;
170 |     }
171 | 
172 |     // Build up a mapping that maps a module name to its vtable address
173 |     // to vtable object map.
174 |     idx = 0;
175 |     for(auto &module_it : _managed_modules) {
176 |         _module_vtables_map[module_it] = &_module_vtables[idx];
177 |         idx++;
178 |     }
179 | 
180 |     // Sanity check if module mapping is completely correct
181 |     // (Added for now to exclude this as error source)
182 |     for(auto &module_it : _managed_modules) {
183 |         const auto &vtable_map = *(_module_vtables_map.at(module_it));
184 |         for(const auto &vtbl_kv : vtable_map) {
185 |             if(vtbl_kv.second->module_name != module_it) {
186 |                 throw runtime_error("Error while finalizing vtable mapping.");
187 |             }
188 |         }
189 |     }
190 | 
191 |     return;
192 | }
193 | 
194 | 
195 | bool VTableFile::is_finalized() const {
196 |     return _is_finalized;
197 | }
198 | 
199 | 
200 | const VTable* VTableFile::get_vtable_ptr(const std::string &module_name,
201 |                                         uint64_t addr) const {
202 | 
203 |     // Make sure that the object is finalized.
204 |     if(!_is_finalized) {
205 |         throw runtime_error("VTableFile object was not finalized.");
206 |     }
207 | 
208 |     if(_module_vtables_map.at(module_name)->find(addr)
209 |             != _module_vtables_map.at(module_name)->cend()) {
210 | 
211 |         return (_module_vtables_map.at(module_name)->at(addr));
212 |     }
213 |     return nullptr;
214 | }
215 | 
216 | 
217 | const VTable& VTableFile::get_vtable(const std::string &module_name,
218 |                                      uint64_t addr) const {
219 | 
220 |     // Make sure that the object is finalized.
221 |     if(!_is_finalized) {
222 |         throw runtime_error("VTableFile object was not finalized.");
223 |     }
224 | 
225 |     return *(_module_vtables_map.at(module_name)->at(addr));
226 | 
227 | }
228 | 
229 | 
230 | const VTable& VTableFile::get_vtable(uint32_t index) const {
231 | 
232 |     // Make sure that the object is finalized.
233 |     if(!_is_finalized) {
234 |         throw runtime_error("VTableFile object was not finalized.");
235 |     }
236 | 
237 |     if(_vtables.size() <= index) {
238 |         throw runtime_error("Vtable index is out of range.");
239 |     }
240 | 
241 |     return _vtables[index];
242 | }
243 | 


--------------------------------------------------------------------------------
/src/vtable_update.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "vtable_update.h"
  3 | 
  4 | 
  5 | using namespace std;
  6 | 
  7 | #define DEBUG_PRINT_UPDATES 0
  8 | 
  9 | FctVTableUpdates::FctVTableUpdates(VTableFile &vtable_file,
 10 |                                    const string &module_name)
 11 |     : _vtable_file(vtable_file),
 12 |       _module_name(module_name) {
 13 | 
 14 | }
 15 | 
 16 | 
 17 | void FctVTableUpdates::add_vtable_updates(uint64_t fct_addr,
 18 |                                          const VTableUpdates &vtable_updates) {
 19 |     lock_guard<mutex> _(_mtx);
 20 | 
 21 |     if(_this_vtable_updates.find(fct_addr) == _this_vtable_updates.cend()) {
 22 |         _this_vtable_updates[fct_addr] = vtable_updates;
 23 |     }
 24 |     else {
 25 |         for(const auto &it : vtable_updates) {
 26 |             _this_vtable_updates[fct_addr].push_back(it);
 27 |         }
 28 |     }
 29 | }
 30 | 
 31 | 
 32 | void FctVTableUpdates::export_vtable_updates(const string &target_dir) {
 33 |     lock_guard<mutex> _(_mtx);
 34 | 
 35 |     stringstream temp_str;
 36 |     temp_str << target_dir << "/" << _module_name << ".vtableupdates";
 37 |     string target_file = temp_str.str();
 38 | 
 39 |     ofstream update_file;
 40 |     update_file.open(target_file);
 41 | 
 42 |     update_file << _module_name << endl;
 43 | 
 44 |     for(const auto &it : _this_vtable_updates) {
 45 |         uint64_t fct_addr = it.first;
 46 | 
 47 |         // Get all vtable updates that can be exported.
 48 |         VTableUpdates exportable_vtable_updates;
 49 |         for(const auto &vtable_update : it.second) {
 50 | 
 51 |             // Convert base expression ptr to string for export.
 52 |             ExpressionPtr base = vtable_update.base;
 53 |             string base_str;
 54 |             if(!convert_expression_str(base, base_str)) {
 55 |                 continue;
 56 |             }
 57 |             exportable_vtable_updates.push_back(vtable_update);
 58 |         }
 59 | 
 60 |         // Ignore functions that do not have any vtable updates.
 61 |         if(exportable_vtable_updates.size() == 0) {
 62 |             continue;
 63 |         }
 64 | 
 65 |         update_file << hex << fct_addr
 66 |                     << " ";
 67 | 
 68 |         // Export all vtable updates for this function.
 69 |         for(const auto &vtable_update : exportable_vtable_updates) {
 70 | 
 71 |             // Convert base expression ptr to string for export.
 72 |             ExpressionPtr base = vtable_update.base;
 73 |             string base_str;
 74 |             if(!convert_expression_str(base, base_str)) {
 75 |                 throw runtime_error("Not able to convert vtable update base"\
 76 |                                     "to string.");
 77 |             }
 78 | 
 79 |             const VTable &vtable = _vtable_file.get_vtable(vtable_update.index);
 80 |             uint64_t vtable_addr = vtable.addr;
 81 |             const string module_name = vtable.module_name;
 82 |             size_t offset = vtable_update.offset;
 83 | 
 84 |             update_file << module_name
 85 |                         << ":"
 86 |                         << hex << vtable_addr
 87 |                         << ":"
 88 |                         << base_str
 89 |                         << ":"
 90 |                         << dec << offset
 91 |                         << " ";
 92 | 
 93 | #if DEBUG_PRINT_UPDATES
 94 |             cout << "Fct Addr: 0x" << hex << fct_addr << endl;
 95 |             cout << "Module Name: " << module_name << endl;
 96 |             cout << "VTable Addr: 0x" << hex << vtable_addr << endl;
 97 |             cout << "Base: " << base_str << endl;
 98 |             cout << "Offset: 0x" << hex << offset << endl;
 99 | #endif
100 | 
101 |         }
102 |         update_file << endl;
103 |     }
104 |     update_file.close();
105 | }
106 | 
107 | 
108 | // Convert expression to string (only consider System V argument register
109 | // for now).
110 | bool FctVTableUpdates::convert_expression_str(ExpressionPtr base,
111 |                                               string &base_str) {
112 | 
113 |     if(*_rdi == *base) {
114 |         base_str = "RDI";
115 |     }
116 |     else if(*_rsi == *base) {
117 |         base_str = "RSI";
118 |     }
119 |     else if(*_rdx == *base) {
120 |         base_str = "RDX";
121 |     }
122 |     else if(*_rcx == *base) {
123 |         base_str = "RCX";
124 |     }
125 |     else if(*_r8 == *base) {
126 |         base_str = "R8";
127 |     }
128 |     else if(*_r9 == *base) {
129 |         base_str = "R9";
130 |     }
131 |     else {
132 |         return false;
133 |     }
134 | 
135 |     return true;
136 | }
137 | 
138 | 
139 | // Convert string to expression (only consider System V argument register
140 | // for now).
141 | bool FctVTableUpdates::convert_str_expression(const string &base_str,
142 |                                               ExpressionPtr &base) {
143 | 
144 |     if("RDI" == base_str) {
145 |         base = _rdi;
146 |     }
147 |     else if("RSI" == base_str) {
148 |         base = _rsi;
149 |     }
150 |     else if("RDX" == base_str) {
151 |         base = _rdx;
152 |     }
153 |     else if("RCX" == base_str) {
154 |         base = _rcx;
155 |     }
156 |     else if("R8" == base_str) {
157 |         base = _r8;
158 |     }
159 |     else if("R9" == base_str) {
160 |         base = _r9;
161 |     }
162 |     else {
163 |         return false;
164 |     }
165 | 
166 |     return true;
167 | }
168 | 
169 | 
170 | const VTableUpdates* FctVTableUpdates::get_vtable_updates(
171 |                                         const string &module_name,
172 |                                         uint64_t fct_addr) const {
173 |     lock_guard<mutex> _(_mtx);
174 | 
175 |     // Differentiate between the module that is currently analyzed and
176 |     // the imported modules.
177 |     if(_module_name == module_name) {
178 |         if(_this_vtable_updates.find(fct_addr) == _this_vtable_updates.cend()) {
179 |             return nullptr;
180 |         }
181 |         const VTableUpdates *temp = &(_this_vtable_updates.at(fct_addr));
182 |         return temp;
183 |     }
184 | 
185 |     if(_external_vtable_updates.find(module_name) ==
186 |             _external_vtable_updates.cend()) {
187 |         return nullptr;
188 |     }
189 |     const VTableUpdatesMap &vtable_updates_map =
190 |             _external_vtable_updates.at(module_name);
191 | 
192 |     if(vtable_updates_map.find(fct_addr) == vtable_updates_map.cend()) {
193 |         return nullptr;
194 |     }
195 |     const VTableUpdates *temp = &(vtable_updates_map.at(fct_addr));
196 |     return temp;
197 | }
198 | 
199 | 
200 | void FctVTableUpdates::import_updates(const string &target_file) {
201 |     lock_guard<mutex> _(_mtx);
202 | 
203 |     ifstream file(target_file + ".vtableupdates");
204 |     if(!file) {
205 |         throw runtime_error("Opening vtable update file failed.");
206 |     }
207 | 
208 |     VTableUpdatesMap vtable_updates_map;
209 |     string line;
210 | 
211 |     // Parse first line manually.
212 |     getline(file, line);
213 |     istringstream header_parser(line);
214 | 
215 |     // First entry of file is always the module name.
216 |     string import_module_name;
217 |     header_parser >> import_module_name;
218 |     if(header_parser.fail()) {
219 |         throw runtime_error("Parsing vtable update file failed.");
220 |     }
221 | 
222 |     // Parse each vtable update line which is given in the following form:
223 |     // <fct_addr_hex> <vtable_update_entry_1> ... <vtable_update_entry_n>
224 |     while(getline(file, line)) {
225 |         istringstream parser(line);
226 |         string update_entry;
227 |         VTableUpdates imported_updates;
228 | 
229 |         uint64_t fct_addr;
230 |         parser >> hex >> fct_addr;
231 |         if(parser.fail()) {
232 |             throw runtime_error("Parsing vtable update file failed.");
233 |         }
234 | 
235 |         // Parse each vtable update entry which is given in the following form:
236 |         // <module_name>:<vtable_addr_hex>:<arg_reg>:<offset_dec>
237 |         while(parser >> update_entry) {
238 |             if(parser.fail()) {
239 |                 throw runtime_error("Parsing vtable update file failed.");
240 |             }
241 | 
242 |             string module_name;
243 |             string vtable_addr_str;
244 |             string arg_reg_str;
245 |             string offset_str;
246 |             uint64_t vtable_addr;
247 |             size_t offset;
248 | 
249 |             istringstream parser_entry(update_entry);
250 |             if(parser_entry.fail()) {
251 |                 throw runtime_error("Parsing vtable update file failed.");
252 |             }
253 |             getline(parser_entry, module_name, ':');
254 |             getline(parser_entry, vtable_addr_str, ':');
255 |             getline(parser_entry, arg_reg_str, ':');
256 |             getline(parser_entry, offset_str, ':');
257 | 
258 |             istringstream parser_vtable_addr(vtable_addr_str);
259 |             if(parser_vtable_addr.fail()) {
260 |                 throw runtime_error("Parsing vtable update file failed.");
261 |             }
262 |             parser_vtable_addr >> hex >> vtable_addr;
263 | 
264 |             istringstream parser_offset(offset_str);
265 |             if(parser_offset.fail()) {
266 |                 throw runtime_error("Parsing vtable update file failed.");
267 |             }
268 |             parser_offset >> dec >> offset;
269 | 
270 |             // Convert read data into the local data structure.
271 |             const VTable &vtable = _vtable_file.get_vtable(module_name,
272 |                                                            vtable_addr);
273 | 
274 |             ExpressionPtr base;
275 |             if(!convert_str_expression(arg_reg_str, base)) {
276 |                 throw runtime_error("Parsing vtable update file failed.");
277 |             }
278 | 
279 |             VTableUpdate vtable_update;
280 |             vtable_update.index = vtable.index;
281 |             vtable_update.offset = offset;
282 |             vtable_update.base = base;
283 |             imported_updates.push_back(vtable_update);
284 |         }
285 |         vtable_updates_map[fct_addr] = imported_updates;
286 |     }
287 | 
288 |     _external_vtable_updates[import_module_name] = vtable_updates_map;
289 | }
290 | 


--------------------------------------------------------------------------------
/src/vtv_vcall_gt.cpp:
--------------------------------------------------------------------------------
 1 | #include "vtv_vcall_gt.h"
 2 | 
 3 | using namespace std;
 4 | 
 5 | 
 6 | VTVVcallsFile::VTVVcallsFile(const string &module_name)
 7 |     : _module_name(module_name) {}
 8 | 
 9 | 
10 | void VTVVcallsFile::add_vtv_vcalls(const VTVVcalls &vtv_vcalls) {
11 | 
12 |     for(const auto &it : vtv_vcalls) {
13 |         uint64_t verify_addr = it.second.addr_verify_call;
14 |         if(_vtv_vcalls.find(verify_addr) != _vtv_vcalls.cend()) {
15 |             for(uint64_t it_addr : it.second.addr_vcalls) {
16 |                 _vtv_vcalls[verify_addr].addr_vcalls.insert(it_addr);
17 |             }
18 |         }
19 |         else {
20 |             VTVVcall temp;
21 |             temp.addr_verify_call = verify_addr;
22 |             temp.vtbl_obj = nullptr;
23 |             temp.addr_vcalls = it.second.addr_vcalls;
24 |             _vtv_vcalls[verify_addr] = temp;
25 |         }
26 |     }
27 | }
28 | 
29 | 
30 | void VTVVcallsFile::export_vtv_vcalls(const string &target_dir) {
31 | 
32 |     stringstream temp_str;
33 |     temp_str << target_dir << "/" << _module_name << ".vtv_vcalls";
34 |     string target_file = temp_str.str();
35 | 
36 |     ofstream vtv_file;
37 |     vtv_file.open(target_file);
38 | 
39 |     vtv_file << _module_name << endl;
40 | 
41 |     for(const auto &it_vtv : _vtv_vcalls) {
42 |         vtv_file << hex << it_vtv.second.addr_verify_call;
43 | 
44 |         for(uint64_t vcall_addr : it_vtv.second.addr_vcalls) {
45 |             vtv_file << " " << hex << vcall_addr;
46 |         }
47 |         vtv_file << endl;
48 |     }
49 | 
50 |     vtv_file.close();
51 | }
52 | 
53 | 
54 | const VTVVcalls& VTVVcallsFile::get_vtv_vcalls() const {
55 |     return _vtv_vcalls;
56 | }
57 | 


--------------------------------------------------------------------------------