├── .gitignore ├── __init__.py ├── tests ├── test1 ├── test2 ├── test1.c ├── test2.c └── test3.c ├── .gitmodules ├── DiffView.py ├── binjaplug.py ├── ui.py ├── LICENSE ├── README.md ├── dockwidgets ├── widget.py ├── DiffWidget.py └── DiffView.py ├── functionTypes.py ├── diff.py └── instructionComparator.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from . import DiffView 2 | 3 | dv = DiffView.DiffView() -------------------------------------------------------------------------------- /tests/test1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riverloopsec/ninjadiff/HEAD/tests/test1 -------------------------------------------------------------------------------- /tests/test2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riverloopsec/ninjadiff/HEAD/tests/test2 -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "hashashin"] 2 | path = hashashin 3 | url = git@github.com:riverloopsec/hashashin.git 4 | -------------------------------------------------------------------------------- /DiffView.py: -------------------------------------------------------------------------------- 1 | from . import ui 2 | 3 | 4 | class DiffView: 5 | def __init__(self): 6 | ui.initialize_ui() -------------------------------------------------------------------------------- /tests/test1.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void function1(int num) { 4 | for(int i = 0; i < num; ++i) { 5 | if(i % 2 == 0) { 6 | printf("%d", i); 7 | } 8 | printf("hello world!\n"); 9 | } 10 | } 11 | 12 | void function2() { 13 | int i = 0; 14 | while(i < 50) { 15 | ++i; 16 | if (i > 40) { 17 | printf("%p", &i); 18 | } 19 | } 20 | } 21 | 22 | int main() { 23 | function1(10); 24 | function2(); 25 | } -------------------------------------------------------------------------------- /tests/test2.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void function1(int num) { 4 | for(int i = 0; i < num; ++i) { 5 | if(i % 2 == 0) { 6 | printf("%d", i); 7 | } 8 | printf("hello world!\n"); 9 | } 10 | } 11 | 12 | void function2() { 13 | int i = 50; 14 | while(i > 0) { 15 | --i; 16 | if (i > 40) { 17 | printf("%p", &i); 18 | } 19 | } 20 | } 21 | 22 | int main() { 23 | function1(10); 24 | function2(); 25 | } -------------------------------------------------------------------------------- /tests/test3.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void function1(int num) { 4 | for(int i = 0; i < num; ++i) { 5 | if(i % 2 == 0) { 6 | printf("%d", i); 7 | } 8 | printf("hello world!\n"); 9 | } 10 | } 11 | 12 | void function2() { 13 | int i = 0; 14 | while(i < 50) { 15 | ++i; 16 | if (i > 40) { 17 | printf("%p", &i); 18 | } 19 | } 20 | } 21 | 22 | void * function3(void * val) { 23 | return val; 24 | } 25 | 26 | int main() { 27 | function1(10); 28 | function2(); 29 | function3(&function1); 30 | } -------------------------------------------------------------------------------- /binjaplug.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import time 4 | import platform 5 | import sys 6 | import traceback 7 | import tempfile 8 | 9 | from binaryninja import Architecture, BinaryView, Symbol, SymbolType, Type, Structure, StructureType, FunctionGraphType, \ 10 | LowLevelILOperation, MediumLevelILOperation, core_ui_enabled 11 | 12 | if core_ui_enabled(): 13 | try: 14 | # create the widgets, debugger, etc. 15 | from .ui import initialize_ui 16 | 17 | initialize_ui() 18 | have_ui = True 19 | except (ModuleNotFoundError, ImportError, IndexError) as e: 20 | have_ui = False 21 | print(e) 22 | print("Could not initialize UI, using headless mode only") 23 | else: 24 | have_ui = False -------------------------------------------------------------------------------- /ui.py: -------------------------------------------------------------------------------- 1 | from PySide6.QtCore import Qt 2 | from binaryninja.plugin import PluginCommand 3 | from binaryninja import execute_on_main_thread_and_wait 4 | from binaryninjaui import DockHandler, ViewType 5 | from .dockwidgets import DiffWidget, DiffView, widget 6 | 7 | 8 | def cb_diff(bv): 9 | def switch_view(): 10 | dh = DockHandler.getActiveDockHandler() 11 | vf = dh.getViewFrame() 12 | vf.setViewType('Diff:' + bv.view_type) 13 | 14 | execute_on_main_thread_and_wait(switch_view) 15 | 16 | 17 | def initialize_ui(): 18 | widget.register_dockwidget(DiffWidget.DiffDestWidget, "Diff", Qt.LeftDockWidgetArea, Qt.Vertical, False) 19 | 20 | PluginCommand.register("Diff\\Run", "Select file to diff", cb_diff) 21 | 22 | ViewType.registerViewType(DiffView.DiffViewType()) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 River Loop Security 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NinjaDiff 2 | 3 | NinjaDiff is a binary diffing plugin for Binary Ninja. It aims to graphically display changes in differing binary executables. Check out our [blog post](https://www.riverloopsecurity.com/blog/2021/02/binary-diffing/) to read more about it's design! 4 | 5 | 6 | This plugin uses [Hashashin](https://github.com/riverloopsec/hashashin) as a niave metric for binary similarity. First, these hashes are used in conjunction with a graph similarity based approach to "align" similar functions accross binaries, then High Level IL instructions are compared line by line to give more granular information about subtle differences between the two binaries. 7 | 8 | 9 | ### Installation 10 | 11 | Copy the contents of this repository into your Binary Ninja plugin directory (`Tools`--> `Open Plugin Folder...` in Binary Ninja) 12 | 13 | The easiest way to do this is with `git clone --recursive …`, take care to ensure that the [Hashashin](https://github.com/riverloopsec/hashashin) sub-module get's pulled along with the rest of the repository, otherwise NinjaDiff will not function correctly. 14 | 15 | 16 | 17 | ### Usage 18 | 19 | 1. Open your source binary as usual in Binary Ninja 20 | 2. Select `Diff` view from the dropdown in the lower right corner 21 | 3. Select the destination binary in the file selection menu 22 | 4. The destination binary will be opened in a split view, and the diffing process will begin (this may take some time on large binaries) 23 | 5. Once diffing is complete, any differences which are found will be added as tags, and will be highlighted red in the split screen view 24 | 25 | 26 | 27 | ### Known Bugs 28 | 29 | * Due to limitations in Binary Ninja, some binaries will map multiple High Level IL instructions to a single address, which leads to false positives when these instructions are compared across binaries 30 | * Due to similar limitations, some instructions may be highlighted or tagged multiple times if it's address collides with another instruction 31 | * Certain HLIL instructions with complex or deeply nested ASTs may lead to false negatives due to binary artifacts 32 | -------------------------------------------------------------------------------- /dockwidgets/widget.py: -------------------------------------------------------------------------------- 1 | from PySide6.QtWidgets import QApplication, QWidget 2 | from binaryninjaui import DockHandler 3 | import sys 4 | import traceback 5 | 6 | debug_dockwidgets = [] 7 | 8 | def create_widget(widget_class, name, parent, data, *args): 9 | # It is imperative this function return *some* value because Shiboken will try to deref what we return 10 | # If we return nothing (or throw) there will be a null pointer deref (and we won't even get to see why) 11 | # So in the event of an error or a nothing, return an empty widget that at least stops the crash 12 | try: 13 | widget = widget_class(parent, name, data, *args) 14 | 15 | if not widget: 16 | raise Exception('expected widget, got None') 17 | 18 | global debug_dockwidgets 19 | 20 | found = False 21 | for (bv, widgets) in debug_dockwidgets: 22 | if bv == data: 23 | widgets[name] = widget 24 | found = True 25 | 26 | if not found: 27 | debug_dockwidgets.append((data, { 28 | name: widget 29 | })) 30 | 31 | widget.destroyed.connect(lambda destroyed: destroy_widget(destroyed, widget, data, name)) 32 | 33 | return widget 34 | except Exception as e: 35 | traceback.print_exc(file=sys.stderr) 36 | return QWidget(parent) 37 | 38 | def destroy_widget(destroyed, old, data, name): 39 | # Gotta be careful to delete the correct widget here 40 | for (bv, widgets) in debug_dockwidgets: 41 | if bv == data: 42 | for (name, widget) in widgets.items(): 43 | if widget == old: 44 | # If there are no other references to it, this will be the only one and the call 45 | # will delete it and invoke __del__. 46 | widgets.pop(name) 47 | return 48 | 49 | 50 | def register_dockwidget(widget_class, name, area, orientation, default_visibility, *args): 51 | dock_handler = DockHandler.getActiveDockHandler() 52 | 53 | # create main debugger controls 54 | dock_handler.addDockWidget(name, lambda n,p,d: create_widget(widget_class, n, p, d, *args), area, orientation, default_visibility) 55 | 56 | def get_dockwidget(data, name): 57 | for (bv, widgets) in debug_dockwidgets: 58 | if bv == data: 59 | return widgets.get(name) 60 | 61 | return None 62 | 63 | -------------------------------------------------------------------------------- /dockwidgets/DiffWidget.py: -------------------------------------------------------------------------------- 1 | from PySide6 import QtCore 2 | from PySide6.QtCore import Qt, QAbstractItemModel, QModelIndex, QSize 3 | from PySide6.QtGui import QPalette, QFontMetricsF 4 | from PySide6.QtWidgets import QApplication, QHBoxLayout, QVBoxLayout, QWidget, QTableView, QItemDelegate, QStyle, QHeaderView, QAbstractItemView 5 | 6 | from binaryninja import Endianness, BinaryView 7 | import binaryninjaui 8 | from binaryninjaui import DockContextHandler, UIActionHandler 9 | 10 | from . import widget 11 | from . import DiffView 12 | 13 | 14 | class DiffDestWidget(QWidget, DockContextHandler): 15 | def __init__(self, parent, name, data): 16 | if not type(data) == BinaryView: 17 | raise Exception('expected widget data to be a BinaryView') 18 | 19 | self.bv = data 20 | self.destination_editor = None 21 | self.dv = None 22 | 23 | QWidget.__init__(self, parent) 24 | DockContextHandler.__init__(self, self, name) 25 | self.actionHandler = UIActionHandler() 26 | self.actionHandler.setupActionHandler(self) 27 | 28 | self.table = QTableView(self) 29 | 30 | 31 | # self.table.setSortingEnabled(True) 32 | self.table.setSelectionBehavior(QAbstractItemView.SelectionBehavior.SelectRows) 33 | self.table.setSelectionMode(QAbstractItemView.ExtendedSelection) 34 | 35 | self.table.verticalHeader().setSectionResizeMode(QHeaderView.ResizeToContents) 36 | self.table.verticalHeader().setVisible(False) 37 | 38 | self.table.setHorizontalScrollMode(QAbstractItemView.ScrollPerPixel) 39 | self.table.setVerticalScrollMode(QAbstractItemView.ScrollPerPixel) 40 | 41 | self.table.resizeColumnsToContents() 42 | self.table.resizeRowsToContents() 43 | 44 | layout = QVBoxLayout() 45 | layout.setContentsMargins(0, 0, 0, 0) 46 | layout.setSpacing(0) 47 | layout.addWidget(self.table) 48 | self.setLayout(layout) 49 | 50 | def notifyOffsetChanged(self, offset): 51 | # linear_views = self.getParentWindow().findChildren(binaryninjaui.LinearView) 52 | if self.dv is None: 53 | dvs = self.getParentWindow().findChildren(binaryninjaui.View) 54 | for dv in dvs: 55 | if isinstance(dv, DiffView.DiffView): 56 | self.dv = dv 57 | 58 | if self.dv is not None: 59 | self.dv.navigate(offset) 60 | 61 | def contextMenuEvent(self, event): 62 | self.m_contextMenuManager.show(self.m_menu, self.actionHandler) 63 | 64 | def shouldBeVisible(self, view_frame): 65 | if view_frame is None: 66 | return False 67 | else: 68 | return True 69 | 70 | -------------------------------------------------------------------------------- /functionTypes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright 2020 River Loop Security LLC, All Rights Reserved 4 | # Author Rylan O'Connell 5 | 6 | from binaryninja import HighLevelILBasicBlock 7 | from binaryninja import HighLevelILInstruction 8 | from binaryninja import Function 9 | 10 | from typing import List, Dict 11 | 12 | from .hashashin.lsh import brittle_hash 13 | 14 | 15 | class BasicBlockWrapper: 16 | def __init__(self, bb: HighLevelILBasicBlock, bb_hash: str): 17 | self.address: int = bb.start + bb.function.start 18 | self.instructions: List[HighLevelILInstruction] = [instr for instr in bb] 19 | self.hash: str = bb_hash 20 | self.source_block: HighLevelILBasicBlock = bb # TODO: inherit/initialize values from HighLevelILBasicBlock 21 | 22 | def __eq__(self, other): 23 | if type(self) == type(other): 24 | return self.hash == other.hash 25 | return False 26 | 27 | def __hash__(self): 28 | return int(self.hash, 16) 29 | 30 | 31 | # minimal graph class to avoid dependency on networkx 32 | class FunctionWrapper: 33 | def __init__(self, function: Function): 34 | self.basic_blocks: List[BasicBlockWrapper] = [] 35 | self.edges: Dict[BasicBlockWrapper, List[BasicBlockWrapper]] = {} 36 | self.address: int = function.start 37 | self.source_function: Function = function # TODO: inherit/initialize properties from Function 38 | 39 | # create BasicBlock objects to represent all blocks in the function 40 | for bb in function.hlil.basic_blocks: 41 | self.add_basic_block(bb) 42 | 43 | def add_basic_block(self, bb: HighLevelILBasicBlock): 44 | bb_hash = brittle_hash(self.source_function.view, bb) 45 | node = BasicBlockWrapper(bb, bb_hash) 46 | 47 | # ensure we don't add a basic block if we've already "discovered" it 48 | if node in self.basic_blocks: 49 | return 50 | 51 | self.basic_blocks.append(node) 52 | 53 | for edge in bb.outgoing_edges: 54 | target_block = edge.target 55 | target_hash = brittle_hash(self.source_function.view, target_block) 56 | target_node = BasicBlockWrapper(target_block, target_hash) 57 | 58 | # recursively discover child nodes 59 | if target_node not in self.basic_blocks: 60 | self.add_basic_block(target_block) 61 | 62 | self.add_edge(node, target_node) 63 | 64 | def add_edge(self, u: BasicBlockWrapper, v: BasicBlockWrapper): 65 | if u in self.edges.keys(): 66 | self.edges[u].append(v) 67 | else: 68 | self.edges[u] = [v] 69 | 70 | def has_node(self, node: BasicBlockWrapper): 71 | return node in self.basic_blocks 72 | 73 | def has_edge(self, u: BasicBlockWrapper, v: BasicBlockWrapper): 74 | if u in self.edges.keys(): 75 | for child in self.edges[u]: 76 | if child == v: 77 | return True 78 | return False 79 | 80 | def number_of_basic_blocks(self): 81 | return len(self.basic_blocks) 82 | 83 | def number_of_edges(self): 84 | return len(self.edges.values()) 85 | 86 | # TODO: experiment with similarity metrics 87 | def distance(self, other) -> float: 88 | distance = 0.0 89 | 90 | for block in self.basic_blocks: 91 | if not other.has_node(block): 92 | distance += 1 93 | for block in other.basic_blocks: 94 | if not self.has_node(block): 95 | distance += 1 96 | 97 | for k in self.edges.keys(): 98 | for v in self.edges[k]: 99 | if not other.has_edge(k, v): 100 | distance += 0.1 101 | for k in other.edges.keys(): 102 | for v in other.edges[k]: 103 | if not self.has_edge(k, v): 104 | distance += 0.1 105 | 106 | return distance 107 | -------------------------------------------------------------------------------- /dockwidgets/DiffView.py: -------------------------------------------------------------------------------- 1 | from PySide6.QtCore import Qt, QTimer 2 | from PySide6.QtWidgets import QApplication, QVBoxLayout, QWidget, QSplitter, QLabel 3 | 4 | import re 5 | import os 6 | 7 | import binaryninjaui 8 | from binaryninja import BinaryView, core_version, interaction, BinaryViewType, plugin, Function 9 | from binaryninjaui import View, ViewType, UIAction, LinearView, ViewFrame, TokenizedTextView, DockHandler 10 | 11 | from .. import diff 12 | 13 | (major, minor, buildid) = re.match(r'^(\d+)\.(\d+)\.?(\d+)?', core_version()).groups() 14 | major = int(major) 15 | minor = int(minor) 16 | buildid = int(buildid) if buildid is not None else 0xffffffff 17 | 18 | 19 | class DiffView(QWidget, View): 20 | def __init__(self, parent, data): 21 | if not type(data) == BinaryView: 22 | raise Exception('expected widget data to be a BinaryView') 23 | 24 | self.src_bv: BinaryView = data 25 | 26 | fname = interaction.get_open_filename_input('File to Diff:') 27 | print('opening {}...'.format(fname)) 28 | 29 | # open secondary file and begin non-blocking analysis 30 | self.dst_bv: BinaryView = BinaryViewType.get_view_of_file(fname, update_analysis=False) 31 | if self.dst_bv is None: 32 | raise Exception('invalid file path') 33 | 34 | self.dst_bv.update_analysis() 35 | 36 | # begin diffing process in background thread 37 | differ = diff.BackgroundDiffer(self.src_bv, self.dst_bv) 38 | differ.start() 39 | self.address_map = differ.address_map 40 | 41 | QWidget.__init__(self, parent) 42 | View.__init__(self) 43 | 44 | self.setupView(self) 45 | 46 | self.current_offset = 0 47 | 48 | self.splitter = QSplitter(Qt.Orientation.Horizontal, self) 49 | 50 | frame = ViewFrame.viewFrameForWidget(self) 51 | self.dst_editor = LinearView(self.dst_bv, frame) 52 | self.dst_editor.setAccessibleName('Destination Editor') 53 | self.src_editor = LinearView(self.src_bv, frame) 54 | self.src_editor.setAccessibleName('Source Editor') 55 | 56 | # sync location between src and dst panes 57 | self.sync = True 58 | 59 | self.binary_text = TokenizedTextView(self, self.src_bv) 60 | self.is_raw_disassembly = False 61 | self.raw_address = 0 62 | 63 | self.is_navigating_history = False 64 | self.memory_history_addr = 0 65 | 66 | small_font = QApplication.font() 67 | small_font.setPointSize(11) 68 | 69 | self.splitter.addWidget(self.src_editor) 70 | self.splitter.addWidget(self.dst_editor) 71 | 72 | # Equally sized 73 | self.splitter.setSizes([0x7fffffff, 0x7fffffff]) 74 | 75 | layout = QVBoxLayout() 76 | layout.setContentsMargins(0, 0, 0, 0) 77 | layout.setSpacing(0) 78 | layout.addWidget(self.splitter, 100) 79 | self.setLayout(layout) 80 | 81 | self.needs_update = True 82 | self.update_timer = QTimer(self) 83 | self.update_timer.setInterval(200) 84 | self.update_timer.setSingleShot(False) 85 | self.update_timer.timeout.connect(lambda: self.updateTimerEvent()) 86 | 87 | def goToReference(self, func: Function, source: int, target: int): 88 | return self.navigate(func.start) 89 | 90 | def navigateToFunction(self, func, offset): 91 | return self.navigate(offset) 92 | 93 | def navigate(self, addr): 94 | function = self.src_bv.get_function_at(addr) 95 | function_addr = None if function is None else function.start 96 | if function_addr is not None: 97 | status = self.src_editor.navigate(function_addr) 98 | 99 | dst_addr = self.address_map.src2dst(function_addr) 100 | if dst_addr is not None: 101 | self.dst_editor.navigate(dst_addr) 102 | return status 103 | 104 | return False 105 | 106 | 107 | def getData(self): 108 | return self.src_bv 109 | 110 | def getFont(self): 111 | return binaryninjaui.getMonospaceFont(self) 112 | 113 | def getCurrentOffset(self): 114 | offset = self.src_editor.getCurrentOffset() 115 | return offset 116 | 117 | def getSelectionOffsets(self): 118 | if not self.is_raw_disassembly: 119 | return self.src_editor.getSelectionOffsets() 120 | return (self.raw_address, self.raw_address) 121 | 122 | def getCurrentArchitecture(self): 123 | if not self.is_raw_disassembly: 124 | return self.src_editor.getCurrentArchitecture() 125 | return None 126 | 127 | def getCurrentLowLevelILFunction(self): 128 | if not self.is_raw_disassembly: 129 | return self.src_editor.getCurrentLowLevelILFunction() 130 | return None 131 | 132 | def getCurrentMediumLevelILFunction(self): 133 | if not self.is_raw_disassembly: 134 | return self.src_editor.getCurrentMediumLevelILFunction() 135 | return None 136 | 137 | def shouldBeVisible(self, view_frame): 138 | if view_frame is None: 139 | return False 140 | else: 141 | return True 142 | 143 | 144 | class DiffViewType(ViewType): 145 | # executed at plugin load time from from ui.py ViewType.registerViewType() 146 | def __init__(self): 147 | super(DiffViewType, self).__init__("Diff", "Diff") 148 | 149 | def getPriority(self, data, filename): 150 | return 1 151 | 152 | # executed when user clicks "Debugger" from dropdown with binary views 153 | def create(self, data, view_frame): 154 | return DiffView(view_frame, data) 155 | 156 | -------------------------------------------------------------------------------- /diff.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright 2019 River Loop Security LLC, All Rights Reserved 4 | # Author Rylan O'Connell 5 | 6 | import binaryninja as binja 7 | 8 | import math 9 | from typing import Tuple, List, Dict 10 | 11 | from . import functionTypes, instructionComparator 12 | 13 | Binary_View = binja.binaryview.BinaryView 14 | 15 | 16 | class BackgroundDiffer(binja.BackgroundTaskThread): 17 | def __init__(self, src_bv: Binary_View, dst_bv: Binary_View): 18 | binja.BackgroundTaskThread.__init__(self, 'Diffing...', True) 19 | self.src_bv = src_bv 20 | self.dst_bv = dst_bv 21 | self.address_map = AddressMap() 22 | 23 | def run(self): 24 | # ensure both views have finished processing before we continue 25 | self.src_bv.update_analysis_and_wait() 26 | self.dst_bv.update_analysis_and_wait() 27 | 28 | print('started diffing...') 29 | diff_tt = self.src_bv.create_tag_type('Difference', '🚫') 30 | new_function_tt = self.src_bv.create_tag_type('New function', '➕') 31 | 32 | dst_functions = self.ingest(self.dst_bv) 33 | src_functions = self.ingest(self.src_bv) 34 | 35 | # attempt to match destination functions to source functions 36 | for src_function in src_functions: 37 | min_pairing, distance = self.get_min_pair(src_function, dst_functions) 38 | if min_pairing is not None: 39 | print('diffing {} against {}'.format(src_function.source_function.name, min_pairing.source_function.name)) 40 | 41 | # if pairing failed (ie. no similar functions in the dest binary), assume it is not present in dest 42 | if min_pairing is None: 43 | print('tagging new function at {}...'.format(hex(src_function.address))) 44 | tag = src_function.source_function.create_tag(new_function_tt, 'No matching functions') 45 | src_function.source_function.add_user_address_tag(src_function.address, tag) 46 | for bb in src_function.basic_blocks: 47 | for instr in bb.source_block: 48 | src_function.source_function.set_user_instr_highlight( 49 | instr.address, 50 | binja.highlight.HighlightStandardColor.RedHighlightColor 51 | ) 52 | continue 53 | 54 | # attempt to build a mapping between addresses in the source and destination binaries 55 | self.address_map.add_mapping(src_addr=src_function.address, dst_addr=min_pairing.address) 56 | src_instrs = list(src_function.source_function.hlil.instructions) 57 | dst_instrs = list(min_pairing.source_function.hlil.instructions) 58 | for instr_index in range(min(len(src_instrs), len(dst_instrs))): 59 | src_instr = src_instrs[instr_index] 60 | dst_instr = dst_instrs[instr_index] 61 | 62 | if instructionComparator.compare_instructions(src_instr, dst_instr): 63 | src_function.source_function.set_user_instr_highlight( 64 | src_instr.address, 65 | binja.highlight.HighlightStandardColor.GreenHighlightColor 66 | ) 67 | 68 | min_pairing.source_function.set_user_instr_highlight( 69 | dst_instr.address, 70 | binja.highlight.HighlightStandardColor.GreenHighlightColor 71 | ) 72 | 73 | else: 74 | print('tagging instruction diff at {}'.format(hex(src_instr.address))) 75 | self.address_map.add_mapping(src_addr=src_instr.address, dst_addr=dst_instr.address) 76 | tag = src_function.source_function.create_tag(diff_tt, 'Instruction differs') 77 | src_function.source_function.add_user_address_tag(src_instr.address, tag) 78 | src_function.source_function.set_user_instr_highlight( 79 | src_instr.address, 80 | binja.highlight.HighlightStandardColor.RedHighlightColor 81 | ) 82 | 83 | min_pairing.source_function.set_user_instr_highlight( 84 | dst_instr.address, 85 | binja.highlight.HighlightStandardColor.RedHighlightColor 86 | ) 87 | 88 | print('finished diffing') 89 | 90 | def get_min_pair(self, function: functionTypes.FunctionWrapper, pairings: List[functionTypes.FunctionWrapper]) -> Tuple[functionTypes.FunctionWrapper, float]: 91 | min_distance = math.inf 92 | min_pairing = None 93 | 94 | for pairing in pairings: 95 | distance = function.distance(pairing) 96 | # only accept pairings "close" to the original (accounting for function size) 97 | if (distance < min_distance) and \ 98 | (distance < 0.40 * (function.number_of_basic_blocks() + .1 * function.number_of_edges())): 99 | min_distance = distance 100 | min_pairing = pairing 101 | 102 | return min_pairing, min_distance 103 | 104 | def ingest(self, bv: Binary_View) -> List[functionTypes.FunctionWrapper]: 105 | functions = [] 106 | for function in bv.functions: 107 | # ignore small functions to minimize false positives 108 | if len(function.basic_blocks) < 5: 109 | continue 110 | 111 | function_with_metadata = functionTypes.FunctionWrapper(function) 112 | functions.append(function_with_metadata) 113 | 114 | return functions 115 | 116 | 117 | class AddressMap: 118 | def __init__(self): 119 | self.src_to_dst = {} 120 | self.dst_to_src = {} 121 | 122 | def add_mapping(self, src_addr, dst_addr): 123 | self.src_to_dst[src_addr] = dst_addr 124 | self.dst_to_src[dst_addr] = src_addr 125 | 126 | def src2dst(self, src_addr): 127 | try: 128 | return self.src_to_dst[src_addr] 129 | except KeyError: 130 | return None 131 | 132 | def dst2src(self, dst_addr): 133 | try: 134 | return self.dst_to_src[dst_addr] 135 | except KeyError: 136 | return None 137 | -------------------------------------------------------------------------------- /instructionComparator.py: -------------------------------------------------------------------------------- 1 | import binaryninja as binja 2 | 3 | 4 | def compare_instructions(src_instr: binja.HighLevelILInstruction, dst_instr: binja.HighLevelILInstruction) -> bool: 5 | if src_instr.operation != dst_instr.operation: 6 | return False 7 | 8 | operation = src_instr.operation 9 | if operation == binja.HighLevelILOperation.HLIL_CALL: 10 | return compare_calls(src_instr, dst_instr) 11 | 12 | if (operation == binja.HighLevelILOperation.HLIL_ASSIGN) or (operation == binja.HighLevelILOperation.HLIL_VAR_INIT): 13 | src_var, src_val = src_instr.operands 14 | dst_var, dst_val = dst_instr.operands 15 | 16 | # left hand side of assignment operation can be variable, field, etc. 17 | if type(src_var) == type(dst_var): 18 | if type(src_var) == binja.Variable: 19 | if src_var.type != dst_var.type: 20 | return False 21 | elif type(src_var) == binja.highlevelil.HighLevelILInstruction: 22 | if src_var.operation != dst_var.operation: 23 | return False 24 | else: 25 | return False 26 | 27 | if src_val.operation != dst_val.operation: 28 | return False 29 | 30 | elif operation == binja.HighLevelILOperation.HLIL_CALL: 31 | return compare_calls(src_instr, dst_instr) 32 | 33 | # TODO: check other arithemetic operations (ie. DIV, MOD, etc.) 34 | if (operation == binja.HighLevelILOperation.HLIL_ADD) or \ 35 | (operation == binja.HighLevelILOperation.HLIL_SUB) or \ 36 | (operation == binja.HighLevelILOperation.HLIL_MUL): 37 | return compare_arithmetic(src_instr, dst_instr) 38 | 39 | # ignore branch targets, comparisions should only be based on the condition 40 | elif (operation == binja.HighLevelILOperation.HLIL_WHILE) or (operation == binja.HighLevelILOperation.HLIL_IF): 41 | src_condition = src_instr.operands[0] 42 | dst_condition = dst_instr.operands[0] 43 | 44 | if len(src_condition.operands) != len(dst_condition.operands): 45 | return False 46 | 47 | for i in range(len(src_condition.operands)): 48 | src_operand = src_condition.operands[i] 49 | dst_operand = dst_condition.operands[i] 50 | if (type(src_operand) != binja.HighLevelILInstruction) or (type(dst_operand) != binja.HighLevelILInstruction): 51 | continue 52 | if (src_operand.operation == binja.HighLevelILOperation.HLIL_STRUCT_FIELD or 53 | src_operand.operation == binja.HighLevelILOperation.HLIL_VAR) and \ 54 | (dst_operand.operation == binja.HighLevelILOperation.HLIL_STRUCT_FIELD or 55 | dst_operand.operation == binja.HighLevelILOperation.HLIL_VAR): 56 | continue 57 | if src_operand != dst_operand: 58 | return False 59 | return True 60 | 61 | # probably nothing address specific 62 | return src_instr == dst_instr 63 | 64 | 65 | def compare_derefs(src_instr: binja.HighLevelILInstruction, dst_instr: binja.HighLevelILInstruction) -> bool: 66 | src_pointer = src_instr.src 67 | dst_pointer = dst_instr.src 68 | if src_pointer.operation != dst_pointer.operation: 69 | return False 70 | 71 | operation = src_pointer.operation 72 | # TODO: extract strings/constants 73 | if operation == binja.HighLevelILOperation.HLIL_CONST_PTR: 74 | pass 75 | elif operation == binja.HighLevelILOperation.HLIL_VAR: 76 | return src_pointer.var.type == dst_pointer.var.type 77 | elif (operation == binja.HighLevelILOperation.HLIL_ADD) or \ 78 | (operation == binja.HighLevelILOperation.HLIL_SUB) or \ 79 | (operation == binja.HighLevelILOperation.HLIL_MUL): 80 | return compare_arithmetic(src_pointer, dst_pointer) 81 | 82 | else: 83 | print('[!] unexpected pointer type {} at {}'.format(operation, hex(src_instr.address))) 84 | return False 85 | 86 | def compare_arithmetic(src_instr: binja.HighLevelILInstruction, dst_instr: binja.HighLevelILInstruction) -> bool: 87 | print(src_instr) 88 | print(dst_instr) 89 | num1_src, num2_src = src_instr.operands 90 | num1_dst, num2_dst = dst_instr.operands 91 | 92 | if (num1_src.operation != num2_src.operation) or (num1_dst.operation != num2_dst.operation): 93 | return False 94 | 95 | # TODO: check for floats as well 96 | # extract numeric constants 97 | if num1_src.operation == binja.HighLevelILOperation.HLIL_CONST: 98 | val1 = num1_src.constant 99 | val2 = num2_src.constant 100 | if val1 != val2: 101 | return False 102 | if num1_dst.operation == binja.HighLevelILOperation.HLIL_CONST: 103 | val1 = num1_dst.constant 104 | val2 = num2_dst.constant 105 | if val1 != val2: 106 | return False 107 | 108 | # compare variable refrences 109 | if num1_src.operation == binja.HighLevelILOperation.HLIL_CONST_PTR: 110 | return compare_derefs(src_instr, dst_instr) 111 | if num1_dst.operation == binja.HighLevelILOperation.HLIL_CONST_PTR: 112 | return compare_derefs(src_instr, dst_instr) 113 | 114 | return True 115 | 116 | 117 | def compare_calls(src_instr: binja.HighLevelILInstruction, dst_instr: binja.HighLevelILInstruction) -> bool: 118 | src_function = src_instr.operands[0] 119 | dst_function = dst_instr.operands[0] 120 | # TODO: verify the function being called is the same 121 | 122 | src_args = src_instr.operands[1] 123 | dst_args = dst_instr.operands[1] 124 | if len(src_args) != len(dst_args): 125 | return False 126 | for i in range(len(src_args)): 127 | src_arg = src_args[i] 128 | dst_arg = dst_args[i] 129 | if (type(src_arg) == binja.HighLevelILInstruction) and (type(dst_arg) == binja.HighLevelILInstruction): 130 | if src_arg.operation != dst_arg.operation: 131 | return False 132 | 133 | # ignore contant pointers, as their addresses will vary 134 | if src_arg.operation == binja.HighLevelILOperation.HLIL_CONST_PTR: 135 | # check if the pointer is a string, and if so compare string values between instructions 136 | src_bv = src_instr.il_basic_block.view 137 | dst_bv = dst_instr.il_basic_block.view 138 | src_string_at = src_bv.get_ascii_string_at(src_bv.start + src_arg.value.value) 139 | dst_string_at = dst_bv.get_ascii_string_at(dst_bv.start + dst_arg.value.value) 140 | if (src_string_at is not None) and (dst_string_at is not None): 141 | if src_string_at.value != dst_string_at.value: 142 | return False 143 | 144 | elif src_arg.operation == binja.HighLevelILOperation.HLIL_CONST: 145 | if src_arg.value != dst_arg.value: 146 | return False 147 | 148 | elif (type(src_arg) == binja.Variable) and (type(dst_arg) == binja.Variable): 149 | if src_arg.type != dst_arg.type: 150 | return False 151 | 152 | return True --------------------------------------------------------------------------------