├── example ├── libkste.so ├── source.out ├── arm64-v8a.so ├── libSeQing.so ├── cff-arm64-v8a.elf └── 6fe4c921d95d0fd139d07e5de03ceee28dcabc8db93bf3b6ebb0f0207bc09648 ├── .gitignore ├── pyrightconfig.json ├── utils ├── __init__.py ├── mikuPlugin.py ├── instruction_analyzer.py ├── state_machine.py ├── instr_vistor.py └── cfg_analyzer.py ├── fix_binaryninja_api ├── lowlevelil.py └── mediumlevelil.py ├── tests ├── process_data.py └── test1.py ├── passes ├── low │ ├── spiltIfPass.py │ ├── inlineIfCondPass.py │ └── copyCommonBlockPass.py └── mid │ ├── reverseIfPass.py │ ├── movStateDefine.py │ ├── deflatHardPass.py │ └── clearPass.py ├── readme.md ├── mikuWorkflow.py ├── plugin.json └── __init__.py /example/libkste.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltlly/MikuCffHelper/HEAD/example/libkste.so -------------------------------------------------------------------------------- /example/source.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltlly/MikuCffHelper/HEAD/example/source.out -------------------------------------------------------------------------------- /example/arm64-v8a.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltlly/MikuCffHelper/HEAD/example/arm64-v8a.so -------------------------------------------------------------------------------- /example/libSeQing.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltlly/MikuCffHelper/HEAD/example/libSeQing.so -------------------------------------------------------------------------------- /example/cff-arm64-v8a.elf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltlly/MikuCffHelper/HEAD/example/cff-arm64-v8a.elf 
-------------------------------------------------------------------------------- /example/6fe4c921d95d0fd139d07e5de03ceee28dcabc8db93bf3b6ebb0f0207bc09648: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltlly/MikuCffHelper/HEAD/example/6fe4c921d95d0fd139d07e5de03ceee28dcabc8db93bf3b6ebb0f0207bc09648 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | *.py[cod] 3 | __pycache__/ 4 | *.so 5 | *.pyd 6 | *.pyo 7 | 8 | # Environments 9 | .env 10 | .venv 11 | venv/ 12 | env/ 13 | 14 | # Editor 15 | .vscode/ 16 | .idea/ 17 | 18 | # Binary files 19 | *.so 20 | *.elf 21 | *.out 22 | 23 | # System 24 | .DS_Store 25 | Thumbs.db -------------------------------------------------------------------------------- /pyrightconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "useLibraryCodeForTypes": true, 3 | "reportGeneralDiagnostics": false, 4 | "reportArgumentType": false, 5 | "reportAttributeAccessIssue":false, 6 | "reportOptionalMemberAccess":false, 7 | "reportAssignmentType":false, 8 | "reportCallIssue": false, 9 | } 10 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | """MikuCffHelper工具模块 2 | 包含控制流分析、状态机分析、指令分析等工具类 3 | """ 4 | 5 | from .cfg_analyzer import CFGAnalyzer 6 | from .state_machine import StateMachine 7 | from .instruction_analyzer import ( 8 | InstructionAnalyzer, 9 | unsigned_to_signed_32bit, 10 | ) 11 | from .instr_vistor import SimpleVisitor 12 | from .mikuPlugin import suggest_stateVar, log_info, log_warn, log_error 13 | 14 | __all__ = [ 15 | "CFGAnalyzer", 16 | "SimpleVisitor", 17 | "StateMachine", 18 | "InstructionAnalyzer", 19 | "suggest_stateVar", 20 | 
"unsigned_to_signed_32bit", 21 | "log_info", 22 | "log_warn", 23 | "log_error", 24 | ] 25 | -------------------------------------------------------------------------------- /fix_binaryninja_api/lowlevelil.py: -------------------------------------------------------------------------------- 1 | from binaryninja import basicblock, LowLevelILFunction 2 | 3 | from ..utils import log_error 4 | from typing import Optional 5 | 6 | 7 | def get_basic_block_at(self, index: int) -> Optional["basicblock.BasicBlock"]: 8 | basic_blocks = self.basic_blocks 9 | bbs = sorted(list(basic_blocks), key=lambda bb: bb.start) 10 | low, high = 0, len(bbs) - 1 11 | while low <= high: 12 | mid = (low + high) // 2 13 | if bbs[mid].start <= index < bbs[mid].end: 14 | return bbs[mid] 15 | elif index < bbs[mid].start: 16 | high = mid - 1 17 | else: 18 | low = mid + 1 19 | log_error(f"can't find basic block at {index}") 20 | return None 21 | 22 | 23 | LowLevelILFunction.get_basic_block_at = get_basic_block_at 24 | -------------------------------------------------------------------------------- /fix_binaryninja_api/mediumlevelil.py: -------------------------------------------------------------------------------- 1 | from binaryninja import basicblock, MediumLevelILFunction 2 | from typing import Optional 3 | 4 | from ..utils import log_error 5 | 6 | 7 | def get_basic_block_at(self, index: int) -> Optional["basicblock.BasicBlock"]: 8 | basic_blocks = self.basic_blocks 9 | bbs = sorted(list(basic_blocks), key=lambda bb: bb.start) 10 | low, high = 0, len(bbs) - 1 11 | while low <= high: 12 | mid = (low + high) // 2 13 | if bbs[mid].start <= index < bbs[mid].end: 14 | return bbs[mid] 15 | elif index < bbs[mid].start: 16 | high = mid - 1 17 | else: 18 | low = mid + 1 19 | log_error(f"can't find basic block at {index}") 20 | return None 21 | 22 | 23 | MediumLevelILFunction.get_basic_block_at = get_basic_block_at 24 | -------------------------------------------------------------------------------- 
/tests/process_data.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | with open( 4 | r"C:\\Users\\ltlly\AppData\\Roaming\\Binary Ninja\\plugins\\cfg_analysis.json", 5 | "r", 6 | ) as f: 7 | data = json.load(f) 8 | 9 | new_data = [] 10 | 11 | 12 | for func in data: 13 | if func["branch_complexity"] == 0: 14 | continue 15 | if func["if_instr_len"] == 0: 16 | continue 17 | if func["avg_out_degree"] == 0: 18 | continue 19 | if func["mlil_instructions_len"] > 1000: 20 | continue 21 | new_data.append(func) 22 | 23 | # 根据 cyclomatic_complexity 降序排序 24 | new_data.sort(key=lambda x: x["cyclomatic_complexity"], reverse=True) 25 | 26 | 27 | print(f"Processed {len(new_data)} functions.") 28 | 29 | with open( 30 | r"C:\\Users\\ltlly\AppData\\Roaming\\Binary Ninja\\plugins\\cfg_analysis_processed.json", 31 | "w", 32 | ) as f: 33 | json.dump(new_data, f, indent=4) 34 | print(new_data) 35 | -------------------------------------------------------------------------------- /passes/low/spiltIfPass.py: -------------------------------------------------------------------------------- 1 | # Make sure ifInstr is a single block 2 | 3 | 4 | from binaryninja import LowLevelILIf, LowLevelILLabel, AnalysisContext, ILSourceLocation 5 | 6 | 7 | def pass_spilt_if_block(analysis_context: AnalysisContext): 8 | llil = analysis_context.function.llil 9 | updated = False 10 | for block in llil.basic_blocks: 11 | if block.length == 1: 12 | continue 13 | ifinstr = block[-1] 14 | if not isinstance(ifinstr, LowLevelILIf): 15 | continue 16 | goto_label = LowLevelILLabel() 17 | llil.mark_label(goto_label) 18 | llil.append(llil.copy_expr(ifinstr)) 19 | llil.replace_expr( 20 | ifinstr.expr_index, 21 | llil.goto(goto_label, ILSourceLocation.from_instruction(ifinstr)), 22 | ) 23 | updated = True 24 | llil.finalize() 25 | llil.generate_ssa_form() 26 | return updated 27 | -------------------------------------------------------------------------------- 
/readme.md: -------------------------------------------------------------------------------- 1 | # MikuCffHelper 2 | 3 | ## 0.bug 4 | 5 | 4.3.7045 以上版本会闪退 不知道什么原因 6 | 7 | 8 | ## 1. Introduction 9 | 10 | This is a helper for CFF (control flow flattening). It helps you deflatten OLLVM CFF-obfuscated code. 11 | 12 | It uses a Binary Ninja workflow to deflatten the CFF-obfuscated code. 13 | 14 | ## 2. Installation 15 | 16 | Move the folder `MikuCffHelper` to the `plugins` folder of Binary Ninja. 17 | 18 | ## 3. Usage 19 | 20 | Open the CFF-obfuscated file with the `MikuCffHelper_workflow` workflow. 21 | 22 | 23 | 1. Right-click the function you want to deflatten and select `Function Analysis/analysis.plugins.workflow_patch_mlil`. 24 | 25 | ## todo 26 | 27 | This is a beta version, and I'm still working on it. 28 | 29 | Only some functions in example/arm64-v8a.so work well, because tracing the source of stateVar assignments has not been implemented yet. 30 | 31 | ## 原理 32 | 33 | 34 | 原始IL → LLIL优化(代码克隆/条件拆分) 35 | → MLIL优化(状态识别/路径消解) 36 | → HLIL优化(语义恢复) 37 | 38 | ### 低级优化层 39 | 40 | 公共代码块复制(pass_copy_common_block) 41 | 42 | If条件内联(pass_inline_if_cond) 43 | 44 | 分支块独立化(pass_spilt_if_block) 45 | 46 | ### 中级优化层 47 | 48 | 49 | 基于NetworkX的CFG路径搜索 50 | 51 | 部分路径模拟执行(emu_hard) 52 | 53 | ✅ 已实现功能: 54 | 55 | 控制流平坦化解构 56 | 跨架构基础支持 57 | 核心优化流水线 58 | 59 | 60 | ⚠️ 已知限制: 61 | 62 | 需手动选择目标函数 63 | 依赖预定义的状态变量特征 64 | 65 | (来自2025届本科生正在开发的毕设项目) 66 | -------------------------------------------------------------------------------- /passes/mid/reverseIfPass.py: -------------------------------------------------------------------------------- 1 | from binaryninja import ( 2 | MediumLevelILIf, 3 | MediumLevelILCmpNe, 4 | MediumLevelILOperation, 5 | AnalysisContext, 6 | MediumLevelILLabel, 7 | ILSourceLocation, 8 | ) 9 | 10 | 11 | def pass_reverse_if(analysis_context: AnalysisContext): 12 | """ 13 | 把所有的if (a!=123)then 1 else 2 反转为 if a==123 then 2 else 1 14 | """ 15 | mlil = 
analysis_context.function.mlil 16 | 17 | updated = False 18 | ifInstrs = [] 19 | for block in mlil.basic_blocks: 20 | instr = block[-1] 21 | if isinstance(instr, MediumLevelILIf) and isinstance( 22 | instr.condition, MediumLevelILCmpNe 23 | ): 24 | ifInstrs.append(instr) 25 | for instr in ifInstrs: 26 | condition = instr.condition 27 | trueLabel = MediumLevelILLabel() 28 | falseLabel = MediumLevelILLabel() 29 | trueLabel.operand = instr.false 30 | falseLabel.operand = instr.true 31 | new_condition = mlil.expr( 32 | MediumLevelILOperation.MLIL_CMP_E, 33 | mlil.copy_expr(condition.operands[0]), 34 | mlil.copy_expr(condition.operands[1]), 35 | ) 36 | mlil.replace_expr( 37 | instr, 38 | mlil.if_expr( 39 | new_condition, 40 | trueLabel, 41 | falseLabel, 42 | ILSourceLocation.from_instruction(instr), 43 | ), 44 | ) 45 | updated = True 46 | if updated: 47 | mlil.finalize() 48 | mlil.generate_ssa_form() 49 | -------------------------------------------------------------------------------- /mikuWorkflow.py: -------------------------------------------------------------------------------- 1 | from binaryninja import ( 2 | AnalysisContext, 3 | ) 4 | 5 | from .utils import log_error 6 | from .passes.low.spiltIfPass import pass_spilt_if_block 7 | from .passes.low.copyCommonBlockPass import pass_copy_common_block 8 | from .passes.low.inlineIfCondPass import pass_inline_if_cond 9 | from .passes.mid.reverseIfPass import pass_reverse_if 10 | from .passes.mid.deflatHardPass import pass_deflate_hard 11 | from .passes.mid.clearPass import pass_clear 12 | from .passes.mid.movStateDefine import pass_mov_state_define 13 | 14 | 15 | def workflow_patch_llil(analysis_context: AnalysisContext): 16 | function = analysis_context.function 17 | llil = function.llil 18 | if llil is None: 19 | return 20 | pass_copy_common_block(analysis_context) 21 | pass_inline_if_cond(analysis_context) 22 | pass_spilt_if_block(analysis_context) 23 | 24 | return True 25 | 26 | 27 | def 
workflow_patch_mlil(analysis_context: AnalysisContext): 28 | function = analysis_context.function 29 | mlil = function.mlil 30 | if mlil is None: 31 | log_error(f"Function {function.name} has no MLIL") 32 | return 33 | pass_clear(analysis_context) 34 | pass_mov_state_define(analysis_context) 35 | pass_reverse_if(analysis_context) 36 | pass_mov_state_define(analysis_context) 37 | pass_deflate_hard(analysis_context) 38 | pass_clear(analysis_context) 39 | pass_mov_state_define(analysis_context) 40 | pass_deflate_hard(analysis_context) 41 | pass_clear(analysis_context) 42 | 43 | 44 | def workflow_patch_hlil(analysis_context: AnalysisContext): 45 | from .utils import suggest_stateVar 46 | 47 | suggest_stateVar(analysis_context.view, analysis_context.function) 48 | -------------------------------------------------------------------------------- /passes/low/inlineIfCondPass.py: -------------------------------------------------------------------------------- 1 | from binaryninja import ( 2 | AnalysisContext, 3 | LowLevelILIf, 4 | LowLevelILInstruction, 5 | LowLevelILFlagSsa, 6 | LowLevelILSetFlagSsa, 7 | LowLevelILFlagPhi, 8 | LowLevelILLabel, 9 | ILSourceLocation, 10 | ) 11 | 12 | 13 | def pass_inline_if_cond(analysis_context: AnalysisContext): 14 | llil = analysis_context.function.llil 15 | for bb in llil.ssa_form.basic_blocks: 16 | if not isinstance(bb[-1], LowLevelILIf): 17 | continue 18 | lastInstrSSA: LowLevelILInstruction = bb[-1] 19 | condition = lastInstrSSA.condition 20 | if not isinstance(condition, LowLevelILFlagSsa): 21 | continue 22 | define = llil.ssa_form.get_ssa_flag_definition(condition.src) 23 | if not isinstance(define, LowLevelILSetFlagSsa): 24 | continue 25 | if not bb.end > int(define.instr_index) >= bb.start: 26 | continue 27 | use = llil.ssa_form.get_ssa_flag_uses(condition.src) 28 | use = [x for x in use if not isinstance(x, LowLevelILFlagPhi)] 29 | if len(use) > 1: 30 | continue 31 | ifInstr: LowLevelILIf = lastInstrSSA.non_ssa_form 32 | 
defineInstr = define.non_ssa_form 33 | newTrueLabel = LowLevelILLabel() 34 | newTrueLabel.operand = ifInstr.true 35 | newFalseLabel = LowLevelILLabel() 36 | newFalseLabel.operand = ifInstr.false 37 | newIfinstr = llil.if_expr( 38 | llil.copy_expr(defineInstr.src), 39 | newTrueLabel, 40 | newFalseLabel, 41 | ILSourceLocation.from_instruction(ifInstr), 42 | ) 43 | llil.replace_expr(ifInstr.expr_index, newIfinstr) 44 | llil.finalize() 45 | llil.generate_ssa_form() 46 | -------------------------------------------------------------------------------- /plugin.json: -------------------------------------------------------------------------------- 1 | { 2 | "pluginmetadataversion": 2, 3 | "name": "MikuCffHelper", 4 | "type": [ 5 | "helper" 6 | ], 7 | "api": [ 8 | "python3" 9 | ], 10 | "description": "A helper for deflating OLLVM CFF obfuscated code using BinaryNinja.", 11 | "longdescription": "MikuCffHelper is a plugin designed to assist in deflating OLLVM CFF obfuscated code. It integrates with BinaryNinja to provide workflows and commands for analyzing and transforming obfuscated code.", 12 | "license": { 13 | "name": "MIT", 14 | "text": "Copyright (c) \n\nPermission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE." 15 | }, 16 | "platforms": [ 17 | "Darwin", 18 | "Linux", 19 | "Windows" 20 | ], 21 | "installinstructions": { 22 | "Darwin": "", 23 | "Linux": "", 24 | "Windows": "" 25 | }, 26 | "dependencies": { 27 | "networkx": "" 28 | }, 29 | "version": "1.0.0", 30 | "author": "ltlly", 31 | "minimumbinaryninjaversion": 3164 32 | } -------------------------------------------------------------------------------- /utils/mikuPlugin.py: -------------------------------------------------------------------------------- 1 | from binaryninja import Logger, BinaryView, Function, Variable 2 | 3 | # Initialize logger 4 | mikuLogger = Logger(0, "MikuCffHelper") 5 | 6 | 7 | def log_info(msg: str): 8 | """记录信息日志 9 | Args: 10 | msg (str): 要记录的信息 11 | """ 12 | mikuLogger.log_info(msg) 13 | 14 | 15 | def log_warn(msg: str): 16 | """记录警告日志 17 | Args: 18 | msg (str): 要记录的警告信息 19 | """ 20 | mikuLogger.log_warn(msg) 21 | 22 | 23 | def log_error(msg: str): 24 | """记录错误日志 25 | Args: 26 | msg (str): 要记录的错误信息 27 | """ 28 | mikuLogger.log_error(msg) 29 | 30 | 31 | func_dict = {} 32 | 33 | 34 | def make_stateVar(func: Function, var: Variable): 35 | """创建状态变量 36 | Args: 37 | func (Function): 目标函数 38 | var (Variable): 要标记为状态变量的变量 39 | """ 40 | if func.start not in func_dict: 41 | func_dict[func.start] = {} 42 | var_name_list = [var.name for var in func.vars] 43 | i = 0 44 | while f"state-{i}" in var_name_list or f"state-{i}" in func_dict[func.start]: 45 | i += 1 46 | name = f"state-{i}" 47 | var.set_name_async(name) 48 | func_dict[func.start][name] = var 49 | 50 | 51 | def set_stateVar(bv: BinaryView, func: Function): 52 | """设置状态变量 53 | Args: 54 | bv (BinaryView): 二进制视图 55 | func (Function): 目标函数 56 | """ 57 | from binaryninjaui import UIContext 58 | 
59 | ctx = UIContext.activeContext() 60 | h = ctx.contentActionHandler() 61 | a = h.actionContext() 62 | token_state = a.token 63 | var = Variable.from_identifier(func, token_state.token.value) 64 | make_stateVar(func, var) 65 | 66 | 67 | def suggest_stateVar(bv: BinaryView, func: Function): 68 | from .state_machine import StateMachine 69 | 70 | state_vars = StateMachine.find_state_var(func) 71 | for var in state_vars: 72 | if var.name.startswith("state-"): 73 | continue 74 | make_stateVar(func, var) 75 | func_dict[func.start] = {} 76 | 77 | 78 | def isV(bv: BinaryView, inst): 79 | """验证指令是否有效 80 | Args: 81 | bv (BinaryView): 二进制视图 82 | inst: 要验证的指令 83 | Returns: 84 | bool: 如果指令有效返回True 85 | """ 86 | return True 87 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | import json 2 | from binaryninja import PluginCommand, Workflow, Activity 3 | from .mikuWorkflow import workflow_patch_llil, workflow_patch_mlil, workflow_patch_hlil 4 | from .utils import log_info 5 | from .fix_binaryninja_api import lowlevelil # noqa: F401 6 | from .fix_binaryninja_api import mediumlevelil # noqa: F401 7 | 8 | 9 | def register_workflow(): 10 | """ 11 | Register the workflow for the plugin. 
12 | """ 13 | cff_workflow = Workflow("core.function.metaAnalysis").clone( 14 | "MikuCffHelper_workflow" 15 | ) 16 | 17 | configuration_llil = json.dumps( 18 | { 19 | "name": "analysis.plugins.workflow_patch_llil", 20 | "description": "A activity to patch llil", 21 | "eligibility": {"auto": {"default": True}}, 22 | } 23 | ) 24 | cff_workflow.register_activity( 25 | Activity(configuration_llil, action=workflow_patch_llil) 26 | ) 27 | 28 | configuration_mlil = json.dumps( 29 | { 30 | "name": "analysis.plugins.workflow_patch_mlil", 31 | "description": "A activity to patch mlil", 32 | "eligibility": {"auto": {"default": False}}, 33 | } 34 | ) 35 | 36 | cff_workflow.register_activity( 37 | Activity(configuration_mlil, action=workflow_patch_mlil) 38 | ) 39 | configuration_hlil = json.dumps( 40 | { 41 | "name": "analysis.plugins.workflow_patch_hlil", 42 | "description": "A activity to patch hlil", 43 | "eligibility": {"auto": {"default": True}}, 44 | } 45 | ) 46 | cff_workflow.register_activity( 47 | Activity(configuration_hlil, action=workflow_patch_hlil) 48 | ) 49 | 50 | cff_workflow.insert( 51 | "core.function.generateMediumLevelIL", ["analysis.plugins.workflow_patch_llil"] 52 | ) 53 | cff_workflow.insert( 54 | "core.function.analyzeConditionalNoReturns", 55 | ["analysis.plugins.workflow_patch_mlil"], 56 | ) 57 | cff_workflow.insert( 58 | "core.function.runCompletionCallbacks", ["analysis.plugins.workflow_patch_hlil"] 59 | ) 60 | cff_workflow.register() 61 | log_info(f"Registered workflow: {cff_workflow.name}") 62 | 63 | 64 | def register_commands(): 65 | """ 66 | register commands 67 | """ 68 | from .utils.mikuPlugin import set_stateVar, suggest_stateVar, isV 69 | 70 | PluginCommand.register_for_function("miku\\set_state_var", "", set_stateVar, isV) 71 | PluginCommand.register_for_function( 72 | "miku\\suggest_stateVar ", "", suggest_stateVar, isV 73 | ) 74 | 75 | 76 | register_workflow() 77 | register_commands() 78 | 
-------------------------------------------------------------------------------- /tests/test1.py: -------------------------------------------------------------------------------- 1 | from binaryninja import * 2 | import networkx as nx 3 | import json 4 | 5 | 6 | def create_cfg_graph(il: MediumLevelILFunction | LowLevelILFunction): 7 | """创建基本块级别的控制流图 8 | Args: 9 | mlil (MediumLevelILFunction): 中间语言函数 10 | Returns: 11 | networkx.DiGraph: 生成的控制流图 12 | """ 13 | if isinstance(il, MediumLevelILFunction): 14 | ifInstrInstance = MediumLevelILIf 15 | gotoInstrInstance = MediumLevelILGoto 16 | elif isinstance(il, LowLevelILFunction): 17 | ifInstrInstance = LowLevelILIf 18 | gotoInstrInstance = LowLevelILGoto 19 | else: 20 | raise TypeError("il must be MediumLevelILFunction or LowLevelILFunction") 21 | G = nx.DiGraph() 22 | for block in il.basic_blocks: 23 | G.add_node(block.start) 24 | if isinstance(block[-1], ifInstrInstance): 25 | G.add_edge(block.start, block[-1].true, edge_label="true") 26 | G.add_edge(block.start, block[-1].false, edge_label="false") 27 | elif isinstance(block[-1], gotoInstrInstance): 28 | G.add_edge(block.start, block[-1].dest, edge_label="goto") 29 | else: 30 | for edge in block.outgoing_edges: 31 | G.add_edge(block.start, edge.target.start, edge_label="unknown") 32 | return G 33 | 34 | 35 | def process_func(func): 36 | try: 37 | mlil = func.mlil 38 | if mlil is None: 39 | return 40 | data = {} 41 | data["name"] = func.name 42 | data["mlil_blocks_len"] = len(mlil.basic_blocks) 43 | data["mlil_instructions_len"] = len(list(mlil.instructions)) 44 | G = create_cfg_graph(mlil) 45 | data["cfg_nodes_len"] = len(G.nodes) 46 | data["cfg_edges_len"] = len(G.edges) 47 | if_instr_len = 0 48 | for instr in mlil.instructions: 49 | if isinstance(instr, MediumLevelILIf): 50 | if_instr_len += 1 51 | data["if_instr_len"] = if_instr_len 52 | # 圈复杂度 53 | data["cyclomatic_complexity"] = len(G.edges) - len(G.nodes) + 2 54 | # 分支复杂度 55 | data["branch_complexity"] = 
len(G.edges) - len(G.nodes) + 1 56 | out_degrees = [G.out_degree(node) for node in G.nodes] 57 | data["avg_out_degree"] = ( 58 | sum(out_degrees) / len(out_degrees) if out_degrees else 0 59 | ) 60 | return json.dumps(data, indent=4) 61 | except Exception as e: 62 | print(f"Error processing function {func.name}: {e}") 63 | 64 | 65 | result = [] 66 | for func in bv.functions: 67 | data = process_func(func) 68 | if data: 69 | result.append(data) 70 | 71 | with open( 72 | r"C:\\Users\\ltlly\AppData\\Roaming\\Binary Ninja\\plugins\\cfg_analysis_deflat.json", 73 | "w", 74 | ) as f: 75 | json.dump(result, f, indent=4) 76 | print("cfg_analysis.json created") 77 | -------------------------------------------------------------------------------- /passes/mid/movStateDefine.py: -------------------------------------------------------------------------------- 1 | from binaryninja import ( 2 | AnalysisContext, 3 | MediumLevelILSetVar, 4 | MediumLevelILBasicBlock, 5 | ) 6 | 7 | from ...utils import StateMachine 8 | from ...utils import log_error 9 | 10 | 11 | def pass_mov_state_define(analysis_context: AnalysisContext): 12 | func = analysis_context.function 13 | mlil = func.mlil 14 | if mlil is None: 15 | log_error(f"Function {func.name} has no MLIL") 16 | return 17 | 18 | updated = False 19 | _, define_table = StateMachine.collect_stateVar_info(func, False) 20 | l_define_table = [] 21 | for k, v in define_table.items(): 22 | l_define_table += v 23 | 24 | # 按block分组收集statevar定义 25 | block_defines = {} 26 | for define in l_define_table: 27 | if not isinstance(define, MediumLevelILSetVar): 28 | continue 29 | define_block = mlil.get_basic_block_at(define.instr_index) 30 | if len(define_block) == 2: 31 | continue 32 | if define_block not in block_defines: 33 | block_defines[define_block] = [] 34 | block_defines[define_block].append(define) 35 | # 处理每个block 36 | for block, defines in block_defines.items(): 37 | block: MediumLevelILBasicBlock 38 | defines: list[MediumLevelILSetVar] 39 | 
if len(defines) == block.length - 1: 40 | continue 41 | # 保持相对顺序 42 | defines.sort(key=lambda d: d.instr_index) 43 | # 收集所有待移动语句的读写变量 44 | all_vars = set() 45 | for define in defines: 46 | all_vars.update(define.vars_read) 47 | all_vars.update(define.vars_written) 48 | # 检查冲突 49 | can_move = True 50 | check_index = list(range(defines[0].instr_index, block.end - 1)) 51 | check_index = [ 52 | x for x in check_index if x not in [d.instr_index for d in defines] 53 | ] 54 | # log_info(f"check_index::{check_index}") 55 | for i in check_index: 56 | instr = mlil[i] 57 | v = instr.vars_read + instr.vars_written 58 | if any(var in all_vars for var in v): 59 | can_move = False 60 | break 61 | # log_info(f"can_move::{can_move}") 62 | if not can_move: 63 | continue 64 | # 移动语句 65 | defines_copy = [mlil.copy_expr(define) for define in defines] 66 | not_defines_copy = list(range(block.start, block.end - 1)) 67 | # 最后一句不copy 68 | not_defines_copy = [ 69 | x for x in not_defines_copy if x not in [d.instr_index for d in defines] 70 | ] 71 | not_defines_copy = [mlil.copy_expr(mlil[x]) for x in not_defines_copy] 72 | will_copy = not_defines_copy + defines_copy 73 | for i in range(block.start, block.end - 1): 74 | mlil.replace_expr(mlil[i].expr_index, will_copy[i - block.start]) 75 | updated = True 76 | if updated: 77 | mlil.finalize() 78 | mlil.generate_ssa_form() 79 | -------------------------------------------------------------------------------- /passes/low/copyCommonBlockPass.py: -------------------------------------------------------------------------------- 1 | from binaryninja import ( 2 | LowLevelILFunction, 3 | LowLevelILGoto, 4 | LowLevelILIf, 5 | LowLevelILLabel, 6 | LowLevelILInstruction, 7 | LowLevelILBasicBlock, 8 | AnalysisContext, 9 | ILSourceLocation, 10 | ) 11 | 12 | from ...utils import CFGAnalyzer, log_error 13 | 14 | 15 | def fix_pre_bb( 16 | llil: LowLevelILFunction, 17 | pre_last_instr: LowLevelILInstruction, 18 | bb: LowLevelILBasicBlock, 19 | copy_label: 
LowLevelILLabel, 20 | ): 21 | if isinstance(pre_last_instr, LowLevelILGoto): 22 | llil.replace_expr( 23 | pre_last_instr.expr_index, 24 | llil.goto(copy_label, ILSourceLocation.from_instruction(pre_last_instr)), 25 | ) 26 | elif isinstance(pre_last_instr, LowLevelILIf): 27 | true_target = pre_last_instr.true 28 | false_target = pre_last_instr.false 29 | if true_target == bb.start: 30 | fix_false_label = LowLevelILLabel() 31 | fix_false_label.operand = false_target 32 | llil.replace_expr( 33 | pre_last_instr.expr_index, 34 | llil.if_expr( 35 | llil.copy_expr( 36 | pre_last_instr.condition, 37 | ), 38 | copy_label, 39 | fix_false_label, 40 | ILSourceLocation.from_instruction(pre_last_instr), 41 | ), 42 | ) 43 | elif false_target == bb.start: 44 | fix_true_label = LowLevelILLabel() 45 | fix_true_label.operand = true_target 46 | llil.replace_expr( 47 | pre_last_instr.expr_index, 48 | llil.if_expr( 49 | llil.copy_expr( 50 | pre_last_instr.condition, 51 | ), 52 | fix_true_label, 53 | copy_label, 54 | ILSourceLocation.from_instruction(pre_last_instr), 55 | ), 56 | ) 57 | else: 58 | log_error("ERROR IF") 59 | else: 60 | log_error("ERROR") 61 | 62 | 63 | def pass_copy_common_block(analysis_context: AnalysisContext): 64 | llil = analysis_context.function.llil 65 | if len(llil.basic_blocks) > 100: 66 | return 67 | for _ in range(len(llil.basic_blocks)): 68 | updated = False 69 | g = CFGAnalyzer.create_cfg_graph(llil) 70 | for bb in llil.basic_blocks: 71 | pre_blocks = CFGAnalyzer.LLIL_get_incoming_blocks(llil, bb.start) 72 | if len(pre_blocks) <= 1: 73 | continue 74 | pre_instrs = [prebb[-1] for prebb in pre_blocks] 75 | if not all( 76 | isinstance(instr, LowLevelILGoto) or isinstance(instr, LowLevelILIf) 77 | for instr in pre_instrs 78 | ): 79 | continue 80 | if CFGAnalyzer.is_node_in_loop(g, bb.start): 81 | continue 82 | for j in range(1, len(pre_blocks)): 83 | updated = True 84 | pre_block = pre_blocks[j] 85 | pre_last_instr = llil[pre_block.end - 1] 86 | copy_label = 
LowLevelILLabel() 87 | llil.mark_label(copy_label) 88 | for instr_index in range(bb.start, bb.end): 89 | llil.append(llil.copy_expr(llil[instr_index])) 90 | fix_pre_bb(llil, pre_last_instr, bb, copy_label) 91 | if updated: 92 | llil.finalize() 93 | llil.generate_ssa_form() 94 | else: 95 | break 96 | llil.finalize() 97 | llil.generate_ssa_form() 98 | -------------------------------------------------------------------------------- /utils/instruction_analyzer.py: -------------------------------------------------------------------------------- 1 | from typing import Generator, List, Dict, Any 2 | from binaryninja import ( 3 | MediumLevelILFunction, 4 | MediumLevelILIf, 5 | MediumLevelILInstruction, 6 | MediumLevelILSetVar, 7 | MediumLevelILOperation, 8 | Variable, 9 | ) 10 | 11 | 12 | def unsigned_to_signed_32bit(n): 13 | """将32位无符号整数转换为有符号整数""" 14 | # 检查是否在无符号32位整数范围内 15 | if n < 0 or n > 0xFFFFFFFF: 16 | raise ValueError("Input is out of range for a 32-bit unsigned integer") 17 | 18 | # 如果大于 0x7FFFFFFF,则减去 0x100000000 19 | if n > 0x7FFFFFFF: 20 | return n - 0x100000000 21 | else: 22 | return n 23 | 24 | 25 | def get_mask(width: int) -> int: 26 | """根据宽度生成掩码 27 | Args: 28 | width (int): 掩码宽度(字节数) 29 | Returns: 30 | int: 对应的位掩码值 31 | """ 32 | match width: 33 | case 1: 34 | return 0xFF 35 | case 2: 36 | return 0xFFFF 37 | case 4: 38 | return 0xFFFFFFFF 39 | case 8: 40 | return 0xFFFFFFFFFFFFFFFF 41 | case _: 42 | return int(f"0x{'ff' * width}", 16) 43 | 44 | 45 | class InstructionAnalyzer: 46 | """指令分析器,负责处理指令分析和状态转换检测""" 47 | 48 | @staticmethod 49 | def find_state_transition_instructions( 50 | local_if_table: List[MediumLevelILIf], 51 | local_define_table: List[MediumLevelILSetVar], 52 | ) -> Generator[Dict[str, Any], None, None]: 53 | """查找状态转换指令 54 | Args: 55 | local_if_table (List[MediumLevelILIf]): 本地if指令表 56 | local_define_table (List[MediumLevelILSetVar]): 本地定义指令表 57 | Yields: 58 | Dict[str, Any]: 匹配的状态转换指令对 59 | """ 60 | for def_instr in local_define_table: 61 | 
t_def_const = def_instr.src 62 | t_def_const_width = def_instr.size 63 | key_define = t_def_const.value.value & get_mask(t_def_const_width) 64 | 65 | for if_instr in local_if_table: 66 | if_const = if_instr.condition.right 67 | if_const_width = if_instr.condition.left.size 68 | key_if = if_const.value.value & get_mask(if_const_width) 69 | 70 | if key_define == key_if: 71 | yield { 72 | "if_instr": if_instr, 73 | "def_instr": def_instr, 74 | "def_const": def_instr.src, 75 | "if_const": if_instr.condition.right, 76 | } 77 | 78 | @staticmethod 79 | def find_white_instructions( 80 | mlil: MediumLevelILFunction, possible_state_vars: List[Variable] 81 | ): 82 | """查找白名单指令 83 | Args: 84 | mlil (MediumLevelILFunction): 中间语言函数 85 | possible_state_vars (List[Variable]): 可能的状态变量列表 86 | Returns: 87 | List[MediumLevelILInstruction]: 符合条件的白名单指令列表 88 | """ 89 | white_instructions = [] 90 | for instr in mlil.instructions: 91 | if instr.operation not in [ 92 | MediumLevelILOperation.MLIL_GOTO, 93 | MediumLevelILOperation.MLIL_IF, 94 | MediumLevelILOperation.MLIL_SET_VAR, 95 | ]: 96 | continue 97 | vars = instr.vars_written + instr.vars_read 98 | if not all([var in possible_state_vars for var in vars]): 99 | continue 100 | white_instructions.append(instr) 101 | return white_instructions 102 | 103 | @staticmethod 104 | def check_state_if_instr(instr: MediumLevelILInstruction): 105 | """检查指令是否为状态相关的if指令 106 | Args: 107 | instr (MediumLevelILInstruction): 待检查的指令 108 | Returns: 109 | bool: 如果是状态相关的if指令返回True,否则返回False 110 | """ 111 | if not isinstance(instr, MediumLevelILIf): 112 | return False 113 | condition = instr.condition 114 | if (not hasattr(condition, "left")) or (not hasattr(condition, "right")): 115 | return False 116 | if condition.right.operation != MediumLevelILOperation.MLIL_CONST: 117 | return False 118 | return True 119 | 120 | @staticmethod 121 | def emu_if(left_const: int, if_symbol: MediumLevelILOperation, right_const: int): 122 | """模拟if条件判断 123 | Args: 124 | 
left_const (int): 左操作数 125 | if_symbol (MediumLevelILOperation): 比较操作符 126 | right_const (int): 右操作数 127 | Returns: 128 | bool: 比较结果 129 | """ 130 | 131 | def cmp_e(a, b): 132 | return a == b 133 | 134 | def cmp_ne(a, b): 135 | return a != b 136 | 137 | def cmp_ult(a, b): 138 | return a < b 139 | 140 | def cmp_ule(a, b): 141 | return a <= b 142 | 143 | def cmp_ugt(a, b): 144 | return a > b 145 | 146 | def cmp_uge(a, b): 147 | return a >= b 148 | 149 | def cmp_slt(a, b): 150 | return unsigned_to_signed_32bit(a) < unsigned_to_signed_32bit(b) 151 | 152 | def cmp_sle(a, b): 153 | return unsigned_to_signed_32bit(a) <= unsigned_to_signed_32bit(b) 154 | 155 | def cmp_sgt(a, b): 156 | return unsigned_to_signed_32bit(a) > unsigned_to_signed_32bit(b) 157 | 158 | def cmp_sge(a, b): 159 | return unsigned_to_signed_32bit(a) >= unsigned_to_signed_32bit(b) 160 | 161 | cmp_funcs = { 162 | MediumLevelILOperation.MLIL_CMP_E: cmp_e, 163 | MediumLevelILOperation.MLIL_CMP_NE: cmp_ne, 164 | MediumLevelILOperation.MLIL_CMP_ULT: cmp_ult, 165 | MediumLevelILOperation.MLIL_CMP_ULE: cmp_ule, 166 | MediumLevelILOperation.MLIL_CMP_UGT: cmp_ugt, 167 | MediumLevelILOperation.MLIL_CMP_UGE: cmp_uge, 168 | MediumLevelILOperation.MLIL_CMP_SLT: cmp_slt, 169 | MediumLevelILOperation.MLIL_CMP_SLE: cmp_sle, 170 | MediumLevelILOperation.MLIL_CMP_SGT: cmp_sgt, 171 | MediumLevelILOperation.MLIL_CMP_SGE: cmp_sge, 172 | } 173 | return cmp_funcs[if_symbol](left_const, right_const) 174 | -------------------------------------------------------------------------------- /utils/state_machine.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, List, Dict, Tuple 2 | from binaryninja import ( 3 | Function, 4 | Variable, 5 | MediumLevelILFunction, 6 | MediumLevelILSetVar, 7 | MediumLevelILVar, 8 | MediumLevelILIf, 9 | MediumLevelILConst, 10 | MediumLevelILInstruction, 11 | ) 12 | 13 | 14 | class StateMachine: 15 | """状态机分析器,负责状态机分析和状态变量检测""" 16 | 17 | 
@staticmethod
def collect_stateVar_info(
    func: Function, ret_int: bool = True
) -> Tuple[
    Dict[Variable, List[MediumLevelILInstruction] | List[int]],
    Dict[Variable, List[MediumLevelILInstruction] | List[int]],
]:
    """Collect constant comparisons and constant assignments per variable.

    Two tables are built from the function's MLIL:

    * the "if" table maps a variable to every ``if (var <cmp> CONST)`` that
      terminates a basic block;
    * the "define" table maps a variable to every ``var = CONST`` assignment.

    Variables whose name matches one of the function's parameters are
    skipped in both tables.

    Args:
        func (Function): Function to analyse.
        ret_int (bool): When True the tables hold the integer constants;
            when False they hold the instructions themselves.

    Returns:
        Tuple[dict, dict]: ``(if_table, define_table)``. Both are empty when
        the function has no MLIL.
    """
    mlil = func.medium_level_il
    if not mlil:
        return {}, {}

    # The original code tried to skip parameters with a token loop whose
    # `continue` only affected the inner loop (and compared token objects to
    # strings), so nothing was ever skipped. Check the variable name directly.
    args_name = {var.name for var in func.parameter_vars}

    def find_if_const_compare(
        mlil: MediumLevelILFunction,
    ) -> Dict[Variable, List[MediumLevelILInstruction] | List[int]]:
        ifTable: Dict[Variable, List[MediumLevelILInstruction] | List[int]] = {}
        for bb in mlil.basic_blocks:
            expr = bb[-1]
            if not isinstance(expr, MediumLevelILIf):
                continue
            condition = expr.condition
            # A bare variable condition has no constant to compare against.
            if isinstance(condition, MediumLevelILVar):
                continue
            if not hasattr(condition, "right"):
                continue
            if not isinstance(condition.right, MediumLevelILConst):
                continue
            left = condition.left
            if not isinstance(left, MediumLevelILVar):
                continue
            if left.src.name in args_name:
                continue
            entry = condition.right.value.value if ret_int else expr
            ifTable.setdefault(left.src, []).append(entry)
        return ifTable

    def find_define(
        mlil: MediumLevelILFunction,
    ) -> Dict[Variable, List[MediumLevelILInstruction] | List[int]]:
        defineTable: Dict[Variable, List[MediumLevelILInstruction] | List[int]] = {}
        for expr in mlil.instructions:
            if not isinstance(expr, MediumLevelILSetVar):
                continue
            if not isinstance(expr.src, MediumLevelILConst):
                continue
            if expr.dest.name in args_name:
                continue
            entry = expr.src.value.value if ret_int else expr
            defineTable.setdefault(expr.dest, []).append(entry)
        return defineTable

    ifTable = find_if_const_compare(mlil)
    defineTable = find_define(mlil)
    if not ret_int:
        # Keep only instructions whose index is still inside the function.
        limit = len(mlil)
        for table in (ifTable, defineTable):
            for key in table:
                table[key] = [
                    instr for instr in table[key] if instr.instr_index < limit
                ]
    return ifTable, defineTable
class BNILVisitor(object):
    """Dispatch IL expressions to ``visit_<OPERATION_NAME>`` methods.

    Subclasses add one ``visit_*`` method per operation they support.
    """

    def __init__(self, **kw):
        # Keyword arguments are accepted and ignored.
        super(BNILVisitor, self).__init__()

    def visit(self, expression) -> Any:
        """Call the handler matching ``expression.operation.name``.

        Returns:
            Whatever the handler returns.

        Raises:
            NotImplementedError: No ``visit_<name>`` attribute exists for
                the expression's operation.
        """
        handler_name = f"visit_{expression.operation.name}"
        if not hasattr(self, handler_name):
            log_info(f"{repr(expression.operation)} not implemented")
            raise NotImplementedError
        handler = getattr(self, handler_name)
        return handler(expression)
class SimpleVisitor(BNILVisitor):
    """Translate a small subset of MLIL into z3 terms.

    Handled operations: ``MLIL_GOTO``, ``MLIL_IF``, ``MLIL_SET_VAR``,
    ``MLIL_VAR``, ``MLIL_CONST`` and the ten ``MLIL_CMP_*`` comparisons.
    Anything else raises ``NotImplementedError`` through ``BNILVisitor.visit``.

    ``self.vars`` maps a variable name to a dict with keys ``"value"`` (the
    current z3 term), ``"size"`` (width in bytes) and ``"var"`` (the z3
    symbol created for the variable).
    """

    def __init__(self, view: BinaryView, function: binaryninja.Function):
        self.view = view
        self.func = function
        super().__init__()
        self.vars: dict[str, Any] = {}

    @staticmethod
    def _match_widths(left, right, signed: bool = False):
        """Widen the narrower bit-vector so both operands have equal width.

        Signed comparisons sign-extend; everything else zero-extends.
        Operands that are not bit-vectors (e.g. booleans) are returned
        unchanged.
        """
        if not (isinstance(left, BitVecRef) and isinstance(right, BitVecRef)):
            return left, right
        gap = left.size() - right.size()
        if gap == 0:
            return left, right
        if signed:
            # Local import: SignExt is not part of the module-level z3 import.
            from z3 import SignExt

            widen = SignExt
        else:
            widen = ZeroExt
        if gap > 0:
            return left, widen(gap, right)
        return widen(-gap, left), right

    def _compare(self, expr, relation, signed: bool = False):
        """Visit both operands, equalise their widths and apply ``relation``."""
        left, right = self.visit_both_sides(expr)
        left, right = self._match_widths(left, right, signed=signed)
        return relation(left, right)

    def visit_MLIL_GOTO(self, expr):
        """Gotos carry no data flow; nothing to evaluate."""
        pass

    def visit_MLIL_IF(self, expr: MediumLevelILIf):
        """Evaluate an ``if`` and report whether its condition is concrete.

        Returns:
            IfResult: ``is_boolean`` is True when the simplified condition is
            literally true or false; ``target_index`` is then the branch
            taken. ``true_target_index`` / ``false_target_index`` are always
            filled in.
        """
        condition = simplify(self.visit(expr.condition))
        res = IfResult(condition=condition)
        res.true_target_index = expr.true
        res.false_target_index = expr.false
        if is_true(condition):
            res.bool_result = True
            res.is_boolean = True
            res.target_index = expr.true
        elif is_false(condition):
            res.bool_result = False
            res.is_boolean = True
            res.target_index = expr.false
        else:
            res.is_boolean = False
        return res

    def visit_both_sides(self, expr):
        """Return the visited ``(left, right)`` operands of a binary expression."""
        return self.visit(expr.left), self.visit(expr.right)

    def visit_MLIL_SET_VAR(self, expr):
        """Record the value assigned to ``expr.dest`` in ``self.vars``."""
        var = make_variable_z3(expr.dest)
        size = expr.dest.type.width  # width in bytes
        raw = self.visit(expr.src)
        if isinstance(raw, int):
            # Plain Python ints cannot be simplified; wrap them directly.
            value = BitVecVal(raw, size * 8)
        else:
            value = simplify(raw)
            if not isinstance(value, BoolRef):
                # Truncate to the destination width.
                value = Extract((size * 8) - 1, 0, value)
        self.vars[str(var)] = {
            "value": value,
            "size": size,
            "var": var,
        }

    def visit_MLIL_VAR(self, expr):
        """Return the tracked value of a variable, creating a symbol if new."""
        if expr.src.name not in self.vars:
            symbol = make_variable_z3(expr.src)
            # The name is re-read here because make_variable_z3 may assign
            # one to a previously unnamed variable.
            self.vars[expr.src.name] = {
                "value": symbol,
                "size": expr.src.type.width,
                "var": symbol,
            }
        return self.vars[expr.src.name]["value"]

    def visit_MLIL_CONST(self, expr):
        """Return a z3 constant; size-0 constants 0/1 become booleans."""
        if expr.size == 0 and expr.constant in (0, 1):
            return BoolVal(True) if expr.constant else BoolVal(False)
        return BitVecVal(expr.constant, expr.size * 8)

    def visit_MLIL_CMP_E(self, expr):
        return self._compare(expr, lambda a, b: a == b)

    def visit_MLIL_CMP_NE(self, expr):
        return self._compare(expr, lambda a, b: a != b)

    # z3's Python comparison operators on bit-vectors are signed.
    def visit_MLIL_CMP_SLE(self, expr):
        return self._compare(expr, lambda a, b: a <= b, signed=True)

    def visit_MLIL_CMP_SLT(self, expr):
        return self._compare(expr, lambda a, b: a < b, signed=True)

    def visit_MLIL_CMP_SGT(self, expr):
        return self._compare(expr, lambda a, b: a > b, signed=True)

    def visit_MLIL_CMP_SGE(self, expr):
        return self._compare(expr, lambda a, b: a >= b, signed=True)

    def visit_MLIL_CMP_UGT(self, expr):
        return self._compare(expr, UGT)

    def visit_MLIL_CMP_UGE(self, expr):
        return self._compare(expr, UGE)

    def visit_MLIL_CMP_ULE(self, expr):
        return self._compare(expr, ULE)

    def visit_MLIL_CMP_ULT(self, expr):
        return self._compare(expr, ULT)
@staticmethod
def create_cfg_graph(il: MediumLevelILFunction | LowLevelILFunction):
    """Build a control-flow graph with one node per basic block.

    Nodes are block start indices. Edges carry an ``edge_label`` of
    ``"true"``/``"false"`` (if terminator), ``"goto"`` (goto terminator) or
    ``"unknown"`` (taken from the block's outgoing edges).

    Args:
        il (MediumLevelILFunction | LowLevelILFunction): IL function.

    Returns:
        networkx.DiGraph: The block-level graph.

    Raises:
        TypeError: ``il`` is neither an MLIL nor an LLIL function.
    """
    if isinstance(il, MediumLevelILFunction):
        if_type, goto_type = MediumLevelILIf, MediumLevelILGoto
    elif isinstance(il, LowLevelILFunction):
        if_type, goto_type = LowLevelILIf, LowLevelILGoto
    else:
        raise TypeError("il must be MediumLevelILFunction or LowLevelILFunction")

    graph = nx.DiGraph()
    for block in il.basic_blocks:
        head = block.start
        graph.add_node(head)
        terminator = block[-1]
        if isinstance(terminator, if_type):
            successors = [(terminator.true, "true"), (terminator.false, "false")]
        elif isinstance(terminator, goto_type):
            successors = [(terminator.dest, "goto")]
        else:
            successors = [
                (edge.target.start, "unknown") for edge in block.outgoing_edges
            ]
        for destination, label in successors:
            graph.add_edge(head, destination, edge_label=label)
    return graph
il.basic_blocks: 71 | for i in range(block.start, block.end): 72 | G.add_node(i) 73 | for i in range(block.start, block.end - 1): 74 | G.add_edge(i, i + 1) 75 | for block in il.basic_blocks: 76 | lastInstr = block[-1] 77 | if isinstance(lastInstr, ifInstrInstance): 78 | G.add_edge(lastInstr.instr_index, lastInstr.true, edge_label="true") 79 | G.add_edge(lastInstr.instr_index, lastInstr.false, edge_label="false") 80 | elif isinstance(lastInstr, gotoInstrInstance): 81 | G.add_edge(lastInstr.instr_index, lastInstr.dest, edge_label="goto") 82 | else: 83 | for edge in block.outgoing_edges: 84 | G.add_edge( 85 | lastInstr.instr_index, edge.target.start, edge_label="unknown" 86 | ) 87 | return G 88 | 89 | @staticmethod 90 | def is_node_in_loop(graph: nx.DiGraph, node) -> bool: 91 | # 获取所有强连通分量 92 | sccs = list(nx.strongly_connected_components(graph)) 93 | 94 | # 查找包含目标节点的强连通分量 95 | for scc in sccs: 96 | if node in scc: 97 | # 如果分量大小大于1,节点在循环中 98 | if len(scc) > 1: 99 | return True 100 | # 分量大小为1时,检查是否存在自环边 101 | else: 102 | return graph.has_edge(node, node) 103 | 104 | # 理论上不会执行到这里,因为节点必属于某个分量 105 | return False 106 | 107 | @staticmethod 108 | def get_basic_block_at(basic_blocks, index): 109 | """获取指定索引处的基本块 110 | Args: 111 | basic_blocks: 基本块列表 112 | index: 目标索引 113 | Returns: 114 | 包含指定索引的基本块,如果找不到返回None 115 | """ 116 | bbs = sorted(list(basic_blocks), key=lambda bb: bb.start) 117 | low, high = 0, len(bbs) - 1 118 | while low <= high: 119 | mid = (low + high) // 2 120 | if bbs[mid].start <= index < bbs[mid].end: 121 | return bbs[mid] 122 | elif index < bbs[mid].start: 123 | high = mid - 1 124 | else: 125 | low = mid + 1 126 | log_error(f"can't find basic block at {index}") 127 | return None 128 | 129 | @staticmethod 130 | def LLIL_get_incoming_blocks(llil: LowLevelILFunction, bbIndex: int): 131 | """获取基本块的所有前驱块 132 | Args: 133 | llil (LowLevelILFunction): 低级中间语言函数 134 | bbIndex (int): 目标基本块索引 135 | Returns: 136 | List: 所有前驱基本块列表 137 | """ 138 | bbs = [] 139 | for bb in 
@staticmethod
def MLIL_get_incoming_blocks(
    mlil: MediumLevelILFunction, bbIndex: int
) -> List[MediumLevelILBasicBlock]:
    """Return the MLIL blocks whose terminator jumps to ``bbIndex``.

    Only ``goto`` and ``if`` terminators are considered.

    Args:
        mlil (MediumLevelILFunction): MLIL function to scan.
        bbIndex (int): Instruction index of the target block's start.

    Returns:
        List[MediumLevelILBasicBlock]: Predecessor blocks ordered by start.
    """

    def jump_targets(block):
        # Instruction indices the block's last instruction can branch to.
        terminator = mlil[block.end - 1]
        if isinstance(terminator, MediumLevelILGoto):
            return (terminator.dest,)
        if isinstance(terminator, MediumLevelILIf):
            return (terminator.true, terminator.false)
        return ()

    predecessors = [
        block for block in mlil.basic_blocks if bbIndex in jump_targets(block)
    ]
    return sorted(predecessors, key=lambda block: block.start)
block_cfg.in_degree(next_node) != 1 or block_cfg.out_degree( 211 | next_node 212 | ) not in {0, 1}: 213 | break # 不满足条件时终止 214 | 215 | # 检查后继节点是否已访问 216 | if next_node in visited: 217 | break 218 | 219 | # 将后继节点加入组 220 | current_group.append(next_node) 221 | visited.add(next_node) 222 | current_node = next_node 223 | 224 | # 如果出度为0,终止扩展 225 | if block_cfg.out_degree(current_node) == 0: 226 | break 227 | 228 | # 仅保留有效组(长度≥2) 229 | if len(current_group) >= 2: 230 | groups.append(current_group) 231 | 232 | return groups 233 | -------------------------------------------------------------------------------- /passes/mid/deflatHardPass.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | from binaryninja import ( 3 | MediumLevelILFunction, 4 | MediumLevelILIf, 5 | MediumLevelILGoto, 6 | Function, 7 | AnalysisContext, 8 | MediumLevelILSetVar, 9 | MediumLevelILInstruction, 10 | MediumLevelILLabel, 11 | Variable, 12 | MediumLevelILConst, 13 | MediumLevelILVar, 14 | ILSourceLocation, 15 | MediumLevelILVarSsa, 16 | ) 17 | import z3 18 | from dataclasses import dataclass 19 | 20 | from ...utils.instr_vistor import IfResult 21 | 22 | from ...utils import ( 23 | log_info, 24 | log_error, 25 | CFGAnalyzer, 26 | StateMachine, 27 | InstructionAnalyzer, 28 | SimpleVisitor, 29 | ) 30 | 31 | 32 | # todo 407d8c 33 | 34 | 35 | # todo 407d8c 36 | @dataclass 37 | class PatchInfo: 38 | """ 39 | 用于存储补丁信息的类 40 | """ 41 | 42 | instr: MediumLevelILInstruction 43 | target: MediumLevelILInstruction 44 | type: str # 补丁类型 goto 或 if 45 | branch: bool # 修补if的true分支还是false分支 46 | 47 | 48 | @dataclass 49 | class EmuHardResult: 50 | """ 51 | emu_hard 函数的返回值类型 52 | """ 53 | 54 | success: bool # 是否存在有效路径 55 | unused_instrs: list[MediumLevelILInstruction] # 无关指令列表 56 | patchInfo: PatchInfo # 补丁信息 57 | visitor: SimpleVisitor # 访问器 58 | 59 | 60 | class SimpleEmu: 61 | def __init__(self, mlil: MediumLevelILFunction, state_vars: list[Variable]): 62 
| self.func = mlil.source_function 63 | self.mlil = mlil 64 | self.visitor = SimpleVisitor(mlil.view, self.func) 65 | self.solver = z3.Solver() 66 | self.state_vars = state_vars 67 | 68 | def emu_instrs(self, instrs: list[MediumLevelILInstruction]): 69 | """ 70 | 处理状态机的指令,判断是否存在有效路径 71 | 72 | Returns: 73 | (是否存在有效路径, 目标指令索引, 遍历到的指令列表) 74 | Args: 75 | instrs: 指令列表 76 | state_vars: 状态变量列表 77 | """ 78 | func = instrs[0].function.source_function 79 | mlil = func.mlil 80 | v = SimpleVisitor(func.view, func) 81 | walked_instrs = [] 82 | s = z3.Solver() 83 | conditions = [] 84 | false_ret = EmuHardResult(False, [], PatchInfo(None, None, "", False), v) 85 | true_ret = false_ret 86 | last_if = [None, False] 87 | for i, instr in enumerate(instrs): 88 | log_info(f"visit {instr} {v.vars}") 89 | try: 90 | match instr: 91 | case MediumLevelILGoto(): 92 | continue 93 | case MediumLevelILSetVar(): 94 | left = instr.dest 95 | if left in self.state_vars: 96 | v.visit(instr) 97 | walked_instrs.append(instr) 98 | case MediumLevelILIf(): 99 | res: IfResult = v.visit(instr) 100 | if res.is_boolean: 101 | nextip = res.target_index 102 | if ( 103 | i + 1 < len(instrs) 104 | and nextip != instrs[i + 1].instr_index 105 | ): 106 | return false_ret 107 | elif i == len(instrs) - 1: 108 | true_ret = EmuHardResult( 109 | True, 110 | walked_instrs, 111 | PatchInfo(instrs[0], mlil[nextip], "goto", False), 112 | v, 113 | ) 114 | else: 115 | vars = instr.vars_written + instr.vars_read 116 | if not all([var in self.state_vars for var in vars]): 117 | return false_ret 118 | if instrs[i + 1].instr_index == res.true_target_index: 119 | conditions.append(res.condition) 120 | last_if = [instr, True] 121 | else: 122 | conditions.append(z3.Not(res.condition)) 123 | last_if = [instr, False] 124 | case _: 125 | vars_read = instr.vars_read 126 | vars_written = instr.vars_written 127 | if any(var in self.state_vars for var in vars_read) or any( 128 | var in self.state_vars for var in vars_written 129 | ): 130 | 
return false_ret 131 | walked_instrs.append(instr) 132 | except Exception as e: 133 | if not isinstance(e, NotImplementedError): 134 | log_text = "" 135 | log_text += f"Exception in emu_hard: {e}\n" 136 | for instr in instrs: 137 | log_text += f"{instr.instr_index}::{instr}\n" 138 | return false_ret 139 | if len(conditions) == 0: 140 | return true_ret 141 | s.add(*conditions) 142 | log_error(f"期望成立 {s.check()} {s.model()}") 143 | last_if_index = instrs.index(last_if[0]) 144 | new_instrs = instrs[last_if_index:] 145 | new_walked_instrs = [] 146 | for instr in walked_instrs: 147 | if instr in new_instrs: 148 | new_walked_instrs.append(instr) 149 | # return false_ret 150 | return EmuHardResult( 151 | True, 152 | new_walked_instrs, 153 | PatchInfo(last_if[0], true_ret.patchInfo.target, "if", last_if[1]), 154 | v, 155 | ) 156 | 157 | 158 | def quick_check( 159 | instrs: list[MediumLevelILInstruction], 160 | const_val: int, 161 | ): 162 | define_var: Variable = instrs[0].dest 163 | for instr in instrs[1:]: 164 | if not isinstance(instr, MediumLevelILSetVar): 165 | continue 166 | if ( 167 | instr.dest == define_var 168 | and isinstance(instr.src, MediumLevelILConst) 169 | and instr.src.constant != const_val 170 | ): 171 | return False 172 | return True 173 | 174 | 175 | def check_valid_if_instr(instrs: list[MediumLevelILInstruction]): 176 | instr_indexs = [instr.instr_index for instr in instrs] 177 | for instr in instrs: 178 | if not isinstance(instr, MediumLevelILIf): 179 | continue 180 | mlil_vars = [] 181 | 182 | def visitor(operand_name, inst, instr_type_name, parent): 183 | match inst: 184 | case MediumLevelILVarSsa(): 185 | mlil_vars.append(inst) 186 | 187 | list(instr.ssa_form.visit(visitor)) 188 | print(mlil_vars) 189 | 190 | 191 | def find_valid_paths( 192 | G, source, target, mlil, state_vars, max_paths=10 193 | ) -> list[EmuHardResult]: 194 | """ 195 | 自定义路径搜索算法,在搜索过程中应用剪枝策略 196 | 197 | Args: 198 | G: 控制流图 199 | source: 起始节点 200 | target: 目标节点 201 | mlil: 
def pass_deflate_hard(analysis_context: AnalysisContext):
    """Rewrite state-machine transitions into direct branches.

    For every pair of a constant state-variable assignment and a constant
    state-variable comparison reported by
    ``InstructionAnalyzer.find_state_transition_instructions``, a feasible
    path between the two is searched with ``find_valid_paths``. When one is
    found, the path's non-state instructions are copied into a new block at
    the end of the function and the original instruction is redirected to it.

    Args:
        analysis_context: Analysis context; only ``.function`` is read.
    """
    function: Function = analysis_context.function
    mlil: MediumLevelILFunction | None = function.mlil
    if mlil is None:
        log_error(f"Function {function.name} has no MLIL")
        return
    # Instructions that already led to a patch; they are filtered out of the
    # candidate tables on later iterations.
    worked_define = set()
    worked_if = set()
    # Upper bound on rewrite rounds.
    # NOTE(review): the factor 33 looks empirical - confirm.
    max_iterations = len(mlil.basic_blocks) * 33
    for _ in range(max_iterations):
        updated = False
        # Everything is recomputed each round; the previous round may have
        # patched the IL.
        G_full = CFGAnalyzer.create_full_cfg_graph(mlil)
        state_vars = StateMachine.find_state_var(function)
        if_table, define_table = StateMachine.collect_stateVar_info(function, False)
        l_if_table = [
            instr for v in if_table.values() for instr in v if instr not in worked_if
        ]
        l_define_table = [
            instr
            for v in define_table.values()
            for instr in v
            if instr not in worked_define
        ]
        trans_dict = InstructionAnalyzer.find_state_transition_instructions(
            l_if_table, l_define_table
        )
        for trans in trans_dict:
            def_instr: MediumLevelILSetVar = trans["def_instr"]
            if_instr: MediumLevelILIf = trans["if_instr"]
            try:
                valid_paths = find_valid_paths(
                    G_full,
                    def_instr.instr_index,
                    if_instr.instr_index,
                    mlil,
                    state_vars,
                )
                # NOTE(review): this AssertionError is caught by the generic
                # handler below, so "too many paths" is only logged.
                assert len(valid_paths) <= 1, "too many paths"
                for path_data in valid_paths:
                    worked_if.add(trans["if_instr"])
                    worked_define.add(trans["def_instr"])
                    if path_data.patchInfo.type == "goto":
                        # Label pointing at the resolved destination.
                        target_label = MediumLevelILLabel()
                        target_label.operand = path_data.patchInfo.target.instr_index
                        will_patch_instr = path_data.patchInfo.instr
                        # New block: copies of the carried instructions
                        # followed by a goto to the destination.
                        new_block_label = MediumLevelILLabel()
                        mlil.mark_label(new_block_label)
                        for instr in path_data.unused_instrs:
                            mlil.append(mlil.copy_expr(instr))
                        mlil.append(mlil.goto(target_label))
                        # Redirect the patched instruction to the new block.
                        mlil.replace_expr(
                            will_patch_instr.expr_index,
                            mlil.goto(
                                new_block_label,
                                ILSourceLocation.from_instruction(will_patch_instr),
                            ),
                        )
                        updated = True
                        break
                    elif path_data.patchInfo.type == "if":
                        target_label = MediumLevelILLabel()
                        target_label.operand = path_data.patchInfo.target.instr_index
                        will_patch_instr: MediumLevelILIf = path_data.patchInfo.instr
                        new_block_label = MediumLevelILLabel()
                        mlil.mark_label(new_block_label)
                        for instr in path_data.unused_instrs:
                            mlil.append(mlil.copy_expr(instr))
                        mlil.append(mlil.goto(target_label))
                        # Rebuild the `if` so the branch named by
                        # patchInfo.branch enters the new block; the other
                        # branch keeps its original target.
                        if path_data.patchInfo.branch:
                            false_label = MediumLevelILLabel()
                            false_label.operand = will_patch_instr.false
                            new_if_instr = mlil.if_expr(
                                mlil.copy_expr(will_patch_instr.condition),
                                new_block_label,
                                false_label,
                                ILSourceLocation.from_instruction(will_patch_instr),
                            )
                        else:
                            true_label = MediumLevelILLabel()
                            true_label.operand = will_patch_instr.true
                            new_if_instr = mlil.if_expr(
                                mlil.copy_expr(will_patch_instr.condition),
                                true_label,
                                new_block_label,
                                ILSourceLocation.from_instruction(will_patch_instr),
                            )
                        # The constant state assignment is replaced by a nop.
                        mlil.replace_expr(def_instr.expr_index, mlil.nop())
                        mlil.replace_expr(
                            will_patch_instr.expr_index,
                            new_if_instr,
                        )
                        updated = True
                        break

            except nx.NetworkXNoPath:
                # NOTE(review): no call visible here raises NetworkXNoPath -
                # possibly a leftover from an earlier implementation.
                continue
            except Exception as e:
                log_error(f"Error in find_valid_paths: {e}")
                continue
            if updated:
                # One patch per round: regenerate the IL and restart the
                # analysis from scratch.
                mlil.finalize()
                mlil.generate_ssa_form()
                break
        if not updated:
            # Nothing was patched in this round; stop iterating.
            break
        # mlil.finalize()
        # mlil.generate_ssa_form()
    mlil.finalize()
    mlil.generate_ssa_form()
def pass_clear_SSA_const_if(analysis_context: AnalysisContext):
    """Replace ``if (state_var == CONST)`` with a goto when SSA proves the outcome.

    A block terminator qualifies when it is an equality comparison between a
    state variable and a constant, and the SSA definition of that variable
    is a 4-byte constant assignment. Both constants are compared after
    masking to 32 bits; the ``if`` is then replaced by a goto to the branch
    that would be taken.

    Args:
        analysis_context: Analysis context providing ``mlil`` and ``function``.

    Returns:
        None
    """
    mlil = analysis_context.mlil
    function = analysis_context.function

    def folded_outcome(terminator, state_vars):
        # True/False when the branch is decided, None when it does not qualify.
        if not isinstance(terminator, MediumLevelILIf):
            return None
        cond = terminator.condition
        if not isinstance(cond, MediumLevelILCmpE) or not hasattr(cond, "left"):
            return None
        lhs = cond.left
        if not isinstance(lhs, MediumLevelILVar) or lhs.var not in state_vars:
            return None
        rhs = cond.right
        if not isinstance(rhs, MediumLevelILConst):
            return None
        compared = rhs.constant & 0xFFFFFFFF
        definition = mlil.get_ssa_var_definition(lhs.ssa_form)
        if not isinstance(definition, MediumLevelILSetVar):
            return None
        if not isinstance(definition.src, MediumLevelILConst):
            return None
        if definition.dest.type.width != 4:
            return None
        assigned = definition.src.constant & 0xFFFFFFFF
        return compared == assigned

    for _ in range(len(mlil.basic_blocks)):
        changed = False
        state_vars = StateMachine.find_state_var(function)
        for bb in mlil.basic_blocks:
            terminator = bb[-1]
            outcome = folded_outcome(terminator, state_vars)
            if outcome is None:
                continue
            label = MediumLevelILLabel()
            label.operand = terminator.true if outcome else terminator.false
            replacement = mlil.goto(
                label, ILSourceLocation.from_instruction(terminator)
            )
            mlil.replace_expr(terminator.expr_index, replacement)
            changed = True
        if not changed:
            break
        mlil.finalize()
        mlil.generate_ssa_form()
    mlil.finalize()
    mlil.generate_ssa_form()
in range(len(mlil.basic_blocks)): 152 | updated = False 153 | # 遍历所有基本块 154 | for bb in mlil.basic_blocks: 155 | goto_instr = bb[-1] 156 | if not isinstance(goto_instr, MediumLevelILGoto): 157 | continue 158 | final_target_instr = optimize_goto(goto_instr) 159 | if final_target_instr.instr_index == goto_instr.dest: 160 | continue 161 | # 创建新的goto指令指向最终目标 162 | label = MediumLevelILLabel() 163 | label.operand = final_target_instr.instr_index 164 | new_goto = mlil.goto(label, ILSourceLocation.from_instruction(goto_instr)) 165 | updated = True 166 | mlil.replace_expr(goto_instr.expr_index, new_goto) 167 | if updated: 168 | # 更新MLIL 169 | mlil.finalize() 170 | mlil.generate_ssa_form() 171 | else: 172 | break 173 | mlil.finalize() 174 | mlil.generate_ssa_form() 175 | 176 | 177 | def pass_clear_if(analysis_context: AnalysisContext): 178 | """ 179 | 优化if语句中指向goto的分支 180 | 181 | 当if语句的then或else分支指向goto时,直接修改为指向goto的目标 182 | """ 183 | mlil = analysis_context.mlil 184 | 185 | def get_final_target(instr) -> MediumLevelILInstruction: 186 | """ 187 | 获取指令的最终目标,处理连续goto 188 | """ 189 | if isinstance(instr, MediumLevelILGoto): 190 | return get_final_target(mlil[instr.dest]) 191 | return instr 192 | 193 | for _ in range(len(mlil.basic_blocks)): 194 | updated = False 195 | for bb in mlil.basic_blocks: 196 | if_instr = bb[-1] 197 | if not isinstance(if_instr, MediumLevelILIf): 198 | continue 199 | true_target = get_final_target(mlil[if_instr.true]) 200 | false_target = get_final_target(mlil[if_instr.false]) 201 | if ( 202 | true_target.instr_index != if_instr.true 203 | or false_target.instr_index != if_instr.false 204 | ): 205 | true_label = MediumLevelILLabel() 206 | false_label = MediumLevelILLabel() 207 | true_label.operand = true_target.instr_index 208 | false_label.operand = false_target.instr_index 209 | new_if = mlil.if_expr( 210 | mlil.copy_expr(if_instr.condition), 211 | true_label, 212 | false_label, 213 | ILSourceLocation.from_instruction(if_instr), 214 | ) 215 | 
mlil.replace_expr(if_instr.expr_index, new_if) 216 | updated = True 217 | break 218 | if updated: 219 | mlil.finalize() 220 | mlil.generate_ssa_form() 221 | else: 222 | break 223 | 224 | mlil.finalize() 225 | mlil.generate_ssa_form() 226 | 227 | 228 | def merge_block( 229 | mlil: MediumLevelILFunction, 230 | instrs: List[MediumLevelILInstruction], 231 | pre_instrs: List[MediumLevelILInstruction], 232 | ) -> bool: 233 | # Validate pre_instrs types 234 | if pre_instrs and any( 235 | not isinstance(instr, (MediumLevelILGoto, MediumLevelILIf)) 236 | for instr in pre_instrs 237 | ): 238 | log_error(f"Invalid predecessor instructions: {pre_instrs}") 239 | return False 240 | 241 | if not instrs: 242 | return False 243 | 244 | # Create label for merged block start 245 | merged_label = MediumLevelILLabel() 246 | mlil.mark_label(merged_label) 247 | merged_operand = merged_label.operand 248 | 249 | # Copy instructions to new block 250 | for instr in instrs: 251 | mlil.append(mlil.copy_expr(instr)) 252 | 253 | # Redirect control flow from predecessors 254 | for pre_instr in pre_instrs: 255 | if isinstance(pre_instr, MediumLevelILGoto): 256 | # Replace Goto with jump to merged block 257 | new_goto = mlil.goto( 258 | merged_label, ILSourceLocation.from_instruction(pre_instr) 259 | ) 260 | mlil.replace_expr(pre_instr.expr_index, new_goto) 261 | 262 | elif isinstance(pre_instr, MediumLevelILIf): 263 | target_index = instrs[0].instr_index 264 | true_idx, false_idx = pre_instr.true, pre_instr.false 265 | 266 | # Validate branch targets 267 | if target_index not in (true_idx, false_idx): 268 | raise ValueError("If statement branches don't target merged block") 269 | 270 | # Create labels with appropriate targets 271 | def create_label(original_idx: int) -> MediumLevelILLabel: 272 | label = MediumLevelILLabel() 273 | label.operand = ( 274 | merged_operand if original_idx == target_index else original_idx 275 | ) 276 | return label 277 | 278 | true_label = create_label(true_idx) 279 | 
false_label = create_label(false_idx) 280 | 281 | # Create replacement If expression 282 | new_cond = mlil.copy_expr(pre_instr.condition) 283 | new_if = mlil.if_expr( 284 | new_cond, 285 | true_label, 286 | false_label, 287 | ILSourceLocation.from_instruction(pre_instr), 288 | ) 289 | mlil.replace_expr(pre_instr.expr_index, new_if) 290 | 291 | return True 292 | 293 | 294 | def pass_merge_block(analysis_context: AnalysisContext): 295 | "合并连续几个dirct block 为一个block" 296 | mlil = analysis_context.mlil 297 | if mlil is None: 298 | return 299 | for _ in range(len(mlil.basic_blocks)): 300 | block_cfg = CFGAnalyzer.create_cfg_graph(mlil) 301 | groups = CFGAnalyzer.find_cfg_groups(block_cfg) 302 | updated = False 303 | for group in groups: 304 | # 因为函数必须从0开始, 如果要求合并的话 需要特殊处理0部分,因此不处理 305 | if group[0] == 0: 306 | group.pop(0) 307 | blocks = [mlil.get_basic_block_at(idx) for idx in group] 308 | block0 = blocks[0] 309 | pre_blocks = CFGAnalyzer.MLIL_get_incoming_blocks(mlil, block0.start) 310 | pre_instrs = [x[-1] for x in pre_blocks] 311 | instrs = [] 312 | for x in blocks[:-1]: 313 | instrs += list(x)[:-1] 314 | instrs += list(blocks[-1]) 315 | if merge_block(mlil, instrs, pre_instrs): 316 | updated = True 317 | if updated: 318 | mlil.finalize() 319 | mlil.generate_ssa_form() 320 | else: 321 | break 322 | mlil.finalize() 323 | mlil.generate_ssa_form() 324 | 325 | 326 | def pass_swap_if(analysis_context: AnalysisContext): 327 | func = analysis_context.function 328 | mlil = func.mlil 329 | if mlil is None: 330 | return 331 | reverse_operations = { 332 | MediumLevelILOperation.MLIL_CMP_E: MediumLevelILOperation.MLIL_CMP_E, 333 | MediumLevelILOperation.MLIL_CMP_NE: MediumLevelILOperation.MLIL_CMP_NE, 334 | MediumLevelILOperation.MLIL_CMP_ULT: MediumLevelILOperation.MLIL_CMP_UGT, 335 | MediumLevelILOperation.MLIL_CMP_UGT: MediumLevelILOperation.MLIL_CMP_ULT, 336 | MediumLevelILOperation.MLIL_CMP_ULE: MediumLevelILOperation.MLIL_CMP_UGE, 337 | MediumLevelILOperation.MLIL_CMP_UGE: 
MediumLevelILOperation.MLIL_CMP_ULE, 338 | MediumLevelILOperation.MLIL_CMP_SLT: MediumLevelILOperation.MLIL_CMP_SGT, 339 | MediumLevelILOperation.MLIL_CMP_SGT: MediumLevelILOperation.MLIL_CMP_SLT, 340 | MediumLevelILOperation.MLIL_CMP_SLE: MediumLevelILOperation.MLIL_CMP_SGE, 341 | MediumLevelILOperation.MLIL_CMP_SGE: MediumLevelILOperation.MLIL_CMP_SLE, 342 | } 343 | if_instrs: List[MediumLevelILIf] = [] 344 | for block in mlil.basic_blocks: 345 | instr = block[-1] 346 | if isinstance(instr, MediumLevelILIf) and not isinstance( 347 | instr.condition, MediumLevelILVar 348 | ): 349 | if hasattr(instr.condition, "left") and hasattr(instr.condition, "right"): 350 | if isinstance(instr.condition.left, MediumLevelILConst) and isinstance( 351 | instr.condition.right, MediumLevelILVar 352 | ): 353 | if_instrs.append(instr) 354 | updated = False 355 | for if_instr in if_instrs: 356 | condition = if_instr.condition 357 | new_condition = mlil.expr( 358 | reverse_operations[condition.operation], 359 | mlil.copy_expr(condition.right), 360 | mlil.copy_expr(condition.left), 361 | 0, 362 | 0, 363 | 0, 364 | if_instr.size, 365 | ILSourceLocation.from_instruction(if_instr), 366 | ) 367 | true_label = MediumLevelILLabel() 368 | true_label.operand = if_instr.true 369 | 370 | false_label = MediumLevelILLabel() 371 | false_label.operand = if_instr.false 372 | 373 | new_if_instr = mlil.if_expr( 374 | new_condition, 375 | true_label, 376 | false_label, 377 | ILSourceLocation.from_instruction(if_instr), 378 | ) 379 | mlil.replace_expr(if_instr.expr_index, new_if_instr) 380 | updated = True 381 | if updated: 382 | mlil.finalize() 383 | mlil.generate_ssa_form() 384 | 385 | 386 | def handle_pre_last_instr(mlil: MediumLevelILFunction, pre_last_instr, bb, copy_label): 387 | if isinstance(pre_last_instr, MediumLevelILGoto): 388 | mlil.replace_expr( 389 | pre_last_instr.expr_index, 390 | mlil.goto(copy_label, ILSourceLocation.from_instruction(pre_last_instr)), 391 | ) 392 | elif 
isinstance(pre_last_instr, MediumLevelILIf): 393 | true_target = pre_last_instr.true 394 | false_target = pre_last_instr.false 395 | if true_target == bb.start: 396 | fix_false_label = MediumLevelILLabel() 397 | fix_false_label.operand = false_target 398 | mlil.replace_expr( 399 | pre_last_instr.expr_index, 400 | mlil.if_expr( 401 | mlil.copy_expr( 402 | pre_last_instr.condition, 403 | ), 404 | copy_label, 405 | fix_false_label, 406 | ILSourceLocation.from_instruction(pre_last_instr), 407 | ), 408 | ) 409 | elif false_target == bb.start: 410 | fix_true_label = MediumLevelILLabel() 411 | fix_true_label.operand = true_target 412 | mlil.replace_expr( 413 | pre_last_instr.expr_index, 414 | mlil.if_expr( 415 | mlil.copy_expr( 416 | pre_last_instr.condition, 417 | ), 418 | fix_true_label, 419 | copy_label, 420 | ILSourceLocation.from_instruction(pre_last_instr), 421 | ), 422 | ) 423 | else: 424 | log_error("ERROR IF") 425 | else: 426 | log_error("ERROR") 427 | 428 | 429 | def pass_copy_common_block_mid(analysis_context: AnalysisContext): 430 | mlil = analysis_context.function.mlil 431 | for _ in range(len(mlil.basic_blocks)): 432 | updated = False 433 | g = CFGAnalyzer.create_cfg_graph(mlil) 434 | for bb in mlil.basic_blocks: 435 | if bb.length > 5: 436 | continue 437 | pre_blocks = CFGAnalyzer.MLIL_get_incoming_blocks(mlil, bb.start) 438 | pre_instrs = [prebb[-1] for prebb in pre_blocks] 439 | if not all( 440 | isinstance(instr, MediumLevelILGoto) 441 | or isinstance(instr, MediumLevelILIf) 442 | for instr in pre_instrs 443 | ): 444 | continue 445 | if len(pre_blocks) <= 1: 446 | continue 447 | if CFGAnalyzer.is_node_in_loop(g, bb.start): 448 | continue 449 | for j in range(1, len(pre_blocks)): 450 | updated = True 451 | pre_block = pre_blocks[j] 452 | pre_last_instr = mlil[pre_block.end - 1] 453 | copy_label = MediumLevelILLabel() 454 | mlil.mark_label(copy_label) 455 | for copy_instr_index in range(bb.start, bb.end): 456 | 
mlil.append(mlil.copy_expr(mlil[copy_instr_index])) 457 | handle_pre_last_instr(mlil, pre_last_instr, bb, copy_label) 458 | break 459 | if updated: 460 | mlil.finalize() 461 | mlil.generate_ssa_form() 462 | else: 463 | break 464 | mlil.finalize() 465 | mlil.generate_ssa_form() 466 | 467 | 468 | def pass_clear(analysis_context: AnalysisContext): 469 | pass_clear_const_if(analysis_context) 470 | pass_clear_goto(analysis_context) 471 | pass_clear_if(analysis_context) 472 | pass_swap_if(analysis_context) 473 | pass_merge_block(analysis_context) 474 | pass_copy_common_block_mid(analysis_context) 475 | pass_clear_SSA_const_if(analysis_context) 476 | --------------------------------------------------------------------------------