├── src └── d810 │ ├── __init__.py │ ├── ui │ └── __init__.py │ ├── expr │ ├── __init__.py │ └── utils.py │ ├── hexrays │ └── __init__.py │ ├── optimizers │ ├── __init__.py │ └── microcode │ │ ├── flow │ │ ├── __init__.py │ │ ├── jumps │ │ │ ├── __init__.py │ │ │ └── tricks.py │ │ ├── flattening │ │ │ ├── __init__.py │ │ │ ├── utils.py │ │ │ ├── unflattener_switch_case.py │ │ │ ├── unflattener_indirect.py │ │ │ ├── unflattener_badwhile_loop.py │ │ │ └── unflattener_fake_jump.py │ │ ├── constant_prop │ │ │ └── __init__.py │ │ └── handler.py │ │ ├── instructions │ │ ├── __init__.py │ │ ├── z3 │ │ │ ├── __init__.py │ │ │ ├── handler.py │ │ │ ├── cst.py │ │ │ └── predicates.py │ │ ├── analysis │ │ │ ├── __init__.py │ │ │ ├── handler.py │ │ │ ├── utils.py │ │ │ └── pattern_guess.py │ │ ├── chain │ │ │ ├── __init__.py │ │ │ └── handler.py │ │ ├── early │ │ │ ├── __init__.py │ │ │ ├── handler.py │ │ │ └── mem_read.py │ │ ├── peephole │ │ │ ├── __init__.py │ │ │ ├── handler.py │ │ │ └── constant_call.py │ │ └── pattern_matching │ │ │ ├── __init__.py │ │ │ ├── experimental.py │ │ │ ├── rewrite_mov.py │ │ │ ├── weird.py │ │ │ ├── rewrite_neg.py │ │ │ └── rewrite_mul.py │ │ ├── __init__.py │ │ └── handler.py │ ├── _vendor │ ├── ida_reloader.pyi │ ├── typing_extensions.pyi │ ├── clang │ │ └── __init__.py │ ├── ida_reloader │ │ ├── __init__.py │ │ └── LICENSE │ ├── __init__.py │ └── vendor.txt │ ├── conf │ └── options.json │ ├── errors.py │ ├── _compat.py │ ├── singleton.py │ ├── core │ ├── singleton.py │ ├── ctree_snapshot.py │ ├── merkle.py │ ├── cymode.py │ ├── typing.py │ ├── patching.py │ └── platform.py │ └── project_manager.py ├── tests ├── unit │ ├── __init__.py │ ├── core │ │ ├── __init__.py │ │ ├── test_merkle.py │ │ ├── test_singleton.py │ │ ├── test_logging.py │ │ └── test_bits.py │ ├── optimizers │ │ └── __init__.py │ ├── conftest.py │ ├── test_singleton.py │ ├── tutils.py │ ├── test_loggers.py │ └── test_conf.py ├── system │ ├── __init__.py │ ├── expr │ │ ├── 
__init__.py │ │ └── test_ast_proxy.py │ ├── optimizers │ │ ├── __init__.py │ │ └── microcode │ │ │ ├── __init__.py │ │ │ └── instructions │ │ │ └── __init__.py │ ├── stutils.py │ └── test_optimization_rule.py ├── __init__.py └── conftest.py ├── AUTHORS.md ├── .gitattributes ├── resources └── d810ng-logo.png ├── samples ├── bins │ ├── fake_jumps.dylib │ ├── libobfuscated.dll │ ├── libobfuscated.pdb │ ├── libobfuscated.dylib │ ├── ollvm_obfuscated.dll │ ├── zygiskd-flattening │ ├── libobfuscated_darwin_arm64.dll │ ├── libobfuscated_darwin_arm64.dylib │ ├── libobfuscated_darwin_x86_64.dll │ └── libobfuscated_darwin_x86_64.dylib ├── Dockerfile.windows ├── include │ └── export.h ├── libobfuscated.def ├── src │ ├── c │ │ ├── stubs.c │ │ ├── while_switch_flattened.c │ │ ├── manually_obfuscated.c │ │ ├── unwrap_loops.c │ │ ├── abc_xor_dispatch.c │ │ └── tigress_obfuscated.c │ └── asm │ │ └── tiny_x64.asm └── BUILD.md ├── sgconfig.yml ├── docs └── source │ └── images │ ├── test_xor_after.png │ ├── test_xor_before.png │ ├── test_chained_add_after.png │ ├── test_runner_example-01.png │ ├── test_xor_before_white.png │ ├── gui_plugin_configuration.png │ ├── test_mba_guessing_after.png │ ├── test_mba_guessing_before.png │ ├── test_opaque_predicate_after.png │ ├── test_cst_simplification_after.png │ ├── test_cst_simplification_before.png │ ├── test_opaque_predicate_before.png │ └── test_runner_example-ctx-menu.png ├── ida-plugin.json ├── .isort.cfg ├── .gitignore ├── rules └── no-concrete-isinstance.yml ├── .github └── workflows │ └── python.yml ├── pyproject.toml └── scripts ├── ununicode.py └── converter.py /src/d810/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/d810/ui/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/d810/expr/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/d810/hexrays/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/system/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/d810/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/system/expr/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/system/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/flow/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /tests/system/optimizers/microcode/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/flow/jumps/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/flow/flattening/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/instructions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/instructions/z3/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/system/optimizers/microcode/instructions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/d810/_vendor/ida_reloader.pyi: -------------------------------------------------------------------------------- 1 | from ida_reloader import * -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/flow/constant_prop/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/instructions/analysis/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/instructions/chain/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/instructions/early/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/instructions/peephole/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/d810/_vendor/typing_extensions.pyi: -------------------------------------------------------------------------------- 1 | from typing_extensions import * -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/instructions/pattern_matching/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /AUTHORS.md: -------------------------------------------------------------------------------- 1 | # Authors list 2 | 3 | - Boris Batteux 4 | - w00tzenheimer 5 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | 2 | # Use bd merge for beads JSONL files 3 | .beads/issues.jsonl merge=beads 4 | -------------------------------------------------------------------------------- /resources/d810ng-logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/resources/d810ng-logo.png -------------------------------------------------------------------------------- /samples/bins/fake_jumps.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/samples/bins/fake_jumps.dylib -------------------------------------------------------------------------------- /samples/bins/libobfuscated.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/samples/bins/libobfuscated.dll -------------------------------------------------------------------------------- /samples/bins/libobfuscated.pdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/samples/bins/libobfuscated.pdb -------------------------------------------------------------------------------- /samples/bins/libobfuscated.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/samples/bins/libobfuscated.dylib -------------------------------------------------------------------------------- /samples/bins/ollvm_obfuscated.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/samples/bins/ollvm_obfuscated.dll -------------------------------------------------------------------------------- /samples/bins/zygiskd-flattening: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/samples/bins/zygiskd-flattening -------------------------------------------------------------------------------- /sgconfig.yml: -------------------------------------------------------------------------------- 1 | 
# ast-grep configuration for d810-ng 2 | # Run: sg scan --rule rules/ 3 | 4 | ruleDirs: 5 | - rules 6 | -------------------------------------------------------------------------------- /docs/source/images/test_xor_after.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/docs/source/images/test_xor_after.png -------------------------------------------------------------------------------- /docs/source/images/test_xor_before.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/docs/source/images/test_xor_before.png -------------------------------------------------------------------------------- /samples/bins/libobfuscated_darwin_arm64.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/samples/bins/libobfuscated_darwin_arm64.dll -------------------------------------------------------------------------------- /docs/source/images/test_chained_add_after.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/docs/source/images/test_chained_add_after.png -------------------------------------------------------------------------------- /docs/source/images/test_runner_example-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/docs/source/images/test_runner_example-01.png -------------------------------------------------------------------------------- /docs/source/images/test_xor_before_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/docs/source/images/test_xor_before_white.png 
-------------------------------------------------------------------------------- /samples/bins/libobfuscated_darwin_arm64.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/samples/bins/libobfuscated_darwin_arm64.dylib -------------------------------------------------------------------------------- /samples/bins/libobfuscated_darwin_x86_64.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/samples/bins/libobfuscated_darwin_x86_64.dll -------------------------------------------------------------------------------- /docs/source/images/gui_plugin_configuration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/docs/source/images/gui_plugin_configuration.png -------------------------------------------------------------------------------- /docs/source/images/test_mba_guessing_after.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/docs/source/images/test_mba_guessing_after.png -------------------------------------------------------------------------------- /docs/source/images/test_mba_guessing_before.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/docs/source/images/test_mba_guessing_before.png -------------------------------------------------------------------------------- /samples/bins/libobfuscated_darwin_x86_64.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/samples/bins/libobfuscated_darwin_x86_64.dylib -------------------------------------------------------------------------------- 
/docs/source/images/test_opaque_predicate_after.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/docs/source/images/test_opaque_predicate_after.png -------------------------------------------------------------------------------- /docs/source/images/test_cst_simplification_after.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/docs/source/images/test_cst_simplification_after.png -------------------------------------------------------------------------------- /docs/source/images/test_cst_simplification_before.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/docs/source/images/test_cst_simplification_before.png -------------------------------------------------------------------------------- /docs/source/images/test_opaque_predicate_before.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/docs/source/images/test_opaque_predicate_before.png -------------------------------------------------------------------------------- /docs/source/images/test_runner_example-ctx-menu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w00tzenheimer/d810-ng/HEAD/docs/source/images/test_runner_example-ctx-menu.png -------------------------------------------------------------------------------- /samples/Dockerfile.windows: -------------------------------------------------------------------------------- 1 | FROM debian:stable-slim 2 | RUN apt-get update && apt-get install -y clang lld mingw-w64 make && rm -rf /var/lib/apt/lists/* 3 | WORKDIR /work 4 | CMD ["make", "TARGET_OS=windows", "MINGW_SYSROOT=/usr"] 5 | 
-------------------------------------------------------------------------------- /samples/include/export.h: -------------------------------------------------------------------------------- 1 | #ifndef EXPORT_H 2 | #define EXPORT_H 3 | 4 | #ifdef _WIN32 5 | #define EXPORT __declspec(dllexport) 6 | #else 7 | #define EXPORT __attribute__((visibility("default"))) 8 | #endif 9 | 10 | #endif /* EXPORT_H */ 11 | -------------------------------------------------------------------------------- /tests/unit/conftest.py: -------------------------------------------------------------------------------- 1 | """Pytest configuration for unit tests. 2 | 3 | Unit tests verify rule correctness using Z3 and do not require IDA Pro. 4 | """ 5 | 6 | # Unit tests are self-contained and don't require additional fixtures. 7 | # Add unit-specific fixtures here if needed in the future. 8 | -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/__init__.py: -------------------------------------------------------------------------------- 1 | # early/ = pattern-based multi-instruction rewrites expressed with AST patterns 2 | # peephole/ = single-instruction canonicalisation & micro-optimisations 3 | # analysis/, z3/, … = non-mutating helpers or heavyweight reasoning passes 4 | # flow/ = control-flow-oriented transformations (flattening, opaque jumps, …) 5 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import sys 3 | 4 | try: 5 | import d810 6 | except ImportError: 7 | # Ensure 'd810' is importable by adding the sibling 'src' directory to sys.path 8 | current_dir = pathlib.Path(__file__).resolve().parent 9 | src_path = current_dir.parent / "src" 10 | if str(src_path) not in sys.path: 11 | sys.path.insert(0, str(src_path)) 12 | 
-------------------------------------------------------------------------------- /src/d810/optimizers/microcode/instructions/chain/handler.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | from d810.optimizers.microcode.instructions.handler import ( 4 | InstructionOptimizationRule, 5 | InstructionOptimizer, 6 | ) 7 | 8 | 9 | class ChainSimplificationRule(InstructionOptimizationRule): 10 | 11 | @abc.abstractmethod 12 | def check_and_replace(self, blk, ins): 13 | """Return a replacement instruction if the rule matches, otherwise None.""" 14 | 15 | 16 | class ChainOptimizer(InstructionOptimizer): 17 | RULE_CLASSES = [ChainSimplificationRule] 18 | -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/instructions/peephole/handler.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | from d810.optimizers.microcode.instructions.handler import ( 4 | InstructionOptimizationRule, 5 | InstructionOptimizer, 6 | ) 7 | 8 | 9 | class PeepholeSimplificationRule(InstructionOptimizationRule): 10 | 11 | @abc.abstractmethod 12 | def check_and_replace(self, blk, ins): 13 | """Return a replacement instruction if the rule matches, otherwise None.""" 14 | 15 | 16 | class PeepholeOptimizer(InstructionOptimizer): 17 | RULE_CLASSES = [PeepholeSimplificationRule] 18 | -------------------------------------------------------------------------------- /src/d810/conf/options.json: -------------------------------------------------------------------------------- 1 | { 2 | "erase_logs_on_reload": true, 3 | "generate_z3_code": false, 4 | "dump_intermediate_microcode": false, 5 | "log_dir": null, 6 | "configurations": [ 7 | "default_instruction_only.json", 8 | "default_unflattening_ollvm.json", 9 | "default_unflattening_switch_case.json", 10 | "default_unflattening_approov.json", 11 | "example_anel.json", 12 | 
"example_libobfuscated.json", 13 | "eidolon.json", 14 | "hodur_deobfuscation.json" 15 | ], 16 | "last_project_index": 0 17 | } 18 | -------------------------------------------------------------------------------- /samples/libobfuscated.def: -------------------------------------------------------------------------------- 1 | LIBRARY libobfuscated 2 | EXPORTS 3 | constant_folding_test1 4 | constant_folding_test2 5 | outlined_helper_1 6 | outlined_helper_2 7 | AntiDebug_ExceptionFilter 8 | test_chained_add 9 | test_cst_simplification 10 | test_opaque_predicate 11 | test_xor 12 | test_or 13 | test_and 14 | test_neg 15 | test_mba_guessing 16 | test_function_ollvm_fla_bcf_sub 17 | tigress_minmaxarray 18 | unwrap_loops 19 | unwrap_loops_2 20 | unwrap_loops_3 21 | while_switch_flattened 22 | NtCurrentTeb 23 | _hodur_func 24 | -------------------------------------------------------------------------------- /ida-plugin.json: -------------------------------------------------------------------------------- 1 | { 2 | "IDAMetadataDescriptorVersion": 1, 3 | "plugin": { 4 | "name": "d810-ng", 5 | "entryPoint": "src/D810.py", 6 | "categories": [ 7 | "decompilation", 8 | "debugging-and-tracing" 9 | ], 10 | "logoPath": "resources/d810ng-logo.png", 11 | "idaVersions": ">=9.0", 12 | "description": "D810-ng (Next Generation) is an updated, tested, refactored, and optimized IDA Pro plugin that serves as a framework and deobfuscation harness used to deobfuscate code at decompilation time by modifying IDA Pro microcode.", 13 | "version": "0.1.0" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/instructions/z3/handler.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | from d810.expr.ast import AstNode 4 | from d810.optimizers.microcode.instructions.handler import ( 5 | GenericPatternRule, 6 | InstructionOptimizer, 7 | ) 8 | 9 | 10 | class 
Z3Rule(GenericPatternRule): 11 | 12 | @property 13 | @abc.abstractmethod 14 | def PATTERN(self) -> AstNode: 15 | """Return the pattern to match.""" 16 | 17 | @property 18 | @abc.abstractmethod 19 | def REPLACEMENT_PATTERN(self) -> AstNode: 20 | """Return the replacement pattern.""" 21 | 22 | 23 | class Z3Optimizer(InstructionOptimizer): 24 | RULE_CLASSES = [Z3Rule] 25 | -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/instructions/early/handler.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | from d810.expr.ast import AstNode 4 | from d810.optimizers.microcode.instructions.handler import ( 5 | GenericPatternRule, 6 | InstructionOptimizer, 7 | ) 8 | 9 | 10 | class EarlyRule(GenericPatternRule): 11 | @property 12 | @abc.abstractmethod 13 | def PATTERN(self) -> AstNode: 14 | """Return the pattern to match.""" 15 | 16 | @property 17 | @abc.abstractmethod 18 | def REPLACEMENT_PATTERN(self) -> AstNode: 19 | """Return the replacement pattern.""" 20 | 21 | 22 | class EarlyOptimizer(InstructionOptimizer): 23 | RULE_CLASSES = [EarlyRule] 24 | -------------------------------------------------------------------------------- /tests/unit/core/test_merkle.py: -------------------------------------------------------------------------------- 1 | """Unit tests for the Merkle tree utilities.""" 2 | 3 | from d810.core.merkle import MerkleTree 4 | 5 | 6 | def test_build_root(): 7 | leaves = ["a", "b", "c", "d"] 8 | tree = MerkleTree(leaves) 9 | # Root hash should be non-empty and deterministic 10 | assert isinstance(tree.root, str) 11 | assert tree.leaves == leaves 12 | # Building the same tree again should yield the same root 13 | tree2 = MerkleTree(leaves) 14 | assert tree.root == tree2.root 15 | 16 | 17 | def test_diff(): 18 | tree1 = MerkleTree(["h1", "h2", "h3", "h4"]) 19 | tree2 = MerkleTree(["h1", "hX", "h3", "h4"]) 20 | diff = tree1.diff(tree2) 21 | assert 
diff == [1] 22 | -------------------------------------------------------------------------------- /src/d810/_vendor/clang/__init__.py: -------------------------------------------------------------------------------- 1 | # ===- __init__.py - Clang Python Bindings --------------------*- python -*--===# 2 | # 3 | # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 | # See https://llvm.org/LICENSE.txt for license information. 5 | # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | # 7 | # ===------------------------------------------------------------------------===# 8 | 9 | r""" 10 | Clang Library Bindings 11 | ====================== 12 | 13 | This package provides access to the Clang compiler and libraries. 14 | 15 | The available modules are: 16 | 17 | cindex 18 | 19 | Bindings for the Clang indexing library. 20 | """ 21 | 22 | __all__ = ["cindex"] 23 | -------------------------------------------------------------------------------- /tests/system/stutils.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import os 3 | 4 | import idaapi 5 | 6 | from d810.manager import D810State 7 | 8 | 9 | def pseudocode_to_string(pseudo_code: idaapi.strvec_t) -> str: 10 | converted_obj: list[str] = [ 11 | idaapi.tag_remove(line_obj.line) for line_obj in pseudo_code 12 | ] 13 | 14 | return os.linesep.join(converted_obj) 15 | 16 | 17 | @contextlib.contextmanager 18 | def d810_state(): 19 | state = D810State() # singleton 20 | if not (was_loaded := state.is_loaded()): 21 | state.load(gui=False) 22 | if not (was_started := state.manager.started): 23 | state.start_d810() 24 | yield state 25 | if not was_started: 26 | state.stop_d810() 27 | if not was_loaded: 28 | state.unload(gui=False) 29 | -------------------------------------------------------------------------------- /samples/src/c/stubs.c: -------------------------------------------------------------------------------- 1 | #include 
"polyfill.h" 2 | 3 | /* Stub definitions for undefined external symbols */ 4 | int get_external_value(void) { return 0; } 5 | void external_side_effect(int x) { (void)x; } 6 | int external_transform(int x) { return x; } 7 | void printf2(const char *fmt, ...) { (void)fmt; } 8 | int lolclose(unsigned __int64 hObject) { (void)hObject; return 0; } 9 | void unk_1802CCC58(int x) { (void)x; } 10 | void sub_1800D3BF0(int a1, int a2, int a3, int a4, __int64 a5) { (void)a1; (void)a2; (void)a3; (void)a4; (void)a5; } 11 | void sub_180221640(unsigned __int64 a1, int a2, int a3, unsigned __int64 a4, int a5, int a6) { (void)a1; (void)a2; (void)a3; (void)a4; (void)a5; (void)a6; } 12 | 13 | /* Stubs for approov_flattened.c */ 14 | void sub_D28B(void) { } 15 | void sub_216C8(void) { } 16 | void sub_258F0(void) { } 17 | void sub_26A5ae(void) { } 18 | 19 | -------------------------------------------------------------------------------- /src/d810/_vendor/ida_reloader/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | IDA Reloader - Hot-reload infrastructure for IDA plugins. 3 | 4 | This package provides hot-reload functionality with dependency graph analysis 5 | and cycle detection for IDA Pro plugins. 6 | """ 7 | 8 | from . 
class D810Exception(Exception):
    """Base class for all exceptions raised by the D810 plugin."""

    pass


class AstException(D810Exception):
    """Raised for errors while building or manipulating expression ASTs."""

    pass


class AstEvaluationException(AstException):
    """Raised when an AST cannot be evaluated to a concrete value."""

    pass


class D810Z3Exception(D810Exception):
    """Raised for errors while interacting with the Z3 solver."""

    pass


class ControlFlowException(D810Exception):
    """Raised for errors during control-flow analysis or rewriting."""

    pass


class EmulationException(D810Exception):
    """Base class for errors raised during microcode emulation."""

    pass


class EmulationIndirectJumpException(EmulationException):
    """Raised when emulation reaches an indirect jump it cannot follow.

    Carries the jump information so the caller can continue analysis:
    ``dest_ea`` is the destination address and ``dest_serial_list`` the
    candidate destination block serials.
    """

    def __init__(self, message, dest_ea, dest_serial_list):
        super().__init__(message)
        self.dest_ea = dest_ea
        self.dest_serial_list = dest_serial_list


class UnresolvedMopException(EmulationException):
    """Raised when a microcode operand's value cannot be resolved."""

    pass


class WritableMemoryReadException(EmulationException):
    """Raised on a read from writable memory, whose value is not static."""

    pass


class UnsupportedInstructionException(EmulationException):
    """Raised when the emulator meets an instruction it cannot handle."""

    pass
ensure_newline_before_comments=True 9 | known_idapro=idapro 10 | known_third_party=clang 11 | known_idalib=idaapi,ida_allins,ida_auto,ida_bitrange,ida_bytes,ida_dbg,ida_dirtree,ida_diskio,ida_entry,ida_expr,ida_fixup,ida_fpro,ida_frame,ida_funcs,ida_gdl,ida_graph,ida_hexrays,ida_ida,ida_idaapi,ida_idc,ida_idd,ida_idp,ida_ieee,ida_kernwin,ida_libfuncs,ida_lines,ida_loader,ida_merge,ida_mergemod,ida_moves,ida_nalt,ida_name,ida_netnode,ida_offset,ida_pro,ida_problems,ida_range,ida_regfinder,ida_registry,ida_search,ida_segment,ida_segregs,ida_srclang,ida_strlist,ida_tryblks,ida_typeinf,ida_ua,ida_undo,ida_xref,idautils,idc 12 | known_first_party=capa,unicorn,keystone,angr,capstone,mutilz 13 | sections=FUTURE,STDLIB,THIRDPARTY,IDAPRO,IDALIB,FIRSTPARTY,LOCALFOLDER 14 | no_lines_before=IDALIB -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/__pycache__/ 2 | **/.idea/ 3 | **/.vscode/ 4 | **/venv/ 5 | **~ 6 | **.pyc 7 | **.log 8 | 9 | <<<<<<< Updated upstream 10 | samples/src/*.d 11 | samples/src/*.o 12 | samples/bins/* 13 | !samples/bins/**/*.dll 14 | !samples/bins/**/*.so 15 | ======= 16 | # AI tools 17 | .cursorindexignore 18 | .cursorignore 19 | .cursorrules 20 | .cursor/ 21 | 22 | # AI CLI Crap 23 | GEMINI.md 24 | CLAUDE.md 25 | CODEX.md 26 | ANTHROPIC.md 27 | 28 | 29 | # C 30 | build/ 31 | c*?_debug/ 32 | 33 | # ionide 34 | .fake 35 | 36 | # Project Specific 37 | samples/src/**/*.d 38 | samples/src/**/*.o 39 | samples/bins/* 40 | !samples/bins/**/*.dll 41 | !samples/bins/**/*.so 42 | !samples/bins/**/*.dylib 43 | 44 | !src/include/**/*.h 45 | !src/include/**/*.hpp 46 | !src/include/**/*.hxx 47 | !src/include/**/*.hpp 48 | 49 | src/d810/**/*.cpp 50 | src/d810/**/*.so 51 | src/d810/**/*.dll 52 | src/d810/**/*.html 53 | src/d810/**/*.so.dSYM 54 | 55 | .env 56 | _indexable 57 | >>>>>>> Stashed changes 58 | 
-------------------------------------------------------------------------------- /src/d810/_vendor/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | d810._vendor is for vendoring dependencies of d810 to prevent dependency 3 | conflicts with IDA Pro's Python environment. 4 | 5 | Vendored dependencies are isolated in this namespace to avoid conflicts with: 6 | - IDA Pro's bundled packages 7 | - User-installed packages in IDA's Python environment 8 | - System packages 9 | 10 | This follows the same pattern as pip's vendoring: 11 | https://github.com/pypa/pip/tree/main/src/pip/_vendor 12 | 13 | Usage: 14 | # Instead of: 15 | from miasm.arch.x86.ira import ir_a_x86_32 16 | 17 | # Use: 18 | from d810._vendor.miasm.arch.x86.ira import ir_a_x86_32 19 | 20 | Vendored packages are checked into git as full source directories. 21 | See vendor.txt for the list of vendored dependencies. 22 | """ 23 | 24 | # NOTE: Unlike pip, we don't need debundling support since d810 is not 25 | # packaged by downstream redistributors. This keeps the implementation simple. 26 | 27 | __all__ = [] # No exports needed - vendored packages are imported directly 28 | -------------------------------------------------------------------------------- /rules/no-concrete-isinstance.yml: -------------------------------------------------------------------------------- 1 | # Detect isinstance() with concrete classes that may break on hot reload 2 | # Use @runtime_checkable Protocol instead for structural typing 3 | # 4 | # BAD: isinstance(expr, SymbolicExpression) 5 | # GOOD: isinstance(expr, SymbolicExpressionProtocol) 6 | # 7 | # See: REFACTORING.md "Key Lessons Learned" #8 8 | # See: d810ng-0cw for the bug this caused 9 | 10 | id: no-concrete-isinstance 11 | language: python 12 | severity: warning 13 | message: | 14 | isinstance() with concrete class may break after hot reload. 15 | Use a @runtime_checkable Protocol instead for structural typing. 
16 | 17 | rule: 18 | pattern: isinstance($OBJ, $CLASS) 19 | 20 | note: | 21 | After hot reload, class identity changes between module load cycles. 22 | Use @runtime_checkable Protocol for structural typing instead. 23 | 24 | Allowlist (ignore these warnings): 25 | - *Protocol suffix (correct pattern) 26 | - Builtins: int, str, float, bool, list, dict, tuple, set, bytes 27 | - Abstract bases: Exception, BaseException, ABC 28 | -------------------------------------------------------------------------------- /samples/src/asm/tiny_x64.asm: -------------------------------------------------------------------------------- 1 | ; ---------------------------------------------------------------------------------------- 2 | ; 3 | ; To assemble: 4 | ; 5 | ; nasm -fmacho64 tiny_x64.asm; clang -arch x86_64 tiny_x64.o -o ../../bins/tiny_x64 && rm -f tiny_x64.o 6 | ; 7 | ; To run: 8 | ; 9 | ; ../../bins/tiny_x64 10 | ; 11 | ; On macOS, this will emit a warning ld: warning: no platform load command ... which can be 12 | ; ignored since it is the correct output for the macOS platform. 
13 | ; ---------------------------------------------------------------------------------------- 14 | 15 | global _main 16 | extern _puts 17 | 18 | section .text 19 | _main: push rbx ; Call stack must be aligned 20 | lea rdi, [rel message] ; First argument is address of message 21 | call _puts ; puts(message) 22 | pop rbx ; Fix up stack before returning 23 | ret ; invoke operating system to exit 24 | 25 | section .data 26 | message: db "Hello, World", 10 ; note the newline at the end -------------------------------------------------------------------------------- /src/d810/_vendor/ida_reloader/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Mahmoud Rusty Abdelkader 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/d810/_compat.py: -------------------------------------------------------------------------------- 1 | # typing backports 2 | from __future__ import annotations 3 | 4 | import sys 5 | 6 | # Multiple python version compatible import for typing.override 7 | if sys.version_info >= (3, 12): 8 | from typing import override # noqa: F401 9 | else: 10 | from typing_extensions import override # noqa: F401 11 | 12 | # Multiple python version compatible import for Self, StrEnum, 13 | if sys.version_info >= (3, 11): 14 | from typing import Self # noqa: F401 15 | else: 16 | from typing_extensions import Self # noqa: F401 17 | 18 | # Multiple python version compatible import for typing.NotRequired 19 | if sys.version_info >= (3, 11): 20 | from typing import NotRequired # noqa: F401 21 | else: 22 | from typing_extensions import NotRequired # noqa: F401 23 | 24 | 25 | # Multiple python version compatible import for typing.LiteralString 26 | if sys.version_info >= (3, 11): 27 | from typing import LiteralString # noqa: F401 28 | else: 29 | from typing_extensions import LiteralString # noqa: F401 30 | 31 | 32 | # Multiple python version compatible import for typing.TypeAliasType 33 | if sys.version_info >= (3, 12): 34 | from typing import TypeAliasType # noqa: F401 35 | else: 36 | from typing_extensions import TypeAliasType # noqa: F401 37 | -------------------------------------------------------------------------------- /tests/unit/core/test_singleton.py: -------------------------------------------------------------------------------- 1 | import dataclasses 2 | import unittest 3 | 4 | from d810.core import SingletonMeta, singleton 5 | 6 | 7 | class TestSingleton(unittest.TestCase): 8 | 9 | def test_singleton_decorator(self): 10 | @singleton 11 | class MyClass: 12 | def __init__(self, value: int) -> None: 13 | self.value = value 14 | 15 | a = MyClass(10) 16 | b = MyClass(20) 17 | 
import dataclasses
import unittest

from d810.singleton import SingletonMeta, singleton


class TestSingleton(unittest.TestCase):
    """Behavioural checks for the ``singleton`` decorator and ``SingletonMeta``."""

    def test_singleton_decorator(self):
        """A decorated class always hands back the first-constructed instance."""

        @singleton
        class MyClass:
            def __init__(self, value: int) -> None:
                self.value = value

        first = MyClass(10)
        second = MyClass(20)
        self.assertIs(first, second)
        # Later constructor arguments are ignored: both names see value 10.
        for instance in (first, second):
            self.assertEqual(instance.value, 10)

    def test_singleton_meta(self):
        """Using SingletonMeta directly as metaclass behaves the same way."""

        class DirectSingleton(metaclass=SingletonMeta):
            def __init__(self, x: int) -> None:
                self.x = x

        earlier: DirectSingleton = DirectSingleton(1)
        later: DirectSingleton = DirectSingleton(2)
        self.assertIs(earlier, later)
        self.assertEqual(earlier.x, 1)
        self.assertEqual(later.x, 1)

    def test_singleton_decorator_on_dataclass(self):
        """The decorator composes with @dataclass-generated __init__."""

        @singleton
        @dataclasses.dataclass
        class DataSingleton:
            a: int
            b: int

        primary = DataSingleton(5, 6)
        duplicate = DataSingleton(7, 8)
        self.assertIs(primary, duplicate)
        self.assertEqual((primary.a, primary.b), (5, 6))
        self.assertEqual((duplicate.a, duplicate.b), (5, 6))
32 | """ 33 | 34 | class SingletonWrapper(cls, metaclass=SingletonMeta): 35 | pass 36 | 37 | SingletonWrapper.__name__ = cls.__name__ 38 | SingletonWrapper.__doc__ = cls.__doc__ 39 | SingletonWrapper.__module__ = cls.__module__ 40 | return typing.cast(typing.Type[T], SingletonWrapper) 41 | -------------------------------------------------------------------------------- /src/d810/core/singleton.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import typing 3 | 4 | T = typing.TypeVar("T") 5 | 6 | 7 | class SingletonMeta(type): 8 | """ 9 | Thread-safe implementation of Singleton metaclass. 10 | Can also be used as a decorator. 11 | """ 12 | 13 | _instances: dict[type, object] = {} 14 | _locks: dict[type, threading.Lock] = {} 15 | 16 | def __call__(cls: type[T], *args: typing.Any, **kwargs: typing.Any) -> T: 17 | if cls not in SingletonMeta._instances: 18 | # use class-level _lock if defined, else fallback to internal lock 19 | lock: threading.Lock = getattr( 20 | cls, "_lock", SingletonMeta._locks.setdefault(cls, threading.Lock()) 21 | ) 22 | with lock: 23 | if cls not in SingletonMeta._instances: 24 | instance = type.__call__(cls, *args, **kwargs) 25 | SingletonMeta._instances[cls] = instance 26 | return typing.cast(T, SingletonMeta._instances[cls]) 27 | 28 | 29 | def singleton(cls: typing.Type[T]) -> typing.Type[T]: 30 | """ 31 | Decorator to apply SingletonMeta behavior to a class. 
32 | """ 33 | 34 | class SingletonWrapper(cls, metaclass=SingletonMeta): 35 | pass 36 | 37 | SingletonWrapper.__name__ = cls.__name__ 38 | SingletonWrapper.__doc__ = cls.__doc__ 39 | SingletonWrapper.__module__ = cls.__module__ 40 | return typing.cast(typing.Type[T], SingletonWrapper) 41 | -------------------------------------------------------------------------------- /samples/src/c/while_switch_flattened.c: -------------------------------------------------------------------------------- 1 | #include "polyfill.h" 2 | 3 | const uint64 qword_1802D2C33 = 0x736006A871C63D9ALL; 4 | 5 | // Does not fold ok 6 | uint64 while_switch_flattened(void) 7 | { 8 | int v2 = 0; 9 | int rval = 0x272BCB9A; 10 | int v3 = 0; 11 | int v4 = 0; 12 | int v5 = 0; 13 | uint64 a3 = 0; 14 | uint64 globalConstant = __ROL8__( 15 | (__ROL8__( 16 | __ROL8__(0x736006A871C63D9ALL, 0x28) - 0x43401825757A7203LL, 17 | 0x2B) + 18 | 0x789F447F89C06931LL) ^ 19 | 0x43B6AE2CD812A432LL, 20 | 0x2F); 21 | while (1) 22 | { 23 | switch (v2) 24 | { 25 | case 0: 26 | v3 = *(_QWORD *)NtCurrentTeb()->NtTib.FiberData; 27 | v2 = 1; 28 | break; 29 | 30 | case 1: 31 | v4 = __ROL8__(globalConstant, 0x11); 32 | v2 = 2; 33 | break; 34 | 35 | case 2: 36 | a3 = __ROL8__( 37 | (*(__int64 *)((char *)&qword_1802D2C33 + 3) ^ v4 ^ 0xCD455C5FB8140C43uLL) - 0x789F447F89C06931LL, 38 | 0x15) + 39 | 0x507AF58A6E5A51E3LL; 40 | v2 = 3; 41 | break; 42 | 43 | case 3: 44 | v5 = __ROL8__(a3 - 0xD3ADD64F8DFDFE0LL, 0x18); 45 | v2 = 4; 46 | break; 47 | 48 | case 4: 49 | rval = v3 == v5; 50 | return rval; 51 | 52 | default: 53 | continue; 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/handler.py: -------------------------------------------------------------------------------- 1 | import ida_hexrays 2 | 3 | from d810.hexrays.hexrays_formatters import string_to_maturity 4 | 5 | DEFAULT_INSTRUCTION_MATURITIES = [ 6 | ida_hexrays.MMAT_LOCOPT, 7 | 
class OptimizationRule:
    """Base class for every D810 optimization rule.

    Subclasses may set NAME / DESCRIPTION class attributes; otherwise the
    concrete class name and a placeholder description are reported.
    """

    NAME = None
    DESCRIPTION = None

    def __init__(self):
        self.maturities = []
        self.config = {}
        self.log_dir = None
        self.dump_intermediate_microcode = False

    def set_log_dir(self, log_dir):
        """Remember the directory used for this rule's log/dump output."""
        self.log_dir = log_dir

    def configure(self, kwargs):
        """Store the rule configuration and apply the recognised option keys."""
        self.config = {} if kwargs is None else kwargs
        if "maturities" in self.config:
            self.maturities = [
                string_to_maturity(name) for name in self.config["maturities"]
            ]
        if "dump_intermediate_microcode" in self.config:
            self.dump_intermediate_microcode = self.config[
                "dump_intermediate_microcode"
            ]

    @property
    def name(self):
        """Rule name: NAME when set, else the concrete class name."""
        return self.__class__.__name__ if self.NAME is None else self.NAME

    @property
    def description(self):
        """Rule description: DESCRIPTION when set, else a placeholder."""
        return (
            "No description available"
            if self.DESCRIPTION is None
            else self.DESCRIPTION
        )
class MockIdaDiskio:
    """Minimal stand-in for IDA's ``ida_diskio`` module used by unit tests."""

    @staticmethod
    def get_user_idadir():
        return Path("mock_idadir")


@contextmanager
def temp_ida_dir():
    """Context manager that sets MockIdaDiskio to a fresh temporary directory."""

    saved = MockIdaDiskio.get_user_idadir
    with tempfile.TemporaryDirectory() as tmp_name:
        tmp_path = Path(tmp_name)
        MockIdaDiskio.get_user_idadir = staticmethod(lambda: tmp_path)
        try:
            yield tmp_path
        finally:
            # Always restore the original accessor, even if the body raised.
            MockIdaDiskio.get_user_idadir = saved
module.ProjectConfiguration, module.RuleConfiguration 42 | finally: 43 | # Restore original module or remove dummy 44 | if orig is not None: 45 | sys.modules["ida_diskio"] = orig 46 | else: 47 | sys.modules.pop("ida_diskio", None) 48 | -------------------------------------------------------------------------------- /src/d810/_vendor/vendor.txt: -------------------------------------------------------------------------------- 1 | # d810 Vendored Dependencies 2 | # 3 | # This file lists vendored dependencies that are automatically synced 4 | # using the `vendoring` tool (https://pypi.org/project/vendoring/). 5 | # 6 | # To update vendored packages: 7 | # python -m vendoring sync 8 | # 9 | # To update a specific package version: 10 | # 1. Update version in this file 11 | # 2. Run: python -m vendoring sync 12 | # 3. Test thoroughly 13 | # 14 | # All packages must be available on PyPI. 15 | 16 | # ida-reloader - Hot-reload infrastructure for IDA Pro plugins 17 | # Source: https://github.com/mahmoudimus/ida-reloader (PyPI: https://pypi.org/project/ida-reloader/) 18 | # Reason: Core infrastructure for plugin hot-reloading 19 | # Provides: Reloader, DependencyGraph, _Scanner, reload_package() 20 | ida-reloader==0.1.0 21 | 22 | # typing_extensions - Backport of the latest typing features 23 | # Source: https://github.com/python/typing_extensions (PyPI: https://pypi.org/project/typing-extensions/) 24 | # Reason: Ensure consistent typing support across Python 3.10-3.13 25 | # Provides: Protocol, TypedDict, Literal, Final, override, Self, etc. 26 | typing_extensions==4.15.0 27 | 28 | # clang - Python bindings for the Clang indexing library 29 | # Source: https://github.com/llvm/llvm-project/tree/main/clang/bindings/python 30 | # NOTE: Manually vendored (not available as standalone PyPI package) 31 | # Reason: Required for C/C++ AST parsing in test infrastructure 32 | # Provides: cindex module (Index, TranslationUnit, Cursor, etc.) 
33 | # License: Apache-2.0 WITH LLVM-exception 34 | clang==13.0.1+d810 35 | 36 | # Candidates for future vendoring: 37 | # - miasm2: If we need specific version/patches for IDA compatibility 38 | # - z3-solver: If bundling is needed for binary distributions 39 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "d810" 7 | requires-python = ">= 3.10" 8 | version = "0.1.0" 9 | authors = [ 10 | { name = "w00tzenheimer", email = "w00tzenheimer@gmail.com" }, 11 | { name = "Boris Batteux", email = "boris.batteux@eshard.com" }, 12 | ] 13 | description = "This is a next generation version of D-810, an IDA Pro plugin which can be used to deobfuscate code at decompilation time by modifying IDA Pro microcode." 14 | dependencies = ["z3-solver", "typing-extensions"] 15 | 16 | [project.optional-dependencies] 17 | dev = [ 18 | "pyinstrument", 19 | "pytest>=7.0.0", 20 | "pytest-cov>=4.0.0", 21 | "coverage[toml]>=7.0.0", 22 | ] 23 | 24 | [tool.setuptools] 25 | package-dir = { "" = "src" } 26 | 27 | [tool.setuptools.packages.find] 28 | where = ["src"] 29 | 30 | # pytest configuration 31 | [tool.pytest.ini_options] 32 | testpaths = ["tests"] 33 | python_files = ["test_*.py"] 34 | python_classes = ["Test*"] 35 | python_functions = ["test_*"] 36 | markers = [ 37 | "pure_python: Tests that run without IDA Pro (fast, no external dependencies)", 38 | "requires_ida: Tests that require IDA Pro to run", 39 | "slow: Slow tests (>10s) - typically Z3 verification or complex deobfuscation", 40 | ] 41 | 42 | # Coverage configuration 43 | [tool.coverage.run] 44 | source = ["src/d810"] 45 | omit = [ 46 | "*/tests/*", 47 | "*/test_*", 48 | "*/__pycache__/*", 49 | ] 50 | 51 | [tool.coverage.report] 52 | exclude_lines = [ 53 | "pragma: no cover", 
import unittest

import ida_hexrays

from d810.optimizers.microcode.handler import OptimizationRule


class TestOptimizationRule(unittest.TestCase):
    """Basic tests for the OptimizationRule class."""

    def test_name_and_description_defaults(self):
        """Without NAME/DESCRIPTION overrides, the fallbacks are used."""
        rule = OptimizationRule()
        self.assertEqual(rule.name, "OptimizationRule")
        self.assertEqual(rule.description, "No description available")

    def test_set_log_dir(self):
        """set_log_dir() records the directory verbatim."""
        rule = OptimizationRule()
        rule.set_log_dir("/tmp/logs")
        self.assertEqual(rule.log_dir, "/tmp/logs")

    def test_configure_none(self):
        """configure(None) leaves an empty config and no maturities."""
        rule = OptimizationRule()
        rule.configure(None)
        self.assertEqual(rule.config, {})
        self.assertEqual(rule.maturities, [])

    def test_configure_empty_dict(self):
        """An empty dict behaves like no configuration at all."""
        rule = OptimizationRule()
        rule.configure({})
        self.assertEqual(rule.config, {})
        self.assertEqual(rule.maturities, [])

    def test_configure_with_maturities(self):
        """Maturity names are converted to Hex-Rays MMAT_* constants."""
        rule = OptimizationRule()
        settings = {"maturities": ["LOCOPT", "CALLS"]}
        rule.configure(settings)
        # Config stored verbatim.
        self.assertEqual(rule.config, settings)
        # Names mapped onto the corresponding Hex-Rays constants.
        self.assertEqual(
            rule.maturities, [ida_hexrays.MMAT_LOCOPT, ida_hexrays.MMAT_CALLS]
        )


if __name__ == "__main__":  # for standalone runs
    unittest.main()
class InstructionAnalyzer(InstructionOptimizer):
    """Runs InstructionAnalysisRule instances over microcode instructions.

    Unlike a rewriting optimizer, the rules driven here only observe
    instructions (e.g. to collect statistics or guess patterns):
    ``analyze`` never produces a replacement instruction.
    """

    RULE_CLASSES = [InstructionAnalysisRule]

    def set_maturity(self, maturity: int):
        """Record the current microcode maturity and push it to every rule."""
        self.cur_maturity = maturity
        for rule in self.rules:
            rule.set_maturity(self.cur_maturity)

    def analyze(self, blk: mblock_t, ins: minsn_t):
        """Feed one instruction to every rule, when the maturity matches.

        Returns None in all cases: analysis rules have nothing to substitute.
        A RuntimeError raised by a rule is logged and does not stop the
        remaining rules from running.
        """
        if blk is not None:
            self.cur_maturity = blk.mba.maturity

        if self.cur_maturity not in self.maturities:
            return None

        for rule in self.rules:
            try:
                rule.analyze_instruction(blk, ins)
            except RuntimeError:
                # Fix: lazy %-style arguments (instead of eager str.format)
                # and logger.exception so the failing rule's traceback is
                # preserved in the log for diagnosis.
                optimizer_logger.exception(
                    "error during rule %s for instruction %s",
                    rule,
                    format_minsn_t(ins),
                )

    @property
    def name(self):
        """Analyzer name: NAME when set, else the concrete class name."""
        if self.NAME is not None:
            return self.NAME
        return self.__class__.__name__
class ReplaceMovHigh(PatternMatchingRule):
    """Rewrite a 16-bit mov into a register's high half as a full-width or/and.

    IDA does not do constant propagation for patterns such as:

        mov #0x65A4.2, r6.2
        mov #0x210F.2, r6^2.2
        jz r0.4, r6.4

    Thus, we detect a mov of a 2-byte constant to the high half (``r6^2``)
    and replace it with ``or #0x210F0000.4, (r6.4 & 0x0000FFFF.4), r6.4``.
    By doing that, IDA constant propagation will work again.
    """

    def check_candidate(self, candidate):
        # Only register destinations are relevant (mop_r).
        if candidate.dst_mop.t != mop_r:
            return False
        dst_reg_name = format_mop_t(candidate.dst_mop)
        if dst_reg_name is None:
            return False
        # "^2" in the formatted name marks a write at offset 2 inside the
        # register, i.e. its high 16-bit half.
        if "^2" in dst_reg_name:
            if candidate["c_0"].mop.size != 2:
                return False
            # Shift the 16-bit constant into the high half of a 4-byte value.
            candidate.add_constant_leaf("new_c_0", candidate["c_0"].value << 16, 4)
            candidate.add_constant_leaf("mask", 0xFFFF, 4)
            # Retarget the write at the full 4-byte register: r - 2 rebases
            # from the high-half sub-register back to the register start.
            new_dst_reg = mop_t()
            new_dst_reg.make_reg(candidate.dst_mop.r - 2, 4)
            candidate.add_leaf("new_reg", new_dst_reg)
            candidate.dst_mop = new_dst_reg
            return True
        else:
            return False

    @property
    def PATTERN(self) -> AstNode:
        """Match a mov of a constant (the 2-byte size is checked in check_candidate)."""
        return AstNode(m_mov, AstConstant("c_0"))

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        """Replacement: or(new_c_0, and(new_reg, mask))."""
        return AstNode(
            m_or,
            AstConstant("new_c_0"),
            AstNode(m_and, AstLeaf("new_reg"), AstConstant("mask")),
        )
def check_if_all_values_are_found(mop_cst_values_list):
    """Return True when no constant list contains an unresolved (None) value.

    An empty input trivially counts as "all found".
    """
    return all(None not in cst_list for cst_list in mop_cst_values_list)
def get_possible_patterns(
    ast, min_nb_use=2, ref_ast_info_by_index=None, max_nb_pattern=64
):
    """Enumerate candidate patterns for *ast* by optionally folding reused sub-trees into leaves.

    A sub-tree used at least *min_nb_use* times may be abstracted into a fresh
    AstLeaf named ``x_<index>``; otherwise its children are expanded recursively.
    *max_nb_pattern* caps the number of returned patterns to avoid memory
    explosion on very large inputs.
    """
    if ast.is_leaf():
        return [ast]
    if ref_ast_info_by_index is None:
        # Lazily compute usage statistics for the whole tree on the first call.
        if ast.ast_index not in ast.sub_ast_info_by_index.keys():
            ast.compute_sub_ast()
        ref_ast_info_by_index = ast.sub_ast_info_by_index
    patterns = []
    if ref_ast_info_by_index[ast.ast_index].number_of_use >= min_nb_use:
        # This sub-tree is reused often enough to be represented by a single leaf.
        folded_leaf = AstLeaf("x_{0}".format(ast.ast_index))
        folded_leaf.mop = ast.mop
        folded_leaf.ast_index = ast.ast_index
        patterns.append(folded_leaf)
    left_variants = (
        get_possible_patterns(ast.left, min_nb_use, ref_ast_info_by_index, max_nb_pattern)
        if ast.left is not None
        else []
    )
    right_variants = (
        get_possible_patterns(ast.right, min_nb_use, ref_ast_info_by_index, max_nb_pattern)
        if ast.right is not None
        else []
    )

    # Combine every left variant with every right variant (or alone for unary ops).
    for left_variant in left_variants:
        if ast.right is not None:
            combined = [
                AstNode(ast.opcode, left_variant, right_variant)
                for right_variant in right_variants
            ]
        else:
            combined = [AstNode(ast.opcode, left_variant)]
        for node in combined:
            node.mop = ast.mop
            node.ast_index = ast.ast_index
            if len(patterns) < max_nb_pattern:
                patterns.append(node)
    return patterns
class TestLoggerConfigurator(unittest.TestCase):
    """Unit tests for ``LoggerConfigurator``/``getLogger`` from ``d810.core``.

    NOTE(review): this class is almost identical to the one in
    tests/unit/test_loggers.py; consider consolidating to avoid drift.
    """

    def setUp(self):
        # Ensure a test logger exists under our D810 prefix
        self.prefix = "D810"
        self.test_logger_name = f"{self.prefix}.testunit"
        # Create and reset the test logger
        self.logger = getLogger(self.test_logger_name)
        self.logger.setLevel(logging.WARNING)
        # Also ensure the root prefix logger exists
        self.root_logger = logging.getLogger(self.prefix)
        self.root_logger.setLevel(logging.WARNING)

    def test_available_loggers_with_prefix(self):
        names = LoggerConfigurator.available_loggers(self.prefix)
        # The test logger and root prefix should be listed
        self.assertIn(self.test_logger_name, names)
        self.assertIn(self.prefix, names)

    def test_available_loggers_without_prefix(self):
        names = LoggerConfigurator.available_loggers()
        # At minimum, core D810 logger should appear
        self.assertIn("D810", names)

    def test_set_level_changes_level(self):
        # Change to DEBUG and verify
        LoggerConfigurator.set_level(self.test_logger_name, "DEBUG")
        self.assertEqual(self.logger.level, logging.DEBUG)

    def test_set_level_invalid_raises(self):
        # An unknown level name must be rejected, not silently ignored.
        with self.assertRaises(ValueError):
            LoggerConfigurator.set_level(self.test_logger_name, "NOTALEVEL")

    def test_mdc_maturity_update(self):
        """Ensure that the maturity value is carried via the MDC and accessible."""
        maturity_val = "LOCOPT"
        log = getLogger(self.test_logger_name)
        log.update_maturity(maturity_val)
        self.assertEqual(log.get_mdc("maturity"), maturity_val)


if __name__ == "__main__":
    unittest.main()
# GetIdentRule1: ((x_0 & x_1) + (x_0 & ~x_1)) == x_0
class GetIdentRule1(PatternMatchingRule):
    """Fold the masked-add identity back to the plain operand."""

    def check_candidate(self, candidate):
        # Valid only when the two second operands are bitwise complements.
        return bool(equal_bnot_mop(candidate["x_1"].mop, candidate["bnot_x_1"].mop))

    @property
    def PATTERN(self) -> AstNode:
        return AstNode(
            m_add,
            AstNode(m_and, AstLeaf("x_0"), AstLeaf("x_1")),
            AstNode(m_and, AstLeaf("x_0"), AstLeaf("bnot_x_1")),
        )

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        return AstNode(m_mov, AstLeaf("x_0"))


# GetIdentRule2: ((x_0 & x_1) ^ (x_0 & ~x_1)) == x_0
class GetIdentRule2(PatternMatchingRule):
    """Fold the masked-xor identity back to the plain operand."""

    def check_candidate(self, candidate):
        # Valid only when the two second operands are bitwise complements.
        return bool(equal_bnot_mop(candidate["x_1"].mop, candidate["bnot_x_1"].mop))

    @property
    def PATTERN(self) -> AstNode:
        return AstNode(
            m_xor,
            AstNode(m_and, AstLeaf("x_0"), AstLeaf("x_1")),
            AstNode(m_and, AstLeaf("x_0"), AstLeaf("bnot_x_1")),
        )

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        return AstNode(m_mov, AstLeaf("x_0"))


# GetIdentRule3: absorption law (x_0 & (x_0 | x_1)) == x_0
class GetIdentRule3(PatternMatchingRule):
    """Fold the boolean absorption identity back to the plain operand."""

    @property
    def PATTERN(self) -> AstNode:
        return AstNode(
            m_and, AstLeaf("x_0"), AstNode(m_or, AstLeaf("x_0"), AstLeaf("x_1"))
        )

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        return AstNode(m_mov, AstLeaf("x_0"))
class TestLoggerConfigurator(unittest.TestCase):
    """Unit tests for ``LoggerConfigurator``/``getLogger`` from ``d810.conf.loggers``.

    NOTE(review): this class is almost identical to the one in
    tests/unit/core/test_logging.py; consider consolidating to avoid drift.
    """

    def setUp(self):
        # Ensure a test logger exists under our D810 prefix
        self.prefix = "D810"
        self.test_logger_name = f"{self.prefix}.testunit"
        # Create and reset the test logger
        self.logger = getLogger(self.test_logger_name)
        self.logger.setLevel(logging.WARNING)
        # Also ensure the root prefix logger exists
        self.root_logger = logging.getLogger(self.prefix)
        self.root_logger.setLevel(logging.WARNING)

    def test_available_loggers_with_prefix(self):
        names = LoggerConfigurator.available_loggers(self.prefix)
        # The test logger and root prefix should be listed
        self.assertIn(self.test_logger_name, names)
        self.assertIn(self.prefix, names)

    def test_available_loggers_without_prefix(self):
        names = LoggerConfigurator.available_loggers()
        # At minimum, core D810 logger should appear
        self.assertIn("D810", names)

    def test_set_level_changes_level(self):
        # Change to DEBUG and verify
        LoggerConfigurator.set_level(self.test_logger_name, "DEBUG")
        self.assertEqual(self.logger.level, logging.DEBUG)

    def test_set_level_invalid_raises(self):
        # An unknown level name must be rejected, not silently ignored.
        with self.assertRaises(ValueError):
            LoggerConfigurator.set_level(self.test_logger_name, "NOTALEVEL")

    def test_mdc_maturity_update(self):
        """Ensure that the maturity value is carried via the MDC and accessible."""
        maturity_val = "LOCOPT"
        log = getLogger(self.test_logger_name)
        log.update_maturity(maturity_val)
        self.assertEqual(log.get_mdc("maturity"), maturity_val)


if __name__ == "__main__":
    unittest.main()
class TestAstProxyForwarding(unittest.TestCase):
    """Validate that AstProxy correctly forwards attribute access and implements clone-on-write."""

    def setUp(self):
        # Create a constant leaf 0x42 (8-bit)
        self.leaf = AstLeaf("const_42")
        const_mop = get_constant_mop(0x42, 1)
        self.leaf.mop = const_mop
        self.leaf.dest_size = 1
        self.leaf.ea = 0x1000
        self.leaf.ast_index = 7

        # Freeze to force clone-on-write later
        self.leaf.freeze()
        self.proxy = AstProxy(self.leaf)

    def test_attribute_forwarding(self):
        """Read access through proxy should match underlying leaf."""
        self.assertIs(self.proxy.mop, self.leaf.mop)
        self.assertEqual(self.proxy.dest_size, self.leaf.dest_size)
        self.assertEqual(self.proxy.ea, self.leaf.ea)
        self.assertEqual(self.proxy.ast_index, self.leaf.ast_index)

    def test_clone_on_write(self):
        """Writing through proxy must not mutate the frozen original object."""
        # Mutate via proxy
        self.proxy.dest_size = 2
        self.proxy.ea = 0x2000

        # Original leaf stays untouched
        self.assertEqual(self.leaf.dest_size, 1)
        self.assertEqual(self.leaf.ea, 0x1000)

        # Proxy now points to a distinct, mutable clone
        # NOTE(review): _target is a private attribute of AstProxy; this test
        # depends on that implementation detail.
        self.assertEqual(self.proxy.dest_size, 2)
        self.assertEqual(self.proxy.ea, 0x2000)
        self.assertIsNot(self.proxy._target, self.leaf)

    def test_size_and_dst_mop(self):
        """Verify size and dst_mop forwarding and mutability."""
        orig_size = self.proxy.size
        self.assertEqual(orig_size, 1)

        # dst_mop is alias of mop on AstLeaf; check getter
        self.assertIs(self.proxy.dst_mop, self.proxy.mop)

        # Change via setter and ensure reflected
        new_mop = get_constant_mop(0x55, 1)
        self.proxy.dst_mop = new_mop
        self.assertIs(self.proxy.mop, new_mop)
        self.assertIs(self.proxy.dst_mop, new_mop)

    def test_value_forwarding_for_constant(self):
        """AstProxy should expose the .value property of an AstConstant leaf."""

        const_leaf = AstConstant("cst", expected_value=0x99, expected_size=1)
        const_leaf.mop = get_constant_mop(0x99, 1)
        const_leaf.freeze()

        proxy = AstProxy(const_leaf)
        self.assertEqual(proxy.value, 0x99)
class FlowOptimizationRule(OptimizationRule, Registrant, abc.ABC):
    """Base class for control-flow level optimization rules.

    Adds per-function whitelist/blacklist support on top of OptimizationRule:
    a rule may be restricted to (or excluded from) a set of function start
    addresses supplied as hexadecimal strings in the configuration.
    """

    def __init__(self):
        super().__init__()
        self._current_maturity = ida_hexrays.MMAT_ZERO
        self.maturities = DEFAULT_FLOW_MATURITIES
        self.use_whitelist = False
        self.whitelisted_function_ea_list: list[int] = []
        self.use_blacklist = False
        self.blacklisted_function_ea_list: list[int] = []

    @property
    def current_maturity(self):
        """Microcode maturity level the rule is currently running at."""
        return self._current_maturity

    @current_maturity.setter
    def current_maturity(self, maturity_level):
        self._current_maturity = maturity_level

    @abc.abstractmethod
    def optimize(self, blk):
        """Perform the optimization on *blk* and return the number of changes."""
        raise NotImplementedError

    def _load_function_list(self, config_key: str, label: str) -> list[int]:
        # Addresses are stored as hex strings in the configuration file.
        ea_list = [int(func_ea, 16) for func_ea in self.config[config_key]]
        func_name_list = [idc.get_func_name(ea) for ea in ea_list]
        logger.info(
            "{0} functions for {1}: {2} -> {3}".format(
                label, self.__class__.__name__, ea_list, func_name_list
            )
        )
        return ea_list

    def configure(self, kwargs):
        """Apply configuration, (re)loading the whitelist/blacklist settings."""
        super().configure(kwargs)
        self.use_whitelist = False
        self.whitelisted_function_ea_list = []
        self.use_blacklist = False
        self.blacklisted_function_ea_list = []
        if "whitelisted_functions" in self.config:
            self.use_whitelist = True
            self.whitelisted_function_ea_list = self._load_function_list(
                "whitelisted_functions", "Whitelisted"
            )
        if "blacklisted_functions" in self.config:
            self.use_blacklist = True
            # BUG FIX: this branch previously iterated
            # self.config["whitelisted_functions"], so the blacklist was
            # silently populated from the whitelist (and raised KeyError when
            # only a blacklist was configured).
            self.blacklisted_function_ea_list = self._load_function_list(
                "blacklisted_functions", "Blacklisted"
            )
        if "dump_intermediate_microcode" in self.config:
            self.dump_intermediate_microcode = self.config[
                "dump_intermediate_microcode"
            ]
When 13 | running inside IDA, ``cfunc_t`` objects expose APIs to traverse the 14 | ctree; here we fall back to serialising the object's `repr` and any 15 | additional attributes that are JSON serialisable. 16 | """ 17 | 18 | from __future__ import annotations 19 | 20 | import json 21 | from typing import Any, Dict, Optional 22 | 23 | 24 | def serialize_ctree(ctree: Any) -> Dict[str, Any]: 25 | """Serialise a ctree object into a JSON‑serialisable dictionary. 26 | 27 | In a real IDA environment, this function should traverse the 28 | ``cinsn_t`` nodes and record the structure of the ctree. In this 29 | implementation we fall back to storing the string representation 30 | of the ctree along with any primitive attributes that may exist. 31 | 32 | Parameters 33 | ---------- 34 | ctree : Any 35 | The ctree object (e.g. ``cfunc_t``) to serialise. 36 | 37 | Returns 38 | ------- 39 | dict 40 | A dictionary that can be written to JSON. 41 | """ 42 | data: Dict[str, Any] = {"repr": repr(ctree)} 43 | # Attempt to extract simple attributes from the ctree 44 | for attr in ("ea", "name", "maturity"): 45 | if hasattr(ctree, attr): 46 | try: 47 | val = getattr(ctree, attr) 48 | # Only store JSON‑serialisable values 49 | json.dumps(val) 50 | data[attr] = val 51 | except Exception: 52 | pass 53 | return data 54 | 55 | 56 | def deserialize_ctree(data: Dict[str, Any]) -> Any: 57 | """Deserialize a ctree snapshot back into a Python object. 58 | 59 | Without the Hex‑Rays SDK, this function simply returns the stored 60 | representation string. In a real environment, one could recreate 61 | a ``cfunc_t`` or use the IDA decompiler API to inject the snapshot 62 | back into the decompiler. 
# Single-pass character mapping: every source key is a single code point and
# every replacement is plain ASCII, so one str.translate pass is equivalent to
# the chained str.replace calls it supersedes.
_CLEAN_MAP = str.maketrans(
    {
        "–": "-",  # en-dash -> hyphen
        "‐": "-",  # unicode hyphen -> hyphen
        "\u00a0": " ",  # non-breaking space -> regular space
        "\u200b": "",  # zero-width space -> removed
        "\u200c": "",  # zero-width non-joiner -> removed
        "\u200d": "",  # zero-width joiner -> removed
        "\u202f": " ",  # narrow no-break space -> regular space
        "\u2060": "",  # word joiner -> removed
        "\ufeff": "",  # zero-width no-break space (BOM) -> removed
        "\u2192": "->",  # right arrow -> ->
        "—": "-",  # em-dash -> hyphen
        "’": "'",  # right single quote -> straight
        "‘": "'",  # left single quote -> straight
        "“": '"',  # left double quote -> straight
        "”": '"',  # right double quote -> straight
        "•": "-",  # bullet -> hyphen
        "◦": "*",  # white bullet -> asterisk
    }
)


def clean_text(txt: str) -> str:
    """Replace problematic Unicode punctuation/whitespace with ASCII equivalents."""
    return txt.translate(_CLEAN_MAP)
def main():
    """CLI entry point: read text, clean Unicode oddities, write the result.

    Modes (mutually arranged by argparse):
      * stdin -> stdout when no files are given;
      * input_file -> stdout / output_file;
      * input_file edited in place with --in-place.
    """
    parser = argparse.ArgumentParser(
        description="Clean weird Unicode characters in Python source (stdin→stdout by default)."
    )
    parser.add_argument(
        "-i",
        "--in-place",
        action="store_true",
        help="Edit files in-place instead of writing to stdout",
    )
    parser.add_argument(
        "input_file", nargs="?", help="Path to input .py file (reads stdin if omitted)"
    )
    parser.add_argument(
        "output_file",
        nargs="?",
        help="Path to output file (ignored if --in-place; writes stdout if omitted)",
    )

    args = parser.parse_args()

    # in-place requires an input_file
    if args.in_place and not args.input_file:
        parser.error("--in-place requires an input_file")

    try:
        # read
        if args.input_file:
            try:
                content = Path(args.input_file).read_text(encoding="utf-8")
            except FileNotFoundError:
                # parser.error() exits via SystemExit, which is NOT caught by
                # the enclosing "except Exception" below.
                parser.error(f"File '{args.input_file}' not found")
        else:
            if sys.stdin.isatty():
                parser.error("No input_file and nothing piped in")
            content = sys.stdin.read()

        cleaned = clean_text(content)

        # write
        if args.in_place:
            Path(args.input_file).write_text(cleaned, encoding="utf-8")
        elif args.output_file:
            Path(args.output_file).write_text(cleaned, encoding="utf-8")
        else:
            sys.stdout.write(cleaned)

    except Exception as e:
        # Catch-all: report any I/O/encoding failure through argparse's
        # error channel (exit code 2 on stderr).
        parser.error(f"Unexpected error: {e}")


if __name__ == "__main__":
    main()
FLATTENING_JUMP_OPCODES = [m_jtbl]


class TigressSwitchDispatcherBlockInfo(GenericDispatcherBlockInfo):
    # No switch-specific per-block state is needed; the generic base suffices.
    pass


class TigressSwitchDispatcherInfo(GenericDispatcherInfo):
    """Describes a Tigress switch-based dispatcher rooted at a jump-table block."""

    def explore(self, blk: mblock_t):
        """Populate dispatcher info from *blk*; return True when it is a jtbl entry block."""
        self.reset()
        if not self._is_candidate_for_dispatcher_entry_block(blk):
            return False
        self.mop_compared, mcases = self._get_comparison_info(blk)
        # The candidate check above guarantees a jtbl tail, so mcases is set.
        assert mcases is not None
        self.entry_block = TigressSwitchDispatcherBlockInfo(blk)
        self.entry_block.parse()
        # Everything the entry block reads is assumed to be defined upstream.
        for used_mop in self.entry_block.use_list:
            append_mop_if_not_in_list(used_mop, self.entry_block.assume_def_list)
        self.dispatcher_internal_blocks.append(self.entry_block)
        for possible_values, target_block_serial in zip(
            mcases.c.values, mcases.c.targets
        ):
            # A case that loops back to the entry block is not a real exit.
            if target_block_serial == self.entry_block.blk.serial:
                continue
            exit_block = TigressSwitchDispatcherBlockInfo(
                blk.mba.get_mblock(target_block_serial), self.entry_block
            )
            self.dispatcher_exit_blocks.append(exit_block)
            if len(possible_values) == 0:
                continue
            # Only the first value of each case is recorded as a comparison value.
            self.comparison_values.append(possible_values[0])
        return True

    def _get_comparison_info(self, blk: mblock_t):
        """Return (compared operand, jump-table cases) for a jtbl tail, else (None, None)."""
        # blk.tail must be a jtbl
        if (blk.tail is None) or (blk.tail.opcode != m_jtbl):
            return None, None
        return blk.tail.l, blk.tail.r

    def _is_candidate_for_dispatcher_entry_block(self, blk: mblock_t):
        # Entry candidates are exactly the blocks ending in a jump table.
        if (blk.tail is None) or (blk.tail.opcode != m_jtbl):
            return False
        return True


class TigressSwitchDispatcherCollector(GenericDispatcherCollector):
    # Thresholds tuned for switch dispatchers: no internal blocks required,
    # but at least two exits / two distinct comparison values.
    DISPATCHER_CLASS = TigressSwitchDispatcherInfo
    DEFAULT_DISPATCHER_MIN_INTERNAL_BLOCK = 0
    DEFAULT_DISPATCHER_MIN_EXIT_BLOCK = 2
    DEFAULT_DISPATCHER_MIN_COMPARISON_VALUE = 2


class UnflattenerSwitchCase(GenericDispatcherUnflatteningRule):
    DESCRIPTION = "Remove control flow flattening generated by Tigress with Switch case dispatcher"
    DEFAULT_UNFLATTENING_MATURITIES = [
        MMAT_CALLS,
        MMAT_GLBOPT1,
        MMAT_GLBOPT2,
        MMAT_GLBOPT3,
    ]
    DEFAULT_MAX_DUPLICATION_PASSES = 20
    DEFAULT_MAX_PASSES = 7

    @property
    def DISPATCHER_COLLECTOR_CLASS(self) -> type[GenericDispatcherCollector]:
        """Return the class of the dispatcher collector."""
        return TigressSwitchDispatcherCollector
15 | """ 16 | 17 | from __future__ import annotations 18 | 19 | import hashlib 20 | from dataclasses import dataclass 21 | from typing import List, Optional 22 | 23 | 24 | def _sha256_concat(left: str, right: str) -> str: 25 | """Hash the concatenation of two hex digests and return a new hex digest.""" 26 | return hashlib.sha256((left + right).encode("utf-8")).hexdigest() 27 | 28 | 29 | @dataclass 30 | class MerkleTree: 31 | """A simple Merkle tree built from a list of leaf hashes.""" 32 | 33 | leaves: List[str] 34 | levels: List[List[str]] 35 | 36 | def __init__(self, leaves: List[str]) -> None: 37 | if not leaves: 38 | raise ValueError("Cannot build a Merkle tree with no leaves") 39 | # Copy the leaf list to avoid external mutation 40 | self.leaves = list(leaves) 41 | # Build levels from leaves up to the root 42 | self.levels = [self.leaves] 43 | current_level = self.leaves 44 | while len(current_level) > 1: 45 | next_level: List[str] = [] 46 | # Iterate over pairs; if odd number of nodes, duplicate last 47 | for i in range(0, len(current_level), 2): 48 | left = current_level[i] 49 | right = current_level[i + 1] if i + 1 < len(current_level) else current_level[i] 50 | parent = _sha256_concat(left, right) 51 | next_level.append(parent) 52 | self.levels.append(next_level) 53 | current_level = next_level 54 | 55 | @property 56 | def root(self) -> str: 57 | """Return the root hash of the Merkle tree.""" 58 | return self.levels[-1][0] 59 | 60 | def diff(self, other: "MerkleTree") -> List[int]: 61 | """Return indices of leaves that differ between this tree and another. 62 | 63 | The two trees must have the same number of leaves. Differences are 64 | determined by comparing corresponding leaf hashes. 65 | 66 | Parameters 67 | ---------- 68 | other : MerkleTree 69 | The other Merkle tree to compare against. 70 | 71 | Returns 72 | ------- 73 | list[int] 74 | A list of indices of leaves where the two trees differ. 
75 | """ 76 | if len(self.leaves) != len(other.leaves): 77 | raise ValueError("Cannot diff Merkle trees with different number of leaves") 78 | differing_indices: List[int] = [] 79 | for idx, (h1, h2) in enumerate(zip(self.leaves, other.leaves)): 80 | if h1 != h2: 81 | differing_indices.append(idx) 82 | return differing_indices 83 | 84 | def to_dict(self) -> dict: 85 | """Serialise the Merkle tree to a JSON‑serialisable dictionary.""" 86 | return {"leaves": self.leaves, "levels": self.levels} 87 | 88 | @classmethod 89 | def from_dict(cls, data: dict) -> "MerkleTree": 90 | """Reconstruct a Merkle tree from a dictionary returned by :meth:`to_dict`.""" 91 | tree = cls(data["leaves"]) 92 | # Overwrite computed levels with stored levels in case they differ 93 | tree.levels = data["levels"] 94 | return tree -------------------------------------------------------------------------------- /src/d810/expr/utils.py: -------------------------------------------------------------------------------- 1 | import ctypes 2 | 3 | from d810.cache import CacheImpl 4 | from d810.hexrays.hexrays_helpers import MSB_TABLE 5 | 6 | CTYPE_SIGNED_TABLE = { 7 | 1: ctypes.c_int8, 8 | 2: ctypes.c_int16, 9 | 4: ctypes.c_int32, 10 | 8: ctypes.c_int64, 11 | } 12 | CTYPE_UNSIGNED_TABLE = { 13 | 1: ctypes.c_uint8, 14 | 2: ctypes.c_uint16, 15 | 4: ctypes.c_uint32, 16 | 8: ctypes.c_uint64, 17 | } 18 | 19 | 20 | def get_all_subclasses(python_class): 21 | python_class.__subclasses__() 22 | 23 | subclasses = set() 24 | check_these = [python_class] 25 | 26 | while check_these: 27 | parent = check_these.pop() 28 | for child in parent.__subclasses__(): 29 | if child not in subclasses: 30 | subclasses.add(child) 31 | check_these.append(child) 32 | 33 | return sorted(subclasses, key=lambda x: x.__name__) 34 | 35 | 36 | def unsigned_to_signed(unsigned_value, nb_bytes): 37 | return CTYPE_SIGNED_TABLE[nb_bytes](unsigned_value).value 38 | 39 | 40 | def signed_to_unsigned(signed_value, nb_bytes): 41 | return 
CTYPE_UNSIGNED_TABLE[nb_bytes](signed_value).value 42 | 43 | 44 | def get_msb(value, nb_bytes): 45 | return (value & MSB_TABLE[nb_bytes]) >> (nb_bytes * 8 - 1) 46 | 47 | 48 | def get_add_cf(op1, op2, nb_bytes): 49 | res = op1 + op2 50 | return get_msb((((op1 ^ op2) ^ res) ^ ((op1 ^ res) & (~(op1 ^ op2)))), nb_bytes) 51 | 52 | 53 | def get_add_of(op1, op2, nb_bytes): 54 | res = op1 + op2 55 | return get_msb(((op1 ^ res) & (~(op1 ^ op2))), nb_bytes) 56 | 57 | 58 | def get_sub_cf(op1, op2, nb_bytes): 59 | res = op1 - op2 60 | return get_msb((((op1 ^ op2) ^ res) ^ ((op1 ^ res) & (op1 ^ op2))), nb_bytes) 61 | 62 | 63 | def get_sub_of(op1, op2, nb_bytes): 64 | res = op1 - op2 65 | return get_msb(((op1 ^ res) & (op1 ^ op2)), nb_bytes) 66 | 67 | 68 | def get_parity_flag(op1, op2, nb_bytes): 69 | tmp = CTYPE_UNSIGNED_TABLE[nb_bytes](op1 - op2).value 70 | return (bin(tmp).count("1") + 1) % 2 71 | 72 | 73 | def ror(x, n, nb_bits=32): 74 | mask = (2**n) - 1 75 | mask_bits = x & mask 76 | return (x >> n) | (mask_bits << (nb_bits - n)) 77 | 78 | 79 | def rol(x, n, nb_bits=32): 80 | return ror(x, nb_bits - n, nb_bits) 81 | 82 | 83 | def __rol__(value: int, count: int, bits: int) -> int: 84 | """ 85 | Rotate left on an unsigned integer of given bit width. 86 | """ 87 | mask = (1 << bits) - 1 88 | count %= bits 89 | value &= mask 90 | return ((value << count) & mask) | (value >> (bits - count)) 91 | 92 | 93 | def __ror__(value: int, count: int, bits: int) -> int: 94 | """ 95 | Rotate right on an unsigned integer of given bit width. 
96 | """ 97 | return __rol__(value, -count, bits) 98 | 99 | 100 | def __ROL1__(value: int, count: int) -> int: 101 | return __rol__(value, count, 8) 102 | 103 | 104 | def __ROL2__(value: int, count: int) -> int: 105 | return __rol__(value, count, 16) 106 | 107 | 108 | def __ROL4__(value: int, count: int) -> int: 109 | return __rol__(value, count, 32) 110 | 111 | 112 | def __ROL8__(value: int, count: int) -> int: 113 | return __rol__(value, count, 64) 114 | 115 | 116 | def __ROR1__(value: int, count: int) -> int: 117 | return __ror__(value, count, 8) 118 | 119 | 120 | def __ROR2__(value: int, count: int) -> int: 121 | return __ror__(value, count, 16) 122 | 123 | 124 | def __ROR4__(value: int, count: int) -> int: 125 | return __ror__(value, count, 32) 126 | 127 | 128 | def __ROR8__(value: int, count: int) -> int: 129 | return __ror__(value, count, 64) 130 | 131 | 132 | MOP_CONSTANT_CACHE = CacheImpl( 133 | max_size=20480, 134 | survive_reload=True, 135 | reload_key="_SHARED_MOP_CONSTANT_CACHE", 136 | ) 137 | 138 | MOP_TO_AST_CACHE = CacheImpl( 139 | max_size=20480, 140 | survive_reload=True, 141 | reload_key="_SHARED_MOP_TO_AST_CACHE", 142 | ) 143 | -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/instructions/z3/cst.py: -------------------------------------------------------------------------------- 1 | import typing 2 | 3 | from ida_hexrays import * 4 | 5 | from d810 import _compat 6 | from d810.conf.loggers import getLogger 7 | from d810.errors import AstEvaluationException 8 | from d810.expr.ast import AstConstant, AstNode, minsn_to_ast 9 | from d810.expr.z3_utils import z3_check_mop_equality 10 | from d810.hexrays.hexrays_formatters import format_minsn_t 11 | from d810.optimizers.microcode.instructions.z3.handler import Z3Rule 12 | 13 | logger = getLogger(__name__) 14 | 15 | 16 | class Z3ConstantOptimization(Z3Rule): 17 | DESCRIPTION = "Detect and replace obfuscated constants" 18 | 19 | def 
class Z3ConstantOptimization(Z3Rule):
    """Detect expressions that always evaluate to one constant and fold them.

    A candidate is a single-variable expression with enough opcodes and
    constants to look obfuscated; if evaluating at two distinct inputs yields
    the same value and Z3 confirms equivalence, it is replaced by
    ``mov #constant``.
    """

    DESCRIPTION = "Detect and replace obfuscated constants"

    def __init__(self):
        super().__init__()
        # Minimum complexity thresholds before the (expensive) check runs;
        # both are tunable via configure().
        self.min_nb_opcode = 3
        self.min_nb_constant = 3

    @property
    def PATTERN(self) -> AstNode | None:
        """Return the pattern to match."""
        # No syntactic pattern: candidates are screened in check_and_replace.
        return

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        return AstNode(m_mov, AstConstant("c_res"))

    @_compat.override
    def configure(self, kwargs):
        super().configure(kwargs)
        if "min_nb_opcode" in kwargs.keys():
            self.min_nb_opcode = kwargs["min_nb_opcode"]
        if "min_nb_constant" in kwargs.keys():
            self.min_nb_constant = kwargs["min_nb_constant"]

    @_compat.override
    def check_and_replace(self, blk: mblock_t, instruction: minsn_t) -> minsn_t | None:
        """Return a ``mov #cst`` replacement when *instruction* is provably constant, else None."""
        tmp = minsn_to_ast(instruction)
        if tmp is None:
            return None
        leaf_info_list, cst_leaf_values, opcodes = tmp.get_information()
        leaf_num = len(leaf_info_list)

        # Screen: exactly one variable leaf, and enough opcodes/constants to
        # plausibly be an obfuscated constant.
        if (
            leaf_num != 1
            or len(opcodes) < self.min_nb_opcode
            or len(cst_leaf_values) < self.min_nb_constant
        ):
            return None

        if logger.debug_on:
            logger.debug("Found candidate: %s", format_minsn_t(instruction))
        try:
            # Cheap pre-check: evaluate at two extreme inputs; a true constant
            # expression must produce the same value for both.
            val_0 = tmp.evaluate_with_leaf_info(leaf_info_list, [0])  # * leaf_num)
            val_1 = tmp.evaluate_with_leaf_info(
                leaf_info_list, [0xFFFFFFFF]
            )  # * leaf_num)
            if logger.debug_on:
                logger.debug(" val_0: %s, val_1: %s", val_0, val_1)
            if val_0 != val_1 or tmp.mop is None:
                return None

            # TODO(w00tzenheimer): if we're evaluating (evaluate_with_leaf_info) and the results are equal,
            # why do we need to run the z3 equality check?
            # why can't this simply be:
            # if val_0 != val_1 or tmp.mop is None:
            #     return None
            # tmp.add_constant_leaf("c_res", val_0, tmp.mop.size)
            # tmp.compute_sub_ast()
            # new_instruction = self.get_replacement(typing.cast(AstNode, tmp))
            # return new_instruction
            c_res_mop = mop_t()
            c_res_mop.make_number(val_0, tmp.mop.size)
            # Z3 proves the equivalence for all inputs, not just the two probed.
            if z3_check_mop_equality(tmp.mop, c_res_mop):
                if logger.debug_on:
                    logger.debug(" z3_check_mop_equality is equal")

                tmp.add_constant_leaf("c_res", val_0, tmp.mop.size)
                # TODO(w00tzenheimer): should we recompute caches so that leafs_by_name contains the new constant leaf?
                # tmp.compute_sub_ast()
                new_instruction = self.get_replacement(typing.cast(AstNode, tmp))
                return new_instruction
        except ZeroDivisionError:
            logger.error("ZeroDivisionError while evaluating %s", tmp, exc_info=True)
        except AstEvaluationException as e:
            logger.error("Error while evaluating %s: %s", tmp, e, exc_info=True)

    @_compat.override
    def check_candidate(self, candidate: AstNode) -> bool:
        """Return True if the candidate matches the rule, otherwise False."""
        return True
70 | # why can't this simply be: 71 | # if val_0 != val_1 or tmp.mop is None: 72 | # return None 73 | # tmp.add_constant_leaf("c_res", val_0, tmp.mop.size) 74 | # tmp.compute_sub_ast() 75 | # new_instruction = self.get_replacement(typing.cast(AstNode, tmp)) 76 | # return new_instruction 77 | c_res_mop = mop_t() 78 | c_res_mop.make_number(val_0, tmp.mop.size) 79 | if z3_check_mop_equality(tmp.mop, c_res_mop): 80 | if logger.debug_on: 81 | logger.debug(" z3_check_mop_equality is equal") 82 | 83 | tmp.add_constant_leaf("c_res", val_0, tmp.mop.size) 84 | # TODO(w00tzenheimer): should we recompute caches so that leafs_by_name contains the new constant leaf? 85 | # tmp.compute_sub_ast() 86 | new_instruction = self.get_replacement(typing.cast(AstNode, tmp)) 87 | return new_instruction 88 | except ZeroDivisionError: 89 | logger.error("ZeroDivisionError while evaluating %s", tmp, exc_info=True) 90 | except AstEvaluationException as e: 91 | logger.error("Error while evaluating %s: %s", tmp, e, exc_info=True) 92 | 93 | @_compat.override 94 | def check_candidate(self, candidate: AstNode) -> bool: 95 | """Return True if the candidate matches the rule, otherwise False.""" 96 | return True 97 | -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/instructions/z3/predicates.py: -------------------------------------------------------------------------------- 1 | from ida_hexrays import * 2 | 3 | from d810.expr.ast import AstConstant, AstLeaf, AstNode 4 | from d810.expr.z3_utils import z3_check_mop_equality, z3_check_mop_inequality 5 | from d810.optimizers.microcode.instructions.z3.handler import Z3Rule 6 | 7 | 8 | class Z3setzRuleGeneric(Z3Rule): 9 | DESCRIPTION = "Check with Z3 if a m_setz check is always True or False" 10 | 11 | @property 12 | def PATTERN(self) -> AstNode: 13 | """Return the pattern to match.""" 14 | return AstNode(m_setz, AstLeaf("x_0"), AstLeaf("x_1")) 15 | 16 | @property 17 | def 
class Z3setnzRuleGeneric(Z3Rule):
    DESCRIPTION = "Check with Z3 if a m_setnz check is always True or False"

    @property
    def PATTERN(self) -> AstNode:
        """Match any `setnz x_0, x_1` micro-instruction."""
        return AstNode(m_setnz, AstLeaf("x_0"), AstLeaf("x_1"))

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        """The comparison collapses to a constant move."""
        return AstNode(m_mov, AstConstant("val_res"))

    def check_candidate(self, candidate):
        """Fold `setnz` to 0/1 when Z3 proves the operands (in)equal."""
        lhs = candidate["x_0"]
        rhs = candidate["x_1"]
        res_size = lhs.size or 1  # fall back to 1 byte if the size is unknown
        # setnz is 0 when the operands are provably equal, 1 when they are
        # provably different; otherwise leave the instruction untouched.
        if z3_check_mop_equality(lhs.mop, rhs.mop):
            result = 0
        elif z3_check_mop_inequality(lhs.mop, rhs.mop):
            result = 1
        else:
            return False
        candidate.add_constant_leaf("val_res", result, res_size)
        return True
class Z3SmodRuleGeneric(Z3Rule):
    # BUGFIX: DESCRIPTION previously said "m_setz" — a copy/paste from the
    # rules above; this rule actually matches signed modulo (m_smod) by 2.
    DESCRIPTION = "Check with Z3 if a m_smod by 2 is always 0 or 1"

    @property
    def PATTERN(self) -> AstNode:
        """Match `x_0 smod 2` (signed remainder of a division by two)."""
        return AstNode(m_smod, AstLeaf("x_0"), AstConstant("2", 2))

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        """The whole expression collapses to a constant move."""
        return AstNode(m_mov, AstConstant("val_res"))

    def check_candidate(self, candidate):
        """Fold `x smod 2` to 0 or 1 when Z3 proves the value constant.

        Opaque predicates such as `(x * (x + 1)) % 2` always evaluate to 0;
        Z3 is asked whether the matched expression equals 0, then 1.
        """
        res_size = candidate["x_0"].size or 1
        cst_0_mop = mop_t()
        cst_0_mop.make_number(0, res_size)
        if z3_check_mop_equality(candidate.mop, cst_0_mop):
            candidate.add_leaf("val_res", cst_0_mop)
            return True
        cst_1_mop = mop_t()
        cst_1_mop.make_number(1, res_size)
        if z3_check_mop_equality(candidate.mop, cst_1_mop):
            candidate.add_leaf("val_res", cst_1_mop)
            return True
        return False
    def __init__(self):
        super().__init__()
        # Inclusive address range of the read-only dwords this rule may fold
        # to zero; both bounds come from the rule configuration.
        self.ro_dword_min_ea = None
        self.ro_dword_max_ea = None

    def configure(self, kwargs):
        """Read `ro_dword_min_ea` / `ro_dword_max_ea` (hex strings) from config."""
        super().configure(kwargs)
        self.ro_dword_min_ea = None
        self.ro_dword_max_ea = None
        if "ro_dword_min_ea" in kwargs.keys():
            self.ro_dword_min_ea = int(kwargs["ro_dword_min_ea"], 16)
        if "ro_dword_max_ea" in kwargs.keys():
            self.ro_dword_max_ea = int(kwargs["ro_dword_max_ea"], 16)

    def check_candidate(self, candidate):
        """Accept direct global (mop_v) reads inside the configured range."""
        # Both bounds must be configured, otherwise the rule does nothing.
        if (self.ro_dword_min_ea is None) or (self.ro_dword_max_ea is None):
            return False
        if candidate["ro_dword"].mop.t != mop_v:
            return False
        mem_read_address = candidate["ro_dword"].mop.g
        if not (self.ro_dword_min_ea <= mem_read_address <= self.ro_dword_max_ea):
            return False

        # Fold the memory read into a constant 0 of the same operand size.
        candidate.add_constant_leaf("val_res", 0, candidate["ro_dword"].mop.size)
        return True


# This rule is from
# https://www.carbonblack.com/blog/defeating-compiler-level-obfuscations-used-in-apt10-malware/
class SetGlobalVariablesToZeroIfDetectedReadOnly(EarlyRule):
    DESCRIPTION = "WARNING: Use it only if you know what you are doing as it may patch data not related to obfuscation"

    @property
    def PATTERN(self) -> AstNode:
        """Return the pattern to match."""
        return AstNode(m_mov, AstLeaf("ro_dword"))

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        # The global read is replaced by `mov #0`.
        return AstNode(m_mov, AstConstant("val_res"))

    def __init__(self):
        super().__init__()
        # If we optimized too early (in MMAT_GENERATED), we may replace something like
        # 'mov &($dword_10020CC8).4, eoff.4' by 'mov #0.4, eoff.4'
        # and this will lead to incorrect decompilation where MEMORY[0] is used
        # Thus, we explicitly specify the MMAT_PREOPTIMIZED maturity.
        self.maturities = [MMAT_PREOPTIMIZED]

    def is_read_only_inited_var(self, address):
        """Heuristically decide whether *address* is never written.

        NOTE(review): despite the method name, the checks below require the
        segment permissions to be exactly READ|WRITE and `is_loaded(address)`
        to be False (no initialized byte in the database) — presumably
        targeting uninitialized .bss-style data as in the APT10 write-up;
        confirm against that reference before changing.
        """
        s: segment_t = getseg(address)
        if s is None:
            return False
        if s.perm != (SEGPERM_READ | SEGPERM_WRITE):
            return False
        if is_loaded(address):
            return False
        # Reject the address if any data xref writes (dr_W) to it.
        ref_finder = xrefblk_t()
        is_ok = ref_finder.first_to(address, XREF_DATA)
        while is_ok:
            if ref_finder.type == dr_W:
                return False
            is_ok = ref_finder.next_to()
        return True

    def check_candidate(self, candidate):
        """Fold a direct (mop_v) or address-of (mop_a->mop_v) global read to 0."""
        mem_read_address = None
        if candidate["ro_dword"].mop.t == mop_v:
            mem_read_address = candidate["ro_dword"].mop.g
        elif candidate["ro_dword"].mop.t == mop_a:
            if candidate["ro_dword"].mop.a.t == mop_v:
                mem_read_address = candidate["ro_dword"].mop.a.g

        if mem_read_address is None:
            return False

        if not self.is_read_only_inited_var(mem_read_address):
            return False
        candidate.add_constant_leaf("val_res", 0, candidate["ro_dword"].mop.size)
        return True
/* NOTE: the functions below are deliberately obfuscated decompiler test
 * fixtures — the MBA (mixed boolean-arithmetic) forms are the point of the
 * test; do not "simplify" them at the source level. */

EXPORT long test_xor(long a, long b, long c, long *d) {
  /* MBA pattern for XOR: (a + b) - 2*(a & b) => a ^ b */
  d[0] = (a + b) - 2 * (a & b);
  d[1] = (a * c + (b - 3)) - 2 * ((a * c) & (b - 3));
  return d[0] + d[1];
}

EXPORT long test_or(long a, long b, long c, long *d) {
  // MBA pattern for OR: (a & b) + (a ^ b) => a | b
  d[0] = (a & b) + (a ^ b);
  d[1] = (b & c) + (b ^ c);
  d[2] = ((a + 1) & (b - 2)) + ((a + 1) ^ (b - 2));
  return d[0] + d[1] + d[2];
}

EXPORT long test_and(long a, long b, long c, long *d) {
  // MBA pattern for AND: (a | b) - (a ^ b) => a & b
  d[0] = (a | b) - (a ^ b);
  d[1] = (b | c) - (b ^ c);
  d[2] = ((a * 2) | (b + c)) - ((a * 2) ^ (b + c));
  return d[0] + d[1] + d[2];
}

EXPORT long test_neg(long a, long *d) {
  // Negation pattern: -x can be expressed as ~x + 1 (two's complement)
  d[0] = ~a + 1;
  d[1] = ~(a + 5) + 1;
  d[2] = ~(a * 2) + 1;
  return d[0] + d[1] + d[2];
}
// ============================================================================
// Hard MBA patterns that IDA Pro 9+ cannot simplify natively
// (deliberate fixtures — keep the obfuscated forms as written)
// ============================================================================

/**
 * Multi-layer nested XOR MBA
 * Layer 1: (a + b) - 2*(a & b) = a ^ b
 * Layer 2: (layer1 + c) - 2*(layer1 & c) = a ^ b ^ c
 */
EXPORT long test_multilayer_xor(long a, long b, long c, long *out) {
  long layer1 = (a + b) - 2 * (a & b);
  out[0] = (layer1 + c) - 2 * (layer1 & c);
  return out[0];
}

/**
 * Nested OR via two MBA layers
 * Layer 1: (a & b) + (a ^ b) = a | b
 * Layer 2: (layer1 & c) + (layer1 ^ c) = a | b | c
 */
EXPORT long test_nested_or(long a, long b, long c, long *out) {
  long layer1 = (a & b) + (a ^ b);
  out[0] = (layer1 & c) + (layer1 ^ c);
  return out[0];
}

/**
 * Cross-instruction MBA through temps
 * t1 + 2*t2 = a + b when t1 = a ^ b, t2 = a & b
 */
EXPORT long test_chained_temps(long a, long b, long *out) {
  long t1 = a ^ b;
  long t2 = a & b;
  out[0] = t1 + 2 * t2;
  return out[0];
}
/*
 * Close *hHandleToClose* unless it already equals INVALID_HANDLE_VALUE (-1).
 * The for/while construction is a deliberately flattened state machine
 * (fixture): state 0 decides, state 1 performs the close, state 2 exits.
 * The three int parameters are unused padding kept for the sample ABI.
 */
__int64 __fastcall SafeCloseHandle(
    int unusedParam1,
    int unusedParam2,
    int unusedParam3,
    unsigned __int64 hHandleToClose)
{
    int cleanupState;
    int finalState;
    unsigned __int64 hObject;

    for (cleanupState = 0;; cleanupState = 2)
    {
        while (1)
        {
            finalState = cleanupState;
            if (cleanupState)
                break;

            hObject = hHandleToClose;
            if (hHandleToClose == (unsigned __int64)0xFFFFFFFFFFFFFFFFULL)
                cleanupState = 2;       /* nothing to close */
            else
                cleanupState = 1;       /* go close it */
        }

        if (cleanupState != 1)
            break;

        lolclose(hObject);
        /* mark the handle invalid so a re-entry takes the exit path */
        hHandleToClose = (unsigned __int64)0xFFFFFFFFFFFFFFFFULL;
    }

    return finalState;
}

/* Bogus single-iteration loop fixture: the for-loop runs exactly once
 * (i goes 0 -> 1), wrapping one shift/xor mixing step of *a4. */
void bogus_loops(int n0x59, int n0xA, int n0x48, unsigned int *a4)
{
    int i; // [rsp+Ch] [rbp-Ch]
    unsigned int v5; // [rsp+14h] [rbp-4h]

    for (i = 0; !i; i = 1)
        v5 = *a4 ^ (*a4 << 0xD) ^ ((*a4 ^ (*a4 << 0xD)) >> 0x11);

    *a4 = v5 ^ (0x20 * v5);
}

/* Flattened spin-acquire fixture built on InterlockedCompareExchange over
 * g_mutex.SpinCount; state 0 tries the CAS, state 1 spins, state 2 exits. */
__int64 unwrap_loops()
{
    int i;
    int v2;
    int result;

    for (i = 0;; i = 2)
    {
        while (1)
        {
            result = i;
            if (i)
                break;

            v2 = 0;
            if (!_InterlockedCompareExchange((volatile signed __int32 *)&g_mutex.SpinCount, 1, 0))
                goto LABEL_x379;

        LABEL_x2B0:
            i = 1;
        }

        if (i != 1)
            break;

        unk_1802CCC58(v2++ >= 0x20);
        if (_InterlockedCompareExchange((volatile signed __int32 *)&g_mutex.SpinCount, 1, 0))
            goto LABEL_x2B0;

    LABEL_x379:;
    }

    return result;
}
// Hidden C++ exception states: #wind=1
/* Fixture: iterates the a2-count records of the 64-byte-strided table at a5,
 * calling sub_180221640 twice per record (offsets +0x28/+0x30, then
 * +0x10/+0x18).  The for-loop constant-rewrites of HIDWORD(v7)/HIDWORD(v8)
 * are part of the obfuscation being tested. */
__int64 unwrap_loops_3(int n0x5C, unsigned int a2, int n0x53, int n9, __int64 a5)
{
    int v5;
    int v10;
    int v7;
    int v8;
    int v9;

    v5 = a2;
    if (a2)
    {
        v10 = 0;
        for (HIDWORD(v7) = 2;; HIDWORD(v7) = 2)
        {
            v9 = v10;
            sub_180221640(
                *(_QWORD *)(a5 + (v10 << 6) + 0x28),
                0xA,
                0x5E,
                *(_QWORD *)(a5 + (v10 << 6) + 0x30),
                0x45,
                v7);
            HIDWORD(v8) = 3;
            sub_180221640(
                *(_QWORD *)(a5 + (v10 << 6) + 0x10),
                0x62,
                0x2A,
                *(_QWORD *)(a5 + (v10 << 6) + 0x18),
                0x19,
                v8);
            ++v10;
            if (v9 + 1 == v5)      /* processed all a2 records */
                break;
        }
    }

    return 4;
}
class CompareConstantRule2(JumpOptimizationRule):
    """Rewrite an obfuscated `jge <expr>, 0` back to `jge x_0, c_1`."""

    ORIGINAL_JUMP_OPCODES = [m_jge]
    LEFT_PATTERN = AstNode(
        m_or,
        AstNode(
            m_xdu, AstNode(m_and, AstNode(m_bnot, AstLeaf("x_0")), AstConstant("c_1"))
        ),
        AstNode(
            m_and,
            AstNode(m_sub, AstLeaf("xdu_x_0"), AstConstant("xdu_c_1")),
            AstNode(
                m_bnot,
                AstNode(
                    m_xdu, AstNode(m_xor, AstLeaf("xdu1_x_0"), AstConstant("xdu_c_1"))
                ),
            ),
        ),
    )
    RIGHT_PATTERN = AstConstant("0", 0)

    REPLACEMENT_OPCODE = m_jge
    REPLACEMENT_LEFT_PATTERN = AstLeaf("x_0")
    REPLACEMENT_RIGHT_PATTERN = AstLeaf("c_1")

    def check_candidate(self, opcode, left_candidate, right_candidate):
        """Require every `xdu_*` leaf to be x_0 modulo zero-extension."""
        if not equal_mops_bypass_xdu(
            left_candidate["xdu_x_0"].mop, left_candidate["x_0"].mop
        ):
            return False
        if not equal_mops_bypass_xdu(
            left_candidate["xdu1_x_0"].mop, left_candidate["x_0"].mop
        ):
            return False
        # Keep the original jump destination; only the condition is rewritten.
        self.jump_replacement_block_serial = self.jump_original_block_serial
        return True


class CompareConstantRule3(JumpOptimizationRule):
    """Rewrite `jge ((x_0 - c_1) & ~x_0), 0` as `jg x_0, c_1`."""

    ORIGINAL_JUMP_OPCODES = [m_jge]
    LEFT_PATTERN = AstNode(
        m_and,
        AstNode(m_sub, AstLeaf("x_0"), AstConstant("c_1")),
        AstNode(m_bnot, AstLeaf("x_0")),
    )
    RIGHT_PATTERN = AstConstant("0", 0)

    REPLACEMENT_OPCODE = m_jg
    REPLACEMENT_LEFT_PATTERN = AstLeaf("x_0")
    REPLACEMENT_RIGHT_PATTERN = AstLeaf("c_1")

    def check_candidate(self, opcode, left_candidate, right_candidate):
        # The structural match is sufficient; keep the jump destination.
        self.jump_replacement_block_serial = self.jump_original_block_serial
        return True
class CompareConstantRule4(JumpOptimizationRule):
    """Rewrite an obfuscated jl/jge comparison-with-zero as `jge x_0, c_1`."""

    ORIGINAL_JUMP_OPCODES = [m_jl, m_jge]
    LEFT_PATTERN = AstNode(
        m_and,
        AstNode(
            m_or,
            AstNode(m_bnot, AstNode(m_sub, AstLeaf("x_0"), AstConstant("c_1"))),
            AstNode(m_xor, AstLeaf("x_0"), AstConstant("c_1")),
        ),
        AstNode(m_or, AstLeaf("xdu_x_0"), AstConstant("bnot_c_1")),
    )

    RIGHT_PATTERN = AstConstant("0", 0)

    REPLACEMENT_OPCODE = m_jge
    REPLACEMENT_LEFT_PATTERN = AstLeaf("x_0")
    REPLACEMENT_RIGHT_PATTERN = AstLeaf("c_1")

    def check_candidate(self, opcode, left_candidate, right_candidate):
        """Validate the leaves, keeping the original jump destination.

        BUGFIX: removed a leftover debug `print("dflighdrth")` that spammed
        the output console on every candidate match.
        """
        # xdu_x_0 must be x_0 modulo zero-extension.
        if not equal_mops_bypass_xdu(
            left_candidate["xdu_x_0"].mop, left_candidate["x_0"].mop
        ):
            return False
        # bnot_c_1 must be the bitwise-not of c_1.
        if not equal_bnot_mop(
            left_candidate["c_1"].mop, left_candidate["bnot_c_1"].mop
        ):
            return False
        # Only the condition is rewritten; the jump target stays the same.
        self.jump_replacement_block_serial = self.jump_original_block_serial
        return True
def _maybe_load_dotenv(path: pathlib.Path) -> None:
    """Populate ``os.environ`` from a minimal .env file, if one exists.

    Existing environment variables always win; nothing is overwritten.
    Supports comments, blank lines, an optional ``export `` prefix and
    single/double quoted values — no third-party parser needed.
    """
    if not path.is_file():
        return

    logger.info("Loading environment from %s", path)

    try:
        text = path.read_text(encoding="utf-8")
    except OSError:
        logger.warning("Failed to read .env at %s", path)
        return

    for raw_line in text.splitlines():
        entry = raw_line.strip()

        # Ignore blanks, comments and anything without a key=value shape.
        if not entry or entry.startswith("#") or "=" not in entry:
            continue

        # Tolerate shell-style `export KEY=VALUE` lines.
        entry = entry.removeprefix("export ")

        key, _, value = entry.partition("=")
        key = key.strip()
        value = value.strip()

        # Drop matching surrounding quotes ("..." or '...').
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]

        # First definition wins: never clobber the real environment.
        if key:
            os.environ.setdefault(key, value)
os.environ.get(key) 79 | if val is None: 80 | return default 81 | try: 82 | return int(val) 83 | except (ValueError, TypeError): 84 | return default 85 | 86 | def as_bool(self, key: str, default: bool = False) -> bool: 87 | val = os.environ.get(key) 88 | if val is None: 89 | return default 90 | return val.lower() in ("1", "true", "yes", "on") 91 | 92 | def as_list(self, key: str, separator: str = ",") -> list[str]: 93 | val = os.environ.get(key) 94 | if not val: 95 | return [] 96 | # Handle both comma and semicolon, just in case 97 | cleaned = val.replace(";", separator) 98 | return [item.strip() for item in cleaned.split(separator) if item.strip()] 99 | 100 | def as_path(self, key: str, default: pathlib.Path | str = "") -> pathlib.Path: 101 | """ 102 | Returns the environment variable as a Path, or default if not set. 103 | The default can be either a str or Path. 104 | """ 105 | val = os.environ.get(key) 106 | if val: 107 | return pathlib.Path(val) 108 | return pathlib.Path(default) 109 | 110 | 111 | # endregion 112 | 113 | 114 | @pytest.fixture(scope="session") 115 | def env() -> EnvWrapper: 116 | """ 117 | Session fixture that loads .env and returns a helper object. 118 | 119 | Usage in tests: 120 | def test_something(env): 121 | if env.as_bool("DEBUG"): 122 | assert env.as_int("MAX_RETRIES") == 5 123 | """ 124 | # 1. Try current working directory 125 | env_path = pathlib.Path.cwd() / ".env" 126 | 127 | # 2. 
def pytest_configure(config: "pytest.Config") -> None:
    """Register the custom markers used by the d810-ng test suites."""
    for marker in (
        "ida_required: mark test as requiring IDA Pro",
        "integration: mark test as integration test",
    ):
        config.addinivalue_line("markers", marker)
class WeirdRule3(PatternMatchingRule):
    # MBA identity: (x_0 & ~x_1) - 2*x_0  ==  -(x_0 + (x_0 & x_1))
    # (since x_0 & ~x_1 == x_0 - (x_0 & x_1))
    @property
    def PATTERN(self) -> AstNode:
        return AstNode(
            m_sub,
            AstNode(m_and, AstLeaf("x_0"), AstNode(m_bnot, AstLeaf("x_1"))),
            AstNode(m_mul, AstConstant("2", 2), AstLeaf("x_0")),
        )

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        return AstNode(
            m_neg,
            AstNode(
                m_add, AstLeaf("x_0"), AstNode(m_and, AstLeaf("x_0"), AstLeaf("x_1"))
            ),
        )


class WeirdRule4(PatternMatchingRule):
    # MBA identity: (x_0 & ~x_1) - (x_0 & x_1)  ==  (x_0 ^ x_1) - x_1
    # The guard verifies the pattern's bnot_x_1 leaf really is ~x_1.

    def check_candidate(self, candidate):
        if not equal_bnot_mop(candidate["x_1"].mop, candidate["bnot_x_1"].mop):
            return False
        return True

    @property
    def PATTERN(self) -> AstNode:
        return AstNode(
            m_sub,
            AstNode(m_and, AstLeaf("x_0"), AstLeaf("bnot_x_1")),
            AstNode(m_and, AstLeaf("x_0"), AstLeaf("x_1")),
        )

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        return AstNode(
            m_sub, AstNode(m_xor, AstLeaf("x_0"), AstLeaf("x_1")), AstLeaf("x_1")
        )


class WeirdRule5(PatternMatchingRule):
    # MBA identity:
    #   (~x_0 | (~x_1 & x_2)) + (x_0 + (x_1 & x_2)) - x_2
    #     ==  x_0 | x_1 | ~x_2
    # Guards verify that the bnot_* leaves really are the bitwise-nots.

    def check_candidate(self, candidate):
        if not equal_bnot_mop(candidate["x_0"].mop, candidate["bnot_x_0"].mop):
            return False
        if not equal_bnot_mop(candidate["x_1"].mop, candidate["bnot_x_1"].mop):
            return False
        return True

    @property
    def PATTERN(self) -> AstNode:
        return AstNode(
            m_sub,
            AstNode(
                m_add,
                AstNode(
                    m_or,
                    AstLeaf("bnot_x_0"),
                    AstNode(m_and, AstLeaf("bnot_x_1"), AstLeaf("x_2")),
                ),
                AstNode(
                    m_add,
                    AstLeaf("x_0"),
                    AstNode(m_and, AstLeaf("x_1"), AstLeaf("x_2")),
                ),
            ),
            AstLeaf("x_2"),
        )

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        return AstNode(
            m_or,
            AstLeaf("x_0"),
            AstNode(m_or, AstLeaf("x_1"), AstNode(m_bnot, AstLeaf("x_2"))),
        )
class WeirdRule6(PatternMatchingRule):
    # MBA identity: (x_0 | x_1) + (x_0 & ~x_1)  ==  (x_0 ^ x_1) + x_0
    @property
    def PATTERN(self) -> AstNode:
        return AstNode(
            m_add,
            AstNode(m_or, AstLeaf("x_0"), AstLeaf("x_1")),
            AstNode(m_and, AstLeaf("x_0"), AstNode(m_bnot, AstLeaf("x_1"))),
        )

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        return AstNode(
            m_add, AstNode(m_xor, AstLeaf("x_0"), AstLeaf("x_1")), AstLeaf("x_0")
        )


class Neg_HackersDelightRule_1(PatternMatchingRule):
    # Two's-complement identity: ~x + 1  ==  -x
    @property
    def PATTERN(self) -> AstNode:
        return AstNode(m_add, AstNode(m_bnot, AstLeaf("x_0")), AstConstant("1", 1))

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        return AstNode(m_neg, AstLeaf("x_0"))


class Neg_HackersDelightRule_2(PatternMatchingRule):
    # Two's-complement identity: ~(x - 1)  ==  -x
    @property
    def PATTERN(self) -> AstNode:
        return AstNode(m_bnot, AstNode(m_sub, AstLeaf("x_0"), AstConstant("1", 1)))

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        return AstNode(m_neg, AstLeaf("x_0"))
class NegAdd_HackersDelightRule_2(PatternMatchingRule):
    # MBA identity: (x ^ y) - 2*(x | y)  ==  -(x + y), here with y = x_1|x_2.
    # Note (x_0 | x_1) | x_2 in the subtrahend equals x_0 | (x_1 | x_2).
    @property
    def PATTERN(self) -> AstNode:
        return AstNode(
            m_sub,
            AstNode(
                m_xor, AstLeaf("x_0"), AstNode(m_or, AstLeaf("x_1"), AstLeaf("x_2"))
            ),
            AstNode(
                m_mul,
                AstConstant("2", 2),
                AstNode(
                    m_or, AstNode(m_or, AstLeaf("x_0"), AstLeaf("x_1")), AstLeaf("x_2")
                ),
            ),
        )

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        return AstNode(
            m_neg,
            AstNode(
                m_add, AstLeaf("x_0"), AstNode(m_or, AstLeaf("x_1"), AstLeaf("x_2"))
            ),
        )


class NegAdd_HackersDelightRule_1(PatternMatchingRule):
    # MBA identity: c*(x|y) + (x ^ y)  ==  -(x + y)  when c == -2 (mod 2^size).

    def check_candidate(self, candidate):
        # val_fe must equal -2 modulo the operand width.
        if (candidate["val_fe"].value + 2) & AND_TABLE[candidate["val_fe"].size] != 0:
            return False
        return True

    @property
    def PATTERN(self) -> AstNode:
        return AstNode(
            m_add,
            AstNode(
                m_mul,
                AstConstant("val_fe"),
                AstNode(m_or, AstLeaf("x_0"), AstLeaf("x_1")),
            ),
            AstNode(m_xor, AstLeaf("x_0"), AstLeaf("x_1")),
        )

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        return AstNode(m_neg, AstNode(m_add, AstLeaf("x_0"), AstLeaf("x_1")))


class NegOr_HackersDelightRule_1(PatternMatchingRule):
    # MBA identity: (x & y) - (x + y)  ==  -(x | y)
    # (since x + y == (x & y) + (x | y))
    @property
    def PATTERN(self) -> AstNode:
        return AstNode(
            m_sub,
            AstNode(m_and, AstLeaf("x_0"), AstLeaf("x_1")),
            AstNode(m_add, AstLeaf("x_0"), AstLeaf("x_1")),
        )

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        return AstNode(m_neg, AstNode(m_or, AstLeaf("x_0"), AstLeaf("x_1")))
@property 123 | def REPLACEMENT_PATTERN(self) -> AstNode: 124 | return AstNode(m_neg, AstNode(m_xor, AstLeaf("x_0"), AstLeaf("x_1"))) 125 | 126 | 127 | class NegXor_HackersDelightRule_2(PatternMatchingRule): 128 | @property 129 | def PATTERN(self) -> AstNode: 130 | return AstNode( 131 | m_sub, 132 | AstNode(m_add, AstLeaf("x_0"), AstLeaf("x_1")), 133 | AstNode( 134 | m_mul, 135 | AstConstant("2", 2), 136 | AstNode(m_or, AstLeaf("x_0"), AstLeaf("x_1")), 137 | ), 138 | ) 139 | 140 | @property 141 | def REPLACEMENT_PATTERN(self) -> AstNode: 142 | return AstNode(m_neg, AstNode(m_xor, AstLeaf("x_0"), AstLeaf("x_1"))) 143 | -------------------------------------------------------------------------------- /samples/src/c/abc_xor_dispatch.c: -------------------------------------------------------------------------------- 1 | /** 2 | * ABC XOR Dispatch Pattern Test 3 | * 4 | * This sample tests the father_patcher_abc_from_or_xor_* methods in 5 | * GenericDispatcherUnflatteningRule. It uses XOR-based state transitions 6 | * instead of simple assignment, which exercises the ABC (Arithmetic/Bitwise/Constant) 7 | * patching logic. 8 | * 9 | * Pattern tested: 10 | * state = state ^ CONSTANT (XOR transition) 11 | * switch ((state ^ DISPATCH_KEY) & MASK) { ... 
} 12 | */ 13 | 14 | #include "ida_types.h" 15 | 16 | /* Volatile to prevent compiler optimization */ 17 | volatile int global_accumulator = 0; 18 | 19 | /** 20 | * XOR-based flattened control flow 21 | * 22 | * State transitions use XOR: state = state ^ constant 23 | * Dispatcher uses: switch ((state ^ 0xDEADBEEF) & 0xFF) 24 | */ 25 | int abc_xor_dispatch(int input) 26 | { 27 | unsigned int state = 0x12345678; 28 | int result = 0; 29 | int i; 30 | 31 | while (1) 32 | { 33 | /* XOR-based dispatcher - tests father_patcher_abc_from_or_xor_v1 */ 34 | switch ((state ^ 0xDEADBEEF) & 0xFF) 35 | { 36 | case 0x00: /* Entry: Initialize */ 37 | result = input; 38 | state = state ^ 0x11111111; /* XOR transition to state 1 */ 39 | break; 40 | 41 | case 0x11: /* State 1: Add operation */ 42 | result = result + 42; 43 | state = state ^ 0x22222222; /* XOR transition to state 2 */ 44 | break; 45 | 46 | case 0x33: /* State 2: Multiply */ 47 | result = result * 2; 48 | state = state ^ 0x44444444; /* XOR transition to state 3 */ 49 | break; 50 | 51 | case 0x77: /* State 3: Check condition */ 52 | if (result > 100) 53 | { 54 | state = state ^ 0x88888888; /* XOR to exit path A */ 55 | } 56 | else 57 | { 58 | state = state ^ 0x99999999; /* XOR to exit path B */ 59 | } 60 | break; 61 | 62 | case 0xFF: /* Exit A: Return positive */ 63 | return result; 64 | 65 | case 0xEE: /* Exit B: Return negative */ 66 | return -result; 67 | 68 | default: 69 | /* Dead code - should never reach */ 70 | global_accumulator++; 71 | state = state ^ 0xFFFFFFFF; 72 | break; 73 | } 74 | } 75 | } 76 | 77 | /** 78 | * OR-based state manipulation 79 | * 80 | * State transitions use OR with masks: state = (state & ~mask) | value 81 | * Tests father_patcher_abc_from_or_xor_v2/v3 82 | */ 83 | int abc_or_dispatch(int input) 84 | { 85 | unsigned int state = 0; 86 | int result = input; 87 | 88 | while (1) 89 | { 90 | /* OR-based dispatcher */ 91 | switch (state & 0xF) 92 | { 93 | case 0: /* Entry */ 94 | result = result 
+ 10; 95 | state = (state & ~0xF) | 1; /* Set low nibble to 1 */ 96 | break; 97 | 98 | case 1: /* Process */ 99 | result = result * 3; 100 | state = (state & ~0xF) | 2; /* Set low nibble to 2 */ 101 | break; 102 | 103 | case 2: /* Finalize */ 104 | result = result - 5; 105 | state = (state & ~0xF) | 3; /* Set low nibble to 3 */ 106 | break; 107 | 108 | case 3: /* Exit */ 109 | return result; 110 | 111 | default: 112 | state = (state & ~0xF) | 3; /* Force exit */ 113 | break; 114 | } 115 | } 116 | } 117 | 118 | /** 119 | * Combined XOR/OR state manipulation 120 | * 121 | * Uses both XOR and OR in transitions to test multiple ABC patterns 122 | */ 123 | int abc_mixed_dispatch(int input, int mode) 124 | { 125 | unsigned int state = 0x00000000; 126 | int result = input; 127 | 128 | while (1) 129 | { 130 | switch (state) 131 | { 132 | case 0x00000000: /* Entry */ 133 | if (mode == 0) 134 | { 135 | state = state ^ 0xAAAAAAAA; /* XOR path */ 136 | } 137 | else 138 | { 139 | state = state | 0x55555555; /* OR path */ 140 | } 141 | break; 142 | 143 | case 0xAAAAAAAA: /* XOR path processing */ 144 | result = result ^ 0x12345678; 145 | state = state ^ 0xFFFFFFFF; /* XOR to common exit */ 146 | break; 147 | 148 | case 0x55555555: /* OR path processing */ 149 | result = result | 0x87654321; 150 | state = state ^ 0xAAAAAAAA; /* XOR to common exit */ 151 | break; 152 | 153 | case 0x55555555 ^ 0xAAAAAAAA: /* Common exit: 0xFFFFFFFF */ 154 | return result; 155 | 156 | default: 157 | return -1; /* Error state */ 158 | } 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /samples/src/c/tigress_obfuscated.c: -------------------------------------------------------------------------------- 1 | #include "export.h" 2 | 3 | // https://github.com/Neutrino6/thesis_project/blob/main/C_files_analysis/C_obfuscated/minmaxarray_tigress.c 4 | /** Original code: 5 | * #include 6 | * int main(int argc, char* argv[]){ 7 | * if(argc < 11) return 1; 8 
| * int a[10],i,big,small; 9 | * 10 | * // printf("\nEnter the size of the array: "); 11 | * // scanf("%d",&size); 12 | * // printf("\nEnter %d elements in to the array: ", size); 13 | * // for(i=0;ia[i]) 29 | * small=a[i]; 30 | * } 31 | * printf("Smallest element: %d\n",small); 32 | * 33 | * return 0; 34 | * } 35 | */ 36 | 37 | extern int printf2(char const * __restrict __format , ...); 38 | 39 | 40 | int _global_argc; 41 | char **_global_argv; 42 | char **_global_envp; 43 | 44 | EXPORT int tigress_minmaxarray(int argc , char **argv , char **_formal_envp ) 45 | { 46 | int a[10] ; 47 | int i ; 48 | int big ; 49 | int small ; 50 | int _BARRIER_0 ; 51 | unsigned long _1_main_next ; 52 | 53 | { 54 | { 55 | { 56 | { 57 | { 58 | goto _global_envp_i$nit_INLINE__global_envp_i$nit; 59 | } 60 | _global_envp_i$nit_INLINE__global_envp_i$nit: /* CIL Label */ ; 61 | } 62 | { 63 | { 64 | goto _global_argv_i$nit_INLINE__global_argv_i$nit; 65 | } 66 | _global_argv_i$nit_INLINE__global_argv_i$nit: /* CIL Label */ ; 67 | } 68 | { 69 | { 70 | goto _global_argc_i$nit_INLINE__global_argc_i$nit; 71 | } 72 | _global_argc_i$nit_INLINE__global_argc_i$nit: /* CIL Label */ ; 73 | } 74 | goto megaInit_INLINE_megaInit; 75 | } 76 | megaInit_INLINE_megaInit: /* CIL Label */ ; 77 | } 78 | _global_argc = argc; 79 | _global_argv = argv; 80 | _global_envp = _formal_envp; 81 | _BARRIER_0 = 1; 82 | { 83 | _1_main_next = 11UL; 84 | } 85 | while (1) { 86 | switch (_1_main_next) { 87 | case 18: 88 | small = a[i]; 89 | { 90 | _1_main_next = 3UL; 91 | } 92 | break; 93 | case 4: ; 94 | if (argc < 11) { 95 | { 96 | _1_main_next = 9UL; 97 | } 98 | } else { 99 | { 100 | _1_main_next = 13UL; 101 | } 102 | } 103 | break; 104 | case 14: ; 105 | if (small > a[i]) { 106 | { 107 | _1_main_next = 18UL; 108 | } 109 | } else { 110 | { 111 | _1_main_next = 3UL; 112 | } 113 | } 114 | break; 115 | case 15: ; 116 | if (i < argc - 1) { 117 | { 118 | _1_main_next = 14UL; 119 | } 120 | } else { 121 | { 122 | _1_main_next = 
22UL; 123 | } 124 | } 125 | break; 126 | case 12: 127 | big = a[0]; 128 | i = 1; 129 | { 130 | _1_main_next = 17UL; 131 | } 132 | break; 133 | case 8: ; 134 | if (big < a[i]) { 135 | { 136 | _1_main_next = 1UL; 137 | } 138 | } else { 139 | { 140 | _1_main_next = 16UL; 141 | } 142 | } 143 | break; 144 | case 1: 145 | big = a[i]; 146 | { 147 | _1_main_next = 16UL; 148 | } 149 | break; 150 | case 23: ; 151 | if (i < argc) { 152 | { 153 | _1_main_next = 0UL; 154 | } 155 | } else { 156 | { 157 | _1_main_next = 12UL; 158 | } 159 | } 160 | break; 161 | case 3: 162 | i ++; 163 | { 164 | _1_main_next = 15UL; 165 | } 166 | break; 167 | case 16: 168 | i ++; 169 | { 170 | _1_main_next = 17UL; 171 | } 172 | break; 173 | case 11: ; 174 | { 175 | _1_main_next = 4UL; 176 | } 177 | break; 178 | case 9: ; 179 | return (1); 180 | break; 181 | case 13: 182 | i = 1; 183 | { 184 | _1_main_next = 23UL; 185 | } 186 | break; 187 | case 19: ; 188 | return (0); 189 | break; 190 | case 17: ; 191 | if (i < argc - 1) { 192 | { 193 | _1_main_next = 8UL; 194 | } 195 | } else { 196 | { 197 | _1_main_next = 7UL; 198 | } 199 | } 200 | break; 201 | case 22: 202 | printf2((char const */* __restrict */)"Smallest element: %d\n", small); 203 | { 204 | _1_main_next = 19UL; 205 | } 206 | break; 207 | case 0: 208 | a[i - 1] = (int )*(*(argv + i) + 0); 209 | i ++; 210 | { 211 | _1_main_next = 23UL; 212 | } 213 | break; 214 | case 7: 215 | printf2((char const */* __restrict */)"Largest element: %d\n", big); 216 | small = a[0]; 217 | i = 1; 218 | { 219 | _1_main_next = 15UL; 220 | } 221 | break; 222 | } 223 | } 224 | } 225 | } -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/flow/flattening/unflattener_indirect.py: -------------------------------------------------------------------------------- 1 | import idaapi 2 | from ida_hexrays import * 3 | 4 | from d810.conf.loggers import getLogger 5 | from d810.hexrays.hexrays_helpers import AND_TABLE, 
append_mop_if_not_in_list
from d810.hexrays.tracker import MopHistory, MopTracker
from d810.optimizers.microcode.flow.flattening.generic import (
    GenericDispatcherBlockInfo,
    GenericDispatcherCollector,
    GenericDispatcherInfo,
    GenericDispatcherUnflatteningRule,
)

unflat_logger = getLogger("D810.unflat")
FLATTENING_JUMP_OPCODES = [m_jtbl]


class TigressIndirectDispatcherBlockInfo(GenericDispatcherBlockInfo):
    """Block info for Tigress indirect-jump dispatchers (no specialization needed)."""

    pass


class TigressIndirectDispatcherInfo(GenericDispatcherInfo):
    """Dispatcher whose entry block ends with an indirect jump (m_ijmp)."""

    def explore(self, blk: mblock_t):
        """Populate this dispatcher info from *blk*; return False if not a candidate."""
        self.reset()
        if not self._is_candidate_for_dispatcher_entry_block(blk):
            return False
        self.mop_compared = self._get_comparison_info(blk)
        self.entry_block = TigressIndirectDispatcherBlockInfo(blk)
        self.entry_block.parse()
        # Everything the entry block uses is assumed to be defined before it.
        for used_mop in self.entry_block.use_list:
            append_mop_if_not_in_list(used_mop, self.entry_block.assume_def_list)
        self.dispatcher_internal_blocks.append(self.entry_block)

        # An indirect dispatcher has no statically known exits or compared values.
        self.dispatcher_exit_blocks = []
        self.comparison_values = []
        return True

    def _get_comparison_info(self, blk: mblock_t):
        """Return the operand driving the terminating indirect jump, or None."""
        if blk.tail is None or blk.tail.opcode != m_ijmp:
            return None
        return blk.tail.l

    def _is_candidate_for_dispatcher_entry_block(self, blk: mblock_t):
        """A candidate entry block is any block whose tail is an indirect jump."""
        return blk.tail is not None and blk.tail.opcode == m_ijmp

    def should_emulation_continue(self, cur_blk: mblock_t):
        """Keep emulating only while still inside the dispatcher entry block."""
        return cur_blk is not None and cur_blk.serial == self.entry_block.serial


class TigressIndirectDispatcherCollector(GenericDispatcherCollector):
    DISPATCHER_CLASS = TigressIndirectDispatcherInfo
    DEFAULT_DISPATCHER_MIN_INTERNAL_BLOCK = 0
    DEFAULT_DISPATCHER_MIN_EXIT_BLOCK = 0
    DEFAULT_DISPATCHER_MIN_COMPARISON_VALUE = 0


class LabelTableInfo:
    """Location of a stack-resident table of code labels copied from memory.

    sp_offset:  IDA stack offset of the table in the function frame.
    mem_offset: linear address of the backing data in the database.
    nb_elt:     number of table entries.
    ptr_size:   size of one entry in bytes (defaults to 8, i.e. 64-bit labels).
    """

    def __init__(self, sp_offset, mem_offset, nb_elt, ptr_size=8):
        self.sp_offset = sp_offset
        self.mem_offset = mem_offset
        self.nb_elt = nb_elt
        self.ptr_size = ptr_size

    def update_mop_tracker(self, mba: mbl_array_t, mop_tracker: MopTracker):
        """Seed *mop_tracker* with one definition per table slot, read from the idb."""
        stack_array_base_address = mba.stkoff_ida2vd(self.sp_offset)
        for i in range(self.nb_elt):
            tmp_mop = mop_t()
            tmp_mop.erase()
            tmp_mop._make_stkvar(mba, stack_array_base_address + self.ptr_size * i)
            tmp_mop.size = self.ptr_size
            # get_qword always reads 8 bytes; mask the value down to entry size.
            mem_val = (
                idaapi.get_qword(self.mem_offset + self.ptr_size * i)
                & AND_TABLE[self.ptr_size]
            )
            mop_tracker.add_mop_definition(tmp_mop, mem_val)


class UnflattenerTigressIndirect(GenericDispatcherUnflatteningRule):
    DESCRIPTION = "Unflatten Tigress indirect-goto (ijmp) dispatchers using a configured label table"
    DEFAULT_UNFLATTENING_MATURITIES = [MMAT_LOCOPT]
    DEFAULT_MAX_DUPLICATION_PASSES = 20
    DEFAULT_MAX_PASSES = 1

    def __init__(self):
        super().__init__()
        self.label_info = None
        self.goto_table_info = {}

    @property
    def DISPATCHER_COLLECTOR_CLASS(self) -> type[GenericDispatcherCollector]:
        """Return the class of the dispatcher collector."""
        return TigressIndirectDispatcherCollector

    def configure(self, kwargs):
        """Parse the per-function 'goto_table_info' mapping from the project config."""
        super().configure(kwargs)
        if "goto_table_info" in self.config.keys():
            for ea_str, table_info in self.config["goto_table_info"].items():
                self.goto_table_info[int(ea_str, 16)] = LabelTableInfo(
                    sp_offset=int(table_info["stack_table_offset"], 16),
                    mem_offset=int(table_info["table_address"], 16),
                    nb_elt=table_info["table_nb_elt"],
                )

    def check_if_rule_should_be_used(self, blk: mblock_t):
        """Run only on functions with a configured label table, while progress is made."""
        if not super().check_if_rule_should_be_used(blk):
            return False
        if self.mba.entry_ea not in self.goto_table_info:
            return False
        # Stop iterating once a full pass produced no patch.
        if (self.cur_maturity_pass >= 1) and (self.last_pass_nb_patch_done == 0):
            return False
        self.label_info = self.goto_table_info[self.mba.entry_ea]
return True 117 | 118 | def register_initialization_variables(self, mop_tracker: MopTracker): 119 | self.label_info.update_mop_tracker(self.mba, mop_tracker) 120 | 121 | def check_if_histories_are_resolved(self, mop_histories: list[MopHistory]): 122 | return True 123 | -------------------------------------------------------------------------------- /samples/BUILD.md: -------------------------------------------------------------------------------- 1 | # Building libobfuscated.dll 2 | 3 | ## Problem 4 | 5 | The current `libobfuscated.dll` in this repository was built **without exporting function names**. 6 | 7 | This causes all tests in `test_libdeobfuscated.py` to fail because IDA Pro cannot find functions by their expected names (`test_cst_simplification`, `test_chained_add`, etc.). Instead, IDA only sees generic names like `sub_180001000`. 8 | 9 | ## Solution 10 | 11 | The source files have been updated with `EXPORT` macros to properly export function names. **The DLL must be rebuilt** for these changes to take effect. 12 | 13 | --- 14 | 15 | ## Building on Windows 16 | 17 | ### Quick Start (Any Compiler) 18 | 19 | The Makefile auto-detects your compiler. Just run: 20 | 21 | ```cmd 22 | cd samples 23 | nmake clean # If using Visual Studio Developer Command Prompt 24 | make clean # If using MinGW/Git Bash/WSL 25 | 26 | nmake # MSVC 27 | make # GCC/Clang/MinGW 28 | ``` 29 | 30 | Output: `bins/libobfuscated.dll` 31 | 32 | --- 33 | 34 | ### Option 1: Visual Studio (MSVC) - Recommended 35 | 36 | **Prerequisites:** 37 | 38 | - Visual Studio 2019 or later (Community Edition works) 39 | - "Desktop development with C++" workload 40 | 41 | **Steps:** 42 | 43 | 1. Open **Developer Command Prompt for VS** (or **x64 Native Tools Command Prompt**) 44 | 45 | 2. Build: 46 | 47 | ```cmd 48 | cd samples 49 | nmake clean 50 | nmake 51 | ``` 52 | 53 | 3. 
Verify exports: 54 | 55 | ```cmd 56 | dumpbin /EXPORTS bins\libobfuscated.dll 57 | ``` 58 | 59 | You should see: 60 | 61 | ``` 62 | ordinal hint RVA name 63 | 1 0 00001000 test_and 64 | 2 1 00001050 test_chained_add 65 | 3 2 000010A0 test_cst_simplification 66 | ... 67 | ``` 68 | 69 | --- 70 | 71 | ### Option 2: MinGW-w64 (GCC) 72 | 73 | **Prerequisites:** 74 | 75 | - MinGW-w64 ([download](https://winlibs.com/) or install via MSYS2/Cygwin) 76 | - Make for Windows 77 | 78 | **Steps:** 79 | 80 | 1. Add MinGW to PATH or use MSYS2 shell 81 | 82 | 2. Build: 83 | 84 | ```bash 85 | cd samples 86 | make clean 87 | make 88 | ``` 89 | 90 | 3. Verify: 91 | 92 | ```bash 93 | objdump -p bins/libobfuscated.dll | grep "test_" 94 | ``` 95 | 96 | --- 97 | 98 | ### Option 3: Clang (LLVM) 99 | 100 | **Prerequisites:** 101 | 102 | - LLVM for Windows ([download](https://releases.llvm.org/)) 103 | - Make for Windows 104 | 105 | **Steps:** 106 | 107 | 1. Build: 108 | 109 | ```bash 110 | cd samples 111 | make clean 112 | CC=clang make 113 | ``` 114 | 115 | --- 116 | 117 | ## Building on Linux/macOS (for comparison) 118 | 119 | The Makefile defaults to building a Windows DLL, but you can build native shared libraries: 120 | 121 | ```bash 122 | cd samples 123 | make clean 124 | TARGET_OS=native make 125 | ``` 126 | 127 | This creates: 128 | 129 | - **Linux**: `bins/libobfuscated.so` 130 | - **macOS**: `bins/libobfuscated.dylib` 131 | 132 | Note: Tests expect the Windows DLL, so this is mainly for development/testing. 133 | 134 | --- 135 | 136 | ## Troubleshooting 137 | 138 | ### "nmake: command not found" 139 | 140 | You're not in a Visual Studio Developer Command Prompt. Either: 141 | 142 | - Use **Start Menu → Visual Studio → Developer Command Prompt** 143 | - Or use `make` instead of `nmake` (requires MinGW/Git Bash) 144 | 145 | ### "cl: command not found" with nmake 146 | 147 | Run from **Developer Command Prompt**, not regular CMD. 
148 | 149 | ### "gcc: command not found" 150 | 151 | Install MinGW-w64 or use MSVC instead. 152 | 153 | ### Build succeeds but tests still fail 154 | 155 | 1. Verify exports: 156 | 157 | ```cmd 158 | dumpbin /EXPORTS bins\libobfuscated.dll | findstr test_ 159 | ``` 160 | 161 | 2. You should see function names like `test_cst_simplification`, NOT just addresses 162 | 163 | 3. If exports are missing, ensure: 164 | - Source files have `#include "export.h"` at the top 165 | - Functions have `EXPORT` prefix (e.g., `EXPORT int test_xor(...)`) 166 | 167 | ### Linker errors about missing symbols 168 | 169 | This is expected - the DLL references IDA Pro functions that don't exist at build time. The linker flags allow unresolved symbols. 170 | 171 | --- 172 | 173 | ## Makefile Features 174 | 175 | The updated Makefile now: 176 | 177 | ✅ **Auto-detects Windows compiler** (MSVC → GCC → Clang) 178 | ✅ **Supports Visual Studio (MSVC)** with proper `/` flags 179 | ✅ **Supports MinGW/GCC** with `-` flags 180 | ✅ **Uses export.h macros** for cross-platform exports 181 | ✅ **Handles .obj (MSVC) and .o (GCC)** object files 182 | ✅ **Cleans all build artifacts** properly 183 | 184 | --- 185 | 186 | ## Next Steps After Building 187 | 188 | 1. **Verify the DLL** has proper exports (see commands above) 189 | 190 | 2. **Commit the rebuilt DLL:** 191 | 192 | ```bash 193 | git add samples/bins/libobfuscated.dll 194 | git commit -m "rebuild: add function name exports to libobfuscated.dll" 195 | git push 196 | ``` 197 | 198 | 3. **GitHub Actions will test** the new DLL automatically 199 | 200 | 4. **Tests should pass** - functions will be found by name! 
✅ 201 | -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/instructions/analysis/pattern_guess.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import ida_hexrays 4 | 5 | from d810.conf.loggers import getLogger 6 | from d810.expr.ast import minsn_to_ast 7 | from d810.hexrays.hexrays_formatters import ( 8 | format_minsn_t, 9 | format_mop_t, 10 | maturity_to_string, 11 | opcode_to_string, 12 | ) 13 | from d810.optimizers.microcode.handler import DEFAULT_INSTRUCTION_MATURITIES 14 | from d810.optimizers.microcode.instructions.analysis.handler import ( 15 | InstructionAnalysisRule, 16 | ) 17 | from d810.optimizers.microcode.instructions.analysis.utils import get_possible_patterns 18 | 19 | optimizer_logger = getLogger("D810.optimizer") 20 | 21 | 22 | class ExampleGuessingRule(InstructionAnalysisRule): 23 | DESCRIPTION = "Detect pattern with variable used multiple times and with multiple different opcodes" 24 | 25 | def __init__(self): 26 | super().__init__() 27 | self.maturities = DEFAULT_INSTRUCTION_MATURITIES 28 | self.cur_maturity = None 29 | self.min_nb_var = 1 30 | self.max_nb_var = 3 31 | self.min_nb_diff_opcodes = 3 32 | self.max_nb_diff_opcodes = -1 33 | 34 | self.cur_index = 0 35 | self.max_index = 1000 36 | self.cur_ins_guessed = [""] * self.max_index 37 | self.pattern_filename_path = None 38 | 39 | def log_info(self, message: str): 40 | if self.pattern_filename_path is None: 41 | return 42 | with open(self.pattern_filename_path, "a") as f: 43 | f.write("{0}\n".format(message)) 44 | 45 | def set_maturity(self, maturity): 46 | self.log_info( 47 | "Patterns guessed at maturity {0}".format(maturity_to_string(maturity)) 48 | ) 49 | self.cur_maturity = maturity 50 | 51 | def set_log_dir(self, log_dir): 52 | super().set_log_dir(log_dir) 53 | if self.log_dir is None: 54 | return 55 | self.pattern_filename_path = os.path.join(self.log_dir, 
"pattern_guess.log")
        # Truncate any log left over from a previous run.
        open(self.pattern_filename_path, "w").close()

    def configure(self, kwargs):
        """Read min/max variable-count and opcode-diversity bounds from config."""
        super().configure(kwargs)
        if "min_nb_var" in kwargs.keys():
            self.min_nb_var = kwargs["min_nb_var"]
        if "max_nb_var" in kwargs.keys():
            self.max_nb_var = kwargs["max_nb_var"]
        if "min_nb_diff_opcodes" in kwargs.keys():
            self.min_nb_diff_opcodes = kwargs["min_nb_diff_opcodes"]
        if "max_nb_diff_opcodes" in kwargs.keys():
            self.max_nb_diff_opcodes = kwargs["max_nb_diff_opcodes"]

        # -1 means "no upper bound"; 0xFF is effectively unbounded here.
        if self.max_nb_var == -1:
            self.max_nb_var = 0xFF
        if self.max_nb_diff_opcodes == -1:
            self.max_nb_diff_opcodes = 0xFF

    def analyze_instruction(self, blk, ins) -> bool:
        """Return True when *ins* looks like an MBA pattern worth logging."""
        if self.cur_maturity not in self.maturities:
            return False
        formatted_ins = str(format_minsn_t(ins))
        # cur_ins_guessed is a fixed-size ring buffer of instructions already
        # reported; it keeps repeated visits from logging duplicates.
        if formatted_ins in self.cur_ins_guessed:
            return False
        if ins.opcode == ida_hexrays.m_nop:
            optimizer_logger.debug("Skipping pattern guess for nop instruction")
            return False

        tmp = minsn_to_ast(ins)
        if tmp is None:
            optimizer_logger.debug(
                "Skipping pattern guess: no AST for opcode %s",
                opcode_to_string(ins.opcode),
            )
            return False
        is_good_candidate = self.check_if_possible_pattern(tmp)
        if is_good_candidate:
            self.cur_ins_guessed[self.cur_index] = formatted_ins
            self.cur_index = (self.cur_index + 1) % self.max_index
        return is_good_candidate

    def check_if_possible_pattern(self, test_ast) -> bool:
        """Check each candidate pattern of *test_ast* against the configured bounds.

        A pattern is reported when its variable count and distinct-opcode count
        fall inside the configured ranges and every leaf is used at least twice.
        """
        patterns = get_possible_patterns(
            test_ast, min_nb_use=2, ref_ast_info_by_index=None, max_nb_pattern=64
        )
        for pattern in patterns:
            leaf_info_list, cst_leaf_values, opcodes = pattern.get_information()
            leaf_nb_use = [leaf_info.number_of_use for leaf_info in leaf_info_list]
            if not (self.min_nb_var <= len(leaf_info_list) <= self.max_nb_var):
                continue
            if not (
                self.min_nb_diff_opcodes
                <=
len(set(opcodes)) 109 | <= self.max_nb_diff_opcodes 110 | ): 111 | continue 112 | if not (min(leaf_nb_use) >= 2): 113 | continue 114 | ins = pattern.mop.d 115 | self.log_info("IR: 0x{0:x} - {1}".format(ins.ea, format_minsn_t(ins))) 116 | for leaf_info in leaf_info_list: 117 | self.log_info( 118 | " {0} -> {1}".format( 119 | leaf_info.ast, format_mop_t(leaf_info.ast.mop) 120 | ) 121 | ) 122 | self.log_info("Pattern: {0}".format(pattern)) 123 | self.log_info("AstNode: {0}\n".format(pattern.get_pattern())) 124 | return True 125 | return False 126 | -------------------------------------------------------------------------------- /src/d810/project_manager.py: -------------------------------------------------------------------------------- 1 | import dataclasses 2 | import functools 3 | import threading 4 | 5 | from d810.conf import D810Configuration, ProjectConfiguration 6 | from d810.conf.loggers import getLogger 7 | 8 | logger = getLogger(__name__) 9 | 10 | 11 | @dataclasses.dataclass 12 | class ProjectManager: 13 | """Manages project configurations: discovery, lookup, add, update, delete, load.""" 14 | 15 | config: D810Configuration 16 | _lock: threading.Lock = dataclasses.field( 17 | default_factory=threading.Lock, init=False 18 | ) 19 | _projects: dict[str, ProjectConfiguration] = dataclasses.field(init=False) 20 | 21 | def __post_init__(self): 22 | self.load_all() 23 | 24 | def __len__(self) -> int: 25 | with self._lock: 26 | return len(self._projects) 27 | 28 | def __bool__(self) -> bool: 29 | with self._lock: 30 | return bool(self._projects) 31 | 32 | def load_all(self) -> None: 33 | """ 34 | Discover and register project configurations by scanning for JSON files 35 | in both the user's configuration directory and the plugin's built-in 36 | template directory. This ensures that newly added files are 37 | automatically detected. 
38 | """ 39 | projects = self.config.discover_projects() 40 | with self._lock: 41 | self._projects = {p.path.name: p for p in projects} 42 | 43 | if logger.debug_on: 44 | for k, v in self._projects.items(): 45 | logger.debug("Project %s loaded from %s", k, v.path) 46 | 47 | def index(self, name: str) -> int: 48 | return self.project_names().index(name) 49 | 50 | def project_names(self) -> list[str]: 51 | with self._lock: 52 | return list(self._projects.keys()) 53 | 54 | def projects(self) -> list[ProjectConfiguration]: 55 | with self._lock: 56 | return list(self._projects.values()) 57 | 58 | @functools.singledispatchmethod 59 | def get(self, identifier: str) -> ProjectConfiguration: 60 | with self._lock: 61 | return self._projects[identifier] 62 | 63 | @get.register 64 | def _(self, identifier: int) -> ProjectConfiguration: 65 | with self._lock: 66 | lst = list(self._projects.values()) 67 | if logger.debug_on and 0 > identifier >= len(lst): 68 | logger.error("Unknown project index: %s", identifier) 69 | return lst[identifier] 70 | 71 | def add(self, project: ProjectConfiguration) -> None: 72 | name = project.path.name 73 | with self._lock: 74 | self._projects[name] = project 75 | # TODO: should be part of the config responsibility 76 | cfg_list = self.config.get("configurations") or [] 77 | if name not in cfg_list: 78 | cfg_list.append(name) 79 | self.config["configurations"] = cfg_list 80 | self.config.save() 81 | 82 | @functools.singledispatchmethod 83 | def update(self, old_name: str, new_project: ProjectConfiguration) -> None: 84 | with self._lock: 85 | if old_name not in self._projects: 86 | raise KeyError(f"Unknown project: {old_name}") 87 | if new_project.path.name != old_name: 88 | del self._projects[old_name] 89 | self._projects[new_project.path.name] = new_project 90 | 91 | @update.register 92 | def _(self, identifier: int, new_project: ProjectConfiguration) -> None: 93 | with self._lock: 94 | names = list(self._projects.keys()) 95 | if 0 <= identifier < 
len(names): 96 | old_name = names[identifier] 97 | else: 98 | raise IndexError(f"Unknown project index: {identifier}") 99 | self.update(old_name, new_project) 100 | 101 | @functools.singledispatchmethod 102 | def delete(self, name: str) -> None: 103 | with self._lock: 104 | project = self._projects.pop(name, None) 105 | cfg_list = self.config.get("configurations") or [] 106 | if not project: 107 | raise KeyError(f"Unknown project: {name}") 108 | if name in cfg_list: 109 | cfg_list.remove(name) 110 | self.config["configurations"] = cfg_list 111 | self.config.save() 112 | # Only allow deletion when the file lives in the user cfg directory 113 | user_cfg_dir = self.config.config_dir.resolve() 114 | path = project.path.resolve() 115 | if user_cfg_dir in path.parents: 116 | try: 117 | path.unlink(missing_ok=True) 118 | except Exception as e: 119 | logger.error("Failed to delete project file %s: %s", path, e) 120 | else: 121 | logger.warning("Refusing to delete read-only template: %s", path) 122 | 123 | @delete.register 124 | def _(self, project: ProjectConfiguration) -> None: 125 | self.delete(project.path.name) 126 | 127 | @delete.register 128 | def _(self, identifier: int) -> None: 129 | with self._lock: 130 | names = list(self._projects.keys()) 131 | if 0 <= identifier < len(names): 132 | name = names[identifier] 133 | else: 134 | raise IndexError(f"Unknown project index: {identifier}") 135 | self.delete(name) 136 | -------------------------------------------------------------------------------- /tests/unit/core/test_bits.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | # Import from d810.core.bits (IDA-independent bitwise utilities) 4 | from d810.core.bits import signed_to_unsigned, unsigned_to_signed, get_parity_flag 5 | 6 | def test_signed_to_unsigned_small_sizes(): 7 | # Test positive values 8 | assert signed_to_unsigned(42, 1) == 42 9 | assert signed_to_unsigned(1000, 2) == 1000 10 | assert 
signed_to_unsigned(123456, 4) == 123456 11 | assert signed_to_unsigned(9876543210, 8) == 9876543210 12 | 13 | # Test negative values (should wrap around) 14 | assert signed_to_unsigned(-1, 1) == 255 15 | assert signed_to_unsigned(-1, 2) == 65535 16 | assert signed_to_unsigned(-1, 4) == 4294967295 17 | assert signed_to_unsigned(-1, 8) == 18446744073709551615 18 | 19 | # Test edge cases 20 | assert signed_to_unsigned(0, 1) == 0 21 | assert signed_to_unsigned(127, 1) == 127 22 | assert signed_to_unsigned(-128, 1) == 128 23 | 24 | def test_unsigned_to_signed_small_sizes(): 25 | # Test positive values 26 | assert unsigned_to_signed(42, 1) == 42 27 | assert unsigned_to_signed(1000, 2) == 1000 28 | assert unsigned_to_signed(123456, 4) == 123456 29 | assert unsigned_to_signed(9876543210, 8) == 9876543210 30 | 31 | # Test values that should be interpreted as negative 32 | assert unsigned_to_signed(255, 1) == -1 33 | assert unsigned_to_signed(65535, 2) == -1 34 | assert unsigned_to_signed(4294967295, 4) == -1 35 | assert unsigned_to_signed(18446744073709551615, 8) == -1 36 | 37 | # Test edge cases 38 | assert unsigned_to_signed(0, 1) == 0 39 | assert unsigned_to_signed(127, 1) == 127 40 | assert unsigned_to_signed(128, 1) == -128 41 | 42 | def test_signed_to_unsigned_16_bytes(): 43 | # Test positive values 44 | test_val = 123456789012345678901234567890123456789 45 | assert signed_to_unsigned(test_val, 16) == test_val 46 | 47 | # Test negative values (should be treated as unsigned 128-bit) 48 | negative_val = -1 49 | expected = (1 << 128) - 1 50 | assert signed_to_unsigned(negative_val, 16) == expected 51 | 52 | # Test zero 53 | assert signed_to_unsigned(0, 16) == 0 54 | 55 | # Test large positive value 56 | large_val = (1 << 127) - 1 57 | assert signed_to_unsigned(large_val, 16) == large_val 58 | 59 | def test_unsigned_to_signed_16_bytes(): 60 | # Test positive values 61 | test_val = 123456789012345678901234567890123456789 62 | assert unsigned_to_signed(test_val, 16) == 
test_val

    # Values with the MSB set should be interpreted as negative
    msb_set = 1 << 127
    expected = msb_set - (1 << 128)
    assert unsigned_to_signed(msb_set, 16) == expected

    # All 128 bits set should decode to -1
    all_bits_set = (1 << 128) - 1
    assert unsigned_to_signed(all_bits_set, 16) == -1

    # Zero is its own fixed point
    assert unsigned_to_signed(0, 16) == 0

def test_roundtrip_conversion():
    # Signed -> unsigned -> signed must be the identity for in-range values
    signed_test_values = [0, 1, -1, 42, -42, 127, -128, -1, 42, -100]
    for val in signed_test_values:
        for size in [1, 2, 4, 8]:
            unsigned = signed_to_unsigned(val, size)
            back_to_signed = unsigned_to_signed(unsigned, size)
            assert back_to_signed == val, f"Signed roundtrip failed for {val} at size {size}: {val} -> {unsigned} -> {back_to_signed}"

    # Unsigned -> signed -> unsigned must be the identity for in-range values
    unsigned_test_cases = [
        (0, [1, 2, 4, 8]),
        (1, [1, 2, 4, 8]),
        (42, [1, 2, 4, 8]),
        (127, [1, 2, 4, 8]),
        (255, [1, 2, 4, 8]),
        (65535, [2, 4, 8]),
        (4294967295, [4, 8]),
    ]
    for val, sizes in unsigned_test_cases:
        for size in sizes:
            signed = unsigned_to_signed(val, size)
            back_to_unsigned = signed_to_unsigned(signed, size)
            assert back_to_unsigned == val, f"Unsigned roundtrip failed for {val} at size {size}: {val} -> {signed} -> {back_to_unsigned}"

def test_get_parity_flag():
    # NOTE(review): every expectation below is consistent with
    # get_parity_flag(a, b, size) modeling the x86 PF of (a - b): PF = 1 when
    # the result has an even number of set bits -- confirm against the
    # implementation in d810.core.bits.
    # Even popcount of the difference -> PF = 1
    assert get_parity_flag(1, 2, 4) == 1    # 1 - 2 = -1 -> 0xFF...: even number of bits
    assert get_parity_flag(4, 4, 4) == 1    # 4 - 4 = 0: zero bits set (even)
    assert get_parity_flag(3, 0, 1) == 1    # 3 - 0 = 0b11: two bits set

    # Odd popcount of the difference -> PF = 0
    assert get_parity_flag(1, 0, 4) == 0    # 1: one bit set
    assert get_parity_flag(7, 2, 4) == 1    # 7 - 2 = 5 = 0b101: TWO bits (even) -> 1; was misfiled under "odd" in the original comment
    assert get_parity_flag(1, 0, 1) == 0    # 1: one bit set

    # 16-byte (128-bit) cases
    assert get_parity_flag(1, 0, 16) == 0
    assert get_parity_flag(3, 0, 16) == 1

def test_large_values_16_bytes():
    # Test maximum 128-bit unsigned
value 118 | max_u128 = (1 << 128) - 1 119 | assert signed_to_unsigned(max_u128, 16) == max_u128 120 | assert unsigned_to_signed(max_u128, 16) == -1 121 | 122 | # Test maximum 128-bit signed value 123 | max_s128 = (1 << 127) - 1 124 | assert unsigned_to_signed(max_s128, 16) == max_s128 125 | 126 | # Test minimum 128-bit signed value 127 | min_s128 = -(1 << 127) 128 | assert signed_to_unsigned(min_s128, 16) == 1 << 127 129 | 130 | def test_invalid_sizes(): 131 | with pytest.raises(KeyError): 132 | signed_to_unsigned(42, 3) 133 | with pytest.raises(KeyError): 134 | unsigned_to_signed(42, 32) 135 | with pytest.raises(KeyError): 136 | get_parity_flag(1, 2, 64) 137 | 138 | -------------------------------------------------------------------------------- /src/d810/core/cymode.py: -------------------------------------------------------------------------------- 1 | import dataclasses 2 | import os 3 | 4 | from .registry import survives_reload 5 | from .singleton import SingletonMeta 6 | 7 | 8 | def _get_default_cython_enabled() -> bool: 9 | """Check D810_NO_CYTHON env var to determine default state.""" 10 | env_val = os.environ.get("D810_NO_CYTHON", "").lower() 11 | if env_val in ("1", "true", "yes"): 12 | return False 13 | return True 14 | 15 | 16 | @survives_reload() 17 | @dataclasses.dataclass(slots=True) 18 | class CythonMode(metaclass=SingletonMeta): 19 | """ 20 | Provides a controller to enable or disable the Cython-accelerated 21 | implementations of performance-critical code at runtime. 22 | 23 | Set D810_NO_CYTHON=1 environment variable to disable Cython at startup. 24 | 25 | Three Integration Patterns 26 | --------------------------- 27 | 28 | Pattern 1: Gate Module (Recommended) 29 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 30 | Use a dispatcher module that attempts Cython import with CythonMode check, 31 | falls back to pure Python on failure. 
32 | 33 | File structure: 34 | - `module.py` - gate module (imports from c_module or p_module) 35 | - `c_module.pyx` - Cython implementation 36 | - `p_module.py` - pure Python implementation 37 | 38 | Example (module.py): 39 | from d810.core.cymode import CythonMode 40 | 41 | if CythonMode().is_enabled(): 42 | try: 43 | from d810.speedups.c_module import fast_function 44 | _USING_CYTHON = True 45 | except ImportError: 46 | from d810.module.p_module import fast_function 47 | _USING_CYTHON = False 48 | else: 49 | from d810.module.p_module import fast_function 50 | _USING_CYTHON = False 51 | 52 | Pattern 2: CythonImporter Helper 53 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 54 | Use the CythonImporter helper for consistent import behavior. 55 | 56 | Example: 57 | from d810.core.cymode import CythonImporter 58 | 59 | importer = CythonImporter() 60 | try: 61 | hash_mop = importer.import_attr("d810.speedups.cythxr._chexrays_api", "hash_mop") 62 | except ImportError: 63 | hash_mop = None 64 | 65 | Pattern 3: Runtime Dispatch 66 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 67 | For modules with mixed Cython/Python functions, use runtime checks. 
68 | 69 | Example: 70 | from d810.core.cymode import CythonMode 71 | 72 | def process_data(data): 73 | if CythonMode().is_enabled(): 74 | try: 75 | from d810.speedups import fast_process 76 | return fast_process(data) 77 | except ImportError: 78 | pass 79 | return slow_process(data) 80 | """ 81 | 82 | _enabled: bool = dataclasses.field(default_factory=_get_default_cython_enabled) 83 | 84 | def _set_flag(self, value: bool) -> None: 85 | self._enabled = bool(value) 86 | 87 | def enable(self) -> None: 88 | """Point the public API to the fast Cython implementations.""" 89 | if not self._enabled: 90 | self._set_flag(True) 91 | print("Cython speedups ENABLED.") 92 | 93 | def disable(self) -> None: 94 | """Point the public API to the pure Python implementations for debugging.""" 95 | if self._enabled: 96 | self._set_flag(False) 97 | print("Cython speedups DISABLED (using pure Python).") 98 | 99 | def is_enabled(self) -> bool: 100 | """Check if the Cython implementation is currently active.""" 101 | return self._enabled 102 | 103 | def toggle(self) -> None: 104 | """Toggle the Cython implementation on/off.""" 105 | if self._enabled: 106 | self.disable() 107 | else: 108 | self.enable() 109 | 110 | 111 | class CythonImporter: 112 | """Helper for importing Cython modules with CythonMode awareness. 113 | 114 | Raises ImportError if CythonMode is disabled, allowing normal fallback logic. 115 | 116 | Example: 117 | importer = CythonImporter() 118 | try: 119 | fast_func = importer.import_attr("d810.speedups.module", "fast_func") 120 | except ImportError: 121 | fast_func = slow_func 122 | """ 123 | 124 | def __init__(self): 125 | self._mode = CythonMode() 126 | 127 | def import_module(self, module_name: str): 128 | """Import a Cython module if CythonMode is enabled. 
129 | 130 | Args: 131 | module_name: Fully qualified module name 132 | 133 | Returns: 134 | The imported module 135 | 136 | Raises: 137 | ImportError: If CythonMode is disabled or module doesn't exist 138 | """ 139 | if not self._mode.is_enabled(): 140 | raise ImportError(f"CythonMode disabled, skipping {module_name}") 141 | 142 | import importlib 143 | return importlib.import_module(module_name) 144 | 145 | def import_attr(self, module_name: str, attr_name: str): 146 | """Import a specific attribute from a Cython module. 147 | 148 | Args: 149 | module_name: Fully qualified module name 150 | attr_name: Attribute name to import 151 | 152 | Returns: 153 | The imported attribute 154 | 155 | Raises: 156 | ImportError: If CythonMode is disabled or attribute doesn't exist 157 | """ 158 | module = self.import_module(module_name) 159 | return getattr(module, attr_name) 160 | -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/instructions/peephole/constant_call.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import functools 4 | import logging 5 | import typing 6 | 7 | import ida_hexrays 8 | 9 | from d810 import _compat 10 | from d810.conf.loggers import getLogger 11 | from d810.expr import utils 12 | from d810.hexrays.hexrays_formatters import format_mop_t, opcode_to_string, sanitize_ea 13 | from d810.hexrays.hexrays_helpers import AND_TABLE # already maps size→mask 14 | from d810.hexrays.hexrays_helpers import extract_literal_from_mop, is_rotate_helper_call 15 | from d810.optimizers.microcode.instructions.peephole.handler import ( 16 | PeepholeSimplificationRule, 17 | ) 18 | 19 | logger = getLogger(__name__) 20 | 21 | 22 | def example(msg: str) -> typing.Callable: 23 | def decorator(func: typing.Callable) -> typing.Callable: 24 | @functools.wraps(func) 25 | def wrapper(*args: typing.Any, **kwargs: typing.Any) -> typing.Any: 26 | 
class ConstantCallResultFoldRule(PeepholeSimplificationRule):
    """Collapse helper calls whose *result* is already a literal into a constant.

    When a rotate helper call (e.g. ``__ROL4__``) carries literal operands,
    the whole instruction is replaced with a single ``m_ldc`` (load constant)
    holding the evaluated, width-masked result.
    """

    DESCRIPTION = (
        "Fold helper calls with literal destination into single constant expression"
    )

    def __init__(self, *args: typing.Any, **kwargs: typing.Any) -> None:
        super().__init__(*args, **kwargs)
        # Run after local optimization and again once calls are analysed.
        self.maturities = [ida_hexrays.MMAT_LOCOPT, ida_hexrays.MMAT_CALLS]

    @example(
        "opcode=call l= r= d=.8>"
    )
    @_compat.override
    def check_and_replace(
        self, blk: ida_hexrays.mblock_t | None, ins: ida_hexrays.minsn_t
    ) -> ida_hexrays.minsn_t | None:
        """Return a replacement ``m_ldc`` instruction, or None when not applicable.

        Args:
            blk: Enclosing micro-block (part of the rule API; unused here).
            ins: Candidate micro-instruction.

        Returns:
            A fresh ``minsn_t`` loading the folded constant, or ``None`` when
            ``ins`` is not a foldable rotate-helper call with literal args.
        """
        if logger.debug_on:
            logger.debug(
                "[const-call] considering ea=%X, opcode=%s l=%s r=%s d=%s",
                sanitize_ea(ins.ea),
                opcode_to_string(ins.opcode),
                format_mop_t(ins.l),
                format_mop_t(ins.r),
                format_mop_t(ins.d),
            )

        # Only consider calls that have a destination result
        if ins.opcode != ida_hexrays.m_call or ins.d is None:
            return None

        # only consider rotate helper calls (for now)
        if not is_rotate_helper_call(ins):
            if logger.debug_on:
                logger.debug(
                    "[const-call] not a rotate helper call, it is a %s",
                    ins.dstr(),
                )
            return None

        # extract helper name and width from helper string (e.g., __ROL4__)
        helper_name = (ins.l.helper or "").lstrip("!")
        if not helper_name:
            if logger.debug_on:
                # BUGFIX: the original call passed an argument without a
                # matching %s placeholder, which makes logging report an
                # internal "not all arguments converted" formatting error.
                logger.debug(
                    "[const-call] helper name is None for %s, bail out",
                    format_mop_t(ins.l.d),
                )
            return None

        extracted = extract_literal_from_mop(ins.d)
        if not extracted:
            if logger.debug_on:
                logger.debug(
                    "[const-call] no extracted literals from %s",
                    format_mop_t(ins.d),
                )
            return None

        # Rotate helpers take exactly two literal arguments: value and amount.
        if len(extracted) != 2:
            if logger.debug_on:
                logger.debug("[const-call] unexpected arg count: %d", len(extracted))
            return None

        lhs_val, lhs_size = extracted[0]
        rhs_val, _ = extracted[1]

        if lhs_size > ins.d.size:
            # BUGFIX: the format string had no placeholders for the two extra
            # arguments, so the logging module raised a formatting error.
            logger.warning(
                "[const-call] lhs_size %d > ins.d.size %d, will have to truncate!",
                lhs_size,
                ins.d.size,
            )

        # ROBUSTNESS: bail out gracefully when no evaluator exists for this
        # helper instead of raising AttributeError inside the optimizer.
        helper_func = getattr(utils, helper_name, None)
        if helper_func is None:
            logger.warning("[const-call] no evaluator for helper %s", helper_name)
            return None
        # Evaluate and mask to the destination width.
        result = helper_func(lhs_val, rhs_val) & AND_TABLE[ins.d.size]

        new = ida_hexrays.minsn_t(sanitize_ea(ins.ea))
        new.opcode = ida_hexrays.m_ldc
        cst = ida_hexrays.mop_t()
        cst.make_number(result, ins.d.size)
        new.l = cst
        # clone destination when it's a real l-value
        if ins.d.t in {
            ida_hexrays.mop_r,
            ida_hexrays.mop_l,
            ida_hexrays.mop_S,
            ida_hexrays.mop_v,
        }:
            new.d = ida_hexrays.mop_t()
            new.d.assign(ins.d)
            new.d.size = ins.d.size
        else:
            new.d = ida_hexrays.mop_t()
            new.d.erase()
            new.d.size = ins.d.size
        # 'r' is unused for m_ldc. Keep it as a mop_z with size 0 so that
        # later optimizers (e.g. stack-var propagation) can update it safely
        # without breaking the size invariants.
        new.r = ida_hexrays.mop_t()
        new.r.erase()  # will set t=mop_z and size=0
        if logger.debug_on:
            logger.debug(
                "[const-call] 0x%X call -> ldc 0x%X (size=%d)",
                sanitize_ea(ins.ea),
                result,
                ins.d.size,
            )
        return new
It wraps Python's built-in typing module 6 | and selected backports (e.g., typing_extensions) to ensure consistent 7 | availability of type hints and protocols across different Python versions 8 | used in IDA Pro environments. 9 | 10 | Use d810.typing instead of directly importing from typing or typing_extensions in plugin code. 11 | All exported names are explicitly added to __all__. 12 | """ 13 | 14 | # isort: skip_file 15 | from __future__ import annotations 16 | import sys 17 | 18 | # taken from: https://github.com/python/cpython/blob/3.10/Lib/typing.py 19 | from typing import ( 20 | # Super-special typing primitives. 21 | Annotated, 22 | Any, 23 | Callable, 24 | ClassVar, 25 | Concatenate, 26 | Final, 27 | ForwardRef, 28 | Generic, 29 | Literal, 30 | Optional, 31 | ParamSpec, 32 | Protocol, 33 | Tuple, 34 | Type, 35 | TypeVar, 36 | Union, 37 | # ABCs (from collections.abc). 38 | AbstractSet, # collections.abc.Set. 39 | ByteString, 40 | Container, 41 | ContextManager, 42 | Hashable, 43 | ItemsView, 44 | Iterable, 45 | Iterator, 46 | KeysView, 47 | Mapping, 48 | MappingView, 49 | MutableMapping, 50 | MutableSequence, 51 | MutableSet, 52 | Sequence, 53 | Sized, 54 | ValuesView, 55 | Awaitable, 56 | AsyncIterator, 57 | AsyncIterable, 58 | Coroutine, 59 | Collection, 60 | AsyncGenerator, 61 | AsyncContextManager, 62 | # Structural checks, a.k.a. protocols. 63 | Reversible, 64 | SupportsAbs, 65 | SupportsBytes, 66 | SupportsComplex, 67 | SupportsFloat, 68 | SupportsIndex, 69 | SupportsInt, 70 | SupportsRound, 71 | # Concrete collection types. 72 | ChainMap, 73 | Counter, 74 | Deque, 75 | Dict, 76 | DefaultDict, 77 | List, 78 | OrderedDict, 79 | Set, 80 | FrozenSet, 81 | NamedTuple, # Not really a type. 82 | TypedDict, # Not really a type. 83 | Generator, 84 | # Other concrete types. 85 | BinaryIO, 86 | IO, 87 | Match, 88 | Pattern, 89 | TextIO, 90 | # One-off things. 
91 | AnyStr, 92 | cast, 93 | final, 94 | get_args, 95 | get_origin, 96 | get_type_hints, 97 | is_typeddict, 98 | NewType, 99 | no_type_check, 100 | no_type_check_decorator, 101 | NoReturn, 102 | overload, 103 | ParamSpecArgs, 104 | ParamSpecKwargs, 105 | runtime_checkable, 106 | Text, 107 | TYPE_CHECKING, 108 | TypeAlias, 109 | TypeGuard, 110 | ) 111 | 112 | 113 | # Multiple python version compatible import for typing.override 114 | if sys.version_info >= (3, 11): 115 | from typing import Self # noqa: F401 116 | from typing import NotRequired # noqa: F401 117 | from typing import LiteralString # noqa: F401 118 | 119 | if sys.version_info >= (3, 12): 120 | from typing import override # noqa: F401 121 | from typing import TypeAliasType # noqa: F401 122 | 123 | 124 | if sys.version_info.major == 3 and sys.version_info.minor in (10, 11): 125 | # Multiple python version compatible import for override, TypeAliasType, Self, NotRequired, LiteralString 126 | if sys.version_info.minor <= 11: 127 | from d810._vendor.typing_extensions import override # noqa: F401 128 | from d810._vendor.typing_extensions import TypeAliasType # noqa: F401 129 | if sys.version_info.minor == 10: 130 | from d810._vendor.typing_extensions import Self # noqa: F401 131 | from d810._vendor.typing_extensions import NotRequired # noqa: F401 132 | from d810._vendor.typing_extensions import LiteralString # noqa: F401 133 | 134 | 135 | # Please keep __all__ alphabetized within each category. 136 | __all__ = [ 137 | # Super-special typing primitives. 138 | "Annotated", 139 | "Any", 140 | "Callable", 141 | "ClassVar", 142 | "Concatenate", 143 | "Final", 144 | "ForwardRef", 145 | "Generic", 146 | "Literal", 147 | "Optional", 148 | "ParamSpec", 149 | "Protocol", 150 | "Tuple", 151 | "Type", 152 | "TypeVar", 153 | "Union", 154 | # ABCs (from collections.abc). 155 | "AbstractSet", # collections.abc.Set. 
156 | "ByteString", 157 | "Container", 158 | "ContextManager", 159 | "Hashable", 160 | "ItemsView", 161 | "Iterable", 162 | "Iterator", 163 | "KeysView", 164 | "Mapping", 165 | "MappingView", 166 | "MutableMapping", 167 | "MutableSequence", 168 | "MutableSet", 169 | "Sequence", 170 | "Sized", 171 | "ValuesView", 172 | "Awaitable", 173 | "AsyncIterator", 174 | "AsyncIterable", 175 | "Coroutine", 176 | "Collection", 177 | "AsyncGenerator", 178 | "AsyncContextManager", 179 | # Structural checks, a.k.a. protocols. 180 | "Reversible", 181 | "SupportsAbs", 182 | "SupportsBytes", 183 | "SupportsComplex", 184 | "SupportsFloat", 185 | "SupportsIndex", 186 | "SupportsInt", 187 | "SupportsRound", 188 | # Concrete collection types. 189 | "ChainMap", 190 | "Counter", 191 | "Deque", 192 | "Dict", 193 | "DefaultDict", 194 | "List", 195 | "OrderedDict", 196 | "Set", 197 | "FrozenSet", 198 | "NamedTuple", # Not really a type. 199 | "TypedDict", # Not really a type. 200 | "Generator", 201 | # Other concrete types. 202 | "BinaryIO", 203 | "IO", 204 | "Match", 205 | "Pattern", 206 | "TextIO", 207 | # One-off things. 
    # Version-dependent names: re-exported from typing or from the vendored
    # typing_extensions, depending on the interpreter version (see the
    # conditional imports above).
    "AnyStr",
    "cast",
    "final",
    "get_args",
    "get_origin",
    "get_type_hints",
    "is_typeddict",
    "NewType",
    "no_type_check",
    "no_type_check_decorator",
    "NoReturn",
    "overload",
    "ParamSpecArgs",
    "ParamSpecKwargs",
    "runtime_checkable",
    "Text",
    "TYPE_CHECKING",
    "TypeAlias",
    "TypeGuard",
] + [
    "override",
    "TypeAliasType",
    "Self",
    "NotRequired",
    "LiteralString",
]
# -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/instructions/pattern_matching/rewrite_mul.py

from d810.expr.ast import AstConstant, AstLeaf, AstNode
from d810.hexrays.hexrays_helpers import SUB_TABLE, equal_bnot_mop, is_check_mop
from d810.optimizers.microcode.instructions.pattern_matching.handler import (
    PatternMatchingRule,
)

from ida_hexrays import *


class Mul_MbaRule_1(PatternMatchingRule):
    """Rewrite ``(x|y)*(x&y) + (x&~y)*(y&~x)`` to ``x*y``.

    Standard MBA multiplication identity: the product of the inclusive parts
    plus the product of the two exclusive parts equals the plain product.
    """

    def check_candidate(self, candidate):
        # Both "bnot" leaves must really be the bitwise negation of their partner.
        if not equal_bnot_mop(candidate["x_0"].mop, candidate["bnot_x_0"].mop):
            return False
        if not equal_bnot_mop(candidate["x_1"].mop, candidate["bnot_x_1"].mop):
            return False
        return True

    @property
    def PATTERN(self) -> AstNode:
        # (x_0 | x_1) * (x_0 & x_1) + (x_0 & ~x_1) * (x_1 & ~x_0)
        return AstNode(
            m_add,
            AstNode(
                m_mul,
                AstNode(m_or, AstLeaf("x_0"), AstLeaf("x_1")),
                AstNode(m_and, AstLeaf("x_0"), AstLeaf("x_1")),
            ),
            AstNode(
                m_mul,
                AstNode(m_and, AstLeaf("x_0"), AstLeaf("bnot_x_1")),
                AstNode(m_and, AstLeaf("x_1"), AstLeaf("bnot_x_0")),
            ),
        )

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        return AstNode(m_mul, AstLeaf("x_0"), AstLeaf("x_1"))


# This is false.
class Mul_MbaRule_2(PatternMatchingRule):
    """Rewrite ``(x|c)*x + (x&~c)*(c&~x)`` to ``x*c`` for odd constant ``c``.

    NOTE(review): flagged ``This is false.`` by the author above — the
    identity does not hold in general; it is gated by ``is_check_mop`` and
    kept here for reference. Confirm before relying on it.
    """

    def check_candidate(self, candidate):
        # Extra gating since the identity is not generally sound.
        if not is_check_mop(candidate["x_0"].mop):
            return False
        # Constant must be odd.
        if candidate["c_1"].value & 0x1 != 1:
            return False
        if not equal_bnot_mop(candidate["c_1"].mop, candidate["bnot_c_1"].mop):
            return False
        if not equal_bnot_mop(candidate["x_0"].mop, candidate["bnot_x_0"].mop):
            return False
        return True

    @property
    def PATTERN(self) -> AstNode:
        # (x_0 | c_1) * x_0 + (x_0 & ~c_1) * (c_1 & ~x_0)
        return AstNode(
            m_add,
            AstNode(
                m_mul, AstNode(m_or, AstLeaf("x_0"), AstConstant("c_1")), AstLeaf("x_0")
            ),
            AstNode(
                m_mul,
                AstNode(m_and, AstLeaf("x_0"), AstConstant("bnot_c_1")),
                AstNode(m_and, AstConstant("c_1"), AstLeaf("bnot_x_0")),
            ),
        )

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        return AstNode(m_mul, AstLeaf("x_0"), AstConstant("c_1"))


# This is false.
class Mul_MbaRule_3(PatternMatchingRule):
    """Even-constant sibling of Mul_MbaRule_2.

    NOTE(review): also flagged ``This is false.`` by the author — gated by
    ``is_check_mop``; confirm soundness before enabling widely.
    """

    def check_candidate(self, candidate):
        # Extra gating since the identity is not generally sound.
        if not is_check_mop(candidate["x_0"].mop):
            return False
        # Constant must be even.
        if candidate["c_1"].value & 0x1 == 1:
            return False
        if not equal_bnot_mop(candidate["x_0"].mop, candidate["bnot_x_0"].mop):
            return False
        return True

    @property
    def PATTERN(self) -> AstNode:
        # (x_0 | c_1) * (x_0 & c_1) + x_0 * (c_1 & ~x_0)
        return AstNode(
            m_add,
            AstNode(
                m_mul,
                AstNode(m_or, AstLeaf("x_0"), AstConstant("c_1")),
                AstNode(m_and, AstLeaf("x_0"), AstConstant("c_1")),
            ),
            AstNode(
                m_mul,
                AstLeaf("x_0"),
                AstNode(m_and, AstConstant("c_1"), AstLeaf("bnot_x_0")),
            ),
        )

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        return AstNode(m_mul, AstLeaf("x_0"), AstConstant("c_1"))


class Mul_MbaRule_4(PatternMatchingRule):
    """Variant of Mul_MbaRule_1 where ``y&~x`` is spelled ``~(x|~y)`` (De Morgan)."""

    def check_candidate(self, candidate):
        if not equal_bnot_mop(candidate["x_1"].mop, candidate["bnot_x_1"].mop):
            return False
        return True

    @property
    def PATTERN(self) -> AstNode:
        # (x_0 | x_1) * (x_0 & x_1) + ~(x_0 | ~x_1) * (x_0 & ~x_1)
        # where ~(x_0 | ~x_1) == ~x_0 & x_1, i.e. the Mul_MbaRule_1 shape.
        return AstNode(
            m_add,
            AstNode(
                m_mul,
                AstNode(m_or, AstLeaf("x_0"), AstLeaf("x_1")),
                AstNode(m_and, AstLeaf("x_0"), AstLeaf("x_1")),
            ),
            AstNode(
                m_mul,
                AstNode(m_bnot, AstNode(m_or, AstLeaf("x_0"), AstLeaf("bnot_x_1"))),
                AstNode(m_and, AstLeaf("x_0"), AstLeaf("bnot_x_1")),
            ),
        )

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        return AstNode(m_mul, AstLeaf("x_0"), AstLeaf("x_1"))


class Mul_FactorRule_1(PatternMatchingRule):
    """Rewrite ``2 + 2*(y + (x|~y))`` to ``2*(x & y)``.

    Since ``x|~y == (x&y) + ~y`` (disjoint bit sets) and ``y + ~y == -1``,
    the inner sum telescopes to ``(x&y) - 1``; doubling and adding 2 leaves
    ``2*(x&y)``.
    """

    def check_candidate(self, candidate):
        if not equal_bnot_mop(candidate["x_1"].mop, candidate["bnot_x_1"].mop):
            return False
        return True

    @property
    def PATTERN(self) -> AstNode:
        # 2 + 2 * (x_1 + (x_0 | ~x_1))
        return AstNode(
            m_add,
            AstConstant("2", 2),
            AstNode(
                m_mul,
                AstConstant("2", 2),
                AstNode(
                    m_add,
                    AstLeaf("x_1"),
                    AstNode(m_or, AstLeaf("x_0"), AstLeaf("bnot_x_1")),
                ),
            ),
        )

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        return AstNode(
            m_mul, AstConstant("2", 2), AstNode(m_and, AstLeaf("x_0"), AstLeaf("x_1"))
        )


class Mul_FactorRule_2(PatternMatchingRule):
    """Rewrite ``-(x&y) - (x&y)`` to ``(2**n - 2) * (x & y)``.

    ``SUB_TABLE[size] - 2`` is ``-2`` modulo the operand width, so the
    replacement is exactly ``-2 * (x & y)``.
    """

    def check_candidate(self, candidate):
        # Materialize the width-sized constant -2 for the replacement pattern.
        candidate.add_constant_leaf(
            "val_fe", SUB_TABLE[candidate.size] - 2, candidate.size
        )
        return True

    @property
    def PATTERN(self) -> AstNode:
        # -(x_0 & x_1) - (x_0 & x_1)
        return AstNode(
            m_sub,
            AstNode(m_neg, AstNode(m_and, AstLeaf("x_0"), AstLeaf("x_1"))),
            AstNode(m_and, AstLeaf("x_0"), AstLeaf("x_1")),
        )

    @property
    def REPLACEMENT_PATTERN(self) -> AstNode:
        return AstNode(
            m_mul, AstConstant("val_fe"), AstNode(m_and, AstLeaf("x_0"), AstLeaf("x_1"))
        )
# -------------------------------------------------------------------------------- /src/d810/core/patching.py

"""Binary patching helpers for dispatcher unflattening.

This module provides data structures to record and apply control-flow
patches to the binary. After a dispatcher has been identified and
simplified, the plugin can either modify the micro-code (which is
ephemeral) or patch the actual binary instructions. By converting
micro-code jumps back into assembly and rewriting the corresponding
bytes, the control flow becomes permanently simplified and survives
reanalysis. Patch descriptions are stored in a simple serialisable
format so that they can be persisted to disk and reapplied on future
sessions.

These helpers do not perform any IDA-specific operations on their own.
Instead, they encapsulate patch actions which can later be executed
when the Hex-Rays/IDA environment is available. The :class:`BinaryPatcher`
class shows how one might apply these actions using IDA APIs.
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, List, Dict, Optional


@dataclass
class PatchAction:
    """One serialisable binary patch action.

    Attributes
    ----------
    action : str
        The type of patch action (e.g. "replace", "delete", "rename").
    target_block_serial : int
        Serial number of the micro-code block the patch applies to; when
        converting to assembly this serial is mapped to an address.
    params : dict
        Extra parameters for the action — e.g. a ``"jump_target"`` key for
        a ``replace`` action naming the destination block serial.
    """

    action: str
    target_block_serial: int
    params: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Serialise this action into a plain (JSON-friendly) dict."""
        return {
            "action": self.action,
            "target_block_serial": self.target_block_serial,
            "params": self.params,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "PatchAction":
        """Rebuild an action from :meth:`to_dict` output; missing keys get defaults."""
        return cls(
            action=data.get("action", ""),
            target_block_serial=data.get("target_block_serial", -1),
            params=dict(data.get("params", {})),
        )


@dataclass
class PatchRecorder:
    """Collects patch actions produced during unflattening."""

    actions: List[PatchAction] = field(default_factory=list)

    def _record(
        self, kind: str, serial: int, params: Optional[Dict[str, Any]] = None
    ) -> None:
        # Internal helper: append one PatchAction with the given fields.
        self.actions.append(
            PatchAction(
                action=kind,
                target_block_serial=serial,
                params=params if params is not None else {},
            )
        )

    def record_replace(self, block_serial: int, jump_target_serial: int) -> None:
        """Record a replace action which will replace the block with an unconditional jump."""
        self._record("replace", block_serial, {"jump_target": jump_target_serial})

    def record_delete(self, block_serial: int) -> None:
        """Record a delete action to remove a dead block."""
        self._record("delete", block_serial)

    def record_rename(self, old_serial: int, new_name: str) -> None:
        """Record a rename action for a state variable or symbol."""
        self._record("rename", old_serial, {"new_name": new_name})

    def to_dict(self) -> dict:
        """Serialise all recorded actions into a plain dict."""
        return {"actions": [entry.to_dict() for entry in self.actions]}

    @classmethod
    def from_dict(cls, data: dict) -> "PatchRecorder":
        """Rebuild a recorder from :meth:`to_dict` output."""
        recorder = cls()
        recorder.actions.extend(
            PatchAction.from_dict(entry) for entry in data.get("actions", [])
        )
        return recorder


class BinaryPatcher:
    """Applies recorded patch actions using IDA APIs.

    This class provides a thin wrapper around IDA Pro functions such as
    ``ida_bytes.patch_bytes`` or ``idaapi.convert_to_jump``. It is
    intentionally lightweight: deciding which blocks to patch happens
    during unflattening, and this class simply executes the recorded
    actions. When running outside of IDA, :meth:`apply` does nothing.
    """

    def __init__(self, patch_recorder: PatchRecorder):
        self.recorder = patch_recorder

    def apply(self) -> None:
        """Apply all recorded patches to the binary.

        When IDA APIs are available, the actions will be executed. If
        running outside of IDA, this method logs the intended
        operations but performs no changes.
        """
        try:
            import ida_bytes
            import idaapi
        except Exception:
            # No IDA environment; print debug output instead of patching.
            for act in self.recorder.actions:
                print(
                    f"[BinaryPatcher] Would apply {act.action} to block "
                    f"{act.target_block_serial} with params {act.params}"
                )
            return
        # IDA environment: actual patching logic goes here.
        for act in self.recorder.actions:
            if act.action == "replace":
                # Replace the block with an unconditional jump to the target.
                # In a real implementation, we would compute the address of
                # the target block and emit the appropriate jump opcode, e.g.:
                #   addr = get_block_address(act.target_block_serial)
                #   ida_bytes.patch_byte(addr, ...)
                #   idaapi.create_insn(addr)
                pass  # TODO: implement actual patching logic
            elif act.action == "delete":
                # Delete the block by patching NOPs or deleting code.
                pass  # TODO: implement deletion logic
            elif act.action == "rename":
                # Rename a symbol or variable.
                pass  # TODO: implement rename logic
# -------------------------------------------------------------------------------- /src/d810/core/platform.py

"""Platform and file format detection utilities.

This module provides utilities for detecting the binary file format
and platform characteristics, useful for architecture-specific rule configuration.
"""

from __future__ import annotations

from enum import Enum, auto
from typing import TYPE_CHECKING

from .logging import getLogger

logger = getLogger(__name__)


class FileFormat(Enum):
    """Binary file format types."""

    UNKNOWN = auto()
    MACHO = auto()  # Mach-O (macOS, iOS)
    ELF = auto()  # ELF (Linux, BSD)
    PE = auto()  # PE/COFF (Windows)
    RAW = auto()  # Raw binary


class Platform(Enum):
    """Target platform/OS."""

    UNKNOWN = auto()
    DARWIN = auto()  # macOS, iOS
    LINUX = auto()
    WINDOWS = auto()


# IDA file type constants (from ida_loader.h).
# These match the f_XXX constants in the IDA SDK.
_IDA_FILETYPE_ELF = 18  # f_ELF
_IDA_FILETYPE_MACHO = 25  # f_MACHO
_IDA_FILETYPE_PE = 11  # f_PE
_IDA_FILETYPE_COFF = 20  # f_COFF


def detect_file_format() -> FileFormat:
    """Detect the file format of the currently loaded binary.

    Returns:
        FileFormat enum value based on the current IDB.

    Note:
        Must be called from within IDA with a database loaded.
    """
    try:
        import idaapi

        # Use idaapi shim - works across IDA versions.
        filetype = idaapi.inf_get_filetype()
    except Exception as e:
        logger.warning("Failed to detect file format: %s", e)
        return FileFormat.UNKNOWN

    if filetype == _IDA_FILETYPE_MACHO:
        return FileFormat.MACHO
    if filetype == _IDA_FILETYPE_ELF:
        return FileFormat.ELF
    if filetype in (_IDA_FILETYPE_PE, _IDA_FILETYPE_COFF):
        return FileFormat.PE
    logger.debug("Unknown file type: %d", filetype)
    return FileFormat.UNKNOWN


def detect_platform() -> Platform:
    """Detect the target platform based on the file format.

    This is a heuristic: Mach-O implies Darwin, PE implies Windows, and
    ELF implies Linux (though ELF is used on BSD too).

    Returns:
        Platform enum value.
    """
    format_to_platform = {
        FileFormat.MACHO: Platform.DARWIN,
        FileFormat.PE: Platform.WINDOWS,
        FileFormat.ELF: Platform.LINUX,
    }
    return format_to_platform.get(detect_file_format(), Platform.UNKNOWN)


def get_format_config_keys(file_format: FileFormat | None = None) -> list[str]:
    """Get configuration keys to check for the given file format.

    Returns a list of keys in order of precedence (most specific first),
    allowing configs to specify overrides by format name.

    Args:
        file_format: The file format, or None to detect automatically.

    Returns:
        List of config keys to check, e.g., ["macho", "darwin", "default"].
    """
    fmt = file_format if file_format is not None else detect_file_format()

    # Format-specific keys (most specific first), then the default fallback.
    specific = {
        FileFormat.MACHO: ["macho", "darwin"],
        FileFormat.ELF: ["elf", "linux"],
        FileFormat.PE: ["pe", "windows"],
    }.get(fmt, [])

    return [*specific, "default"]


# Reserved keys that indicate arch-specific config structure.
ARCH_CONFIG_KEYS = frozenset(
    {"default", "macho", "elf", "pe", "darwin", "linux", "windows"}
)


def is_arch_specific_config(config: dict) -> bool:
    """Check if a config dict uses architecture-specific structure.

    Args:
        config: The rule configuration dict.

    Returns:
        True if the config contains architecture-specific keys.
    """
    return not ARCH_CONFIG_KEYS.isdisjoint(config.keys())


def resolve_arch_config(config: dict, file_format: FileFormat | None = None) -> dict:
    """Resolve architecture-specific configuration to the effective config.

    If the config contains architecture-specific keys (default, macho, elf,
    pe, darwin, linux, windows), this merges the default config with the
    most specific matching config for the current binary. Configs without
    arch-specific structure are returned unchanged for backwards
    compatibility.

    Args:
        config: The rule configuration dict (may or may not be arch-specific).
        file_format: The file format, or None to detect automatically.

    Returns:
        The effective configuration dict.

    Example:
        >>> config = {
        ...     "default": {"min_size": 4},
        ...     "macho": {"allow_executable_readonly": True}
        ... }
        >>> # On a Mach-O binary:
        >>> resolve_arch_config(config)
        {"min_size": 4, "allow_executable_readonly": True}
    """
    if not is_arch_specific_config(config):
        # No arch-specific structure — return as-is for backwards compatibility.
        return config

    # Start from the default section, then layer on the most specific
    # matching override (and only that one).
    effective = dict(config.get("default", {}))

    for key in get_format_config_keys(file_format):
        if key == "default" or key not in config:
            continue
        override = config[key]
        if isinstance(override, dict):
            effective.update(override)
            logger.debug(
                "Applied arch-specific config for '%s': %s", key, override
            )
        break  # Only apply the most specific override.

    return effective
"macho": {"allow_executable_readonly": True} 163 | ... } 164 | >>> # On a Mach-O binary: 165 | >>> resolve_arch_config(config) 166 | {"min_size": 4, "allow_executable_readonly": True} 167 | """ 168 | if not is_arch_specific_config(config): 169 | # No arch-specific structure - return as-is for backwards compatibility 170 | return config 171 | 172 | # Start with default config 173 | result = dict(config.get("default", {})) 174 | 175 | # Get keys to check for this platform (most specific to least) 176 | keys_to_check = get_format_config_keys(file_format) 177 | 178 | # Apply overrides from most specific matching key 179 | for key in keys_to_check: 180 | if key in config and key != "default": 181 | override = config[key] 182 | if isinstance(override, dict): 183 | result.update(override) 184 | logger.debug( 185 | "Applied arch-specific config for '%s': %s", key, override 186 | ) 187 | break # Only apply the most specific override 188 | 189 | return result 190 | -------------------------------------------------------------------------------- /src/d810/optimizers/microcode/flow/flattening/unflattener_badwhile_loop.py: -------------------------------------------------------------------------------- 1 | from ida_hexrays import * 2 | 3 | from d810.conf.loggers import getLogger 4 | from d810.hexrays.hexrays_helpers import append_mop_if_not_in_list 5 | from d810.optimizers.microcode.flow.flattening.generic import ( 6 | GenericDispatcherBlockInfo, 7 | GenericDispatcherCollector, 8 | GenericDispatcherInfo, 9 | GenericDispatcherUnflatteningRule, 10 | ) 11 | 12 | unflat_logger = getLogger(__name__) 13 | FLATTENING_JUMP_OPCODES = [m_jz] 14 | 15 | 16 | class BadWhileLoopBlockInfo(GenericDispatcherBlockInfo): 17 | pass 18 | 19 | 20 | class BadWhileLoopInfo(GenericDispatcherInfo): 21 | def explore(self, blk: mblock_t) -> bool: 22 | """ 23 | ; 1WAY-BLOCK 13 [START=0000E1BE END=0000E1D0] STK=48/ARG=250, MAXBSP: 0 24 | ; - INBOUND: [12, 24, 25, 8] OUTBOUND: [14] 25 | 13. 
0 mov #0xF6A1F.4, eax.4 ; 0000E1BE 26 | goto 16 27 | 28 | ; 2WAY-BLOCK 14 [START=0000E1D0 END=0000E1DB] STK=48/ARG=250, MAXBSP: 0 29 | ; - INBOUND: [13, 18] OUTBOUND: [15, 21] 30 | 14. 0 jz eax.4, #0xF6A1E.4, @21 ; 0000E1D5 31 | 32 | ; 2WAY-BLOCK 15 [START=0000E1DB END=0000E1E2] STK=48/ARG=250, MAXBSP: 0 33 | ; - INBOUND: [14] OUTBOUND: [16, 19] 34 | 15. 0 jz eax.4, #0xF6A20.4, @19 35 | 36 | ; 2WAY-BLOCK 16 [START=0000E204 END=0000E213] STK=48/ARG=250, MAXBSP: 0 37 | ; - INBOUND: [15] OUTBOUND: [17, 26] 38 | 16. 0 mov #0xF6A25.8, rcx.8 ; 0000E21F 39 | 16. 1 jz [ds.2:r12.8].1, #0.1, @26 40 | 41 | 17. 0 mov #0xF6A1E.4, eax.4 42 | 43 | 18. 0 mov #0.8, rdx.8{18} ; 0000E0FD 44 | 18. 1 goto @21 45 | 46 | ; - INBOUND: [16] OUTBOUND: [18] 47 | 26. 0 mov #0xF6A20.4, eax.4 ; 0000E218 48 | 26. 1 goto @19 49 | 50 | 51 | entry_block = 14 52 | exit_blocks = 21 & 16 & 19 53 | 54 | 55 | """ 56 | self.reset() 57 | if not self._is_candidate_for_dispatcher_entry_block(blk): 58 | return False 59 | self.entry_block = BadWhileLoopBlockInfo(blk) 60 | self.mop_compared = blk.tail.l 61 | self.entry_block.parse() 62 | for used_mop in self.entry_block.use_list: 63 | append_mop_if_not_in_list(used_mop, self.entry_block.assume_def_list) 64 | self.dispatcher_internal_blocks.append(self.entry_block) 65 | if ( 66 | blk.tail.opcode == m_jz 67 | and blk.tail.r.t == mop_n 68 | and blk.nextb != None 69 | and blk.prevb != None 70 | ): 71 | right_cnst = blk.tail.r.signed_value() 72 | if right_cnst > 0xF6000 and right_cnst < 0xF6FFF: 73 | if blk.prevb.tail.opcode == m_mov and blk.prevb.tail.l.t == mop_n: 74 | jz0_cnst = blk.prevb.tail.l.signed_value() 75 | if blk.nextb.tail.opcode == m_jz and blk.nextb.tail.r.t == mop_n: 76 | jz1_cnst = blk.nextb.tail.r.signed_value() 77 | if ( 78 | jz1_cnst > 0xF6000 79 | and jz1_cnst < 0xF6FFF 80 | and jz0_cnst > 0xF6000 81 | and jz0_cnst < 0xF6FFF 82 | ): 83 | exit_block0 = BadWhileLoopBlockInfo( 84 | blk.mba.get_mblock(blk.nextb.tail.d.b), self.entry_block 85 | ) 86 
| self.dispatcher_exit_blocks.append(exit_block0) 87 | self.comparison_values.append(jz1_cnst) 88 | exit_block1 = BadWhileLoopBlockInfo( 89 | blk.mba.get_mblock(blk.nextb.nextb.serial), 90 | self.entry_block, 91 | ) 92 | self.dispatcher_exit_blocks.append(exit_block1) 93 | self.comparison_values.append(right_cnst) 94 | exit_block2 = BadWhileLoopBlockInfo( 95 | blk.mba.get_mblock(blk.prevb.serial), self.entry_block 96 | ) 97 | self.dispatcher_exit_blocks.append(exit_block2) 98 | self.comparison_values.append(jz0_cnst) 99 | 100 | return True 101 | 102 | def _is_candidate_for_dispatcher_entry_block(self, blk): 103 | if ( 104 | blk.tail.opcode == m_jz 105 | and blk.tail.r.t == mop_n 106 | and blk.nextb != None 107 | and blk.prevb != None 108 | ): 109 | right_cnst = blk.tail.r.signed_value() 110 | if right_cnst > 0xF6000 and right_cnst < 0xF6FFF: 111 | if blk.prevb.tail.opcode == m_mov and blk.prevb.tail.l.t == mop_n: 112 | jz0_cnst = blk.prevb.tail.l.signed_value() 113 | if blk.nextb.tail.opcode == m_jz and blk.nextb.tail.r.t == mop_n: 114 | jz1_cnst = blk.nextb.tail.r.signed_value() 115 | if ( 116 | jz1_cnst > 0xF6000 117 | and jz1_cnst < 0xF6FFF 118 | and jz0_cnst > 0xF6000 119 | and jz0_cnst < 0xF6FFF 120 | ): 121 | return True 122 | return False 123 | 124 | def _get_comparison_info(self, blk: mblock_t): 125 | # blk.tail must be a jtbl 126 | if (blk.tail is None) or (blk.tail.opcode != m_jtbl): 127 | return None, None 128 | return blk.tail.l, blk.tail.r 129 | 130 | 131 | class BadWhileLoopCollector(GenericDispatcherCollector): 132 | DISPATCHER_CLASS = BadWhileLoopInfo 133 | DEFAULT_DISPATCHER_MIN_INTERNAL_BLOCK = 1 134 | DEFAULT_DISPATCHER_MIN_EXIT_BLOCK = 3 135 | DEFAULT_DISPATCHER_MIN_COMPARISON_VALUE = 3 136 | 137 | 138 | class BadWhileLoop(GenericDispatcherUnflatteningRule): 139 | DESCRIPTION = "Remove control flow flattening generated by approov" 140 | DEFAULT_UNFLATTENING_MATURITIES = [MMAT_GLBOPT1] 141 | DEFAULT_MAX_DUPLICATION_PASSES = 20 142 | 
class UnflattenerFakeJump(GenericUnflatteningRule):
    """Remove conditional jumps whose outcome is constant per predecessor.

    For each predecessor of a single-instruction conditional block, the
    compared operand is traced backwards with a ``MopTracker``.  When every
    resolved path forces the comparison one way, the predecessor is rewired
    straight to the real destination, bypassing the fake jump.
    """

    DESCRIPTION = (
        "Check if a jump is always taken for each father blocks and remove them"
    )
    DEFAULT_UNFLATTENING_MATURITIES = [MMAT_CALLS, MMAT_GLBOPT1]
    # No upper bound on the number of passes for this rule.
    DEFAULT_MAX_PASSES = None

    def analyze_blk(self, blk: mblock_t) -> int:
        """Inspect *blk* and retarget predecessors whose jump outcome is fixed.

        Returns:
            The number of predecessor blocks that were rewired.
        """
        # Only single-instruction blocks ending in jz/jnz against a constant
        # are candidates.
        if (blk.tail is None) or blk.tail.opcode not in FAKE_LOOP_OPCODES:
            return 0
        if blk.get_reginsn_qty() != 1:
            return 0
        if blk.tail.r.t != mop_n:
            return 0
        unflat_logger.info(
            "Checking if block %s is fake loop: %s",
            blk.serial,
            format_minsn_t(blk.tail),
        )
        op_compared = mop_t(blk.tail.l)
        # Copy the predecessor list: fix_successor() mutates the CFG.
        blk_preset_list = list(blk.predset)
        nb_change = 0
        for pred_serial in blk_preset_list:
            cmp_variable_tracker = MopTracker(
                [op_compared], max_nb_block=100, max_path=1000
            )
            cmp_variable_tracker.reset()
            pred_blk = blk.mba.get_mblock(pred_serial)
            pred_histories = cmp_variable_tracker.search_backward(
                pred_blk, pred_blk.tail
            )

            # Bail out entirely when any path through this predecessor is
            # unresolved -- we cannot prove the jump outcome.
            father_is_resolvable = all(
                father_history.is_resolved() for father_history in pred_histories
            )
            if not father_is_resolvable:
                return 0
            pred_values = get_all_possibles_values(pred_histories, [op_compared])
            pred_values = [x[0] for x in pred_values]
            if None in pred_values:
                unflat_logger.info("Some path are not resolved, can't fix jump")
                return 0
            unflat_logger.info(
                "Pred %s has %s possible path (%s different cst): %s",
                pred_blk.serial,
                len(pred_values),
                len(set(pred_values)),
                pred_values,
            )
            if self.fix_successor(blk, pred_blk, pred_values):
                nb_change += 1
        return nb_change

    def fix_successor(
        self,
        fake_loop_block: mblock_t,
        pred: mblock_t,
        pred_comparison_values: list[int],
    ) -> bool:
        """Redirect *pred* past the fake jump when its outcome is constant.

        Args:
            fake_loop_block: The block ending in the suspect jz/jnz.
            pred: One predecessor of *fake_loop_block*.
            pred_comparison_values: Every value the compared operand can hold
                when arriving from *pred*.

        Returns:
            True when the CFG was modified.
        """
        if len(pred_comparison_values) == 0:
            return False
        jmp_ins = fake_loop_block.tail
        compared_value = jmp_ins.r.nnn.value
        jmp_taken = False
        jmp_not_taken = False
        dst_serial = None
        if jmp_ins.opcode == m_jz:
            jmp_taken = all(
                possible_value == compared_value
                for possible_value in pred_comparison_values
            )
            jmp_not_taken = all(
                possible_value != compared_value
                for possible_value in pred_comparison_values
            )
        elif jmp_ins.opcode == m_jnz:
            jmp_taken = all(
                possible_value != compared_value
                for possible_value in pred_comparison_values
            )
            jmp_not_taken = all(
                possible_value == compared_value
                for possible_value in pred_comparison_values
            )
        # TODO: handles other jumps cases
        if jmp_taken:
            unflat_logger.info(
                "It seems that '%s' is always taken when coming from %s: %s",
                format_minsn_t(jmp_ins),
                pred.serial,
                pred_comparison_values,
            )
            dst_serial = jmp_ins.d.b
        if jmp_not_taken:
            unflat_logger.info(
                "It seems that '%s' is never taken when coming from %s: %s",
                format_minsn_t(jmp_ins),
                pred.serial,
                pred_comparison_values,
            )
            # Fall-through successor of the conditional block is the next serial.
            dst_serial = fake_loop_block.serial + 1
        if dst_serial is None:
            unflat_logger.debug(
                "Jump seems legit '%s' from %s: %s",
                format_minsn_t(jmp_ins),
                pred.serial,
                pred_comparison_values,
            )
            return False
        if self.dump_intermediate_microcode:
            dump_microcode_for_debug(
                self.mba,
                self.log_dir,
                f"{self.cur_maturity_pass}_before_fake_jump",
            )
        unflat_logger.info(
            "Making pred %s with value %s goto %s (%s)",
            pred.serial,
            pred_comparison_values,
            dst_serial,
            format_minsn_t(jmp_ins),
        )
        # BUGFIX: perform the rewiring *before* taking the "after" snapshot.
        # The previous code dumped the "after" microcode prior to calling
        # change_1way_block_successor, so both dumps showed identical code.
        changed = change_1way_block_successor(pred, dst_serial)
        if self.dump_intermediate_microcode:
            dump_microcode_for_debug(
                self.mba,
                self.log_dir,
                f"{self.cur_maturity_pass}_after_fake_jump",
            )
        return changed

    def optimize(self, blk: mblock_t) -> int:
        """Entry point called by the optimizer framework for each block."""
        self.mba = blk.mba
        if not self.check_if_rule_should_be_used(blk):
            return 0
        self.last_pass_nb_patch_done = self.analyze_blk(blk)
        if self.last_pass_nb_patch_done > 0:
            # CFG changed: invalidate use/def chains, re-run local
            # optimization, then sanity-check the resulting mba.
            self.mba.mark_chains_dirty()
            self.mba.optimize_local(0)
            safe_verify(
                self.mba,
                "optimizing UnflattenerFakeJump",
                logger_func=unflat_logger.error,
            )
        return self.last_pass_nb_patch_done
    def leave_ClassDef(
        self, original_node: cst.ClassDef, updated_node: cst.ClassDef
    ) -> cst.CSTNode:
        """Rewrite a ``PatternMatchingRule`` subclass.

        Bases are collapsed to ``PatternMatchingRule`` alone, and class-level
        ``PATTERN`` / ``REPLACEMENT_PATTERN`` assignments are converted into
        read-only ``@property`` methods returning the same expressions.
        Classes that do not inherit from the target base are returned
        unchanged; classes that already expose these names as functions are
        left as-is (idempotency).
        """
        # Process only classes that (directly) inherit from PatternMatchingRule.
        if not any(
            m.matches(base.value, m.Name(self.TARGET_BASE))
            for base in updated_node.bases
        ):
            return updated_node

        # Ensure the class exclusively lists PatternMatchingRule as its sole base.
        # NOTE(review): this drops any other bases/mixins the class may have
        # declared; presumably intentional for this codebase -- confirm.
        new_base = cst.Arg(value=cst.Name(self.TARGET_BASE))
        new_cls = updated_node.with_changes(bases=[new_base])

        # ------------------------------------------------------------------
        # 2) Transform ``PATTERN = ...`` and ``REPLACEMENT_PATTERN = ...``
        #    assignments into @property methods returning the same AST
        #    expressions.
        # ------------------------------------------------------------------

        pattern_expr: cst.BaseExpression | None = None
        replacement_expr: cst.BaseExpression | None = None
        remaining_body: list[cst.CSTNode] = []

        # First pass: collect the two target expressions and keep every
        # other statement in order.
        for stmt in new_cls.body.body:
            if (
                isinstance(stmt, cst.SimpleStatementLine)
                and len(stmt.body) == 1
                and isinstance(stmt.body[0], cst.Assign)
            ):
                assign: cst.Assign = stmt.body[0]
                # Only single-target assignments are expected here.
                if len(assign.targets) == 1 and isinstance(
                    assign.targets[0].target, cst.Name
                ):
                    target_name = assign.targets[0].target.value
                    if target_name == "PATTERN":
                        pattern_expr = assign.value
                        continue  # remove assignment
                    if target_name == "REPLACEMENT_PATTERN":
                        replacement_expr = assign.value
                        continue  # remove assignment
            remaining_body.append(stmt)

        # Build new function definitions for collected expressions.
        new_members: list[cst.CSTNode] = []

        def _make_property(name: str, expr: cst.BaseExpression) -> cst.FunctionDef:
            # Construct ``@property def <name>(self) -> AstNode: return <expr>``.
            return cst.FunctionDef(
                name=cst.Name(name),
                decorators=[cst.Decorator(decorator=cst.Name("property"))],
                params=cst.Parameters(params=[cst.Param(name=cst.Name("self"))]),
                returns=cst.Annotation(annotation=cst.Name("AstNode")),
                body=cst.IndentedBlock(
                    [cst.SimpleStatementLine([cst.Return(value=expr)])]
                ),
            )

        if pattern_expr is not None:
            # Only add if a property with that name doesn't already exist.
            if not any(
                isinstance(el, cst.FunctionDef) and el.name.value == "PATTERN"
                for el in remaining_body
            ):
                new_members.append(_make_property("PATTERN", pattern_expr))

        if replacement_expr is not None:
            if not any(
                isinstance(el, cst.FunctionDef)
                and el.name.value == "REPLACEMENT_PATTERN"
                for el in remaining_body
            ):
                new_members.append(
                    _make_property("REPLACEMENT_PATTERN", replacement_expr)
                )

        # Assemble the final body order: original (sans assignments) + new members.
        final_body = remaining_body + new_members

        # If nothing left (possible but unlikely), add a pass.
        if len(final_body) == 0:
            final_body.append(cst.SimpleStatementLine([cst.Pass()]))

        new_cls = new_cls.with_changes(body=new_cls.body.with_changes(body=final_body))

        return new_cls
143 | ) 144 | ) 145 | parser.add_argument( 146 | "paths", 147 | nargs="+", 148 | type=Path, 149 | help="Python files or directories to process.", 150 | ) 151 | parser.add_argument( 152 | "--in-place", 153 | action="store_true", 154 | help="Overwrite files with the transformed code instead of printing to STDOUT.", 155 | ) 156 | 157 | args = parser.parse_args(argv) 158 | 159 | for path in args.paths: 160 | for py_file in _iter_py_files(path): 161 | _process_file(py_file, in_place=args.in_place) 162 | 163 | 164 | if __name__ == "__main__": 165 | main() 166 | -------------------------------------------------------------------------------- /tests/unit/test_conf.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import unittest 4 | from pathlib import Path 5 | 6 | from .tutils import MockIdaDiskio, load_conf_classes, temp_ida_dir 7 | 8 | 9 | class TestConfiguration(unittest.TestCase): 10 | 11 | def setUp(self): 12 | """Set up dummy files for testing.""" 13 | self.dummy_options_file = Path("./options.json") 14 | self.dummy_options_file.write_text('{"api_key": "secret", "timeout": 60}') 15 | 16 | self.dummy_project_file = Path("./project.json") 17 | self.dummy_project_content = { 18 | "description": "My Test Project", 19 | "ins_rules": [ 20 | { 21 | "name": "check_string_format", 22 | "is_activated": True, 23 | "config": {"min_len": 5}, 24 | } 25 | ], 26 | "blk_rules": [], 27 | } 28 | with self.dummy_project_file.open("w") as f: 29 | json.dump(self.dummy_project_content, f, indent=2) 30 | 31 | def tearDown(self): 32 | """Clean up dummy files after testing.""" 33 | self.dummy_options_file.unlink(missing_ok=True) 34 | self.dummy_project_file.unlink(missing_ok=True) 35 | 36 | def test_d810_configuration(self): 37 | """Test D810Configuration loading and logging.""" 38 | with temp_ida_dir() as ida_dir: 39 | # Place template in read-only area (simulate packaged conf) 40 | packaged_path = ida_dir / 
"cfg/d810/options.json" 41 | packaged_path.parent.mkdir(parents=True, exist_ok=True) 42 | packaged_path.write_text('{"template_key": "tmpl"}') 43 | with load_conf_classes() as (D810Configuration, _, _): 44 | # Instance with no explicit path should read template but save to user dir 45 | app_config = D810Configuration() 46 | # Value should initially be whatever is in config (template or pre-existing user copy) 47 | self.assertIn(app_config.get("template_key"), ("tmpl", "user")) 48 | # After save(), a user copy must exist 49 | app_config.set("template_key", "user") 50 | app_config.save() 51 | self.assertTrue(app_config.config_file.exists()) 52 | # log_dir should use MockIdaDiskio path 53 | self.assertEqual( 54 | str(app_config.log_dir), 55 | str(Path(MockIdaDiskio.get_user_idadir(), "logs")), 56 | ) 57 | 58 | def test_project_configuration(self): 59 | """Test ProjectConfiguration loading, modification, and saving.""" 60 | with load_conf_classes() as (_, ProjectConfiguration, RuleConfiguration): 61 | project_config = ProjectConfiguration.from_file(self.dummy_project_file) 62 | self.assertEqual(project_config.description, "My Test Project") 63 | 64 | # Modify and save 65 | new_rule = RuleConfiguration(name="check_buffer_size", is_activated=False) 66 | project_config.ins_rules.append(new_rule) 67 | project_config.description = "My updated test project" 68 | project_config.save() 69 | 70 | # Reload and verify changes 71 | project_config_reloaded = ProjectConfiguration.from_file( 72 | self.dummy_project_file 73 | ) 74 | self.assertEqual( 75 | project_config_reloaded.description, "My updated test project" 76 | ) 77 | self.assertIn(new_rule, project_config_reloaded.ins_rules) 78 | 79 | def test_get_and_set_methods(self): 80 | """Test get() and set() methods of D810Configuration.""" 81 | with load_conf_classes() as (D810Configuration, _, _): 82 | app_config = D810Configuration(self.dummy_options_file) 83 | # default when missing 84 | 
self.assertIsNone(app_config.get("missing_key")) 85 | self.assertEqual(app_config.get("missing_key", "default"), "default") 86 | # assign and retrieve 87 | app_config.set("new_key", "new_value") 88 | self.assertEqual(app_config.get("new_key"), "new_value") 89 | 90 | def test_discover_projects(self): 91 | """Test project discovery, including user overrides and ignoring options.json.""" 92 | with temp_ida_dir() as ida_dir: 93 | with load_conf_classes() as (D810Configuration, ProjectConfiguration, _): 94 | # 1. Setup a user directory with a new project and an overriding project 95 | user_conf_dir = Path(ida_dir) / "cfg" / "d810" 96 | user_conf_dir.mkdir(parents=True, exist_ok=True) 97 | 98 | # This project is unique to the user directory 99 | (user_conf_dir / "my_user_project.json").write_text( 100 | '{"description": "My User Project"}' 101 | ) 102 | 103 | # This project overrides a built-in template 104 | (user_conf_dir / "hodur_deobfuscation.json").write_text( 105 | '{"description": "User Override for Hodur"}' 106 | ) 107 | 108 | # 2. Run discovery 109 | config = D810Configuration() 110 | projects = config.discover_projects() 111 | 112 | # 3. 
Verify results 113 | project_map = {p.path.name: p for p in projects} 114 | 115 | # Check that options.json is NOT treated as a project 116 | self.assertNotIn("options.json", project_map) 117 | 118 | # Check that the user's new project is found 119 | self.assertIn("my_user_project.json", project_map) 120 | self.assertEqual( 121 | project_map["my_user_project.json"].description, "My User Project" 122 | ) 123 | 124 | # Check that the user's overriding project is used 125 | self.assertIn("hodur_deobfuscation.json", project_map) 126 | self.assertEqual( 127 | project_map["hodur_deobfuscation.json"].description, 128 | "User Override for Hodur", 129 | ) 130 | 131 | # Check that a built-in project that was NOT overridden is still loaded 132 | self.assertIn("default_instruction_only.json", project_map) 133 | 134 | # Check that the list of configurations was saved back to options.json 135 | config.save() 136 | reloaded_config = D810Configuration(config.config_file) 137 | saved_configs = reloaded_config.get("configurations") 138 | self.assertIn("my_user_project.json", saved_configs) 139 | self.assertIn("hodur_deobfuscation.json", saved_configs) 140 | 141 | 142 | if __name__ == "__main__": 143 | logging.basicConfig(level=logging.INFO) 144 | unittest.main() 145 | --------------------------------------------------------------------------------