├── README.md ├── sample.c ├── LICENSE ├── .gitignore └── __main__.py /README.md: -------------------------------------------------------------------------------- 1 | # sumo 2 | 3 | Binary Ninja MLIL to LLVM IR lifter 4 | 5 | This software is not maintained, but you are encouraged to adapt it into your own working program. This work is public domain. 6 | -------------------------------------------------------------------------------- /sample.c: -------------------------------------------------------------------------------- 1 | #include <stdint.h> 2 | #include <stdio.h> 3 | // https://blog.quarkslab.com/deobfuscation-recovering-an-ollvm-protected-program.html 4 | uint32_t target(uint32_t n) { 5 | uint32_t mod = n % 4; 6 | uint32_t result = 0; 7 | 8 | if (mod == 0) { 9 | result = (n | 0xbaaad0bf) * (2 ^ n); 10 | } else if (mod == 1) { 11 | result = (n & 0xbaaad0bf) * (3 + n); 12 | } else if (mod == 2) { 13 | result = (n ^ 0xbaaad0bf) * (4 | n); 14 | } else { 15 | result = (n + 0xbaaad0bf) * (5 & n); 16 | } 17 | 18 | return result; 19 | } 20 | 21 | int main() { 22 | // Test against the lifted execution 23 | printf("%d\n", target(10)); 24 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. 
We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to <https://unlicense.org> 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### PyCharm+all ### 2 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 3 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 4 | 5 | # User-specific stuff 6 | .idea/**/workspace.xml 7 | .idea/**/tasks.xml 8 | .idea/**/usage.statistics.xml 9 | .idea/**/dictionaries 10 | .idea/**/shelf 11 | 12 | # Generated files 13 | .idea/**/contentModel.xml 14 | 15 | # Sensitive or high-churn files 16 | .idea/**/dataSources/ 17 | .idea/**/dataSources.ids 18 | .idea/**/dataSources.local.xml 19 | .idea/**/sqlDataSources.xml 20 | .idea/**/dynamic.xml 21 | .idea/**/uiDesigner.xml 22 | .idea/**/dbnavigator.xml 23 | 24 | # Gradle 25 | .idea/**/gradle.xml 26 | .idea/**/libraries 27 | 28 | # Gradle and Maven with auto-import 29 | # When using Gradle or Maven with auto-import, you should exclude module files, 30 | # since they will be recreated, and may cause churn. Uncomment if using 31 | # auto-import. 
32 | # .idea/modules.xml 33 | # .idea/*.iml 34 | # .idea/modules 35 | # *.iml 36 | # *.ipr 37 | 38 | # CMake 39 | cmake-build-*/ 40 | 41 | # Mongo Explorer plugin 42 | .idea/**/mongoSettings.xml 43 | 44 | # File-based project format 45 | *.iws 46 | 47 | # IntelliJ 48 | out/ 49 | 50 | # mpeltonen/sbt-idea plugin 51 | .idea_modules/ 52 | 53 | # JIRA plugin 54 | atlassian-ide-plugin.xml 55 | 56 | # Cursive Clojure plugin 57 | .idea/replstate.xml 58 | 59 | # Crashlytics plugin (for Android Studio and IntelliJ) 60 | com_crashlytics_export_strings.xml 61 | crashlytics.properties 62 | crashlytics-build.properties 63 | fabric.properties 64 | 65 | # Editor-based Rest Client 66 | .idea/httpRequests 67 | 68 | # Android studio 3.1+ serialized cache file 69 | .idea/caches/build_file_checksums.ser 70 | 71 | ### PyCharm+all Patch ### 72 | # Ignores the whole .idea folder and all .iml files 73 | # See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 74 | 75 | .idea/ 76 | 77 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 78 | 79 | *.iml 80 | modules.xml 81 | .idea/misc.xml 82 | *.ipr 83 | 84 | # Sonarlint plugin 85 | .idea/sonarlint 86 | 87 | ### Python ### 88 | # Byte-compiled / optimized / DLL files 89 | __pycache__/ 90 | *.py[cod] 91 | *$py.class 92 | 93 | # C extensions 94 | *.so 95 | 96 | # Distribution / packaging 97 | .Python 98 | build/ 99 | develop-eggs/ 100 | dist/ 101 | downloads/ 102 | eggs/ 103 | .eggs/ 104 | lib/ 105 | lib64/ 106 | parts/ 107 | sdist/ 108 | var/ 109 | wheels/ 110 | pip-wheel-metadata/ 111 | share/python-wheels/ 112 | *.egg-info/ 113 | .installed.cfg 114 | *.egg 115 | MANIFEST 116 | 117 | # PyInstaller 118 | # Usually these files are written by a python script from a template 119 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
120 | *.manifest 121 | *.spec 122 | 123 | # Installer logs 124 | pip-log.txt 125 | pip-delete-this-directory.txt 126 | 127 | # Unit test / coverage reports 128 | htmlcov/ 129 | .tox/ 130 | .nox/ 131 | .coverage 132 | .coverage.* 133 | .cache 134 | nosetests.xml 135 | coverage.xml 136 | *.cover 137 | .hypothesis/ 138 | .pytest_cache/ 139 | 140 | # Translations 141 | *.mo 142 | *.pot 143 | 144 | # Scrapy stuff: 145 | .scrapy 146 | 147 | # Sphinx documentation 148 | docs/_build/ 149 | 150 | # PyBuilder 151 | target/ 152 | 153 | # pyenv 154 | .python-version 155 | 156 | # pipenv 157 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 158 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 159 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 160 | # install all needed dependencies. 161 | #Pipfile.lock 162 | 163 | # celery beat schedule file 164 | celerybeat-schedule 165 | 166 | # SageMath parsed files 167 | *.sage.py 168 | 169 | # Spyder project settings 170 | .spyderproject 171 | .spyproject 172 | 173 | # Rope project settings 174 | .ropeproject 175 | 176 | # Mr Developer 177 | .mr.developer.cfg 178 | .project 179 | .pydevproject 180 | 181 | # mkdocs documentation 182 | /site 183 | 184 | # mypy 185 | .mypy_cache/ 186 | .dmypy.json 187 | dmypy.json 188 | 189 | # Pyre type checker 190 | .pyre/ 191 | 192 | venv 193 | *.out 194 | *.bndb 195 | *.png 196 | *.dot 197 | *.so 198 | .NOTES.txt -------------------------------------------------------------------------------- /__main__.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from binaryninja import * 3 | from llvmlite import ir 4 | from llvmlite import binding as llvm 5 | 6 | from ctypes import CFUNCTYPE, c_int64, c_int 7 | 8 | llvm.initialize() 9 | llvm.initialize_native_target() 10 | llvm.initialize_native_asmprinter() 11 | 
def ninja_to_ir_type(t: Type):
    """Translate a Binary Ninja type into the corresponding llvmlite IR type.

    Only integer types are handled; the result is an ``ir.IntType`` whose bit
    width is ``t.width * 8``.

    Raises:
        ValueError: if ``t`` is not of ``TypeClass.IntegerTypeClass``.
    """
    # TODO(keegan) check if values are signed or unsigned
    if t.type_class == TypeClass.IntegerTypeClass:
        return ir.IntType(t.width * 8)
    raise ValueError('unsupported type class: {}'.format(t.type_class))


# TODO(keegan) check if function can return
# TODO(keegan) check if function has variable arguments
def ir_function_type(f: Function) -> ir.FunctionType:
    """Build the LLVM function type for a Binary Ninja function's signature.

    Raises:
        ValueError: propagated from ``ninja_to_ir_type`` when the return type
            or any parameter type is not an integer type.
    """
    return_type: Type = f.function_type.return_value
    parameters: List[FunctionParameter] = f.function_type.parameters

    ir_return_type = ninja_to_ir_type(return_type)
    ir_parameter_types = [ninja_to_ir_type(p.type) for p in parameters]

    return ir.FunctionType(ir_return_type, ir_parameter_types)


def create_execution_engine():
    """Create an MCJIT execution engine for the default target triple.

    The engine is backed by an empty module; real modules are added later
    through ``compile_ir``.
    """
    target = llvm.Target.from_default_triple()
    target_machine = target.create_target_machine()
    backing_mod = llvm.parse_assembly("")
    return llvm.create_mcjit_compiler(backing_mod, target_machine)


def compile_ir(engine, mod: llvm.ModuleRef):
    """Verify ``mod``, add it to ``engine`` and finalize it for execution.

    Returns:
        The same ``mod`` object that was passed in.
    """
    mod.verify()
    engine.add_module(mod)
    engine.finalize_object()
    engine.run_static_constructors()
    return mod


# SSAVariable instances are used as dictionary keys by the lifter below, so
# give them a hash derived from the variable and its SSA version.
SSAVariable.__hash__ = lambda self: hash(str(self.var) + "#" + str(self.version))


# https://github.com/joshwatson/f-ing-around-with-binaryninja/blob/master/ep4-emulator/vm_visitor.py#L4
class BNILVisitor(object):
    """Dispatch IL expressions to ``visit_<OPERATION NAME>`` methods."""

    def __init__(self, **kw):
        super(BNILVisitor, self).__init__()

    def visit(self, expression):
        """Call the handler named after ``expression.operation.name``.

        Returns:
            The handler's return value, or ``None`` (after printing a notice)
            when no handler exists for the operation.
        """
        method_name = 'visit_{}'.format(expression.operation.name)
        handler = getattr(self, method_name, None)
        if handler is None:
            print(method_name + ' undefined ')
            return None
        return handler(expression)


class QueuedIncomingPhi:
    """A phi instruction ready for incoming node insertion"""

    def __init__(self, phi: ir.PhiInstr, var: SSAVariable, ir_block: ir.Block):
        # The phi instruction that will receive the incoming value.
        self.phi = phi
        # Key into ``FunctionLifter.variables`` for the incoming value.
        self.var = var
        # IR block recorded as the origin of the incoming value.
        self.ir_block = ir_block


class FunctionLifter:
    """Lift a MediumLevelILFunction to a LLVM IR function"""

    def __init__(self, ir_module: ir.Module, f: Function):
        # MLIL SSA basic block index -> IR basic block.
        self.ir_basic_blocks = {}
        # Phi incomings, resolved only after every block has been lifted.
        self.phis: List[QueuedIncomingPhi] = []
        # SSA variable -> lifted IR value.
        self.variables = {}
        self.f = f

        self.ir_module = ir_module
        self.ir_function = ir.Function(self.ir_module, ir_function_type(self.f), name=self.f.name)

        self.pass_manager = llvm.create_module_pass_manager()
        self.pass_manager_b = llvm.create_pass_manager_builder()

    def run(self):
        """Lift every MLIL SSA basic block of the function into the IR function."""
        ssa_blocks = self.f.medium_level_il.ssa_form.basic_blocks

        # Create all IR blocks up front so branches can target blocks that
        # have not been lifted yet.
        for basic_block in ssa_blocks:
            self.ir_basic_blocks[basic_block.index] = self.ir_function.append_basic_block()

        self.ir_basic_blocks[0].name = "entry"

        for basic_block in ssa_blocks:
            visitor = LifterVisitor(self, basic_block.index, basic_block)
            for instr in basic_block:
                visitor.visit(instr)

        # Every variable now has an IR value, so the queued phi incomings can
        # be attached.
        for entry in self.phis:
            phi: ir.PhiInstr = entry.phi
            phi.add_incoming(self.variables[entry.var], entry.ir_block)

    def optimize(self, level: int) -> llvm.ModuleRef:
        """Optimize the produced LLVM IR, 3 corresponds to -O3, etc.

        Returns:
            A freshly parsed, optimized copy of the module; ``self.ir_module``
            itself is not modified.
        """
        opt_module = llvm.parse_assembly(str(self.ir_module))
        # Use a fresh builder and pass manager for every call: populating the
        # same manager again would append the whole pipeline a second time.
        self.pass_manager_b = llvm.create_pass_manager_builder()
        self.pass_manager_b.opt_level = level
        self.pass_manager = llvm.create_module_pass_manager()
        self.pass_manager_b.populate(self.pass_manager)
        self.pass_manager.run(opt_module)
        return opt_module


class LifterVisitor(BNILVisitor):
    """Emit LLVM IR for the instructions of a single MLIL SSA basic block."""

    def __init__(self, function_lifter: FunctionLifter, basic_block_index: int, basic_block: MediumLevelILBasicBlock):
        super(LifterVisitor, self).__init__()
        self.ir_basic_blocks = function_lifter.ir_basic_blocks
        self.ir_basic_block = function_lifter.ir_basic_blocks[basic_block_index]
        self.basic_block = basic_block
        self.builder = ir.IRBuilder(self.ir_basic_block)
        self.f = function_lifter.f
        # Shared with the FunctionLifter (and so with every other visitor).
        self.variables = function_lifter.variables
        self.phis = function_lifter.phis
        self.ir_function = function_lifter.ir_function

    def _lift_operands(self, expr):
        """Lift both operands of a two-operand expression, left one first."""
        lhs, rhs = expr.operands
        return self.visit(lhs), self.visit(rhs)

    def ir_bb_for_ssa_var(self, var: SSAVariable) -> ir.Block:
        """Return the IR basic block for which an SSAVariable was defined"""
        # Must go through ``self.f``: there is no module-level ``f``.
        # NOTE(review): a variable without a defining instruction (e.g. a
        # function argument) presumably has no definition here - confirm.
        definition = self.f.medium_level_il.get_ssa_var_definition(var)
        basic_block: MediumLevelILBasicBlock = definition.il_basic_block
        return self.ir_basic_blocks[basic_block.index]

    def ir_bb_for_instr(self, instr_index: int) -> ir.Block:
        """Return the IR basic block which contains the MLIL instruction"""
        # NOTE(review): this indexes ``self.f.mlil`` while the lifted
        # instructions come from the SSA form - confirm the indices agree.
        return self.ir_basic_blocks[self.f.mlil[instr_index].il_basic_block.index]

    def visit_MLIL_SET_VAR_SSA(self, expr):
        """Record the lifted source value as the value of the destination."""
        self.variables[expr.dest] = self.visit(expr.src)

    def visit_MLIL_VAR_SSA_FIELD(self, expr):
        """Lift a field read at offset 0 as a truncation to ``expr.size`` bytes.

        Raises:
            ValueError: if the source variable has no lifted value yet, or if
                the field offset is non-zero.
        """
        # FIXME(keegan) this doesn't pull function arg properly
        if expr.src.var.name == "arg1":
            src = self.ir_function.args[0]
        elif expr.src in self.variables:
            src = self.variables[expr.src]
        else:
            raise ValueError()

        # FIXME(keegan) not always an integer type
        # FIXME(keegan) truncating does not work for structure offsets, how to do this in LLVM?
        if expr.offset != 0:
            raise ValueError('structure offsets not supported yet')

        return self.builder.trunc(src, ir.IntType(expr.size * 8))

    def visit_MLIL_CONST(self, expr):
        """Lift a constant using the expression's own type."""
        return ir.Constant(ninja_to_ir_type(expr.expr_type), expr.value.value)

    def visit_MLIL_VAR_SSA(self, expr):
        """Return the previously lifted value of an SSA variable.

        Raises:
            ValueError: if the variable has not been lifted yet.
        """
        if expr.src in self.variables:
            return self.variables[expr.src]
        raise ValueError('asking for variable before definition')

    def visit_MLIL_VAR_PHI(self, expr):
        """Emit a phi node and queue its incoming values for later resolution."""
        dest, srcs = expr.operands

        phi = self.builder.phi(ninja_to_ir_type(expr.dest.var.type))
        self.variables[dest] = phi
        # Incoming values may not be lifted yet, so they are attached by
        # FunctionLifter.run once all blocks are done.
        # NOTE(review): the incoming block is the block *defining* each
        # source; verify this matches the predecessor LLVM expects.
        for src in srcs:
            self.phis.append(QueuedIncomingPhi(phi, src, self.ir_bb_for_ssa_var(src)))
        return phi

    def visit_MLIL_AND(self, expr):
        """Lift a bitwise AND."""
        return self.builder.and_(*self._lift_operands(expr))

    def visit_MLIL_ZX(self, expr):
        """Lift a zero extension to the expression's type."""
        return self.builder.zext(self.visit(expr.src), ninja_to_ir_type(expr.expr_type))

    def visit_MLIL_ADD(self, expr):
        """Lift an addition."""
        return self.builder.add(*self._lift_operands(expr))

    def visit_MLIL_MUL(self, expr):
        """Lift a multiplication."""
        return self.builder.mul(*self._lift_operands(expr))

    def visit_MLIL_XOR(self, expr):
        """Lift a bitwise XOR."""
        return self.builder.xor(*self._lift_operands(expr))

    def visit_MLIL_OR(self, expr):
        """Lift a bitwise OR."""
        return self.builder.or_(*self._lift_operands(expr))

    def visit_MLIL_GOTO(self, expr):
        """Lift an unconditional jump to the block containing ``expr.dest``."""
        return self.builder.branch(self.ir_bb_for_instr(expr.dest))

    def visit_MLIL_LOW_PART(self, expr):
        """Lift a low-part extraction as a truncation to ``expr.size`` bytes."""
        return self.builder.trunc(self.visit(expr.src), ir.IntType(expr.size * 8))

    def visit_MLIL_CMP_NE(self, expr):
        """Lift an inequality comparison as an unsigned ``icmp``."""
        return self.builder.icmp_unsigned('!=', *self._lift_operands(expr))

    def visit_MLIL_IF(self, expr):
        """Lift a conditional branch."""
        condition = expr.condition
        true_branch = self.ir_bb_for_instr(expr.true)
        false_branch = self.ir_bb_for_instr(expr.false)
        return self.builder.cbranch(self.visit(condition), true_branch, false_branch)

    def visit_MLIL_RET(self, expr):
        """Lift a return of a single value.

        Raises:
            ValueError: if the instruction returns more than one value.
        """
        if len(expr.src) > 1:
            raise ValueError('multiple return types not supported')

        return self.builder.ret(self.visit(expr.src[0]))


def main(bv: BinaryView):
    """Lift the ``target`` function of ``bv``, dump its optimized CFG and run it.

    Writes the CFG of the optimized function to ``output.opt.dot`` and prints
    the result of calling the JIT-compiled function with the argument 10.

    Raises:
        RuntimeError: if the execution engine reports a null address for the
            compiled function.
    """
    # Lift the `target` function to IR
    module = ir.Module(name=__file__)
    f: Function = bv.get_function_at(bv.get_symbols_by_name('target')[0].address)
    lifter = FunctionLifter(module, f)
    lifter.run()

    # Output the optimized IR to a CFG
    opt_module: llvm.ModuleRef = lifter.optimize(3)
    opt_target = opt_module.get_function(f.name)
    dot = llvm.get_function_cfg(opt_target)
    # Context manager so the file is flushed and closed deterministically.
    with open('output.opt.dot', 'w') as dot_file:
        dot_file.write(dot)

    # Execute the LLVM IR
    engine = create_execution_engine()
    compile_ir(engine, opt_module)
    # NOTE(review): the symbol is hard-coded while the IR function is created
    # with ``f.name`` - confirm "target_0" is what the engine exposes.
    func_ptr = engine.get_function_address("target_0")
    if not func_ptr:
        # Calling through a null function pointer would crash the process.
        raise RuntimeError('compiled function "target_0" not found in execution engine')
    cfunc = CFUNCTYPE(c_int64, c_int)(func_ptr)
    print(cfunc(10))