├── .gitignore ├── LICENSE.md ├── README.md ├── __init__.py ├── deflatten.py ├── plugin.json └── util.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | test/ 4 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Toshi Piazza and Kareem El-Faramawi 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | llvm-deobfuscator 2 | ================= 3 | 4 | Performs the inverse operation of the control flow flattening pass performed by 5 | LLVM-Obfuscator. It does not yet undo the bogus control flow and expression 6 | substitution passes. 
class RunInBackground(BackgroundTaskThread):
    """Run a deobfuscation pass on a background thread inside one undo transaction.

    Args:
        bv (BinaryView): view being patched
        addr (int): address the user selected when invoking the command
        msg (str): progress text handed to BackgroundTaskThread
        func (callable): the pass to run; called as ``func(bv, addr)``
    """

    def __init__(self, bv, addr, msg, func):
        # NOTE(review): the second argument is presumably the "can cancel"
        # flag of BackgroundTaskThread -- confirm against the API docs.
        BackgroundTaskThread.__init__(self, msg, True)
        self.bv = bv
        self.addr = addr
        self.func = func

    def run(self):
        """Force analysis of the target function, run the pass, then refresh.

        The undo transaction is always closed and analysis is always
        refreshed, even if the pass raises part-way through. Otherwise a
        failure would leave the view with a dangling open transaction and
        stale analysis for whatever bytes were already written. The
        exception itself still propagates.
        """
        bv = self.bv
        bv.begin_undo_actions()
        try:
            fix_analysis(bv, self.addr)
            self.func(bv, self.addr)
        finally:
            bv.commit_undo_actions()
            bv.update_analysis()
class CFGLink(object):
    """A recovered edge from an original block to its real successor(s)."""

    def __init__(self, block, true_block, false_block=None, def_il=None):
        """ Create a link from a block to its real successors

        Args:
            block (BasicBlock): block to start from
            true_block (BasicBlock): The target block of an unconditional jump,
                or the true branch of a conditional jump
            false_block (BasicBlock): The false branch of a conditional jump
            def_il (MediumLevelILInstruction): The instruction that was used
                to discover this link. This will be a definition of the state
                variable
        """
        self.il = def_il  # The definition il we used to find this link
        self.block = block

        # The blocks handed in are not the successors themselves: each one is
        # resolved to the target of its first outgoing edge.
        self.true_block = true_block.outgoing_edges[0].target
        self.false_block = None
        if false_block is not None:
            self.false_block = false_block.outgoing_edges[0].target

    @property
    def is_uncond(self):
        """True when the link has a single successor (no false branch)."""
        return self.false_block is None

    @property
    def is_cond(self):
        """True when the link has both a true and a false successor."""
        return not self.is_uncond

    def gen_asm(self, bv, base_addr):
        """ Generates a patch to repair this link

        For an unconditional jump, this will generate
            jmp next_block

        For a conditional jump, this will generate
            jcc true_block
            jmp false_block
        where cc is the condition used in the original CMOVcc in the
        flattening logic

        Args:
            bv (BinaryView)
            base_addr (int): The address where these instructions will be
                placed. This is necessary to calculate relative addresses

        Returns:
            str: The assembled patch opcodes

        Raises:
            ValueError: for a conditional link that has no defining il, or
                when the instruction found at the expected location is not
                a cmovcc
        """
        # It's assumed that base_addr is the start of free space
        # at the end of a newly recovered block
        def rel(addr, origin):
            # rstrip('L') drops the Python 2 long suffix from hex()
            return hex(addr - origin).rstrip('L')

        # Unconditional jmp
        if self.is_uncond:
            next_addr = self.true_block.start
            print('[+] Patching from {:x} to {:x}'.format(base_addr, next_addr))
            return safe_asm(bv, 'jmp {}'.format(rel(next_addr, base_addr)))

        # Branch based on original cmovcc
        if self.il is None:
            raise ValueError('conditional link has no defining il: {!r}'.format(self))
        true_addr = self.true_block.start
        false_addr = self.false_block.start
        print('[+] Patching from {:x} to T: {:x} F: {:x}'.format(base_addr,
                                                                 true_addr,
                                                                 false_addr))

        # Find the cmovcc by looking at the def il's incoming edges
        # Both parent blocks are part of the same cmov
        il_index = self.il.instr_index
        il_bb = next(bb for bb in self.il.function
                     if bb.start <= il_index < bb.end)
        cmov_addr = il_bb.incoming_edges[0].source[-1].address
        cmov = bv.get_disassembly(cmov_addr).split(' ')[0]
        if not cmov.startswith('cmov'):
            # Without a cmovcc there is no condition code to reuse; blindly
            # rewriting the mnemonic would assemble a bogus instruction.
            raise ValueError('expected cmovcc at {:x}, found {!r}'.format(cmov_addr, cmov))

        # cmovcc -> jcc, reusing the condition code
        jmp_instr = cmov.replace('cmov', 'j')

        # Generate the branch instructions; the fall-back jmp is placed right
        # after the jcc, so its displacement is relative to that address.
        asm = safe_asm(bv, '{} {}'.format(jmp_instr, rel(true_addr, base_addr)))
        asm += safe_asm(bv, 'jmp {}'.format(rel(false_addr, base_addr + len(asm))))

        return asm

    def __repr__(self):
        if self.is_uncond:
            return '<U Link: {} => {}>'.format(self.block,
                                               self.true_block)
        return '<C Link: {} => T: {}, F: {}>'.format(self.block,
                                                     self.true_block,
                                                     self.false_block)
def compute_original_blocks(bv, mlil, state_var):
    """ Gathers all MLIL instructions that (re)define the state variable

    Args:
        bv (BinaryView)
        mlil (MediumLevelILFunction): The MLIL for the function to be deflattened
        state_var (Variable): The state variable in the MLIL

    Returns:
        tuple: All MediumLevelILInstructions in mlil that update state_var.
            Always a tuple: empty when there are no definitions, and a
            1-tuple when there is exactly one.
    """
    # Index explicitly instead of itemgetter(*defs)(mlil): itemgetter returns
    # a bare item (not a tuple) for a single index and raises TypeError when
    # given no indices at all, which breaks callers that iterate the result.
    return tuple(mlil[index] for index in mlil.get_var_definitions(state_var))
| def resolve_cfg_link(bv, mlil, il, backbone): 168 | """ Resolves the true successors of a block 169 | 170 | When there is only one successor, the state variable is set to a constant, 171 | so we simply look this new state in the backbone map 172 | 173 | When there are 2 successors, we rely on SSA form to decide which successor 174 | state is the true/false branch. Of the two possible values that the next state 175 | may be, the earlier version (default value) corresponds to the false branch 176 | 177 | Args: 178 | bv (BinaryView) 179 | mlil (MediumLevelILFunction): The MLIL for the function to be deflattened 180 | il (MediumLevelILInstruction): An instruction in one of the original blocks 181 | that updates the state variable 182 | backbone (dict): map of {state value => backbone block} 183 | 184 | Returns: 185 | CFGLink: a link with the resolved successors for the block il was contained in 186 | """ 187 | # il refers to a definition of the state_var 188 | bb = bv.get_basic_blocks_at(il.address)[0] 189 | 190 | # Unconditional jumps will set the state to a constant 191 | if il.src.operation == MediumLevelILOperation.MLIL_CONST or il.src.operation == MediumLevelILOperation.MLIL_CONST_PTR: 192 | return CFGLink(bb, backbone[il.src.constant], def_il=il) 193 | 194 | # Conditional jumps choose between two values 195 | else: 196 | # Go into SSA to figure out which state is the false branch 197 | # Get the phi for the state variable at this point 198 | phi = get_ssa_def(mlil, il.ssa_form.src.src) 199 | assert phi.operation == MediumLevelILOperation.MLIL_VAR_PHI 200 | 201 | # The cmov (select) will only ever replace the default value (false) 202 | # with another if the condition passes (true) 203 | # So all we need to do is take the earliest version of the SSA var 204 | # as the false state 205 | f_def, t_def = sorted(phi.src, key=lambda var: var.version) 206 | 207 | # There will always be one possible value here 208 | false_state = get_ssa_def(mlil, 
def clean_block(bv, mlil, link):
    """ Return the data for a block with all unnecessary instructions removed

    Args:
        bv (BinaryView)
        mlil (MediumLevelILFunction): The MLIL for the function to be deflattened
        link (CFGLink): a link with the resolved successors for a block

    Returns:
        tuple: (data, cave_addr, old_len) where
            data is a copy of the block link is based on with the terminator
                and all state-variable instructions removed,
            cave_addr is the address just past data once it is written back
                at the block start (the start of the freed space),
            old_len is the original length of the block

    Raises:
        ValueError: if an instruction inside the block has no valid length
    """

    # Helper for resolving new addresses for relative calls.
    # Returns None when the call target is not a constant (indirect call).
    def _fix_call(call_addr, new_addr):
        tgt = getattr(llil_at(bv, call_addr).dest, 'constant', None)
        if tgt is None:
            return None
        reladdr = hex(tgt - new_addr).rstrip('L')
        return safe_asm(bv, 'call {}'.format(reladdr))

    # The terminator gets replaced anyway
    block = link.block
    old_len = block.length
    nop_addrs = {block.disassembly_text[-1].address}

    # Gather all addresses related to the state variable
    if link.il is not None:
        gather_defs(link.il.ssa_form, nop_addrs)

    # Rebuild the block, skipping the bad instrs
    chunks = []
    size = 0  # bytes emitted so far == offset of the next kept instruction
    addr = block.start
    while addr < block.end:
        # How much data to read
        ilen = bv.get_instruction_length(addr)
        if ilen <= 0:
            # addr would never advance and the loop would spin forever
            raise ValueError('cannot decode instruction at {:x}'.format(addr))

        # Only process this instruction if we haven't blacklisted it
        if addr not in nop_addrs:
            chunk = None
            # Direct calls need to be re-assembled for their new position
            if is_call(bv, addr):
                chunk = _fix_call(addr, block.start + size)
            # Everything else, including indirect calls, is copied verbatim.
            # NOTE(review): position-dependent operands other than direct
            # call targets (e.g. rip-relative memory operands) are not
            # adjusted here.
            if chunk is None:
                chunk = bv.read(addr, ilen)
            chunks.append(chunk)
            size += len(chunk)

        # Next instruction
        addr += ilen

    data = b''.join(chunks)
    return data, block.start + size, old_len
def deflatten_cfg(bv, addr):
    """ Reverses the control flow flattening pass from OLLVM

    All block patches are computed and size-checked before any byte is
    written, so a failure while resolving or assembling a link leaves the
    view untouched.

    Args:
        bv (BinaryView)
        addr (int): Selected address in the view. This should be an
            instruction where the state variable is updated

    Raises:
        ValueError: if a rebuilt block would not fit in the space of the
            block it replaces
    """
    func = get_func_containing(bv, addr)
    if func is None:
        print('[-] No function contains address {:x}'.format(addr))
        return
    mlil = func.medium_level_il
    state_var = func.get_low_level_il_at(addr).medium_level_il.dest

    # compute all usages of the state_var
    backbone = compute_backbone_map(bv, mlil, state_var)
    print('[+] Computed backbone')
    pprint(backbone)

    # compute all the defs of the state_var in the original basic blocks
    original = compute_original_blocks(bv, mlil, state_var)
    print('[+] Usages of the state variable in original basic blocks')
    pprint(original)

    # at this point we have all the information to reconstruct the CFG
    CFG = [resolve_cfg_link(bv, mlil, il, backbone) for il in original]
    print('[+] Computed original CFG')
    pprint(CFG)

    # ljust() below needs a single-byte fill, i.e. a one-byte nop encoding
    nop = safe_asm(bv, 'nop')

    # build every patch first
    print('[+] Patching all discovered links')
    patches = []
    for link in CFG:
        # Clean out instructions we don't need to make space
        blockdata, cave_addr, orig_len = clean_block(bv, mlil, link)

        # Add the new instructions, nop the rest of the block
        blockdata += link.gen_asm(bv, cave_addr)
        if len(blockdata) > orig_len:
            # Writing this would clobber whatever follows the block
            raise ValueError(
                'patch for block at {:x} needs {} bytes but only {} are available'.format(
                    link.block.start, len(blockdata), orig_len))
        patches.append((link.block.start, blockdata.ljust(orig_len, nop)))

    # ... then apply them
    for start, blockdata in patches:
        bv.write(start, blockdata)

    # Do some final cleanup
    print('[+] NOPing backbone')
    for bb in gather_full_backbone(backbone):
        print('[+] NOPing block: {}'.format(bb))
        bv.write(bb.start, nop * bb.length)
def gather_defs(il, defs, _seen=None):
    """ Walks up a def chain starting at the given il (mlil-ssa)
    until constants are found, gathering all addresses along the way

    Args:
        il (MediumLevelILInstruction): SSA-form instruction to start from.
            None is accepted and ignored.
        defs (set): updated in place with the address of every instruction
            visited
        _seen (set): internal; instr_index of every definition already
            followed. Callers should not pass this.
    """
    # Guard against an SSA variable that has no defining instruction
    if il is None:
        return
    if _seen is None:
        _seen = set()

    defs.add(il.address)
    op = il.operation

    if op == MediumLevelILOperation.MLIL_CONST:
        return

    # SSA variables whose definitions have to be chased from this expression
    if op in (MediumLevelILOperation.MLIL_VAR_SSA_FIELD,
              MediumLevelILOperation.MLIL_VAR_SSA):
        ssa_vars = [il.src]
    elif op == MediumLevelILOperation.MLIL_VAR_PHI:
        ssa_vars = il.src
    else:
        ssa_vars = []

    for ssa_var in ssa_vars:
        def_il = get_ssa_def(il.function, ssa_var)
        # A phi can (transitively) name a variable whose own definition leads
        # back to that phi; follow each definition once so the walk terminates.
        if def_il is None or def_il.instr_index in _seen:
            continue
        _seen.add(def_il.instr_index)
        gather_defs(def_il, defs, _seen)

    # Descend into a nested source expression of the same instruction
    if hasattr(il, 'src') and isinstance(il.src, MediumLevelILInstruction):
        gather_defs(il.src, defs, _seen)
function, if any, containing the given address """ 53 | funcs = bv.get_functions_containing(addr) 54 | return funcs[0] if funcs else None 55 | --------------------------------------------------------------------------------