├── .gitignore
├── LICENSE.md
├── README.md
├── __init__.py
├── deflatten.py
├── plugin.json
└── util.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.swp
3 | test/
4 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2017 Toshi Piazza and Kareem El-Faramawi
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | llvm-deobfuscator
 2 | =================
 3 | 
 4 | Performs the inverse operation of the control flow flattening pass performed by
 5 | LLVM-Obfuscator. It does not yet undo the bogus control flow and expression
 6 | substitution passes.
 7 | 
 8 | Makes use of the BinaryNinja SSA form to determine all usages of the state variable. To
 9 | use, right click on the state variable and click "Deobfuscate (OLLVM)".  Note that the
10 | instruction writing to the state variable is typically in the first basic block of the
11 | function, and looks something like:
12 | 
13 | ```asm
14 | mov dword [rbp-0xf8], 0x962e7c4e
15 | ```
16 | 
17 | with minor variations in the large constant and variable offset.
18 | 
19 | For more information on llvm obfuscator itself, the [source][llvm-obfuscator] is an
20 | obvious ground truth :)
21 | 
22 | ## Installation
23 | 
24 | You should be able to simply `git clone` this repository into your Binary Ninja plugins directory.
25 | 
26 | ## Other Protections
27 | 
28 | * Undoing [Bogus Control Flow](https://github.com/RPISEC/llvm-deobfuscator/wiki/Handling-Bogus-Control-Flow-Pass-Manually)
29 | 
30 | [llvm-obfuscator]: https://github.com/obfuscator-llvm/obfuscator/tree/llvm-4.0/lib/Transforms/Obfuscation
31 | 


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python2
 2 | from binaryninja import *
 3 | from .deflatten import deflatten_cfg
 4 | from .util import *
 5 | 
 6 | 
 7 | class RunInBackground(BackgroundTaskThread):
 8 |     def __init__(self, bv, addr, msg, func):
 9 |             BackgroundTaskThread.__init__(self, msg, True)
10 |             self.bv = bv
11 |             self.addr = addr
12 |             self.func = func
13 | 
14 |     def run(self):
15 |             bv = self.bv
16 |             bv.begin_undo_actions()
17 |             fix_analysis(bv, self.addr)
18 |             self.func(bv, self.addr)
19 |             bv.commit_undo_actions()
20 |             bv.update_analysis()
21 | 
22 | 
23 | def DeFlattenBackgrounder(bv, addr):
24 |     s = RunInBackground(bv, addr, "Removing Control Flow Flattening", deflatten_cfg)
25 |     s.start()
26 | 
27 | 
28 | def fix_analysis(bv, addr):
29 |     # Binja may have skipped analysis of the function
30 |     # force analysis so we can use llil/mlil
31 |     f = get_func_containing(bv, addr)
32 |     if f is not None and f.analysis_skipped:
33 |         f.analysis_skip_override = FunctionAnalysisSkipOverride.NeverSkipFunctionAnalysis
34 |         bv.update_analysis_and_wait()
35 | 
36 | # Registered as a stand-alone address command; commands are not grouped in a dropdown for now
37 | PluginCommand.register_for_address("Deobfuscate (OLLVM)",
38 |                                    "Remove Control Flow Flattening given switch variable",
39 |                                    DeFlattenBackgrounder)
40 | 


--------------------------------------------------------------------------------
/deflatten.py:
--------------------------------------------------------------------------------
  1 | from binaryninja import *
  2 | from operator    import *
  3 | from pprint      import *
  4 | from itertools   import chain
  5 | from .util       import *
  6 | 
  7 | 
  8 | class CFGLink(object):
  9 |     def __init__(self, block, true_block, false_block=None, def_il=None):
 10 |         """ Create a link from a block to its real successors
 11 | 
 12 |         Args:
 13 |             block (BasicBlock): block to start from
 14 |             true_block (BasicBlock): Block whose first outgoing edge leads to the target of
 15 |                 an unconditional jump, or to the true branch of a conditional jump
 16 |             false_block (BasicBlock): Same, for the false branch of a conditional jump
 17 |             def_il (MediumLevelILInstruction): The instruction that was used
 18 |                 to discover this link. This will be a definition of the state
 19 |                 variable
 20 |         """
 21 |         self.il = def_il  # The definition il we used to find this link
 22 |         self.block = block
 23 | 
 24 |         # Resolve the true/false blocks: the real successor is the first outgoing edge target
 25 |         self.true_block = true_block.outgoing_edges[0].target
 26 |         self.false_block = false_block
 27 |         if self.false_block is not None:
 28 |             self.false_block = self.false_block.outgoing_edges[0].target
 29 | 
 30 |     @property
 31 |     def is_uncond(self):
 32 |         return self.false_block is None  # no false branch means an unconditional link
 33 | 
 34 |     @property
 35 |     def is_cond(self):
 36 |         return not self.is_uncond
 37 | 
 38 |     def gen_asm(self, bv, base_addr):
 39 |         """ Generates a patch to repair this link
 40 | 
 41 |         For an unconditional jump, this will generate
 42 |             jmp next_block
 43 | 
 44 |         For a conditional jump, this will generate
 45 |             jcc true_block
 46 |             jmp false_block
 47 |         where cc is the condition used in the original CMOVcc in the flattening logic
 48 | 
 49 |         Args:
 50 |             bv (BinaryView)
 51 |             base_addr (int): The address where these instructions will be placed.
 52 |                 This is necessary to calculate relative addresses
 53 | 
 54 |         Returns:
 55 |             str: The assembled patch opcodes
 56 |         """
 57 |         # It's assumed that base_addr is the start of free space
 58 |         # at the end of a newly recovered block
 59 |         def rel(addr):
 60 |             return hex(addr - base_addr).rstrip('L')  # drop the 'L' that hex() appends to Python 2 longs
 61 | 
 62 |         # Unconditional jmp
 63 |         if self.is_uncond:
 64 |             next_addr = self.true_block.start
 65 |             print '[+] Patching from {:x} to {:x}'.format(base_addr, next_addr)
 66 |             return safe_asm(bv, 'jmp {}'.format(rel(next_addr)))
 67 | 
 68 |         # Branch based on original cmovcc
 69 |         else:
 70 |             assert self.il is not None
 71 |             true_addr = self.true_block.start
 72 |             false_addr = self.false_block.start
 73 |             print '[+] Patching from {:x} to T: {:x} F: {:x}'.format(base_addr,
 74 |                                                                      true_addr,
 75 |                                                                      false_addr)
 76 | 
 77 |             # Find the cmovcc by looking at the def il's incoming edges
 78 |             # Both parent blocks are part of the same cmov
 79 |             il_bb = next(bb for bb in self.il.function if bb.start <= self.il.instr_index < bb.end)
 80 |             cmov_addr = il_bb.incoming_edges[0].source[-1].address
 81 |             cmov = bv.get_disassembly(cmov_addr).split(' ')[0]
 82 | 
 83 |             # Turn the cmovcc mnemonic into the jcc with the same condition (cmovne => jne)
 84 |             jmp_instr = cmov.replace('cmov', 'j')
 85 | 
 86 |             # Generate the branch instructions
 87 |             asm = safe_asm(bv, '{} {}'.format(jmp_instr, rel(true_addr)))
 88 |             base_addr += len(asm)  # rel() reads base_addr, so the jmp below is relative to its own address
 89 |             asm += safe_asm(bv, 'jmp {}'.format(rel(false_addr)))
 90 | 
 91 |             return asm
 92 | 
 93 |     def __repr__(self):
 94 |         if self.is_uncond:
 95 |             return '<U Link: {} => {}>'.format(self.block,
 96 |                                                self.true_block)
 97 |         else:
 98 |             return '<C Link: {} => T: {}, F: {}>'.format(self.block,
 99 |                                                          self.true_block,
100 |                                                          self.false_block)
101 | 
102 | 
103 | def compute_backbone_map(bv, mlil, state_var):
104 |     """ Recover the map of state values to backbone blocks
105 | 
106 |     This will generate a map of
107 |     {
108 |         state1 => BasicBlock1,
109 |         state2 => BasicBlock2,
110 |         ...
111 |     }
112 | 
113 |     Where BasicBlock1 is the block in the backbone that will dispatch to
114 |     an original block if the state is currently equal to state1
115 | 
116 |     Args:
117 |         bv (BinaryView)
118 |         mlil (MediumLevelILFunction): The MLIL for the function to be deflattened
119 |         state_var (Variable): The state variable in the MLIL
120 | 
121 |     Returns:
122 |         dict: map of {state value => backbone block}
123 |     """
124 |     backbone = {}
125 | 
126 |     # The state variable itself isn't always the one referenced in the
127 |     # backbone blocks; they may instead use a copy of it held in another variable.
128 |     # Find the variable that all subdispatchers use in comparisons
129 |     var = state_var
130 |     uses = mlil.get_var_uses(var)
131 |     # The variable with >2 uses is probably the one in the backbone blocks
132 |     while len(uses) <= 2:
133 |         var = mlil[uses[-1]].dest  # follow the last use to the variable it assigns
134 |         uses = mlil.get_var_uses(var)
135 |     uses += mlil.get_var_definitions(var)
136 | 
137 |     # Gather the MLIL blocks that contain a use or a definition of this variable
138 |     blks = (b for il in uses for b in mlil.basic_blocks if b.start <= il.instr_index < b.end)
139 | 
140 |     # In each of these blocks, find the state value it compares against
141 |     for bb in blks:
142 |         # Find the comparison that defines the condition of the block's last instruction
143 |         cond_var = bb[-1].condition.src
144 |         cmp_il = mlil[mlil.get_var_definitions(cond_var)[0]]
145 | 
146 |         # Pull out the state value (the constant on the right-hand side of the comparison)
147 |         state = cmp_il.src.right.constant
148 |         backbone[state] = bv.get_basic_blocks_at(bb[0].address)[0]  # the view's block, not the MLIL one
149 | 
150 |     return backbone
151 | 
152 | 
153 | def compute_original_blocks(bv, mlil, state_var):
154 |     """ Gathers all MLIL instructions that (re)define the state variable
155 |     Args:
156 |         bv (BinaryView)
157 |         mlil (MediumLevelILFunction): The MLIL for the function to be deflattened
158 |         state_var (Variable): The state variable in the MLIL
159 | 
160 |     Returns:
161 |         tuple: All MediumLevelILInstructions in mlil that update state_var
162 |     """
163 |     original = mlil.get_var_definitions(state_var)
164 |     return itemgetter(*original)(mlil)
165 | 
166 | 
167 | def resolve_cfg_link(bv, mlil, il, backbone):
168 |     """ Resolves the true successors of a block
169 | 
170 |     When there is only one successor, the state variable is set to a constant,
171 |     so we simply look this new state up in the backbone map
172 | 
173 |     When there are 2 successors, we rely on SSA form to decide which successor
174 |     state is the true/false branch. Of the two possible values that the next state
175 |     may be, the earlier version (default value) corresponds to the false branch
176 | 
177 |     Args:
178 |         bv (BinaryView)
179 |         mlil (MediumLevelILFunction): The MLIL for the function to be deflattened
180 |         il (MediumLevelILInstruction): An instruction in one of the original blocks
181 |             that updates the state variable
182 |         backbone (dict): map of {state value => backbone block}
183 | 
184 |     Returns:
185 |         CFGLink: a link with the resolved successors for the block il was contained in
186 |     """
187 |     # il refers to a definition of the state_var; find the block in the view that holds it
188 |     bb = bv.get_basic_blocks_at(il.address)[0]
189 | 
190 |     # Unconditional jumps will set the state to a constant
191 |     if il.src.operation == MediumLevelILOperation.MLIL_CONST or il.src.operation == MediumLevelILOperation.MLIL_CONST_PTR:
192 |         return CFGLink(bb, backbone[il.src.constant], def_il=il)
193 | 
194 |     # Conditional jumps choose between two values
195 |     else:
196 |         # Go into SSA to figure out which state is the false branch
197 |         # Get the phi that defines the variable assigned to the state variable here
198 |         phi = get_ssa_def(mlil, il.ssa_form.src.src)
199 |         assert phi.operation == MediumLevelILOperation.MLIL_VAR_PHI
200 | 
201 |         # The cmov (select) will only ever replace the default value (false)
202 |         # with another if the condition passes (true)
203 |         # So all we need to do is take the earliest version of the SSA var
204 |         # as the false state
205 |         f_def, t_def = sorted(phi.src, key=lambda var: var.version)  # assumes exactly two phi sources
206 | 
207 |         # Each definition is expected to have exactly one possible value: its state constant
208 |         false_state = get_ssa_def(mlil, f_def).src.possible_values.value
209 |         true_state  = get_ssa_def(mlil, t_def).src.possible_values.value
210 | 
211 |         return CFGLink(bb, backbone[true_state], backbone[false_state], il)
212 | 
213 | 
214 | def clean_block(bv, mlil, link):
215 |     """ Return the data for a block with all unnecessary instructions removed
216 | 
217 |     Args:
218 |         bv (BinaryView)
219 |         mlil (MediumLevelILFunction): The MLIL for the function to be deflattened
220 |         link (CFGLink): a link with the resolved successors for a block
221 | 
222 |     Returns:
223 |         str: A copy of the block link is based on with all dead instructions removed
224 |     """
225 | 
226 |     # Helper for resolving new addresses for relative calls
227 |     def _fix_call(bv, addr, newaddr):
228 |         tgt = llil_at(bv, addr).dest.constant
229 |         reladdr = hex(tgt - newaddr).rstrip('L')
230 |         return safe_asm(bv, 'call {}'.format(reladdr))
231 | 
232 |     # The terminator gets replaced anyway
233 |     block = link.block
234 |     old_len = block.length
235 |     nop_addrs = {block.disassembly_text[-1].address}
236 | 
237 |     # Gather all addresses related to the state variable
238 |     if link.il is not None:
239 |         gather_defs(link.il.ssa_form, nop_addrs)
240 | 
241 |     # Rebuild the block, skipping the bad instrs
242 |     addr = block.start
243 |     data = ''
244 |     while addr < block.end:
245 |         # How much data to read
246 |         ilen = bv.get_instruction_length(addr)
247 | 
248 |         # Only process this instruction if we haven't blacklisted it
249 |         if addr not in nop_addrs:
250 |             # Calls need to be handled separately to fix relative addressing
251 |             if is_call(bv, addr):
252 |                 data += _fix_call(bv, addr, block.start + len(data))
253 |             else:
254 |                 data += bv.read(addr, ilen)
255 | 
256 |         # Next instruction
257 |         addr += ilen
258 |     return data, block.start + len(data), old_len
259 | 
260 | 
261 | def gather_full_backbone(backbone_map):
262 |     """ Collect all blocks that are part of the backbone
263 | 
264 |     Args:
265 |         backbone_map (dict): map of {state value => backbone block}
266 | 
267 |     Returns:
268 |         set: All BasicBlocks involved in any form in the backbone
269 |     """
270 |     # Get the immediately known blocks from the map
271 |     backbone_blocks = backbone_map.values()
272 |     backbone_blocks += [bb.outgoing_edges[1].target for bb in backbone_blocks]
273 | 
274 |     # Some of these blocks might be part of a chain of unconditional jumps back to the top of the backbone
275 |     # Find the rest of the blocks in the chain and add them to be removed
276 |     for bb in backbone_blocks:
277 |         blk = bb
278 |         while len(blk.outgoing_edges) == 1:
279 |             if blk not in backbone_blocks:
280 |                 backbone_blocks.append(blk)
281 |             blk = blk.outgoing_edges[0].target
282 |     return set(backbone_blocks)
283 | 
284 | 
285 | def deflatten_cfg(bv, addr):
286 |     """ Reverses the control flow flattening pass from OLLVM by patching the view in place
287 | 
288 |     Args:
289 |         bv (BinaryView)
290 |         addr (int): Selected address in the view. This should be an
291 |             instruction where the state variable is updated
292 |     """
293 |     func = get_func_containing(bv, addr)
294 |     mlil = func.medium_level_il
295 |     state_var = func.get_low_level_il_at(addr).medium_level_il.dest  # variable written at addr
296 | 
297 |     # map every state value to the backbone block that dispatches on it
298 |     backbone = compute_backbone_map(bv, mlil, state_var)
299 |     print '[+] Computed backbone'
300 |     pprint(backbone)
301 | 
302 |     # compute all the defs of the state_var in the original basic blocks
303 |     original = compute_original_blocks(bv, mlil, state_var)
304 |     print '[+] Usages of the state variable in original basic blocks'
305 |     pprint(original)
306 | 
307 |     # at this point we have all the information to reconstruct the CFG
308 |     CFG = [resolve_cfg_link(bv, mlil, il, backbone) for il in original]
309 |     print '[+] Computed original CFG'
310 |     pprint(CFG)
311 | 
312 |     # patch in all the changes
313 |     print '[+] Patching all discovered links'
314 |     for link in CFG:
315 |         # Clean out instructions we don't need to make space
316 |         blockdata, cave_addr, orig_len = clean_block(bv, mlil, link)
317 | 
318 |         # Append the new branch, pad with nops to the old length, write it back. NOTE(review): no check that it fits
319 |         blockdata += link.gen_asm(bv, cave_addr)
320 |         blockdata = blockdata.ljust(orig_len, safe_asm(bv, 'nop'))
321 |         bv.write(link.block.start, blockdata)
322 | 
323 |     # Do some final cleanup: overwrite every backbone block with nops
324 |     print '[+] NOPing backbone'
325 |     nop = safe_asm(bv, 'nop')
326 |     for bb in gather_full_backbone(backbone):
327 |         print '[+] NOPing block: {}'.format(bb)
328 |         bv.write(bb.start, nop * bb.length)
329 | 
330 | 
331 | """
332 | Example CFG:
333 | [<C Link: <block: x86_64@0x4006e7-0x400700> => T: <block: x86_64@0x400700-0x400720>, F: <block: x86_64@0x400735-0x400741>>,
334 |  <U Link: <block: x86_64@0x4006d4-0x4006e7> => <block: x86_64@0x4006e7-0x400700>>,
335 |  <U Link: <block: x86_64@0x400700-0x400720> => <block: x86_64@0x400720-0x400735>>,
336 |  <U Link: <block: x86_64@0x4006b4-0x4006d4> => <block: x86_64@0x400741-0x400749>>,
337 |  <U Link: <block: x86_64@0x400735-0x400741> => <block: x86_64@0x400741-0x400749>>,
338 |  <C Link: <block: x86_64@0x400699-0x4006b4> => T: <block: x86_64@0x4006b4-0x4006d4>, F: <block: x86_64@0x4006d4-0x4006e7>>,
339 |  <U Link: <block: x86_64@0x400720-0x400735> => <block: x86_64@0x4006e7-0x400700>>]
340 | """
341 | 


--------------------------------------------------------------------------------
/plugin.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"plugin": {
 3 | 		"name": "LLVM Deobfuscator",
 4 | 		"type": ["core", "ui"],
 5 | 		"api": "python2",
 6 | 		"description": "Deobfuscator for LLVM-Obfuscator",
 7 | 		"longdescription": "Emits a cleaned binary from one that has been obfuscated via LLVM-obfuscator's control-flow flattening pass",
 8 | 		"license": {
 9 | 			"name": "MIT",
10 | 			"text": "Copyright (c) 2017 Toshi Piazza and Kareem El-Faramawi.\n\nPermission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE."
11 | 		},
12 | 		"version": "0.1",
13 | 		"author": "Toshi Piazza & Kareem El-Faramawi",
14 | 		"minimumBinaryNinjaVersion": {
15 | 			"dev": "1.1.dev-1616"
16 | 		}
17 | 	}
18 | }
19 | 


--------------------------------------------------------------------------------
/util.py:
--------------------------------------------------------------------------------
 1 | from binaryninja import *
 2 | 
 3 | 
 4 | def safe_asm(bv, asm_str):
 5 |     return bv.arch.assemble(asm_str)  # assemble asm_str with the architecture of the view
 6 | 
 7 | 
 8 | def get_ssa_def(mlil, var):
 9 |     """ Gets the instruction in the SSA form of mlil that defines the SSA variable var """
10 |     return mlil.ssa_form.get_ssa_var_definition(var)
11 | 
12 | 
13 | def gather_defs(il, defs):
14 |     """ Walks up a def chain starting at the given il (mlil-ssa)
15 |     until constants are found, gathering all addresses along the way
16 |     """
17 |     defs.add(il.address)
18 |     op = il.operation
19 | 
20 |     if op == MediumLevelILOperation.MLIL_CONST:
21 |         return
22 | 
23 |     if op in [MediumLevelILOperation.MLIL_VAR_SSA_FIELD,
24 |               MediumLevelILOperation.MLIL_VAR_SSA]:
25 |         gather_defs(get_ssa_def(il.function, il.src), defs)
26 | 
27 |     if op == MediumLevelILOperation.MLIL_VAR_PHI:
28 |         for var in il.src:
29 |             gather_defs(get_ssa_def(il.function, var), defs)
30 | 
31 |     if hasattr(il, 'src') and isinstance(il.src, MediumLevelILInstruction):
32 |         gather_defs(il.src, defs)
33 | 
34 | 
35 | def llil_at(bv, addr):
36 |     funcs = bv.get_functions_containing(addr)
37 |     if not funcs:
38 |         return None
39 | 
40 |     return funcs[0].get_low_level_il_at(addr)
41 | 
42 | 
43 | def is_call(bv, addr):
44 |     llil = llil_at(bv, addr)
45 |     if llil is None:
46 |         return False
47 | 
48 |     return llil.operation == LowLevelILOperation.LLIL_CALL
49 | 
50 | 
51 | def get_func_containing(bv, addr):
52 |     """ Finds the function, if any, containing the given address """
53 |     funcs = bv.get_functions_containing(addr)
54 |     return funcs[0] if funcs else None
55 | 


--------------------------------------------------------------------------------