├── .gitignore ├── LICENSE ├── README.md ├── am_graph.py ├── check_deed_loop.py ├── deflat_arm64.py ├── deflat_armeabi.py └── example ├── libOllvmTest.so └── libvdog.so /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # deollvm64 2 | this is deobfuscator llvm arm64 script 3 | 4 | ## Refer 5 | 1. [ARM64 OLLVM反混淆(续)](https://bbs.pediy.com/thread-253533.htm) 6 | 1. [利用符号执行去除控制流平坦化](https://security.tencent.com/index.php/blog/msg/112) 7 | 2. [ARM64 OLLVM反混淆](https://bbs.pediy.com/thread-252321.htm) 8 | 3. [deflat](https://github.com/cq674350529/deflat) 9 | -------------------------------------------------------------------------------- /am_graph.py: -------------------------------------------------------------------------------- 1 | 2 | import itertools 3 | from collections import defaultdict 4 | 5 | import networkx 6 | 7 | from angr.knowledge_plugins import Function 8 | 9 | 10 | def grouper(iterable, n, fillvalue=None): 11 | "Collect data into fixed-length chunks or blocks" 12 | args = [iter(iterable)] * n 13 | return itertools.izip_longest(*args, fillvalue=fillvalue) 14 | 15 | 16 | def to_supergraph(transition_graph): 17 | """ 18 | Convert transition graph of a function to a super transition graph. A super transition graph is a graph that looks 19 | like IDA Pro's CFG, where calls to returning functions do not terminate basic blocks. 20 | 21 | :param networkx.DiGraph transition_graph: The transition graph. 22 | :return: A converted super transition graph 23 | :rtype networkx.DiGraph 24 | """ 25 | 26 | # make a copy of the graph 27 | transition_graph = networkx.DiGraph(transition_graph) 28 | 29 | # remove all edges that transitions to outside 30 | for src, dst, data in list(transition_graph.edges(data=True)): 31 | if data['type'] == 'transition' and data.get('outside', False) is True: 32 | transition_graph.remove_edge(src, dst) 33 | if transition_graph.in_degree(dst) == 0: 34 | transition_graph.remove_node(dst) 35 | 36 | edges_to_shrink = set() 37 | 38 | # Find all edges to remove in the super graph 39 | for src in transition_graph.nodes(): 40 | edges = transition_graph[src] 41 | 42 | # there are two types of edges we want to remove: 43 | # - call or fakerets, since we do not want blocks to break at calls 44 | # - boring jumps that directly transfer the control to the block immediately after the current block. this is 45 | # usually caused by how VEX breaks down basic blocks, which happens very often in MIPS 46 | 47 | 48 | 49 | if len(edges) == 1 and src.addr + src.size == next(iter(edges.keys())).addr: 50 | dst = next(iter(edges.keys())) 51 | dst_in_edges = transition_graph.in_edges(dst) 52 | if len(dst_in_edges) == 1: 53 | edges_to_shrink.add((src, dst)) 54 | continue 55 | 56 | if any(iter('type' in data and data['type'] not in ('fake_return', 'call') for data in edges.values())): 57 | continue 58 | 59 | for dst, data in edges.items(): 60 | if isinstance(dst, Function): 61 | continue 62 | if 'type' in data and data['type'] == 'fake_return': 63 | if all(iter('type' in data and data['type'] in ('fake_return', 'return_from_call') 64 | for _, _, data in transition_graph.in_edges(dst, data=True))): 65 | edges_to_shrink.add((src, dst)) 66 | break 67 | 68 | # Create the super graph 69 | super_graph = networkx.DiGraph() 70 | 71 | supernodes_map = {} 72 | 73 | function_nodes = set() # it will be traversed after all other nodes are added into the supergraph 74 | 75 | for node in transition_graph.nodes(): 76 | 77 | if isinstance(node, Function): 78 | function_nodes.add(node) 79 | # don't put functions into the supergraph 80 | continue 81 | 82 | dests_and_data = transition_graph[node] 83 | 84 | # make a super node 85 | if node in supernodes_map: 86 | src_supernode = supernodes_map[node] 87 | else: 88 | src_supernode = SuperCFGNode.from_cfgnode(node) 89 | supernodes_map[node] = src_supernode 90 | # insert it into the graph 91 | super_graph.add_node(src_supernode) 92 | 93 | if not dests_and_data: 94 | # might be an isolated node 95 | continue 96 | 97 | for dst, data in dests_and_data.items(): 98 | 99 | edge = (node, dst) 100 | 101 | if edge in edges_to_shrink: 102 | 103 | if dst in supernodes_map: 104 | dst_supernode = supernodes_map[dst] 105 | else: 106 | dst_supernode = None 107 | 108 | src_supernode.insert_cfgnode(dst) 109 | 110 | # update supernodes map 111 | supernodes_map[dst] = src_supernode 112 | 113 | # merge the other supernode 114 | if dst_supernode is not None: 115 | src_supernode.merge(dst_supernode) 116 | 117 | for src in dst_supernode.cfg_nodes: 118 | supernodes_map[src] = src_supernode 119 | 120 | # link all out edges of dst_supernode to src_supernode 121 | for dst_, data_ in super_graph[dst_supernode].items(): 122 | super_graph.add_edge(src_supernode, dst_, **data_) 123 | 124 | # link all in edges of dst_supernode to src_supernode 125 | for src_, _, data_ in super_graph.in_edges([dst_supernode], data=True): 126 | super_graph.add_edge(src_, src_supernode, **data_) 127 | 128 | if 'type' in data_ and data_['type'] == 'transition': 129 | if not ('ins_addr' in data_ and 'stmt_idx' in data_): 130 | # this is a hack to work around the issue in Function.normalize() where ins_addr and 131 | # stmt_idx weren't properly set onto edges 132 | continue 133 | src_supernode.register_out_branch(data_['ins_addr'], data_['stmt_idx'], data_['type'], 134 | dst_supernode.addr 135 | ) 136 | 137 | super_graph.remove_node(dst_supernode) 138 | 139 | else: 140 | if isinstance(dst, Function): 141 | # skip all functions 142 | continue 143 | 144 | # make a super node 145 | if dst in supernodes_map: 146 | dst_supernode = supernodes_map[dst] 147 | else: 148 | dst_supernode = SuperCFGNode.from_cfgnode(dst) 149 | supernodes_map[dst] = dst_supernode 150 | 151 | super_graph.add_edge(src_supernode, dst_supernode, **data) 152 | 153 | if 'type' in data and data['type'] == 'transition': 154 | if not ('ins_addr' in data and 'stmt_idx' in data): 155 | # this is a hack to work around the issue in Function.normalize() where ins_addr and 156 | # stmt_idx weren't properly set onto edges 157 | continue 158 | src_supernode.register_out_branch(data['ins_addr'], data['stmt_idx'], data['type'], 159 | dst_supernode.addr 160 | ) 161 | 162 | for node in function_nodes: 163 | in_edges = transition_graph.in_edges(node, data=True) 164 | 165 | for src, _, data in in_edges: 166 | if not ('ins_addr' in data and 'stmt_idx' in data): 167 | # this is a hack to work around the issue in Function.normalize() where ins_addr and 168 | # stmt_idx weren't properly set onto edges 169 | continue 170 | supernode = supernodes_map[src] 171 | supernode.register_out_branch(data['ins_addr'], data['stmt_idx'], data['type'], node.addr) 172 | 173 | return super_graph 174 | 175 | 176 | class OutBranch(object): 177 | def __init__(self, ins_addr, stmt_idx, branch_type): 178 | self.ins_addr = ins_addr 179 | self.stmt_idx = stmt_idx 180 | self.type = branch_type 181 | 182 | self.targets = set() 183 | 184 | def __repr__(self): 185 | return "" % (self.ins_addr, self.type) 186 | 187 | def add_target(self, addr): 188 | self.targets.add(addr) 189 | 190 | def merge(self, other): 191 | """ 192 | Merge with the other OutBranch descriptor. 193 | 194 | :param OutBranch other: The other item to merge with. 195 | :return: None 196 | """ 197 | 198 | assert self.ins_addr == other.ins_addr 199 | assert self.type == other.type 200 | 201 | o = self.copy() 202 | o.targets |= other.targets 203 | 204 | return o 205 | 206 | def copy(self): 207 | o = OutBranch(self.ins_addr, self.stmt_idx, self.type) 208 | o.targets = self.targets.copy() 209 | return o 210 | 211 | def __eq__(self, other): 212 | if not isinstance(other, OutBranch): 213 | return False 214 | 215 | return self.ins_addr == other.ins_addr and \ 216 | self.stmt_idx == other.stmt_idx and \ 217 | self.type == other.type and \ 218 | self.targets == other.targets 219 | 220 | def __hash__(self): 221 | return hash((self.ins_addr, self.stmt_idx, self.type)) 222 | 223 | 224 | class SuperCFGNode(object): 225 | def __init__(self, addr): 226 | self.addr = addr 227 | 228 | self.cfg_nodes = [ ] 229 | 230 | self.out_branches = defaultdict(dict) 231 | 232 | @property 233 | def size(self): 234 | return sum(node.size for node in self.cfg_nodes) 235 | 236 | @classmethod 237 | def from_cfgnode(cls, cfg_node): 238 | s = cls(cfg_node.addr) 239 | 240 | s.cfg_nodes.append(cfg_node) 241 | 242 | return s 243 | 244 | def insert_cfgnode(self, cfg_node): 245 | # TODO: Make it binary search/insertion 246 | for i, n in enumerate(self.cfg_nodes): 247 | if cfg_node.addr < n.addr: 248 | # insert before n 249 | self.cfg_nodes.insert(i, cfg_node) 250 | break 251 | elif cfg_node.addr == n.addr: 252 | break 253 | else: 254 | self.cfg_nodes.append(cfg_node) 255 | 256 | # update addr 257 | self.addr = self.cfg_nodes[0].addr 258 | 259 | def register_out_branch(self, ins_addr, stmt_idx, branch_type, target_addr): 260 | if ins_addr not in self.out_branches or stmt_idx not in self.out_branches[ins_addr]: 261 | self.out_branches[ins_addr][stmt_idx] = OutBranch(ins_addr, stmt_idx, branch_type) 262 | 263 | self.out_branches[ins_addr][stmt_idx].add_target(target_addr) 264 | 265 | def merge(self, other): 266 | """ 267 | Merge another supernode into the current one. 268 | 269 | :param SuperCFGNode other: The supernode to merge with. 270 | :return: None 271 | """ 272 | 273 | for n in other.cfg_nodes: 274 | self.insert_cfgnode(n) 275 | 276 | for ins_addr, outs in other.out_branches.items(): 277 | if ins_addr in self.out_branches: 278 | for stmt_idx, item in outs.items(): 279 | if stmt_idx in self.out_branches[ins_addr]: 280 | self.out_branches[ins_addr][stmt_idx].merge(item) 281 | else: 282 | self.out_branches[ins_addr][stmt_idx] = item 283 | 284 | else: 285 | item = next(iter(outs.values())) 286 | self.out_branches[ins_addr][item.stmt_idx] = item 287 | 288 | def __repr__(self): 289 | return "" % (self.addr, len(self.cfg_nodes), 290 | len(self.out_branches) 291 | ) 292 | 293 | def __hash__(self): 294 | return hash(('supercfgnode', self.addr)) 295 | 296 | def __eq__(self, other): 297 | if not isinstance(other, SuperCFGNode): 298 | return False 299 | 300 | return self.addr == other.addr -------------------------------------------------------------------------------- /check_deed_loop.py: -------------------------------------------------------------------------------- 1 | #coding=utf-8 2 | 3 | from capstone import * 4 | from capstone.arm import * 5 | 6 | 7 | offset = 0x11D94 #function start 8 | end = 0x14CF8 #function end 9 | 10 | bin = open('libdynamicMono.so','rb').read() 11 | md = Cs(CS_ARCH_ARM, CS_MODE_ARM) 12 | md.detail = True #enable detail analyise 13 | 14 | 15 | for i in md.disasm(bin[offset:end], offset): 16 | print "0x%x:\t%s\t%s\n" % (i.address, i.mnemonic, i.op_str) 17 | for op in i.operands: 18 | if op.type == ARM_OP_IMM and op.value.imm == i.address: 19 | print "0x%x:\t%s\t%s\n" % (i.address, i.mnemonic, i.op_str) 20 | 21 | -------------------------------------------------------------------------------- /deflat_arm64.py: -------------------------------------------------------------------------------- 1 | #coding=utf-8 2 | 3 | import collections 4 | import claripy 5 | import angr 6 | import pyvex 7 | import am_graph 8 | from keystone import * 9 | from unicorn import * 10 | from unicorn.arm64_const import * 11 | from capstone import * 12 | from capstone.arm64 import * 13 | 14 | def reg_ctou(regname):# 15 | # This function covert capstone reg name to unicorn reg const. 16 | type1 = regname[0] 17 | if type1 == 'w' or type1 =='x': 18 | idx = int(regname[1:]) 19 | if type1 == 'w': 20 | return idx + UC_ARM64_REG_W0 21 | else: 22 | if idx == 29: 23 | return UC_ARM64_REG_X29 24 | elif idx == 30: 25 | return UC_ARM64_REG_X30 26 | else: 27 | return idx + UC_ARM64_REG_X0 28 | elif regname == 'sp': 29 | return UC_ARM64_REG_SP 30 | return None 31 | 32 | def asm_no_branch(ori,dist): 33 | ks = Ks(KS_ARCH_ARM64, KS_MODE_LITTLE_ENDIAN) 34 | print ("patch addr: 0x%x--> b #0x%x" % (ori,dist)) 35 | ins, count = ks.asm(("b #0x%x" % dist),ori) 36 | return ins 37 | 38 | def asm_has_branch(ori,dist1,dist2,cond): 39 | ks = Ks(KS_ARCH_ARM64, KS_MODE_LITTLE_ENDIAN) 40 | print "patch addr: 0x%x--> b%s #0x%x;b #0x%x" % (ori,cond,dist1,dist2) 41 | ins, count = ks.asm("b%s #0x%x;b #0x%x" % (cond,dist1,dist2),ori) 42 | return ins 43 | 44 | def get_context(): 45 | global mu 46 | regs = [] 47 | for i in range(31): 48 | idx = UC_ARM64_REG_X0 + i 49 | regs.append(mu.reg_read(idx)) 50 | regs.append(mu.reg_read(UC_ARM64_REG_SP)) 51 | return regs 52 | 53 | def set_context(regs): 54 | global mu 55 | if regs == None: 56 | return 57 | for i in range(31): 58 | idx = UC_ARM64_REG_X0 + i 59 | mu.reg_write(idx,regs[i]) 60 | mu.reg_write(UC_ARM64_REG_SP,regs[31]) 61 | 62 | # callback for memory exception 63 | def hook_mem_access(uc,type,address,size,value,userdata): 64 | pc = uc.reg_read(UC_ARM64_REG_PC) 65 | print 'pc:%x type:%d addr:%x size:%x' % (pc,type,address,size) 66 | #uc.emu_stop() 67 | return False 68 | 69 | def hook_code(uc, address, size, user_data): 70 | 71 | global base 72 | global is_debug 73 | global is_success 74 | global list_trace 75 | global relevant_block_addrs 76 | global next_real_block_addr 77 | global block_start_addr 78 | global branch_control 79 | global list_blocks 80 | 81 | ban_ins = ["bl"] 82 | 83 | if is_success: 84 | mu.emu_stop() 85 | return 86 | 87 | if (address + base) > end: 88 | uc.emu_stop() 89 | return 90 | 91 | for ins in md.disasm(bin[address:address + size], address): 92 | #print(">>> Tracing instruction at 0x%x, instruction size = 0x%x" % (address, size)) 93 | #print(">>> 0x%x:\t%s\t%s" % (ins.address, ins.mnemonic, ins.op_str)) 94 | #print 95 | 96 | if address == 0x96C: 97 | uc.emu_stop() 98 | return 99 | 100 | if (address + base) in relevant_block_addrs: 101 | if list_trace.has_key(address): 102 | print "sssssss" 103 | #ch = raw_input("This maybe a fake block. codesign:%s " % get_code_sign(list_blocks[address])) 104 | uc.emu_stop() 105 | else: 106 | list_trace[address] = 1 107 | 108 | if (address + base) in relevant_block_addrs and address != block_start_addr: 109 | is_success = True 110 | next_real_block_addr = address 111 | #print 'find:%x' % address 112 | uc.emu_stop() 113 | return 114 | 115 | #是否跳过指令 116 | flag_pass = False 117 | for b in ban_ins: 118 | if ins.mnemonic.startswith(b): 119 | flag_pass = True 120 | break 121 | 122 | #只允许对栈的操作 123 | if ins.op_str.find('[') != -1: 124 | if ins.op_str.find('[sp') == -1: 125 | print(">>> 0x%x:\t%s\t%s" % (ins.address, ins.mnemonic, ins.op_str)) 126 | flag_pass = True 127 | for op in ins.operands: 128 | if op.type == ARM64_OP_MEM: 129 | addr = 0 130 | if op.value.mem.base != 0: 131 | addr += mu.reg_read(reg_ctou(ins.reg_name(op.value.mem.base))) 132 | elif op.value.index != 0: 133 | addr += mu.reg_read(reg_ctou(ins.reg_name(op.value.mem.index))) 134 | elif op.value.disp != 0: 135 | addr += op.value.disp 136 | if addr >= 0x80000000 and addr < 0x80000000 + 0x10000 * 8: 137 | flag_pass = False 138 | if flag_pass: 139 | #print("will pass 0x%x:\t%s\t%s" %(ins.address, ins.mnemonic, ins.op_str)) 140 | uc.reg_write(UC_ARM64_REG_PC, address + size) 141 | return 142 | 143 | #breaks 0x31300 144 | if address in [ 0x225AE8 ] or is_debug: 145 | is_debug = True 146 | print("0x%x:\t%s\t%s" % (ins.address, ins.mnemonic, ins.op_str)) 147 | while True: 148 | c = raw_input('>') 149 | if c == '': 150 | break 151 | if c == 's': 152 | uc.emu_stop() 153 | return 154 | if c == 'r': 155 | is_debug = False 156 | break 157 | if c[0] == '!': 158 | reg = reg_ctou(c[1:]) 159 | print "%s=%x (%d)" % (c[1:], mu.reg_read(reg),mu.reg_read(reg)) 160 | continue 161 | 162 | if ins.id == ARM64_INS_RET: 163 | uc.reg_write(UC_ARM64_REG_PC, 0) 164 | is_success = False 165 | print "ret ins.." 166 | mu.emu_stop() 167 | 168 | #ollvm branch 169 | if ins.mnemonic == 'csel': 170 | #print("csel 0x%x:\t%s\t%s" %(ins.address, ins.mnemonic, ins.op_str)) 171 | regs = [reg_ctou(x) for x in ins.op_str.split(', ')] 172 | assert len(regs) == 4 173 | v1 = uc.reg_read(regs[1]) 174 | v2 = uc.reg_read(regs[2]) 175 | if branch_control == 1: 176 | uc.reg_write(regs[0], v1) 177 | else: 178 | uc.reg_write(regs[0], v2) 179 | uc.reg_write(UC_ARM64_REG_PC, address + size) 180 | 181 | 182 | def find_path(start_addr,branch = None): 183 | global real_blocks 184 | global bin 185 | global base 186 | global mu 187 | global list_trace 188 | global block_start_addr 189 | global next_real_block_addr 190 | global is_success 191 | global branch_control 192 | try: 193 | list_trace = {} 194 | block_start_addr = start_addr - base 195 | is_success = False 196 | next_real_block_addr = 0 197 | branch_control = branch 198 | mu.emu_start(start_addr - base, 0x10000) 199 | 200 | except UcError as e: 201 | pc = mu.reg_read(UC_ARM64_REG_PC) 202 | # print ("111 pc:%x" % pc) 203 | if pc != 0: 204 | #mu.reg_write(UC_ARM64_REG_PC, pc + 4) 205 | return find_path(pc + 4, branch) + base 206 | else: 207 | print("ERROR: %s pc:%x" % (e,pc)) 208 | if is_success: 209 | return next_real_block_addr + base 210 | return None 211 | 212 | def fix(bin): 213 | global base 214 | 215 | queue = [start] 216 | check = [] 217 | while len(queue) > 0: 218 | pc = queue.pop() 219 | if pc in check: 220 | continue 221 | check.append(pc) 222 | 223 | node = None 224 | for relevant in relevants: 225 | if relevant.addr == pc: 226 | node = relevant 227 | 228 | block = project.factory.block(pc, size=node.size) 229 | 230 | if(len(flow[pc]) == 2): 231 | ins = block.capstone.insns[-2] 232 | if ins.insn.mnemonic.startswith('csel'): 233 | patch_offset = ins.address - base 234 | branch1 = flow[pc][0] - base 235 | branch2 = flow[pc][1] - base 236 | 237 | opcode = asm_has_branch(patch_offset, branch1, branch2, ins.insn.op_str[-2:]) 238 | op_str = "".join([ chr(i) for i in opcode ]) 239 | bin = bin[:patch_offset] + op_str + bin[patch_offset+8:] 240 | else: 241 | ins = block.capstone.insns[-3] 242 | if ins.insn.mnemonic.startswith('csel'): 243 | patch_offset = ins.address - base 244 | branch_offset = patch_offset + 4 245 | branch1 = flow[pc][0] - base 246 | branch2 = flow[pc][1] - base 247 | 248 | opcode = asm_has_branch(branch_offset, branch1, branch2, ins.insn.op_str[-2:]) 249 | op_str = "".join([ chr(i) for i in opcode ]) 250 | bin = bin[:patch_offset] + bin[branch_offset:branch_offset+4] + op_str + bin[branch_offset+8:] 251 | else: 252 | print "error !!!!!! %x" % (ins.address - base) 253 | raw_input() 254 | 255 | if(len(flow[pc]) == 1): 256 | patch_offset = block.capstone.insns[-1].address - base 257 | branch = flow[pc][0] - base 258 | opcode = asm_no_branch(patch_offset, branch) 259 | op_str = "".join([ chr(i) for i in opcode ]) 260 | bin = bin[:patch_offset] + op_str + bin[patch_offset+4:] 261 | 262 | if(len(flow[pc]) == 0): 263 | #ret block 264 | continue 265 | 266 | for i in flow[pc]: 267 | if i != None: 268 | queue.append(i) 269 | 270 | return bin 271 | 272 | ''' 273 | def symbolic_execution(start_addr, hook_addr=None, state=None, modify=None, inspect=False): 274 | def retn_procedure(state): 275 | global project 276 | ip = state.se.eval(state.regs.ip) 277 | project.unhook(ip) 278 | return 279 | 280 | #只处理真实块的条件分支 281 | def statement_inspect(state): 282 | global modify_value 283 | 284 | #IR 表达式 数组 285 | expressions = list(state.scratch.irsb.statements[state.inspect.statement].expressions) 286 | 287 | state.scratch.irsb.statements[state.inspect.statement].pp() 288 | if len(expressions) != 0 and isinstance(expressions[0], pyvex.expr.ITE): 289 | state.scratch.temps[expressions[0].cond.tmp] = modify_value 290 | #清空statement 291 | #state.inspect._breakpoints['statement'] = [] 292 | 293 | global project, relevant_block_addrs, modify_value 294 | 295 | if state == None: 296 | state = project.factory.blank_state(addr=start_addr, remove_options={angr.sim_options.LAZY_SOLVES}) 297 | if hook_addr != None: 298 | for i in hook_addr: 299 | project.hook(hook_addr, retn_procedure, length=4) 300 | if inspect: 301 | state.inspect.b('statement', when=angr.state_plugins.inspect.BP_BEFORE, action=statement_inspect) 302 | 303 | sm = project.factory.simulation_manager(state) 304 | sm.step() 305 | 306 | while len(sm.active) > 0: 307 | if len(sm.active) != 1: 308 | print 1 309 | for active_state in sm.active: 310 | print hex(active_state.addr) 311 | if active_state.addr in relevant_block_addrs: 312 | #sm.step() 313 | return (active_state.addr, active_state) 314 | sm.step() 315 | ''' 316 | 317 | def get_relevant_nodes(supergraph, node, founded_node): 318 | global relevant_nodes 319 | branch_nodes = list(supergraph.successors(node)) 320 | 321 | if len(branch_nodes) == 1 and branch_nodes[0] in founded_node: 322 | if node in relevant_nodes: 323 | for i in supergraph.predecessors(node): 324 | relevant_nodes.append(i) 325 | else: 326 | relevant_nodes.append(node) 327 | else: 328 | founded_node.append(node) 329 | for i in branch_nodes: 330 | if i not in founded_node: 331 | get_relevant_nodes(supergraph, i, founded_node) 332 | 333 | base = 0x400000 334 | start = 0x2264FC + base 335 | end = 0x2270DC + base 336 | filename = "libtersafe2.so" 337 | new_filename = filename + '.new' 338 | 339 | md = Cs(CS_ARCH_ARM64,CS_MODE_ARM) 340 | md.detail = True 341 | 342 | with open(filename, 'rb') as fp: 343 | bin = fp.read() 344 | 345 | project = angr.Project(filename, load_options={'auto_load_libs': False}) 346 | #cfg = project.analyses.CFGFast(normalize=True) 347 | cfg = project.analyses.CFGFast(normalize=True,regions=[(start, end)]) 348 | #start += project.entry 349 | target_function = cfg.functions.get(start) 350 | 351 | assert target_function != None 352 | 353 | end = start + target_function.size 354 | supergraph = am_graph.to_supergraph(target_function.transition_graph) 355 | 356 | retn_node = None 357 | prologue_node = None #序言块 358 | 359 | 360 | for node in supergraph.nodes(): 361 | if supergraph.in_degree(node) == 0: 362 | prologue_node = node 363 | if supergraph.out_degree(node) == 0: 364 | if retn_node == None: 365 | retn_node = node 366 | elif retn_node != None: 367 | assert len(list(supergraph.predecessors(node))) == 1 368 | assert len(list(supergraph.predecessors(retn_node))) == 1 369 | assert list(supergraph.predecessors(retn_node))[0] == list(supergraph.predecessors(node))[0] 370 | 371 | retn_node = list(supergraph.predecessors(retn_node))[0] 372 | 373 | 374 | if prologue_node is None or prologue_node.addr != start: 375 | print("Something must be wrong...") 376 | exit(0) 377 | 378 | main_dispatcher_node = list(supergraph.successors(prologue_node))[0] 379 | relevant_nodes = [] 380 | get_relevant_nodes(supergraph, main_dispatcher_node, []) 381 | relevant_block_addrs = [(node.addr) for node in relevant_nodes] 382 | 383 | print('*******************relevant blocks************************') 384 | print('prologue: %#x' % start) 385 | print('main_dispatcher: %#x' % main_dispatcher_node.addr) 386 | print('retn: %#x' % retn_node.addr) 387 | print('relevant_blocks:', [hex(addr) for addr in relevant_block_addrs]) 388 | 389 | print('*******************symbolic execution*********************') 390 | relevants = relevant_nodes 391 | relevants.append(prologue_node) 392 | relevants_without_retn = list(relevants) 393 | relevants.append(retn_node) 394 | for i in supergraph.successors(retn_node): 395 | relevants.append(i) 396 | 397 | relevant_block_addrs.extend([prologue_node.addr, retn_node.addr]) 398 | 399 | flow = collections.defaultdict(list) 400 | modify_value = None 401 | patch_instrs = {} 402 | 403 | ''' 404 | state = project.factory.blank_state(addr=prologue_node.addr, remove_options={angr.sim_options.LAZY_SOLVES}) 405 | sm = project.factory.simulation_manager(state) 406 | sm.step() 407 | 408 | queue = [(prologue_node.addr, None)] 409 | 410 | while len(queue) != 0: 411 | env = queue.pop() 412 | 413 | address = env[0] 414 | state = env[1] 415 | 416 | node = None 417 | for relevant in relevants: 418 | if relevant.addr == address: 419 | node = relevant 420 | block = project.factory.block(address, size=node.size) 421 | 422 | if node.addr in flow: 423 | #print "???" 424 | continue 425 | 426 | has_branches = False 427 | hook_addr = [] 428 | 429 | #代码块中有ollvm生成的分支 430 | for ins in block.capstone.insns: 431 | if ins.insn.mnemonic.startswith('csel'): 432 | has_branches = True 433 | elif ins.insn.mnemonic.startswith('bl'): 434 | hook_addr.append(ins.insn.address) 435 | 436 | if has_branches == True: 437 | (p1, next_state) = symbolic_execution(address, hook_addr, state, claripy.BVV(0, 1), True) 438 | (p2, next_state) = symbolic_execution(address, hook_addr, state, claripy.BVV(1, 1), True) 439 | print hex(p1) 440 | print hex(p2) 441 | if p1 != None: 442 | queue.append((p1, next_state)) 443 | flow[node].append(p1) 444 | if p1 == p2: 445 | p2 = None 446 | 447 | if p2 != None: 448 | queue.append((p2, next_state)) 449 | flow[node].append(p2) 450 | else: 451 | (p, next_state) = symbolic_execution(address, hook_addr, state) 452 | print hex(p) 453 | if p != None: 454 | queue.append((p, next_state)) 455 | flow[node].append(p) 456 | ''' 457 | 458 | mu = Uc(UC_ARCH_ARM64, UC_MODE_ARM) 459 | #init stack 460 | mu.mem_map(0x80000000,0x10000 * 8) 461 | # map 4MB memory for this emulation 462 | mu.mem_map(0, 16 * 1024 * 1024) 463 | 464 | # write machine code to be emulated to memory 465 | mu.mem_write(0, bin) 466 | mu.reg_write(UC_ARM64_REG_SP, 0x80000000 + 0x10000 * 6) 467 | mu.hook_add(UC_HOOK_CODE, hook_code) 468 | mu.hook_add(UC_HOOK_MEM_UNMAPPED, hook_mem_access) 469 | 470 | #set function argv 471 | mu.reg_write(UC_ARM64_REG_X2, 1) 472 | 473 | list_trace = {} 474 | is_debug = False 475 | queue = [(start, None)] 476 | 477 | while len(queue) != 0: 478 | 479 | env = queue.pop() 480 | address = env[0] 481 | context = env[1] 482 | 483 | set_context(context) 484 | 485 | if address in flow: 486 | #print "???" 487 | continue 488 | 489 | node = None 490 | for relevant in relevants: 491 | if relevant.addr == address: 492 | node = relevant 493 | 494 | block = project.factory.block(address, size=node.size) 495 | has_branches = False 496 | hook_addr = [] 497 | 498 | #代码块中有ollvm生成的分支 499 | for ins in block.capstone.insns: 500 | if ins.insn.mnemonic.startswith('csel'): 501 | has_branches = True 502 | 503 | #代码块中有ollvm生成的分支 504 | if has_branches: 505 | ctx = get_context() 506 | p1 = find_path(address, 0) 507 | if p1 != None: 508 | queue.append((p1, get_context())) 509 | flow[address].append(p1) 510 | 511 | set_context(ctx) 512 | p2 = find_path(address, 1) 513 | 514 | if p1 == p2: 515 | p2 = None 516 | 517 | if p2 != None: 518 | queue.append((p2, get_context())) 519 | flow[address].append(p2) 520 | else: 521 | p = find_path(address) 522 | if p != None: 523 | queue.append((p, get_context())) 524 | flow[address].append(p) 525 | 526 | print('************************flow******************************') 527 | for k, v in flow.items(): 528 | print('%#x: ' % k, [hex(child) for child in v]) 529 | 530 | print('************************fix******************************') 531 | new_bin = fix(bin) 532 | 533 | ks = Ks(KS_ARCH_ARM64, KS_MODE_LITTLE_ENDIAN) 534 | ins, count = ks.asm("nop") 535 | op_nop_str = "".join([ chr(i) for i in ins]) 536 | 537 | for node in supergraph.nodes(): 538 | if node not in relevants: 539 | nop_node = op_nop_str * (node.size / 4) 540 | new_bin = new_bin[:node.addr-base] + nop_node + new_bin[node.addr-base+node.size:] 541 | 542 | with open(new_filename,"wb") as fp: 543 | fp.write(new_bin) -------------------------------------------------------------------------------- /deflat_armeabi.py: -------------------------------------------------------------------------------- 1 | #coding=utf-8 2 | 3 | import collections 4 | import claripy 5 | import angr 6 | import pyvex 7 | import am_graph 8 | from keystone import * 9 | from unicorn import * 10 | from unicorn.arm_const import * 11 | from capstone import * 12 | from capstone.arm import * 13 | 14 | 15 | def is_ollvm_branch(index, insn): 16 | if (index + 1) > len(insn): 17 | return False 18 | if insn[index].mnemonic.startswith('movw') and len(insn[index].mnemonic) == 6 and insn[index+1].mnemonic.startswith('movt') and len(insn[index+1].mnemonic) == 6: 19 | return True 20 | return False 21 | 22 | def reg_ctou(regname):# 23 | # This function covert capstone reg name to unicorn reg const. 24 | type1 = regname[0] 25 | if type1 == 'r': 26 | idx = int(regname[1:]) 27 | return idx + UC_ARM_REG_R0 28 | elif regname == 'sp': 29 | return UC_ARM_REG_SP 30 | elif regname == 'pc': 31 | return UC_ARM_REG_PC 32 | elif regname == 'fp': 33 | return UC_ARM_REG_FP 34 | return None 35 | 36 | def asm_no_branch(ori,dist): 37 | global ks 38 | print ("patch addr: 0x%x--> b #0x%x" % (ori,dist)) 39 | ins, count = ks.asm(("b #0x%x" % dist),ori) 40 | return ins 41 | 42 | def asm_has_branch(ori,dist1,dist2,cond): 43 | global ks 44 | print ("patch addr: 0x%x--> b%s #0x%x;b #0x%x" % (ori,cond,dist1,dist2)) 45 | ins, count = ks.asm("b%s #0x%x;b #0x%x" % (cond,dist1,dist2),ori) 46 | return ins 47 | 48 | def get_context(): 49 | global mu 50 | regs = [] 51 | for i in range(12): 52 | idx = UC_ARM_REG_R0 + i 53 | regs.append(mu.reg_read(idx)) 54 | regs.append(mu.reg_read(UC_ARM_REG_SP)) 55 | regs.append(mu.reg_read(UC_ARM_REG_FP)) 56 | regs.append(mu.reg_read(UC_ARM_REG_IP)) 57 | return regs 58 | 59 | def set_context(regs): 60 | global mu 61 | if regs == None: 62 | return 63 | for i in range(12): 64 | idx = UC_ARM_REG_R0 + i 65 | mu.reg_write(idx, regs[i]) 66 | mu.reg_write(UC_ARM_REG_SP, regs[12]) 67 | mu.reg_write(UC_ARM_REG_FP, regs[13]) 68 | mu.reg_write(UC_ARM_REG_IP, regs[14]) 69 | 70 | # callback for memory exception 71 | def hook_mem_access(uc, type, address, size, value, userdata): 72 | pc = uc.reg_read(UC_ARM_REG_PC) 73 | print ('pc:%x type:%d addr:%x size:%x' % (pc,type,address,size)) 74 | #uc.emu_stop() 75 | return False 76 | 77 | def hook_code(uc, address, size, user_data): 78 | 79 | global base 80 | global is_debug 81 | global is_success 82 | global list_trace 83 | global relevant_block_addrs 84 | global next_real_block_addr 85 | global block_start_addr 86 | global branch_control 87 | global list_blocks 88 | 89 | ban_ins = ["bl"] 90 | 91 | if is_success: 92 | mu.emu_stop() 93 | return 94 | 95 | if (address + base) > end: 96 | uc.emu_stop() 97 | return 98 | 99 | for ins in md.disasm(bin[address:address + size], address): 100 | #print(">>> Tracing instruction at 0x%x, instruction size = 0x%x" % (address, size)) 101 | #print(">>> 0x%x:\t%s\t%s" % (ins.address, ins.mnemonic, ins.op_str)) 102 | #print 103 | 104 | if (address + base) in relevant_block_addrs: 105 | if list_trace.has_key(address): 106 | print ("sssssss") 107 | #ch = raw_input("This maybe a fake block. codesign:%s " % get_code_sign(list_blocks[address])) 108 | uc.emu_stop() 109 | else: 110 | list_trace[address] = 1 111 | 112 | if (address + base) in relevant_block_addrs and address != block_start_addr: 113 | is_success = True 114 | next_real_block_addr = address 115 | #print 'find:%x' % address 116 | uc.emu_stop() 117 | return 118 | 119 | #是否跳过指令 120 | flag_pass = False 121 | for b in ban_ins: 122 | if ins.mnemonic.startswith(b): 123 | flag_pass = True 124 | break 125 | 126 | #只允许对栈的操作 127 | if ins.op_str.find('[') != -1: 128 | if ins.op_str.find('[sp') == -1: 129 | flag_pass = True 130 | for op in ins.operands: 131 | if op.type == ARM_OP_MEM: 132 | addr = 0 133 | if op.value.mem.base != 0: 134 | addr += mu.reg_read(reg_ctou(ins.reg_name(op.value.mem.base))) 135 | elif op.value.index != 0: 136 | addr += mu.reg_read(reg_ctou(ins.reg_name(op.value.mem.index))) 137 | elif op.value.disp != 0: 138 | addr += op.value.disp 139 | if addr >= 0x80000000 and addr < 0x80000000 + 0x10000 * 8: 140 | flag_pass = False 141 | if flag_pass: 142 | #print("will pass 0x%x:\t%s\t%s" %(ins.address, ins.mnemonic, ins.op_str)) 143 | uc.reg_write(UC_ARM_REG_PC, address + size) 144 | return 145 | 146 | #breaks 0x31300 147 | if address in [ 0xB72EC ] or is_debug: 148 | is_debug = True 149 | print("0x%x:\t%s\t%s" % (ins.address, ins.mnemonic, ins.op_str)) 150 | while True: 151 | c = raw_input('>') 152 | if c == '': 153 | break 154 | if c == 's': 155 | uc.emu_stop() 156 | return 157 | if c == 'r': 158 | is_debug = False 159 | break 160 | if c[0] == '!': 161 | reg = reg_ctou(c[1:]) 162 | print ("%s=%x (%d)" % (c[1:], mu.reg_read(reg),mu.reg_read(reg))) 163 | continue 164 | 165 | if ins.id == ARM_INS_POP: 166 | uc.reg_write(UC_ARM_REG_PC, -1) 167 | is_success = False 168 | print ("ret ins..") 169 | mu.emu_stop() 170 | 171 | #ollvm branch 172 | if ins.mnemonic.startswith('movw') and len(ins.mnemonic) == 6: 173 | ureg = reg_ctou(ins.reg_name(ins.operands[0].value.reg)) 174 | imm = ins.operands[1].value.imm 175 | 176 | if branch_control == 1: 177 | uc.reg_write(ureg, imm) 178 | uc.reg_write(UC_ARM_REG_PC, address + size) 179 | 180 | if ins.mnemonic.startswith('movt') and len(ins.mnemonic) == 6: 181 | ureg = reg_ctou(ins.reg_name(ins.operands[0].value.reg)) 182 | imm = ins.operands[1].value.imm 183 | 184 | v1 = uc.reg_read(ureg) 185 | v1 = v1 | (imm << 16) 186 | 187 | if branch_control == 1: 188 | uc.reg_write(ureg, v1) 189 | uc.reg_write(UC_ARM_REG_PC, address + size) 190 | 191 | def find_path(start_addr,branch = None): 192 | global real_blocks 193 | global bin 194 | global base 195 | global mu 196 | global list_trace 197 | global block_start_addr 198 | global next_real_block_addr 199 | global is_success 200 | global branch_control 201 | try: 202 | list_trace = {} 203 | block_start_addr = start_addr - base 204 | is_success = False 205 | next_real_block_addr = 0 206 | branch_control = branch 207 | mu.emu_start(start_addr - base, 0x10000) 208 | 209 | except UcError as e: 210 | pc = mu.reg_read(UC_ARM_REG_PC) 211 | # print ("111 pc:%x" % pc) 212 | if pc > 0 and pc < len(bin): 213 | pc = pc + base + 4 214 | #mu.reg_write(UC_ARM64_REG_PC, pc + 4) 215 | return find_path(pc, branch) 216 | else: 217 | print("ERROR: %s pc:%x" % (e,pc)) 218 | if is_success: 219 | return next_real_block_addr + base 220 | return None 221 | 222 | def fix(bin): 223 | global base 224 | 225 | queue = [start] 226 | check = [] 227 | while len(queue) > 0: 228 | pc = queue.pop() 229 | if pc in check: 230 | continue 231 | check.append(pc) 232 | 233 | node = None 234 | for relevant in relevants: 235 | if relevant.addr == pc: 236 | node = relevant 237 | 238 | block = project.factory.block(pc, size=node.size) 239 | 240 | if(len(flow[pc]) == 2): 241 | founded = False 242 | 243 | for index in range(len(block.capstone.insns)-1, -1, -1): 244 | ins = block.capstone.insns[index] 245 | 246 | if is_ollvm_branch(index, block.capstone.insns): 247 | patch_offset = ins.address - base 248 | branch1 = flow[pc][0] - base 249 | branch2 = flow[pc][1] - base 250 | cond = ins.insn.mnemonic[-2:] 251 | opcode = asm_has_branch(patch_offset, branch1, branch2, cond) 252 | op_str = "".join([ chr(i) for i in opcode ]) 253 | 254 | bin = bin[:patch_offset] + op_str + bin[patch_offset+len(op_str):] 255 | founded = True 256 | 257 | break 258 | #ins, count = ks.asm("nop") 259 | #op_str += "".join([ chr(i) for i in ins]) 260 | 261 | if founded == True: 262 | for i in range(index+2, len(block.capstone.insns)): 263 | ins = block.capstone.insns[i] 264 | patch_offset = ins.address - base 265 | if ins.id == ARM_INS_B: 266 | ins, count = ks.asm("nop") 267 | op_nop_str = "".join([ chr(i) for i in ins]) 268 | bin = bin[:patch_offset] + op_nop_str + bin[patch_offset+len(op_nop_str):] 269 | else: 270 | swap_offset = patch_offset - 8 271 | 272 | opcode = asm_has_branch(swap_offset+ins.size, branch1, branch2, cond) 273 | op_str = "".join([ chr(i) for i in opcode ]) 274 | 275 | bin = bin[:swap_offset] + bin[patch_offset:patch_offset+ins.size] + op_str + bin[patch_offset+ins.size:] 276 | 277 | ''' 278 | opcode = ks.asm(ins.op_str) + asm_has_branch(swap_offset+ins.size, branch1, branch2, cond) 279 | op_str = "".join([ chr(i) for i in opcode ]) 280 | 281 | bin = bin[:swap_offset] + op_str + bin[patch_offset+ins.size:] 282 | ''' 283 | else: 284 | print ("error !!!!!!") 285 | raw_input() 286 | 287 | if(len(flow[pc]) == 1): 288 | patch_offset = block.capstone.insns[-1].address - base 289 | branch = flow[pc][0] - base 290 | opcode = asm_no_branch(patch_offset, branch) 291 | op_str = "".join([ chr(i) for i in opcode ]) 292 | bin = bin[:patch_offset] + op_str + bin[patch_offset+len(op_str):] 293 | 294 | if(len(flow[pc]) == 0): 295 | #ret block 296 | continue 297 | 298 | 299 | for i in flow[pc]: 300 | if i != None: 301 | queue.append(i) 302 | 303 | return bin 304 | 305 | ''' 306 | def symbolic_execution(start_addr, hook_addr=None, state=None, modify=None, inspect=False): 307 | def retn_procedure(state): 308 | global project 309 | ip = state.se.eval(state.regs.ip) 310 | project.unhook(ip) 311 | return 312 | 313 | #只处理真实块的条件分支 314 | def statement_inspect(state): 315 | global modify_value 316 | 317 | #IR 表达式 数组 318 | expressions = list(state.scratch.irsb.statements[state.inspect.statement].expressions) 319 | 320 | state.scratch.irsb.statements[state.inspect.statement].pp() 321 | if len(expressions) != 0 and isinstance(expressions[0], pyvex.expr.ITE): 322 | state.scratch.temps[expressions[0].cond.tmp] = modify_value 323 | #清空statement 324 | #state.inspect._breakpoints['statement'] = [] 325 | 326 | global project, relevant_block_addrs, modify_value 327 | 328 | if state == None: 329 | state = project.factory.blank_state(addr=start_addr, remove_options={angr.sim_options.LAZY_SOLVES}) 330 | if hook_addr != None: 331 | for i in hook_addr: 332 | project.hook(hook_addr, retn_procedure, length=4) 333 | if inspect: 334 | state.inspect.b('statement', when=angr.state_plugins.inspect.BP_BEFORE, action=statement_inspect) 335 | 336 | sm = project.factory.simulation_manager(state) 337 | sm.step() 338 | 339 | while len(sm.active) > 0: 340 | if len(sm.active) != 1: 341 | print 1 342 | for active_state in sm.active: 343 | print hex(active_state.addr) 344 | if active_state.addr in relevant_block_addrs: 345 | #sm.step() 346 | return (active_state.addr, active_state) 347 | sm.step() 348 | ''' 349 | 350 | def get_relevant_nodes(supergraph, node, founded_node): 351 | global relevant_nodes 352 | branch_nodes = list(supergraph.successors(node)) 353 | 354 | if len(branch_nodes) == 1 and branch_nodes[0] in founded_node: 355 | if node in relevant_nodes: 356 | for i in supergraph.predecessors(node): 357 | relevant_nodes.append(i) 358 | else: 359 | relevant_nodes.append(node) 360 | else: 361 | founded_node.append(node) 362 | for i in branch_nodes: 363 | if i not in founded_node: 364 | get_relevant_nodes(supergraph, i, founded_node) 365 | 366 | base = 0x400000 367 | start = 0x11D94 + base 368 | end = 0x14CF8 + base 369 | 370 | filename = "libdynamicMono.so" 371 | new_filename = filename + '.new' 372 | 373 | md = Cs(CS_ARCH_ARM, CS_MODE_ARM) 374 | ks = Ks(KS_ARCH_ARM, KS_MODE_ARM) 375 | md.detail = True 376 | 377 | with open(filename, 'rb') as fp: 378 | bin = fp.read() 379 | 380 | project = angr.Project(filename, load_options={'auto_load_libs': False}) 381 | #cfg = project.analyses.CFGAccurate(context_sensitivity_level=3,keep_state=True,starts=[start]) 382 | cfg = project.analyses.CFGFast(normalize=True,regions=[(start, end)]) 383 | #start += project.entry 384 | target_function = cfg.functions.get(start) 385 | 386 | assert target_function != None 387 | 388 | supergraph = am_graph.to_supergraph(target_function.transition_graph) 389 | 390 | retn_node = None 391 | prologue_node = None #序言块 392 | 393 | for node in supergraph.nodes(): 394 | if supergraph.in_degree(node) == 0: 395 | prologue_node = node 396 | if supergraph.out_degree(node) == 0: 397 | if retn_node == None: 398 | retn_node = node 399 | elif retn_node != None: 400 | assert len(list(supergraph.predecessors(node))) == 1 401 | assert len(list(supergraph.predecessors(retn_node))) == 1 402 | assert list(supergraph.predecessors(retn_node))[0] == list(supergraph.predecessors(node))[0] 403 | 404 | retn_node = list(supergraph.predecessors(retn_node))[0] 405 | 406 | 407 | if prologue_node is None or prologue_node.addr != start: 408 | print("Something must be wrong...") 409 | exit(0) 410 | 411 | main_dispatcher_node = list(supergraph.successors(prologue_node))[0] 412 | relevant_nodes = [] 413 | get_relevant_nodes(supergraph, main_dispatcher_node, []) 414 | relevant_block_addrs = [(node.addr) for node in relevant_nodes] 415 | 416 | print('*******************relevant blocks************************') 417 | print('prologue: %#x' % start) 418 | print('main_dispatcher: %#x' % main_dispatcher_node.addr) 419 | print('retn: %#x' % retn_node.addr) 420 | print('relevant_blocks:', [hex(addr) for addr in relevant_block_addrs]) 421 | 422 | print('*******************symbolic execution*********************') 423 | relevants = relevant_nodes 424 | relevants.append(prologue_node) 425 | relevants_without_retn = list(relevants) 426 | relevants.append(retn_node) 427 | for i in supergraph.successors(retn_node): 428 | relevants.append(i) 429 | 430 | relevant_block_addrs.extend([prologue_node.addr, retn_node.addr]) 431 | 432 | flow = collections.defaultdict(list) 433 | modify_value = None 434 | patch_instrs = {} 435 | 436 | ''' 437 | state = project.factory.blank_state(addr=prologue_node.addr, remove_options={angr.sim_options.LAZY_SOLVES}) 438 | sm = project.factory.simulation_manager(state) 439 | sm.step() 440 | 441 | queue = [(prologue_node.addr, None)] 442 | 443 | while len(queue) != 0: 444 | env = queue.pop() 445 | 446 | address = env[0] 447 | state = env[1] 448 | 449 | node = None 450 | for relevant in relevants: 451 | if relevant.addr == address: 452 | node = relevant 453 | block = project.factory.block(address, size=node.size) 454 | 455 | if node.addr in flow: 456 | #print "???" 457 | continue 458 | 459 | has_branches = False 460 | hook_addr = [] 461 | 462 | #代码块中有ollvm生成的分支 463 | for ins in block.capstone.insns: 464 | if ins.insn.mnemonic.startswith('csel'): 465 | has_branches = True 466 | elif ins.insn.mnemonic.startswith('bl'): 467 | hook_addr.append(ins.insn.address) 468 | 469 | if has_branches == True: 470 | (p1, next_state) = symbolic_execution(address, hook_addr, state, claripy.BVV(0, 1), True) 471 | (p2, next_state) = symbolic_execution(address, hook_addr, state, claripy.BVV(1, 1), True) 472 | print hex(p1) 473 | print hex(p2) 474 | if p1 != None: 475 | queue.append((p1, next_state)) 476 | flow[node].append(p1) 477 | if p1 == p2: 478 | p2 = None 479 | 480 | if p2 != None: 481 | queue.append((p2, next_state)) 482 | flow[node].append(p2) 483 | else: 484 | (p, next_state) = symbolic_execution(address, hook_addr, state) 485 | print hex(p) 486 | if p != None: 487 | queue.append((p, next_state)) 488 | flow[node].append(p) 489 | ''' 490 | 491 | mu = Uc(UC_ARCH_ARM, UC_MODE_ARM) 492 | #init stack 493 | mu.mem_map(0x80000000,0x10000 * 8) 494 | # map 4MB memory for this emulation 495 | mu.mem_map(0, 4 * 1024 * 1024) 496 | 497 | # write machine code to be emulated to memory 498 | mu.mem_write(0, bin) 499 | mu.reg_write(UC_ARM_REG_SP, 0x80000000 + 0x10000 * 6) 500 | mu.hook_add(UC_HOOK_CODE, hook_code) 501 | mu.hook_add(UC_HOOK_MEM_UNMAPPED, hook_mem_access) 502 | 503 | list_trace = {} 504 | is_debug = False 505 | queue = [(start, None)] 506 | 507 | while len(queue) != 0: 508 | 509 | env = queue.pop() 510 | address = env[0] 511 | context = env[1] 512 | 513 | set_context(context) 514 | 515 | if address in flow: 516 | #print "???" 517 | continue 518 | 519 | node = None 520 | for relevant in relevants: 521 | if relevant.addr == address: 522 | node = relevant 523 | 524 | block = project.factory.block(address, size=node.size) 525 | has_branches = False 526 | hook_addr = [] 527 | 528 | #代码块中有ollvm生成的分支 529 | for index in range(len(block.capstone.insns)): 530 | if is_ollvm_branch(index, block.capstone.insns): 531 | has_branches = True 532 | 533 | #代码块中有ollvm生成的分支 534 | if has_branches: 535 | ctx = get_context() 536 | p1 = find_path(address, 0) 537 | if p1 != None: 538 | queue.append((p1, get_context())) 539 | flow[address].append(p1) 540 | 541 | set_context(ctx) 542 | p2 = find_path(address, 1) 543 | 544 | if p1 == p2: 545 | p2 = None 546 | 547 | if p2 != None: 548 | queue.append((p2, get_context())) 549 | flow[address].append(p2) 550 | else: 551 | p = find_path(address) 552 | if p != None: 553 | queue.append((p, get_context())) 554 | flow[address].append(p) 555 | 556 | print('************************flow******************************') 557 | for k, v in flow.items(): 558 | print('%#x: ' % k, [hex(child) for child in v]) 559 | 560 | print('************************fix******************************') 561 | new_bin = fix(bin) 562 | 563 | ins, count = ks.asm("nop") 564 | op_nop_str = "".join([ chr(i) for i in ins]) 565 | 566 | for node in supergraph.nodes(): 567 | if node not in relevants: 568 | nop_node = op_nop_str * (node.size / 4) 569 | new_bin = new_bin[:node.addr-base] + nop_node + new_bin[node.addr-base+node.size:] 570 | 571 | with open(new_filename,"wb") as fp: 572 | fp.write(new_bin) -------------------------------------------------------------------------------- /example/libOllvmTest.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeT1t/deollvm64/588a9f4c25e0ee2313a7e572c3d72bc9c66647f6/example/libOllvmTest.so -------------------------------------------------------------------------------- /example/libvdog.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeT1t/deollvm64/588a9f4c25e0ee2313a7e572c3d72bc9c66647f6/example/libvdog.so --------------------------------------------------------------------------------