├── IDAGenDFG.py ├── LICENSE ├── README.md ├── libdataflow.py ├── testcase ├── test ├── test-idapython.py ├── test-miasm.py ├── test.cpp └── test.dump └── utils.py /IDAGenDFG.py: -------------------------------------------------------------------------------- 1 | import idaapi 2 | import idautils 3 | import idc 4 | import ida_pro 5 | import ida_auto 6 | import os, sys 7 | from libdataflow import ida_dataflow_analysis 8 | from argparse import ArgumentParser 9 | 10 | def main(OUTPUT_DIR:str) -> None: 11 | os.makedirs(OUTPUT_DIR, exist_ok=True) 12 | 13 | textStartEA = 0 14 | textEndEA = 0 15 | for seg in idautils.Segments(): 16 | if (idc.get_segm_name(seg)==".text"): 17 | textStartEA = idc.get_segm_start(seg) 18 | textEndEA = idc.get_segm_end(seg) 19 | break 20 | 21 | for func in idautils.Functions(textStartEA, textEndEA): 22 | # Ignore Library Code 23 | flags = idc.get_func_attr(func, idc.FUNCATTR_FLAGS) 24 | if flags & idc.FUNC_LIB: 25 | print(hex(func), "FUNC_LIB", idc.get_func_name(func)) 26 | continue 27 | try: 28 | ida_dataflow_analysis(func, idc.get_func_name(func), OUTPUT_DIR, defuse_only=True) 29 | except Exception as e: 30 | print('Skip function {} due to dataflow analysis error: {}'.format(idc.get_func_name(func),e)) 31 | 32 | if __name__ == '__main__': 33 | if len(idc.ARGV) < 2: 34 | print('\n\nGenerating DFG & Def-Use Graph with IDA Pro and MIASM') 35 | print('\tNeed to specify the output dir with -o option') 36 | print('\tUsage: /path/to/ida -A -Lida.log -S"{} -o " /path/to/binary\n\n'.format(idc.ARGV[0])) 37 | ida_pro.qexit(1) 38 | 39 | parser = ArgumentParser(description="IDAPython script for generating dataflow graph of each function in the given binary") 40 | parser.add_argument("-o", "--output_dir", help="Output dir", default='./outputs', nargs='?') 41 | # parser.add_argument("-s", "--symb", help="Symbolic execution mode", 42 | # action="store_true") 43 | args = parser.parse_args() 44 | 45 | ida_auto.auto_wait() 46 | 47 | main(args.output_dir) 48 | 49 | ida_pro.qexit(0) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | ida-dfg 294 | Copyright (C) 2022 wenyu zhu 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DataFlowAnalysis-miasm 2 | 3 | Data-flow graph & Def-use graph generator 4 | 5 | Tested with IDA Pro 7.6 and miasm 7ee593d 6 | 7 | ## libdataflow.py 8 | 9 | 封装了两个核心接口给其他脚本用 10 | - `ida_dataflow_analysis`: 面向IDA + MIASM的场景 11 | - `miasm_dataflow_analysis`: 单独使用,不需要IDA Pro 12 | 13 | ## IDAGenDFG.py 14 | 15 | IDAPython调用的脚本 16 | 17 | `/path/to/ida -A -Lida.log -S"path/to/IDAGenDFG.py -o " /path/to/binary` 18 | 19 | ## deprecated/graph_dataflow.py 20 | 21 | 新版miasm支持的DFG/ReachinDefinition/DefUse分析 22 | 23 | ## deprecated/libdfg.py 24 | 25 | 代码升级 & debug工作停止,因为新版miasm自身支持dfg生成。 26 | 27 | 但是这部分代码的价值在于学习如何将miasm用到IDAPython里,详见`dataflow_analysis`函数。 28 | 29 | 30 | 31 | ## miasm的一些核心概念: 32 | - machine类: 定义架构、反汇编引擎、lifter 33 | - LocationDB类:各类数据结构的loc_key(unique id),例如AsmBlock, IRBlock的loc_key;以及定义了offset和loc_key相互转换的函数 34 | - Instruction类:可以在miasm.core.cpu内查看其成员函数、变量 35 | - AsmCFG类、AsmBlock类:汇编控制流图、基本块 36 | - IRBlock类、AssignBlock类:AsmBlock经Lifter翻译得到IRBlock,每一个IRBlock有若干个AssignBlock 37 | * 每个AssignBlock对应一条IR赋值语句(src -> dst),同时也可以对应回一条汇编指令(assignblk.instr) 38 | 39 | ## miasm的局限性 40 | 41 | - 反汇编较慢 42 | - 无法处理80bit浮点数 43 | -------------------------------------------------------------------------------- /libdataflow.py: -------------------------------------------------------------------------------- 1 | import os 2 | from future.utils import viewitems, viewvalues 3 | from utils import guess_machine 4 | 5 | from miasm.analysis.binary import Container 6 | from miasm.analysis.machine import Machine 7 | from miasm.expression.expression import get_expr_mem 8 | from miasm.analysis.data_analysis import inter_block_flow #, intra_block_flow_raw 9 | from miasm.core.graph import DiGraph 10 | from miasm.ir.symbexec import SymbolicExecutionEngine 11 | from miasm.analysis.data_flow import DeadRemoval, ReachingDefinitions, DiGraphDefUse 12 | from miasm.core.locationdb import LocationDB 13 | from miasm.core.bin_stream_ida import bin_stream_ida 14 | 15 | def intra_block_flow_symb(lifter, _, flow_graph, irblock, in_nodes, out_nodes): 16 | symbols_init = lifter.arch.regs.regs_init.copy() 17 | sb = SymbolicExecutionEngine(lifter, symbols_init) 18 | sb.eval_updt_irblock(irblock) 19 | print('*' * 40) 20 | print(irblock) 21 | 22 | 23 | out = sb.modified(mems=False) 24 | current_nodes = {} 25 | # Gen mem arg to mem node links 26 | for dst, src in out: 27 | src = sb.eval_expr(dst) 28 | for n in [dst, src]: 29 | 30 | all_mems = set() 31 | all_mems.update(get_expr_mem(n)) 32 | 33 | for n in all_mems: 34 | node_n_w = (irblock.loc_key, 0, n) 35 | if not n == src: 36 | continue 37 | o_r = n.ptr.get_r(mem_read=False, cst_read=True) 38 | for i, n_r in enumerate(o_r): 39 | if n_r in current_nodes: 40 | node_n_r = current_nodes[n_r] 41 | else: 42 | node_n_r = (irblock.loc_key, i, n_r) 43 | if not n_r in in_nodes: 44 | in_nodes[n_r] = node_n_r 45 | flow_graph.add_uniq_edge(node_n_r, node_n_w) 46 | 47 | # Gen data flow links 48 | for dst in out: 49 | src = sb.eval_expr(dst) 50 | nodes_r = src.get_r(mem_read=False, cst_read=True) 51 | nodes_w = set([dst]) 52 | for n_r in nodes_r: 53 | if n_r in current_nodes: 54 | node_n_r = current_nodes[n_r] 55 | else: 56 | node_n_r = (irblock.loc_key, 0, n_r) 57 | if not n_r in in_nodes: 58 | in_nodes[n_r] = node_n_r 59 | 60 | flow_graph.add_node(node_n_r) 61 | for n_w in nodes_w: 62 | node_n_w = (irblock.loc_key, 1, n_w) 63 | out_nodes[n_w] = node_n_w 64 | 65 | flow_graph.add_node(node_n_w) 66 | flow_graph.add_uniq_edge(node_n_r, node_n_w) 67 | 68 | 69 | 70 | def intra_block_flow_raw(lifter, ircfg, flow_graph, irb, in_nodes, out_nodes): 71 | """ 72 | Create data flow for an irbloc using raw IR expressions 73 | """ 74 | current_nodes = {} 75 | for i, assignblk in enumerate(irb): 76 | dict_rw = assignblk.get_rw(cst_read=True) 77 | current_nodes.update(out_nodes) 78 | 79 | # gen mem arg to mem node links 80 | all_mems = set() 81 | for node_w, nodes_r in viewitems(dict_rw): 82 | for n in nodes_r.union([node_w]): 83 | all_mems.update(get_expr_mem(n)) 84 | if not all_mems: 85 | continue 86 | 87 | for n in all_mems: 88 | node_n_w = (hex(assignblk.instr.offset), i, n) 89 | if not n in nodes_r: 90 | continue 91 | o_r = n.ptr.get_r(mem_read=False, cst_read=True) 92 | for n_r in o_r: 93 | if n_r in current_nodes: 94 | node_n_r = current_nodes[n_r] 95 | else: 96 | node_n_r = (hex(assignblk.instr.offset), i, n_r) 97 | current_nodes[n_r] = node_n_r 98 | in_nodes[n_r] = node_n_r 99 | flow_graph.add_uniq_edge(node_n_r, node_n_w) 100 | 101 | # gen data flow links 102 | for node_w, nodes_r in viewitems(dict_rw): 103 | for n_r in nodes_r: 104 | if n_r in current_nodes: 105 | node_n_r = current_nodes[n_r] 106 | else: 107 | node_n_r = (hex(assignblk.instr.offset), i, n_r) 108 | current_nodes[n_r] = node_n_r 109 | in_nodes[n_r] = node_n_r 110 | 111 | flow_graph.add_node(node_n_r) 112 | 113 | node_n_w = (hex(assignblk.instr.offset), i + 1, node_w) 114 | out_nodes[node_w] = node_n_w 115 | 116 | flow_graph.add_node(node_n_w) 117 | flow_graph.add_uniq_edge(node_n_r, node_n_w) 118 | 119 | 120 | 121 | def node2str(node): 122 | out = "%s,%s\\l\\\n%s" % node 123 | return out 124 | 125 | 126 | def gen_function_data_flow_graph(lifter, ircfg, ad, block_flow_cb) -> DiGraph: 127 | ''' 128 | generate data flow graph for a given function 129 | ''' 130 | irblock_0 = None 131 | for irblock in viewvalues(ircfg.blocks): 132 | loc_key = irblock.loc_key 133 | offset = ircfg.loc_db.get_location_offset(loc_key) 134 | # print('{} -> {}'.format(hex(offset), irblock.loc_key)) 135 | if offset == ad: 136 | irblock_0 = irblock 137 | break 138 | assert irblock_0 is not None 139 | flow_graph = DiGraph() 140 | flow_graph.node2str = node2str 141 | 142 | 143 | irb_in_nodes = {} 144 | irb_out_nodes = {} 145 | for label in ircfg.blocks: 146 | irb_in_nodes[label] = {} 147 | irb_out_nodes[label] = {} 148 | 149 | for label, irblock in viewitems(ircfg.blocks): 150 | block_flow_cb(lifter, ircfg, flow_graph, irblock, irb_in_nodes[label], irb_out_nodes[label]) 151 | 152 | # for label in ircfg.blocks: 153 | # print(label) 154 | # print('IN', [str(x) for x in irb_in_nodes[label]]) 155 | # print('OUT', [str(x) for x in irb_out_nodes[label]]) 156 | 157 | # print('*' * 20, 'interblock', '*' * 20) 158 | inter_block_flow(lifter, ircfg, flow_graph, irblock_0.loc_key, irb_in_nodes, irb_out_nodes) 159 | 160 | return flow_graph 161 | 162 | 163 | def ida_dataflow_analysis(function_addr:int, function_name:str, output_dir:str, defuse_only: bool = False) -> None: 164 | 165 | loc_db = LocationDB() 166 | 167 | ###################### IDA specific ####################### 168 | machine = guess_machine() 169 | bin_stream = bin_stream_ida() 170 | 171 | # Populate symbols with ida names 172 | import idautils 173 | for ad, name in idautils.Names(): 174 | if name is None: 175 | continue 176 | loc_db.add_location(name, ad) 177 | 178 | 179 | ###################### Reverse-tool-independent ###################### 180 | 181 | mdis = machine.dis_engine(bin_stream, loc_db=loc_db, dont_dis_nulstart_bloc=True) 182 | mdis.follow_call = True 183 | lifter = machine.lifter_model_call(loc_db=loc_db) 184 | 185 | print('disassembling function: {}:{}'.format(hex(function_addr), function_name)) 186 | asmcfg = mdis.dis_multiblock(function_addr) 187 | 188 | print('generating IR...') 189 | ircfg = lifter.new_ircfg_from_asmcfg(asmcfg) 190 | deadrm = DeadRemoval(lifter) 191 | # deadrm(ircfg) # TODO: 这里会删掉一部分IR,需要研究一下 192 | 193 | with open(os.path.join(output_dir, '{}.asm2ir'.format(function_name)),'w') as f: 194 | # print('\tOFFSET\t| ASM\t| SRC -> DST') 195 | f.write('\tOFFSET\t| ASM\t| SRC -> DST\n') 196 | for lbl, irblock in ircfg.blocks.items(): 197 | insr = [] 198 | for assignblk in irblock: 199 | for dst, src in assignblk.iteritems(): 200 | # print('\t{}\t| {}\t| {} -> {}'.format(hex(assignblk.instr.offset), assignblk.instr, src, dst)) 201 | f.write('\t{}\t| {}\t| {} -> {}\n'.format(hex(assignblk.instr.offset), assignblk.instr, src, dst)) 202 | 203 | if not defuse_only: 204 | block_flow_cb = intra_block_flow_raw # if args.symb else intra_block_flow_symb 205 | 206 | dfg = gen_function_data_flow_graph(lifter, ircfg, function_addr, block_flow_cb) 207 | open(os.path.join(output_dir,'{}_dfg.dot'.format(function_name)), 'w').write(dfg.dot()) 208 | 209 | reaching_defs = ReachingDefinitions(ircfg) 210 | defuse = DiGraphDefUse(reaching_defs) 211 | open(os.path.join(output_dir,'{}_defuse.dot'.format(function_name)), 'w').write(defuse.dot()) 212 | 213 | ''' 214 | 根据block_loc_key + assignblk_idx 可以推算出instr offset,所以这个def-use图也是可以对应回指令的 215 | ''' 216 | LocKeyIdx2InstrOffset = {} 217 | for block in viewvalues(reaching_defs.ircfg.blocks): 218 | for index, assignblk in enumerate(block): 219 | LocKeyIdx2InstrOffset['{}_{}'.format(block.loc_key, index)] = hex(assignblk.instr.offset) 220 | 221 | # print(['{}:{}'.format(key,LocKeyIdx2InstrOffset[key]) for key in LocKeyIdx2InstrOffset]) 222 | open(os.path.join(output_dir,'{}_LocKeyIdx2InstrOffset.map'.format(function_name)), 'w').write( 223 | '\n'.join(['{}:{}'.format(key,LocKeyIdx2InstrOffset[key]) for key in LocKeyIdx2InstrOffset])) 224 | 225 | 226 | def miasm_dataflow_analysis(function_addr:int, function_name:str, output_dir:str, filepath:str, arch:str = "X86_64", defuse_only: bool = False) -> None: 227 | 228 | bin_stream = Container.from_stream(open(filepath, 'rb'), loc_db).bin_stream 229 | machine = Machine(arch) 230 | 231 | loc_db = LocationDB() 232 | mdis = machine.dis_engine(bin_stream, loc_db=loc_db, dont_dis_nulstart_bloc=True) 233 | mdis.follow_call = True 234 | lifter = machine.lifter_model_call(loc_db=loc_db) 235 | 236 | print('disassembling function: {}:{}'.format(hex(function_addr), function_name)) 237 | asmcfg = mdis.dis_multiblock(function_addr) 238 | 239 | print('generating IR...') 240 | ircfg = lifter.new_ircfg_from_asmcfg(asmcfg) 241 | deadrm = DeadRemoval(lifter) 242 | # deadrm(ircfg) # TODO: 这里会删掉一部分IR,需要研究一下 243 | 244 | with open(os.path.join(output_dir, '{}.asm2ir'.format(function_name)),'w') as f: 245 | # print('\tOFFSET\t| ASM\t| SRC -> DST') 246 | f.write('\tOFFSET\t| ASM\t| SRC -> DST\n') 247 | for lbl, irblock in ircfg.blocks.items(): 248 | insr = [] 249 | for assignblk in irblock: 250 | for dst, src in assignblk.iteritems(): 251 | # print('\t{}\t| {}\t| {} -> {}'.format(hex(assignblk.instr.offset), assignblk.instr, src, dst)) 252 | f.write('\t{}\t| {}\t| {} -> {}\n'.format(hex(assignblk.instr.offset), assignblk.instr, src, dst)) 253 | 254 | if not defuse_only: 255 | block_flow_cb = intra_block_flow_raw # if args.symb else intra_block_flow_symb 256 | 257 | dfg = gen_function_data_flow_graph(lifter, ircfg, function_addr, block_flow_cb) 258 | open(os.path.join(output_dir,'{}_dfg.dot'.format(function_name)), 'w').write(dfg.dot()) 259 | 260 | reaching_defs = ReachingDefinitions(ircfg) 261 | defuse = DiGraphDefUse(reaching_defs) 262 | open(os.path.join(output_dir,'{}_defuse.dot'.format(function_name)), 'w').write(defuse.dot()) 263 | 264 | ''' 265 | 根据block_loc_key + assignblk_idx 可以推算出instr offset,所以这个def-use图也是可以对应回指令的 266 | ''' 267 | LocKeyIdx2InstrOffset = {} 268 | for block in viewvalues(reaching_defs.ircfg.blocks): 269 | for index, assignblk in enumerate(block): 270 | LocKeyIdx2InstrOffset['{}_{}'.format(block.loc_key, index)] = hex(assignblk.instr.offset) 271 | 272 | # print(['{}:{}'.format(key,LocKeyIdx2InstrOffset[key]) for key in LocKeyIdx2InstrOffset]) 273 | open(os.path.join(output_dir,'{}_LocKeyIdx2InstrOffset.map'.format(function_name)), 'w').write( 274 | '\n'.join(['{}:{}'.format(key,LocKeyIdx2InstrOffset[key]) for key in LocKeyIdx2InstrOffset])) 275 | 276 | 277 | -------------------------------------------------------------------------------- /testcase/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Learner0x5a/DataFlowAnalysis-miasm/c9461672af8eab4016eb407e12b9da3d16d377cc/testcase/test -------------------------------------------------------------------------------- /testcase/test-idapython.py: -------------------------------------------------------------------------------- 1 | import idc 2 | import idautils 3 | import idaapi 4 | import ida_pro 5 | import ida_auto 6 | ida_auto.auto_wait() 7 | 8 | 9 | for func in idautils.Functions(): 10 | 11 | func_name = idc.get_func_name(func) 12 | print(hex(func),':',func_name) 13 | 14 | 15 | 16 | 17 | ida_pro.qexit(0) 18 | -------------------------------------------------------------------------------- /testcase/test-miasm.py: -------------------------------------------------------------------------------- 1 | import idc 2 | import idautils 3 | import idaapi 4 | import ida_pro 5 | import ida_auto 6 | ida_auto.auto_wait() 7 | 8 | 9 | from miasm.analysis.binary import Container 10 | from miasm.core.asmblock import log_asmblock, AsmCFG 11 | from miasm.core.interval import interval 12 | from miasm.analysis.machine import Machine 13 | from miasm.analysis.data_flow import \ 14 | DiGraphDefUse, ReachingDefinitions, load_from_int 15 | from miasm.expression.simplifications import expr_simp 16 | from miasm.analysis.ssa import SSADiGraph 17 | from miasm.ir.ir import AssignBlock, IRBlock 18 | from miasm.analysis.simplifier import IRCFGSimplifierCommon, IRCFGSimplifierSSA 19 | from miasm.core.locationdb import LocationDB 20 | 21 | print("[+] miasm loading success.") 22 | 23 | ida_pro.qexit(0) 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /testcase/test.cpp: -------------------------------------------------------------------------------- 1 | // C++ program to demonstrate 2 | // accessing of data members 3 | 4 | #include 5 | using namespace std; 6 | class Geeks 7 | { 8 | // Access specifier 9 | public: 10 | 11 | // Data Members 12 | string geekname; 13 | 14 | // Member Functions() 15 | void printname() 16 | { 17 | cout << "Geekname is: " << geekname; 18 | } 19 | }; 20 | 21 | int main() { 22 | 23 | // Declare an object of class geeks 24 | Geeks obj1; 25 | 26 | // accessing data member 27 | obj1.geekname = "Abhi"; 28 | 29 | // accessing member function 30 | obj1.printname(); 31 | return 0; 32 | } 33 | 34 | -------------------------------------------------------------------------------- /testcase/test.dump: -------------------------------------------------------------------------------- 1 | 2 | test: file format elf64-x86-64 3 | 4 | 5 | Disassembly of section .init: 6 | 7 | 0000000000001000 <_init>: 8 | 1000: 48 83 ec 08 sub $0x8,%rsp 9 | 1004: 48 8b 05 dd 2f 00 00 mov 0x2fdd(%rip),%rax # 3fe8 <__gmon_start__> 10 | 100b: 48 85 c0 test %rax,%rax 11 | 100e: 74 02 je 1012 <_init+0x12> 12 | 1010: ff d0 callq *%rax 13 | 1012: 48 83 c4 08 add $0x8,%rsp 14 | 1016: c3 retq 15 | 16 | Disassembly of section .plt: 17 | 18 | 0000000000001020 <.plt>: 19 | 1020: ff 35 e2 2f 00 00 pushq 0x2fe2(%rip) # 4008 <_GLOBAL_OFFSET_TABLE_+0x8> 20 | 1026: ff 25 e4 2f 00 00 jmpq *0x2fe4(%rip) # 4010 <_GLOBAL_OFFSET_TABLE_+0x10> 21 | 102c: 0f 1f 40 00 nopl 0x0(%rax) 22 | 23 | 0000000000001030 <_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED1Ev@plt>: 24 | 1030: ff 25 e2 2f 00 00 jmpq *0x2fe2(%rip) # 4018 <_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED1Ev@GLIBCXX_3.4.21> 25 | 1036: 68 00 00 00 00 pushq $0x0 26 | 103b: e9 e0 ff ff ff jmpq 1020 <.plt> 27 | 28 | 0000000000001040 <__cxa_atexit@plt>: 29 | 1040: ff 25 da 2f 00 00 jmpq *0x2fda(%rip) # 4020 <__cxa_atexit@GLIBC_2.2.5> 30 | 1046: 68 01 00 00 00 pushq $0x1 31 | 104b: e9 d0 ff ff ff jmpq 1020 <.plt> 32 | 33 | 0000000000001050 <_ZStlsIcSt11char_traitsIcESaIcEERSt13basic_ostreamIT_T0_ES7_RKNSt7__cxx1112basic_stringIS4_S5_T1_EE@plt>: 34 | 1050: ff 25 d2 2f 00 00 jmpq *0x2fd2(%rip) # 4028 <_ZStlsIcSt11char_traitsIcESaIcEERSt13basic_ostreamIT_T0_ES7_RKNSt7__cxx1112basic_stringIS4_S5_T1_EE@GLIBCXX_3.4.21> 35 | 1056: 68 02 00 00 00 pushq $0x2 36 | 105b: e9 c0 ff ff ff jmpq 1020 <.plt> 37 | 38 | 0000000000001060 <_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@plt>: 39 | 1060: ff 25 ca 2f 00 00 jmpq *0x2fca(%rip) # 4030 <_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@GLIBCXX_3.4> 40 | 1066: 68 03 00 00 00 pushq $0x3 41 | 106b: e9 b0 ff ff ff jmpq 1020 <.plt> 42 | 43 | 0000000000001070 <_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEaSEPKc@plt>: 44 | 1070: ff 25 c2 2f 00 00 jmpq *0x2fc2(%rip) # 4038 <_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEaSEPKc@GLIBCXX_3.4.21> 45 | 1076: 68 04 00 00 00 pushq $0x4 46 | 107b: e9 a0 ff ff ff jmpq 1020 <.plt> 47 | 48 | 0000000000001080 <_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC1Ev@plt>: 49 | 1080: ff 25 ba 2f 00 00 jmpq *0x2fba(%rip) # 4040 <_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC1Ev@GLIBCXX_3.4.21> 50 | 1086: 68 05 00 00 00 pushq $0x5 51 | 108b: e9 90 ff ff ff jmpq 1020 <.plt> 52 | 53 | 0000000000001090 <_ZNSt8ios_base4InitC1Ev@plt>: 54 | 1090: ff 25 b2 2f 00 00 jmpq *0x2fb2(%rip) # 4048 <_ZNSt8ios_base4InitC1Ev@GLIBCXX_3.4> 55 | 1096: 68 06 00 00 00 pushq $0x6 56 | 109b: e9 80 ff ff ff jmpq 1020 <.plt> 57 | 58 | 00000000000010a0 <_Unwind_Resume@plt>: 59 | 10a0: ff 25 aa 2f 00 00 jmpq *0x2faa(%rip) # 4050 <_Unwind_Resume@GCC_3.0> 60 | 10a6: 68 07 00 00 00 pushq $0x7 61 | 10ab: e9 70 ff ff ff jmpq 1020 <.plt> 62 | 63 | Disassembly of section .plt.got: 64 | 65 | 00000000000010b0 <__cxa_finalize@plt>: 66 | 10b0: ff 25 1a 2f 00 00 jmpq *0x2f1a(%rip) # 3fd0 <__cxa_finalize@GLIBC_2.2.5> 67 | 10b6: 66 90 xchg %ax,%ax 68 | 69 | Disassembly of section .text: 70 | 71 | 00000000000010c0 <_start>: 72 | 10c0: 31 ed xor %ebp,%ebp 73 | 10c2: 49 89 d1 mov %rdx,%r9 74 | 10c5: 5e pop %rsi 75 | 10c6: 48 89 e2 mov %rsp,%rdx 76 | 10c9: 48 83 e4 f0 and $0xfffffffffffffff0,%rsp 77 | 10cd: 50 push %rax 78 | 10ce: 54 push %rsp 79 | 10cf: 4c 8d 05 6a 02 00 00 lea 0x26a(%rip),%r8 # 1340 <__libc_csu_fini> 80 | 10d6: 48 8d 0d 03 02 00 00 lea 0x203(%rip),%rcx # 12e0 <__libc_csu_init> 81 | 10dd: 48 8d 3d c1 00 00 00 lea 0xc1(%rip),%rdi # 11a5
82 | 10e4: ff 15 f6 2e 00 00 callq *0x2ef6(%rip) # 3fe0 <__libc_start_main@GLIBC_2.2.5> 83 | 10ea: f4 hlt 84 | 10eb: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 85 | 86 | 00000000000010f0 : 87 | 10f0: 48 8d 3d 79 2f 00 00 lea 0x2f79(%rip),%rdi # 4070 <__TMC_END__> 88 | 10f7: 48 8d 05 72 2f 00 00 lea 0x2f72(%rip),%rax # 4070 <__TMC_END__> 89 | 10fe: 48 39 f8 cmp %rdi,%rax 90 | 1101: 74 15 je 1118 91 | 1103: 48 8b 05 ce 2e 00 00 mov 0x2ece(%rip),%rax # 3fd8 <_ITM_deregisterTMCloneTable> 92 | 110a: 48 85 c0 test %rax,%rax 93 | 110d: 74 09 je 1118 94 | 110f: ff e0 jmpq *%rax 95 | 1111: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 96 | 1118: c3 retq 97 | 1119: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 98 | 99 | 0000000000001120 : 100 | 1120: 48 8d 3d 49 2f 00 00 lea 0x2f49(%rip),%rdi # 4070 <__TMC_END__> 101 | 1127: 48 8d 35 42 2f 00 00 lea 0x2f42(%rip),%rsi # 4070 <__TMC_END__> 102 | 112e: 48 29 fe sub %rdi,%rsi 103 | 1131: 48 c1 fe 03 sar $0x3,%rsi 104 | 1135: 48 89 f0 mov %rsi,%rax 105 | 1138: 48 c1 e8 3f shr $0x3f,%rax 106 | 113c: 48 01 c6 add %rax,%rsi 107 | 113f: 48 d1 fe sar %rsi 108 | 1142: 74 14 je 1158 109 | 1144: 48 8b 05 a5 2e 00 00 mov 0x2ea5(%rip),%rax # 3ff0 <_ITM_registerTMCloneTable> 110 | 114b: 48 85 c0 test %rax,%rax 111 | 114e: 74 08 je 1158 112 | 1150: ff e0 jmpq *%rax 113 | 1152: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) 114 | 1158: c3 retq 115 | 1159: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 116 | 117 | 0000000000001160 <__do_global_dtors_aux>: 118 | 1160: 80 3d 29 30 00 00 00 cmpb $0x0,0x3029(%rip) # 4190 119 | 1167: 75 2f jne 1198 <__do_global_dtors_aux+0x38> 120 | 1169: 55 push %rbp 121 | 116a: 48 83 3d 5e 2e 00 00 cmpq $0x0,0x2e5e(%rip) # 3fd0 <__cxa_finalize@GLIBC_2.2.5> 122 | 1171: 00 123 | 1172: 48 89 e5 mov %rsp,%rbp 124 | 1175: 74 0c je 1183 <__do_global_dtors_aux+0x23> 125 | 1177: 48 8b 3d e2 2e 00 00 mov 0x2ee2(%rip),%rdi # 4060 <__dso_handle> 126 | 117e: e8 2d ff ff ff callq 10b0 <__cxa_finalize@plt> 127 | 1183: e8 68 ff ff ff callq 10f0 128 | 1188: c6 05 01 30 00 00 01 movb $0x1,0x3001(%rip) # 4190 129 | 118f: 5d pop %rbp 130 | 1190: c3 retq 131 | 1191: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 132 | 1198: c3 retq 133 | 1199: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 134 | 135 | 00000000000011a0 : 136 | 11a0: e9 7b ff ff ff jmpq 1120 137 | 138 | 00000000000011a5
: 139 | 11a5: 55 push %rbp 140 | 11a6: 48 89 e5 mov %rsp,%rbp 141 | 11a9: 53 push %rbx 142 | 11aa: 48 83 ec 28 sub $0x28,%rsp 143 | 11ae: 48 8d 45 d0 lea -0x30(%rbp),%rax 144 | 11b2: 48 89 c7 mov %rax,%rdi 145 | 11b5: e8 e8 00 00 00 callq 12a2 <_ZN5GeeksC1Ev> 146 | 11ba: 48 8d 45 d0 lea -0x30(%rbp),%rax 147 | 11be: 48 8d 35 f9 0e 00 00 lea 0xef9(%rip),%rsi # 20be <_ZNSt8__detailL19_S_invalid_state_idE+0x16> 148 | 11c5: 48 89 c7 mov %rax,%rdi 149 | 11c8: e8 a3 fe ff ff callq 1070 <_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEaSEPKc@plt> 150 | 11cd: 48 8d 45 d0 lea -0x30(%rbp),%rax 151 | 11d1: 48 89 c7 mov %rax,%rdi 152 | 11d4: e8 95 00 00 00 callq 126e <_ZN5Geeks9printnameEv> 153 | 11d9: bb 00 00 00 00 mov $0x0,%ebx 154 | 11de: 48 8d 45 d0 lea -0x30(%rbp),%rax 155 | 11e2: 48 89 c7 mov %rax,%rdi 156 | 11e5: e8 d4 00 00 00 callq 12be <_ZN5GeeksD1Ev> 157 | 11ea: 89 d8 mov %ebx,%eax 158 | 11ec: eb 1a jmp 1208 159 | 11ee: 48 89 c3 mov %rax,%rbx 160 | 11f1: 48 8d 45 d0 lea -0x30(%rbp),%rax 161 | 11f5: 48 89 c7 mov %rax,%rdi 162 | 11f8: e8 c1 00 00 00 callq 12be <_ZN5GeeksD1Ev> 163 | 11fd: 48 89 d8 mov %rbx,%rax 164 | 1200: 48 89 c7 mov %rax,%rdi 165 | 1203: e8 98 fe ff ff callq 10a0 <_Unwind_Resume@plt> 166 | 1208: 48 83 c4 28 add $0x28,%rsp 167 | 120c: 5b pop %rbx 168 | 120d: 5d pop %rbp 169 | 120e: c3 retq 170 | 171 | 000000000000120f <_Z41__static_initialization_and_destruction_0ii>: 172 | 120f: 55 push %rbp 173 | 1210: 48 89 e5 mov %rsp,%rbp 174 | 1213: 48 83 ec 10 sub $0x10,%rsp 175 | 1217: 89 7d fc mov %edi,-0x4(%rbp) 176 | 121a: 89 75 f8 mov %esi,-0x8(%rbp) 177 | 121d: 83 7d fc 01 cmpl $0x1,-0x4(%rbp) 178 | 1221: 75 32 jne 1255 <_Z41__static_initialization_and_destruction_0ii+0x46> 179 | 1223: 81 7d f8 ff ff 00 00 cmpl $0xffff,-0x8(%rbp) 180 | 122a: 75 29 jne 1255 <_Z41__static_initialization_and_destruction_0ii+0x46> 181 | 122c: 48 8d 3d 5e 2f 00 00 lea 0x2f5e(%rip),%rdi # 4191 <_ZStL8__ioinit> 182 | 1233: e8 58 fe ff ff callq 1090 <_ZNSt8ios_base4InitC1Ev@plt> 183 | 1238: 48 8d 15 21 2e 00 00 lea 0x2e21(%rip),%rdx # 4060 <__dso_handle> 184 | 123f: 48 8d 35 4b 2f 00 00 lea 0x2f4b(%rip),%rsi # 4191 <_ZStL8__ioinit> 185 | 1246: 48 8b 05 ab 2d 00 00 mov 0x2dab(%rip),%rax # 3ff8 <_ZNSt8ios_base4InitD1Ev@GLIBCXX_3.4> 186 | 124d: 48 89 c7 mov %rax,%rdi 187 | 1250: e8 eb fd ff ff callq 1040 <__cxa_atexit@plt> 188 | 1255: 90 nop 189 | 1256: c9 leaveq 190 | 1257: c3 retq 191 | 192 | 0000000000001258 <_GLOBAL__sub_I_main>: 193 | 1258: 55 push %rbp 194 | 1259: 48 89 e5 mov %rsp,%rbp 195 | 125c: be ff ff 00 00 mov $0xffff,%esi 196 | 1261: bf 01 00 00 00 mov $0x1,%edi 197 | 1266: e8 a4 ff ff ff callq 120f <_Z41__static_initialization_and_destruction_0ii> 198 | 126b: 5d pop %rbp 199 | 126c: c3 retq 200 | 126d: 90 nop 201 | 202 | 000000000000126e <_ZN5Geeks9printnameEv>: 203 | 126e: 55 push %rbp 204 | 126f: 48 89 e5 mov %rsp,%rbp 205 | 1272: 48 83 ec 10 sub $0x10,%rsp 206 | 1276: 48 89 7d f8 mov %rdi,-0x8(%rbp) 207 | 127a: 48 8d 35 2f 0e 00 00 lea 0xe2f(%rip),%rsi # 20b0 <_ZNSt8__detailL19_S_invalid_state_idE+0x8> 208 | 1281: 48 8d 3d f8 2d 00 00 lea 0x2df8(%rip),%rdi # 4080 <_ZSt4cout@@GLIBCXX_3.4> 209 | 1288: e8 d3 fd ff ff callq 1060 <_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@plt> 210 | 128d: 48 89 c2 mov %rax,%rdx 211 | 1290: 48 8b 45 f8 mov -0x8(%rbp),%rax 212 | 1294: 48 89 c6 mov %rax,%rsi 213 | 1297: 48 89 d7 mov %rdx,%rdi 214 | 129a: e8 b1 fd ff ff callq 1050 <_ZStlsIcSt11char_traitsIcESaIcEERSt13basic_ostreamIT_T0_ES7_RKNSt7__cxx1112basic_stringIS4_S5_T1_EE@plt> 215 | 129f: 90 nop 216 | 12a0: c9 leaveq 217 | 12a1: c3 retq 218 | 219 | 00000000000012a2 <_ZN5GeeksC1Ev>: 220 | 12a2: 55 push %rbp 221 | 12a3: 48 89 e5 mov %rsp,%rbp 222 | 12a6: 48 83 ec 10 sub $0x10,%rsp 223 | 12aa: 48 89 7d f8 mov %rdi,-0x8(%rbp) 224 | 12ae: 48 8b 45 f8 mov -0x8(%rbp),%rax 225 | 12b2: 48 89 c7 mov %rax,%rdi 226 | 12b5: e8 c6 fd ff ff callq 1080 <_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC1Ev@plt> 227 | 12ba: 90 nop 228 | 12bb: c9 leaveq 229 | 12bc: c3 retq 230 | 12bd: 90 nop 231 | 232 | 00000000000012be <_ZN5GeeksD1Ev>: 233 | 12be: 55 push %rbp 234 | 12bf: 48 89 e5 mov %rsp,%rbp 235 | 12c2: 48 83 ec 10 sub $0x10,%rsp 236 | 12c6: 48 89 7d f8 mov %rdi,-0x8(%rbp) 237 | 12ca: 48 8b 45 f8 mov -0x8(%rbp),%rax 238 | 12ce: 48 89 c7 mov %rax,%rdi 239 | 12d1: e8 5a fd ff ff callq 1030 <_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED1Ev@plt> 240 | 12d6: 90 nop 241 | 12d7: c9 leaveq 242 | 12d8: c3 retq 243 | 12d9: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 244 | 245 | 00000000000012e0 <__libc_csu_init>: 246 | 12e0: 41 57 push %r15 247 | 12e2: 49 89 d7 mov %rdx,%r15 248 | 12e5: 41 56 push %r14 249 | 12e7: 49 89 f6 mov %rsi,%r14 250 | 12ea: 41 55 push %r13 251 | 12ec: 41 89 fd mov %edi,%r13d 252 | 12ef: 41 54 push %r12 253 | 12f1: 4c 8d 25 b0 2a 00 00 lea 0x2ab0(%rip),%r12 # 3da8 <__frame_dummy_init_array_entry> 254 | 12f8: 55 push %rbp 255 | 12f9: 48 8d 2d b8 2a 00 00 lea 0x2ab8(%rip),%rbp # 3db8 <__init_array_end> 256 | 1300: 53 push %rbx 257 | 1301: 4c 29 e5 sub %r12,%rbp 258 | 1304: 48 83 ec 08 sub $0x8,%rsp 259 | 1308: e8 f3 fc ff ff callq 1000 <_init> 260 | 130d: 48 c1 fd 03 sar $0x3,%rbp 261 | 1311: 74 1b je 132e <__libc_csu_init+0x4e> 262 | 1313: 31 db xor %ebx,%ebx 263 | 1315: 0f 1f 00 nopl (%rax) 264 | 1318: 4c 89 fa mov %r15,%rdx 265 | 131b: 4c 89 f6 mov %r14,%rsi 266 | 131e: 44 89 ef mov %r13d,%edi 267 | 1321: 41 ff 14 dc callq *(%r12,%rbx,8) 268 | 1325: 48 83 c3 01 add $0x1,%rbx 269 | 1329: 48 39 dd cmp %rbx,%rbp 270 | 132c: 75 ea jne 1318 <__libc_csu_init+0x38> 271 | 132e: 48 83 c4 08 add $0x8,%rsp 272 | 1332: 5b pop %rbx 273 | 1333: 5d pop %rbp 274 | 1334: 41 5c pop %r12 275 | 1336: 41 5d pop %r13 276 | 1338: 41 5e pop %r14 277 | 133a: 41 5f pop %r15 278 | 133c: c3 retq 279 | 133d: 0f 1f 00 nopl (%rax) 280 | 281 | 0000000000001340 <__libc_csu_fini>: 282 | 1340: c3 retq 283 | 284 | Disassembly of section .fini: 285 | 286 | 0000000000001344 <_fini>: 287 | 1344: 48 83 ec 08 sub $0x8,%rsp 288 | 1348: 48 83 c4 08 add $0x8,%rsp 289 | 134c: c3 retq 290 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This file comes from the official miasm repo 3 | miasm/example/ida/utils.py 4 | ''' 5 | from __future__ import print_function 6 | from builtins import map 7 | import idaapi 8 | from idc import * 9 | 10 | from miasm.analysis.machine import Machine 11 | from miasm.ir.translators import Translator 12 | import miasm.expression.expression as m2_expr 13 | 14 | def guess_machine(addr=None): 15 | "Return an instance of Machine corresponding to the IDA guessed processor" 16 | 17 | processor_name = get_inf_attr(INF_PROCNAME) 18 | info = idaapi.get_inf_structure() 19 | 20 | if info.is_64bit(): 21 | size = 64 22 | elif info.is_32bit(): 23 | size = 32 24 | else: 25 | size = None 26 | 27 | if processor_name == "metapc": 28 | size2machine = { 29 | 64: "x86_64", 30 | 32: "x86_32", 31 | None: "x86_16", 32 | } 33 | 34 | machine = Machine(size2machine[size]) 35 | 36 | elif processor_name == "ARM": 37 | # TODO ARM/thumb 38 | # hack for thumb: set armt = True in globals :/ 39 | # set bigendiant = True is bigendian 40 | # Thumb, size, endian 41 | info2machine = {(True, 32, True): "armtb", 42 | (True, 32, False): "armtl", 43 | (False, 32, True): "armb", 44 | (False, 32, False): "arml", 45 | (False, 64, True): "aarch64b", 46 | (False, 64, False): "aarch64l", 47 | } 48 | 49 | # Get T reg to detect arm/thumb function 50 | # Default is arm 51 | is_armt = False 52 | if addr is not None: 53 | t_reg = get_sreg(addr, "T") 54 | is_armt = t_reg == 1 55 | 56 | is_bigendian = info.is_be() 57 | infos = (is_armt, size, is_bigendian) 58 | if not infos in info2machine: 59 | raise NotImplementedError('not fully functional') 60 | machine = Machine(info2machine[infos]) 61 | 62 | from miasm.analysis.disasm_cb import guess_funcs, guess_multi_cb 63 | from miasm.analysis.disasm_cb import arm_guess_subcall, arm_guess_jump_table 64 | guess_funcs.append(arm_guess_subcall) 65 | guess_funcs.append(arm_guess_jump_table) 66 | 67 | elif processor_name == "msp430": 68 | machine = Machine("msp430") 69 | elif processor_name == "mipsl": 70 | machine = Machine("mips32l") 71 | elif processor_name == "mipsb": 72 | machine = Machine("mips32b") 73 | elif processor_name == "PPC": 74 | machine = Machine("ppc32b") 75 | else: 76 | print(repr(processor_name)) 77 | raise NotImplementedError('not fully functional') 78 | 79 | return machine 80 | 81 | 82 | class TranslatorIDA(Translator): 83 | """Translate a Miasm expression to a IDA colored string""" 84 | 85 | # Implemented language 86 | __LANG__ = "ida_w_color" 87 | 88 | def __init__(self, loc_db=None, **kwargs): 89 | super(TranslatorIDA, self).__init__(**kwargs) 90 | self.loc_db = loc_db 91 | 92 | def str_protected_child(self, child, parent): 93 | return ("(%s)" % ( 94 | self.from_expr(child)) if m2_expr.should_parenthesize_child(child, parent) 95 | else self.from_expr(child) 96 | ) 97 | 98 | def from_ExprInt(self, expr): 99 | return idaapi.COLSTR(str(expr), idaapi.SCOLOR_NUMBER) 100 | 101 | def from_ExprId(self, expr): 102 | out = idaapi.COLSTR(str(expr), idaapi.SCOLOR_REG) 103 | return out 104 | 105 | def from_ExprLoc(self, expr): 106 | if self.loc_db is not None: 107 | out = self.loc_db.pretty_str(expr.loc_key) 108 | else: 109 | out = str(expr) 110 | out = idaapi.COLSTR(out, idaapi.SCOLOR_REG) 111 | return out 112 | 113 | def from_ExprMem(self, expr): 114 | ptr = self.from_expr(expr.ptr) 115 | size = idaapi.COLSTR('@' + str(expr.size), idaapi.SCOLOR_RPTCMT) 116 | out = '%s[%s]' % (size, ptr) 117 | return out 118 | 119 | def from_ExprSlice(self, expr): 120 | base = self.from_expr(expr.arg) 121 | start = idaapi.COLSTR(str(expr.start), idaapi.SCOLOR_RPTCMT) 122 | stop = idaapi.COLSTR(str(expr.stop), idaapi.SCOLOR_RPTCMT) 123 | out = "(%s)[%s:%s]" % (base, start, stop) 124 | return out 125 | 126 | def from_ExprCompose(self, expr): 127 | out = "{" 128 | out += ", ".join(["%s, %s, %s" % (self.from_expr(subexpr), 129 | idaapi.COLSTR(str(idx), idaapi.SCOLOR_RPTCMT), 130 | idaapi.COLSTR(str(idx + subexpr.size), idaapi.SCOLOR_RPTCMT)) 131 | for idx, subexpr in expr.iter_args()]) 132 | out += "}" 133 | return out 134 | 135 | def from_ExprCond(self, expr): 136 | cond = self.str_protected_child(expr.cond, expr) 137 | src1 = self.from_expr(expr.src1) 138 | src2 = self.from_expr(expr.src2) 139 | out = "%s?(%s,%s)" % (cond, src1, src2) 140 | return out 141 | 142 | def from_ExprOp(self, expr): 143 | if expr._op == '-': # Unary minus 144 | return '-' + self.str_protected_child(expr._args[0], expr) 145 | if expr.is_associative() or expr.is_infix(): 146 | return (' ' + expr._op + ' ').join([self.str_protected_child(arg, expr) 147 | for arg in expr._args]) 148 | return (expr._op + '(' + 149 | ', '.join( 150 | self.from_expr(arg) 151 | for arg in expr._args 152 | ) + ')') 153 | 154 | def from_ExprAssign(self, expr): 155 | return "%s = %s" % tuple(map(expr.from_expr, (expr.dst, expr.src))) 156 | 157 | 158 | 159 | def expr2colorstr(expr, loc_db): 160 | """Colorize an Expr instance for IDA 161 | @expr: Expr instance to colorize 162 | @loc_db: LocationDB instance 163 | """ 164 | 165 | translator = TranslatorIDA(loc_db=loc_db) 166 | return translator.from_expr(expr) 167 | 168 | 169 | class translatorForm(idaapi.Form): 170 | """Translator Form. 171 | Offer a ComboBox with available languages (ie. IR translators) and the 172 | corresponding translation.""" 173 | 174 | flags = (idaapi.Form.MultiLineTextControl.TXTF_FIXEDFONT | \ 175 | idaapi.Form.MultiLineTextControl.TXTF_READONLY) 176 | 177 | def __init__(self, expr): 178 | "@expr: Expr instance" 179 | 180 | # Init 181 | self.languages = list(Translator.available_languages()) 182 | self.expr = expr 183 | 184 | # Initial translation 185 | text = Translator.to_language(self.languages[0]).from_expr(self.expr) 186 | 187 | # Create the Form 188 | idaapi.Form.__init__(self, r"""STARTITEM 0 189 | Python Expression 190 | {FormChangeCb} 191 | 192 | 193 | """, { 194 | 'result': idaapi.Form.MultiLineTextControl(text=text, 195 | flags=translatorForm.flags), 196 | 'cbLanguage': idaapi.Form.DropdownListControl( 197 | items=self.languages, 198 | readonly=True, 199 | selval=0), 200 | 'FormChangeCb': idaapi.Form.FormChangeCb(self.OnFormChange), 201 | }) 202 | 203 | def OnFormChange(self, fid): 204 | if fid == self.cbLanguage.id: 205 | # Display the Field (may be hide) 206 | self.ShowField(self.result, True) 207 | 208 | # Translate the expression 209 | dest_lang = self.languages[self.GetControlValue(self.cbLanguage)] 210 | try: 211 | text = Translator.to_language(dest_lang).from_expr(self.expr) 212 | except Exception as error: 213 | self.ShowField(self.result, False) 214 | return -1 215 | 216 | # Update the form 217 | self.SetControlValue(self.result, 218 | idaapi.textctrl_info_t(text=str(text), 219 | flags=translatorForm.flags)) 220 | return 1 --------------------------------------------------------------------------------