├── LICENSE ├── README.md ├── binnavi_db.py ├── bn_disasm.py ├── elf_loader.py ├── fREedom.py ├── loader.py ├── pe_loader.py ├── postgresql_tables.sql └── x86_disasm.py /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. 
This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 
176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 
287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | 294 | Copyright (C) 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | fREedom is a primitive attempt to provide an IDA Pro independent means 2 | of extracting disassembly information from executables for use with 3 | binnavi (https://github.com/google/binnavi). 4 | 5 | WARNING: This software is in its infancy 6 | 7 | Background: binnavi is a graphical "binary navigator" useful for reverse 8 | engineering software. binnavi does not contain its own disassembler, instead 9 | relying upon the capabilities of the commercial disassembler, IDA Pro. 10 | binnavi ships with an IDA plugin that extracts required information from an 11 | existing IDA database into a set of binnavi compatible, Postgres tables. 
The 12 | amount of work that IDA does on behalf of binnavi is not trivial. There is 13 | a reason there are no open source competitors to IDA. Eliminating binnavi's 14 | dependency on IDA is not quite as trivial as slapping some glue code on top 15 | of a disassembly framework like Capstone (http://www.capstone-engine.org/) 16 | and calling it a day. This project takes some small steps in that direction. 17 | It is thrown together, not well thought out, and it has a long way to go. 18 | 19 | Basic use: 20 | * Use the provided postgres script to set up the initial postgres database. 21 | * Configure your postgres instance appropriately (pg_hba.conf ...); a quick connectivity check is shown at the end of this README. 22 | * `python fREedom.py --database=my_binnavi --user=someone --pass=itsasecret --dbhost=127.0.0.1 --binary=foo.exe` 23 | * Launch binnavi to browse foo.exe 24 | 25 | What's here: 26 | * binnavi's postgres script to build the required Postgres database 27 | * Python scripts to extract disassembly information from PE32, PE32+, and ELF binaries containing 28 | x86 or x86_64 code. 29 | 30 | What's not here: 31 | * A Postgres tutorial (see http://www.postgresql.org/). Among other things, 32 | you'll need psycopg2. 33 | * A Capstone installation tutorial (see http://www.capstone-engine.org/) 34 | * Support for anything other than PE32, PE32+, and ELF 35 | * Support for anything other than x86 and x86_64 36 | 37 | Limitations: 38 | * fREedom's disassembly engine is not as thorough as IDA's, lacking many of 39 | the heuristics that IDA uses to identify code. 40 | * There is currently no support for known data types and library function 41 | signatures. binnavi's type system is complex and not well documented. 42 | Substantial effort will be required to process development header files from 43 | many platforms in order to incorporate this information into fREedom-generated 44 | disassemblies. 45 | * Parsers (crude at best) are included for only PE32, PE32+, and ELF. 46 | * Disassembly generators are included for only x86 and x86_64. 47 | * My python skills are not good.
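Before running fREedom.py, it can save some head-scratching to confirm that psycopg2 is installed and that postgres actually accepts the credentials you plan to pass on the command line. A minimal check (the database name and credentials below are the placeholders from the example above; substitute your own):

```python
# Connectivity sanity check -- run this before fREedom.py.
# Assumes the 'my_binnavi' database already exists and that pg_hba.conf
# allows password logins from this host.
import psycopg2

conn = psycopg2.connect("dbname='my_binnavi' user='someone' "
                        "host='127.0.0.1' password='itsasecret'")
print(conn.server_version)   # any number printed means the connection works
conn.close()
```

If this fails, fix the postgres side first; binnavi_db.py builds an equivalent connection string from the same values.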
48 | -------------------------------------------------------------------------------- /binnavi_db.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | The database interface for a stand-alone binnavi compatible disassembler 5 | ''' 6 | 7 | __author__ = "Chris Eagle" 8 | __copyright__ = "Copyright 2015, Chris Eagle" 9 | __credits__ = ["Chris Eagle"] 10 | __license__ = "GPL" 11 | __version__ = "2.0" 12 | __maintainer__ = "Chris Eagle" 13 | __email__ = "cseagle@gmail.com" 14 | __status__ = "Use at your own risk" 15 | 16 | import sys 17 | import traceback 18 | import struct 19 | import hashlib 20 | import psycopg2 21 | import capstone 22 | import bn_disasm 23 | 24 | FUNCTION_TYPES = {'NORMAL':0, 'LIBRARY':1, 'IMPORTED':2, 'THUNK':3, 'INVALID':4,} 25 | 26 | class binnavi_db(object): 27 | 28 | def __init__(self, db, user, passwd, host='localhost'): 29 | # try: 30 | self.conn = psycopg2.connect("dbname='%s' user='%s' host='%s' password='%s'" % (db, user, host, passwd)) 31 | self.create_empty_tables() 32 | # except Exception, e: 33 | # raise Exception("db connect fail: %s:%s" % (type(e), e.message)) 34 | 35 | def export(self, module_data): 36 | return self.add_module(module_data) 37 | 38 | def add_functions(self, curs, id, module_data): 39 | for addr in module_data.call_targets: 40 | name = module_data.names[addr] 41 | named = not name.startswith('sub_') 42 | demangled = None #demangle(name) 43 | ftype = FUNCTION_TYPES['NORMAL'] #NORMAL 44 | if addr in module_data.loader.imports_by_addr: 45 | ftype = FUNCTION_TYPES['IMPORTED'] 46 | elif addr in module_data.thunks: 47 | ftype = FUNCTION_TYPES['THUNK'] 48 | #not working yet, but if a function has a stack frame, it will be named: "__SF%x" % addr 49 | stkframe = None 50 | if ("__SF%x" % addr) in module_data.types: 51 | stkframe = module_data.types["__SF%x" % addr].id 52 | curs.execute("insert into ex_%d_functions values (%%s, %%s, %%s, %%s, %%s, %%s, %%s, %%s);" % id, 53 | (addr, name, demangled, named, ftype, module_data.loader.name, stkframe, None)) 54 | 55 | def add_instructions(self, curs, id, module_data): 56 | for addr in module_data.visited: 57 | insn = module_data.insts[addr] 58 | curs.execute("insert into ex_%d_instructions values (%%s, %%s, %%s);" % id, (addr, insn.mnemonic, insn.bytes)) 59 | 60 | #called from inside a with block already, so take a cursor from the caller 61 | #computes the basic block members from the given start address 62 | def add_basic_block_instructions(self, curs, id, module_data): 63 | for addr in module_data.visited: 64 | i = module_data.insts[addr] 65 | if hasattr(i, "bb"): 66 | for b in i.bb: 67 | curs.execute("insert into ex_%d_basic_block_instructions values (%%s, %%s, %%s);" % id, (b.bid, addr, b.seq)) 68 | 69 | # also, build the cfg while we're at it 70 | def add_basic_blocks(self, curs, id, module_data): 71 | for addr,bb in module_data.basic_blocks.iteritems(): 72 | for block in bb: 73 | curs.execute("insert into ex_%d_basic_blocks values (%%s, %%s, %%s);" % id, (block[0], block[1], addr)) 74 | 75 | def drop_table(self, curs, table): 76 | curs.execute("drop table if exists %s cascade;" % table) 77 | 78 | def delete_raw_module(self, curs, id): 79 | self.drop_table(curs, "ex_%d_address_comments" % id) 80 | self.drop_table(curs, "ex_%d_address_references" % id) 81 | self.drop_table(curs, "ex_%d_expression_substitutions" % id) 82 | self.drop_table(curs, "ex_%d_operands" % id) 83 | self.drop_table(curs, "ex_%d_expression_tree_nodes" % id) 84 |
self.drop_table(curs, "ex_%d_expression_trees" % id) 85 | self.drop_table(curs, "ex_%d_expression_nodes" % id) 86 | self.drop_table(curs, "ex_%d_control_flow_graphs" % id) 87 | self.drop_table(curs, "ex_%d_callgraph" % id) 88 | self.drop_table(curs, "ex_%d_basic_block_instructions" % id) 89 | self.drop_table(curs, "ex_%d_instructions" % id) 90 | self.drop_table(curs, "ex_%d_basic_blocks" % id) 91 | self.drop_table(curs, "ex_%d_functions" % id) 92 | self.drop_table(curs, "ex_%d_type_renderers" % id) 93 | self.drop_table(curs, "ex_%d_base_types" % id) 94 | self.drop_table(curs, "ex_%d_expression_type_instances" % id) 95 | self.drop_table(curs, "ex_%d_expression_types" % id) 96 | self.drop_table(curs, "ex_%d_types" % id) 97 | self.drop_table(curs, "ex_%d_type_instances" % id) 98 | self.drop_table(curs, "ex_%d_sections" % id) 99 | self.drop_table(curs, "ex_%d_type_substitution_paths" % id) 100 | 101 | def create_raw_module(self, curs, id): 102 | curs.execute('create table ex_%d_functions ("address" bigint not null, "name" text not null,"demangled_name" text null default null,"has_real_name" boolean not null,"type" int not null default 0 check( "type" in ( 0, 1, 2, 3, 4 )),"module_name" text null default null,"stack_frame" int null default null,"prototype" int null default null);' % id) 103 | curs.execute('create table ex_%d_basic_blocks ("id" int not null,"parent_function" bigint not null,"address" bigint not null);' % id) 104 | curs.execute('create table ex_%d_instructions ("address" bigint not null,"mnemonic" varchar( 32 ) not null,"data" bytea not null);' % id) 105 | curs.execute('create table ex_%d_basic_block_instructions ("basic_block_id" int not null,"instruction" bigint not null,"sequence" int not null);' % id) 106 | curs.execute('create table ex_%d_callgraph ("id" serial,"source" bigint not null,"source_basic_block_id" int not null,"source_address" bigint not null,"destination" bigint not null);' % id) 107 | curs.execute('create table ex_%d_control_flow_graphs ("id" serial,"parent_function" bigint not null,"source" int not null,"destination" int not null,"type" int not null default 0 check( "type" in ( 0, 1, 2, 3 )));' % id) 108 | curs.execute('create table ex_%d_expression_trees ("id" serial);' % id) 109 | curs.execute('create table ex_%d_expression_nodes ("id" serial,"type" int not null default 0 check( "type" >= 0 and "type" <= 7 ),"symbol" varchar( 256 ),"immediate" bigint,"position" int,"parent_id" int check( "id" > "parent_id" ));' % id) 110 | curs.execute('create table ex_%d_expression_tree_nodes ("expression_tree_id" int not null,"expression_node_id" int not null);' % id) 111 | curs.execute('create table ex_%d_operands ("address" bigint not null,"expression_tree_id" int not null,"position" int not null);' % id) 112 | curs.execute('create table ex_%d_expression_substitutions ("id" serial,"address" bigint not null,"position" int not null,"expression_node_id" int not null,"replacement" text not null);' % id) 113 | curs.execute('create table ex_%d_address_references ("address" bigint not null,"position" int null,"expression_node_id" int null,"destination" bigint not null,"type" int not null default 0 check( "type" >= 0 and "type" <= 8 ));' % id) 114 | curs.execute('create table ex_%d_address_comments ("address" bigint not null,"comment" text not null);' % id) 115 | curs.execute('drop type if exists ex_%d_type_category;' % id) 116 | curs.execute("create type ex_%d_type_category as enum ('atomic', 'pointer', 'array','struct', 'union', 'function_pointer');" % id) 117 | 
curs.execute('create table ex_%d_base_types ("id" integer not null,"name" text not null,"size" integer not null,"pointer" integer,"signed" bool,"category" ex_%d_type_category not null);' % (id, id)) 118 | curs.execute('create table ex_%d_types ("id" serial not null,"name" text not null,"base_type" integer not null,"parent_id" integer,"offset" integer,"argument" integer,"number_of_elements" integer);' % id) 119 | curs.execute('drop type if exists ex_%d_type_renderers_renderer_type;' % id) 120 | curs.execute("create type ex_%d_type_renderers_renderer_type as enum ('integer','floating point', 'boolean', 'ascii', 'utf8', 'utf16');" % id) 121 | curs.execute('create table ex_%d_type_renderers ("type_id" int not null,"renderer" ex_%d_type_renderers_renderer_type not null);' % (id, id)) 122 | curs.execute('drop type if exists ex_%d_section_permission_type;' % id) 123 | curs.execute("create type ex_%d_section_permission_type as enum ('READ', 'WRITE','EXECUTE', 'READ_WRITE', 'READ_EXECUTE', 'WRITE_EXECUTE','READ_WRITE_EXECUTE');" % id) 124 | curs.execute('create table ex_%d_sections ("id" serial not null,"name" text not null,"start_address" bigint not null,"end_address" bigint not null,"permission" ex_%d_section_permission_type not null,"data" bytea not null);' % (id, id)) 125 | curs.execute('create table ex_%d_expression_types ("address" bigint not null,"position" integer not null,"expression_id" integer not null,"type" integer not null,"path" integer[] not null,"offset" integer);' % id) 126 | curs.execute('create table ex_%d_expression_type_instances ("address" bigint not null,"position" integer not null,"expression_node_id" integer not null,"type_instance_id" integer not null);' % id) 127 | curs.execute('create table ex_%d_type_instances ("id" integer not null,"name" text not null,"section_offset" bigint not null,"type_id" integer not null,"section_id" integer not null);' % id) 128 | curs.execute('create table ex_%d_type_substitution_paths ("id" integer not null,"child_id" integer,"type_id" integer not null);' % id) 129 | 130 | def vaccuum_raw_tables(self, id): 131 | try: 132 | with self.conn as conn: 133 | old_iso = conn.isolation_level 134 | conn.set_isolation_level(0) 135 | with conn.cursor() as curs: 136 | curs.execute('vacuum analyze "ex_%d_operands";' % id) 137 | curs.execute('vacuum analyze "ex_%d_functions";' % id) 138 | curs.execute('vacuum analyze "ex_%d_basic_blocks";' % id) 139 | curs.execute('vacuum analyze "ex_%d_instructions";' % id) 140 | curs.execute('vacuum analyze "ex_%d_basic_block_instructions";' % id) 141 | curs.execute('vacuum analyze "ex_%d_callgraph";' % id) 142 | curs.execute('vacuum analyze "ex_%d_control_flow_graphs";' % id) 143 | curs.execute('vacuum analyze "ex_%d_expression_trees";' % id) 144 | curs.execute('vacuum analyze "ex_%d_expression_nodes";' % id) 145 | curs.execute('vacuum analyze "ex_%d_expression_tree_nodes";' % id) 146 | curs.execute('vacuum analyze "ex_%d_expression_substitutions";' % id) 147 | curs.execute('vacuum analyze "ex_%d_address_references";' % id) 148 | curs.execute('vacuum analyze "ex_%d_address_comments";' % id) 149 | curs.execute('vacuum analyze "ex_%d_type_renderers";' % id) 150 | curs.execute('vacuum analyze "ex_%d_base_types";' % id) 151 | curs.execute('vacuum analyze "ex_%d_types";' % id) 152 | curs.execute('vacuum analyze "ex_%d_expression_types";' % id) 153 | curs.execute('vacuum analyze "ex_%d_sections";' % id) 154 | conn.set_isolation_level(old_iso) 155 | except psycopg2.Error, p: 156 | print "vaccuum_raw_tables: %s" % p.message 157 
| raise p 158 | 159 | def create_raw_indicies(self, curs, id): 160 | curs.execute('create unique index ex_%d_functions_address_idx on ex_%d_functions( "address" );' % (id, id)) 161 | curs.execute('create unique index ex_%d_basic_blocks_id_idx on ex_%d_basic_blocks( "id" );' % (id, id)) 162 | curs.execute('create index ex_%d_basic_blocks_address_idx on ex_%d_basic_blocks( "address" );' % (id, id)) 163 | curs.execute('create unique index ex_%d_instructions_address_idx on ex_%d_instructions( "address" );' % (id, id)) 164 | curs.execute('create unique index ex_%d_expression_trees_id_idx on ex_%d_expression_trees( "id" );' % (id, id)) 165 | curs.execute('create unique index ex_%d_expression_nodes_id_idx on ex_%d_expression_nodes( "id" );' % (id, id)) 166 | 167 | def delete_cleanup(self, curs, id): 168 | curs.execute("delete from ex_%d_instructions as instructions using ex_%d_basic_block_instructions as basic_block_instructions where basic_block_instructions.instruction = instructions.address and basic_block_id is null;" % (id, id)) 169 | curs.execute("delete from ex_%d_basic_block_instructions where basic_block_id is null;" % id) 170 | curs.execute("delete from ex_%d_address_references where address in ( select address from ex_%d_address_references except select address from ex_%d_instructions);" % (id, id, id)) 171 | curs.execute("delete from ex_%d_address_comments where address in ( select address from ex_%d_address_comments except select address from ex_%d_instructions);" % (id, id, id)) 172 | curs.execute("delete from ex_%d_expression_substitutions where address in ( select address from ex_%d_expression_substitutions except select address from ex_%d_instructions);" % (id, id, id)) 173 | curs.execute("delete from ex_%d_operands where address in ( select address from ex_%d_operands except select address from ex_%d_instructions);" % (id, id, id)) 174 | curs.execute("delete from ex_%d_expression_type_instances where address in ( select address from ex_%d_expression_type_instances except select address from ex_%d_operands);" % (id, id, id)) 175 | 176 | def create_raw_keys(self, curs, id): 177 | curs.execute('alter table ex_%d_functions add primary key( "address" );' % id) 178 | curs.execute('alter table ex_%d_basic_blocks add primary key( "id" );' % id) 179 | curs.execute('alter table ex_%d_basic_blocks add constraint ex_%d_basic_blocks_parent_function_fkey foreign key ( "parent_function" ) references ex_%d_functions( "address" ) on delete cascade on update cascade;' % (id, id, id)) 180 | curs.execute('alter table ex_%d_instructions add primary key( "address" );' % id) 181 | curs.execute('alter table ex_%d_basic_block_instructions add constraint ex_%d_basic_block_instructions_bb_fkey foreign key ( "basic_block_id" ) references ex_%d_basic_blocks( "id" ) on delete cascade on update cascade;' % (id, id, id)) 182 | curs.execute('alter table ex_%d_basic_block_instructions add constraint ex_%d_basic_block_instructions_ins_fkey foreign key ( "instruction" ) references ex_%d_instructions( "address" ) on delete cascade on update cascade;' % (id, id, id)) 183 | curs.execute('alter table ex_%d_callgraph add primary key( "id" );' % id) 184 | curs.execute('alter table ex_%d_callgraph add constraint ex_%d_callgraph_source_fkey foreign key ( "source" ) references ex_%d_functions( "address" ) on delete cascade on update cascade;' % (id, id, id)) 185 | curs.execute('alter table ex_%d_callgraph add constraint ex_%d_callgraph_destination_fkey foreign key ( "destination" ) references ex_%d_functions( "address" ) 
on delete cascade on update cascade;' % (id, id, id)) 186 | curs.execute('alter table ex_%d_callgraph add constraint ex_%d_callgraph_source_basic_block_id_fkey foreign key ( "source_basic_block_id" ) references ex_%d_basic_blocks( "id" ) on delete cascade on update cascade;' % (id, id, id)) 187 | curs.execute('alter table ex_%d_callgraph add constraint ex_%d_callgraph_source_address_fkey foreign key ( "source_address" ) references ex_%d_instructions( "address" ) on delete cascade on update cascade;' % (id, id, id)) 188 | curs.execute('alter table ex_%d_control_flow_graphs add primary key( "id" );' % id) 189 | curs.execute('alter table ex_%d_control_flow_graphs add constraint ex_%d_control_flow_graphs_parent_function_fkey foreign key ( "parent_function" ) references ex_%d_functions( "address" ) on delete cascade on update cascade;' % (id, id, id)) 190 | curs.execute('alter table ex_%d_control_flow_graphs add constraint ex_%d_control_flow_graphs_source_fkey foreign key ( "source" ) references ex_%d_basic_blocks( "id" ) on delete cascade on update cascade;' % (id, id, id)) 191 | curs.execute('alter table ex_%d_control_flow_graphs add constraint ex_%d_control_flow_graphs_destination_fkey foreign key ( "destination" ) references ex_%d_basic_blocks( "id" ) on delete cascade on update cascade;' % (id, id, id)) 192 | curs.execute('alter table ex_%d_expression_trees add primary key( "id" );' % id) 193 | curs.execute('alter table ex_%d_expression_nodes add primary key( "id" );' % id) 194 | curs.execute('alter table ex_%d_expression_nodes add constraint ex_%d_expression_nodes_parent_id_fkey foreign key ( "parent_id" ) references ex_%d_expression_nodes( "id" ) on delete cascade on update cascade;' % (id, id, id)) 195 | curs.execute('alter table ex_%d_expression_tree_nodes add constraint ex_%d_expression_tree_nodes_expression_tree_id_fkey foreign key ( "expression_tree_id" ) references ex_%d_expression_trees( "id" ) on delete cascade on update cascade;' % (id, id, id)) 196 | curs.execute('alter table ex_%d_expression_tree_nodes add constraint ex_%d_expression_tree_nodes_expression_node_id_fkey foreign key ( "expression_node_id" ) references ex_%d_expression_nodes( "id" ) on delete cascade on update cascade;' % (id, id, id)) 197 | curs.execute('alter table ex_%d_operands add primary key ( "address", "position" );' % id) 198 | curs.execute('alter table ex_%d_operands add constraint ex_%d_operands_expression_tree_id_fkey foreign key ( "expression_tree_id" ) references ex_%d_expression_trees( "id" ) on delete cascade on update cascade;' % (id, id, id)) 199 | curs.execute('alter table ex_%d_operands add constraint ex_%d_operands_address_fkey foreign key ( "address" ) references ex_%d_instructions( "address" ) on delete cascade on update cascade;' % (id, id, id)) 200 | curs.execute('alter table ex_%d_expression_substitutions add constraint ex_%d_expression_substitutions_address_position_fkey foreign key ( "address", "position" ) references ex_%d_operands( "address", "position" ) on delete cascade on update cascade;' % (id, id, id)) 201 | curs.execute('alter table ex_%d_expression_substitutions add constraint ex_%d_expression_substitutions_expression_node_id_fkey foreign key ( "expression_node_id" ) references ex_%d_expression_nodes( "id" ) on delete cascade on update cascade;' % (id, id, id)) 202 | curs.execute('alter table ex_%d_address_references add constraint ex_%d_address_references_address_position foreign key ( "address", "position" ) references ex_%d_operands( "address", "position" ) on delete 
cascade on update cascade;' % (id, id, id)) 203 | curs.execute('alter table ex_%d_address_references add constraint ex_%d_address_references_expression_node_id_fkey foreign key ( "expression_node_id" ) references ex_%d_expression_nodes( "id" ) on delete cascade on update cascade;' % (id, id, id)) 204 | curs.execute('alter table ex_%d_base_types add primary key ( "id" );' % id) 205 | curs.execute('alter table ex_%d_base_types add constraint ex_%d_base_types_pointer_fkey foreign key ( "pointer" ) references ex_%d_base_types( "id" ) on delete cascade on update cascade deferrable initially deferred;' % (id, id, id)) 206 | curs.execute('alter table ex_%d_types add primary key ( "id");' % id) 207 | curs.execute('alter table ex_%d_types add constraint ex_%d_types_parent_id_fkey foreign key ( "parent_id" ) references ex_%d_base_types ( "id" ) on delete cascade on update cascade deferrable initially deferred;' % (id, id, id)) 208 | curs.execute('alter table ex_%d_types add constraint ex_%d_types_base_type_fkey foreign key ( "base_type" ) references ex_%d_base_types ( "id" ) on delete cascade on update cascade;' % (id, id, id)) 209 | curs.execute('alter table ex_%d_expression_types add primary key ( "address", "position", "expression_id" );' % id) 210 | curs.execute('alter table ex_%d_expression_types add constraint ex_%d_expression_type_type_fkey foreign key ( "type" ) references ex_%d_base_types ( "id" ) on update no action on delete cascade deferrable initially deferred;' % (id, id, id)) 211 | curs.execute('alter table ex_%d_sections add primary key ( "id" );' % id) 212 | curs.execute('alter table ex_%d_type_instances add primary key ( "id" );' % id) 213 | curs.execute('alter table ex_%d_type_instances add constraint ex_%d_type_instances_type_id_fkey foreign key ( "type_id" ) references ex_%d_base_types ( "id" ) match simple on update cascade on delete cascade;' % (id, id, id)) 214 | curs.execute('alter table ex_%d_type_instances add constraint ex_%d_type_instances_section_id_fkey foreign key ( "section_id" ) references ex_%d_sections ( "id" ) match simple on update cascade on delete cascade;' % (id, id, id)) 215 | curs.execute('alter table ex_%d_expression_type_instances add primary key ( "address", "position", "expression_node_id" );' % id) 216 | curs.execute('alter table ex_%d_expression_type_instances add constraint ex_%d_expression_type_instances_type_instance_id_fkey foreign key ( "type_instance_id" ) references ex_%d_type_instances ( "id" ) match simple on update cascade on delete cascade;' % (id, id, id)) 217 | curs.execute('alter table ex_%d_expression_type_instances add constraint ex_%d_expression_type_instances_address_position_fkey foreign key ( "address", "position" ) references ex_%d_operands ( "address", "position" ) match simple on update cascade on delete cascade;' % (id, id, id)) 218 | curs.execute('alter table ex_%d_expression_type_instances add constraint ex_%d_expression_type_instances_expression_node_id_fkey foreign key ( "expression_node_id" ) references ex_%d_expression_nodes ( "id" ) match simple on update cascade on delete cascade;' % (id, id, id)) 219 | 220 | def has_table(self, curs, table): 221 | result = False 222 | try: 223 | curs.execute("SELECT relname FROM pg_class WHERE relname = %s;", (table, )) 224 | result = curs.rowcount == 1 225 | except psycopg2.Error, p: 226 | print "has_table: %s" % p.message 227 | raise p 228 | return result 229 | 230 | def need_pg_init(self, curs): 231 | try: 232 | curs.execute('''SELECT count(*) FROM pg_class WHERE relname in 
('bn_projects','bn_modules', 233 | 'bn_address_spaces','bn_space_modules','bn_functions','bn_function_views','bn_instructions', 234 | 'bn_operands','bn_expression_tree','bn_expression_tree_ids','bn_expression_tree_mapping', 235 | 'bn_code_nodes','bn_codenode_instructions','bn_edges','bn_edge_paths','bn_function_nodes', 236 | 'bn_group_nodes','bn_nodes','bn_project_settings','bn_module_settings','bn_traces','bn_trace_events', 237 | 'bn_trace_event_values','bn_views','bn_module_views','bn_project_views','bn_view_settings', 238 | 'bn_global_edge_comments','bn_global_node_comments','bn_project_debuggers','bn_debuggers', 239 | 'bn_tags','bn_tagged_views','bn_tagged_nodes','bn_expression_substitutions','bn_comments', 240 | 'bn_comments_audit','bn_types','bn_base_types','bn_users','bn_expression_types')''') 241 | res = curs.fetchone()[0] 242 | return res != 41 243 | except psycopg2.Error, p: 244 | print "need_pg_init: %s" % p.message 245 | raise p 246 | return True 247 | 248 | def create_modules_table(self): 249 | try: 250 | with self.conn as conn: 251 | with conn.cursor() as curs: 252 | query = ("CREATE TABLE modules (" 253 | " id serial, " 254 | " name text NOT NULL, " 255 | " architecture varchar( 32 ) NOT NULL, " 256 | " base_address bigint NOT NULL, " 257 | " exporter varchar( 256 ) NOT NULL, " 258 | " version int NOT NULL, " 259 | " md5 char( 32 ) NOT NULL, " 260 | " sha1 char( 40 ) NOT NULL, " 261 | " comment TEXT, " 262 | " import_time timestamp NOT NULL DEFAULT current_timestamp, " 263 | " PRIMARY KEY (id));") 264 | curs.execute(query) 265 | except psycopg2.Error, p: 266 | print "create_modules_table: %s" % p.message 267 | raise p 268 | 269 | def delete_module(self, id): 270 | try: 271 | with self.conn as conn: 272 | with conn.cursor() as curs: 273 | curs.execute("delete from modules where id = %s;", (id, )) 274 | self.delete_raw_module(curs, id) 275 | except psycopg2.Error, p: 276 | print "delete_module: %s" % p.message 277 | raise p 278 | 279 | def insert_module(self, id, module_data): 280 | try: 281 | with self.conn as conn: 282 | with conn.cursor() as curs: 283 | curs.execute("insert into modules values(%s, %s, %s, %s, %s, %s, %s, %s, %s, now());", 284 | (id, module_data.loader.name, module_data.loader.arch_name, module_data.loader.image_base, 'infiltrated', 0, 285 | module_data.loader.md5, module_data.loader.sha1, module_data.comment)) 286 | except psycopg2.Error, p: 287 | print "insert_module: %s" % p.message 288 | raise p 289 | 290 | def add_sections(self, curs, id, module_data): 291 | for s in module_data.loader.sections: 292 | raw = s.get_raw_bytes(module_data.loader) 293 | if raw is not None: 294 | curs.execute(("insert into ex_%d_sections" 295 | "(name, start_address, end_address, permission, data)" 296 | " values (%%s, %%s, %%s, %%s, %%s);" % id), 297 | (s.name, s.start, s.end, bn_disasm.PERMISSIONS[s.perms], bytearray(raw))) 298 | 299 | def add_operands(self, curs, id, module_data): 300 | for addr in module_data.visited: 301 | op_exprs = module_data.operands[addr] 302 | opnum = 0 303 | ''' 304 | if not hasattr(insn, "op_exprs"): 305 | print "Missing op_exprs for 0x%x" % insn.address 306 | continue 307 | if insn.op_exprs is None: 308 | print "op_exprs == None for 0x%x" % insn.address 309 | continue 310 | ''' 311 | for expr in op_exprs: 312 | curs.execute(("insert into ex_%d_operands" 313 | "(address, expression_tree_id, position)" 314 | " values (%%s, %%s, %%s);" % id), 315 | (addr, expr, opnum)) 316 | opnum += 1 317 | 318 | def add_nodes(self, curs, id, module_data, nodes, 
parent): 319 | for key,value in nodes.iteritems(): 320 | node = value[0] 321 | val = None 322 | if node.op_type == bn_disasm.IMMEDIATE_INT: 323 | val = node.value 324 | if node.value in module_data.names: 325 | key = module_data.names[node.value] 326 | else: 327 | key = None 328 | curs.execute(("insert into ex_%d_expression_nodes" 329 | "(id, type, symbol, immediate, position, parent_id)" 330 | " values (%%s, %%s, %%s, %%s, %%s, %%s);" % id), 331 | (node.node_id, node.op_type % 10, key, val, node.pos, parent)) 332 | for pos,op in value[1].iteritems(): 333 | self.add_nodes(curs, id, module_data, op, node.node_id) 334 | 335 | def add_trees(self, curs, id, module_data): 336 | for expr in module_data.exprs.keys(): 337 | curs.execute(("insert into ex_%d_expression_trees" 338 | "(id)" 339 | " values (%%s);" % id), 340 | (expr, )) 341 | 342 | def add_tree_nodes(self, curs, id, module_data): 343 | for expr,nodes in module_data.exprs.iteritems(): 344 | for n in nodes: 345 | curs.execute(("insert into ex_%d_expression_tree_nodes" 346 | "(expression_tree_id, expression_node_id)" 347 | " values (%%s, %%s);" % id), 348 | (expr, n)) 349 | 350 | def add_types(self, curs, id, module_data): 351 | for name,btype in module_data.types.iteritems(): 352 | curs.execute(("insert into ex_%d_base_types" 353 | "(id, name, size, pointer, signed, category)" 354 | " values (%%s, %%s, %%s, %%s, %%s, %%s);" % id), 355 | (btype.id, btype.name, btype.size, btype.pointer, btype.signed, bn_disasm.TYPE_CATEGORIES[btype.category])) 356 | 357 | def add_arefs(self, curs, id, module_data): 358 | for aref in module_data.arefs: 359 | curs.execute(("insert into ex_%d_address_references" 360 | "(address, position, expression_node_id, destination, type)" 361 | " values (%%s, %%s, %%s, %%s, %%s);" % id), 362 | (aref.addr, aref.pos, aref.node_id, aref.dest, aref.rtype)) 363 | 364 | def add_module(self, module_data): 365 | try: 366 | id = 0 367 | with self.conn as conn: 368 | with conn.cursor() as curs: 369 | curs.execute("select coalesce(max(id), 0) + 1 from modules;") 370 | id = curs.fetchone()[0] 371 | self.insert_module(id, module_data) 372 | 373 | with self.conn as conn: 374 | with conn.cursor() as curs: 375 | # ordering as binnavi's Ida plugin seems to 376 | #begin is here 377 | self.delete_raw_module(curs, id) 378 | self.create_raw_module(curs, id) 379 | #binnavi then adds sections here 380 | sys.stderr.write("add_sections\n") 381 | self.add_sections(curs, id, module_data) 382 | 383 | #next binnavi inserts into base_types table 384 | # some basic types, then enumerates IDA's structs window, then adds types for all functions ('struct' ???) 
385 | # (1,'BYTE',8,181,true,'atomic'), 386 | # (2,'WORD',16,181,true,'atomic'), 387 | # (3,'DWORD',32,181,true,'atomic'), 388 | # (4,'QWORD',64,null,true,'atomic'), 389 | # (5,'void',32,181,false,'atomic'), 390 | # (6,'void *',32,5,false,'atomic') 391 | sys.stderr.write("add_types\n") 392 | self.add_types(curs, id, module_data) 393 | #next binnavi inserts into types table 394 | #next into expression_types 395 | # type_instances 396 | # expression_type_instances 397 | # address_comments 398 | 399 | sys.stderr.write("add_operands\n") 400 | self.add_operands(curs, id, module_data) 401 | sys.stderr.write("add_instructions\n") 402 | self.add_instructions(curs, id, module_data) 403 | #functions must have non-null stack_frame 404 | sys.stderr.write("add_functions\n") 405 | self.add_functions(curs, id, module_data) 406 | sys.stderr.write("add_basic_blocks\n") 407 | self.add_basic_blocks(curs, id, module_data) 408 | # basic_block_instructions 409 | self.add_basic_block_instructions(curs, id, module_data) 410 | 411 | cfg_query = ("insert into ex_%d_control_flow_graphs" 412 | "(parent_function, source, destination, type)" 413 | " values (%%s, %%s, %%s, %%s);") % id 414 | for edge in module_data.cfg: 415 | curs.execute(cfg_query, (edge.parent_func, edge.src_bb, edge.dest_bb, edge.edge_type)) 416 | 417 | cg_query = ("insert into ex_%d_callgraph" 418 | "(source, source_basic_block_id, source_address, destination)" 419 | " values (%%s, %%s, %%s, %%s);") % id 420 | for edge in module_data.callgraph: 421 | curs.execute(cg_query, (edge.src_func, edge.src_bb, edge.src_addr, edge.dest)) 422 | 423 | sys.stderr.write("add_nodes\n") 424 | self.add_nodes(curs, id, module_data, module_data.nodes, None) 425 | # expression_trees 426 | sys.stderr.write("add_trees\n") 427 | self.add_trees(curs, id, module_data) 428 | # expression_tree_nodes 429 | sys.stderr.write("add_tree_nodes\n") 430 | self.add_tree_nodes(curs, id, module_data) 431 | 432 | #create indicies 433 | self.create_raw_indicies(curs, id) 434 | 435 | # expression_substitutions 436 | # address_references 437 | self.add_arefs(curs, id, module_data) 438 | 439 | #next a number of delete queries are executed 440 | self.delete_cleanup(curs, id) 441 | #now add indicies/foreign keys on all tables that need them 442 | self.create_raw_keys(curs, id) 443 | #commit is here 444 | 445 | self.vaccuum_raw_tables(id) 446 | 447 | return id 448 | except psycopg2.Error, p: 449 | traceback.print_exc() 450 | print "add_module: %s" % p.message 451 | raise p 452 | return -1 453 | 454 | def create_empty_tables(self): 455 | try: 456 | with self.conn as conn: 457 | with conn.cursor() as curs: 458 | if not self.has_table(curs, "modules"): 459 | query = ("CREATE TABLE modules (" 460 | " id serial, " 461 | " name text NOT NULL, " 462 | " architecture varchar( 32 ) NOT NULL, " 463 | " base_address bigint NOT NULL, " 464 | " exporter varchar( 256 ) NOT NULL, " 465 | " version int NOT NULL, " 466 | " md5 char( 32 ) NOT NULL, " 467 | " sha1 char( 40 ) NOT NULL, " 468 | " comment TEXT, " 469 | " import_time timestamp NOT NULL DEFAULT current_timestamp, " 470 | " PRIMARY KEY (id));") 471 | curs.execute(query) 472 | 473 | if self.need_pg_init(curs): 474 | with open('postgresql_tables.sql') as sql: 475 | build_tables = sql.read() 476 | curs.execute(build_tables) 477 | curs.execute("INSERT INTO bn_users VALUES (DEFAULT, 'identity', null, null);") 478 | except psycopg2.Error, p: 479 | print "create_empty_tables: %s" % p.message 480 | 
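For orientation, the class above is driven in two steps: construct binnavi_db() (which connects and, if needed, creates the shared modules/bn_* tables), then hand it a populated bn_disasm.Disassembly via export(), which allocates a module id and writes the per-module ex_<id>_* raw tables in the same order binnavi's IDA plugin does. A minimal sketch, assuming you already have a populated Disassembly instance (producing one is the job of fREedom.py and the loaders):

```python
# Sketch only: module_data is assumed to be a fully populated
# bn_disasm.Disassembly (visited/insts/operands/basic_blocks/... filled in).
import binnavi_db

def export_module(module_data, dbname, user, passwd, host='127.0.0.1'):
    # Connects and lazily creates the shared binnavi tables (modules, bn_*).
    db = binnavi_db.binnavi_db(dbname, user, passwd, host)
    # Picks the next module id, (re)creates the ex_<id>_* raw tables, fills
    # them (sections, types, operands, instructions, functions, basic blocks,
    # CFG/callgraph, expression trees), then adds indices and foreign keys,
    # runs the cleanup deletes and vacuums. Returns the new module id.
    return db.export(module_data)
```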
-------------------------------------------------------------------------------- /bn_disasm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | The disassembly engine for a stand-alone binnavi compatible disassembler 5 | ''' 6 | 7 | __author__ = "Chris Eagle" 8 | __copyright__ = "Copyright 2015, Chris Eagle" 9 | __credits__ = ["Chris Eagle"] 10 | __license__ = "GPL" 11 | __version__ = "2.0" 12 | __maintainer__ = "Chris Eagle" 13 | __email__ = "cseagle@gmail.com" 14 | __status__ = "Use at your own risk" 15 | 16 | import os 17 | import hashlib 18 | import sys 19 | import capstone 20 | import loader 21 | 22 | XR_FLOW = 1 23 | XR_CALL = 2 24 | XR_JUMP = 3 25 | XR_JCC = 4 26 | 27 | CONDITION_TRUE = 0 28 | CONDITION_FALSE = 1 29 | UNCONDITIONAL = 2 30 | SWITCH = 3 31 | CALL_DIRECT = 4 32 | CALL_INDIRECT = 5 33 | CALL_VIRTUAL = 6 34 | DATA = 7 35 | DATA_STRING = 8 36 | 37 | AREF_TYPES = { 38 | 0:'conditional_true', 39 | 1:'conditional_false', 40 | 2:'unconditional', 41 | 3:'switch', 42 | 4:'call_direct', 43 | 5:'call_indirect', 44 | 6:'call_virtual', 45 | 7:'data', 46 | 8:'data_string' 47 | } 48 | 49 | PERMISSIONS = { 50 | 1:'READ', 51 | 2:'WRITE', 52 | 4:'EXECUTE', 53 | 3:'READ_WRITE', 54 | 5:'READ_EXECUTE', 55 | 6:'WRITE_EXECUTE', 56 | 7:'READ_WRITE_EXECUTE' 57 | } 58 | 59 | NO_TYPE = 0 60 | SYMBOL = 1 # String to be displayed. 61 | IMMEDIATE_INT = 2 62 | IMMEDIATE_FLOAT = 3 63 | OPERATOR = 4 # '+', '*' etc. 64 | REGISTER = 5 65 | SIZE_PREFIX = 6 # 'B4, 'B8', etc. 66 | DEREFERENCE = 7 67 | 68 | ATOMIC = 0 69 | POINTER = 1 70 | ARRAY = 2 71 | STRUCT = 3 72 | UNION = 4 73 | FUNCTION_POINTER = 5 74 | 75 | TYPE_CATEGORIES = { 76 | 0:'atomic', 77 | 1:'pointer', 78 | 2:'array', 79 | 3:'struct', 80 | 4:'union', 81 | 5:'function_pointer', 82 | } 83 | 84 | #do the xrefs in the given list describe a conditional jump 85 | def is_conditional(xrefs): 86 | if len(xrefs) != 2: 87 | return False 88 | return (xrefs[0][1] == XR_JCC or xrefs[1][1] == XR_JCC) 89 | 90 | # return as (False target, True target) 91 | def get_conditional_targets(xrefs): 92 | if len(xrefs) != 2: 93 | return None 94 | if xrefs[0][1] == XR_FLOW: 95 | return (xrefs[0][0], xrefs[1][0]) 96 | return (xrefs[1][0], xrefs[0][0]) 97 | 98 | # return as (return target, call target) 99 | def get_call_targets(xrefs): 100 | if len(xrefs) != 2: 101 | return None 102 | if xrefs[0][1] == XR_FLOW: 103 | return (xrefs[0][0], xrefs[1][0]) 104 | return (xrefs[1][0], xrefs[0][0]) 105 | 106 | class OpNode(object): 107 | 108 | def __init__(self, op_type, value): 109 | self.op_type = op_type 110 | self.value = value 111 | self.node_id = 0 112 | self.pos = 0 113 | 114 | class Operand(object): 115 | 116 | def __init__(self, addr, expr, pos): 117 | self.addr = addr 118 | self.expr = expr 119 | self.pos = pos 120 | 121 | class AddressRef(object): 122 | 123 | def __init__(self, addr, pos, node_id, dest, rtype): 124 | self.addr = addr 125 | self.pos = pos 126 | self.node_id = node_id 127 | self.dest = dest 128 | self.rtype = rtype 129 | 130 | class TypeInfo(object): 131 | 132 | def __init__(self, id, name, size, pointer, signed, category): 133 | self.id = id 134 | self.name = name 135 | self.size = size 136 | self.pointer = pointer 137 | self.signed = signed 138 | self.category = category 139 | 140 | class BlockInfo(object): 141 | def __init__(self, bid, seq, func_addr): 142 | self.bid = bid 143 | self.seq = seq 144 | self.func = func_addr 145 | 146 | #callgraph edge 147 | class 
CG_Edge(object): 148 | def __init__(self, src_func, src_bb, src_addr, dest): 149 | self.src_func = src_func 150 | self.src_bb = src_bb 151 | self.src_addr = src_addr 152 | self.dest = dest 153 | 154 | #control flow graph edge 155 | class CFG_Edge(object): 156 | def __init__(self, parent_func, src_bb, dest_bb, edge_type): 157 | self.parent_func = parent_func 158 | self.src_bb = src_bb 159 | self.dest_bb = dest_bb 160 | self.edge_type = edge_type 161 | 162 | class Disassembly(object): 163 | 164 | def __init__(self, loader): 165 | self.loader = loader 166 | 167 | self.comment = '' 168 | 169 | self.locs = [] # addr - to be visited 170 | self.visited = set() # addr - instructions we have actually examined 171 | self.insts = {} # addr:cs.CsInsn - cache of disassembled instructions 172 | self.names = {} # addr:string 173 | self.jmp_targets = set() # addr 174 | self.call_targets = set() # addr 175 | self.xrefs_to = {} # addr:list of (int,int) (addr,type) 176 | self.xrefs_from = {} # addr:list of (int,int) (addr,type) 177 | self.thunks = set() # addr 178 | 179 | self.bb_id = 0 180 | self.basic_block_starts = set() # start address for basic blocks 181 | self.basic_blocks = {} # addr:[(int, int)] block_start:[(block_id, parent_func)] 182 | 183 | self.callgraph = [] # CG_Edge 184 | self.cfg = [] # CFG_Edge 185 | 186 | self.nodes = {} # {str:tuple} tuple is node,{str:tuple} 187 | self.exprs = {} # int:[] int expression_id, list of nodes in expression 188 | self.expr_strings = {} # string representations of expressions : expr_id 189 | self.node_id = 0 190 | self.expr_id = 0 191 | self.operands = {} # addr:[int] instruction address -> list of operand expressions 192 | self.arefs = [] # AddressRef 193 | self.type_id = 0 194 | self.types = {} # name:TypeInfo 195 | self.func_sigs = [] # str - list of function header signatures for signature matching 196 | self.data_locs = {} # {addr:size} - locations known to be data and their sizes 197 | 198 | #these should really come from disassembly process ??
199 | #rather than just priming the pump here 200 | self.add_type("char", 8, None, True, ATOMIC) 201 | self.add_type("short", 16, None, True, ATOMIC) 202 | self.add_type("int", 32, None, True, ATOMIC) 203 | self.add_type("BYTE", 8, None, True, ATOMIC) 204 | self.add_type("WORD", 16, None, True, ATOMIC) 205 | self.add_type("DWORD", 32, None, True, ATOMIC) 206 | self.add_type("QWORD", 64, None, True, ATOMIC) 207 | self.add_type("int8_t", 8, None, True, ATOMIC) 208 | self.add_type("int16_t", 16, None, True, ATOMIC) 209 | self.add_type("int32_t", 32, None, True, ATOMIC) 210 | self.add_type("int64_t", 64, None, True, ATOMIC) 211 | self.add_type("uint8_t", 8, None, False, ATOMIC) 212 | self.add_type("uint16_t", 16, None, False, ATOMIC) 213 | self.add_type("uint32_t", 32, None, False, ATOMIC) 214 | self.add_type("uint64_t", 64, None, False, ATOMIC) 215 | 216 | for addr in loader.imports_by_addr: 217 | self.data_locs[addr] = loader.sizeof_ptr 218 | 219 | #do the xrefs in the given list describe a function call that returns 220 | def is_returning_call(self, xrefs): 221 | if len(xrefs) != 2: 222 | return False 223 | if xrefs[1][1] == XR_CALL: 224 | tgt = xrefs[1][0] 225 | elif xrefs[0][1] == XR_CALL: 226 | tgt = xrefs[0][0] 227 | else: 228 | return False # not a call 229 | if tgt in self.names and self.names[tgt] in self.loader.non_returning_funcs: 230 | return False 231 | return (xrefs[0][1] == XR_FLOW and xrefs[1][1] == XR_CALL) or \ 232 | (xrefs[1][1] == XR_FLOW and xrefs[0][1] == XR_CALL) 233 | 234 | def add_type(self, name, size, pointer, signed, category): 235 | self.type_id += 1 236 | self.types[name] = TypeInfo(self.type_id, name, size, pointer, signed, category) 237 | 238 | def add_basic_block_start(self, addr): 239 | self.basic_block_starts.add(addr) 240 | 241 | #returns new basic block id 242 | def add_basic_block(self, addr, parent): 243 | if addr not in self.basic_block_starts: 244 | return 245 | self.bb_id += 1 246 | if addr not in self.basic_blocks: 247 | self.basic_blocks[addr] = [] 248 | bb = (self.bb_id, parent) 249 | self.basic_blocks[addr].append(bb) 250 | return bb[0] 251 | 252 | def is_bb_start(self, addr): 253 | return addr in self.basic_block_starts 254 | 255 | def get_bb_id(self, func, addr): 256 | inst = self.insts[addr] 257 | if hasattr(inst, "bb"): 258 | for block in inst.bb: 259 | if func == block.func: 260 | return block.bid 261 | ''' 262 | sys.stderr.write("Unable to get_bb_id for 0x%x in func 0x%x\n" % (addr, func)) 263 | for block in inst.bb: 264 | sys.stderr.write("(%d, %d, 0x%x), " % (block.bid, block.seq, block.func)) 265 | sys.stderr.write("\n") 266 | ''' 267 | ''' 268 | else: 269 | sys.stderr.write("0x%x has no bb attr\n" % addr) 270 | sys.stderr.write("Unable to get_bb_id for 0x%x in func 0x%x\n" % (addr, func)) 271 | ''' 272 | return -1 273 | 274 | def print_func_owners(self, addr): 275 | insn = self.insts[addr] 276 | if hasattr(insn, "bb"): 277 | for b in insn.bb: 278 | sys.stderr.write("0x%x, " % b.func) 279 | sys.stderr.write("\n") 280 | 281 | 282 | def build_cfg(self): 283 | for addr,bb in self.basic_blocks.iteritems(): 284 | if addr in self.call_targets: 285 | continue 286 | if addr in self.xrefs_to: 287 | #look at the instructions that refer to this basic block start address 288 | for xr in self.xrefs_to[addr]: 289 | src = xr[0] 290 | #add an edge for each block that the referring instruction belongs to 291 | for block in bb: 292 | src_bb = self.get_bb_id(block[1], src) 293 | if src_bb == -1: 294 | # this seems to happen when we don't have a complete
understanding 295 | # of whether a function call fails to return or not 296 | # which leads to the incorrect conclusion that the instruction 297 | # following the call is reachable 298 | ''' 299 | sys.stderr.write("0x%x refers to 0x%x but failed to find bid for 0x%x\n" % (src, addr, src)) 300 | sys.stderr.write("0x%x belongs to: " % src) 301 | self.print_func_owners(src) 302 | sys.stderr.write("0x%x belongs to: " % addr) 303 | self.print_func_owners(addr) 304 | ''' 305 | continue 306 | xr_type = CONDITION_FALSE 307 | if xr[1] == XR_FLOW: 308 | if len(self.xrefs_from[src]) == 1: 309 | xr_type = UNCONDITIONAL 310 | else: 311 | xr_type = CONDITION_FALSE 312 | elif xr[1] == XR_JCC: 313 | xr_type = CONDITION_TRUE 314 | elif xr[1] == XR_JUMP: 315 | xr_type = UNCONDITIONAL 316 | else: #should not get here 317 | continue 318 | edge = CFG_Edge(block[1], src_bb, block[0], xr_type) 319 | self.cfg.append(edge) 320 | 321 | def build_callgraph(self): 322 | for func in self.call_targets: 323 | if func in self.xrefs_to: 324 | for xr in self.xrefs_to[func]: 325 | src = xr[0] 326 | inst = self.insts[src] 327 | if hasattr(inst, "bb"): 328 | for block in inst.bb: 329 | edge = CG_Edge(block.func, block.bid, src, func) 330 | self.callgraph.append(edge) 331 | 332 | #need to traverse to figure out the parent functions for 333 | #all basic blocks. Note we have more work to do than we should 334 | #this is a result of the binnavi database schema failing to actually 335 | #set the ex_N_basic_blocks primary key to (id, parent_function) as they 336 | #claim to in 337 | #binnavi/src/main/java/com/google/security/zynamics/binnavi/manual/html/dbformat.htm 338 | #instead they only use id so we need a unique id when a block is part of more than 339 | #one function 340 | def extract_basic_block_data(self, func, addr, func_insts): 341 | bb = -1 342 | while True: 343 | if addr in func_insts: 344 | break 345 | func_insts.add(addr) 346 | if self.is_bb_start(addr): 347 | if addr == 0: 348 | print "tried to add basic block at 0 for func 0x%x" % func 349 | else: 350 | bb = self.add_basic_block(addr, func) 351 | if addr in self.xrefs_from: 352 | flows_to = -1 353 | xrefs = self.xrefs_from[addr] 354 | for xr in xrefs: 355 | if xr[1] == XR_FLOW: 356 | flows_to = xr[0] 357 | elif xr[1] == XR_CALL: 358 | continue 359 | elif xr[1] == XR_JCC: 360 | self.extract_basic_block_data(func, xr[0], func_insts) 361 | elif xr[0] in self.thunks: # must be XR_JUMP 362 | continue 363 | elif xr[0] in self.call_targets: # must be XR_JUMP to a function 364 | # this might/probably needs a callgraph edge 365 | continue 366 | elif xr[0] in self.loader.imports_by_addr: # must be XR_JUMP 367 | continue 368 | else: # XR_JUMP, perhaps switch jump ??? 
369 | self.extract_basic_block_data(func, xr[0], func_insts) 370 | if flows_to != -1: 371 | addr = flows_to 372 | else: #no normal flow from here 373 | break 374 | else: #no xrefs from here 375 | break 376 | 377 | #assumes we have all basic blocks identified, we make a second pass here 378 | #in case we need to associate a bansic block with more than one function 379 | #this is a result of the binnavi database schema failing to actually 380 | #set the ex_N_basic_blocks primary key to (id, parent_function) as they 381 | #claim to in 382 | #binnavi/src/main/java/com/google/security/zynamics/binnavi/manual/html/dbformat.htm 383 | #instead they only use id so we need a unique id when a block is part of more than 384 | #one function 385 | def set_basic_block_instructions(self): 386 | for addr,bb in self.basic_blocks.iteritems(): 387 | seq = 0 388 | while True: 389 | if addr not in self.insts: 390 | #may have reference to invalid isntruction 391 | break 392 | inst = self.insts[addr] 393 | inst.bb = [BlockInfo(b[0], seq, b[1]) for b in bb] #block may belong to more than one function 394 | seq += 1 395 | if addr in self.xrefs_from: 396 | xrefs = self.xrefs_from[addr] 397 | if self.is_returning_call(xrefs): 398 | addr = get_call_targets(xrefs)[0] 399 | elif len(xrefs) > 1: 400 | break 401 | else: # len(xrefs) == 1 402 | addr = xrefs[0][0] 403 | else: # no xrefs from so at end of block 404 | break 405 | if addr in self.basic_blocks: #hit start of different basic block 406 | break 407 | 408 | #tree is a list of OpNode 409 | def insert_tree(self, root, tree, depth, pos): 410 | n = tree[depth] 411 | n.pos = pos 412 | depth += 1 413 | arity = 0 414 | if (n.op_type % 10) == OPERATOR: 415 | #operator types are encoded as #4 where # is the arity of the operator 416 | arity = n.op_type // 10 417 | if n.op_type == SIZE_PREFIX or n.op_type == DEREFERENCE: 418 | #also descend on a SIZE_PREFIX 419 | arity = 1 420 | 421 | if n.value not in root: 422 | #new node at this level 423 | self.node_id += 1 424 | n.node_id = self.node_id 425 | root[n.value] = (n, {}) 426 | else: 427 | n.node_id = root[n.value][0].node_id 428 | self.exprs[self.expr_id].append(root[n.value][0].node_id) 429 | op_root = root[n.value][1] 430 | for i in range(arity): 431 | if i not in op_root: 432 | op_root[i] = {} 433 | root = op_root[i] #different subtrees for different operand position 434 | #parse the operands for the operator 435 | depth = self.insert_tree(root, tree, depth, i) 436 | return depth 437 | 438 | def tree_to_str(self, tree): 439 | s = '' 440 | for o in tree: 441 | s += '(%s)' % str(o.value) 442 | return s 443 | 444 | def add_expr_tree(self, tree): 445 | if len(tree) == 0: 446 | return 0 447 | s = self.tree_to_str(tree) 448 | if s in self.expr_strings: 449 | #we have seen this expression before 450 | expr_id = self.expr_strings[s] 451 | idx = 0 452 | for i in self.exprs[expr_id]: 453 | tree[idx].node_id = i 454 | idx += 1 455 | return expr_id 456 | # will be making a new expression 457 | self.expr_id += 1 458 | self.exprs[self.expr_id] = [] 459 | self.insert_tree(self.nodes, tree, 0, 0) 460 | self.expr_strings[s] = self.expr_id 461 | return self.expr_id 462 | 463 | def print_disassembly(self): 464 | keylist = [a for a in self.visited] # self.insts.keys() 465 | keylist.sort() 466 | last = None 467 | for a in keylist: 468 | i = self.insts[a] 469 | if a in self.names: 470 | print "%s:" % self.names[a] 471 | ref = '' 472 | if i.address not in self.xrefs_to: 473 | ref = "\t\t**** NOT REFERENCED ****" 474 | operand = 
self.get_op_name(i.address, i.op_str) 475 | print "\t0x%08x:\t%s%s%s" % (i.address, i.mnemonic.ljust(8), operand, ref) 476 | ''' 477 | if i.address in self.xrefs_from: 478 | xr = self.xrefs_from[i.address] 479 | sys.stdout.write('\t') 480 | for x in xr: 481 | sys.stdout.write("0x%x(%d), " % (x[0], x[1])) 482 | sys.stdout.write('\n') 483 | ''' 484 | last = i 485 | 486 | def scan_gaps(self, header): 487 | keylist = [a for a in self.visited] # self.insts.keys() 488 | keylist.sort() 489 | last = None 490 | count = 0 491 | for a in keylist: 492 | i = self.insts[a] 493 | if last is not None and (last.address + last.size) != a: 494 | gap_start = last.address + last.size 495 | gap = self.loader.get_bytes(gap_start, a - gap_start) 496 | if gap is None: 497 | print "That's odd, gap is None" 498 | continue 499 | idx = 0 500 | while True: 501 | loc = gap.find(header, idx) 502 | if loc != -1 and (loc + gap_start) not in self.visited: 503 | self.locs.append(loc + gap_start) 504 | #print "Adding gap function 0x%x" % (loc + gap_start) 505 | count += 1 506 | idx = loc + 1 507 | else: 508 | break 509 | last = i 510 | #print "Gap analysis added %d new locations" % count 511 | 512 | #Scan the data sections for possible references back to code 513 | #such as vtables, switch jumps, and other function pointers 514 | def scan_data(self): 515 | pass 516 | 517 | #Scan unanalyzed gaps in the code section for possible references 518 | #to code such as switch jumps 519 | def scan_gap_data(self): 520 | pass 521 | 522 | #subclasses should implement this as it's very platform specific 523 | def process_operands(self, inst): 524 | raise Exception("Please implement process_operands") 525 | 526 | #subclasses should implement this 527 | def process_jump(self, inst): 528 | raise Exception("Please implement process_jump") 529 | 530 | #subclasses should implement this 531 | def process_call(self, inst): 532 | raise Exception("Please implement process_jump") 533 | 534 | #subclasses should implement this 535 | def get_op_name(self, addr, default_val): 536 | raise Exception("Please implement get_op_name") 537 | 538 | def add_xref(self, frm, to, xr_type=XR_FLOW): 539 | raise Exception("Please implement add_xref") 540 | 541 | def nextinst(self, addr): 542 | #take enough to get at least 1 instruction in majority case 543 | if addr in self.insts: 544 | # previously decoded this with capstone 545 | return self.insts[addr] 546 | # grab a block of bytes following the current address 547 | mc = self.loader.get_bytes(addr, 256) 548 | if mc is None or len(mc) == 0: 549 | return None 550 | for i in self.dis.disasm(mc, addr): 551 | self.insts[i.address] = i 552 | if addr in self.insts: 553 | return self.insts[addr] 554 | return None 555 | 556 | def is_possible_code(self, addr): 557 | if addr in self.data_locs: 558 | return False 559 | for s in self.loader.sections: 560 | if (s.perms & loader.PROT_EXEC) and s.contains(addr): 561 | return True 562 | return False 563 | 564 | def generate_disassembly(self): 565 | while len(self.locs) > 0: 566 | addr = self.locs.pop(0) 567 | if not self.is_possible_code(addr): 568 | continue 569 | dead_end = False 570 | while True: 571 | i = self.nextinst(addr) 572 | if i is None: 573 | # but we should have gotten an instruction so this is odd 574 | # remove all xrefs to this address 575 | if addr in self.xrefs_to: 576 | srcs = self.xrefs_to[addr] 577 | for s in srcs: 578 | if s[0] in self.xrefs_from: 579 | dests = self.xrefs_from[s[0]] 580 | for tgt in dests: 581 | if tgt[0] == addr: 582 | dests.remove(tgt) 583 | 
break 584 | if len(dests) == 0: 585 | self.xrefs_from.pop(s[0]) 586 | self.xrefs_to.pop(addr, None) 587 | break 588 | if i.address in self.visited: 589 | #already been here, won't learn anything new 590 | break 591 | self.visited.add(i.address) 592 | self.insts[i.address] = i 593 | self.process_operands(i) 594 | 595 | dead_end = False 596 | if i.group(capstone.CS_GRP_JUMP): 597 | dead_end = self.process_jump(i) 598 | elif i.group(capstone.CS_GRP_CALL): 599 | dead_end = self.process_call(i) 600 | elif i.group(capstone.CS_GRP_RET): 601 | dead_end = True 602 | elif i.group(capstone.CS_GRP_IRET): 603 | dead_end = True 604 | if not dead_end: 605 | next_addr = i.address + i.size 606 | self.add_xref(i.address, next_addr) 607 | else: 608 | #dead end return to instruction list 609 | break 610 | 611 | def generate_data(self): 612 | self.generate_disassembly() 613 | 614 | print "After first pass, have %d insts" % len(self.visited) 615 | 616 | main = self.loader.find_main(self.insts, self.xrefs_to, self.xrefs_from) 617 | if main is not None and main not in self.visited: 618 | print "Found main at 0x%x" % main 619 | self.locs.append(main) 620 | self.call_targets.add(main) 621 | self.add_basic_block_start(main) 622 | if "main" not in self.names: 623 | self.names[main] = "main" 624 | elif "_main" not in self.names: 625 | self.names[main] = "_main" 626 | else: 627 | self.names[main] = "sub_%x" % main 628 | self.generate_disassembly() 629 | 630 | print "After 'find_main' pass, have %d insts" % len(self.visited) 631 | 632 | #pick up pointers in the rdata section 633 | # self.scan_data() 634 | # self.generate_disassembly() 635 | 636 | # for sig in self.func_sigs: 637 | #try to find more code by looking for standard prologue 638 | # self.scan_gaps(sig) 639 | # self.generate_disassembly() 640 | 641 | #pick up pointers in the text section 642 | # self.scan_gap_data() 643 | # self.generate_disassembly() 644 | 645 | for f in self.call_targets: 646 | self.extract_basic_block_data(f, f, set()) 647 | self.set_basic_block_instructions() 648 | self.build_cfg() 649 | self.build_callgraph() 650 | for addr,bb in self.basic_blocks.iteritems(): 651 | if len(bb) == 0: 652 | print "no parent found for basic block at 0x%x" % addr 653 | for addr in self.visited: 654 | i = self.insts[addr] 655 | if not hasattr(i, "bb"): 656 | print "Instruction 0x%x has no bb" % addr 657 | -------------------------------------------------------------------------------- /elf_loader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | Crude ELF loader, conforming to the Loader interface, for a stand-alone binnavi compatible disassembler 5 | ''' 6 | 7 | __author__ = "Chris Eagle" 8 | __copyright__ = "Copyright 2015, Chris Eagle" 9 | __credits__ = ["Chris Eagle"] 10 | __license__ = "GPL" 11 | __version__ = "2.0" 12 | __maintainer__ = "Chris Eagle" 13 | __email__ = "cseagle@gmail.com" 14 | __status__ = "Use at your own risk" 15 | 16 | import sys 17 | import struct 18 | import hashlib 19 | import binascii 20 | import capstone 21 | from loader import * 22 | 23 | XR_FLOW = 1 24 | XR_CALL = 2 25 | XR_JUMP = 3 26 | XR_JCC = 4 27 | 28 | EI_CLASS = 4 # File class byte index 29 | ELFCLASSNONE = 0 # Invalid class 30 | ELFCLASS32 = 1 # 32-bit objects 31 | ELFCLASS64 = 2 # 64-bit objects 32 | ELFCLASSNUM = 3 33 | 34 | EI_DATA = 5 # Data encoding byte index 35 | ELFDATANONE = 0 # Invalid data encoding 36 | ELFDATA2LSB = 1 # 2's complement, little endian 37 | ELFDATA2MSB = 2 # 2's 
complement, big endian 38 | ELFDATANUM = 3 39 | 40 | EI_VERSION = 6 # File version byte index 41 | # Value must be EV_CURRENT 42 | 43 | EI_OSABI = 7 # OS ABI identification 44 | ELFOSABI_NONE = 0 # UNIX System V ABI 45 | ELFOSABI_SYSV = 0 # Alias. 46 | ELFOSABI_HPUX = 1 # HP-UX 47 | ELFOSABI_NETBSD = 2 # NetBSD. 48 | ELFOSABI_GNU = 3 # Object uses GNU ELF extensions. 49 | ELFOSABI_LINUX = ELFOSABI_GNU # Compatibility alias. 50 | ELFOSABI_SOLARIS = 6 # Sun Solaris. 51 | ELFOSABI_AIX = 7 # IBM AIX. 52 | ELFOSABI_IRIX = 8 # SGI Irix. 53 | ELFOSABI_FREEBSD = 9 # FreeBSD. 54 | ELFOSABI_TRU64 = 10 # Compaq TRU64 UNIX. 55 | ELFOSABI_MODESTO = 11 # Novell Modesto. 56 | ELFOSABI_OPENBSD = 12 # OpenBSD. 57 | ELFOSABI_ARM_AEABI = 64 # ARM EABI 58 | ELFOSABI_ARM = 97 # ARM 59 | ELFOSABI_STANDALONE = 255 # Standalone (embedded) application 60 | 61 | EI_ABIVERSION = 8 # ABI version 62 | 63 | EI_PAD = 9 # Byte index of padding bytes 64 | 65 | # Legal values for e_type (object file type). 66 | 67 | ET_NONE = 0 # No file type 68 | ET_REL = 1 # Relocatable file 69 | ET_EXEC = 2 # Executable file 70 | ET_DYN = 3 # Shared object file 71 | ET_CORE = 4 # Core file 72 | ET_NUM = 5 # Number of defined types 73 | ET_LOOS = 0xfe00 # OS-specific range start 74 | ET_HIOS = 0xfeff # OS-specific range end 75 | ET_LOPROC = 0xff00 # Processor-specific range start 76 | ET_HIPROC = 0xffff # Processor-specific range end 77 | 78 | 79 | EM_NONE = 0 # No machine 80 | EM_SPARC = 2 # SUN SPARC 81 | EM_386 = 3 # Intel 80386 82 | EM_68K = 4 # Motorola m68k family 83 | EM_MIPS = 8 # MIPS R3000 big-endian 84 | EM_MIPS_RS3_LE = 10 # MIPS R3000 little-endian 85 | 86 | EM_PPC = 20 # PowerPC 87 | EM_PPC64 = 21 # PowerPC 64-bit 88 | 89 | EM_ARM = 40 # ARM 90 | EM_SPARCV9 = 43 # SPARC v9 64-bit 91 | 92 | EM_X86_64 = 62 # AMD x86-64 architecture 93 | 94 | EM_AARCH64 = 183 # ARM AARCH64 95 | 96 | # Legal values for p_type (segment type). 97 | 98 | PT_NULL = 0 # Program header table entry unused 99 | PT_LOAD = 1 # Loadable program segment 100 | PT_DYNAMIC = 2 # Dynamic linking information 101 | PT_INTERP = 3 # Program interpreter 102 | PT_NOTE = 4 # Auxiliary information 103 | PT_SHLIB = 5 # Reserved 104 | PT_PHDR = 6 # Entry for header table itself 105 | PT_TLS = 7 # Thread-local storage segment 106 | PT_NUM = 8 # Number of defined types 107 | PT_LOOS = 0x60000000 # Start of OS-specific 108 | PT_GNU_EH_FRAME = 0x6474e550 # GCC .eh_frame_hdr segment 109 | PT_GNU_STACK = 0x6474e551 # Indicates stack executability 110 | PT_GNU_RELRO = 0x6474e552 # Read-only after relocation 111 | PT_LOSUNW = 0x6ffffffa 112 | PT_SUNWBSS = 0x6ffffffa # Sun Specific segment 113 | PT_SUNWSTACK = 0x6ffffffb # Stack segment 114 | PT_HISUNW = 0x6fffffff 115 | PT_HIOS = 0x6fffffff # End of OS-specific 116 | PT_LOPROC = 0x70000000 # Start of processor-specific 117 | PT_HIPROC = 0x7fffffff # End of processor-specific 118 | 119 | # Legal values for e_version (version). 120 | 121 | EV_NONE = 0 # Invalid ELF version 122 | EV_CURRENT = 1 # Current version 123 | EV_NUM = 2 124 | 125 | # Legal values for p_flags (segment flags). 126 | 127 | PF_X = (1 << 0) # Segment is executable 128 | PF_W = (1 << 1) # Segment is writable 129 | PF_R = (1 << 2) # Segment is readable 130 | PF_MASKOS = 0x0ff00000 # OS-specific 131 | PF_MASKPROC = 0xf0000000 # Processor-specific 132 | 133 | # Legal values for sh_type (section type). 
134 | 135 | SHT_NULL = 0 # Section header table entry unused 136 | SHT_PROGBITS = 1 # Program data 137 | SHT_SYMTAB = 2 # Symbol table 138 | SHT_STRTAB = 3 # String table 139 | SHT_RELA = 4 # Relocation entries with addends 140 | SHT_HASH = 5 # Symbol hash table 141 | SHT_DYNAMIC = 6 # Dynamic linking information 142 | SHT_NOTE = 7 # Notes 143 | SHT_NOBITS = 8 # Program space with no data (bss) 144 | SHT_REL = 9 # Relocation entries, no addends 145 | SHT_SHLIB = 10 # Reserved 146 | SHT_DYNSYM = 11 # Dynamic linker symbol table 147 | SHT_INIT_ARRAY = 14 # Array of constructors 148 | SHT_FINI_ARRAY = 15 # Array of destructors 149 | SHT_PREINIT_ARRAY = 16 # Array of pre-constructors 150 | SHT_GROUP = 17 # Section group 151 | SHT_SYMTAB_SHNDX = 18 # Extended section indeces 152 | SHT_NUM = 19 # Number of defined types. 153 | SHT_LOOS = 0x60000000 # Start OS-specific. 154 | SHT_GNU_ATTRIBUTES = 0x6ffffff5 # Object attributes. 155 | SHT_GNU_HASH = 0x6ffffff6 # GNU-style hash table. 156 | SHT_GNU_LIBLIST = 0x6ffffff7 # Prelink library list 157 | SHT_CHECKSUM = 0x6ffffff8 # Checksum for DSO content. 158 | SHT_LOSUNW = 0x6ffffffa # Sun-specific low bound. 159 | SHT_SUNW_move = 0x6ffffffa 160 | SHT_SUNW_COMDAT = 0x6ffffffb 161 | SHT_SUNW_syminfo = 0x6ffffffc 162 | SHT_GNU_verdef = 0x6ffffffd # Version definition section. 163 | SHT_GNU_verneed = 0x6ffffffe # Version needs section. 164 | SHT_GNU_versym = 0x6fffffff # Version symbol table. 165 | SHT_HISUNW = 0x6fffffff # Sun-specific high bound. 166 | SHT_HIOS = 0x6fffffff # End OS-specific type 167 | SHT_LOPROC = 0x70000000 # Start of processor-specific 168 | SHT_HIPROC = 0x7fffffff # End of processor-specific 169 | SHT_LOUSER = 0x80000000 # Start of application-specific 170 | SHT_HIUSER = 0x8fffffff # End of application-specific 171 | 172 | # Legal values for sh_flags (section flags). 173 | 174 | SHF_WRITE = (1 << 0) # Writable 175 | SHF_ALLOC = (1 << 1) # Occupies memory during execution 176 | SHF_EXECINSTR = (1 << 2) # Executable 177 | SHF_MERGE = (1 << 4) # Might be merged 178 | SHF_STRINGS = (1 << 5) # Contains nul-terminated strings 179 | SHF_INFO_LINK = (1 << 6) # `sh_info' contains SHT index 180 | SHF_LINK_ORDER = (1 << 7) # Preserve order after combining 181 | SHF_OS_NONCONFORMING = (1 << 8) # Non-standard OS specific handling required 182 | SHF_GROUP = (1 << 9) # Section is member of a group. 183 | SHF_TLS = (1 << 10) # Section hold thread-local data. 184 | SHF_MASKOS = 0x0ff00000 # OS-specific. 185 | SHF_MASKPROC = 0xf0000000 # Processor-specific 186 | SHF_ORDERED = (1 << 30) # Special ordering requirement (Solaris). 187 | SHF_EXCLUDE = (1 << 31) # Section is excluded unless referenced or allocated (Solaris). 188 | 189 | # Legal values for ST_TYPE subfield of st_info (symbol type). 190 | 191 | STT_NOTYPE = 0 # Symbol type is unspecified 192 | STT_OBJECT = 1 # Symbol is a data object 193 | STT_FUNC = 2 # Symbol is a code object 194 | STT_SECTION = 3 # Symbol associated with a section 195 | STT_FILE = 4 # Symbol's name is file name 196 | STT_COMMON = 5 # Symbol is a common data object 197 | STT_TLS = 6 # Symbol is thread-local data object 198 | STT_NUM = 7 # Number of defined types. 199 | STT_LOOS = 10 # Start of OS-specific 200 | STT_GNU_IFUNC = 10 # Symbol is indirect code object 201 | STT_HIOS = 12 # End of OS-specific 202 | STT_LOPROC = 13 # Start of processor-specific 203 | STT_HIPROC = 15 # End of processor-specific 204 | 205 | # Legal values for d_tag (dynamic entry type). 
206 | 207 | DT_NULL = 0 # Marks end of dynamic section 208 | DT_NEEDED = 1 # Name of needed library 209 | DT_PLTRELSZ = 2 # Size in bytes of PLT relocs 210 | DT_PLTGOT = 3 # Processor defined value 211 | DT_HASH = 4 # Address of symbol hash table 212 | DT_STRTAB = 5 # Address of string table 213 | DT_SYMTAB = 6 # Address of symbol table 214 | DT_RELA = 7 # Address of Rela relocs 215 | DT_RELASZ = 8 # Total size of Rela relocs 216 | DT_RELAENT = 9 # Size of one Rela reloc 217 | DT_STRSZ = 10 # Size of string table 218 | DT_SYMENT = 11 # Size of one symbol table entry 219 | DT_INIT = 12 # Address of init function 220 | DT_FINI = 13 # Address of termination function 221 | DT_SONAME = 14 # Name of shared object 222 | DT_RPATH = 15 # Library search path (deprecated) 223 | DT_SYMBOLIC = 16 # Start symbol search here 224 | DT_REL = 17 # Address of Rel relocs 225 | DT_RELSZ = 18 # Total size of Rel relocs 226 | DT_RELENT = 19 # Size of one Rel reloc 227 | DT_PLTREL = 20 # Type of reloc in PLT 228 | DT_DEBUG = 21 # For debugging; unspecified 229 | DT_TEXTREL = 22 # Reloc might modify .text 230 | DT_JMPREL = 23 # Address of PLT relocs 231 | DT_BIND_NOW = 24 # Process relocations of object 232 | DT_INIT_ARRAY = 25 # Array with addresses of init fct 233 | DT_FINI_ARRAY = 26 # Array with addresses of fini fct 234 | DT_INIT_ARRAYSZ = 27 # Size in bytes of DT_INIT_ARRAY 235 | DT_FINI_ARRAYSZ = 28 # Size in bytes of DT_FINI_ARRAY 236 | DT_RUNPATH = 29 # Library search path 237 | DT_FLAGS = 30 # Flags for the object being loaded 238 | DT_ENCODING = 32 # Start of encoded range 239 | DT_PREINIT_ARRAY = 32 # Array with addresses of preinit fct 240 | DT_PREINIT_ARRAYSZ = 33 # size in bytes of DT_PREINIT_ARRAY 241 | DT_NUM = 34 # Number used 242 | DT_LOOS = 0x6000000d # Start of OS-specific 243 | DT_HIOS = 0x6ffff000 # End of OS-specific 244 | DT_LOPROC = 0x70000000 # Start of processor-specific 245 | DT_HIPROC = 0x7fffffff # End of processor-specific 246 | #DT_PROCNUM = DT_MIPS_NUM # Most used by any processor 247 | 248 | # DT_* entries which fall between DT_VALRNGHI & DT_VALRNGLO use the 249 | # Dyn.d_un.d_val field of the Elf*_Dyn structure. This follows Sun's 250 | # approach. 251 | DT_VALRNGLO = 0x6ffffd00 252 | DT_GNU_PRELINKED = 0x6ffffdf5 # Prelinking timestamp 253 | DT_GNU_CONFLICTSZ = 0x6ffffdf6 # Size of conflict section 254 | DT_GNU_LIBLISTSZ = 0x6ffffdf7 # Size of library list 255 | DT_CHECKSUM = 0x6ffffdf8 256 | DT_PLTPADSZ = 0x6ffffdf9 257 | DT_MOVEENT = 0x6ffffdfa 258 | DT_MOVESZ = 0x6ffffdfb 259 | DT_FEATURE_1 = 0x6ffffdfc # Feature selection (DTF_*). 260 | DT_POSFLAG_1 = 0x6ffffdfd # Flags for DT_* entries, effecting the following DT_* entry. 261 | DT_SYMINSZ = 0x6ffffdfe # Size of syminfo table (in bytes) 262 | DT_SYMINENT = 0x6ffffdff # Entry size of syminfo 263 | DT_VALRNGHI = 0x6ffffdff 264 | #DT_VALTAGIDX(tag) (DT_VALRNGHI - (tag)) # Reverse order! 265 | DT_VALNUM = 12 266 | 267 | # DT_* entries which fall between DT_ADDRRNGHI & DT_ADDRRNGLO use the 268 | # Dyn.d_un.d_ptr field of the Elf*_Dyn structure. 269 | 270 | # If any adjustment is made to the ELF object after it has been 271 | # built these entries will need to be adjusted. 272 | DT_ADDRRNGLO = 0x6ffffe00 273 | DT_GNU_HASH = 0x6ffffef5 # GNU-style hash table. 274 | DT_TLSDESC_PLT = 0x6ffffef6 275 | DT_TLSDESC_GOT = 0x6ffffef7 276 | DT_GNU_CONFLICT = 0x6ffffef8 # Start of conflict section 277 | DT_GNU_LIBLIST = 0x6ffffef9 # Library list 278 | DT_CONFIG = 0x6ffffefa # Configuration information. 
279 | DT_DEPAUDIT = 0x6ffffefb # Dependency auditing. 280 | DT_AUDIT = 0x6ffffefc # Object auditing. 281 | DT_PLTPAD = 0x6ffffefd # PLT padding. 282 | DT_MOVETAB = 0x6ffffefe # Move table. 283 | DT_SYMINFO = 0x6ffffeff # Syminfo table. 284 | DT_ADDRRNGHI = 0x6ffffeff 285 | #DT_ADDRTAGIDX(tag) (DT_ADDRRNGHI - (tag)) # Reverse order! 286 | DT_ADDRNUM = 11 287 | 288 | # The versioning entry types. The next are defined as part of the GNU extension. 289 | DT_VERSYM = 0x6ffffff0 290 | 291 | DT_RELACOUNT = 0x6ffffff9 292 | DT_RELCOUNT = 0x6ffffffa 293 | 294 | # These were chosen by Sun. 295 | DT_FLAGS_1 = 0x6ffffffb # State flags, see DF_1_* below. 296 | DT_VERDEF = 0x6ffffffc # Address of version definition table 297 | DT_VERDEFNUM = 0x6ffffffd # Number of version definitions 298 | DT_VERNEED = 0x6ffffffe # Address of table with needed versions 299 | DT_VERNEEDNUM = 0x6fffffff # Number of needed versions 300 | #DT_VERSIONTAGIDX(tag) (DT_VERNEEDNUM - (tag)) # Reverse order! 301 | DT_VERSIONTAGNUM = 16 302 | 303 | # Sun added these machine-independent extensions in the "processor-specific" range. Be compatible. 304 | DT_AUXILIARY = 0x7ffffffd # Shared object to load before self 305 | DT_FILTER = 0x7fffffff # Shared object to get values from 306 | #DT_EXTRATAGIDX(tag) ((Elf32_Word)-((Elf32_Sword) (tag) <<1>>1)-1) 307 | DT_EXTRANUM = 3 308 | 309 | class InvalidHeader(Exception): 310 | def __init__(self, msg): 311 | Exception.__init__(self, msg) 312 | 313 | class ElfSectionHeader(object): 314 | # do our best to handle both Elf32_Shdr and Elf64_Shdr 315 | def __init__(self, elf, offset): 316 | try: 317 | self.elf = elf 318 | self.raw = elf.raw[offset:offset+elf.e_shentsize] 319 | if elf.sizeof_ptr == 8: 320 | fields = struct.unpack(elf.endian + "IIQQQQIIQQ", self.raw) 321 | else: 322 | fields = struct.unpack(elf.endian + "IIIIIIIIII", self.raw) 323 | self.sh_name = fields[0] 324 | self.sh_type = fields[1] 325 | self.sh_flags = fields[2] 326 | self.sh_addr = fields[3] 327 | self.sh_offset = fields[4] 328 | self.sh_size = fields[5] 329 | self.sh_link = fields[6] 330 | self.sh_info = fields[7] 331 | self.sh_addralign = fields[8] 332 | self.sh_entsize = fields[9] 333 | 334 | self.perms = PROT_READ 335 | 336 | if self.sh_type == SHT_NOBITS: 337 | size = 0 338 | else: 339 | size = self.sh_size 340 | 341 | if self.sh_flags & SHF_WRITE: 342 | self.perms |= PROT_WRITE 343 | if self.sh_flags & SHF_EXECINSTR: 344 | self.perms |= PROT_EXEC 345 | 346 | self.content = elf.raw 347 | 348 | except: 349 | raise InvalidHeader("Invalid section header") 350 | 351 | def __del__(self): 352 | del self.raw 353 | 354 | def get_string(self, offset): 355 | #if this isn't a STRTAB section we should probably throw an exception 356 | res = '' 357 | while offset < self.sh_size: 358 | ch = self.content[self.sh_offset + offset] 359 | if ch == '\x00': 360 | break 361 | res += ch 362 | offset += 1 363 | return res 364 | 365 | def get_symbol(self, offset): 366 | #if this isn't a SYMTAB section we should probably throw an exception 367 | strtab = self.elf.shdrs[self.sh_link] 368 | sym_start = self.sh_offset + offset 369 | st_name = struct.unpack(self.elf.endian + "I", self.content[sym_start:sym_start + 4])[0] 370 | if self.elf.sizeof_ptr == 4: 371 | idx = 2 372 | fields = struct.unpack(self.elf.endian + "IIBBH", self.content[sym_start + 4:sym_start + 16]) 373 | else: 374 | idx = 0 375 | fields = struct.unpack(self.elf.endian + "BBHQQ", self.content[sym_start + 4:sym_start + 24]) 376 | st_info = fields[idx] 377 | st_other = fields[idx + 1] 
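# Note on the two unpack formats above: after st_name, Elf32_Sym lays out
# (st_value, st_size, st_info, st_other, st_shndx) while Elf64_Sym lays out
# (st_info, st_other, st_shndx, st_value, st_size). Picking idx = 2 for the
# 32-bit case and idx = 0 for the 64-bit case lines up st_info/st_other/st_shndx,
# and the % 5 wrap in the two lines below recovers st_value and st_size from the
# front of the 32-bit tuple.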
378 | st_shndx = fields[idx + 2] 379 | st_value = fields[(idx + 3) % 5] 380 | st_size = fields[(idx + 4) % 5] 381 | name = strtab.get_string(st_name) 382 | #print "Symbol name: %s" % name 383 | return ElfSymbol(name, st_value, st_size, st_info, st_other, st_shndx) 384 | 385 | class ElfProgramHeader(object): 386 | # do our best to handle both Elf32_Phdr and Elf64_Phdr 387 | def __init__(self, elf, offset): 388 | try: 389 | self.raw = elf.raw[offset:offset+elf.e_phentsize] 390 | i = elf.sizeof_ptr >> 2 391 | if elf.sizeof_ptr == 8: 392 | fields = struct.unpack(elf.endian + "IIQQQQQQ", self.raw) 393 | self.p_flags = fields[1] 394 | else: 395 | fields = struct.unpack(elf.endian + "IIIIIIII", self.raw) 396 | self.p_flags = fields[6] 397 | 398 | self.p_type = fields[0] 399 | self.p_offset = fields[i] 400 | self.p_vaddr = fields[i + 1] 401 | self.p_paddr = fields[i + 2] 402 | self.p_filesz = fields[i + 3] 403 | self.p_memsz = fields[i + 4] 404 | self.p_align = fields[7] 405 | 406 | self.perms = 0 407 | if self.p_flags & PF_R: 408 | self.perms |= PROT_READ 409 | if self.p_flags & PF_W: 410 | self.perms |= PROT_WRITE 411 | if self.p_flags & PF_X: 412 | self.perms |= PROT_EXEC 413 | 414 | if self.p_type == PT_DYNAMIC: 415 | self.dyns = {} 416 | dyn_size = 2 * elf.sizeof_ptr 417 | num_dyns = self.p_filesz // dyn_size 418 | for i in range(num_dyns): 419 | d_tag = elf.get_pointer(self.p_vaddr + i * dyn_size) 420 | d_un = elf.get_pointer(self.p_vaddr + i * dyn_size + elf.sizeof_ptr) 421 | if d_tag == DT_NEEDED: 422 | if d_tag not in self.dyns: 423 | self.dyns[d_tag] = [] 424 | self.dyns[d_tag].append(d_un) 425 | elif d_tag == DT_NULL: 426 | break 427 | elif d_tag == DT_STRTAB: 428 | if elf.symbol_strtab != 0: 429 | #print "Existing strtab: 0x%x" % elf.symbol_strtab 430 | #print "DT_STRTAB: 0x%x" % d_un 431 | pass 432 | elf.symbol_strtab = d_un 433 | else: 434 | if d_tag in self.dyns: 435 | print "Unexpected duplicate of d_tag %d" % d_tag 436 | self.dyns[d_tag] = d_un 437 | except: 438 | raise InvalidHeader("Invalid program header") 439 | 440 | def __del__(self): 441 | del self.raw 442 | 443 | class ElfSymbol(object): 444 | 445 | def __init__(self, name, value, size, info, other, shndx): 446 | self.name = name 447 | self.value = value 448 | self.size = size 449 | self.info = info 450 | self.other = other 451 | self.shndx = shndx 452 | self.bind = (info >> 4) & 0xf 453 | self.type = info & 0xf 454 | 455 | def __del__(self): 456 | del self.name 457 | 458 | class ElfBase(Loader): 459 | 460 | def __init__(self, elf_file): 461 | Loader.__init__(self, elf_file) 462 | 463 | self.pe_offset = 0 464 | self.shdrs = [] 465 | self.phdrs = [] 466 | self.symbols = [] 467 | 468 | #need algorithm to propogate this attribute to callers when possible 469 | self.non_returning_funcs.append("abort") 470 | self.non_returning_funcs.append("err") 471 | self.non_returning_funcs.append("errx") 472 | self.non_returning_funcs.append("exit") 473 | self.non_returning_funcs.append("_exit") 474 | self.non_returning_funcs.append("__assert_fail") 475 | self.non_returning_funcs.append("pthread_exit") 476 | self.non_returning_funcs.append("verr") 477 | self.non_returning_funcs.append("verrx") 478 | 479 | def __del__(self): 480 | del self.shdrs[:] 481 | del self.shdrs 482 | del self.phdrs[:] 483 | del self.phdrs 484 | del self.symbols[:] 485 | del self.symbols 486 | Loader.__del__(self) 487 | 488 | # Perform common ELF validation tasks 489 | def is_valid(self): 490 | if self.raw[0:4] != '\x7fELF': 491 | return False 492 | 493 | if 
ord(self.raw[EI_VERSION]) != EV_CURRENT: 494 | return False 495 | 496 | if ord(self.raw[EI_CLASS]) != ELFCLASS32 and ord(self.raw[EI_CLASS]) != ELFCLASS64: 497 | return False 498 | 499 | if ord(self.raw[EI_DATA]) != ELFDATA2MSB and ord(self.raw[EI_DATA]) != ELFDATA2LSB: 500 | return False 501 | 502 | if ord(self.raw[EI_DATA]) == ELFDATA2MSB: 503 | self.set_endianness(BIG_ENDIAN) 504 | 505 | self.e_type = self.get_word(16) 506 | 507 | if self.e_type < ET_REL or self.e_type > ET_CORE: 508 | return False 509 | 510 | self.e_machine = self.get_word(18) 511 | 512 | if self.e_machine == EM_386: 513 | self.arch = capstone.CS_ARCH_X86 514 | self.mode = capstone.CS_MODE_32 515 | self.arch_name = 'x86-32' 516 | elif self.e_machine == EM_X86_64: 517 | self.arch = capstone.CS_ARCH_X86 518 | self.mode = capstone.CS_MODE_64 519 | self.arch_name = 'x86-64' 520 | elif self.e_machine == EM_ARM: 521 | self.arch = capstone.CS_ARCH_ARM 522 | self.mode = capstone.CS_MODE_ARM 523 | self.arch_name = 'ARM' 524 | elif self.e_machine == EM_AARCH64: 525 | self.arch = capstone.CS_ARCH_ARM64 526 | self.mode = capstone.CS_MODE_ARM 527 | self.arch_name = 'AARCH64' 528 | elif self.e_machine == EM_PPC: 529 | self.arch = capstone.CS_ARCH_PPC 530 | self.mode = capstone.CS_MODE_32 531 | self.arch_name = 'PPC' 532 | elif self.e_machine == EM_PPC64: 533 | self.arch = capstone.CS_ARCH_PPC 534 | self.mode = capstone.CS_MODE_64 535 | self.arch_name = 'PPC-64' 536 | elif self.e_machine == EM_SPARC: 537 | self.arch = capstone.CS_ARCH_SPARC 538 | self.mode = capstone.CS_MODE_32 539 | self.arch_name = 'SPARC' 540 | elif self.e_machine == EM_MIPS: 541 | self.arch = capstone.CS_ARCH_MIPS 542 | if self.sizeof_ptr == 4: 543 | self.mode = capstone.CS_MODE_MIPS32 544 | self.arch_name = 'MIPS32' 545 | elif self.sizeof_ptr == 8: 546 | self.mode = capstone.CS_MODE_MIPS64 547 | self.arch_name = 'MIPS64' 548 | else: 549 | # anything else, we don't recognize 550 | # could move this check into the caller 551 | # to allow it to determine whether it has an appropriate 552 | # disassembler 553 | return False 554 | 555 | if self.endian == BIG_ENDIAN: 556 | self.mode |= capstone.CS_MODE_BIG_ENDIAN 557 | 558 | self.e_version = self.get_dword(20) 559 | self.e_entry = self.get_pointer(24) 560 | self.e_phoff = self.get_pointer(24 + self.sizeof_ptr) 561 | self.e_shoff = self.get_pointer(24 + self.sizeof_ptr * 2) 562 | self.e_flags = self.get_dword(24 + self.sizeof_ptr * 3) 563 | fields_offset = 28 + self.sizeof_ptr * 3 564 | fields = [] 565 | for i in range(6): 566 | # could do all this with struct.unpack, would need to ensure 567 | # we honor endian-ness in the format string that is used 568 | fields.append(self.get_word(fields_offset + i * 2)) 569 | self.e_ehsize = fields[0] 570 | self.e_phentsize = fields[1] 571 | self.e_phnum = fields[2] 572 | self.e_shentsize = fields[3] 573 | self.e_shnum = fields[4] 574 | self.e_shstrndx = fields[5] 575 | 576 | self.symbol_strtab = 0 577 | 578 | # some sanity checks 579 | 580 | # check e_ehsize 581 | if self.e_ehsize != (40 + 3 * self.sizeof_ptr): 582 | return False 583 | 584 | if self.e_shstrndx >= self.e_shnum: 585 | return False 586 | 587 | # check e_shentsize 588 | if self.e_shentsize != (16 + 6 * self.sizeof_ptr): 589 | return False 590 | 591 | # check e_phentsize 592 | if self.e_phentsize != (8 + 6 * self.sizeof_ptr): 593 | return False 594 | 595 | # Check that there is room for the phdr table 596 | if self.e_phoff > (len(self.raw) - self.e_phentsize * self.e_phnum): 597 | return False 598 | 599 | # Check that 
there is room for the shdr table 600 | if self.e_shoff > (len(self.raw) - self.e_shentsize * self.e_shnum): 601 | return False 602 | 603 | # many other checks we could perform 604 | return True 605 | 606 | def resolve_sym(self, symidx, addr): 607 | if symidx < len(self.symbols): 608 | sym = self.symbols[symidx] 609 | #print "Resolving symbol: %s" % sym.name 610 | self.add_symbol(addr, sym.name) 611 | if sym.type == STT_FUNC: 612 | self.add_import(addr, sym.name) 613 | 614 | def parse_rel(self, addr, size): 615 | if self.sizeof_ptr == 4: 616 | mask = 0xff 617 | shift = 8 618 | else: 619 | mask = 0xffffffff 620 | shift = 32 621 | relsz = 2 * self.sizeof_ptr 622 | num_rels = size // relsz 623 | for i in range(num_rels): 624 | r_offset = self.get_pointer(addr + i * relsz) 625 | r_info = self.get_pointer(addr + i * relsz + self.sizeof_ptr) 626 | r_sym = r_info >> shift 627 | r_type = r_info & mask 628 | #print "REL r_offset 0x%x" % r_offset 629 | self.resolve_sym(r_sym, r_offset) 630 | 631 | def parse_rela(self, addr, size): 632 | if self.sizeof_ptr == 4: 633 | mask = 0xff 634 | shift = 8 635 | else: 636 | mask = 0xffffffff 637 | shift = 32 638 | relsz = 3 * self.sizeof_ptr 639 | num_rels = size // relsz 640 | for i in range(num_rels): 641 | r_offset = self.get_pointer(addr + i * relsz) 642 | r_info = self.get_pointer(addr + i * relsz + self.sizeof_ptr) 643 | r_addend = self.get_pointer(addr + i * relsz + 2 * self.sizeof_ptr) 644 | r_sym = r_info >> shift 645 | r_type = r_info & mask 646 | #print "RELA r_offset 0x%x" % r_offset 647 | self.resolve_sym(r_sym, r_offset) 648 | 649 | def parse_imports(self): 650 | if self.dyn_hdr is None: 651 | return 652 | jmprel = None 653 | pltgot = None 654 | if DT_JMPREL in self.dyn_hdr.dyns: 655 | jmprel = self.dyn_hdr.dyns[DT_JMPREL] 656 | pltrelsz = self.dyn_hdr.dyns[DT_PLTRELSZ] 657 | pltrel = self.dyn_hdr.dyns[DT_PLTREL] 658 | if DT_PLTGOT in self.dyn_hdr.dyns: 659 | pltgot = self.dyn_hdr.dyns[DT_PLTGOT] 660 | 661 | if jmprel is not None: 662 | if pltrel == DT_REL: 663 | self.parse_rel(jmprel, pltrelsz) 664 | elif pltrel == DT_RELA: 665 | self.parse_rela(jmprel, pltrelsz) 666 | else: 667 | print "UNEXPECTED PLTREL value: %d" % pltrel 668 | 669 | def parse_symbols(self): 670 | symsz = 8 + 2 * self.sizeof_ptr 671 | for s in self.shdrs: 672 | if s.sh_type == SHT_SYMTAB or s.sh_type == SHT_DYNSYM: 673 | num_syms = s.sh_size // symsz 674 | #print "Section %s has %d symbols" % (s.name, num_syms) 675 | for i in range(num_syms): 676 | sym = s.get_symbol(i * symsz) 677 | self.symbols.append(sym) 678 | #if sym.type == STT_FUNC: 679 | #print "Function symbol %s at address 0x%x" % (name, st_value) 680 | 681 | def parse_exports(self): 682 | self.add_export(self.start, "_start") 683 | # add DT_INIT == init_proc and DT_FINI == term_proc 684 | if self.dyn_hdr is not None: 685 | if DT_INIT in self.dyn_hdr.dyns: 686 | self.add_export(self.dyn_hdr.dyns[DT_INIT], ".init_proc") 687 | if DT_FINI in self.dyn_hdr.dyns: 688 | self.add_export(self.dyn_hdr.dyns[DT_FINI], ".term_proc") 689 | for sym in self.symbols: 690 | if sym.type == STT_FUNC and sym.value != 0: 691 | self.add_export(sym.value, sym.name) 692 | #for addr,name in self.exports_by_addr.iteritems(): 693 | #print "EXPORT: 0x%x - %s" % (addr, name) 694 | 695 | def load_phdrs(self): 696 | self.dyn_hdr = None 697 | for i in range(self.e_phnum): 698 | phdr = ElfProgramHeader(self, self.e_phoff + self.e_phentsize * i) 699 | self.phdrs.append(phdr) 700 | if phdr.p_type == PT_DYNAMIC: 701 | self.dyn_hdr = phdr 702 | if 
phdr.p_type == PT_LOAD: 703 | va = phdr.p_vaddr 704 | if self.image_base is None or va < self.image_base: 705 | self.image_base = va 706 | mr = self.raw[phdr.p_offset:phdr.p_offset+phdr.p_filesz].ljust(phdr.p_memsz, '\x00') 707 | self.add_mapped(va, va + phdr.p_memsz, phdr.perms, mr) 708 | 709 | def load_shdrs(self): 710 | self.sections_by_name.clear() 711 | 712 | for i in range(self.e_shnum): 713 | shdr = ElfSectionHeader(self, self.e_shoff + self.e_shentsize * i) 714 | self.shdrs.append(shdr) 715 | if shdr.sh_type == SHT_STRTAB and i != self.e_shstrndx and self.symbol_strtab != 0: 716 | self.symbol_strtab = shdr.sh_addr 717 | 718 | # now that we have sections, go back and pull section names 719 | # out of the sh names table 720 | strtab = self.shdrs[self.e_shstrndx] 721 | for s in self.shdrs: 722 | # defer setting the name until we are sure we know about the shstrtab 723 | s.name = strtab.get_string(s.sh_name) 724 | 725 | va = s.sh_addr 726 | # match perms against phdrs? sh_flags ?? 727 | 728 | if (s.sh_flags & SHF_ALLOC) == 0: 729 | print 'Skipping section %s' % s.name 730 | continue 731 | self.add_section(s.name, va, va + s.sh_size, s.perms, s.sh_size) 732 | 733 | def load(self): 734 | if self.is_valid(): 735 | del self.mapped[:] 736 | del self.sections[:] 737 | self.phdrs = [] 738 | self.shdrs = [] 739 | 740 | self.osabi = ord(self.raw[EI_OSABI]) 741 | self.image_base = None # set in load_phdrs 742 | self.start = self.e_entry 743 | 744 | self.load_phdrs() 745 | self.load_shdrs() 746 | 747 | # deal with dynamic section imports 748 | # deal with .got .plt 749 | # deal with exports 750 | # deal with symbol table 751 | # deal with dwarf and other debug info 752 | 753 | self.parse_symbols() 754 | self.parse_imports() 755 | self.parse_exports() 756 | return True 757 | return False 758 | 759 | def find_main(self, insts, to, frm): 760 | if self.arch != capstone.CS_ARCH_X86: 761 | return None 762 | addr = self.start 763 | if self.osabi != ELFOSABI_LINUX: 764 | #find main by scanning Linux start stup 765 | while addr in frm: 766 | inst = insts[addr] 767 | if inst.group(capstone.CS_GRP_JUMP): 768 | break 769 | xrefs = frm[addr] 770 | if inst.group(capstone.CS_GRP_CALL): 771 | for x in xrefs: 772 | if x[1] == XR_CALL: 773 | #call to libc_start_main 774 | last = to[addr][0][0] 775 | inst = insts[last] 776 | main = inst.operands[-1].value.imm 777 | return main 778 | break 779 | elif len(xrefs) == 1: 780 | if xrefs[0][1] == XR_FLOW: 781 | addr = xrefs[0][0] 782 | else: 783 | break 784 | else: 785 | break 786 | return None 787 | 788 | class Elf32(ElfBase): 789 | 790 | def __init__(self, elf_file): 791 | ElfBase.__init__(self, elf_file) 792 | 793 | # override to perform file type validation checks such 794 | # as checking magic numbers, etc 795 | def is_valid(self): 796 | # try: 797 | if ord(self.raw[EI_CLASS]) != ELFCLASS32: 798 | return False 799 | self.set_pointer_size(4) 800 | if not ElfBase.is_valid(self): 801 | return False 802 | # now do Elf32 specific checks 803 | # following e_ident we have: self.endian + "HHIIIIIHHHHHH" 804 | # except Exception as e: 805 | #any exception means it's not a PE32 806 | # raise e 807 | return True 808 | 809 | class Elf64(ElfBase): 810 | 811 | def __init__(self, elf_file): 812 | ElfBase.__init__(self, elf_file) 813 | 814 | # override to perform file type validation checks such 815 | # as checking magic numbers, etc 816 | def is_valid(self): 817 | try: 818 | if ord(self.raw[EI_CLASS]) != ELFCLASS64: 819 | return False 820 | self.set_pointer_size(8) 821 | if not 
ElfBase.is_valid(self): 822 | return False 823 | #now do Elf64 specific checks 824 | # following e_ident we have: self.endian + "HHIQQQIHHHHHH 825 | except Exception as e: 826 | #any exception means it's not a PE32 827 | raise e 828 | # return False 829 | return True 830 | -------------------------------------------------------------------------------- /fREedom.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | Stand-alone binnavi compatible disassembler based on capstone 5 | ''' 6 | 7 | __author__ = "Chris Eagle" 8 | __copyright__ = "Copyright 2015, Chris Eagle" 9 | __credits__ = ["Chris Eagle"] 10 | __license__ = "GPL" 11 | __version__ = "2.0" 12 | __maintainer__ = "Chris Eagle" 13 | __email__ = "cseagle@gmail.com" 14 | __status__ = "Use at your own risk" 15 | 16 | import sys 17 | import argparse 18 | import capstone 19 | import x86_disasm 20 | import pe_loader 21 | import elf_loader 22 | import binnavi_db 23 | 24 | class UnsupportedArch(Exception): 25 | def __init__(self, msg): 26 | Exception.__init__(self, msg) 27 | 28 | class UnsupportedFormat(Exception): 29 | def __init__(self, msg): 30 | Exception.__init__(self,msg) 31 | 32 | def main(args): 33 | 34 | # cycle through available loaders, if one matches 35 | # pass it into the disassembler 36 | ldr = pe_loader.Pe32(args.binary) 37 | if not ldr.load(): 38 | del ldr 39 | ldr = pe_loader.Pe64(args.binary) 40 | if not ldr.load(): 41 | del ldr 42 | ldr = elf_loader.Elf32(args.binary) 43 | if not ldr.load(): 44 | del ldr 45 | ldr = elf_loader.Elf64(args.binary) 46 | if not ldr.load(): 47 | del ldr 48 | raise UnsupportedFormat("Unsupported file format for %s" % args.binary) 49 | 50 | if ldr.arch == capstone.CS_ARCH_X86: 51 | dis = x86_disasm.x86_disasm(ldr) 52 | else: 53 | raise UnsupportedArch("Unsupported processor architecture for %s" % args.binary) 54 | 55 | dis.generate_data() 56 | 57 | print "found %d instructions" % len(dis.visited) 58 | print "found %d basic blocks" % len(dis.basic_blocks) 59 | print "found %d functions" % len(dis.call_targets) 60 | 61 | ''' 62 | print "Functions identified at:" 63 | dis.call_targets.sort() 64 | for c in dis.call_targets: 65 | print " 0x%x" % c 66 | ''' 67 | 68 | #dis.print_disassembly() 69 | 70 | db = binnavi_db.binnavi_db(args.database, args.user, args.passwd, args.dbhost) 71 | db.export(dis) 72 | 73 | # add argument parsing for database commection parameters 74 | if __name__ == "__main__": 75 | parser = argparse.ArgumentParser(description='Export to binnavi.') 76 | parser.add_argument('--database', help='name of database to export to') 77 | parser.add_argument('--user', help='database user name') 78 | parser.add_argument('--pass', dest='passwd', help='database user password') 79 | parser.add_argument('--dbhost', help='database host name') 80 | parser.add_argument('--binary', type=str, required=False, help='binary file to export') 81 | parser.add_argument('--delete', action='store_true', required=False, 82 | help='flag to initiate module deletion') 83 | parser.add_argument('--modules', type=int, nargs='+', required=False, 84 | help='module numbers to delete') 85 | 86 | args = parser.parse_args() 87 | 88 | if args.delete: 89 | db = binnavi_db.binnavi_db(args.database, args.user, args.passwd, args.dbhost) 90 | for m in args.modules: 91 | db.delete_module(m) 92 | else: 93 | main(args) 94 | -------------------------------------------------------------------------------- /loader.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | Base class for loaders (file parsers) for a stand-alone binnavi compatible disassembler 5 | ''' 6 | 7 | __author__ = "Chris Eagle" 8 | __copyright__ = "Copyright 2015, Chris Eagle" 9 | __credits__ = ["Chris Eagle"] 10 | __license__ = "GPL" 11 | __version__ = "2.0" 12 | __maintainer__ = "Chris Eagle" 13 | __email__ = "cseagle@gmail.com" 14 | __status__ = "Use at your own risk" 15 | 16 | import sys 17 | import struct 18 | import hashlib 19 | import os 20 | 21 | LITTLE_ENDIAN = '<' 22 | BIG_ENDIAN = '>' 23 | 24 | PROT_READ = 1 25 | PROT_WRITE = 2 26 | PROT_EXEC = 4 27 | PROT_ALL = PROT_READ | PROT_WRITE | PROT_EXEC 28 | 29 | class MappedRegion(object): 30 | def __init__(self, start, end, perms, raw): 31 | self.start = start 32 | self.end = end 33 | self.perms = perms 34 | self.raw = raw 35 | 36 | def contains(self, addr, blen = 1): 37 | _end = addr + blen 38 | return addr >= self.start and addr < self.end and _end <= self.end 39 | 40 | def get_bytes(self, addr, blen = 1): 41 | if self.contains(addr, blen): 42 | offset = addr - self.start 43 | return self.raw[offset:offset+blen] 44 | return None 45 | 46 | class Section(object): 47 | def __init__(self, name, start, end, perms, filesz): 48 | self.name = name 49 | self.start = start 50 | self.end = end 51 | self.perms = perms 52 | self.filesz = filesz 53 | 54 | print "Created section %s, 0x%x:0x%x, raw length 0x%x, perms %d" % (self.name, self.start, self.end, filesz, self.perms) 55 | 56 | def contains(self, addr): 57 | return addr >= self.start and addr < self.end 58 | 59 | def get_raw_bytes(self, ldr): 60 | raw = ldr.get_bytes(self.start, self.filesz) 61 | if raw is None: 62 | sys.stderr.write("Failed to get raw content for section %s at address 0x%x for size 0x%s\n" % (self.name, self.start, self.filesz)) 63 | return ldr.get_bytes(self.start, self.filesz) 64 | 65 | class Loader(object): 66 | 67 | def __init__(self, fname): 68 | self.exe = fname 69 | f = open(fname, 'rb') 70 | self.raw = f.read() 71 | self.md5 = hashlib.md5(self.raw).hexdigest() 72 | self.sha1 = hashlib.sha1(self.raw).hexdigest() 73 | f.close() 74 | 75 | self.name = os.path.basename(fname) 76 | 77 | self.image_base = 0 78 | self.start = 0 79 | 80 | self.sections = [] # Section 81 | self.sections_by_name = {} # str:Section 82 | self.imports_by_name = {} # str:int 83 | self.imports_by_addr = {} # int:str 84 | self.exports_by_addr = {} # int:str 85 | 86 | self.symbols_by_addr = {} # int:str 87 | self.symbols_by_name = {} # str:int 88 | self.mapped = [] # MappedRegion 89 | 90 | self.non_returning_funcs = [] 91 | 92 | self.add_mapped(0, len(self.raw), PROT_ALL, self.raw) 93 | 94 | self.set_endianness(LITTLE_ENDIAN) 95 | self.sizeof_ptr = 4 96 | self.arch = None 97 | self.mode = None 98 | self.cached_section = None 99 | self.cached_region = None 100 | 101 | def __del__(self): 102 | del self.mapped[:] 103 | del self.sections[:] 104 | del self.sections 105 | del self.raw 106 | del self.name 107 | self.sections_by_name.clear() 108 | self.imports_by_name.clear() 109 | self.imports_by_addr.clear() 110 | self.exports_by_addr.clear() 111 | self.symbols_by_name.clear() 112 | self.symbols_by_addr.clear() 113 | 114 | def set_endianness(self, which_endian): 115 | self.endian = which_endian 116 | 117 | def set_pointer_size(self, sizeof_ptr): 118 | self.sizeof_ptr = sizeof_ptr 119 | 120 | # override to create a mapped process binary image where 121 | # raw does not match the 
memory layout of the running 122 | # process. 123 | def load(self): 124 | # probably want to start with: 125 | # del sections[:] 126 | # sections_by_name.clear() 127 | pass 128 | 129 | def get_mapped(self, addr): 130 | if self.cached_region is not None and self.cached_region.contains(addr): 131 | return self.cached_region 132 | for m in self.mapped: 133 | if m.contains(addr): 134 | self.cached_region = m 135 | return m 136 | return None 137 | 138 | #regions should not overlap! 139 | def add_mapped(self, start, end, perms, raw): 140 | self.mapped.append(MappedRegion(start, end, perms, raw)) 141 | 142 | def del_mapped(self, start): 143 | rem = None 144 | for m in self.mapped: 145 | if m.start == addr: 146 | if self.cached_region == m: 147 | self.cached_region = None 148 | rem = m 149 | break 150 | if rem is not None: 151 | self.mapped.remove(rem) 152 | del rem 153 | 154 | # override to perform file type validation checks such 155 | # as checking magic numbers, etc 156 | def is_valid(self): 157 | return True 158 | 159 | def get_bytes(self, addr, len): 160 | m = self.get_mapped(addr) 161 | if m is not None: 162 | return m.get_bytes(addr, len) 163 | return None 164 | 165 | def get_byte(self, addr): 166 | return self.get_bytes(addr, 1) 167 | 168 | def get_word(self, addr): 169 | return struct.unpack(self.endian + "H", self.get_bytes(addr, 2))[0] 170 | 171 | def get_dword(self, addr): 172 | try: 173 | return struct.unpack(self.endian + "I", self.get_bytes(addr, 4))[0] 174 | except Exception, e: 175 | print "Unable to read dword from address 0x%x" % addr 176 | raise e 177 | 178 | def get_qword(self, addr): 179 | return struct.unpack(self.endian + "Q", self.get_bytes(addr, 8))[0] 180 | 181 | def get_pointer(self, addr): 182 | if self.sizeof_ptr == 4: 183 | return self.get_dword(addr) 184 | elif self.sizeof_ptr == 8: 185 | return self.get_qword(addr) 186 | 187 | def get_string(self, addr): 188 | res = '' 189 | while True: 190 | ch = self.get_byte(addr) 191 | if ch == '\x00': 192 | break 193 | addr += 1 194 | res += ch 195 | return res 196 | 197 | # get containing section for given address 198 | def get_section(self, addr): 199 | if self.cached_section is not None and self.cached_section.contains(addr): 200 | return self.cached_section 201 | for s in self.sections: 202 | if s.contains(addr): 203 | self.cached_section = s 204 | return s 205 | return None 206 | 207 | def add_section(self, name, start, end, perms, filesz): 208 | sect = Section(name, start, end, perms, filesz) 209 | self.sections.append(sect) 210 | self.sections_by_name[name] = sect 211 | 212 | def add_import(self, addr, name): 213 | self.imports_by_addr[addr] = name 214 | self.imports_by_name[name] = addr 215 | 216 | def add_symbol(self, addr, name): 217 | self.symbols_by_addr[addr] = name 218 | self.symbols_by_name[name] = addr 219 | 220 | def add_export(self, addr, name): 221 | self.exports_by_addr[addr] = name 222 | 223 | #override in subclasses if you have an algorithm 224 | #for finding main given the address of start 225 | #and all currently known instructions 226 | def find_main(self, insts, to, frm): 227 | return None 228 | 229 | -------------------------------------------------------------------------------- /pe_loader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | Crude PE32 / PE32+ loader, conforming to the Loader interface, for a stand-alone binnavi compatible disassembler 5 | ''' 6 | 7 | __author__ = "Chris Eagle" 8 | __copyright__ = "Copyright 
2015, Chris Eagle" 9 | __credits__ = ["Chris Eagle"] 10 | __license__ = "GPL" 11 | __version__ = "2.0" 12 | __maintainer__ = "Chris Eagle" 13 | __email__ = "cseagle@gmail.com" 14 | __status__ = "Use at your own risk" 15 | 16 | import sys 17 | import struct 18 | import hashlib 19 | import binascii 20 | import capstone 21 | from loader import * 22 | 23 | IMAGE_FILE_MACHINE_I386 = 0x14c 24 | IMAGE_FILE_MACHINE_ARM = 0x1c0 25 | IMAGE_FILE_MACHINE_THUMB = 0x1c2 26 | IMAGE_FILE_MACHINE_ARMV7 = 0x1c4 27 | IMAGE_FILE_MACHINE_AMD64 = 0x8664 28 | 29 | OK_PE_MACHINES = [IMAGE_FILE_MACHINE_I386, IMAGE_FILE_MACHINE_ARM, 30 | IMAGE_FILE_MACHINE_THUMB, IMAGE_FILE_MACHINE_ARMV7, 31 | IMAGE_FILE_MACHINE_AMD64] 32 | 33 | IMAGE_NT_OPTIONAL_HDR32_MAGIC = 0x10b 34 | IMAGE_NT_OPTIONAL_HDR64_MAGIC = 0x20b 35 | 36 | IMAGE_DOS_SIGNATURE = 0x5A4D 37 | IMAGE_NT_SIGNATURE = 0x00004550 38 | 39 | IMAGE_SCN_MEM_EXECUTE = 0x20000000 40 | IMAGE_SCN_MEM_READ = 0x40000000 41 | IMAGE_SCN_MEM_WRITE = 0x80000000 42 | 43 | DATA_DIRECTORY_EXPORT = 0 44 | DATA_DIRECTORY_IMPORT = 1 45 | 46 | class InvalidHeader(Exception): 47 | def __init__(self, msg): 48 | Exception.__init__(self, msg) 49 | 50 | class FileHeader(object): 51 | 52 | def __init__(self, raw, offset): 53 | self.raw = raw[offset:offset+20] 54 | fields = struct.unpack("= self.rva and func_rva < self.end_rva: 144 | #this is a forwarded entry 145 | fcount += 1 146 | continue 147 | else: 148 | self.pe.add_export(func_rva + self.pe.image_base, name) 149 | 150 | for f in range(self.NumberOfNames, self.NumberOfFunctions): 151 | name = "%s_%d" % (self.dll.replace('.', '_'), f) 152 | func_idx = self.pe.get_word(aono + f * 2) 153 | func_rva = self.pe.get_dword(aof + func_idx * 4) 154 | self.pe.add_export(func_rva + self.pe.image_base, name) 155 | 156 | class OptionalHeaderBase(object): 157 | 158 | def __init__(self, raw, offset): 159 | try: 160 | self.common = raw[offset:offset+24] 161 | fields = struct.unpack("= 2 and not self.is_returning_call(from_list): 128 | # add all destinations to basic_blocks 129 | for xr in from_list: 130 | if xr[0] != 0: 131 | #treat address zero differently, don't add xrefs to it 132 | self.add_basic_block_start(xr[0]) 133 | 134 | if to not in self.xrefs_to: 135 | self.xrefs_to[to] = [] 136 | self.xrefs_to[to].append((frm, xr_type)) 137 | to_list = self.xrefs_to[to] 138 | if to not in self.names: 139 | if xr_type == XR_CALL: 140 | self.names[to] = 'sub_%x' % to 141 | elif xr_type >= XR_JUMP: # JUMP or JCC 142 | self.names[to] = 'loc_%x' % to 143 | self.add_loc(to) 144 | elif xr_type == XR_CALL and self.names[to] == ('loc_%x' % to): 145 | #update loc_ label to sub_ label now that a call was found 146 | self.names[to] = 'sub_%x' % to 147 | if to not in self.basic_blocks: 148 | if xr_type == XR_CALL or len(to_list) > 1: 149 | if to != 0: 150 | #treat address zero differently, don't add xrefs to it 151 | self.add_basic_block_start(to) 152 | 153 | #add an address we need to explore 154 | def add_loc(self, addr): 155 | if addr in self.visited: 156 | return 157 | self.locs.append(addr) 158 | 159 | def is_conditional(self, i): 160 | op = i.bytes[0] 161 | if (op >= 0x70 and op <= 0x7f) or (op >= 0xe0 and op <= 0xe3): 162 | return True 163 | elif op == 0x0f: 164 | op2 = i.bytes[1] 165 | if op2 >= 0x80 and op2 <= 0x8f: 166 | return True 167 | return False 168 | 169 | def process_jump(self, i): 170 | opcode = i.bytes[0] 171 | offset = signed_byte(i.bytes[1]) 172 | short_tgt = i.address + i.size + offset 173 | if opcode == 0xeb: # jmp disp8 174 | 
self.add_xref(i.address, short_tgt, XR_JUMP) 175 | self.jmp_targets.add(short_tgt) 176 | return True 177 | if opcode == 0xe9: # jmp disp32 178 | offset = signed_dword(i.bytes[1:5]) 179 | tgt = i.address + i.size + offset 180 | self.add_xref(i.address, tgt, XR_JUMP) 181 | self.jmp_targets.add(tgt) 182 | return True 183 | if (opcode >= 0x70 and opcode <= 0x7f) or opcode == 0xe3: # jcc jecx disp8 184 | self.add_xref(i.address, short_tgt, XR_JCC) 185 | self.jmp_targets.add(short_tgt) 186 | return False 187 | elif opcode == 0x0f: # jcc disp32 188 | op2 = i.bytes[1] 189 | if op2 >= 0x80 and op2 <= 0x8f: 190 | offset = signed_dword(i.bytes[2:6]) 191 | tgt = i.address + i.size + offset 192 | self.add_xref(i.address, tgt, XR_JCC) 193 | self.jmp_targets.add(tgt) 194 | return False 195 | # else: 196 | # sys.stderr.write("Classified jump (0x0f), not categorized at address 0x%x: %s\n" % (i.address, tostr(i))) 197 | elif opcode == 0xff: 198 | modrm = i.modrm # i.bytes[1] 199 | if modrm == 0x25: #near jump [disp] 200 | slot = unsigned_dword(i.bytes[2:6]) 201 | if i.address in self.loader.imports_by_addr: #this is a thunk DO BETTER HERE 202 | self.thunks.add(i.address) 203 | self.names[i.address] = self.loader.imports_by_addr[i.address] 204 | # else: 205 | # sys.stderr.write("Classified jump (0xff), not categorized at address 0x%x: %s\n" % (i.address, tostr(i))) 206 | return True 207 | # else: 208 | # sys.stderr.write("Classified jump, not categorized at address 0x%x: %s\n" % (i.address, tostr(i))) 209 | return True 210 | 211 | def process_call(self, i): 212 | opcode = i.bytes[0] 213 | if opcode == 0xe8: # call disp32 214 | offset = signed_dword(i.bytes[1:5]) 215 | tgt = i.address + i.size + offset 216 | self.call_targets.add(tgt) 217 | self.add_xref(i.address, tgt, XR_CALL) 218 | #add a minimal stack frame for this function, it will have at least a 219 | #return address 220 | #self.add_type("__SF%x" % tgt, self.loader.sizeof_ptr, None, False, STRUCT) 221 | return tgt in self.names and self.names[tgt] in self.loader.non_returning_funcs 222 | elif opcode == 0xff: 223 | modrm = i.modrm # i.bytes[1] 224 | if modrm == 0x15: #near call [disp] 225 | slot = unsigned_dword(i.bytes[2:6]) 226 | #sometimes this will be an imported function other times not 227 | #only xref that is really taking place here is a data reference 228 | #self.add_xref(i.address, slot, XR_CALL) 229 | # else: 230 | # sys.stderr.write("Classified call (0xff), not categorized at address 0x%x: %s\n" % (i.address, tostr(i))) 231 | # else: 232 | # sys.stderr.write("Classified call, not categorized at address 0x%x: %s\n" % (i.address, tostr(i))) 233 | #assume all calls return 234 | return False 235 | 236 | def add_address_ref(self, inst, opnum, node_id, aref_addr, false_id): 237 | is_jump = inst.group(capstone.CS_GRP_JUMP) 238 | is_call = inst.group(capstone.CS_GRP_CALL) 239 | if inst.operands[opnum].type == capstone.x86_const.X86_OP_IMM: 240 | if is_jump: 241 | if self.is_conditional(inst): 242 | self.arefs.append(AddressRef(inst.address, opnum, node_id, aref_addr, CONDITION_TRUE)) 243 | self.arefs.append(AddressRef(inst.address, opnum, false_id, inst.address + inst.size, CONDITION_FALSE)) 244 | else: 245 | self.arefs.append(AddressRef(inst.address, opnum, node_id, aref_addr, UNCONDITIONAL)) 246 | elif is_call: 247 | self.arefs.append(AddressRef(inst.address, opnum, node_id, aref_addr, CALL_DIRECT)) 248 | else: 249 | #raw data, aref_addr is an offset 250 | pass 251 | elif inst.operands[opnum].type == capstone.x86_const.X86_OP_MEM: 252 | if 
is_jump: 253 | dest = self.get_pointer(aref_addr) 254 | #try to determine whether this is a switch table 255 | if dest is not None and self.is_possible_code(dest): 256 | self.arefs.append(AddressRef(inst.address, opnum, node_id, dest, UNCONDITIONAL)) 257 | elif is_call: 258 | dest = self.get_pointer(aref_addr) 259 | #try to determine whether this is a switch table 260 | if dest is not None and self.is_possible_code(dest): 261 | self.arefs.append(AddressRef(inst.address, opnum, node_id, dest, CALL_INDIRECT)) 262 | else: 263 | #raw data, aref_addr is a pointer 264 | #could check content at aref_addr to see if its a string 265 | self.arefs.append(AddressRef(inst.address, opnum, node_id, aref_addr, DATA)) 266 | 267 | #THIS IS HIGHLY ARCHITECTURE DEPENDENT 268 | def process_operands(self, inst): 269 | opnum = 0 270 | #annotate the CsInsn with the operands we build here 271 | op_exprs = [] 272 | for op in inst.operands: 273 | add_aref = False 274 | aref_addr = 0 275 | aref_op = 0 276 | aref_type = -1 277 | op_size = 'b%d' % op.size 278 | tree = [] 279 | tree.append(OpNode(SIZE_PREFIX, op_size)) 280 | if op.type == capstone.x86_const.X86_OP_REG: 281 | reg = inst.reg_name(op.reg) 282 | #operand expr is: op_size reg 283 | tree.append(OpNode(REGISTER, reg)) 284 | elif op.type == capstone.x86_const.X86_OP_IMM: 285 | imm = op.imm 286 | #operand expr is: op_size imm 287 | tree.append(OpNode(IMMEDIATE_INT, imm)) 288 | s = self.loader.get_section(imm) 289 | if s is not None: 290 | #immediate refers to a memory address 291 | #let's add an AddressRef 292 | add_aref = True 293 | aref_op = 1 294 | aref_addr = imm 295 | elif op.type == capstone.x86_const.X86_OP_MEM: 296 | if op.mem.segment == capstone.x86_const.X86_REG_INVALID: 297 | op_seg = None 298 | else: 299 | op_seg = '%s:' % inst.reg_name(op.mem.segment) 300 | tree.append(OpNode(OPERATOR + 10, op_seg)) # 10 = unary operator 301 | 302 | op_disp = op.mem.disp 303 | tree.append(OpNode(DEREFERENCE, '[')) 304 | s = self.loader.get_section(op_disp) 305 | if s is not None: 306 | #immediate refers to a memory address 307 | #let's add an AddressRef 308 | add_aref = True 309 | aref_addr = op_disp 310 | 311 | if op.mem.base != capstone.x86_const.X86_REG_INVALID: #has a base reg 312 | op_base = inst.reg_name(op.mem.base) 313 | if op.mem.index != capstone.x86_const.X86_REG_INVALID: #has an index reg 314 | op_scale = op.mem.scale 315 | op_index = inst.reg_name(op.mem.index) 316 | tree.append(OpNode(OPERATOR + 20, '+')) # 20 = unary operator 317 | tree.append(OpNode(REGISTER, op_base)) 318 | if op_scale == 1: 319 | if op_disp == 0: 320 | #operand expr is: op_size op_seg [ + op_base op_index 321 | tree.append(OpNode(REGISTER, op_index)) 322 | else: 323 | #operand expr is: op_size op_seg [ + op_base + op_index op_disp 324 | tree.append(OpNode(OPERATOR + 20, '+')) # 20 = unary operator 325 | tree.append(OpNode(REGISTER, op_index)) 326 | aref_op = len(tree) 327 | tree.append(OpNode(IMMEDIATE_INT, op_disp)) 328 | else: 329 | if op_disp == 0: 330 | #operand expr is: op_size op_seg [ + op_base * op_index op_scale 331 | tree.append(OpNode(OPERATOR + 20, '*')) # 20 = unary operator 332 | tree.append(OpNode(REGISTER, op_index)) 333 | tree.append(OpNode(IMMEDIATE_INT, op_scale)) 334 | else: 335 | #operand expr is: op_size op_seg [ + op_base + * op_index op_scale op_disp 336 | tree.append(OpNode(OPERATOR + 20, '+')) # 20 = unary operator 337 | tree.append(OpNode(OPERATOR + 20, '*')) # 20 = unary operator 338 | tree.append(OpNode(REGISTER, op_index)) 339 | 
tree.append(OpNode(IMMEDIATE_INT, op_scale)) 340 | aref_op = len(tree) 341 | tree.append(OpNode(IMMEDIATE_INT, op_disp)) 342 | else: 343 | if op_disp == 0: 344 | #operand expr is: op_size op_seg [ op_base 345 | tree.append(OpNode(REGISTER, op_base)) 346 | else: 347 | #operand expr is: op_size op_seg [ + op_base op_disp 348 | tree.append(OpNode(OPERATOR + 20, '+')) # 20 = unary operator 349 | tree.append(OpNode(REGISTER, op_base)) 350 | aref_op = len(tree) 351 | tree.append(OpNode(IMMEDIATE_INT, op_disp)) 352 | elif op.mem.index != capstone.x86_const.X86_REG_INVALID: #has an index reg 353 | op_scale = op.mem.scale 354 | op_index = inst.reg_name(op.mem.index) 355 | if op_scale == 1: 356 | if op_disp == 0: 357 | #operand expr is: op_size op_seg [ op_index 358 | tree.append(OpNode(REGISTER, op_index)) 359 | else: 360 | #operand expr is: op_size op_seg [ + op_index op_disp 361 | tree.append(OpNode(OPERATOR + 20, '+')) # 20 = unary operator 362 | tree.append(OpNode(REGISTER, op_index)) 363 | aref_op = len(tree) 364 | tree.append(OpNode(IMMEDIATE_INT, op_disp)) 365 | else: 366 | if op_disp == 0: 367 | #operand expr is: op_size op_seg [ * op_index op_scale 368 | tree.append(OpNode(OPERATOR + 20, '*')) # 20 = unary operator 369 | tree.append(OpNode(REGISTER, op_index)) 370 | tree.append(OpNode(IMMEDIATE_INT, op_scale)) 371 | else: 372 | #operand expr is: op_size op_seg [ + * op_index op_scale op_disp 373 | tree.append(OpNode(OPERATOR + 20, '+')) # 20 = unary operator 374 | tree.append(OpNode(OPERATOR + 20, '*')) # 20 = unary operator 375 | tree.append(OpNode(REGISTER, op_index)) 376 | tree.append(OpNode(IMMEDIATE_INT, op_scale)) 377 | aref_op = len(tree) 378 | tree.append(OpNode(IMMEDIATE_INT, op_disp)) 379 | else: #must be [disp] only, mem with no registers 380 | #operand expr is: op_size op_seg [ op_disp 381 | aref_op = len(tree) 382 | tree.append(OpNode(IMMEDIATE_INT, op_disp)) 383 | elif op.type == capstone.x86_const.X86_OP_FP: 384 | sys.stderr.write("found an FP operand at 0x%x, op %d\n" % (inst.address, opnum)) 385 | else: 386 | sys.stderr.write("Unknown operand at 0x%x, op %d\n" % (inst.address, opnum)) 387 | # store operand expression tree for inst.addr, opnum 388 | if len(tree) > 0: 389 | expr = self.add_expr_tree(tree) 390 | if expr != 0: 391 | op_exprs.append(expr) 392 | if add_aref: 393 | self.add_address_ref(inst, opnum, tree[aref_op].node_id, aref_addr, tree[0].node_id) 394 | opnum += 1 395 | self.operands[inst.address] = op_exprs 396 | 397 | def scan_gap_data(self): 398 | ptr_sz = self.loader.sizeof_ptr 399 | 400 | keylist = [a for a in self.visited] 401 | keylist.sort() 402 | last = None 403 | count = 0 404 | for a in keylist: 405 | i = self.insts[a] 406 | if last is not None and (last.address + last.size) != a: 407 | gap_start = last.address + last.size 408 | #round up to ptr aligned address 409 | gap_start = (gap_start + ptr_sz - 1) & ~(ptr_sz - 1) 410 | if gap_start >= a: 411 | continue 412 | for addr in range(gap_start, a, ptr_sz): 413 | val = self.get_pointer(addr) 414 | if val is None: 415 | break 416 | if self.is_possible_code(val) and val not in self.visited: 417 | self.locs.append(val) 418 | #print "Adding text ptr 0x%x" % val 419 | count += 1 420 | last = i 421 | #print "Gap data analysis added %d new locations" % count 422 | 423 | 424 | def main(exe_file): 425 | ldr = pe_loader.Pe32(exe_file) 426 | if not ldr.load(): 427 | del ldr 428 | ldr = pe_loader.Pe64(exe_file) 429 | if not ldr.load(): 430 | del ldr 431 | ldr = elf_loader.Elf32(exe_file) 432 | if not ldr.load(): 433 
| del ldr 434 | ldr = elf_loader.Elf64(exe_file) 435 | if not ldr.load(): 436 | del ldr 437 | print "Failed to recognize input file type" 438 | return 439 | 440 | dis = x86_disasm(ldr) 441 | print "starting with %d initial locations" % len(dis.locs) 442 | dis.generate_data() 443 | 444 | print "found %d instructions" % len(dis.visited) 445 | 446 | ''' 447 | print "Functions identified at:" 448 | dis.call_targets.sort() 449 | for c in dis.call_targets: 450 | print " 0x%x" % c 451 | ''' 452 | 453 | dis.print_disassembly() 454 | 455 | if __name__ == "__main__": 456 | main(sys.argv[1]) 457 | --------------------------------------------------------------------------------
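
A minimal standalone sketch (not part of the repository) of the relative-jump target arithmetic that process_jump and process_call in x86_disasm.py rely on: the target is the instruction address plus the instruction size plus the sign-extended displacement taken from the instruction bytes. The signed_byte helper below is a local stand-in for the helper the disassembler is assumed to provide, and the instruction bytes and load address are hypothetical.

import struct
import capstone

def signed_byte(b):
    # sign-extend a one-byte displacement (stand-in for the repository's helper)
    return struct.unpack("b", struct.pack("B", b))[0]

md = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_32)
code = b"\x75\x05"      # jne with an 8-bit displacement (made-up bytes)
base = 0x401000         # made-up load address

for i in md.disasm(code, base):
    disp = signed_byte(i.bytes[1])
    target = i.address + i.size + disp   # 0x401000 + 2 + 5 = 0x401007
    print("0x%x: %s %s -> target 0x%x" % (i.address, i.mnemonic, i.op_str, target))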
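
Also for illustration only: scan_gap_data rounds each gap start up to the next pointer-aligned address with the expression (gap_start + ptr_sz - 1) & ~(ptr_sz - 1) before scanning the gap for code pointers. A tiny self-contained demonstration of that round-up, with made-up addresses:

ptr_sz = 4   # loader.sizeof_ptr for a 32-bit target
for addr in (0x1000, 0x1001, 0x1003, 0x1004):
    aligned = (addr + ptr_sz - 1) & ~(ptr_sz - 1)
    print("0x%x rounds up to 0x%x" % (addr, aligned))
# 0x1000 -> 0x1000, 0x1001 -> 0x1004, 0x1003 -> 0x1004, 0x1004 -> 0x1004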