├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── addnop.py ├── bin_write.py ├── brute_force_disassembler.py ├── brute_force_mapper.py ├── context.py ├── disassembler.py ├── icount.py ├── mapper.py ├── msearch.py ├── multiverse.py ├── parse_popgm.sh ├── rewrite.py ├── runtime.py ├── simplest.c ├── translator.py ├── x64_assembler.py ├── x64_populate_gm.c ├── x64_runtime.py ├── x64_translator.py ├── x86_assembler.py ├── x86_populate_gm.c ├── x86_runtime.py └── x86_translator.py /.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !*/ 3 | !*.* 4 | *~ 5 | nolibc 6 | teeny 7 | *.o 8 | .* 9 | *.pyc 10 | peda-session-* 11 | uncached.txt 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 
48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 
115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 
166 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: popgm simplest 3 | 4 | popgm: 5 | gcc -o x86_populate_gm -m32 -Wall -nostdlib -fno-toplevel-reorder -masm=intel -O1 x86_populate_gm.c 6 | gcc -o x64_populate_gm -m64 -Wall -nostdlib -fno-toplevel-reorder -masm=intel -O1 x64_populate_gm.c 7 | bash parse_popgm.sh 8 | 9 | simplest: 10 | gcc -o simplest64 -m64 -O1 simplest.c 11 | gcc -o simplest32 -m32 simplest.c 12 | 13 | clean: 14 | rm -f x86_populate_gm x64_populate_gm x86_popgm x64_popgm simplest64 simplest32 simplest64-r simplest32-r 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Multiverse 2 | 3 | *Multiverse* is a static binary rewriter with an emphasis on simplicity and correctness. It does not rely on heuristics to perform its rewriting, and it attempts to make as few assumptions as possible to produce a rewritten binary. Details about Multiverse can be found in the paper "Superset Disassembly: Statically Rewriting x86 Binaries Without Heuristics." 4 | 5 | Multiverse currently supports 32-bit and 64-bit x86 binaries. 6 | 7 | ## Requirements 8 | 9 | Multiverse requires the following Python libraries: 10 | * capstone (linear disassembler) (we use a slightly modified version that is needed to rewrite 64-bit binaries. Our modified version can be found [here](https://github.com/baumane/capstone)) 11 | * pwntools (for its assembler bindings) 12 | * pyelftools (for reading elf binaries) 13 | * elfmanip (for modifying elf binaries) (can be found [here](https://github.com/schieb/ELFManip)) 14 | 15 | ## Compiling 16 | 17 | Multiverse is written in Python, but its code to generate a binary's global mapping is written in C. This must be compiled before binaries can be rewritten. To do so, run `make` and the global mapping code will be compiled. 18 | 19 | ## Running 20 | 21 | Multiverse can be run directly, but this will only rewrite binaries with no instrumentation. This can be used to make sure that everything is installed correctly or to debug changes to the rewriter. Running `multiverse.py` on a binary will rewrite it. It can be run like this: `./multiverse.py [options] `. There are several flags that can be passed to Multiverse to control how a binary is rewritten: 22 | * --so to rewrite a shared object 23 | * --execonly to rewrite only a main binary (it will use the original, unmodified libraries) 24 | * --nopic to write a binary without support for arbitrary position-independent code. It still supports common compiler-generated pic, but not arbitrary accesses to the program counter. This is not currently recommended for 64-bit binaries. 25 | * --arch to select the architecture of the binary. Current supported architectures are `x86` and `x86-64`. The default is `x86`. 26 | 27 | Rewritten binaries are named as the original filename with "-r" appended (e.g. `simplest64` becomes `simplest64-r`). 28 | 29 | Rewritten binaries *must* be run with the `LD_BIND_NOW` environment variable set to 1. This prevents control from flowing to the dynamic linker at runtime. Since we do not rewrite the dynamic linker, this is necessary for correct execution (e.g. to run `simplest-r`, type `LD_BIND_NOW=1 ./simplest-r`). 
30 | 31 | A very simple example program is provided (`simplest.c`), which is automatically compiled when building Multiverse's global mapping code. This can be used to test that Multiverse is installed correctly. For example, to rewrite only the main executable for `simplest64`, the 64-bit version of `simplest`, type `./multiverse.py --execonly --arch x86-64 simplest64` and then run it with `LD_BIND_NOW=1 ./simplest64-r`. 32 | 33 | `rewrite.py` is a utility script to rewrite a binary and its libraries, so that `multiverse.py` does not have to be run manually for each library, and it automatically creates a directory for the rewritten libraries, plus a shell script to run the rewritten binary. For simplicity when rewriting binaries, we recommend using this script. For example, to rewrite `simplest64`, type `./rewrite.py -64 simplest64`, and the script will rewrite the main binary and all its required libraries (as long as they are not dynamically loaded via a mechanism such as `dlopen`; since statically determining dynamically loaded libraries is difficult, they must be manually extracted and their paths be placed in `-dynamic-libs.txt`, and then `rewrite.py` will rewrite them). This may take several minutes. When it is complete, run the rewritten binary with `bash simplest64-r.sh`. 34 | 35 | ## Instrumentation 36 | 37 | Multiverse is used as a Python library to instrument binaries. Right now, the instrumentation API is very simple and consists only of the function `set_before_inst_callback`, which takes a function that is called for every instruction that is encountered and will insert whichever bytes the callback function returns before the corresponding instruction. The callback function should accept a single argument: an instruction object, as created by the Capstone disassembler. It should return a byte array containing the assembled instructions to be inserted. 38 | 39 | In order to use multiverse, a script should import the Rewriter object (`from multiverse import Rewriter`) and then create an instance of Rewriter. Its constructor takes three boolean arguments: 40 | * `write_so` to rewrite a shared object 41 | * `exec_only` to rewrite only a main binary (it will use the original, unmodified libraries) 42 | * `no_pic` to write a binary without support for arbitrary position-independent code. It still supports common compiler-generated pic, but not arbitrary accesses to the program counter. This is not currently recommended for 64-bit binaries. 43 | 44 | `exec_only` and `no_pic` are performance optimizations that will not work on all binaries. For a main executable, `write_so` should be False, and for shared objects, `write_so` should be True. If `exec_only` is False, then all shared objects used by the binary must be rewritten. 45 | 46 | Two simple instrumentation examples can be found in `icount.py` (insert code to increment a counter before every instruction) and `addnop.py` (insert a nop before every instruction). These are currently configured to instrument only the main executable of 64-bit binaries. For example, to insert nops into `simplest64`, type `python addnop.py simplest64`, and to run the instrumented binary, type `LD_BIND_NOW=1 ./simplest64-r`. 47 | 48 | We are working on a higher-level API that will allow code written in C to be seamlessly called at instrumentation points, but it is not yet available. 
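As a concrete reference, the callback API described above amounts to only a few lines of Python. The sketch below is essentially a trimmed-down version of `addnop.py` (included in this repository) and inserts a `nop` before every instruction of a 64-bit main executable:

```python
#!/usr/bin/python
# Minimal instrumentation sketch: insert a nop before every instruction.
import sys
from multiverse import Rewriter
from x64_assembler import _asm

def insert_nop(inst):
    # inst is the Capstone instruction about to be translated; return the
    # assembled bytes to insert before it (or None to insert nothing).
    return _asm('nop')

if __name__ == '__main__':
    # write_so=False, exec_only=True, no_pic=False
    rewriter = Rewriter(False, True, False)
    rewriter.set_before_inst_callback(insert_nop)
    rewriter.rewrite(sys.argv[1], 'x86-64')
```

Saved as, say, `insert_nops.py` (the name is arbitrary), it is used the same way as the bundled examples: `python insert_nops.py simplest64`, then `LD_BIND_NOW=1 ./simplest64-r`.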
49 | 50 | ## Citing 51 | 52 | If you create a research work that uses Multiverse, please cite the associated paper: 53 | 54 | ``` 55 | @inproceedings{Multiverse:NDSS18, 56 | author = {Erick Bauman and Zhiqiang Lin and Kevin Hamlen}, 57 | title = {Superset Disassembly: Statically Rewriting x86 Binaries Without Heuristics}, 58 | booktitle = {Proceedings of the 25th Annual Network and Distributed System Security Symposium (NDSS'18)}, 59 | address = {San Diego, CA}, 60 | month = {February}, 61 | year = 2018, 62 | } 63 | ``` 64 | -------------------------------------------------------------------------------- /addnop.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | from elftools.elf.elffile import ELFFile 5 | from multiverse import Rewriter 6 | from x64_assembler import _asm 7 | 8 | def count_instruction(inst): 9 | template = ''' 10 | nop 11 | ''' 12 | inc = template 13 | return _asm( inc ) 14 | 15 | if __name__ == '__main__': 16 | if len(sys.argv) == 2: 17 | f = open(sys.argv[1]) 18 | e = ELFFile(f) 19 | entry_point = e.header.e_entry 20 | f.close() 21 | #write_so = False, exec_only = True, no_pic = True 22 | rewriter = Rewriter(False,True,False) 23 | rewriter.set_before_inst_callback(count_instruction) 24 | rewriter.rewrite(sys.argv[1],'x86-64') 25 | else: 26 | print "Error: must pass executable filename.\nCorrect usage: %s "%sys.argv[0] 27 | -------------------------------------------------------------------------------- /bin_write.py: -------------------------------------------------------------------------------- 1 | #import sys 2 | #sys.path.insert(0,'/home/erick/git/delinker/Delinker/src') 3 | from elfmanip.elfmanip import ELFManip, CustomSection, CustomSegment 4 | from elfmanip.constants import PT_LOAD, SHF_TLS, PT_TLS 5 | 6 | from elftools.elf.elffile import ELFFile 7 | 8 | tls_section_added = False 9 | tls_section_contents = b'' 10 | tls_section_offset = 0 11 | 12 | def add_tls_section(fname,contents): 13 | # This does not require ELFManip because it must 14 | # be called earlier on, before we actually rewrite the 15 | # binary, because I need the new TLS offset. 16 | # We could obviously create the ELFManip object now, 17 | # but it won't be used again until we write it out at 18 | # the end. 19 | global tls_section_added 20 | global tls_section_contents 21 | tls_section_added = True 22 | #Pad contents to 4-byte alignment 23 | tls_section_contents = contents+('\0'*(4-len(contents)%4)) 24 | with open(fname) as f: 25 | elf = ELFFile(f) 26 | for s in elf.iter_segments(): 27 | #Assume only one TLS segment exists (will fail on an already modified binary) 28 | if s.header['p_type'] == 'PT_TLS': 29 | tls_section_offset = s.header['p_memsz']+len(tls_section_contents) 30 | print 'old section is 0x%x (%x with padding)'%(s.header['p_memsz'], s.header['p_memsz']+(4-s.header['p_memsz']%4)) 31 | print 'new content is 0x%x (%x with padding)'%(len(contents), len(contents)+(4-len(contents)%4)) 32 | print 'overall 0x%x (%x with padding)'%(tls_section_offset, tls_section_offset+(4-tls_section_offset%4)) 33 | return tls_section_offset + (4-tls_section_offset%4) 34 | return len(contents) + (4-len(contents)%4) #If there is no TLS segment 35 | 36 | def get_tls_content(elf): 37 | # For now assume that the TLS sections are adjacent and 38 | # we can append their contents directly 39 | # I also am assuming that there will probably be only 40 | # two sections, .tdata and .tbss, which seems likely. 
41 | # This may work under different circumstances but it is 42 | # hard to predict. 43 | content = b'' 44 | if tls_section_added: 45 | content+=tls_section_contents 46 | print 'length of new contents: 0x%x'%len(content) 47 | for entry in elf.shdrs['entries']: 48 | if (entry.sh_flags & SHF_TLS) == SHF_TLS: 49 | if entry.sh_type == SHT_NOBITS: # bss has no contents 50 | content+='\0'*entry.sh_size # fill bss space with 0 51 | print 'adding .tbss section of length: 0x%x'%entry.sh_size 52 | else: 53 | content+=entry.contents 54 | print 'adding .tdata section of length: 0x%x'%len(entry.contents) 55 | return content 56 | 57 | def rewrite_noglobal(fname,nname,newcode,newbase,entry): 58 | elf = ELFManip(fname,num_adtl_segments=1) 59 | with open(newcode) as f: 60 | newbytes = f.read() 61 | elf.relocate_phdrs() 62 | newtext_section = CustomSection(newbytes, sh_addr = newbase) 63 | if newtext_section is None: 64 | raise Exception 65 | newtext_segment = CustomSegment(PT_LOAD) 66 | newtext_segment = elf.add_segment(newtext_segment) 67 | elf.add_section(newtext_section, newtext_segment) 68 | elf.set_entry_point(entry) 69 | elf.write_new_elf(nname) 70 | 71 | def rewrite(fname,nname,newcode,newbase,newglobal,newglobalbase,entry,text_section_offs,text_section_size,num_new_segments,arch): 72 | #TODO: change rewrite to take the context instead, and just retrieve the data it needs from that. 73 | elf = ELFManip(fname,num_adtl_segments=num_new_segments) 74 | if text_section_size >= elf.ehdr['e_phentsize']*(elf.ehdr['e_phnum']+num_new_segments+1): 75 | num_new_segments += 1 # Add an extra segment for the overwritten contents of the text section 76 | newtls = get_tls_content(elf) #Right now there will ALWAYS be a new TLS section 77 | with open(newcode) as f: 78 | newbytes = f.read() 79 | # IF the text section is large enough to hold the phdrs (true for a nontrivial program) 80 | # AND the architecture is x86-64, because I have not written 32-bit code to restore the text section yet 81 | # TODO: add support to 32-bit rewriter to use .text section for phdrs 82 | if arch == 'x86-64' and text_section_size >= elf.ehdr['e_phentsize']*(elf.ehdr['e_phnum']+num_new_segments): 83 | # Place the phdrs at the start of the (original) text section, overwriting the contents 84 | print 'placing phdrs in .text section, overwriting contents until runtime' 85 | #print 'BUT for now, still do it the original way so we can do a quick test...' 86 | #elf.relocate_phdrs() 87 | elf.relocate_phdrs(custom_offset=text_section_offs,new_size=elf.ehdr['e_phentsize']*(elf.ehdr['e_phnum']+num_new_segments)) 88 | # Assume that the phdrs won't be larger than a page, and just copy that entire first page of the text section. 
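# (The "duptext" section created below keeps a copy of that overwritten first page
# of .text at a fixed address so the original bytes remain available at runtime;
# see the "overwriting contents until runtime" note above and the remark in
# multiverse.py that 32-bit restore code has not been written yet.)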
89 | duptext_section = CustomSection(elf.elf.get_section_by_name('.text').data()[:4096], sh_addr = newglobalbase-0x20000) #TODO: make this address flexible 90 | duptext_segment = CustomSegment(PT_LOAD) 91 | duptext_segment = elf.add_segment(duptext_segment) 92 | elf.add_section(duptext_section, duptext_segment) 93 | else: 94 | # Use the previous heuristics to relocate the phdrs and hope for the best 95 | print '.text section too small to hold phdrs (or 32-bit binary); using other heuristics to relocate phdrs' 96 | elf.relocate_phdrs() 97 | newtext_section = CustomSection(newbytes, sh_addr = newbase) 98 | newglobal_section = CustomSection(newglobal, sh_addr = newglobalbase) 99 | newtls_section = CustomSection(newtls, sh_addr = newglobalbase-0x10000) #TODO: make this address flexible 100 | if newtext_section is None or newglobal_section is None: 101 | raise Exception 102 | newtext_segment = CustomSegment(PT_LOAD) 103 | newtext_segment = elf.add_segment(newtext_segment) 104 | newglobal_segment = CustomSegment(PT_LOAD) 105 | newglobal_segment = elf.add_segment(newglobal_segment) 106 | elf.add_section(newtext_section, newtext_segment) 107 | elf.add_section(newglobal_section, newglobal_segment) 108 | 109 | newtls_segment = CustomSegment(PT_LOAD) 110 | newtls_segment = elf.add_segment(newtls_segment) 111 | elf.add_section(newtls_section, newtls_segment) 112 | newtls_segment = CustomSegment(PT_TLS, p_align=4) 113 | newtls_segment = elf.add_segment(newtls_segment) 114 | elf.add_section(newtls_section, newtls_segment) 115 | 116 | elf.set_entry_point(entry) 117 | elf.write_new_elf(nname) 118 | 119 | if __name__ == '__main__': 120 | if len(sys.argv) != 2: 121 | print "needs filename" 122 | 123 | fn = sys.argv[1] 124 | 125 | elf = ELFManip(fn) 126 | 127 | newcode = 'newbytes' 128 | 129 | elf.add_section(newcode, sh_addr = 0x09000000) 130 | #elf.set_entry_point(0x09000200) #teeny 131 | #elf.set_entry_point(0x09000854) #simplest main 132 | #elf.set_entry_point(0x09000230) #eip 133 | #elf.set_entry_point(0x09000228) #mem 134 | #elf.set_entry_point(0x09002278) #64-bit echo (which therefore wouldn't work regardless) 135 | #elf.set_entry_point(0x09000765) #simplest (_init at 0xc78) 136 | #elf.set_entry_point(0x0900026c) #lookup 137 | #(0x8048cf0 - 0x8048000)+0x59838 = 0x5a428 (lookup index) 138 | #elf.set_entry_point(0x09001ce8) #bzip2 139 | elf.set_entry_point(0x090013ef) #ssimplest 140 | 141 | elf.write_new_elf('relocated') 142 | 143 | -------------------------------------------------------------------------------- /brute_force_disassembler.py: -------------------------------------------------------------------------------- 1 | import capstone 2 | from disassembler import Disassembler 3 | 4 | class BruteForceDisassembler(Disassembler): 5 | ''' Brute-force disassembler that disassembles bytes 6 | from every offset; all possible code that could 7 | execute is disassembled. Overlapping instructions are 8 | flattened out and duplicate sequences are connected 9 | with jump instructions. 10 | 11 | Uses Capstone as its underlying linear disassembler.''' 12 | 13 | def __init__(self,arch): 14 | if arch == 'x86': 15 | self.md = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_32) 16 | elif arch == 'x86-64': 17 | self.md = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64) 18 | else: 19 | raise NotImplementedError( 'Architecture %s is not supported'%arch ) 20 | self.md.detail = True 21 | 22 | def disasm(self,bytes,base): 23 | print 'Starting disassembly...' 
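# Superset disassembly: begin a linear sweep at every byte offset of the code.
# dummymap records each address already emitted; when a sweep reaches one of
# these, yield None so the caller (the mapper) knows to end the current
# sequence and connect it to the previously translated copy with a jump,
# rather than translating the same instructions twice.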
24 | dummymap = {} 25 | ten_percent = len(bytes)/10 26 | for instoff in range(0,len(bytes)): 27 | if instoff%ten_percent == 0: 28 | print 'Disassembly %d%% complete...'%((instoff/ten_percent)*10) 29 | while instoff < len(bytes): 30 | off = base+instoff 31 | try: 32 | if not off in dummymap: #If this offset has not been disassembled 33 | insts = self.md.disasm(bytes[instoff:instoff+15],base+instoff)#longest x86/x64 instr is 15 bytes 34 | ins = insts.next() #May raise StopIteration 35 | instoff+=len(ins.bytes) 36 | dummymap[ins.address] = True # Show that we have disassembled this address 37 | yield ins 38 | else: #If this offset has already been disassembled 39 | yield None #Indicates we encountered this offset before 40 | break #Stop disassembling from this starting offset 41 | except StopIteration: #Not a valid instruction 42 | break #Stop disassembling from this starting offset 43 | raise StopIteration 44 | 45 | -------------------------------------------------------------------------------- /brute_force_mapper.py: -------------------------------------------------------------------------------- 1 | import struct 2 | from mapper import Mapper 3 | from brute_force_disassembler import BruteForceDisassembler 4 | 5 | class BruteForceMapper(Mapper): 6 | ''' This mapper disassembled from every offset and includes a 7 | mapping for instructions at every byte offset in the code. 8 | To avoid duplicate code, when the disassembler encounters instructions 9 | it has encountered before, the mapper simply includes a jump instruction 10 | to link the current sequence to a previously mapped sequence.''' 11 | 12 | def __init__(self,arch,bytes,base,entry,context): 13 | self.disassembler = BruteForceDisassembler(arch) 14 | self.bytes = bytes 15 | self.base = base 16 | self.entry = entry 17 | self.context = context 18 | if arch == 'x86': 19 | #NOTE: We are currently NOT supporting instrumentation because we are passing 20 | #None to the translator. TODO: Add back instrumentation after everything gets 21 | #working again, and make instrumentation feel more organized 22 | from x86_translator import X86Translator 23 | from x86_runtime import X86Runtime 24 | self.translator = X86Translator(context.before_inst_callback,self.context) 25 | self.runtime = X86Runtime(self.context) 26 | global assembler 27 | import x86_assembler as assembler 28 | elif arch == 'x86-64': 29 | from x64_translator import X64Translator 30 | from x64_runtime import X64Runtime 31 | self.translator = X64Translator(context.before_inst_callback,self.context) 32 | self.runtime = X64Runtime(self.context) 33 | global assembler 34 | import x64_assembler as assembler 35 | else: 36 | raise NotImplementedError( 'Architecture %s is not supported'%arch ) 37 | 38 | def gen_mapping(self): 39 | print 'Generating mapping...' 
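# First of two passes over the superset disassembly.  This pass only needs the
# size of each translated instruction (translate_one is called without a
# mapping), so that every original address can be assigned an offset into the
# new text section; gen_newcode() repeats the walk with the completed mapping
# to emit the actual bytes and resolve branch targets.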
40 | mapping = {} 41 | maplist = [] 42 | currmap = {} 43 | last = None #Last instruction disassembled 44 | reroute = assembler.asm('jmp $+0x8f') #Dummy jmp to imitate connecting jmp; we may not know dest yet 45 | for ins in self.disassembler.disasm(self.bytes,self.base): 46 | if ins is None and last is not None: # Encountered a previously disassembled instruction and have not redirected 47 | currmap[last.address] += len(reroute) 48 | last = None #If we have not found any more new instructions since our last redirect, don't redirect again 49 | maplist.append(currmap) 50 | currmap = {} 51 | elif ins is not None: 52 | last = ins #Remember the last disassembled instruction 53 | newins = self.translator.translate_one(ins,None) #In this pass, the mapping is incomplete 54 | if newins is not None: 55 | currmap[ins.address] = len(newins) 56 | else: 57 | currmap[ins.address] = len(ins.bytes) 58 | self.context.lookup_function_offset = 0 #Place lookup function at start of new text section 59 | lookup_size = len(self.runtime.get_lookup_code(self.base,len(self.bytes),0,0x8f)) #TODO: Issue with mapping offset & size 60 | offset = lookup_size 61 | if self.context.exec_only: 62 | self.context.secondary_lookup_function_offset = offset 63 | secondary_lookup_size = len(self.runtime.get_secondary_lookup_code(self.base,len(self.bytes),offset,0x8f)) 64 | offset += secondary_lookup_size 65 | for m in maplist: 66 | for k in sorted(m.keys()): 67 | size = m[k] 68 | mapping[k] = offset 69 | offset+=size #Add the size of this instruction to the total offset 70 | #Now that the mapping is complete, we know the length of it 71 | self.context.mapping_offset = len(self.bytes)+self.base #Where we pretend the mapping was in the old code 72 | if not self.context.write_so: 73 | self.context.new_entry_off = offset #Set entry point to start of auxvec 74 | offset+=len(self.runtime.get_auxvec_code(0x8f))#Unknown entry addr here, but not needed b/c we just need len 75 | mapping[self.context.lookup_function_offset] = self.context.lookup_function_offset 76 | if self.context.exec_only: 77 | #This is a very low number and therefore will not be written out into the final mapping. 78 | #It is used to convey this offset for the second phase when generating code, specifically 79 | #for the use of remap_target. Without setting this it always sets the target to 0x8f. Sigh. 80 | mapping[self.context.secondary_lookup_function_offset] = self.context.secondary_lookup_function_offset 81 | #Don't yet know mapping offset; we must compute it 82 | mapping[len(self.bytes)+self.base] = offset 83 | print 'final offset for mapping is: 0x%x' % offset 84 | if not self.context.write_so: 85 | #For NOW, place the global data/function at the end of this because we can't necessarily fit 86 | #another section. TODO: put this somewhere else 87 | #The first time, sysinfo's and flag's location is unknown, 88 | #so they are wrong in the first call to get_global_lookup_code 89 | #However, the global_flag is moving to a TLS section, so it takes 90 | #up no space in the global lookup 91 | #global_flag = global_lookup + len(get_global_lookup_code()) 92 | #popgm goes directly after the global lookup, and global_sysinfo directly after that. 
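# The region at context.global_lookup is laid out as
#   [global lookup code][popgm code][global_sysinfo slot][...]
# so popgm_offset and global_sysinfo below are simply the running lengths of
# the pieces placed before them.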
93 | self.context.popgm_offset = len(self.runtime.get_global_lookup_code()) 94 | self.context.global_sysinfo = self.context.global_lookup + self.context.popgm_offset + len(self.runtime.get_popgm_code()) 95 | #Now that this is set, the auxvec code should work 96 | return mapping 97 | 98 | def gen_newcode(self,mapping): 99 | print 'Generating new code...' 100 | newbytes = '' 101 | bytemap = {} 102 | maplist = [] 103 | last = None #Last instruction disassembled 104 | for ins in self.disassembler.disasm(self.bytes,self.base): 105 | if ins is None and last is not None: # Encountered a previously disassembled instruction and have not redirected 106 | target = last.address + len(last.bytes) #address of where in the original code we would want to jmp to 107 | next_target = self.translator.remap_target(last.address, mapping, target, len(bytemap[last.address]) ) 108 | reroute = assembler.asm( 'jmp $+%s'%(next_target) ) 109 | #Maximum relative displacement is 32 for x86 and x64, so this works for both platforms 110 | if len(reroute) == 2: #Short encoding, which we do not want 111 | reroute+='\x90\x90\x90' #Add padding of 3 NOPs 112 | bytemap[last.address] += reroute 113 | last = None 114 | maplist.append(bytemap) 115 | bytemap = {} 116 | elif ins is not None: 117 | last = ins 118 | newins = self.translator.translate_one(ins,mapping) #In this pass, the mapping is incomplete 119 | if newins is not None: 120 | bytemap[ins.address] = newins #Old address maps to these new instructions 121 | else: 122 | bytemap[ins.address] = str(ins.bytes) #This instruction is unchanged, and its old address maps to it 123 | #Add the lookup function as the first thing in the new text section 124 | newbytes+=self.runtime.get_lookup_code(self.base,len(self.bytes),self.context.lookup_function_offset,mapping[self.context.mapping_offset]) 125 | if self.context.exec_only: 126 | newbytes += self.runtime.get_secondary_lookup_code(self.base,len(self.bytes),self.context.secondary_lookup_function_offset,mapping[self.context.mapping_offset]) 127 | count = 0 128 | for m in maplist: 129 | for k in sorted(m.keys()): #For each original address to code, in order of original address 130 | newbytes+=m[k] 131 | if not self.context.write_so: 132 | newbytes+=self.runtime.get_auxvec_code(mapping[self.entry]) 133 | print 'mapping is being placed at offset: 0x%x' % len(newbytes) 134 | #Append mapping to end of bytes 135 | newbytes+=self.write_mapping(mapping,self.base,len(self.bytes)) 136 | return newbytes 137 | 138 | def write_mapping(self,mapping,base,size): 139 | bytes = b'' 140 | for addr in range(base,base+size): 141 | if addr in mapping: 142 | if addr < 10: 143 | print 'offset for 0x%x: 0x%x' % (addr, mapping[addr]) 144 | bytes+=struct.pack('"%sys.argv[0] 46 | -------------------------------------------------------------------------------- /mapper.py: -------------------------------------------------------------------------------- 1 | 2 | class Mapper(object): 3 | ''' A mapper maps old addresses to new addresses and old 4 | instructions to new instructions. 5 | 6 | This is a generic Mapper object. 
All mappers 7 | used by this system should inherit from this parent 8 | object and provide implementations for all functions listed.''' 9 | 10 | def __init__(self,arch,bytes,base,entry,context): 11 | raise NotImplementedError('Override __init__() in a child class') 12 | def gen_mapping(self): 13 | raise NotImplementedError('Override gen_mapping() in a child class') 14 | def gen_newcode(self): 15 | raise NotImplementedError('Override gen_newcode() in a child class') 16 | -------------------------------------------------------------------------------- /msearch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import json,sys 3 | 4 | def search(item): 5 | with open('mapdump.json','rb') as f: 6 | mapping = json.load(f) 7 | if str(item) in mapping: 8 | return '0x%x'%int(mapping[str(item)]) 9 | else: 10 | return 'not found' 11 | 12 | def rsearch(item): 13 | with open('mapdump.json','rb') as f: 14 | mapping = json.load(f) 15 | for key,value in mapping.iteritems(): 16 | if item == value: 17 | return '0x%x'%int(key) 18 | return 'not found' 19 | 20 | if __name__ == '__main__': 21 | if len(sys.argv) < 2 or len(sys.argv) > 3: 22 | print "Correct usage: %s [-r]
" 23 | if len(sys.argv) == 2: 24 | print search(int(sys.argv[1],16)) 25 | if len(sys.argv) == 3 and sys.argv[1] == '-r': 26 | print rsearch(int(sys.argv[2],16)) 27 | -------------------------------------------------------------------------------- /multiverse.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from elftools.elf.elffile import ELFFile 3 | import capstone 4 | import sys 5 | #import cProfile 6 | import x64_assembler 7 | import bin_write 8 | import json 9 | import os 10 | import re 11 | 12 | from context import Context 13 | from brute_force_mapper import BruteForceMapper 14 | 15 | save_reg_template = ''' 16 | mov DWORD PTR [esp%s], %s 17 | ''' 18 | restore_reg_template = ''' 19 | mov %s, DWORD PTR [esp%s] 20 | ''' 21 | 22 | save_register = ''' 23 | mov %s, -12 24 | mov %s, %s''' 25 | 26 | memory_ref_string = re.compile(u'^dword ptr \[(?P
0x[0-9a-z]+)\]$') 27 | 28 | ''' 29 | call X 30 | '''#%('eax',cs_insn.reg_name(opnd.reg),'eax') 31 | ''' 32 | class Context(object): 33 | def __init__(): 34 | self''' 35 | 36 | #Transforms the 'r_info' field in a relocation entry to the offset into another table 37 | #determined by the host reloc table's 'sh_link' entry. In our case it's the dynsym table. 38 | def ELF32_R_SYM(val): 39 | return (val) >> 8 40 | def ELF64_R_SYM(val): 41 | return (val) >> 32 42 | 43 | #Globals: If there end up being too many of these, put them in a Context & pass them around 44 | '''plt = {} 45 | newbase = 0x09000000 46 | #TODO: Set actual address of function 47 | lookup_function_offset = 0x8f 48 | secondary_lookup_function_offset = 0x8f #ONLY used when rewriting ONLY main executable 49 | mapping_offset = 0x8f 50 | global_sysinfo = 0x8f #Address containing sysinfo's address 51 | global_flag = 0x8f 52 | global_lookup = 0x7000000 #Address containing global lookup function 53 | popgm = 'popgm' 54 | popgm_offset = 0x8f 55 | new_entry_off = 0x8f 56 | write_so = False 57 | exec_only = False 58 | no_pic = False 59 | get_pc_thunk = None 60 | stat = {} 61 | stat['indcall'] = 0 62 | stat['indjmp'] = 0 63 | stat['dircall'] = 0 64 | stat['dirjmp'] = 0 65 | stat['jcc'] = 0 66 | stat['ret'] = 0 67 | stat['origtext'] = 0 68 | stat['newtext'] = 0 69 | stat['origfile'] = 0 70 | stat['newfile'] = 0 71 | stat['mapsize'] = 0 72 | stat['lookupsize'] = 0 73 | #stat['auxvecsize'] = 0 74 | #stat['globmapsize'] = 0 75 | #stat['globlookupsize'] = 0 76 | #List of library functions that have callback args; each function in the dict has a list of 77 | #the arguments passed to it that are a callback (measured as the index of which argument it is) 78 | #TODO: Handle more complex x64 calling convention 79 | #TODO: Should I count _rtlf_fini (offset 5)? It seems to be not in the binary 80 | callbacks = {'__libc_start_main':[0,3,4]}''' 81 | 82 | class Rewriter(object): 83 | 84 | def __init__(self,write_so,exec_only,no_pic): 85 | self.context = Context() 86 | self.context.write_so = write_so 87 | self.context.exec_only = exec_only 88 | self.context.no_pic = no_pic 89 | 90 | def set_before_inst_callback(self,func): 91 | '''Pass a function that will be called when translating each instruction. 92 | This function should accept an instruction argument (the instruction type returned from capstone), 93 | which can be read to determine what code to insert (if any). A byte string of assembled bytes 94 | should be returned to be inserted before the instruction, or if none are to be inserted, return None. 95 | 96 | NOTE: NOTHING is done to protect the stack, registers, flags, etc! If ANY of these are changed, there 97 | is a chance that EVERYTHING will go wrong! Leave everything as you found it or suffer the consequences! 98 | ''' 99 | self.context.before_inst_callback = func 100 | 101 | def alloc_globals(self,size,arch): 102 | '''Allocate an arbitrary amount of contiguous space for global variables for use by instrumentation code. 103 | Returns the address of the start of this space. 
104 | ''' 105 | #create a temporary mapper to get where the globals would be inserted 106 | self.context.alloc_globals = 0 107 | mapper = BruteForceMapper(arch,b'',0,0,self.context) 108 | retval = self.context.global_lookup + len(mapper.runtime.get_global_mapping_bytes()) 109 | #Now actually set the size of allocated space 110 | self.context.alloc_globals = size 111 | return retval 112 | 113 | #Find the earliest address we can place the new code 114 | def find_newbase(self,elffile): 115 | maxaddr = 0 116 | for seg in elffile.iter_segments(): 117 | segend = seg.header['p_vaddr']+seg.header['p_memsz'] 118 | if segend > maxaddr: 119 | maxaddr = segend 120 | maxaddr += ( 0x1000 - maxaddr%0x1000 ) # Align to page boundary 121 | return maxaddr 122 | 123 | def rewrite(self,fname,arch): 124 | offs = size = addr = 0 125 | with open(fname,'rb') as f: 126 | elffile = ELFFile(f) 127 | relplt = None 128 | relaplt = None 129 | dynsym = None 130 | entry = elffile.header.e_entry #application entry point 131 | for section in elffile.iter_sections(): 132 | if section.name == '.text': 133 | print "Found .text" 134 | offs = section.header.sh_offset 135 | size = section.header.sh_size 136 | addr = section.header.sh_addr 137 | self.context.oldbase = addr 138 | # If .text section is large enough to hold all new segments, we can move the phdrs there 139 | if size >= elffile.header['e_phentsize']*(elffile.header['e_phnum']+self.context.num_new_segments+1): 140 | self.context.move_phdrs_to_text = True 141 | if section.name == '.plt': 142 | self.context.plt['addr'] = section.header['sh_addr'] 143 | self.context.plt['size'] = section.header['sh_size'] 144 | self.context.plt['data'] = section.data() 145 | if section.name == '.rel.plt': 146 | relplt = section 147 | if section.name == '.rela.plt': #x64 has .rela.plt 148 | relaplt = section 149 | if section.name == '.dynsym': 150 | dynsym = section 151 | if section.name == '.symtab': 152 | for sym in section.iter_symbols(): 153 | if sym.name == '__x86.get_pc_thunk.bx': 154 | self.context.get_pc_thunk = sym.entry['st_value'] #Address of thunk 155 | #section.get_symbol_by_name('__x86.get_pc_thunk.bx')) #Apparently this is in a newer pyelftools 156 | self.context.plt['entries'] = {} 157 | if relplt is not None: 158 | for rel in relplt.iter_relocations(): 159 | got_off = rel['r_offset'] #Get GOT offset address for this entry 160 | ds_ent = ELF32_R_SYM(rel['r_info']) #Get offset into dynamic symbol table 161 | if dynsym: 162 | name = dynsym.get_symbol(ds_ent).name #Get name of symbol 163 | self.context.plt['entries'][got_off] = name #Insert this mapping from GOT offset address to symbol name 164 | elif relaplt is not None: 165 | for rel in relaplt.iter_relocations(): 166 | got_off = rel['r_offset'] #Get GOT offset address for this entry 167 | ds_ent = ELF64_R_SYM(rel['r_info']) #Get offset into dynamic symbol table 168 | if dynsym: 169 | name = dynsym.get_symbol(ds_ent).name #Get name of symbol 170 | self.context.plt['entries'][got_off] = name #Insert this mapping from GOT offset address to symbol name 171 | #print self.context.plt 172 | else: 173 | print 'binary does not contain plt' 174 | if self.context.write_so: 175 | print 'Writing as .so file' 176 | self.context.newbase = self.find_newbase(elffile) 177 | elif self.context.exec_only: 178 | print 'Writing ONLY main binary, without support for rewritten .so files' 179 | self.context.newbase = 0x09000000 180 | else: 181 | print 'Writing as main binary' 182 | self.context.newbase = 0x09000000 183 | if self.context.no_pic: 
184 | print 'Rewriting without support for generic PIC' 185 | for seg in elffile.iter_segments(): 186 | if seg.header['p_flags'] == 5 and seg.header['p_type'] == 'PT_LOAD': #Executable load seg 187 | print "Base address: %s"%hex(seg.header['p_vaddr']) 188 | bytes = seg.data() 189 | base = seg.header['p_vaddr'] 190 | mapper = BruteForceMapper(arch,bytes,base,entry,self.context) 191 | mapping = mapper.gen_mapping() 192 | newbytes = mapper.gen_newcode(mapping) 193 | #Perhaps I could find a better location to set the value of global_flag 194 | #(which is the offset from gs) 195 | #I only need one byte for the global flag, so I am adding a tiny bit to TLS 196 | #add_tls_section returns the offset, but we must make it negative 197 | self.context.global_flag = -bin_write.add_tls_section(fname,b'\0') 198 | print 'just set global_flag value to 0x%x'%self.context.global_flag 199 | #maptext = write_mapping(mapping,base,len(bytes)) 200 | #(mapping,newbytes) = translate_all(seg.data(),seg.header['p_vaddr']) 201 | #insts = md.disasm(newbytes[0x8048360-seg.header['p_vaddr']:0x8048441-seg.header['p_vaddr']],0x8048360) 202 | #The "mysterious" bytes between the previously patched instruction 203 | #(originally at 0x804830b) are the remaining bytes from that jmp instruction! 204 | #So even though there was nothing between that jmp at the end of that plt entry 205 | #and the start of the next plt entry, now there are 4 bytes from the rest of the jmp. 206 | #This is a good example of why I need to take a different approach to generating the mapping. 207 | #insts = md.disasm(newbytes[0x80483af-seg.header['p_vaddr']:0x80483bf-seg.header['p_vaddr']],0x80483af) 208 | #insts = md.disasm(newbytes,0x8048000) 209 | #for ins in insts: 210 | # print '0x%x:\t%s\t%s'%(ins.address,ins.mnemonic,ins.op_str) 211 | #tmpdct = {hex(k): (lambda x:hex(x+seg.header['p_vaddr']))(v) for k,v in mapping.items()} 212 | #keys = tmpdct.keys() 213 | #keys.sort() 214 | #output = '' 215 | #for key in keys: 216 | # output+='%s:%s '%(key,tmpdct[key]) 217 | with open('newbytes','wb') as f2: 218 | f2.write(newbytes) 219 | if not self.context.write_so: 220 | with open('newglobal','wb') as f2: 221 | f2.write(mapper.runtime.get_global_mapping_bytes()) 222 | #print output 223 | print mapping[base] 224 | print mapping[base+1] 225 | maptext = mapper.write_mapping(mapping,base,len(bytes)) 226 | cache = '' 227 | for x in maptext: 228 | #print x 229 | cache+='%d,'%int(x.encode('hex'),16) 230 | #print cache 231 | #print maptext.encode('hex') 232 | print '0x%x'%(base+len(bytes)) 233 | print 'code increase: %d%%'%(((len(newbytes)-len(bytes))/float(len(bytes)))*100) 234 | lookup = mapper.runtime.get_lookup_code(base,len(bytes),self.context.lookup_function_offset,0x8f) 235 | print 'lookup w/unknown mapping %s'%len(lookup) 236 | #insts = md.disasm(lookup,0x0) 237 | #for ins in insts: 238 | # print '0x%x:\t%s\t%s\t%s'%(ins.address,str(ins.bytes).encode('hex'),ins.mnemonic,ins.op_str) 239 | lookup = mapper.runtime.get_lookup_code(base,len(bytes),self.context.lookup_function_offset,mapping[self.context.mapping_offset]) 240 | print 'lookup w/known mapping %s'%len(lookup) 241 | #insts = md.disasm(lookup,0x0) 242 | #for ins in insts: 243 | # print '0x%x:\t%s\t%s\t%s'%(ins.address,str(ins.bytes).encode('hex'),ins.mnemonic,ins.op_str) 244 | if not self.context.write_so: 245 | print 'new entry point: 0x%x'%(self.context.newbase + self.context.new_entry_off) 246 | print 'new _start point: 0x%x'%(self.context.newbase + mapping[entry]) 247 | print 'global lookup: 
0x%x'%self.context.global_lookup 248 | print 'local lookup: 0x%x'%self.context.lookup_function_offset 249 | print 'secondary local lookup: 0x%x'%self.context.secondary_lookup_function_offset 250 | print 'mapping offset: 0x%x'%mapping[self.context.mapping_offset] 251 | with open('%s-r-map.json'%fname,'wb') as f: 252 | json.dump(mapping,f) 253 | if not self.context.write_so: 254 | bin_write.rewrite(fname,fname+'-r','newbytes',self.context.newbase,mapper.runtime.get_global_mapping_bytes(),self.context.global_lookup,self.context.newbase+self.context.new_entry_off,offs,size,self.context.num_new_segments,arch) 255 | else: 256 | self.context.new_entry_off = mapping[entry] 257 | bin_write.rewrite_noglobal(fname,fname+'-r','newbytes',self.context.newbase,self.context.newbase+self.context.new_entry_off) 258 | self.context.stat['origtext'] = len(bytes) 259 | self.context.stat['newtext'] = len(newbytes) 260 | self.context.stat['origfile'] = os.path.getsize(fname) 261 | self.context.stat['newfile'] = os.path.getsize(fname+'-r') 262 | self.context.stat['mapsize'] = len(maptext) 263 | self.context.stat['lookupsize'] = \ 264 | len(mapper.runtime.get_lookup_code(base,len(bytes),self.context.lookup_function_offset,mapping[self.context.mapping_offset])) 265 | if self.context.exec_only: 266 | self.context.stat['secondarylookupsize'] = \ 267 | len(mapper.runtime.get_secondary_lookup_code(base,len(bytes), \ 268 | self.context.secondary_lookup_function_offset,mapping[self.context.mapping_offset])) 269 | if not self.context.write_so: 270 | self.context.stat['auxvecsize'] = len(mapper.runtime.get_auxvec_code(mapping[entry])) 271 | popgm = 'x86_popgm' if arch == 'x86' else 'x64_popgm' # TODO: if other architectures are added, this will need to be changed 272 | with open(popgm) as f: 273 | tmp=f.read() 274 | self.context.stat['popgmsize'] = len(tmp) 275 | self.context.stat['globmapsectionsize'] = len(mapper.runtime.get_global_mapping_bytes()) 276 | self.context.stat['globlookupsize'] = len(mapper.runtime.get_global_lookup_code()) 277 | with open('%s-r-stat.json'%fname,'wb') as f: 278 | json.dump(self.context.stat,f,sort_keys=True,indent=4,separators=(',',': ')) 279 | 280 | ''' 281 | with open(fname,'rb') as f: 282 | f.read(offs) 283 | bytes = f.read(size) 284 | (mapping,newbytes) = translate_all(bytes,addr) 285 | md = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64) 286 | for i in range(0,size): 287 | #print dir(md.disasm(bytes[i:i+15],addr+i)) 288 | insts = md.disasm(newbytes[i:i+15],addr+i) 289 | ins = None 290 | try: 291 | ins = insts.next()#longest possible x86/x64 instruction is 15 bytes 292 | #print str(ins.bytes).encode('hex') 293 | #print ins.size 294 | #print dir(ins) 295 | except StopIteration: 296 | pass 297 | if ins is None: 298 | pass#print 'no legal decoding' 299 | else: 300 | pass#print '0x%x:\t%s\t%s'%(ins.address,ins.mnemonic,ins.op_str) 301 | print {k: (lambda x:x+addr)(v) for k,v in mapping.items()} 302 | print asm(save_register%('eax','eax','eax')).encode('hex')''' 303 | 304 | if __name__ == '__main__': 305 | import argparse 306 | 307 | parser = argparse.ArgumentParser(description='''Rewrite a binary so that the code is relocated. 308 | Running this script from the terminal does not allow any instrumentation. 
309 | For that, use this as a library instead.''') 310 | parser.add_argument('filename',help='The executable file to rewrite.') 311 | parser.add_argument('--so',action='store_true',help='Write a shared object.') 312 | parser.add_argument('--execonly',action='store_true',help='Write only a main executable without .so support.') 313 | parser.add_argument('--nopic',action='store_true',help='Write binary without support for arbitrary pic. It still supports common compiler-generated pic.') 314 | parser.add_argument('--arch',default='x86',help='The architecture of the binary. Default is \'x86\'.') 315 | args = parser.parse_args() 316 | rewriter = Rewriter(args.so,args.execonly,args.nopic) 317 | rewriter.rewrite(args.filename,args.arch) 318 | #cProfile.run('renable(args.filename,args.arch)') 319 | 320 | -------------------------------------------------------------------------------- /parse_popgm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Match the offset (index 1) and size (index 2) of the .text section so we can create a file 3 | # containing only the raw bytes of the .text section. 4 | re='.text[[:space:]]+PROGBITS[[:space:]]+[0-9a-f]+[[:space:]]+([0-9a-f]+)[[:space:]]+([0-9a-f]+)' 5 | textsection=$(readelf -S -W x86_populate_gm | grep '.text') 6 | if [[ ${textsection} =~ ${re} ]]; then 7 | dd if=x86_populate_gm of=x86_popgm skip=$((0x${BASH_REMATCH[1]})) bs=1 count=$((0x${BASH_REMATCH[2]})) 8 | fi 9 | textsection=$(readelf -S -W x64_populate_gm | grep '.text') 10 | if [[ ${textsection} =~ ${re} ]]; then 11 | dd if=x64_populate_gm of=x64_popgm skip=$((0x${BASH_REMATCH[1]})) bs=1 count=$((0x${BASH_REMATCH[2]})) 12 | fi 13 | -------------------------------------------------------------------------------- /rewrite.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import sys,os 3 | import subprocess 4 | import shutil 5 | 6 | from multiverse import Rewriter 7 | 8 | def extract_libraries(fname): 9 | result = subprocess.check_output('ldd %s'%fname, shell=True) 10 | libs = result.split('\n') 11 | paths = [] 12 | for lib in libs: 13 | if '=>' in lib: 14 | path = lib[lib.find('=>')+2:lib.find(' (0x')].strip() 15 | if path != '': 16 | paths.append(path) 17 | return paths 18 | 19 | def extract_dynamic_libraries(fname, libpath): 20 | paths = [] 21 | dynlib = os.path.join(libpath, fname+'-dynamic-libs.txt') 22 | if os.path.exists(dynlib): 23 | with open(dynlib) as f: 24 | path = f.readline() 25 | while path != '': 26 | paths.append(path.strip()) 27 | path = f.readline() 28 | return paths 29 | 30 | def rewrite_libraries(libpath,paths,arch): 31 | rewriter = Rewriter(True,False,False) 32 | for path in paths: 33 | (base,fname) = os.path.split(path) 34 | libname = os.path.join(libpath,fname) 35 | shutil.copy(path,libname) 36 | rewriter.rewrite(libname,arch) 37 | os.remove(libname) 38 | shutil.move(libname+'-r',libname) 39 | shutil.move(libname+'-r-map.json',libname+'-map.json') 40 | shutil.move(libname+'-r-stat.json',libname+'-stat.json') 41 | 42 | if __name__ == '__main__': 43 | arch = 'x86' 44 | if len(sys.argv) == 2 or len(sys.argv) == 3: 45 | fpath = '' 46 | dynamic_only = False 47 | if len(sys.argv) == 2: 48 | fpath = sys.argv[1] 49 | else: 50 | fpath = sys.argv[2] 51 | if sys.argv[1] == '-d': 52 | dynamic_only = True 53 | if sys.argv[1] == '-64': 54 | arch = 'x86-64' 55 | 56 | paths = [] 57 | 58 | if not dynamic_only: 59 | print 'Getting required libraries for %s'%fpath 60 | paths = 
extract_libraries(fpath) 61 | 62 | (base,fname) = os.path.split(fpath) 63 | libpath = os.path.join(base,fname+'-libs-r') 64 | if not os.path.exists(libpath): 65 | os.makedirs(libpath) 66 | print 'Getting dynamic libraries' 67 | paths.extend(extract_dynamic_libraries(fname,libpath)) 68 | print 'Rewriting libraries' 69 | print paths 70 | rewrite_libraries(libpath,paths,arch) 71 | 72 | if not dynamic_only: 73 | print 'Rewriting main binary' 74 | rewriter = Rewriter(False,False,False) 75 | rewriter.rewrite(fpath,arch) 76 | 77 | print 'Writing runnable .sh' 78 | with open(fpath+'-r.sh', 'w') as f: 79 | ld_preload = '' 80 | for path in extract_dynamic_libraries(fname,libpath): 81 | (lbase,lname) = os.path.split(path) 82 | ld_preload += os.path.join(libpath,lname) + ' ' 83 | f.write('#!/bin/bash\nLD_LIBRARY_PATH=./%s LD_BIND_NOW=1 LD_PRELOAD="%s" ./%s'%( fname+'-libs-r', ld_preload, fname+'-r' ) ) 84 | else: 85 | print "Error: must pass executable filename.\nCorrect usage: %s [-d -64] \nUse -d flag to rewrite only dynamic libaries.\nUse -64 flag to rewrite 64-bit binaries."%sys.argv[0] 86 | -------------------------------------------------------------------------------- /runtime.py: -------------------------------------------------------------------------------- 1 | 2 | class Runtime(object): 3 | ''' The BinForce runtime library includes all code needed to run 4 | the rewritten binary. This includes the functions to populate 5 | the global mapping and perform lookups in mappings. 6 | 7 | This is a generic Runtime object. All runtimes 8 | used by this system should inherit from this parent 9 | object and provide implementations for all functions listed.''' 10 | def __init__(self,context): 11 | raise NotImplementedError('Override __init__() in a child class') 12 | def get_lookup_code(self,base,size,lookup_off,mapping_off): 13 | raise NotImplementedError('Override get_lookup_code() in a child class') 14 | def get_secondary_lookup_code(self,base,size,sec_lookup_off,mapping_off): 15 | raise NotImplementedError('Override get_secondary_lookup_code() in a child class') 16 | def get_global_lookup_code(self): 17 | raise NotImplementedError('Override get_global_lookup_code() in a child class') 18 | def get_auxvec_code(self,entry): 19 | raise NotImplementedError('Override get_auxvec_code() in a child class') 20 | def get_popgm_code(self): 21 | raise NotImplementedError('Override get_popgm_code() in a child class') 22 | def get_global_mapping_bytes(self): 23 | raise NotImplementedError('Override get_global_mapping_bytes() in a child class') 24 | -------------------------------------------------------------------------------- /simplest.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int add(int a, int b){ 4 | return a+b; 5 | } 6 | 7 | int main(int argc, char** argv){ 8 | printf("%d\n",add(2,4)); 9 | } 10 | -------------------------------------------------------------------------------- /translator.py: -------------------------------------------------------------------------------- 1 | 2 | class Translator(object): 3 | ''' A Translator converts the original instructions from a source 4 | binary into their corresponding translated instructions for 5 | the rewritten binary. This includes translating addresses 6 | for jmp/JCC/call/ret destinations and inserting user-defined 7 | instrumentation code around instructions. 8 | 9 | This is a generic Translator object. 
All translators 10 | used by this system should inherit from this parent 11 | object and provide implementations for all functions listed.''' 12 | def __init__(self,before_callback,context): 13 | raise NotImplementedError('Override __init__() in a child class') 14 | def translate_one(self,ins,mapping): 15 | raise NotImplementedError('Override translate_one() in a child class') 16 | def translate_uncond(self,ins,mapping): 17 | raise NotImplementedError('Override translate_uncond() in a child class') 18 | def translate_cond(self,ins,mapping): 19 | raise NotImplementedError('Override translate_cond() in a child class') 20 | def translate_ret(self,ins,mapping): 21 | raise NotImplementedError('Override translate_ret() in a child class') 22 | def remap_target(self,addr,mapping,target,offs): 23 | raise NotImplementedError('Override remap_target() in a child class') 24 | -------------------------------------------------------------------------------- /x64_assembler.py: -------------------------------------------------------------------------------- 1 | import pwn 2 | pwn.context(os='linux',arch='amd64') 3 | import re 4 | import struct 5 | 6 | cache = {} 7 | # Metacache stores data about an assembled instruction. 8 | # Specifically, right now it only holds the offset of the 9 | # displacement value (if the instruction encodes a 4-byte displacement). 10 | # This is only used for efficient modification of 11 | # already-assembled instructions containing a reference to rip. 12 | # This value allows us to change the offset from rip regardless of 13 | # the instruction. 14 | # even if 15 | # there is an immediate value (which appears at the end of an 16 | # encoded instruction's bytes). 17 | metacache = {} 18 | pat = re.compile('\$\+[-]?0x[0-9a-f]+') 19 | pat2 = re.compile('[ ]*push [0-9]+[ ]*') 20 | pat3 = re.compile('[ ]*mov eax, (d)?word ptr \[0x[0-9a-f]+\][ ]*') 21 | pat4 = re.compile('[ ]*mov eax, (dword ptr )?\[(?Pe[a-z][a-z])( )?[+-]( )?(0x)?[0-9a-f]+\][ ]*') 22 | pat5 = re.compile('(0x[0-9a-f]+|[0-9]+)') 23 | pat6 = re.compile('[ ]*(?P(add)|(sub)) (?P(esp)|(ebx)),(?P[0-9]+)[ ]*') 24 | pat7 = re.compile('[ ]*mov eax, word ptr.*')#Match stupid size mismatch 25 | pat8 = re.compile('[ ]*mov eax, .[xip]')#Match ridiculous register mismatch 26 | rip_with_offset = re.compile(u'\[rip(?: (?P[\+\-] [0x]?[0-9a-z]+))?\]') #Apparently the hex prefix is optional if the number is...unambiguous? 27 | 28 | #jcxz and jecxz are removed because they don't have a large expansion 29 | JCC = ['jo','jno','js','jns','je','jz','jne','jnz','jb','jnae', 30 | 'jc','jnb','jae','jnc','jbe','jna','ja','jnbe','jl','jnge','jge', 31 | 'jnl','jle','jng','jg','jnle','jp','jpe','jnp','jpo'] 32 | 33 | #Simple cache code. Called after more complex preprocessing of assembly source. 
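# Illustrative usage sketch (not part of the original file): because _asm() keys
# the cache on the exact assembly text, a given fragment is only sent to pwn.asm()
# once per run, e.g.
#   _asm('push rbx')   # first call: assembles via pwn.asm, logs to uncached.txt, caches b'\x53'
#   _asm('push rbx')   # later calls: the bytes come straight back from the cache dict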
34 | def _asm(text): 35 | if text in cache: 36 | return cache[text] 37 | else: 38 | with open('uncached.txt','a') as f: 39 | f.write(text+'\n') 40 | code = pwn.asm(text) 41 | cache[text] = code 42 | return code 43 | 44 | def asm(text): 45 | code = b'' 46 | for line in text.split('\n'): 47 | if not line.find(';') == -1: 48 | line = line[:line.find(';')]#Eliminate comments 49 | #Check for offsets ($+) 50 | match = pat.search(line) 51 | if match and match.group() != '$+0x8f': 52 | off = int(match.group()[2:],16) 53 | line = line.strip() 54 | mnemonic = line[:line.find(' ')] 55 | line = pat.sub('$+0x8f',line) #Replace actual offset with dummy 56 | newcode = _asm(line) #Assembled code with dummy offset 57 | if mnemonic in ['jmp','call']: 58 | off-=5 #Subtract 5 because the large encoding knows it's 5 bytes long 59 | newcode = newcode[0]+struct.pack(']" to "mov eax, dword ptr []"' 72 | code+=b'\xa1' + struct.pack(' 0x7f: 91 | line = pat5.sub('0x8f',line) 92 | original = struct.pack(' 0x7f: 116 | newcode = _asm('%s %s,0x8f'%(mnemonic,register) ) 117 | newcode = newcode[:2] + struct.pack(']" to "mov eax, dword ptr []"' 127 | code+=_asm(line.replace(' word',' dword')) 128 | elif pat8.match(line): 129 | print 'WARNING: silently converting "mov eax, [xip]" to "mov eax, e[xip]"' 130 | code+=_asm(line.replace(', ',', e')) 131 | elif rip_with_offset.search(line): 132 | #print 'WARNING: using assumption to efficiently assemble "%s"' % line 133 | m = rip_with_offset.search(line) 134 | newstr = rip_with_offset.sub('[rip]', line) 135 | if newstr in metacache: 136 | # Assemble it with no offset, which must have have already been added to the cache 137 | newcode = _asm( newstr ) 138 | if m.group('offset'): 139 | #immediate = newcode[-metacache[newstr]:] if newstr in metacache else b'' 140 | #print 'WARNING: using assumption to efficiently assemble "%s"' % line 141 | # Replace 4 bytes of displacement with little-endian encoded offset retrieved from the original assembly 142 | #code += newcode[:-(4+len(immediate))] + struct.pack( ' 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #else 27 | #define NULL ( (void *) 0) 28 | #endif 29 | 30 | struct gm_entry { 31 | unsigned long lookup_function; 32 | unsigned long start; 33 | unsigned long length; 34 | }; 35 | 36 | unsigned int __attribute__ ((noinline)) my_read(int, char *, unsigned int); 37 | int __attribute__ ((noinline)) my_open(const char *); 38 | void populate_mapping(unsigned int, unsigned long, unsigned long, unsigned long, struct gm_entry *); 39 | void process_maps(char *, struct gm_entry *); 40 | struct gm_entry lookup(unsigned long, struct gm_entry *); 41 | 42 | #ifdef DEBUG 43 | int wrapper(struct gm_entry *global_mapping){ 44 | #else 45 | int _start(struct gm_entry *global_mapping){ 46 | #endif 47 | // force string to be stored on the stack even with optimizations 48 | //char maps_path[] = "/proc/self/maps\0"; 49 | volatile int maps_path[] = { 50 | 0x6f72702f, 51 | 0x65732f63, 52 | 0x6d2f666c, 53 | 0x00737061, 54 | }; 55 | 56 | unsigned int buf_size = 0x10000; 57 | char buf[buf_size]; 58 | int proc_maps_fd; 59 | int cnt, offset = 0; 60 | 61 | 62 | proc_maps_fd = my_open((char *) &maps_path); 63 | cnt = my_read(proc_maps_fd, buf, buf_size); 64 | while( cnt != 0 && offset < buf_size ){ 65 | offset += cnt; 66 | cnt = my_read(proc_maps_fd, buf+offset, buf_size-offset); 67 | } 68 | buf[offset] = '\0';// must null terminate 69 | 70 | #ifdef DEBUG 71 | printf("READ:\n%s\n", buf); 72 | process_maps(buf,global_mapping); 73 | int items 
= global_mapping[0].lookup_function; 74 | // simulation for testing 75 | populate_mapping(items + 0, 0x08800000, 0x08880000, 0x07000000, global_mapping); 76 | populate_mapping(items + 1, 0x09900000, 0x09990000, 0x07800000, global_mapping); 77 | global_mapping[0].lookup_function += 2;//Show that we have added these 78 | /* 79 | int i; 80 | for (i = 0x08800000; i < 0x08880000; i++){ 81 | if (lookup(i, global_mapping) != 0x07000000){ 82 | printf("Failed lookup of 0x%08x\n", i); 83 | } 84 | } 85 | */ 86 | //check edge cases 87 | 88 | printf("Testing %x (out of range)\n",0x08800000-1); 89 | lookup(0x08800000-1, global_mapping); 90 | printf("Testing %x (in range)\n",0x08800000); 91 | lookup(0x08800000, global_mapping); 92 | printf("Testing %x (in range)\n",0x08800001); 93 | lookup(0x08800001, global_mapping); 94 | printf("Testing %x (in range)\n",0x08880000); 95 | lookup(0x08880000, global_mapping); 96 | printf("Testing %x (out of range)\n",0x08880000+1); 97 | lookup(0x08880000+1, global_mapping); 98 | //printf("0x08812345 => 0x%08x\n", lookup(0x08812345, global_mapping)); 99 | #else 100 | process_maps(buf, global_mapping); 101 | #endif 102 | return 0; 103 | } 104 | 105 | #ifdef DEBUG 106 | struct gm_entry lookup(unsigned long addr, struct gm_entry *global_mapping){ 107 | unsigned int index; 108 | unsigned long gm_size = global_mapping[0].lookup_function;//Size is stored in first entry 109 | global_mapping++;//Now we point at the true first entry 110 | //Use binary search on the already-sorted entries 111 | //Here is a linear search for simple testing purposes. 112 | //For small arrays, binary search may not be as useful, so I may for now just use linear search. 113 | //I can try using binary search later and doing a performance comparison. 114 | //However, if I want to do binary search, I should do a conditional mov to reduce the number of branches 115 | for(index = 0; index < gm_size; index++){ 116 | //printf("SEARCHING 0x%lx :: mapping[%d] :: 0x%lx :: 0x%lx :: 0x%lx\n", addr, index, global_mapping[index].lookup_function, global_mapping[index].start, global_mapping[index].length); 117 | if( addr - global_mapping[index].start <= global_mapping[index].length){ 118 | printf("0x%lx :: mapping[%d] :: 0x%lx :: 0x%lx :: 0x%lx\n", addr, index, global_mapping[index].lookup_function, global_mapping[index].start, global_mapping[index].length); 119 | } 120 | } 121 | 122 | return global_mapping[index]; 123 | } 124 | #endif 125 | 126 | unsigned int __attribute__ ((noinline)) my_read(int fd, char *buf, unsigned int count){ 127 | unsigned long bytes_read; 128 | asm volatile( 129 | ".intel_syntax noprefix\n" 130 | "mov rax, 0\n" 131 | "mov rdi, %1\n" 132 | "mov rsi, %2\n" 133 | "mov rdx, %3\n" 134 | "syscall\n" 135 | "mov %0, rax\n" 136 | : "=g" (bytes_read) 137 | : "g" ((long)fd), "g" (buf), "g" ((long)count) 138 | : "rax", "rdi", "rsi", "rdx", "rcx", "r11" 139 | ); 140 | return (unsigned int) bytes_read; 141 | } 142 | 143 | int __attribute__ ((noinline)) my_open(const char *path){ 144 | unsigned long fp; 145 | asm volatile( 146 | ".intel_syntax noprefix\n" 147 | "mov rax, 2\n" 148 | "mov rdi, %1\n" 149 | "mov rsi, 0\n" 150 | "mov rdx, 0\n" 151 | "syscall\n" 152 | "mov %0, rax\n" 153 | : "=r" (fp) 154 | : "g" (path) 155 | : "rcx", "r11" 156 | ); 157 | return (int) fp; 158 | } 159 | 160 | #define PERM_WRITE 1 161 | #define PERM_EXEC 2 162 | unsigned char get_permissions(char *line){ 163 | // e.g., "08048000-08049000 r-xp ..." or "08048000-08049000 rw-p ..." 
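	// Worked example (hypothetical line, same format as above): for
	// "08048000-08049000 rw-p ...", the loop below stops at the first space,
	// line+2 then points at 'w', so PERM_WRITE is set and PERM_EXEC is not
	// (returns 1); for an "r-xp" mapping the 'w' test fails and the 'x' test
	// sets PERM_EXEC (returns 2).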
164 | unsigned char permissions = 0; 165 | while( *line != ' ' ) line++; 166 | line+=2; //Skip space and 'r' entry, go to 'w' 167 | if( *line == 'w' ) permissions |= PERM_WRITE; 168 | line++; //Go to 'x' 169 | if( *line == 'x' ) permissions |= PERM_EXEC; 170 | return permissions; 171 | } 172 | 173 | #define is_write(p) (p & PERM_WRITE) 174 | #define is_exec(p) (p & PERM_EXEC) 175 | 176 | #define NUM_EXTERNALS 3 177 | 178 | /* 179 | Check whether the memory range is not rewritten by our system: 180 | This includes [vsyscall], [vdso], and the dynamic loader 181 | */ 182 | unsigned char is_external(char *line){ 183 | volatile char externals[][11] = { 184 | "/ld-", 185 | "[vdso]", 186 | "[vsyscall]" 187 | }; 188 | unsigned int offset,i; 189 | char *lineoff; 190 | while( *line != ' ' ) line++; // Skip memory ranges 191 | line += 21; // Skip permissions and some other fields 192 | while( *line != ' ' ) line++; // Skip last field 193 | while( *line == ' ' ) line++; // Skip whitespace 194 | if( *line != '\n'){ // If line has text at the end 195 | // Could have done a string matching state machine here, but 196 | // it would be harder to add extra strings to later. 197 | for( i = 0; i < NUM_EXTERNALS; i++ ){ 198 | offset = 0; 199 | lineoff = line-1; 200 | while( *lineoff != '\n' && *lineoff != '\0' ){ 201 | // This is not perfect string matching, and will not work in general cases 202 | // because we do not backtrack. It should work with the strings we are searching 203 | // for now, plus it's relatively simple to do it this way, so I'm leaving it like 204 | // this for the time being. 205 | lineoff++; //Increment lineoff here so that we compare to the previous char for the loop 206 | if( externals[i][offset] == '\0' ){ 207 | return 1;// Matched 208 | } 209 | if( *lineoff == externals[i][offset] ){ 210 | offset++; // If we are matching, move forward one in external 211 | }else{ 212 | offset = 0; // If they failed to match, start over at the beginning 213 | } 214 | } 215 | } 216 | } 217 | return 0; //Not an external 218 | } 219 | 220 | char *next_line(char *line){ 221 | /* 222 | * finds the next line to process 223 | */ 224 | for (; line[0] != '\0'; line++){ 225 | if (line[0] == '\n'){ 226 | if (line[1] == '\0') 227 | return NULL; 228 | return line+1; 229 | } 230 | } 231 | return NULL; 232 | } 233 | 234 | unsigned long my_atol(char *a){ 235 | /* 236 | * convert unknown length (max 16) hex string into its integer representation 237 | * assumes input is from /proc/./maps 238 | * i.e., 'a' is a left-padded 16 byte lowercase hex string 239 | * e.g., "000000000804a000" 240 | */ 241 | #ifdef DEBUG 242 | //printf("Converting string to long: \"%s\"\n", a); 243 | #endif 244 | unsigned long l = 0; 245 | unsigned char digit = *a; 246 | while( (digit >= '0' && digit <= '9') || (digit >= 'a' && digit <= 'f') ){ 247 | digit -= '0'; 248 | if( digit > 9 ) digit -= 0x27; // digit was hex character 249 | l <<= 4; // Shift by half a byte 250 | l += digit; 251 | digit = *(++a); 252 | } 253 | #ifdef DEBUG 254 | //printf("Resulting value: %lx\n", l); 255 | #endif 256 | return l; 257 | } 258 | 259 | void parse_range(char *line, unsigned long *start, unsigned long *end){ 260 | /* 261 | * e.g., "08048000-08049000 ..." 262 | * Unfortunately, for 64-bit applications, the address ranges do not have a 263 | * consistent length! We must determine how many digits are in each number. 
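	 * For instance, with a hypothetical 64-bit line "7f1200400000-7f1200423000 ...",
	 * my_atol() stops at the '-' (not a hex digit) and yields start=0x7f1200400000,
	 * and my_atol(line+1) stops at the trailing space, yielding end=0x7f1200423000,
	 * regardless of how many digits each number happens to have.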
264 | */ 265 | char *line_start = line; 266 | while( *line != '-' ) line++; 267 | *start = my_atol(line_start); 268 | *end = my_atol(line+1); 269 | } 270 | 271 | void populate_mapping(unsigned int gm_index, unsigned long start, unsigned long end, unsigned long lookup_function, struct gm_entry *global_mapping){ 272 | global_mapping[gm_index].lookup_function = lookup_function; 273 | global_mapping[gm_index].start = start; 274 | global_mapping[gm_index].length = end - start; 275 | #ifdef DEBUG 276 | printf("Added gm entry @ %d: (0x%lx, 0x%lx, 0x%lx)\n", gm_index, global_mapping[gm_index].lookup_function, global_mapping[gm_index].start, global_mapping[gm_index].length); 277 | #endif 278 | } 279 | 280 | void process_maps(char *buf, struct gm_entry *global_mapping){ 281 | /* 282 | * Process buf which contains output of /proc/self/maps 283 | * populate global_mapping for each executable set of pages 284 | */ 285 | char *line = buf; 286 | unsigned int gm_index = 1;//Reserve first entry for metadata 287 | unsigned char permissions = 0; 288 | //unsigned int global_start, global_end; 289 | unsigned long old_text_start, old_text_end = 0; 290 | unsigned long new_text_start, new_text_end = 0; 291 | 292 | //Assume global mapping is first entry at 0x200000 and that there is nothing before 293 | //Skip global mapping (put at 0x200000 in 64-bit binaries, as opposed to 0x7000000 for x86) 294 | line = next_line(line); 295 | do{ // process each block of maps 296 | permissions = get_permissions(line); 297 | // process all segments from this object under very specific assumptions 298 | if ( is_exec(permissions) ){ 299 | if( !is_write(permissions) ){ 300 | parse_range(line, &old_text_start, &old_text_end); 301 | #ifdef DEBUG 302 | printf("Parsed range for r-xp: %lx-%lx\n", old_text_start, old_text_end); 303 | #endif 304 | if( is_external(line) ){ 305 | #ifdef DEBUG 306 | printf("Region is external: %lx-%lx\n", old_text_start, old_text_end); 307 | #endif 308 | // Populate external regions with 0x00000000, which will be checked for in the global lookup. 309 | // It will then rewrite the return address on the stack and return the original address. 
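				// For instance (hypothetical addresses), a dynamic loader segment
				// "7f88c4a00000-7f88c4a26000 r-xp ... /lib/ld-2.27.so" would yield the
				// entry {lookup_function=0x0, start=0x7f88c4a00000, length=0x26000},
				// and the zero lookup_function is what the global lookup treats as
				// "external, return the original address".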
310 | populate_mapping(gm_index, old_text_start, old_text_end, 0x00000000, global_mapping); 311 | gm_index++; 312 | } 313 | }else{ 314 | parse_range(line, &new_text_start, &new_text_end); 315 | #ifdef DEBUG 316 | printf("Parsed range for rwxp: %lx-%lx\n", new_text_start, new_text_end); 317 | #endif 318 | populate_mapping(gm_index, old_text_start, old_text_end, new_text_start, global_mapping); 319 | gm_index++; 320 | } 321 | } 322 | line = next_line(line); 323 | } while(line != NULL); 324 | global_mapping[0].lookup_function = gm_index;// Use first entry for storing how many entries there are 325 | } 326 | 327 | #ifdef DEBUG 328 | int main(void){ 329 | void *mapping_base = (void *)0x200000; 330 | void *new_section = (void *)0x8000000; 331 | int fd = open("/dev/zero", O_RDWR); 332 | void *global_mapping = mmap(mapping_base, 0x10000, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); 333 | mmap(new_section, 0x4000, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, fd, 0); //Create a mock new "text" section that would be added by process_maps 334 | if (global_mapping != mapping_base){ 335 | printf("failed to get requested base addr\n"); 336 | exit(1); 337 | } 338 | wrapper(global_mapping); 339 | 340 | return 0; 341 | } 342 | #endif 343 | 344 | -------------------------------------------------------------------------------- /x64_runtime.py: -------------------------------------------------------------------------------- 1 | from x64_assembler import _asm,asm 2 | 3 | class X64Runtime(object): 4 | def __init__(self,context): 5 | self.context = context 6 | self.context.global_lookup = 0x200000 # Set global lookup offset for 64-bit 7 | 8 | def get_lookup_code(self,base,size,lookup_off,mapping_off): 9 | #Example assembly for lookup function 10 | ''' 11 | push edx 12 | mov edx,eax 13 | call get_eip 14 | get_eip: 15 | pop eax ;Get current instruction pointer 16 | sub eax,0x8248 ;Subtract offset from instruction pointer val to get new text base addr 17 | sub edx,0x8048000 ;Compare to start (exclusive) and set edx to an offset in the mapping 18 | jl outside ;Out of bounds (too small) 19 | cmp edx,0x220 ;Compare to end (inclusive) (note we are now comparing to the size) 20 | jge outside ;Out of bounds (too big) 21 | mov edx,[mapping+edx*4] ;Retrieve mapping entry (can't do this directly in generated func) 22 | cmp edx, 0xffffffff ;Compare to invalid entry 23 | je failure ;It was an invalid entry 24 | add eax,edx ;Add the offset of the destination to the new text section base addr 25 | pop edx 26 | ret 27 | outside: ;If the address is out of the mapping bounds, return original address 28 | add edx,0x8048000 ;Undo subtraction of base, giving us the originally requested address 29 | mov eax,edx ;Place the original request back in eax 30 | pop edx 31 | jmp global_lookup ;Check if global lookup can find this 32 | failure: 33 | hlt 34 | ''' 35 | #TODO: support lookup for binary/library combination 36 | lookup_template = ''' 37 | push rbx 38 | mov rbx,rax 39 | lea rax, [rip-%s] 40 | %s 41 | jb outside 42 | cmp rbx,%s 43 | jae outside 44 | mov ebx,[rax+rbx*4+%s] 45 | cmp ebx, 0xffffffff 46 | je failure 47 | add rax,rbx 48 | pop rbx 49 | ret 50 | outside: 51 | %s 52 | mov rax,rbx 53 | pop rbx 54 | mov QWORD PTR [rsp-8],%s 55 | jmp [rsp-8] 56 | failure: 57 | hlt 58 | ''' 59 | exec_code = ''' 60 | sub rbx,%s 61 | ''' 62 | exec_restore = ''' 63 | add rbx,%s 64 | ''' 65 | #Notice that we only move a DWORD from the mapping (into ebx) because the 66 | #mapping only stores 4-byte offsets. 
Therefore, if a text section is >4GB, 67 | #this mapping strategy will fail 68 | exec_only_lookup = ''' 69 | lookup: 70 | push rbx 71 | mov rbx,rax 72 | lea rax, [rip-%s] 73 | sub rbx,%s 74 | jb outside 75 | cmp rbx,%s 76 | jae outside 77 | mov ebx, [rax+rbx*4+%s] 78 | add rax,rbx 79 | pop rbx 80 | ret 81 | 82 | outside: 83 | add rbx,%s 84 | mov rax,[rsp+16] 85 | call lookup 86 | mov [rsp+16],rax 87 | mov rax,rbx 88 | pop rbx 89 | ret 90 | ''' 91 | #For an .so, it can be loaded at an arbitrary address, so we cannot depend on 92 | #the base address being in a fixed location. Therefore, we instead compute 93 | #the old text section's start address by using the new text section's offset 94 | #from it. 95 | # rax holds the address of the lookup function, which is at the start of the new 96 | # section we are adding. 97 | # rbx at the start holds the address we want to look up, and we want to compute 98 | # how many bytes the address is from the start of the original text section. So 99 | # we add the newbase address to rbx to add the offset there is between the old and 100 | # new text sections, and then subtract off the address of the lookup. 101 | so_code = ''' 102 | add rbx, %s 103 | sub rbx, rax 104 | ''' 105 | so_restore = ''' 106 | add rbx, rax 107 | sub rbx, %s 108 | ''' 109 | #retrieve rip 11 bytes after start of lookup function (right after first lea instruction) 110 | if self.context.write_so: 111 | return _asm(lookup_template%(lookup_off+11,so_code%(self.context.newbase),size,mapping_off,so_restore%(self.context.newbase),self.context.global_lookup)) 112 | elif self.context.exec_only: 113 | return _asm( exec_only_lookup%(lookup_off+11,base,size,mapping_off,base) ) 114 | else: 115 | return _asm(lookup_template%(lookup_off+11,exec_code%base,size,mapping_off,exec_restore%base,self.context.global_lookup)) 116 | 117 | def get_secondary_lookup_code(self,base,size,sec_lookup_off,mapping_off): 118 | '''This secondary lookup is only used when rewriting only the main executable. It is a second, simpler 119 | lookup function that is used by ret instructions and does NOT rewrite a return address on the stack 120 | when the destination is outside the mapping. It instead simply returns the original address and that's 121 | it. The only reason I'm doing this by way of a secondary lookup is this should be faster than a 122 | a parameter passed at runtime, so I need to statically have an offset to jump to in the case of returns. 123 | This is a cleaner way to do it than split the original lookup to have two entry points.''' 124 | #Notice that we only move a DWORD from the mapping (into ebx) because the 125 | #mapping only stores 4-byte offsets. Therefore, if a text section is >4GB, 126 | #this mapping strategy will fail 127 | secondary_lookup = ''' 128 | lookup: 129 | push rbx 130 | mov rbx,rax 131 | lea rax, [rip-%s] 132 | sub rbx,%s 133 | jb outside 134 | cmp rbx,%s 135 | jae outside 136 | mov ebx,[rax+rbx*4+%s] 137 | add rax,rbx 138 | pop rbx 139 | ret 140 | 141 | outside: 142 | add rbx,%s 143 | mov rax,rbx 144 | pop rbx 145 | ret 146 | ''' 147 | return _asm( secondary_lookup%(sec_lookup_off+11,base,size,mapping_off,base) ) 148 | 149 | def get_global_lookup_code(self): 150 | #TODO: Support global lookup, executable + library rewriting 151 | #I have to modify it so it will assemble since we write out the global lookup 152 | #regardless of whether it's used, but it obviously won't work in this state... 
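    # Rough Python model of the search implemented in the assembly below
    # (illustrative only; gm, call_through and fixup_return_address are made-up
    # names): entry 0 stores the entry count, every entry is 24 bytes, and each
    # candidate is range-tested with addr - start <= length:
    #   count = gm[0].lookup_function
    #   for entry in gm[1:1+count]:
    #       if addr - entry.start <= entry.length:
    #           return call_through(entry.lookup_function) if entry.lookup_function else fixup_return_address()
    #   hlt()  # no entry matched: failure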
153 | #addr - global_mapping[index].start <= global_mapping[index].length 154 | # rbx = length 155 | # rcx = base/entry 156 | # rdx = index 157 | # r10 = entry 158 | #struct gm_entry { 159 | # unsigned long lookup_function; 160 | # unsigned long start; 161 | # unsigned long length; 162 | #}; 163 | #TODO: still need to handle code entering the loader region.... 164 | ''' 165 | ; Get rid of sysinfo comparison because we instead are going to be comparing based on entire address ranges 166 | ;cmp rax,[%s] ; If rax is sysinfo 167 | ;je sysinfo ; Go to rewrite return address 168 | glookup: 169 | push rcx ; Save working registers 170 | push rbx 171 | push rdx 172 | push r10 173 | mov rcx, %s ; Load address of first entry 174 | mov rbx, [rcx] ; Load first value in first entry (lookup_function, serving as length) 175 | xor rdx, rdx ; Clear rdx 176 | searchloop: 177 | cmp rbx, rdx ; Check if we are past last entry 178 | je failure ; Did not find successful entry, so fail 179 | add rcx, 24 ; Set rcx to next entry 180 | mov r10, [rcx+8] ; Load second item in entry (start) 181 | neg r10 ; Negate r10 so it can act like it is being subtracted 182 | add r10, rax ; Get difference between lookup address and start 183 | cmp r10, [rcx+16] ; Compare: address - start <= end - start (length) 184 | jle success ; If so, we found the right entry. 185 | inc rdx ; Add one to our index 186 | jmp searchloop ; Loop for next entry 187 | success: 188 | mov rcx,[rcx] ; Load lookup address into rcx so we can compare it to 0 189 | test rcx,rcx ; If lookup address is zero it means this region is not rewritten! 190 | jz external ; Jump to external so we can rewrite return address on the stack (assume only calls into external regions) 191 | pop r10 ; Restore the saved values first to grow the stack as little as possible 192 | pop rdx 193 | pop rbx 194 | call rcx ; Call the lookup, as specified by the first value in global mapping entry (lookup_function) 195 | pop rcx ; Restore rcx since we were using it to save the lookup function address 196 | ret ; rax should now have the right value, so return 197 | external: 198 | pop r10 ; Restore all saved registers, as the subsequent call to glookup will save them again. 199 | pop rdx ; Restoring the saved registers before the recursive call means the stack will not grow as much, 200 | pop rbx ; avoiding overwriting the value of rax saved outside the stack before the local lookup call without 201 | pop rcx ; having to increase the distance that rax is saved outside the stack as much as we would otherwise. 
202 | mov [rsp-64],rax ; Save original rax (not with push so we don't increase the stack pointer any more) 203 | mov rax,[rsp+8] ; Load the return address we want to overwrite (address of instruction calling the local lookup) 204 | call glookup ; Lookup the translated value 205 | mov [rsp+8],rax ; Overwrite with the translated value 206 | mov rax,[rsp-64] ; Restore original rax, returned unmodified so we call unmodified external code 207 | ret 208 | failure: 209 | hlt 210 | ''' 211 | global_lookup_template = ''' 212 | glookup: 213 | push rcx 214 | push rbx 215 | push rdx 216 | push r10 217 | mov rcx, %s 218 | mov rbx, [rcx] 219 | xor rdx, rdx 220 | searchloop: 221 | cmp rbx, rdx 222 | je failure 223 | add rcx, 24 224 | mov r10, [rcx+8] 225 | neg r10 226 | add r10, rax 227 | cmp r10, [rcx+16] 228 | jle success 229 | inc rdx 230 | jmp searchloop 231 | success: 232 | mov rcx,[rcx] 233 | test rcx,rcx 234 | jz external 235 | pop r10 236 | pop rdx 237 | pop rbx 238 | call rcx 239 | pop rcx 240 | ret 241 | external: 242 | pop r10 243 | pop rdx 244 | pop rbx 245 | pop rcx 246 | mov [rsp-64],rax 247 | mov rax,[rsp+8] 248 | call glookup 249 | mov [rsp+8],rax 250 | mov rax,[rsp-64] 251 | ret 252 | failure: 253 | hlt 254 | ''' 255 | return _asm(global_lookup_template%(self.context.global_sysinfo+8)) 256 | 257 | def get_auxvec_code(self,entry): 258 | #Example assembly for searching the auxiliary vector 259 | #TODO: this commented assembly needs to be updated, as it's still (mostly) 32-bit code 260 | ''' 261 | mov [esp-4],esi ;I think there's no need to save these, but in case somehow the 262 | mov [esp-8],ecx ;linker leaves something of interest for _start, let's save them 263 | mov esi,[esp] ;Retrieve argc 264 | mov ecx,esp ;Retrieve address of argc 265 | lea ecx,[ecx+esi*4+4] ;Skip argv 266 | loopenv: ;Iterate through each environment variable 267 | add ecx,4 ;The first loop skips over the NULL after argv 268 | mov esi,[ecx] ;Retrieve environment variable 269 | test esi,esi ;Check whether it is NULL 270 | jnz loopenv ;If not, continue through environment vars 271 | add ecx,4 ;Hop over 0 byte to first entry 272 | loopaux: ;Iterate through auxiliary vector, looking for AT_SYSINFO (32) 273 | mov esi,[ecx] ;Retrieve the type field of this entry 274 | cmp esi,32 ;Compare to 32, the entry we want 275 | jz foundsysinfo ;Found it 276 | test esi,esi ;Check whether we found the entry signifying the end of auxv 277 | jz restore ;Go to _start if we reach the end 278 | add ecx,8 ;Each entry is 8 bytes; go to next 279 | jmp loopaux 280 | foundsysinfo: 281 | mov esi,[ecx+4] ;Retrieve sysinfo address 282 | mov [sysinfo],esi ;Save address 283 | restore: 284 | mov esi,[esp-4] 285 | mov ecx,[esp-8] 286 | push global_mapping ;Push address of global mapping for popgm 287 | call popgm 288 | ;place restoretext here if we need to restore .text 289 | add esp,4 ;Pop address of global mapping 290 | jmp realstart 291 | 292 | ;restoretext 293 | mov BYTE PTR [gs:%s],0 ;Restore flag to original state 294 | push rax ;Save registers required for syscall 295 | push rdi 296 | push rsi 297 | push rdx 298 | mov rax, 10 ;sys_mprotect 299 | mov rdi, text_base ;Location of start of text section (rounded down to nearest page size) 300 | mov rsi, 4096 ;One page 301 | mov rdx, 7 ;rwx 302 | syscall ;Make page writable 303 | mov rax, 0 ;Use rax as an index (starting at an offset that skips plt entries and other things preceding .text) 304 | mov rsi, saved_text_addr;Use rsi as a base address (address of the saved first page) (global lookup 
address - offset) 305 | mov rdi, text_addr ;Load actual text section location 306 | looprestore: 307 | mov rdx, [rsi+rax] ;Load 8 bytes from saved .text page 308 | mov [rdi+rax], rdx ;Restore this data 309 | add rax,8 ;Move index forward 8 bytes 310 | cmp rax,page_end ;If less than 4096-text_offset, continue looping 311 | jb looprestore 312 | mov rax, 10 ;sys_mprotect 313 | mov rdi, text_base ;Location of start of text section (rounded down to nearest page size) 314 | mov rsi, 4096 ;One page 315 | mov rdx, 5 ;r-x 316 | syscall ;Remove writable permission 317 | pop rdx ;Restore registers required for syscall 318 | pop rsi 319 | pop rdi 320 | pop rax 321 | ret 322 | ''' 323 | auxvec_template = ''' 324 | mov [rsp-8],rsi 325 | mov [rsp-16],rcx 326 | mov rsi,[rsp] 327 | mov rcx,rsp 328 | lea rcx,[rcx+rsi*8+8] 329 | loopenv: 330 | add rcx,8 331 | mov rsi,[rcx] 332 | test rsi,rsi 333 | jnz loopenv 334 | add rcx,8 335 | loopaux: 336 | mov rsi,[rcx] 337 | cmp rsi,32 338 | jz foundsysinfo 339 | test rsi,rsi 340 | jz restore 341 | add rcx,16 342 | jmp loopaux 343 | foundsysinfo: 344 | mov rsi,[rcx+8] 345 | mov [%s],rsi 346 | restore: 347 | mov rsi,[rsp-8] 348 | mov rcx,[rsp-16] 349 | push %s 350 | call [rsp] 351 | add rsp,8 352 | %s 353 | mov QWORD PTR [rsp-16], %s 354 | jmp [rsp-16]''' 355 | restoretext = ''' 356 | push rax 357 | push rdi 358 | push rsi 359 | push rdx 360 | mov rax, 10 361 | mov rdi, %s 362 | mov rsi, 4096 363 | mov rdx, 7 364 | syscall 365 | mov rax, 0 366 | mov rsi, %s 367 | mov rdi, %s 368 | looprestore: 369 | mov rdx, [rsi+rax] 370 | mov [rdi+rax], rdx 371 | add rax,8 372 | cmp rax,%s 373 | jb looprestore 374 | mov rax, 10 375 | mov rdi, %s 376 | mov rsi, 4096 377 | mov rdx, 5 378 | syscall 379 | pop rdx 380 | pop rsi 381 | pop rdi 382 | pop rax 383 | ''' % ( (self.context.oldbase/0x1000)*0x1000, self.context.global_lookup - 0x20000, self.context.oldbase, 0x1000-(self.context.oldbase%0x1000), (self.context.oldbase/0x1000)*0x1000 ) 384 | 385 | return _asm(auxvec_template%(self.context.global_sysinfo,self.context.global_lookup+self.context.popgm_offset,restoretext if self.context.move_phdrs_to_text else '',self.context.newbase+entry)) 386 | 387 | def get_popgm_code(self): 388 | #pushad and popad do NOT exist in x64, 389 | #so we must choose which registers must be preserved at program start 390 | #TODO: For now we skip actually calling popgm, because it will have to be 391 | #completely re-engineered, so we will need to change the offset to 0x11 392 | #once we have fixed popgm for x64 393 | call_popgm = ''' 394 | push rax 395 | push rcx 396 | push rdx 397 | push rbx 398 | push rbp 399 | push rsi 400 | push rdi 401 | mov rdi, %s 402 | call $+0x0d 403 | pop rdi 404 | pop rsi 405 | pop rbp 406 | pop rbx 407 | pop rdx 408 | pop rcx 409 | pop rax 410 | ret 411 | ''' 412 | popgmbytes = asm(call_popgm%(self.context.global_sysinfo+8)) 413 | with open('x64_%s' % self.context.popgm) as f: 414 | popgmbytes+=f.read() 415 | return popgmbytes 416 | 417 | def get_global_mapping_bytes(self): 418 | #TODO: support global mapping 419 | globalbytes = self.get_global_lookup_code() 420 | #globalbytes+='\0' #flag field 421 | globalbytes += self.get_popgm_code() 422 | globalbytes += '\0\0\0\0\0\0\0\0' #sysinfo field 423 | # Global mapping (0x6000 0x00 bytes). This contains space for 1024 entries: 424 | # 8 * 3 = 24 bytes per entry * 1024 entries = 0x6000 (24576) bytes. If a binary 425 | # has more than 1024 libraries, the program will most likely segfault. 
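    # Rough layout of the blob assembled by this method (offsets depend on the
    # sizes of the generated code and are not fixed constants):
    #   [global lookup code][popgm code][8-byte sysinfo][0x6000-byte mapping][alloc_globals]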
426 | globalbytes += '\x00'*0x6000 427 | # Allocate extra space for any additional global variables that 428 | # instrumentation code might require 429 | if self.context.alloc_globals > 0: 430 | globalbytes += '\x00'*self.context.alloc_globals 431 | return globalbytes 432 | -------------------------------------------------------------------------------- /x64_translator.py: -------------------------------------------------------------------------------- 1 | from x64_assembler import asm,cache,metacache 2 | from capstone.x86 import X86_OP_REG,X86_OP_MEM,X86_OP_IMM 3 | import struct 4 | import re 5 | from translator import Translator 6 | 7 | class X64Translator(Translator): 8 | 9 | def __init__(self,before_callback,context): 10 | self.before_inst_callback = before_callback 11 | self.context = context 12 | self.memory_ref_string = re.compile(u'^qword ptr \[rip \+ (?P0x[0-9a-z]+)\]$') 13 | self.rip_with_offset = re.compile(u'\[rip(?: (?P[\+\-] [0x]?[0-9a-z]+))?\]') #Apparently the hex prefix is optional if the number is...unambiguous? 14 | # Pre-populate this instruction in the metacache so we can avoid rewriting variations of it 15 | metacache[' lea rbx,[rip]'] = 3 16 | metacache[' lea rbx,[rip]'] = 3 17 | #From Brian's Static_phase.py 18 | self.JCC = ['jo','jno','js','jns','je','jz','jne','jnz','jb','jnae', 19 | 'jc','jnb','jae','jnc','jbe','jna','ja','jnbe','jl','jnge','jge', 20 | 'jnl','jle','jng','jg','jnle','jp','jpe','jnp','jpo','jrcxz','jecxz'] 21 | 22 | def replace_rip(self,ins,mapping,newlen): 23 | code = b'' 24 | # In the main binary, we technically do not need to use rip; 25 | # since we know the location our main binary code will be at, 26 | # we can replace it with an absolute address. HOWEVER, if we want 27 | # to support position-independent main binaries, and if we don't 28 | # want to have to re-assemble any instructions that our assembler 29 | # cannot currently handle correctly (such as ljmp), then it is better 30 | # to simply replace rip in the same way as in shared objects. 31 | # 32 | # For shared objects we *need* to use rip, but calculate 33 | # (rip - (newbase + after new instruction address)) + address after old instruction 34 | # or (rip + ( (address after old instruction) - (newbase + after new instruction address) ) ) 35 | # The goal is to compute the value rip WOULD have had if the original binary were run, and replace 36 | # rip with that value, derived from the NEW value in rip... 37 | match = self.rip_with_offset.search(ins.op_str) #TODO: all this new stuff with the match and then the assembler optimization 38 | if mapping is not None: 39 | #print 'rewriting %s instruction with rip: %s %s' % (ins.mnemonic,ins.mnemonic,ins.op_str) 40 | oldoffset = 0 #Assume at first that there is no offset from rip 41 | if match.group('offset') != None: 42 | #print 'match on offset: %s' % match.group('offset') 43 | oldoffset = int(match.group('offset'), 16) 44 | oldaddr = ins.address + len(ins.bytes) 45 | # For completely rewritten instructions, the new length will indeed change, because the original instruction 46 | # may be rewritten into multiple instructions, with potentially many instructions inserted before the one 47 | # that references rip. Because an instruction referring to rip has it pointing after that instruction, we need 48 | # the length of all code preceding it and then the length of the new instruction referencing rip to know the 49 | # *real* new address. Then we can determine the offset between them and add the old offset, thereby giving our new offset. 
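      # Hypothetical worked example: ins.address=0x1000 and len(ins.bytes)=7 give
      # oldaddr=0x1007; mapping[0x1000]=0x500 and newlen=10 give newaddr=0x50a; with
      # newbase=0x9000000 and oldoffset=0x100, newoffset =
      # (0x1007 - (0x9000000 + 0x50a)) + 0x100 = -0x8fff403, so at runtime
      # (newbase + newaddr) + newoffset == oldaddr + oldoffset, i.e. exactly the
      # address the original rip-relative operand pointed to.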
50 | # All instructions may potentially have code inserted before them, so we will always need this new length. 51 | newaddr = mapping[ins.address] + newlen 52 | newoffset = (oldaddr - (self.context.newbase + newaddr)) + oldoffset 53 | newopstr = '' 54 | # If the new offset cannot be encoded in 4 bytes, replace it with a placeholder 55 | if newoffset <= -0x80000000 or newoffset >= 0x7fffffff: 56 | print 'WARNING: unencodable offset for instruction @ 0x%x: %x' % (ins.address,newoffset) 57 | newoffset = -0x7faddead 58 | # Check whether it's negative so we can prefix with 0x even with negative numbers 59 | if newoffset < 0: 60 | newopstr = self.rip_with_offset.sub('[rip - 0x%x]' % -newoffset, ins.op_str) 61 | else: 62 | newopstr = self.rip_with_offset.sub('[rip + 0x%x]' % newoffset, ins.op_str) 63 | #print 'Old offset: 0x%x / Old address: 0x%x / New address: 0x%x / New base: 0x%x' % (oldoffset,oldaddr,newaddr,self.context.newbase) 64 | #print 'New instruction: %s %s' % (ins.mnemonic,newopstr) 65 | return newopstr 66 | else: 67 | #Placeholder until we know the new instruction location 68 | newopstr = self.rip_with_offset.sub('[rip]', ins.op_str) 69 | #print 'rewriting %s instruction with rip: %s %s' % (ins.mnemonic,ins.mnemonic,ins.op_str) 70 | #print 'assembling %s %s' % (ins.mnemonic, newopstr) 71 | #print 'instruction is %s' % str(ins.bytes[:-4] + (b'\0'*4)).encode('hex') 72 | newins = '%s %s' % (ins.mnemonic, newopstr) 73 | # Pre-populate cache with version of this instruction with NO offset; this means we never have to call assembler for this instruction. 74 | # The assembler can just replace the offset, which we assume is the last 4 bytes in the instruction 75 | if newins not in cache: 76 | # Only add to the cache ONCE. If you keep adding to the cache, some instructions have prefixes that ALTER the base instruction length 77 | # for that instruction with no offset. Therefore, if another instruction comes along with the same mnemonic and opstring, but containing 78 | # a different number of garbage prefixes before it, then the length of these instructions fluctuates, throwing off all the careful alignment 79 | # required for mapping these instructions. Due to these garbage prefixes, some instructions may increase by a few bytes and semantics could 80 | # potentially, theoretically be altered, but this could be solved with a better assembler or disassembler. 81 | # --- 82 | # The displacement size and offset are not easily obtainable in the current version of capstone, so this requires a customized version that 83 | # provides access to this data. With this, we can determine exactly the position of the displacement and replace it 84 | disp_size = ins._detail.arch.x86.encoding.disp_size 85 | disp_offset = ins._detail.arch.x86.encoding.disp_offset 86 | # We will only automatically replace 4-byte displacements, because smaller ones will very likely not fit the new displacement, and 4-byte 87 | # displacements are much more common. This means we will need to re-assemble any instructions that do not have a 4-byte displacement, however. 88 | if disp_size == 4: 89 | metacache[newins] = disp_offset # Save displacement offset for assembler 90 | # Populate version in cache with the instruction with a displacement of all 0s. Leave the immediate value (if there is one) intact. 
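          # Hypothetical encoding example: 'mov rax, qword ptr [rip + 0x1234]'
          # assembles to 48 8b 05 34 12 00 00, i.e. disp_offset=3 and disp_size=4,
          # so the cached copy becomes 48 8b 05 00 00 00 00 and the assembler only
          # has to splice the new little-endian displacement into those 4 bytes.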
91 | cache[newins] = ins.bytes[:disp_offset] + (b'\0'*4) + ins.bytes[disp_offset+disp_size:] 92 | else: 93 | # TODO: Changing the instruction to use a larger displacement WILL change the instruction length, and thus WILL result in an incorrect new 94 | # displacement as we calculate it now. This needs to be fixed to use the correct new displacement as it would be calculated after knowing 95 | # the new instruction length. 96 | print 'WARNING: instruction %s has small displacement: %d'%(newins,disp_size) 97 | return newopstr 98 | 99 | def translate_one(self,ins,mapping): 100 | if ins.mnemonic in ['call','jmp']: #Unconditional jump 101 | return self.translate_uncond(ins,mapping) 102 | elif ins.mnemonic in self.JCC: #Conditional jump 103 | return self.translate_cond(ins,mapping) 104 | elif ins.mnemonic == 'ret': 105 | return self.translate_ret(ins,mapping) 106 | elif ins.mnemonic in ['retn','retf','repz']: #I think retn is not used in Capstone 107 | #print 'WARNING: unimplemented %s %s'%(ins.mnemonic,ins.op_str) 108 | return '\xf4\xf4\xf4\xf4' #Create obvious cluster of hlt instructions 109 | else: #Any other instruction 110 | inserted = self.before_inst_callback(ins) 111 | #Even for non-control-flow instructions, we need to replace all references to rip 112 | #with the address pointing directly after the instruction. 113 | #TODO: This will NOT work for shared libraries or any PIC, because it depends on 114 | #knowing the static instruction address. For all shared objects, we would need to 115 | #subtract off the offset between the original and new text; as long as the offset is 116 | #fixed, then we should be able to just precompute that offset, without it being affected 117 | #by the position of the .so code 118 | #TODO: abandon rewriting ljmp instructions for now because the assembler doesn't like them 119 | #and we haven't been rewriting their destinations anyway; if they *are* used, they were already 120 | #broken before this 121 | #TODO: I have also abandoned rewriting the following instructions because I can't get it to 122 | #re-assemble with the current assembler: 123 | # fstp 124 | # fldenv 125 | # fld 126 | #TODO: Since I am now doing a crazy optimization in which I use the original instruction's bytes 127 | #and only change the last 4 bytes (the offset), I should actually be able to support these incompatible 128 | #instructions by saving their original bytes in the assembler cache and therefore never actually sending 129 | #the disassembled instruction to the assembler at all. 
130 | incompatible = ['ljmp', 'fstp', 'fldenv', 'fld', 'fbld'] 131 | if 'rip' in ins.op_str:# and (ins.mnemonic not in incompatible): 132 | '''asm1 = asm( '%s %s' % (ins.mnemonic, self.replace_rip(ins,mapping) ) ) 133 | asm2 = asm( '%s %s' % (ins.mnemonic, self.replace_rip(ins,None) ) ) 134 | if len(asm1) != len(asm2): 135 | print '%s %s @ 0x%x LENGTH FAIL1: %s vs %s' % (ins.mnemonic, ins.op_str, ins.address, str(asm1).encode('hex'), str(asm2).encode('hex') ) 136 | newone = len( asm( '%s %s' % (ins.mnemonic, self.replace_rip(ins,mapping) ) ) ) 137 | oldone = len( asm( '%s %s' % (ins.mnemonic, self.replace_rip(ins,None) ) ) ) 138 | print '%d vs %d, %d vs %d' % (newone,oldone,len(asm1),len(asm2))''' 139 | code = b'' 140 | if inserted is not None: 141 | code = asm( '%s %s' % (ins.mnemonic, self.replace_rip(ins,mapping,len(inserted) + len(ins.bytes) ) ) ) 142 | code = inserted + code 143 | else: 144 | code = asm( '%s %s' % (ins.mnemonic, self.replace_rip(ins,mapping,len(ins.bytes) ) ) ) 145 | return code 146 | else: 147 | '''if 'rip' in ins.op_str and (ins.mnemonic in incompatible): 148 | print 'NOT rewriting %s instruction with rip: %s %s' % (ins.mnemonic,ins.mnemonic,ins.op_str) 149 | if ins.mnemonic == 'ljmp': 150 | print 'WARNING: unhandled %s %s @ %x'%(ins.mnemonic,ins.op_str,ins.address)''' 151 | if inserted is not None: 152 | return inserted + str(ins.bytes) 153 | return None #No translation needs to be done 154 | 155 | def translate_ret(self,ins,mapping): 156 | ''' 157 | mov [esp-28], eax ;save old eax value 158 | pop eax ;pop address from stack from which we will get destination 159 | call $+%s ;call lookup function 160 | mov [esp-4], eax ;save new eax value (destination mapping) 161 | mov eax, [esp-32] ;restore old eax value (the pop has shifted our stack so we must look at 28+4=32) 162 | jmp [esp-4] ;jmp/call to new address 163 | ''' 164 | template_before = ''' 165 | mov [rsp-56], rax 166 | pop rax 167 | ''' 168 | template_after = ''' 169 | call $+%s 170 | %s 171 | mov [rsp-8], rax 172 | mov rax, [rsp-%d] 173 | jmp [rsp-8] 174 | ''' 175 | self.context.stat['ret']+=1 176 | code = b'' 177 | inserted = self.before_inst_callback(ins) 178 | if inserted is not None: 179 | code += inserted 180 | # Since thunks do not need to be used for 64-bit code, there is no specific 181 | # place we need to treat as a special case. It is unlikely that code will 182 | # try to use the pushed return address to obtain the instruction pointer 183 | # (after all, it can just access it directly!), but should it TRY to do this, 184 | # the program will crash! Thus the no_pic optimization is a heuristic that 185 | # won't work for some code (in this case only very unusual code?) 186 | if self.context.no_pic: # and ins.address != self.context.get_pc_thunk + 3: 187 | #Perform a normal return UNLESS this is the ret for the thunk. 188 | #Currently its position is hardcoded as three bytes after the thunk entry. 
189 | code = asm( 'ret %s'%ins.op_str ) 190 | else: 191 | code = asm(template_before) 192 | size = len(code) 193 | lookup_target = b'' 194 | if self.context.exec_only: 195 | #Special lookup for not rewriting arguments when going outside new main text address space 196 | lookup_target = self.remap_target(ins.address,mapping,self.context.secondary_lookup_function_offset,size) 197 | else: 198 | lookup_target = self.remap_target(ins.address,mapping,self.context.lookup_function_offset,size) 199 | if ins.op_str == '': 200 | code+=asm(template_after%(lookup_target,'',64)) #64 because of the value we popped 201 | else: #For ret instructions that pop imm16 bytes from the stack, add that many bytes to esp 202 | pop_amt = int(ins.op_str,16) #We need to retrieve the right eax value from where we saved it 203 | code+=asm(template_after%(lookup_target,'add rsp,%d'%pop_amt,64+pop_amt)) 204 | return code 205 | 206 | def translate_cond(self,ins,mapping): 207 | self.context.stat['jcc']+=1 208 | patched = b'' 209 | inserted = self.before_inst_callback(ins) 210 | if inserted is not None: 211 | patched += inserted 212 | if ins.mnemonic in ['jrcxz','jecxz']: #These instructions have no long encoding (and jcxz is not allowed in 64-bit) 213 | jrcxz_template = ''' 214 | test rcx,rcx 215 | ''' 216 | jecxz_template = ''' 217 | test ecx,ecx 218 | ''' 219 | target = ins.operands[0].imm # int(ins.op_str,16) The destination of this instruction 220 | #newtarget = remap_target(ins.address,mapping,target,0) 221 | if ins.mnemonic == 'jrcxz': 222 | patched+=asm(jrcxz_template) 223 | else: 224 | patched+=asm(jecxz_template) 225 | newtarget = self.remap_target(ins.address,mapping,target,len(patched)) 226 | #print 'want %s, but have %s instead'%(remap_target(ins.address,mapping,target,len(patched)), newtarget) 227 | #Apparently the offset for jcxz and jecxz instructions may have been wrong? How did it work before? 228 | patched += asm('jz $+%s'%newtarget) 229 | #print 'code length: %d'%len(patched) 230 | 231 | #TODO: some instructions encode to 6 bytes, some to 5, some to 2. How do we know which? 232 | #For example, for CALL, it seems to only be 5 or 2 depending on offset. 233 | #But for jg, it can be 2 or 6 depending on offset, I think because it has a 2-byte opcode. 234 | #while len(patched) < 6: #Short encoding, which we do not want 235 | # patched+='\x90' #Add padding of NOPs 236 | #The previous commented out code wouldn't even WORK now, since we insert another instruction 237 | #at the MINIMUM. I'm amazed the jcxz/jecxz code even worked at all before 238 | else: 239 | target = ins.operands[0].imm # int(ins.op_str,16) The destination of this instruction 240 | newtarget = self.remap_target(ins.address,mapping,target,len(patched)) 241 | patched+=asm(ins.mnemonic + ' $+' + newtarget) 242 | #TODO: some instructions encode to 6 bytes, some to 5, some to 2. How do we know which? 243 | #For example, for CALL, it seems to only be 5 or 2 depending on offset. 244 | #But for jg, it can be 2 or 6 depending on offset, I think because it has a 2-byte opcode. 245 | #while len(patched) < 6: #Short encoding, which we do not want 246 | # patched+='\x90' #Add padding of NOPs 247 | return patched 248 | 249 | def translate_uncond(self,ins,mapping): 250 | op = ins.operands[0] #Get operand 251 | if op.type == X86_OP_REG: # e.g. call eax or jmp ebx 252 | target = ins.reg_name(op.reg) 253 | return self.get_indirect_uncond_code(ins,mapping,target) 254 | elif op.type == X86_OP_MEM: # e.g. 
call [eax + ecx*4 + 0xcafebabe] or jmp [ebx+ecx] 255 | target = ins.op_str 256 | return self.get_indirect_uncond_code(ins,mapping,target) 257 | elif op.type == X86_OP_IMM: # e.g. call 0xdeadbeef or jmp 0xcafebada 258 | target = op.imm 259 | code = b'' 260 | inserted = self.before_inst_callback(ins) 261 | if inserted is not None: 262 | code += inserted 263 | # Again, there is no thunk special case for 64-bit code 264 | if self.context.no_pic: # and target != self.context.get_pc_thunk: 265 | #push nothing if no_pic UNLESS it's the thunk 266 | #We only support DIRECT calls to the thunk 267 | if ins.mnemonic == 'call': 268 | self.context.stat['dircall']+=1 269 | else: 270 | self.context.stat['dirjmp']+=1 271 | elif ins.mnemonic == 'call': #If it's a call, push the original address of the next instruction 272 | self.context.stat['dircall']+=1 273 | exec_call = ''' 274 | push %s 275 | ''' 276 | so_call = ''' 277 | push rbx 278 | lea rbx,[rip - 0x%x] 279 | xchg rbx,[rsp] 280 | ''' 281 | if self.context.write_so: 282 | if mapping is not None: 283 | # 8 is the length of push rbx;lea rbx,[rip-%s] 284 | code += asm(so_call%( (self.context.newbase+(mapping[ins.address]+8)) - (ins.address+len(ins.bytes)) ) ) 285 | else: 286 | code += asm(so_call%( (self.context.newbase) - (ins.address+len(ins.bytes)) ) ) 287 | else: 288 | code += asm(exec_call%(ins.address+len(ins.bytes))) 289 | else: 290 | self.context.stat['dirjmp']+=1 291 | newtarget = self.remap_target(ins.address,mapping,target,len(code)) 292 | #print "(pre)new length: %s"%len(callback_code) 293 | #print "target: %s"%hex(target) 294 | #print "newtarget: %s"%newtarget 295 | # Again, there is no thunk special case for 64-bit code 296 | if self.context.no_pic: # and target != self.context.get_pc_thunk: 297 | code += asm( '%s $+%s'%(ins.mnemonic,newtarget) ) 298 | else: 299 | patched = asm('jmp $+%s'%newtarget) 300 | if len(patched) == 2: #Short encoding, which we do not want 301 | patched+='\x90\x90\x90' #Add padding of 3 NOPs 302 | code += patched 303 | #print "new length: %s"%len(callback_code+patched) 304 | return code 305 | return None 306 | 307 | def get_indirect_uncond_code(self,ins,mapping,target): 308 | #Commented assembly 309 | ''' 310 | mov [esp-28], eax ;save old eax value (very far above the stack because of future push/call) 311 | mov eax, %s ;read location in memory from which we will get destination 312 | %s ;if a call, we push return address here 313 | call $+%s ;call lookup function 314 | mov [esp-4], eax ;save new eax value (destination mapping) 315 | mov eax, [esp-%s] ;restore old eax value (offset depends on whether return address pushed) 316 | jmp [esp-4] ;jmp to new address 317 | ''' 318 | #If the argument is an offset from rip, then we must change the reference to rip. Any rip-relative 319 | #addressing is destroyed because all the offsets are completely different; we need the 320 | #original address that rip WOULD have pointed to, so we must replace any references to it. 
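    # Sketch of the common case built from the templates below (indirect jmp, main
    # executable, no_pic disabled; operands and offsets are made up): 'jmp [rax]'
    # becomes roughly
    #   mov [rsp-64], rax    ; save rax out of the way of the upcoming call
    #   mov rax, [rax]       ; load the original destination
    #   call $+<lookup>      ; translate it through the lookup function
    #   mov [rsp-8], rax     ; stash the translated destination
    #   mov rax, [rsp-64]    ; restore the caller's rax
    #   jmp [rsp-8]          ; jump into the rewritten code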
321 | template_before = ''' 322 | mov [rsp-64], rax 323 | mov rax, %s 324 | %s 325 | ''' 326 | exec_call = ''' 327 | push %s 328 | ''' 329 | so_call_before = ''' 330 | push rbx 331 | ''' 332 | so_call_after = ''' 333 | lea rbx,[rip - 0x%x] 334 | xchg rbx,[rsp] 335 | ''' 336 | template_after = ''' 337 | call $+%s 338 | mov [rsp-8], rax 339 | mov rax, [rsp-%s] 340 | jmp [rsp-8] 341 | ''' 342 | template_nopic = ''' 343 | call $+%s 344 | mov [rsp-8], rax 345 | mov rax, [rsp-%s] 346 | %s [rsp-8] 347 | ''' 348 | #TODO: This is somehow still the bottleneck, so this needs to be optimized 349 | code = b'' 350 | if self.context.exec_only: 351 | code += self.get_remap_callbacks_code(ins,mapping,target) 352 | #NOTE: user instrumentation code comes after callbacks code. No particular reason to put it either way, 353 | #other than perhaps consistency, but for now this is easier. 354 | inserted = self.before_inst_callback(ins) 355 | if inserted is not None: 356 | code += inserted 357 | #Replace references to rip with the original address after this instruction so that we 358 | #can look up the new address using the original 359 | if 'rip' in target: 360 | '''if len( asm( '%s %s' % (ins.mnemonic, self.replace_rip(ins,mapping) ) ) ) != len( asm( '%s %s' % (ins.mnemonic, self.replace_rip(ins,None) ) ) ): 361 | print '%s %s @ 0x%x LENGTH FAIL2: %s vs %s' % (ins.mnemonic, ins.op_str, ins.address, str(asm('%s %s' % (ins.mnemonic, self.replace_rip(ins,mapping) ))).encode('hex'), str(asm('%s %s' % (ins.mnemonic, self.replace_rip(ins,None)) )).encode('hex') ) 362 | newone = len( asm( '%s %s' % (ins.mnemonic, self.replace_rip(ins,mapping) ) ) ) 363 | oldone = len( asm( '%s %s' % (ins.mnemonic, self.replace_rip(ins,None) ) ) ) 364 | print '%d vs %d, %s' % (newone,oldone,newone == oldone)''' 365 | # The new "instruction length" is the length of all preceding code, plus the instructions up through the one referencing rip 366 | target = self.replace_rip(ins,mapping,len(code) + len(asm('mov [rsp-64],rax\nmov rax,[rip]')) ) 367 | if self.context.no_pic: 368 | if ins.mnemonic == 'call': 369 | self.context.stat['indcall']+=1 370 | else: 371 | self.context.stat['indjmp']+=1 372 | code += asm( template_before%(target,'') ) 373 | elif ins.mnemonic == 'call': 374 | self.context.stat['indcall']+=1 375 | if self.context.write_so: 376 | code += asm( template_before%(target,so_call_before) ) 377 | if mapping is not None: 378 | # 7 is the length of the lea rbx,[rip-%s] instruction, which needs to be added to the length of the code preceding where we access RIP 379 | code += asm(so_call_after%( (mapping[ins.address]+len(code)+7+self.context.newbase) - (ins.address+len(ins.bytes)) ) ) 380 | else: 381 | code += asm(so_call_after%( (0x8f+self.context.newbase) - (ins.address+len(ins.bytes)) ) ) 382 | else: 383 | code += asm(template_before%(target,exec_call%(ins.address+len(ins.bytes)) )) 384 | else: 385 | self.context.stat['indjmp']+=1 386 | code += asm(template_before%(target,'')) 387 | size = len(code) 388 | lookup_target = self.remap_target(ins.address,mapping,self.context.lookup_function_offset,size) 389 | #Always transform an unconditional control transfer to a jmp, but 390 | #for a call, insert a push instruction to push the original return address on the stack. 391 | #At runtime, our rewritten ret will look up the right address to return to and jmp there. 392 | #If we push a value on the stack, we have to store even FURTHER away from the stack. 
393 | #Note that calling the lookup function can move the stack pointer temporarily up to 394 | #20 bytes, which will obliterate anything stored too close to the stack pointer. That, plus 395 | #the return value we push on the stack, means we need to put it at least 28 bytes away. 396 | if self.context.no_pic: 397 | #Change target to secondary lookup function instead 398 | lookup_target = self.remap_target(ins.address,mapping,self.context.secondary_lookup_function_offset,size) 399 | code += asm( template_nopic%(lookup_target,64,ins.mnemonic) ) 400 | elif ins.mnemonic == 'call': 401 | code += asm(template_after%(lookup_target,56)) 402 | else: 403 | code += asm(template_after%(lookup_target,64)) 404 | return code 405 | 406 | def get_remap_callbacks_code(self,ins,mapping,target): 407 | '''Checks whether the target destination (expressed as the opcode string from a jmp/call instruction) 408 | is in the got, then checks if it matches a function with callbacks. It then rewrites the 409 | addresses if necessary. This will *probably* always be from jmp instructions in the PLT. 410 | NOTE: This assumes it does not have any code inserted before it, and that it comprises 411 | the first special instructions inserted for an instruction.''' 412 | if self.memory_ref_string.match(target): 413 | match = self.memory_ref_string.match(target) 414 | #Add address of instruction after this one and the offset to get destination 415 | address = (ins.address + len(ins.bytes)) + int(match.group('offset'), 16) 416 | if address in self.context.plt['entries']: 417 | if self.context.plt['entries'][address] in self.context.callbacks: 418 | print 'Found library call with callbacks: %s'%self.context.plt['entries'][address] 419 | return self.get_callback_code( ins.address, mapping, self.context.callbacks[self.context.plt['entries'][address]] ) 420 | return b'' 421 | 422 | def get_callback_code(self,address,mapping,cbargs): 423 | '''Remaps each callback argument based on index. cbargs is an array of argument indices 424 | that let us know which argument (a register in x64) we must rewrite. 425 | We insert code for each we must rewrite.''' 426 | arg_registers = ['rdi','rsi','rdx','rcx','r8','r9'] #Order of arguments in x86-64 427 | callback_template_before = ''' 428 | mov rax, %s 429 | ''' 430 | callback_template_after = ''' 431 | call $+%s 432 | mov %s, rax 433 | ''' 434 | code = asm('push rax') #Save rax, use to hold callback address 435 | for ind in cbargs: 436 | #Move value in register for that argument to rax 437 | cb_before = callback_template_before%( arg_registers[ind] ) 438 | code += asm(cb_before) #Assemble this part first so we will know the offset to the lookup function 439 | size = len(code) 440 | #Use secondary lookup function so it won't try to rewrite arguments if the callback is outside the main binary 441 | lookup_target = self.remap_target( address, mapping, self.context.secondary_lookup_function_offset, size ) 442 | cb_after = callback_template_after%( lookup_target, arg_registers[ind] ) 443 | code += asm(cb_after) #Save the new address over the original 444 | code += asm('pop rax') #Restore rax 445 | return code 446 | 447 | def in_plt(self,target): 448 | return target in range(self.context.plt['addr'],self.context.plt['addr']+self.context.plt['size']) 449 | 450 | '''def get_plt_entry(self,target): 451 | #It seems that an elf does not directly give a mapping from each entry in the plt. 
452 | #Instead, it maps from the got entries instead, making it unclear exactly where objdump 453 | #gets the information. For our purposes, since all the entries in the plt jump to the got 454 | #entry, we can read the destination address from the jmp instruction. 455 | #TODO: ensure works for x64 456 | offset = target - self.context.plt['addr'] #Get the offset into the plt 457 | #TODO: The following assumes an absolute jmp, whereas I believe it is a rip-relative jmp in x64 458 | dest = self.context.plt['data'][offset+2:offset+2+4] #Get the four bytes of the GOT address 459 | dest = struct.unpack('e[a-z][a-z])( )?[+-]( )?(0x)?[0-9a-f]+\][ ]*') 11 | pat5 = re.compile('(0x[0-9a-f]+|[0-9]+)') 12 | pat6 = re.compile('[ ]*(?P(add)|(sub)) (?P(esp)|(ebx)),(?P[0-9]*)[ ]*') 13 | pat7 = re.compile('[ ]*mov eax, word ptr.*')#Match stupid size mismatch 14 | pat8 = re.compile('[ ]*mov eax, .[xip]')#Match ridiculous register mismatch 15 | 16 | #jcxz and jecxz are removed because they don't have a large expansion 17 | JCC = ['jo','jno','js','jns','je','jz','jne','jnz','jb','jnae', 18 | 'jc','jnb','jae','jnc','jbe','jna','ja','jnbe','jl','jnge','jge', 19 | 'jnl','jle','jng','jg','jnle','jp','jpe','jnp','jpo'] 20 | 21 | #Simple cache code. Called after more complex preprocessing of assembly source. 22 | def _asm(text): 23 | if text in cache: 24 | return cache[text] 25 | else: 26 | with open('uncached.txt','a') as f: 27 | f.write(text+'\n') 28 | code = pwn.asm(text) 29 | cache[text] = code 30 | return code 31 | 32 | def asm(text): 33 | code = b'' 34 | for line in text.split('\n'): 35 | if not line.find(';') == -1: 36 | line = line[:line.find(';')]#Eliminate comments 37 | #Check for offsets ($+) 38 | match = pat.search(line) 39 | if match and match.group() != '$+0x8f': 40 | off = int(match.group()[2:],16) 41 | line = line.strip() 42 | mnemonic = line[:line.find(' ')] 43 | line = pat.sub('$+0x8f',line) #Replace actual offset with dummy 44 | newcode = _asm(line) #Assembled code with dummy offset 45 | if mnemonic in ['jmp','call']: 46 | off-=5 #Subtract 5 because the large encoding knows it's 5 bytes long 47 | newcode = newcode[0]+struct.pack(']" to "mov eax, dword ptr []"' 60 | code+=b'\xa1' + struct.pack(' 0x7f: 79 | line = pat5.sub('0x8f',line) 80 | original = struct.pack(' 0x7f: 103 | newcode = _asm('%s %s,0x8f'%(mnemonic,register) ) 104 | newcode = newcode[:2] + struct.pack(']" to "mov eax, dword ptr []"' 114 | code+=_asm(line.replace(' word',' dword')) 115 | elif pat8.match(line): 116 | print 'WARNING: silently converting "mov eax, [xip]" to "mov eax, e[xip]"' 117 | code+=_asm(line.replace(', ',', e')) 118 | else: 119 | code+=_asm(line) 120 | return code 121 | 122 | def oldasm(text): 123 | if 'mov [esp-16], eax\n mov eax, ' in text: 124 | print text 125 | if not pat3.search(text): 126 | print str(pwn.asm(text)).encode('hex') 127 | text2 = ''' 128 | mov [esp-16], eax 129 | mov eax, dword ptr [eax*4 + 0x80597bc] 130 | ''' 131 | print str(pwn.asm(text2)).encode('hex') 132 | raise Exception 133 | if '$+' in text: 134 | code = b'' 135 | for line in text.split('\n'): 136 | match = pat.search(line) 137 | if match and match.group() != '$+0x8f': 138 | #print 'ORIGINAL: %s'%line 139 | #print 'MATCH %s'%match.group() 140 | off = int(match.group()[2:],16) 141 | #print 'offset %x'%off 142 | line = line.strip() 143 | mnemonic = line[:line.find(' ')] 144 | #print 'mnemonic %s'%mnemonic 145 | #before = _asm(line) 146 | #print 'BEFORE: %s'%before.encode('hex') 147 | line = pat.sub('$+0x8f',line) #Replace actual offset 
with dummy 148 | newcode = _asm(line) #Assembled code with dummy offset 149 | #print 'DUMMY: %s'%newcode.encode('hex') 150 | if mnemonic in ['jmp','call']: 151 | off-=5 #Subtract 5 because the large encoding knows it's 5 bytes long 152 | newcode = newcode[0]+struct.pack(' 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #else 27 | #define NULL ( (void *) 0) 28 | #endif 29 | 30 | unsigned int __attribute__ ((noinline)) my_read(int, char *, unsigned int); 31 | int __attribute__ ((noinline)) my_open(const char *); 32 | void populate_mapping(unsigned int, unsigned int, unsigned int, unsigned int *); 33 | void process_maps(char *, unsigned int *); 34 | unsigned int lookup(unsigned int, unsigned int *); 35 | 36 | #ifdef DEBUG 37 | int wrapper(unsigned int *global_mapping){ 38 | #else 39 | int _start(void *global_mapping){ 40 | #endif 41 | // force string to be stored on the stack even with optimizations 42 | //char maps_path[] = "/proc/self/maps\0"; 43 | volatile int maps_path[] = { 44 | 0x6f72702f, 45 | 0x65732f63, 46 | 0x6d2f666c, 47 | 0x00737061, 48 | }; 49 | 50 | unsigned int buf_size = 0x10000; 51 | char buf[buf_size]; 52 | int proc_maps_fd; 53 | int cnt, offset = 0; 54 | 55 | 56 | proc_maps_fd = my_open((char *) &maps_path); 57 | cnt = my_read(proc_maps_fd, buf, buf_size); 58 | while( cnt != 0 && offset < buf_size ){ 59 | offset += cnt; 60 | cnt = my_read(proc_maps_fd, buf+offset, buf_size-offset); 61 | } 62 | buf[offset] = '\0';// must null terminate 63 | 64 | #ifdef DEBUG 65 | printf("READ:\n%s\n", buf); 66 | // simulation for testing - dont call process maps 67 | populate_mapping(0x08800000, 0x08880000, 0x07000000, global_mapping); 68 | /* 69 | int i; 70 | for (i = 0x08800000; i < 0x08880000; i++){ 71 | if (lookup(i, global_mapping) != 0x07000000){ 72 | printf("Failed lookup of 0x%08x\n", i); 73 | } 74 | } 75 | */ 76 | //chedck edge cases 77 | 78 | lookup(0x08800000-1, global_mapping); 79 | lookup(0x08800000, global_mapping); 80 | lookup(0x08880000+1, global_mapping); 81 | //printf("0x08812345 => 0x%08x\n", lookup(0x08812345, global_mapping)); 82 | #else 83 | process_maps(buf, global_mapping); 84 | #endif 85 | return 0; 86 | } 87 | 88 | #ifdef DEBUG 89 | unsigned int lookup(unsigned int addr, unsigned int *global_mapping){ 90 | unsigned int index = addr >> 12; 91 | //if (global_mapping[index] == 0xffffffff){ 92 | printf("0x%08x :: mapping[%d] :: &0x%p :: 0x%08x\n", addr, index, &(global_mapping[index]), global_mapping[index]); 93 | //} 94 | return global_mapping[index]; 95 | } 96 | #endif 97 | 98 | unsigned int __attribute__ ((noinline)) my_read(int fd, char *buf, unsigned int count){ 99 | unsigned int bytes_read; 100 | asm volatile( 101 | ".intel_syntax noprefix\n" 102 | "mov eax, 3\n" 103 | "mov ebx, %1\n" 104 | "mov ecx, %2\n" 105 | "mov edx, %3\n" 106 | "int 0x80\n" 107 | "mov %0, eax\n" 108 | : "=g" (bytes_read) 109 | : "g" (fd), "g" (buf), "g" (count) 110 | : "ebx", "esi", "edi" 111 | ); 112 | return bytes_read; 113 | } 114 | 115 | int __attribute__ ((noinline)) my_open(const char *path){ 116 | int fp; 117 | asm volatile( 118 | ".intel_syntax noprefix\n" 119 | "mov eax, 5\n" 120 | "mov ebx, %1\n" 121 | "mov ecx, 0\n" 122 | "mov edx, 0\n" 123 | "int 0x80\n" 124 | "mov %0, eax\n" 125 | : "=r" (fp) 126 | : "g" (path) 127 | : "ebx", "esi", "edi" 128 | ); 129 | return fp; 130 | } 131 | 132 | int is_exec(char *line){ 133 | // e.g., "08048000-08049000 r-xp ..." 
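// column layout of "08048000-08049000 r-xp ...": 8-char start address, '-', 8-char end
// address, a space, then the permission flags, so 'r' is line[18], 'w' is line[19] and
// 'x' is line[20]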
134 | return line[20] == 'x'; 135 | } 136 | 137 | int is_write(char *line){ 138 | // e.g., "08048000-08049000 rw-p ..." 139 | return line[19] == 'w'; 140 | } 141 | 142 | char *next_line(char *line){ 143 | /* 144 | * finds the next line to process 145 | */ 146 | for (; line[0] != '\0'; line++){ 147 | if (line[0] == '\n'){ 148 | if (line[1] == '\0') 149 | return NULL; 150 | return line+1; 151 | } 152 | } 153 | return NULL; 154 | } 155 | 156 | unsigned int my_atoi(char *a){ 157 | /* 158 | * convert 8 byte hex string into its integer representation 159 | * assumes input is from /proc/./maps 160 | * i.e., 'a' is a left-padded 8 byte lowercase hex string 161 | * e.g., "0804a000" 162 | */ 163 | unsigned int i = 0; 164 | int place, digit; 165 | for (place = 7; place >= 0; place--, a++){ 166 | digit = (int)(*a) - 0x30; 167 | if (digit > 9) 168 | digit -= 0x27; // digit was [a-f] 169 | i += digit << (place << 2); 170 | } 171 | return i; 172 | } 173 | 174 | void parse_range(char *line, unsigned int *start, unsigned int *end){ 175 | // e.g., "08048000-08049000 ..." 176 | *start = my_atoi(line); 177 | *end = my_atoi(line+9); 178 | } 179 | 180 | void populate_mapping(unsigned int start, unsigned int end, unsigned int lookup_function, unsigned int *global_mapping){ 181 | unsigned int index = start >> 12; 182 | int i; 183 | for(i = 0; i < (end - start) / 0x1000; i++){ 184 | global_mapping[index + i] = lookup_function; 185 | } 186 | #ifdef DEBUG 187 | printf("Wrote %d entries\n", i); 188 | #endif 189 | } 190 | 191 | void process_maps(char *buf, unsigned int *global_mapping){ 192 | /* 193 | * Process buf which contains output of /proc/self/maps 194 | * populate global_mapping for each executable set of pages 195 | */ 196 | char *line = buf; 197 | //unsigned int global_start, global_end; 198 | unsigned int old_text_start, old_text_end; 199 | unsigned int new_text_start, new_text_end; 200 | 201 | //Assume global mapping is first entry at 0x7000000 and that there is nothing before 202 | //Skip global mapping 203 | line = next_line(line); 204 | do{ // process each block of maps 205 | // process all segments from this object under very specific assumptions 206 | if ( is_exec(line) ){ 207 | if( !is_write(line) ){ 208 | parse_range(line, &old_text_start, &old_text_end); 209 | }else{ 210 | parse_range(line, &new_text_start, &new_text_end); 211 | populate_mapping(old_text_start, old_text_end, new_text_start, global_mapping); 212 | } 213 | } 214 | line = next_line(line); 215 | } while(line != NULL); 216 | // assume the very last executable and non-writable segment is that of the dynamic linker (ld-X.X.so) 217 | // populate those ranges with the value 0x00000000 which will be compared against in the global lookup function 218 | populate_mapping(old_text_start, old_text_end, 0x00000000, global_mapping); 219 | } 220 | 221 | #ifdef DEBUG 222 | int main(void){ 223 | void *mapping_base = (void *)0x09000000; 224 | int fd = open("./map_shell", O_RDWR); 225 | void *global_mapping = mmap(mapping_base, 0x400000, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); 226 | if (global_mapping != mapping_base){ 227 | printf("failed to get requested base addr\n"); 228 | exit(1); 229 | } 230 | wrapper(global_mapping); 231 | 232 | return 0; 233 | } 234 | #endif 235 | 236 | -------------------------------------------------------------------------------- /x86_runtime.py: -------------------------------------------------------------------------------- 1 | from x86_assembler import _asm,asm 2 | 3 | class X86Runtime(object): 4 | def 
__init__(self,context): 5 | self.context = context 6 | 7 | def get_lookup_code(self,base,size,lookup_off,mapping_off): 8 | #Example assembly for lookup function 9 | ''' 10 | push edx 11 | mov edx,eax 12 | call get_eip 13 | get_eip: 14 | pop eax ;Get current instruction pointer 15 | sub eax,0x8248 ;Subtract offset from instruction pointer val to get new text base addr 16 | sub edx,0x8048000 ;Compare to start (exclusive) and set edx to an offset in the mapping 17 | jl outside ;Out of bounds (too small) 18 | cmp edx,0x220 ;Compare to end (inclusive) (note we are now comparing to the size) 19 | jge outside ;Out of bounds (too big) 20 | mov edx,[mapping+edx*4] ;Retrieve mapping entry (can't do this directly in generated func) 21 | cmp edx, 0xffffffff ;Compare to invalid entry 22 | je failure ;It was an invalid entry 23 | add eax,edx ;Add the offset of the destination to the new text section base addr 24 | pop edx 25 | ret 26 | outside: ;If the address is out of the mapping bounds, return original address 27 | add edx,0x8048000 ;Undo subtraction of base, giving us the originally requested address 28 | mov eax,edx ;Place the original request back in eax 29 | pop edx 30 | jmp global_lookup ;Check if global lookup can find this 31 | failure: 32 | hlt 33 | ''' 34 | lookup_template = ''' 35 | push ebx 36 | mov ebx,eax 37 | call get_eip 38 | get_eip: 39 | pop eax 40 | sub eax,%s 41 | %s 42 | jb outside 43 | cmp ebx,%s 44 | jae outside 45 | mov ebx,[eax+ebx*4+%s] 46 | cmp ebx, 0xffffffff 47 | je failure 48 | add eax,ebx 49 | pop ebx 50 | ret 51 | outside: 52 | %s 53 | mov eax,ebx 54 | pop ebx 55 | mov DWORD PTR [esp-32],%s 56 | jmp [esp-32] 57 | failure: 58 | hlt 59 | ''' 60 | exec_code = ''' 61 | sub ebx,%s 62 | ''' 63 | exec_restore = ''' 64 | add ebx,%s 65 | ''' 66 | exec_only_lookup = ''' 67 | lookup: 68 | push ebx 69 | mov ebx,eax 70 | call get_eip 71 | get_eip: 72 | pop eax 73 | sub eax,%s 74 | sub ebx,%s 75 | jb outside 76 | cmp ebx,%s 77 | jae outside 78 | mov ebx,[eax+ebx*4+%s] 79 | add eax,ebx 80 | pop ebx 81 | ret 82 | 83 | outside: 84 | add ebx,%s 85 | mov eax,[esp+8] 86 | call lookup 87 | mov [esp+8],eax 88 | mov eax,ebx 89 | pop ebx 90 | ret 91 | ''' 92 | #For an .so, it can be loaded at an arbitrary address, so we cannot depend on 93 | #the base address being in a fixed location. Therefore, we instead compute 94 | #the old text section's start address by using the new text section's offset 95 | #from it. The new text section's offset equals the lookup address and is 96 | #stored in eax. I use lea instead of add because it doesn't affect the flags, 97 | #which are used to determine if ebx is outside the range. 98 | so_code = ''' 99 | sub eax,%s 100 | sub ebx,eax 101 | lea eax,[eax+%s] 102 | ''' 103 | so_restore = ''' 104 | sub eax,%s 105 | add ebx,eax 106 | add eax,%s 107 | ''' 108 | #retrieve eip 8 bytes after start of lookup function 109 | if self.context.write_so: 110 | return _asm(lookup_template%(lookup_off+8,so_code%(self.context.newbase,self.context.newbase),size,mapping_off,so_restore%(self.context.newbase,self.context.newbase),self.context.global_lookup)) 111 | elif self.context.exec_only: 112 | return _asm( exec_only_lookup%(lookup_off+8,base,size,mapping_off,base) ) 113 | else: 114 | return _asm(lookup_template%(lookup_off+8,exec_code%base,size,mapping_off,exec_restore%base,self.context.global_lookup)) 115 | 116 | def get_secondary_lookup_code(self,base,size,sec_lookup_off,mapping_off): 117 | '''This secondary lookup is only used when rewriting only the main executable. 
It is a second, simpler 118 | lookup function that is used by ret instructions and does NOT rewrite a return address on the stack 119 | when the destination is outside the mapping. It instead simply returns the original address and that's 120 | it. The only reason I'm doing this by way of a secondary lookup is this should be faster than a 121 | a parameter passed at runtime, so I need to statically have an offset to jump to in the case of returns. 122 | This is a cleaner way to do it than split the original lookup to have two entry points.''' 123 | secondary_lookup = ''' 124 | lookup: 125 | push ebx 126 | mov ebx,eax 127 | call get_eip 128 | get_eip: 129 | pop eax 130 | sub eax,%s 131 | sub ebx,%s 132 | jb outside 133 | cmp ebx,%s 134 | jae outside 135 | mov ebx,[eax+ebx*4+%s] 136 | add eax,ebx 137 | pop ebx 138 | ret 139 | 140 | outside: 141 | add ebx,%s 142 | mov eax,ebx 143 | pop ebx 144 | ret 145 | ''' 146 | return _asm( secondary_lookup%(sec_lookup_off+8,base,size,mapping_off,base) ) 147 | 148 | def get_global_lookup_code(self): 149 | global_lookup_template = ''' 150 | cmp eax,[%s] 151 | jz sysinfo 152 | glookup: 153 | cmp BYTE PTR [gs:%s],1 154 | jz failure 155 | mov BYTE PTR [gs:%s],1 156 | push eax 157 | shr eax,12 158 | shl eax,2 159 | mov eax,[%s+eax] 160 | mov DWORD PTR [esp-32],eax 161 | cmp eax, 0xffffffff 162 | jz abort 163 | test eax,eax 164 | jz loader 165 | pop eax 166 | call [esp-36] 167 | mov BYTE PTR [gs:%s],0 168 | ret 169 | loader: 170 | mov BYTE PTR [gs:%s],0 171 | pop eax 172 | sysinfo: 173 | push eax 174 | mov eax,[esp+8] 175 | call glookup 176 | mov [esp+8],eax 177 | pop eax 178 | ret 179 | failure: 180 | hlt 181 | abort: 182 | hlt 183 | mov eax,1 184 | int 0x80 185 | ''' 186 | return _asm(global_lookup_template%(self.context.global_sysinfo,self.context.global_flag,self.context.global_flag,self.context.global_sysinfo+4,self.context.global_flag,self.context.global_flag)) 187 | 188 | def get_auxvec_code(self,entry): 189 | #Example assembly for searching the auxiliary vector 190 | ''' 191 | mov [esp-4],esi ;I think there's no need to save these, but in case somehow the 192 | mov [esp-8],ecx ;linker leaves something of interest for _start, let's save them 193 | mov esi,[esp] ;Retrieve argc 194 | mov ecx,esp ;Retrieve address of argc 195 | lea ecx,[ecx+esi*4+4] ;Skip argv 196 | loopenv: ;Iterate through each environment variable 197 | add ecx,4 ;The first loop skips over the NULL after argv 198 | mov esi,[ecx] ;Retrieve environment variable 199 | test esi,esi ;Check whether it is NULL 200 | jnz loopenv ;If not, continue through environment vars 201 | add ecx,4 ;Hop over 0 byte to first entry 202 | loopaux: ;Iterate through auxiliary vector, looking for AT_SYSINFO (32) 203 | mov esi,[ecx] ;Retrieve the type field of this entry 204 | cmp esi,32 ;Compare to 32, the entry we want 205 | jz foundsysinfo ;Found it 206 | test esi,esi ;Check whether we found the entry signifying the end of auxv 207 | jz restore ;Go to _start if we reach the end 208 | add ecx,8 ;Each entry is 8 bytes; go to next 209 | jmp loopaux 210 | foundsysinfo: 211 | mov esi,[ecx+4] ;Retrieve sysinfo address 212 | mov [sysinfo],esi ;Save address 213 | restore: 214 | mov esi,[esp-4] 215 | mov ecx,[esp-8] 216 | push global_mapping ;Push address of global mapping for popgm 217 | call popgm 218 | add esp,4 ;Pop address of global mapping 219 | jmp realstart 220 | ''' 221 | auxvec_template = ''' 222 | mov [esp-4],esi 223 | mov [esp-8],ecx 224 | mov esi,[esp] 225 | mov ecx,esp 226 | lea ecx,[ecx+esi*4+4] 227 | 
loopenv: 228 | add ecx,4 229 | mov esi,[ecx] 230 | test esi,esi 231 | jnz loopenv 232 | add ecx,4 233 | loopaux: 234 | mov esi,[ecx] 235 | cmp esi,32 236 | jz foundsysinfo 237 | test esi,esi 238 | jz restore 239 | add ecx,8 240 | jmp loopaux 241 | foundsysinfo: 242 | mov esi,[ecx+4] 243 | mov [%s],esi 244 | restore: 245 | mov esi,[esp-4] 246 | mov ecx,[esp-8] 247 | push %s 248 | call [esp] 249 | add esp,4 250 | mov DWORD PTR [esp-12], %s 251 | jmp [esp-12] 252 | ''' 253 | return _asm(auxvec_template%(self.context.global_sysinfo,self.context.global_lookup+self.context.popgm_offset,self.context.newbase+entry)) 254 | 255 | def get_popgm_code(self): 256 | call_popgm = ''' 257 | pushad 258 | push %s 259 | call $+0xa 260 | add esp,4 261 | popad 262 | ret 263 | ''' 264 | popgmbytes = asm(call_popgm%(self.context.global_sysinfo+4)) 265 | with open('x86_%s' % self.context.popgm) as f: 266 | popgmbytes+=f.read() 267 | return popgmbytes 268 | 269 | def get_global_mapping_bytes(self): 270 | globalbytes = self.get_global_lookup_code() 271 | #globalbytes+='\0' #flag field 272 | globalbytes += self.get_popgm_code() 273 | globalbytes += '\0\0\0\0' #sysinfo field 274 | #Global mapping (0x3ffff8 0xff bytes) ending at kernel addresses. Note it is NOT ending 275 | #at 0xc0000000 because this boundary is only true for 32-bit kernels. For 64-bit kernels, 276 | #the application is able to use most of the entire 4GB address space, and the kernel only 277 | #holds onto a tiny 8KB at the top of the address space. 278 | globalbytes += '\xff'*((0xffffe000>>12)<<2) 279 | # Allocate extra space for any additional global variables that 280 | # instrumentation code might require 281 | if self.context.alloc_globals > 0: 282 | globalbytes += '\x00'*self.context.alloc_globals 283 | return globalbytes 284 | -------------------------------------------------------------------------------- /x86_translator.py: -------------------------------------------------------------------------------- 1 | from x86_assembler import asm 2 | from capstone.x86 import X86_OP_REG,X86_OP_MEM,X86_OP_IMM 3 | import struct 4 | import re 5 | from translator import Translator 6 | 7 | class X86Translator(Translator): 8 | 9 | def __init__(self,before_callback,context): 10 | self.before_inst_callback = before_callback 11 | self.context = context 12 | self.memory_ref_string = re.compile(u'^dword ptr \[(?P
0x[0-9a-z]+)\]$') 13 | #From Brian's Static_phase.py 14 | self.JCC = ['jo','jno','js','jns','je','jz','jne','jnz','jb','jnae', 15 | 'jc','jnb','jae','jnc','jbe','jna','ja','jnbe','jl','jnge','jge', 16 | 'jnl','jle','jng','jg','jnle','jp','jpe','jnp','jpo','jcxz','jecxz'] 17 | 18 | def translate_one(self,ins,mapping): 19 | if ins.mnemonic in ['call','jmp']: #Unconditional jump 20 | return self.translate_uncond(ins,mapping) 21 | elif ins.mnemonic in self.JCC: #Conditional jump 22 | return self.translate_cond(ins,mapping) 23 | elif ins.mnemonic == 'ret': 24 | return self.translate_ret(ins,mapping) 25 | elif ins.mnemonic in ['retn','retf','repz']: #I think retn is not used in Capstone 26 | #print 'WARNING: unimplemented %s %s'%(ins.mnemonic,ins.op_str) 27 | return '\xf4\xf4\xf4\xf4' #Create obvious cluster of hlt instructions 28 | else: #Any other instruction 29 | inserted = self.before_inst_callback(ins) 30 | if inserted is not None: 31 | return inserted + str(ins.bytes) 32 | return None #No translation needs to be done 33 | 34 | def translate_ret(self,ins,mapping): 35 | ''' 36 | mov [esp-28], eax ;save old eax value 37 | pop eax ;pop address from stack from which we will get destination 38 | call $+%s ;call lookup function 39 | mov [esp-4], eax ;save new eax value (destination mapping) 40 | mov eax, [esp-32] ;restore old eax value (the pop has shifted our stack so we must look at 28+4=32) 41 | jmp [esp-4] ;jmp/call to new address 42 | ''' 43 | template_before = ''' 44 | mov [esp-28], eax 45 | pop eax 46 | ''' 47 | template_after = ''' 48 | call $+%s 49 | %s 50 | mov [esp-4], eax 51 | mov eax, [esp-%d] 52 | jmp [esp-4] 53 | ''' 54 | self.context.stat['ret']+=1 55 | code = b'' 56 | inserted = self.before_inst_callback(ins) 57 | if inserted is not None: 58 | code += inserted 59 | if self.context.no_pic and ins.address != self.context.get_pc_thunk + 3: 60 | #Perform a normal return UNLESS this is the ret for the thunk. 61 | #Currently its position is hardcoded as three bytes after the thunk entry. 
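#(The thunk body is normally just "mov ebx,[esp]" followed by "ret"; that mov encodes in
# three bytes, which is where the +3 offset comes from.)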
62 | code = asm( 'ret %s'%ins.op_str ) 63 | else: 64 | code = asm(template_before) 65 | size = len(code) 66 | lookup_target = b'' 67 | if self.context.exec_only: 68 | #Special lookup for not rewriting arguments when going outside new main text address space 69 | lookup_target = self.remap_target(ins.address,mapping,self.context.secondary_lookup_function_offset,size) 70 | else: 71 | lookup_target = self.remap_target(ins.address,mapping,self.context.lookup_function_offset,size) 72 | if ins.op_str == '': 73 | code+=asm(template_after%(lookup_target,'',32)) #32 because of the value we popped 74 | else: #For ret instructions that pop imm16 bytes from the stack, add that many bytes to esp 75 | pop_amt = int(ins.op_str,16) #We need to retrieve the right eax value from where we saved it 76 | code+=asm(template_after%(lookup_target,'add esp,%d'%pop_amt,32+pop_amt)) 77 | return code 78 | 79 | def translate_cond(self,ins,mapping): 80 | self.context.stat['jcc']+=1 81 | patched = b'' 82 | inserted = self.before_inst_callback(ins) 83 | if inserted is not None: 84 | patched += inserted 85 | if ins.mnemonic in ['jcxz','jecxz']: #These instructions have no long encoding 86 | jcxz_template = ''' 87 | test cx,cx 88 | ''' 89 | jecxz_template = ''' 90 | test ecx,ecx 91 | ''' 92 | target = ins.operands[0].imm # int(ins.op_str,16) The destination of this instruction 93 | #newtarget = remap_target(ins.address,mapping,target,0) 94 | if ins.mnemonic == 'jcxz': 95 | patched+=asm(jcxz_template) 96 | else: 97 | patched+=asm(jecxz_template) 98 | newtarget = self.remap_target(ins.address,mapping,target,len(patched)) 99 | #print 'want %s, but have %s instead'%(remap_target(ins.address,mapping,target,len(patched)), newtarget) 100 | #Apparently the offset for jcxz and jecxz instructions may have been wrong? How did it work before? 101 | patched += asm('jz $+%s'%newtarget) 102 | #print 'code length: %d'%len(patched) 103 | 104 | #TODO: some instructions encode to 6 bytes, some to 5, some to 2. How do we know which? 105 | #For example, for CALL, it seems to only be 5 or 2 depending on offset. 106 | #But for jg, it can be 2 or 6 depending on offset, I think because it has a 2-byte opcode. 107 | #while len(patched) < 6: #Short encoding, which we do not want 108 | # patched+='\x90' #Add padding of NOPs 109 | #The previous commented out code wouldn't even WORK now, since we insert another instruction 110 | #at the MINIMUM. I'm amazed the jcxz/jecxz code even worked at all before 111 | else: 112 | target = ins.operands[0].imm # int(ins.op_str,16) The destination of this instruction 113 | newtarget = self.remap_target(ins.address,mapping,target,len(patched)) 114 | patched+=asm(ins.mnemonic + ' $+' + newtarget) 115 | #TODO: some instructions encode to 6 bytes, some to 5, some to 2. How do we know which? 116 | #For example, for CALL, it seems to only be 5 or 2 depending on offset. 117 | #But for jg, it can be 2 or 6 depending on offset, I think because it has a 2-byte opcode. 118 | #while len(patched) < 6: #Short encoding, which we do not want 119 | # patched+='\x90' #Add padding of NOPs 120 | return patched 121 | 122 | def translate_uncond(self,ins,mapping): 123 | op = ins.operands[0] #Get operand 124 | if op.type == X86_OP_REG: # e.g. call eax or jmp ebx 125 | target = ins.reg_name(op.reg) 126 | return self.get_indirect_uncond_code(ins,mapping,target) 127 | elif op.type == X86_OP_MEM: # e.g. 
call [eax + ecx*4 + 0xcafebabe] or jmp [ebx+ecx] 128 | target = ins.op_str 129 | return self.get_indirect_uncond_code(ins,mapping,target) 130 | elif op.type == X86_OP_IMM: # e.g. call 0xdeadbeef or jmp 0xcafebada 131 | target = op.imm 132 | code = b'' 133 | inserted = self.before_inst_callback(ins) 134 | if inserted is not None: 135 | code += inserted 136 | if self.context.no_pic and target != self.context.get_pc_thunk: 137 | #push nothing if no_pic UNLESS it's the thunk 138 | #We only support DIRECT calls to the thunk 139 | if ins.mnemonic == 'call': 140 | self.context.stat['dircall']+=1 141 | else: 142 | self.context.stat['dirjmp']+=1 143 | elif ins.mnemonic == 'call': #If it's a call, push the original address of the next instruction 144 | self.context.stat['dircall']+=1 145 | exec_call = ''' 146 | push %s 147 | ''' 148 | so_call_before = ''' 149 | push ebx 150 | call $+5 151 | ''' 152 | so_call_after = ''' 153 | pop ebx 154 | sub ebx,%s 155 | xchg ebx,[esp] 156 | ''' 157 | if self.context.write_so: 158 | code += asm(so_call_before) 159 | if mapping is not None: 160 | # Note that if somehow newbase is a very small value we could have problems with the small 161 | # encoding of sub. This could result in different lengths between the mapping and code gen phases 162 | code += asm(so_call_after%( (self.context.newbase+(mapping[ins.address]+len(code))) - (ins.address+len(ins.bytes)) ) ) 163 | else: 164 | code += asm(so_call_after%( (self.context.newbase) - (ins.address+len(ins.bytes)) ) ) 165 | else: 166 | code += asm(exec_call%(ins.address+len(ins.bytes))) 167 | else: 168 | self.context.stat['dirjmp']+=1 169 | newtarget = self.remap_target(ins.address,mapping,target,len(code)) 170 | #print "(pre)new length: %s"%len(callback_code) 171 | #print "target: %s"%hex(target) 172 | #print "newtarget: %s"%newtarget 173 | if self.context.no_pic and target != self.context.get_pc_thunk: 174 | code += asm( '%s $+%s'%(ins.mnemonic,newtarget) ) 175 | else: 176 | patched = asm('jmp $+%s'%newtarget) 177 | if len(patched) == 2: #Short encoding, which we do not want 178 | patched+='\x90\x90\x90' #Add padding of 3 NOPs 179 | code += patched 180 | #print "new length: %s"%len(callback_code+patched) 181 | return code 182 | return None 183 | 184 | def get_indirect_uncond_code(self,ins,mapping,target): 185 | #Commented assembly 186 | ''' 187 | mov [esp-28], eax ;save old eax value (very far above the stack because of future push/call) 188 | mov eax, %s ;read location in memory from which we will get destination 189 | %s ;if a call, we push return address here 190 | call $+%s ;call lookup function 191 | mov [esp-4], eax ;save new eax value (destination mapping) 192 | mov eax, [esp-%s] ;restore old eax value (offset depends on whether return address pushed) 193 | jmp [esp-4] ;jmp to new address 194 | ''' 195 | template_before = ''' 196 | mov [esp-32], eax 197 | mov eax, %s 198 | %s 199 | ''' 200 | exec_call = ''' 201 | push %s 202 | ''' 203 | so_call_before = ''' 204 | push ebx 205 | call $+5 206 | ''' 207 | so_call_after = ''' 208 | pop ebx 209 | sub ebx,%s 210 | xchg ebx,[esp] 211 | ''' 212 | template_after = ''' 213 | call $+%s 214 | mov [esp-4], eax 215 | mov eax, [esp-%s] 216 | jmp [esp-4] 217 | ''' 218 | template_nopic = ''' 219 | call $+%s 220 | mov [esp-4], eax 221 | mov eax, [esp-%s] 222 | %s [esp-4] 223 | ''' 224 | #TODO: This is somehow still the bottleneck, so this needs to be optimized 225 | code = b'' 226 | if self.context.exec_only: 227 | code += self.get_remap_callbacks_code(ins.address,mapping,target) 
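#(Offset bookkeeping for the templates above: eax is saved at [esp-32]; when a return
# address is pushed for a call, esp drops by 4 and that slot becomes [esp-28], which is
# why template_after is filled in with 28 for calls and 32 for jmps further down.)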
228 | #NOTE: user instrumentation code comes after callbacks code. No particular reason to put it either way, 229 | #other than perhaps consistency, but for now this is easier. 230 | inserted = self.before_inst_callback(ins) 231 | if inserted is not None: 232 | code += inserted 233 | if self.context.no_pic: 234 | if ins.mnemonic == 'call': 235 | self.context.stat['indcall']+=1 236 | else: 237 | self.context.stat['indjmp']+=1 238 | code += asm( template_before%(target,'') ) 239 | elif ins.mnemonic == 'call': 240 | self.context.stat['indcall']+=1 241 | if self.context.write_so: 242 | code += asm( template_before%(target,so_call_before) ) 243 | if mapping is not None: 244 | code += asm(so_call_after%( (mapping[ins.address]+len(code)+self.context.newbase) - (ins.address+len(ins.bytes)) ) ) 245 | #print 'CODE LEN/1: %d\n%s'%(len(code),code.encode('hex')) 246 | else: 247 | code += asm(so_call_after%( (0x8f+self.context.newbase) - (ins.address+len(ins.bytes)) ) ) 248 | #print 'CODE LEN/0: %d\n%s'%(len(code),code.encode('hex')) 249 | else: 250 | code += asm(template_before%(target,exec_call%(ins.address+len(ins.bytes)) )) 251 | else: 252 | self.context.stat['indjmp']+=1 253 | code += asm(template_before%(target,'')) 254 | size = len(code) 255 | lookup_target = self.remap_target(ins.address,mapping,self.context.lookup_function_offset,size) 256 | #Always transform an unconditional control transfer to a jmp, but 257 | #for a call, insert a push instruction to push the original return address on the stack. 258 | #At runtime, our rewritten ret will look up the right address to return to and jmp there. 259 | #If we push a value on the stack, we have to store even FURTHER away from the stack. 260 | #Note that calling the lookup function can move the stack pointer temporarily up to 261 | #20 bytes, which will obliterate anything stored too close to the stack pointer. That, plus 262 | #the return value we push on the stack, means we need to put it at least 28 bytes away. 263 | if self.context.no_pic: 264 | #Change target to secondary lookup function instead 265 | lookup_target = self.remap_target(ins.address,mapping,self.context.secondary_lookup_function_offset,size) 266 | code += asm( template_nopic%(lookup_target,32,ins.mnemonic) ) 267 | elif ins.mnemonic == 'call': 268 | code += asm(template_after%(lookup_target,28)) 269 | else: 270 | code += asm(template_after%(lookup_target,32)) 271 | return code 272 | 273 | def get_remap_callbacks_code(self,insaddr,mapping,target): 274 | '''Checks whether the target destination (expressed as the opcode string from a jmp/call instruction) 275 | is in the got, then checks if it matches a function with callbacks. It then rewrites the 276 | addresses if necessary. This will *probably* always be from jmp instructions in the PLT. 
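    (The callbacks themselves are function-pointer arguments the program passes to the library
    call, e.g. a handler or comparison routine; their values are original code addresses, so they
    are translated here before the library ever stores or invokes them.)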
277 | NOTE: This assumes it does not have any code inserted before it, and that it comprises the first 278 | special instructions inserted for an instruction.''' 279 | if self.memory_ref_string.match(target): 280 | address = int(self.memory_ref_string.match(target).group('address'), 16) 281 | if address in self.context.plt['entries']: 282 | if self.context.plt['entries'][address] in self.context.callbacks: 283 | print 'Found library call with callbacks: %s'%self.context.plt['entries'][address] 284 | return self.get_callback_code( insaddr, mapping, self.context.callbacks[self.context.plt['entries'][address]] ) 285 | return b'' 286 | 287 | def get_callback_code(self,address,mapping,cbargs): 288 | '''Remaps each callback argument on the stack based on index. cbargs is an array of argument indices 289 | that let us know where on the stack we must rewrite. We insert code for each we must rewrite.''' 290 | callback_template_before = ''' 291 | mov eax, [esp+(%s*4)] 292 | ''' 293 | callback_template_after = ''' 294 | call $+%s 295 | mov [esp+(%s*4)], eax 296 | ''' 297 | code = asm('push eax') #Save eax, use to hold callback address 298 | for ind in cbargs: 299 | #Add 2 because we must skip over the saved value of eax and the return value already pushed 300 | #ASSUMPTION: before this instruction OR this instruction if it IS a call, a return address was 301 | #pushed. Since this *probably* is taking place inside the PLT, in all probability this is a 302 | #jmp instruction, and the call that got us *into* the PLT pushed a return address, so we can't rely 303 | #on the current instruction to tell us this either way. Therefore, we are *assuming* that the PLT 304 | #is always entered via a call instruction, or that somebody is calling an address in the GOT directly. 305 | #If code ever jmps based on an address in the got, we will probably corrupt the stack. 306 | cb_before = callback_template_before%( ind + 2 ) 307 | code += asm(cb_before) #Assemble this part first so we will know the offset to the lookup function 308 | size = len(code) 309 | lookup_target = self.remap_target( address, mapping, self.context.lookup_function_offset, size ) 310 | cb_after = callback_template_after%( lookup_target, ind + 2 ) 311 | code += asm(cb_after) #Save the new address over the original 312 | code += asm('pop eax') #Restore eax 313 | return code 314 | 315 | def in_plt(self,target): 316 | return target in range(self.context.plt['addr'],self.context.plt['addr']+self.context.plt['size']) 317 | 318 | def get_plt_entry(self,target): 319 | #It seems that an elf does not directly give a mapping from each entry in the plt. 320 | #Instead, it maps from the got entries instead, making it unclear exactly where objdump 321 | #gets the information. For our purposes, since all the entries in the plt jump to the got 322 | #entry, we can read the destination address from the jmp instruction. 323 | #TODO: ensure works for x64 324 | offset = target - self.context.plt['addr'] #Get the offset into the plt 325 | dest = self.context.plt['data'][offset+2:offset+2+4] #Get the four bytes of the GOT address 326 | dest = struct.unpack('