├── CONTRIBUTING.txt
├── LICENSE.txt
├── README.md
├── debug.py
├── enjarify.bat
├── enjarify.sh
└── enjarify
    ├── __init__.py
    ├── byteio.py
    ├── dalvik.py
    ├── dalvikformats.py
    ├── flags.py
    ├── jvm
        ├── __init__.py
        ├── arraytypes.py
        ├── constantpool.py
        ├── constants
        │   ├── __init__.py
        │   ├── calc.py
        │   ├── genlookup.py
        │   └── lookup.py
        ├── error.py
        ├── genmathops.py
        ├── ir.py
        ├── jvmops.py
        ├── mathops.py
        ├── optimization
        │   ├── __init__.py
        │   ├── consts.py
        │   ├── jumps.py
        │   ├── options.py
        │   ├── registers.py
        │   └── stack.py
        ├── scalartypes.py
        ├── writebytecode.py
        ├── writeclass.py
        └── writeir.py
    ├── main.py
    ├── mutf8.py
    ├── parsedex.py
    ├── treelist.py
    ├── typeinference
        ├── __init__.py
        └── typeinference.py
    └── util.py


/CONTRIBUTING.txt:
--------------------------------------------------------------------------------
 1 | Want to contribute? Great! First, read this page (including the small print at the end).
 2 | 
 3 | ### Before you contribute
 4 | Before we can use your code, you must sign the
 5 | [Google Individual Contributor License Agreement](https://developers.google.com/open-source/cla/individual?csw=1)
 6 | (CLA), which you can do online. The CLA is necessary mainly because you own the
 7 | copyright to your changes, even after your contribution becomes part of our
 8 | codebase, so we need your permission to use and distribute your code. We also
 9 | need to be sure of various other things—for instance that you'll tell us if you
10 | know that your code infringes on other people's patents. You don't have to sign
11 | the CLA until after you've submitted your code for review and a member has
12 | approved it, but you must do it before we can put your code into our codebase.
13 | Before you start working on a larger contribution, you should get in touch with
14 | us first through the issue tracker with your idea so that we can help out and
15 | possibly guide you. Coordinating up front makes it much easier to avoid
16 | frustration later on.
17 | 
18 | ### Code reviews
19 | All submissions, including submissions by project members, require review. We
20 | use GitHub pull requests for this purpose.
21 | 
22 | ### The small print
23 | Contributions made by corporations are covered by a different agreement than
24 | the one above, the Software Grant and Corporate Contributor License Agreement.


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "[]"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!)  The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright [yyyy] [name of copyright owner]
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ### Introduction
 2 | 
 3 | Enjarify is a tool for translating Dalvik bytecode to equivalent Java bytecode. This allows Java analysis tools to analyze Android applications.
 4 | 
 5 | 
 6 | ### Usage and installation
 7 | 
 8 | Enjarify is a pure Python 3 application, so you can just git clone and run it. To run it directly, assuming you are in the top directory of the repository, you can just do
 9 | 
10 |     python3 -O -m enjarify.main yourapp.apk
11 | 
12 | For normal use, you'll probably want to use the wrapper scripts and set it up on your path.
13 | 
14 | #### Linux
15 | 
16 | For convenience, a wrapper shell script is provided, enjarify.sh. This will try to use PyPy if available, since it is faster than CPython. If you want to be able to call Enjarify from anywhere, you can create a symlink from somewhere on your PATH, such as ~/bin. To do this, assuming you are inside the top level of the repository, run
17 | 
18 |     ln -s "$PWD/enjarify.sh" ~/bin/enjarify
19 | 
20 | #### Windows
21 | 
22 | A wrapper batch script, enjarify.bat, is provided. To be able to call it from anywhere, just add the root directory of the repository to your PATH. The batch script always runs Python 3 through the `py -3` launcher. If you want to use PyPy instead, just edit the script.
23 | 
24 | #### Usage
25 | 
26 | Assuming you set up the script on your path correctly, you can call it from anywhere by just typing enjarify, e.g.
27 | 
28 |     enjarify yourapp.apk
29 | 
30 | The most basic form of usage is to just specify an apk file or dex file as input. If you specify a multidex apk, Enjarify will automatically translate all of the dex files and output the results in a single combined jar. If you specify a dex file, only that dex file will be translated. E.g. assuming you manually extracted the dex files you could do
31 | 
32 |     enjarify classes2.dex
33 | 
34 | The default output file is [inputname]-enjarify.jar in the current directory. To specify the filename for the output explicitly, pass the -o or --output option.
35 | 
36 |     enjarify yourapp.apk -o yourapp.jar
37 | 
38 | By default, Enjarify will refuse to overwrite the output file if it already exists. To overwrite the output, pass the -f or --force option.
39 | 
40 | 
41 | ### Why not dex2jar?
42 | 
43 | Dex2jar is an older tool that also tries to translate Dalvik to Java bytecode. It works reasonably well most of the time, but a lot of obscure features or edge cases will cause it to fail or even silently produce incorrect results. By contrast, Enjarify is designed to work in as many cases as possible, even for code where Dex2jar would fail. Among other things, Enjarify correctly handles unicode class names, constants used as multiple types, implicit casts, exception handlers jumping into normal control flow, classes that reference too many constants, very long methods, exception handlers after a catchall handler, and static initial values of the wrong type.
44 | 
45 | 
46 | ### Limitations
47 | 
48 | Currently, only version 35 dex files are supported. This means that the Java 8 related bytecode features introduced in Android N, O, and P are not supported.
49 | 
50 | Enjarify does not currently translate optional metadata such as sourcefile attributes, line numbers, and annotations.
51 | 
52 | Enjarify tries hard to successfully translate as many classes as possible, but there are some potential cases where it is simply not possible due to limitations in Android, Java, or both. Luckily, this only happens in contrived circumstances, so it shouldn't be a problem in practice.
53 | 
54 | 
55 | ### Performance tips
56 | 
57 | PyPy is much faster than CPython. To install PyPy, see http://pypy.org/. Make sure you get PyPy3 rather than regular PyPy. The Linux wrapper script will automatically use the command pypy3 if available. On Windows, you'll need to edit the wrapper script yourself.
58 | 
59 | By default, Enjarify runs optimizations on the bytecode which make it more readable for humans (copy propagation, unused value removal, etc.). If you don't need this, you can speed things up by disabling the optimizations with the --fast option. Note that in the very rare case where a class is too big to fit in a classfile without optimization, Enjarify will automatically retry it with all optimizations enabled, so this option does not affect the number of classes that are successfully translated.
60 | 
61 | 
62 | ### Disclaimer
63 | 
64 | This is not an official Google product (experimental or otherwise), it is just code that happens to be owned by Google.
65 | 


--------------------------------------------------------------------------------
/debug.py:
--------------------------------------------------------------------------------
"""Thin launcher that runs enjarify.main.main() when executed as a script."""
import enjarify.main

# Guard the call so that merely importing this module does not start a run
if __name__ == '__main__':
    enjarify.main.main()
4 | 


--------------------------------------------------------------------------------
/enjarify.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | 
 3 | REM  Copyright 2015 Google Inc. All Rights Reserved.
 4 | REM
 5 | REM  Licensed under the Apache License, Version 2.0 (the "License");
 6 | REM  you may not use this file except in compliance with the License.
 7 | REM  You may obtain a copy of the License at
 8 | REM
 9 | REM      http://www.apache.org/licenses/LICENSE-2.0
10 | REM
11 | REM  Unless required by applicable law or agreed to in writing, software
12 | REM  distributed under the License is distributed on an "AS IS" BASIS,
13 | REM  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | REM  See the License for the specific language governing permissions and
15 | REM  limitations under the License.
16 | 
REM  Keep the PYTHONPATH change local to this script so it does not leak into
REM  the calling shell; the exit code of the Python process is still returned.
setlocal

REM  %~dp0 is the directory containing this batch file. Quoting the assignment
REM  protects paths that contain characters such as "&".
set "PYTHONPATH=%~dp0"
py -3 -O -m enjarify.main %*
19 | 


--------------------------------------------------------------------------------
/enjarify.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Copyright 2015 Google Inc. All Rights Reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | 
18 | # Try to find a valid python3 command, preferring pypy if available
# Probe the command named by $1 and, if it behaves like Python 3, remember it
# in PYTHON. Does nothing once PYTHON is non-empty, so the first match wins
# (as does a PYTHON value already present in the environment).
function guess {
	# Already decided - skip the probe entirely
	[ -n "$PYTHON" ] && return

	# In Python 3 "range" is a class; any other output (or a missing command,
	# whose error text is discarded) fails the comparison below
	result=$($1 -c "print(range)" 2>/dev/null)
	if [[ "$result" == "<class 'range'>" ]]; then
		PYTHON=$1
	fi
}
27 | 
# Candidates are probed in order of preference; the first one that passes wins
guess "pypy3"
guess "python3"
guess "pypy"
guess "python"

if [ -z "$PYTHON" ]; then
	# Report the failure on stderr with a non-zero status so callers can detect it
	echo "Unable to find python3 on path" >&2
	exit 1
fi

echo "Using $PYTHON as Python interpreter"

# Find location of this bash script, and set its directory as the PYTHONPATH
if [[ "$OSTYPE" == "darwin"* ]]; then
	READLINK="readlink"
else
	READLINK="readlink -f"
fi

script_path=$($READLINK "${BASH_SOURCE[0]}")
# Plain readlink prints nothing when the script was invoked directly rather
# than through a symlink; fall back to the path the script was invoked with
if [ -z "$script_path" ]; then
	script_path="${BASH_SOURCE[0]}"
fi
export PYTHONPATH=$(dirname "$script_path")

# Now execute the actual program
exec $PYTHON -O -m enjarify.main "$@"
50 | 


--------------------------------------------------------------------------------
/enjarify/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2015 Google Inc. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/enjarify/byteio.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2015 Google Inc. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import struct
16 | 
17 | from .util import signExtend
18 | 
class Reader:
    """Sequential decoder over a byte buffer.

    `data` is the buffer being read and `pos` is the offset of the next unread
    byte. Fixed-width integers are decoded as little-endian.
    """
    def __init__(self, data, pos=0):
        self.data = data
        self.pos = pos

    def read(self, size):
        """Return the next `size` bytes and advance past them.

        Raises IndexError, leaving `pos` unchanged, if `size` is negative or
        larger than the number of bytes remaining.
        """
        if not 0 <= size <= len(self.data) - self.pos:
            raise IndexError
        result = self.data[self.pos: self.pos+size]
        self.pos += size
        return result

    def _unpack(self, fmt):
        """Decode one value described by the struct format `fmt` and advance past it."""
        packer = struct.Struct(fmt)
        # Going through read() gives the same bounds check as every other accessor
        return packer.unpack_from(self.read(packer.size))[0]

    def u8(self): return self.read(1)[0]
    def u16(self): return self._unpack('<H')
    def u32(self): return self._unpack('<I')
    def u64(self): return self._unpack('<Q')

    def _leb128(self, signed=False):
        """Decode a LEB128 integer and advance past it.

        Each byte contributes its low 7 bits, least significant group first; a
        set high bit means another byte follows. When `signed` is true the
        result is passed through signExtend together with the number of
        payload bits that were read.
        """
        result = 0
        size = 0
        while self.data[self.pos] >> 7:
            # The groups never overlap, so xor acts as a bitwise or here
            result ^= (self.data[self.pos] & 0x7f) << size
            size += 7
            self.pos += 1
        # Final byte (high bit clear)
        result ^= (self.data[self.pos] & 0x7f) << size
        size += 7
        self.pos += 1

        if signed:
            result = signExtend(result, size)
        return result

    def uleb128(self): return self._leb128()
    def sleb128(self): return self._leb128(signed=True)

    # Maintain strings in binary encoding instead of attempting to decode them
    # since the output will be using the same encoding anyway
    def readCStr(self):
        """Return the bytes from the current position up to the next NUL byte.

        The terminator is not included in the result and `pos` is left
        pointing at it (not past it).

        Raises IndexError, leaving `pos` unchanged, if there is no NUL byte at
        or after the current position.
        """
        end = self.data.find(b'\0', self.pos)
        if end < 0:
            # find() signals "not found" with -1; using that as a position
            # would silently yield a truncated string and a corrupt cursor
            raise IndexError('unterminated string')
        start, self.pos = self.pos, end
        return self.data[start:end]
63 | 
class Writer:
    """Growable output buffer; multi-byte integers are written big-endian."""
    def __init__(self):
        self.buf = bytearray()

    def write(self, s):
        """Append the bytes-like object `s` to the buffer."""
        self.buf += s

    def _pack(self, fmt, arg):
        """Encode `arg` using the struct format `fmt` and append the result."""
        encoded = struct.pack(fmt, arg)
        return self.write(encoded)

    def u8(self, x):
        """Append `x` as a single byte."""
        return self.write(bytes((x,)))

    def u16(self, x):
        """Append `x` as an unsigned 16-bit big-endian integer."""
        return self._pack('>H', x)

    def u32(self, x):
        """Append `x` as an unsigned 32-bit big-endian integer."""
        return self._pack('>I', x)

    def u64(self, x):
        """Append `x` as an unsigned 64-bit big-endian integer."""
        return self._pack('>Q', x)

    def toBytes(self):
        """Return an immutable copy of everything written so far."""
        return bytes(self.buf)
81 | 


--------------------------------------------------------------------------------
/enjarify/dalvik.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2015 Google Inc. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from . import dalvikformats
 16 | from . import util
 17 | 
 18 | class DalvikInstruction:
 19 |     def __init__(self, type_, pos, newpos, opcode, args):
 20 |         self.type = type_
 21 |         self.pos = pos
 22 |         self.pos2 = newpos
 23 |         self.opcode = opcode
 24 |         self.args = args
 25 | 
 26 |         self.implicit_casts = None
 27 |         self.prev_result = None # for move-result/exception
 28 |         self.fillarrdata = None
 29 |         self.switchdata = None
 30 | 
 31 | _it = iter(range(999))
 32 | Nop = next(_it)
 33 | Move = next(_it)
 34 | MoveWide = next(_it)
 35 | MoveResult = next(_it)
 36 | Return = next(_it)
 37 | Const32 = next(_it)
 38 | Const64 = next(_it)
 39 | ConstString = next(_it)
 40 | ConstClass = next(_it)
 41 | MonitorEnter = next(_it)
 42 | MonitorExit = next(_it)
 43 | CheckCast = next(_it)
 44 | InstanceOf = next(_it)
 45 | ArrayLen = next(_it)
 46 | NewInstance = next(_it)
 47 | NewArray = next(_it)
 48 | FilledNewArray = next(_it)
 49 | FillArrayData = next(_it)
 50 | Throw = next(_it)
 51 | Goto = next(_it)
 52 | Switch = next(_it)
 53 | Cmp = next(_it)
 54 | If = next(_it)
 55 | IfZ = next(_it)
 56 | 
 57 | ArrayGet = next(_it)
 58 | ArrayPut = next(_it)
 59 | InstanceGet = next(_it)
 60 | InstancePut = next(_it)
 61 | StaticGet = next(_it)
 62 | StaticPut = next(_it)
 63 | 
 64 | # Invoke = next(_it)
 65 | InvokeVirtual = next(_it)
 66 | InvokeSuper = next(_it)
 67 | InvokeDirect = next(_it)
 68 | InvokeStatic = next(_it)
 69 | InvokeInterface = next(_it)
 70 | 
 71 | # actual ops for these are defined in jvm/mathops.py
 72 | UnaryOp = next(_it)
 73 | BinaryOp = next(_it)
 74 | BinaryOpConst = next(_it)
 75 | 
 76 | INVOKE_TYPES = InvokeVirtual, InvokeSuper, InvokeDirect, InvokeStatic, InvokeInterface
 77 | 
 78 | # instructions which Dalvik considers to throw
 79 | THROW_TYPES = INVOKE_TYPES + (ConstString, ConstClass, MonitorEnter, MonitorExit, CheckCast, InstanceOf, ArrayLen, NewArray, NewInstance, FilledNewArray, FillArrayData, Throw, ArrayGet, ArrayPut, InstanceGet, InstancePut, StaticGet, StaticPut, BinaryOp, BinaryOpConst)
 80 | # last two only if it is int/long div or rem
 81 | 
 82 | # ignore the possiblity of linkage errors (i.e. constants and instanceof can't throw)
 83 | # in theory MonitorExit can't throw either due to the structured locking checks, but these are broken and work inconsistently
 84 | PRUNED_THROW_TYPES = INVOKE_TYPES + (MonitorEnter, MonitorExit, CheckCast, ArrayLen, NewArray, NewInstance, FilledNewArray, FillArrayData, Throw, ArrayGet, ArrayPut, InstanceGet, InstancePut, StaticGet, StaticPut, BinaryOp, BinaryOpConst)
 85 | 
# Lookup table from opcode byte to instruction type tag; parseInstruction
# indexes it with `word & 0xFF`.
# NOTE(review): only some opcode values appear as keys. Presumably
# util.keysToRanges fills every opcode from one key up to (but excluding) the
# next key with that key's value, over 256 entries in total - confirm against
# util.keysToRanges.
OPCODES = util.keysToRanges({
    0x00: Nop,
    0x01: Move,
    0x04: MoveWide,
    0x07: Move,
    0x0a: MoveResult,
    0x0e: Return,
    0x12: Const32,
    0x16: Const64,
    0x1a: ConstString,
    0x1c: ConstClass,
    0x1d: MonitorEnter,
    0x1e: MonitorExit,
    0x1f: CheckCast,
    0x20: InstanceOf,
    0x21: ArrayLen,
    0x22: NewInstance,
    0x23: NewArray,
    0x24: FilledNewArray,
    0x26: FillArrayData,
    0x27: Throw,
    0x28: Goto,
    0x2b: Switch,
    0x2d: Cmp,
    0x32: If,
    0x38: IfZ,
    0x3e: Nop, # unused
    0x44: ArrayGet,
    0x4b: ArrayPut,
    0x52: InstanceGet,
    0x59: InstancePut,
    0x60: StaticGet,
    0x67: StaticPut,
    0x6e: InvokeVirtual,
    0x6f: InvokeSuper,
    0x70: InvokeDirect,
    0x71: InvokeStatic,
    0x72: InvokeInterface,
    0x73: Nop, # unused
    0x74: InvokeVirtual,
    0x75: InvokeSuper,
    0x76: InvokeDirect,
    0x77: InvokeStatic,
    0x78: InvokeInterface,
    0x79: Nop, # unused
    0x7b: UnaryOp,
    0x90: BinaryOp,
    0xd0: BinaryOpConst,
    0xe3: Nop, # unused
}, 256)
136 | 
137 | 
def parseInstruction(dex, insns_start_pos, shorts, pos):
    """Decode the instruction that starts at index `pos` of `shorts`.

    The opcode is the low byte of the first code unit and the operands come
    from dalvikformats.decode. The units 0x100, 0x200 and 0x300 are also
    treated as inline data tables (packed switch, sparse switch and array
    data respectively). Their contents are read through `dex.stream` at
    `insns_start_pos + pos*2` plus the size of the table header, and the
    table length overrides the `newpos` reported by the decoder.

    Returns a `(newpos, instruction)` pair, where `newpos` is the index in
    `shorts` immediately after this instruction, including any data table.
    """
    word = shorts[pos]
    opcode = word & 0xFF
    newpos, args = dalvikformats.decode(shorts, pos, opcode)

    # parse special data instructions
    switchdata = fillarrdata = None

    if word in (0x100, 0x200): #switch
        count = shorts[pos+1]
        # the entries start after two header shorts (4 bytes)
        st = dex.stream(insns_start_pos + pos*2 + 4)

        if word == 0x100: #packed
            # one starting key, then `count` targets for consecutive keys
            first_key = st.u32()
            targets = [st.u32() for _ in range(count)]
            newpos = pos + 2 + (1 + count)*2
            switchdata = dict(enumerate(targets, first_key))
        else: #sparse
            # `count` keys followed by `count` matching targets
            keys = [st.u32() for _ in range(count)]
            targets = [st.u32() for _ in range(count)]
            newpos = pos + 2 + (count + count)*2
            switchdata = dict(zip(keys, targets))
    elif word == 0x300:
        width = shorts[pos+1] % 16
        count = shorts[pos+2] ^ (shorts[pos+3] << 16)
        # payload is rounded up to whole shorts and follows four header shorts
        newpos = pos + ((count * width + 1) // 2 + 4)
        # get array data
        stream = dex.stream(insns_start_pos + pos*2 + 8)
        readers = {
            1: stream.u8,
            2: stream.u16,
            4: stream.u32,
            8: stream.u64,
        }
        read_element = readers[width]
        fillarrdata = width, [read_element() for _ in range(count)]

    # warning, this must go below the special data handling that calculates newpos
    instruction = DalvikInstruction(OPCODES[opcode], pos, newpos, opcode, args)
    instruction.fillarrdata = fillarrdata
    instruction.switchdata = switchdata

    return newpos, instruction
181 | 
def parseBytecode(dex, insns_start_pos, shorts, catch_addrs):
    """Parse a whole method body into a list of DalvikInstruction objects.

    After decoding, two extra pieces of information are attached:
      * prev_result on move-result style instructions (the descriptor of the
        value produced by the preceding instruction or caught exception)
      * implicit_casts on if-eqz/if-nez instructions that directly follow an
        instance-of check
    """
    ops = []
    pos = 0
    while pos < len(shorts):
        pos, parsed = parseInstruction(dex, insns_start_pos, shorts, pos)
        ops.append(parsed)

    # Fill in data for move-result
    for before, mover in zip(ops, ops[1:]):
        if mover.type != MoveResult:
            continue

        if before.type in INVOKE_TYPES:
            called_id = dex.method_id(before.args[0])
            if called_id.return_type != b'V':
                mover.prev_result = called_id.return_type
        elif before.type == FilledNewArray:
            mover.prev_result = dex.type(before.args[0])
        elif mover.pos in catch_addrs:
            mover.prev_result = b'Ljava/lang/Throwable;'
    assert 0 not in catch_addrs

    # Fill in implicit cast data
    for i, instr in enumerate(ops):
        if instr.opcode not in (0x38, 0x39): # only if-eqz, if-nez
            continue
        if i == 0 or ops[i-1].type != InstanceOf:
            continue

        check = ops[i-1]
        desc_ind = check.args[2]
        regs = {check.args[1]}

        # A move placed right before the instance-of whose first operand is
        # the checked register adds its other operand to the set
        if i > 1:
            mov = ops[i-2]
            if mov.type == Move and mov.args[0] == check.args[1]:
                regs.add(mov.args[1])

        # Don't cast result of instanceof if it overwrites the input
        regs.discard(check.args[0])
        if regs:
            instr.implicit_casts = desc_ind, sorted(regs)
    return ops
220 | 


--------------------------------------------------------------------------------
/enjarify/dalvikformats.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2015 Google Inc. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from . import util
 16 | 
 17 | # Code for parsing the various Dalvik opcode formats
# Lookup table from opcode byte (0-255) to the name of its instruction format.
# decode() relies on two characters of the name: fmt[0] is the instruction
# length in 16-bit code units and fmt[2] is the kind letter that selects sign
# extension / offset handling. Only the first opcode of each run is listed;
# util.keysToRanges presumably fills in the opcodes up to the next key
# (helper not visible here - confirm).
INSTRUCTION_FORMAT = util.keysToRanges({
    0x00: '10x',
    0x01: '12x',
    0x02: '22x',
    0x03: '32x',
    0x04: '12x',
    0x05: '22x',
    0x06: '32x',
    0x07: '12x',
    0x08: '22x',
    0x09: '32x',
    0x0a: '11x',
    0x0b: '11x',
    0x0c: '11x',
    0x0d: '11x',
    0x0e: '10x',
    0x0f: '11x',
    0x10: '11x',
    0x11: '11x',
    0x12: '11n',
    0x13: '21s',
    0x14: '31i',
    0x15: '21h',
    0x16: '21s',
    0x17: '31i',
    0x18: '51l',
    0x19: '21h',
    0x1a: '21c',
    0x1b: '31c',
    0x1c: '21c',
    0x1d: '11x',
    0x1e: '11x',
    0x1f: '21c',
    0x20: '22c',
    0x21: '12x',
    0x22: '21c',
    0x23: '22c',
    0x24: '35c',
    0x25: '3rc',
    0x26: '31t',
    0x27: '11x',
    0x28: '10t',
    0x29: '20t',
    0x2a: '30t',
    0x2b: '31t',
    0x2c: '31t',
    0x2d: '23x',
    0x32: '22t',
    0x38: '21t',
    0x3e: '10x',
    0x44: '23x',
    0x52: '22c',
    0x60: '21c',
    0x6e: '35c',
    0x73: '10x',
    0x74: '3rc',
    0x79: '10x',
    0x7b: '12x',
    0x90: '23x',
    0xb0: '12x',
    0xd0: '22s',
    0xd8: '22b',
    0xe3: '10x',
}, 256)
 82 | 
 83 | # parsing funcs
 84 | def p00op(w): return []
 85 | def pBAop(w): return [(w >> 8) & 0xF, w >> 12]
 86 | def pAAop(w): return [w >> 8]
 87 | def p00opAAAA(w, w2): return [w2]
 88 | def pAAopBBBB(w, w2): return [w >> 8, w2]
 89 | def pAAopCCBB(w, w2): return [w >> 8, w2 & 0xFF, w2 >> 8]
 90 | def pBAopCCCC(w, w2): return [(w >> 8) & 0xF, w >> 12, w2]
 91 | def p00opAAAAAAAA(w, w2, w3): return [w2 ^ (w3 << 16)]
 92 | def p00opAAAABBBB(w, w2, w3): return [w2, w3]
 93 | def pAAopBBBBBBBB(w, w2, w3): return [w >> 8, w2 ^ (w3 << 16)]
 94 | 
 95 | def pAGopBBBBFEDC(w, w2, w3):
 96 |     a = w >> 12
 97 |     c, d, e, f = (w3) & 0xF, (w3 >> 4) & 0xF, (w3 >> 8) & 0xF, (w3 >> 12) & 0xF
 98 |     g = (w >> 8) & 0xF
 99 |     return [w2, [c, d, e, f, g][:a]]
100 | 
def pAAopBBBBCCCC(w, w2, w3):
    """Parse an index plus a contiguous register range.

    The high byte of the first unit is the number of registers and the third
    unit is the first register. Returns [w2, range of registers].
    """
    count = w >> 8
    first_reg = w3
    return [w2, range(first_reg, first_reg + count)]
104 | 
def pAAopBBBBBBBBBBBBBBBB(w, w2, w3, w4, w5):
    """Parse a register plus a 64-bit literal spread over four units.

    The units are combined lowest first (w2 supplies bits 0-15, w5 bits 48-63).
    Returns [high byte of w, combined literal].
    """
    literal = 0
    for position, unit in enumerate((w2, w3, w4, w5)):
        literal ^= unit << (16 * position)
    return [w >> 8, literal]
108 | 
# Maps each instruction format name to the function that extracts its raw
# operands from the instruction's code units. Formats that share a bit layout
# share a parser; decode() applies the format-specific sign extension and
# offset adjustment afterwards.
_FUNC = {
    '10x': p00op,
    '12x': pBAop,
    '11n': pBAop,
    '11x': pAAop,
    '10t': pAAop,
    '20t': p00opAAAA,
    '22x': pAAopBBBB,
    '21t': pAAopBBBB,
    '21s': pAAopBBBB,
    '21h': pAAopBBBB,
    '21c': pAAopBBBB,
    '23x': pAAopCCBB,
    '22b': pAAopCCBB,
    '22t': pBAopCCCC,
    '22s': pBAopCCCC,
    '22c': pBAopCCCC,
    '30t': p00opAAAAAAAA,
    '32x': p00opAAAABBBB,
    '31i': pAAopBBBBBBBB,
    '31t': pAAopBBBBBBBB,
    '31c': pAAopBBBBBBBB,
    '35c': pAGopBBBBFEDC,
    '3rc': pAAopBBBBCCCC,
    '51l': pAAopBBBBBBBBBBBBBBBB,
}
135 | 
def sign(x, bits):
    """Reinterpret x, an unsigned value of the given bit width, as signed.

    Values with the top bit set (x >= 2**(bits-1)) are lowered by 2**bits;
    all other values are returned unchanged.
    """
    threshold = 1 << (bits-1)
    return x - (1 << bits) if x >= threshold else x
140 | 
def decode(shorts, pos, opcode):
    """Decode the operands of the instruction at code unit `pos`.

    The format is looked up from the opcode; its first character gives the
    instruction length in code units and its third character the operand kind.
    Returns (pos + length, operand list). Only the last operand is ever
    post-processed (sign extended, shifted or made absolute).
    """
    fmt = INSTRUCTION_FORMAT[opcode]
    size = int(fmt[0])
    kind = fmt[2]
    results = _FUNC[fmt](*shorts[pos:pos+size])

    # Work out the width to sign extend from, if any
    if kind == 'n':
        width = 4
    elif kind == 'b' or (kind == 't' and size == 1):
        width = 8
    elif kind == 's' or (kind == 't' and size == 2):
        width = 16
    elif kind == 't' and size == 3:
        width = 32
    else:
        width = None
    if width is not None:
        results[-1] = sign(results[-1], width)

    # Hats depend on actual size expected, so we rely on opcode as a hack
    if kind == 'h':
        assert opcode == 0x15 or opcode == 0x19
        shift = 16 if opcode == 0x15 else 48
        results[-1] = results[-1] << shift

    # Convert code offsets to actual code position
    if kind == 't':
        results[-1] += pos
    return pos + size, results
164 | 


--------------------------------------------------------------------------------
/enjarify/flags.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2015 Google Inc. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
# Access flag bit values. Some bits are shared between flags that apply to
# different kinds of items (e.g. 0x40 is both ACC_VOLATILE and ACC_BRIDGE,
# 0x80 is both ACC_TRANSIENT and ACC_VARARGS, 0x20 is both ACC_SYNCHRONIZED
# and ACC_SUPER).
ACC_PUBLIC = 0x1
ACC_PRIVATE = 0x2
ACC_PROTECTED = 0x4
ACC_STATIC = 0x8
ACC_FINAL = 0x10
ACC_SYNCHRONIZED = 0x20
ACC_VOLATILE = 0x40
ACC_BRIDGE = 0x40
ACC_TRANSIENT = 0x80
ACC_VARARGS = 0x80
ACC_NATIVE = 0x100
ACC_INTERFACE = 0x200
ACC_ABSTRACT = 0x400
ACC_STRICT = 0x800
ACC_SYNTHETIC = 0x1000
ACC_ANNOTATION = 0x2000
ACC_ENUM = 0x4000
ACC_CONSTRUCTOR = 0x10000
ACC_DECLARED_SYNCHRONIZED = 0x20000

# Might as well include this for completeness even though modern JVMs ignore it
ACC_SUPER = 0x20

# Bit masks combining the flags listed for classes, fields and methods
# respectively. Presumably used to filter flags down to those valid in a JVM
# classfile for that kind of item - confirm against the callers.
CLASS_FLAGS = ACC_PUBLIC | ACC_FINAL | ACC_SUPER | ACC_INTERFACE | ACC_ABSTRACT | ACC_SYNTHETIC | ACC_ANNOTATION | ACC_ENUM
FIELD_FLAGS = ACC_PUBLIC | ACC_PRIVATE | ACC_PROTECTED | ACC_STATIC | ACC_FINAL | ACC_VOLATILE | ACC_TRANSIENT | ACC_SYNTHETIC | ACC_ENUM
METHOD_FLAGS = ACC_PUBLIC | ACC_PRIVATE | ACC_PROTECTED | ACC_STATIC | ACC_FINAL | ACC_SYNCHRONIZED | ACC_BRIDGE | ACC_VARARGS | ACC_NATIVE | ACC_ABSTRACT | ACC_STRICT | ACC_SYNTHETIC
41 | 


--------------------------------------------------------------------------------
/enjarify/jvm/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2015 Google Inc. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/enjarify/jvm/arraytypes.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2015 Google Inc. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from . import scalartypes as scalars
16 | 
17 | # Array type inference -
18 | # For object arrays, we don't actually care which type of object it is, so we just
19 | # use a single value for them (INVALID) and assume all such values are an object
 20 | # array of some type. For primitive arrays, we just use the entire array descriptor
21 | # e.g. b'[[[C', except that bool arrays are treated as byte arrays.
22 | # For null we use a special marker object
23 | 
# These strings can't be valid descriptors so there's no conflict
INVALID = b'INVALID'
NULL = b'NULL'

def merge(t1, t2):
    """Combine two array types.

    The NULL marker yields to the other type; otherwise equal types are kept
    and differing types collapse to INVALID.
    """
    # NULL is matched by identity, checking t1 before t2
    for candidate, other in ((t1, t2), (t2, t1)):
        if candidate is NULL:
            return other
    if t1 == t2:
        return t1
    return INVALID
34 | 
# intersect types
def narrow(t1, t2):
    """Intersect two array types.

    The INVALID marker yields to the other type; otherwise equal types are
    kept and differing types collapse to NULL.
    """
    # INVALID is matched by identity, checking t1 before t2
    for candidate, other in ((t1, t2), (t2, t1)):
        if candidate is INVALID:
            return other
    if t1 == t2:
        return t1
    return NULL
42 | 
def eletPair(t):
    """Return (scalar type, array type) for the elements of array type t.

    t must not be NULL. For INVALID the pair is (scalars.OBJ, INVALID);
    otherwise the leading b'[' is stripped and the scalar type is derived
    from the remaining descriptor.
    """
    assert t is not NULL
    if t is not INVALID:
        assert t.startswith(b'[')
        element = t[1:]
        return scalars.fromDesc(element), element
    return scalars.OBJ, t
51 | 
def fromDesc(desc):
    """Convert a type descriptor to an array type.

    A descriptor is kept as-is only when it starts with b'[' and does not end
    with b';'; anything else becomes INVALID.
    """
    keeps_descriptor = desc.startswith(b'[') and not desc.endswith(b';')
    return desc if keeps_descriptor else INVALID
56 | 


--------------------------------------------------------------------------------
/enjarify/jvm/constantpool.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2015 Google Inc. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import struct
 16 | 
 17 | from . import error
 18 | 
 19 | CONSTANT_Class = 7
 20 | CONSTANT_Fieldref = 9
 21 | CONSTANT_Methodref = 10
 22 | CONSTANT_InterfaceMethodref = 11
 23 | CONSTANT_String = 8
 24 | CONSTANT_Integer = 3
 25 | CONSTANT_Float = 4
 26 | CONSTANT_Long = 5
 27 | CONSTANT_Double = 6
 28 | CONSTANT_NameAndType = 12
 29 | CONSTANT_Utf8 = 1
 30 | # CONSTANT_MethodHandle = 15
 31 | # CONSTANT_MethodType = 16
 32 | # CONSTANT_InvokeDynamic = 18
 33 | MAX_CONST = CONSTANT_NameAndType
 34 | 
 35 | def _width(tag):
 36 |     return 2 if tag in (CONSTANT_Long, CONSTANT_Double) else 1
 37 | 
 38 | class ConstantPoolBase:
 39 |     def __init__(self):
 40 |         # lookup dicts for deduplicating constants
 41 |         self.lookup = [{} for _ in range(MAX_CONST + 1)]
 42 | 
 43 |     def _get(self, tag, args):
 44 |         d = self.lookup[tag]
 45 |         try:
 46 |             return d[args]
 47 |         except KeyError:
 48 |             low = tag in (CONSTANT_Integer, CONSTANT_Float, CONSTANT_String)
 49 |             d[args] = index = self._getInd(low, _width(tag))
 50 | 
 51 |             assert self.vals[index] is None
 52 |             self.vals[index] = tag, args
 53 |         return d[args]
 54 | 
 55 |     def insertDirectly(self, pair, low):
 56 |         tag, x = pair
 57 |         d = self.lookup[tag]
 58 |         d[x] = index = self._getInd(low, _width(tag))
 59 |         self.vals[index] = pair
 60 | 
 61 |     def tryGet(self, pair):
 62 |         tag, x = pair
 63 |         d = self.lookup[tag]
 64 |         try:
 65 |             return d[x]
 66 |         except KeyError:
 67 |             pass
 68 |         width = _width(tag)
 69 |         if width > self.space():
 70 |             return None
 71 |         d[x] = index = self._getInd(True, width)
 72 |         self.vals[index] = pair
 73 |         return index
 74 | 
 75 |     def utf8(self, s):
 76 |         assert isinstance(s, bytes)
 77 |         return self._get(CONSTANT_Utf8, s)
 78 | 
 79 |     def class_(self, s): return self._get(CONSTANT_Class, self.utf8(s))
 80 |     def string(self, s): return self._get(CONSTANT_String, self.utf8(s))
 81 | 
 82 |     def nat(self, name, desc):
 83 |         return self._get(CONSTANT_NameAndType, (self.utf8(name), self.utf8(desc)))
 84 | 
 85 |     def _triple(self, tag, trip):
 86 |         return self._get(tag, (self.class_(trip[0]), self.nat(trip[1], trip[2])))
 87 | 
 88 |     def field(self, trip): return self._triple(CONSTANT_Fieldref, trip)
 89 |     def method(self, trip): return self._triple(CONSTANT_Methodref, trip)
 90 |     def imethod(self, trip): return self._triple(CONSTANT_InterfaceMethodref, trip)
 91 | 
 92 |     def int(self, x): return self._get(CONSTANT_Integer, x)
 93 |     def float(self, x): return self._get(CONSTANT_Float, x)
 94 |     def long(self, x): return self._get(CONSTANT_Long, x)
 95 |     def double(self, x): return self._get(CONSTANT_Double, x)
 96 | 
 97 |     def _writeEntry(self, stream, item):
 98 |         if item is None:
 99 |             return
100 |         tag, val = item
101 |         stream.u8(tag)
102 | 
103 |         if tag == CONSTANT_Utf8:
104 |             stream.u16(len(val))
105 |             stream.write(val)
106 |         elif tag in (CONSTANT_Integer, CONSTANT_Float):
107 |             stream.u32(val)
108 |         elif tag in (CONSTANT_Long, CONSTANT_Double):
109 |             stream.u64(val)
110 |         elif tag in (CONSTANT_Class, CONSTANT_String):
111 |             stream.u16(val)
112 |         else:
113 |             stream.u16(val[0])
114 |             stream.u16(val[1])
115 | 
# A simple constant pool that just allocates slots in increasing order.
class SimpleConstantPool(ConstantPoolBase):
    def __init__(self):
        super().__init__()
        # starts with a single empty slot at index 0
        self.vals = [None]

    def space(self):
        """Number of slots that can still be allocated."""
        return 65535 - len(self.vals)

    def lowspace(self):
        """Number of slots still available below index 256."""
        return 256 - len(self.vals)

    def _getInd(self, low, width):
        """Append `width` empty slots and return the index of the first.

        `low` is ignored by this pool. Raises error.ClassfileLimitExceeded
        when fewer than `width` slots remain.
        """
        if width > self.space():
            raise error.ClassfileLimitExceeded()
        index = len(self.vals)
        self.vals.extend([None]*width)
        return index

    def write(self, stream):
        """Write the slot count followed by every entry."""
        stream.u16(len(self.vals))
        for entry in self.vals:
            self._writeEntry(stream, entry)
136 | 
137 | # Constant pool slots 1-255 are special because they can be referred to by the
138 | # two byte ldc instruction (as opposed to 3 byte ldc_w/ldc2_w). Therefore, it is
139 | # desirable to allocate constants which could use ldc in the first 255 slots,
140 | # while not wasting these valuable low slots with pool entries that can't use
141 | # ldc (utf8s, longs, etc.)
142 | # One possible approach is to allocate the ldc entries starting at 1 and the
143 | # others starting at 256, (possibly leaving a gap if there are less than 255 of
144 | # the former). However, this is not ideal because the empty slots are not
145 | # contiguous. This means that you could end up in the situation where there
146 | # are exactly two free slots and you wish to add a long/double entry but the
147 | # free slots are not contiguous.
148 | # To solve this, we take a different approach - always create the pool as the
149 | # largest possible size (65534 entries) and allocate the non-ldc constants
150 | # starting from the highest index and counting down. This ensures that the free
151 | # slots are always contiguous. Since the classfile representation doesn't
152 | # actually allow gaps like that, the empty spaces if any are filled in with
153 | # dummy entries at the end.
154 | # For simplicity, we always allocate ints, floats, and strings in the low entries
155 | # and everything else in the high entries, regardless of whether they are actually
156 | # referenced by a ldc or not. (see ConstantPoolBase._get)
157 | 
# Fill in unused space with shortest possible item (Utf8 ''), preencoded for efficiency
PLACEHOLDER_ENTRY = struct.pack('>BH', CONSTANT_Utf8, 0)
class SplitConstantPool(ConstantPoolBase):
    """Pool of fixed maximum size filled from both ends.

    Low allocations grow upwards from index 1 (self.bot), all others grow
    downwards from the end (self.top); the gap between them is free space.
    """
    def __init__(self):
        super().__init__()
        self.vals = [None]*65535
        self.bot = 1
        self.top = len(self.vals)

    def space(self):
        """Number of free slots between the low and high regions."""
        return self.top - self.bot

    def lowspace(self):
        """Number of slots still available below index 256."""
        return 256 - self.bot

    def _getInd(self, low, width):
        """Reserve `width` slots in the low or high region and return the first index.

        Raises error.ClassfileLimitExceeded when fewer than `width` slots are free.
        """
        if width > self.space():
            raise error.ClassfileLimitExceeded()
        if not low:
            self.top -= width
            return self.top
        index = self.bot
        self.bot = index + width
        return index

    def write(self, stream):
        """Write the slot count, the low entries, placeholders for the gap, then the high entries."""
        stream.u16(len(self.vals))

        assert self.bot <= self.top
        for entry in self.vals[:self.bot]:
            self._writeEntry(stream, entry)

        # one placeholder per free slot
        stream.write(PLACEHOLDER_ENTRY * self.space())

        for entry in self.vals[self.top:]:
            self._writeEntry(stream, entry)
190 | 


--------------------------------------------------------------------------------
/enjarify/jvm/constants/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2015 Google Inc. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/enjarify/jvm/constants/calc.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2015 Google Inc. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from ...util import s16, s32, s64
 16 | from .. import scalartypes as scalars
 17 | from ..jvmops import *
 18 | 
 19 | from . import lookup
 20 | from .genlookup import FLOAT_SIGN, FLOAT_INF, FLOAT_NINF, FLOAT_NAN, DOUBLE_SIGN, DOUBLE_INF, DOUBLE_NINF, DOUBLE_NAN
 21 | 
 22 | # Calculate a sequence of bytecode instructions to generate the given constant
 23 | # to be used in the rare case that the constant pool is full.
 24 | 
 25 | # NaN has multiple representations, so normalize Floats to a single NaN representation
 26 | def normalizeFloat(x):
 27 |     x %= 1<<32
 28 |     if x | FLOAT_SIGN > FLOAT_NINF:
 29 |         return FLOAT_NAN
 30 |     return x
 31 | 
 32 | def normalizeDouble(x):
 33 |     x %= 1<<64
 34 |     if x | DOUBLE_SIGN > DOUBLE_NINF:
 35 |         return DOUBLE_NAN
 36 |     return x
 37 | 
 38 | def _calcInt(x):
 39 |     assert x == s32(x)
 40 |     if x in lookup.INTS:
 41 |         return lookup.INTS[x]
 42 | 
 43 |     # max required - 10 bytes
 44 |     # (high << 16) ^ low
 45 |     low = s16(x)
 46 |     high = (x ^ low) >> 16
 47 |     assert high
 48 |     if not low:
 49 |         return _calcInt(high) + _calcInt(16) + bytes([ISHL])
 50 |     return _calcInt(high) + _calcInt(16) + bytes([ISHL]) + _calcInt(low) + bytes([IXOR])
 51 | 
 52 | def _calcLong(x):
 53 |     assert x == s64(x)
 54 |     if x in lookup.LONGS:
 55 |         return lookup.LONGS[x]
 56 | 
 57 |     # max required - 26 bytes
 58 |     # (high << 32) ^ low
 59 |     low = s32(x)
 60 |     high = (x ^ low) >> 32
 61 |     if not high:
 62 |         return _calcInt(low) + bytes([I2L])
 63 | 
 64 |     result = _calcInt(high) + bytes([I2L]) + _calcInt(32) + bytes([LSHL])
 65 |     if low:
 66 |         result += _calcInt(low) + bytes([I2L, LXOR])
 67 |     return result
 68 | 
def _calcFloat(x):
    """Return bytecode producing the float whose normalized bit pattern is x.

    Values found in the lookup table use their stored sequence. Otherwise the
    float is rebuilt as (signed integer mantissa converted with i2f) multiplied
    or divided by powers of two, each power being created as a long shift that
    is converted to float.
    """
    assert x == normalizeFloat(x)
    if x in lookup.FLOATS:
        return lookup.FLOATS[x]

    # max required - 27 bytes
    # split the bit pattern into its unbiased exponent and 23 bit mantissa field
    exponent = ((x >> 23) & 0xFF) - 127
    mantissa = x % (1<<23)
    # check for denormals!
    if exponent == -127:
        # denormal: no implicit leading bit, exponent is one higher
        exponent += 1
    else:
        # normal: add the implicit leading 1 bit
        mantissa += 1<<23
    # treat the mantissa as an integer rather than a fraction
    exponent -= 23

    if x & FLOAT_SIGN:
        mantissa = -mantissa

    # negative exponents are applied by dividing by 2^|exponent|
    ex_combine_op = FDIV if exponent < 0 else FMUL
    exponent = abs(exponent)
    exponent_parts = bytearray()
    while exponent >= 63: # max 2 iterations since -149 <= exp <= 104
        # 1L << -1: presumably evaluates to -2^63 on the JVM (cf. the "-2^63"
        # notes in _calcDouble), which is why the mantissa sign is flipped
        exponent_parts.extend([LCONST_1, ICONST_M1, LSHL, L2F, ex_combine_op])
        mantissa = -mantissa
        exponent -= 63

    if exponent > 0:
        # remaining factor 2^exponent built as 1L << exponent
        exponent_parts.append(LCONST_1)
        exponent_parts.extend(_calcInt(exponent))
        exponent_parts.extend([LSHL, L2F, ex_combine_op])
    return _calcInt(mantissa) + bytes([I2F]) + exponent_parts
100 | 
def _calcDouble(x):
    """Return bytecode producing the double whose normalized bit pattern is x.

    Values found in the lookup table use their stored sequence. Otherwise the
    double is rebuilt as (signed integer mantissa converted with l2d) scaled by
    a power of two. The exponent is split into a multiple of 63, generated by
    repeated squaring of (-2^63 or -2^-63), and a remainder below 63 generated
    with a single long shift.
    """
    assert x == normalizeDouble(x)
    if x in lookup.DOUBLES:
        return lookup.DOUBLES[x]

    # max required - 55 bytes
    # split the bit pattern into its unbiased exponent and 52 bit mantissa field
    exponent = ((x >> 52) & 0x7FF) - 1023
    mantissa = x % (1<<52)
    # check for denormals!
    if exponent == -1023:
        # denormal: no implicit leading bit, exponent is one higher
        exponent += 1
    else:
        # normal: add the implicit leading 1 bit
        mantissa += 1<<52
    # treat the mantissa as an integer rather than a fraction
    exponent -= 52

    if x & DOUBLE_SIGN:
        mantissa = -mantissa

    abs_exponent = abs(exponent)
    exponent_parts = bytearray()

    part63 = abs_exponent // 63
    if part63: #create *63 part of exponent by repeated squaring
        # use 2^-x instead of calculating 2^x and dividing to avoid overflow in
        # case we need 2^-1071
        if exponent < 0: # -2^-63
            exponent_parts.extend([DCONST_1, LCONST_1, ICONST_M1, LSHL, L2D, DDIV])
        else: # -2^63
            exponent_parts.extend([LCONST_1, ICONST_M1, LSHL, L2D])
        # adjust sign of mantissa for odd powers since we're actually using -2^63 rather than positive
        if part63 & 1:
            mantissa = -mantissa

        # Walk the bits of part63 from lowest to highest. Each step squares the
        # current power (dup2 + dmul); when the previous bit was set, an extra
        # copy of the previous power is kept underneath for the final product.
        # `stack` mirrors the generated operand stack, tracking which multiple
        # of 63 each value represents.
        last_needed = part63 & 1
        stack = [1] # Not actually required to compute the results - it's just used for a sanity check
        for bi in range(1, part63.bit_length()):
            exponent_parts.append(DUP2)
            stack.append(stack[-1])
            if last_needed:
                exponent_parts.append(DUP2)
                stack.append(stack[-1])
            exponent_parts.append(DMUL)
            stack.append(stack.pop() + stack.pop())
            last_needed = part63 & (1<<bi)

        assert sum(stack) == part63 and len(stack) == bin(part63).count('1')
        # one dmul per kept power: this also multiplies in the mantissa, which
        # is emitted first in the returned sequence
        exponent_parts.extend([DMUL] * bin(part63).count('1'))

    # now handle the rest
    rest = abs_exponent % 63
    if rest:
        # remaining factor 2^rest built as 1L << rest
        exponent_parts.append(LCONST_1)
        exponent_parts.extend(_calcInt(rest))
        exponent_parts.extend([LSHL, L2D])
        exponent_parts.append(DDIV if exponent < 0 else DMUL)

    return _calcLong(mantissa) + bytes([L2D]) + exponent_parts
158 | 
# Public entry points: bring the value into the canonical form the private
# generators expect, then generate the bytecode.
def calcInt(x):
    """Bytecode for an int; x is first wrapped into s32 range."""
    wrapped = s32(x)
    return _calcInt(wrapped)

def calcLong(x):
    """Bytecode for a long; x is first wrapped into s64 range."""
    wrapped = s64(x)
    return _calcLong(wrapped)

def calcFloat(x):
    """Bytecode for a float bit pattern; x is first normalized."""
    normalized = normalizeFloat(x)
    return _calcFloat(normalized)

def calcDouble(x):
    """Bytecode for a double bit pattern; x is first normalized."""
    normalized = normalizeDouble(x)
    return _calcDouble(normalized)
163 | 
def normalize(st, val):
    """Normalize val when st is a float or double scalar type; otherwise return it unchanged."""
    if st == scalars.FLOAT:
        return normalizeFloat(val)
    return normalizeDouble(val) if st == scalars.DOUBLE else val
170 | 
def calc(st, val):
    """Return bytecode generating constant val of scalar type st.

    st must be one of scalars.INT, FLOAT, LONG or DOUBLE.
    """
    generators = (
        (scalars.INT, calcInt),
        (scalars.FLOAT, calcFloat),
        (scalars.LONG, calcLong),
        (scalars.DOUBLE, calcDouble),
    )
    for scalar_type, generate in generators:
        if st == scalar_type:
            return generate(val)
    assert 0
181 | 
def lookupOnly(st, val):
    """Return the precomputed bytecode for val, or None when there is none.

    None is also returned when st is not one of the four scalar types.
    """
    # assume floats and double have already been normalized but int/longs haven't
    if st == scalars.INT:
        table, key = lookup.INTS, s32(val)
    elif st == scalars.FLOAT:
        table, key = lookup.FLOATS, val
    elif st == scalars.LONG:
        table, key = lookup.LONGS, s64(val)
    elif st == scalars.DOUBLE:
        table, key = lookup.DOUBLES, val
    else:
        return None
    return table.get(key)
192 | 


--------------------------------------------------------------------------------
/enjarify/jvm/constants/genlookup.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2015 Google Inc. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import struct, itertools
 16 | 
 17 | from ..jvmops import *
 18 | from ...util import s32
 19 | 
 20 | # Create a precomputed lookup table giving the bytecode sequence to generate
 21 | # any primitive constant of 3 bytes or less plus special float values (negative
 22 | # infinity requires 4 bytes but is included anyway to simplify things elsewhere)
 23 | #
 24 | # For example
 25 | # 128 -> sipush 128
 26 | # -65535 -> iconst_m1 i2c ineg
 27 | # 2147483647 -> iconst_m1 iconst_m1 iushr
 28 | # 1L -> lconst_1
 29 | # 127L -> bipush 127 i2l
 30 | # 42.0f -> bipush 42 i2f
 31 | # -Inf -> dconst_1 dneg dconst_0 ddiv
 32 | #
 33 | # Lookup table keys are s32/s64 for ints/longs and u32/u64 for floats/doubles
 34 | # There are multiple NaN representations, so we normalize NaNs to the
 35 | # representation of all 1s (e.g. float NaN = 0xFFFFFFFF)
 36 | 
 37 | def u32(x): return x % (1<<32)
 38 | def u64(x): return x % (1<<64)
 39 | 
 40 | FLOAT_SIGN = 1<<31
 41 | FLOAT_NAN = u32(-1)
 42 | FLOAT_INF = 0xFF << 23
 43 | FLOAT_NINF = FLOAT_INF ^ FLOAT_SIGN
 44 | def i2f(x):
 45 |     if x == 0:
 46 |         return 0
 47 |     if x < 0:
 48 |         return i2f(-x) ^ FLOAT_SIGN
 49 |     shift = 24 - x.bit_length()
 50 |     # Don't bother implementing rounding since we'll only convert small ints
 51 |     # that can be exactly represented anyway
 52 |     assert shift >= 0
 53 |     mantissa = x << shift
 54 |     exponent = shift + 127
 55 |     return (exponent << 23) | mantissa
 56 | 
 57 | DOUBLE_SIGN = 1<<63
 58 | DOUBLE_NAN = u64(-1)
 59 | DOUBLE_INF = 0x7FF << 52
 60 | DOUBLE_NINF = DOUBLE_INF ^ DOUBLE_SIGN
 61 | def i2d(x):
 62 |     if x == 0:
 63 |         return 0
 64 |     if x < 0:
 65 |         return i2d(-x) ^ DOUBLE_SIGN
 66 |     shift = 53 - x.bit_length()
 67 |     assert shift >= 0
 68 |     mantissa = x << shift
 69 |     exponent = shift + 1023
 70 |     return (exponent << 52) | mantissa
 71 | 
 72 | # add if value is shorter then current best
 73 | def add(d, k, v):
 74 |     if k not in d or len(v) < len(d[k]):
 75 |         d[k] = v
 76 | 
# Script entry point: build the four lookup tables (ints, longs, floats,
# doubles) and print them to stdout as Python source for lookup.py.
# Candidates are registered through add(), which only replaces an entry with a
# strictly shorter one, so the order of the add() calls below decides ties.
if __name__ == "__main__":
    # int constants
    all_ints = {}

    # 1 byte ints
    # ICONST_0 + i relies on the iconst_m1..iconst_5 opcodes being consecutive
    for i in range(-1, 6):
        add(all_ints, i, bytes([ICONST_0 + i]))
    # Sort for determinism. Otherwise -0x80000000 could be either
    # 1 << -1 or -1 << -1, for example
    int_1s = sorted({k for k,v in all_ints.items() if len(v) == 1})

    # 2 byte ints
    for i in range(-128, 128):
        add(all_ints, i, struct.pack('>Bb', BIPUSH, i))
    # a 1 byte constant followed by i2c; the result key is the value mod 2**16
    for i in int_1s:
        add(all_ints, i % 65536, all_ints[i] + bytes([I2C]))
    int_2s = sorted({k for k,v in all_ints.items() if len(v) == 2})

    # 3 byte ints
    for i in range(-32768, 32768):
        add(all_ints, i, struct.pack('>Bh', SIPUSH, i))
    # a 2 byte constant followed by a single unary op (i2c or ineg)
    for i in int_2s:
        add(all_ints, i % 65536, all_ints[i] + bytes([I2C]))
        add(all_ints, s32(-i), all_ints[i] + bytes([INEG]))
    # two 1 byte constants combined by a shift; only the low 5 bits of the
    # shift amount are used (y % 32), and results are wrapped back to s32
    for x, y in itertools.product(int_1s, int_1s):
        add(all_ints, s32(x << (y % 32)), all_ints[x] + all_ints[y] + bytes([ISHL]))
        add(all_ints, s32(x >> (y % 32)), all_ints[x] + all_ints[y] + bytes([ISHR]))
        add(all_ints, s32(u32(x) >> (y % 32)), all_ints[x] + all_ints[y] + bytes([IUSHR]))

    # long constants
    all_longs = {}
    # lconst_0 and lconst_1
    for i in range(0, 2):
        add(all_longs, i, bytes([LCONST_0 + i]))

    # any 1 or 2 byte int constant widened with i2l
    for i in int_1s + int_2s:
        add(all_longs, i, all_ints[i] + bytes([I2L]))

    # float constants
    # keys are the bit patterns returned by i2f
    all_floats = {}
    for i in range(0, 2):
        add(all_floats, i2f(i), bytes([FCONST_0 + i]))

    # any 1 or 2 byte int constant converted with i2f
    for i in int_1s + int_2s:
        add(all_floats, i2f(i), all_ints[i] + bytes([I2F]))

    # hardcode unusual float values for simplicity
    add(all_floats, FLOAT_SIGN, bytes([FCONST_0, FNEG])) # -0.0
    add(all_floats, FLOAT_NAN, bytes([FCONST_0, FCONST_0, FDIV])) # NaN
    add(all_floats, FLOAT_INF, bytes([FCONST_1, FCONST_0, FDIV])) # Inf
    add(all_floats, FLOAT_NINF, bytes([FCONST_1, FNEG, FCONST_0, FDIV])) # -Inf

    # double constants
    # keys are the bit patterns returned by i2d
    all_doubles = {}
    for i in range(0, 2):
        add(all_doubles, i2d(i), bytes([DCONST_0 + i]))

    # any 1 or 2 byte int constant converted with i2d
    for i in int_1s + int_2s:
        add(all_doubles, i2d(i), all_ints[i] + bytes([I2D]))

    add(all_doubles, DOUBLE_SIGN, bytes([DCONST_0, DNEG])) # -0.0
    add(all_doubles, DOUBLE_NAN, bytes([DCONST_0, DCONST_0, DDIV])) # NaN
    add(all_doubles, DOUBLE_INF, bytes([DCONST_1, DCONST_0, DDIV])) # Inf
    add(all_doubles, DOUBLE_NINF, bytes([DCONST_1, DNEG, DCONST_0, DDIV])) # -Inf

    # Emit the generated module: license header first, then one dict literal
    # per table.
    print('''
# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Autogenerated by genlookup.py - do not edit''')

    # Entries are printed in sorted key order so the output is reproducible;
    # keys are written in hex and values as bytes literals.
    for name, d in zip('INTS LONGS FLOATS DOUBLES'.split(), [all_ints, all_longs, all_floats, all_doubles]):
        print(name + ' = {')
        for k, v in sorted(d.items()):
            print('    {}: {},'.format(hex(k), v))
        print('}')


--------------------------------------------------------------------------------
/enjarify/jvm/error.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2015 Google Inc. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
class ClassfileLimitExceeded(Exception):
    """Raised when generated output would exceed a classfile format limit.

    e.g. too many registers in a method, too many constant pool entries,
    code too long
    """
17 | 


--------------------------------------------------------------------------------
/enjarify/jvm/genmathops.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2015 Google Inc. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # Generate mathops.py, the lookup tables giving information about dalvik math operations by opcode
if __name__ == "__main__":
    # Operation mnemonics in table order. Each table below assigns consecutive
    # opcode numbers starting from a fixed base.
    unary_ops = ('ineg inot lneg lnot fneg dneg '
                 'i2l i2f i2d l2i l2f l2d f2i f2l f2d d2i d2l d2f '
                 'i2b i2c i2s').split()
    binary_once = ('iadd isub imul idiv irem iand ior ixor ishl ishr iushr '
                   'ladd lsub lmul ldiv lrem land lor lxor lshl lshr lushr '
                   'fadd fsub fmul fdiv frem '
                   'dadd dsub dmul ddiv drem').split()
    # the full binary list is emitted twice, over two consecutive opcode ranges
    binary_ops = binary_once + binary_once
    literal_arith = 'iadd isub imul idiv irem iand ior ixor'.split()
    # the arithmetic/bitwise literal ops appear twice, then the three shifts
    literal_ops = literal_arith + literal_arith + 'ishl ishr iushr'.split()

    # Mnemonic type letter -> name of the constant in scalartypes
    type_names = {
        'i': 'INT',
        'f': 'FLOAT',
        'l': 'LONG',
        'd': 'DOUBLE',
        'b': 'INT',
        'c': 'INT',
        's': 'INT',
    }

    header = '''
# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Autogenerated by genmathops.py - do not edit'''
    print(header)
    print('from . import jvmops')
    print('from . import scalartypes as scalars')

    typed_row = '    0x%02X: (jvmops.%s, scalars.%s, scalars.%s),'

    print('UNARY = {')
    for opcode, mnemonic in enumerate(unary_ops, 0x7b):
        # there is no "not" op in jvmops; the xor opcode is emitted instead
        mnemonic = mnemonic.replace('not', 'xor')
        source_type = type_names[mnemonic[0]]
        # conversions are spelled <src>2<dest>; everything else keeps its type
        dest_type = type_names[mnemonic[2]] if '2' in mnemonic else source_type
        print(typed_row % (opcode, mnemonic.upper(), source_type, dest_type))
    print('}')

    print('BINARY = {')
    for opcode, mnemonic in enumerate(binary_ops, 0x90):
        first_type = type_names[mnemonic[0]]
        # shift instructions have second arg an int even when operating on longs
        second_type = 'INT' if 'sh' in mnemonic else first_type
        print(typed_row % (opcode, mnemonic.upper(), first_type, second_type))
    print('}')

    print('BINARY_LIT = {')
    for opcode, mnemonic in enumerate(literal_ops, 0xd0):
        print('    0x%02X: jvmops.%s,' % (opcode, mnemonic.upper()))
    print('}')
66 | 


--------------------------------------------------------------------------------
/enjarify/jvm/ir.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2015 Google Inc. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import struct
 16 | 
 17 | from .constants import calc
 18 | from .jvmops import *
 19 | from . import constantpool, error
 20 | from . import scalartypes as scalars
 21 | 
 22 | # IR representation roughly corresponding to JVM bytecode instructions. Note that these
 23 | # may correspond to more than one instruction in the actual bytecode generated but they
 24 | # are useful logical units for the internal optimization passes.
 25 | 
 26 | class JvmInstruction:
 27 |     def __init__(self, bytecode=None):
 28 |         self.bytecode = bytecode # None or bytestring
 29 | 
 30 |     def fallsthrough(self): return True
 31 |     def targets(self): return []
 32 | 
 33 | # Used to mark locations in the IR instructions for various purposes. These are
 34 | # seperate IR 'instructions' since the optimization passes may remove or replace
 35 | # the other instructions.
 36 | class Label(JvmInstruction):
 37 |     def __init__(self, id=None):
 38 |         super().__init__(b'')
 39 |         self.id = id # None or int
 40 | 
 41 | _ilfdaOrd = [scalars.INT, scalars.LONG, scalars.FLOAT, scalars.DOUBLE, scalars.OBJ].index
 42 | class RegAccess(JvmInstruction):
 43 |     def __init__(self, dreg, st, store):
 44 |         super().__init__()
 45 |         self.key = dreg, st
 46 |         self.store = store
 47 |         self.wide = scalars.iswide(st)
 48 | 
 49 |     @staticmethod
 50 |     def raw(local, stype, store):
 51 |         new = RegAccess(0, stype, store)
 52 |         new.calcBytecode(local)
 53 |         return new
 54 | 
 55 |     def calcBytecode(self, local):
 56 |         assert self.bytecode is None
 57 |         stype = self.key[1]
 58 |         op_off = (ISTORE - ILOAD) if self.store else 0
 59 |         if local < 4:
 60 |             self.bytecode = struct.pack('>B', ILOAD_0 + op_off + local + _ilfdaOrd(stype)*4)
 61 |         elif local < 256:
 62 |             self.bytecode = struct.pack('>BB', ILOAD + op_off + _ilfdaOrd(stype), local)
 63 |         else:
 64 |             self.bytecode = struct.pack('>BBH', WIDE, ILOAD + op_off + _ilfdaOrd(stype), local)
 65 | 
 66 | class PrimConstant(JvmInstruction):
 67 |     def __init__(self, st, val, pool=None):
 68 |         super().__init__()
 69 |         self.st = st
 70 |         self.val = val = calc.normalize(st, val)
 71 |         self.wide = scalars.iswide(st)
 72 | 
 73 |         # If pool is passed in, just grab an entry greedily, otherwise calculate
 74 |         # a sequence of bytecode to generate the constant
 75 |         if pool is not None:
 76 |             self.bytecode = calc.lookupOnly(st, val)
 77 |             if self.bytecode is None:
 78 |                 self._from_pool(pool)
 79 |             if self.bytecode is None:
 80 |                 raise error.ClassfileLimitExceeded()
 81 |         else:
 82 |             self.bytecode = calc.calc(st, val)
 83 | 
 84 |     def cpool_key(self):
 85 |         tag = {
 86 |             scalars.INT: constantpool.CONSTANT_Integer,
 87 |             scalars.FLOAT: constantpool.CONSTANT_Float,
 88 |             scalars.DOUBLE: constantpool.CONSTANT_Double,
 89 |             scalars.LONG: constantpool.CONSTANT_Long,
 90 |         }[self.st]
 91 |         return tag, self.val
 92 | 
 93 |     def _from_pool(self, pool):
 94 |         index = pool.tryGet(self.cpool_key())
 95 |         if index is not None:
 96 |             if scalars.iswide(self.st):
 97 |                 code = struct.pack('>BH', LDC2_W, index)
 98 |             elif index >= 256:
 99 |                 code = struct.pack('>BH', LDC_W, index)
100 |             else:
101 |                 code = struct.pack('>BB', LDC, index)
102 |             self.bytecode = code
103 | 
104 |     def fix_with_pool(self, pool):
105 |         if len(self.bytecode) > 2:
106 |             self._from_pool(pool)
107 | 
class OtherConstant(JvmInstruction):
    """Constant that is not a primitive: null, string or class."""

    # will be null, string or class - always single
    wide = False
110 | 
class LazyJumpBase(JvmInstruction):
    """Common base of jumps whose encoding is chosen once positions are known.

    Subclasses provide `min` and `max`, the bounds on the encoded length.
    """

    def __init__(self, target):
        super().__init__()
        self.target = target

    def targets(self):
        return [self.target]

    def widenIfNecessary(self, labels, posd):
        """Raise `min` to `max` if the jump offset does not fit in 16 bits.

        Returns True when the offset was out of the signed 16 bit range (and
        the instruction was therefore widened), False otherwise.
        """
        distance = posd[labels[self.target]] - posd[self]
        if -32768 <= distance < 32768:
            return False
        self.min = self.max
        return True
124 | 
class Goto(LazyJumpBase):
    """Unconditional jump, encoded as goto or goto_w."""

    def __init__(self, target):
        super().__init__(target)
        self.min = 3
        # upper limit on length of bytecode
        self.max = 5

    def fallsthrough(self):
        return False

    def calcBytecode(self, posd, labels):
        """Encode the jump relative to this instruction's own position."""
        distance = posd[labels[self.target]] - posd[self]
        # max == 3 selects the short form with a 16 bit offset; otherwise the
        # 32 bit goto_w form is used
        if self.max == 3:
            opcode, layout = GOTO, '>Bh'
        else:
            opcode, layout = GOTO_W, '>Bi'
        self.bytecode = struct.pack(layout, opcode, distance)
139 | 
# Maps every conditional jump opcode to the opcode testing the negated
# condition (in both directions).
_ifOpposite = {}
for _op1, _op2 in zip(
        [IFEQ, IFLT, IFGT, IF_ICMPEQ, IF_ICMPLT, IF_ICMPGT, IFNULL, IF_ACMPEQ],
        [IFNE, IFGE, IFLE, IF_ICMPNE, IF_ICMPGE, IF_ICMPLE, IFNONNULL, IF_ACMPNE]):
    _ifOpposite.update({_op1: _op2, _op2: _op1})
class If(LazyJumpBase):
    """Conditional jump with opcode `op`."""

    def __init__(self, op, target):
        super().__init__(target)
        self.op = op
        self.min = 3
        # upper limit on length of bytecode
        self.max = 8

    def calcBytecode(self, posd, labels):
        """Encode the jump relative to this instruction's own position.

        Unlike goto, if instructions only have a 16 bit jump offset, so a
        larger jump is emitted as a different sequence:

            if x goto A
            B: whatever

        becomes

            if !x goto B
            goto A
            B: whatever
        """
        distance = posd[labels[self.target]] - posd[self]
        if self.max == 3:
            self.bytecode = struct.pack('>Bh', self.op, distance)
            return
        negated = _ifOpposite[self.op]
        # The negated if skips the whole 8 byte sequence. The goto_w starts
        # 3 bytes after this instruction, hence the adjusted offset.
        self.bytecode = struct.pack('>BhBi', negated, 8, GOTO_W, distance - 3)
170 | 
class Switch(JvmInstruction):
    """Multiway jump, encoded as tableswitch or lookupswitch.

    `jumps` maps case keys to target labels and must be non-empty; `default`
    is the label used for every other key.
    """

    def __init__(self, default, jumps):
        super().__init__()
        self.default = default
        self.jumps = jumps

        assert jumps
        self.low = min(jumps)
        self.high = max(jumps)

        # Size in bytes of each form, excluding the 9 bytes counted separately
        # below and the alignment padding.
        table_bytes = 4 * (self.high - self.low + 2)
        lookup_bytes = 8 * len(jumps)

        # The table form is used only when it is strictly smaller, so the
        # chosen form always has the smaller of the two sizes.
        self.istable = table_bytes < lookup_bytes
        self.nopad_size = 9 + min(table_bytes, lookup_bytes)
        # up to 3 bytes of padding may be required
        self.max = self.nopad_size + 3

    def fallsthrough(self):
        return False

    def targets(self):
        """Return the distinct case targets in sorted order, then the default."""
        result = sorted(set(self.jumps.values()))
        result.append(self.default)
        return result

    def calcBytecode(self, posd, labels):
        """Encode the switch; all offsets are relative to this instruction."""
        here = posd[self]

        def relative(label):
            return posd[labels[label]] - here

        default_offset = relative(self.default)
        # zero bytes that align the operands following the opcode to 4 bytes
        padding = bytes((-here - 1) % 4)

        if self.istable:
            chunks = [
                bytes([TABLESWITCH]),
                padding,
                struct.pack('>iii', default_offset, self.low, self.high),
            ]
            # one entry per key in [low, high]; missing keys go to the default
            chunks.extend(
                struct.pack('>i', relative(self.jumps.get(key, self.default)))
                for key in range(self.low, self.high + 1))
        else:
            chunks = [
                bytes([LOOKUPSWITCH]),
                padding,
                struct.pack('>iI', default_offset, len(self.jumps)),
            ]
            # (key, offset) pairs in ascending key order
            chunks.extend(
                struct.pack('>ii', key, relative(label))
                for key, label in sorted(self.jumps.items()))
        self.bytecode = b''.join(chunks)
211 | 
# Single byte encodings of every return opcode (IRETURN through RETURN) plus
# athrow.
_return_or_throw_bytecodes = {
    bytes([op]) for op in list(range(IRETURN, RETURN + 1)) + [ATHROW]
}
class Other(JvmInstruction):
    """Any instruction without a dedicated IR class."""

    def fallsthrough(self):
        # returns and throws never continue to the next instruction
        return not (self.bytecode in _return_or_throw_bytecodes)
216 | 
def Pop():
    """Return a new Other instruction holding the pop opcode."""
    return Other(bytes((POP,)))


def Pop2():
    """Return a new Other instruction holding the pop2 opcode."""
    return Other(bytes((POP2,)))


def Dup():
    """Return a new Other instruction holding the dup opcode."""
    return Other(bytes((DUP,)))


def Dup2():
    """Return a new Other instruction holding the dup2 opcode."""
    return Other(bytes((DUP2,)))
221 | 


--------------------------------------------------------------------------------
/enjarify/jvm/jvmops.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2015 Google Inc. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
# Numeric values of the JVM bytecode opcodes, one constant per mnemonic, in
# ascending opcode order. Other modules rely on related opcodes being
# consecutive (e.g. ICONST_0 + i, ILOAD_0 + n), so the values must not change.

# Constants
NOP = 0x00
ACONST_NULL = 0x01
ICONST_M1 = 0x02
ICONST_0 = 0x03
ICONST_1 = 0x04
ICONST_2 = 0x05
ICONST_3 = 0x06
ICONST_4 = 0x07
ICONST_5 = 0x08
LCONST_0 = 0x09
LCONST_1 = 0x0A
FCONST_0 = 0x0B
FCONST_1 = 0x0C
FCONST_2 = 0x0D
DCONST_0 = 0x0E
DCONST_1 = 0x0F
BIPUSH = 0x10
SIPUSH = 0x11
LDC = 0x12
LDC_W = 0x13
LDC2_W = 0x14
# Loads
ILOAD = 0x15
LLOAD = 0x16
FLOAD = 0x17
DLOAD = 0x18
ALOAD = 0x19
ILOAD_0 = 0x1A
ILOAD_1 = 0x1B
ILOAD_2 = 0x1C
ILOAD_3 = 0x1D
LLOAD_0 = 0x1E
LLOAD_1 = 0x1F
LLOAD_2 = 0x20
LLOAD_3 = 0x21
FLOAD_0 = 0x22
FLOAD_1 = 0x23
FLOAD_2 = 0x24
FLOAD_3 = 0x25
DLOAD_0 = 0x26
DLOAD_1 = 0x27
DLOAD_2 = 0x28
DLOAD_3 = 0x29
ALOAD_0 = 0x2A
ALOAD_1 = 0x2B
ALOAD_2 = 0x2C
ALOAD_3 = 0x2D
IALOAD = 0x2E
LALOAD = 0x2F
FALOAD = 0x30
DALOAD = 0x31
AALOAD = 0x32
BALOAD = 0x33
CALOAD = 0x34
SALOAD = 0x35
# Stores
ISTORE = 0x36
LSTORE = 0x37
FSTORE = 0x38
DSTORE = 0x39
ASTORE = 0x3A
ISTORE_0 = 0x3B
ISTORE_1 = 0x3C
ISTORE_2 = 0x3D
ISTORE_3 = 0x3E
LSTORE_0 = 0x3F
LSTORE_1 = 0x40
LSTORE_2 = 0x41
LSTORE_3 = 0x42
FSTORE_0 = 0x43
FSTORE_1 = 0x44
FSTORE_2 = 0x45
FSTORE_3 = 0x46
DSTORE_0 = 0x47
DSTORE_1 = 0x48
DSTORE_2 = 0x49
DSTORE_3 = 0x4A
ASTORE_0 = 0x4B
ASTORE_1 = 0x4C
ASTORE_2 = 0x4D
ASTORE_3 = 0x4E
IASTORE = 0x4F
LASTORE = 0x50
FASTORE = 0x51
DASTORE = 0x52
AASTORE = 0x53
BASTORE = 0x54
CASTORE = 0x55
SASTORE = 0x56
# Stack manipulation
POP = 0x57
POP2 = 0x58
DUP = 0x59
DUP_X1 = 0x5A
DUP_X2 = 0x5B
DUP2 = 0x5C
DUP2_X1 = 0x5D
DUP2_X2 = 0x5E
SWAP = 0x5F
# Arithmetic, shifts and bitwise operations
IADD = 0x60
LADD = 0x61
FADD = 0x62
DADD = 0x63
ISUB = 0x64
LSUB = 0x65
FSUB = 0x66
DSUB = 0x67
IMUL = 0x68
LMUL = 0x69
FMUL = 0x6A
DMUL = 0x6B
IDIV = 0x6C
LDIV = 0x6D
FDIV = 0x6E
DDIV = 0x6F
IREM = 0x70
LREM = 0x71
FREM = 0x72
DREM = 0x73
INEG = 0x74
LNEG = 0x75
FNEG = 0x76
DNEG = 0x77
ISHL = 0x78
LSHL = 0x79
ISHR = 0x7A
LSHR = 0x7B
IUSHR = 0x7C
LUSHR = 0x7D
IAND = 0x7E
LAND = 0x7F
IOR = 0x80
LOR = 0x81
IXOR = 0x82
LXOR = 0x83
IINC = 0x84
# Conversions
I2L = 0x85
I2F = 0x86
I2D = 0x87
L2I = 0x88
L2F = 0x89
L2D = 0x8A
F2I = 0x8B
F2L = 0x8C
F2D = 0x8D
D2I = 0x8E
D2L = 0x8F
D2F = 0x90
I2B = 0x91
I2C = 0x92
I2S = 0x93
# Comparisons and conditional jumps
LCMP = 0x94
FCMPL = 0x95
FCMPG = 0x96
DCMPL = 0x97
DCMPG = 0x98
IFEQ = 0x99
IFNE = 0x9A
IFLT = 0x9B
IFGE = 0x9C
IFGT = 0x9D
IFLE = 0x9E
IF_ICMPEQ = 0x9F
IF_ICMPNE = 0xA0
IF_ICMPLT = 0xA1
IF_ICMPGE = 0xA2
IF_ICMPGT = 0xA3
IF_ICMPLE = 0xA4
IF_ACMPEQ = 0xA5
IF_ACMPNE = 0xA6
# Control transfer, switches and returns
GOTO = 0xA7
JSR = 0xA8
RET = 0xA9
TABLESWITCH = 0xAA
LOOKUPSWITCH = 0xAB
IRETURN = 0xAC
LRETURN = 0xAD
FRETURN = 0xAE
DRETURN = 0xAF
ARETURN = 0xB0
RETURN = 0xB1
# Fields, invocations, objects, arrays and monitors
GETSTATIC = 0xB2
PUTSTATIC = 0xB3
GETFIELD = 0xB4
PUTFIELD = 0xB5
INVOKEVIRTUAL = 0xB6
INVOKESPECIAL = 0xB7
INVOKESTATIC = 0xB8
INVOKEINTERFACE = 0xB9
INVOKEDYNAMIC = 0xBA
NEW = 0xBB
NEWARRAY = 0xBC
ANEWARRAY = 0xBD
ARRAYLENGTH = 0xBE
ATHROW = 0xBF
CHECKCAST = 0xC0
INSTANCEOF = 0xC1
MONITORENTER = 0xC2
MONITOREXIT = 0xC3
# Extended forms
WIDE = 0xC4
MULTIANEWARRAY = 0xC5
IFNULL = 0xC6
IFNONNULL = 0xC7
GOTO_W = 0xC8
JSR_W = 0xC9
217 | 


--------------------------------------------------------------------------------
/enjarify/jvm/mathops.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # Copyright 2015 Google Inc. All Rights Reserved.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | # Autogenerated by genmathops.py - do not edit
 17 | from . import jvmops
 18 | from . import scalartypes as scalars
 19 | UNARY = {
 20 |     0x7B: (jvmops.INEG, scalars.INT, scalars.INT),
 21 |     0x7C: (jvmops.IXOR, scalars.INT, scalars.INT),
 22 |     0x7D: (jvmops.LNEG, scalars.LONG, scalars.LONG),
 23 |     0x7E: (jvmops.LXOR, scalars.LONG, scalars.LONG),
 24 |     0x7F: (jvmops.FNEG, scalars.FLOAT, scalars.FLOAT),
 25 |     0x80: (jvmops.DNEG, scalars.DOUBLE, scalars.DOUBLE),
 26 |     0x81: (jvmops.I2L, scalars.INT, scalars.LONG),
 27 |     0x82: (jvmops.I2F, scalars.INT, scalars.FLOAT),
 28 |     0x83: (jvmops.I2D, scalars.INT, scalars.DOUBLE),
 29 |     0x84: (jvmops.L2I, scalars.LONG, scalars.INT),
 30 |     0x85: (jvmops.L2F, scalars.LONG, scalars.FLOAT),
 31 |     0x86: (jvmops.L2D, scalars.LONG, scalars.DOUBLE),
 32 |     0x87: (jvmops.F2I, scalars.FLOAT, scalars.INT),
 33 |     0x88: (jvmops.F2L, scalars.FLOAT, scalars.LONG),
 34 |     0x89: (jvmops.F2D, scalars.FLOAT, scalars.DOUBLE),
 35 |     0x8A: (jvmops.D2I, scalars.DOUBLE, scalars.INT),
 36 |     0x8B: (jvmops.D2L, scalars.DOUBLE, scalars.LONG),
 37 |     0x8C: (jvmops.D2F, scalars.DOUBLE, scalars.FLOAT),
 38 |     0x8D: (jvmops.I2B, scalars.INT, scalars.INT),
 39 |     0x8E: (jvmops.I2C, scalars.INT, scalars.INT),
 40 |     0x8F: (jvmops.I2S, scalars.INT, scalars.INT),
 41 | }
 42 | BINARY = {
 43 |     0x90: (jvmops.IADD, scalars.INT, scalars.INT),
 44 |     0x91: (jvmops.ISUB, scalars.INT, scalars.INT),
 45 |     0x92: (jvmops.IMUL, scalars.INT, scalars.INT),
 46 |     0x93: (jvmops.IDIV, scalars.INT, scalars.INT),
 47 |     0x94: (jvmops.IREM, scalars.INT, scalars.INT),
 48 |     0x95: (jvmops.IAND, scalars.INT, scalars.INT),
 49 |     0x96: (jvmops.IOR, scalars.INT, scalars.INT),
 50 |     0x97: (jvmops.IXOR, scalars.INT, scalars.INT),
 51 |     0x98: (jvmops.ISHL, scalars.INT, scalars.INT),
 52 |     0x99: (jvmops.ISHR, scalars.INT, scalars.INT),
 53 |     0x9A: (jvmops.IUSHR, scalars.INT, scalars.INT),
 54 |     0x9B: (jvmops.LADD, scalars.LONG, scalars.LONG),
 55 |     0x9C: (jvmops.LSUB, scalars.LONG, scalars.LONG),
 56 |     0x9D: (jvmops.LMUL, scalars.LONG, scalars.LONG),
 57 |     0x9E: (jvmops.LDIV, scalars.LONG, scalars.LONG),
 58 |     0x9F: (jvmops.LREM, scalars.LONG, scalars.LONG),
 59 |     0xA0: (jvmops.LAND, scalars.LONG, scalars.LONG),
 60 |     0xA1: (jvmops.LOR, scalars.LONG, scalars.LONG),
 61 |     0xA2: (jvmops.LXOR, scalars.LONG, scalars.LONG),
 62 |     0xA3: (jvmops.LSHL, scalars.LONG, scalars.INT),
 63 |     0xA4: (jvmops.LSHR, scalars.LONG, scalars.INT),
 64 |     0xA5: (jvmops.LUSHR, scalars.LONG, scalars.INT),
 65 |     0xA6: (jvmops.FADD, scalars.FLOAT, scalars.FLOAT),
 66 |     0xA7: (jvmops.FSUB, scalars.FLOAT, scalars.FLOAT),
 67 |     0xA8: (jvmops.FMUL, scalars.FLOAT, scalars.FLOAT),
 68 |     0xA9: (jvmops.FDIV, scalars.FLOAT, scalars.FLOAT),
 69 |     0xAA: (jvmops.FREM, scalars.FLOAT, scalars.FLOAT),
 70 |     0xAB: (jvmops.DADD, scalars.DOUBLE, scalars.DOUBLE),
 71 |     0xAC: (jvmops.DSUB, scalars.DOUBLE, scalars.DOUBLE),
 72 |     0xAD: (jvmops.DMUL, scalars.DOUBLE, scalars.DOUBLE),
 73 |     0xAE: (jvmops.DDIV, scalars.DOUBLE, scalars.DOUBLE),
 74 |     0xAF: (jvmops.DREM, scalars.DOUBLE, scalars.DOUBLE),
 75 |     0xB0: (jvmops.IADD, scalars.INT, scalars.INT),
 76 |     0xB1: (jvmops.ISUB, scalars.INT, scalars.INT),
 77 |     0xB2: (jvmops.IMUL, scalars.INT, scalars.INT),
 78 |     0xB3: (jvmops.IDIV, scalars.INT, scalars.INT),
 79 |     0xB4: (jvmops.IREM, scalars.INT, scalars.INT),
 80 |     0xB5: (jvmops.IAND, scalars.INT, scalars.INT),
 81 |     0xB6: (jvmops.IOR, scalars.INT, scalars.INT),
 82 |     0xB7: (jvmops.IXOR, scalars.INT, scalars.INT),
 83 |     0xB8: (jvmops.ISHL, scalars.INT, scalars.INT),
 84 |     0xB9: (jvmops.ISHR, scalars.INT, scalars.INT),
 85 |     0xBA: (jvmops.IUSHR, scalars.INT, scalars.INT),
 86 |     0xBB: (jvmops.LADD, scalars.LONG, scalars.LONG),
 87 |     0xBC: (jvmops.LSUB, scalars.LONG, scalars.LONG),
 88 |     0xBD: (jvmops.LMUL, scalars.LONG, scalars.LONG),
 89 |     0xBE: (jvmops.LDIV, scalars.LONG, scalars.LONG),
 90 |     0xBF: (jvmops.LREM, scalars.LONG, scalars.LONG),
 91 |     0xC0: (jvmops.LAND, scalars.LONG, scalars.LONG),
 92 |     0xC1: (jvmops.LOR, scalars.LONG, scalars.LONG),
 93 |     0xC2: (jvmops.LXOR, scalars.LONG, scalars.LONG),
 94 |     0xC3: (jvmops.LSHL, scalars.LONG, scalars.INT),
 95 |     0xC4: (jvmops.LSHR, scalars.LONG, scalars.INT),
 96 |     0xC5: (jvmops.LUSHR, scalars.LONG, scalars.INT),
 97 |     0xC6: (jvmops.FADD, scalars.FLOAT, scalars.FLOAT),
 98 |     0xC7: (jvmops.FSUB, scalars.FLOAT, scalars.FLOAT),
 99 |     0xC8: (jvmops.FMUL, scalars.FLOAT, scalars.FLOAT),
100 |     0xC9: (jvmops.FDIV, scalars.FLOAT, scalars.FLOAT),
101 |     0xCA: (jvmops.FREM, scalars.FLOAT, scalars.FLOAT),
102 |     0xCB: (jvmops.DADD, scalars.DOUBLE, scalars.DOUBLE),
103 |     0xCC: (jvmops.DSUB, scalars.DOUBLE, scalars.DOUBLE),
104 |     0xCD: (jvmops.DMUL, scalars.DOUBLE, scalars.DOUBLE),
105 |     0xCE: (jvmops.DDIV, scalars.DOUBLE, scalars.DOUBLE),
106 |     0xCF: (jvmops.DREM, scalars.DOUBLE, scalars.DOUBLE),
107 | }
108 | BINARY_LIT = {
109 |     0xD0: jvmops.IADD,
110 |     0xD1: jvmops.ISUB,
111 |     0xD2: jvmops.IMUL,
112 |     0xD3: jvmops.IDIV,
113 |     0xD4: jvmops.IREM,
114 |     0xD5: jvmops.IAND,
115 |     0xD6: jvmops.IOR,
116 |     0xD7: jvmops.IXOR,
117 |     0xD8: jvmops.IADD,
118 |     0xD9: jvmops.ISUB,
119 |     0xDA: jvmops.IMUL,
120 |     0xDB: jvmops.IDIV,
121 |     0xDC: jvmops.IREM,
122 |     0xDD: jvmops.IAND,
123 |     0xDE: jvmops.IOR,
124 |     0xDF: jvmops.IXOR,
125 |     0xE0: jvmops.ISHL,
126 |     0xE1: jvmops.ISHR,
127 |     0xE2: jvmops.IUSHR,
128 | }
129 | 


--------------------------------------------------------------------------------
/enjarify/jvm/optimization/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2015 Google Inc. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/enjarify/jvm/optimization/consts.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2015 Google Inc. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import collections
16 | 
17 | from .. import scalartypes as scalars
18 | from .. import ir
19 | 
def allocateRequiredConstants(pool, long_irs):
    """Pre-allocate constant pool entries for primitive constants in long methods.

    pool: constant pool object; this function uses its ``vals``, ``space()``,
        ``lowspace()`` and ``insertDirectly(key, flag)`` members.
    long_irs: iterable of IR objects exposing ``flat_instructions``.

    Nothing is returned; the pool is modified in place.
    """
    # see comments in writebytecode.finishCodeAttrs
    # The allocation below is essentially greedy. That is far from optimal, but
    # this code is almost never needed in the first place: no known real world
    # class even comes close to exhausting the constant pool.
    narrow_counts = collections.Counter()
    wide_counts = collections.Counter()
    inline_sizes = {}
    for code_ir in long_irs:
        for instr in code_ir.flat_instructions:
            if not isinstance(instr, ir.PrimConstant):
                continue
            cp_key = instr.cpool_key()
            size = len(instr.bytecode)
            inline_sizes[cp_key] = size
            # Only count constants whose current encoding is longer than the
            # size threshold for their width class.
            if scalars.iswide(instr.st):
                if size > 3:
                    wide_counts[cp_key] += 1
            elif size > 2:
                narrow_counts[cp_key] += 1

    # Constants already in the pool do not need a new slot.
    for existing in pool.vals:
        narrow_counts.pop(existing, None)
        wide_counts.pop(existing, None)

    # If everything fits, give the most frequently used narrow constants the
    # remaining low slots first (a wide constant is counted as two slots).
    slots_wanted = len(narrow_counts) + 2 * len(wide_counts)
    if pool.space() >= slots_wanted and pool.lowspace() > 0:
        # Counter.most_common is not used because its order is not
        # deterministic when counts tie; the key itself breaks ties here.
        by_frequency = sorted(narrow_counts, key=lambda k: (-narrow_counts[k], k))
        for cp_key in by_frequency[:pool.lowspace()]:
            pool.insertDirectly(cp_key, True)
            del narrow_counts[cp_key]

    # Score = (current encoded length - 3) * number of uses.
    scores = {k: (inline_sizes[k] - 3) * uses for k, uses in narrow_counts.items()}
    scores.update((k, (inline_sizes[k] - 3) * uses) for k, uses in wide_counts.items())

    def by_score(k):
        return scores[k], k

    # Ascending order, so the best candidates sit at the end of each queue.
    narrow_queue = sorted(narrow_counts, key=by_score)
    wide_queue = sorted(wide_counts, key=by_score)
    while pool.space() >= 1 and (narrow_queue or wide_queue):
        if not narrow_queue and pool.space() < 2:
            break

        # Compare the best wide constant against the best two narrow ones.
        wide_gain = scores[wide_queue[-1]] if wide_queue else 0
        narrow_gain = sum(scores[k] for k in narrow_queue[-2:])
        if pool.space() >= 2 and wide_gain > narrow_gain and wide_gain > 0:
            pool.insertDirectly(wide_queue.pop(), False)
        elif narrow_gain > 0:
            pool.insertDirectly(narrow_queue.pop(), True)
        else:
            break
77 | 


--------------------------------------------------------------------------------
/enjarify/jvm/optimization/jumps.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2015 Google Inc. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import struct
 16 | 
 17 | from .. import ir, error
 18 | from ..jvmops import *
 19 | from . import options
 20 | 
 21 | def _calcMinimumPositions(instrs):
 22 |     posd = {}
 23 |     pos = 0
 24 |     for ins in instrs:
 25 |         posd[ins] = pos
 26 |         if isinstance(ins, ir.LazyJumpBase):
 27 |             pos += ins.min
 28 |         elif isinstance(ins, ir.Switch):
 29 |             pad = (-pos-1) % 4
 30 |             pos += pad + ins.nopad_size
 31 |         else:
 32 |             pos += len(ins.bytecode)
 33 |     return posd, pos
 34 | 
 35 | def optimizeJumps(irdata):
 36 |     # For jump offsets of more than +-32767, a longer form of the jump instruction
 37 |     # is required. This function finds the optimal jump widths by optimistically
 38 |     # starting with everything narrow and then iteratively marking instructions
 39 |     # as wide if their offset is too large (in rare cases, this can in turn cause
 40 |     # other jumps to become wide, hence iterating until convergence)
 41 |     instrs = irdata.flat_instructions
 42 |     jump_instrs = [ins for ins in instrs if isinstance(ins, ir.LazyJumpBase)]
 43 | 
 44 |     while 1:
 45 |         done = True
 46 |         posd, _ = _calcMinimumPositions(instrs)
 47 | 
 48 |         for ins in jump_instrs:
 49 |             if ins.min < ins.max and ins.widenIfNecessary(irdata.labels, posd):
 50 |                 done = False
 51 |         if done:
 52 |             break
 53 | 
 54 |     for ins in jump_instrs:
 55 |         assert ins.min <= ins.max
 56 |         ins.max = ins.min
 57 | 
 58 | def createBytecode(irdata, opts):
 59 |     instrs = irdata.flat_instructions
 60 |     posd, end_pos = _calcMinimumPositions(instrs)
 61 | 
 62 |     bytecode = bytearray()
 63 |     for ins in instrs:
 64 |         if isinstance(ins, (ir.LazyJumpBase, ir.Switch)):
 65 |             ins.calcBytecode(posd, irdata.labels)
 66 |         bytecode += ins.bytecode
 67 |     assert len(bytecode) == end_pos
 68 | 
 69 | 
 70 |     if len(bytecode) > 65535:
 71 |         # If code is too long and optimization is off, raise exception so we can
 72 |         # retry with optimization. If it is still too long with optimization,
 73 |         # don't raise an error, since a class with illegally long code is better
 74 |         # than no output at all.
 75 |         if opts is not options.ALL:
 76 |             raise error.ClassfileLimitExceeded()
 77 | 
 78 | 
 79 |     prev_instr_map = dict(zip(instrs[1:], instrs))
 80 |     packed_excepts = []
 81 |     for s, e, h, c in irdata.excepts:
 82 |         # There appears to be a bug in the JVM where in rare cases, it throws
 83 |         # the exception at the address of the instruction _before_ the instruction
 84 |         # that actually caused the exception, triggering the wrong handler
 85 |         # therefore we include the previous (IR) instruction too
 86 |         # Note that this cannot cause an overlap because in that case the previous
 87 |         # instruction would just be a label and hence not change anything
 88 |         s = prev_instr_map.get(s, s)
 89 | 
 90 |         s_off = posd[s]
 91 |         e_off = posd[e]
 92 |         h_off = posd[h]
 93 |         assert s_off <= e_off
 94 |         if s_off < e_off:
 95 |             packed_excepts.append(struct.pack('>HHHH', s_off, e_off, h_off, c))
 96 |         else:
 97 |             print('Skipping zero width exception!')
 98 |             assert 0
 99 | 
100 |     return bytes(bytecode), packed_excepts
101 | 


--------------------------------------------------------------------------------
/enjarify/jvm/optimization/options.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2015 Google Inc. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
class Options:
    """Set of boolean switches selecting which optimizations are applied.

    Every switch defaults to False and is stored as an attribute of the same
    name.
    """
    def __init__(self, inline_consts=False, prune_store_loads=False,
        copy_propagation=False, remove_unused_regs=False, dup2ize=False,
        sort_registers=False, split_pool=False, delay_consts=False):
        switches = (
            ('inline_consts', inline_consts),
            ('prune_store_loads', prune_store_loads),
            ('copy_propagation', copy_propagation),
            ('remove_unused_regs', remove_unused_regs),
            ('dup2ize', dup2ize),
            ('sort_registers', sort_registers),
            ('split_pool', split_pool),
            ('delay_consts', delay_consts),
        )
        for name, enabled in switches:
            setattr(self, name, enabled)
27 | 
# Every optimization disabled
NONE = Options()
# Options which make the generated code more readable for humans
PRETTY = Options(
    inline_consts=True,
    prune_store_loads=True,
    copy_propagation=True,
    remove_unused_regs=True,
)
# Every optimization enabled
ALL = Options(
    inline_consts=True,
    prune_store_loads=True,
    copy_propagation=True,
    remove_unused_regs=True,
    dup2ize=True,
    sort_registers=True,
    split_pool=True,
    delay_consts=True,
)
33 | 


--------------------------------------------------------------------------------
/enjarify/jvm/optimization/registers.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2015 Google Inc. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import collections
 16 | 
 17 | from .. import ir
 18 | from .. import scalartypes as scalars
 19 | from ..jvmops import *
 20 | 
 21 | # Copy propagation - when one register is moved to another, keep track and replace
 22 | # all loads with loads from the original register (as long as it hasn't since been
 23 | # overwritten). Note that stores won't be removed, since they may still be needed
 24 | # in some cases, but if they are unused, they'll be removed in a subsequent pass
 25 | # As usual, assume no iincs
 26 | 
 27 | # A set of registers that currently are copies of each other.
 28 | class _CopySet:
 29 |     def __init__(self, key):
 30 |         self.root = key
 31 |         self.set = {key}
 32 |         self.q = [] # keep track of insertion order in case root is overwritten
 33 | 
 34 |     def add(self, key):
 35 |         assert self.set
 36 |         self.set.add(key)
 37 |         self.q.append(key)
 38 | 
 39 |     def remove(self, key):
 40 |         self.set.remove(key)
 41 |         # Heuristic - use oldest element still in set as new root
 42 |         while self.q and self.root not in self.set:
 43 |             self.root = self.q.pop(0)
 44 | 
 45 |     def copy(self):
 46 |         new = _CopySet(self.root)
 47 |         new.set = self.set.copy()
 48 |         new.q = self.q[:]
 49 |         return new
 50 | 
 51 | # Map registers to CopySets
 52 | class _CopySetsMap:
 53 |     def __init__(self):
 54 |         self.lookup = {}
 55 | 
 56 |     def _get(self, key): return self.lookup.setdefault(key, _CopySet(key))
 57 | 
 58 |     def clobber(self, key):
 59 |         self._get(key).remove(key)
 60 |         del self.lookup[key]
 61 | 
 62 |     def move(self, dest, src):
 63 |         # return false if the corresponding instructions should be removed
 64 |         s_set = self._get(src)
 65 |         d_set = self._get(dest)
 66 |         if s_set is d_set:
 67 |             # src and dest are copies of same value, so we can remove
 68 |             return False
 69 |         d_set.remove(dest)
 70 |         s_set.add(dest)
 71 |         self.lookup[dest] = s_set
 72 |         return True
 73 | 
 74 |     def load(self, key):
 75 |         return self._get(key).root
 76 | 
 77 |     def copy(self):
 78 |         copies = {}
 79 |         new = _CopySetsMap()
 80 |         for k, v in self.lookup.items():
 81 |             if v not in copies:
 82 |                 copies[v] = v.copy()
 83 |             new.lookup[k] = copies[v]
 84 |         return new
 85 | 
def copyPropagation(irdata):
    """Redirect register loads to the root of their copy set.

    Walks ``irdata.flat_instructions`` once while tracking, in a
    ``_CopySetsMap``, which registers are currently copies of each other.
    A load followed directly by a store is treated as a register move.
    Loads from a register whose copy set has a different root are replaced
    by a load from that root; a move between registers already in the same
    copy set is removed entirely (both the load and the store).

    All changes are collected and applied with ``irdata.replaceInstrs``.
    """
    instrs = irdata.flat_instructions
    # instruction -> list of replacement instructions ([] means delete)
    replace = {}

    # label -> snapshot of the copy info at the jump that targets it, recorded
    # only for labels whose predecessor count is exactly 1
    single_pred_infos = {}

    prev = None
    current = _CopySetsMap()
    for instr in instrs:
        # reset all info when control flow is merged
        if instr in irdata.jump_targets:
            # try to use info if this was a single predecessor forward jump
            # (for a backward jump no snapshot has been recorded yet, so the
            # .get() below falls back to an empty map)
            if prev and not prev.fallsthrough() and irdata.target_pred_counts.get(instr) == 1:
                current = single_pred_infos.get(instr, _CopySetsMap())
            else:
                current = _CopySetsMap()

        elif isinstance(instr, ir.RegAccess):
            key = instr.key
            if instr.store:
                # check if previous instr was a load
                if isinstance(prev, ir.RegAccess) and not prev.store:
                    # load + store == register move; move() returns False when
                    # both registers already hold the same value
                    if not current.move(dest=key, src=prev.key):
                        replace[prev] = []
                        replace[instr] = []
                else:
                    # value of unknown origin stored: key is no longer a copy
                    current.clobber(key)
            else:
                root_key = current.load(key)
                if key != root_key:
                    assert instr not in replace
                    # replace with load from root register instead
                    replace[instr] = [ir.RegAccess(root_key[0], root_key[1], False)]

        else:
            # remember the current state for single-predecessor jump targets
            for target in instr.targets():
                label = irdata.labels[target]
                if irdata.target_pred_counts.get(label) == 1:
                    single_pred_infos[label] = current.copy()

        prev = instr
    irdata.replaceInstrs(replace)
128 | 
def _isRemoveable(instr):
    """Return True if ``instr`` is a register load or a constant.

    Such instructions are known to have no side effects, so they can be
    dropped. ``instr`` may be None, in which case False is returned.
    """
    is_load = isinstance(instr, ir.RegAccess) and not instr.store
    return is_load or isinstance(instr, (ir.PrimConstant, ir.OtherConstant))
135 | 
def removeUnusedRegisters(irdata):
    """Remove stores to registers that are never read anywhere in the method.

    If the instruction right before such a store is a load or a constant,
    both are deleted; otherwise the store is replaced by a pop of matching
    width. Changes are applied with ``irdata.replaceInstrs``.
    """
    instrs = irdata.flat_instructions
    read_keys = {
        ins.key for ins in instrs
        if isinstance(ins, ir.RegAccess) and not ins.store
    }

    replace = {}
    following = None
    for instr in instrs:
        # 'before' is the instruction immediately preceding instr (or None)
        before, following = following, instr
        if not isinstance(instr, ir.RegAccess) or instr.key in read_keys:
            continue
        # every load's key is in read_keys, so only stores can get here
        assert instr.store
        if _isRemoveable(before):
            # producer has no side effects: drop it together with the store
            replace[before] = []
            replace[instr] = []
        else:
            # producer must still run, so just discard its result
            replace[instr] = [ir.Pop2() if instr.wide else ir.Pop()]
    irdata.replaceInstrs(replace)
158 | 
159 | # Allocate registers to JVM registers on a first come, first serve basis
160 | # For simplicity, parameter registers are preserved as is
def simpleAllocateRegisters(irdata):
    """Assign JVM register slots in order of first use and encode accesses.

    Entries of ``irdata.initial_args`` keep their positions as slot numbers.
    Any other register gets the next free slot (two slots if the access is
    wide) the first time it is seen. Sets ``irdata.numregs`` to the number of
    slots used.
    """
    slot_of = {arg: slot for slot, arg in enumerate(irdata.initial_args)}
    next_slot = len(irdata.initial_args)

    for instr in irdata.flat_instructions:
        if not isinstance(instr, ir.RegAccess):
            continue
        key = instr.key
        if key not in slot_of:
            slot_of[key] = next_slot
            next_slot += 2 if instr.wide else 1
        instr.calcBytecode(slot_of[key])
    irdata.numregs = next_slot
175 | 
176 | # Sort registers by number of uses so that more frequently used registers will
177 | # end up in slots 0-3 or 4-255 and benefit from the shorter instruction forms
178 | # For simplicity, parameter registers are still preserved as is with one exception
def sortAllocateRegisters(irdata):
    """Assign JVM register slots by descending use count and encode accesses.

    Entries of ``irdata.initial_args`` keep their slots; the remaining
    registers follow, most used first (ties broken by key). At most one
    non-parameter register may be swapped with a parameter in the first four
    slots, in which case a load/store pair is prepended to
    ``irdata.flat_instructions``. Sets ``irdata.numregs``.
    """
    instrs = irdata.flat_instructions

    # number of accesses (loads and stores) per register key
    use_counts = collections.Counter()
    for instr in instrs:
        if isinstance(instr, ir.RegAccess):
            use_counts[instr.key] += 1

    # regs[i] is the key stored in slot i, or None for the second slot of a
    # wide register
    regs = irdata.initial_args[:]
    rest = sorted(use_counts, key=lambda k:(-use_counts[k], k))
    for key in rest:
        # If key is a param, it was already added at the beginning
        if key not in irdata.initial_args:
            regs.append(key)
            if scalars.iswide(key[1]):
                regs.append(None)

    # Sometimes the non-param registers are used more times than the param registers
    # and it is beneficial to swap them (which requires inserting code at the
    # beginning of the method to move the value if the param is not unused)
    # This is very complicated to do in general, so the following code only does
    # this in one specific circumstance which should nevertheless be sufficient
    # to capture the majority of the benefit
    # Specifically, it only swaps at most one register, and only in the case that
    # it is nonwide and there is a nonwide parameter in the first 4 slots that
    # it can be swapped with. Also, it doesn't bother to check if param is unused.
    candidate_i = max(4, len(irdata.initial_args))
    # make sure candidate is valid, nonwide register
    if len(regs) > candidate_i and regs[candidate_i] is not None:
        candidate = regs[candidate_i]
        if not scalars.iswide(candidate[1]) and use_counts[candidate] >= 3:
            for i in range(min(4, len(irdata.initial_args))):
                # make sure target is not wide
                # (regs[i+1] is always in range: i <= 3 and len(regs) > candidate_i >= 4)
                if regs[i] is None or regs[i+1] is None:
                    continue

                target = regs[i]
                # only swap when the candidate is used clearly more often
                if use_counts[candidate] > use_counts[target] + 3:
                    # swap register assignments
                    regs[i], regs[candidate_i] = candidate, target
                    # add move instructions at beginning of method
                    # the load reads raw slot i; the store goes through the
                    # normal key lookup below and so lands in target's new slot
                    load = ir.RegAccess.raw(i, target[1], False)
                    store = ir.RegAccess(target[0], target[1], True)
                    instrs = [load, store] + instrs
                    irdata.flat_instructions = instrs
                    break

    # Now generate bytecode from the selected register allocations
    irdata.numregs = len(regs)
    regmap = {v:i for i,v in enumerate(regs) if v is not None}
    for instr in instrs:
        # skip accesses that already have bytecode
        # NOTE(review): presumably this is how the raw load above is skipped - confirm
        if instr.bytecode is None and isinstance(instr, ir.RegAccess):
            instr.calcBytecode(regmap[instr.key])
232 | 


--------------------------------------------------------------------------------
/enjarify/jvm/optimization/stack.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2015 Google Inc. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from .. import ir
 16 | from ..jvmops import *
 17 | 
 18 | def visitLinearCode(irdata, visitor):
 19 |     # Visit linear sections of code, pessimistically treating all exception
 20 |     # handler ranges as jumps.
 21 |     except_level = 0
 22 |     for instr in irdata.flat_instructions:
 23 |         if instr in irdata.except_starts:
 24 |             except_level += 1
 25 |             visitor.visitExceptionRange()
 26 |         elif instr in irdata.except_ends:
 27 |             except_level -= 1
 28 | 
 29 |         if except_level > 0:
 30 |             continue
 31 | 
 32 |         if instr in irdata.jump_targets or isinstance(instr, (ir.LazyJumpBase, ir.Switch)):
 33 |             visitor.visitJumpTargetOrBranch(instr)
 34 |         elif not instr.fallsthrough():
 35 |             visitor.visitReturn()
 36 |         else:
 37 |             visitor.visit(instr)
 38 |     assert except_level == 0
 39 |     return visitor
 40 | 
 41 | class NoExceptVisitorBase:
 42 |     def visitExceptionRange(self): self.reset()
 43 |     def visitJumpTargetOrBranch(self, instr): self.reset()
 44 | 
 45 | class ConstInliner(NoExceptVisitorBase):
 46 |     def __init__(self):
 47 |         self.uses = {}
 48 |         self.notmultiused = set()
 49 |         self.current = {}
 50 | 
 51 |     def reset(self):
 52 |         self.current = {}
 53 | 
 54 |     def visitReturn(self):
 55 |         for key in self.current:
 56 |             self.notmultiused.add(self.current[key])
 57 |         self.reset()
 58 | 
 59 |     def visit(self, instr):
 60 |         if isinstance(instr, ir.RegAccess):
 61 |             key = instr.key
 62 |             if instr.store:
 63 |                 if key in self.current:
 64 |                     self.notmultiused.add(self.current[key])
 65 |                 self.current[key] = instr
 66 |             elif key in self.current:
 67 |                 # if currently used 0, mark it used once
 68 |                 # if used once already, mark it as multiused
 69 |                 if self.current[key] in self.uses:
 70 |                     del self.current[key]
 71 |                 else:
 72 |                     self.uses[self.current[key]] = instr
 73 | 
 74 | def inlineConsts(irdata):
 75 |     # Inline constants which are only used once or not at all. This only covers
 76 |     # linear sections of code and pessimistically assumes everything is used
 77 |     # when it reaches a jump or exception range. Essentially, this means that
 78 |     # the value can only be considered unused if it is either overwritten by a
 79 |     # store or reaches a return or throw before any jumps.
 80 |     # As usual, assume no iinc.
 81 |     instrs = irdata.flat_instructions
 82 |     visitor = visitLinearCode(irdata, ConstInliner())
 83 | 
 84 |     replace = {}
 85 |     for ins1, ins2 in zip(instrs, instrs[1:]):
 86 |         if ins2 in visitor.notmultiused and isinstance(ins1, (ir.PrimConstant, ir.OtherConstant)):
 87 |             replace[ins1] = []
 88 |             replace[ins2] = []
 89 |             if ins2 in visitor.uses:
 90 |                 replace[visitor.uses[ins2]] = [ins1]
 91 |     irdata.replaceInstrs(replace)
 92 | 
 93 | class StoreLoadPruner(NoExceptVisitorBase):
 94 |     def __init__(self):
 95 |         self.current = {}
 96 |         self.last = None
 97 |         self.removed = set()
 98 | 
 99 |     def reset(self):
100 |         self.current = {}
101 |         self.last = None
102 | 
103 |     def visitReturn(self):
104 |         for pair in self.current.values():
105 |             assert pair[0].store and not pair[1].store
106 |             self.removed.update(pair)
107 |         self.reset()
108 | 
109 |     def visit(self, instr):
110 |         if isinstance(instr, ir.RegAccess):
111 |             key = instr.key
112 |             if instr.store:
113 |                 if key in self.current:
114 |                     pair = self.current[key]
115 |                     assert pair[0].store and not pair[1].store
116 |                     self.removed.update(self.current.pop(key))
117 |                 self.last = instr
118 |             else:
119 |                 self.current.pop(key, None)
120 |                 if self.last and self.last.key == key:
121 |                     self.current[key] = self.last, instr
122 |                 self.last = None
123 |         elif not isinstance(instr, ir.Label):
124 |             self.last = None
125 | 
def pruneStoreLoads(irdata):
    """Remove a store that is immediately followed by a load of the same register.

    A label may sit between the two. The pair is removed only if it can be
    proven that the register isn't read again. As with the other passes in
    this file, only linear sections of code are considered.
    Must not be run before dup2ize!
    """
    pruner = visitLinearCode(irdata, StoreLoadPruner())
    replacements = {}
    for instr in pruner.removed:
        replacements[instr] = []
    irdata.replaceInstrs(replacements)
133 | 
134 | # used by writeir too
def genDups(needed, needed_after):
    """Yield the dup/dup2 instructions needed to duplicate a stack value.

    Starts from one copy of the value on the stack. Yields ``needed`` lists,
    one before each use (each use consumes one copy), then one final list of
    pops for copies left over beyond ``needed_after``. At most 4 copies are
    kept on the stack; thanks to dup2 this asymptotically takes only half a
    byte per access.
    """
    copies = 1
    remaining = needed + needed_after

    for _ in range(needed):
        ops = []
        # Both conditions imply copies < remaining, so no separate check.
        if copies == 1 and remaining >= 2:
            ops.append(ir.Dup())
            copies = 2
        if copies == 2 and remaining >= 4:
            ops.append(ir.Dup2())
            copies = 4
        # this use consumes one copy
        copies -= 1
        remaining -= 1
        yield ops
    assert copies >= remaining
    # check if we have to pop at end
    yield [ir.Pop() for _ in range(copies - remaining)]
158 | 
159 | # Range of instruction indexes at which a given register is read (in linear code)
class UseRange:
    """Instruction indexes at which one register is read, in order of addition."""
    def __init__(self, uses):
        self.uses = uses

    def add(self, i):
        self.uses.append(i)

    @property
    def start(self):
        return self.uses[0]

    @property
    def end(self):
        return self.uses[-1]

    def subtract(self, other):
        """Yield the parts of this range strictly outside ``other``.

        Up to two UseRanges are produced (before ``other.start`` and after
        ``other.end``); a part with fewer than two uses is dropped.
        """
        lo, hi = other.start, other.end
        before = []
        after = []
        for i in self.uses:
            if i < lo:
                before.append(i)
            if i > hi:
                after.append(i)
        for part in (before, after):
            if len(part) >= 2:
                yield UseRange(part)

    def sortkey(self):
        return len(self.uses), self.uses[0]
182 | 
def makeRange(instr):
    """Return a new, empty UseRange; ``instr`` must be a register load."""
    is_load = isinstance(instr, ir.RegAccess) and not instr.store
    assert is_load
    return UseRange([])
186 | 
def dup2ize(irdata):
    """Replace repeated narrow register reads with one read plus dup/dup2.

    Collects, per register, the indexes of loads that directly follow a label
    with a non-None id, greedily picks disjoint ranges of such loads, and
    rewrites each chosen range via ``irdata.replaceInstrs`` so that only the
    first load remains and the later ones are covered by duplicates made with
    ``genDups``.
    """
    # This optimization replaces narrow registers which are frequently read at
    # stack height 0 with a single read followed by the more efficient dup and
    # dup2 instructions. This asymptotically uses only half a byte per access.
    # For simplicity, instead of explicitly keeping track of which locations
    # have stack height 0, we take advantage of the invariant that ranges of code
    # corresponding to a single Dalvik instruction always begin with empty stack.
    # These can be recognized by labels with a non-None id.
    # This isn't true for move-result instructions, but in that case the range
    # won't begin with a register load so it doesn't matter.
    # Note that pruneStoreLoads breaks this invariant, so dup2ize must be run first.
    # Also, for simplicity, we only keep at most one such value on the stack at
    # a time (duplicated up to 4 times).
    instrs = irdata.flat_instructions

    # finished UseRanges
    ranges = []
    # register key -> UseRange still being extended
    current = {}
    # True when the previous instruction was a label with a non-None id
    at_head = False
    for i, instr in enumerate(instrs):
        # if not linear section of bytecode, reset everything. Exceptions are ok
        # since they clear the stack, but jumps obviously aren't.
        if instr in irdata.jump_targets or isinstance(instr, (ir.If, ir.Switch)):
            ranges.extend(current.values())
            current = {}

        if isinstance(instr, ir.RegAccess):
            key = instr.key
            if not instr.wide:
                if instr.store:
                    # a store ends the range for this register
                    if key in current:
                        ranges.append(current.pop(key))
                elif at_head:
                    current.setdefault(key, makeRange(instr)).add(i)

        at_head = isinstance(instr, ir.Label) and instr.id is not None
    ranges.extend(current.values())
    # a single read gains nothing from duplication
    ranges = [ur for ur in ranges if len(ur.uses) >= 2]
    # ascending by (number of uses, first index), so pop() takes the largest
    ranges.sort(key=UseRange.sortkey)

    # Greedily choose a set of disjoint ranges to dup2ize.
    chosen = []
    while ranges:
        best = ranges.pop()
        chosen.append(best)
        newranges = []
        # keep only the parts of the other ranges that lie outside best
        for ur in ranges:
            newranges.extend(ur.subtract(best))
        ranges = sorted(newranges, key=UseRange.sortkey)

    replace = {}
    for ur in chosen:
        # the generator's final (pop) list is never requested here
        gen = genDups(len(ur.uses), 0)
        for pos in ur.uses:
            ops = next(gen)
            # remember to include initial load!
            if pos == ur.start:
                ops = [instrs[pos]] + ops
            replace[instrs[pos]] = ops
    irdata.replaceInstrs(replace)
246 | 


--------------------------------------------------------------------------------
/enjarify/jvm/scalartypes.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2015 Google Inc. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
# Primitive type inference
# In dalvik bytecode, constants are untyped, which effectively means a union type
# They can be zero (int/float/null), narrow (int/float) or wide (long/double)

# Each scalar type is one bit, so a set of possible types is a bitwise OR of them
INVALID = 0
INT = 1 << 0
FLOAT = 1 << 1
OBJ = 1 << 2
LONG = 1 << 3
DOUBLE = 1 << 4

# Possible types of a zero constant (int, float or null)
ZERO = INT | FLOAT | OBJ
# Possible types of a narrow constant
C32 = INT | FLOAT
# Possible types of a wide constant
C64 = LONG | DOUBLE
# Union of every scalar type
ALL = ZERO | C64
30 | 
# Maps the first character code of a type descriptor to its scalar type
_descToScalar = {
    ord('Z'): INT,
    ord('B'): INT,
    ord('C'): INT,
    ord('S'): INT,
    ord('I'): INT,
    ord('F'): FLOAT,
    ord('J'): LONG,
    ord('D'): DOUBLE,
    ord('L'): OBJ,
    ord('['): OBJ,
}
def fromDesc(desc):
    """Return the scalar type for type descriptor ``desc``.

    ``desc[0]`` must be an integer character code (presumably ``desc`` is a
    bytes object - confirm against callers). Raises KeyError for an unknown
    leading character.
    """
    leading = desc[0]
    return _descToScalar[leading]
34 | 
def iswide(st):
    """Return the LONG/DOUBLE bits of ``st``; nonzero iff ``st`` includes a wide type."""
    wide_bits = st & C64
    return wide_bits
37 | 
def paramTypes(method_id, static):
    """Return the scalar type of each entry of the method's spaced parameter list.

    Entries of ``method_id.getSpacedParamTypes(static)`` that are None map to
    INVALID; all others are converted with fromDesc.
    """
    scalar_types = []
    for desc in method_id.getSpacedParamTypes(static):
        if desc is None:
            scalar_types.append(INVALID)
        else:
            scalar_types.append(fromDesc(desc))
    return scalar_types
41 | 


--------------------------------------------------------------------------------
/enjarify/jvm/writebytecode.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2015 Google Inc. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from ..byteio import Writer
 16 | from . import writeir, ir
 17 | from .optimization import registers, jumps, stack, consts
 18 | 
 19 | def getCodeIR(pool, method, opts):
 20 |     if method.code is not None:
 21 |         irdata = writeir.writeBytecode(pool, method, opts)
 22 | 
 23 |         if opts.inline_consts:
 24 |             stack.inlineConsts(irdata)
 25 | 
 26 |         if opts.copy_propagation:
 27 |             registers.copyPropagation(irdata)
 28 | 
 29 |         if opts.remove_unused_regs:
 30 |             registers.removeUnusedRegisters(irdata)
 31 | 
 32 |         if opts.dup2ize:
 33 |             stack.dup2ize(irdata)
 34 | 
 35 |         if opts.prune_store_loads:
 36 |             stack.pruneStoreLoads(irdata)
 37 |             if opts.remove_unused_regs:
 38 |                 registers.removeUnusedRegisters(irdata)
 39 | 
 40 |         if opts.sort_registers:
 41 |             registers.sortAllocateRegisters(irdata)
 42 |         else:
 43 |             registers.simpleAllocateRegisters(irdata)
 44 |         return irdata
 45 |     return None
 46 | 
 47 | def finishCodeAttrs(pool, code_irs, opts):
 48 |     code_irs = [x for x in code_irs if x is not None]
 49 |     # if we have any code, make sure to reserve pool slot for attr name
 50 |     if code_irs:
 51 |         pool.utf8(b"Code")
 52 | 
 53 |     if opts.delay_consts:
 54 |         # In the rare case where the class references too many constants to fit in
 55 |         # the constant pool, we can workaround this by replacing primative constants
 56 |         # e.g. ints, longs, floats, and doubles, with a sequence of bytecode instructions
 57 |         # to generate that constant. This obviously increases the size of the method's
 58 |         # bytecode, so we ideally only want to do it to constants in short methods.
 59 | 
 60 |         # First off, we find which methods are potentially too long. If a method
 61 |         # will be under 65536 bytes even with all constants replaced, then it
 62 |         # will be ok no matter what we do.
 63 |         long_irs = [irw for irw in code_irs if irw.calcUpperBound() >= 65536]
 64 | 
 65 |         # Now allocate constants used by potentially long methods
 66 |         if long_irs:
 67 |             consts.allocateRequiredConstants(pool, long_irs)
 68 | 
 69 |         # If there's space left in the constant pool, allocate constants used by short methods
 70 |         for _ir in code_irs:
 71 |             for ins in _ir.flat_instructions:
 72 |                 if isinstance(ins, ir.PrimConstant):
 73 |                     ins.fix_with_pool(pool)
 74 | 
 75 |     return {irdata.method: writeCodeAttributeTail(pool, irdata, opts=opts) for irdata in code_irs}
 76 | 
 77 | def writeCodeAttributeTail(pool, irdata, opts):
 78 |     method = irdata.method
 79 |     jumps.optimizeJumps(irdata)
 80 |     bytecode, excepts = jumps.createBytecode(irdata, opts)
 81 | 
 82 |     stream = Writer()
 83 |     # For simplicity, don't bother calculating the actual maximum stack height
 84 |     # of the generated code. Instead, just use a value that will always be high
 85 |     # enough. Note that just setting this to 65535 is a bad idea since it tends
 86 |     # to cause StackOverflowErrors under default JVM memory settings
 87 |     stream.u16(300) # stack
 88 |     stream.u16(irdata.numregs) # locals
 89 | 
 90 |     stream.u32(len(bytecode))
 91 |     stream.write(bytecode)
 92 | 
 93 |     # exceptions
 94 |     stream.u16(len(excepts))
 95 |     stream.write(b''.join(excepts))
 96 | 
 97 |     # attributes
 98 |     stream.u16(0)
 99 |     return stream
100 | 


--------------------------------------------------------------------------------
/enjarify/jvm/writeclass.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2015 Google Inc. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from .. import flags
 16 | from ..byteio import Writer
 17 | from . import constantpool, writebytecode, error
 18 | from .optimization import options
 19 | 
 20 | def writeField(pool, stream, field):
 21 |     stream.u16(field.access & flags.FIELD_FLAGS)
 22 |     stream.u16(pool.utf8(field.id.name))
 23 |     stream.u16(pool.utf8(field.id.desc))
 24 |     if field.constant_value is not None:
 25 |         stream.u16(1)
 26 |         stream.u16(pool.utf8(b"ConstantValue"))
 27 |         stream.u32(2)
 28 | 
 29 |         ctype, val = field.constant_value
 30 |         # Ignore dalvik constant type and use actual field type instead
 31 |         index = {
 32 |             b'Z': pool.int,
 33 |             b'B': pool.int,
 34 |             b'S': pool.int,
 35 |             b'C': pool.int,
 36 |             b'I': pool.int,
 37 |             b'F': pool.float,
 38 |             b'J': pool.long,
 39 |             b'D': pool.double,
 40 |             b'Ljava/lang/String;': pool.string,
 41 |             b'Ljava/lang/Class;': pool.class_,
 42 |         }[field.id.desc](val)
 43 |         stream.u16(index)
 44 |     else:
 45 |         stream.u16(0) # no attributes
 46 | 
 47 | def writeMethod(pool, stream, method, code_attr_data):
 48 |     stream.u16(method.access & flags.METHOD_FLAGS)
 49 |     stream.u16(pool.utf8(method.id.name))
 50 |     stream.u16(pool.utf8(method.id.desc))
 51 | 
 52 |     if code_attr_data is not None:
 53 |         code_attr_data = code_attr_data.toBytes()
 54 |         stream.u16(1)
 55 |         stream.u16(pool.utf8(b"Code"))
 56 |         stream.u32(len(code_attr_data))
 57 |         stream.write(code_attr_data)
 58 |     else:
 59 |         stream.u16(0) # no attributes
 60 | 
 61 | def writeMethods(pool, stream, methods, opts):
 62 |     code_irs = []
 63 |     for method in methods:
 64 |         code_irs.append(writebytecode.getCodeIR(pool, method, opts=opts))
 65 |     code_attrs = writebytecode.finishCodeAttrs(pool, code_irs, opts=opts)
 66 | 
 67 |     stream.u16(len(methods))
 68 |     for method in methods:
 69 |         writeMethod(pool, stream, method, code_attrs.get(method))
 70 | 
 71 | def classFileAfterPool(cls, opts):
 72 |     stream = Writer()
 73 |     if opts.split_pool:
 74 |         pool = constantpool.SplitConstantPool()
 75 |     else:
 76 |         pool = constantpool.SimpleConstantPool()
 77 | 
 78 |     cls.parseData()
 79 |     access = cls.access & flags.CLASS_FLAGS
 80 |     if not access & flags.ACC_INTERFACE:
 81 |         # Not necessary for correctness, but this works around a bug in dx
 82 |         access |= flags.ACC_SUPER
 83 | 
 84 |     stream.u16(access) # access
 85 |     stream.u16(pool.class_(cls.name)) # this
 86 |     super_ = pool.class_(cls.super) if cls.super is not None else 0
 87 |     stream.u16(super_) # super
 88 | 
 89 |     # interfaces
 90 |     stream.u16(len(cls.interfaces))
 91 |     for interface in cls.interfaces:
 92 |         stream.u16(pool.class_(interface))
 93 | 
 94 |     # fields
 95 |     stream.u16(len(cls.data.fields))
 96 |     for field in cls.data.fields:
 97 |         writeField(pool, stream, field)
 98 | 
 99 |     # methods
100 |     writeMethods(pool, stream, cls.data.methods, opts=opts)
101 | 
102 |     # attributes
103 |     stream.u16(0)
104 |     return pool, stream
105 | 
def toClassFile(cls, opts):
    """Return the bytes of a complete class file for cls.

    Translation is first attempted with the given opts; if that raises
    error.ClassfileLimitExceeded it is repeated once with options.ALL.
    """
    # Optimistically try translating with the caller's options to speed things
    # up; if the resulting code is too big, retry with every optimization on
    try:
        pool, rest_stream = classFileAfterPool(cls, opts=opts)
    except error.ClassfileLimitExceeded:
        pool, rest_stream = classFileAfterPool(cls, opts=options.ALL)

    out = Writer()
    out.u32(0xCAFEBABE)
    # bytecode version 49.0 (minor version is written first)
    out.u16(0)
    out.u16(49)

    # constant pool, then the rest of the file
    pool.write(out)
    out.write(rest_stream.toBytes())
    return out.toBytes()
126 | 


--------------------------------------------------------------------------------
/enjarify/jvm/writeir.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2015 Google Inc. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import collections, struct
 16 | from functools import partial
 17 | 
 18 | from . import ir
 19 | from .. import flags, dalvik
 20 | from .jvmops import *
 21 | from . import arraytypes as arrays
 22 | from . import scalartypes as scalars
 23 | from . import mathops
 24 | from .optimization import stack
 25 | from .. import util
 26 | from ..typeinference import typeinference
 27 | 
 28 | # Code for converting dalvik bytecode to intermediate representation
 29 | # effectively this is just Java bytecode instructions with some abstractions for
 30 | # later optimization
 31 | 
# Maps a scalar type to its position in the order int, long, float, double,
# object; IRBlock.return_ adds this to IRETURN to pick the return opcode.
_ilfdaOrd = [scalars.INT, scalars.LONG, scalars.FLOAT, scalars.DOUBLE, scalars.OBJ].index
# Primitive array descriptor (b'[Z', b'[C', ...) -> NEWARRAY type operand 4..11
_newArrayCodes = {('['+t).encode(): v for t, v in zip('ZCFDBSIJ', range(4, 12))}
# Element type byte -> array store/load opcode. The opcodes are taken as a
# consecutive range, so the b' ' key is only a placeholder for the slot between
# the double and byte opcodes (presumably AASTORE/AALOAD); lookups for
# element types not in the table fall back to AASTORE/AALOAD at the call sites.
_arrStoreOps = {t.encode(): v for t, v in zip('IJFD BCS', range(IASTORE, SASTORE+1))}
_arrLoadOps = {t.encode(): v for t, v in zip('IJFD BCS', range(IALOAD, SALOAD+1))}
# boolean arrays share the byte array opcodes
_arrStoreOps[b'Z'] = BASTORE
_arrLoadOps[b'Z'] = BALOAD
 38 | 
 39 | # For generating IR instructions corresponding to a single Dalvik instruction
 40 | class IRBlock:
 41 |     def __init__(self, parent, pos):
 42 |         self.type_data = parent.types[pos]
 43 |         self.pool = parent.pool
 44 |         self.delay_consts = parent.opts.delay_consts
 45 |         self.pos = pos
 46 |         self.instructions = [ir.Label(pos)]
 47 | 
 48 |     def add(self, jvm_instr):
 49 |         self.instructions.append(jvm_instr)
 50 | 
 51 |     def _other(self, bytecode):
 52 |         self.add(ir.Other(bytecode=bytecode))
 53 | 
 54 |     def u8(self, op): self._other(struct.pack('>B', op))
 55 |     def u8u8(self, op, x): self._other(struct.pack('>BB', op, x))
 56 |     def u8u16(self, op, x): self._other(struct.pack('>BH', op, x))
 57 |     # wide non iinc
 58 |     def u8u8u16(self, op, op2, x): self._other(struct.pack('>BBH', op, op2, x))
 59 |     # invokeinterface
 60 |     def u8u16u8u8(self, op, x, y, z): self._other(struct.pack('>BHBB', op, x, y, z))
 61 | 
 62 |     def ldc(self, index):
 63 |         if index < 256:
 64 |             self.add(ir.OtherConstant(bytecode=bytes([LDC, index])))
 65 |         else:
 66 |             self.add(ir.OtherConstant(bytecode=struct.pack('>BH', LDC_W, index)))
 67 | 
 68 |     def load(self, reg, stype, desc=None, clsname=None):
 69 |         # if we know the register to be 0/null, don't bother loading
 70 |         if self.type_data.arrs[reg] == arrays.NULL:
 71 |             self.const(0, stype)
 72 |         else:
 73 |             self.add(ir.RegAccess(reg, stype, store=False))
 74 |             # cast to appropriate type if tainted
 75 |             if stype == scalars.OBJ and self.type_data.tainted[reg]:
 76 |                 assert desc is None or clsname is None
 77 |                 if clsname is None:
 78 |                     # remember to handle arrays - also fallthrough if desc is None
 79 |                     clsname = desc[1:-1] if (desc and desc.startswith(b'L')) else desc
 80 |                 if clsname is not None and clsname != b'java/lang/Object':
 81 |                     self.u8u16(CHECKCAST, self.pool.class_(clsname))
 82 | 
 83 |     def loadAsArray(self, reg):
 84 |         at = self.type_data.arrs[reg]
 85 |         if at == arrays.NULL:
 86 |             self.const_null()
 87 |         else:
 88 |             self.add(ir.RegAccess(reg, scalars.OBJ, store=False))
 89 |             if self.type_data.tainted[reg]:
 90 |                 if at == arrays.INVALID:
 91 |                     # needs to be some type of object array, so just cast to Object[]
 92 |                     self.u8u16(CHECKCAST, self.pool.class_(b'[Ljava/lang/Object;'))
 93 |                 else:
 94 |                     # note - will throw if actual type is boolean[] but there's not
 95 |                     # much we can do in this case
 96 |                     self.u8u16(CHECKCAST, self.pool.class_(at))
 97 | 
 98 |     def store(self, reg, stype):
 99 |         self.add(ir.RegAccess(reg, stype, store=True))
100 | 
101 |     def return_(self, stype=None):
102 |         if stype is None:
103 |             self.u8(RETURN)
104 |         else:
105 |             self.u8(IRETURN + _ilfdaOrd(stype))
106 | 
107 |     def const(self, val, stype):
108 |         assert (1<<64) > val >= 0
109 |         if stype == scalars.OBJ:
110 |             assert val == 0
111 |             self.const_null()
112 |         else:
113 |             # If constant pool is simple, assume we're in non-opt mode and only use
114 |             # the constant pool for generating constants instead of calculating
115 |             # bytecode sequences for them. If we're in opt mode, pass None for pool
116 |             # to generate bytecode instead
117 |             pool = None if self.delay_consts else self.pool
118 |             self.add(ir.PrimConstant(stype, val, pool=pool))
119 | 
120 |     def const_null(self):
121 |         self.add(ir.OtherConstant(bytecode=bytes([ACONST_NULL])))
122 | 
123 |     def fillarraysub(self, op, cbs, pop=True):
124 |         gen = stack.genDups(len(cbs), 0 if pop else 1)
125 |         for i, cb in enumerate(cbs):
126 |             for instr in next(gen):
127 |                 self.add(instr)
128 |             self.const(i, scalars.INT)
129 |             cb()
130 |             self.u8(op)
131 |         # may need to pop at end
132 |         for instr in next(gen):
133 |             self.add(instr)
134 | 
135 |     def newarray(self, desc):
136 |         if desc in _newArrayCodes:
137 |             self.u8u8(NEWARRAY, _newArrayCodes[desc])
138 |         else:
139 |             # can be either multidim array or object array descriptor
140 |             desc = desc[1:]
141 |             if desc.startswith(b'L'):
142 |                 desc = desc[1:-1]
143 |             self.u8u16(ANEWARRAY, self.pool.class_(desc))
144 | 
145 |     def fillarraydata(self, op, stype, vals):
146 |         self.fillarraysub(op, [partial(self.const, val, stype) for val in vals])
147 | 
148 |     def cast(self, dex, reg, index):
149 |         self.load(reg, scalars.OBJ)
150 |         self.u8u16(CHECKCAST, self.pool.class_(dex.clsType(index)))
151 |         self.store(reg, scalars.OBJ)
152 | 
153 |     def goto(self, target):
154 |         self.add(ir.Goto(target))
155 | 
156 |     def if_(self, op, target):
157 |         self.add(ir.If(op, target))
158 | 
159 |     def switch(self, default, jumps):
160 |         jumps = {util.s32(k):v for k,v in jumps.items() if v != default}
161 |         if jumps:
162 |             self.add(ir.Switch(default, jumps))
163 |         else:
164 |             self.u8(ir.POP)
165 |             self.goto(default)
166 | 
167 |     def generateExceptLabels(self):
168 |         s_ind = 0
169 |         e_ind = len(self.instructions)
170 |         # assume only Other instructions can throw
171 |         while s_ind < e_ind and not isinstance(self.instructions[s_ind], ir.Other):
172 |             s_ind += 1
173 |         while s_ind < e_ind and not isinstance(self.instructions[e_ind-1], ir.Other):
174 |             e_ind -= 1
175 | 
176 |         assert s_ind < e_ind
177 |         start_lbl, end_lbl = ir.Label(), ir.Label()
178 |         self.instructions.insert(s_ind, start_lbl)
179 |         self.instructions.insert(e_ind+1, end_lbl)
180 |         return start_lbl, end_lbl
181 | 
class IRWriter:
    """Holds the IR of one method while it is generated and optimized."""

    def __init__(self, pool, method, types, opts):
        self.pool = pool
        self.method = method
        self.types = types
        self.opts = opts

        # per Dalvik position data, filled in by createBlock
        self.iblocks = {}
        self.labels = {}

        self.flat_instructions = None
        self.excepts = []
        self.initial_args = None
        self.exception_redirects = {}

        self.except_starts = set()
        self.except_ends = set()
        self.jump_targets = set()
        # used to detect jump targets with a unique predecessor
        self.target_pred_counts = collections.defaultdict(int)

        self.numregs = None # will be set once registers are allocated (see registers.py)

    def calcInitialArgs(self, nregs, scalar_ptypes):
        """Record which register and scalar type each parameter slot uses.

        Parameters occupy the last len(scalar_ptypes) of the nregs
        registers; slots typed INVALID are recorded as None.
        """
        regoff = nregs - len(scalar_ptypes)
        self.initial_args = [
            None if st == scalars.INVALID else (slot + regoff, st)
            for slot, st in enumerate(scalar_ptypes)
        ]

    def addExceptionRedirect(self, target):
        """Return the redirect label for target, registering one if needed."""
        candidate = ir.Label()
        return self.exception_redirects.setdefault(target, candidate)

    def createBlock(self, instr):
        """Create, register and return the IRBlock for a Dalvik instruction."""
        block = IRBlock(self, instr.pos)
        self.iblocks[block.pos] = block
        self.labels[block.pos] = block.instructions[0]
        return block

    def flatten(self):
        """Concatenate all blocks in position order into flat_instructions.

        Afterwards iblocks and exception_redirects are set to None.
        """
        redirects = self.exception_redirects
        out = []
        for pos in sorted(self.iblocks):
            # check if we can put handler pop in front of block; if not, the
            # redirect stays in the dict and is emitted after the loop
            inline_ok = pos in redirects and out and not out[-1].fallsthrough()
            if inline_ok:
                out.append(redirects.pop(pos))
                out.append(ir.Pop())
            # now add instructions for actual block
            out += self.iblocks[pos].instructions

        # exception handler pops that couldn't be placed inline
        # in this case, just put them at the end with a goto back to the handler
        for target in sorted(redirects):
            out.extend([redirects[target], ir.Pop(), ir.Goto(target)])

        self.flat_instructions = out
        self.iblocks = self.exception_redirects = None

    def replaceInstrs(self, replace):
        """Substitute each instruction that is a key of replace by its list."""
        if not replace:
            return

        expanded = []
        for instr in self.flat_instructions:
            if instr in replace:
                expanded.extend(replace[instr])
            else:
                expanded.append(instr)
        self.flat_instructions = expanded
        # no instruction object may appear twice
        assert len(set(expanded)) == len(expanded)

    def calcUpperBound(self):
        """Return an upper bound on the size of the bytecode.

        Instructions without bytecode yet contribute their max attribute.
        """
        return sum(
            ins.max if ins.bytecode is None else len(ins.bytecode)
            for ins in self.flat_instructions
        )
262 | 
263 | ################################################################################
def visitNop(method, dex, instr_d, type_data, block, instr):
    """Emit nothing for a nop."""
    return None
266 | 
def visitMove(method, dex, instr_d, type_data, block, instr):
    """Copy a narrow register once for every scalar type it may hold."""
    dest, src = instr.args[0], instr.args[1]
    possible = type_data.prims[src]
    for st in (scalars.INT, scalars.OBJ, scalars.FLOAT):
        if not st & possible:
            continue
        block.load(src, st)
        block.store(dest, st)
272 | 
def visitMoveWide(method, dex, instr_d, type_data, block, instr):
    """Copy a wide register once for every wide scalar type it may hold."""
    dest, src = instr.args[0], instr.args[1]
    possible = type_data.prims[src]
    for st in (scalars.LONG, scalars.DOUBLE):
        if not st & possible:
            continue
        block.load(src, st)
        block.store(dest, st)
278 | 
def visitMoveResult(method, dex, instr_d, type_data, block, instr):
    """Store into the destination register, typed by instr.prev_result."""
    result_type = scalars.fromDesc(instr.prev_result)
    dest = instr.args[0]
    block.store(dest, result_type)
282 | 
def visitReturn(method, dex, instr_d, type_data, block, instr):
    """Emit a void return, or load the result register and return it."""
    ret_desc = method.id.return_type
    if ret_desc == b'V':
        block.return_()
        return

    st = scalars.fromDesc(ret_desc)
    block.load(instr.args[0], st, desc=ret_desc)
    block.return_(st)
290 | 
def visitConst32(method, dex, instr_d, type_data, block, instr):
    """Store a 32 bit constant under every type it could be read as.

    The value is always stored as int and as float; zero is additionally
    stored as a null reference.
    """
    dest = instr.args[0]
    val = instr.args[1] % (1<<32)
    for st in (scalars.INT, scalars.FLOAT):
        block.const(val, st)
        block.store(dest, st)
    if val == 0:
        block.const_null()
        block.store(dest, scalars.OBJ)
300 | 
def visitConst64(method, dex, instr_d, type_data, block, instr):
    """Store a 64 bit constant both as long and as double."""
    dest = instr.args[0]
    val = instr.args[1] % (1<<64)
    for st in (scalars.LONG, scalars.DOUBLE):
        block.const(val, st)
        block.store(dest, st)
307 | 
def visitConstString(method, dex, instr_d, type_data, block, instr):
    """Load a string constant through the pool and store it as an object."""
    string_ref = block.pool.string(dex.string(instr.args[1]))
    block.ldc(string_ref)
    block.store(instr.args[0], scalars.OBJ)
312 | 
def visitConstClass(method, dex, instr_d, type_data, block, instr):
    """Load a class constant through the pool and store it as an object."""
    # Could use dex.type here since the JVM doesn't care, but this is cleaner
    class_ref = block.pool.class_(dex.clsType(instr.args[1]))
    block.ldc(class_ref)
    block.store(instr.args[0], scalars.OBJ)
318 | 
def visitMonitorEnter(method, dex, instr_d, type_data, block, instr):
    """Load the object register and emit MONITORENTER."""
    obj_reg = instr.args[0]
    block.load(obj_reg, scalars.OBJ)
    block.u8(MONITORENTER)
322 | 
def visitMonitorExit(method, dex, instr_d, type_data, block, instr):
    """Load the object register and emit MONITOREXIT."""
    obj_reg = instr.args[0]
    block.load(obj_reg, scalars.OBJ)
    block.u8(MONITOREXIT)
326 | 
def visitCheckCast(method, dex, instr_d, type_data, block, instr):
    """Delegate to block.cast with the register and the type index."""
    reg = instr.args[0]
    type_index = instr.args[1]
    block.cast(dex, reg, type_index)
329 | 
def visitInstanceOf(method, dex, instr_d, type_data, block, instr):
    """Emit INSTANCEOF on the source register and store the int result."""
    dest, src, type_index = instr.args[0], instr.args[1], instr.args[2]
    block.load(src, scalars.OBJ)
    class_ref = block.pool.class_(dex.clsType(type_index))
    block.u8u16(INSTANCEOF, class_ref)
    block.store(dest, scalars.INT)
334 | 
def visitArrayLen(method, dex, instr_d, type_data, block, instr):
    """Emit ARRAYLENGTH on the array register and store the int result."""
    dest, array_reg = instr.args[0], instr.args[1]
    block.loadAsArray(array_reg)
    block.u8(ARRAYLENGTH)
    block.store(dest, scalars.INT)
339 | 
def visitNewInstance(method, dex, instr_d, type_data, block, instr):
    """Emit NEW for the referenced class and store the new object."""
    class_ref = block.pool.class_(dex.clsType(instr.args[1]))
    block.u8u16(NEW, class_ref)
    block.store(instr.args[0], scalars.OBJ)
343 | 
def visitNewArray(method, dex, instr_d, type_data, block, instr):
    """Load the length register, create the array and store it."""
    dest, length_reg, type_index = instr.args[0], instr.args[1], instr.args[2]
    block.load(length_reg, scalars.INT)
    block.newarray(dex.type(type_index))
    block.store(dest, scalars.OBJ)
348 | 
def visitFilledNewArray(method, dex, instr_d, type_data, block, instr):
    """Create an array and fill it from the listed registers.

    The array is left on the stack only when the next Dalvik instruction is
    a move-result; otherwise (including when there is no next instruction
    in instr_d) it is popped.
    """
    regs = instr.args[1]
    arr_desc = dex.type(instr.args[0])

    block.const(len(regs), scalars.INT)
    block.newarray(arr_desc)
    st, elet = arrays.eletPair(arrays.fromDesc(arr_desc))
    op = _arrStoreOps.get(elet, AASTORE)
    cbs = [partial(block.load, reg, st) for reg in regs]

    # if not followed by move-result, don't leave it on the stack.
    # instr_d.get() returns None when nothing follows, so check before
    # reading .type instead of failing with an AttributeError.
    following = instr_d.get(instr.pos2)
    mustpop = following is None or following.type != dalvik.MoveResult
    block.fillarraysub(op, cbs, pop=mustpop)
359 | 
def visitFillArrayData(method, dex, instr_d, type_data, block, instr):
    """Fill the array in a register from a fill-array-data payload.

    A register known to be null gets a null load followed by ATHROW. An
    empty payload still emits ARRAYLENGTH + pop so that a null array throws.
    """
    _width, arrdata = instr_d[instr.args[1]].fillarrdata
    at = type_data.arrs[instr.args[0]]

    block.loadAsArray(instr.args[0])
    # Compare by value, as IRBlock.loadAsArray does, so both agree on
    # whether the register is treated as null.
    if at == arrays.NULL:
        block.u8(ATHROW)
    elif len(arrdata) == 0:
        # fill-array-data throws a NPE if array is null even when
        # there is 0 data, so we need to add an instruction that
        # throws a NPE in this case
        block.u8(ARRAYLENGTH)
        block.add(ir.Pop())
    else:
        st, elet = arrays.eletPair(at)
        # check if we need to sign extend; values are masked back to an
        # unsigned 32 bit range afterwards
        if elet == b'B' or elet == b'Z':
            arrdata = [util.signExtend(x, 8) & 0xFFFFFFFF for x in arrdata]
        elif elet == b'S':
            arrdata = [util.signExtend(x, 16) & 0xFFFFFFFF for x in arrdata]
        block.fillarraydata(_arrStoreOps.get(elet, AASTORE), st, arrdata)
382 | 
def visitThrow(method, dex, instr_d, type_data, block, instr):
    """Load the exception register as a Throwable and emit ATHROW."""
    exc_reg = instr.args[0]
    block.load(exc_reg, scalars.OBJ, clsname=b'java/lang/Throwable')
    block.u8(ATHROW)
386 | 
def visitGoto(method, dex, instr_d, type_data, block, instr):
    """Emit an unconditional jump to the instruction's target."""
    target = instr.args[0]
    block.goto(target)
389 | 
def visitSwitch(method, dex, instr_d, type_data, block, instr):
    """Emit a switch on an int register.

    Case offsets from the payload are relative to this instruction and are
    turned into positions modulo 2**32; the default target is instr.pos2.
    """
    block.load(instr.args[0], scalars.INT)
    switchdata = instr_d[instr.args[1]].switchdata

    jumps = {}
    for key, offset in switchdata.items():
        jumps[key] = (offset + instr.pos) % (1<<32)
    block.switch(instr.pos2, jumps)
396 | 
def visitCmp(method, dex, instr_d, type_data, block, instr):
    """Emit a float, double or long comparison and store the int result."""
    # indexed by opcode relative to 0x2d
    variants = [
        (FCMPL, scalars.FLOAT),
        (FCMPG, scalars.FLOAT),
        (DCMPL, scalars.DOUBLE),
        (DCMPG, scalars.DOUBLE),
        (LCMP, scalars.LONG),
    ]
    op, st = variants[instr.opcode - 0x2d]
    block.load(instr.args[1], st)
    block.load(instr.args[2], st)
    block.u8(op)
    block.store(instr.args[0], scalars.INT)
404 | 
def visitIf(method, dex, instr_d, type_data, block, instr):
    """Emit a two register conditional jump.

    The int comparison is used when both registers may hold an int;
    otherwise both are compared as references.
    """
    lhs, rhs, target = instr.args[0], instr.args[1], instr.args[2]
    common = type_data.prims[lhs] & type_data.prims[rhs]
    as_int = common & scalars.INT

    st = scalars.INT if as_int else scalars.OBJ
    block.load(lhs, st)
    block.load(rhs, st)

    # opcode tables are indexed relative to 0x32
    if as_int:
        op = [IF_ICMPEQ, IF_ICMPNE, IF_ICMPLT, IF_ICMPGE, IF_ICMPGT, IF_ICMPLE][instr.opcode - 0x32]
    else:
        op = [IF_ACMPEQ, IF_ACMPNE][instr.opcode - 0x32]
    block.if_(op, target)
416 | 
def visitIfZ(method, dex, instr_d, type_data, block, instr):
    """Emit a conditional jump comparing one register against zero/null."""
    reg, target = instr.args[0], instr.args[1]
    as_int = type_data.prims[reg] & scalars.INT

    block.load(reg, scalars.INT if as_int else scalars.OBJ)

    # opcode tables are indexed relative to 0x38
    if as_int:
        op = [IFEQ, IFNE, IFLT, IFGE, IFGT, IFLE][instr.opcode - 0x38]
    else:
        op = [IFNULL, IFNONNULL][instr.opcode - 0x38]
    block.if_(op, target)
425 | 
def visitArrayGet(method, dex, instr_d, type_data, block, instr):
    """Load an array element into the destination register.

    When the array register is known to be null, a null push followed by
    ATHROW is emitted instead of the array access.
    """
    at = type_data.arrs[instr.args[1]]
    # Compare by value, matching the NULL checks in IRBlock.load and
    # IRBlock.loadAsArray, rather than by object identity.
    if at == arrays.NULL:
        block.const_null()
        block.u8(ATHROW)
    else:
        block.loadAsArray(instr.args[1])
        block.load(instr.args[2], scalars.INT)
        st, elet = arrays.eletPair(at)
        # element types without a dedicated opcode use AALOAD
        block.u8(_arrLoadOps.get(elet, AALOAD))
        block.store(instr.args[0], st)
437 | 
def visitArrayPut(method, dex, instr_d, type_data, block, instr):
    """Store the source register into an array element.

    When the array register is known to be null, a null push followed by
    ATHROW is emitted instead of the array access.
    """
    at = type_data.arrs[instr.args[1]]
    # Compare by value, matching the NULL checks in IRBlock.load and
    # IRBlock.loadAsArray, rather than by object identity.
    if at == arrays.NULL:
        block.const_null()
        block.u8(ATHROW)
    else:
        block.loadAsArray(instr.args[1])
        block.load(instr.args[2], scalars.INT)
        st, elet = arrays.eletPair(at)
        block.load(instr.args[0], st)
        # element types without a dedicated opcode use AASTORE
        block.u8(_arrStoreOps.get(elet, AASTORE))
449 | 
def visitInstanceGet(method, dex, instr_d, type_data, block, instr):
    """Emit GETFIELD on the object register and store the field value."""
    dest, obj_reg = instr.args[0], instr.args[1]
    field_id = dex.field_id(instr.args[2])
    st = scalars.fromDesc(field_id.desc)

    block.load(obj_reg, scalars.OBJ, clsname=field_id.cname)
    field_ref = block.pool.field(field_id.triple())
    block.u8u16(GETFIELD, field_ref)
    block.store(dest, st)
456 | 
def visitInstancePut(method, dex, instr_d, type_data, block, instr):
    """Load the object and the value registers, then emit PUTFIELD."""
    value_reg, obj_reg = instr.args[0], instr.args[1]
    field_id = dex.field_id(instr.args[2])
    st = scalars.fromDesc(field_id.desc)

    block.load(obj_reg, scalars.OBJ, clsname=field_id.cname)
    block.load(value_reg, st, desc=field_id.desc)
    field_ref = block.pool.field(field_id.triple())
    block.u8u16(PUTFIELD, field_ref)
463 | 
def visitStaticGet(method, dex, instr_d, type_data, block, instr):
    """Emit GETSTATIC and store the field value."""
    field_id = dex.field_id(instr.args[1])
    st = scalars.fromDesc(field_id.desc)

    field_ref = block.pool.field(field_id.triple())
    block.u8u16(GETSTATIC, field_ref)
    block.store(instr.args[0], st)
469 | 
def visitStaticPut(method, dex, instr_d, type_data, block, instr):
    """Load the value register and emit PUTSTATIC."""
    field_id = dex.field_id(instr.args[1])
    st = scalars.fromDesc(field_id.desc)

    block.load(instr.args[0], st, desc=field_id.desc)
    field_ref = block.pool.field(field_id.triple())
    block.u8u16(PUTSTATIC, field_ref)
475 | 
def visitInvoke(method, dex, instr_d, type_data, block, instr):
    """Load the arguments, emit the invoke and pop an unused result.

    The result is left on the stack only when the next Dalvik instruction
    is a move-result. If it is not (or there is no next instruction in
    instr_d) a non-void result is popped.
    """
    isstatic = instr.type == dalvik.InvokeStatic

    called_id = dex.method_id(instr.args[0])
    sts = scalars.paramTypes(called_id, static=isstatic)
    descs = called_id.getSpacedParamTypes(isstatic=isstatic)
    assert len(sts) == len(instr.args[1]) == len(descs)

    for st, desc, reg in zip(sts, descs, instr.args[1]):
        if st != scalars.INVALID: # skip long/double tops
            block.load(reg, st, desc=desc)
    op = {
        dalvik.InvokeVirtual: INVOKEVIRTUAL,
        dalvik.InvokeSuper: INVOKESPECIAL,
        dalvik.InvokeDirect: INVOKESPECIAL,
        dalvik.InvokeStatic: INVOKESTATIC,
        dalvik.InvokeInterface: INVOKEINTERFACE,
    }[instr.type]

    if instr.type == dalvik.InvokeInterface:
        # invokeinterface carries an argument slot count and a zero byte
        block.u8u16u8u8(op, block.pool.imethod(called_id.triple()), len(descs), 0)
    else:
        block.u8u16(op, block.pool.method(called_id.triple()))

    # check if we need to pop result instead of leaving on stack.
    # instr_d.get() returns None when nothing follows, so check before
    # reading .type instead of failing with an AttributeError.
    following = instr_d.get(instr.pos2)
    result_unused = following is None or following.type != dalvik.MoveResult
    if result_unused and called_id.return_type != b'V':
        st = scalars.fromDesc(called_id.return_type)
        block.add(ir.Pop2() if scalars.iswide(st) else ir.Pop())
505 | 
def visitUnaryOp(method, dex, instr_d, type_data, block, instr):
    """Emit a unary operation from instr.args[1] into instr.args[0].

    The JVM opcode and the source/destination scalar types come from
    mathops.UNARY, keyed by the Dalvik opcode.
    """
    opcode, source_type, result_type = mathops.UNARY[instr.opcode]
    block.load(instr.args[1], source_type)
    # Java has no bitwise-not instruction, so *not is emitted as x ^ -1.
    # The long variant widens the -1 constant with I2L first.
    if opcode in (IXOR, LXOR):
        block.u8(ICONST_M1)
        if opcode == LXOR:
            block.u8(I2L)

    block.u8(opcode)
    block.store(instr.args[0], result_type)
518 | 
def visitBinaryOp(method, dex, instr_d, type_data, block, instr):
    """Emit a two-operand arithmetic instruction and store into instr.args[0].

    The result is stored with the first operand's scalar type.
    """
    opcode, lhs_type, rhs_type = mathops.BINARY[instr.opcode]
    # Operands are taken from the end of args so that both the regular
    # and the 2addr encodings are handled by the same code
    lhs_reg = instr.args[-2]
    rhs_reg = instr.args[-1]
    block.load(lhs_reg, lhs_type)
    block.load(rhs_reg, rhs_type)
    block.u8(opcode)
    block.store(instr.args[0], lhs_type)
526 | 
def visitBinaryOpConst(method, dex, instr_d, type_data, block, instr):
    """Emit an int operation whose second operand is a literal.

    instr.args is used as (destination register, source register, literal).
    The literal is reduced modulo 2**32 before being emitted as a constant.
    """
    opcode = mathops.BINARY_LIT[instr.opcode]
    literal = instr.args[2] % (1<<32)
    source_reg = instr.args[1]
    if opcode == ISUB:
        # rsub computes literal - register, so the constant is pushed first
        block.const(literal, scalars.INT)
        block.load(source_reg, scalars.INT)
    else:
        block.load(source_reg, scalars.INT)
        block.const(literal, scalars.INT)
    block.u8(opcode)
    block.store(instr.args[0], scalars.INT)
537 | ################################################################################
# Dispatch table mapping each Dalvik instruction type to the visitor that emits
# its JVM equivalent. writeBytecode() looks visitors up by instr.type and calls
# them as visitor(method, dex, instr_d, type_data, block, instr).
VISIT_FUNCS = {
    dalvik.Nop: visitNop,
    dalvik.Move: visitMove,
    dalvik.MoveWide: visitMoveWide,
    dalvik.MoveResult: visitMoveResult,
    dalvik.Return: visitReturn,
    dalvik.Const32: visitConst32,
    dalvik.Const64: visitConst64,
    dalvik.ConstString: visitConstString,
    dalvik.ConstClass: visitConstClass,
    dalvik.MonitorEnter: visitMonitorEnter,
    dalvik.MonitorExit: visitMonitorExit,
    dalvik.CheckCast: visitCheckCast,
    dalvik.InstanceOf: visitInstanceOf,
    dalvik.ArrayLen: visitArrayLen,
    dalvik.NewInstance: visitNewInstance,
    dalvik.NewArray: visitNewArray,
    dalvik.FilledNewArray: visitFilledNewArray,
    dalvik.FillArrayData: visitFillArrayData,
    dalvik.Throw: visitThrow,
    dalvik.Goto: visitGoto,
    dalvik.Switch: visitSwitch,
    dalvik.Cmp: visitCmp,
    dalvik.If: visitIf,
    dalvik.IfZ: visitIfZ,

    # array and field access
    dalvik.ArrayGet: visitArrayGet,
    dalvik.ArrayPut: visitArrayPut,
    dalvik.InstanceGet: visitInstanceGet,
    dalvik.InstancePut: visitInstancePut,
    dalvik.StaticGet: visitStaticGet,
    dalvik.StaticPut: visitStaticPut,

    # all invoke kinds share one visitor, which selects the JVM opcode itself
    dalvik.InvokeVirtual: visitInvoke,
    dalvik.InvokeSuper: visitInvoke,
    dalvik.InvokeDirect: visitInvoke,
    dalvik.InvokeStatic: visitInvoke,
    dalvik.InvokeInterface: visitInvoke,

    # arithmetic
    dalvik.UnaryOp: visitUnaryOp,
    dalvik.BinaryOp: visitBinaryOp,
    dalvik.BinaryOpConst: visitBinaryOpConst,
}
581 | 
def writeBytecode(pool, method, opts):
    """Translate one method's Dalvik bytecode into IR and return the IRWriter.

    Runs type inference, visits every instruction that inference reached,
    records the exception table entries, flattens the IR and finally collects
    jump targets together with their predecessor counts.
    """
    dex = method.dex
    code = method.code
    bytecode = code.bytecode
    instr_d = {ins.pos: ins for ins in bytecode}
    types, all_handlers = typeinference.doInference(dex, method, code, bytecode, instr_d)

    is_static = method.access & flags.ACC_STATIC
    scalar_ptypes = scalars.paramTypes(method.id, static=is_static)

    writer = IRWriter(pool, method, types, opts)
    writer.calcInitialArgs(code.nregs, scalar_ptypes)

    for ins in bytecode:
        # positions missing from `types` are unreachable, so nothing is emitted
        if ins.pos in types:
            type_data = types[ins.pos]
            block = writer.createBlock(ins)
            visitor = VISIT_FUNCS[ins.type]
            visitor(method, dex, instr_d, type_data, block, ins)

    for ins in sorted(all_handlers, key=lambda instr: instr.pos):
        catches = all_handlers[ins]
        assert catches
        if ins.pos not in types: # unreachable instructions have no block
            continue

        start, end = writer.iblocks[ins.pos].generateExceptLabels()
        writer.except_starts.add(start)
        writer.except_ends.add(end)

        for ctype, handler_pos in catches:
            if instr_d.get(handler_pos).type == dalvik.MoveResult:
                target = writer.labels[handler_pos]
            else:
                # The handler ignores the caught exception, so go through a
                # redirect that pops it instead
                target = writer.addExceptionRedirect(handler_pos)
            writer.jump_targets.add(target)
            writer.target_pred_counts[target] += 1

            # Catching Throwable can use the special index 0, potentially
            # saving a constant pool entry or two
            if ctype == b'java/lang/Throwable':
                jctype = 0
            else:
                jctype = pool.class_(ctype)
            writer.excepts.append((start, end, target, jctype))
    writer.flatten()

    # ordinary jump targets, in addition to the exception handler targets above
    for flat_instr in writer.flat_instructions:
        for target in flat_instr.targets():
            label = writer.labels[target]
            writer.jump_targets.add(label)
            writer.target_pred_counts[label] += 1

    return writer
632 | 


--------------------------------------------------------------------------------
/enjarify/main.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2015 Google Inc. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import zipfile, traceback, argparse, collections
 16 | 
 17 | from . import parsedex
 18 | from .jvm import writeclass
 19 | from .mutf8 import decode
 20 | from .jvm.optimization import options
 21 | 
 22 | def read(fname, mode='rb'):
 23 |     with open(fname, mode) as f:
 24 |         return f.read()
 25 | 
 26 | def translate(data, opts, classes=None, errors=None, allowErrors=True):
 27 |     dex = parsedex.DexFile(data)
 28 |     classes = collections.OrderedDict() if classes is None else classes
 29 |     errors = collections.OrderedDict() if errors is None else errors
 30 | 
 31 |     for cls in dex.classes:
 32 |         unicode_name = decode(cls.name) + '.class'
 33 |         if unicode_name in classes or unicode_name in errors:
 34 |             print('Warning, duplicate class name', unicode_name)
 35 |             continue
 36 | 
 37 |         try:
 38 |             class_data = writeclass.toClassFile(cls, opts)
 39 |             classes[unicode_name] = class_data
 40 |         except Exception:
 41 |             if not allowErrors:
 42 |                 raise
 43 |             errors[unicode_name] = traceback.format_exc()
 44 | 
 45 |         if not (len(classes) + len(errors)) % 1000:
 46 |             print(len(classes) + len(errors), 'classes processed')
 47 |     return classes, errors
 48 | 
 49 | def writeToJar(fname, classes):
 50 |     with zipfile.ZipFile(fname, 'w') as out:
 51 |         for unicode_name, data in classes.items():
 52 |             # Don't bother compressing small files
 53 |             compress_type = zipfile.ZIP_DEFLATED if len(data) > 10000 else zipfile.ZIP_STORED
 54 |             info = zipfile.ZipInfo(unicode_name)
 55 |             info.external_attr = 0o775 << 16 # set Unix file permissions
 56 |             out.writestr(info, data, compress_type=compress_type)
 57 | 
 58 | def main():
 59 |     parser = argparse.ArgumentParser(prog='enjarify', description='Translates Dalvik bytecode (.dex or .apk) to Java bytecode (.jar)')
 60 |     parser.add_argument('inputfile')
 61 |     parser.add_argument('-o', '--output', help='Output .jar file. Default is [input-filename]-enjarify.jar.')
 62 |     parser.add_argument('-f', '--force', action='store_true', help='Force overwrite. If output file already exists, this option is required to overwrite.')
 63 |     parser.add_argument('--fast', action='store_true', help='Speed up translation at the expense of generated bytecode being less readable.')
 64 |     args = parser.parse_args()
 65 | 
 66 |     dexs = []
 67 |     if args.inputfile.lower().endswith('.apk'):
 68 |         with zipfile.ZipFile(args.inputfile, 'r') as z:
 69 |             for name in z.namelist():
 70 |                 if name.startswith('classes') and name.endswith('.dex'):
 71 |                     dexs.append(z.read(name))
 72 |     else:
 73 |         dexs.append(read(args.inputfile))
 74 | 
 75 |     # Exclusive mode requires 3.3+, so provide helpful error in this case
 76 |     if not args.force:
 77 |         try:
 78 |             FileExistsError
 79 |         except NameError:
 80 |             print('Overwrite protection requires Python 3.3+. Either pass -f or --force, or upgrade to a more recent version of Python. If you are using Pypy3 2.4, you need to switch to a nightly build or build from source. Or just pass -f.')
 81 |             return
 82 | 
 83 |     # Might as well open the output file early so we can detect existing file error
 84 |     # before going to the trouble of translating everything
 85 |     outname = args.output or args.inputfile.rpartition('/')[-1].rpartition('.')[0] + '-enjarify.jar'
 86 |     try:
 87 |         outfile = open(outname, mode=('wb' if args.force else 'xb'))
 88 |     except FileExistsError:
 89 |         print('Attempting to write to', outname)
 90 |         print('Error, output file already exists and --force was not specified.')
 91 |         print('To overwrite the output file, pass -f or --force.')
 92 |         return
 93 | 
 94 |     opts = options.NONE if args.fast else options.PRETTY
 95 |     classes = collections.OrderedDict()
 96 |     errors = collections.OrderedDict()
 97 |     for data in dexs:
 98 |         translate(data, opts=opts, classes=classes, errors=errors)
 99 |     writeToJar(outfile, classes)
100 |     outfile.close()
101 |     print('Output written to', outname)
102 | 
103 |     for name, error in sorted(errors.items()):
104 |         print(name, error)
105 |     print('{} classes translated successfully, {} classes had errors'.format(len(classes), len(errors)))
106 | 
# Run the command line interface only when executed as a script, not on import
if __name__ == "__main__":
    main()
109 | 


--------------------------------------------------------------------------------
/enjarify/mutf8.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2015 Google Inc. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # Unfortunately, there's no easy way to decode Modified UTF8 in Python, so we
16 | # have to write a custom decoder. This one is error tolerant and will decode
17 | # anything resembling mutf8.
18 | 
19 | def _decode(b):
20 |     # decode arbitrary utf8 codepoints, tolerating surrogate pairs, nonstandard encodings, etc.
21 |     for x in b:
22 |         if x < 128:
23 |             yield x
24 |         else:
25 |             # figure out how many bytes
26 |             extra = 0
27 |             for i in range(6, 0, -1):
28 |                 if x & (1<<i):
29 |                     extra += 1
30 |                 else:
31 |                     break
32 | 
33 |             bits = x % (1 << 6-extra)
34 |             for _ in range(extra):
35 |                 bits = (bits << 6) ^ (next(b) & 63)
36 |             yield bits
37 | 
38 | def _fixPairs(codes):
39 |     # convert surrogate pairs to single code points
40 |     for x in codes:
41 |         if 0xD800 <= x < 0xDC00:
42 |             high = x - 0xD800
43 |             low = next(codes) - 0xDC00
44 |             yield 0x10000 + (high << 10) + (low & 1023)
45 |         else:
46 |             yield x
47 | 
def decode(b):
    """Decode the bytes b to str.

    Plain utf8 decoding is tried first; input it rejects goes through the
    tolerant custom decoder instead.
    """
    try:
        text = b.decode('utf8')
    except UnicodeDecodeError:
        code_points = _fixPairs(_decode(iter(b)))
        text = ''.join(chr(point) for point in code_points)
    return text
53 | 


--------------------------------------------------------------------------------
/enjarify/parsedex.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2015 Google Inc. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | from .byteio import Reader
 15 | from .dalvik import parseBytecode
 16 | from .util import signExtend
 17 | 
# Sentinel index meaning "no entry": DexClass treats a superclass index equal
# to this as having no superclass, and DexFile.type() returns None for it.
NO_INDEX = 0xFFFFFFFF
 19 | 
 20 | def typeList(dex, off, parseClsDesc=False):
 21 |     if off == 0:
 22 |         return []
 23 |     st = dex.stream(off)
 24 |     size = st.u32()
 25 |     idxs = [st.u16() for _ in range(size)]
 26 |     func = dex.clsType if parseClsDesc else dex.type
 27 |     return list(map(func, idxs))
 28 | 
 29 | def encodedValue(dex, stream):
 30 |     tag = stream.u8()
 31 |     vtype, varg = tag & 31, tag >> 5
 32 | 
 33 |     if vtype == 0x1c: # ARRAY
 34 |         size = stream.uleb128()
 35 |         return [encodedValue(dex, stream) for _ in range(size)]
 36 |     if vtype == 0x1d: # ANNOTATION
 37 |         # We don't actually care about annotations but still need to read it to
 38 |         # find out how much data is taken up
 39 |         stream.uleb128()
 40 |         for _ in range(stream.uleb128()):
 41 |             stream.uleb128()
 42 |             encodedValue(dex, stream)
 43 |         return None
 44 |     if vtype == 0x1e: # NULL
 45 |         return None
 46 | 
 47 |     # For the rest, we just return it as unsigned integers without recording type
 48 |     # extended to either u32 or u64 depending on int/float or long/double
 49 |     if vtype == 0x1f: # BOOLEAN
 50 |         return b'I', varg
 51 |     # the rest are an int encoded into varg + 1 bytes in some way
 52 |     size = varg + 1
 53 |     val = sum(stream.u8() << (i*8) for i in range(size))
 54 | 
 55 |     if vtype == 0x00: # BYTE
 56 |         return b'I', signExtend(val, 8) % (1<<32)
 57 |     if vtype == 0x02: # SHORT
 58 |         return b'I', signExtend(val, 16) % (1<<32)
 59 |     if vtype == 0x03: # CHAR
 60 |         return b'I', val
 61 |     if vtype == 0x04: # INT
 62 |         return b'I', val
 63 | 
 64 |     if vtype == 0x06: # LONG
 65 |         return b'J', val
 66 | 
 67 |     # floats are 0 extended to the right
 68 |     if vtype == 0x10: # FLOAT
 69 |         return b'F', val << (32 - size * 8)
 70 |     if vtype == 0x11: # DOUBLE
 71 |         return b'D', val << (64 - size * 8)
 72 | 
 73 |     if vtype == 0x17: # STRING
 74 |         return b'Ljava/lang/String;', dex.string(val)
 75 |     if vtype == 0x18: # TYPE
 76 |         return b'Ljava/lang/Class;', dex.clsType(val)
 77 | 
 78 | class MFIdMixin:
 79 |     def triple(self): return self.cname, self.name, self.desc
 80 | 
 81 | class FieldId(MFIdMixin):
 82 |     def __init__(self, dex, field_idx):
 83 |         stream = dex.stream(dex.field_ids.off + field_idx * 8)
 84 |         self.cname = dex.clsType(stream.u16())
 85 |         self.desc = dex.type(stream.u16())
 86 |         self.name = dex.string(stream.u32())
 87 | 
 88 | class Field:
 89 |     def __init__(self, dex, field_idx, access):
 90 |         self.id = FieldId(dex, field_idx)
 91 |         self.access = access
 92 |         self.constant_value = None # will be set later
 93 | 
 94 | class MethodId(MFIdMixin):
 95 |     def __init__(self, dex, method_idx):
 96 |         stream = dex.stream(dex.method_ids.off + method_idx * 8)
 97 |         self.cname = dex.clsType(stream.u16())
 98 |         proto_idx = stream.u16()
 99 |         self.name = dex.string(stream.u32())
100 | 
101 |         stream2 = dex.stream(dex.proto_ids.off + proto_idx * 12)
102 |         shorty_idx, return_idx, parameters_off = stream2.u32(), stream2.u32(), stream2.u32()
103 |         self.return_type = dex.type(return_idx)
104 |         self.param_types = typeList(dex, parameters_off)
105 | 
106 |         # rearrange things to Java format
107 |         parts = [b'('] + self.param_types + [b')', self.return_type]
108 |         self.desc = b''.join(parts)
109 | 
110 |     def getSpacedParamTypes(self, isstatic):
111 |         results = []
112 |         if not isstatic:
113 |             if self.cname.startswith(b'['):
114 |                 results.append(self.cname)
115 |             else:
116 |                 results.append(b'L' + self.cname + b';')
117 | 
118 |         for ptype in self.param_types:
119 |             results.append(ptype)
120 |             if ptype == b'J' or ptype == b'D':
121 |                 results.append(None)
122 |         return results
123 | 
class TryItem:
    """One try range of a method together with its catch handlers."""

    def __init__(self, stream):
        # Layout: u32 start address, u16 instruction count, u16 handler offset
        self.start = stream.u32()
        self.count = stream.u16()
        self.handler_off = stream.u16()
        self.end = self.start + self.count
        self.catches = None # filled in by finish()

    def finish(self, dex, list_off):
        """Read this item's handler list, located at list_off + handler_off.

        self.catches becomes a list of (class name, handler address) pairs.
        A size that is zero or negative means a catch-all handler follows
        the typed ones; it is recorded as catching java/lang/Throwable.
        """
        stream = dex.stream(list_off + self.handler_off)
        size = stream.sleb128()
        catches = []
        self.catches = catches
        for _ in range(abs(size)):
            caught_type = dex.clsType(stream.uleb128())
            handler_addr = stream.uleb128()
            catches.append((caught_type, handler_addr))
        if size <= 0:
            catches.append((b'java/lang/Throwable', stream.uleb128()))
138 | 
class CodeItem:
    """The code of one method: register count, try items and parsed bytecode."""

    def __init__(self, dex, offset):
        stream = dex.stream(offset)
        self.nregs = stream.u16()
        stream.u16() # ins_size, not needed
        stream.u16() # outs_size, not needed
        tries_size = stream.u16()
        stream.u32() # debug_off, not needed
        self.insns_size = stream.u32()
        insns_start_pos = stream.pos
        insns = [stream.u16() for _ in range(self.insns_size)]
        # With an odd number of code units, a padding unit precedes the tries
        if tries_size and self.insns_size & 1:
            stream.u16()
        self.tries = [TryItem(stream) for _ in range(tries_size)]
        # Handler lists start right after the try items
        self.list_off = stream.pos
        for try_item in self.tries:
            try_item.finish(dex, self.list_off)

        # every handler address, passed along to the bytecode parser
        catch_addrs = set()
        for try_item in self.tries:
            for _, handler_addr in try_item.catches:
                catch_addrs.add(handler_addr)
        self.bytecode = parseBytecode(dex, insns_start_pos, insns, catch_addrs)
161 | 
class Method:
    """A method definition: its id, access flags and (optional) code."""

    def __init__(self, dex, method_idx, access, code_off):
        self.dex = dex
        self.id = MethodId(dex, method_idx)
        self.access = access
        self.code_off = code_off
        # A zero code offset means there is no code item to parse
        if code_off:
            self.code = CodeItem(dex, code_off)
        else:
            self.code = None
169 | 
class ClassData:
    """The fields and methods of a class, parsed from its class data item."""

    def __init__(self, dex, offset):
        self.fields = []
        self.methods = []
        # Offset 0 means no data: keep the empty field and method lists
        if offset == 0:
            return
        self._parse(dex, dex.stream(offset))

    def _parse(self, dex, stream):
        """Fill self.fields and self.methods from stream."""
        static_count = stream.uleb128()
        instance_count = stream.uleb128()
        direct_count = stream.uleb128()
        virtual_count = stream.uleb128()

        # Indexes are stored as differences from the previous entry, with the
        # running index starting over at 0 for each group
        for count in (static_count, instance_count):
            field_idx = 0
            for _ in range(count):
                field_idx += stream.uleb128()
                access = stream.uleb128()
                self.fields.append(Field(dex, field_idx, access))

        for count in (direct_count, virtual_count):
            method_idx = 0
            for _ in range(count):
                method_idx += stream.uleb128()
                access = stream.uleb128()
                code_off = stream.uleb128()
                self.methods.append(Method(dex, method_idx, access, code_off))
197 | 
class DexClass:
    """One class definition; its fields and methods are parsed on demand."""

    def __init__(self, dex, base_off, i):
        self.dex = dex
        # class definitions are 32 bytes each, made of eight u32 values
        entry = dex.stream(base_off + i*32)

        self.name = dex.clsType(entry.u32())
        self.access = entry.u32()
        super_idx = entry.u32()
        if super_idx == NO_INDEX:
            self.super = None
        else:
            self.super = dex.clsType(super_idx)
        interfaces_off = entry.u32()
        self.interfaces = typeList(dex, interfaces_off, parseClsDesc=True)
        entry.u32() # two values are read and discarded
        entry.u32()
        self.data_off = entry.u32()
        self.data = None # parsed lazily by parseData()
        self.constant_values_off = entry.u32()

    def parseData(self):
        """Parse the class data and constant field values, once only."""
        if self.data is not None:
            return
        self.data = ClassData(self.dex, self.data_off)
        if not self.constant_values_off:
            return
        stream = self.dex.stream(self.constant_values_off)
        # The values apply, in order, to the first fields of the class
        value_count = stream.uleb128()
        for field in self.data.fields[:value_count]:
            field.constant_value = encodedValue(self.dex, stream)
221 | 
class SizeOff:
    """A (size, offset) pair read from the stream as two consecutive u32 values."""

    def __init__(self, stream):
        self.size, self.off = stream.u32(), stream.u32()
226 | 
class DexFile:
    """A parsed dex file: header tables plus the list of class definitions."""

    def __init__(self, data):
        self.raw = data
        header = Reader(data)

        # parse header, skipping the 36 bytes in front of the header size field
        header.read(36)
        header_size = header.u32()
        if header_size != 0x70:
            print('Warning, unexpected header size!')
        endian_tag = header.u32()
        if endian_tag != 0x12345678:
            print('Warning, unexpected endianess tag!')

        self.link = SizeOff(header)
        self.map_off = header.u32()
        self.string_ids = SizeOff(header)
        self.type_ids = SizeOff(header)
        self.proto_ids = SizeOff(header)
        self.field_ids = SizeOff(header)
        self.method_ids = SizeOff(header)
        self.class_defs = SizeOff(header)
        self.data = SizeOff(header)

        defs = self.class_defs
        self.classes = [DexClass(self, defs.off, i) for i in range(defs.size)]

    def stream(self, offset):
        """Return a new Reader over the raw data, positioned at offset."""
        return Reader(self.raw, offset)

    def string(self, i):
        """Return the raw bytes of string number i."""
        # The string id table holds one u32 data offset per string
        data_off = self.stream(self.string_ids.off + i*4).u32()
        data = self.stream(data_off)
        data.uleb128() # ignore decoded length
        return data.readCStr()

    def type(self, i):
        """Return the descriptor of type number i, or None if i is out of range."""
        if not (0 <= i < NO_INDEX):
            return None
        str_idx = self.stream(self.type_ids.off + i*4).u32()
        return self.string(str_idx)

    def clsType(self, i):
        """Return type i as a class name, or as a descriptor for arrays."""
        # Can be either class _name_ or array _descriptor_
        desc = self.type(i)
        if desc.startswith(b'['):
            return desc
        if desc.startswith(b'L'):
            # strip the leading L and the trailing ;
            return desc[1:-1]
        # Not sure how to handle primitive classes properly,
        # but this should hopefully be good enough.
        return desc

    def field_id(self, i):
        """Return the FieldId for field number i."""
        return FieldId(self, i)

    def method_id(self, i):
        """Return the MethodId for method number i."""
        return MethodId(self, i)
280 | 


--------------------------------------------------------------------------------
/enjarify/treelist.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2015 Google Inc. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
# Each tree node stores its first SIZE elements directly; indexes beyond that
# are spread over SPLIT subtrees (chosen by index modulo SPLIT), which are
# only allocated once a non-default value is stored in them.
SIZE = 16
SPLIT = 16
 18 | 
 19 | # This class represents a list as a persistent n-ary tree
 20 | # This has much slower access and updates than a real list but has the advantage
 21 | # of sharing memory with previous versions of the list when only a few elements
 22 | # are changed. See http://en.wikipedia.org/wiki/Persistent_data_structure#Trees
 23 | # Also, default values are not stored, so this is good for sparse arrays
 24 | class TreeList:
 25 |     def __init__(self, default, func, data=None):
 26 |         self.default = default
 27 |         self.func = func
 28 |         self.data = data or _TreeListSub(default)
 29 | 
 30 |     def __getitem__(self, i):
 31 |         return self.data[i]
 32 | 
 33 |     def __setitem__(self, i, val):
 34 |         self.data = self.data.set(i, val)
 35 | 
 36 |     def copy(self):
 37 |         return TreeList(self.default, self.func, self.data)
 38 | 
 39 |     def merge(self, other):
 40 |         assert self.func is other.func
 41 |         self.data = _TreeListSub.merge(self.data, other.data, self.func)
 42 | 
 43 | 
 44 | class _TreeListSub:
 45 |     def __init__(self, default, direct=None, children=None):
 46 |         self.default = default
 47 |         if direct is None:
 48 |             self.direct = [self.default]*SIZE
 49 |             self.children = [None]*SPLIT # Subtrees allocated lazily
 50 |         else:
 51 |             self.direct = direct
 52 |             self.children = children
 53 | 
 54 |     def __getitem__(self, i):
 55 |         assert i >= 0
 56 |         if i < SIZE:
 57 |             return self.direct[i]
 58 | 
 59 |         i -= SIZE
 60 |         i, ci = divmod(i, SPLIT)
 61 |         child = self.children[ci]
 62 | 
 63 |         if child is None:
 64 |             return self.default
 65 |         return child[i]
 66 | 
 67 |     def set(self, i, val):
 68 |         assert i >= 0
 69 |         if i < SIZE:
 70 |             if self.direct[i] == val:
 71 |                 return self
 72 | 
 73 |             temp = self.direct[:]
 74 |             temp[i] = val
 75 |             return _TreeListSub(self.default, temp, self.children)
 76 | 
 77 |         i -= SIZE
 78 |         i, ci = divmod(i, SPLIT)
 79 |         child = self.children[ci]
 80 | 
 81 |         if child is None:
 82 |             if val == self.default:
 83 |                 return self
 84 |             child = _TreeListSub(self.default).set(i, val)
 85 |         else:
 86 |             if val == child[i]:
 87 |                 return self
 88 |             child = child.set(i, val)
 89 | 
 90 |         temp = self.children[:]
 91 |         temp[ci] = child
 92 |         return _TreeListSub(self.default, self.direct, temp)
 93 | 
 94 |     @staticmethod
 95 |     def merge(left, right, func):
 96 |         # Effectively computes [func(x, y) for x, y in zip(left, right)]
 97 |         # Assume func(x, x) == x
 98 |         if left is right:
 99 |             return left
100 | 
101 |         if left is None:
102 |             left, right = right, left
103 | 
104 |         default = left.default
105 |         merge = _TreeListSub.merge
106 |         if right is None:
107 |             direct = [func(x, default) for x in left.direct]
108 |             children = [merge(child, None, func) for child in left.children]
109 |             if direct == left.direct and children == left.children:
110 |                 return left
111 |             return _TreeListSub(default, direct, children)
112 | 
113 |         direct = [func(x, y) for x, y in zip(left.direct, right.direct)]
114 |         children = [merge(c1, c2, func) for c1, c2 in zip(left.children, right.children)]
115 |         if direct == left.direct and children == left.children:
116 |             return left
117 |         if direct == right.direct and children == right.children:
118 |             return right
119 |         return _TreeListSub(default, direct, children)
120 | 


--------------------------------------------------------------------------------
/enjarify/typeinference/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2015 Google Inc. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/enjarify/typeinference/typeinference.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2015 Google Inc. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import collections, operator
 16 | 
 17 | from ..jvm import arraytypes as arrays
 18 | from ..jvm import scalartypes as scalars
 19 | from ..jvm import mathops, jvmops
 20 | from ..treelist import TreeList
 21 | from .. import flags, dalvik
 22 | 
 23 | 
 24 | # The two main things we need type inference for are determining the types of
 25 | # primitive values and arrays. Luckily, we don't care about actual classes in
 26 | # these cases, we just need to know whether it is int,float,reference, etc. to
 27 | # generate the correct bytecode instructions, which are typed in Java.
 28 | #
 29 | # One additional problem is that ART's implicit casts narrow the type instead of
 30 | # replacing it like regular checkcasts do. This means that there is no way to
 31 | # replicate the behavior in Java using normal casts unless you know which class
 32 | # is a subclass of another and which classes are interfaces. However, we want to
 33 | # be able to translate code without knowing about every other class that could be
 34 | # referenced by the application, so we make do with a hack.
 35 | #
 36 | # Variables subjected to implicit casting are marked as tainted. Whenever a
 37 | # tainted value is used, it is explicitly checkcasted to the expected type. This
 38 | # isn't ideal since it will incorrectly throw in the case of bad interface casts,
 39 | # but it's the best we can do without requiring knowledge of the whole inheritance
 40 | # hierarchy.
 41 | 
 42 | class TypeInfo:
 43 |     def __init__(self, prims, arrs, tainted):
 44 |         # copy on write
 45 |         self.prims = prims
 46 |         self.arrs = arrs
 47 |         self.tainted = tainted
 48 | 
 49 |     def _copy(self): return TypeInfo(self.prims.copy(), self.arrs.copy(), self.tainted.copy())
 50 |     def _get(self, reg): return self.prims[reg], self.arrs[reg], self.tainted[reg]
 51 | 
 52 |     def _set(self, reg, st, at, taint=False):
 53 |         self.prims[reg] = st
 54 |         self.arrs[reg] = at
 55 |         self.tainted[reg] = taint
 56 |         return self
 57 | 
 58 |     def move(self, src, dest, wide):
 59 |         new = self._copy()._set(dest, *self._get(src))
 60 |         if wide:
 61 |             new._set(dest+1, *self._get(src+1))
 62 |         return new
 63 | 
 64 |     def assign(self, reg, st, at=arrays.INVALID, taint=False):
 65 |         assert st is not None
 66 |         return self._copy()._set(reg, st, at, taint)
 67 | 
 68 |     def assign2(self, reg, st):
 69 |         assert st is not None
 70 |         at = arrays.INVALID
 71 |         return self._copy()._set(reg, st, at)._set(reg+1, scalars.INVALID, at)
 72 | 
 73 |     def assignFromDesc(self, reg, desc):
 74 |         st = scalars.fromDesc(desc)
 75 |         at = arrays.fromDesc(desc)
 76 |         if scalars.iswide(st):
 77 |             return self.assign2(reg, st)
 78 |         else:
 79 |             return self.assign(reg, st, at)
 80 | 
 81 |     def isSame(self, other):
 82 |         return (self.prims.data is other.prims.data and
 83 |             self.arrs.data is other.arrs.data and
 84 |             self.tainted.data is other.tainted.data)
 85 | 
 86 | def merge(old, new):
 87 |     temp = old._copy()
 88 |     temp.prims.merge(new.prims)
 89 |     temp.arrs.merge(new.arrs)
 90 |     temp.tainted.merge(new.tainted)
 91 |     return old if old.isSame(temp) else temp
 92 | 
 93 | def fromParams(method, num_regs):
 94 |     isstatic = method.access & flags.ACC_STATIC
 95 |     full_ptypes = method.id.getSpacedParamTypes(isstatic)
 96 |     offset = num_regs - len(full_ptypes)
 97 | 
 98 |     prims = TreeList(scalars.INVALID, operator.__and__)
 99 |     arrs = TreeList(arrays.INVALID, arrays.merge)
100 |     tainted = TreeList(False, operator.__or__)
101 | 
102 |     for i, desc in enumerate(full_ptypes):
103 |         if desc is not None:
104 |             prims[offset + i] = scalars.fromDesc(desc)
105 |             arrs[offset + i] = arrays.fromDesc(desc)
106 |     return TypeInfo(prims, arrs, tainted)
107 | 
_MATH_THROW_OPS = [jvmops.IDIV, jvmops.IREM, jvmops.LDIV, jvmops.LREM]
def pruneHandlers(all_handlers):
    """Filter a {instruction: [(catch type, handler), ...]} mapping.

    An instruction is kept only if its type is in dalvik.PRUNED_THROW_TYPES
    and, for math instructions, the operation is one of _MATH_THROW_OPS.
    For every kept instruction the handler list is reduced to the first
    handler per catch type, cut off after a java/lang/Throwable handler.
    Instructions left without any handler are omitted. Returns a plain dict.
    """
    pruned = {}
    for instr, handlers in all_handlers.items():
        if instr.type not in dalvik.PRUNED_THROW_TYPES:
            continue
        # if math op, make sure it is int div/rem
        if instr.type == dalvik.BinaryOp:
            if mathops.BINARY[instr.opcode][0] not in _MATH_THROW_OPS:
                continue
        elif instr.type == dalvik.BinaryOpConst:
            if mathops.BINARY_LIT[instr.opcode] not in _MATH_THROW_OPS:
                continue

        seen_ctypes = set()
        kept = []
        for ctype, handler in handlers:
            # a later handler with an already seen catch type is dropped
            if ctype not in seen_ctypes:
                seen_ctypes.add(ctype)
                kept.append((ctype, handler))
            # nothing after a catch all handler is considered
            if ctype == b'java/lang/Throwable':
                break
        if kept:
            pruned[instr] = kept
    return pruned
132 | 
133 | ################################################################################
134 | # Lots of instructions just return an object or int for type inference purposes
135 | # so we have a single function for these cases
def visitRetObj(dex, instr, cur):
    """Type callback: the register in instr.args[0] becomes scalars.OBJ."""
    dest = instr.args[0]
    return cur.assign(dest, scalars.OBJ)
def visitRetInt(dex, instr, cur):
    """Type callback: the register in instr.args[0] becomes scalars.INT."""
    dest = instr.args[0]
    return cur.assign(dest, scalars.INT)
140 | 
141 | # Instruction specific callbacks
def visitMove(dex, instr, cur):
    """Type callback for a single-register move from args[1] into args[0]."""
    dest, src = instr.args[0], instr.args[1]
    return cur.move(src, dest, wide=False)
def visitMoveWide(dex, instr, cur):
    """Type callback for a register-pair move from args[1] into args[0]."""
    dest, src = instr.args[0], instr.args[1]
    return cur.move(src, dest, wide=True)
def visitMoveResult(dex, instr, cur):
    """Type callback: type args[0] from the descriptor in instr.prev_result."""
    dest = instr.args[0]
    result_desc = instr.prev_result
    return cur.assignFromDesc(dest, result_desc)
def visitConst32(dex, instr, cur):
    """Type callback for a 32 bit constant load into args[0].

    A constant that is zero modulo 2**32 is typed (scalars.ZERO, arrays.NULL);
    every other value is typed scalars.C32.
    """
    dest, raw = instr.args[0], instr.args[1]
    if raw % (1 << 32) != 0:
        return cur.assign(dest, scalars.C32)
    return cur.assign(dest, scalars.ZERO, arrays.NULL)
def visitConst64(dex, instr, cur):
    """Type callback for a 64 bit constant: args[0] becomes scalars.C64 via assign2."""
    dest = instr.args[0]
    return cur.assign2(dest, scalars.C64)
def visitCheckCast(dex, instr, cur):
    """Type callback for a cast of register args[0] to the type with index args[1].

    The register becomes scalars.OBJ; its array type is the current one
    narrowed (arrays.narrow) by the array type of the cast target.
    """
    reg = instr.args[0]
    target_at = arrays.fromDesc(dex.type(instr.args[1]))
    narrowed_at = arrays.narrow(cur.arrs[reg], target_at)
    return cur.assign(reg, scalars.OBJ, narrowed_at)
def visitNewArray(dex, instr, cur):
    """Type callback: args[0] becomes an object whose array type comes from type index args[2]."""
    dest = instr.args[0]
    array_desc = dex.type(instr.args[2])
    return cur.assign(dest, scalars.OBJ, arrays.fromDesc(array_desc))
def visitArrayGet(dex, instr, cur):
    """Type callback for an array element load from array args[1] into args[0].

    The element's scalar and array type are taken from arrays.eletPair of
    the array register's array type, unless that type is arrays.NULL.
    """
    dest = instr.args[0]
    arr_at = cur.arrs[instr.args[1]]
    if arr_at is not arrays.NULL:
        st, at = arrays.eletPair(arr_at)
        return cur.assign(dest, st, at)
    # This is unreachable, so use (ALL, NULL), which can be merged with anything
    return cur.assign(dest, scalars.ALL, arrays.NULL)
def visitInstanceGet(dex, instr, cur):
    """Type callback: type args[0] from the descriptor of the field with index args[2]."""
    dest = instr.args[0]
    field_desc = dex.field_id(instr.args[2]).desc
    return cur.assignFromDesc(dest, field_desc)
def visitStaticGet(dex, instr, cur):
    """Type callback: type args[0] from the descriptor of the field with index args[1]."""
    dest = instr.args[0]
    field_desc = dex.field_id(instr.args[1]).desc
    return cur.assignFromDesc(dest, field_desc)
177 | 
def visitUnaryOp(dex, instr, cur):
    """Type callback for unary math ops.

    The result type is the third element of the mathops.UNARY entry for the
    opcode; wide results are stored with assign2, others with assign.
    """
    _, _, st = mathops.UNARY[instr.opcode]
    store = cur.assign2 if scalars.iswide(st) else cur.assign
    return store(instr.args[0], st)
184 | 
def visitBinaryOp(dex, instr, cur):
    """Type callback for binary math ops.

    The result type is the second element of the mathops.BINARY entry for
    the opcode; wide results are stored with assign2, others with assign.
    """
    _, st, _ = mathops.BINARY[instr.opcode]
    store = cur.assign2 if scalars.iswide(st) else cur.assign
    return store(instr.args[0], st)
191 | 
# Dispatch table used by doInference: maps a dalvik instruction type to the
# callback (dex, instr, cur) -> TypeInfo that computes the register types
# after the instruction has executed.
FUNCS = {
    # instructions whose destination is simply an object or an int
    dalvik.ConstString: visitRetObj,
    dalvik.ConstClass: visitRetObj,
    dalvik.NewInstance: visitRetObj,
    dalvik.InstanceOf: visitRetInt,
    dalvik.ArrayLen: visitRetInt,
    dalvik.Cmp: visitRetInt,
    dalvik.BinaryOpConst: visitRetInt,

    # instructions with a dedicated callback
    dalvik.Move: visitMove,
    dalvik.MoveWide: visitMoveWide,
    dalvik.MoveResult: visitMoveResult,
    dalvik.Const32: visitConst32,
    dalvik.Const64: visitConst64,
    dalvik.CheckCast: visitCheckCast,
    dalvik.NewArray: visitNewArray,
    dalvik.ArrayGet: visitArrayGet,
    dalvik.InstanceGet: visitInstanceGet,
    dalvik.StaticGet: visitStaticGet,
    dalvik.UnaryOp: visitUnaryOp,
    dalvik.BinaryOp: visitBinaryOp,
}

# Instruction types that doInference handles inline as branches/jumps. None of
# them appear in FUNCS, which is checked first.
CONTROL_FLOW_OPS = {dalvik.Goto, dalvik.If, dalvik.IfZ, dalvik.Switch}
216 | 
def doInference(dex, method, code, bytecode, instr_d):
    """Infer the register types in effect before each reachable instruction.

    Starting from the parameter types at position 0, type states are pushed
    along fallthrough edges, branch and switch targets and exception handler
    edges, and merged wherever several paths meet, until nothing changes.

    Args:
        dex: dex file object, used to resolve type indices (and passed to the
            callbacks in FUNCS).
        method: method being analysed; supplies the initial register types.
        code: code item of the method; `tries` and `nregs` are read from it.
        bytecode: the instructions to analyse, visited in iteration order.
        instr_d: mapping from instruction position to instruction.

    Returns:
        A pair (types, all_handlers). types maps a position to the TypeInfo
        holding before the instruction at that position, for every position
        that was reached. all_handlers maps an instruction to its pruned list
        of (catch type, handler position) pairs.
    """
    # get exception handlers
    all_handlers = collections.defaultdict(list)
    for tryi in code.tries:
        # Iterate the `bytecode` argument rather than code.bytecode so that the
        # handler table is keyed by the very instructions the main loop visits.
        for instr in bytecode:
            if tryi.start < instr.pos2 and tryi.end > instr.pos:
                all_handlers[instr] += tryi.catches
    all_handlers = pruneHandlers(all_handlers)

    types = {}
    types[0] = fromParams(method, code.nregs)
    dirty = {0}

    def doMerge(pos, new):
        """Merge state `new` into position pos and mark pos dirty if it changed."""
        # prevent infinite loops
        if pos not in instr_d:
            return

        if pos in types:
            old = types[pos]
            new = merge(old, new)
            # merge() returns `old` itself when the merge changed nothing
            if new is not old:
                types[pos] = new
                dirty.add(pos)
        else:
            types[pos] = new
            dirty.add(pos)

    while dirty: # iterate until convergence
        for instr in bytecode:
            if instr.pos not in dirty:
                continue

            dirty.remove(instr.pos)
            cur = types[instr.pos]
            itype = instr.type
            if itype in FUNCS:
                after = FUNCS[itype](dex, instr, cur)
            elif itype in CONTROL_FLOW_OPS:
                # control flow - none of these are in FUNCS
                # after: state for the fallthrough edge, after2: state for the
                # branch target, result: state with implicit casts applied
                result = after = after2 = cur
                if instr.implicit_casts is not None:
                    desc_ind, regs = instr.implicit_casts
                    for reg in regs:
                        st = cur.prims[reg] # could != OBJ if null
                        at = arrays.narrow(cur.arrs[reg], arrays.fromDesc(dex.type(desc_ind)))
                        result = result.assign(reg, st, at, taint=True)
                    # merge into branch if op = if-nez else merge into fallthrough
                    if instr.opcode == 0x39:
                        after2 = result
                    else:
                        after = result

                if itype == dalvik.Goto:
                    doMerge(instr.args[0], after2)
                elif itype == dalvik.If:
                    doMerge(instr.args[2], after2)
                elif itype == dalvik.IfZ:
                    doMerge(instr.args[1], after2)
                elif itype == dalvik.Switch:
                    # the switch payload is attached to the instruction found
                    # at position args[1]; its values are relative offsets
                    switchdata = instr_d[instr.args[1]].switchdata
                    for offset in switchdata.values():
                        target = (instr.pos + offset) % (1<<32)
                        doMerge(target, cur)
            else:
                # instruction types without a callback leave the types unchanged
                after = cur

            # these instructions don't fallthrough
            if itype not in (dalvik.Return, dalvik.Throw, dalvik.Goto):
                doMerge(instr.pos2, after)

            # exception handlers
            # handlers receive the state from before the instruction (cur)
            if instr in all_handlers:
                for ctype, handler in all_handlers[instr]:
                    doMerge(handler, cur)
    return types, all_handlers
293 | 


--------------------------------------------------------------------------------
/enjarify/util.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2015 Google Inc. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
def keysToRanges(d, limit):
    """Fill the gaps between the integer keys of d, in place.

    Every key k already in d is extended over range(k, next_key), where
    next_key is the next larger key, or limit for the largest key; all keys
    of that range are mapped to d[k]. The same dict object is returned.
    """
    starts = sorted(d)
    # Each range ends where the next one begins; the last one ends at limit.
    ends = starts[1:]
    ends.append(limit)
    for lo, hi in zip(starts, ends):
        d.update(dict.fromkeys(range(lo, hi), d[lo]))
    return d
21 | 
def signExtend(val, size):
    """Interpret the low `size` bits of val as a two's complement integer.

    Bits at position `size` and above are discarded first (as s16/s32/s64 do
    for their fixed widths), so the result always lies in
    [-2**(size-1), 2**(size-1)). For inputs already in [0, 2**size) this is
    the plain sign extension.

    Args:
        val: integer holding the raw bits.
        size: width of the field in bits; must be at least 1.

    Returns:
        The signed value of the low `size` bits of val.
    """
    # Drop anything above the field so stray high bits (or a negative input)
    # cannot leak into the result.
    val &= (1 << size) - 1
    if val & (1 << (size - 1)):
        val -= 1 << size
    return val
26 | 
def s16(val):
    """Wrap val modulo 2**16 and return it as a signed value in [-2**15, 2**15)."""
    wrapped = val % 0x10000
    # The upper half of the unsigned range represents the negative numbers.
    return wrapped - 0x10000 if wrapped >= 0x8000 else wrapped
32 | 
def s32(val):
    """Wrap val modulo 2**32 and return it as a signed value in [-2**31, 2**31)."""
    wrapped = val % 0x100000000
    # The upper half of the unsigned range represents the negative numbers.
    return wrapped - 0x100000000 if wrapped >= 0x80000000 else wrapped
38 | 
def s64(val):
    """Wrap val modulo 2**64 and return it as a signed value in [-2**63, 2**63)."""
    wrapped = val % 0x10000000000000000
    # The upper half of the unsigned range represents the negative numbers.
    return wrapped - 0x10000000000000000 if wrapped >= 0x8000000000000000 else wrapped
44 | 


--------------------------------------------------------------------------------