├── CONTRIBUTING.txt ├── LICENSE.txt ├── README.md ├── debug.py ├── enjarify.bat ├── enjarify.sh └── enjarify ├── __init__.py ├── byteio.py ├── dalvik.py ├── dalvikformats.py ├── flags.py ├── jvm ├── __init__.py ├── arraytypes.py ├── constantpool.py ├── constants │ ├── __init__.py │ ├── calc.py │ ├── genlookup.py │ └── lookup.py ├── error.py ├── genmathops.py ├── ir.py ├── jvmops.py ├── mathops.py ├── optimization │ ├── __init__.py │ ├── consts.py │ ├── jumps.py │ ├── options.py │ ├── registers.py │ └── stack.py ├── scalartypes.py ├── writebytecode.py ├── writeclass.py └── writeir.py ├── main.py ├── mutf8.py ├── parsedex.py ├── treelist.py ├── typeinference ├── __init__.py └── typeinference.py └── util.py /CONTRIBUTING.txt: -------------------------------------------------------------------------------- 1 | Want to contribute? Great! First, read this page (including the small print at the end). 2 | 3 | ### Before you contribute 4 | Before we can use your code, you must sign the 5 | [Google Individual Contributor License Agreement](https://developers.google.com/open-source/cla/individual?csw=1) 6 | (CLA), which you can do online. The CLA is necessary mainly because you own the 7 | copyright to your changes, even after your contribution becomes part of our 8 | codebase, so we need your permission to use and distribute your code. We also 9 | need to be sure of various other things—for instance that you'll tell us if you 10 | know that your code infringes on other people's patents. You don't have to sign 11 | the CLA until after you've submitted your code for review and a member has 12 | approved it, but you must do it before we can put your code into our codebase. 13 | Before you start working on a larger contribution, you should get in touch with 14 | us first through the issue tracker with your idea so that we can help out and 15 | possibly guide you. Coordinating up front makes it much easier to avoid 16 | frustration later on. 17 | 18 | ### Code reviews 19 | All submissions, including submissions by project members, require review. We 20 | use Github pull requests for this purpose. 21 | 22 | ### The small print 23 | Contributions made by corporations are covered by a different agreement than 24 | the one above, the Software Grant and Corporate Contributor License Agreement. -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Introduction 2 | 3 | Enjarify is a tool for translating Dalvik bytecode to equivalent Java bytecode. This allows Java analysis tools to analyze Android applications. 4 | 5 | 6 | ### Usage and installation 7 | 8 | Enjarify is a pure python 3 application, so you can just git clone and run it. To run it directly, assuming you are in the top directory of the repository, you can just do 9 | 10 | python3 -O -m enjarify.main yourapp.apk 11 | 12 | For normal use, you'll probably want to use the wrapper scripts and set it up on your path. 13 | 14 | #### Linux 15 | 16 | For convenience, a wrapper shell script is provided, enjarify.sh. This will try to use Pypy if available, since it is faster than CPython. If you want to be able to call Enjarify from anywhere, you can create a symlink from somewhere on your PATH, such as ~/bin. To do this, assuming you are inside the top level of the repository, 17 | 18 | ln -s "$PWD/enjarify.sh" ~/bin/enjarify 19 | 20 | #### Windows 21 | 22 | A wrapper batch script, enjarify.bat, is provided. To be able to call it from anywhere, just add the root directory of the repository to your PATH. The batch script will always invoke python3 as interpreter. If you want to use pypy, just edit the script. 23 | 24 | #### Usage 25 | 26 | Assuming you set up the script on your path correctly, you can call it from anywhere by just typing enjarify, e.g. 27 | 28 | enjarify yourapp.apk 29 | 30 | The most basic form of usage is to just specify an apk file or dex file as input. If you specify a multidex apk, Enjarify will automatically translate all of the dex files and output the results in a single combined jar. If you specify a dex file, only that dex file will be translated. E.g. assuming you manually extracted the dex files you could do 31 | 32 | enjarify classes2.dex 33 | 34 | The default output file is [inputname]-enjarify.jar in the current directory. To specify the filename for the output explicitly, pass the -o or --output option. 35 | 36 | enjarify yourapp.apk -o yourapp.jar 37 | 38 | By default, Enjarify will refuse to overwrite the output file if it already exists. To overwrite the output, pass the -f or --force option. 39 | 40 | 41 | ### Why not dex2jar? 42 | 43 | Dex2jar is an older tool that also tries to translate Dalvik to Java bytecode. It works reasonably well most of the time, but a lot of obscure features or edge cases will cause it to fail or even silently produce incorrect results. By contrast, Enjarify is designed to work in as many cases as possible, even for code where Dex2jar would fail. Among other things, Enjarify correctly handles unicode class names, constants used as multiple types, implicit casts, exception handlers jumping into normal control flow, classes that reference too many constants, very long methods, exception handlers after a catchall handler, and static initial values of the wrong type. 44 | 45 | 46 | ### Limitations 47 | 48 | Currently, only version 35 dex files are supported. This means that the Java 8 related bytecode features introduced in Android N, O, and P are not supported. 49 | 50 | Enjarify does not currently translate optional metadata such as sourcefile attributes, line numbers, and annotations. 51 | 52 | Enjarify tries hard to successfully translate as many classes as possible, but there are some potential cases where it is simply not possible due to limitations in Android, Java, or both. Luckily, this only happens in contrived circumstances, so it shouldn't be a problem in practice. 53 | 54 | 55 | ### Performance tips 56 | 57 | PyPy is much faster than CPython. To install PyPy, see http://pypy.org/. Make sure you get PyPy3 rather than regular PyPy. The Linux wrapper script will automatically use the command pypy3 if available. On Windows, you'll need to edit the wrapper script yourself. 58 | 59 | By default, Enjarify runs optimizations on the bytecode which make it more readable for humans (copy propagation, unused value removal, etc.). If you don't need this, you can speed things up by disabling the optimizations with the --fast option. Note that in the very rare case where a class is too big to fit in a classfile without optimization, Enjarify will automatically retry it with all optimizations enabled, so this option does not affect the number of classes that are successfully translated. 60 | 61 | 62 | ### Disclaimer 63 | 64 | This is not an official Google product (experimental or otherwise), it is just code that happens to be owned by Google. 65 | -------------------------------------------------------------------------------- /debug.py: -------------------------------------------------------------------------------- 1 | import enjarify.main 2 | 3 | enjarify.main.main() 4 | -------------------------------------------------------------------------------- /enjarify.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | REM Copyright 2015 Google Inc. All Rights Reserved. 4 | REM 5 | REM Licensed under the Apache License, Version 2.0 (the "License"); 6 | REM you may not use this file except in compliance with the License. 7 | REM You may obtain a copy of the License at 8 | REM 9 | REM http://www.apache.org/licenses/LICENSE-2.0 10 | REM 11 | REM Unless required by applicable law or agreed to in writing, software 12 | REM distributed under the License is distributed on an "AS IS" BASIS, 13 | REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | REM See the License for the specific language governing permissions and 15 | REM limitations under the License. 16 | 17 | set PYTHONPATH=%~dp0 18 | py -3 -O -m enjarify.main %* 19 | -------------------------------------------------------------------------------- /enjarify.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2015 Google Inc. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | # Try to find a valid python3 command, preferring pypy if available 19 | function guess { 20 | if [ -z "$PYTHON" ]; then 21 | result=$($1 -c "print(range)" 2>/dev/null) 22 | if [ "$result" = "" ]; then 23 | PYTHON=$1 24 | fi 25 | fi 26 | } 27 | 28 | guess "pypy3" 29 | guess "python3" 30 | guess "pypy" 31 | guess "python" 32 | 33 | if [ -z "$PYTHON" ]; then 34 | echo "Unable to find python3 on path" 35 | else 36 | echo "Using $PYTHON as Python interpreter" 37 | 38 | # Find location of this bash script, and set its directory as the PYTHONPATH 39 | if [[ "$OSTYPE" == "darwin"* ]]; then 40 | READLINK="readlink" 41 | else 42 | READLINK="readlink -f" 43 | fi 44 | 45 | export PYTHONPATH=$(dirname "$($READLINK "${BASH_SOURCE[0]}")") 46 | 47 | # Now execute the actual program 48 | exec $PYTHON -O -m enjarify.main "$@" 49 | fi 50 | -------------------------------------------------------------------------------- /enjarify/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /enjarify/byteio.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import struct 16 | 17 | from .util import signExtend 18 | 19 | class Reader: 20 | def __init__(self, data, pos=0): 21 | self.data = data 22 | self.pos = pos 23 | 24 | def read(self, size): 25 | if not 0 <= size <= len(self.data) - self.pos: 26 | raise IndexError 27 | result = self.data[self.pos: self.pos+size] 28 | self.pos += size 29 | return result 30 | 31 | def _unpack(self, fmt): 32 | fmt = struct.Struct(fmt) 33 | return fmt.unpack_from(self.read(fmt.size))[0] 34 | 35 | def u8(self): return self.read(1)[0] 36 | def u16(self): return self._unpack('> 7: 44 | result ^= (self.data[self.pos] & 0x7f) << size 45 | size += 7 46 | self.pos += 1 47 | result ^= (self.data[self.pos] & 0x7f) << size 48 | size += 7 49 | self.pos += 1 50 | 51 | if signed: 52 | result = signExtend(result, size) 53 | return result 54 | 55 | def uleb128(self): return self._leb128() 56 | def sleb128(self): return self._leb128(signed=True) 57 | 58 | # Maintain strings in binary encoding instead of attempting to decode them 59 | # since the output will be using the same encoding anyway 60 | def readCStr(self): 61 | oldpos, self.pos = self.pos, self.data.find(b'\0', self.pos) 62 | return self.data[oldpos:self.pos] 63 | 64 | class Writer: 65 | def __init__(self): 66 | self.buf = bytearray() 67 | 68 | def write(self, s): 69 | self.buf += s 70 | 71 | def _pack(self, fmt, arg): 72 | return self.write(struct.pack(fmt, arg)) 73 | 74 | def u8(self, x): return self.write(bytes([x])) 75 | def u16(self, x): return self._pack('>H', x) 76 | def u32(self, x): return self._pack('>I', x) 77 | def u64(self, x): return self._pack('>Q', x) 78 | 79 | def toBytes(self): 80 | return bytes(self.buf) 81 | -------------------------------------------------------------------------------- /enjarify/dalvik.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import dalvikformats 16 | from . import util 17 | 18 | class DalvikInstruction: 19 | def __init__(self, type_, pos, newpos, opcode, args): 20 | self.type = type_ 21 | self.pos = pos 22 | self.pos2 = newpos 23 | self.opcode = opcode 24 | self.args = args 25 | 26 | self.implicit_casts = None 27 | self.prev_result = None # for move-result/exception 28 | self.fillarrdata = None 29 | self.switchdata = None 30 | 31 | _it = iter(range(999)) 32 | Nop = next(_it) 33 | Move = next(_it) 34 | MoveWide = next(_it) 35 | MoveResult = next(_it) 36 | Return = next(_it) 37 | Const32 = next(_it) 38 | Const64 = next(_it) 39 | ConstString = next(_it) 40 | ConstClass = next(_it) 41 | MonitorEnter = next(_it) 42 | MonitorExit = next(_it) 43 | CheckCast = next(_it) 44 | InstanceOf = next(_it) 45 | ArrayLen = next(_it) 46 | NewInstance = next(_it) 47 | NewArray = next(_it) 48 | FilledNewArray = next(_it) 49 | FillArrayData = next(_it) 50 | Throw = next(_it) 51 | Goto = next(_it) 52 | Switch = next(_it) 53 | Cmp = next(_it) 54 | If = next(_it) 55 | IfZ = next(_it) 56 | 57 | ArrayGet = next(_it) 58 | ArrayPut = next(_it) 59 | InstanceGet = next(_it) 60 | InstancePut = next(_it) 61 | StaticGet = next(_it) 62 | StaticPut = next(_it) 63 | 64 | # Invoke = next(_it) 65 | InvokeVirtual = next(_it) 66 | InvokeSuper = next(_it) 67 | InvokeDirect = next(_it) 68 | InvokeStatic = next(_it) 69 | InvokeInterface = next(_it) 70 | 71 | # actual ops for these are defined in jvm/mathops.py 72 | UnaryOp = next(_it) 73 | BinaryOp = next(_it) 74 | BinaryOpConst = next(_it) 75 | 76 | INVOKE_TYPES = InvokeVirtual, InvokeSuper, InvokeDirect, InvokeStatic, InvokeInterface 77 | 78 | # instructions which Dalvik considers to throw 79 | THROW_TYPES = INVOKE_TYPES + (ConstString, ConstClass, MonitorEnter, MonitorExit, CheckCast, InstanceOf, ArrayLen, NewArray, NewInstance, FilledNewArray, FillArrayData, Throw, ArrayGet, ArrayPut, InstanceGet, InstancePut, StaticGet, StaticPut, BinaryOp, BinaryOpConst) 80 | # last two only if it is int/long div or rem 81 | 82 | # ignore the possiblity of linkage errors (i.e. constants and instanceof can't throw) 83 | # in theory MonitorExit can't throw either due to the structured locking checks, but these are broken and work inconsistently 84 | PRUNED_THROW_TYPES = INVOKE_TYPES + (MonitorEnter, MonitorExit, CheckCast, ArrayLen, NewArray, NewInstance, FilledNewArray, FillArrayData, Throw, ArrayGet, ArrayPut, InstanceGet, InstancePut, StaticGet, StaticPut, BinaryOp, BinaryOpConst) 85 | 86 | OPCODES = util.keysToRanges({ 87 | 0x00: Nop, 88 | 0x01: Move, 89 | 0x04: MoveWide, 90 | 0x07: Move, 91 | 0x0a: MoveResult, 92 | 0x0e: Return, 93 | 0x12: Const32, 94 | 0x16: Const64, 95 | 0x1a: ConstString, 96 | 0x1c: ConstClass, 97 | 0x1d: MonitorEnter, 98 | 0x1e: MonitorExit, 99 | 0x1f: CheckCast, 100 | 0x20: InstanceOf, 101 | 0x21: ArrayLen, 102 | 0x22: NewInstance, 103 | 0x23: NewArray, 104 | 0x24: FilledNewArray, 105 | 0x26: FillArrayData, 106 | 0x27: Throw, 107 | 0x28: Goto, 108 | 0x2b: Switch, 109 | 0x2d: Cmp, 110 | 0x32: If, 111 | 0x38: IfZ, 112 | 0x3e: Nop, # unused 113 | 0x44: ArrayGet, 114 | 0x4b: ArrayPut, 115 | 0x52: InstanceGet, 116 | 0x59: InstancePut, 117 | 0x60: StaticGet, 118 | 0x67: StaticPut, 119 | 0x6e: InvokeVirtual, 120 | 0x6f: InvokeSuper, 121 | 0x70: InvokeDirect, 122 | 0x71: InvokeStatic, 123 | 0x72: InvokeInterface, 124 | 0x73: Nop, # unused 125 | 0x74: InvokeVirtual, 126 | 0x75: InvokeSuper, 127 | 0x76: InvokeDirect, 128 | 0x77: InvokeStatic, 129 | 0x78: InvokeInterface, 130 | 0x79: Nop, # unused 131 | 0x7b: UnaryOp, 132 | 0x90: BinaryOp, 133 | 0xd0: BinaryOpConst, 134 | 0xe3: Nop, # unused 135 | }, 256) 136 | 137 | 138 | def parseInstruction(dex, insns_start_pos, shorts, pos): 139 | word = shorts[pos] 140 | opcode = word & 0xFF 141 | newpos, args = dalvikformats.decode(shorts, pos, opcode) 142 | 143 | # parse special data instructions 144 | switchdata = None 145 | fillarrdata = None 146 | if word == 0x100 or word == 0x200: #switch 147 | size = shorts[pos+1] 148 | st = dex.stream(insns_start_pos + pos*2 + 4) 149 | 150 | if word == 0x100: #packed 151 | first_key = st.u32() 152 | targets = [st.u32() for _ in range(size)] 153 | newpos = pos + 2 + (1 + size)*2 154 | switchdata = {(i+first_key):x for i,x in enumerate(targets)} 155 | else: #sparse 156 | keys = [st.u32() for _ in range(size)] 157 | targets = [st.u32() for _ in range(size)] 158 | newpos = pos + 2 + (size + size)*2 159 | switchdata = dict(zip(keys, targets)) 160 | 161 | if word == 0x300: 162 | width = shorts[pos+1] % 16 163 | size = shorts[pos+2] ^ (shorts[pos+3] << 16) 164 | newpos = pos + ((size * width + 1) // 2 + 4) 165 | # get array data 166 | stream = dex.stream(insns_start_pos + pos*2 + 8) 167 | func = { 168 | 1: stream.u8, 169 | 2: stream.u16, 170 | 4: stream.u32, 171 | 8: stream.u64 172 | }[width] 173 | fillarrdata = width, [func() for _ in range(size)] 174 | 175 | # warning, this must go below the special data handling that calculates newpos 176 | instruction = DalvikInstruction(OPCODES[opcode], pos, newpos, opcode, args) 177 | instruction.fillarrdata = fillarrdata 178 | instruction.switchdata = switchdata 179 | 180 | return newpos, instruction 181 | 182 | def parseBytecode(dex, insns_start_pos, shorts, catch_addrs): 183 | ops = [] 184 | pos = 0 185 | while pos < len(shorts): 186 | pos, op = parseInstruction(dex, insns_start_pos, shorts, pos) 187 | ops.append(op) 188 | 189 | # Fill in data for move-result 190 | for instr, instr2 in zip(ops, ops[1:]): 191 | if not instr2.type == MoveResult: 192 | continue 193 | if instr.type in INVOKE_TYPES: 194 | called_id = dex.method_id(instr.args[0]) 195 | if called_id.return_type != b'V': 196 | instr2.prev_result = called_id.return_type 197 | elif instr.type == FilledNewArray: 198 | instr2.prev_result = dex.type(instr.args[0]) 199 | elif instr2.pos in catch_addrs: 200 | instr2.prev_result = b'Ljava/lang/Throwable;' 201 | assert 0 not in catch_addrs 202 | 203 | # Fill in implicit cast data 204 | for i, instr in enumerate(ops): 205 | if instr.opcode in (0x38, 0x39): # if-eqz, if-nez 206 | if i > 0 and ops[i-1].type == InstanceOf: 207 | prev = ops[i-1] 208 | desc_ind = prev.args[2] 209 | regs = {prev.args[1]} 210 | 211 | if i > 1 and ops[i-2].type == Move: 212 | prev2 = ops[i-2] 213 | if prev2.args[0] == prev.args[1]: 214 | regs.add(prev2.args[1]) 215 | # Don't cast result of instanceof if it overwrites the input 216 | regs.discard(prev.args[0]) 217 | if regs: 218 | instr.implicit_casts = desc_ind, sorted(regs) 219 | return ops 220 | -------------------------------------------------------------------------------- /enjarify/dalvikformats.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import util 16 | 17 | # Code for parsing the various Dalvik opcode formats 18 | INSTRUCTION_FORMAT = util.keysToRanges({ 19 | 0x00: '10x', 20 | 0x01: '12x', 21 | 0x02: '22x', 22 | 0x03: '32x', 23 | 0x04: '12x', 24 | 0x05: '22x', 25 | 0x06: '32x', 26 | 0x07: '12x', 27 | 0x08: '22x', 28 | 0x09: '32x', 29 | 0x0a: '11x', 30 | 0x0b: '11x', 31 | 0x0c: '11x', 32 | 0x0d: '11x', 33 | 0x0e: '10x', 34 | 0x0f: '11x', 35 | 0x10: '11x', 36 | 0x11: '11x', 37 | 0x12: '11n', 38 | 0x13: '21s', 39 | 0x14: '31i', 40 | 0x15: '21h', 41 | 0x16: '21s', 42 | 0x17: '31i', 43 | 0x18: '51l', 44 | 0x19: '21h', 45 | 0x1a: '21c', 46 | 0x1b: '31c', 47 | 0x1c: '21c', 48 | 0x1d: '11x', 49 | 0x1e: '11x', 50 | 0x1f: '21c', 51 | 0x20: '22c', 52 | 0x21: '12x', 53 | 0x22: '21c', 54 | 0x23: '22c', 55 | 0x24: '35c', 56 | 0x25: '3rc', 57 | 0x26: '31t', 58 | 0x27: '11x', 59 | 0x28: '10t', 60 | 0x29: '20t', 61 | 0x2a: '30t', 62 | 0x2b: '31t', 63 | 0x2c: '31t', 64 | 0x2d: '23x', 65 | 0x32: '22t', 66 | 0x38: '21t', 67 | 0x3e: '10x', 68 | 0x44: '23x', 69 | 0x52: '22c', 70 | 0x60: '21c', 71 | 0x6e: '35c', 72 | 0x73: '10x', 73 | 0x74: '3rc', 74 | 0x79: '10x', 75 | 0x7b: '12x', 76 | 0x90: '23x', 77 | 0xb0: '12x', 78 | 0xd0: '22s', 79 | 0xd8: '22b', 80 | 0xe3: '10x', 81 | }, 256) 82 | 83 | # parsing funcs 84 | def p00op(w): return [] 85 | def pBAop(w): return [(w >> 8) & 0xF, w >> 12] 86 | def pAAop(w): return [w >> 8] 87 | def p00opAAAA(w, w2): return [w2] 88 | def pAAopBBBB(w, w2): return [w >> 8, w2] 89 | def pAAopCCBB(w, w2): return [w >> 8, w2 & 0xFF, w2 >> 8] 90 | def pBAopCCCC(w, w2): return [(w >> 8) & 0xF, w >> 12, w2] 91 | def p00opAAAAAAAA(w, w2, w3): return [w2 ^ (w3 << 16)] 92 | def p00opAAAABBBB(w, w2, w3): return [w2, w3] 93 | def pAAopBBBBBBBB(w, w2, w3): return [w >> 8, w2 ^ (w3 << 16)] 94 | 95 | def pAGopBBBBFEDC(w, w2, w3): 96 | a = w >> 12 97 | c, d, e, f = (w3) & 0xF, (w3 >> 4) & 0xF, (w3 >> 8) & 0xF, (w3 >> 12) & 0xF 98 | g = (w >> 8) & 0xF 99 | return [w2, [c, d, e, f, g][:a]] 100 | 101 | def pAAopBBBBCCCC(w, w2, w3): 102 | a = w >> 8 103 | return [w2, range(w3, w3+a)] 104 | 105 | def pAAopBBBBBBBBBBBBBBBB(w, w2, w3, w4, w5): 106 | b = w2 ^ (w3 << 16) ^ (w4 << 32) ^ (w5 << 48) 107 | return [w >> 8, b] 108 | 109 | _FUNC = { 110 | '10x': p00op, 111 | '12x': pBAop, 112 | '11n': pBAop, 113 | '11x': pAAop, 114 | '10t': pAAop, 115 | '20t': p00opAAAA, 116 | '22x': pAAopBBBB, 117 | '21t': pAAopBBBB, 118 | '21s': pAAopBBBB, 119 | '21h': pAAopBBBB, 120 | '21c': pAAopBBBB, 121 | '23x': pAAopCCBB, 122 | '22b': pAAopCCBB, 123 | '22t': pBAopCCCC, 124 | '22s': pBAopCCCC, 125 | '22c': pBAopCCCC, 126 | '30t': p00opAAAAAAAA, 127 | '32x': p00opAAAABBBB, 128 | '31i': pAAopBBBBBBBB, 129 | '31t': pAAopBBBBBBBB, 130 | '31c': pAAopBBBBBBBB, 131 | '35c': pAGopBBBBFEDC, 132 | '3rc': pAAopBBBBCCCC, 133 | '51l': pAAopBBBBBBBBBBBBBBBB, 134 | } 135 | 136 | def sign(x, bits): 137 | if x >= (1 << (bits-1)): 138 | x -= 1 << bits 139 | return x 140 | 141 | def decode(shorts, pos, opcode): 142 | fmt = INSTRUCTION_FORMAT[opcode] 143 | size = int(fmt[0]) 144 | results = _FUNC[fmt](*shorts[pos:pos+size]) 145 | # Check if we need to sign extend 146 | if fmt[2] == 'n': 147 | results[-1] = sign(results[-1], 4) 148 | elif fmt[2] == 'b' or (fmt[2] == 't' and size == 1): 149 | results[-1] = sign(results[-1], 8) 150 | elif fmt[2] == 's' or (fmt[2] == 't' and size == 2): 151 | results[-1] = sign(results[-1], 16) 152 | elif fmt[2] == 't' and size == 3: 153 | results[-1] = sign(results[-1], 32) 154 | 155 | # Hats depend on actual size expected, so we rely on opcode as a hack 156 | if fmt[2] == 'h': 157 | assert opcode == 0x15 or opcode == 0x19 158 | results[-1] = results[-1] << (16 if opcode == 0x15 else 48) 159 | 160 | # Convert code offsets to actual code position 161 | if fmt[2] == 't': 162 | results[-1] += pos 163 | return pos + size, results 164 | -------------------------------------------------------------------------------- /enjarify/flags.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | ACC_PUBLIC = 0x1 16 | ACC_PRIVATE = 0x2 17 | ACC_PROTECTED = 0x4 18 | ACC_STATIC = 0x8 19 | ACC_FINAL = 0x10 20 | ACC_SYNCHRONIZED = 0x20 21 | ACC_VOLATILE = 0x40 22 | ACC_BRIDGE = 0x40 23 | ACC_TRANSIENT = 0x80 24 | ACC_VARARGS = 0x80 25 | ACC_NATIVE = 0x100 26 | ACC_INTERFACE = 0x200 27 | ACC_ABSTRACT = 0x400 28 | ACC_STRICT = 0x800 29 | ACC_SYNTHETIC = 0x1000 30 | ACC_ANNOTATION = 0x2000 31 | ACC_ENUM = 0x4000 32 | ACC_CONSTRUCTOR = 0x10000 33 | ACC_DECLARED_SYNCHRONIZED = 0x20000 34 | 35 | # Might as well include this for completeness even though modern JVMs ignore it 36 | ACC_SUPER = 0x20 37 | 38 | CLASS_FLAGS = ACC_PUBLIC | ACC_FINAL | ACC_SUPER | ACC_INTERFACE | ACC_ABSTRACT | ACC_SYNTHETIC | ACC_ANNOTATION | ACC_ENUM 39 | FIELD_FLAGS = ACC_PUBLIC | ACC_PRIVATE | ACC_PROTECTED | ACC_STATIC | ACC_FINAL | ACC_VOLATILE | ACC_TRANSIENT | ACC_SYNTHETIC | ACC_ENUM 40 | METHOD_FLAGS = ACC_PUBLIC | ACC_PRIVATE | ACC_PROTECTED | ACC_STATIC | ACC_FINAL | ACC_SYNCHRONIZED | ACC_BRIDGE | ACC_VARARGS | ACC_NATIVE | ACC_ABSTRACT | ACC_STRICT | ACC_SYNTHETIC 41 | -------------------------------------------------------------------------------- /enjarify/jvm/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /enjarify/jvm/arraytypes.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import scalartypes as scalars 16 | 17 | # Array type inference - 18 | # For object arrays, we don't actually care which type of object it is, so we just 19 | # use a single value for them (INVALID) and assume all such values are an object 20 | # array of some type. For primative arrays, we just use the entire array descriptor 21 | # e.g. b'[[[C', except that bool arrays are treated as byte arrays. 22 | # For null we use a special marker object 23 | 24 | # These strings can't be valid descriptors so there's no conflict 25 | INVALID = b'INVALID' 26 | NULL = b'NULL' 27 | 28 | def merge(t1, t2): 29 | if t1 is NULL: 30 | return t2 31 | if t2 is NULL: 32 | return t1 33 | return t1 if (t1 == t2) else INVALID 34 | 35 | # intersect types 36 | def narrow(t1, t2): 37 | if t1 is INVALID: 38 | return t2 39 | if t2 is INVALID: 40 | return t1 41 | return t1 if (t1 == t2) else NULL 42 | 43 | def eletPair(t): 44 | assert t is not NULL 45 | if t is INVALID: 46 | return scalars.OBJ, t 47 | 48 | assert t.startswith(b'[') 49 | t = t[1:] 50 | return scalars.fromDesc(t), t 51 | 52 | def fromDesc(desc): 53 | if not desc.startswith(b'[') or desc.endswith(b';'): 54 | return INVALID 55 | return desc 56 | -------------------------------------------------------------------------------- /enjarify/jvm/constantpool.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import struct 16 | 17 | from . import error 18 | 19 | CONSTANT_Class = 7 20 | CONSTANT_Fieldref = 9 21 | CONSTANT_Methodref = 10 22 | CONSTANT_InterfaceMethodref = 11 23 | CONSTANT_String = 8 24 | CONSTANT_Integer = 3 25 | CONSTANT_Float = 4 26 | CONSTANT_Long = 5 27 | CONSTANT_Double = 6 28 | CONSTANT_NameAndType = 12 29 | CONSTANT_Utf8 = 1 30 | # CONSTANT_MethodHandle = 15 31 | # CONSTANT_MethodType = 16 32 | # CONSTANT_InvokeDynamic = 18 33 | MAX_CONST = CONSTANT_NameAndType 34 | 35 | def _width(tag): 36 | return 2 if tag in (CONSTANT_Long, CONSTANT_Double) else 1 37 | 38 | class ConstantPoolBase: 39 | def __init__(self): 40 | # lookup dicts for deduplicating constants 41 | self.lookup = [{} for _ in range(MAX_CONST + 1)] 42 | 43 | def _get(self, tag, args): 44 | d = self.lookup[tag] 45 | try: 46 | return d[args] 47 | except KeyError: 48 | low = tag in (CONSTANT_Integer, CONSTANT_Float, CONSTANT_String) 49 | d[args] = index = self._getInd(low, _width(tag)) 50 | 51 | assert self.vals[index] is None 52 | self.vals[index] = tag, args 53 | return d[args] 54 | 55 | def insertDirectly(self, pair, low): 56 | tag, x = pair 57 | d = self.lookup[tag] 58 | d[x] = index = self._getInd(low, _width(tag)) 59 | self.vals[index] = pair 60 | 61 | def tryGet(self, pair): 62 | tag, x = pair 63 | d = self.lookup[tag] 64 | try: 65 | return d[x] 66 | except KeyError: 67 | pass 68 | width = _width(tag) 69 | if width > self.space(): 70 | return None 71 | d[x] = index = self._getInd(True, width) 72 | self.vals[index] = pair 73 | return index 74 | 75 | def utf8(self, s): 76 | assert isinstance(s, bytes) 77 | return self._get(CONSTANT_Utf8, s) 78 | 79 | def class_(self, s): return self._get(CONSTANT_Class, self.utf8(s)) 80 | def string(self, s): return self._get(CONSTANT_String, self.utf8(s)) 81 | 82 | def nat(self, name, desc): 83 | return self._get(CONSTANT_NameAndType, (self.utf8(name), self.utf8(desc))) 84 | 85 | def _triple(self, tag, trip): 86 | return self._get(tag, (self.class_(trip[0]), self.nat(trip[1], trip[2]))) 87 | 88 | def field(self, trip): return self._triple(CONSTANT_Fieldref, trip) 89 | def method(self, trip): return self._triple(CONSTANT_Methodref, trip) 90 | def imethod(self, trip): return self._triple(CONSTANT_InterfaceMethodref, trip) 91 | 92 | def int(self, x): return self._get(CONSTANT_Integer, x) 93 | def float(self, x): return self._get(CONSTANT_Float, x) 94 | def long(self, x): return self._get(CONSTANT_Long, x) 95 | def double(self, x): return self._get(CONSTANT_Double, x) 96 | 97 | def _writeEntry(self, stream, item): 98 | if item is None: 99 | return 100 | tag, val = item 101 | stream.u8(tag) 102 | 103 | if tag == CONSTANT_Utf8: 104 | stream.u16(len(val)) 105 | stream.write(val) 106 | elif tag in (CONSTANT_Integer, CONSTANT_Float): 107 | stream.u32(val) 108 | elif tag in (CONSTANT_Long, CONSTANT_Double): 109 | stream.u64(val) 110 | elif tag in (CONSTANT_Class, CONSTANT_String): 111 | stream.u16(val) 112 | else: 113 | stream.u16(val[0]) 114 | stream.u16(val[1]) 115 | 116 | # A simple constant pool that just allocates slots in increasing order. 117 | class SimpleConstantPool(ConstantPoolBase): 118 | def __init__(self): 119 | super().__init__() 120 | self.vals = [None] 121 | 122 | def space(self): return 65535 - len(self.vals) 123 | def lowspace(self): return 256 - len(self.vals) 124 | 125 | def _getInd(self, low, width): 126 | if self.space() < width: 127 | raise error.ClassfileLimitExceeded() 128 | temp = len(self.vals) 129 | self.vals += [None]*width 130 | return temp 131 | 132 | def write(self, stream): 133 | stream.u16(len(self.vals)) 134 | for item in self.vals: 135 | self._writeEntry(stream, item) 136 | 137 | # Constant pool slots 1-255 are special because they can be referred to by the 138 | # two byte ldc instruction (as opposed to 3 byte ldc_w/ldc2_w). Therefore, it is 139 | # desireable to allocate constants which could use ldc in the first 255 slots, 140 | # while not wasting these valuable low slots with pool entries that can't use 141 | # ldc (utf8s, longs, etc.) 142 | # One possible approach is to allocate the ldc entries starting at 1 and the 143 | # others starting at 256, (possibly leaving a gap if there are less than 255 of 144 | # the former). However, this is not ideal because the empty slots are not 145 | # continguous. This means that you could end up in the sitatuation where there 146 | # are exactly two free slots and you wish to add a long/double entry but the 147 | # free slots are not continguous. 148 | # To solve this, we take a different approach - always create the pool as the 149 | # largest possible size (65534 entries) and allocate the non-ldc constants 150 | # starting from the highest index and counting down. This ensures that the free 151 | # slots are always contiguous. Since the classfile representation doesn't 152 | # actually allow gaps like that, the empty spaces if any are filled in with 153 | # dummy entries at the end. 154 | # For simplicity, we always allocate ints, floats, and strings in the low entries 155 | # and everything else in the high entries, regardless of whether they are actaully 156 | # referenced by a ldc or not. (see ConstantPoolBase._get) 157 | 158 | # Fill in unused space with shortest possible item (Utf8 ''), preencoded for efficiency 159 | PLACEHOLDER_ENTRY = struct.pack('>BH', CONSTANT_Utf8, 0) 160 | class SplitConstantPool(ConstantPoolBase): 161 | def __init__(self): 162 | super().__init__() 163 | self.vals = [None]*65535 164 | self.bot = 1 165 | self.top = len(self.vals) 166 | 167 | def space(self): return self.top - self.bot 168 | def lowspace(self): return 256 - self.bot 169 | 170 | def _getInd(self, low, width): 171 | if self.space() < width: 172 | raise error.ClassfileLimitExceeded() 173 | if low: 174 | self.bot += width 175 | return self.bot - width 176 | self.top -= width 177 | return self.top 178 | 179 | def write(self, stream): 180 | stream.u16(len(self.vals)) 181 | 182 | assert self.bot <= self.top 183 | for item in self.vals[:self.bot]: 184 | self._writeEntry(stream, item) 185 | 186 | stream.write(PLACEHOLDER_ENTRY * self.space()) 187 | 188 | for item in self.vals[self.top:]: 189 | self._writeEntry(stream, item) 190 | -------------------------------------------------------------------------------- /enjarify/jvm/constants/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /enjarify/jvm/constants/calc.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from ...util import s16, s32, s64 16 | from .. import scalartypes as scalars 17 | from ..jvmops import * 18 | 19 | from . import lookup 20 | from .genlookup import FLOAT_SIGN, FLOAT_INF, FLOAT_NINF, FLOAT_NAN, DOUBLE_SIGN, DOUBLE_INF, DOUBLE_NINF, DOUBLE_NAN 21 | 22 | # Calculate a sequence of bytecode instructions to generate the given constant 23 | # to be used in the rare case that the constant pool is full. 24 | 25 | # NaN has multiple representations, so normalize Floats to a single NaN representation 26 | def normalizeFloat(x): 27 | x %= 1<<32 28 | if x | FLOAT_SIGN > FLOAT_NINF: 29 | return FLOAT_NAN 30 | return x 31 | 32 | def normalizeDouble(x): 33 | x %= 1<<64 34 | if x | DOUBLE_SIGN > DOUBLE_NINF: 35 | return DOUBLE_NAN 36 | return x 37 | 38 | def _calcInt(x): 39 | assert x == s32(x) 40 | if x in lookup.INTS: 41 | return lookup.INTS[x] 42 | 43 | # max required - 10 bytes 44 | # (high << 16) ^ low 45 | low = s16(x) 46 | high = (x ^ low) >> 16 47 | assert high 48 | if not low: 49 | return _calcInt(high) + _calcInt(16) + bytes([ISHL]) 50 | return _calcInt(high) + _calcInt(16) + bytes([ISHL]) + _calcInt(low) + bytes([IXOR]) 51 | 52 | def _calcLong(x): 53 | assert x == s64(x) 54 | if x in lookup.LONGS: 55 | return lookup.LONGS[x] 56 | 57 | # max required - 26 bytes 58 | # (high << 32) ^ low 59 | low = s32(x) 60 | high = (x ^ low) >> 32 61 | if not high: 62 | return _calcInt(low) + bytes([I2L]) 63 | 64 | result = _calcInt(high) + bytes([I2L]) + _calcInt(32) + bytes([LSHL]) 65 | if low: 66 | result += _calcInt(low) + bytes([I2L, LXOR]) 67 | return result 68 | 69 | def _calcFloat(x): 70 | assert x == normalizeFloat(x) 71 | if x in lookup.FLOATS: 72 | return lookup.FLOATS[x] 73 | 74 | # max required - 27 bytes 75 | exponent = ((x >> 23) & 0xFF) - 127 76 | mantissa = x % (1<<23) 77 | # check for denormals! 78 | if exponent == -127: 79 | exponent += 1 80 | else: 81 | mantissa += 1<<23 82 | exponent -= 23 83 | 84 | if x & FLOAT_SIGN: 85 | mantissa = -mantissa 86 | 87 | ex_combine_op = FDIV if exponent < 0 else FMUL 88 | exponent = abs(exponent) 89 | exponent_parts = bytearray() 90 | while exponent >= 63: # max 2 iterations since -149 <= exp <= 104 91 | exponent_parts.extend([LCONST_1, ICONST_M1, LSHL, L2F, ex_combine_op]) 92 | mantissa = -mantissa 93 | exponent -= 63 94 | 95 | if exponent > 0: 96 | exponent_parts.append(LCONST_1) 97 | exponent_parts.extend(_calcInt(exponent)) 98 | exponent_parts.extend([LSHL, L2F, ex_combine_op]) 99 | return _calcInt(mantissa) + bytes([I2F]) + exponent_parts 100 | 101 | def _calcDouble(x): 102 | assert x == normalizeDouble(x) 103 | if x in lookup.DOUBLES: 104 | return lookup.DOUBLES[x] 105 | 106 | # max required - 55 bytes 107 | exponent = ((x >> 52) & 0x7FF) - 1023 108 | mantissa = x % (1<<52) 109 | # check for denormals! 110 | if exponent == -1023: 111 | exponent += 1 112 | else: 113 | mantissa += 1<<52 114 | exponent -= 52 115 | 116 | if x & DOUBLE_SIGN: 117 | mantissa = -mantissa 118 | 119 | abs_exponent = abs(exponent) 120 | exponent_parts = bytearray() 121 | 122 | part63 = abs_exponent // 63 123 | if part63: #create *63 part of exponent by repeated squaring 124 | # use 2^-x instead of calculating 2^x and dividing to avoid overflow in 125 | # case we need 2^-1071 126 | if exponent < 0: # -2^-63 127 | exponent_parts.extend([DCONST_1, LCONST_1, ICONST_M1, LSHL, L2D, DDIV]) 128 | else: # -2^63 129 | exponent_parts.extend([LCONST_1, ICONST_M1, LSHL, L2D]) 130 | # adjust sign of mantissa for odd powers since we're actually using -2^63 rather than positive 131 | if part63 & 1: 132 | mantissa = -mantissa 133 | 134 | last_needed = part63 & 1 135 | stack = [1] # Not actually required to compute the results - it's just used for a sanity check 136 | for bi in range(1, part63.bit_length()): 137 | exponent_parts.append(DUP2) 138 | stack.append(stack[-1]) 139 | if last_needed: 140 | exponent_parts.append(DUP2) 141 | stack.append(stack[-1]) 142 | exponent_parts.append(DMUL) 143 | stack.append(stack.pop() + stack.pop()) 144 | last_needed = part63 & (1< sipush 128 26 | # -65535 -> iconst_m1 i2c ineg 27 | # 2147483647 -> iconst_m1 iconst_m1 iushr 28 | # 1L -> lconst_1 29 | # 127L -> bipush 127 i2l 30 | # 42.0f -> bipush 42 i2f 31 | # -Inf -> dconst_1 dneg dconst_0 ddiv 32 | # 33 | # Lookup table keys are s32/s64 for ints/longs and u32/u64 for floats/doubles 34 | # There are multiple NaN representations, so we normalize NaNs to the 35 | # representation of all 1s (e.g. float NaN = 0xFFFFFFFF) 36 | 37 | def u32(x): return x % (1<<32) 38 | def u64(x): return x % (1<<64) 39 | 40 | FLOAT_SIGN = 1<<31 41 | FLOAT_NAN = u32(-1) 42 | FLOAT_INF = 0xFF << 23 43 | FLOAT_NINF = FLOAT_INF ^ FLOAT_SIGN 44 | def i2f(x): 45 | if x == 0: 46 | return 0 47 | if x < 0: 48 | return i2f(-x) ^ FLOAT_SIGN 49 | shift = 24 - x.bit_length() 50 | # Don't bother implementing rounding since we'll only convert small ints 51 | # that can be exactly represented anyway 52 | assert shift >= 0 53 | mantissa = x << shift 54 | exponent = shift + 127 55 | return (exponent << 23) | mantissa 56 | 57 | DOUBLE_SIGN = 1<<63 58 | DOUBLE_NAN = u64(-1) 59 | DOUBLE_INF = 0x7FF << 52 60 | DOUBLE_NINF = DOUBLE_INF ^ DOUBLE_SIGN 61 | def i2d(x): 62 | if x == 0: 63 | return 0 64 | if x < 0: 65 | return i2d(-x) ^ DOUBLE_SIGN 66 | shift = 53 - x.bit_length() 67 | assert shift >= 0 68 | mantissa = x << shift 69 | exponent = shift + 1023 70 | return (exponent << 52) | mantissa 71 | 72 | # add if value is shorter then current best 73 | def add(d, k, v): 74 | if k not in d or len(v) < len(d[k]): 75 | d[k] = v 76 | 77 | if __name__ == "__main__": 78 | # int constants 79 | all_ints = {} 80 | 81 | # 1 byte ints 82 | for i in range(-1, 6): 83 | add(all_ints, i, bytes([ICONST_0 + i])) 84 | # Sort for determinism. Otherwise -0x80000000 could be either 85 | # 1 << -1 or -1 << -1, for example 86 | int_1s = sorted({k for k,v in all_ints.items() if len(v) == 1}) 87 | 88 | # 2 byte ints 89 | for i in range(-128, 128): 90 | add(all_ints, i, struct.pack('>Bb', BIPUSH, i)) 91 | for i in int_1s: 92 | add(all_ints, i % 65536, all_ints[i] + bytes([I2C])) 93 | int_2s = sorted({k for k,v in all_ints.items() if len(v) == 2}) 94 | 95 | # 3 byte ints 96 | for i in range(-32768, 32768): 97 | add(all_ints, i, struct.pack('>Bh', SIPUSH, i)) 98 | for i in int_2s: 99 | add(all_ints, i % 65536, all_ints[i] + bytes([I2C])) 100 | add(all_ints, s32(-i), all_ints[i] + bytes([INEG])) 101 | for x, y in itertools.product(int_1s, int_1s): 102 | add(all_ints, s32(x << (y % 32)), all_ints[x] + all_ints[y] + bytes([ISHL])) 103 | add(all_ints, s32(x >> (y % 32)), all_ints[x] + all_ints[y] + bytes([ISHR])) 104 | add(all_ints, s32(u32(x) >> (y % 32)), all_ints[x] + all_ints[y] + bytes([IUSHR])) 105 | 106 | # long constants 107 | all_longs = {} 108 | for i in range(0, 2): 109 | add(all_longs, i, bytes([LCONST_0 + i])) 110 | 111 | for i in int_1s + int_2s: 112 | add(all_longs, i, all_ints[i] + bytes([I2L])) 113 | 114 | # float constants 115 | all_floats = {} 116 | for i in range(0, 2): 117 | add(all_floats, i2f(i), bytes([FCONST_0 + i])) 118 | 119 | for i in int_1s + int_2s: 120 | add(all_floats, i2f(i), all_ints[i] + bytes([I2F])) 121 | 122 | # hardcode unusual float values for simplicity 123 | add(all_floats, FLOAT_SIGN, bytes([FCONST_0, FNEG])) # -0.0 124 | add(all_floats, FLOAT_NAN, bytes([FCONST_0, FCONST_0, FDIV])) # NaN 125 | add(all_floats, FLOAT_INF, bytes([FCONST_1, FCONST_0, FDIV])) # Inf 126 | add(all_floats, FLOAT_NINF, bytes([FCONST_1, FNEG, FCONST_0, FDIV])) # -Inf 127 | 128 | # double constants 129 | all_doubles = {} 130 | for i in range(0, 2): 131 | add(all_doubles, i2d(i), bytes([DCONST_0 + i])) 132 | 133 | for i in int_1s + int_2s: 134 | add(all_doubles, i2d(i), all_ints[i] + bytes([I2D])) 135 | 136 | add(all_doubles, DOUBLE_SIGN, bytes([DCONST_0, DNEG])) # -0.0 137 | add(all_doubles, DOUBLE_NAN, bytes([DCONST_0, DCONST_0, DDIV])) # NaN 138 | add(all_doubles, DOUBLE_INF, bytes([DCONST_1, DCONST_0, DDIV])) # Inf 139 | add(all_doubles, DOUBLE_NINF, bytes([DCONST_1, DNEG, DCONST_0, DDIV])) # -Inf 140 | 141 | print(''' 142 | # Copyright 2015 Google Inc. All Rights Reserved. 143 | # 144 | # Licensed under the Apache License, Version 2.0 (the "License"); 145 | # you may not use this file except in compliance with the License. 146 | # You may obtain a copy of the License at 147 | # 148 | # http://www.apache.org/licenses/LICENSE-2.0 149 | # 150 | # Unless required by applicable law or agreed to in writing, software 151 | # distributed under the License is distributed on an "AS IS" BASIS, 152 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 153 | # See the License for the specific language governing permissions and 154 | # limitations under the License. 155 | 156 | # Autogenerated by genlookup.py - do not edit''') 157 | 158 | for name, d in zip('INTS LONGS FLOATS DOUBLES'.split(), [all_ints, all_longs, all_floats, all_doubles]): 159 | print(name + ' = {') 160 | for k, v in sorted(d.items()): 161 | print(' {}: {},'.format(hex(k), v)) 162 | print('}') -------------------------------------------------------------------------------- /enjarify/jvm/error.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # e.g. too many registers in a method, too many constant pool entries, code too long 16 | class ClassfileLimitExceeded(Exception): pass 17 | -------------------------------------------------------------------------------- /enjarify/jvm/genmathops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Generate mathops.py, the lookup tables giving information about dalvik math operations by opcode 16 | if __name__ == "__main__": 17 | unary = 'ineg inot lneg lnot fneg dneg i2l i2f i2d l2i l2f l2d f2i f2l f2d d2i d2l d2f i2b i2c i2s' 18 | binary = 'iadd isub imul idiv irem iand ior ixor ishl ishr iushr ladd lsub lmul ldiv lrem land lor lxor lshl lshr lushr fadd fsub fmul fdiv frem dadd dsub dmul ddiv drem' 19 | binary = binary + ' ' + binary 20 | binlit = 'iadd isub imul idiv irem iand ior ixor ' 21 | binlit = binlit + binlit + 'ishl ishr iushr' 22 | stypes = dict(zip('ifldbcs', 'INT FLOAT LONG DOUBLE INT INT INT'.split())) 23 | 24 | print(''' 25 | # Copyright 2015 Google Inc. All Rights Reserved. 26 | # 27 | # Licensed under the Apache License, Version 2.0 (the "License"); 28 | # you may not use this file except in compliance with the License. 29 | # You may obtain a copy of the License at 30 | # 31 | # http://www.apache.org/licenses/LICENSE-2.0 32 | # 33 | # Unless required by applicable law or agreed to in writing, software 34 | # distributed under the License is distributed on an "AS IS" BASIS, 35 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 36 | # See the License for the specific language governing permissions and 37 | # limitations under the License. 38 | 39 | # Autogenerated by genmathops.py - do not edit''') 40 | print('from . import jvmops') 41 | print('from . import scalartypes as scalars') 42 | 43 | print('UNARY = {') 44 | for i, code in enumerate(unary.split()): 45 | code = code.replace('not','xor') 46 | if '2' in code: 47 | srct = stypes[code[0]] 48 | destt = stypes[code[2]] 49 | else: 50 | srct = destt = stypes[code[0]] 51 | print(' 0x{:02X}: (jvmops.{}, scalars.{}, scalars.{}),'.format(i + 0x7b, code.upper(), srct, destt)) 52 | print('}') 53 | 54 | print('BINARY = {') 55 | for i, code in enumerate(binary.split()): 56 | st = stypes[code[0]] 57 | # shift instructions have second arg an int even when operating on longs 58 | st2 = 'INT' if 'sh' in code else st 59 | print(' 0x{:02X}: (jvmops.{}, scalars.{}, scalars.{}),'.format(i + 0x90, code.upper(), st, st2)) 60 | print('}') 61 | 62 | print('BINARY_LIT = {') 63 | for i, code in enumerate(binlit.split()): 64 | print(' 0x{:02X}: jvmops.{},'.format(i + 0xd0, code.upper())) 65 | print('}') 66 | -------------------------------------------------------------------------------- /enjarify/jvm/ir.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import struct 16 | 17 | from .constants import calc 18 | from .jvmops import * 19 | from . import constantpool, error 20 | from . import scalartypes as scalars 21 | 22 | # IR representation roughly corresponding to JVM bytecode instructions. Note that these 23 | # may correspond to more than one instruction in the actual bytecode generated but they 24 | # are useful logical units for the internal optimization passes. 25 | 26 | class JvmInstruction: 27 | def __init__(self, bytecode=None): 28 | self.bytecode = bytecode # None or bytestring 29 | 30 | def fallsthrough(self): return True 31 | def targets(self): return [] 32 | 33 | # Used to mark locations in the IR instructions for various purposes. These are 34 | # seperate IR 'instructions' since the optimization passes may remove or replace 35 | # the other instructions. 36 | class Label(JvmInstruction): 37 | def __init__(self, id=None): 38 | super().__init__(b'') 39 | self.id = id # None or int 40 | 41 | _ilfdaOrd = [scalars.INT, scalars.LONG, scalars.FLOAT, scalars.DOUBLE, scalars.OBJ].index 42 | class RegAccess(JvmInstruction): 43 | def __init__(self, dreg, st, store): 44 | super().__init__() 45 | self.key = dreg, st 46 | self.store = store 47 | self.wide = scalars.iswide(st) 48 | 49 | @staticmethod 50 | def raw(local, stype, store): 51 | new = RegAccess(0, stype, store) 52 | new.calcBytecode(local) 53 | return new 54 | 55 | def calcBytecode(self, local): 56 | assert self.bytecode is None 57 | stype = self.key[1] 58 | op_off = (ISTORE - ILOAD) if self.store else 0 59 | if local < 4: 60 | self.bytecode = struct.pack('>B', ILOAD_0 + op_off + local + _ilfdaOrd(stype)*4) 61 | elif local < 256: 62 | self.bytecode = struct.pack('>BB', ILOAD + op_off + _ilfdaOrd(stype), local) 63 | else: 64 | self.bytecode = struct.pack('>BBH', WIDE, ILOAD + op_off + _ilfdaOrd(stype), local) 65 | 66 | class PrimConstant(JvmInstruction): 67 | def __init__(self, st, val, pool=None): 68 | super().__init__() 69 | self.st = st 70 | self.val = val = calc.normalize(st, val) 71 | self.wide = scalars.iswide(st) 72 | 73 | # If pool is passed in, just grab an entry greedily, otherwise calculate 74 | # a sequence of bytecode to generate the constant 75 | if pool is not None: 76 | self.bytecode = calc.lookupOnly(st, val) 77 | if self.bytecode is None: 78 | self._from_pool(pool) 79 | if self.bytecode is None: 80 | raise error.ClassfileLimitExceeded() 81 | else: 82 | self.bytecode = calc.calc(st, val) 83 | 84 | def cpool_key(self): 85 | tag = { 86 | scalars.INT: constantpool.CONSTANT_Integer, 87 | scalars.FLOAT: constantpool.CONSTANT_Float, 88 | scalars.DOUBLE: constantpool.CONSTANT_Double, 89 | scalars.LONG: constantpool.CONSTANT_Long, 90 | }[self.st] 91 | return tag, self.val 92 | 93 | def _from_pool(self, pool): 94 | index = pool.tryGet(self.cpool_key()) 95 | if index is not None: 96 | if scalars.iswide(self.st): 97 | code = struct.pack('>BH', LDC2_W, index) 98 | elif index >= 256: 99 | code = struct.pack('>BH', LDC_W, index) 100 | else: 101 | code = struct.pack('>BB', LDC, index) 102 | self.bytecode = code 103 | 104 | def fix_with_pool(self, pool): 105 | if len(self.bytecode) > 2: 106 | self._from_pool(pool) 107 | 108 | class OtherConstant(JvmInstruction): 109 | wide = False # will be null, string or class - always single 110 | 111 | class LazyJumpBase(JvmInstruction): 112 | def __init__(self, target): 113 | super().__init__() 114 | self.target = target 115 | 116 | def targets(self): return [self.target] 117 | 118 | def widenIfNecessary(self, labels, posd): 119 | offset = posd[labels[self.target]] - posd[self] 120 | if not -32768 <= offset < 32768: 121 | self.min = self.max 122 | return True 123 | return False 124 | 125 | class Goto(LazyJumpBase): 126 | def __init__(self, target): 127 | super().__init__(target) 128 | self.min = 3 129 | self.max = 5 # upper limit on length of bytecode 130 | 131 | def fallsthrough(self): return False 132 | 133 | def calcBytecode(self, posd, labels): 134 | offset = posd[labels[self.target]] - posd[self] 135 | if self.max == 3: 136 | self.bytecode = struct.pack('>Bh', GOTO, offset) 137 | else: 138 | self.bytecode = struct.pack('>Bi', GOTO_W, offset) 139 | 140 | _ifOpposite = {} 141 | for _op1, _op2 in [(IFEQ, IFNE), (IFLT, IFGE), (IFGT, IFLE), (IF_ICMPEQ, IF_ICMPNE), (IF_ICMPLT, IF_ICMPGE), (IF_ICMPGT, IF_ICMPLE), (IFNULL, IFNONNULL), (IF_ACMPEQ, IF_ACMPNE)]: 142 | _ifOpposite[_op1] = _op2 143 | _ifOpposite[_op2] = _op1 144 | class If(LazyJumpBase): 145 | def __init__(self, op, target): 146 | super().__init__(target) 147 | self.op = op 148 | self.min = 3 149 | self.max = 8 # upper limit on length of bytecode 150 | 151 | # Unlike with goto, if instructions are limited to a 16 bit jump offset. 152 | # Therefore, for larger jumps, we have to substitute a different sequence 153 | # 154 | # if x goto A 155 | # B: whatever 156 | # 157 | # becomes 158 | # 159 | # if !x goto B 160 | # goto A 161 | # B: whatever 162 | def calcBytecode(self, posd, labels): 163 | if self.max == 3: 164 | offset = posd[labels[self.target]] - posd[self] 165 | self.bytecode = struct.pack('>Bh', self.op, offset) 166 | else: 167 | op = _ifOpposite[self.op] 168 | offset = posd[labels[self.target]] - posd[self] - 3 169 | self.bytecode = struct.pack('>BhBi', op, 8, GOTO_W, offset) 170 | 171 | class Switch(JvmInstruction): 172 | def __init__(self, default, jumps): 173 | super().__init__() 174 | self.default = default 175 | self.jumps = jumps 176 | 177 | assert jumps 178 | self.low = min(jumps) 179 | self.high = max(jumps) 180 | 181 | table_count = self.high - self.low + 1 182 | table_size = 4*(table_count+1) 183 | jump_size = 8*len(jumps) 184 | 185 | self.istable = jump_size > table_size 186 | self.nopad_size = 9 + (table_size if self.istable else jump_size) 187 | self.max = self.nopad_size + 3 188 | 189 | def fallsthrough(self): return False 190 | def targets(self): return sorted(set(self.jumps.values())) + [self.default] 191 | 192 | def calcBytecode(self, posd, labels): 193 | pos = posd[self] 194 | offset = posd[labels[self.default]] - pos 195 | pad = (-pos-1) % 4 196 | 197 | bytecode = bytearray() 198 | if self.istable: 199 | bytecode += bytes([TABLESWITCH] + [0]*pad) 200 | bytecode += struct.pack('>iii', offset, self.low, self.high) 201 | for k in range(self.low, self.high + 1): 202 | target = self.jumps.get(k, self.default) 203 | bytecode += struct.pack('>i', posd[labels[target]] - pos) 204 | else: 205 | bytecode += bytes([LOOKUPSWITCH] + [0]*pad) 206 | bytecode += struct.pack('>iI', offset, len(self.jumps)) 207 | for k, target in sorted(self.jumps.items()): 208 | offset = posd[labels[target]] - pos 209 | bytecode += struct.pack('>ii', k, offset) 210 | self.bytecode = bytes(bytecode) 211 | 212 | _return_or_throw_bytecodes = {bytes([op]) for op in range(IRETURN, RETURN+1) } 213 | _return_or_throw_bytecodes.add(bytes([ATHROW])) 214 | class Other(JvmInstruction): 215 | def fallsthrough(self): return self.bytecode not in _return_or_throw_bytecodes 216 | 217 | def Pop(): return Other(bytes([POP])) 218 | def Pop2(): return Other(bytes([POP2])) 219 | def Dup(): return Other(bytes([DUP])) 220 | def Dup2(): return Other(bytes([DUP2])) 221 | -------------------------------------------------------------------------------- /enjarify/jvm/jvmops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | NOP = 0x00 16 | ACONST_NULL = 0x01 17 | ICONST_M1 = 0x02 18 | ICONST_0 = 0x03 19 | ICONST_1 = 0x04 20 | ICONST_2 = 0x05 21 | ICONST_3 = 0x06 22 | ICONST_4 = 0x07 23 | ICONST_5 = 0x08 24 | LCONST_0 = 0x09 25 | LCONST_1 = 0x0A 26 | FCONST_0 = 0x0B 27 | FCONST_1 = 0x0C 28 | FCONST_2 = 0x0D 29 | DCONST_0 = 0x0E 30 | DCONST_1 = 0x0F 31 | BIPUSH = 0x10 32 | SIPUSH = 0x11 33 | LDC = 0x12 34 | LDC_W = 0x13 35 | LDC2_W = 0x14 36 | ILOAD = 0x15 37 | LLOAD = 0x16 38 | FLOAD = 0x17 39 | DLOAD = 0x18 40 | ALOAD = 0x19 41 | ILOAD_0 = 0x1A 42 | ILOAD_1 = 0x1B 43 | ILOAD_2 = 0x1C 44 | ILOAD_3 = 0x1D 45 | LLOAD_0 = 0x1E 46 | LLOAD_1 = 0x1F 47 | LLOAD_2 = 0x20 48 | LLOAD_3 = 0x21 49 | FLOAD_0 = 0x22 50 | FLOAD_1 = 0x23 51 | FLOAD_2 = 0x24 52 | FLOAD_3 = 0x25 53 | DLOAD_0 = 0x26 54 | DLOAD_1 = 0x27 55 | DLOAD_2 = 0x28 56 | DLOAD_3 = 0x29 57 | ALOAD_0 = 0x2A 58 | ALOAD_1 = 0x2B 59 | ALOAD_2 = 0x2C 60 | ALOAD_3 = 0x2D 61 | IALOAD = 0x2E 62 | LALOAD = 0x2F 63 | FALOAD = 0x30 64 | DALOAD = 0x31 65 | AALOAD = 0x32 66 | BALOAD = 0x33 67 | CALOAD = 0x34 68 | SALOAD = 0x35 69 | ISTORE = 0x36 70 | LSTORE = 0x37 71 | FSTORE = 0x38 72 | DSTORE = 0x39 73 | ASTORE = 0x3A 74 | ISTORE_0 = 0x3B 75 | ISTORE_1 = 0x3C 76 | ISTORE_2 = 0x3D 77 | ISTORE_3 = 0x3E 78 | LSTORE_0 = 0x3F 79 | LSTORE_1 = 0x40 80 | LSTORE_2 = 0x41 81 | LSTORE_3 = 0x42 82 | FSTORE_0 = 0x43 83 | FSTORE_1 = 0x44 84 | FSTORE_2 = 0x45 85 | FSTORE_3 = 0x46 86 | DSTORE_0 = 0x47 87 | DSTORE_1 = 0x48 88 | DSTORE_2 = 0x49 89 | DSTORE_3 = 0x4A 90 | ASTORE_0 = 0x4B 91 | ASTORE_1 = 0x4C 92 | ASTORE_2 = 0x4D 93 | ASTORE_3 = 0x4E 94 | IASTORE = 0x4F 95 | LASTORE = 0x50 96 | FASTORE = 0x51 97 | DASTORE = 0x52 98 | AASTORE = 0x53 99 | BASTORE = 0x54 100 | CASTORE = 0x55 101 | SASTORE = 0x56 102 | POP = 0x57 103 | POP2 = 0x58 104 | DUP = 0x59 105 | DUP_X1 = 0x5A 106 | DUP_X2 = 0x5B 107 | DUP2 = 0x5C 108 | DUP2_X1 = 0x5D 109 | DUP2_X2 = 0x5E 110 | SWAP = 0x5F 111 | IADD = 0x60 112 | LADD = 0x61 113 | FADD = 0x62 114 | DADD = 0x63 115 | ISUB = 0x64 116 | LSUB = 0x65 117 | FSUB = 0x66 118 | DSUB = 0x67 119 | IMUL = 0x68 120 | LMUL = 0x69 121 | FMUL = 0x6A 122 | DMUL = 0x6B 123 | IDIV = 0x6C 124 | LDIV = 0x6D 125 | FDIV = 0x6E 126 | DDIV = 0x6F 127 | IREM = 0x70 128 | LREM = 0x71 129 | FREM = 0x72 130 | DREM = 0x73 131 | INEG = 0x74 132 | LNEG = 0x75 133 | FNEG = 0x76 134 | DNEG = 0x77 135 | ISHL = 0x78 136 | LSHL = 0x79 137 | ISHR = 0x7A 138 | LSHR = 0x7B 139 | IUSHR = 0x7C 140 | LUSHR = 0x7D 141 | IAND = 0x7E 142 | LAND = 0x7F 143 | IOR = 0x80 144 | LOR = 0x81 145 | IXOR = 0x82 146 | LXOR = 0x83 147 | IINC = 0x84 148 | I2L = 0x85 149 | I2F = 0x86 150 | I2D = 0x87 151 | L2I = 0x88 152 | L2F = 0x89 153 | L2D = 0x8A 154 | F2I = 0x8B 155 | F2L = 0x8C 156 | F2D = 0x8D 157 | D2I = 0x8E 158 | D2L = 0x8F 159 | D2F = 0x90 160 | I2B = 0x91 161 | I2C = 0x92 162 | I2S = 0x93 163 | LCMP = 0x94 164 | FCMPL = 0x95 165 | FCMPG = 0x96 166 | DCMPL = 0x97 167 | DCMPG = 0x98 168 | IFEQ = 0x99 169 | IFNE = 0x9A 170 | IFLT = 0x9B 171 | IFGE = 0x9C 172 | IFGT = 0x9D 173 | IFLE = 0x9E 174 | IF_ICMPEQ = 0x9F 175 | IF_ICMPNE = 0xA0 176 | IF_ICMPLT = 0xA1 177 | IF_ICMPGE = 0xA2 178 | IF_ICMPGT = 0xA3 179 | IF_ICMPLE = 0xA4 180 | IF_ACMPEQ = 0xA5 181 | IF_ACMPNE = 0xA6 182 | GOTO = 0xA7 183 | JSR = 0xA8 184 | RET = 0xA9 185 | TABLESWITCH = 0xAA 186 | LOOKUPSWITCH = 0xAB 187 | IRETURN = 0xAC 188 | LRETURN = 0xAD 189 | FRETURN = 0xAE 190 | DRETURN = 0xAF 191 | ARETURN = 0xB0 192 | RETURN = 0xB1 193 | GETSTATIC = 0xB2 194 | PUTSTATIC = 0xB3 195 | GETFIELD = 0xB4 196 | PUTFIELD = 0xB5 197 | INVOKEVIRTUAL = 0xB6 198 | INVOKESPECIAL = 0xB7 199 | INVOKESTATIC = 0xB8 200 | INVOKEINTERFACE = 0xB9 201 | INVOKEDYNAMIC = 0xBA 202 | NEW = 0xBB 203 | NEWARRAY = 0xBC 204 | ANEWARRAY = 0xBD 205 | ARRAYLENGTH = 0xBE 206 | ATHROW = 0xBF 207 | CHECKCAST = 0xC0 208 | INSTANCEOF = 0xC1 209 | MONITORENTER = 0xC2 210 | MONITOREXIT = 0xC3 211 | WIDE = 0xC4 212 | MULTIANEWARRAY = 0xC5 213 | IFNULL = 0xC6 214 | IFNONNULL = 0xC7 215 | GOTO_W = 0xC8 216 | JSR_W = 0xC9 217 | -------------------------------------------------------------------------------- /enjarify/jvm/mathops.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright 2015 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Autogenerated by genmathops.py - do not edit 17 | from . import jvmops 18 | from . import scalartypes as scalars 19 | UNARY = { 20 | 0x7B: (jvmops.INEG, scalars.INT, scalars.INT), 21 | 0x7C: (jvmops.IXOR, scalars.INT, scalars.INT), 22 | 0x7D: (jvmops.LNEG, scalars.LONG, scalars.LONG), 23 | 0x7E: (jvmops.LXOR, scalars.LONG, scalars.LONG), 24 | 0x7F: (jvmops.FNEG, scalars.FLOAT, scalars.FLOAT), 25 | 0x80: (jvmops.DNEG, scalars.DOUBLE, scalars.DOUBLE), 26 | 0x81: (jvmops.I2L, scalars.INT, scalars.LONG), 27 | 0x82: (jvmops.I2F, scalars.INT, scalars.FLOAT), 28 | 0x83: (jvmops.I2D, scalars.INT, scalars.DOUBLE), 29 | 0x84: (jvmops.L2I, scalars.LONG, scalars.INT), 30 | 0x85: (jvmops.L2F, scalars.LONG, scalars.FLOAT), 31 | 0x86: (jvmops.L2D, scalars.LONG, scalars.DOUBLE), 32 | 0x87: (jvmops.F2I, scalars.FLOAT, scalars.INT), 33 | 0x88: (jvmops.F2L, scalars.FLOAT, scalars.LONG), 34 | 0x89: (jvmops.F2D, scalars.FLOAT, scalars.DOUBLE), 35 | 0x8A: (jvmops.D2I, scalars.DOUBLE, scalars.INT), 36 | 0x8B: (jvmops.D2L, scalars.DOUBLE, scalars.LONG), 37 | 0x8C: (jvmops.D2F, scalars.DOUBLE, scalars.FLOAT), 38 | 0x8D: (jvmops.I2B, scalars.INT, scalars.INT), 39 | 0x8E: (jvmops.I2C, scalars.INT, scalars.INT), 40 | 0x8F: (jvmops.I2S, scalars.INT, scalars.INT), 41 | } 42 | BINARY = { 43 | 0x90: (jvmops.IADD, scalars.INT, scalars.INT), 44 | 0x91: (jvmops.ISUB, scalars.INT, scalars.INT), 45 | 0x92: (jvmops.IMUL, scalars.INT, scalars.INT), 46 | 0x93: (jvmops.IDIV, scalars.INT, scalars.INT), 47 | 0x94: (jvmops.IREM, scalars.INT, scalars.INT), 48 | 0x95: (jvmops.IAND, scalars.INT, scalars.INT), 49 | 0x96: (jvmops.IOR, scalars.INT, scalars.INT), 50 | 0x97: (jvmops.IXOR, scalars.INT, scalars.INT), 51 | 0x98: (jvmops.ISHL, scalars.INT, scalars.INT), 52 | 0x99: (jvmops.ISHR, scalars.INT, scalars.INT), 53 | 0x9A: (jvmops.IUSHR, scalars.INT, scalars.INT), 54 | 0x9B: (jvmops.LADD, scalars.LONG, scalars.LONG), 55 | 0x9C: (jvmops.LSUB, scalars.LONG, scalars.LONG), 56 | 0x9D: (jvmops.LMUL, scalars.LONG, scalars.LONG), 57 | 0x9E: (jvmops.LDIV, scalars.LONG, scalars.LONG), 58 | 0x9F: (jvmops.LREM, scalars.LONG, scalars.LONG), 59 | 0xA0: (jvmops.LAND, scalars.LONG, scalars.LONG), 60 | 0xA1: (jvmops.LOR, scalars.LONG, scalars.LONG), 61 | 0xA2: (jvmops.LXOR, scalars.LONG, scalars.LONG), 62 | 0xA3: (jvmops.LSHL, scalars.LONG, scalars.INT), 63 | 0xA4: (jvmops.LSHR, scalars.LONG, scalars.INT), 64 | 0xA5: (jvmops.LUSHR, scalars.LONG, scalars.INT), 65 | 0xA6: (jvmops.FADD, scalars.FLOAT, scalars.FLOAT), 66 | 0xA7: (jvmops.FSUB, scalars.FLOAT, scalars.FLOAT), 67 | 0xA8: (jvmops.FMUL, scalars.FLOAT, scalars.FLOAT), 68 | 0xA9: (jvmops.FDIV, scalars.FLOAT, scalars.FLOAT), 69 | 0xAA: (jvmops.FREM, scalars.FLOAT, scalars.FLOAT), 70 | 0xAB: (jvmops.DADD, scalars.DOUBLE, scalars.DOUBLE), 71 | 0xAC: (jvmops.DSUB, scalars.DOUBLE, scalars.DOUBLE), 72 | 0xAD: (jvmops.DMUL, scalars.DOUBLE, scalars.DOUBLE), 73 | 0xAE: (jvmops.DDIV, scalars.DOUBLE, scalars.DOUBLE), 74 | 0xAF: (jvmops.DREM, scalars.DOUBLE, scalars.DOUBLE), 75 | 0xB0: (jvmops.IADD, scalars.INT, scalars.INT), 76 | 0xB1: (jvmops.ISUB, scalars.INT, scalars.INT), 77 | 0xB2: (jvmops.IMUL, scalars.INT, scalars.INT), 78 | 0xB3: (jvmops.IDIV, scalars.INT, scalars.INT), 79 | 0xB4: (jvmops.IREM, scalars.INT, scalars.INT), 80 | 0xB5: (jvmops.IAND, scalars.INT, scalars.INT), 81 | 0xB6: (jvmops.IOR, scalars.INT, scalars.INT), 82 | 0xB7: (jvmops.IXOR, scalars.INT, scalars.INT), 83 | 0xB8: (jvmops.ISHL, scalars.INT, scalars.INT), 84 | 0xB9: (jvmops.ISHR, scalars.INT, scalars.INT), 85 | 0xBA: (jvmops.IUSHR, scalars.INT, scalars.INT), 86 | 0xBB: (jvmops.LADD, scalars.LONG, scalars.LONG), 87 | 0xBC: (jvmops.LSUB, scalars.LONG, scalars.LONG), 88 | 0xBD: (jvmops.LMUL, scalars.LONG, scalars.LONG), 89 | 0xBE: (jvmops.LDIV, scalars.LONG, scalars.LONG), 90 | 0xBF: (jvmops.LREM, scalars.LONG, scalars.LONG), 91 | 0xC0: (jvmops.LAND, scalars.LONG, scalars.LONG), 92 | 0xC1: (jvmops.LOR, scalars.LONG, scalars.LONG), 93 | 0xC2: (jvmops.LXOR, scalars.LONG, scalars.LONG), 94 | 0xC3: (jvmops.LSHL, scalars.LONG, scalars.INT), 95 | 0xC4: (jvmops.LSHR, scalars.LONG, scalars.INT), 96 | 0xC5: (jvmops.LUSHR, scalars.LONG, scalars.INT), 97 | 0xC6: (jvmops.FADD, scalars.FLOAT, scalars.FLOAT), 98 | 0xC7: (jvmops.FSUB, scalars.FLOAT, scalars.FLOAT), 99 | 0xC8: (jvmops.FMUL, scalars.FLOAT, scalars.FLOAT), 100 | 0xC9: (jvmops.FDIV, scalars.FLOAT, scalars.FLOAT), 101 | 0xCA: (jvmops.FREM, scalars.FLOAT, scalars.FLOAT), 102 | 0xCB: (jvmops.DADD, scalars.DOUBLE, scalars.DOUBLE), 103 | 0xCC: (jvmops.DSUB, scalars.DOUBLE, scalars.DOUBLE), 104 | 0xCD: (jvmops.DMUL, scalars.DOUBLE, scalars.DOUBLE), 105 | 0xCE: (jvmops.DDIV, scalars.DOUBLE, scalars.DOUBLE), 106 | 0xCF: (jvmops.DREM, scalars.DOUBLE, scalars.DOUBLE), 107 | } 108 | BINARY_LIT = { 109 | 0xD0: jvmops.IADD, 110 | 0xD1: jvmops.ISUB, 111 | 0xD2: jvmops.IMUL, 112 | 0xD3: jvmops.IDIV, 113 | 0xD4: jvmops.IREM, 114 | 0xD5: jvmops.IAND, 115 | 0xD6: jvmops.IOR, 116 | 0xD7: jvmops.IXOR, 117 | 0xD8: jvmops.IADD, 118 | 0xD9: jvmops.ISUB, 119 | 0xDA: jvmops.IMUL, 120 | 0xDB: jvmops.IDIV, 121 | 0xDC: jvmops.IREM, 122 | 0xDD: jvmops.IAND, 123 | 0xDE: jvmops.IOR, 124 | 0xDF: jvmops.IXOR, 125 | 0xE0: jvmops.ISHL, 126 | 0xE1: jvmops.ISHR, 127 | 0xE2: jvmops.IUSHR, 128 | } 129 | -------------------------------------------------------------------------------- /enjarify/jvm/optimization/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /enjarify/jvm/optimization/consts.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import collections 16 | 17 | from .. import scalartypes as scalars 18 | from .. import ir 19 | 20 | def allocateRequiredConstants(pool, long_irs): 21 | # see comments in writebytecode.finishCodeAttrs 22 | # We allocate the constants pretty much greedily. This is far from optimal, 23 | # but it shouldn't be a big deal since this code is almost never required 24 | # in the first place. In fact, there are no known real world classes that 25 | # even come close to exhausting the constant pool. 26 | narrow_pairs = collections.Counter() 27 | wide_pairs = collections.Counter() 28 | alt_lens = {} 29 | for _ir in long_irs: 30 | for ins in _ir.flat_instructions: 31 | if isinstance(ins, ir.PrimConstant): 32 | key = ins.cpool_key() 33 | alt_lens[key] = len(ins.bytecode) 34 | if scalars.iswide(ins.st): 35 | if len(ins.bytecode) > 3: 36 | wide_pairs[key] += 1 37 | else: 38 | if len(ins.bytecode) > 2: 39 | narrow_pairs[key] += 1 40 | 41 | # see if already in the constant pool 42 | for x in pool.vals: 43 | del narrow_pairs[x] 44 | del wide_pairs[x] 45 | 46 | # if we have enough space for all required constants, preferentially allocate 47 | # most commonly used constants to first 255 slots 48 | if pool.space() >= len(narrow_pairs) + 2*len(wide_pairs) and pool.lowspace() > 0: 49 | # We can't use Counter.most_common here because it is nondeterminstic in 50 | # the case of ties. 51 | most_common = sorted(narrow_pairs, key=lambda p:(-narrow_pairs[p], p)) 52 | for key in most_common[:pool.lowspace()]: 53 | pool.insertDirectly(key, True) 54 | del narrow_pairs[key] 55 | 56 | scores = {} 57 | for p, count in narrow_pairs.items(): 58 | scores[p] = (alt_lens[p] - 3) * count 59 | for p, count in wide_pairs.items(): 60 | scores[p] = (alt_lens[p] - 3) * count 61 | 62 | # sort by score 63 | narrowq = sorted(narrow_pairs, key=lambda p:(scores[p], p)) 64 | wideq = sorted(wide_pairs, key=lambda p:(scores[p], p)) 65 | while pool.space() >= 1 and (narrowq or wideq): 66 | if not narrowq and pool.space() < 2: 67 | break 68 | 69 | wscore = sum(scores[p] for p in wideq[-1:]) 70 | nscore = sum(scores[p] for p in narrowq[-2:]) 71 | if pool.space() >= 2 and wscore > nscore and wscore > 0: 72 | pool.insertDirectly(wideq.pop(), False) 73 | elif nscore > 0: 74 | pool.insertDirectly(narrowq.pop(), True) 75 | else: 76 | break 77 | -------------------------------------------------------------------------------- /enjarify/jvm/optimization/jumps.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import struct 16 | 17 | from .. import ir, error 18 | from ..jvmops import * 19 | from . import options 20 | 21 | def _calcMinimumPositions(instrs): 22 | posd = {} 23 | pos = 0 24 | for ins in instrs: 25 | posd[ins] = pos 26 | if isinstance(ins, ir.LazyJumpBase): 27 | pos += ins.min 28 | elif isinstance(ins, ir.Switch): 29 | pad = (-pos-1) % 4 30 | pos += pad + ins.nopad_size 31 | else: 32 | pos += len(ins.bytecode) 33 | return posd, pos 34 | 35 | def optimizeJumps(irdata): 36 | # For jump offsets of more than +-32767, a longer form of the jump instruction 37 | # is required. This function finds the optimal jump widths by optimistically 38 | # starting with everything narrow and then iteratively marking instructions 39 | # as wide if their offset is too large (in rare cases, this can in turn cause 40 | # other jumps to become wide, hence iterating until convergence) 41 | instrs = irdata.flat_instructions 42 | jump_instrs = [ins for ins in instrs if isinstance(ins, ir.LazyJumpBase)] 43 | 44 | while 1: 45 | done = True 46 | posd, _ = _calcMinimumPositions(instrs) 47 | 48 | for ins in jump_instrs: 49 | if ins.min < ins.max and ins.widenIfNecessary(irdata.labels, posd): 50 | done = False 51 | if done: 52 | break 53 | 54 | for ins in jump_instrs: 55 | assert ins.min <= ins.max 56 | ins.max = ins.min 57 | 58 | def createBytecode(irdata, opts): 59 | instrs = irdata.flat_instructions 60 | posd, end_pos = _calcMinimumPositions(instrs) 61 | 62 | bytecode = bytearray() 63 | for ins in instrs: 64 | if isinstance(ins, (ir.LazyJumpBase, ir.Switch)): 65 | ins.calcBytecode(posd, irdata.labels) 66 | bytecode += ins.bytecode 67 | assert len(bytecode) == end_pos 68 | 69 | 70 | if len(bytecode) > 65535: 71 | # If code is too long and optimization is off, raise exception so we can 72 | # retry with optimization. If it is still too long with optimization, 73 | # don't raise an error, since a class with illegally long code is better 74 | # than no output at all. 75 | if opts is not options.ALL: 76 | raise error.ClassfileLimitExceeded() 77 | 78 | 79 | prev_instr_map = dict(zip(instrs[1:], instrs)) 80 | packed_excepts = [] 81 | for s, e, h, c in irdata.excepts: 82 | # There appears to be a bug in the JVM where in rare cases, it throws 83 | # the exception at the address of the instruction _before_ the instruction 84 | # that actually caused the exception, triggering the wrong handler 85 | # therefore we include the previous (IR) instruction too 86 | # Note that this cannot cause an overlap because in that case the previous 87 | # instruction would just be a label and hence not change anything 88 | s = prev_instr_map.get(s, s) 89 | 90 | s_off = posd[s] 91 | e_off = posd[e] 92 | h_off = posd[h] 93 | assert s_off <= e_off 94 | if s_off < e_off: 95 | packed_excepts.append(struct.pack('>HHHH', s_off, e_off, h_off, c)) 96 | else: 97 | print('Skipping zero width exception!') 98 | assert 0 99 | 100 | return bytes(bytecode), packed_excepts 101 | -------------------------------------------------------------------------------- /enjarify/jvm/optimization/options.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | class Options: 16 | def __init__(self, inline_consts=False, prune_store_loads=False, 17 | copy_propagation=False, remove_unused_regs=False, dup2ize=False, 18 | sort_registers=False, split_pool=False, delay_consts=False): 19 | self.inline_consts = inline_consts 20 | self.prune_store_loads = prune_store_loads 21 | self.copy_propagation = copy_propagation 22 | self.remove_unused_regs = remove_unused_regs 23 | self.dup2ize = dup2ize 24 | self.sort_registers = sort_registers 25 | self.split_pool = split_pool 26 | self.delay_consts = delay_consts 27 | 28 | NONE = Options() 29 | # Options which make the generated code more readable for humans 30 | PRETTY = Options(inline_consts=True, prune_store_loads=True, copy_propagation=True, remove_unused_regs=True) 31 | ALL = Options(inline_consts=True, prune_store_loads=True, copy_propagation=True, remove_unused_regs=True, dup2ize=True, 32 | sort_registers=True, split_pool=True, delay_consts=True) 33 | -------------------------------------------------------------------------------- /enjarify/jvm/optimization/registers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import collections 16 | 17 | from .. import ir 18 | from .. import scalartypes as scalars 19 | from ..jvmops import * 20 | 21 | # Copy propagation - when one register is moved to another, keep track and replace 22 | # all loads with loads from the original register (as long as it hasn't since been 23 | # overwritten). Note that stores won't be removed, since they may still be needed 24 | # in some cases, but if they are unused, they'll be removed in a subsequent pass 25 | # As usual, assume no iincs 26 | 27 | # A set of registers that currently are copies of each other. 28 | class _CopySet: 29 | def __init__(self, key): 30 | self.root = key 31 | self.set = {key} 32 | self.q = [] # keep track of insertion order in case root is overwritten 33 | 34 | def add(self, key): 35 | assert self.set 36 | self.set.add(key) 37 | self.q.append(key) 38 | 39 | def remove(self, key): 40 | self.set.remove(key) 41 | # Heuristic - use oldest element still in set as new root 42 | while self.q and self.root not in self.set: 43 | self.root = self.q.pop(0) 44 | 45 | def copy(self): 46 | new = _CopySet(self.root) 47 | new.set = self.set.copy() 48 | new.q = self.q[:] 49 | return new 50 | 51 | # Map registers to CopySets 52 | class _CopySetsMap: 53 | def __init__(self): 54 | self.lookup = {} 55 | 56 | def _get(self, key): return self.lookup.setdefault(key, _CopySet(key)) 57 | 58 | def clobber(self, key): 59 | self._get(key).remove(key) 60 | del self.lookup[key] 61 | 62 | def move(self, dest, src): 63 | # return false if the corresponding instructions should be removed 64 | s_set = self._get(src) 65 | d_set = self._get(dest) 66 | if s_set is d_set: 67 | # src and dest are copies of same value, so we can remove 68 | return False 69 | d_set.remove(dest) 70 | s_set.add(dest) 71 | self.lookup[dest] = s_set 72 | return True 73 | 74 | def load(self, key): 75 | return self._get(key).root 76 | 77 | def copy(self): 78 | copies = {} 79 | new = _CopySetsMap() 80 | for k, v in self.lookup.items(): 81 | if v not in copies: 82 | copies[v] = v.copy() 83 | new.lookup[k] = copies[v] 84 | return new 85 | 86 | def copyPropagation(irdata): 87 | instrs = irdata.flat_instructions 88 | replace = {} 89 | 90 | single_pred_infos = {} 91 | 92 | prev = None 93 | current = _CopySetsMap() 94 | for instr in instrs: 95 | # reset all info when control flow is merged 96 | if instr in irdata.jump_targets: 97 | # try to use info if this was a single predecessor forward jump 98 | if prev and not prev.fallsthrough() and irdata.target_pred_counts.get(instr) == 1: 99 | current = single_pred_infos.get(instr, _CopySetsMap()) 100 | else: 101 | current = _CopySetsMap() 102 | 103 | elif isinstance(instr, ir.RegAccess): 104 | key = instr.key 105 | if instr.store: 106 | # check if previous instr was a load 107 | if isinstance(prev, ir.RegAccess) and not prev.store: 108 | if not current.move(dest=key, src=prev.key): 109 | replace[prev] = [] 110 | replace[instr] = [] 111 | else: 112 | current.clobber(key) 113 | else: 114 | root_key = current.load(key) 115 | if key != root_key: 116 | assert instr not in replace 117 | # replace with load from root register instead 118 | replace[instr] = [ir.RegAccess(root_key[0], root_key[1], False)] 119 | 120 | else: 121 | for target in instr.targets(): 122 | label = irdata.labels[target] 123 | if irdata.target_pred_counts.get(label) == 1: 124 | single_pred_infos[label] = current.copy() 125 | 126 | prev = instr 127 | irdata.replaceInstrs(replace) 128 | 129 | def _isRemoveable(instr): 130 | # can remove if load or const since we know there are no side effects 131 | # note - instr may be None 132 | if isinstance(instr, ir.RegAccess) and not instr.store: 133 | return True 134 | return isinstance(instr, (ir.PrimConstant, ir.OtherConstant)) 135 | 136 | def removeUnusedRegisters(irdata): 137 | # Remove stores to registers that are not read from anywhere in the method 138 | instrs = irdata.flat_instructions 139 | used = set() 140 | for instr in instrs: 141 | if isinstance(instr, ir.RegAccess) and not instr.store: 142 | used.add(instr.key) 143 | 144 | replace = {} 145 | prev = None 146 | for instr in instrs: 147 | if isinstance(instr, ir.RegAccess) and instr.key not in used: 148 | assert instr.store 149 | # if prev instruction is load or const, just remove it and the store 150 | # otherwise, replace the store with a pop 151 | if _isRemoveable(prev): 152 | replace[prev] = [] 153 | replace[instr] = [] 154 | else: 155 | replace[instr] = [ir.Pop2() if instr.wide else ir.Pop()] 156 | prev = instr 157 | irdata.replaceInstrs(replace) 158 | 159 | # Allocate registers to JVM registers on a first come, first serve basis 160 | # For simplicity, parameter registers are preserved as is 161 | def simpleAllocateRegisters(irdata): 162 | instrs = irdata.flat_instructions 163 | regmap = {v:i for i,v in enumerate(irdata.initial_args)} 164 | nextreg = len(irdata.initial_args) 165 | 166 | for instr in instrs: 167 | if isinstance(instr, ir.RegAccess): 168 | if instr.key not in regmap: 169 | regmap[instr.key] = nextreg 170 | nextreg += 1 171 | if instr.wide: 172 | nextreg += 1 173 | instr.calcBytecode(regmap[instr.key]) 174 | irdata.numregs = nextreg 175 | 176 | # Sort registers by number of uses so that more frequently used registers will 177 | # end up in slots 0-3 or 4-255 and benefit from the shorter instruction forms 178 | # For simplicity, parameter registers are still preserved as is with one exception 179 | def sortAllocateRegisters(irdata): 180 | instrs = irdata.flat_instructions 181 | 182 | use_counts = collections.Counter() 183 | for instr in instrs: 184 | if isinstance(instr, ir.RegAccess): 185 | use_counts[instr.key] += 1 186 | 187 | regs = irdata.initial_args[:] 188 | rest = sorted(use_counts, key=lambda k:(-use_counts[k], k)) 189 | for key in rest: 190 | # If key is a param, it was already added at the beginning 191 | if key not in irdata.initial_args: 192 | regs.append(key) 193 | if scalars.iswide(key[1]): 194 | regs.append(None) 195 | 196 | # Sometimes the non-param regsisters are used more times than the param registers 197 | # and it is beneificial to swap them (which requires inserting code at the 198 | # beginning of the method to move the value if the param is not unused) 199 | # This is very complicated to do in general, so the following code only does 200 | # this in one specific circumstance which should nevertheless be sufficient 201 | # to capture the majority of the benefit 202 | # Specificially, it only swaps at most one register, and only in the case that 203 | # it is nonwide and there is a nonwide parameter in the first 4 slots that 204 | # it can be swapped with. Also, it doesn't bother to check if param is unused. 205 | candidate_i = max(4, len(irdata.initial_args)) 206 | # make sure candidate is valid, nonwide register 207 | if len(regs) > candidate_i and regs[candidate_i] is not None: 208 | candidate = regs[candidate_i] 209 | if not scalars.iswide(candidate[1]) and use_counts[candidate] >= 3: 210 | for i in range(min(4, len(irdata.initial_args))): 211 | # make sure target is not wide 212 | if regs[i] is None or regs[i+1] is None: 213 | continue 214 | 215 | target = regs[i] 216 | if use_counts[candidate] > use_counts[target] + 3: 217 | # swap register assignments 218 | regs[i], regs[candidate_i] = candidate, target 219 | # add move instructions at beginning of method 220 | load = ir.RegAccess.raw(i, target[1], False) 221 | store = ir.RegAccess(target[0], target[1], True) 222 | instrs = [load, store] + instrs 223 | irdata.flat_instructions = instrs 224 | break 225 | 226 | # Now generate bytecode from the selected register allocations 227 | irdata.numregs = len(regs) 228 | regmap = {v:i for i,v in enumerate(regs) if v is not None} 229 | for instr in instrs: 230 | if instr.bytecode is None and isinstance(instr, ir.RegAccess): 231 | instr.calcBytecode(regmap[instr.key]) 232 | -------------------------------------------------------------------------------- /enjarify/jvm/optimization/stack.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .. import ir 16 | from ..jvmops import * 17 | 18 | def visitLinearCode(irdata, visitor): 19 | # Visit linear sections of code, pessimistically treating all exception 20 | # handler ranges as jumps. 21 | except_level = 0 22 | for instr in irdata.flat_instructions: 23 | if instr in irdata.except_starts: 24 | except_level += 1 25 | visitor.visitExceptionRange() 26 | elif instr in irdata.except_ends: 27 | except_level -= 1 28 | 29 | if except_level > 0: 30 | continue 31 | 32 | if instr in irdata.jump_targets or isinstance(instr, (ir.LazyJumpBase, ir.Switch)): 33 | visitor.visitJumpTargetOrBranch(instr) 34 | elif not instr.fallsthrough(): 35 | visitor.visitReturn() 36 | else: 37 | visitor.visit(instr) 38 | assert except_level == 0 39 | return visitor 40 | 41 | class NoExceptVisitorBase: 42 | def visitExceptionRange(self): self.reset() 43 | def visitJumpTargetOrBranch(self, instr): self.reset() 44 | 45 | class ConstInliner(NoExceptVisitorBase): 46 | def __init__(self): 47 | self.uses = {} 48 | self.notmultiused = set() 49 | self.current = {} 50 | 51 | def reset(self): 52 | self.current = {} 53 | 54 | def visitReturn(self): 55 | for key in self.current: 56 | self.notmultiused.add(self.current[key]) 57 | self.reset() 58 | 59 | def visit(self, instr): 60 | if isinstance(instr, ir.RegAccess): 61 | key = instr.key 62 | if instr.store: 63 | if key in self.current: 64 | self.notmultiused.add(self.current[key]) 65 | self.current[key] = instr 66 | elif key in self.current: 67 | # if currently used 0, mark it used once 68 | # if used once already, mark it as multiused 69 | if self.current[key] in self.uses: 70 | del self.current[key] 71 | else: 72 | self.uses[self.current[key]] = instr 73 | 74 | def inlineConsts(irdata): 75 | # Inline constants which are only used once or not at all. This only covers 76 | # linear sections of code and pessimistically assumes everything is used 77 | # when it reaches a jump or exception range. Essentially, this means that 78 | # the value can only be considered unused if it is either overwritten by a 79 | # store or reaches a return or throw before any jumps. 80 | # As usual, assume no iinc. 81 | instrs = irdata.flat_instructions 82 | visitor = visitLinearCode(irdata, ConstInliner()) 83 | 84 | replace = {} 85 | for ins1, ins2 in zip(instrs, instrs[1:]): 86 | if ins2 in visitor.notmultiused and isinstance(ins1, (ir.PrimConstant, ir.OtherConstant)): 87 | replace[ins1] = [] 88 | replace[ins2] = [] 89 | if ins2 in visitor.uses: 90 | replace[visitor.uses[ins2]] = [ins1] 91 | irdata.replaceInstrs(replace) 92 | 93 | class StoreLoadPruner(NoExceptVisitorBase): 94 | def __init__(self): 95 | self.current = {} 96 | self.last = None 97 | self.removed = set() 98 | 99 | def reset(self): 100 | self.current = {} 101 | self.last = None 102 | 103 | def visitReturn(self): 104 | for pair in self.current.values(): 105 | assert pair[0].store and not pair[1].store 106 | self.removed.update(pair) 107 | self.reset() 108 | 109 | def visit(self, instr): 110 | if isinstance(instr, ir.RegAccess): 111 | key = instr.key 112 | if instr.store: 113 | if key in self.current: 114 | pair = self.current[key] 115 | assert pair[0].store and not pair[1].store 116 | self.removed.update(self.current.pop(key)) 117 | self.last = instr 118 | else: 119 | self.current.pop(key, None) 120 | if self.last and self.last.key == key: 121 | self.current[key] = self.last, instr 122 | self.last = None 123 | elif not isinstance(instr, ir.Label): 124 | self.last = None 125 | 126 | def pruneStoreLoads(irdata): 127 | # Remove a store immediately followed by a load from the same register 128 | # (potentially with a label in between) if it can be proven that this 129 | # register isn't read again. As above, this only considers linear sections of code. 130 | # Must not be run before dup2ize! 131 | data = visitLinearCode(irdata, StoreLoadPruner()) 132 | irdata.replaceInstrs({instr:[] for instr in data.removed}) 133 | 134 | # used by writeir too 135 | def genDups(needed, needed_after): 136 | # Generate a sequence of dup and dup2 instructions to duplicate the given 137 | # value. This keeps up to 4 copies of the value on the stack. Thanks to dup2 138 | # this asymptotically takes only half a byte per access. 139 | have = 1 140 | ele_count = needed 141 | needed += needed_after 142 | 143 | for _ in range(ele_count): 144 | cur = [] 145 | if have < needed: 146 | if have == 1 and needed >= 2: 147 | cur.append(ir.Dup()) 148 | have += 1 149 | if have == 2 and needed >= 4: 150 | cur.append(ir.Dup2()) 151 | have += 2 152 | have -= 1 153 | needed -= 1 154 | yield cur 155 | assert have >= needed 156 | # check if we have to pop at end 157 | yield [ir.Pop() for _ in range(have-needed)] 158 | 159 | # Range of instruction indexes at which a given register is read (in linear code) 160 | class UseRange: 161 | def __init__(self, uses): 162 | self.uses = uses 163 | 164 | def add(self, i): 165 | self.uses.append(i) 166 | 167 | @property 168 | def start(self): return self.uses[0] 169 | @property 170 | def end(self): return self.uses[-1] 171 | 172 | def subtract(self, other): 173 | s, e = other.start, other.end 174 | left = [i for i in self.uses if i < s] 175 | right = [i for i in self.uses if i > e] 176 | if len(left) >= 2: 177 | yield UseRange(left) 178 | if len(right) >= 2: 179 | yield UseRange(right) 180 | 181 | def sortkey(self): return len(self.uses), self.uses[0] 182 | 183 | def makeRange(instr): 184 | assert isinstance(instr, ir.RegAccess) and not instr.store 185 | return UseRange([]) 186 | 187 | def dup2ize(irdata): 188 | # This optimization replaces narrow registers which are frequently read at 189 | # stack height 0 with a single read followed by the more efficient dup and 190 | # dup2 instructions. This asymptotically uses only half a byte per access. 191 | # For simplicity, instead of explicitly keeping track of which locations 192 | # have stack height 0, we take advantage of the invariant that ranges of code 193 | # corresponding to a single Dalvik instruction always begin with empty stack. 194 | # These can be recognized by labels with a non-None id. 195 | # This isn't true for move-result instructions, but in that case the range 196 | # won't begin with a register load so it doesn't matter. 197 | # Note that pruneStoreLoads breaks this invariant, so dup2ize must be run first. 198 | # Also, for simplicity, we only keep at most one such value on the stack at 199 | # a time (duplicated up to 4 times). 200 | instrs = irdata.flat_instructions 201 | 202 | ranges = [] 203 | current = {} 204 | at_head = False 205 | for i, instr in enumerate(instrs): 206 | # if not linear section of bytecode, reset everything. Exceptions are ok 207 | # since they clear the stack, but jumps obviously aren't. 208 | if instr in irdata.jump_targets or isinstance(instr, (ir.If, ir.Switch)): 209 | ranges.extend(current.values()) 210 | current = {} 211 | 212 | if isinstance(instr, ir.RegAccess): 213 | key = instr.key 214 | if not instr.wide: 215 | if instr.store: 216 | if key in current: 217 | ranges.append(current.pop(key)) 218 | elif at_head: 219 | current.setdefault(key, makeRange(instr)).add(i) 220 | 221 | at_head = isinstance(instr, ir.Label) and instr.id is not None 222 | ranges.extend(current.values()) 223 | ranges = [ur for ur in ranges if len(ur.uses) >= 2] 224 | ranges.sort(key=UseRange.sortkey) 225 | 226 | # Greedily choose a set of disjoint ranges to dup2ize. 227 | chosen = [] 228 | while ranges: 229 | best = ranges.pop() 230 | chosen.append(best) 231 | newranges = [] 232 | for ur in ranges: 233 | newranges.extend(ur.subtract(best)) 234 | ranges = sorted(newranges, key=UseRange.sortkey) 235 | 236 | replace = {} 237 | for ur in chosen: 238 | gen = genDups(len(ur.uses), 0) 239 | for pos in ur.uses: 240 | ops = next(gen) 241 | # remember to include initial load! 242 | if pos == ur.start: 243 | ops = [instrs[pos]] + ops 244 | replace[instrs[pos]] = ops 245 | irdata.replaceInstrs(replace) 246 | -------------------------------------------------------------------------------- /enjarify/jvm/scalartypes.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Primative type inference 16 | # In dalvik bytecode, constants are untyped, which effectively means a union type 17 | # They can be zero (int/float/null), narrow (int/float) or wide (long/double) 18 | 19 | INVALID = 0 20 | INT = 1 << 0 21 | FLOAT = 1 << 1 22 | OBJ = 1 << 2 23 | LONG = 1 << 3 24 | DOUBLE = 1 << 4 25 | 26 | ZERO = INT | FLOAT | OBJ 27 | C32 = INT | FLOAT 28 | C64 = LONG | DOUBLE 29 | ALL = ZERO | C64 30 | 31 | _descToScalar = dict(zip(map(ord, 'ZBCSIFJDL['), [INT, INT, INT, INT, INT, FLOAT, LONG, DOUBLE, OBJ, OBJ])) 32 | def fromDesc(desc): 33 | return _descToScalar[desc[0]] 34 | 35 | def iswide(st): 36 | return st & C64 37 | 38 | def paramTypes(method_id, static): 39 | temp = method_id.getSpacedParamTypes(static) 40 | return [(INVALID if desc is None else fromDesc(desc)) for desc in temp] 41 | -------------------------------------------------------------------------------- /enjarify/jvm/writebytecode.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from ..byteio import Writer 16 | from . import writeir, ir 17 | from .optimization import registers, jumps, stack, consts 18 | 19 | def getCodeIR(pool, method, opts): 20 | if method.code is not None: 21 | irdata = writeir.writeBytecode(pool, method, opts) 22 | 23 | if opts.inline_consts: 24 | stack.inlineConsts(irdata) 25 | 26 | if opts.copy_propagation: 27 | registers.copyPropagation(irdata) 28 | 29 | if opts.remove_unused_regs: 30 | registers.removeUnusedRegisters(irdata) 31 | 32 | if opts.dup2ize: 33 | stack.dup2ize(irdata) 34 | 35 | if opts.prune_store_loads: 36 | stack.pruneStoreLoads(irdata) 37 | if opts.remove_unused_regs: 38 | registers.removeUnusedRegisters(irdata) 39 | 40 | if opts.sort_registers: 41 | registers.sortAllocateRegisters(irdata) 42 | else: 43 | registers.simpleAllocateRegisters(irdata) 44 | return irdata 45 | return None 46 | 47 | def finishCodeAttrs(pool, code_irs, opts): 48 | code_irs = [x for x in code_irs if x is not None] 49 | # if we have any code, make sure to reserve pool slot for attr name 50 | if code_irs: 51 | pool.utf8(b"Code") 52 | 53 | if opts.delay_consts: 54 | # In the rare case where the class references too many constants to fit in 55 | # the constant pool, we can workaround this by replacing primative constants 56 | # e.g. ints, longs, floats, and doubles, with a sequence of bytecode instructions 57 | # to generate that constant. This obviously increases the size of the method's 58 | # bytecode, so we ideally only want to do it to constants in short methods. 59 | 60 | # First off, we find which methods are potentially too long. If a method 61 | # will be under 65536 bytes even with all constants replaced, then it 62 | # will be ok no matter what we do. 63 | long_irs = [irw for irw in code_irs if irw.calcUpperBound() >= 65536] 64 | 65 | # Now allocate constants used by potentially long methods 66 | if long_irs: 67 | consts.allocateRequiredConstants(pool, long_irs) 68 | 69 | # If there's space left in the constant pool, allocate constants used by short methods 70 | for _ir in code_irs: 71 | for ins in _ir.flat_instructions: 72 | if isinstance(ins, ir.PrimConstant): 73 | ins.fix_with_pool(pool) 74 | 75 | return {irdata.method: writeCodeAttributeTail(pool, irdata, opts=opts) for irdata in code_irs} 76 | 77 | def writeCodeAttributeTail(pool, irdata, opts): 78 | method = irdata.method 79 | jumps.optimizeJumps(irdata) 80 | bytecode, excepts = jumps.createBytecode(irdata, opts) 81 | 82 | stream = Writer() 83 | # For simplicity, don't bother calculating the actual maximum stack height 84 | # of the generated code. Instead, just use a value that will always be high 85 | # enough. Note that just setting this to 65535 is a bad idea since it tends 86 | # to cause StackOverflowErrors under default JVM memory settings 87 | stream.u16(300) # stack 88 | stream.u16(irdata.numregs) # locals 89 | 90 | stream.u32(len(bytecode)) 91 | stream.write(bytecode) 92 | 93 | # exceptions 94 | stream.u16(len(excepts)) 95 | stream.write(b''.join(excepts)) 96 | 97 | # attributes 98 | stream.u16(0) 99 | return stream 100 | -------------------------------------------------------------------------------- /enjarify/jvm/writeclass.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .. import flags 16 | from ..byteio import Writer 17 | from . import constantpool, writebytecode, error 18 | from .optimization import options 19 | 20 | def writeField(pool, stream, field): 21 | stream.u16(field.access & flags.FIELD_FLAGS) 22 | stream.u16(pool.utf8(field.id.name)) 23 | stream.u16(pool.utf8(field.id.desc)) 24 | if field.constant_value is not None: 25 | stream.u16(1) 26 | stream.u16(pool.utf8(b"ConstantValue")) 27 | stream.u32(2) 28 | 29 | ctype, val = field.constant_value 30 | # Ignore dalvik constant type and use actual field type instead 31 | index = { 32 | b'Z': pool.int, 33 | b'B': pool.int, 34 | b'S': pool.int, 35 | b'C': pool.int, 36 | b'I': pool.int, 37 | b'F': pool.float, 38 | b'J': pool.long, 39 | b'D': pool.double, 40 | b'Ljava/lang/String;': pool.string, 41 | b'Ljava/lang/Class;': pool.class_, 42 | }[field.id.desc](val) 43 | stream.u16(index) 44 | else: 45 | stream.u16(0) # no attributes 46 | 47 | def writeMethod(pool, stream, method, code_attr_data): 48 | stream.u16(method.access & flags.METHOD_FLAGS) 49 | stream.u16(pool.utf8(method.id.name)) 50 | stream.u16(pool.utf8(method.id.desc)) 51 | 52 | if code_attr_data is not None: 53 | code_attr_data = code_attr_data.toBytes() 54 | stream.u16(1) 55 | stream.u16(pool.utf8(b"Code")) 56 | stream.u32(len(code_attr_data)) 57 | stream.write(code_attr_data) 58 | else: 59 | stream.u16(0) # no attributes 60 | 61 | def writeMethods(pool, stream, methods, opts): 62 | code_irs = [] 63 | for method in methods: 64 | code_irs.append(writebytecode.getCodeIR(pool, method, opts=opts)) 65 | code_attrs = writebytecode.finishCodeAttrs(pool, code_irs, opts=opts) 66 | 67 | stream.u16(len(methods)) 68 | for method in methods: 69 | writeMethod(pool, stream, method, code_attrs.get(method)) 70 | 71 | def classFileAfterPool(cls, opts): 72 | stream = Writer() 73 | if opts.split_pool: 74 | pool = constantpool.SplitConstantPool() 75 | else: 76 | pool = constantpool.SimpleConstantPool() 77 | 78 | cls.parseData() 79 | access = cls.access & flags.CLASS_FLAGS 80 | if not access & flags.ACC_INTERFACE: 81 | # Not necessary for correctness, but this works around a bug in dx 82 | access |= flags.ACC_SUPER 83 | 84 | stream.u16(access) # access 85 | stream.u16(pool.class_(cls.name)) # this 86 | super_ = pool.class_(cls.super) if cls.super is not None else 0 87 | stream.u16(super_) # super 88 | 89 | # interfaces 90 | stream.u16(len(cls.interfaces)) 91 | for interface in cls.interfaces: 92 | stream.u16(pool.class_(interface)) 93 | 94 | # fields 95 | stream.u16(len(cls.data.fields)) 96 | for field in cls.data.fields: 97 | writeField(pool, stream, field) 98 | 99 | # methods 100 | writeMethods(pool, stream, cls.data.methods, opts=opts) 101 | 102 | # attributes 103 | stream.u16(0) 104 | return pool, stream 105 | 106 | def toClassFile(cls, opts): 107 | stream = Writer() 108 | stream.u32(0xCAFEBABE) 109 | # bytecode version 49.0 110 | stream.u16(0) 111 | stream.u16(49) 112 | 113 | # Optimistically try translating without optimization to speed things up 114 | # if the resulting code is too big, retry with optimization 115 | try: 116 | pool, rest_stream = classFileAfterPool(cls, opts=opts) 117 | except error.ClassfileLimitExceeded: 118 | # print('Retrying {} with optimization enabled'.format(cls.name)) 119 | pool, rest_stream = classFileAfterPool(cls, opts=options.ALL) 120 | 121 | # write constant pool 122 | pool.write(stream) 123 | # write rest of file 124 | stream.write(rest_stream.toBytes()) 125 | return stream.toBytes() 126 | -------------------------------------------------------------------------------- /enjarify/jvm/writeir.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import collections, struct 16 | from functools import partial 17 | 18 | from . import ir 19 | from .. import flags, dalvik 20 | from .jvmops import * 21 | from . import arraytypes as arrays 22 | from . import scalartypes as scalars 23 | from . import mathops 24 | from .optimization import stack 25 | from .. import util 26 | from ..typeinference import typeinference 27 | 28 | # Code for converting dalvik bytecode to intermediate representation 29 | # effectively this is just Java bytecode instructions with some abstractions for 30 | # later optimization 31 | 32 | _ilfdaOrd = [scalars.INT, scalars.LONG, scalars.FLOAT, scalars.DOUBLE, scalars.OBJ].index 33 | _newArrayCodes = {('['+t).encode(): v for t, v in zip('ZCFDBSIJ', range(4, 12))} 34 | _arrStoreOps = {t.encode(): v for t, v in zip('IJFD BCS', range(IASTORE, SASTORE+1))} 35 | _arrLoadOps = {t.encode(): v for t, v in zip('IJFD BCS', range(IALOAD, SALOAD+1))} 36 | _arrStoreOps[b'Z'] = BASTORE 37 | _arrLoadOps[b'Z'] = BALOAD 38 | 39 | # For generating IR instructions corresponding to a single Dalvik instruction 40 | class IRBlock: 41 | def __init__(self, parent, pos): 42 | self.type_data = parent.types[pos] 43 | self.pool = parent.pool 44 | self.delay_consts = parent.opts.delay_consts 45 | self.pos = pos 46 | self.instructions = [ir.Label(pos)] 47 | 48 | def add(self, jvm_instr): 49 | self.instructions.append(jvm_instr) 50 | 51 | def _other(self, bytecode): 52 | self.add(ir.Other(bytecode=bytecode)) 53 | 54 | def u8(self, op): self._other(struct.pack('>B', op)) 55 | def u8u8(self, op, x): self._other(struct.pack('>BB', op, x)) 56 | def u8u16(self, op, x): self._other(struct.pack('>BH', op, x)) 57 | # wide non iinc 58 | def u8u8u16(self, op, op2, x): self._other(struct.pack('>BBH', op, op2, x)) 59 | # invokeinterface 60 | def u8u16u8u8(self, op, x, y, z): self._other(struct.pack('>BHBB', op, x, y, z)) 61 | 62 | def ldc(self, index): 63 | if index < 256: 64 | self.add(ir.OtherConstant(bytecode=bytes([LDC, index]))) 65 | else: 66 | self.add(ir.OtherConstant(bytecode=struct.pack('>BH', LDC_W, index))) 67 | 68 | def load(self, reg, stype, desc=None, clsname=None): 69 | # if we know the register to be 0/null, don't bother loading 70 | if self.type_data.arrs[reg] == arrays.NULL: 71 | self.const(0, stype) 72 | else: 73 | self.add(ir.RegAccess(reg, stype, store=False)) 74 | # cast to appropriate type if tainted 75 | if stype == scalars.OBJ and self.type_data.tainted[reg]: 76 | assert desc is None or clsname is None 77 | if clsname is None: 78 | # remember to handle arrays - also fallthrough if desc is None 79 | clsname = desc[1:-1] if (desc and desc.startswith(b'L')) else desc 80 | if clsname is not None and clsname != b'java/lang/Object': 81 | self.u8u16(CHECKCAST, self.pool.class_(clsname)) 82 | 83 | def loadAsArray(self, reg): 84 | at = self.type_data.arrs[reg] 85 | if at == arrays.NULL: 86 | self.const_null() 87 | else: 88 | self.add(ir.RegAccess(reg, scalars.OBJ, store=False)) 89 | if self.type_data.tainted[reg]: 90 | if at == arrays.INVALID: 91 | # needs to be some type of object array, so just cast to Object[] 92 | self.u8u16(CHECKCAST, self.pool.class_(b'[Ljava/lang/Object;')) 93 | else: 94 | # note - will throw if actual type is boolean[] but there's not 95 | # much we can do in this case 96 | self.u8u16(CHECKCAST, self.pool.class_(at)) 97 | 98 | def store(self, reg, stype): 99 | self.add(ir.RegAccess(reg, stype, store=True)) 100 | 101 | def return_(self, stype=None): 102 | if stype is None: 103 | self.u8(RETURN) 104 | else: 105 | self.u8(IRETURN + _ilfdaOrd(stype)) 106 | 107 | def const(self, val, stype): 108 | assert (1<<64) > val >= 0 109 | if stype == scalars.OBJ: 110 | assert val == 0 111 | self.const_null() 112 | else: 113 | # If constant pool is simple, assume we're in non-opt mode and only use 114 | # the constant pool for generating constants instead of calculating 115 | # bytecode sequences for them. If we're in opt mode, pass None for pool 116 | # to generate bytecode instead 117 | pool = None if self.delay_consts else self.pool 118 | self.add(ir.PrimConstant(stype, val, pool=pool)) 119 | 120 | def const_null(self): 121 | self.add(ir.OtherConstant(bytecode=bytes([ACONST_NULL]))) 122 | 123 | def fillarraysub(self, op, cbs, pop=True): 124 | gen = stack.genDups(len(cbs), 0 if pop else 1) 125 | for i, cb in enumerate(cbs): 126 | for instr in next(gen): 127 | self.add(instr) 128 | self.const(i, scalars.INT) 129 | cb() 130 | self.u8(op) 131 | # may need to pop at end 132 | for instr in next(gen): 133 | self.add(instr) 134 | 135 | def newarray(self, desc): 136 | if desc in _newArrayCodes: 137 | self.u8u8(NEWARRAY, _newArrayCodes[desc]) 138 | else: 139 | # can be either multidim array or object array descriptor 140 | desc = desc[1:] 141 | if desc.startswith(b'L'): 142 | desc = desc[1:-1] 143 | self.u8u16(ANEWARRAY, self.pool.class_(desc)) 144 | 145 | def fillarraydata(self, op, stype, vals): 146 | self.fillarraysub(op, [partial(self.const, val, stype) for val in vals]) 147 | 148 | def cast(self, dex, reg, index): 149 | self.load(reg, scalars.OBJ) 150 | self.u8u16(CHECKCAST, self.pool.class_(dex.clsType(index))) 151 | self.store(reg, scalars.OBJ) 152 | 153 | def goto(self, target): 154 | self.add(ir.Goto(target)) 155 | 156 | def if_(self, op, target): 157 | self.add(ir.If(op, target)) 158 | 159 | def switch(self, default, jumps): 160 | jumps = {util.s32(k):v for k,v in jumps.items() if v != default} 161 | if jumps: 162 | self.add(ir.Switch(default, jumps)) 163 | else: 164 | self.u8(ir.POP) 165 | self.goto(default) 166 | 167 | def generateExceptLabels(self): 168 | s_ind = 0 169 | e_ind = len(self.instructions) 170 | # assume only Other instructions can throw 171 | while s_ind < e_ind and not isinstance(self.instructions[s_ind], ir.Other): 172 | s_ind += 1 173 | while s_ind < e_ind and not isinstance(self.instructions[e_ind-1], ir.Other): 174 | e_ind -= 1 175 | 176 | assert s_ind < e_ind 177 | start_lbl, end_lbl = ir.Label(), ir.Label() 178 | self.instructions.insert(s_ind, start_lbl) 179 | self.instructions.insert(e_ind+1, end_lbl) 180 | return start_lbl, end_lbl 181 | 182 | class IRWriter: 183 | def __init__(self, pool, method, types, opts): 184 | self.pool = pool 185 | self.method = method 186 | self.types = types 187 | self.opts = opts 188 | 189 | self.iblocks = {} 190 | 191 | self.flat_instructions = None 192 | self.excepts = [] 193 | self.labels = {} 194 | self.initial_args = None 195 | self.exception_redirects = {} 196 | 197 | self.except_starts = set() 198 | self.except_ends = set() 199 | self.jump_targets = set() 200 | # used to detect jump targets with a unique predecessor 201 | self.target_pred_counts = collections.defaultdict(int) 202 | 203 | self.numregs = None # will be set once registers are allocated (see registers.py) 204 | 205 | def calcInitialArgs(self, nregs, scalar_ptypes): 206 | self.initial_args = args = [] 207 | regoff = nregs - len(scalar_ptypes) 208 | for i, st in enumerate(scalar_ptypes): 209 | if st == scalars.INVALID: 210 | args.append(None) 211 | else: 212 | args.append((i + regoff, st)) 213 | 214 | def addExceptionRedirect(self, target): 215 | return self.exception_redirects.setdefault(target, ir.Label()) 216 | 217 | def createBlock(self, instr): 218 | block = IRBlock(self, instr.pos) 219 | self.iblocks[block.pos] = block 220 | self.labels[block.pos] = block.instructions[0] 221 | return block 222 | 223 | def flatten(self): 224 | instructions = [] 225 | for pos in sorted(self.iblocks): 226 | if pos in self.exception_redirects: 227 | # check if we can put handler pop in front of block 228 | if instructions and not instructions[-1].fallsthrough(): 229 | instructions.append(self.exception_redirects.pop(pos)) 230 | instructions.append(ir.Pop()) 231 | # if not, leave it in dict to be redirected later 232 | # now add instructions for actual block 233 | instructions += self.iblocks[pos].instructions 234 | 235 | # exception handler pops that couldn't be placed inline 236 | # in this case, just put them at the end with a goto back to the handler 237 | for target in sorted(self.exception_redirects): 238 | instructions.append(self.exception_redirects[target]) 239 | instructions.append(ir.Pop()) 240 | instructions.append(ir.Goto(target)) 241 | 242 | self.flat_instructions = instructions 243 | self.iblocks = self.exception_redirects = None 244 | 245 | def replaceInstrs(self, replace): 246 | if replace: 247 | instructions = [] 248 | for instr in self.flat_instructions: 249 | instructions.extend(replace.get(instr, [instr])) 250 | self.flat_instructions = instructions 251 | assert len(set(instructions)) == len(instructions) 252 | 253 | def calcUpperBound(self): 254 | # Get an uppper bound on the size of the bytecode 255 | size = 0 256 | for ins in self.flat_instructions: 257 | if ins.bytecode is None: 258 | size += ins.max 259 | else: 260 | size += len(ins.bytecode) 261 | return size 262 | 263 | ################################################################################ 264 | def visitNop(method, dex, instr_d, type_data, block, instr): 265 | pass 266 | 267 | def visitMove(method, dex, instr_d, type_data, block, instr): 268 | for st in (scalars.INT, scalars.OBJ, scalars.FLOAT): 269 | if st & type_data.prims[instr.args[1]]: 270 | block.load(instr.args[1], st) 271 | block.store(instr.args[0], st) 272 | 273 | def visitMoveWide(method, dex, instr_d, type_data, block, instr): 274 | for st in (scalars.LONG, scalars.DOUBLE): 275 | if st & type_data.prims[instr.args[1]]: 276 | block.load(instr.args[1], st) 277 | block.store(instr.args[0], st) 278 | 279 | def visitMoveResult(method, dex, instr_d, type_data, block, instr): 280 | st = scalars.fromDesc(instr.prev_result) 281 | block.store(instr.args[0], st) 282 | 283 | def visitReturn(method, dex, instr_d, type_data, block, instr): 284 | if method.id.return_type == b'V': 285 | block.return_() 286 | else: 287 | st = scalars.fromDesc(method.id.return_type) 288 | block.load(instr.args[0], st, desc=method.id.return_type) 289 | block.return_(st) 290 | 291 | def visitConst32(method, dex, instr_d, type_data, block, instr): 292 | val = instr.args[1] % (1<<32) 293 | block.const(val, scalars.INT) 294 | block.store(instr.args[0], scalars.INT) 295 | block.const(val, scalars.FLOAT) 296 | block.store(instr.args[0], scalars.FLOAT) 297 | if not val: 298 | block.const_null() 299 | block.store(instr.args[0], scalars.OBJ) 300 | 301 | def visitConst64(method, dex, instr_d, type_data, block, instr): 302 | val = instr.args[1] % (1<<64) 303 | block.const(val, scalars.LONG) 304 | block.store(instr.args[0], scalars.LONG) 305 | block.const(val, scalars.DOUBLE) 306 | block.store(instr.args[0], scalars.DOUBLE) 307 | 308 | def visitConstString(method, dex, instr_d, type_data, block, instr): 309 | val = dex.string(instr.args[1]) 310 | block.ldc(block.pool.string(val)) 311 | block.store(instr.args[0], scalars.OBJ) 312 | 313 | def visitConstClass(method, dex, instr_d, type_data, block, instr): 314 | # Could use dex.type here since the JVM doesn't care, but this is cleaner 315 | val = dex.clsType(instr.args[1]) 316 | block.ldc(block.pool.class_(val)) 317 | block.store(instr.args[0], scalars.OBJ) 318 | 319 | def visitMonitorEnter(method, dex, instr_d, type_data, block, instr): 320 | block.load(instr.args[0], scalars.OBJ) 321 | block.u8(MONITORENTER) 322 | 323 | def visitMonitorExit(method, dex, instr_d, type_data, block, instr): 324 | block.load(instr.args[0], scalars.OBJ) 325 | block.u8(MONITOREXIT) 326 | 327 | def visitCheckCast(method, dex, instr_d, type_data, block, instr): 328 | block.cast(dex, instr.args[0], instr.args[1]) 329 | 330 | def visitInstanceOf(method, dex, instr_d, type_data, block, instr): 331 | block.load(instr.args[1], scalars.OBJ) 332 | block.u8u16(INSTANCEOF, block.pool.class_(dex.clsType(instr.args[2]))) 333 | block.store(instr.args[0], scalars.INT) 334 | 335 | def visitArrayLen(method, dex, instr_d, type_data, block, instr): 336 | block.loadAsArray(instr.args[1]) 337 | block.u8(ARRAYLENGTH) 338 | block.store(instr.args[0], scalars.INT) 339 | 340 | def visitNewInstance(method, dex, instr_d, type_data, block, instr): 341 | block.u8u16(NEW, block.pool.class_(dex.clsType(instr.args[1]))) 342 | block.store(instr.args[0], scalars.OBJ) 343 | 344 | def visitNewArray(method, dex, instr_d, type_data, block, instr): 345 | block.load(instr.args[1], scalars.INT) 346 | block.newarray(dex.type(instr.args[2])) 347 | block.store(instr.args[0], scalars.OBJ) 348 | 349 | def visitFilledNewArray(method, dex, instr_d, type_data, block, instr): 350 | regs = instr.args[1] 351 | block.const(len(regs), scalars.INT) 352 | block.newarray(dex.type(instr.args[0])) 353 | st, elet = arrays.eletPair(arrays.fromDesc(dex.type(instr.args[0]))) 354 | op = _arrStoreOps.get(elet, AASTORE) 355 | cbs = [partial(block.load, reg, st) for reg in regs] 356 | # if not followed by move-result, don't leave it on the stack 357 | mustpop = instr_d.get(instr.pos2).type != dalvik.MoveResult 358 | block.fillarraysub(op, cbs, pop=mustpop) 359 | 360 | def visitFillArrayData(method, dex, instr_d, type_data, block, instr): 361 | width, arrdata = instr_d[instr.args[1]].fillarrdata 362 | at = type_data.arrs[instr.args[0]] 363 | 364 | block.loadAsArray(instr.args[0]) 365 | if at is arrays.NULL: 366 | block.u8(ATHROW) 367 | else: 368 | if len(arrdata) == 0: 369 | # fill-array-data throws a NPE if array is null even when 370 | # there is 0 data, so we need to add an instruction that 371 | # throws a NPE in this case 372 | block.u8(ARRAYLENGTH) 373 | block.add(ir.Pop()) 374 | else: 375 | st, elet = arrays.eletPair(at) 376 | # check if we need to sign extend 377 | if elet == b'B' or elet == b'Z': 378 | arrdata = [util.signExtend(x, 8) & 0xFFFFFFFF for x in arrdata] 379 | elif elet == b'S': 380 | arrdata = [util.signExtend(x, 16) & 0xFFFFFFFF for x in arrdata] 381 | block.fillarraydata(_arrStoreOps.get(elet, AASTORE), st, arrdata) 382 | 383 | def visitThrow(method, dex, instr_d, type_data, block, instr): 384 | block.load(instr.args[0], scalars.OBJ, clsname=b'java/lang/Throwable') 385 | block.u8(ATHROW) 386 | 387 | def visitGoto(method, dex, instr_d, type_data, block, instr): 388 | block.goto(instr.args[0]) 389 | 390 | def visitSwitch(method, dex, instr_d, type_data, block, instr): 391 | block.load(instr.args[0], scalars.INT) 392 | switchdata = instr_d[instr.args[1]].switchdata 393 | default = instr.pos2 394 | jumps = {k:(offset + instr.pos) % (1<<32) for k, offset in switchdata.items()} 395 | block.switch(default, jumps) 396 | 397 | def visitCmp(method, dex, instr_d, type_data, block, instr): 398 | op = [FCMPL, FCMPG, DCMPL, DCMPG, LCMP][instr.opcode - 0x2d] 399 | st = [scalars.FLOAT, scalars.FLOAT, scalars.DOUBLE, scalars.DOUBLE, scalars.LONG][instr.opcode - 0x2d] 400 | block.load(instr.args[1], st) 401 | block.load(instr.args[2], st) 402 | block.u8(op) 403 | block.store(instr.args[0], scalars.INT) 404 | 405 | def visitIf(method, dex, instr_d, type_data, block, instr): 406 | st = type_data.prims[instr.args[0]] & type_data.prims[instr.args[1]] 407 | if st & scalars.INT: 408 | block.load(instr.args[0], scalars.INT) 409 | block.load(instr.args[1], scalars.INT) 410 | op = [IF_ICMPEQ, IF_ICMPNE, IF_ICMPLT, IF_ICMPGE, IF_ICMPGT, IF_ICMPLE][instr.opcode - 0x32] 411 | else: 412 | block.load(instr.args[0], scalars.OBJ) 413 | block.load(instr.args[1], scalars.OBJ) 414 | op = [IF_ACMPEQ, IF_ACMPNE][instr.opcode - 0x32] 415 | block.if_(op, instr.args[2]) 416 | 417 | def visitIfZ(method, dex, instr_d, type_data, block, instr): 418 | if type_data.prims[instr.args[0]] & scalars.INT: 419 | block.load(instr.args[0], scalars.INT) 420 | op = [IFEQ, IFNE, IFLT, IFGE, IFGT, IFLE][instr.opcode - 0x38] 421 | else: 422 | block.load(instr.args[0], scalars.OBJ) 423 | op = [IFNULL, IFNONNULL][instr.opcode - 0x38] 424 | block.if_(op, instr.args[1]) 425 | 426 | def visitArrayGet(method, dex, instr_d, type_data, block, instr): 427 | at = type_data.arrs[instr.args[1]] 428 | if at is arrays.NULL: 429 | block.const_null() 430 | block.u8(ATHROW) 431 | else: 432 | block.loadAsArray(instr.args[1]) 433 | block.load(instr.args[2], scalars.INT) 434 | st, elet = arrays.eletPair(at) 435 | block.u8(_arrLoadOps.get(elet, AALOAD)) 436 | block.store(instr.args[0], st) 437 | 438 | def visitArrayPut(method, dex, instr_d, type_data, block, instr): 439 | at = type_data.arrs[instr.args[1]] 440 | if at is arrays.NULL: 441 | block.const_null() 442 | block.u8(ATHROW) 443 | else: 444 | block.loadAsArray(instr.args[1]) 445 | block.load(instr.args[2], scalars.INT) 446 | st, elet = arrays.eletPair(at) 447 | block.load(instr.args[0], st) 448 | block.u8(_arrStoreOps.get(elet, AASTORE)) 449 | 450 | def visitInstanceGet(method, dex, instr_d, type_data, block, instr): 451 | field_id = dex.field_id(instr.args[2]) 452 | st = scalars.fromDesc(field_id.desc) 453 | block.load(instr.args[1], scalars.OBJ, clsname=field_id.cname) 454 | block.u8u16(GETFIELD, block.pool.field(field_id.triple())) 455 | block.store(instr.args[0], st) 456 | 457 | def visitInstancePut(method, dex, instr_d, type_data, block, instr): 458 | field_id = dex.field_id(instr.args[2]) 459 | st = scalars.fromDesc(field_id.desc) 460 | block.load(instr.args[1], scalars.OBJ, clsname=field_id.cname) 461 | block.load(instr.args[0], st, desc=field_id.desc) 462 | block.u8u16(PUTFIELD, block.pool.field(field_id.triple())) 463 | 464 | def visitStaticGet(method, dex, instr_d, type_data, block, instr): 465 | field_id = dex.field_id(instr.args[1]) 466 | st = scalars.fromDesc(field_id.desc) 467 | block.u8u16(GETSTATIC, block.pool.field(field_id.triple())) 468 | block.store(instr.args[0], st) 469 | 470 | def visitStaticPut(method, dex, instr_d, type_data, block, instr): 471 | field_id = dex.field_id(instr.args[1]) 472 | st = scalars.fromDesc(field_id.desc) 473 | block.load(instr.args[0], st, desc=field_id.desc) 474 | block.u8u16(PUTSTATIC, block.pool.field(field_id.triple())) 475 | 476 | def visitInvoke(method, dex, instr_d, type_data, block, instr): 477 | isstatic = instr.type == dalvik.InvokeStatic 478 | 479 | called_id = dex.method_id(instr.args[0]) 480 | sts = scalars.paramTypes(called_id, static=isstatic) 481 | descs = called_id.getSpacedParamTypes(isstatic=isstatic) 482 | assert len(sts) == len(instr.args[1]) == len(descs) 483 | 484 | for st, desc, reg in zip(sts, descs, instr.args[1]): 485 | if st != scalars.INVALID: # skip long/double tops 486 | block.load(reg, st, desc=desc) 487 | op = { 488 | dalvik.InvokeVirtual: INVOKEVIRTUAL, 489 | dalvik.InvokeSuper: INVOKESPECIAL, 490 | dalvik.InvokeDirect: INVOKESPECIAL, 491 | dalvik.InvokeStatic: INVOKESTATIC, 492 | dalvik.InvokeInterface: INVOKEINTERFACE, 493 | }[instr.type] 494 | 495 | if instr.type == dalvik.InvokeInterface: 496 | block.u8u16u8u8(op, block.pool.imethod(called_id.triple()), len(descs), 0) 497 | else: 498 | block.u8u16(op, block.pool.method(called_id.triple())) 499 | 500 | # check if we need to pop result instead of leaving on stack 501 | if instr_d.get(instr.pos2).type != dalvik.MoveResult: 502 | if called_id.return_type != b'V': 503 | st = scalars.fromDesc(called_id.return_type) 504 | block.add(ir.Pop2() if scalars.iswide(st) else ir.Pop()) 505 | 506 | def visitUnaryOp(method, dex, instr_d, type_data, block, instr): 507 | op, srct, destt = mathops.UNARY[instr.opcode] 508 | block.load(instr.args[1], srct) 509 | # *not requires special handling since there's no direct Java equivalent. Instead we have to do x ^ -1 510 | if op == IXOR: 511 | block.u8(ICONST_M1) 512 | elif op == LXOR: 513 | block.u8(ICONST_M1) 514 | block.u8(I2L) 515 | 516 | block.u8(op) 517 | block.store(instr.args[0], destt) 518 | 519 | def visitBinaryOp(method, dex, instr_d, type_data, block, instr): 520 | op, st, st2 = mathops.BINARY[instr.opcode] 521 | # index arguments as negative so it works for regular and 2addr forms 522 | block.load(instr.args[-2], st) 523 | block.load(instr.args[-1], st2) 524 | block.u8(op) 525 | block.store(instr.args[0], st) 526 | 527 | def visitBinaryOpConst(method, dex, instr_d, type_data, block, instr): 528 | op = mathops.BINARY_LIT[instr.opcode] 529 | if op == ISUB: # rsub 530 | block.const(instr.args[2] % (1<<32), scalars.INT) 531 | block.load(instr.args[1], scalars.INT) 532 | else: 533 | block.load(instr.args[1], scalars.INT) 534 | block.const(instr.args[2] % (1<<32), scalars.INT) 535 | block.u8(op) 536 | block.store(instr.args[0], scalars.INT) 537 | ################################################################################ 538 | VISIT_FUNCS = { 539 | dalvik.Nop: visitNop, 540 | dalvik.Move: visitMove, 541 | dalvik.MoveWide: visitMoveWide, 542 | dalvik.MoveResult: visitMoveResult, 543 | dalvik.Return: visitReturn, 544 | dalvik.Const32: visitConst32, 545 | dalvik.Const64: visitConst64, 546 | dalvik.ConstString: visitConstString, 547 | dalvik.ConstClass: visitConstClass, 548 | dalvik.MonitorEnter: visitMonitorEnter, 549 | dalvik.MonitorExit: visitMonitorExit, 550 | dalvik.CheckCast: visitCheckCast, 551 | dalvik.InstanceOf: visitInstanceOf, 552 | dalvik.ArrayLen: visitArrayLen, 553 | dalvik.NewInstance: visitNewInstance, 554 | dalvik.NewArray: visitNewArray, 555 | dalvik.FilledNewArray: visitFilledNewArray, 556 | dalvik.FillArrayData: visitFillArrayData, 557 | dalvik.Throw: visitThrow, 558 | dalvik.Goto: visitGoto, 559 | dalvik.Switch: visitSwitch, 560 | dalvik.Cmp: visitCmp, 561 | dalvik.If: visitIf, 562 | dalvik.IfZ: visitIfZ, 563 | 564 | dalvik.ArrayGet: visitArrayGet, 565 | dalvik.ArrayPut: visitArrayPut, 566 | dalvik.InstanceGet: visitInstanceGet, 567 | dalvik.InstancePut: visitInstancePut, 568 | dalvik.StaticGet: visitStaticGet, 569 | dalvik.StaticPut: visitStaticPut, 570 | 571 | dalvik.InvokeVirtual: visitInvoke, 572 | dalvik.InvokeSuper: visitInvoke, 573 | dalvik.InvokeDirect: visitInvoke, 574 | dalvik.InvokeStatic: visitInvoke, 575 | dalvik.InvokeInterface: visitInvoke, 576 | 577 | dalvik.UnaryOp: visitUnaryOp, 578 | dalvik.BinaryOp: visitBinaryOp, 579 | dalvik.BinaryOpConst: visitBinaryOpConst, 580 | } 581 | 582 | def writeBytecode(pool, method, opts): 583 | dex = method.dex 584 | code = method.code 585 | instr_d = {instr.pos: instr for instr in code.bytecode} 586 | types, all_handlers = typeinference.doInference(dex, method, code, code.bytecode, instr_d) 587 | 588 | scalar_ptypes = scalars.paramTypes(method.id, static=(method.access & flags.ACC_STATIC)) 589 | 590 | writer = IRWriter(pool, method, types, opts) 591 | writer.calcInitialArgs(code.nregs, scalar_ptypes) 592 | 593 | for instr in code.bytecode: 594 | if instr.pos not in types: # skip unreachable instructions 595 | continue 596 | type_data = types[instr.pos] 597 | block = writer.createBlock(instr) 598 | VISIT_FUNCS[instr.type](method, dex, instr_d, type_data, block, instr) 599 | 600 | for instr in sorted(all_handlers, key=lambda instr: instr.pos): 601 | assert all_handlers[instr] 602 | if instr.pos not in types: # skip unreachable instructions 603 | continue 604 | 605 | start, end = writer.iblocks[instr.pos].generateExceptLabels() 606 | writer.except_starts.add(start) 607 | writer.except_ends.add(end) 608 | 609 | for ctype, handler_pos in all_handlers[instr]: 610 | # If handler doesn't use the caught exception, we need to redirect to a pop instead 611 | if instr_d.get(handler_pos).type != dalvik.MoveResult: 612 | target = writer.addExceptionRedirect(handler_pos) 613 | else: 614 | target = writer.labels[handler_pos] 615 | writer.jump_targets.add(target) 616 | writer.target_pred_counts[target] += 1 617 | 618 | # When catching Throwable, we can use the special index 0 instead, 619 | # potentially saving a constant pool entry or two 620 | jctype = 0 if ctype == b'java/lang/Throwable' else pool.class_(ctype) 621 | writer.excepts.append((start, end, target, jctype)) 622 | writer.flatten() 623 | 624 | # find jump targets (in addition to exception handler targets) 625 | for instr in writer.flat_instructions: 626 | for target in instr.targets(): 627 | label = writer.labels[target] 628 | writer.jump_targets.add(label) 629 | writer.target_pred_counts[label] += 1 630 | 631 | return writer 632 | -------------------------------------------------------------------------------- /enjarify/main.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import zipfile, traceback, argparse, collections 16 | 17 | from . import parsedex 18 | from .jvm import writeclass 19 | from .mutf8 import decode 20 | from .jvm.optimization import options 21 | 22 | def read(fname, mode='rb'): 23 | with open(fname, mode) as f: 24 | return f.read() 25 | 26 | def translate(data, opts, classes=None, errors=None, allowErrors=True): 27 | dex = parsedex.DexFile(data) 28 | classes = collections.OrderedDict() if classes is None else classes 29 | errors = collections.OrderedDict() if errors is None else errors 30 | 31 | for cls in dex.classes: 32 | unicode_name = decode(cls.name) + '.class' 33 | if unicode_name in classes or unicode_name in errors: 34 | print('Warning, duplicate class name', unicode_name) 35 | continue 36 | 37 | try: 38 | class_data = writeclass.toClassFile(cls, opts) 39 | classes[unicode_name] = class_data 40 | except Exception: 41 | if not allowErrors: 42 | raise 43 | errors[unicode_name] = traceback.format_exc() 44 | 45 | if not (len(classes) + len(errors)) % 1000: 46 | print(len(classes) + len(errors), 'classes processed') 47 | return classes, errors 48 | 49 | def writeToJar(fname, classes): 50 | with zipfile.ZipFile(fname, 'w') as out: 51 | for unicode_name, data in classes.items(): 52 | # Don't bother compressing small files 53 | compress_type = zipfile.ZIP_DEFLATED if len(data) > 10000 else zipfile.ZIP_STORED 54 | info = zipfile.ZipInfo(unicode_name) 55 | info.external_attr = 0o775 << 16 # set Unix file permissions 56 | out.writestr(info, data, compress_type=compress_type) 57 | 58 | def main(): 59 | parser = argparse.ArgumentParser(prog='enjarify', description='Translates Dalvik bytecode (.dex or .apk) to Java bytecode (.jar)') 60 | parser.add_argument('inputfile') 61 | parser.add_argument('-o', '--output', help='Output .jar file. Default is [input-filename]-enjarify.jar.') 62 | parser.add_argument('-f', '--force', action='store_true', help='Force overwrite. If output file already exists, this option is required to overwrite.') 63 | parser.add_argument('--fast', action='store_true', help='Speed up translation at the expense of generated bytecode being less readable.') 64 | args = parser.parse_args() 65 | 66 | dexs = [] 67 | if args.inputfile.lower().endswith('.apk'): 68 | with zipfile.ZipFile(args.inputfile, 'r') as z: 69 | for name in z.namelist(): 70 | if name.startswith('classes') and name.endswith('.dex'): 71 | dexs.append(z.read(name)) 72 | else: 73 | dexs.append(read(args.inputfile)) 74 | 75 | # Exclusive mode requires 3.3+, so provide helpful error in this case 76 | if not args.force: 77 | try: 78 | FileExistsError 79 | except NameError: 80 | print('Overwrite protection requires Python 3.3+. Either pass -f or --force, or upgrade to a more recent version of Python. If you are using Pypy3 2.4, you need to switch to a nightly build or build from source. Or just pass -f.') 81 | return 82 | 83 | # Might as well open the output file early so we can detect existing file error 84 | # before going to the trouble of translating everything 85 | outname = args.output or args.inputfile.rpartition('/')[-1].rpartition('.')[0] + '-enjarify.jar' 86 | try: 87 | outfile = open(outname, mode=('wb' if args.force else 'xb')) 88 | except FileExistsError: 89 | print('Attempting to write to', outname) 90 | print('Error, output file already exists and --force was not specified.') 91 | print('To overwrite the output file, pass -f or --force.') 92 | return 93 | 94 | opts = options.NONE if args.fast else options.PRETTY 95 | classes = collections.OrderedDict() 96 | errors = collections.OrderedDict() 97 | for data in dexs: 98 | translate(data, opts=opts, classes=classes, errors=errors) 99 | writeToJar(outfile, classes) 100 | outfile.close() 101 | print('Output written to', outname) 102 | 103 | for name, error in sorted(errors.items()): 104 | print(name, error) 105 | print('{} classes translated successfully, {} classes had errors'.format(len(classes), len(errors))) 106 | 107 | if __name__ == "__main__": 108 | main() 109 | -------------------------------------------------------------------------------- /enjarify/mutf8.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Unfortunately, there's no easy way to decode Modified UTF8 in Python, so we 16 | # have to write a custom decoder. This one is error tolerant and will decode 17 | # anything resembling mutf8. 18 | 19 | def _decode(b): 20 | # decode arbitrary utf8 codepoints, tolerating surrogate pairs, nonstandard encodings, etc. 21 | for x in b: 22 | if x < 128: 23 | yield x 24 | else: 25 | # figure out how many bytes 26 | extra = 0 27 | for i in range(6, 0, -1): 28 | if x & (1<> 5 32 | 33 | if vtype == 0x1c: # ARRAY 34 | size = stream.uleb128() 35 | return [encodedValue(dex, stream) for _ in range(size)] 36 | if vtype == 0x1d: # ANNOTATION 37 | # We don't actually care about annotations but still need to read it to 38 | # find out how much data is taken up 39 | stream.uleb128() 40 | for _ in range(stream.uleb128()): 41 | stream.uleb128() 42 | encodedValue(dex, stream) 43 | return None 44 | if vtype == 0x1e: # NULL 45 | return None 46 | 47 | # For the rest, we just return it as unsigned integers without recording type 48 | # extended to either u32 or u64 depending on int/float or long/double 49 | if vtype == 0x1f: # BOOLEAN 50 | return b'I', varg 51 | # the rest are an int encoded into varg + 1 bytes in some way 52 | size = varg + 1 53 | val = sum(stream.u8() << (i*8) for i in range(size)) 54 | 55 | if vtype == 0x00: # BYTE 56 | return b'I', signExtend(val, 8) % (1<<32) 57 | if vtype == 0x02: # SHORT 58 | return b'I', signExtend(val, 16) % (1<<32) 59 | if vtype == 0x03: # CHAR 60 | return b'I', val 61 | if vtype == 0x04: # INT 62 | return b'I', val 63 | 64 | if vtype == 0x06: # LONG 65 | return b'J', val 66 | 67 | # floats are 0 extended to the right 68 | if vtype == 0x10: # FLOAT 69 | return b'F', val << (32 - size * 8) 70 | if vtype == 0x11: # DOUBLE 71 | return b'D', val << (64 - size * 8) 72 | 73 | if vtype == 0x17: # STRING 74 | return b'Ljava/lang/String;', dex.string(val) 75 | if vtype == 0x18: # TYPE 76 | return b'Ljava/lang/Class;', dex.clsType(val) 77 | 78 | class MFIdMixin: 79 | def triple(self): return self.cname, self.name, self.desc 80 | 81 | class FieldId(MFIdMixin): 82 | def __init__(self, dex, field_idx): 83 | stream = dex.stream(dex.field_ids.off + field_idx * 8) 84 | self.cname = dex.clsType(stream.u16()) 85 | self.desc = dex.type(stream.u16()) 86 | self.name = dex.string(stream.u32()) 87 | 88 | class Field: 89 | def __init__(self, dex, field_idx, access): 90 | self.id = FieldId(dex, field_idx) 91 | self.access = access 92 | self.constant_value = None # will be set later 93 | 94 | class MethodId(MFIdMixin): 95 | def __init__(self, dex, method_idx): 96 | stream = dex.stream(dex.method_ids.off + method_idx * 8) 97 | self.cname = dex.clsType(stream.u16()) 98 | proto_idx = stream.u16() 99 | self.name = dex.string(stream.u32()) 100 | 101 | stream2 = dex.stream(dex.proto_ids.off + proto_idx * 12) 102 | shorty_idx, return_idx, parameters_off = stream2.u32(), stream2.u32(), stream2.u32() 103 | self.return_type = dex.type(return_idx) 104 | self.param_types = typeList(dex, parameters_off) 105 | 106 | # rearrange things to Java format 107 | parts = [b'('] + self.param_types + [b')', self.return_type] 108 | self.desc = b''.join(parts) 109 | 110 | def getSpacedParamTypes(self, isstatic): 111 | results = [] 112 | if not isstatic: 113 | if self.cname.startswith(b'['): 114 | results.append(self.cname) 115 | else: 116 | results.append(b'L' + self.cname + b';') 117 | 118 | for ptype in self.param_types: 119 | results.append(ptype) 120 | if ptype == b'J' or ptype == b'D': 121 | results.append(None) 122 | return results 123 | 124 | class TryItem: 125 | def __init__(self, stream): 126 | self.start, self.count, self.handler_off = stream.u32(), stream.u16(), stream.u16() 127 | self.end = self.start + self.count 128 | self.catches = None # to be filled in later 129 | 130 | def finish(self, dex, list_off): 131 | stream = dex.stream(list_off + self.handler_off) 132 | size = stream.sleb128() 133 | self.catches = results = [] 134 | for _ in range(abs(size)): 135 | results.append((dex.clsType(stream.uleb128()), stream.uleb128())) 136 | if size <= 0: 137 | results.append((b'java/lang/Throwable', stream.uleb128())) 138 | 139 | class CodeItem: 140 | def __init__(self, dex, offset): 141 | stream = dex.stream(offset) 142 | self.nregs = registers_size = stream.u16() 143 | ins_size = stream.u16() 144 | outs_size = stream.u16() 145 | tries_size = stream.u16() 146 | debug_off = stream.u32() 147 | self.insns_size = stream.u32() 148 | insns_start_pos = stream.pos 149 | insns = [stream.u16() for _ in range(self.insns_size)] 150 | if tries_size and self.insns_size & 1: 151 | stream.u16() # padding 152 | self.tries = [TryItem(stream) for _ in range(tries_size)] 153 | self.list_off = stream.pos 154 | for item in self.tries: 155 | item.finish(dex, self.list_off) 156 | 157 | catch_addrs = set() 158 | for tryi in self.tries: 159 | catch_addrs.update(t[1] for t in tryi.catches) 160 | self.bytecode = parseBytecode(dex, insns_start_pos, insns, catch_addrs) 161 | 162 | class Method: 163 | def __init__(self, dex, method_idx, access, code_off): 164 | self.dex = dex 165 | self.id = MethodId(dex, method_idx) 166 | self.access = access 167 | self.code_off = code_off 168 | self.code = CodeItem(dex, code_off) if code_off else None 169 | 170 | class ClassData: 171 | def __init__(self, dex, offset): 172 | self.fields = [] 173 | self.methods = [] 174 | # for offset 0, leave dummy data with no fields or methods 175 | if offset != 0: 176 | self._parse(dex, dex.stream(offset)) 177 | 178 | def _parse(self, dex, stream): 179 | numstatic = stream.uleb128() 180 | numinstance = stream.uleb128() 181 | numdirect = stream.uleb128() 182 | numvirtual = stream.uleb128() 183 | 184 | fields = self.fields 185 | for num in (numstatic, numinstance): 186 | field_idx = 0 187 | for i in range(num): 188 | field_idx += stream.uleb128() 189 | fields.append(Field(dex, field_idx, stream.uleb128())) 190 | 191 | methods = self.methods 192 | for num in (numdirect, numvirtual): 193 | method_idx = 0 194 | for i in range(num): 195 | method_idx += stream.uleb128() 196 | methods.append(Method(dex, method_idx, stream.uleb128(), stream.uleb128())) 197 | 198 | class DexClass: 199 | def __init__(self, dex, base_off, i): 200 | self.dex = dex 201 | st = dex.stream(base_off + i*32) 202 | 203 | self.name = dex.clsType(st.u32()) 204 | self.access = st.u32() 205 | super_ = st.u32() 206 | self.super = dex.clsType(super_) if super_ != NO_INDEX else None 207 | self.interfaces = typeList(dex, st.u32(), parseClsDesc=True) 208 | _ = st.u32() 209 | _ = st.u32() 210 | self.data_off = st.u32() 211 | self.data = None # parse data lazily in parseData() 212 | self.constant_values_off = st.u32() 213 | 214 | def parseData(self): 215 | if self.data is None: 216 | self.data = ClassData(self.dex, self.data_off) 217 | if self.constant_values_off: 218 | stream = self.dex.stream(self.constant_values_off) 219 | for field in self.data.fields[:stream.uleb128()]: 220 | field.constant_value = encodedValue(self.dex, stream) 221 | 222 | class SizeOff: 223 | def __init__(self, stream): 224 | self.size = stream.u32() 225 | self.off = stream.u32() 226 | 227 | class DexFile: 228 | def __init__(self, data): 229 | self.raw = data 230 | stream = Reader(data) 231 | 232 | # parse header 233 | stream.read(36) 234 | if stream.u32() != 0x70: 235 | print('Warning, unexpected header size!') 236 | if stream.u32() != 0x12345678: 237 | print('Warning, unexpected endianess tag!') 238 | 239 | self.link = SizeOff(stream) 240 | self.map_off = stream.u32() 241 | self.string_ids = SizeOff(stream) 242 | self.type_ids = SizeOff(stream) 243 | self.proto_ids = SizeOff(stream) 244 | self.field_ids = SizeOff(stream) 245 | self.method_ids = SizeOff(stream) 246 | self.class_defs = SizeOff(stream) 247 | self.data = SizeOff(stream) 248 | 249 | defs = self.class_defs 250 | self.classes = [] 251 | for i in range(defs.size): 252 | self.classes.append(DexClass(self, defs.off, i)) 253 | 254 | def stream(self, offset): return Reader(self.raw, offset) 255 | 256 | def string(self, i): 257 | data_off = self.stream(self.string_ids.off + i*4).u32() 258 | stream = self.stream(data_off) 259 | stream.uleb128() # ignore decoded length 260 | return stream.readCStr() 261 | 262 | def type(self, i): 263 | if 0 <= i < NO_INDEX: 264 | str_idx = self.stream(self.type_ids.off + i*4).u32() 265 | return self.string(str_idx) 266 | 267 | def clsType(self, i): 268 | # Can be either class _name_ or array _descriptor_ 269 | desc = self.type(i) 270 | if desc.startswith(b'['): 271 | return desc 272 | elif desc.startswith(b'L'): 273 | return desc[1:-1] 274 | # Not sure how to handle primative classes properly, 275 | # but this should hopefully be good enough. 276 | return desc 277 | 278 | def field_id(self, i): return FieldId(self, i) 279 | def method_id(self, i): return MethodId(self, i) 280 | -------------------------------------------------------------------------------- /enjarify/treelist.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # The first SIZE elements are stored directly, the rest are stored in one of SPLIT subtrees 16 | SIZE = 16 17 | SPLIT = 16 18 | 19 | # This class represents a list as a persistent n-ary tree 20 | # This has much slower access and updates than a real list but has the advantage 21 | # of sharing memory with previous versions of the list when only a few elements 22 | # are changed. See http://en.wikipedia.org/wiki/Persistent_data_structure#Trees 23 | # Also, default values are not stored, so this is good for sparse arrays 24 | class TreeList: 25 | def __init__(self, default, func, data=None): 26 | self.default = default 27 | self.func = func 28 | self.data = data or _TreeListSub(default) 29 | 30 | def __getitem__(self, i): 31 | return self.data[i] 32 | 33 | def __setitem__(self, i, val): 34 | self.data = self.data.set(i, val) 35 | 36 | def copy(self): 37 | return TreeList(self.default, self.func, self.data) 38 | 39 | def merge(self, other): 40 | assert self.func is other.func 41 | self.data = _TreeListSub.merge(self.data, other.data, self.func) 42 | 43 | 44 | class _TreeListSub: 45 | def __init__(self, default, direct=None, children=None): 46 | self.default = default 47 | if direct is None: 48 | self.direct = [self.default]*SIZE 49 | self.children = [None]*SPLIT # Subtrees allocated lazily 50 | else: 51 | self.direct = direct 52 | self.children = children 53 | 54 | def __getitem__(self, i): 55 | assert i >= 0 56 | if i < SIZE: 57 | return self.direct[i] 58 | 59 | i -= SIZE 60 | i, ci = divmod(i, SPLIT) 61 | child = self.children[ci] 62 | 63 | if child is None: 64 | return self.default 65 | return child[i] 66 | 67 | def set(self, i, val): 68 | assert i >= 0 69 | if i < SIZE: 70 | if self.direct[i] == val: 71 | return self 72 | 73 | temp = self.direct[:] 74 | temp[i] = val 75 | return _TreeListSub(self.default, temp, self.children) 76 | 77 | i -= SIZE 78 | i, ci = divmod(i, SPLIT) 79 | child = self.children[ci] 80 | 81 | if child is None: 82 | if val == self.default: 83 | return self 84 | child = _TreeListSub(self.default).set(i, val) 85 | else: 86 | if val == child[i]: 87 | return self 88 | child = child.set(i, val) 89 | 90 | temp = self.children[:] 91 | temp[ci] = child 92 | return _TreeListSub(self.default, self.direct, temp) 93 | 94 | @staticmethod 95 | def merge(left, right, func): 96 | # Effectively computes [func(x, y) for x, y in zip(left, right)] 97 | # Assume func(x, x) == x 98 | if left is right: 99 | return left 100 | 101 | if left is None: 102 | left, right = right, left 103 | 104 | default = left.default 105 | merge = _TreeListSub.merge 106 | if right is None: 107 | direct = [func(x, default) for x in left.direct] 108 | children = [merge(child, None, func) for child in left.children] 109 | if direct == left.direct and children == left.children: 110 | return left 111 | return _TreeListSub(default, direct, children) 112 | 113 | direct = [func(x, y) for x, y in zip(left.direct, right.direct)] 114 | children = [merge(c1, c2, func) for c1, c2 in zip(left.children, right.children)] 115 | if direct == left.direct and children == left.children: 116 | return left 117 | if direct == right.direct and children == right.children: 118 | return right 119 | return _TreeListSub(default, direct, children) 120 | -------------------------------------------------------------------------------- /enjarify/typeinference/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /enjarify/typeinference/typeinference.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import collections, operator 16 | 17 | from ..jvm import arraytypes as arrays 18 | from ..jvm import scalartypes as scalars 19 | from ..jvm import mathops, jvmops 20 | from ..treelist import TreeList 21 | from .. import flags, dalvik 22 | 23 | 24 | # The two main things we need type inference for are determining the types of 25 | # primative values and arrays. Luckily, we don't care about actual classes in 26 | # these cases, we just need to know whether it is int,float,reference, etc. to 27 | # generate the correct bytecode instructions, which are typed in Java. 28 | # 29 | # One additional problem is that ART's implicit casts narrow the type instead of 30 | # replacing it like regular checkcasts do. This means that there is no way to 31 | # replicate the behavior in Java using normal casts unless you know which class 32 | # is a subclass of another and which classes are interfaces. However, we want to 33 | # be able to translate code without knowing about every other class that could be 34 | # referenced by the application, so we make do with a hack. 35 | # 36 | # Variables subjected to implicit casting are marked as tainted. Whenever a 37 | # tained value is used, it is explcitly checkcasted to the expected type. This 38 | # isn't ideal since it will incorrectly throw in the cast of bad interface casts, 39 | # but it's the best we can do without requiring knowledge of the whole inheritance 40 | # hierarchy. 41 | 42 | class TypeInfo: 43 | def __init__(self, prims, arrs, tainted): 44 | # copy on write 45 | self.prims = prims 46 | self.arrs = arrs 47 | self.tainted = tainted 48 | 49 | def _copy(self): return TypeInfo(self.prims.copy(), self.arrs.copy(), self.tainted.copy()) 50 | def _get(self, reg): return self.prims[reg], self.arrs[reg], self.tainted[reg] 51 | 52 | def _set(self, reg, st, at, taint=False): 53 | self.prims[reg] = st 54 | self.arrs[reg] = at 55 | self.tainted[reg] = taint 56 | return self 57 | 58 | def move(self, src, dest, wide): 59 | new = self._copy()._set(dest, *self._get(src)) 60 | if wide: 61 | new._set(dest+1, *self._get(src+1)) 62 | return new 63 | 64 | def assign(self, reg, st, at=arrays.INVALID, taint=False): 65 | assert st is not None 66 | return self._copy()._set(reg, st, at, taint) 67 | 68 | def assign2(self, reg, st): 69 | assert st is not None 70 | at = arrays.INVALID 71 | return self._copy()._set(reg, st, at)._set(reg+1, scalars.INVALID, at) 72 | 73 | def assignFromDesc(self, reg, desc): 74 | st = scalars.fromDesc(desc) 75 | at = arrays.fromDesc(desc) 76 | if scalars.iswide(st): 77 | return self.assign2(reg, st) 78 | else: 79 | return self.assign(reg, st, at) 80 | 81 | def isSame(self, other): 82 | return (self.prims.data is other.prims.data and 83 | self.arrs.data is other.arrs.data and 84 | self.tainted.data is other.tainted.data) 85 | 86 | def merge(old, new): 87 | temp = old._copy() 88 | temp.prims.merge(new.prims) 89 | temp.arrs.merge(new.arrs) 90 | temp.tainted.merge(new.tainted) 91 | return old if old.isSame(temp) else temp 92 | 93 | def fromParams(method, num_regs): 94 | isstatic = method.access & flags.ACC_STATIC 95 | full_ptypes = method.id.getSpacedParamTypes(isstatic) 96 | offset = num_regs - len(full_ptypes) 97 | 98 | prims = TreeList(scalars.INVALID, operator.__and__) 99 | arrs = TreeList(arrays.INVALID, arrays.merge) 100 | tainted = TreeList(False, operator.__or__) 101 | 102 | for i, desc in enumerate(full_ptypes): 103 | if desc is not None: 104 | prims[offset + i] = scalars.fromDesc(desc) 105 | arrs[offset + i] = arrays.fromDesc(desc) 106 | return TypeInfo(prims, arrs, tainted) 107 | 108 | _MATH_THROW_OPS = [jvmops.IDIV, jvmops.IREM, jvmops.LDIV, jvmops.LREM] 109 | def pruneHandlers(all_handlers): 110 | result = collections.defaultdict(list) 111 | for instr, handlers in all_handlers.items(): 112 | if not instr.type in dalvik.PRUNED_THROW_TYPES: 113 | continue 114 | # if math op, make sure it is int div/rem 115 | if instr.type == dalvik.BinaryOp: 116 | if mathops.BINARY[instr.opcode][0] not in _MATH_THROW_OPS: 117 | continue 118 | elif instr.type == dalvik.BinaryOpConst: 119 | if mathops.BINARY_LIT[instr.opcode] not in _MATH_THROW_OPS: 120 | continue 121 | 122 | types = set() 123 | for ctype, handler in handlers: 124 | # if multiple handlers with same catch type, only include the first 125 | if ctype not in types: 126 | result[instr].append((ctype, handler)) 127 | types.add(ctype) 128 | # stop as soon as we reach a catch all handler 129 | if ctype == b'java/lang/Throwable': 130 | break 131 | return dict(result) 132 | 133 | ################################################################################ 134 | # Lots of instructions just return an object or int for type inference purposes 135 | # so we have a single function for these cases 136 | def visitRetObj(dex, instr, cur): 137 | return cur.assign(instr.args[0], scalars.OBJ) 138 | def visitRetInt(dex, instr, cur): 139 | return cur.assign(instr.args[0], scalars.INT) 140 | 141 | # Instruction specific callbacks 142 | def visitMove(dex, instr, cur): 143 | return cur.move(instr.args[1], instr.args[0], wide=False) 144 | def visitMoveWide(dex, instr, cur): 145 | return cur.move(instr.args[1], instr.args[0], wide=True) 146 | def visitMoveResult(dex, instr, cur): 147 | return cur.assignFromDesc(instr.args[0], instr.prev_result) 148 | def visitConst32(dex, instr, cur): 149 | val = instr.args[1] % (1<<32) 150 | if val == 0: 151 | return cur.assign(instr.args[0], scalars.ZERO, arrays.NULL) 152 | else: 153 | return cur.assign(instr.args[0], scalars.C32) 154 | def visitConst64(dex, instr, cur): 155 | return cur.assign2(instr.args[0], scalars.C64) 156 | def visitCheckCast(dex, instr, cur): 157 | at = arrays.fromDesc(dex.type(instr.args[1])) 158 | at = arrays.narrow(cur.arrs[instr.args[0]], at) 159 | return cur.assign(instr.args[0], scalars.OBJ, at) 160 | def visitNewArray(dex, instr, cur): 161 | at = arrays.fromDesc(dex.type(instr.args[2])) 162 | return cur.assign(instr.args[0], scalars.OBJ, at) 163 | def visitArrayGet(dex, instr, cur): 164 | arr_at = cur.arrs[instr.args[1]] 165 | if arr_at is arrays.NULL: 166 | # This is unreachable, so use (ALL, NULL), which can be merged with anything 167 | return cur.assign(instr.args[0], scalars.ALL, arrays.NULL) 168 | else: 169 | st, at = arrays.eletPair(arr_at) 170 | return cur.assign(instr.args[0], st, at) 171 | def visitInstanceGet(dex, instr, cur): 172 | field_id = dex.field_id(instr.args[2]) 173 | return cur.assignFromDesc(instr.args[0], field_id.desc) 174 | def visitStaticGet(dex, instr, cur): 175 | field_id = dex.field_id(instr.args[1]) 176 | return cur.assignFromDesc(instr.args[0], field_id.desc) 177 | 178 | def visitUnaryOp(dex, instr, cur): 179 | _, _, st = mathops.UNARY[instr.opcode] 180 | if scalars.iswide(st): 181 | return cur.assign2(instr.args[0], st) 182 | else: 183 | return cur.assign(instr.args[0], st) 184 | 185 | def visitBinaryOp(dex, instr, cur): 186 | _, st, _ = mathops.BINARY[instr.opcode] 187 | if scalars.iswide(st): 188 | return cur.assign2(instr.args[0], st) 189 | else: 190 | return cur.assign(instr.args[0], st) 191 | 192 | FUNCS = { 193 | dalvik.ConstString: visitRetObj, 194 | dalvik.ConstClass: visitRetObj, 195 | dalvik.NewInstance: visitRetObj, 196 | dalvik.InstanceOf: visitRetInt, 197 | dalvik.ArrayLen: visitRetInt, 198 | dalvik.Cmp: visitRetInt, 199 | dalvik.BinaryOpConst: visitRetInt, 200 | 201 | dalvik.Move: visitMove, 202 | dalvik.MoveWide: visitMoveWide, 203 | dalvik.MoveResult: visitMoveResult, 204 | dalvik.Const32: visitConst32, 205 | dalvik.Const64: visitConst64, 206 | dalvik.CheckCast: visitCheckCast, 207 | dalvik.NewArray: visitNewArray, 208 | dalvik.ArrayGet: visitArrayGet, 209 | dalvik.InstanceGet: visitInstanceGet, 210 | dalvik.StaticGet: visitStaticGet, 211 | dalvik.UnaryOp: visitUnaryOp, 212 | dalvik.BinaryOp: visitBinaryOp, 213 | } 214 | 215 | CONTROL_FLOW_OPS = {dalvik.Goto, dalvik.If, dalvik.IfZ, dalvik.Switch} 216 | 217 | def doInference(dex, method, code, bytecode, instr_d): 218 | # get exception handlers 219 | all_handlers = collections.defaultdict(list) 220 | for tryi in code.tries: 221 | for instr in code.bytecode: 222 | if tryi.start < instr.pos2 and tryi.end > instr.pos: 223 | all_handlers[instr] += tryi.catches 224 | all_handlers = pruneHandlers(all_handlers) 225 | 226 | types = {} 227 | types[0] = fromParams(method, code.nregs) 228 | dirty = {0} 229 | 230 | def doMerge(pos, new): 231 | # prevent infinite loops 232 | if pos not in instr_d: 233 | return 234 | 235 | if pos in types: 236 | old = types[pos] 237 | new = merge(old, new) 238 | if new is not old: 239 | types[pos] = new 240 | dirty.add(pos) 241 | else: 242 | types[pos] = new 243 | dirty.add(pos) 244 | 245 | while dirty: # iterate until convergence 246 | for instr in bytecode: 247 | if instr.pos not in dirty: 248 | continue 249 | 250 | dirty.remove(instr.pos) 251 | cur = types[instr.pos] 252 | itype = instr.type 253 | if itype in FUNCS: 254 | after = FUNCS[itype](dex, instr, cur) 255 | elif itype in CONTROL_FLOW_OPS: 256 | # control flow - none of these are in FUNCS 257 | result = after = after2 = cur 258 | if instr.implicit_casts is not None: 259 | desc_ind, regs = instr.implicit_casts 260 | for reg in regs: 261 | st = cur.prims[reg] # could != OBJ if null 262 | at = arrays.narrow(cur.arrs[reg], arrays.fromDesc(dex.type(desc_ind))) 263 | result = result.assign(reg, st, at, taint=True) 264 | # merge into branch if op = if-nez else merge into fallthrough 265 | if instr.opcode == 0x39: 266 | after2 = result 267 | else: 268 | after = result 269 | 270 | if instr.type == dalvik.Goto: 271 | doMerge(instr.args[0], after2) 272 | elif instr.type == dalvik.If: 273 | doMerge(instr.args[2], after2) 274 | elif instr.type == dalvik.IfZ: 275 | doMerge(instr.args[1], after2) 276 | elif instr.type == dalvik.Switch: 277 | switchdata = instr_d[instr.args[1]].switchdata 278 | for offset in switchdata.values(): 279 | target = (instr.pos + offset) % (1<<32) 280 | doMerge(target, cur) 281 | else: 282 | after = cur 283 | 284 | # these instructions don't fallthrough 285 | if instr.type not in (dalvik.Return, dalvik.Throw, dalvik.Goto): 286 | doMerge(instr.pos2, after) 287 | 288 | # exception handlers 289 | if instr in all_handlers: 290 | for ctype, handler in all_handlers[instr]: 291 | doMerge(handler, cur) 292 | return types, all_handlers 293 | -------------------------------------------------------------------------------- /enjarify/util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | def keysToRanges(d, limit): 16 | starts = sorted(d) 17 | for s, e in zip(starts, starts[1:] + [limit]): 18 | for k in range(s, e): 19 | d[k] = d[s] 20 | return d 21 | 22 | def signExtend(val, size): 23 | if val & (1 << (size-1)): 24 | val -= (1 << size) 25 | return val 26 | 27 | def s16(val): 28 | val %= 1 << 16 29 | if val >= 1 << 15: 30 | val -= 1 << 16 31 | return val 32 | 33 | def s32(val): 34 | val %= 1 << 32 35 | if val >= 1 << 31: 36 | val -= 1 << 32 37 | return val 38 | 39 | def s64(val): 40 | val %= 1 << 64 41 | if val >= 1 << 63: 42 | val -= 1 << 64 43 | return val 44 | --------------------------------------------------------------------------------