├── .github ├── ISSUE_TEMPLATE.md ├── dependabot.yml └── workflows │ ├── main.yml │ └── pip-audit.yml ├── .gitignore ├── LICENSE ├── README.md ├── docs ├── Makefile ├── api.rst ├── conf.py └── index.rst ├── pyevmasm ├── __init__.py ├── __main__.py └── evmasm.py ├── setup.py ├── tests └── test_EVMAssembler.py └── tox.ini /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### pyvmasm version 2 | 3 | 4 | ### Python version 5 | 6 | 7 | ### Summary of the problem 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | updates: 4 | - package-ecosystem: github-actions 5 | directory: / 6 | schedule: 7 | interval: daily 8 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v3 14 | - name: Set up Python 3.7 15 | uses: actions/setup-python@v4 16 | with: 17 | python-version: 3.7 18 | - name: Lint 19 | if: github.event_name == 'pull_request' 20 | env: 21 | BASE_SHA: ${{ github.event.pull_request.base.sha }} 22 | HEAD_SHA: ${{ github.event.pull_request.head.sha }} 23 | run: | 24 | pip install black 25 | pip install mypy 26 | python setup.py install 27 | black --version 28 | black pyevmasm 29 | mypy --version 30 | mypy pyevmasm 31 | 32 | test37: 33 | runs-on: ubuntu-latest 34 | 35 | steps: 36 | - uses: actions/checkout@v3 37 | 38 | - name: Set up Python 3.7 39 | uses: actions/setup-python@v4 40 | with: 41 | python-version: 3.7 42 | - name: Run Tests 37 43 | run: | 44 | python setup.py install 45 | python -m unittest discover "tests/" 46 | -------------------------------------------------------------------------------- /.github/workflows/pip-audit.yml: -------------------------------------------------------------------------------- 1 | name: Scan dependencies for vulnerabilities with pip-audit 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | schedule: 9 | - cron: "0 12 * * *" 10 | 11 | jobs: 12 | pip-audit: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: Checkout repository 17 | uses: actions/checkout@v3 18 | 19 | - name: Install Python 20 | uses: actions/setup-python@v4 21 | with: 22 | python-version: "3.x" 23 | 24 | - name: Install project 25 | run: | 26 | python -m venv /tmp/pip-audit-env 27 | source /tmp/pip-audit-env/bin/activate 28 | 29 | python -m pip install --upgrade pip 30 | python -m pip install . 31 | 32 | 33 | - name: Run pip-audit 34 | uses: trailofbits/gh-action-pip-audit@v1.0.8 35 | with: 36 | virtual-environment: /tmp/pip-audit-env 37 | 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *,cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | venvpy/ 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # dotenv 80 | .env 81 | 82 | # virtualenv 83 | .venv 84 | venv/ 85 | venv36/ 86 | ENV/ 87 | 88 | # Spyder project settings 89 | .spyderproject 90 | 91 | # Rope project settings 92 | .ropeproject 93 | 94 | # macOS Finder files 95 | .DS_Store 96 | 97 | # PyCharm files 98 | .idea/ 99 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pyevmasm 2 | [![Build Status](https://github.com/crytic/pyevmasm/workflows/CI/badge.svg)](https://github.com/crytic/pyevmasm/actions?query=workflow%3ACI) 3 | 4 | [![PyPI version](https://badge.fury.io/py/pyevmasm.svg)](https://badge.fury.io/py/pyevmasm) 5 | [![Slack Status](https://slack.empirehacking.nyc/badge.svg)](https://slack.empirehacking.nyc) 6 | 7 | pyevmasm is an assembler and disassembler library for the Ethereum Virtual Machine (EVM). It includes a commandline utility and a Python API. 8 | 9 | ## CLI Examples with evmasm 10 | 11 | `evmasm` is a commandline utility that uses pyevmasm to assemble or disassemble EVM: 12 | 13 | ``` 14 | usage: evmasm [-h] (-a | -d | -t) [-bi] [-bo] [-i [INPUT]] [-o [OUTPUT]] [-f FORK] 15 | 16 | pyevmasm the EVM assembler and disassembler 17 | 18 | optional arguments: 19 | -h, --help show this help message and exit 20 | -a, --assemble Assemble EVM instructions to opcodes 21 | -d, --disassemble Disassemble EVM to opcodes 22 | -t, --print-opcode-table 23 | List supported EVM opcodes 24 | -bi, --binary-input Binary input mode (-d only) 25 | -bo, --binary-output Binary output mode (-a only) 26 | -i [INPUT], --input [INPUT] 27 | Input file, default=stdin 28 | -o [OUTPUT], --output [OUTPUT] 29 | Output file, default=stdout 30 | -f FORK, --fork FORK Fork, default: byzantium. Possible: frontier, 31 | homestead, tangerine_whistle, spurious_dragon, 32 | byzantium, constantinople, serenity. Also an unsigned 33 | block number is accepted to select the fork. 34 | ``` 35 | 36 | Disassembling the preamble of compiled contract: 37 | 38 | ``` 39 | $ echo -n "608060405260043610603f57600035" | evmasm -d 40 | 00000000: PUSH1 0x80 41 | 00000002: PUSH1 0x40 42 | 00000004: MSTORE 43 | 00000005: PUSH1 0x4 44 | 00000007: CALLDATASIZE 45 | 00000008: LT 46 | 00000009: PUSH1 0x3f 47 | 0000000b: JUMPI 48 | 0000000c: PUSH1 0x0 49 | 0000000e: CALLDATALOAD 50 | ``` 51 | 52 | ## Python API Examples 53 | 54 | ``` 55 | >>> from pyevmasm import instruction_tables, disassemble_hex, disassemble_all, assemble_hex 56 | >>> instruction_table = instruction_tables['byzantium'] 57 | >>> instruction_table[20] 58 | Instruction(0x14, 'EQ', 0, 2, 1, 3, 'Equality comparision.', None, 0) 59 | >>> instruction_table['EQ'] 60 | Instruction(0x14, 'EQ', 0, 2, 1, 3, 'Equality comparision.', None, 0) 61 | >>> instrs = list(disassemble_all(binascii.unhexlify('608060405260043610603f57600035'))) 62 | >>> instrs.insert(1, instruction_table['JUMPI']) 63 | >>> a = assemble_hex(instrs) 64 | >>> a 65 | '0x60805760405260043610603f57600035' 66 | >>> print(disassemble_hex(a)) 67 | PUSH1 0x80 68 | JUMPI 69 | PUSH1 0x40 70 | MSTORE 71 | ... 72 | >>> assemble_hex('PUSH1 0x40\nMSTORE\n') 73 | '0x604052' 74 | ``` 75 | 76 | # Installation 77 | 78 | Python >=2.7 or Python >=3.3 is required. 79 | 80 | Install the latest stable version using pip: 81 | ``` 82 | pip install pyevmasm 83 | ``` 84 | 85 | Or, install the library from source: 86 | ``` 87 | git clone https://github.com/trailofbits/pyevmasm 88 | cd pyevmasm 89 | python setup.py install --user 90 | ``` 91 | 92 | ## Documentation 93 | 94 | [https://pyevmasm.readthedocs.io](https://pyevmasm.readthedocs.io) 95 | 96 | New issues, feature requests, and contributions are welcome. Join us in #ethereum channel on the [Empire Hacking Slack](https://slack.empirehacking.nyc) to discuss Ethereum security tool development. 97 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = pyevmasm 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | 4 | evmasm 5 | ------ 6 | .. automodule:: pyevmasm.evmasm 7 | :members: 8 | .. py:data:: instruction 9 | 10 | Instance of InstructionTable for EVM. (see; InstructionTable) -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | # import os 16 | # import sys 17 | # sys.path.insert(0, os.path.abspath('.')) 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = 'pyevmasm' 23 | copyright = '2018, Trail of Bits' 24 | author = 'Trail of Bits' 25 | 26 | # The short X.Y version 27 | version = '' 28 | # The full version, including alpha/beta/rc tags 29 | release = '' 30 | 31 | 32 | # -- General configuration --------------------------------------------------- 33 | 34 | # If your documentation needs a minimal Sphinx version, state it here. 35 | # 36 | # needs_sphinx = '1.0' 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 41 | extensions = [ 42 | 'sphinx.ext.todo', 'sphinx.ext.viewcode', 'sphinx.ext.autodoc' 43 | ] 44 | 45 | # Add any paths that contain templates here, relative to this directory. 46 | templates_path = ['_templates'] 47 | 48 | # The suffix(es) of source filenames. 49 | # You can specify multiple suffix as a list of string: 50 | # 51 | # source_suffix = ['.rst', '.md'] 52 | source_suffix = '.rst' 53 | 54 | # The master toctree document. 55 | master_doc = 'index' 56 | 57 | # The language for content autogenerated by Sphinx. Refer to documentation 58 | # for a list of supported languages. 59 | # 60 | # This is also used if you do content translation via gettext catalogs. 61 | # Usually you set "language" from the command line for these cases. 62 | language = None 63 | 64 | # List of patterns, relative to source directory, that match files and 65 | # directories to ignore when looking for source files. 66 | # This pattern also affects html_static_path and html_extra_path . 67 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 68 | 69 | # The name of the Pygments (syntax highlighting) style to use. 70 | pygments_style = 'sphinx' 71 | 72 | 73 | # -- Options for HTML output ------------------------------------------------- 74 | 75 | # The theme to use for HTML and HTML Help pages. See the documentation for 76 | # a list of builtin themes. 77 | # 78 | html_theme = 'alabaster' 79 | 80 | # Theme options are theme-specific and customize the look and feel of a theme 81 | # further. For a list of options available for each theme, see the 82 | # documentation. 83 | # 84 | # html_theme_options = {} 85 | 86 | # Add any paths that contain custom static files (such as style sheets) here, 87 | # relative to this directory. They are copied after the builtin static files, 88 | # so a file named "default.css" will overwrite the builtin "default.css". 89 | html_static_path = ['_static'] 90 | 91 | # Custom sidebar templates, must be a dictionary that maps document names 92 | # to template names. 93 | # 94 | # The default sidebars (for documents that don't match any pattern) are 95 | # defined by theme itself. Builtin themes are using these templates by 96 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 97 | # 'searchbox.html']``. 98 | # 99 | # html_sidebars = {} 100 | 101 | 102 | # -- Options for HTMLHelp output --------------------------------------------- 103 | 104 | # Output file base name for HTML help builder. 105 | htmlhelp_basename = 'pyevmasmdoc' 106 | 107 | 108 | # -- Options for LaTeX output ------------------------------------------------ 109 | 110 | latex_elements = { 111 | # The paper size ('letterpaper' or 'a4paper'). 112 | # 113 | # 'papersize': 'letterpaper', 114 | 115 | # The font size ('10pt', '11pt' or '12pt'). 116 | # 117 | # 'pointsize': '10pt', 118 | 119 | # Additional stuff for the LaTeX preamble. 120 | # 121 | # 'preamble': '', 122 | 123 | # Latex figure (float) alignment 124 | # 125 | # 'figure_align': 'htbp', 126 | } 127 | 128 | # Grouping the document tree into LaTeX files. List of tuples 129 | # (source start file, target name, title, 130 | # author, documentclass [howto, manual, or own class]). 131 | latex_documents = [ 132 | (master_doc, 'pyevmasm.tex', 'pyevmasm Documentation', 133 | 'Trail of Bits', 'manual'), 134 | ] 135 | 136 | 137 | # -- Options for manual page output ------------------------------------------ 138 | 139 | # One entry per manual page. List of tuples 140 | # (source start file, name, description, authors, manual section). 141 | man_pages = [ 142 | (master_doc, 'pyevmasm', 'pyevmasm Documentation', 143 | [author], 1) 144 | ] 145 | 146 | 147 | # -- Options for Texinfo output ---------------------------------------------- 148 | 149 | # Grouping the document tree into Texinfo files. List of tuples 150 | # (source start file, target name, title, author, 151 | # dir menu entry, description, category) 152 | texinfo_documents = [ 153 | (master_doc, 'pyevmasm', 'pyevmasm Documentation', 154 | author, 'pyevmasm', 'One line description of project.', 155 | 'Miscellaneous'), 156 | ] 157 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. pyevmasm documentation master file, created by 2 | sphinx-quickstart on Wed Jul 11 19:50:09 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to pyevmasm's documentation! 7 | ==================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | api 14 | 15 | 16 | Indices and tables 17 | ================== 18 | 19 | * :ref:`genindex` 20 | * :ref:`modindex` 21 | * :ref:`search` 22 | -------------------------------------------------------------------------------- /pyevmasm/__init__.py: -------------------------------------------------------------------------------- 1 | from .evmasm import instruction_tables, Instruction # noqa: F401 2 | from .evmasm import block_to_fork, DEFAULT_FORK 3 | from .evmasm import assemble, assemble_all, assemble_hex, assemble_one 4 | from .evmasm import disassemble, disassemble_all, disassemble_hex, disassemble_one 5 | -------------------------------------------------------------------------------- /pyevmasm/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import sys 4 | import binascii 5 | 6 | from .evmasm import ( 7 | assemble_hex, 8 | disassemble_all, 9 | instruction_tables, 10 | assemble_all, 11 | block_to_fork, 12 | DEFAULT_FORK, 13 | accepted_forks, 14 | ) 15 | 16 | 17 | def main(): 18 | parser = argparse.ArgumentParser( 19 | description="pyevmasm the EVM assembler and disassembler" 20 | ) 21 | group_action = parser.add_mutually_exclusive_group(required=True) 22 | group_action.add_argument( 23 | "-a", 24 | "--assemble", 25 | action="store_true", 26 | help="Assemble EVM instructions to opcodes", 27 | ) 28 | group_action.add_argument( 29 | "-d", "--disassemble", action="store_true", help="Disassemble EVM to opcodes" 30 | ) 31 | group_action.add_argument( 32 | "-t", 33 | "--print-opcode-table", 34 | action="store_true", 35 | help="List supported EVM opcodes", 36 | ) 37 | parser.add_argument( 38 | "-bi", "--binary-input", action="store_true", help="Binary input mode (-d only)" 39 | ) 40 | parser.add_argument( 41 | "-bo", 42 | "--binary-output", 43 | action="store_true", 44 | help="Binary output mode (-a only)", 45 | ) 46 | parser.add_argument( 47 | "-i", 48 | "--input", 49 | nargs="?", 50 | default=sys.stdin, 51 | type=argparse.FileType("r"), 52 | help="Input file, default=stdin", 53 | ) 54 | parser.add_argument( 55 | "-o", 56 | "--output", 57 | nargs="?", 58 | default=sys.stdout, 59 | type=argparse.FileType("w"), 60 | help="Output file, default=stdout", 61 | ) 62 | parser.add_argument( 63 | "-f", 64 | "--fork", 65 | default=DEFAULT_FORK, 66 | type=str, 67 | help="Fork, default: london. " 68 | "Possible: frontier, homestead, tangerine_whistle, spurious_dragon, byzantium, constantinople, istanbul, london, serenity. " 69 | "Also an unsigned block number is accepted to select the fork.", 70 | ) 71 | 72 | args = parser.parse_args(sys.argv[1:]) 73 | arg_fork = args.fork.lower() 74 | if arg_fork not in accepted_forks: 75 | try: 76 | block_number = abs(int(arg_fork)) 77 | fork = block_to_fork(block_number) 78 | except ValueError: 79 | sys.stderr.write( 80 | "Wrong fork name or block number. " 81 | "Please provide an integer or one of %s.\n" % accepted_forks 82 | ) 83 | sys.exit(1) 84 | else: 85 | fork = arg_fork 86 | 87 | instruction_table = instruction_tables[fork] 88 | if args.print_opcode_table: 89 | for instr in instruction_table: 90 | print( 91 | "0x{:02x}: {:16s} {:s}".format( 92 | instr.opcode, instr.name, instr.description 93 | ) 94 | ) 95 | sys.exit(0) 96 | 97 | if args.assemble: 98 | try: 99 | asm = args.input.read().strip().rstrip() 100 | except KeyboardInterrupt: 101 | sys.exit(0) 102 | if args.binary_output: 103 | for i in assemble_all(asm, fork=fork): 104 | if sys.version_info >= (3, 2): 105 | args.output.buffer.write(i.bytes) 106 | else: 107 | args.output.write(i.bytes) 108 | else: 109 | args.output.write(assemble_hex(asm, fork=fork) + "\n") 110 | 111 | if args.disassemble: 112 | if args.binary_input and sys.version_info >= (3, 2): 113 | buf = args.input.buffer.read() 114 | else: 115 | try: 116 | buf = args.input.read().strip().rstrip() 117 | except KeyboardInterrupt: 118 | sys.exit(0) 119 | except UnicodeDecodeError: 120 | print("Input is binary? try using -bi.") 121 | sys.exit(1) 122 | 123 | if buf[:3] == "EVM": # binja prefix 124 | buf = buf[3:] 125 | elif buf[:2] == "0x": # hex prefixed 126 | buf = binascii.unhexlify(buf[2:]) 127 | else: # detect all hex buffer 128 | buf_set = set() 129 | for c in buf: 130 | buf_set.add(c.lower()) 131 | 132 | hex_set = set(list("0123456789abcdef")) 133 | if buf_set <= hex_set: # subset 134 | buf = binascii.unhexlify(buf) 135 | 136 | insns = list(disassemble_all(buf, fork=fork)) 137 | for i in insns: 138 | args.output.write("%08x: %s\n" % (i.pc, str(i))) 139 | 140 | 141 | if __name__ == "__main__": 142 | main() 143 | -------------------------------------------------------------------------------- /pyevmasm/evmasm.py: -------------------------------------------------------------------------------- 1 | from bisect import bisect 2 | from binascii import hexlify, unhexlify 3 | from builtins import map, next, range, object 4 | 5 | from future.builtins import next, bytes # type: ignore 6 | import copy 7 | 8 | DEFAULT_FORK = "shanghai" 9 | 10 | """ 11 | Example use:: 12 | >>> from pyevmasm import * 13 | >>> disassemble_one('\\x60\\x10') 14 | Instruction(0x60, 'PUSH', 1, 0, 1, 0, 'Place 1 byte item on stack.', 16, 0) 15 | >>> assemble_one('PUSH1 0x10') 16 | Instruction(0x60, 'PUSH', 1, 0, 1, 0, 'Place 1 byte item on stack.', 16, 0) 17 | >>> tuple(disassemble_all('\\x30\\x31')) 18 | (Instruction(0x30, 'ADDRESS', 0, 0, 1, 2, 'Get address of currently executing account.', None, 0), 19 | Instruction(0x31, 'BALANCE', 0, 1, 1, 20, 'Get balance of the given account.', None, 1)) 20 | >>> tuple(assemble_all('ADDRESS\\nBALANCE')) 21 | (Instruction(0x30, 'ADDRESS', 0, 0, 1, 2, 'Get address of currently executing account.', None, 0), 22 | Instruction(0x31, 'BALANCE', 0, 1, 1, 20, 'Get balance of the given account.', None, 1)) 23 | >>> assemble_hex('''PUSH1 0x60\n \ 24 | BLOCKHASH\n \ 25 | MSTORE\n \ 26 | PUSH1 0x2\n \ 27 | PUSH2 0x100\n \ 28 | ''') 29 | '0x606040526002610100' 30 | >>> disassemble_hex('0x606040526002610100') 31 | 'PUSH1 0x60\\nBLOCKHASH\\nMSTORE\\nPUSH1 0x2\\nPUSH2 0x100' 32 | 33 | """ 34 | 35 | 36 | class UnknownMnemonicError(Exception): 37 | pass 38 | 39 | 40 | class UnknownOpcodeError(Exception): 41 | pass 42 | 43 | 44 | class AssembleError(Exception): 45 | pass 46 | 47 | 48 | class ParseError(Exception): 49 | pass 50 | 51 | 52 | class Instruction(object): 53 | def __init__( 54 | self, 55 | opcode, 56 | name, 57 | operand_size, 58 | pops, 59 | pushes, 60 | fee, 61 | description, 62 | operand=None, 63 | pc=0, 64 | ): 65 | """ 66 | This represents an EVM instruction. 67 | EVMAsm will create this for you. 68 | 69 | :param opcode: the opcode value 70 | :param name: instruction name 71 | :param operand_size: immediate operand size in bytes 72 | :param pops: number of items popped from the stack 73 | :param pushes: number of items pushed into the stack 74 | :param fee: gas fee for the instruction 75 | :param description: textual description of the instruction 76 | :param operand: optional immediate operand 77 | :param pc: optional program counter of this instruction in the program 78 | 79 | Example use:: 80 | 81 | >>> instruction = assemble_one('PUSH1 0x10') 82 | >>> print('Instruction: %s'% instruction) 83 | >>> print('\tdescription:', instruction.description) 84 | >>> print('\tgroup:', instruction.group) 85 | >>> print('\tpc:', instruction.pc) 86 | >>> print('\tsize:', instruction.size) 87 | >>> print('\thas_operand:', instruction.has_operand) 88 | >>> print('\toperand_size:', instruction.operand_size) 89 | >>> print('\toperand:', instruction.operand) 90 | >>> print('\tsemantics:', instruction.semantics) 91 | >>> print('\tpops:', instruction.pops) 92 | >>> print('\tpushes:', instruction.pushes) 93 | >>> print('\tbytes:', '0x'+instruction.bytes.encode('hex')) 94 | >>> print('\twrites to stack:', instruction.writes_to_stack) 95 | >>> print('\treads from stack:', instruction.reads_from_stack) 96 | >>> print('\twrites to memory:', instruction.writes_to_memory) 97 | >>> print('\treads from memory:', instruction.reads_from_memory) 98 | >>> print('\twrites to storage:', instruction.writes_to_storage) 99 | >>> print('\treads from storage:', instruction.reads_from_storage) 100 | >>> print('\tis terminator', instruction.is_terminator) 101 | 102 | 103 | """ 104 | self._opcode = opcode 105 | self._name = name 106 | self._operand_size = operand_size 107 | self._pops = pops 108 | self._pushes = pushes 109 | self._fee = fee 110 | self._description = description 111 | self._operand = operand # Immediate operand if any 112 | self._pc = pc 113 | 114 | def __eq__(self, other): 115 | """Instructions are equal if all features match""" 116 | return ( 117 | self._opcode == other._opcode 118 | and self._name == other._name 119 | and self._operand == other._operand 120 | and self._operand_size == other._operand_size 121 | and self._pops == other._pops 122 | and self._pushes == other._pushes 123 | and self._fee == other._fee 124 | and self._pc == other._pc 125 | and self._description == other._description 126 | ) 127 | 128 | def __repr__(self): 129 | output = "Instruction(0x{:x}, {}, {:d}, {:d}, {:d}, {:d}, {}, {}, {})".format( 130 | self._opcode, 131 | self._name, 132 | self._operand_size, 133 | self._pops, 134 | self._pushes, 135 | self._fee, 136 | self._description, 137 | self._operand, 138 | self._pc, 139 | ) 140 | return output 141 | 142 | def __str__(self): 143 | if self.has_operand: 144 | if self.operand is not None: 145 | return "{} 0x{:x}".format(self.name, self.operand) 146 | else: 147 | return "{} ???".format(self.name) 148 | return self.name 149 | 150 | @property 151 | def opcode(self): 152 | """The opcode as an integer""" 153 | return self._opcode 154 | 155 | @property 156 | def mnemonic(self): 157 | """Alias for name""" 158 | return self.name 159 | 160 | @staticmethod 161 | def _long_name(short_name, operand_size, pops): 162 | if short_name == "PUSH": 163 | return "PUSH{:d}".format(operand_size) 164 | elif short_name == "DUP": 165 | return "DUP{:d}".format(pops) 166 | elif short_name == "SWAP": 167 | return "SWAP{:d}".format(pops - 1) 168 | elif short_name == "LOG": 169 | return "LOG{:d}".format(pops - 2) 170 | return short_name 171 | 172 | @property 173 | def name(self): 174 | """The instruction name/mnemonic""" 175 | return self._long_name(self._name, self._operand_size, self._pops) 176 | 177 | def parse_operand(self, buf): 178 | """Parses an operand from buf 179 | 180 | :param buf: a buffer 181 | :type buf: iterator/generator/string 182 | """ 183 | buf = iter(buf) 184 | try: 185 | operand = 0 186 | for _ in range(self.operand_size): 187 | operand <<= 8 188 | operand |= next(buf) 189 | self._operand = operand 190 | except StopIteration: 191 | raise ParseError("Not enough data for decoding") 192 | 193 | @property 194 | def operand_size(self): 195 | """The immediate operand size""" 196 | return self._operand_size 197 | 198 | @property 199 | def has_operand(self): 200 | """True if the instruction uses an immediate operand""" 201 | return self.operand_size > 0 202 | 203 | @property 204 | def operand(self): 205 | return self._operand 206 | 207 | @operand.setter 208 | def operand(self, value): 209 | if self.operand_size != 0 and value is not None: 210 | mask = (1 << self.operand_size * 8) - 1 211 | if ~mask & value: 212 | raise ValueError( 213 | "operand should be {:d} bits long".format(self.operand_size * 8) 214 | ) 215 | self._operand = value 216 | 217 | @property 218 | def pops(self): 219 | """Number words popped from the stack""" 220 | return self._pops 221 | 222 | @property 223 | def pushes(self): 224 | """Number words pushed to the stack""" 225 | return self._pushes 226 | 227 | @property 228 | def size(self): 229 | """Size of the encoded instruction""" 230 | return self._operand_size + 1 231 | 232 | @property 233 | def fee(self): 234 | """The basic gas fee of the instruction""" 235 | return self._fee 236 | 237 | @property 238 | def semantics(self): 239 | """Canonical semantics""" 240 | return self._name 241 | 242 | @property 243 | def description(self): 244 | """Colloquial description of the instruction""" 245 | return self._description 246 | 247 | @property 248 | def bytes(self): 249 | """Encoded instruction""" 250 | b = [bytes([self._opcode])] 251 | for offset in reversed(range(self.operand_size)): 252 | b.append(bytes([(self.operand >> offset * 8) & 0xFF])) 253 | return b"".join(b) 254 | 255 | @property 256 | def pc(self): 257 | return self._pc 258 | 259 | @pc.setter 260 | def pc(self, value): 261 | """Location in the program (optional)""" 262 | self._pc = value 263 | 264 | @property 265 | def group(self): 266 | """Instruction classification as per the yellow paper""" 267 | classes = { 268 | 0: "Stop and Arithmetic Operations", 269 | 1: "Comparison & Bitwise Logic Operations", 270 | 2: "SHA3", 271 | 3: "Environmental Information", 272 | 4: "Block Information", 273 | 5: "Stack, Memory, Storage and Flow Operations", 274 | 6: "Push Operations", 275 | 7: "Push Operations", 276 | 8: "Duplication Operations", 277 | 9: "Exchange Operations", 278 | 0xA: "Logging Operations", 279 | 0xF: "System operations", 280 | } 281 | return classes.get(self.opcode >> 4, "Invalid instruction") 282 | 283 | @property 284 | def uses_stack(self): 285 | """True if the instruction reads/writes from/to the stack""" 286 | return self.reads_from_stack or self.writes_to_stack 287 | 288 | @property 289 | def reads_from_stack(self): 290 | """True if the instruction reads from stack""" 291 | return self.pops > 0 292 | 293 | @property 294 | def writes_to_stack(self): 295 | """True if the instruction writes to the stack""" 296 | return self.pushes > 0 297 | 298 | @property 299 | def writes_to_memory(self): 300 | """True if the instruction writes to memory""" 301 | return self.semantics in { 302 | "MSTORE", 303 | "MSTORE8", 304 | "CALLDATACOPY", 305 | "CODECOPY", 306 | "EXTCODECOPY", 307 | "RETURNDATACOPY", 308 | "CALL", 309 | "STATICCALL", 310 | "DELEGATECALL", 311 | "CALLCODE", 312 | } 313 | 314 | @property 315 | def reads_from_memory(self): 316 | """True if the instruction reads from memory""" 317 | return self.semantics in { 318 | "SHA3", 319 | "MLOAD", 320 | "CREATE", 321 | "CALL", 322 | "STATICCALL", 323 | "DELEGATECALL", 324 | "CALLCODE", 325 | "RETURN", 326 | "REVERT", 327 | } 328 | 329 | @property 330 | def writes_to_storage(self): 331 | """True if the instruction writes to the storage""" 332 | return self.semantics == "SSTORE" 333 | 334 | @property 335 | def reads_from_storage(self): 336 | """True if the instruction reads from the storage""" 337 | return self.semantics == "SLOAD" 338 | 339 | @property 340 | def is_terminator(self): 341 | """True if the instruction is a basic block terminator""" 342 | return self.semantics in { 343 | "RETURN", 344 | "STOP", 345 | "INVALID", 346 | "JUMP", 347 | "JUMPI", 348 | "SELFDESTRUCT", 349 | "REVERT", 350 | } 351 | 352 | @property 353 | def is_endtx(self): 354 | """True if the instruction is a transaction terminator""" 355 | return self.semantics in {"RETURN", "STOP", "INVALID", "SELFDESTRUCT", "REVERT"} 356 | 357 | @property 358 | def is_starttx(self): 359 | """True if the instruction is a transaction initiator""" 360 | return self.semantics in { 361 | "CREATE", 362 | "CREATE2", 363 | "CALL", 364 | "CALLCODE", 365 | "DELEGATECALL", 366 | "STATICCALL", 367 | } 368 | 369 | @property 370 | def is_branch(self): 371 | """True if the instruction is a jump""" 372 | return self.semantics in {"JUMP", "JUMPI"} 373 | 374 | @property 375 | def is_environmental(self): 376 | """True if the instruction access enviromental data""" 377 | return self.group == "Environmental Information" 378 | 379 | @property 380 | def is_system(self): 381 | """True if the instruction is a system operation""" 382 | return self.group == "System operations" 383 | 384 | @property 385 | def uses_block_info(self): 386 | """True if the instruction access block information""" 387 | return self.group == "Block Information" 388 | 389 | @property 390 | def is_arithmetic(self): 391 | """True if the instruction is an arithmetic operation""" 392 | return self.semantics in { 393 | "ADD", 394 | "MUL", 395 | "SUB", 396 | "DIV", 397 | "SDIV", 398 | "MOD", 399 | "SMOD", 400 | "ADDMOD", 401 | "MULMOD", 402 | "EXP", 403 | "SIGNEXTEND", 404 | "SHL", 405 | "SHR", 406 | "SAR", 407 | } 408 | 409 | 410 | def assemble_one(asmcode, pc=0, fork=DEFAULT_FORK): 411 | """Assemble one EVM instruction from its textual representation. 412 | 413 | :param asmcode: assembly code for one instruction 414 | :type asmcode: str 415 | :param pc: program counter of the instruction(optional) 416 | :type pc: int 417 | :param fork: fork name (optional) 418 | :type fork: str 419 | :return: An Instruction object 420 | :rtype: Instruction 421 | 422 | Example use:: 423 | 424 | >>> print assemble_one('LT') 425 | 426 | 427 | """ 428 | try: 429 | instruction_table = instruction_tables[fork] 430 | asmcode = asmcode.strip().split(" ") 431 | instr = instruction_table[asmcode[0].upper()] 432 | if pc: 433 | instr.pc = pc 434 | if instr.operand_size > 0: 435 | assert len(asmcode) == 2 436 | instr.operand = int(asmcode[1], 0) 437 | return instr 438 | except Exception: 439 | raise AssembleError("Something wrong at pc {:d}".format(pc)) 440 | 441 | 442 | def assemble_all(asmcode, pc=0, fork=DEFAULT_FORK): 443 | """ Assemble a sequence of textual representation of EVM instructions 444 | 445 | :param asmcode: assembly code for any number of instructions 446 | :type asmcode: str 447 | :param pc: program counter of the first instruction(optional) 448 | :type pc: int 449 | :param fork: fork name (optional) 450 | :type fork: str 451 | :return: An generator of Instruction objects 452 | :rtype: generator[Instructions] 453 | 454 | Example use:: 455 | 456 | >>> assemble_one('''PUSH1 0x60\n \ 457 | PUSH1 0x40\n \ 458 | MSTORE\n \ 459 | PUSH1 0x2\n \ 460 | PUSH2 0x108\n \ 461 | PUSH1 0x0\n \ 462 | POP\n \ 463 | SSTORE\n \ 464 | PUSH1 0x40\n \ 465 | MLOAD\n \ 466 | ''') 467 | 468 | """ 469 | asmcode = asmcode.split("\n") 470 | asmcode = iter(asmcode) 471 | for line in asmcode: 472 | if not line.strip(): 473 | continue 474 | instr = assemble_one(line, pc=pc, fork=fork) 475 | yield instr 476 | pc += instr.size 477 | 478 | 479 | def disassemble_one(bytecode, pc=0, fork=DEFAULT_FORK): 480 | """Disassemble a single instruction from a bytecode 481 | 482 | :param bytecode: the bytecode stream 483 | :type bytecode: str | bytes | bytearray | iterator 484 | :param pc: program counter of the instruction(optional) 485 | :type pc: int 486 | :param fork: fork name (optional) 487 | :type fork: str 488 | :return: an Instruction object 489 | :rtype: Instruction 490 | 491 | Example use:: 492 | 493 | >>> print disassemble_one('\x60\x10') 494 | 495 | """ 496 | instruction_table = instruction_tables[fork] 497 | if isinstance(bytecode, bytes): 498 | bytecode = bytearray(bytecode) 499 | if isinstance(bytecode, str): 500 | bytecode = bytearray(bytecode.encode("latin-1")) 501 | 502 | bytecode = iter(bytecode) 503 | try: 504 | opcode = next(bytecode) 505 | except StopIteration: 506 | return 507 | 508 | assert isinstance(opcode, int) 509 | 510 | instruction = copy.copy(instruction_table.get(opcode, None)) 511 | if instruction is None: 512 | instruction = Instruction( 513 | opcode, "INVALID", 0, 0, 0, 0, "Unspecified invalid instruction." 514 | ) 515 | instruction.pc = pc 516 | 517 | try: 518 | if instruction.has_operand: 519 | instruction.parse_operand(bytecode) 520 | except ParseError: 521 | instruction = None 522 | finally: 523 | return instruction 524 | 525 | 526 | def disassemble_all(bytecode, pc=0, fork=DEFAULT_FORK): 527 | """Disassemble all instructions in bytecode 528 | 529 | :param bytecode: an evm bytecode (binary) 530 | :type bytecode: str | bytes | bytearray | iterator 531 | :param pc: program counter of the first instruction(optional) 532 | :type pc: int 533 | :param fork: fork name (optional) 534 | :type fork: str 535 | :return: An generator of Instruction objects 536 | :rtype: list[Instruction] 537 | 538 | Example use:: 539 | 540 | >>> for inst in disassemble_all(bytecode): 541 | ... print(instr) 542 | 543 | ... 544 | PUSH1 0x60 545 | PUSH1 0x40 546 | MSTORE 547 | PUSH1 0x2 548 | PUSH2 0x108 549 | PUSH1 0x0 550 | POP 551 | SSTORE 552 | PUSH1 0x40 553 | MLOAD 554 | 555 | 556 | """ 557 | if isinstance(bytecode, bytes): 558 | bytecode = bytearray(bytecode) 559 | if isinstance(bytecode, str): 560 | bytecode = bytearray(bytecode.encode("latin-1")) 561 | 562 | bytecode = iter(bytecode) 563 | while True: 564 | instr = disassemble_one(bytecode, pc=pc, fork=fork) 565 | if not instr: 566 | return 567 | pc += instr.size 568 | yield instr 569 | 570 | 571 | def disassemble(bytecode, pc=0, fork=DEFAULT_FORK): 572 | """Disassemble an EVM bytecode 573 | 574 | :param bytecode: binary representation of an evm bytecode 575 | :type bytecode: str | bytes | bytearray 576 | :param pc: program counter of the first instruction(optional) 577 | :type pc: int 578 | :param fork: fork name (optional) 579 | :type fork: str 580 | :return: the text representation of the assembler code 581 | 582 | Example use:: 583 | 584 | >>> disassemble("\x60\x60\x60\x40\x52\x60\x02\x61\x01\x00") 585 | ... 586 | PUSH1 0x60 587 | BLOCKHASH 588 | MSTORE 589 | PUSH1 0x2 590 | PUSH2 0x100 591 | 592 | """ 593 | return "\n".join(map(str, disassemble_all(bytecode, pc=pc, fork=fork))) 594 | 595 | 596 | def assemble(asmcode, pc=0, fork=DEFAULT_FORK): 597 | """ Assemble an EVM program 598 | 599 | :param asmcode: an evm assembler program 600 | :type asmcode: str 601 | :param pc: program counter of the first instruction(optional) 602 | :type pc: int 603 | :param fork: fork name (optional) 604 | :type fork: str 605 | :return: the hex representation of the bytecode 606 | :rtype: str 607 | 608 | Example use:: 609 | 610 | >>> assemble('''PUSH1 0x60\n \ 611 | BLOCKHASH\n \ 612 | MSTORE\n \ 613 | PUSH1 0x2\n \ 614 | PUSH2 0x100\n \ 615 | ''') 616 | ... 617 | b"\x60\x60\x60\x40\x52\x60\x02\x61\x01\x00" 618 | """ 619 | return b"".join(x.bytes for x in assemble_all(asmcode, pc=pc, fork=fork)) 620 | 621 | 622 | def disassemble_hex(bytecode, pc=0, fork=DEFAULT_FORK): 623 | """Disassemble an EVM bytecode 624 | 625 | :param bytecode: canonical representation of an evm bytecode (hexadecimal) 626 | :type bytecode: str 627 | :param pc: program counter of the first instruction(optional) 628 | :type pc: int 629 | :param fork: fork name (optional) 630 | :type fork: str 631 | :return: the text representation of the assembler code 632 | :rtype: str 633 | 634 | Example use:: 635 | 636 | >>> disassemble_hex("0x6060604052600261010") 637 | ... 638 | PUSH1 0x60 639 | BLOCKHASH 640 | MSTORE 641 | PUSH1 0x2 642 | PUSH2 0x100 643 | 644 | """ 645 | if bytecode.startswith("0x"): 646 | bytecode = bytecode[2:] 647 | bytecode = unhexlify(bytecode) 648 | return disassemble(bytecode, pc=pc, fork=fork) 649 | 650 | 651 | def assemble_hex(asmcode, pc=0, fork=DEFAULT_FORK): 652 | """ Assemble an EVM program 653 | 654 | :param asmcode: an evm assembler program 655 | :type asmcode: str | iterator[Instruction] 656 | :param pc: program counter of the first instruction(optional) 657 | :type pc: int 658 | :param fork: fork name (optional) 659 | :type fork: str 660 | :return: the hex representation of the bytecode 661 | :rtype: str 662 | 663 | Example use:: 664 | 665 | >>> assemble_hex('''PUSH1 0x60\n \ 666 | BLOCKHASH\n \ 667 | MSTORE\n \ 668 | PUSH1 0x2\n \ 669 | PUSH2 0x100\n \ 670 | ''') 671 | ... 672 | "0x6060604052600261010" 673 | """ 674 | if isinstance(asmcode, list): 675 | return "0x" + hexlify(b"".join([x.bytes for x in asmcode])).decode("ascii") 676 | return "0x" + hexlify(assemble(asmcode, pc=pc, fork=fork)).decode("ascii") 677 | 678 | 679 | class InstructionTable: 680 | """ 681 | EVM Instruction factory 682 | Implements an immutable, iterable instruction LUT that can be indexed by both mnemonic or opcode. 683 | 684 | Example:: 685 | 686 | >>> from pyevmasm import instruction_tables 687 | >>> instruction_table = instruction_tables['byzantium'] 688 | >>> instruction_table[0] 689 | Instruction(0x0, 'STOP', 0, 0, 0, 0, 'Halts execution.', None, 0) 690 | >>> instruction_table['STOP'] 691 | Instruction(0x0, 'STOP', 0, 0, 0, 0, 'Halts execution.', None, 0) 692 | >>> i = instruction_table.__iter__() 693 | >>> i.__next__() 694 | Instruction(0x0, 'STOP', 0, 0, 0, 0, 'Halts execution.', None, 0) 695 | >>> i.__next__() 696 | Instruction(0x1, 'ADD', 0, 2, 1, 3, 'Addition operation.', None, 0) 697 | >>> i.__next__() 698 | Instruction(0x2, 'MUL', 0, 2, 1, 5, 'Multiplication operation.', None, 0) 699 | >>> i.__next__() 700 | Instruction(0x3, 'SUB', 0, 2, 1, 3, 'Subtraction operation.', None, 0) 701 | 702 | """ 703 | 704 | __slots__ = ("_instruction_list", "__name_to_opcode") 705 | 706 | def __init__(self, *args, **kwargs): 707 | previous_fork = kwargs.get("previous_fork", None) 708 | 709 | self._instruction_list = {} 710 | self.__name_to_opcode = None 711 | 712 | if previous_fork is not None: 713 | if not isinstance(previous_fork, self.__class__): 714 | raise TypeError("{} expected".format(self.__class__)) 715 | self._instruction_list.update(previous_fork._instruction_list) 716 | 717 | self._instruction_list.update(args[0]) 718 | self.__name_to_opcode = None 719 | 720 | @property 721 | def _name_to_opcode(self): 722 | if self.__name_to_opcode is None: 723 | self.__name_to_opcode = {} 724 | for ( 725 | opcode, 726 | (name, operand_size, pops, pushes, gas, description), 727 | ) in self._instruction_list.items(): 728 | long_name = Instruction._long_name(name, operand_size, pops) 729 | self.__name_to_opcode[long_name] = opcode 730 | return self.__name_to_opcode 731 | 732 | def _search_by_name(self, k): 733 | return self._search_by_opcode(self._name_to_opcode[k]) 734 | 735 | def _search_by_opcode(self, k): 736 | return (k,) + self._instruction_list[k] 737 | 738 | def _search(self, k): 739 | try: 740 | value = self._search_by_opcode(k) 741 | except KeyError: 742 | value = self._search_by_name(k) 743 | return value 744 | 745 | def __getitem__(self, k): 746 | return Instruction(*self._search(k)) 747 | 748 | def get(self, k, default=None): 749 | try: 750 | return Instruction(*self._search(k)) 751 | except KeyError: 752 | return default 753 | 754 | def __contains__(self, k): 755 | return k in self._instruction_list or k in self._name_to_opcode 756 | 757 | def __iter__(self): 758 | for k in self.keys(): 759 | yield Instruction(*((k,) + self._instruction_list[k])) 760 | 761 | def keys(self): 762 | return sorted(self._instruction_list.keys()) 763 | 764 | def __repr__(self): 765 | return repr(self._instruction_list) 766 | 767 | 768 | # from http://gavwood.com/paper.pdf 769 | frontier_instruction_table = { 770 | 0x0: ("STOP", 0, 0, 0, 0, "Halts execution."), 771 | 0x1: ("ADD", 0, 2, 1, 3, "Addition operation."), 772 | 0x2: ("MUL", 0, 2, 1, 5, "Multiplication operation."), 773 | 0x3: ("SUB", 0, 2, 1, 3, "Subtraction operation."), 774 | 0x4: ("DIV", 0, 2, 1, 5, "Integer division operation."), 775 | 0x5: ("SDIV", 0, 2, 1, 5, "Signed integer division operation (truncated)."), 776 | 0x6: ("MOD", 0, 2, 1, 5, "Modulo remainder operation."), 777 | 0x7: ("SMOD", 0, 2, 1, 5, "Signed modulo remainder operation."), 778 | 0x8: ("ADDMOD", 0, 3, 1, 8, "Modulo addition operation."), 779 | 0x9: ("MULMOD", 0, 3, 1, 8, "Modulo multiplication operation."), 780 | 0xA: ("EXP", 0, 2, 1, 10, "Exponential operation."), 781 | 0xB: ( 782 | "SIGNEXTEND", 783 | 0, 784 | 2, 785 | 1, 786 | 5, 787 | "Extend length of two's complement signed integer.", 788 | ), 789 | 0x10: ("LT", 0, 2, 1, 3, "Less-than comparison."), 790 | 0x11: ("GT", 0, 2, 1, 3, "Greater-than comparison."), 791 | 0x12: ("SLT", 0, 2, 1, 3, "Signed less-than comparison."), 792 | 0x13: ("SGT", 0, 2, 1, 3, "Signed greater-than comparison."), 793 | 0x14: ("EQ", 0, 2, 1, 3, "Simple not operator."), 794 | 0x15: ("ISZERO", 0, 1, 1, 3, "Equals zero comparison."), 795 | 0x16: ("AND", 0, 2, 1, 3, "Bitwise AND operation."), 796 | 0x17: ("OR", 0, 2, 1, 3, "Bitwise OR operation."), 797 | 0x18: ("XOR", 0, 2, 1, 3, "Bitwise XOR operation."), 798 | 0x19: ("NOT", 0, 1, 1, 3, "Bitwise NOT operation."), 799 | 0x1A: ("BYTE", 0, 2, 1, 3, "Retrieve single byte from word."), 800 | 0x20: ("SHA3", 0, 2, 1, 30, "Compute Keccak-256 hash."), 801 | 0x30: ("ADDRESS", 0, 0, 1, 2, "Get address of currently executing account."), 802 | 0x31: ("BALANCE", 0, 1, 1, 20, "Get balance of the given account."), 803 | 0x32: ("ORIGIN", 0, 0, 1, 2, "Get execution origination address."), 804 | 0x33: ("CALLER", 0, 0, 1, 2, "Get caller address."), 805 | 0x34: ( 806 | "CALLVALUE", 807 | 0, 808 | 0, 809 | 1, 810 | 2, 811 | "Get deposited value by the instruction/transaction responsible for this execution.", 812 | ), 813 | 0x35: ("CALLDATALOAD", 0, 1, 1, 3, "Get input data of current environment."), 814 | 0x36: ( 815 | "CALLDATASIZE", 816 | 0, 817 | 0, 818 | 1, 819 | 2, 820 | "Get size of input data in current environment.", 821 | ), 822 | 0x37: ( 823 | "CALLDATACOPY", 824 | 0, 825 | 3, 826 | 0, 827 | 3, 828 | "Copy input data in current environment to memory.", 829 | ), 830 | 0x38: ("CODESIZE", 0, 0, 1, 2, "Get size of code running in current environment."), 831 | 0x39: ( 832 | "CODECOPY", 833 | 0, 834 | 3, 835 | 0, 836 | 3, 837 | "Copy code running in current environment to memory.", 838 | ), 839 | 0x3A: ("GASPRICE", 0, 0, 1, 2, "Get price of gas in current environment."), 840 | 0x3B: ("EXTCODESIZE", 0, 1, 1, 20, "Get size of an account's code."), 841 | 0x3C: ("EXTCODECOPY", 0, 4, 0, 20, "Copy an account's code to memory."), 842 | 0x40: ( 843 | "BLOCKHASH", 844 | 0, 845 | 1, 846 | 1, 847 | 20, 848 | "Get the hash of one of the 256 most recent complete blocks.", 849 | ), 850 | 0x41: ("COINBASE", 0, 0, 1, 2, "Get the block's beneficiary address."), 851 | 0x42: ("TIMESTAMP", 0, 0, 1, 2, "Get the block's timestamp."), 852 | 0x43: ("NUMBER", 0, 0, 1, 2, "Get the block's number."), 853 | 0x44: ("DIFFICULTY", 0, 0, 1, 2, "Get the block's difficulty."), 854 | 0x45: ("GASLIMIT", 0, 0, 1, 2, "Get the block's gas limit."), 855 | 0x50: ("POP", 0, 1, 0, 2, "Remove item from stack."), 856 | 0x51: ("MLOAD", 0, 1, 1, 3, "Load word from memory."), 857 | 0x52: ("MSTORE", 0, 2, 0, 3, "Save word to memory."), 858 | 0x53: ("MSTORE8", 0, 2, 0, 3, "Save byte to memory."), 859 | 0x54: ("SLOAD", 0, 1, 1, 50, "Load word from storage."), 860 | 0x55: ("SSTORE", 0, 2, 0, 0, "Save word to storage."), 861 | 0x56: ("JUMP", 0, 1, 0, 8, "Alter the program counter."), 862 | 0x57: ("JUMPI", 0, 2, 0, 10, "Conditionally alter the program counter."), 863 | 0x58: ( 864 | "GETPC", 865 | 0, 866 | 0, 867 | 1, 868 | 2, 869 | "Get the value of the program counter prior to the increment.", 870 | ), 871 | 0x59: ("MSIZE", 0, 0, 1, 2, "Get the size of active memory in bytes."), 872 | 0x5A: ( 873 | "GAS", 874 | 0, 875 | 0, 876 | 1, 877 | 2, 878 | "Get the amount of available gas, including the corresponding reduction the amount of available gas.", 879 | ), 880 | 0x5B: ("JUMPDEST", 0, 0, 0, 1, "Mark a valid destination for jumps."), 881 | 0x60: ("PUSH", 1, 0, 1, 3, "Place 1 byte item on stack."), 882 | 0x61: ("PUSH", 2, 0, 1, 3, "Place 2-byte item on stack."), 883 | 0x62: ("PUSH", 3, 0, 1, 3, "Place 3-byte item on stack."), 884 | 0x63: ("PUSH", 4, 0, 1, 3, "Place 4-byte item on stack."), 885 | 0x64: ("PUSH", 5, 0, 1, 3, "Place 5-byte item on stack."), 886 | 0x65: ("PUSH", 6, 0, 1, 3, "Place 6-byte item on stack."), 887 | 0x66: ("PUSH", 7, 0, 1, 3, "Place 7-byte item on stack."), 888 | 0x67: ("PUSH", 8, 0, 1, 3, "Place 8-byte item on stack."), 889 | 0x68: ("PUSH", 9, 0, 1, 3, "Place 9-byte item on stack."), 890 | 0x69: ("PUSH", 10, 0, 1, 3, "Place 10-byte item on stack."), 891 | 0x6A: ("PUSH", 11, 0, 1, 3, "Place 11-byte item on stack."), 892 | 0x6B: ("PUSH", 12, 0, 1, 3, "Place 12-byte item on stack."), 893 | 0x6C: ("PUSH", 13, 0, 1, 3, "Place 13-byte item on stack."), 894 | 0x6D: ("PUSH", 14, 0, 1, 3, "Place 14-byte item on stack."), 895 | 0x6E: ("PUSH", 15, 0, 1, 3, "Place 15-byte item on stack."), 896 | 0x6F: ("PUSH", 16, 0, 1, 3, "Place 16-byte item on stack."), 897 | 0x70: ("PUSH", 17, 0, 1, 3, "Place 17-byte item on stack."), 898 | 0x71: ("PUSH", 18, 0, 1, 3, "Place 18-byte item on stack."), 899 | 0x72: ("PUSH", 19, 0, 1, 3, "Place 19-byte item on stack."), 900 | 0x73: ("PUSH", 20, 0, 1, 3, "Place 20-byte item on stack."), 901 | 0x74: ("PUSH", 21, 0, 1, 3, "Place 21-byte item on stack."), 902 | 0x75: ("PUSH", 22, 0, 1, 3, "Place 22-byte item on stack."), 903 | 0x76: ("PUSH", 23, 0, 1, 3, "Place 23-byte item on stack."), 904 | 0x77: ("PUSH", 24, 0, 1, 3, "Place 24-byte item on stack."), 905 | 0x78: ("PUSH", 25, 0, 1, 3, "Place 25-byte item on stack."), 906 | 0x79: ("PUSH", 26, 0, 1, 3, "Place 26-byte item on stack."), 907 | 0x7A: ("PUSH", 27, 0, 1, 3, "Place 27-byte item on stack."), 908 | 0x7B: ("PUSH", 28, 0, 1, 3, "Place 28-byte item on stack."), 909 | 0x7C: ("PUSH", 29, 0, 1, 3, "Place 29-byte item on stack."), 910 | 0x7D: ("PUSH", 30, 0, 1, 3, "Place 30-byte item on stack."), 911 | 0x7E: ("PUSH", 31, 0, 1, 3, "Place 31-byte item on stack."), 912 | 0x7F: ("PUSH", 32, 0, 1, 3, "Place 32-byte (full word) item on stack."), 913 | 0x80: ("DUP", 0, 1, 2, 3, "Duplicate 1st stack item."), 914 | 0x81: ("DUP", 0, 2, 3, 3, "Duplicate 2nd stack item."), 915 | 0x82: ("DUP", 0, 3, 4, 3, "Duplicate 3rd stack item."), 916 | 0x83: ("DUP", 0, 4, 5, 3, "Duplicate 4th stack item."), 917 | 0x84: ("DUP", 0, 5, 6, 3, "Duplicate 5th stack item."), 918 | 0x85: ("DUP", 0, 6, 7, 3, "Duplicate 6th stack item."), 919 | 0x86: ("DUP", 0, 7, 8, 3, "Duplicate 7th stack item."), 920 | 0x87: ("DUP", 0, 8, 9, 3, "Duplicate 8th stack item."), 921 | 0x88: ("DUP", 0, 9, 10, 3, "Duplicate 9th stack item."), 922 | 0x89: ("DUP", 0, 10, 11, 3, "Duplicate 10th stack item."), 923 | 0x8A: ("DUP", 0, 11, 12, 3, "Duplicate 11th stack item."), 924 | 0x8B: ("DUP", 0, 12, 13, 3, "Duplicate 12th stack item."), 925 | 0x8C: ("DUP", 0, 13, 14, 3, "Duplicate 13th stack item."), 926 | 0x8D: ("DUP", 0, 14, 15, 3, "Duplicate 14th stack item."), 927 | 0x8E: ("DUP", 0, 15, 16, 3, "Duplicate 15th stack item."), 928 | 0x8F: ("DUP", 0, 16, 17, 3, "Duplicate 16th stack item."), 929 | 0x90: ("SWAP", 0, 2, 2, 3, "Exchange 1st and 2nd stack items."), 930 | 0x91: ("SWAP", 0, 3, 3, 3, "Exchange 1st and 3rd stack items."), 931 | 0x92: ("SWAP", 0, 4, 4, 3, "Exchange 1st and 4th stack items."), 932 | 0x93: ("SWAP", 0, 5, 5, 3, "Exchange 1st and 5th stack items."), 933 | 0x94: ("SWAP", 0, 6, 6, 3, "Exchange 1st and 6th stack items."), 934 | 0x95: ("SWAP", 0, 7, 7, 3, "Exchange 1st and 7th stack items."), 935 | 0x96: ("SWAP", 0, 8, 8, 3, "Exchange 1st and 8th stack items."), 936 | 0x97: ("SWAP", 0, 9, 9, 3, "Exchange 1st and 9th stack items."), 937 | 0x98: ("SWAP", 0, 10, 10, 3, "Exchange 1st and 10th stack items."), 938 | 0x99: ("SWAP", 0, 11, 11, 3, "Exchange 1st and 11th stack items."), 939 | 0x9A: ("SWAP", 0, 12, 12, 3, "Exchange 1st and 12th stack items."), 940 | 0x9B: ("SWAP", 0, 13, 13, 3, "Exchange 1st and 13th stack items."), 941 | 0x9C: ("SWAP", 0, 14, 14, 3, "Exchange 1st and 14th stack items."), 942 | 0x9D: ("SWAP", 0, 15, 15, 3, "Exchange 1st and 15th stack items."), 943 | 0x9E: ("SWAP", 0, 16, 16, 3, "Exchange 1st and 16th stack items."), 944 | 0x9F: ("SWAP", 0, 17, 17, 3, "Exchange 1st and 17th stack items."), 945 | 0xA0: ("LOG", 0, 2, 0, 375, "Append log record with no topics."), 946 | 0xA1: ("LOG", 0, 3, 0, 750, "Append log record with one topic."), 947 | 0xA2: ("LOG", 0, 4, 0, 1125, "Append log record with two topics."), 948 | 0xA3: ("LOG", 0, 5, 0, 1500, "Append log record with three topics."), 949 | 0xA4: ("LOG", 0, 6, 0, 1875, "Append log record with four topics."), 950 | 0xF0: ("CREATE", 0, 3, 1, 32000, "Create a new account with associated code."), 951 | 0xF1: ("CALL", 0, 7, 1, 40, "Message-call into an account."), 952 | 0xF2: ( 953 | "CALLCODE", 954 | 0, 955 | 7, 956 | 1, 957 | 40, 958 | "Message-call into this account with alternative account's code.", 959 | ), 960 | 0xF3: ("RETURN", 0, 2, 0, 0, "Halt execution returning output data."), 961 | 0xFE: ("INVALID", 0, 0, 0, 0, "Designated invalid instruction."), 962 | 0xFF: ( 963 | "SELFDESTRUCT", 964 | 0, 965 | 1, 966 | 0, 967 | 0, 968 | "Halt execution and register account for later deletion.", 969 | ), 970 | } 971 | frontier_instruction_table = InstructionTable(frontier_instruction_table) # type: ignore 972 | 973 | homestead_instruction_table = { 974 | 0xF4: ( 975 | "DELEGATECALL", 976 | 0, 977 | 6, 978 | 1, 979 | 40, 980 | "Message-call into this account with an alternative account's code, but persisting into this account with an alternative account's code.", 981 | ) 982 | } 983 | homestead_instruction_table = InstructionTable( # type: ignore 984 | homestead_instruction_table, previous_fork=frontier_instruction_table 985 | ) 986 | 987 | tangerine_whistle_instruction_table = { 988 | 0x3B: ("EXTCODESIZE", 0, 1, 1, 700, "Get size of an account's code."), 989 | 0x3C: ("EXTCODECOPY", 0, 4, 0, 700, "Copy an account's code to memory."), 990 | 0x31: ("BALANCE", 0, 1, 1, 400, "Get balance of the given account."), 991 | 0x54: ("SLOAD", 0, 1, 1, 200, "Load word from storage."), 992 | 0xF0: ("CREATE", 0, 3, 1, 32000, "Create a new account with associated code."), 993 | 0xF1: ("CALL", 0, 7, 1, 700, "Message-call into an account."), 994 | 0xF2: ( 995 | "CALLCODE", 996 | 0, 997 | 7, 998 | 1, 999 | 700, 1000 | "Message-call into this account with alternative account's code.", 1001 | ), 1002 | 0xF4: ( 1003 | "DELEGATECALL", 1004 | 0, 1005 | 6, 1006 | 1, 1007 | 700, 1008 | "Message-call into this account with an alternative account's code, but persisting into this account with an alternative account's code.", 1009 | ), 1010 | 0xFF: ( 1011 | "SELFDESTRUCT", 1012 | 0, 1013 | 1, 1014 | 0, 1015 | 5000, 1016 | "Halt execution and register account for later deletion.", 1017 | ), 1018 | } 1019 | tangerine_whistle_instruction_table = InstructionTable( # type: ignore 1020 | tangerine_whistle_instruction_table, previous_fork=homestead_instruction_table 1021 | ) 1022 | 1023 | spurious_dragon_instruction_table = {} # type: ignore 1024 | spurious_dragon_instruction_table = InstructionTable( # type: ignore 1025 | spurious_dragon_instruction_table, previous_fork=tangerine_whistle_instruction_table 1026 | ) 1027 | 1028 | byzantium_instruction_table = { 1029 | 0x3D: ( 1030 | "RETURNDATASIZE", 1031 | 0, 1032 | 0, 1033 | 1, 1034 | 2, 1035 | "Get size of output data from the previous call from the current environment.", 1036 | ), 1037 | 0x3E: ( 1038 | "RETURNDATACOPY", 1039 | 0, 1040 | 3, 1041 | 0, 1042 | 3, 1043 | "Copy output data from the previous call to memory.", 1044 | ), 1045 | 0xFA: ("STATICCALL", 0, 6, 1, 40, "Static message-call into an account."), 1046 | 0xFD: ( 1047 | "REVERT", 1048 | 0, 1049 | 2, 1050 | 0, 1051 | 0, 1052 | "Stop execution and revert state changes, without consuming all provided gas and providing a reason.", 1053 | ), 1054 | } 1055 | byzantium_instruction_table = InstructionTable( # type: ignore 1056 | byzantium_instruction_table, previous_fork=spurious_dragon_instruction_table 1057 | ) 1058 | 1059 | constantinople_instruction_table = { 1060 | 0x1B: ("SHL", 0, 2, 1, 3, "Shift left."), 1061 | 0x1C: ("SHR", 0, 2, 1, 3, "Logical shift right."), 1062 | 0x1D: ("SAR", 0, 2, 1, 3, "Arithmetic shift right"), 1063 | 0x3F: ("EXTCODEHASH", 0, 1, 1, 400, "Get hash of code"), 1064 | 0xF5: ( 1065 | "CREATE2", 1066 | 0, 1067 | 4, 1068 | 1, 1069 | 32000, 1070 | "Behaves identically to CREATE, except using keccak256( 0xff ++ address ++ salt ++ keccak256(init_code)))[12:] as the address where the contract is initialized at", 1071 | ), 1072 | } 1073 | constantinople_instruction_table = InstructionTable( # type: ignore 1074 | constantinople_instruction_table, previous_fork=byzantium_instruction_table 1075 | ) 1076 | 1077 | serenity_instruction_table = InstructionTable( 1078 | {}, previous_fork=constantinople_instruction_table 1079 | ) 1080 | 1081 | istanbul_instruction_table = { 1082 | 0x31: ("BALANCE", 0, 1, 1, 700, "Get balance of the given account."), 1083 | 0x3F: ("EXTCODEHASH", 0, 1, 1, 700, "Get hash of code"), 1084 | 0x46: ("CHAINID", 0, 0, 1, 2, "Get current chainid."), 1085 | 0x47: ("SELFBALANCE", 0, 0, 1, 5, "Balance of the current address."), 1086 | 0x54: ("SLOAD", 0, 1, 1, 800, "Load word from storage."), 1087 | } 1088 | istanbul_instruction_table = InstructionTable( # type: ignore 1089 | istanbul_instruction_table, previous_fork=serenity_instruction_table 1090 | ) 1091 | 1092 | london_instruction_table = {0x48: ("BASEFEE", 0, 0, 1, 2, "Base fee in wei")} 1093 | 1094 | london_instruction_table = InstructionTable( # type: ignore 1095 | london_instruction_table, previous_fork=istanbul_instruction_table 1096 | ) 1097 | 1098 | shanghai_instruction_table = {0x5f: ("PUSH", 0, 0, 1, 2, "Place 0 constant byte item on stack.")} 1099 | 1100 | shanghai_instruction_table = InstructionTable( # type: ignore 1101 | shanghai_instruction_table, previous_fork=london_instruction_table 1102 | ) 1103 | 1104 | accepted_forks = ( 1105 | "frontier", 1106 | "homestead", 1107 | "tangerine_whistle", 1108 | "spurious_dragon", 1109 | "byzantium", 1110 | "constantinople", 1111 | "petersburg", 1112 | "serenity", 1113 | "istanbul", 1114 | "london", 1115 | "shanghai" 1116 | ) 1117 | 1118 | 1119 | instruction_tables = { 1120 | "frontier": frontier_instruction_table, 1121 | "homestead": homestead_instruction_table, 1122 | "tangerine_whistle": tangerine_whistle_instruction_table, 1123 | "spurious_dragon": spurious_dragon_instruction_table, 1124 | "byzantium": byzantium_instruction_table, 1125 | "constantinople": constantinople_instruction_table, 1126 | "petersburg": constantinople_instruction_table, # constantinople table is intentional here: those two are aliases 1127 | "serenity": serenity_instruction_table, 1128 | "istanbul": istanbul_instruction_table, 1129 | "london": london_instruction_table, 1130 | "shanghai": shanghai_instruction_table, 1131 | } 1132 | 1133 | 1134 | def block_to_fork(block_number): 1135 | """Convert block number to fork name. 1136 | 1137 | :param block_number: block number 1138 | :type block_number: int 1139 | :return: fork name 1140 | :rtype: str 1141 | 1142 | Example use:: 1143 | 1144 | >>> block_to_fork(0) 1145 | ... 1146 | "frontier" 1147 | >>> block_to_fork(4370000) 1148 | ... 1149 | "byzantium" 1150 | >>> block_to_fork(4370001) 1151 | ... 1152 | "byzantium" 1153 | """ 1154 | forks_by_block = { 1155 | 0: "frontier", 1156 | 1150000: "homestead", 1157 | # 1920000 Dao 1158 | 2463000: "tangerine_whistle", 1159 | 2675000: "spurious_dragon", 1160 | 4370000: "byzantium", 1161 | # 7280000: "constantinople", # Same Block as petersburg, commented to avoid conflicts 1162 | 7280000: "petersburg", 1163 | 9069000: "istanbul", 1164 | 12965000: "london", 1165 | 17034870: "shanghai", 1166 | 99999999: "serenity", # to be replaced after Serenity launch 1167 | } 1168 | fork_names = list(forks_by_block.values()) 1169 | fork_blocks = list(forks_by_block.keys()) 1170 | return fork_names[bisect(fork_blocks, block_number) - 1] 1171 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='pyevmasm', 5 | version='0.2.4', 6 | description='Ethereum Virtual Machine (EVM) assembler and disassembler', 7 | author='Trail of Bits', 8 | author_email='evmasm@trailofbits.com', 9 | url='https://github.com/trailofbits/pyevmasm', 10 | license='Apache License 2.0', 11 | packages=['pyevmasm'], 12 | python_requires='>2.7', 13 | install_requires=[ 14 | 'future' 15 | ], 16 | extras_require={ 17 | 'dev': [ 18 | 'nose', 19 | 'coverage', 20 | 'flake8' 21 | ] 22 | }, 23 | entry_points={ 24 | 'console_scripts': [ 25 | 'evmasm = pyevmasm.__main__:main' 26 | ] 27 | } 28 | ) 29 | -------------------------------------------------------------------------------- /tests/test_EVMAssembler.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import unittest 3 | 4 | import pyevmasm as EVMAsm 5 | 6 | 7 | def int_to_bytes(i): 8 | if sys.version_info[0] >= 3: 9 | return i.to_bytes(1, "little") 10 | else: 11 | return bytes(chr(i)) 12 | 13 | 14 | # noinspection PyPep8Naming 15 | class EVMTest_Assembler(unittest.TestCase): 16 | _multiprocess_can_split_ = True 17 | maxDiff = None 18 | 19 | def test_ADD_1(self): 20 | instruction = EVMAsm.disassemble_one(b"\x60\x10") 21 | self.assertEqual( 22 | EVMAsm.Instruction(0x60, "PUSH", 1, 0, 1, 3, "Place 1 byte item on stack.", 16, 0), instruction 23 | ) 24 | 25 | instruction = EVMAsm.assemble_one("PUSH1 0x10") 26 | self.assertEqual( 27 | instruction, EVMAsm.Instruction(0x60, "PUSH", 1, 0, 1, 3, "Place 1 byte item on stack.", 16, 0) 28 | ) 29 | 30 | instructions1 = EVMAsm.disassemble_all(b"\x30\x31") 31 | instructions2 = EVMAsm.assemble_all("ADDRESS\nBALANCE") 32 | self.assertTrue(all(a == b for a, b in zip(instructions1, instructions2))) 33 | 34 | # High level simple assembler/disassembler 35 | 36 | bytecode = EVMAsm.assemble_hex( 37 | """PUSH1 0x80 38 | BLOCKHASH 39 | MSTORE 40 | PUSH1 0x2 41 | PUSH2 0x100 42 | """ 43 | ) 44 | self.assertEqual(bytecode, "0x608040526002610100") 45 | 46 | asmcode = EVMAsm.disassemble_hex("0x608040526002610100") 47 | self.assertEqual(asmcode, """PUSH1 0x80\nBLOCKHASH\nMSTORE\nPUSH1 0x2\nPUSH2 0x100""") 48 | 49 | def test_STOP(self): 50 | insn = EVMAsm.disassemble_one(b"\x00") 51 | self.assertTrue(insn.mnemonic == "STOP") 52 | 53 | def test_JUMPI(self): 54 | insn = EVMAsm.disassemble_one(b"\x57") 55 | self.assertTrue(insn.mnemonic == "JUMPI") 56 | self.assertTrue(insn.is_branch) 57 | 58 | def test_pre_byzantium(self): 59 | insn = EVMAsm.disassemble_one(b"\x57", fork="frontier") 60 | self.assertTrue(insn.mnemonic == "JUMPI") 61 | self.assertTrue(insn.is_branch) 62 | insn = EVMAsm.disassemble_one(b"\xfa", fork="frontier") 63 | self.assertTrue(insn.mnemonic == "INVALID") # STATICCALL added in byzantium 64 | insn = EVMAsm.disassemble_one(b"\xfd", fork="frontier") 65 | self.assertTrue(insn.mnemonic == "INVALID") # REVERT added in byzantium 66 | 67 | def test_byzantium_fork(self): 68 | insn = EVMAsm.disassemble_one(b"\x57", fork="byzantium") 69 | self.assertTrue(insn.mnemonic == "JUMPI") 70 | self.assertTrue(insn.is_branch) 71 | insn = EVMAsm.disassemble_one(b"\x1b", fork="byzantium") 72 | self.assertTrue(insn.mnemonic == "INVALID") # SHL added in constantinople 73 | insn = EVMAsm.disassemble_one(b"\x1c", fork="byzantium") 74 | self.assertTrue(insn.mnemonic == "INVALID") # SHR added in constantinople 75 | insn = EVMAsm.disassemble_one(b"\x1d", fork="byzantium") 76 | self.assertTrue(insn.mnemonic == "INVALID") # SAR added in constantinople 77 | insn = EVMAsm.disassemble_one(b"\x3f", fork="byzantium") 78 | self.assertTrue(insn.mnemonic == "INVALID") # EXTCODEHASH added in constantinople 79 | insn = EVMAsm.disassemble_one(b"\xf5", fork="byzantium") 80 | self.assertTrue(insn.mnemonic == "INVALID") # CREATE2 added in constantinople 81 | 82 | def test_constantinople_fork(self): 83 | insn = EVMAsm.disassemble_one(b"\x1b", fork="constantinople") 84 | self.assertTrue(insn.mnemonic == "SHL") 85 | self.assertTrue(insn.is_arithmetic) 86 | insn = EVMAsm.disassemble_one(b"\x1c", fork="constantinople") 87 | self.assertTrue(insn.mnemonic == "SHR") 88 | self.assertTrue(insn.is_arithmetic) 89 | insn = EVMAsm.disassemble_one(b"\x1d", fork="constantinople") 90 | self.assertTrue(insn.mnemonic == "SAR") 91 | self.assertTrue(insn.is_arithmetic) 92 | insn = EVMAsm.disassemble_one(b"\x3f", fork="constantinople") 93 | self.assertTrue(insn.mnemonic == "EXTCODEHASH") 94 | insn = EVMAsm.disassemble_one(b"\xf5", fork="constantinople") 95 | self.assertTrue(insn.mnemonic == "CREATE2") 96 | 97 | def test_istanbul_fork(self): 98 | insn = EVMAsm.disassemble_one(b"\x31", fork="istanbul") 99 | self.assertTrue(insn.mnemonic == "BALANCE") 100 | self.assertTrue(insn.fee == 700) 101 | self.assertTrue(insn.pops == 1) 102 | self.assertTrue(insn.pushes == 1) 103 | insn = EVMAsm.disassemble_one(b"\x3f", fork="istanbul") 104 | self.assertTrue(insn.mnemonic == "EXTCODEHASH") 105 | self.assertTrue(insn.fee == 700) 106 | self.assertTrue(insn.pops == 1) 107 | self.assertTrue(insn.pushes == 1) 108 | insn = EVMAsm.disassemble_one(b"\x46", fork="istanbul") 109 | self.assertTrue(insn.mnemonic == "CHAINID") 110 | self.assertTrue(insn.fee == 2) 111 | self.assertTrue(insn.pops == 0) 112 | self.assertTrue(insn.pushes == 1) 113 | insn = EVMAsm.disassemble_one(b"\x47", fork="istanbul") 114 | self.assertTrue(insn.mnemonic == "SELFBALANCE") 115 | self.assertTrue(insn.fee == 5) 116 | self.assertTrue(insn.pops == 0) 117 | self.assertTrue(insn.pushes == 1) 118 | insn = EVMAsm.disassemble_one(b"\x54", fork="istanbul") 119 | self.assertTrue(insn.mnemonic == "SLOAD") 120 | self.assertTrue(insn.fee == 800) 121 | self.assertTrue(insn.pops == 1) 122 | self.assertTrue(insn.pushes == 1) 123 | 124 | def test_london_fork(self): 125 | insn = EVMAsm.disassemble_one(b"\x48", fork="london") 126 | self.assertTrue(insn.mnemonic == "BASEFEE") 127 | self.assertTrue(insn.fee == 2) 128 | self.assertTrue(insn.pops == 0) 129 | self.assertTrue(insn.pushes == 1) 130 | 131 | def test_shanghai_fork(self): 132 | insn = EVMAsm.disassemble_one(b"\x5f", fork="shanghai") 133 | self.assertTrue(insn.mnemonic == "PUSH0") 134 | self.assertTrue(insn.fee == 2) 135 | self.assertTrue(insn.pops == 0) 136 | self.assertTrue(insn.pushes == 1) 137 | self.assertTrue(insn.operand_size == 0) 138 | 139 | def test_assemble_DUP1_regression(self): 140 | insn = EVMAsm.assemble_one("DUP1") 141 | self.assertEqual(insn.mnemonic, "DUP1") 142 | self.assertEqual(insn.opcode, 0x80) 143 | 144 | def test_assemble_LOGX_regression(self): 145 | inst_table = EVMAsm.instruction_tables[EVMAsm.DEFAULT_FORK] 146 | log0_opcode = 0xA0 147 | for n in range(5): 148 | opcode = log0_opcode + n 149 | self.assertTrue(opcode in inst_table, "{!r} not in instruction_table".format(opcode)) 150 | asm = "LOG" + str(n) 151 | self.assertTrue(asm in inst_table, "{!r} not in instruction_table".format(asm)) 152 | insn = EVMAsm.assemble_one(asm) 153 | self.assertEqual(insn.mnemonic, asm) 154 | self.assertEqual(insn.opcode, opcode) 155 | 156 | def test_consistency_assembler_disassembler(self): 157 | """ 158 | Tests whether every opcode that can be disassembled, can also be 159 | assembled again. 160 | """ 161 | inst_table = EVMAsm.instruction_tables[EVMAsm.DEFAULT_FORK] 162 | for opcode in inst_table.keys(): 163 | b = int_to_bytes(opcode) + b"\x00" * 32 164 | inst_dis = EVMAsm.disassemble_one(b) 165 | a = str(inst_dis) 166 | inst_as = EVMAsm.assemble_one(a) 167 | self.assertEqual(inst_dis, inst_as) 168 | 169 | 170 | if __name__ == "__main__": 171 | unittest.main() 172 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py{27,36} 3 | 4 | [testenv] 5 | deps = .[dev] 6 | commands = nosetests 7 | install_command = pip install {opts} {packages} 8 | 9 | [testenv:pep8] 10 | basepython = python2.7 11 | deps = flake8 12 | commands = 13 | flake8 . 14 | 15 | [pep8] 16 | ignore = E265,E501 17 | max-line-length = 160 18 | exclude = docs/,examples/,scripts/,tests/ 19 | 20 | [flake8] 21 | #ignore = E265,E501,F403,F405,E266,E712,F841,E741,E722,E731 22 | max-line-length = 160 23 | exclude = .tox,.*.egg,.git,docs/,examples/,scripts/,tests/ 24 | --------------------------------------------------------------------------------