├── .git-blame-ignore-revs ├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.yml │ ├── config.yml │ ├── feature-request.yml │ └── question.yml └── workflows │ ├── ci.yml │ ├── cifuzz.yml │ ├── custom.yml │ ├── macos.yml │ ├── nightly-ci.yml │ └── windows.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs ├── Makefile ├── api.rst ├── conf.py ├── index.rst ├── make.bat └── quickstart.rst ├── fuzzing ├── build.sh ├── enhanced_fdp.py └── irsb_fuzzer.py ├── make_ffi.py ├── pyproject.toml ├── pyvex ├── __init__.py ├── _register_info.py ├── arches.py ├── block.py ├── const.py ├── const_val.py ├── data_ref.py ├── enums.py ├── errors.py ├── expr.py ├── lifting │ ├── __init__.py │ ├── gym │ │ ├── README.md │ │ ├── __init__.py │ │ ├── aarch64_spotter.py │ │ ├── arm_spotter.py │ │ └── x86_spotter.py │ ├── libvex.py │ ├── lift_function.py │ ├── lifter.py │ ├── post_processor.py │ ├── util │ │ ├── __init__.py │ │ ├── instr_helper.py │ │ ├── lifter_helper.py │ │ ├── syntax_wrapper.py │ │ └── vex_helper.py │ └── zerodivision.py ├── native.py ├── py.typed ├── stmt.py ├── types.py └── utils.py ├── pyvex_c ├── LICENSE ├── Makefile ├── Makefile-msvc ├── README ├── analysis.c ├── e4c_lite.h ├── logging.c ├── logging.h ├── postprocess.c ├── pyvex.c ├── pyvex.def ├── pyvex.h └── pyvex_internal.h ├── setup.py └── tests ├── test_arm_postprocess.py ├── test_gym.py ├── test_irsb_property_caching.py ├── test_lift.py ├── test_mips32_postprocess.py ├── test_pyvex.py ├── test_s390x_exrl.py ├── test_s390x_lochi.py ├── test_s390x_vl.py ├── test_spotter.py └── test_ud2.py /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # Black + pre-commit 2 | 23503e79193a3cff5d6f1c92f22349fd2227d936 # Black 3 | cd758543f17a2253b5a0630327eac0ad6780217a # Trailing whitespace, pyupgrade, prefer builtin constructors 4 | dfd137fc8d3073ff065347401f528c1eaf62c383 # ruff 5 | 
-------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- 1 | name: Report a bug 2 | description: Report a bug in pyvex 3 | labels: [bug,needs-triage] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | Thank you for taking the time to submit this bug report! 9 | 10 | Before submitting this bug report, please check the following, which may resolve your issue: 11 | * Have you checked that you are running the latest versions of angr and its components? angr is rapidly-evolving! 12 | * Have you [searched existing issues](https://github.com/angr/pyvex/issues?q=is%3Aopen+is%3Aissue+label%3Abug) to see if this bug has been reported before? 13 | * Have you checked the [documentation](https://docs.angr.io/)? 14 | * Have you checked the [FAQ](https://docs.angr.io/introductory-errata/faq)? 15 | 16 | **Important:** If this bug is a security vulnerability, please submit it privately. See our [security policy](https://github.com/angr/angr/blob/master/SECURITY.md) for more details. 17 | 18 | Please note: The angr suite is maintained by a small team. While we cannot guarantee any timeliness for fixes and enhancements, we will do our best. For more real-time help with angr, from us and the community, join our [Slack](https://angr.io/invite/). 19 | 20 | - type: textarea 21 | attributes: 22 | label: Description 23 | description: Brief description of the bug, with any relevant log messages. 24 | validations: 25 | required: true 26 | 27 | - type: textarea 28 | attributes: 29 | label: Steps to reproduce the bug 30 | description: | 31 | If appropriate, include both a **script to reproduce the bug**, and if possible **attach the binary used**. 32 | 33 | **Tip:** You can attach files to the issue by first clicking on the textarea to select it, then dragging & dropping the file onto the textarea. 
34 | - type: textarea 35 | attributes: 36 | label: Environment 37 | description: Many common issues are caused by problems with the local Python environment. Before submitting, double-check that your versions of all modules in the angr suite (angr, cle, pyvex, ...) are up to date and include the output of `python -m angr.misc.bug_report` here. 38 | 39 | - type: textarea 40 | attributes: 41 | label: Additional context 42 | description: Any additional context about the problem. 43 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Join our Slack community 4 | url: https://angr.io/invite/ 5 | about: For questions and help with angr, you are invited to join the angr Slack community 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yml: -------------------------------------------------------------------------------- 1 | name: Request a feature 2 | description: Request a new feature for pyvex 3 | labels: [enhancement,needs-triage] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | Thank you for taking the time to submit this feature request! 9 | 10 | Before submitting this feature request, please check the following: 11 | * Have you checked that you are running the latest versions of angr and its components? angr is rapidly-evolving! 12 | * Have you checked the [documentation](https://docs.angr.io/) to see if this feature exists already? 13 | * Have you [searched existing issues](https://github.com/angr/pyvex/issues?q=is%3Aissue+label%3Aenhancement+) to see if this feature has been requested before? 14 | 15 | Please note: The angr suite is maintained by a small team. While we cannot guarantee any timeliness for fixes and enhancements, we will do our best. 
For more real-time help with angr, from us and the community, join our [Slack](https://angr.io/invite/). 16 | 17 | - type: textarea 18 | attributes: 19 | label: Description 20 | description: | 21 | Brief description of the desired feature. If the feature is intended to solve some problem, please clearly describe the problem, including any relevant binaries, etc. 22 | 23 | **Tip:** You can attach files to the issue by first clicking on the textarea to select it, then dragging & dropping the file onto the textarea. 24 | validations: 25 | required: true 26 | 27 | - type: textarea 28 | attributes: 29 | label: Alternatives 30 | description: Possible alternative solutions or features that you have considered. 31 | 32 | - type: textarea 33 | attributes: 34 | label: Additional context 35 | description: Any other context or screenshots about the feature request. 36 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.yml: -------------------------------------------------------------------------------- 1 | name: Ask a question 2 | description: Ask a question about pyvex 3 | labels: [question,needs-triage] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | If you have a question about pyvex, that is not a bug report or a feature request, you can ask it here. For more real-time help with pyvex, from us and the community, join our [Slack](https://angr.io/invite/). 9 | 10 | Before submitting this question, please check the following, which may answer your question: 11 | * Have you checked the [documentation](https://docs.angr.io/)? 12 | * Have you checked the [FAQ](https://docs.angr.io/introductory-errata/faq)? 13 | * Have you checked our library of [examples](https://github.com/angr/angr-doc/tree/master/examples)? 14 | * Have you [searched existing issues](https://github.com/angr/pyvex/issues?q=is%3Aissue+label%3Aquestion) to see if this question has been answered before? 
15 | * Have you checked that you are running the latest versions of angr and its components? angr is rapidly-evolving! 16 | 17 | Please note: The angr suite is maintained by a small team. While we cannot guarantee any timeliness for fixes and enhancements, we will do our best. 18 | 19 | - type: textarea 20 | attributes: 21 | label: Question 22 | description: 23 | validations: 24 | required: true 25 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | workflow_dispatch: 9 | 10 | jobs: 11 | ci: 12 | uses: angr/ci-settings/.github/workflows/angr-ci.yml@master 13 | windows: 14 | uses: ./.github/workflows/windows.yml 15 | macos: 16 | uses: ./.github/workflows/macos.yml 17 | 18 | -------------------------------------------------------------------------------- /.github/workflows/cifuzz.yml: -------------------------------------------------------------------------------- 1 | name: OSS-Fuzz 2 | 3 | on: 4 | # push: 5 | # branches: 6 | # - master 7 | # pull_request: 8 | workflow_dispatch: 9 | 10 | permissions: {} 11 | 12 | jobs: 13 | Fuzzing: 14 | runs-on: ubuntu-latest 15 | permissions: 16 | security-events: write 17 | steps: 18 | - name: Build Fuzzers 19 | id: build 20 | uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master 21 | with: 22 | oss-fuzz-project-name: 'pyvex' 23 | language: python 24 | - name: Run Fuzzers 25 | uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master 26 | with: 27 | oss-fuzz-project-name: 'pyvex' 28 | language: python 29 | fuzz-seconds: 600 30 | output-sarif: true 31 | - name: Upload Crash 32 | uses: actions/upload-artifact@v3 33 | if: failure() && steps.build.outcome == 'success' 34 | with: 35 | name: artifacts 36 | path: ./out/artifacts 37 | - name: Upload Sarif 38 | if: always() && steps.build.outcome == 
'success' 39 | uses: github/codeql-action/upload-sarif@v2 40 | with: 41 | # Path to SARIF file relative to the root of the repository 42 | sarif_file: cifuzz-sarif/results.sarif 43 | checkout_path: cifuzz-sarif 44 | -------------------------------------------------------------------------------- /.github/workflows/custom.yml: -------------------------------------------------------------------------------- 1 | name: Custom CI 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | image: 7 | description: Container image to run with 8 | type: string 9 | required: true 10 | nightly: 11 | description: Run in nightly mode (include slow tests, no dependent projects) 12 | type: boolean 13 | required: true 14 | afl: 15 | description: Set parameters for AFL 16 | type: boolean 17 | required: true 18 | 19 | 20 | jobs: 21 | ci: 22 | uses: angr/ci-settings/.github/workflows/angr-ci.yml@master 23 | with: 24 | container_image: ${{ inputs.image }} 25 | nightly: ${{ inputs.nightly }} 26 | afl: ${{ inputs.afl }} 27 | 28 | 29 | -------------------------------------------------------------------------------- /.github/workflows/macos.yml: -------------------------------------------------------------------------------- 1 | name: Test on macOS 2 | 3 | on: 4 | workflow_dispatch: 5 | workflow_call: 6 | 7 | jobs: 8 | macos: 9 | name: Test macOS 10 | runs-on: macos-15 11 | steps: 12 | - uses: actions/checkout@v3 13 | with: 14 | path: pyvex 15 | submodules: true 16 | - uses: actions/checkout@v3 17 | with: 18 | repository: angr/binaries 19 | path: binaries 20 | - uses: actions/setup-python@v4 21 | with: 22 | python-version: "3.10" 23 | - run: python -m venv $HOME/venv 24 | name: Create venv 25 | shell: bash 26 | - run: | 27 | source $HOME/venv/bin/activate 28 | pip install git+https://github.com/angr/archinfo.git 29 | name: Install dependencies 30 | - run: | 31 | source $HOME/venv/bin/activate 32 | pip install ./pyvex[testing] 33 | name: Install 34 | - run: | 35 | source $HOME/venv/bin/activate 36 
| pytest -n auto pyvex 37 | name: Run pytest 38 | -------------------------------------------------------------------------------- /.github/workflows/nightly-ci.yml: -------------------------------------------------------------------------------- 1 | name: Nightly CI 2 | 3 | on: 4 | schedule: 5 | - cron: "0 0 * * *" 6 | workflow_dispatch: 7 | 8 | jobs: 9 | ci: 10 | uses: angr/ci-settings/.github/workflows/angr-ci.yml@master 11 | with: 12 | nightly: true 13 | secrets: inherit 14 | -------------------------------------------------------------------------------- /.github/workflows/windows.yml: -------------------------------------------------------------------------------- 1 | name: Test on Windows 2 | 3 | on: 4 | workflow_dispatch: 5 | workflow_call: 6 | 7 | jobs: 8 | windows: 9 | name: Test Windows 10 | runs-on: windows-2022 11 | steps: 12 | - uses: actions/checkout@v3 13 | with: 14 | path: pyvex 15 | submodules: true 16 | - uses: actions/checkout@v3 17 | with: 18 | repository: angr/binaries 19 | path: binaries 20 | - uses: actions/setup-python@v4 21 | with: 22 | python-version: "3.10" 23 | - run: python -m venv $HOME/venv 24 | name: Create venv 25 | shell: bash 26 | - run: | 27 | call %USERPROFILE%\venv\Scripts\activate 28 | pip install git+https://github.com/angr/archinfo.git 29 | name: Install dependencies 30 | shell: cmd 31 | - run: | 32 | call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat" 33 | call %USERPROFILE%\venv\Scripts\activate 34 | pip install ./pyvex[testing] 35 | name: Install 36 | shell: cmd 37 | - run: | 38 | call %USERPROFILE%\venv\Scripts\activate 39 | pytest -n auto pyvex 40 | name: Run pytest 41 | shell: cmd 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | dist 3 | MANIFEST 4 | pyvex_python 5 | vex_ffi.py 6 | libpyvex.so 7 | *.egg-info 8 | *.eggs 9 | *.pyc 10 
| *.swp 11 | *.obj 12 | *.lib 13 | *.dll 14 | *.exp 15 | *.o 16 | *.a 17 | *.dylib 18 | pyvex/lib 19 | pyvex/include 20 | vex-master 21 | vex-master.tar.gz 22 | docs/_build 23 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "vex"] 2 | path = vex 3 | url = https://github.com/angr/vex.git 4 | branch = master 5 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | 3 | # 4 | # Fail fast 5 | # 6 | 7 | - repo: https://github.com/abravalheri/validate-pyproject 8 | rev: v0.24.1 9 | hooks: 10 | - id: validate-pyproject 11 | fail_fast: true 12 | 13 | - repo: https://github.com/pre-commit/pre-commit-hooks 14 | rev: v5.0.0 15 | hooks: 16 | # General 17 | - id: check-merge-conflict 18 | fail_fast: true 19 | - id: check-case-conflict 20 | fail_fast: true 21 | - id: destroyed-symlinks 22 | fail_fast: true 23 | - id: check-symlinks 24 | fail_fast: true 25 | - id: check-added-large-files 26 | fail_fast: true 27 | # Syntax 28 | - id: check-toml 29 | fail_fast: true 30 | - id: check-json 31 | fail_fast: true 32 | - id: check-yaml 33 | fail_fast: true 34 | 35 | - repo: https://github.com/pre-commit/pre-commit-hooks 36 | rev: v5.0.0 37 | hooks: 38 | - id: check-ast 39 | fail_fast: true 40 | 41 | # 42 | # Modifiers 43 | # 44 | 45 | - repo: https://github.com/pre-commit/pre-commit-hooks 46 | rev: v5.0.0 47 | hooks: 48 | - id: mixed-line-ending 49 | - id: trailing-whitespace 50 | 51 | - repo: https://github.com/dannysepler/rm_unneeded_f_str 52 | rev: v0.2.0 53 | hooks: 54 | - id: rm-unneeded-f-str 55 | 56 | - repo: https://github.com/asottile/pyupgrade 57 | rev: v3.20.0 58 | hooks: 59 | - id: pyupgrade 60 | args: [--py310-plus] 61 | 62 | - repo: https://github.com/astral-sh/ruff-pre-commit 63 | 
rev: v0.11.13 64 | hooks: 65 | - id: ruff 66 | args: [--fix, --exit-non-zero-on-fix] 67 | 68 | # Last modifier: Coding Standard 69 | - repo: https://github.com/psf/black 70 | rev: 25.1.0 71 | hooks: 72 | - id: black 73 | 74 | # 75 | # Static Checks 76 | # 77 | 78 | - repo: https://github.com/pre-commit/pygrep-hooks 79 | rev: v1.10.0 80 | hooks: 81 | # Python 82 | - id: python-use-type-annotations 83 | - id: python-no-log-warn 84 | # Documentation 85 | - id: rst-backticks 86 | - id: rst-directive-colons 87 | - id: rst-inline-touching-normal 88 | 89 | - repo: https://github.com/pre-commit/pre-commit-hooks 90 | rev: v5.0.0 91 | hooks: 92 | - id: debug-statements 93 | - id: check-builtin-literals 94 | - id: check-docstring-first 95 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | version: 2 5 | 6 | sphinx: 7 | configuration: docs/conf.py 8 | 9 | submodules: 10 | include: all 11 | 12 | build: 13 | os: ubuntu-22.04 14 | tools: 15 | python: "3.10" 16 | jobs: 17 | pre_install: 18 | - pip install -U pip 19 | - pip install git+https://github.com/angr/archinfo.git 20 | 21 | python: 22 | install: 23 | - method: pip 24 | path: . 25 | extra_requirements: 26 | - docs 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, The Regents of the University of California 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 
9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 | 25 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | include make_ffi.py 4 | recursive-include pyvex_c *.c *.h *.def Makefile Makefile-msvc LICENSE 5 | recursive-include vex * 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyVEX 2 | [![Latest Release](https://img.shields.io/pypi/v/pyvex.svg)](https://pypi.python.org/pypi/pyvex/) 3 | [![Python Version](https://img.shields.io/pypi/pyversions/pyvex)](https://pypi.python.org/pypi/pyvex/) 4 | [![PyPI Statistics](https://img.shields.io/pypi/dm/pyvex.svg)](https://pypistats.org/packages/pyvex) 5 | [![License](https://img.shields.io/github/license/angr/pyvex.svg)](https://github.com/angr/pyvex/blob/master/LICENSE) 6 | 7 | PyVEX is Python bindings for the VEX IR. 
8 | 9 | ## Project Links 10 | Project repository: https://github.com/angr/pyvex 11 | 12 | Documentation: https://api.angr.io/projects/pyvex/en/latest/ 13 | 14 | ## Installing PyVEX 15 | 16 | PyVEX can be pip-installed: 17 | 18 | ```bash 19 | pip install pyvex 20 | ``` 21 | 22 | ## Using PyVEX 23 | 24 | ```python 25 | import pyvex 26 | import archinfo 27 | 28 | # translate an AMD64 basic block (of nops) at 0x400400 into VEX 29 | irsb = pyvex.lift(b"\x90\x90\x90\x90\x90", 0x400400, archinfo.ArchAMD64()) 30 | 31 | # pretty-print the basic block 32 | irsb.pp() 33 | 34 | # this is the IR Expression of the jump target of the unconditional exit at the end of the basic block 35 | print(irsb.next) 36 | 37 | # this is the type of the unconditional exit (i.e., a call, ret, syscall, etc) 38 | print(irsb.jumpkind) 39 | 40 | # you can also pretty-print it 41 | irsb.next.pp() 42 | 43 | # iterate through each statement and print all the statements 44 | for stmt in irsb.statements: 45 | stmt.pp() 46 | 47 | # pretty-print the IR expression representing the data, and the *type* of that IR expression written by every store statement 48 | import pyvex 49 | for stmt in irsb.statements: 50 | if isinstance(stmt, pyvex.IRStmt.Store): 51 | print("Data:", end="") 52 | stmt.data.pp() 53 | print("") 54 | 55 | print("Type:", end="") 56 | print(stmt.data.result_type) 57 | print("") 58 | 59 | # pretty-print the condition and jump target of every conditional exit from the basic block 60 | for stmt in irsb.statements: 61 | if isinstance(stmt, pyvex.IRStmt.Exit): 62 | print("Condition:", end="") 63 | stmt.guard.pp() 64 | print("") 65 | 66 | print("Target:", end="") 67 | stmt.dst.pp() 68 | print("") 69 | 70 | # these are the types of every temp in the IRSB 71 | print(irsb.tyenv.types) 72 | 73 | # here is one way to get the type of temp 0 74 | print(irsb.tyenv.types[0]) 75 | ``` 76 | 77 | Keep in mind that this is a *syntactic* representation of a basic block. 
That is, it'll tell you what the block means, but you don't have any context to say, for example, what *actual* data is written by a store instruction. 78 | 79 | ## VEX Intermediate Representation 80 | 81 | To deal with widely diverse architectures, it is useful to carry out analyses on an intermediate representation. 82 | An IR abstracts away several architecture differences when dealing with different architectures, allowing a single analysis to be run on all of them: 83 | 84 | - **Register names.** The quantity and names of registers differ between architectures, but modern CPU designs hold to a common theme: each CPU contains several general purpose registers, a register to hold the stack pointer, a set of registers to store condition flags, and so forth. The IR provides a consistent, abstracted interface to registers on different platforms. Specifically, VEX models the registers as a separate memory space, with integer offsets (e.g., AMD64's `rax` is stored starting at address 16 in this memory space). 85 | - **Memory access.** Different architectures access memory in different ways. For example, ARM can access memory in both little-endian and big-endian modes. The IR must abstract away these differences. 86 | - **Memory segmentation.** Some architectures, such as x86, support memory segmentation through the use of special segment registers. The IR understands such memory access mechanisms. 87 | - **Instruction side-effects.** Most instructions have side-effects. For example, most operations in Thumb mode on ARM update the condition flags, and stack push/pop instructions update the stack pointer. Tracking these side-effects in an *ad hoc* manner in the analysis would be crazy, so the IR makes these effects explicit. 88 | 89 | There are lots of choices for an IR. We use VEX, since the uplifting of binary code into VEX is quite well supported. 90 | VEX is an architecture-agnostic, side-effects-free representation of a number of target machine languages. 
91 | It abstracts machine code into a representation designed to make program analysis easier. 92 | This representation has five main classes of objects: 93 | 94 | - **Expressions.** IR Expressions represent a calculated or constant value. This includes memory loads, register reads, and results of arithmetic operations. 95 | - **Operations.** IR Operations describe a *modification* of IR Expressions. This includes integer arithmetic, floating-point arithmetic, bit operations, and so forth. An IR Operation applied to IR Expressions yields an IR Expression as a result. 96 | - **Temporary variables.** VEX uses temporary variables as internal registers: IR Expressions are stored in temporary variables between use. The content of a temporary variable can be retrieved using an IR Expression. These temporaries are numbered, starting at `t0`. These temporaries are strongly typed (i.e., "64-bit integer" or "32-bit float"). 97 | - **Statements.** IR Statements model changes in the state of the target machine, such as the effect of memory stores and register writes. IR Statements use IR Expressions for values they may need. For example, a memory store *IR Statement* uses an *IR Expression* for the target address of the write, and another *IR Expression* for the content. 98 | - **Blocks.** An IR Block is a collection of IR Statements, representing an extended basic block (termed "IR Super Block" or "IRSB") in the target architecture. A block can have several exits. For conditional exits from the middle of a basic block, a special *Exit* IR Statement is used. An IR Expression is used to represent the target of the unconditional exit at the end of the block. 99 | 100 | VEX IR is actually quite well documented in the `libvex_ir.h` file (https://github.com/angr/vex/blob/dev/pub/libvex_ir.h) in the VEX repository. For the lazy, we'll detail some parts of VEX that you'll likely interact with fairly frequently. 
To begin with, here are some IR Expressions: 101 | 102 | | IR Expression | Evaluated Value | VEX Output Example | 103 | | ------------- | --------------- | ------- | 104 | | Constant | A constant value. | 0x4:I32 | 105 | | Read Temp | The value stored in a VEX temporary variable. | RdTmp(t10) | 106 | | Get Register | The value stored in a register. | GET:I32(16) | 107 | | Load Memory | The value stored at a memory address, with the address specified by another IR Expression. | LDle:I32 / LDbe:I64 | 108 | | Operation | A result of a specified IR Operation, applied to specified IR Expression arguments. | Add32 | 109 | | If-Then-Else | If a given IR Expression evaluates to 0, return one IR Expression. Otherwise, return another. | ITE | 110 | | Helper Function | VEX uses C helper functions for certain operations, such as computing the conditional flags registers of certain architectures. These functions return IR Expressions. | function\_name() | 111 | 112 | These expressions are then, in turn, used in IR Statements. Here are some common ones: 113 | 114 | | IR Statement | Meaning | VEX Output Example | 115 | | ------------ | ------- | ------------------ | 116 | Write Temp | Set a VEX temporary variable to the value of the given IR Expression. | WrTmp(t1) = (IR Expression) | 117 | Put Register | Update a register with the value of the given IR Expression. | PUT(16) = (IR Expression) | 118 | Store Memory | Update a location in memory, given as an IR Expression, with a value, also given as an IR Expression. | STle(0x1000) = (IR Expression) | 119 | Exit | A conditional exit from a basic block, with the jump target specified by an IR Expression. The condition is specified by an IR Expression. | if (condition) goto (Boring) 0x4000A00:I32 | 120 | 121 | An example of an IR translation, on ARM, is produced below. 
In the example, the subtraction operation is translated into a single IR block comprising 5 IR Statements, each of which contains at least one IR Expression (although, in real life, an IR block would typically consist of more than one instruction). Register names are translated into numerical indices given to the *GET* Expression and *PUT* Statement. 122 | The astute reader will observe that the actual subtraction is modeled by the first 4 IR Statements of the block, and the incrementing of the program counter to point to the next instruction (which, in this case, is located at `0x59FC8`) is modeled by the last statement. 123 | 124 | The following ARM instruction: 125 | 126 | subs R2, R2, #8 127 | 128 | Becomes this VEX IR: 129 | 130 | t0 = GET:I32(16) 131 | t1 = 0x8:I32 132 | t3 = Sub32(t0,t1) 133 | PUT(16) = t3 134 | PUT(68) = 0x59FC8:I32 135 | 136 | Cool stuff! 137 | 138 | ## Citing PyVEX 139 | 140 | If you use PyVEX in an academic work, please cite the paper for which it was developed: 141 | 142 | ```bibtex 143 | @inproceedings{shoshitaishvili2015firmalice, 144 | title={Firmalice - Automatic Detection of Authentication Bypass Vulnerabilities in Binary Firmware}, 145 | author={Shoshitaishvili, Yan and Wang, Ruoyu and Hauser, Christophe and Kruegel, Christopher and Vigna, Giovanni}, 146 | booktitle={NDSS}, 147 | year={2015} 148 | } 149 | ``` 150 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | :mod:`pyvex` --- Binary Translator 2 | ================================== 3 | 4 | .. automodule:: pyvex 5 | .. automodule:: pyvex.native 6 | 7 | 8 | Translation Interface 9 | --------------------- 10 | 11 | .. automodule:: pyvex.block 12 | 13 | 14 | IR Components 15 | ------------- 16 | 17 | .. automodule:: pyvex.stmt 18 | .. automodule:: pyvex.expr 19 | .. automodule:: pyvex.const 20 | .. automodule:: pyvex.enums 21 | 22 | Lifting System 23 | -------------- 24 | 25 | .. automodule:: pyvex.data_ref 26 | .. automodule:: pyvex.lifting 27 | .. automodule:: pyvex.lifting.lift_function 28 | .. automodule:: pyvex.lifting.libvex 29 | .. automodule:: pyvex.lifting.lifter 30 | .. automodule:: pyvex.lifting.post_processor 31 | .. automodule:: pyvex.lifting.util.irsb_postprocess 32 | .. automodule:: pyvex.lifting.util 33 | .. automodule:: pyvex.lifting.util.syntax_wrapper 34 | .. automodule:: pyvex.lifting.util.vex_helper 35 | .. automodule:: pyvex.lifting.util.lifter_helper 36 | .. automodule:: pyvex.lifting.util.instr_helper 37 | 38 | Builtin IR Processors 39 | --------------------- 40 | 41 | .. automodule:: pyvex.lifting.zerodivision 42 | 43 | Errors 44 | ------ 45 | 46 | .. automodule:: pyvex.errors 47 | 48 | Utilities 49 | --------- 50 | 51 | .. 
automodule:: pyvex.utils 52 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | import datetime 7 | 8 | # -- Project information ----------------------------------------------------- 9 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 10 | 11 | project = "pyvex" 12 | project_copyright = f"{datetime.datetime.now().year}, The angr Project contributors" 13 | author = "The angr Project" 14 | 15 | # -- General configuration --------------------------------------------------- 16 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 17 | 18 | extensions = [ 19 | "sphinx.ext.autodoc", 20 | "sphinx.ext.autosummary", 21 | "sphinx.ext.coverage", 22 | "sphinx.ext.napoleon", 23 | "sphinx.ext.todo", 24 | "sphinx.ext.viewcode", 25 | "sphinx_autodoc_typehints", 26 | "myst_parser", 27 | ] 28 | 29 | templates_path = ["_templates"] 30 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 31 | 32 | # -- Options for autodoc ----------------------------------------------------- 33 | # https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html#configuration 34 | autoclass_content = "class" 35 | autodoc_default_options = { 36 | "members": True, 37 | "member-order": "bysource", 38 | "show-inheritance": True, 39 | "special-members": "__init__", 40 | "undoc-members": True, 41 | } 42 | autodoc_inherit_docstrings = True 43 | autodoc_typehints = "both" 44 | 45 | # -- Options for coverage ---------------------------------------------------- 46 | # https://www.sphinx-doc.org/en/master/usage/extensions/coverage.html 47 | coverage_write_headline = False 48 | 49 | 50 | # 
-- Options for HTML output ------------------------------------------------- 51 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 52 | 53 | html_theme = "furo" 54 | html_static_path = ["_static"] 55 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to pyVEX's documentation! 2 | ================================= 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 2 7 | :caption: Contents: 8 | 9 | Quickstart 10 | API 11 | 12 | 13 | 14 | Indices and tables 15 | ================== 16 | 17 | * :ref:`genindex` 18 | * :ref:`modindex` 19 | * :ref:`search` 20 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/quickstart.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.md 2 | :parser: myst_parser.sphinx_ 3 | -------------------------------------------------------------------------------- /fuzzing/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -eu 2 | # Copyright 2023 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | ################################################################################ 17 | 18 | # Since pyvex requires a specific developer build of archinfo, install it from source 19 | cd "$SRC"/archinfo 20 | python3 -m pip install . 
21 | 22 | cd "$SRC"/pyvex 23 | python3 -m pip install .[testing] 24 | 25 | # Generate a simple binary for the corpus 26 | echo -ne "start:\n\txor %edi, %edi\nmov \$60, %eax\nsyscall" > /tmp/corpus.s 27 | clang -Os -s /tmp/corpus.s -nostdlib -nostartfiles -m32 -o corpus 28 | zip -r "$OUT"/irsb_fuzzer_seed_corpus.zip corpus 29 | 30 | # Build fuzzers in $OUT 31 | for fuzzer in $(find $SRC -name '*_fuzzer.py'); do 32 | compile_python_fuzzer "$fuzzer" --add-binary="pyvex/lib/libpyvex.so:pyvex/lib" 33 | done 34 | -------------------------------------------------------------------------------- /fuzzing/enhanced_fdp.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # 15 | ################################################################################ 16 | """ 17 | Defines the EnhancedFuzzedDataProvider 18 | """ 19 | from atheris import FuzzedDataProvider 20 | 21 | 22 | class EnhancedFuzzedDataProvider(FuzzedDataProvider): 23 | """ 24 | Extends the functionality of FuzzedDataProvider 25 | """ 26 | 27 | def _consume_random_count(self) -> int: 28 | """ 29 | :return: A count of bytes that is strictly in range 0<=n<=remaining_bytes 30 | """ 31 | return self.ConsumeIntInRange(0, self.remaining_bytes()) 32 | 33 | def ConsumeRandomBytes(self) -> bytes: 34 | """ 35 | Consume a 'random' count of the remaining bytes 36 | :return: 0<=n<=remaining_bytes bytes 37 | """ 38 | return self.ConsumeBytes(self._consume_random_count()) 39 | 40 | def ConsumeRemainingBytes(self) -> bytes: 41 | """ 42 | :return: The remaining buffer 43 | """ 44 | return self.ConsumeBytes(self.remaining_bytes()) 45 | 46 | def ConsumeRandomString(self) -> str: 47 | """ 48 | Consume a 'random' length string, excluding surrogates 49 | :return: The string 50 | """ 51 | return self.ConsumeUnicodeNoSurrogates(self._consume_random_count()) 52 | 53 | def ConsumeRemainingString(self) -> str: 54 | """ 55 | :return: The remaining buffer, as a string without surrogates 56 | """ 57 | return self.ConsumeUnicodeNoSurrogates(self.remaining_bytes()) 58 | 59 | def PickValueInEnum(self, enum): 60 | return self.PickValueInList([e.value for e in enum]) 61 | -------------------------------------------------------------------------------- /fuzzing/irsb_fuzzer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # Copyright 2023 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | ################################################################################ 17 | import re 18 | import sys 19 | from contextlib import contextmanager 20 | from enum import IntEnum 21 | from io import StringIO 22 | 23 | import atheris 24 | 25 | with atheris.instrument_imports(include=["pyvex"]): 26 | import pyvex 27 | 28 | # Additional imports 29 | from enhanced_fdp import EnhancedFuzzedDataProvider 30 | 31 | register_error_msg = re.compile("Register .*? does not exist!") 32 | 33 | 34 | @contextmanager 35 | def nostdout(): 36 | saved_stdout = sys.stdout 37 | saved_stderr = sys.stderr 38 | sys.stdout = StringIO() 39 | sys.stderr = StringIO() 40 | yield 41 | sys.stdout = saved_stdout 42 | sys.stderr = saved_stderr 43 | 44 | 45 | # Save all available architectures off 46 | available_archs = [ 47 | pyvex.ARCH_X86, 48 | pyvex.ARCH_AMD64, 49 | pyvex.ARCH_ARM_LE, 50 | pyvex.ARCH_ARM_BE, 51 | pyvex.ARCH_ARM64_LE, 52 | pyvex.ARCH_ARM64_BE, 53 | pyvex.ARCH_PPC32, 54 | pyvex.ARCH_PPC64_BE, 55 | pyvex.ARCH_PPC64_LE, 56 | pyvex.ARCH_S390X, 57 | pyvex.ARCH_MIPS32_BE, 58 | pyvex.ARCH_MIPS32_LE, 59 | pyvex.ARCH_MIPS64_BE, 60 | pyvex.ARCH_MIPS64_LE, 61 | ] 62 | 63 | 64 | class SupportedOptLevels(IntEnum): 65 | """ 66 | Enumerates the supported optimization levels within pyvex, as derived from the documentation 67 | """ 68 | 69 | StrictUnopt = -1 70 | Unopt = 0 71 | Opt = 1 72 | StrictOpt = 2 73 | 74 | 75 | def consume_random_arch(fdp: atheris.FuzzedDataProvider) -> pyvex.arches.PyvexArch: 76 | return 
fdp.PickValueInList(available_archs) 77 | 78 | 79 | def TestOneInput(data: bytes): 80 | fdp = EnhancedFuzzedDataProvider(data) 81 | 82 | arch = consume_random_arch(fdp) 83 | 84 | try: 85 | with nostdout(): 86 | data = fdp.ConsumeRandomBytes() 87 | max_bytes = fdp.ConsumeIntInRange(0, len(data)) 88 | irsb = pyvex.lift( 89 | data, 90 | fdp.ConsumeInt(arch.bits), 91 | arch, 92 | max_bytes=fdp.ConsumeIntInRange(0, len(data)), 93 | max_inst=fdp.ConsumeInt(16), 94 | bytes_offset=fdp.ConsumeIntInRange(0, max_bytes), 95 | opt_level=fdp.PickValueInEnum(SupportedOptLevels), 96 | ) 97 | irsb.pp() 98 | return 0 99 | except pyvex.PyVEXError: 100 | return -1 101 | except ValueError as e: 102 | if re.match(register_error_msg, str(e)): 103 | return -1 104 | raise e 105 | except OverflowError: 106 | return -1 107 | 108 | 109 | def main(): 110 | atheris.Setup(sys.argv, TestOneInput) 111 | atheris.Fuzz() 112 | 113 | 114 | if __name__ == "__main__": 115 | main() 116 | -------------------------------------------------------------------------------- /make_ffi.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import platform 4 | import re 5 | import subprocess 6 | import sys 7 | 8 | import cffi 9 | 10 | log = logging.getLogger("cffier") 11 | log.setLevel(logging.DEBUG) 12 | 13 | 14 | def find_good_scan(questionable): 15 | known_good = [] 16 | 17 | end_line = len(questionable) 18 | 19 | while len(questionable): 20 | ffi = cffi.FFI() 21 | log.debug("scan - trying %d good and %d questionable", len(known_good), len(questionable)) 22 | 23 | candidate = known_good + questionable[:end_line] 24 | failed_line = -1 25 | 26 | try: 27 | ffi.cdef("\n".join(candidate)) 28 | 29 | known_good = candidate 30 | questionable = questionable[end_line:] 31 | end_line = len(questionable) 32 | except AssertionError: 33 | questionable = questionable[1:] 34 | end_line = len(questionable) 35 | except cffi.CDefError as e: 36 | if "<cdef source string>" in str(e):
37 | failed_line = int(str(e).split("\n")[-1].split(":")[1]) - 1 38 | elif str(e).count(":") >= 2: 39 | failed_line = int(str(e).split("\n")[1].split(":")[1]) 40 | failed_line_description = str(e).split("\n")[0] 41 | idx1 = failed_line_description.index('"') 42 | idx2 = failed_line_description.rindex('"') 43 | failed_reason = failed_line_description[idx1 + 1 : idx2] 44 | 45 | for i in range(failed_line, -1, -1): 46 | if failed_reason in candidate[i]: 47 | failed_line = i 48 | elif "unrecognized construct" in str(e): 49 | failed_line = int(str(e).split()[1][:-1]) - 1 50 | elif "end of input" in str(e): 51 | end_line -= 1 52 | else: 53 | raise Exception("Unknown error") 54 | except cffi.FFIError as e: 55 | if str(e).count(":") >= 2: 56 | failed_line = int(str(e).split("\n")[0].split(":")[1]) - 1 57 | else: 58 | raise Exception("Unknown error") 59 | 60 | if failed_line != -1: 61 | end_line = failed_line - len(known_good) 62 | 63 | if end_line == 0: 64 | questionable = questionable[1:] 65 | end_line = len(questionable) 66 | return known_good 67 | 68 | 69 | def doit(vex_path): 70 | cpplist = ["cl", "cpp"] 71 | cpp = os.getenv("CPP") 72 | if cpp: 73 | cpplist.insert(0, cpp) 74 | if platform.system() == "Darwin": 75 | cpplist.insert(0, "clang") 76 | 77 | errs = [] 78 | for cpp in cpplist: 79 | cmd = [cpp, "-I" + vex_path, os.path.join("pyvex_c", "pyvex.h")] 80 | if cpp in ("cl", "clang", "gcc", "cc", "clang++", "g++"): 81 | cmd.append("-E") 82 | try: 83 | p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 84 | header, stderr = p.communicate() 85 | try: 86 | header = header.decode("utf-8") 87 | stderr = stderr.decode("utf-8") 88 | except UnicodeDecodeError: 89 | # They don't have to be unicode on Windows 90 | pass 91 | 92 | if not header.strip() or p.returncode != 0: 93 | errs.append((" ".join(cmd), p.returncode, stderr)) 94 | continue 95 | else: 96 | break 97 | except OSError: 98 | errs.append((" ".join(cmd), -1, "does not exist")) 99 | continue 100 
| else: 101 | log.warning("failed commands:\n" + "\n".join("{} ({}) -- {}".format(*e) for e in errs)) 102 | raise Exception( 103 | "Couldn't process pyvex headers." 104 | + 'Please set CPP environmental variable to local path of "cpp".' 105 | + 'Note that "cpp" and "g++" are different.' 106 | ) 107 | # header = vex_pp + pyvex_pp 108 | 109 | linesep = "\r\n" if "\r\n" in header else "\n" 110 | ffi_text = linesep.join( 111 | line 112 | for line in header.split(linesep) 113 | if "#" not in line and line.strip() != "" and "jmp_buf" not in line and not ("=" in line and ";" in line) 114 | ) 115 | ffi_text = re.sub(r"\{\s*\} NoOp;", "{ int DONOTUSE; } NoOp;", ffi_text) 116 | ffi_text = re.sub(r"__attribute__\s*\(.*\)", "", ffi_text) 117 | ffi_text = re.sub(r"__declspec\s*\([^\)]*\)", "", ffi_text) 118 | ffi_text = ffi_text.replace("__const", "const") 119 | ffi_text = ffi_text.replace("__inline", "") 120 | ffi_text = ffi_text.replace("__w64", "") 121 | ffi_text = ffi_text.replace("__cdecl", "") 122 | ffi_text = ffi_text.replace("__int64", "long") 123 | ffi_lines = ffi_text.split(linesep) 124 | 125 | good = find_good_scan(ffi_lines) 126 | good += ["extern VexControl vex_control;"] 127 | 128 | with open("pyvex/vex_ffi.py", "w") as fp: 129 | fp.write('ffi_str = """' + "\n".join(good) + '"""\n') 130 | fp.write("guest_offsets = " + repr(get_guest_offsets(vex_path)) + "\n") 131 | 132 | 133 | def get_guest_offsets(vex_path): 134 | fname = os.path.join(vex_path, "libvex_guest_offsets.h") 135 | out = {} 136 | with open(fname) as fp: 137 | for line in fp: 138 | if line.startswith("#define"): 139 | _, names, val = line.split() 140 | val = int(val, 0) 141 | assert names.startswith("OFFSET_") 142 | _, arch, reg = names.split("_", 2) 143 | out[(arch, reg.lower())] = val 144 | return out 145 | 146 | 147 | if __name__ == "__main__": 148 | logging.basicConfig(level=logging.DEBUG) 149 | doit(sys.argv[1]) 150 | -------------------------------------------------------------------------------- 
/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=59", "wheel", "cffi >= 1.0.3"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "pyvex" 7 | description = "A Python interface to libVEX and VEX IR" 8 | license = { text = "BSD-2-Clause" } 9 | classifiers = [ 10 | "Programming Language :: Python :: 3", 11 | "Programming Language :: Python :: 3 :: Only", 12 | "Programming Language :: Python :: 3.10", 13 | "Programming Language :: Python :: 3.11", 14 | "Programming Language :: Python :: 3.12", 15 | "Programming Language :: Python :: 3.13", 16 | ] 17 | requires-python = ">=3.10" 18 | dependencies = [ 19 | "bitstring", 20 | "cffi>=1.0.3;implementation_name == 'cpython'", 21 | ] 22 | dynamic = ["version"] 23 | 24 | [project.readme] 25 | file = "README.md" 26 | content-type = "text/markdown" 27 | 28 | [project.urls] 29 | Homepage = "https://api.angr.io/projects/pyvex/en/latest/" 30 | Repository = "https://github.com/angr/pyvex" 31 | 32 | [project.optional-dependencies] 33 | docs = [ 34 | "furo", 35 | "myst-parser", 36 | "sphinx", 37 | "sphinx-autodoc-typehints", 38 | ] 39 | fuzzing = [ 40 | "atheris>=2.3.0", 41 | ] 42 | testing = [ 43 | "pytest", 44 | "pytest-xdist", 45 | ] 46 | 47 | [tool.setuptools] 48 | include-package-data = true 49 | license-files = ["LICENSE", "pyvex_c/LICENSE"] 50 | 51 | [tool.setuptools.packages.find] 52 | exclude = ["tests*"] 53 | namespaces = false 54 | 55 | [tool.setuptools.dynamic] 56 | version = { attr = "pyvex.__version__" } 57 | 58 | [tool.setuptools.package-data] 59 | pyvex = ["py.typed", "lib/*", "include/*"] 60 | 61 | [tool.black] 62 | line-length = 120 63 | target-version = ['py310'] 64 | force-exclude = ''' 65 | /( 66 | vex 67 | )/ 68 | ''' 69 | 70 | [tool.ruff] 71 | line-length = 120 72 | 73 | [tool.ruff.lint] 74 | select = [ 75 | "E", 76 | "F", 77 | "I", 78 | "TID", 79 | ] 80 | 
-------------------------------------------------------------------------------- /pyvex/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyVEX provides an interface that translates binary code into the VEX intermediate representation (IR). 3 | For an introduction to VEX, take a look here: https://docs.angr.io/advanced-topics/ir 4 | """ 5 | 6 | __version__ = "9.2.160.dev0" 7 | 8 | from . import const, expr, stmt 9 | from .arches import ( 10 | ARCH_AMD64, 11 | ARCH_ARM64_BE, 12 | ARCH_ARM64_LE, 13 | ARCH_ARM_BE, 14 | ARCH_ARM_BE_LE, 15 | ARCH_ARM_LE, 16 | ARCH_MIPS32_BE, 17 | ARCH_MIPS32_LE, 18 | ARCH_MIPS64_BE, 19 | ARCH_MIPS64_LE, 20 | ARCH_PPC32, 21 | ARCH_PPC64_BE, 22 | ARCH_PPC64_LE, 23 | ARCH_RISCV64_LE, 24 | ARCH_S390X, 25 | ARCH_X86, 26 | ) 27 | from .block import IRSB, IRTypeEnv 28 | from .const import get_type_size, get_type_spec_size, tag_to_const_class 29 | from .enums import ( 30 | IRCallee, 31 | IRRegArray, 32 | VEXObject, 33 | default_vex_archinfo, 34 | get_enum_from_int, 35 | get_int_from_enum, 36 | irop_enums_to_ints, 37 | vex_endness_from_string, 38 | ) 39 | from .errors import PyVEXError 40 | from .expr import get_op_retty 41 | from .lifting import lift, lifters 42 | from .native import ffi, pvc 43 | 44 | # aliases.... 
45 | IRStmt = stmt 46 | IRExpr = expr 47 | IRConst = const 48 | 49 | 50 | __all__ = [ 51 | "const", 52 | "expr", 53 | "stmt", 54 | "IRSB", 55 | "IRTypeEnv", 56 | "get_type_size", 57 | "get_type_spec_size", 58 | "irop_enums_to_ints", 59 | "tag_to_const_class", 60 | "IRCallee", 61 | "IRRegArray", 62 | "VEXObject", 63 | "default_vex_archinfo", 64 | "get_enum_from_int", 65 | "get_int_from_enum", 66 | "vex_endness_from_string", 67 | "PyVEXError", 68 | "get_op_retty", 69 | "lift", 70 | "lifters", 71 | "ffi", 72 | "pvc", 73 | "IRStmt", 74 | "IRExpr", 75 | "IRConst", 76 | "ARCH_X86", 77 | "ARCH_AMD64", 78 | "ARCH_ARM_BE", 79 | "ARCH_ARM_BE_LE", 80 | "ARCH_ARM_LE", 81 | "ARCH_ARM64_LE", 82 | "ARCH_ARM64_BE", 83 | "ARCH_PPC32", 84 | "ARCH_PPC64_BE", 85 | "ARCH_PPC64_LE", 86 | "ARCH_S390X", 87 | "ARCH_MIPS32_BE", 88 | "ARCH_MIPS32_LE", 89 | "ARCH_MIPS64_BE", 90 | "ARCH_MIPS64_LE", 91 | "ARCH_RISCV64_LE", 92 | ] 93 | -------------------------------------------------------------------------------- /pyvex/arches.py: -------------------------------------------------------------------------------- 1 | from ._register_info import REGISTER_OFFSETS 2 | from .enums import default_vex_archinfo, vex_endness_from_string 3 | from .types import Register 4 | from .vex_ffi import guest_offsets 5 | 6 | 7 | class PyvexArch: 8 | """ 9 | An architecture definition for use with pyvex - usable version. 
10 | """ 11 | 12 | def __init__(self, name: str, bits: int, memory_endness: str, instruction_endness: str = "Iend_BE"): 13 | self.name = name 14 | self.bits = bits 15 | self.memory_endness = memory_endness 16 | self.instruction_endness = instruction_endness 17 | self.byte_width = 8 18 | self.register_list: list[Register] = [] 19 | self.registers: dict[str, tuple[int, int]] = {} 20 | self.vex_arch = { 21 | "X86": "VexArchX86", 22 | "AMD64": "VexArchAMD64", 23 | "ARM": "VexArchARM", 24 | "ARM64": "VexArchARM64", 25 | "PPC32": "VexArchPPC32", 26 | "PPC64": "VexArchPPC64", 27 | "S390X": "VexArchS390X", 28 | "MIPS32": "VexArchMIPS32", 29 | "MIPS64": "VexArchMIPS64", 30 | "RISCV64": "VexArchRISCV64", 31 | }[name] 32 | self.ip_offset = guest_offsets[ 33 | ( 34 | self.vex_name_small, 35 | { 36 | "X86": "eip", 37 | "AMD64": "rip", 38 | "ARM": "r15t", 39 | "ARM64": "pc", 40 | "PPC32": "cia", 41 | "PPC64": "cia", 42 | "S390X": "ia", 43 | "MIPS32": "pc", 44 | "MIPS64": "pc", 45 | "RISCV64": "pc", 46 | }[name], 47 | ) 48 | ] 49 | self.vex_archinfo = default_vex_archinfo() 50 | if memory_endness == "Iend_BE": 51 | self.vex_archinfo["endness"] = vex_endness_from_string("VexEndnessBE") 52 | 53 | def __repr__(self): 54 | return f"" 55 | 56 | @property 57 | def vex_name_small(self): 58 | return self.vex_arch[7:].lower() 59 | 60 | def translate_register_name(self, offset, size=None): # pylint: disable=unused-argument 61 | for (arch, reg), offset2 in guest_offsets.items(): 62 | if arch == self.vex_name_small and offset2 == offset: 63 | return reg 64 | for (arch, reg), offset2 in REGISTER_OFFSETS.items(): 65 | if arch == self.vex_name_small and offset2 == offset: 66 | return reg 67 | return str(offset) 68 | 69 | def get_register_offset(self, name: str) -> int: 70 | arch_reg_tuple = (self.vex_name_small, name) 71 | if arch_reg_tuple in guest_offsets: 72 | return guest_offsets[arch_reg_tuple] 73 | elif arch_reg_tuple in REGISTER_OFFSETS: 74 | return REGISTER_OFFSETS[arch_reg_tuple] 75 | 
else: 76 | raise KeyError(f"Unknown register {name} for architecture {self.name}") 77 | 78 | 79 | ARCH_X86 = PyvexArch("X86", 32, "Iend_LE") 80 | ARCH_AMD64 = PyvexArch("AMD64", 64, "Iend_LE") 81 | ARCH_ARM_LE = PyvexArch("ARM", 32, "Iend_LE", instruction_endness="Iend_LE") 82 | ARCH_ARM_BE_LE = PyvexArch("ARM", 32, "Iend_BE", instruction_endness="Iend_LE") 83 | ARCH_ARM_BE = PyvexArch("ARM", 32, "Iend_LE") 84 | ARCH_ARM64_LE = PyvexArch("ARM64", 64, "Iend_LE", instruction_endness="Iend_LE") 85 | ARCH_ARM64_BE = PyvexArch("ARM64", 64, "Iend_BE") 86 | ARCH_PPC32 = PyvexArch("PPC32", 32, "Iend_BE") 87 | ARCH_PPC64_BE = PyvexArch("PPC64", 64, "Iend_BE") 88 | ARCH_PPC64_LE = PyvexArch("PPC64", 64, "Iend_LE") 89 | ARCH_S390X = PyvexArch("S390X", 64, "Iend_BE") 90 | ARCH_MIPS32_BE = PyvexArch("MIPS32", 32, "Iend_BE") 91 | ARCH_MIPS32_LE = PyvexArch("MIPS32", 32, "Iend_LE") 92 | ARCH_MIPS64_BE = PyvexArch("MIPS64", 64, "Iend_BE") 93 | ARCH_MIPS64_LE = PyvexArch("MIPS64", 64, "Iend_LE") 94 | ARCH_RISCV64_LE = PyvexArch("RISCV64", 64, "Iend_LE", instruction_endness="Iend_LE") 95 | -------------------------------------------------------------------------------- /pyvex/const.py: -------------------------------------------------------------------------------- 1 | # pylint:disable=missing-class-docstring,raise-missing-from,not-callable 2 | import re 3 | from abc import ABC 4 | 5 | from .enums import VEXObject, get_enum_from_int 6 | from .errors import PyVEXError 7 | from .native import ffi, pvc 8 | 9 | 10 | # IRConst hierarchy 11 | class IRConst(VEXObject, ABC): 12 | __slots__ = ["_value"] 13 | 14 | type: str 15 | size: int 16 | tag: str 17 | c_constructor = None 18 | _value: int 19 | 20 | def pp(self): 21 | print(str(self)) 22 | 23 | @property 24 | def value(self) -> int: 25 | return self._value 26 | 27 | @staticmethod 28 | def _from_c(c_const): 29 | if c_const[0] == ffi.NULL: 30 | return None 31 | 32 | tag = get_enum_from_int(c_const.tag) 33 | 34 | try: 35 | return 
tag_to_const_class(tag)._from_c(c_const) 36 | except KeyError: 37 | raise PyVEXError("Unknown/unsupported IRConstTag %s\n" % tag) 38 | 39 | _translate = _from_c 40 | 41 | @classmethod 42 | def _to_c(cls, const): 43 | # libvex throws an exception when constructing a U1 with a value other than 0 or 1 44 | if const.tag == "Ico_U1" and const.value not in (0, 1): 45 | raise PyVEXError("Invalid U1 value: %d" % const.value) 46 | 47 | try: 48 | return cls.c_constructor(const.value) 49 | except KeyError: 50 | raise PyVEXError("Unknown/unsupported IRConstTag %s]n" % const.tag) 51 | 52 | def __eq__(self, other): 53 | if not isinstance(other, type(self)): 54 | return False 55 | return self._value == other._value 56 | 57 | def __hash__(self): 58 | return hash((type(self), self._value)) 59 | 60 | 61 | class U1(IRConst): 62 | __slots__: list[str] = [] 63 | 64 | type = "Ity_I1" 65 | size = 1 66 | tag = "Ico_U1" 67 | op_format = "1" 68 | c_constructor = pvc.IRConst_U1 69 | 70 | def __init__(self, value): 71 | self._value = value 72 | 73 | def __str__(self): 74 | return "%d" % self.value 75 | 76 | @staticmethod 77 | def _from_c(c_const): 78 | return U1(c_const.Ico.U1) 79 | 80 | 81 | class U8(IRConst): 82 | __slots__: list[str] = [] 83 | 84 | type = "Ity_I8" 85 | size = 8 86 | tag = "Ico_U8" 87 | op_format = "8" 88 | c_constructor = pvc.IRConst_U8 89 | 90 | def __init__(self, value): 91 | self._value = value 92 | 93 | def __str__(self): 94 | return "0x%02x" % self.value 95 | 96 | @staticmethod 97 | def _from_c(c_const): 98 | return _U8_POOL[c_const.Ico.U8] 99 | 100 | 101 | _U8_POOL = [U8(i) for i in range(256)] 102 | 103 | 104 | class U16(IRConst): 105 | __slots__: list[str] = [] 106 | 107 | type = "Ity_I16" 108 | size = 16 109 | tag = "Ico_U16" 110 | op_format = "16" 111 | c_constructor = pvc.IRConst_U16 112 | 113 | def __init__(self, value): 114 | self._value = value 115 | 116 | def __str__(self): 117 | return "0x%04x" % self.value 118 | 119 | @staticmethod 120 | def 
_from_c(c_const): 121 | val = c_const.Ico.U16 122 | if val < 1024: 123 | return _U16_POOL[val] 124 | if val >= 0xFC00: 125 | return _U16_POOL[val - 0xFC00 + 1024] 126 | return U16(val) 127 | 128 | 129 | _U16_POOL = [U16(i) for i in range(1024)] + [U16(i) for i in range(0xFC00, 0xFFFF + 1)] 130 | 131 | 132 | class U32(IRConst): 133 | __slots__: list[str] = [] 134 | 135 | type = "Ity_I32" 136 | size = 32 137 | tag = "Ico_U32" 138 | op_format = "32" 139 | c_constructor = pvc.IRConst_U32 140 | 141 | def __init__(self, value: int): 142 | self._value = value 143 | 144 | def __str__(self): 145 | return "0x%08x" % self.value 146 | 147 | @staticmethod 148 | def _from_c(c_const): 149 | val = c_const.Ico.U32 150 | if val < 1024: 151 | return _U32_POOL[val] 152 | if val >= 0xFFFFFC00: 153 | return _U32_POOL[val - 0xFFFFFC00 + 1024] 154 | return U32(val) 155 | 156 | 157 | _U32_POOL = [U32(i) for i in range(1024)] + [U32(i) for i in range(0xFFFFFC00, 0xFFFFFFFF + 1)] 158 | 159 | 160 | class U64(IRConst): 161 | __slots__: list[str] = [] 162 | 163 | type = "Ity_I64" 164 | size = 64 165 | tag = "Ico_U64" 166 | op_format = "64" 167 | c_constructor = pvc.IRConst_U64 168 | 169 | def __init__(self, value): 170 | self._value = value 171 | 172 | def __str__(self): 173 | return "0x%016x" % self.value 174 | 175 | @staticmethod 176 | def _from_c(c_const): 177 | val = c_const.Ico.U64 178 | if val < 1024: 179 | return _U64_POOL[val] 180 | if val >= 0xFFFFFFFFFFFFFC00: 181 | return _U64_POOL[val - 0xFFFFFFFFFFFFFC00 + 1024] 182 | return U64(val) 183 | 184 | 185 | _U64_POOL = [U64(i) for i in range(1024)] + [U64(i) for i in range(0xFFFFFFFFFFFFFC00, 0xFFFFFFFFFFFFFFFF + 1)] 186 | 187 | # Integer Type Imagination 188 | class_cache = {1: U1, 8: U8, 16: U16, 32: U32, 64: U64} 189 | 190 | 191 | def vex_int_class(size): 192 | try: 193 | return class_cache[size] 194 | except KeyError: 195 | 196 | class VexInt(IRConst): 197 | type = "Ity_I%d" % size 198 | tag = "Ico_U%d" % size 199 | op_format = 
str(size) 200 | 201 | def __init__(self, value): 202 | IRConst.__init__(self) 203 | self._value = value 204 | 205 | def __str__(self): 206 | return f"(0x{self.value:x} :: {self.type})" 207 | 208 | VexInt.__name__ = "U%d" % size 209 | class_cache[size] = VexInt 210 | return VexInt 211 | 212 | 213 | class F32(IRConst): 214 | __slots__: list[str] = [] 215 | 216 | type = "Ity_F32" 217 | tag = "Ico_F32" 218 | op_format = "F32" 219 | c_constructor = pvc.IRConst_F32 220 | size = 32 221 | 222 | def __init__(self, value): 223 | self._value = value 224 | 225 | def __str__(self): 226 | return "%f" % self.value 227 | 228 | @staticmethod 229 | def _from_c(c_const): 230 | return F32(c_const.Ico.F32) 231 | 232 | 233 | class F32i(IRConst): 234 | __slots__: list[str] = [] 235 | 236 | type = "Ity_F32" 237 | tag = "Ico_F32i" 238 | op_format = "F32" 239 | c_constructor = pvc.IRConst_F32i 240 | size = 32 241 | 242 | def __init__(self, value): 243 | self._value = value 244 | 245 | def __str__(self): 246 | return "%f" % self.value 247 | 248 | @staticmethod 249 | def _from_c(c_const): 250 | return F32i(c_const.Ico.F32) 251 | 252 | 253 | class F64(IRConst): 254 | __slots__: list[str] = [] 255 | 256 | type = "Ity_F64" 257 | tag = "Ico_F64" 258 | op_format = "F64" 259 | c_constructor = pvc.IRConst_F64 260 | size = 64 261 | 262 | def __init__(self, value): 263 | self._value = value 264 | 265 | def __str__(self): 266 | return "%f" % self.value 267 | 268 | @staticmethod 269 | def _from_c(c_const): 270 | return F64(c_const.Ico.F64) 271 | 272 | 273 | class F64i(IRConst): 274 | __slots__: list[str] = [] 275 | 276 | type = "Ity_F64" 277 | tag = "Ico_F64i" 278 | op_format = "F64" 279 | c_constructor = pvc.IRConst_F64i 280 | size = 64 281 | 282 | def __init__(self, value): 283 | self._value = value 284 | 285 | def __str__(self): 286 | return "%f" % self.value 287 | 288 | @staticmethod 289 | def _from_c(c_const): 290 | return F64i(c_const.Ico.F64) 291 | 292 | 293 | class V128(IRConst): 294 | 
__slots__: list[str] = [] 295 | 296 | type = "Ity_V128" 297 | tag = "Ico_V128" 298 | op_format = "V128" 299 | c_constructor = pvc.IRConst_V128 300 | size = 128 301 | 302 | def __init__(self, value): 303 | self._value = value 304 | 305 | def __str__(self): 306 | return "%x" % self.value 307 | 308 | # vex doesn't store a full 128 bit constant, instead it stores 1 bit per 8 bits of data 309 | # and duplicates each bit 8 times 310 | @staticmethod 311 | def _from_c(c_const): 312 | base_const = c_const.Ico.V128 313 | real_const = 0 314 | for i in range(16): 315 | if (base_const >> i) & 1 == 1: 316 | real_const |= 0xFF << (8 * i) 317 | return V128(real_const) 318 | 319 | 320 | class V256(IRConst): 321 | __slots__: list[str] = [] 322 | 323 | type = "Ity_V256" 324 | tag = "Ico_V256" 325 | op_format = "V256" 326 | c_constructor = pvc.IRConst_V256 327 | size = 256 328 | 329 | def __init__(self, value): 330 | self._value = value 331 | 332 | def __str__(self): 333 | return "%x" % self.value 334 | 335 | # see above 336 | @staticmethod 337 | def _from_c(c_const): 338 | base_const = c_const.Ico.V256 339 | real_const = 0 340 | for i in range(32): 341 | if (base_const >> i) & 1 == 1: 342 | real_const |= 0xFF << (8 * i) 343 | return V256(real_const) 344 | 345 | 346 | predefined_types = [U1, U8, U16, U32, U64, F32, F32i, F64, F64i, V128, V256] 347 | predefined_types_map = {c.type: c for c in predefined_types} 348 | predefined_classes_map = {c.tag: c for c in predefined_types} 349 | 350 | # precompiled regexes 351 | int_ty_re = re.compile(r"Ity_I\d+") 352 | int_tag_re = re.compile(r"Ico_U\d+") 353 | tag_size_re = re.compile(r"Ico_[UFV](?P<size>\d+)i?") 354 | 355 | 356 | def is_int_ty(ty): 357 | m = int_ty_re.match(ty) 358 | return m is not None 359 | 360 | 361 | def is_int_tag(tag): 362 | m = int_tag_re.match(tag) 363 | return m is not None 364 | 365 | 366 | def get_tag_size(tag): 367 | m = tag_size_re.match(tag) 368 | if m is None: 369 | raise ValueError("Tag %s does not have size" % tag)
370 | return int(m.group("size")) 371 | 372 | 373 | type_str_re = re.compile(r"Ity_[IFDV](?P<size>\d+)") 374 | type_tag_str_re = re.compile(r"[IFDV]?(?P<size>\d+)[SU]?") 375 | 376 | 377 | def get_type_size(ty): 378 | """ 379 | Returns the size, in BITS, of a VEX type specifier 380 | e.g., Ity_I16 -> 16 381 | 382 | :param ty: 383 | :return: 384 | """ 385 | m = type_str_re.match(ty) 386 | if m is None: 387 | raise ValueError("Type %s does not have size" % ty) 388 | return int(m.group("size")) 389 | 390 | 391 | def get_type_spec_size(ty): 392 | """ 393 | Get the width of a "type specifier" 394 | like I16U 395 | or F16 396 | or just 16 397 | (Yes, this really just takes the int out. If we must special-case, do it here. 398 | :param tyspec: 399 | :return: 400 | """ 401 | m = type_tag_str_re.match(ty) 402 | if m is None: 403 | raise ValueError("Type specifier %s does not have size" % ty) 404 | return int(m.group("size")) 405 | 406 | 407 | def ty_to_const_class(ty): 408 | try: 409 | return predefined_types_map[ty] 410 | except KeyError: 411 | if is_int_ty(ty): 412 | size = get_type_size(ty) 413 | return vex_int_class(size) 414 | else: 415 | raise ValueError("Type %s does not exist" % ty) 416 | 417 | 418 | def tag_to_const_class(tag): 419 | try: 420 | return predefined_classes_map[tag] 421 | except KeyError: 422 | if is_int_tag(tag): 423 | size = get_tag_size(tag) 424 | return vex_int_class(size) 425 | else: 426 | raise ValueError("Tag %s does not exist" % tag) 427 | -------------------------------------------------------------------------------- /pyvex/const_val.py: -------------------------------------------------------------------------------- 1 | class ConstVal: 2 | """ 3 | A constant value object. Indicates a constant value assignment to a VEX tmp variable. 4 | 5 | :ivar tmp: The tmp variable being assigned to. 6 | :ivar value: The value of the tmp variable.
def data_ref_type_str(dref_enum):
    """
    Translate an ``enum DataRefTypes`` value into a string representation.

    :param dref_enum: The numeric enum value.
    :return:          "unknown", "integer", "fp" or "integer(store)" for the four known values,
                      and "INVALID" for anything else.
    """
    if dref_enum == 0x9000:
        return "unknown"
    elif dref_enum == 0x9001:
        return "integer"
    elif dref_enum == 0x9002:
        return "fp"
    elif dref_enum == 0x9003:
        return "integer(store)"
    else:
        return "INVALID"


class DataRef:
    """
    A data reference object. Indicates a data access in an IRSB.

    :ivar data_addr:    The address of the data being accessed
    :ivar data_size:    The size of the data being accessed, in bytes
    :ivar data_type:    The type of the data, a DataRefTypes enum.
    :ivar stmt_idx:     The IRSB statement index containing the data access
    :ivar ins_addr:     The address of the instruction performing the data access
    """

    __slots__ = ("data_addr", "data_size", "data_type", "stmt_idx", "ins_addr")

    def __init__(self, data_addr, data_size, data_type, stmt_idx, ins_addr):
        self.data_addr = data_addr
        self.data_size = data_size
        self.data_type = data_type
        self.stmt_idx = stmt_idx
        self.ins_addr = ins_addr

    @property
    def data_type_str(self):
        """
        The data ref type as a string: "unknown", "integer", "fp", "integer(store)" or "INVALID"
        """
        return data_ref_type_str(self.data_type)

    def __repr__(self):
        # The format string needs one placeholder per tuple element; an empty format string
        # makes the % operator raise TypeError.
        return "<DataRef accessing %#x %s:%d at %#x:%d>" % (
            self.data_addr,
            data_ref_type_str(self.data_type),
            self.data_size,
            self.ins_addr,
            self.stmt_idx,
        )

    @classmethod
    def from_c(cls, r):
        """
        Build a DataRef from a C-side record.

        :param r: An object exposing ``data_addr``, ``size``, ``data_type``, ``stmt_idx`` and ``ins_addr``.
        :return:  The equivalent DataRef.
        """
        return cls(r.data_addr, r.size, r.data_type, r.stmt_idx, r.ins_addr)
class IRRegArray(VEXObject):
    """
    A section of the guest state that we want to be able to index at run time, so as to be able to describe indexed
    or rotating register files on the guest.

    :ivar int base:     The offset into the state that this array starts
    :ivar str elemTy:   The types of the elements in this array, as VEX enum strings
    :ivar int nElems:   The number of elements in this array
    """

    __slots__ = ["base", "elemTy", "nElems"]

    def __init__(self, base, elemTy, nElems):
        VEXObject.__init__(self)
        self.base, self.elemTy, self.nElems = base, elemTy, nElems

    def __str__(self):
        # Drop the first four characters of the enum string (e.g. the "Ity_" prefix) for display.
        short_ty = self.elemTy[4:]
        return "%s:%sx%d" % (self.base, short_ty, self.nElems)

    @staticmethod
    def _from_c(c_arr):
        # The C struct stores the element type as an integer; map it back to its enum string.
        elem_ty_name = ints_to_enums[c_arr.elemTy]
        return IRRegArray(c_arr.base, elem_ty_name, c_arr.nElems)

    @staticmethod
    def _to_c(arr):
        # Inverse of _from_c: turn the enum string back into the integer the C constructor takes.
        elem_ty_int = get_int_from_enum(arr.elemTy)
        return pvc.mkIRRegArray(arr.base, elem_ty_int, arr.nElems)
def default_vex_archinfo() -> dict[str, Any]:
    """
    Build a fresh archinfo dictionary populated with default values.

    A new dictionary (with a new nested ``hwcache_info`` dictionary) is created on every call.

    :return: The default archinfo mapping, with little-endian endness.
    """
    hwcache_info: dict[str, Any] = {
        "num_levels": 0,
        "num_caches": 0,
        "caches": None,
        "icaches_maintain_coherence": True,
    }
    archinfo: dict[str, Any] = {
        "hwcaps": 0,
        "endness": vex_endness_from_string("VexEndnessLE"),
        "ppc_icache_line_szB": 0,
        "ppc_dcbz_szB": 0,
        "ppc_dcbzl_szB": 0,
        "arm64_dMinLine_lg2_szB": 0,
        "arm64_iMinLine_lg2_szB": 0,
        "hwcache_info": hwcache_info,
        "x86_cr0": 0xFFFFFFFF,
    }
    return archinfo
29 | """ 30 | 31 | pass 32 | -------------------------------------------------------------------------------- /pyvex/lifting/__init__.py: -------------------------------------------------------------------------------- 1 | from .gym import AARCH64Spotter, AMD64Spotter, ARMSpotter, X86Spotter 2 | from .libvex import LIBVEX_SUPPORTED_ARCHES, LibVEXLifter 3 | from .lift_function import lift, lifters, register 4 | from .lifter import Lifter 5 | from .post_processor import Postprocessor 6 | from .zerodivision import ZeroDivisionPostProcessor 7 | 8 | for arch in LIBVEX_SUPPORTED_ARCHES: 9 | register(LibVEXLifter, arch) 10 | register(AARCH64Spotter, "AARCH64") 11 | register(ARMSpotter, "ARM") 12 | register(ARMSpotter, "ARMEL") 13 | register(ARMSpotter, "ARMHF") 14 | register(ARMSpotter, "ARMCortexM") 15 | register(AMD64Spotter, "AMD64") 16 | register(X86Spotter, "X86") 17 | 18 | __all__ = ["Lifter", "Postprocessor", "lift", "register", "lifters", "ZeroDivisionPostProcessor"] 19 | -------------------------------------------------------------------------------- /pyvex/lifting/gym/README.md: -------------------------------------------------------------------------------- 1 | # The Gym 2 | 3 | This is where we're putting non-libvex lifters that we feel should be included with the pyvex distribution. 4 | 5 | These will probably be mostly "spotters", which correct for gaps in libvex's instruction support. 
6 | 7 | 8 | -------------------------------------------------------------------------------- /pyvex/lifting/gym/__init__.py: -------------------------------------------------------------------------------- 1 | from .aarch64_spotter import AARCH64Spotter 2 | from .arm_spotter import ARMSpotter 3 | from .x86_spotter import AMD64Spotter, X86Spotter 4 | 5 | __all__ = ("ARMSpotter", "AARCH64Spotter", "X86Spotter", "AMD64Spotter") 6 | -------------------------------------------------------------------------------- /pyvex/lifting/gym/aarch64_spotter.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from pyvex.lifting.util.instr_helper import Instruction 4 | from pyvex.lifting.util.lifter_helper import GymratLifter 5 | 6 | log = logging.getLogger(__name__) 7 | 8 | 9 | class Aarch64Instruction(Instruction): # pylint: disable=abstract-method 10 | # NOTE: WARNING: There is no MRS, MSR, SYSL in VEX's ARM implementation 11 | # You must use straight nasty hacks instead. 
class Instruction_SYSRET(Instruction):
    name = "SYSRET"
    # 0100 1000 | 0000 1111 | 0000 0111
    bin_format = "010010000000111100000111"  # 48 0f 07

    def compute_result(self, *args):
        """
        Call the arch-specific SYSRET dirty helper and end the block with a return-kind jump
        to the 64-bit value it yields.
        """
        result = self.dirty(Type.int_64, "%sg_dirtyhelper_SYSRET" % self.arch.name.lower(), ())
        self.jump(None, result, JumpKind.Ret)
class Instruction_RDMSR(Instruction):
    name = "RDMSR"
    bin_format = "0000111100110010"  # 0f 32

    def compute_result(self, *args):
        """
        Pass ecx to the arch-specific RDMSR dirty helper and split its 64-bit result:
        the low half goes to eax/rax and the high half to edx/rdx.
        """
        msr_index = self.get("ecx", Type.int_32)
        helper_name = "%sg_dirtyhelper_RDMSR" % self.arch.name.lower()
        msr_value = self.dirty(Type.int_64, helper_name, (msr_index,))
        high_half = msr_value.narrow_high(Type.int_32)
        low_half = msr_value.narrow_low(Type.int_32)

        if self.arch.bits == 32:
            self.put(low_half, "eax")
            self.put(high_half, "edx")
            return

        # Otherwise write the full-width registers, zero-extending each half.
        self.put(low_half.widen_unsigned(Type.int_64), "rax")
        self.put(high_half.widen_unsigned(Type.int_64), "rdx")
class Instruction_AAD(Instruction):
    name = "AAD"
    bin_format = "11010101iiiiiiii"

    # From https://www.felixcloutier.com/x86/aad
    def compute_result(self):  # pylint: disable=arguments-differ
        """
        Emit AL = (AL + AH * imm8) & 0xFF and AH = 0, where imm8 is the ``i`` field of the encoding.
        """
        imm_base = self.constant(int(self.data["i"], 2), Type.int_8)
        old_al = self.get("al", Type.int_8)
        old_ah = self.get("ah", Type.int_8)
        new_al = (old_al + (old_ah * imm_base)) & 0xFF
        new_ah = self.constant(0, Type.int_8)
        self.put(new_ah, "ah")
        self.put(new_al, "al")
        log.debug(
            "The generalized AAD instruction is not supported by VEX, and is handled specially by pyvex."
            " It has no flag handling at present.  See pyvex/lifting/gym/x86_spotter.py for details"
        )

    # TODO: Flags
25 | "PPC32", 26 | "PPC64", 27 | "S390X", 28 | "RISCV64", 29 | } 30 | 31 | VEX_MAX_INSTRUCTIONS = 99 32 | VEX_MAX_BYTES = 5000 33 | 34 | 35 | class VexRegisterUpdates: 36 | VexRegUpd_INVALID = 0x700 37 | VexRegUpdSpAtMemAccess = 0x701 38 | VexRegUpdUnwindregsAtMemAccess = 0x702 39 | VexRegUpdAllregsAtMemAccess = 0x703 40 | VexRegUpdAllregsAtEachInsn = 0x704 41 | VexRegUpdLdAllregsAtEachInsn = 0x705 42 | 43 | 44 | class LibVEXLifter(Lifter): 45 | __slots__ = () 46 | 47 | REQUIRE_DATA_C = True 48 | 49 | @staticmethod 50 | def get_vex_log(): 51 | return bytes(ffi.buffer(pvc.msg_buffer, pvc.msg_current_size)).decode() if pvc.msg_buffer != ffi.NULL else None 52 | 53 | def _lift(self): 54 | if TYPE_CHECKING: 55 | assert isinstance(self.irsb.arch, LibvexArch) 56 | assert isinstance(self.data, CLiftSource) 57 | try: 58 | _libvex_lock.acquire() 59 | 60 | pvc.log_level = log.getEffectiveLevel() 61 | vex_arch = getattr(pvc, self.irsb.arch.vex_arch, None) 62 | assert vex_arch is not None 63 | 64 | if self.bytes_offset is None: 65 | self.bytes_offset = 0 66 | 67 | if self.max_bytes is None or self.max_bytes > VEX_MAX_BYTES: 68 | max_bytes = VEX_MAX_BYTES 69 | else: 70 | max_bytes = self.max_bytes 71 | 72 | if self.max_inst is None or self.max_inst > VEX_MAX_INSTRUCTIONS: 73 | max_inst = VEX_MAX_INSTRUCTIONS 74 | else: 75 | max_inst = self.max_inst 76 | 77 | strict_block_end = self.strict_block_end 78 | if strict_block_end is None: 79 | strict_block_end = True 80 | 81 | if self.cross_insn_opt: 82 | px_control = VexRegisterUpdates.VexRegUpdUnwindregsAtMemAccess 83 | else: 84 | px_control = VexRegisterUpdates.VexRegUpdLdAllregsAtEachInsn 85 | 86 | self.irsb.arch.vex_archinfo["hwcache_info"]["caches"] = ffi.NULL 87 | lift_r = pvc.vex_lift( 88 | vex_arch, 89 | self.irsb.arch.vex_archinfo, 90 | self.data + self.bytes_offset, 91 | self.irsb.addr, 92 | max_inst, 93 | max_bytes, 94 | self.opt_level, 95 | self.traceflags, 96 | self.allow_arch_optimizations, 97 | strict_block_end, 98 | 1 
def lift(
    data: LiftSource,
    addr,
    arch,
    max_bytes=None,
    max_inst=None,
    bytes_offset=0,
    opt_level=1,
    traceflags=0,
    strict_block_end=True,
    inner=False,
    skip_stmts=False,
    collect_data_refs=False,
    cross_insn_opt=True,
    load_from_ro_regions=False,
    const_prop=False,
):
    """
    Recursively lifts blocks using the registered lifters and postprocessors. Tries each lifter in the order in
    which they are registered on the data to lift.

    If a lifter raises a LiftingException on the data, it is skipped.
    If it succeeds and returns a block with a jumpkind of Ijk_NoDecode, all of the lifters are tried on the rest
    of the data and if they work, their output is appended to the first block.

    :param arch:            The arch to lift the data as.
    :param addr:            The starting address of the block. Affects the IMarks.
    :param data:            The bytes to lift as either a python string of bytes or a cffi buffer object.
    :param max_bytes:       The maximum number of bytes to lift. If set to None, no byte limit is used.
                            Must be given when ``data`` is a cffi pointer.
    :param max_inst:        The maximum number of instructions to lift. If set to None, no instruction limit is used.
    :param bytes_offset:    The offset into `data` to start lifting at.
    :param opt_level:       The level of optimization to apply to the IR, -1 through 2. -1 is the strictest
                            unoptimized level, 0 is unoptimized but will perform some lookahead/lookbehind
                            optimizations, 1 performs constant propagation, and 2 performs loop unrolling,
                            which honestly doesn't make much sense in the context of pyvex. The default is 1.
    :param traceflags:      The libVEX traceflags, controlling VEX debug prints.
    :param strict_block_end: Forwarded to the lifters unchanged.
    :param inner:           True for the recursive call that decodes the remainder of a block. Postprocessors
                            are only run when this is False.
    :param skip_stmts:      Forwarded to the lifters. It is forced to False on every re-lift below.
    :param collect_data_refs: Forwarded to the lifters unchanged.
    :param cross_insn_opt:  Forwarded to the lifters unchanged, including on every recursive re-lift.
    :param load_from_ro_regions: Forwarded to the lifters unchanged.
    :param const_prop:      Forwarded to the lifters unchanged.
    :return:                The lifted IRSB. If no registered lifter can lift the data, an empty block whose
                            jumpkind is Ijk_NoDecode.
    :raises PyVEXError:     If there is no data to lift, or a cffi pointer is given without ``max_bytes``.
    :raises TypeError:      If ``data`` is a unicode string, or a postprocessor asks for statements that are
                            already available.
    :raises RuntimeError:   If a lifter declares neither REQUIRE_DATA_C nor REQUIRE_DATA_PY.

    .. note:: Explicitly specifying the number of instructions to lift (`max_inst`) may not always work
              exactly as expected. For example, on MIPS, it is meaningless to lift a branch or jump
              instruction without its delay slot. VEX attempts to Do The Right Thing by possibly decoding
              fewer instructions than requested. Specifically, this means that lifting a branch or jump
              on MIPS as a single instruction (`max_inst=1`) will result in an empty IRSB, and subsequent
              attempts to run this block will raise `SimIRSBError('Empty IRSB passed to SimIRSB.')`.

    .. note:: If no instruction and byte limit is used, pyvex will continue lifting the block until the block
              ends properly or until it runs out of data to lift.
    """
    if max_bytes is not None and max_bytes <= 0:
        raise PyVEXError("Cannot lift block with no data (max_bytes <= 0)")

    if not data:
        raise PyVEXError("Cannot lift block with no data (data is empty)")

    if isinstance(data, str):
        raise TypeError("Cannot pass unicode string as data to lifter")

    py_data: PyLiftSource | None
    if isinstance(data, (bytes, bytearray, memoryview)):
        py_data = data
        c_data = None
        allow_arch_optimizations = False
    else:
        if max_bytes is None:
            raise PyVEXError("Cannot lift block with ffi pointer and no size (max_bytes is None)")
        c_data = data
        py_data = None
        allow_arch_optimizations = True

    # In order to attempt to preserve the property that
    # VEX lifts the same bytes to the same IR at all times when optimizations are disabled
    # we hack off all of VEX's non-IROpt optimizations when opt_level == -1.
    # This is intended to enable comparisons of the lifted IR between code that happens to be
    # found in different contexts.
    if opt_level < 0:
        allow_arch_optimizations = False
        opt_level = 0

    for lifter in lifters[arch.name]:
        try:
            u_data: LiftSource = data
            if lifter.REQUIRE_DATA_C:
                if c_data is None:
                    assert py_data is not None
                    if isinstance(py_data, (bytearray, memoryview)):
                        u_data = ffi.from_buffer(ffi.BVoidP, py_data)
                    else:
                        # Immutable bytes are copied with 8 bytes of zero padding appended.
                        u_data = ffi.from_buffer(ffi.BVoidP, py_data + b"\0" * 8)
                    # NOTE(review): this clamp does not subtract bytes_offset; confirm the C lifter
                    # cannot be asked to read past the end of the buffer.
                    max_bytes = min(len(py_data), max_bytes) if max_bytes is not None else len(py_data)
                else:
                    u_data = c_data
                skip = 0
            elif lifter.REQUIRE_DATA_PY:
                if bytes_offset and arch.name.startswith("ARM") and (addr & 1) == 1:
                    skip = bytes_offset - 1
                else:
                    skip = bytes_offset
                if py_data is None:
                    assert c_data is not None
                    if max_bytes is None:
                        log.debug("Cannot create py_data from c_data when no max length is given")
                        continue
                    u_data = ffi.buffer(c_data + skip, max_bytes)[:]
                else:
                    if max_bytes is None:
                        u_data = py_data[skip:]
                    else:
                        u_data = py_data[skip : skip + max_bytes]
            else:
                # `lifter` is itself a class, so report its name; `lifter.__class__` would be `type`.
                raise RuntimeError(
                    "Incorrect lifter configuration. What type of data does %s expect?" % lifter.__name__
                )

            try:
                final_irsb = lifter(arch, addr).lift(
                    u_data,
                    bytes_offset - skip,
                    max_bytes,
                    max_inst,
                    opt_level,
                    traceflags,
                    allow_arch_optimizations,
                    strict_block_end,
                    skip_stmts,
                    collect_data_refs=collect_data_refs,
                    cross_insn_opt=cross_insn_opt,
                    load_from_ro_regions=load_from_ro_regions,
                    const_prop=const_prop,
                )
            except SkipStatementsError:
                # The lifter cannot work without statements: retry once with them enabled.
                assert skip_stmts is True
                final_irsb = lifter(arch, addr).lift(
                    u_data,
                    bytes_offset - skip,
                    max_bytes,
                    max_inst,
                    opt_level,
                    traceflags,
                    allow_arch_optimizations,
                    strict_block_end,
                    skip_stmts=False,
                    collect_data_refs=collect_data_refs,
                    cross_insn_opt=cross_insn_opt,
                    load_from_ro_regions=load_from_ro_regions,
                    const_prop=const_prop,
                )
            break
        except LiftingException as ex:
            log.debug("Lifting Exception: %s", str(ex))
            continue
    else:
        # No lifter produced a block: report an empty NoDecode block at `addr`.
        final_irsb = IRSB.empty_block(
            arch,
            addr,
            size=0,
            nxt=Const(const.vex_int_class(arch.bits)(addr)),
            jumpkind="Ijk_NoDecode",
        )
        final_irsb.invalidate_direct_next()
        return final_irsb

    if final_irsb.size > 0 and final_irsb.jumpkind == "Ijk_NoDecode":
        # We have decoded a few bytes before we hit an undecodeable instruction.

        # Determine if this is an intentional NoDecode, like the ud2 instruction on AMD64
        nodecode_addr_expr = final_irsb.next
        if type(nodecode_addr_expr) is Const:
            nodecode_addr = nodecode_addr_expr.con.value
            next_irsb_start_addr = addr + final_irsb.size
            if nodecode_addr != next_irsb_start_addr:
                # The last instruction of the IRSB has a non-zero length. This is an intentional NoDecode.
                # The very last instruction has been decoded
                final_irsb.jumpkind = "Ijk_NoDecode"
                final_irsb.next = final_irsb.next
                final_irsb.invalidate_direct_next()
                return final_irsb

        # Decode more bytes
        if skip_stmts:
            # When gymrat will be invoked, we will merge future basic blocks to the current basic block. In this case,
            # statements are usually required.
            # TODO: In the future, we may further optimize it to handle cases where getting statements in gymrat is not
            # TODO: required.
            return lift(
                data,
                addr,
                arch,
                max_bytes=max_bytes,
                max_inst=max_inst,
                bytes_offset=bytes_offset,
                opt_level=opt_level,
                traceflags=traceflags,
                strict_block_end=strict_block_end,
                skip_stmts=False,
                collect_data_refs=collect_data_refs,
                cross_insn_opt=cross_insn_opt,
                load_from_ro_regions=load_from_ro_regions,
                const_prop=const_prop,
            )

        next_addr = addr + final_irsb.size
        if max_bytes is not None:
            max_bytes -= final_irsb.size
        if isinstance(data, (bytes, bytearray, memoryview)):
            data_left = data[final_irsb.size :]
        else:
            data_left = data + final_irsb.size
        if max_inst is not None:
            max_inst -= final_irsb.instructions
        if (max_bytes is None or max_bytes > 0) and (max_inst is None or max_inst > 0) and data_left:
            more_irsb = lift(
                data_left,
                next_addr,
                arch,
                max_bytes=max_bytes,
                max_inst=max_inst,
                bytes_offset=bytes_offset,
                opt_level=opt_level,
                traceflags=traceflags,
                strict_block_end=strict_block_end,
                inner=True,
                skip_stmts=False,
                collect_data_refs=collect_data_refs,
                cross_insn_opt=cross_insn_opt,
                load_from_ro_regions=load_from_ro_regions,
                const_prop=const_prop,
            )
            if more_irsb.size:
                # Successfully decoded more bytes
                final_irsb.extend(more_irsb)
        elif max_bytes == 0:
            # We have no more bytes left. Mark the jumpkind of the IRSB as Ijk_Boring
            if final_irsb.size > 0 and final_irsb.jumpkind == "Ijk_NoDecode":
                final_irsb.jumpkind = "Ijk_Boring"
                final_irsb.next = Const(vex_int_class(arch.bits)(final_irsb.addr + final_irsb.size))

    if not inner:
        for postprocessor in postprocessors[arch.name]:
            try:
                postprocessor(final_irsb).postprocess()
            except NeedStatementsNotification as e:
                # The post-processor cannot work without statements. Re-lift the current block with skip_stmts=False
                if not skip_stmts:
                    # sanity check
                    # Why does the post-processor raise NeedStatementsNotification when skip_stmts is False?
                    # `postprocessor` is a class, so report its name rather than its metaclass.
                    raise TypeError(
                        "Bad post-processor %s: "
                        "NeedStatementsNotification is raised when statements are available." % postprocessor.__name__
                    ) from e

                # Re-lift the current IRSB
                return lift(
                    data,
                    addr,
                    arch,
                    max_bytes=max_bytes,
                    max_inst=max_inst,
                    bytes_offset=bytes_offset,
                    opt_level=opt_level,
                    traceflags=traceflags,
                    strict_block_end=strict_block_end,
                    inner=inner,
                    skip_stmts=False,
                    collect_data_refs=collect_data_refs,
                    cross_insn_opt=cross_insn_opt,
                    load_from_ro_regions=load_from_ro_regions,
                    const_prop=const_prop,
                )
            except LiftingException:
                continue

    return final_irsb
296 | 297 | :param lifter: The Lifter or Postprocessor to register 298 | :vartype lifter: :class:`Lifter` or :class:`Postprocessor` 299 | """ 300 | if issubclass(lifter, Lifter): 301 | log.debug("Registering lifter %s for architecture %s.", lifter.__name__, arch_name) 302 | lifters[arch_name].append(lifter) 303 | if issubclass(lifter, Postprocessor): 304 | log.debug("Registering postprocessor %s for architecture %s.", lifter.__name__, arch_name) 305 | postprocessors[arch_name].append(lifter) 306 | -------------------------------------------------------------------------------- /pyvex/lifting/lifter.py: -------------------------------------------------------------------------------- 1 | from pyvex.block import IRSB 2 | from pyvex.types import Arch, LiftSource 3 | 4 | # pylint:disable=attribute-defined-outside-init 5 | 6 | 7 | class Lifter: 8 | __slots__ = ( 9 | "data", 10 | "bytes_offset", 11 | "opt_level", 12 | "traceflags", 13 | "allow_arch_optimizations", 14 | "strict_block_end", 15 | "collect_data_refs", 16 | "max_inst", 17 | "max_bytes", 18 | "skip_stmts", 19 | "irsb", 20 | "arch", 21 | "addr", 22 | "cross_insn_opt", 23 | "load_from_ro_regions", 24 | "const_prop", 25 | "disasm", 26 | "dump_irsb", 27 | ) 28 | 29 | """ 30 | A lifter is a class of methods for processing a block. 31 | 32 | :ivar data: The bytes to lift as either a python string of bytes or a cffi buffer object. 33 | :ivar bytes_offset: The offset into `data` to start lifting at. 34 | :ivar max_bytes: The maximum number of bytes to lift. If set to None, no byte limit is used. 35 | :ivar max_inst: The maximum number of instructions to lift. If set to None, no instruction limit is used. 36 | :ivar opt_level: The level of optimization to apply to the IR, 0-2. Most likely will be ignored in any lifter 37 | other then LibVEX. 38 | :ivar traceflags: The libVEX traceflags, controlling VEX debug prints. Most likely will be ignored in any 39 | lifter other than LibVEX. 
40 | :ivar allow_arch_optimizations: Should the LibVEX lifter be allowed to perform lift-time preprocessing 41 | optimizations (e.g., lookback ITSTATE optimization on THUMB) 42 | Most likely will be ignored in any lifter other than LibVEX. 43 | :ivar strict_block_end: Should the LibVEX arm-thumb split block at some instructions, for example CB{N}Z. 44 | :ivar skip_stmts: Should LibVEX ignore statements. 45 | """ 46 | REQUIRE_DATA_C = False 47 | REQUIRE_DATA_PY = False 48 | 49 | def __init__(self, arch: Arch, addr: int): 50 | self.arch: Arch = arch 51 | self.addr: int = addr 52 | 53 | def lift( 54 | self, 55 | data: LiftSource, 56 | bytes_offset: int | None = None, 57 | max_bytes: int | None = None, 58 | max_inst: int | None = None, 59 | opt_level: int | float = 1, 60 | traceflags: int | None = None, 61 | allow_arch_optimizations: bool | None = None, 62 | strict_block_end: bool | None = None, 63 | skip_stmts: bool = False, 64 | collect_data_refs: bool = False, 65 | cross_insn_opt: bool = True, 66 | load_from_ro_regions: bool = False, 67 | const_prop: bool = False, 68 | disasm: bool = False, 69 | dump_irsb: bool = False, 70 | ): 71 | """ 72 | Wrapper around the `_lift` method on Lifters. Should not be overridden in child classes. 73 | 74 | :param data: The bytes to lift as either a python string of bytes or a cffi buffer object. 75 | :param bytes_offset: The offset into `data` to start lifting at. 76 | :param max_bytes: The maximum number of bytes to lift. If set to None, no byte limit is used. 77 | :param max_inst: The maximum number of instructions to lift. If set to None, no instruction limit is 78 | used. 79 | :param opt_level: The level of optimization to apply to the IR, 0-2. Most likely will be ignored in 80 | any lifter other then LibVEX. 81 | :param traceflags: The libVEX traceflags, controlling VEX debug prints. Most likely will be ignored in 82 | any lifter other than LibVEX. 
83 | :param allow_arch_optimizations: Should the LibVEX lifter be allowed to perform lift-time preprocessing 84 | optimizations (e.g., lookback ITSTATE optimization on THUMB) Most likely will be 85 | ignored in any lifter other than LibVEX. 86 | :param strict_block_end: Should the LibVEX arm-thumb split block at some instructions, for example CB{N}Z. 87 | :param skip_stmts: Should the lifter skip transferring IRStmts from C to Python. 88 | :param collect_data_refs: Should the LibVEX lifter collect data references in C. 89 | :param cross_insn_opt: If cross-instruction-boundary optimizations are allowed or not. 90 | :param disasm: Should the GymratLifter generate disassembly during lifting. 91 | :param dump_irsb: Should the GymratLifter log the lifted IRSB. 92 | """ 93 | irsb: IRSB = IRSB.empty_block(self.arch, self.addr) 94 | self.data = data 95 | self.bytes_offset = bytes_offset 96 | self.opt_level = opt_level 97 | self.traceflags = traceflags 98 | self.allow_arch_optimizations = allow_arch_optimizations 99 | self.strict_block_end = strict_block_end 100 | self.collect_data_refs = collect_data_refs 101 | self.max_inst = max_inst 102 | self.max_bytes = max_bytes 103 | self.skip_stmts = skip_stmts 104 | self.irsb = irsb 105 | self.cross_insn_opt = cross_insn_opt 106 | self.load_from_ro_regions = load_from_ro_regions 107 | self.const_prop = const_prop 108 | self.disasm = disasm 109 | self.dump_irsb = dump_irsb 110 | self._lift() 111 | return self.irsb 112 | 113 | def _lift(self): 114 | """ 115 | Lifts the data using the information passed into _lift. Should be overridden in child classes. 116 | 117 | Should set the lifted IRSB to self.irsb. 118 | If a lifter raises a LiftingException on the data, this signals that the lifter cannot lift this data and arch 119 | and the lifter is skipped. 
120 | If a lifter can lift any amount of data, it should lift it and return the lifted block with a jumpkind of 121 | Ijk_NoDecode, signalling to pyvex that other lifters should be used on the undecodable data. 122 | 123 | """ 124 | raise NotImplementedError() 125 | -------------------------------------------------------------------------------- /pyvex/lifting/post_processor.py: -------------------------------------------------------------------------------- 1 | # 2 | # The post-processor base class 3 | # 4 | 5 | 6 | class Postprocessor: 7 | def __init__(self, irsb): 8 | self.irsb = irsb 9 | 10 | def postprocess(self): 11 | """ 12 | Modify the irsb 13 | 14 | All of the postprocessors will be used in the order that they are registered 15 | """ 16 | pass 17 | -------------------------------------------------------------------------------- /pyvex/lifting/util/__init__.py: -------------------------------------------------------------------------------- 1 | from .instr_helper import Instruction 2 | from .lifter_helper import GymratLifter, ParseError 3 | from .syntax_wrapper import VexValue 4 | from .vex_helper import JumpKind, Type 5 | 6 | __all__ = [ 7 | "Type", 8 | "JumpKind", 9 | "VexValue", 10 | "ParseError", 11 | "Instruction", 12 | "GymratLifter", 13 | "ParseError", 14 | ] 15 | -------------------------------------------------------------------------------- /pyvex/lifting/util/lifter_helper.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import TYPE_CHECKING 3 | 4 | import bitstring 5 | 6 | from pyvex.const import vex_int_class 7 | from pyvex.errors import LiftingException 8 | from pyvex.lifting.lifter import Lifter 9 | 10 | from .vex_helper import IRSBCustomizer, JumpKind 11 | 12 | if TYPE_CHECKING: 13 | from .instr_helper import Instruction 14 | 15 | log = logging.getLogger(__name__) 16 | 17 | 18 | def is_empty(bitstrm): 19 | try: 20 | bitstrm.peek(1) 21 | return False 22 | except 
bitstring.ReadError: 23 | return True 24 | 25 | 26 | class ParseError(Exception): 27 | pass 28 | 29 | 30 | class GymratLifter(Lifter): 31 | """ 32 | This is a base class for lifters that use Gymrat. 33 | For most architectures, all you need to do is subclass this, and set the property "instructions" 34 | to be a list of classes that define each instruction. 35 | By default, a lifter will decode instructions by attempting to instantiate every class until one works. 36 | This will use an IRSBCustomizer, which will, if it succeeds, add the appropriate VEX instructions to a pyvex IRSB. 37 | pyvex, when lifting a block of code for this architecture, will call the method "lift", which will produce the IRSB 38 | of the lifted code. 39 | """ 40 | 41 | __slots__ = ( 42 | "bitstrm", 43 | "errors", 44 | "thedata", 45 | "disassembly", 46 | ) 47 | 48 | REQUIRE_DATA_PY = True 49 | instrs: list[type["Instruction"]] 50 | 51 | def __init__(self, arch, addr): 52 | super().__init__(arch, addr) 53 | self.bitstrm = None 54 | self.errors = None 55 | self.thedata = None 56 | self.disassembly = None 57 | 58 | def create_bitstrm(self): 59 | self.bitstrm = bitstring.ConstBitStream(bytes=self.thedata) 60 | 61 | def _decode_next_instruction(self, addr): 62 | # Try every instruction until one works 63 | for possible_instr in self.instrs: 64 | try: 65 | log.debug("Trying %s", possible_instr.name) 66 | return possible_instr(self.bitstrm, self.irsb.arch, addr) 67 | # a ParserError signals that this instruction did not match 68 | # we need to try other instructions, so we ignore this error 69 | except ParseError: 70 | pass # l.exception(repr(possible_instr)) 71 | # if we are out of input, ignore. 
72 | # there may be other, shorter instructions that still match, 73 | # so we continue with the loop 74 | except (bitstring.ReadError, bitstring.InterpretError): 75 | pass 76 | 77 | # If no instruction matches, log an error 78 | errorstr = "Unknown instruction at bit position %d" % self.bitstrm.bitpos 79 | log.debug(errorstr) 80 | log.debug("Address: %#08x" % addr) 81 | 82 | def decode(self): 83 | try: 84 | self.create_bitstrm() 85 | count = 0 86 | disas = [] 87 | addr = self.irsb.addr 88 | log.debug("Starting block at address: " + hex(addr)) 89 | bytepos = self.bitstrm.bytepos 90 | 91 | while not is_empty(self.bitstrm): 92 | instr = self._decode_next_instruction(addr) 93 | if not instr: 94 | break 95 | disas.append(instr) 96 | log.debug("Matched " + instr.name) 97 | addr += self.bitstrm.bytepos - bytepos 98 | bytepos = self.bitstrm.bytepos 99 | count += 1 100 | return disas 101 | except Exception as e: 102 | self.errors = str(e) 103 | log.exception(f"Error decoding block at offset {bytepos:#x} (address {addr:#x}):") 104 | raise 105 | 106 | def _lift(self): 107 | self.thedata = ( 108 | self.data[: self.max_bytes] 109 | if isinstance(self.data, (bytes, bytearray, memoryview)) 110 | else self.data[: self.max_bytes].encode() 111 | ) 112 | log.debug(repr(self.thedata)) 113 | instructions = self.decode() 114 | 115 | if self.disasm: 116 | self.disassembly = [instr.disassemble() for instr in instructions] 117 | self.irsb.jumpkind = JumpKind.Invalid 118 | irsb_c = IRSBCustomizer(self.irsb) 119 | log.debug("Decoding complete.") 120 | for i, instr in enumerate(instructions[: self.max_inst]): 121 | log.debug("Lifting instruction %s", instr.name) 122 | instr(irsb_c, instructions[:i], instructions[i + 1 :]) 123 | if irsb_c.irsb.jumpkind != JumpKind.Invalid: 124 | break 125 | if (i + 1) == self.max_inst: # if we are on our last iteration 126 | instr.jump(None, irsb_c.irsb.addr + irsb_c.irsb.size) 127 | break 128 | else: 129 | if len(irsb_c.irsb.statements) == 0: 130 | raise 
LiftingException("Could not decode any instructions") 131 | irsb_c.irsb.jumpkind = JumpKind.NoDecode 132 | dst = irsb_c.irsb.addr + irsb_c.irsb.size 133 | dst_ty = vex_int_class(irsb_c.irsb.arch.bits).type 134 | irsb_c.irsb.next = irsb_c.mkconst(dst, dst_ty) 135 | log.debug(str(self.irsb)) 136 | if self.dump_irsb: 137 | self.irsb.pp() 138 | return self.irsb 139 | 140 | def pp_disas(self): 141 | disasstr = "" 142 | insts = self.disassemble() 143 | for addr, name, args in insts: 144 | args_str = ",".join(str(a) for a in args) 145 | disasstr += f"{addr:#08x}:\t{name} {args_str}\n" 146 | print(disasstr) 147 | 148 | def error(self): 149 | return self.errors 150 | 151 | def disassemble(self): 152 | if self.disassembly is None: 153 | self.lift(self.data, disasm=True) 154 | return self.disassembly 155 | -------------------------------------------------------------------------------- /pyvex/lifting/util/syntax_wrapper.py: -------------------------------------------------------------------------------- 1 | import functools 2 | from typing import Union 3 | 4 | from pyvex.const import get_type_size 5 | from pyvex.expr import Const, IRExpr, RdTmp 6 | 7 | from .vex_helper import IRSBCustomizer, Type 8 | 9 | 10 | def checkparams(rhstype=None): 11 | def decorator(fn): 12 | @functools.wraps(fn) 13 | def inner_decorator(self, *args, **kwargs): 14 | irsb_cs = {a.irsb_c for a in list(args) + [self] if isinstance(a, VexValue)} # pylint: disable=no-member 15 | assert len(irsb_cs) == 1, "All VexValues must belong to the same irsb_c" 16 | args = list(args) 17 | for idx, arg in enumerate(args): 18 | if isinstance(arg, int): 19 | thetype = rhstype if rhstype else self.ty 20 | args[idx] = VexValue.Constant(self.irsb_c, arg, thetype) 21 | elif not isinstance(arg, VexValue): 22 | raise Exception("Cannot convert param %s" % str(arg)) 23 | args = tuple(args) 24 | return fn(self, *args, **kwargs) 25 | 26 | return inner_decorator 27 | 28 | return decorator 29 | 30 | 31 | def vvifyresults(f): 32 | 
@functools.wraps(f) 33 | def decor(self, *args, **kwargs): 34 | returned = f(self, *args, **kwargs) 35 | assert isinstance(returned, RdTmp) or isinstance(returned, Const) 36 | return VexValue(self.irsb_c, returned) 37 | 38 | return decor 39 | 40 | 41 | class VexValue: 42 | def __init__(self, irsb_c: "IRSBCustomizer", rdt: "Union[RdTmp, Const]", signed=False): 43 | self.irsb_c = irsb_c 44 | self.ty = self.irsb_c.get_type(rdt) 45 | self.rdt = rdt 46 | self.width = get_type_size(self.ty) 47 | self._is_signed = signed 48 | 49 | @property 50 | def value(self): 51 | if isinstance(self.rdt, Const): 52 | return self.rdt.con.value 53 | else: 54 | raise ValueError("Non-constant VexValue has no value property") 55 | 56 | @property 57 | def signed(self): 58 | return VexValue(self.irsb_c, self.rdt, True) 59 | 60 | @vvifyresults 61 | def widen_unsigned(self, ty): 62 | return self.irsb_c.op_widen_int_unsigned(self.rdt, ty) 63 | 64 | @vvifyresults 65 | def cast_to(self, ty, signed=False, high=False): 66 | return self.irsb_c.cast_to(self.rdt, ty, signed=signed, high=high) 67 | 68 | @vvifyresults 69 | def widen_signed(self, ty): 70 | return self.irsb_c.op_widen_int_signed(self.rdt, ty) 71 | 72 | @vvifyresults 73 | def narrow_high(self, ty): 74 | return self.irsb_c.op_narrow_int(self.rdt, ty, high_half=True) 75 | 76 | @vvifyresults 77 | def narrow_low(self, ty): 78 | return self.irsb_c.op_narrow_int(self.rdt, ty, high_half=False) 79 | 80 | # TODO at some point extend this to Vex nonconstants 81 | def __getitem__(self, idx): 82 | def getb(i): 83 | return VexValue(self.irsb_c, self.irsb_c.get_bit(self.rdt, i)) 84 | 85 | def makeconstant(x): 86 | return VexValue.Constant(self.irsb_c, x, Type.int_8).rdt 87 | 88 | if not isinstance(idx, slice): 89 | actualindex = slice(idx).indices(self.width)[1] 90 | return getb(makeconstant(actualindex)) 91 | else: 92 | return [getb(makeconstant(i)) for i in range(*idx.indices(self.width))] 93 | 94 | def __setitem__(self, idx, bval): 95 | setted = 
self.set_bit(idx, bval) 96 | self.__init__(setted.irsb_c, setted.rdt) 97 | 98 | @checkparams(rhstype=Type.int_8) 99 | @vvifyresults 100 | def set_bit(self, idx, bval): 101 | return self.irsb_c.set_bit(self.rdt, idx.rdt, bval.rdt) 102 | 103 | @checkparams() 104 | @vvifyresults 105 | def set_bits(self, idxsandvals): 106 | return self.irsb_c.set_bits(self.rdt, [(i.cast_to(Type.int_8).rdt, b.rdt) for i, b in idxsandvals]) 107 | 108 | @checkparams() 109 | @vvifyresults 110 | def ite(self, iftrue, iffalse): 111 | onebitcond = self.cast_to(Type.int_1) 112 | return self.irsb_c.ite(onebitcond.rdt, iftrue.rdt, iffalse.rdt) 113 | 114 | @checkparams() 115 | @vvifyresults 116 | def sar(self, right): 117 | """ 118 | `v.sar(r)` should do arithmetic shift right of `v` by `r` 119 | 120 | :param right:VexValue value to shift by 121 | :return: VexValue - result of a shift 122 | """ 123 | return self.irsb_c.op_sar(self.rdt, right.rdt) 124 | 125 | @checkparams() 126 | @vvifyresults 127 | def __add__(self, right): 128 | return self.irsb_c.op_add(self.rdt, right.rdt) 129 | 130 | @checkparams() 131 | def __radd__(self, left): 132 | return self + left 133 | 134 | @checkparams() 135 | @vvifyresults 136 | def __sub__(self, right): 137 | return self.irsb_c.op_sub(self.rdt, right.rdt) 138 | 139 | @checkparams() 140 | def __rsub__(self, left): 141 | return left - self 142 | 143 | @checkparams() 144 | @vvifyresults 145 | def __div__(self, right): 146 | if self._is_signed: 147 | return self.irsb_c.op_sdiv(self.rdt, right.rdt) 148 | else: 149 | return self.irsb_c.op_udiv(self.rdt, right.rdt) 150 | 151 | @checkparams() 152 | def __rdiv__(self, left): 153 | return left // self 154 | 155 | @checkparams() 156 | def __floordiv__(self, right): # Note: nonprimitive 157 | return self.__div__(right) 158 | 159 | @checkparams() 160 | def __rfloordiv__(self, left): 161 | return left // self 162 | 163 | @checkparams() 164 | def __truediv__(self, right): # Note: nonprimitive 165 | return self / right 166 | 167 
| @checkparams() 168 | def __rtruediv__(self, left): 169 | return left.__truediv__(self) 170 | 171 | @checkparams() 172 | @vvifyresults 173 | def __and__(self, right): 174 | return self.irsb_c.op_and(self.rdt, right.rdt) 175 | 176 | @checkparams() 177 | def __rand__(self, left): 178 | return left & self 179 | 180 | @checkparams() 181 | @vvifyresults 182 | def __eq__(self, right): 183 | return self.irsb_c.op_cmp_eq(self.rdt, right.rdt) 184 | 185 | @checkparams() 186 | @vvifyresults 187 | def __ne__(self, other): 188 | return self.irsb_c.op_cmp_ne(self.rdt, other.rdt) 189 | 190 | @checkparams() 191 | @vvifyresults 192 | def __invert__(self): 193 | return self.irsb_c.op_not(self.rdt) 194 | 195 | @checkparams() 196 | @vvifyresults 197 | def __le__(self, right): 198 | if self._is_signed: 199 | return self.irsb_c.op_cmp_sle(self.rdt, right.rdt) 200 | else: 201 | return self.irsb_c.op_cmp_ule(self.rdt, right.rdt) 202 | 203 | @checkparams() 204 | @vvifyresults 205 | def __gt__(self, other): 206 | if self._is_signed: 207 | return self.irsb_c.op_cmp_sgt(self.rdt, other.rdt) 208 | else: 209 | return self.irsb_c.op_cmp_ugt(self.rdt, other.rdt) 210 | 211 | @checkparams() 212 | @vvifyresults 213 | def __ge__(self, right): 214 | if self._is_signed: 215 | return self.irsb_c.op_cmp_sge(self.rdt, right.rdt) 216 | else: 217 | return self.irsb_c.op_cmp_uge(self.rdt, right.rdt) 218 | 219 | @checkparams(rhstype=Type.int_8) 220 | @vvifyresults 221 | def __lshift__(self, right): # TODO put better type inference in irsb_c so we can have rlshift 222 | """ 223 | logical shift left 224 | """ 225 | return self.irsb_c.op_shl(self.rdt, right.rdt) 226 | 227 | @checkparams() 228 | @vvifyresults 229 | def __lt__(self, right): 230 | if self._is_signed: 231 | return self.irsb_c.op_cmp_slt(self.rdt, right.rdt) 232 | else: 233 | return self.irsb_c.op_cmp_ult(self.rdt, right.rdt) 234 | 235 | @checkparams() 236 | @vvifyresults 237 | def __mod__(self, right): # Note: nonprimitive 238 | return 
self.irsb_c.op_mod(self.rdt, right.rdt) 239 | 240 | @checkparams() 241 | def __rmod__(self, left): 242 | return left % self 243 | 244 | @checkparams() 245 | @vvifyresults 246 | def __mul__(self, right): 247 | if self._is_signed: 248 | return self.irsb_c.op_smul(self.rdt, right.rdt) 249 | else: 250 | return self.irsb_c.op_umul(self.rdt, right.rdt) 251 | 252 | @checkparams() 253 | def __rmul__(self, left): 254 | return left * self 255 | 256 | @checkparams() 257 | @vvifyresults 258 | def __neg__(self): # Note: nonprimitive 259 | if not self._is_signed: 260 | raise Exception("Number is unsigned, cannot change sign!") 261 | else: 262 | return self.rdt * -1 263 | 264 | @checkparams() 265 | @vvifyresults 266 | def __or__(self, right): 267 | return self.irsb_c.op_or(self.rdt, right.rdt) 268 | 269 | def __ror__(self, left): 270 | return self | left 271 | 272 | @checkparams() 273 | @vvifyresults 274 | def __pos__(self): 275 | return self 276 | 277 | @checkparams(rhstype=Type.int_8) 278 | @vvifyresults 279 | def __rshift__(self, right): 280 | """ 281 | logical shift right 282 | """ 283 | return self.irsb_c.op_shr(self.rdt, right.rdt) 284 | 285 | @checkparams() 286 | def __rlshift__(self, left): 287 | return left << self 288 | 289 | @checkparams() 290 | def __rrshift__(self, left): 291 | return left >> self 292 | 293 | @checkparams() 294 | @vvifyresults 295 | def __xor__(self, right): 296 | return self.irsb_c.op_xor(self.rdt, right.rdt) 297 | 298 | def __rxor__(self, left): 299 | return self ^ left 300 | 301 | @classmethod 302 | def Constant(cls, irsb_c, val, ty): 303 | """ 304 | Creates a constant as a VexValue 305 | :param irsb_c: The IRSBCustomizer to use 306 | :param val: The value, as an integer 307 | :param ty: The type of the resulting VexValue 308 | :return: a VexValue 309 | """ 310 | assert not (isinstance(val, VexValue) or isinstance(val, IRExpr)) 311 | rdt = irsb_c.mkconst(val, ty) 312 | return cls(irsb_c, rdt) 313 | 
# --------------------------------------------------------------------------------
# /pyvex/lifting/util/vex_helper.py:
# --------------------------------------------------------------------------------
import copy
import re

from pyvex.const import U1, get_type_size, ty_to_const_class, vex_int_class
from pyvex.enums import IRCallee
from pyvex.expr import ITE, Binop, CCall, Const, Get, Load, RdTmp, Unop
from pyvex.stmt import Dirty, Exit, IMark, NoOp, Put, Store, WrTmp


class JumpKind:
    """Named constants for the VEX jumpkind strings used by the helpers in this module."""

    Boring = "Ijk_Boring"
    Call = "Ijk_Call"
    Ret = "Ijk_Ret"
    Segfault = "Ijk_SigSEGV"
    Exit = "Ijk_Exit"
    Syscall = "Ijk_Sys_syscall"
    Sysenter = "Ijk_Sys_sysenter"
    Invalid = "Ijk_INVALID"
    NoDecode = "Ijk_NoDecode"


class TypeMeta(type):
    """
    Metaclass that resolves ``int_<N>`` attribute lookups (e.g. ``Type.int_32``) to
    ``vex_int_class(N).type``.
    """

    # The group must be named "size": __getattr__ reads it back with match.group("size"). Without the name
    # ("(?P\d+)") the pattern is not a valid regular expression at all.
    typemeta_re = re.compile(r"int_(?P<size>\d+)$")

    def __getattr__(self, name):
        """
        Called only for attributes not found through normal lookup.

        :raises AttributeError: For any name that does not have the form ``int_<digits>``.
        """
        match = self.typemeta_re.match(name)
        if match:
            width = int(match.group("size"))
            return vex_int_class(width).type
        # `type` defines no __getattr__ to delegate to, so raise the usual error directly.
        raise AttributeError(f"type object {self.__name__!r} has no attribute {name!r}")


class Type(metaclass=TypeMeta):
    """
    Names for VEX type strings. Integer types are produced on demand by TypeMeta (``Type.int_8`` etc.).
    """

    __metaclass__ = TypeMeta

    ieee_float_16 = "Ity_F16"
    ieee_float_32 = "Ity_F32"
    ieee_float_64 = "Ity_F64"
    ieee_float_128 = "Ity_F128"
    decimal_float_32 = "Ity_D32"
    decimal_float_64 = "Ity_D64"
    decimal_float_128 = "Ity_D128"
    simd_vector_128 = "Ity_V128"
    simd_vector_256 = "Ity_V256"


def get_op_format_from_const_ty(ty):
    """Return the ``op_format`` attribute of the constant class that ``ty_to_const_class`` maps `ty` to."""
    return ty_to_const_class(ty).op_format


def make_format_op_generator(fmt_string):
    """
    Return a function which generates an op format (just a string of the vex instruction)

    Functions by formatting the fmt_string with the types of the arguments
    """

    def gen(arg_types):
        converted_arg_types = list(map(get_op_format_from_const_ty, arg_types))
        op = fmt_string.format(arg_t=converted_arg_types)
        return op

    return gen
65 | 66 | 67 | def mkbinop(fstring): 68 | return lambda self, expr_a, expr_b: self.op_binary(make_format_op_generator(fstring))(expr_a, expr_b) 69 | 70 | 71 | def mkunop(fstring): 72 | return lambda self, expr_a: self.op_unary(make_format_op_generator(fstring))(expr_a) 73 | 74 | 75 | def mkcmpop(fstring_fragment, signedness=""): 76 | def cmpop(self, expr_a, expr_b): 77 | ty = self.get_type(expr_a) 78 | fstring = f"Iop_Cmp{fstring_fragment}{{arg_t[0]}}{signedness}" 79 | retval = mkbinop(fstring)(self, expr_a, expr_b) 80 | return self.cast_to(retval, ty) 81 | 82 | return cmpop 83 | 84 | 85 | class IRSBCustomizer: 86 | op_add = mkbinop("Iop_Add{arg_t[0]}") 87 | op_sub = mkbinop("Iop_Sub{arg_t[0]}") 88 | op_umul = mkbinop("Iop_Mul{arg_t[0]}") 89 | op_smul = mkbinop("Iop_MullS{arg_t[0]}") 90 | op_sdiv = mkbinop("Iop_DivS{arg_t[0]}") 91 | op_udiv = mkbinop("Iop_DivU{arg_t[0]}") 92 | 93 | # Custom operation that does not exist in libVEX 94 | op_mod = mkbinop("Iop_Mod{arg_t[0]}") 95 | 96 | op_or = mkbinop("Iop_Or{arg_t[0]}") 97 | op_and = mkbinop("Iop_And{arg_t[0]}") 98 | op_xor = mkbinop("Iop_Xor{arg_t[0]}") 99 | 100 | op_shr = mkbinop("Iop_Shr{arg_t[0]}") # Shift Right (logical) 101 | op_shl = mkbinop("Iop_Shl{arg_t[0]}") # Shift Left (logical) 102 | 103 | op_sar = mkbinop("Iop_Sar{arg_t[0]}") # Shift Arithmetic Right operation 104 | 105 | op_not = mkunop("Iop_Not{arg_t[0]}") 106 | 107 | op_cmp_eq = mkcmpop("EQ") 108 | op_cmp_ne = mkcmpop("NE") 109 | op_cmp_slt = mkcmpop("LT", "S") 110 | op_cmp_sle = mkcmpop("LE", "S") 111 | op_cmp_ult = mkcmpop("LT", "U") 112 | op_cmp_ule = mkcmpop("LE", "U") 113 | op_cmp_sge = mkcmpop("GE", "S") 114 | op_cmp_uge = mkcmpop("GE", "U") 115 | op_cmp_sgt = mkcmpop("GT", "S") 116 | op_cmp_ugt = mkcmpop("GT", "U") 117 | 118 | def __init__(self, irsb): 119 | self.arch = irsb.arch 120 | self.irsb = irsb 121 | 122 | def get_type(self, rdt): 123 | return rdt.result_type(self.irsb.tyenv) 124 | 125 | # Statements (no return value) 126 | def 
_append_stmt(self, stmt): 127 | self.irsb.statements += [stmt] 128 | 129 | def imark(self, int_addr, int_length, int_delta=0): 130 | self._append_stmt(IMark(int_addr, int_length, int_delta)) 131 | 132 | def get_reg(self, regname): # TODO move this into the lifter 133 | return self.arch.registers[regname][0] 134 | 135 | def put(self, expr_val, tuple_reg): 136 | self._append_stmt(Put(copy.copy(expr_val), tuple_reg)) 137 | 138 | def store(self, addr, expr): 139 | self._append_stmt(Store(copy.copy(addr), copy.copy(expr), self.arch.memory_endness)) 140 | 141 | def noop(self): 142 | self._append_stmt(NoOp()) 143 | 144 | def add_exit(self, guard, dst, jk, ip): 145 | """ 146 | Add an exit out of the middle of an IRSB. 147 | (e.g., a conditional jump) 148 | :param guard: An expression, the exit is taken if true 149 | :param dst: the destination of the exit (a Const) 150 | :param jk: the JumpKind of this exit (probably Ijk_Boring) 151 | :param ip: The address of this exit's source 152 | """ 153 | self.irsb.statements.append(Exit(guard, dst.con, jk, ip)) 154 | 155 | # end statements 156 | 157 | def goto(self, addr): 158 | self.irsb.next = addr 159 | self.irsb.jumpkind = JumpKind.Boring 160 | 161 | def ret(self, addr): 162 | self.irsb.next = addr 163 | self.irsb.jumpkind = JumpKind.Ret 164 | 165 | def call(self, addr): 166 | self.irsb.next = addr 167 | self.irsb.jumpkind = JumpKind.Call 168 | 169 | def _add_tmp(self, t): 170 | return self.irsb.tyenv.add(t) 171 | 172 | def _rdtmp(self, tmp): 173 | return RdTmp.get_instance(tmp) 174 | 175 | def _settmp(self, expr): 176 | ty = self.get_type(expr) 177 | tmp = self._add_tmp(ty) 178 | self._append_stmt(WrTmp(tmp, expr)) 179 | return self._rdtmp(tmp) 180 | 181 | def rdreg(self, reg, ty): 182 | return self._settmp(Get(reg, ty)) 183 | 184 | def load(self, addr, ty): 185 | return self._settmp(Load(self.arch.memory_endness, ty, copy.copy(addr))) 186 | 187 | def op_ccall(self, retty, funcstr, args): 188 | return self._settmp(CCall(retty, 
IRCallee(len(args), funcstr, 0xFFFF), args)) 189 | 190 | def dirty(self, retty, funcstr, args): 191 | if retty is None: 192 | tmp = 0xFFFFFFFF 193 | else: 194 | tmp = self._add_tmp(retty) 195 | self._append_stmt(Dirty(IRCallee(len(args), funcstr, 0xFFFF), Const(U1(1)), args, tmp, None, None, None, None)) 196 | return self._rdtmp(tmp) 197 | 198 | def ite(self, condrdt, iftruerdt, iffalserdt): 199 | return self._settmp(ITE(copy.copy(condrdt), copy.copy(iffalserdt), copy.copy(iftruerdt))) 200 | 201 | def mkconst(self, val, ty): 202 | cls = ty_to_const_class(ty) 203 | return Const(cls(val)) 204 | 205 | # Operations 206 | def op_generic(self, Operation, op_generator): 207 | def instance(*args): # Note: The args here are all RdTmps 208 | for arg in args: 209 | assert isinstance(arg, RdTmp) or isinstance(arg, Const) 210 | arg_types = [self.get_type(arg) for arg in args] 211 | # two operations should never share the same argument instances, copy them here to ensure that 212 | args = [copy.copy(a) for a in args] 213 | op = Operation(op_generator(arg_types), args) 214 | msg = "operation needs to be well typed: " + str(op) 215 | assert op.typecheck(self.irsb.tyenv), msg + "\ntypes: " + str(self.irsb.tyenv) 216 | return self._settmp(op) 217 | 218 | return instance 219 | 220 | def op_binary(self, op_format_str): 221 | return self.op_generic(Binop, op_format_str) 222 | 223 | def op_unary(self, op_format_str): 224 | return self.op_generic(Unop, op_format_str) 225 | 226 | def cast_to(self, rdt, tydest, signed=False, high=False): 227 | goalwidth = get_type_size(tydest) 228 | rdtwidth = self.get_rdt_width(rdt) 229 | 230 | if rdtwidth > goalwidth: 231 | return self.op_narrow_int(rdt, tydest, high_half=high) 232 | elif rdtwidth < goalwidth: 233 | return self.op_widen_int(rdt, tydest, signed=signed) 234 | else: 235 | return rdt 236 | 237 | def op_to_one_bit(self, rdt): 238 | rdtty = self.get_type(rdt) 239 | if rdtty not in [Type.int_64, Type.int_32]: 240 | rdt = 
self.op_widen_int_unsigned(rdt, Type.int_32) 241 | onebit = self.op_narrow_int(rdt, Type.int_1) 242 | return onebit 243 | 244 | def op_narrow_int(self, rdt, tydest, high_half=False): 245 | op_name = "{op}{high}to{dest}".format( 246 | op="Iop_{arg_t[0]}", high="HI" if high_half else "", dest=get_op_format_from_const_ty(tydest) 247 | ) 248 | return self.op_unary(make_format_op_generator(op_name))(rdt) 249 | 250 | def op_widen_int(self, rdt, tydest, signed=False): 251 | op_name = "{op}{sign}to{dest}".format( 252 | op="Iop_{arg_t[0]}", sign="S" if signed else "U", dest=get_op_format_from_const_ty(tydest) 253 | ) 254 | return self.op_unary(make_format_op_generator(op_name))(rdt) 255 | 256 | def op_widen_int_signed(self, rdt, tydest): 257 | return self.op_widen_int(rdt, tydest, signed=True) 258 | 259 | def op_widen_int_unsigned(self, rdt, tydest): 260 | return self.op_widen_int(rdt, tydest, signed=False) 261 | 262 | def get_msb(self, tmp, ty): 263 | width = get_type_size(ty) 264 | return self.get_bit(tmp, width - 1) 265 | 266 | def get_bit(self, rdt, idx): 267 | shifted = self.op_shr(rdt, idx) 268 | bit = self.op_extract_lsb(shifted) 269 | return bit 270 | 271 | def op_extract_lsb(self, rdt): 272 | bitmask = self.mkconst(1, self.get_type(rdt)) 273 | return self.op_and(bitmask, rdt) 274 | 275 | def set_bit(self, rdt, idx, bval): 276 | currbit = self.get_bit(rdt, idx) 277 | areequalextrabits = self.op_xor(bval, currbit) 278 | one = self.mkconst(1, self.get_type(areequalextrabits)) 279 | areequal = self.op_and(areequalextrabits, one) 280 | shifted = self.op_shl(areequal, idx) 281 | return self.op_xor(rdt, shifted) 282 | 283 | def set_bits(self, rdt, idxsandvals): 284 | ty = self.get_type(rdt) 285 | if all([isinstance(idx, Const) for idx, _ in idxsandvals]): 286 | relevantbits = self.mkconst(sum([1 << idx.con.value for idx, _ in idxsandvals]), ty) 287 | else: 288 | relevantbits = self.mkconst(0, ty) 289 | for idx, _ in idxsandvals: 290 | shifted = self.op_shl(self.mkconst(1, 
ty), idx) 291 | relevantbits = self.op_or(relevantbits, shifted) 292 | setto = self.mkconst(0, ty) 293 | for idx, bval in idxsandvals: 294 | bvalbit = self.op_extract_lsb(bval) 295 | shifted = self.op_shl(bvalbit, idx) 296 | setto = self.op_or(setto, shifted) 297 | shouldflip = self.op_and(self.op_xor(setto, rdt), relevantbits) 298 | return self.op_xor(rdt, shouldflip) 299 | 300 | def get_rdt_width(self, rdt): 301 | return rdt.result_size(self.irsb.tyenv) 302 | -------------------------------------------------------------------------------- /pyvex/lifting/zerodivision.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | from pyvex import const, expr, stmt 4 | 5 | from .post_processor import Postprocessor 6 | 7 | 8 | class ZeroDivisionPostProcessor(Postprocessor): 9 | """ 10 | A postprocessor for adding zero-division checks to VEX. 11 | 12 | For "div rcx", will turn: 13 | 14 | 00 | ------ IMark(0x8000, 3, 0) ------ 15 | 01 | t0 = GET:I64(rcx) 16 | 02 | t1 = GET:I64(rax) 17 | 03 | t2 = GET:I64(rdx) 18 | 04 | t3 = 64HLto128(t2,t1) 19 | 05 | t4 = DivModU128to64(t3,t0) 20 | 06 | t5 = 128to64(t4) 21 | 07 | PUT(rax) = t5 22 | 08 | t6 = 128HIto64(t4) 23 | 09 | PUT(rdx) = t6 24 | NEXT: PUT(rip) = 0x0000000000008003; Ijk_Boring 25 | 26 | into: 27 | 28 | 00 | ------ IMark(0x8000, 3, 0) ------ 29 | 01 | t0 = GET:I64(rcx) 30 | 02 | t4 = GET:I64(rax) 31 | 03 | t5 = GET:I64(rdx) 32 | 04 | t3 = 64HLto128(t5,t4) 33 | 05 | t9 = CmpEQ(t0,0x0000000000000000) 34 | 06 | if (t9) { PUT(pc) = 0x8000; Ijk_SigFPE_IntDiv } 35 | 07 | t2 = DivModU128to64(t3,t0) 36 | 08 | t6 = 128to64(t2) 37 | 09 | PUT(rax) = t6 38 | 10 | t7 = 128HIto64(t2) 39 | 11 | PUT(rdx) = t7 40 | NEXT: PUT(rip) = 0x0000000000008003; Ijk_Boring 41 | """ 42 | 43 | def postprocess(self): 44 | if self.irsb.statements is None: 45 | # This is an optimized IRSB. We cannot really post-process it. 
46 | return 47 | 48 | insertions = [] 49 | last_ip = 0 50 | for i, s in enumerate(self.irsb.statements): 51 | if s.tag == "Ist_IMark": 52 | last_ip = s.addr 53 | if s.tag == "Ist_WrTmp" and s.data.tag == "Iex_Binop" and ("Div" in s.data.op or "Mod" in s.data.op): 54 | arg_size = s.data.args[1].result_size(self.irsb.tyenv) 55 | cmp_args = [copy.copy(s.data.args[1]), expr.Const(const.vex_int_class(arg_size)(0))] 56 | cmp_tmp = self.irsb.tyenv.add("Ity_I1") 57 | insertions.append((i, stmt.WrTmp(cmp_tmp, expr.Binop("Iop_CmpEQ%d" % arg_size, cmp_args)))) 58 | insertions.append( 59 | ( 60 | i, 61 | stmt.Exit( 62 | expr.RdTmp.get_instance(cmp_tmp), 63 | const.vex_int_class(self.irsb.arch.bits)(last_ip), 64 | "Ijk_SigFPE_IntDiv", 65 | self.irsb.offsIP, 66 | ), 67 | ) 68 | ) 69 | 70 | for i, s in reversed(insertions): 71 | self.irsb.statements.insert(i, s) 72 | -------------------------------------------------------------------------------- /pyvex/native.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import os 3 | import pickle 4 | import sys 5 | import tempfile 6 | from typing import Any 7 | 8 | import cffi 9 | 10 | from .vex_ffi import ffi_str as _ffi_str 11 | 12 | ffi = cffi.FFI() 13 | 14 | 15 | def _locate_lib(module: str, library: str) -> str: 16 | """ 17 | Attempt to find a native library without using pkg_resources, and only fall back to pkg_resources upon failures. 18 | This is because "import pkg_resources" is slow. 19 | 20 | :return: The full path of the native library. 
def _parse_ffi_str():
    """
    Load the declarations in ``_ffi_str`` into the module-level ``ffi`` parser.

    The parser state (``_declarations`` and ``_int_constants``) is cached as a
    pickle in the system temp directory, under a name keyed by the MD5 of
    ``_ffi_str``. If a usable cache file exists, its contents are installed
    directly; otherwise ``ffi.cdef`` is run and the result is written back.

    The cache is strictly an optimisation: an unreadable, truncated or
    otherwise unusable cache file falls back to ``ffi.cdef``, and a failure to
    write the cache is ignored.
    """
    hash_ = hashlib.md5(_ffi_str.encode("utf-8")).hexdigest()
    cache_location = os.path.join(tempfile.gettempdir(), f"pyvex_ffi_parser_cache.{hash_}")

    # SECURITY NOTE(review): this unpickles a file at a predictable path inside
    # the shared temp directory. pickle can execute arbitrary code, so anyone
    # able to create that file can run code in this process. Consider a
    # per-user cache directory or a non-executable serialization format.
    try:
        with open(cache_location, "rb") as f:
            cache = pickle.loads(f.read())
        declarations = cache["_declarations"]
        int_constants = cache["_int_constants"]
    except Exception:  # pylint:disable=broad-except
        # Missing file, truncated/corrupt pickle, wrong shape, incompatible
        # classes... unpickling can raise almost anything. Treat every failure
        # as a cache miss and re-parse below.
        cache = None

    if cache is not None:
        ffi._parser._declarations = declarations
        ffi._parser._int_constants = int_constants
        return

    ffi.cdef(_ffi_str)
    cache = {
        "_declarations": ffi._parser._declarations,
        "_int_constants": ffi._parser._int_constants,
    }

    # Atomically publish the cache: write to a temp file, then rename over the
    # final name. Failing to write (read-only or full temp dir) must not
    # prevent the import, since the parser is already populated.
    temp_file_name = None
    try:
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            temp_file_name = temp_file.name
            temp_file.write(pickle.dumps(cache))
        os.replace(temp_file_name, cache_location)
    except OSError:
        if temp_file_name is not None:
            try:
                os.unlink(temp_file_name)
            except OSError:
                pass  # nothing more we can do about a stray temp file
class Register(Protocol):
    """
    A register. Pyvex should probably not have this dependency.

    Structural interface: the only member required here is ``name``.
    """

    name: str


class Arch(Protocol):
    """
    An architecture description.

    Structural interface listing the attributes and methods an architecture
    object must provide. Nothing here is implemented; the method bodies are
    placeholders.
    """

    name: str
    ip_offset: int  # presumably the register-file offset of the instruction pointer -- TODO confirm
    bits: int  # presumably the native word width in bits -- TODO confirm
    instruction_endness: str
    memory_endness: str
    byte_width: int
    register_list: list[Register]
    registers: dict[str, tuple[int, int]]  # presumably name -> (offset, size); verify against implementers

    # NOTE(review): returns None in some cases; presumably when no register matches -- confirm.
    def translate_register_name(self, offset: int, size: int | None = None) -> str | None: ...

    def get_register_offset(self, name: str) -> int: ...
def stable_hash(t: tuple) -> int:
    """
    Hash a tuple via MD5 of its serialized form.

    The tuple is serialized with ``_dump_tuple``, digested with MD5, and the
    first of the four 32-bit words unpacked from the digest is returned.
    """
    digest = md5lib.md5(_dump_tuple(t)).digest()
    first_word, *_ = md5_unpacker.unpack(digest)
    return first_word  # 32 bits
($(UNAME), Darwin) 3 | LIBRARY_FILE=libpyvex.dylib 4 | STATIC_LIBRARY_FILE=libpyvex.a 5 | LDFLAGS=-Wl,-install_name,@rpath/$(LIBRARY_FILE) 6 | endif 7 | ifeq ($(UNAME), Linux) 8 | LIBRARY_FILE=libpyvex.so 9 | STATIC_LIBRARY_FILE=libpyvex.a 10 | LDFLAGS=-Wl,-soname,$(LIBRARY_FILE) 11 | endif 12 | ifeq ($(UNAME), FreeBSD) 13 | LIBRARY_FILE=libpyvex.so 14 | STATIC_LIBRARY_FILE=libpyvex.a 15 | LDFLAGS=-Wl,-soname,$(LIBRARY_FILE) 16 | endif 17 | ifeq ($(UNAME), NetBSD) 18 | LIBRARY_FILE=libpyvex.so 19 | STATIC_LIBRARY_FILE=libpyvex.a 20 | LDFLAGS=-Wl,-soname,$(LIBRARY_FILE) 21 | endif 22 | ifeq ($(UNAME), OpenBSD) 23 | LIBRARY_FILE=libpyvex.so 24 | LDFLAGS=-Wl,-soname,$(LIBRARY_FILE) -L/usr/local/lib -lvex 25 | endif 26 | ifeq ($(findstring MINGW,$(UNAME)), MINGW) 27 | LIBRARY_FILE=pyvex.dll 28 | STATIC_LIBRARY_FILE=libpyvex.a 29 | LDFLAGS= 30 | endif 31 | 32 | # deeply evil 33 | # https://www.cmcrossroads.com/article/gnu-make-meets-file-names-spaces-them 34 | sp =$(null) $(null) 35 | qs = $(subst ?,$(sp),$1) 36 | sq = $(subst $(sp),?,$1) 37 | 38 | CC?=gcc 39 | AR=ar 40 | INCFLAGS=-I "$(VEX_INCLUDE_PATH)" 41 | CFLAGS=-g -O2 -Wall -shared -fPIC -std=c99 $(INCFLAGS) 42 | 43 | OBJECTS=pyvex.o logging.o analysis.o postprocess.o 44 | HEADERS=pyvex.h 45 | 46 | all: $(LIBRARY_FILE) $(STATIC_LIBRARY_FILE) 47 | 48 | %.o: %.c 49 | $(CC) -c $(CFLAGS) $< 50 | 51 | $(LIBRARY_FILE): $(OBJECTS) $(HEADERS) $(call sq,$(VEX_LIB_PATH)/libvex.a) 52 | $(CC) $(CFLAGS) -o $(LIBRARY_FILE) $(OBJECTS) "$(VEX_LIB_PATH)/libvex.a" $(LDFLAGS) 53 | 54 | $(STATIC_LIBRARY_FILE): $(OBJECTS) $(HEADERS) $(call sq,$(VEX_LIB_PATH)/libvex.a) 55 | $(AR) rcs $(STATIC_LIBRARY_FILE) $(OBJECTS) 56 | 57 | clean: 58 | rm -f $(LIBRARY_FILE) $(STATIC_LIBRARY_FILE) *.o 59 | -------------------------------------------------------------------------------- /pyvex_c/Makefile-msvc: -------------------------------------------------------------------------------- 1 | CC=cl 2 | INCFLAGS=/I "$(VEX_INCLUDE_PATH)" 3 | CFLAGS=/LD 
/O2 $(INCFLAGS) 4 | LDFLAGS=/link /DEF:pyvex.def 5 | 6 | pyvex.dll: postprocess.c analysis.c pyvex.c logging.c "$(VEX_LIB_FILE)" pyvex.h pyvex.def 7 | $(CC) $(CFLAGS) pyvex.c postprocess.c analysis.c logging.c "$(VEX_LIB_FILE)" $(LDFLAGS) 8 | 9 | clean: 10 | del pyvex.dll pyvex.lib pyvex.exp pyvex.obj logging.obj 11 | -------------------------------------------------------------------------------- /pyvex_c/README: -------------------------------------------------------------------------------- 1 | To generate the list of exports for windows: 2 | 3 | grep -E -o -h -r "pvc\.[a-zA-Z0-9_]+" | cut -c 5- | sort -u 4 | 5 | Then remove Ity_I8 and add vex_lift (called from __init__ where we can't use the name pvc to reference it) and sizeofIRType (called from... the unicorn compatibility layer I think?) 6 | -------------------------------------------------------------------------------- /pyvex_c/e4c_lite.h: -------------------------------------------------------------------------------- 1 | /* 2 | * exceptions4c lightweight version 1.0 3 | * 4 | * Copyright (c) 2014 Guillermo Calvo 5 | * Licensed under the GNU Lesser General Public License 6 | */ 7 | 8 | #ifndef EXCEPTIONS4C_LITE 9 | #define EXCEPTIONS4C_LITE 10 | 11 | #include 12 | #include 13 | 14 | /* Maximum number of nested `try` blocks */ 15 | #ifndef E4C_MAX_FRAMES 16 | # define E4C_MAX_FRAMES 16 17 | #endif 18 | 19 | /* Maximum length (in bytes) of an exception message */ 20 | #ifndef E4C_MESSAGE_SIZE 21 | # define E4C_MESSAGE_SIZE 128 22 | #endif 23 | 24 | /* Exception handling keywords: try/catch/finally/throw */ 25 | #ifndef E4C_NOKEYWORDS 26 | # define try E4C_TRY 27 | # define catch(type) E4C_CATCH(type) 28 | # define finally E4C_FINALLY 29 | # define throw(type, message) E4C_THROW(type, message) 30 | #endif 31 | 32 | /* Represents an exception type */ 33 | struct e4c_exception_type{ 34 | const char * name; 35 | const char * default_message; 36 | const struct e4c_exception_type * supertype; 37 | }; 38 | 39 | /* 
Declarations and definitions of exception types */ 40 | #define E4C_DECLARE_EXCEPTION(name) extern const struct e4c_exception_type name 41 | #define E4C_DEFINE_EXCEPTION(name, default_message, supertype) const struct e4c_exception_type name = { #name, default_message, &supertype } 42 | 43 | /* Predefined exception types */ 44 | E4C_DECLARE_EXCEPTION(RuntimeException); 45 | E4C_DECLARE_EXCEPTION(NullPointerException); 46 | 47 | /* Represents an instance of an exception type */ 48 | struct e4c_exception{ 49 | char message[E4C_MESSAGE_SIZE]; 50 | const char * file; 51 | int line; 52 | const struct e4c_exception_type * type; 53 | }; 54 | 55 | /* Retrieve current thrown exception */ 56 | #define E4C_EXCEPTION e4c.err 57 | 58 | /* Returns whether current exception is of a given type */ 59 | #define E4C_IS_INSTANCE_OF(t) ( e4c.err.type == &t || e4c_extends(e4c.err.type, &t) ) 60 | 61 | /* Implementation details */ 62 | #define E4C_TRY if(e4c_try(E4C_INFO) && setjmp(e4c.jump[e4c.frames - 1]) >= 0) while(e4c_hook(0)) if(e4c.frame[e4c.frames].stage == e4c_trying) 63 | #define E4C_CATCH(type) else if(e4c.frame[e4c.frames].stage == e4c_catching && E4C_IS_INSTANCE_OF(type) && e4c_hook(1)) 64 | #define E4C_FINALLY else if(e4c.frame[e4c.frames].stage == e4c_finalizing) 65 | #define E4C_THROW(type, message) e4c_throw(&type, E4C_INFO, message) 66 | #ifndef NDEBUG 67 | # define E4C_INFO __FILE__, __LINE__ 68 | #else 69 | # define E4C_INFO NULL, 0 70 | #endif 71 | 72 | enum e4c_stage{e4c_beginning, e4c_trying, e4c_catching, e4c_finalizing, e4c_done}; 73 | extern struct e4c_context{jmp_buf jump[E4C_MAX_FRAMES]; struct e4c_exception err; struct{unsigned char stage; unsigned char uncaught;} frame[E4C_MAX_FRAMES + 1]; int frames;} e4c; 74 | extern int e4c_try(const char * file, int line); 75 | extern int e4c_hook(int is_catch); 76 | extern int e4c_extends(const struct e4c_exception_type * child, const struct e4c_exception_type * parent); 77 | extern void e4c_throw(const struct 
e4c_exception_type * exception_type, const char * file, int line, const char * message); 78 | 79 | # endif 80 | -------------------------------------------------------------------------------- /pyvex_c/logging.c: -------------------------------------------------------------------------------- 1 | // This code is GPLed by Yan Shoshitaishvili 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "logging.h" 8 | 9 | int log_level = 50; 10 | 11 | void pyvex_debug(const char *fmt, ...) 12 | { 13 | if (log_level > 10) return; 14 | 15 | fprintf(stderr, "[[pyvex_c]]\tDEBUG:\t"); 16 | va_list args; 17 | va_start(args,fmt); 18 | vfprintf(stderr, fmt, args); 19 | va_end(args); 20 | 21 | fflush(stdout); 22 | } 23 | 24 | void pyvex_info(const char *fmt, ...) 25 | { 26 | if (log_level > 20) return; 27 | 28 | fprintf(stderr, "[[pyvex_c]]\tINFO:\t"); 29 | va_list args; 30 | va_start(args, fmt); 31 | vfprintf(stderr, fmt, args); 32 | va_end(args); 33 | 34 | fflush(stdout); 35 | } 36 | 37 | void pyvex_error(const char *fmt, ...) 
//
// Jumpkind fixes for ARM
//
// If PC is moved to LR, then this should be an Ijk_Call
//
// Example:
// MOV LR, PC
// MOV PC, R8
//
// Note that the value of PC is directly used in IRStatements, i.e
// instead of having:
//   t0 = GET:I32(pc)
//   PUT(lr) = t0
// we have:
//   PUT(lr) = 0x10400
// The only case (that I've seen so far) where a temporary variable
// is assigned to LR is:
//   t2 = ITE(cond, t0, t1)
//   PUT(lr) = t2
//
// How it works: the block is only examined when its default jumpkind is
// Ijk_Boring. A small constant-propagation pass tracks known values of
// temporaries and registers (up to MAX_TMP / MAX_REG_OFFSET; anything beyond
// is ignored) until the first PUT to LR is reached. If the value written to LR
// equals the address right after this block, either the last Exit statement
// or the block's default exit is re-tagged as Ijk_Call.
//
// irsb_insts is currently unused (its only use is in a commented-out
// condition below).
//
void arm_post_processor_determine_calls(
	Addr irsb_addr,  // Address of this IRSB
	Int irsb_size,  // Size of this IRSB
	Int irsb_insts,  // Number of instructions
	IRSB *irsb) {

// Offset to the link register
#define ARM_OFFB_LR offsetof(VexGuestARMState,guest_R14)
// The maximum number of tmps
#define MAX_TMP 1000
// The maximum offset of registers
#define MAX_REG_OFFSET 1000
// Dummy value: marks a tmp/register whose value is unknown. A real value that
// happens to equal it is indistinguishable from "unknown".
#define DUMMY 0xffeffeff

	if (irsb->jumpkind != Ijk_Boring) {
		return;
	}

	// Emulated CPU context
	Addr tmps[MAX_TMP + 1];
	Addr regs[MAX_REG_OFFSET + 1];

	// Initialize context
	Int i;

	for (i = 0; i <= MAX_TMP; ++i) {
		tmps[i] = DUMMY;
	}

	for (i = 0; i <= MAX_REG_OFFSET; ++i) {
		regs[i] = DUMMY;
	}

	Int lr_store_pc = 0;
	Int inst_ctr = 0;
	Int has_exit = 0;
	IRStmt *other_exit = NULL;
	Addr next_irsb_addr = (irsb_addr & (~1)) + irsb_size; // Clear the least significant bit
	Int is_thumb_mode = irsb_addr & 1;

	// if we pop {..,lr,...}; b xxx, I bet this isn't a boring jump!
	// First pass: remember the last Exit statement in the block (if any).
	for (i = 0; i < irsb->stmts_used; ++i) {
		IRStmt *stmt = irsb->stmts[i];
		if (stmt->tag == Ist_Exit){
			// HACK: FIXME: BLCC and friends set the default exit to Ijk_Boring
			// Yet, the call is there, and it's just fine.
			// We assume if the block has an exit AND lr stores PC, we're probably
			// doing one of those fancy BL-ish things.
			// Should work for BCC and friends though
			has_exit = 1;
			other_exit = stmt;
		}
	}


	// Second pass: propagate known values until the first PUT to LR.
	for (i = 0; i < irsb->stmts_used; ++i) {
		IRStmt *stmt = irsb->stmts[i];

		if (stmt->tag == Ist_Put) {
			// LR is modified just before the last instruction of the block...
			if (stmt->Ist.Put.offset == ARM_OFFB_LR /*&& inst_ctr == irsb_insts - 1*/) {
				// ... by a constant, so test whether it is the address of the next IRSB
				if (stmt->Ist.Put.data->tag == Iex_Const) {
					IRConst *con = stmt->Ist.Put.data->Iex.Const.con;
					if (get_value_from_const_expr(con) == next_irsb_addr) {
						lr_store_pc = 1;
					} else {
						lr_store_pc = 0;
					}
				} else if (stmt->Ist.Put.data->tag == Iex_RdTmp) {
					Int tmp = stmt->Ist.Put.data->Iex.RdTmp.tmp;
					if (tmp <= MAX_TMP && next_irsb_addr == tmps[tmp]) {
						lr_store_pc = 1;
					} else {
						lr_store_pc = 0;
					}
				}
				// Only the first write to LR is considered; stop scanning.
				break;
			}
			else {
				// Any other register: record its value if it is known.
				Int reg_offset = stmt->Ist.Put.offset;
				if (reg_offset <= MAX_REG_OFFSET) {
					IRExpr *data = stmt->Ist.Put.data;
					if (data->tag == Iex_Const) {
						regs[reg_offset] = get_value_from_const_expr(stmt->Ist.Put.data->Iex.Const.con);
					} else if (data->tag == Iex_RdTmp) {
						Int tmp = data->Iex.RdTmp.tmp;
						if (tmp <= MAX_TMP && tmps[tmp] != DUMMY) {
							regs[reg_offset] = tmps[tmp];
						}
					} else if (data->tag == Iex_Get) {
						Int src_reg = data->Iex.Get.offset;
						if (src_reg <= MAX_REG_OFFSET && regs[src_reg] != DUMMY) {
							regs[reg_offset] = regs[src_reg];
						}
					}
				}
			}
		}
		else if (stmt->tag == Ist_WrTmp && stmt->Ist.WrTmp.tmp <= MAX_TMP) {
			// The PC value may propagate through the block, and since
			// LR is modified at the end of the block, the PC value have
			// to be incremented in order to match the address of the
			// next IRSB. So the only propagation ways that can lead to
			// a function call are:
			//
			//   - Iop_Add* operations (even "sub r0, #-4" is compiled
			//   as "add r0, #4")
			//   - Iop_And*, Iop_Or*, Iop_Xor*, Iop_Sh*, Iop_Not* (there
			//   may be some tricky and twisted ways to increment PC)
			//
			Int tmp_dst = stmt->Ist.WrTmp.tmp;
			if (stmt->Ist.WrTmp.data->tag == Iex_Binop) {
				IRExpr* data = stmt->Ist.WrTmp.data;
				Addr op0 = DUMMY, op1 = DUMMY;
				// Extract op0
				if (data->Iex.Binop.arg1->tag == Iex_Const) {
					op0 = get_value_from_const_expr(data->Iex.Binop.arg1->Iex.Const.con);
				} else if (data->Iex.Binop.arg1->tag == Iex_RdTmp) {
					Int tmp = data->Iex.Binop.arg1->Iex.RdTmp.tmp;
					if (tmp <= MAX_TMP && tmps[tmp] != DUMMY) {
						op0 = tmps[tmp];
					}
				}
				// Extract op1
				if (data->Iex.Binop.arg2->tag == Iex_Const) {
					op1 = get_value_from_const_expr(data->Iex.Binop.arg2->Iex.Const.con);
				} else if (data->Iex.Binop.arg2->tag == Iex_RdTmp) {
					Int tmp = data->Iex.Binop.arg2->Iex.RdTmp.tmp;
					if (tmp <= MAX_TMP && tmps[tmp] != DUMMY) {
						op1 = tmps[tmp];
					}
				}
				if (op0 != DUMMY && op1 != DUMMY) {
					// Both operands are loaded. Perform calculation.
					// NOTE(review): results are computed at the full width of
					// Addr regardless of the op's nominal width, and the
					// shift amounts are not range-checked -- confirm this is
					// acceptable for the values seen here.
					switch (data->Iex.Binop.op) {
						case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
							tmps[tmp_dst] = op0 + op1;
							break;
						case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
							tmps[tmp_dst] = op0 - op1;
							break;
						case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
							tmps[tmp_dst] = op0 & op1;
							break;
						case Iop_Or8: case Iop_Or16: case Iop_Or32: case Iop_Or64:
							tmps[tmp_dst] = op0 | op1;
							break;
						case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
							tmps[tmp_dst] = op0 ^ op1;
							break;
						case Iop_Shl8: case Iop_Shl16: case Iop_Shl32: case Iop_Shl64:
							tmps[tmp_dst] = op0 << op1;
							break;
						case Iop_Shr8: case Iop_Shr16: case Iop_Shr32: case Iop_Shr64:
						case Iop_Sar8: case Iop_Sar16: case Iop_Sar32: case Iop_Sar64:
							// Logical and arithmetic shifts share one code path.
							tmps[tmp_dst] = op0 >> op1;
							break;
						default:
							// Unsupported operation
							break;
					}
				}
			} else if (stmt->Ist.WrTmp.data->tag == Iex_Get) {
				Int reg_offset = stmt->Ist.WrTmp.data->Iex.Get.offset;
				if (reg_offset <= MAX_REG_OFFSET && regs[reg_offset] != DUMMY) {
					tmps[tmp_dst] = regs[reg_offset];
				}
			} else if (stmt->Ist.WrTmp.data->tag == Iex_ITE) {
				// Parse iftrue and iffalse
				// The condition is not evaluated: iffalse is recorded first
				// and iftrue overrides it whenever iftrue is resolvable.
				IRExpr *data = stmt->Ist.WrTmp.data;
				if (data->Iex.ITE.iffalse->tag == Iex_Const) {
					tmps[tmp_dst] = get_value_from_const_expr(data->Iex.ITE.iffalse->Iex.Const.con);
				} else if (data->Iex.ITE.iffalse->tag == Iex_RdTmp) {
					Int tmp = data->Iex.ITE.iffalse->Iex.RdTmp.tmp;
					if (tmp <= MAX_TMP && tmps[tmp] != DUMMY) {
						tmps[tmp_dst] = tmps[tmp];
					}
				}
				if (data->Iex.ITE.iftrue->tag == Iex_Const) {
					tmps[tmp_dst] = get_value_from_const_expr(data->Iex.ITE.iftrue->Iex.Const.con);
				} else if (data->Iex.ITE.iftrue->tag == Iex_RdTmp) {
					Int tmp = data->Iex.ITE.iftrue->Iex.RdTmp.tmp;
					if (tmp <= MAX_TMP && tmps[tmp] != DUMMY) {
						tmps[tmp_dst] = tmps[tmp];
					}
				}
			} else if (stmt->Ist.WrTmp.data->tag == Iex_RdTmp) {
				IRExpr *data = stmt->Ist.WrTmp.data;
				Int tmp = data->Iex.RdTmp.tmp;
				if (tmp <= MAX_TMP && tmps[tmp] != DUMMY) {
					tmps[tmp_dst] = tmps[tmp];
				}
			} else if (stmt->Ist.WrTmp.data->tag == Iex_Const) {
				IRConst *con = stmt->Ist.WrTmp.data->Iex.Const.con;
				tmps[tmp_dst] = get_value_from_const_expr(con);
			}
		}
		else if (stmt->tag == Ist_IMark) {
			// Counted, but the count is not consulted anywhere at present.
			inst_ctr++;
		}
	}

	if (lr_store_pc) {
		if (has_exit && // It has a non-default exit
			other_exit->Ist.Exit.jk == Ijk_Boring && // The non-default exit is a Boring jump
			get_value_from_const_expr(other_exit->Ist.Exit.dst) != next_irsb_addr + is_thumb_mode // The non-default exit is not skipping
																								  // the last instruction
			) {
			// Fix the not-default exit
			other_exit->Ist.Exit.jk = Ijk_Call;
		}
		else if (!has_exit || other_exit->Ist.Exit.jk != Ijk_Call) {
			//Fix the default exit
			irsb->jumpkind = Ijk_Call;
		}
	}

	// Undefine all defined values
#undef ARM_OFFB_LR
#undef MAX_TMP
#undef MAX_REG_OFFSET
#undef DUMMY
}
IRConst *dst = NULL; 278 | 279 | for (i = irsb->stmts_used - 1; i >= 0; --i) { 280 | IRStmt *stmt = irsb->stmts[i]; 281 | if (tmp_exit == INVALID) { 282 | // Looking for the Exit statement 283 | if (stmt->tag == Ist_Exit && 284 | stmt->Ist.Exit.jk == Ijk_Boring && 285 | stmt->Ist.Exit.guard->tag == Iex_RdTmp) { 286 | tmp_exit = stmt->Ist.Exit.guard->Iex.RdTmp.tmp; 287 | dst = stmt->Ist.Exit.dst; 288 | exit_stmt_idx = i; 289 | } 290 | } 291 | else if (stmt->tag == Ist_WrTmp && stmt->Ist.WrTmp.tmp == tmp_exit) { 292 | // Looking for the WrTmp statement 293 | IRExpr *data = stmt->Ist.WrTmp.data; 294 | if (data->tag == Iex_Binop && 295 | data->Iex.Binop.op == Iop_CmpEQ32 && 296 | data->Iex.Binop.arg1->tag == Iex_Const && 297 | data->Iex.Binop.arg2->tag == Iex_Const && 298 | get_value_from_const_expr(data->Iex.Binop.arg1->Iex.Const.con) == 299 | get_value_from_const_expr(data->Iex.Binop.arg2->Iex.Const.con)) { 300 | // We found it 301 | 302 | // Update the statements 303 | Int j; 304 | for (j = exit_stmt_idx; j < irsb->stmts_used - 1; ++j) { 305 | irsb->stmts[j] = irsb->stmts[j + 1]; 306 | } 307 | irsb->stmts_used -= 1; 308 | // Update the default of the IRSB 309 | irsb->next = IRExpr_Const(dst); 310 | } 311 | break; 312 | } 313 | } 314 | 315 | #undef INVALID 316 | } 317 | 318 | void irsb_insert(IRSB *irsb, IRStmt* stmt, Int i) { 319 | addStmtToIRSB(irsb, stmt); 320 | 321 | IRStmt *in_air = irsb->stmts[irsb->stmts_used - 1]; 322 | for (Int j = irsb->stmts_used - 1; j > i; j--) { 323 | irsb->stmts[j] = irsb->stmts[j-1]; 324 | } 325 | irsb->stmts[i] = in_air; 326 | } 327 | 328 | void zero_division_side_exits(IRSB *irsb) { 329 | Int i; 330 | Addr lastIp = -1; 331 | IRType addrTy = typeOfIRExpr(irsb->tyenv, irsb->next); 332 | IRConstTag addrConst = addrTy == Ity_I32 ? Ico_U32 : addrTy == Ity_I16 ? 
Ico_U16 : Ico_U64; 333 | IRType argty; 334 | IRTemp cmptmp; 335 | 336 | for (i = 0; i < irsb->stmts_used; i++) { 337 | IRStmt *stmt = irsb->stmts[i]; 338 | switch (stmt->tag) { 339 | case Ist_IMark: 340 | lastIp = stmt->Ist.IMark.addr; 341 | continue; 342 | case Ist_WrTmp: 343 | if (stmt->Ist.WrTmp.data->tag != Iex_Binop) { 344 | continue; 345 | } 346 | 347 | switch (stmt->Ist.WrTmp.data->Iex.Binop.op) { 348 | case Iop_DivU32: 349 | case Iop_DivS32: 350 | case Iop_DivU32E: 351 | case Iop_DivS32E: 352 | case Iop_DivModU64to32: 353 | case Iop_DivModS64to32: 354 | argty = Ity_I32; 355 | break; 356 | 357 | case Iop_DivU64: 358 | case Iop_DivS64: 359 | case Iop_DivU64E: 360 | case Iop_DivS64E: 361 | case Iop_DivModU128to64: 362 | case Iop_DivModS128to64: 363 | case Iop_DivModS64to64: 364 | argty = Ity_I64; 365 | break; 366 | 367 | // TODO YIKES 368 | //case Iop_DivF32: 369 | // argty = Ity_F32; 370 | 371 | //case Iop_DivF64: 372 | //case Iop_DivF64r32: 373 | // argty = Ity_F64; 374 | 375 | //case Iop_DivF128: 376 | // argty = Ity_F128; 377 | 378 | //case Iop_DivD64: 379 | // argty = Ity_D64; 380 | 381 | //case Iop_DivD128: 382 | // argty = Ity_D128; 383 | 384 | //case Iop_Div32Fx4: 385 | //case Iop_Div32F0x4: 386 | //case Iop_Div64Fx2: 387 | //case Iop_Div64F0x2: 388 | //case Iop_Div64Fx4: 389 | //case Iop_Div32Fx8: 390 | 391 | default: 392 | continue; 393 | } 394 | 395 | cmptmp = newIRTemp(irsb->tyenv, Ity_I1); 396 | irsb_insert(irsb, IRStmt_WrTmp(cmptmp, IRExpr_Binop(argty == Ity_I32 ? Iop_CmpEQ32 : Iop_CmpEQ64, stmt->Ist.WrTmp.data->Iex.Binop.arg2, IRExpr_Const(argty == Ity_I32 ? 
IRConst_U32(0) : IRConst_U64(0)))), i); 397 | i++; 398 | IRConst *failAddr = IRConst_U64(lastIp); // ohhhhh boy this is a hack 399 | failAddr->tag = addrConst; 400 | irsb_insert(irsb, IRStmt_Exit(IRExpr_RdTmp(cmptmp), Ijk_SigFPE_IntDiv, failAddr, irsb->offsIP), i); 401 | i++; 402 | break; 403 | 404 | default: 405 | continue; 406 | } 407 | } 408 | } 409 | 410 | -------------------------------------------------------------------------------- /pyvex_c/pyvex.c: -------------------------------------------------------------------------------- 1 | /* 2 | This is shamelessly ripped from Vine, because those guys have very very strange language preferences. 3 | Vine is Copyright (C) 2006-2009, BitBlaze Team. 4 | 5 | You can redistribute and modify it under the terms of the GNU GPL, 6 | version 2 or later, but it is made available WITHOUT ANY WARRANTY. 7 | See the top-level README file for more details. 8 | 9 | For more information about Vine and other BitBlaze software, see our 10 | web site at: http://bitblaze.cs.berkeley.edu/ 11 | */ 12 | 13 | //====================================================================== 14 | // 15 | // This file provides the interface to VEX that allows block by block 16 | // translation from binary to VEX IR. 
17 | // 18 | //====================================================================== 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include "pyvex.h" 28 | #include "pyvex_internal.h" 29 | #include "logging.h" 30 | 31 | //====================================================================== 32 | // 33 | // Globals 34 | // 35 | //====================================================================== 36 | 37 | // Some info required for translation 38 | VexArchInfo vai_host; 39 | VexGuestExtents vge; 40 | VexTranslateArgs vta; 41 | VexTranslateResult vtr; 42 | VexAbiInfo vbi; 43 | VexControl vc; 44 | 45 | // Log message buffer, from vex itself 46 | char *msg_buffer = NULL; 47 | size_t msg_capacity = 0, msg_current_size = 0; 48 | 49 | jmp_buf jumpout; 50 | 51 | //====================================================================== 52 | // 53 | // Functions needed for the VEX translation 54 | // 55 | //====================================================================== 56 | 57 | #ifdef _MSC_VER 58 | __declspec(noreturn) 59 | #else 60 | __attribute__((noreturn)) 61 | #endif 62 | static void failure_exit(void) { 63 | longjmp(jumpout, 1); 64 | } 65 | 66 | static void log_bytes(const HChar* bytes, SizeT nbytes) { 67 | if (msg_buffer == NULL) { 68 | msg_buffer = malloc(nbytes); 69 | msg_capacity = nbytes; 70 | } 71 | if (nbytes + msg_current_size > msg_capacity) { 72 | do { 73 | msg_capacity *= 2; 74 | } while (nbytes + msg_current_size > msg_capacity); 75 | msg_buffer = realloc(msg_buffer, msg_capacity); 76 | } 77 | 78 | memcpy(&msg_buffer[msg_current_size], bytes, nbytes); 79 | msg_current_size += nbytes; 80 | } 81 | 82 | void clear_log() { 83 | if (msg_buffer != NULL) { 84 | free(msg_buffer); 85 | msg_buffer = NULL; 86 | msg_capacity = 0; 87 | msg_current_size = 0; 88 | } 89 | } 90 | 91 | static Bool chase_into_ok(void *closureV, Addr addr64) { 92 | return False; 93 | } 94 | 95 | static UInt needs_self_check(void 
*callback_opaque, VexRegisterUpdates* pxControl, const VexGuestExtents *guest_extents) { 96 | return 0; 97 | } 98 | 99 | static void *dispatch(void) { 100 | return NULL; 101 | } 102 | 103 | 104 | //---------------------------------------------------------------------- 105 | // Initializes VEX 106 | // It must be called before using VEX for translation to Valgrind IR 107 | //---------------------------------------------------------------------- 108 | int vex_init() { 109 | static int initialized = 0; 110 | pyvex_debug("Initializing VEX.\n"); 111 | 112 | if (initialized) { 113 | pyvex_debug("VEX already initialized.\n"); 114 | return 1; 115 | } 116 | initialized = 1; 117 | 118 | // Initialize VEX 119 | LibVEX_default_VexControl(&vc); 120 | LibVEX_default_VexArchInfo(&vai_host); 121 | LibVEX_default_VexAbiInfo(&vbi); 122 | 123 | vc.iropt_verbosity = 0; 124 | vc.iropt_level = 0; // No optimization by default 125 | //vc.iropt_precise_memory_exns = False; 126 | vc.iropt_unroll_thresh = 0; 127 | vc.guest_max_insns = 1; // By default, we vex 1 instruction at a time 128 | vc.guest_chase_thresh = 0; 129 | vc.arm64_allow_reordered_writeback = 0; 130 | vc.x86_optimize_callpop_idiom = 0; 131 | vc.strict_block_end = 0; 132 | vc.special_instruction_support = 0; 133 | 134 | pyvex_debug("Calling LibVEX_Init()....\n"); 135 | if (setjmp(jumpout) == 0) { 136 | // the 0 is the debug level 137 | LibVEX_Init(&failure_exit, &log_bytes, 0, &vc); 138 | pyvex_debug("LibVEX_Init() done....\n"); 139 | } else { 140 | pyvex_debug("LibVEX_Init() failed catastrophically...\n"); 141 | return 0; 142 | } 143 | 144 | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 145 | vai_host.endness = VexEndnessLE; 146 | #else 147 | vai_host.endness = VexEndnessBE; 148 | #endif 149 | 150 | // various settings to make stuff work 151 | // ... former is set to 'unspecified', but gets set in vex_inst for archs which care 152 | // ... 
the latter two are for dealing with gs and fs in VEX 153 | vbi.guest_stack_redzone_size = 0; 154 | vbi.guest_amd64_assume_fs_is_const = True; 155 | vbi.guest_amd64_assume_gs_is_const = True; 156 | 157 | //------------------------------------ 158 | // options for instruction translation 159 | 160 | // 161 | // Architecture info 162 | // 163 | vta.arch_guest = VexArch_INVALID; // to be assigned later 164 | #if __amd64__ || _WIN64 165 | vta.arch_host = VexArchAMD64; 166 | #elif __i386__ || _WIN32 167 | vta.arch_host = VexArchX86; 168 | #elif __arm__ 169 | vta.arch_host = VexArchARM; 170 | vai_host.hwcaps = 7; 171 | #elif __aarch64__ 172 | vta.arch_host = VexArchARM64; 173 | #elif __s390x__ 174 | vta.arch_host = VexArchS390X; 175 | vai_host.hwcaps = VEX_HWCAPS_S390X_LDISP; 176 | #elif defined(__powerpc__) && defined(__NetBSD__) 177 | # if defined(__LONG_WIDTH__) && (__LONG_WIDTH__ == 32) 178 | vta.arch_host = VexArchPPC32; 179 | # endif 180 | #elif defined(__powerpc__) 181 | vta.arch_host = VexArchPPC64; 182 | #elif defined(__riscv) 183 | # if defined(__riscv_xlen) && (__riscv_xlen == 64) 184 | vta.arch_host = VexArchRISCV64; 185 | # endif 186 | #else 187 | #error "Unsupported host arch" 188 | #endif 189 | 190 | vta.archinfo_host = vai_host; 191 | 192 | // 193 | // The actual stuff to vex 194 | // 195 | vta.guest_bytes = NULL; // Set in vex_insts 196 | vta.guest_bytes_addr = 0; // Set in vex_insts 197 | 198 | // 199 | // callbacks 200 | // 201 | vta.callback_opaque = NULL; // Used by chase_into_ok, but never actually called 202 | vta.chase_into_ok = chase_into_ok; // Always returns false 203 | vta.preamble_function = NULL; 204 | vta.instrument1 = NULL; 205 | vta.instrument2 = NULL; 206 | vta.finaltidy = NULL; 207 | vta.needs_self_check = needs_self_check; 208 | 209 | vta.disp_cp_chain_me_to_slowEP = (void *)dispatch; // Not used 210 | vta.disp_cp_chain_me_to_fastEP = (void *)dispatch; // Not used 211 | vta.disp_cp_xindir = (void *)dispatch; // Not used 212 | 
vta.disp_cp_xassisted = (void *)dispatch; // Not used 213 | 214 | vta.guest_extents = &vge; 215 | vta.host_bytes = NULL; // Buffer for storing the output binary 216 | vta.host_bytes_size = 0; 217 | vta.host_bytes_used = NULL; 218 | // doesn't exist? vta.do_self_check = False; 219 | vta.traceflags = 0; // Debug verbosity 220 | //vta.traceflags = -1; // Debug verbosity 221 | return 1; 222 | } 223 | 224 | // Prepare the VexArchInfo struct 225 | static void vex_prepare_vai(VexArch arch, VexArchInfo *vai) { 226 | switch (arch) { 227 | case VexArchX86: 228 | vai->hwcaps = VEX_HWCAPS_X86_MMXEXT | 229 | VEX_HWCAPS_X86_SSE1 | 230 | VEX_HWCAPS_X86_SSE2 | 231 | VEX_HWCAPS_X86_SSE3 | 232 | VEX_HWCAPS_X86_LZCNT; 233 | break; 234 | case VexArchAMD64: 235 | vai->hwcaps = VEX_HWCAPS_AMD64_SSE3 | 236 | VEX_HWCAPS_AMD64_CX16 | 237 | VEX_HWCAPS_AMD64_LZCNT | 238 | VEX_HWCAPS_AMD64_AVX | 239 | VEX_HWCAPS_AMD64_RDTSCP | 240 | VEX_HWCAPS_AMD64_BMI | 241 | VEX_HWCAPS_AMD64_AVX2; 242 | break; 243 | case VexArchARM: 244 | vai->hwcaps = VEX_ARM_ARCHLEVEL(8) | 245 | VEX_HWCAPS_ARM_NEON | 246 | VEX_HWCAPS_ARM_VFP3; 247 | break; 248 | case VexArchARM64: 249 | vai->hwcaps = 0; 250 | vai->arm64_dMinLine_lg2_szB = 6; 251 | vai->arm64_iMinLine_lg2_szB = 6; 252 | break; 253 | case VexArchPPC32: 254 | vai->hwcaps = VEX_HWCAPS_PPC32_F | 255 | VEX_HWCAPS_PPC32_V | 256 | VEX_HWCAPS_PPC32_FX | 257 | VEX_HWCAPS_PPC32_GX | 258 | VEX_HWCAPS_PPC32_VX | 259 | VEX_HWCAPS_PPC32_DFP | 260 | VEX_HWCAPS_PPC32_ISA2_07; 261 | vai->ppc_icache_line_szB = 32; // unsure if correct 262 | break; 263 | case VexArchPPC64: 264 | vai->hwcaps = VEX_HWCAPS_PPC64_V | 265 | VEX_HWCAPS_PPC64_FX | 266 | VEX_HWCAPS_PPC64_GX | 267 | VEX_HWCAPS_PPC64_VX | 268 | VEX_HWCAPS_PPC64_DFP | 269 | VEX_HWCAPS_PPC64_ISA2_07; 270 | vai->ppc_icache_line_szB = 64; // unsure if correct 271 | break; 272 | case VexArchS390X: 273 | vai->hwcaps = 0; 274 | break; 275 | case VexArchMIPS32: 276 | case VexArchMIPS64: 277 | vai->hwcaps = 
VEX_PRID_COMP_CAVIUM; 278 | break; 279 | case VexArchRISCV64: 280 | vai->hwcaps = 0; 281 | break; 282 | default: 283 | pyvex_error("Invalid arch in vex_prepare_vai.\n"); 284 | break; 285 | } 286 | } 287 | 288 | // Prepare the VexAbiInfo 289 | static void vex_prepare_vbi(VexArch arch, VexAbiInfo *vbi) { 290 | // only setting the guest_stack_redzone_size for now 291 | // this attribute is only specified by the X86, AMD64 and PPC64 ABIs 292 | 293 | switch (arch) { 294 | case VexArchX86: 295 | vbi->guest_stack_redzone_size = 0; 296 | break; 297 | case VexArchAMD64: 298 | vbi->guest_stack_redzone_size = 128; 299 | break; 300 | case VexArchPPC64: 301 | vbi->guest_stack_redzone_size = 288; 302 | break; 303 | default: 304 | break; 305 | } 306 | } 307 | 308 | VEXLiftResult _lift_r; 309 | 310 | //---------------------------------------------------------------------- 311 | // Main entry point. Do a lift. 312 | //---------------------------------------------------------------------- 313 | VEXLiftResult *vex_lift( 314 | VexArch guest, 315 | VexArchInfo archinfo, 316 | unsigned char *insn_start, 317 | unsigned long long insn_addr, 318 | unsigned int max_insns, 319 | unsigned int max_bytes, 320 | int opt_level, 321 | int traceflags, 322 | int allow_arch_optimizations, 323 | int strict_block_end, 324 | int collect_data_refs, 325 | int load_from_ro_regions, 326 | int const_prop, 327 | VexRegisterUpdates px_control, 328 | unsigned int lookback) { 329 | VexRegisterUpdates pxControl = px_control; 330 | 331 | vex_prepare_vai(guest, &archinfo); 332 | vex_prepare_vbi(guest, &vbi); 333 | 334 | pyvex_debug("Guest arch: %d\n", guest); 335 | pyvex_debug("Guest arch hwcaps: %08x\n", archinfo.hwcaps); 336 | 337 | vta.archinfo_guest = archinfo; 338 | vta.arch_guest = guest; 339 | vta.abiinfo_both = vbi; // Set the vbi value 340 | 341 | vta.guest_bytes = (UChar *)(insn_start); // Ptr to actual bytes of start of instruction 342 | vta.guest_bytes_addr = (Addr64)(insn_addr); 343 | vta.traceflags = 
traceflags; 344 | 345 | vc.guest_max_bytes = max_bytes; 346 | vc.guest_max_insns = max_insns; 347 | vc.iropt_level = opt_level; 348 | vc.lookback_amount = lookback; 349 | 350 | // Gate all of these on one flag, they depend on the arch 351 | vc.arm_allow_optimizing_lookback = allow_arch_optimizations; 352 | vc.arm64_allow_reordered_writeback = allow_arch_optimizations; 353 | vc.x86_optimize_callpop_idiom = allow_arch_optimizations; 354 | 355 | vc.strict_block_end = strict_block_end; 356 | 357 | clear_log(); 358 | 359 | // Do the actual translation 360 | if (setjmp(jumpout) == 0) { 361 | LibVEX_Update_Control(&vc); 362 | _lift_r.is_noop_block = False; 363 | _lift_r.data_ref_count = 0; 364 | _lift_r.const_val_count = 0; 365 | _lift_r.irsb = LibVEX_Lift(&vta, &vtr, &pxControl); 366 | if (!_lift_r.irsb) { 367 | // Lifting failed 368 | return NULL; 369 | } 370 | remove_noops(_lift_r.irsb); 371 | if (guest == VexArchMIPS32) { 372 | // This post processor may potentially remove statements. 373 | // Call it before we get exit statements and such. 
374 | mips32_post_processor_fix_unconditional_exit(_lift_r.irsb); 375 | } 376 | get_exits_and_inst_addrs(_lift_r.irsb, &_lift_r); 377 | get_default_exit_target(_lift_r.irsb, &_lift_r); 378 | if (guest == VexArchARM && _lift_r.insts > 0) { 379 | arm_post_processor_determine_calls(_lift_r.inst_addrs[0], _lift_r.size, _lift_r.insts, _lift_r.irsb); 380 | } 381 | zero_division_side_exits(_lift_r.irsb); 382 | get_is_noop_block(_lift_r.irsb, &_lift_r); 383 | if (collect_data_refs || const_prop) { 384 | execute_irsb(_lift_r.irsb, &_lift_r, guest, (Bool)load_from_ro_regions, (Bool)collect_data_refs, (Bool)const_prop); 385 | } 386 | return &_lift_r; 387 | } else { 388 | return NULL; 389 | } 390 | } 391 | -------------------------------------------------------------------------------- /pyvex_c/pyvex.def: -------------------------------------------------------------------------------- 1 | LIBRARY pyvex.dll 2 | 3 | EXPORTS 4 | IRConst_F32 5 | IRConst_F32i 6 | IRConst_F64 7 | IRConst_F64i 8 | IRConst_U1 9 | IRConst_U16 10 | IRConst_U32 11 | IRConst_U64 12 | IRConst_U8 13 | IRConst_V128 14 | IRConst_V256 15 | IRExpr_Binder 16 | IRExpr_Binop 17 | IRExpr_CCall 18 | IRExpr_Const 19 | IRExpr_GSPTR 20 | IRExpr_Get 21 | IRExpr_GetI 22 | IRExpr_ITE 23 | IRExpr_Load 24 | IRExpr_Qop 25 | IRExpr_RdTmp 26 | IRExpr_Triop 27 | IRExpr_Unop 28 | IRExpr_VECRET 29 | emptyIRSB 30 | emptyIRTypeEnv 31 | log_level 32 | mkIRCallee 33 | mkIRExprVec_0 34 | mkIRExprVec_1 35 | mkIRExprVec_2 36 | mkIRExprVec_3 37 | mkIRExprVec_4 38 | mkIRExprVec_5 39 | mkIRExprVec_6 40 | mkIRExprVec_7 41 | mkIRExprVec_8 42 | mkIRRegArray 43 | msg_buffer 44 | msg_current_size 45 | newIRTemp 46 | typeOfIRExpr 47 | typeOfIRLoadGOp 48 | typeOfPrimop 49 | clear_log 50 | vex_lift 51 | vex_init 52 | register_readonly_region 53 | deregister_all_readonly_regions 54 | register_initial_register_value 55 | reset_initial_register_values 56 | sizeofIRType 57 | 
-------------------------------------------------------------------------------- /pyvex_c/pyvex.h: -------------------------------------------------------------------------------- 1 | // This code is GPLed by Yan Shoshitaishvili 2 | 3 | #ifndef __VEXIR_H 4 | #define __VEXIR_H 5 | 6 | #include 7 | 8 | // Some info required for translation 9 | extern int log_level; 10 | extern VexTranslateArgs vta; 11 | 12 | extern char *msg_buffer; 13 | extern size_t msg_current_size; 14 | void clear_log(void); 15 | 16 | // 17 | // Initializes VEX. This function must be called before vex_lift 18 | // can be used. 19 | // 20 | int vex_init(void); 21 | 22 | typedef struct _ExitInfo { 23 | Int stmt_idx; 24 | Addr ins_addr; 25 | IRStmt *stmt; 26 | } ExitInfo; 27 | 28 | typedef enum { 29 | Dt_Unknown = 0x9000, 30 | Dt_Integer, 31 | Dt_FP, 32 | Dt_StoreInteger 33 | } DataRefTypes; 34 | 35 | typedef struct _DataRef { 36 | Addr data_addr; 37 | Int size; 38 | DataRefTypes data_type; 39 | Int stmt_idx; 40 | Addr ins_addr; 41 | } DataRef; 42 | 43 | typedef struct _ConstVal { 44 | Int tmp; 45 | Int stmt_idx; 46 | ULong value; // 64-bit max 47 | } ConstVal; 48 | 49 | #define MAX_EXITS 400 50 | #define MAX_DATA_REFS 2000 51 | #define MAX_CONST_VALS 1000 52 | 53 | typedef struct _VEXLiftResult { 54 | IRSB* irsb; 55 | Int size; 56 | Bool is_noop_block; 57 | // Conditional exits 58 | Int exit_count; 59 | ExitInfo exits[MAX_EXITS]; 60 | // The default exit 61 | Int is_default_exit_constant; 62 | Addr default_exit; 63 | // Instruction addresses 64 | Int insts; 65 | Addr inst_addrs[200]; 66 | // Data references 67 | Int data_ref_count; 68 | DataRef data_refs[MAX_DATA_REFS]; 69 | // Constant propagation 70 | Int const_val_count; 71 | ConstVal const_vals[MAX_CONST_VALS]; 72 | } VEXLiftResult; 73 | 74 | VEXLiftResult *vex_lift( 75 | VexArch guest, 76 | VexArchInfo archinfo, 77 | unsigned char *insn_start, 78 | unsigned long long insn_addr, 79 | unsigned int max_insns, 80 | unsigned int max_bytes, 81 | int 
opt_level, 82 | int traceflags, 83 | int allow_arch_optimizations, 84 | int strict_block_end, 85 | int collect_data_refs, 86 | int load_from_ro_regions, 87 | int const_prop, 88 | VexRegisterUpdates px_control, 89 | unsigned int lookback_amount); 90 | 91 | Bool register_readonly_region(ULong start, ULong size, unsigned char* content); 92 | void deregister_all_readonly_regions(); 93 | Bool register_initial_register_value(UInt offset, UInt size, ULong value); 94 | Bool reset_initial_register_values(); 95 | 96 | #endif 97 | -------------------------------------------------------------------------------- /pyvex_c/pyvex_internal.h: -------------------------------------------------------------------------------- 1 | #include "pyvex.h" 2 | 3 | void arm_post_processor_determine_calls(Addr irsb_addr, Int irsb_size, Int irsb_insts, IRSB *irsb); 4 | void mips32_post_processor_fix_unconditional_exit(IRSB *irsb); 5 | 6 | void remove_noops(IRSB* irsb); 7 | void zero_division_side_exits(IRSB* irsb); 8 | void get_exits_and_inst_addrs(IRSB *irsb, VEXLiftResult *lift_r); 9 | void get_default_exit_target(IRSB *irsb, VEXLiftResult *lift_r); 10 | void get_is_noop_block(IRSB *irsb, VEXLiftResult *lift_r); 11 | void execute_irsb(IRSB *irsb, VEXLiftResult *lift_r, VexArch guest, Bool load_from_ro_regions, Bool collect_data_refs, Bool const_prop); 12 | Addr get_value_from_const_expr(IRConst* con); 13 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-name-in-module,import-error,missing-class-docstring 2 | import glob 3 | import multiprocessing 4 | import os 5 | import platform 6 | import shutil 7 | import subprocess 8 | import sys 9 | from distutils.command.build import build as st_build 10 | from distutils.util import get_platform 11 | 12 | from setuptools import setup 13 | from setuptools.command.develop import develop as st_develop 14 | from 
setuptools.command.sdist import sdist as st_sdist 15 | from setuptools.errors import LibError 16 | 17 | PROJECT_DIR = os.path.dirname(os.path.realpath(__file__)) 18 | LIB_DIR = os.path.join(PROJECT_DIR, "pyvex", "lib") 19 | INCLUDE_DIR = os.path.join(PROJECT_DIR, "pyvex", "include") 20 | 21 | 22 | if sys.platform in ("win32", "cygwin"): 23 | LIBRARY_FILE = "pyvex.dll" 24 | STATIC_LIBRARY_FILE = "pyvex.lib" 25 | elif sys.platform == "darwin": 26 | LIBRARY_FILE = "libpyvex.dylib" 27 | STATIC_LIBRARY_FILE = "libpyvex.a" 28 | else: 29 | LIBRARY_FILE = "libpyvex.so" 30 | STATIC_LIBRARY_FILE = "libpyvex.a" 31 | 32 | 33 | VEX_LIB_NAME = "vex" # can also be vex-amd64-linux 34 | VEX_PATH = os.path.abspath(os.path.join(PROJECT_DIR, "vex")) 35 | 36 | 37 | def _build_vex(): 38 | if len(os.listdir(VEX_PATH)) == 0: 39 | raise LibError( 40 | "vex submodule not cloned correctly, aborting.\nThis may be fixed with `git submodule update --init`" 41 | ) 42 | 43 | e = os.environ.copy() 44 | e["MULTIARCH"] = "1" 45 | e["DEBUG"] = "1" 46 | 47 | if sys.platform == "win32": 48 | cmd = ["nmake", "/f", "Makefile-msvc", "all"] 49 | elif shutil.which("gmake") is not None: 50 | cmd = ["gmake", "-f", "Makefile-gcc", "-j", str(multiprocessing.cpu_count()), "all"] 51 | else: 52 | cmd = ["make", "-f", "Makefile-gcc", "-j", str(multiprocessing.cpu_count()), "all"] 53 | 54 | try: 55 | subprocess.run(cmd, cwd=VEX_PATH, env=e, check=True) 56 | except FileNotFoundError as err: 57 | raise LibError("Couldn't find " + cmd[0] + " in PATH") from err 58 | except subprocess.CalledProcessError as err: 59 | raise LibError("Error while building libvex: " + str(err)) from err 60 | 61 | 62 | def _build_pyvex(): 63 | e = os.environ.copy() 64 | e["VEX_LIB_PATH"] = VEX_PATH 65 | e["VEX_INCLUDE_PATH"] = os.path.join(VEX_PATH, "pub") 66 | e["VEX_LIB_FILE"] = os.path.join(VEX_PATH, "libvex.lib") 67 | 68 | if sys.platform == "win32": 69 | cmd = ["nmake", "/f", "Makefile-msvc"] 70 | elif shutil.which("gmake") is not None: 
71 | cmd = ["gmake", "-f", "Makefile", "-j", str(multiprocessing.cpu_count())] 72 | else: 73 | cmd = ["make", "-f", "Makefile", "-j", str(multiprocessing.cpu_count())] 74 | 75 | try: 76 | subprocess.run(cmd, cwd="pyvex_c", env=e, check=True) 77 | except FileNotFoundError as err: 78 | raise LibError("Couldn't find " + cmd[0] + " in PATH") from err 79 | except subprocess.CalledProcessError as err: 80 | raise LibError("Error while building libpyvex: " + str(err)) from err 81 | 82 | 83 | def _shuffle_files(): 84 | shutil.rmtree(LIB_DIR, ignore_errors=True) 85 | shutil.rmtree(INCLUDE_DIR, ignore_errors=True) 86 | os.mkdir(LIB_DIR) 87 | os.mkdir(INCLUDE_DIR) 88 | 89 | pyvex_c_dir = os.path.join(PROJECT_DIR, "pyvex_c") 90 | 91 | shutil.copy(os.path.join(pyvex_c_dir, LIBRARY_FILE), LIB_DIR) 92 | shutil.copy(os.path.join(pyvex_c_dir, STATIC_LIBRARY_FILE), LIB_DIR) 93 | shutil.copy(os.path.join(pyvex_c_dir, "pyvex.h"), INCLUDE_DIR) 94 | for f in glob.glob(os.path.join(VEX_PATH, "pub", "*")): 95 | shutil.copy(f, INCLUDE_DIR) 96 | 97 | 98 | def _clean_bins(): 99 | shutil.rmtree(LIB_DIR, ignore_errors=True) 100 | shutil.rmtree(INCLUDE_DIR, ignore_errors=True) 101 | 102 | 103 | def _build_ffi(): 104 | sys.path.append(".") # PEP 517 doesn't include . 
in sys.path 105 | import make_ffi # pylint: disable=import-outside-toplevel 106 | 107 | sys.path.pop() 108 | 109 | make_ffi.doit(os.path.join(VEX_PATH, "pub")) 110 | 111 | 112 | class build(st_build): 113 | def run(self, *args): 114 | self.execute(_build_vex, (), msg="Building libVEX") 115 | self.execute(_build_pyvex, (), msg="Building libpyvex") 116 | self.execute(_shuffle_files, (), msg="Copying libraries and headers") 117 | self.execute(_build_ffi, (), msg="Creating CFFI defs file") 118 | super().run(*args) 119 | 120 | 121 | class develop(st_develop): 122 | def run(self): 123 | self.run_command("build") 124 | super().run() 125 | 126 | 127 | class sdist(st_sdist): 128 | def run(self, *args): 129 | self.execute(_clean_bins, (), msg="Removing binaries") 130 | super().run(*args) 131 | 132 | 133 | cmdclass = { 134 | "build": build, 135 | "develop": develop, 136 | "sdist": sdist, 137 | } 138 | 139 | try: 140 | from setuptools.command.editable_wheel import editable_wheel as st_editable_wheel 141 | 142 | class editable_wheel(st_editable_wheel): 143 | def run(self): 144 | self.run_command("build") 145 | super().run() 146 | 147 | cmdclass["editable_wheel"] = editable_wheel 148 | except ModuleNotFoundError: 149 | pass 150 | 151 | if "bdist_wheel" in sys.argv and "--plat-name" not in sys.argv: 152 | sys.argv.append("--plat-name") 153 | name = get_platform() 154 | if "linux" in name: 155 | sys.argv.append("manylinux2014_" + platform.machine()) 156 | else: 157 | # https://www.python.org/dev/peps/pep-0425/ 158 | sys.argv.append(name.replace(".", "_").replace("-", "_")) 159 | 160 | setup(cmdclass=cmdclass) 161 | -------------------------------------------------------------------------------- /tests/test_arm_postprocess.py: -------------------------------------------------------------------------------- 1 | import pyvex 2 | 3 | 4 | ########################## 5 | ### ARM Postprocessing ### 6 | ########################## 7 | def test_arm_postprocess_call(): 8 | for i in range(3): 
9 | # Thumb 10 | 11 | # push {r7} 12 | # add r7, sp, #0 13 | # mov.w r1, #6 14 | # mov r0, pc 15 | # add.w lr, r0, r1 16 | # b.w 10408 17 | irsb = pyvex.IRSB( 18 | data=(b"\x80\xb4" b"\x00\xaf" b"\x4f\xf0\x06\x01" b"\x78\x46" b"\x00\xeb\x01\x0e" b"\xff\xf7\xec\xbf"), 19 | mem_addr=0x1041F, 20 | arch=pyvex.ARCH_ARM_LE, 21 | num_inst=6, 22 | bytes_offset=1, 23 | opt_level=i, 24 | ) 25 | assert irsb.jumpkind == "Ijk_Call" 26 | 27 | # mov lr, pc 28 | # b.w 10408 29 | irsb = pyvex.IRSB( 30 | data=(b"\xfe\x46" b"\xe9\xe7"), 31 | mem_addr=0x10431, 32 | arch=pyvex.ARCH_ARM_LE, 33 | num_inst=2, 34 | bytes_offset=1, 35 | opt_level=i, 36 | ) 37 | assert irsb.jumpkind == "Ijk_Call" 38 | 39 | # add r2, pc, #0 40 | # add.w lr, r2, #4 41 | # ldr.w pc, [pc, #52] 42 | irsb = pyvex.IRSB( 43 | data=(b"\x00\xa2" b"\x02\xf1\x06\x0e" b"\xdf\xf8\x34\xf0"), 44 | mem_addr=0x10435, 45 | arch=pyvex.ARCH_ARM_LE, 46 | num_inst=3, 47 | bytes_offset=1, 48 | opt_level=i, 49 | ) 50 | assert irsb.jumpkind == "Ijk_Call" 51 | 52 | # ldr r0, [pc, #48] 53 | # mov r1, pc 54 | # add.w r2, r1, #4 55 | # add.w r3, r2, #4 56 | # add.w r4, r3, #4 57 | # add.w lr, r4, #4 58 | # mov pc, r0 59 | irsb = pyvex.IRSB( 60 | data=( 61 | b"\x0c\x48" 62 | b"\x79\x46" 63 | b"\x01\xf1\x04\x02" 64 | b"\x02\xf1\x04\x03" 65 | b"\x03\xf1\x04\x04" 66 | b"\x04\xf1\x04\x0e" 67 | b"\x87\x46" 68 | ), 69 | mem_addr=0x1043F, 70 | arch=pyvex.ARCH_ARM_LE, 71 | num_inst=7, 72 | bytes_offset=1, 73 | opt_level=i, 74 | ) 75 | assert irsb.jumpkind == "Ijk_Call" 76 | 77 | # eor.w r0, r0, r0 78 | # mov lr, pc 79 | # b.n 10460 80 | irsb = pyvex.IRSB( 81 | data=(b"\x80\xea\x00\x00" b"\x86\x46" b"\x01\xe0"), 82 | mem_addr=0x10455, 83 | arch=pyvex.ARCH_ARM_LE, 84 | num_inst=3, 85 | bytes_offset=1, 86 | opt_level=i, 87 | ) 88 | assert irsb.jumpkind == "Ijk_Boring" 89 | 90 | # Thumb compiled with optimizations (gcc -O2) 91 | 92 | # mov.w r1, #6 93 | # mov r0, pc 94 | # add.w lr, r0, r1 95 | # b.w 104bc 96 | irsb = pyvex.IRSB( 97 | 
data=(b"\x4f\xf0\x06\x01" b"\x78\x46" b"\x00\xeb\x01\x0e" b"\x00\xf0\xc5\xb8"), 98 | mem_addr=0x10325, 99 | arch=pyvex.ARCH_ARM_LE, 100 | num_inst=4, 101 | bytes_offset=1, 102 | opt_level=i, 103 | ) 104 | assert irsb.jumpkind == "Ijk_Call" 105 | 106 | # ldr r0, [pc, #56] 107 | # mov r1, pc 108 | # add.w r2, r1, #4 109 | # add.w r3, r2, #4 110 | # add.w r4, r3, #4 111 | # add.w lr, r4, #4 112 | # mov pc, r0 113 | irsb = pyvex.IRSB( 114 | data=( 115 | b"\x0e\x48" 116 | b"\x79\x46" 117 | b"\x01\xf1\x04\x02" 118 | b"\x02\xf1\x04\x03" 119 | b"\x03\xf1\x04\x04" 120 | b"\x04\xf1\x04\x0e" 121 | b"\x87\x46" 122 | ), 123 | mem_addr=0x10333, 124 | arch=pyvex.ARCH_ARM_LE, 125 | num_inst=7, 126 | bytes_offset=1, 127 | opt_level=i, 128 | ) 129 | assert irsb.jumpkind == "Ijk_Call" 130 | 131 | # add r2, pc, #0 132 | # add.w lr, r2, #6 133 | # ldr.w pc, [pc, #28] 134 | irsb = pyvex.IRSB( 135 | data=(b"\x00\xa2" b"\x02\xf1\x06\x0e" b"\xdf\xf8\x1c\xf0"), 136 | mem_addr=0x10349, 137 | arch=pyvex.ARCH_ARM_LE, 138 | num_inst=3, 139 | bytes_offset=1, 140 | opt_level=i, 141 | ) 142 | assert irsb.jumpkind == "Ijk_Call" 143 | 144 | # mov lr, pc 145 | # b.w 104bc 146 | irsb = pyvex.IRSB( 147 | data=(b"\xfe\x46" b"\xb2\xe0"), 148 | mem_addr=0x10353, 149 | arch=pyvex.ARCH_ARM_LE, 150 | num_inst=2, 151 | bytes_offset=1, 152 | opt_level=i, 153 | ) 154 | assert irsb.jumpkind == "Ijk_Call" 155 | 156 | # eor.w r0, r0, r0 157 | # mov lr, pc 158 | # b.n 10362 159 | irsb = pyvex.IRSB( 160 | data=(b"\x80\xea\x00\x00" b"\x86\x46" b"\x01\xe0"), 161 | mem_addr=0x10357, 162 | arch=pyvex.ARCH_ARM_LE, 163 | num_inst=3, 164 | bytes_offset=1, 165 | opt_level=i, 166 | ) 167 | assert irsb.jumpkind == "Ijk_Boring" 168 | 169 | # ARM compiled with optimizations (gcc -O2) 170 | 171 | # mov r1, #4 172 | # mov r0, pc 173 | # add lr, r0, r1 174 | # ldr pc, [pc, #56] 175 | irsb = pyvex.IRSB( 176 | data=(b"\x04\x10\xa0\xe3" b"\x0f\x00\xa0\xe1" b"\x01\xe0\x80\xe0" b"\x38\xf0\x9f\xe5"), 177 | mem_addr=0x10298, 178 | 
arch=pyvex.ARCH_ARM_LE, 179 | num_inst=4, 180 | opt_level=i, 181 | ) 182 | assert irsb.jumpkind == "Ijk_Call" 183 | 184 | # add r1, pc, #0 185 | # add r2, r1, #4 186 | # add r3, r2, #4 187 | # add r4, r3, #4 188 | # add lr, r4, #4 189 | # b 10414 190 | irsb = pyvex.IRSB( 191 | data=( 192 | b"\x00\x10\x8f\xe2" 193 | b"\x04\x20\x81\xe2" 194 | b"\x04\x30\x82\xe2" 195 | b"\x04\x40\x83\xe2" 196 | b"\x04\xe0\x84\xe2" 197 | b"\x54\x00\x00\xea" 198 | ), 199 | mem_addr=0x102A8, 200 | arch=pyvex.ARCH_ARM_LE, 201 | num_inst=6, 202 | opt_level=i, 203 | ) 204 | assert irsb.jumpkind == "Ijk_Call" 205 | 206 | # mov lr, pc 207 | # b 10414 208 | irsb = pyvex.IRSB( 209 | data=(b"\x0f\xe0\xa0\xe1" b"\x52\x00\x00\xea"), 210 | mem_addr=0x102C0, 211 | arch=pyvex.ARCH_ARM_LE, 212 | num_inst=2, 213 | opt_level=i, 214 | ) 215 | assert irsb.jumpkind == "Ijk_Call" 216 | 217 | # eor r0, r0, r0 218 | # mov lr, r0 219 | # b 102d8 220 | irsb = pyvex.IRSB( 221 | data=(b"\x00\x00\x20\xe0" b"\x00\xe0\xa0\xe1" b"\x00\x00\x00\xea"), 222 | mem_addr=0x102C8, 223 | arch=pyvex.ARCH_ARM_LE, 224 | num_inst=3, 225 | opt_level=i, 226 | ) 227 | assert irsb.jumpkind == "Ijk_Boring" 228 | 229 | # ARM 230 | 231 | # push {fp} 232 | # add fp, sp, #0 233 | # mov r1, #4 234 | # mov r0, pc 235 | # add lr, r0, r1 236 | # ldr pc, [pc, #68] 237 | irsb = pyvex.IRSB( 238 | data=( 239 | b"\x04\xb0\x2d\xe5" 240 | b"\x00\xb0\x8d\xe2" 241 | b"\x04\x10\xa0\xe3" 242 | b"\x0f\x00\xa0\xe1" 243 | b"\x01\xe0\x80\xe0" 244 | b"\x44\xf0\x9f\xe5" 245 | ), 246 | mem_addr=0x103E8, 247 | arch=pyvex.ARCH_ARM_LE, 248 | num_inst=6, 249 | opt_level=i, 250 | ) 251 | assert irsb.jumpkind == "Ijk_Call" 252 | 253 | # add r1, pc, #0 254 | # add r2, r1, #4 255 | # add r3, r2, #4 256 | # add r4, r3, #4 257 | # add lr, r4, #4 258 | # b 103c4 259 | irsb = pyvex.IRSB( 260 | data=( 261 | b"\x00\x10\x8f\xe2" 262 | b"\x04\x20\x81\xe2" 263 | b"\x04\x30\x82\xe2" 264 | b"\x04\x40\x83\xe2" 265 | b"\x04\xe0\x84\xe2" 266 | b"\x54\xff\xff\xea" 267 | ), 268 | 
mem_addr=0x10400, 269 | arch=pyvex.ARCH_ARM_LE, 270 | num_inst=6, 271 | opt_level=i, 272 | ) 273 | assert irsb.jumpkind == "Ijk_Call" 274 | 275 | # mov lr, pc 276 | # b 103c4 277 | irsb = pyvex.IRSB( 278 | data=(b"\x0f\xe0\xa0\xe1" b"\xe8\xff\xff\xea"), 279 | mem_addr=0x10418, 280 | arch=pyvex.ARCH_ARM_LE, 281 | num_inst=2, 282 | opt_level=i, 283 | ) 284 | assert irsb.jumpkind == "Ijk_Call" 285 | 286 | # eor r0, r0, r0 287 | # mov lr, r0 288 | # b 10430 289 | irsb = pyvex.IRSB( 290 | data=(b"\x00\x00\x20\xe0" b"\x00\xe0\xa0\xe1" b"\x00\x00\x00\xea"), 291 | mem_addr=0x10420, 292 | arch=pyvex.ARCH_ARM_LE, 293 | num_inst=3, 294 | opt_level=i, 295 | ) 296 | assert irsb.jumpkind == "Ijk_Boring" 297 | 298 | # From a "real thing" compiled with armc 299 | # ARM: 300 | # 301 | irsb = pyvex.IRSB( 302 | data=( 303 | b"H\x10\x9b\xe5" 304 | b"\x0b\x00\xa0\xe1" 305 | b"\x04 \x91\xe5" 306 | b"\x04\xe0\x8f\xe2" 307 | b"\x01\x10\x82\xe0" 308 | b"\x01\xf0\xa0\xe1" 309 | ), 310 | mem_addr=0x264B4C, 311 | arch=pyvex.ARCH_ARM_LE, 312 | num_inst=6, 313 | opt_level=i, 314 | ) 315 | assert irsb.jumpkind == "Ijk_Call" 316 | 317 | # 400000 str lr, [sp,#-0x4]! 
318 | # 400004 mov r1, #0xa 319 | # 400008 cmp r0, r1 320 | # 40000c blne #FunctionB 321 | irsb = pyvex.IRSB( 322 | data=bytes.fromhex("04e02de50a10a0e3010050e10100001b"), 323 | mem_addr=0x400000, 324 | arch=pyvex.ARCH_ARM_LE, 325 | num_inst=4, 326 | opt_level=i, 327 | ) 328 | assert len(irsb.exit_statements) == 1 329 | assert irsb.exit_statements[0][2].jumpkind == "Ijk_Call" 330 | assert irsb.jumpkind == "Ijk_Boring" 331 | 332 | 333 | def test_arm_postprocess_ret(): 334 | for i in range(3): 335 | # e91ba8f0 336 | # ldmdb R11, {R4,R11,SP,PC} 337 | irsb = pyvex.IRSB( 338 | data=b"\xe9\x1b\xa8\xf0", 339 | mem_addr=0xED4028, 340 | arch=pyvex.ARCH_ARM_BE_LE, 341 | num_inst=1, 342 | opt_level=i, 343 | ) 344 | assert irsb.jumpkind == "Ijk_Ret" 345 | 346 | # e91badf0 347 | # ldmdb R11, {R4-R8,R10,R11,SP,PC} 348 | irsb = pyvex.IRSB( 349 | data=b"\xe9\x1b\xa8\xf0", 350 | mem_addr=0x4D4028, 351 | arch=pyvex.ARCH_ARM_BE_LE, 352 | num_inst=1, 353 | opt_level=i, 354 | ) 355 | assert irsb.jumpkind == "Ijk_Ret" 356 | 357 | # 00a89de8 358 | # ldmfd SP, {R11,SP,PC} 359 | # Fixed by Fish in the VEX fork, commit 43c78f608490f9a5c71c7fca87c04759c1b93741 360 | irsb = pyvex.IRSB( 361 | data=b"\x00\xa8\x9d\xe8", 362 | mem_addr=0xC800B57C, 363 | arch=pyvex.ARCH_ARM_BE, 364 | num_inst=1, 365 | opt_level=1, 366 | ) 367 | assert irsb.jumpkind == "Ijk_Ret" 368 | 369 | 370 | if __name__ == "__main__": 371 | test_arm_postprocess_call() 372 | test_arm_postprocess_ret() 373 | -------------------------------------------------------------------------------- /tests/test_gym.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-class-docstring 2 | import unittest 3 | 4 | import pyvex 5 | 6 | 7 | class Tests(unittest.TestCase): 8 | def test_x86_aam(self): 9 | irsb = pyvex.lift(b"\xd4\x0b", 0, pyvex.ARCH_X86) 10 | self.assertEqual(irsb.jumpkind, "Ijk_Boring") 11 | self.assertEqual(irsb.size, 2) 12 | 13 | def test_x86_aad(self): 14 | irsb = 
pyvex.lift(b"\xd5\x0b", 0, pyvex.ARCH_X86) 15 | self.assertEqual(irsb.jumpkind, "Ijk_Boring") 16 | self.assertEqual(irsb.size, 2) 17 | 18 | def test_x86_xgetbv(self): 19 | irsb = pyvex.lift(b"\x0f\x01\xd0", 0, pyvex.ARCH_X86) 20 | self.assertEqual(irsb.jumpkind, "Ijk_Boring") 21 | self.assertEqual(irsb.size, 3) 22 | 23 | def test_x86_rdmsr(self): 24 | irsb = pyvex.lift(b"\x0f\x32", 0, pyvex.ARCH_X86) 25 | self.assertEqual(irsb.jumpkind, "Ijk_Boring") 26 | self.assertEqual(irsb.size, 2) 27 | 28 | 29 | if __name__ == "__main__": 30 | unittest.main() 31 | -------------------------------------------------------------------------------- /tests/test_irsb_property_caching.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-class-docstring,no-self-use 2 | import unittest 3 | 4 | import pyvex 5 | 6 | 7 | class TestCacheInvalidationOnExtend(unittest.TestCase): 8 | def test_cache_invalidation_on_extend(self): 9 | b = pyvex.block.IRSB(b"\x50", 0, pyvex.ARCH_X86) 10 | assert b.size == 1 11 | assert b.instructions == 1 12 | toappend = pyvex.block.IRSB(b"\x51", 0, pyvex.ARCH_X86) 13 | toappend.jumpkind = "Ijk_Invalid" 14 | toappend._direct_next = None # Invalidate the cache because I manually changed the jumpkind 15 | assert not toappend.direct_next 16 | b.extend(toappend) 17 | assert b.size == 2 18 | assert b.instructions == 2 19 | assert not b.direct_next 20 | 21 | 22 | if __name__ == "__main__": 23 | unittest.main() 24 | -------------------------------------------------------------------------------- /tests/test_lift.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import pyvex 4 | from pyvex import IRSB, ffi, lift 5 | from pyvex.errors import PyVEXError 6 | from pyvex.lifting.util import GymratLifter, Instruction, JumpKind 7 | 8 | 9 | # pylint: disable=R0201 10 | # pylint: disable=C0115 11 | class TestLift(unittest.TestCase): 12 | def 
test_partial_lift(self): 13 | """This tests that gymrat correctly handles the case where an 14 | instruction is longer than the remaining input. 15 | """ 16 | 17 | class NOP(Instruction): 18 | name = "nop" 19 | bin_format = "0000111100001111" 20 | 21 | def compute_result(self, *args): 22 | pass 23 | 24 | class NOPLifter(GymratLifter): 25 | instrs = [NOP] 26 | 27 | lifter = NOPLifter(pyvex.ARCH_AMD64, 0) 28 | # this should not throw an exception 29 | block = lifter.lift("\x0f\x0fa") 30 | assert block.size == 2 31 | assert block.instructions == 1 32 | assert block.jumpkind == JumpKind.NoDecode 33 | 34 | def test_skipstmts_toomanyexits(self): 35 | # https://github.com/angr/pyvex/issues/153 36 | 37 | old_exit_limit = IRSB.MAX_EXITS 38 | IRSB.MAX_EXITS = 32 39 | 40 | bytes_ = bytes.fromhex( 41 | "0DF1B00B2EAB94E8030008938BE803000DF1C0089AE8030083E" 42 | "80300019B0DF1F00A339AE669E26193E8030085E8030098E803" 43 | "0083E80300069B95E8030088E80300A26993E803004A9200236" 44 | "3622362A361E362A36238AC029A069484E8030012AC09982993" 45 | "28932B9303C885E8030092E8030084E803009AE8030082E8030" 46 | "02A460A9D26993E910B9941910D9942910C992A93409548AD43" 47 | "9194E803008AE8030027983F9927913F909BE803000DF5887B2" 48 | "69335938BE803000DF58C7B089903C98BE8030098E8030084E8" 49 | "030095E8030088E803004B993391329394E8030034933793369" 50 | "3069C059B4C93049B4E9350ABCDF834C1CDF83CE185E8030094" 51 | "E803004B9683E8030015A94498C4F7E2EA " 52 | ) 53 | arch = pyvex.ARCH_ARM_LE 54 | # Lifting the first four bytes will not cause any problem. Statements should be skipped as expected 55 | b = IRSB(bytes_[:34], 0xC6951, arch, opt_level=1, bytes_offset=5, skip_stmts=True) 56 | assert len(b.exit_statements) > 0 57 | assert not b.has_statements 58 | 59 | # Lifting the entire block will cause the number of exit statements go 60 | # beyond the limit (currently 32). 
PyVEX will 61 | # automatically relift this block without skipping the statements 62 | b = IRSB(bytes_, 0xC6951, arch, opt_level=1, bytes_offset=5, skip_stmts=True) 63 | assert b.statements is not None 64 | assert len(b.exit_statements) > 32 65 | 66 | # Restore the setting 67 | IRSB.MAX_EXITS = old_exit_limit 68 | 69 | def test_max_bytes(self): 70 | data = bytes.fromhex("909090909090c3") 71 | arch = pyvex.ARCH_X86 72 | assert lift(data, 0x1000, arch, max_bytes=None).size == len(data) 73 | assert lift(data, 0x1000, arch, max_bytes=len(data) - 1).size == len(data) - 1 74 | assert lift(data, 0x1000, arch, max_bytes=len(data) + 1).size == len(data) 75 | 76 | data2 = ffi.from_buffer(data) 77 | self.assertRaises(PyVEXError, lift, data2, 0x1000, arch) 78 | assert lift(data2, 0x1000, arch, max_bytes=len(data)).size == len(data) 79 | assert lift(data2, 0x1000, arch, max_bytes=len(data) - 1).size == len(data) - 1 80 | 81 | 82 | if __name__ == "__main__": 83 | unittest.main() 84 | -------------------------------------------------------------------------------- /tests/test_mips32_postprocess.py: -------------------------------------------------------------------------------- 1 | import pyvex 2 | 3 | 4 | def test_mips32_unconditional_jumps(): 5 | # 0040000c: 10000002 ; beq $zero, $zero, LABEL_ELSE_IF 6 | # 00400010: 00000000 ; sll $zero, $zero, 0 7 | # 00400014: 08100012 ; j LABEL_DONE 8 | # 00400018: ; LABEL_ELSE_IF: 9 | irsb = pyvex.IRSB( 10 | data=(b"\x10\x00\x00\x02" b"\x00\x00\x00\x00"), 11 | mem_addr=0x40000C, 12 | arch=pyvex.ARCH_MIPS32_BE, 13 | num_inst=2, 14 | opt_level=0, 15 | ) 16 | assert type(irsb.next) is pyvex.expr.Const 17 | assert irsb.next.con.value == 0x400018 18 | 19 | 20 | if __name__ == "__main__": 21 | test_mips32_unconditional_jumps() 22 | -------------------------------------------------------------------------------- /tests/test_s390x_exrl.py: -------------------------------------------------------------------------------- 1 | import pyvex 2 | 3 | 4 | 
def test_s390x_exrl():
    """Lift an s390x EXRL / BR / XC byte sequence at 0x400400 and check the printed IRSB."""
    arch = pyvex.ARCH_S390X
    irsb = pyvex.lift(
        b"\xc6\x10\x00\x00\x00\x04"  # exrl %r1,0x400408
        b"\x07\xfe"  # br %r14
        b"\xd7\x00\x20\x00\x30\x00"  # xc 0(0,%r2),0(%r3)
        b"\x7d\xa7",  # padding
        0x400400,
        arch,
    )
    irsb_str = str(irsb)

    # check last_execute_target, only top 6 bytes are relevant
    assert "0xd700200030000000" in irsb_str
    assert "s390x_dirtyhelper_EX" in irsb_str
    assert "{ PUT(ia) = 0x400400; Ijk_Boring }" in irsb_str
    assert "------ IMark(0x400406, 2, 0) ------" in irsb_str
    assert irsb.jumpkind == "Ijk_Ret"


if __name__ == "__main__":
    test_s390x_exrl()

# --------------------------------------------------------------------------------
# /tests/test_s390x_lochi.py:
# --------------------------------------------------------------------------------
import pyvex


def test_s390x_lochi():
    """Lift a single s390x LOCHI instruction and check the printed IRSB and its jumpkind."""
    arch = pyvex.ARCH_S390X
    irsb = pyvex.lift(b"\xec\x18\xab\xcd\x00\x42", 0x400400, arch)  # lochi %r1,0xabcd,8
    irsb_str = str(irsb)

    assert "s390_calculate_cond(0x0000000000000008" in irsb_str
    assert "PUT(r1_32) = 0xffffabcd" in irsb_str
    # Compare for equality: a substring test (`in "Ijk_Boring"`) would also
    # accept an empty or truncated jumpkind.
    assert irsb.jumpkind == "Ijk_Boring"


if __name__ == "__main__":
    test_s390x_lochi()

# --------------------------------------------------------------------------------
# /tests/test_s390x_vl.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python3
import pyvex


def test_s390x_vl():
    """Lift a single s390x VL instruction and check the printed IRSB and its jumpkind."""
    arch = pyvex.ARCH_S390X
    irsb = pyvex.lift(b"\xe7\x40\x90\xa8\x00\x06", 0x11C6C9E, arch)  # vl %v4, 0xa8(%r9)
    irsb_str = str(irsb)

    assert "GET:I64(r9)" in irsb_str
    assert "Add64(0x00000000000000a8" in irsb_str
    assert "LDbe:V128" in irsb_str
    assert "PUT(v4) =" in irsb_str
    assert irsb.jumpkind == "Ijk_Boring"


if __name__ == "__main__":
    test_s390x_vl()
# --------------------------------------------------------------------------------
# /tests/test_spotter.py:
# --------------------------------------------------------------------------------
import os

import pyvex
import pyvex.lifting
from pyvex.lifting import register
from pyvex.lifting.util import GymratLifter, Instruction, Type

test_location = str(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../binaries/tests"))


class Instruction_IMAGINARY(Instruction):
    """Test-only instruction whose 16-bit pattern is 0x0F0B."""

    bin_format = bin(0x0F0B)[2:].zfill(16)
    name = "IMAGINARY"

    def compute_result(self):
        a = self.constant(10, Type.int_27)
        b = self.constant(20, Type.int_27)
        # The value is deliberately discarded; the addition is what shows up
        # as the Add27 statement expected in basic_goal below.
        a + b


class ImaginarySpotter(GymratLifter):
    """Lifter that recognises only Instruction_IMAGINARY."""

    instrs = [Instruction_IMAGINARY]


register(ImaginarySpotter, "X86")

# Expected str() of the block lifted in test_basic, compared after strip().
# NOTE(review): the interior indentation must match pyvex's IRSB printer
# exactly -- confirm the 3-space indent against the real output.
basic_goal = """
IRSB {
   t0:Ity_I27

   00 | ------ IMark(0x1, 2, 0) ------
   01 | t0 = Add27((0xa :: Ity_I27),(0x14 :: Ity_I27))
   NEXT: PUT(eip) = 0x00000003; Ijk_Boring
}
"""


def test_basic():
    """Lift the two IMAGINARY bytes on x86 and compare the printed block with basic_goal."""
    b = pyvex.block.IRSB(b"\x0f\x0b", 1, pyvex.ARCH_X86)
    assert str(b).strip() == basic_goal.strip()


def test_embedded():
    """Lift IMAGINARY surrounded by 0x50 bytes and check the statement that follows its IMark.

    Raises:
        AssertionError: if no IMark with addr 0x4, len 2 and delta 0 is found,
            or if the statement after it is not the expected Add27 of 10 and 20.
    """
    b = pyvex.block.IRSB(b"\x50" * 3 + b"\x0f\x0b" + b"\x50" * 6, 1, pyvex.ARCH_X86)
    for i, stmt in enumerate(b.statements):
        if type(stmt) is pyvex.stmt.IMark and stmt.addr == 0x4 and stmt.len == 2 and stmt.delta == 0:
            imaginary_trans_stmt = b.statements[i + 1]
            assert type(imaginary_trans_stmt) is pyvex.stmt.WrTmp
            addexpr = imaginary_trans_stmt.data
            assert type(addexpr) is pyvex.expr.Binop
            assert addexpr.op == "Iop_Add27"
            arg1, arg2 = addexpr.args
            assert type(arg1) is pyvex.expr.Const
            assert arg1.con.value == 10
            assert type(arg2) is pyvex.expr.Const
            assert arg2.con.value == 20
            return
    # Raise explicitly: `assert False` is stripped under `python -O`, which
    # would let the test pass without ever finding the instruction.
    raise AssertionError("Could not find matching IMark")


class Instruction_MSR(Instruction):
    """Test-only instruction whose 32-bit pattern is 0x8808F380."""

    bin_format = bin(0x8808F380)[2:].zfill(32)
    name = "MSR.W"

    def compute_result(self):
        a = self.constant(10, Type.int_27)
        b = self.constant(20, Type.int_27)
        a + b  # result intentionally unused, as in Instruction_IMAGINARY


class Instruction_CPSIEI(Instruction):
    """Test-only instruction whose 16-bit pattern is 0xB662."""

    bin_format = bin(0xB662)[2:].zfill(16)
    name = "CPSIE I"

    def compute_result(self):
        a = self.constant(10, Type.int_27)
        b = self.constant(20, Type.int_27)
        a + b  # result intentionally unused, as in Instruction_IMAGINARY


class Instruction_CPSIEF(Instruction):
    """Test-only instruction whose 16-bit pattern is 0xB661."""

    bin_format = bin(0xB661)[2:].zfill(16)
    name = "CPSIE F"

    def compute_result(self):
        a = self.constant(10, Type.int_27)
        b = self.constant(20, Type.int_27)
        a + b  # result intentionally unused, as in Instruction_IMAGINARY


class CortexSpotter(GymratLifter):
    """Lifter that recognises the three test-only instructions above."""

    instrs = [Instruction_MSR, Instruction_CPSIEI, Instruction_CPSIEF]


register(CortexSpotter, "ARMEL")


def test_tmrs():
    """Lift four bytes on little-endian ARM and check that statement 1 reads sp/r13 and statement 2 is a Put."""
    arch = pyvex.ARCH_ARM_LE
    ins = b"\xef\xf3\x08\x82"
    b = pyvex.block.IRSB(ins, 1, arch)
    assert b.jumpkind == "Ijk_Boring"
    assert isinstance(b.statements[1].data, pyvex.expr.Get)
    assert arch.translate_register_name(b.statements[1].data.offset) in ["sp", "r13"]
    assert isinstance(b.statements[2], pyvex.stmt.Put)


def test_tmsr():
    """Lift four bytes on little-endian ARM at opt_level=3 and check that statement 1 reads r2 and statement 2 is a Put."""
    arch = pyvex.ARCH_ARM_LE
    inss = b"\x82\xf3\x08\x88"
    b = pyvex.block.IRSB(inss, 1, arch, opt_level=3)
    assert b.jumpkind == "Ijk_Boring"
    assert isinstance(b.statements[1].data, pyvex.expr.Get)
    assert arch.translate_register_name(b.statements[1].data.offset) == "r2"
    assert isinstance(b.statements[2], pyvex.stmt.Put)


if __name__ == "__main__":
    test_basic()
    test_embedded()
    test_tmrs()
    test_tmsr()

# --------------------------------------------------------------------------------
# /tests/test_ud2.py:
# --------------------------------------------------------------------------------
import pyvex


def test_ud2():
    """Check that a block running into ud2 ends with an explicit NoDecode.

    On x86 and amd64, ud2 is a valid 2-byte instruction that means "undefined
    instruction". When a basic block ends with ud2, lifting should treat it as
    an explicit NoDecode instead of skipping the instruction and resuming.
    """
    code = b"\x90\x90\x0f\x0b\x90\x90"
    load_addr = 0x20
    irsb = pyvex.block.IRSB(code, load_addr, pyvex.ARCH_AMD64)

    assert irsb.jumpkind == "Ijk_NoDecode"
    assert irsb.next.con.value == 0x22
    assert irsb.size == 4


if __name__ == "__main__":
    test_ud2()

# --------------------------------------------------------------------------------