├── .git-blame-ignore-revs
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── bug-report.yml
    │   ├── config.yml
    │   ├── feature-request.yml
    │   └── question.yml
    └── workflows
    │   ├── ci.yml
    │   ├── cifuzz.yml
    │   ├── custom.yml
    │   ├── macos.yml
    │   ├── nightly-ci.yml
    │   └── windows.yml
├── .gitignore
├── .gitmodules
├── .pre-commit-config.yaml
├── .readthedocs.yml
├── LICENSE
├── MANIFEST.in
├── README.md
├── docs
    ├── Makefile
    ├── api.rst
    ├── conf.py
    ├── index.rst
    ├── make.bat
    └── quickstart.rst
├── fuzzing
    ├── build.sh
    ├── enhanced_fdp.py
    └── irsb_fuzzer.py
├── make_ffi.py
├── pyproject.toml
├── pyvex
    ├── __init__.py
    ├── _register_info.py
    ├── arches.py
    ├── block.py
    ├── const.py
    ├── const_val.py
    ├── data_ref.py
    ├── enums.py
    ├── errors.py
    ├── expr.py
    ├── lifting
    │   ├── __init__.py
    │   ├── gym
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── aarch64_spotter.py
    │   │   ├── arm_spotter.py
    │   │   └── x86_spotter.py
    │   ├── libvex.py
    │   ├── lift_function.py
    │   ├── lifter.py
    │   ├── post_processor.py
    │   ├── util
    │   │   ├── __init__.py
    │   │   ├── instr_helper.py
    │   │   ├── lifter_helper.py
    │   │   ├── syntax_wrapper.py
    │   │   └── vex_helper.py
    │   └── zerodivision.py
    ├── native.py
    ├── py.typed
    ├── stmt.py
    ├── types.py
    └── utils.py
├── pyvex_c
    ├── LICENSE
    ├── Makefile
    ├── Makefile-msvc
    ├── README
    ├── analysis.c
    ├── e4c_lite.h
    ├── logging.c
    ├── logging.h
    ├── postprocess.c
    ├── pyvex.c
    ├── pyvex.def
    ├── pyvex.h
    └── pyvex_internal.h
├── setup.py
└── tests
    ├── test_arm_postprocess.py
    ├── test_gym.py
    ├── test_irsb_property_caching.py
    ├── test_lift.py
    ├── test_mips32_postprocess.py
    ├── test_pyvex.py
    ├── test_s390x_exrl.py
    ├── test_s390x_lochi.py
    ├── test_s390x_vl.py
    ├── test_spotter.py
    └── test_ud2.py


/.git-blame-ignore-revs:
--------------------------------------------------------------------------------
1 | # Black + pre-commit
2 | 23503e79193a3cff5d6f1c92f22349fd2227d936 # Black
3 | cd758543f17a2253b5a0630327eac0ad6780217a # Trailing whitespace, pyupgrade, prefer builtin constructors
4 | dfd137fc8d3073ff065347401f528c1eaf62c383 # ruff
5 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug-report.yml:
--------------------------------------------------------------------------------
 1 | name: Report a bug
 2 | description: Report a bug in pyvex
 3 | labels: [bug,needs-triage]
 4 | body:
 5 |   - type: markdown
 6 |     attributes:
 7 |       value: |
 8 |         Thank you for taking the time to submit this bug report!
 9 | 
10 |         Before submitting this bug report, please check the following, which may resolve your issue:
11 |         * Have you checked that you are running the latest versions of angr and its components? angr is rapidly-evolving!
12 |         * Have you [searched existing issues](https://github.com/angr/pyvex/issues?q=is%3Aopen+is%3Aissue+label%3Abug) to see if this bug has been reported before?
13 |         * Have you checked the [documentation](https://docs.angr.io/)?
14 |         * Have you checked the [FAQ](https://docs.angr.io/introductory-errata/faq)?
15 | 
16 |         **Important:** If this bug is a security vulnerability, please submit it privately. See our [security policy](https://github.com/angr/angr/blob/master/SECURITY.md) for more details.
17 | 
18 |         Please note: The angr suite is maintained by a small team. While we cannot guarantee any timeliness for fixes and enhancements, we will do our best. For more real-time help with angr, from us and the community, join our [Slack](https://angr.io/invite/).
19 | 
20 |   - type: textarea
21 |     attributes:
22 |       label: Description
23 |       description: Brief description of the bug, with any relevant log messages.
24 |     validations:
25 |       required: true
26 | 
27 |   - type: textarea
28 |     attributes:
29 |       label: Steps to reproduce the bug
30 |       description: |
31 |         If appropriate, include both a **script to reproduce the bug**, and if possible **attach the binary used**.
32 | 
33 |         **Tip:** You can attach files to the issue by first clicking on the textarea to select it, then dragging & dropping the file onto the textarea.
34 |   - type: textarea
35 |     attributes:
36 |       label: Environment
37 |       description: Many common issues are caused by problems with the local Python environment. Before submitting, double-check that your versions of all modules in the angr suite (angr, cle, pyvex, ...) are up to date and include the output of `python -m angr.misc.bug_report` here.
38 | 
39 |   - type: textarea
40 |     attributes:
41 |       label: Additional context
42 |       description: Any additional context about the problem.
43 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: false
2 | contact_links:
3 |   - name: Join our Slack community
4 |     url: https://angr.io/invite/
5 |     about: For questions and help with angr, you are invited to join the angr Slack community
6 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature-request.yml:
--------------------------------------------------------------------------------
 1 | name: Request a feature
 2 | description: Request a new feature for pyvex
 3 | labels: [enhancement,needs-triage]
 4 | body:
 5 |   - type: markdown
 6 |     attributes:
 7 |       value: |
 8 |         Thank you for taking the time to submit this feature request!
 9 | 
10 |         Before submitting this feature request, please check the following:
11 |         * Have you checked that you are running the latest versions of angr and its components? angr is rapidly-evolving!
12 |         * Have you checked the [documentation](https://docs.angr.io/) to see if this feature exists already?
13 |         * Have you [searched existing issues](https://github.com/angr/pyvex/issues?q=is%3Aissue+label%3Aenhancement+) to see if this feature has been requested before?
14 | 
15 |         Please note: The angr suite is maintained by a small team. While we cannot guarantee any timeliness for fixes and enhancements, we will do our best. For more real-time help with angr, from us and the community, join our [Slack](https://angr.io/invite/).
16 | 
17 |   - type: textarea
18 |     attributes:
19 |       label: Description
20 |       description: |
21 |         Brief description of the desired feature. If the feature is intended to solve some problem, please clearly describe the problem, including any relevant binaries, etc.
22 | 
23 |         **Tip:** You can attach files to the issue by first clicking on the textarea to select it, then dragging & dropping the file onto the textarea.
24 |     validations:
25 |       required: true
26 | 
27 |   - type: textarea
28 |     attributes:
29 |       label: Alternatives
30 |       description: Possible alternative solutions or features that you have considered.
31 | 
32 |   - type: textarea
33 |     attributes:
34 |       label: Additional context
35 |       description: Any other context or screenshots about the feature request.
36 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/question.yml:
--------------------------------------------------------------------------------
 1 | name: Ask a question
 2 | description: Ask a question about pyvex
 3 | labels: [question,needs-triage]
 4 | body:
 5 |   - type: markdown
 6 |     attributes:
 7 |       value: |
 8 |         If you have a question about pyvex that is not a bug report or a feature request, you can ask it here. For more real-time help with pyvex, from us and the community, join our [Slack](https://angr.io/invite/).
 9 | 
10 |         Before submitting this question, please check the following, which may answer your question:
11 |         * Have you checked the [documentation](https://docs.angr.io/)?
12 |         * Have you checked the [FAQ](https://docs.angr.io/introductory-errata/faq)?
13 |         * Have you checked our library of [examples](https://github.com/angr/angr-doc/tree/master/examples)?
14 |         * Have you [searched existing issues](https://github.com/angr/pyvex/issues?q=is%3Aissue+label%3Aquestion) to see if this question has been answered before?
15 |         * Have you checked that you are running the latest versions of angr and its components? angr is rapidly-evolving!
16 | 
17 |         Please note: The angr suite is maintained by a small team. While we cannot guarantee any timeliness for fixes and enhancements, we will do our best.
18 | 
19 |   - type: textarea
20 |     attributes:
21 |       label: Question
22 |       description:
23 |     validations:
24 |       required: true
25 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - master
 7 |   pull_request:
 8 |   workflow_dispatch:
 9 | 
10 | jobs:
11 |   ci:
12 |     uses: angr/ci-settings/.github/workflows/angr-ci.yml@master
13 |   windows:
14 |     uses: ./.github/workflows/windows.yml
15 |   macos:
16 |     uses: ./.github/workflows/macos.yml
17 | 
18 | 


--------------------------------------------------------------------------------
/.github/workflows/cifuzz.yml:
--------------------------------------------------------------------------------
 1 | name: OSS-Fuzz
 2 | 
 3 | on:
 4 |   # push:
 5 |   #   branches:
 6 |   #     - master
 7 |   # pull_request:
 8 |   workflow_dispatch:
 9 | 
10 | permissions: {}
11 | 
12 | jobs:
13 |  Fuzzing:
14 |    runs-on: ubuntu-latest
15 |    permissions:
16 |      security-events: write
17 |    steps:
18 |    - name: Build Fuzzers
19 |      id: build
20 |      uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
21 |      with:
22 |        oss-fuzz-project-name: 'pyvex'
23 |        language: python
24 |    - name: Run Fuzzers
25 |      uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
26 |      with:
27 |        oss-fuzz-project-name: 'pyvex'
28 |        language: python
29 |        fuzz-seconds: 600
30 |        output-sarif: true
31 |    - name: Upload Crash
32 |      uses: actions/upload-artifact@v3
33 |      if: failure() && steps.build.outcome == 'success'
34 |      with:
35 |        name: artifacts
36 |        path: ./out/artifacts
37 |    - name: Upload Sarif
38 |      if: always() && steps.build.outcome == 'success'
39 |      uses: github/codeql-action/upload-sarif@v2
40 |      with:
41 |       # Path to SARIF file relative to the root of the repository
42 |       sarif_file: cifuzz-sarif/results.sarif
43 |       checkout_path: cifuzz-sarif
44 | 


--------------------------------------------------------------------------------
/.github/workflows/custom.yml:
--------------------------------------------------------------------------------
 1 | name: Custom CI
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |     inputs:
 6 |       image:
 7 |         description: Container image to run with
 8 |         type: string
 9 |         required: true
10 |       nightly:
11 |         description: Run in nightly mode (include slow tests, no dependent projects)
12 |         type: boolean
13 |         required: true
14 |       afl:
15 |         description: Set parameters for AFL
16 |         type: boolean
17 |         required: true
18 | 
19 | 
20 | jobs:
21 |   ci:
22 |     uses: angr/ci-settings/.github/workflows/angr-ci.yml@master
23 |     with:
24 |       container_image: ${{ inputs.image }}
25 |       nightly: ${{ inputs.nightly }}
26 |       afl: ${{ inputs.afl }}
27 | 
28 | 
29 | 


--------------------------------------------------------------------------------
/.github/workflows/macos.yml:
--------------------------------------------------------------------------------
 1 | name: Test on macOS
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   workflow_call:
 6 | 
 7 | jobs:
 8 |   macos:
 9 |     name: Test macOS
10 |     runs-on: macos-15
11 |     steps:
12 |       - uses: actions/checkout@v3
13 |         with:
14 |           path: pyvex
15 |           submodules: true
16 |       - uses: actions/checkout@v3
17 |         with:
18 |           repository: angr/binaries
19 |           path: binaries
20 |       - uses: actions/setup-python@v4
21 |         with:
22 |           python-version: "3.10"
23 |       - run: python -m venv $HOME/venv
24 |         name: Create venv
25 |         shell: bash
26 |       - run: |
27 |           source $HOME/venv/bin/activate
28 |           pip install git+https://github.com/angr/archinfo.git
29 |         name: Install dependencies
30 |       - run: |
31 |           source $HOME/venv/bin/activate
32 |           pip install ./pyvex[testing]
33 |         name: Install
34 |       - run: |
35 |           source $HOME/venv/bin/activate
36 |           pytest -n auto pyvex
37 |         name: Run pytest
38 | 


--------------------------------------------------------------------------------
/.github/workflows/nightly-ci.yml:
--------------------------------------------------------------------------------
 1 | name: Nightly CI
 2 | 
 3 | on:
 4 |   schedule:
 5 |   - cron: "0 0 * * *"
 6 |   workflow_dispatch:
 7 | 
 8 | jobs:
 9 |   ci:
10 |     uses: angr/ci-settings/.github/workflows/angr-ci.yml@master
11 |     with:
12 |       nightly: true
13 |     secrets: inherit
14 | 


--------------------------------------------------------------------------------
/.github/workflows/windows.yml:
--------------------------------------------------------------------------------
 1 | name: Test on Windows
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   workflow_call:
 6 | 
 7 | jobs:
 8 |   windows:
 9 |     name: Test Windows
10 |     runs-on: windows-2022
11 |     steps:
12 |       - uses: actions/checkout@v3
13 |         with:
14 |           path: pyvex
15 |           submodules: true
16 |       - uses: actions/checkout@v3
17 |         with:
18 |           repository: angr/binaries
19 |           path: binaries
20 |       - uses: actions/setup-python@v4
21 |         with:
22 |           python-version: "3.10"
23 |       - run: python -m venv $HOME/venv
24 |         name: Create venv
25 |         shell: bash
26 |       - run: |
27 |           call %USERPROFILE%\venv\Scripts\activate
28 |           pip install git+https://github.com/angr/archinfo.git
29 |         name: Install dependencies
30 |         shell: cmd
31 |       - run: |
32 |           call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
33 |           call %USERPROFILE%\venv\Scripts\activate
34 |           pip install ./pyvex[testing]
35 |         name: Install
36 |         shell: cmd
37 |       - run: |
38 |           call %USERPROFILE%\venv\Scripts\activate
39 |           pytest -n auto pyvex
40 |         name: Run pytest
41 |         shell: cmd
42 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | build
 2 | dist
 3 | MANIFEST
 4 | pyvex_python
 5 | vex_ffi.py
 6 | libpyvex.so
 7 | *.egg-info
 8 | *.eggs
 9 | *.pyc
10 | *.swp
11 | *.obj
12 | *.lib
13 | *.dll
14 | *.exp
15 | *.o
16 | *.a
17 | *.dylib
18 | pyvex/lib
19 | pyvex/include
20 | vex-master
21 | vex-master.tar.gz
22 | docs/_build
23 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "vex"]
2 | 	path = vex
3 | 	url = https://github.com/angr/vex.git
4 | 	branch = master
5 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | 
 3 | #
 4 | # Fail fast
 5 | #
 6 | 
 7 | -   repo: https://github.com/abravalheri/validate-pyproject
 8 |     rev: v0.24.1
 9 |     hooks:
10 |     - id: validate-pyproject
11 |       fail_fast: true
12 | 
13 | -   repo: https://github.com/pre-commit/pre-commit-hooks
14 |     rev: v5.0.0
15 |     hooks:
16 |     # General
17 |     -   id: check-merge-conflict
18 |         fail_fast: true
19 |     -   id: check-case-conflict
20 |         fail_fast: true
21 |     -   id: destroyed-symlinks
22 |         fail_fast: true
23 |     -   id: check-symlinks
24 |         fail_fast: true
25 |     -   id: check-added-large-files
26 |         fail_fast: true
27 |     # Syntax
28 |     -   id: check-toml
29 |         fail_fast: true
30 |     -   id: check-json
31 |         fail_fast: true
32 |     -   id: check-yaml
33 |         fail_fast: true
34 | 
35 | -   repo: https://github.com/pre-commit/pre-commit-hooks
36 |     rev: v5.0.0
37 |     hooks:
38 |     -   id: check-ast
39 |         fail_fast: true
40 | 
41 | #
42 | # Modifiers
43 | #
44 | 
45 | -   repo: https://github.com/pre-commit/pre-commit-hooks
46 |     rev: v5.0.0
47 |     hooks:
48 |     -   id: mixed-line-ending
49 |     -   id: trailing-whitespace
50 | 
51 | -   repo: https://github.com/dannysepler/rm_unneeded_f_str
52 |     rev: v0.2.0
53 |     hooks:
54 |     -   id: rm-unneeded-f-str
55 | 
56 | -   repo: https://github.com/asottile/pyupgrade
57 |     rev: v3.20.0
58 |     hooks:
59 |     -   id: pyupgrade
60 |         args: [--py310-plus]
61 | 
62 | -   repo: https://github.com/astral-sh/ruff-pre-commit
63 |     rev: v0.11.13
64 |     hooks:
65 |     - id: ruff
66 |       args: [--fix, --exit-non-zero-on-fix]
67 | 
68 | # Last modifier: Coding Standard
69 | -   repo: https://github.com/psf/black
70 |     rev: 25.1.0
71 |     hooks:
72 |     -   id: black
73 | 
74 | #
75 | # Static Checks
76 | #
77 | 
78 | -   repo: https://github.com/pre-commit/pygrep-hooks
79 |     rev: v1.10.0
80 |     hooks:
81 |     # Python
82 |     -   id: python-use-type-annotations
83 |     -   id: python-no-log-warn
84 |     # Documentation
85 |     -   id: rst-backticks
86 |     -   id: rst-directive-colons
87 |     -   id: rst-inline-touching-normal
88 | 
89 | -   repo: https://github.com/pre-commit/pre-commit-hooks
90 |     rev: v5.0.0
91 |     hooks:
92 |     -   id: debug-statements
93 |     -   id: check-builtin-literals
94 |     -   id: check-docstring-first
95 | 


--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
 1 | # Read the Docs configuration file
 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 3 | 
 4 | version: 2
 5 | 
 6 | sphinx:
 7 |   configuration: docs/conf.py
 8 | 
 9 | submodules:
10 |   include: all
11 | 
12 | build:
13 |   os: ubuntu-22.04
14 |   tools:
15 |     python: "3.10"
16 |   jobs:
17 |     pre_install:
18 |       - pip install -U pip
19 |       - pip install git+https://github.com/angr/archinfo.git
20 | 
21 | python:
22 |   install:
23 |     - method: pip
24 |       path: .
25 |       extra_requirements:
26 |         - docs
27 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2015, The Regents of the University of California
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 | * Redistributions of source code must retain the above copyright notice, this
 8 |   list of conditions and the following disclaimer.
 9 | 
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 |   this list of conditions and the following disclaimer in the documentation
12 |   and/or other materials provided with the distribution.
13 | 
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 | 
25 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | include README.md
3 | include make_ffi.py
4 | recursive-include pyvex_c *.c *.h *.def Makefile Makefile-msvc LICENSE
5 | recursive-include vex *
6 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # PyVEX
  2 | [![Latest Release](https://img.shields.io/pypi/v/pyvex.svg)](https://pypi.python.org/pypi/pyvex/)
  3 | [![Python Version](https://img.shields.io/pypi/pyversions/pyvex)](https://pypi.python.org/pypi/pyvex/)
  4 | [![PyPI Statistics](https://img.shields.io/pypi/dm/pyvex.svg)](https://pypistats.org/packages/pyvex)
  5 | [![License](https://img.shields.io/github/license/angr/pyvex.svg)](https://github.com/angr/pyvex/blob/master/LICENSE)
  6 | 
  7 | PyVEX is Python bindings for the VEX IR.
  8 | 
  9 | ## Project Links
 10 | Project repository: https://github.com/angr/pyvex
 11 | 
 12 | Documentation: https://api.angr.io/projects/pyvex/en/latest/
 13 | 
 14 | ## Installing PyVEX
 15 | 
 16 | PyVEX can be pip-installed:
 17 | 
 18 | ```bash
 19 | pip install pyvex
 20 | ```
 21 | 
 22 | ## Using PyVEX
 23 | 
 24 | ```python
 25 | import pyvex
 26 | import archinfo
 27 | 
 28 | # translate an AMD64 basic block (of nops) at 0x400400 into VEX
 29 | irsb = pyvex.lift(b"\x90\x90\x90\x90\x90", 0x400400, archinfo.ArchAMD64())
 30 | 
 31 | # pretty-print the basic block
 32 | irsb.pp()
 33 | 
 34 | # this is the IR Expression of the jump target of the unconditional exit at the end of the basic block
 35 | print(irsb.next)
 36 | 
 37 | # this is the type of the unconditional exit (i.e., a call, ret, syscall, etc)
 38 | print(irsb.jumpkind)
 39 | 
 40 | # you can also pretty-print it
 41 | irsb.next.pp()
 42 | 
 43 | # iterate through each statement and print all the statements
 44 | for stmt in irsb.statements:
 45 |     stmt.pp()
 46 | 
 47 | # pretty-print the IR expression representing the data, and the *type* of that IR expression written by every store statement
 48 | import pyvex
 49 | for stmt in irsb.statements:
 50 |     if isinstance(stmt, pyvex.IRStmt.Store):
 51 |         print("Data:", end="")
 52 |         stmt.data.pp()
 53 |         print("")
 54 | 
 55 |         print("Type:", end="")
 56 |         print(stmt.data.result_type)
 57 |         print("")
 58 | 
 59 | # pretty-print the condition and jump target of every conditional exit from the basic block
 60 | for stmt in irsb.statements:
 61 |     if isinstance(stmt, pyvex.IRStmt.Exit):
 62 |         print("Condition:", end="")
 63 |         stmt.guard.pp()
 64 |         print("")
 65 | 
 66 |         print("Target:", end="")
 67 |         stmt.dst.pp()
 68 |         print("")
 69 | 
 70 | # these are the types of every temp in the IRSB
 71 | print(irsb.tyenv.types)
 72 | 
 73 | # here is one way to get the type of temp 0
 74 | print(irsb.tyenv.types[0])
 75 | ```
 76 | 
 77 | Keep in mind that this is a *syntactic* representation of a basic block. That is, it'll tell you what the block means, but you don't have any context to say, for example, what *actual* data is written by a store instruction.
 78 | 
 79 | ## VEX Intermediate Representation
 80 | 
 81 | To deal with widely diverse architectures, it is useful to carry out analyses on an intermediate representation.
 82 | An IR abstracts away several differences between architectures, allowing a single analysis to be run on all of them:
 83 | 
 84 | - **Register names.** The quantity and names of registers differ between architectures, but modern CPU designs hold to a common theme: each CPU contains several general purpose registers, a register to hold the stack pointer, a set of registers to store condition flags, and so forth. The IR provides a consistent, abstracted interface to registers on different platforms. Specifically, VEX models the registers as a separate memory space, with integer offsets (i.e., AMD64's `rax` is stored starting at address 16 in this memory space).
 85 | - **Memory access.** Different architectures access memory in different ways. For example, ARM can access memory in both little-endian and big-endian modes. The IR must abstract away these differences.
 86 | - **Memory segmentation.** Some architectures, such as x86, support memory segmentation through the use of special segment registers. The IR understands such memory access mechanisms.
 87 | - **Instruction side-effects.** Most instructions have side-effects. For example, most operations in Thumb mode on ARM update the condition flags, and stack push/pop instructions update the stack pointer. Tracking these side-effects in an *ad hoc* manner in the analysis would be crazy, so the IR makes these effects explicit.
 88 | 
 89 | There are lots of choices for an IR. We use VEX, since the uplifting of binary code into VEX is quite well supported.
 90 | VEX is an architecture-agnostic, side-effects-free representation of a number of target machine languages.
 91 | It abstracts machine code into a representation designed to make program analysis easier.
 92 | This representation has five main classes of objects:
 93 | 
 94 | - **Expressions.** IR Expressions represent a calculated or constant value. This includes memory loads, register reads, and results of arithmetic operations.
 95 | - **Operations.** IR Operations describe a *modification* of IR Expressions. This includes integer arithmetic, floating-point arithmetic, bit operations, and so forth. An IR Operation applied to IR Expressions yields an IR Expression as a result.
 96 | - **Temporary variables.** VEX uses temporary variables as internal registers: IR Expressions are stored in temporary variables between use. The content of a temporary variable can be retrieved using an IR Expression. These temporaries are numbered, starting at `t0`. These temporaries are strongly typed (i.e., "64-bit integer" or "32-bit float").
 97 | - **Statements.** IR Statements model changes in the state of the target machine, such as the effect of memory stores and register writes. IR Statements use IR Expressions for values they may need. For example, a memory store *IR Statement* uses an *IR Expression* for the target address of the write, and another *IR Expression* for the content.
 98 | - **Blocks.** An IR Block is a collection of IR Statements, representing an extended basic block (termed "IR Super Block" or "IRSB") in the target architecture. A block can have several exits. For conditional exits from the middle of a basic block, a special *Exit* IR Statement is used. An IR Expression is used to represent the target of the unconditional exit at the end of the block.
 99 | 
100 | VEX IR is actually quite well documented in the `libvex_ir.h` file (https://github.com/angr/vex/blob/dev/pub/libvex_ir.h) in the VEX repository. For the lazy, we'll detail some parts of VEX that you'll likely interact with fairly frequently. To begin with, here are some IR Expressions:
101 | 
102 | | IR Expression | Evaluated Value | VEX Output Example |
103 | | ------------- | --------------- | ------- |
104 | | Constant | A constant value. | 0x4:I32 |
105 | | Read Temp | The value stored in a VEX temporary variable. | RdTmp(t10) |
106 | | Get Register | The value stored in a register. | GET:I32(16) |
107 | | Load Memory | The value stored at a memory address, with the address specified by another IR Expression. | LDle:I32 / LDbe:I64 |
108 | | Operation | A result of a specified IR Operation, applied to specified IR Expression arguments. | Add32 |
109 | | If-Then-Else | If a given IR Expression evaluates to 0, return one IR Expression. Otherwise, return another. | ITE |
110 | | Helper Function | VEX uses C helper functions for certain operations, such as computing the conditional flags registers of certain architectures. These functions return IR Expressions. | function\_name() |
111 | 
112 | These expressions are then, in turn, used in IR Statements. Here are some common ones:
113 | 
114 | | IR Statement | Meaning | VEX Output Example |
115 | | ------------ | ------- | ------------------ |
116 | | Write Temp | Set a VEX temporary variable to the value of the given IR Expression. | WrTmp(t1) = (IR Expression) |
117 | | Put Register | Update a register with the value of the given IR Expression. | PUT(16) = (IR Expression) |
118 | | Store Memory | Update a location in memory, given as an IR Expression, with a value, also given as an IR Expression. | STle(0x1000) = (IR Expression) |
119 | | Exit | A conditional exit from a basic block, with the jump target specified by an IR Expression. The condition is specified by an IR Expression. | if (condition) goto (Boring) 0x4000A00:I32 |
120 | 
121 | An example of an IR translation, on ARM, is produced below. In the example, the subtraction operation is translated into a single IR block comprising 5 IR Statements, each of which contains at least one IR Expression (although, in real life, an IR block would typically consist of more than one instruction). Register names are translated into numerical indices given to the *GET* Expression and *PUT* Statement.
122 | The astute reader will observe that the actual subtraction is modeled by the first 4 IR Statements of the block, and the incrementing of the program counter to point to the next instruction (which, in this case, is located at `0x59FC8`) is modeled by the last statement.
123 | 
124 | The following ARM instruction:
125 | 
126 |     subs R2, R2, #8
127 | 
128 | Becomes this VEX IR:
129 | 
130 |     t0 = GET:I32(16)
131 |     t1 = 0x8:I32
132 |     t3 = Sub32(t0,t1)
133 |     PUT(16) = t3
134 |     PUT(68) = 0x59FC8:I32
135 | 
136 | Cool stuff!
137 | 
138 | ## Citing PyVEX
139 | 
140 | If you use PyVEX in an academic work, please cite the paper for which it was developed:
141 | 
142 | ```bibtex
143 | @inproceedings{shoshitaishvili2015firmalice,
144 |   title={Firmalice - Automatic Detection of Authentication Bypass Vulnerabilities in Binary Firmware},
145 |   author={Shoshitaishvili, Yan and Wang, Ruoyu and Hauser, Christophe and Kruegel, Christopher and Vigna, Giovanni},
146 |   booktitle={NDSS},
147 |   year={2015}
148 | }
149 | ```
150 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
 1 | :mod:`pyvex` --- Binary Translator
 2 | ==================================
 3 | 
 4 | .. automodule:: pyvex
 5 | .. automodule:: pyvex.native
 6 | 
 7 | 
 8 | Translation Interface
 9 | ---------------------
10 | 
11 | .. automodule:: pyvex.block
12 | 
13 | 
14 | IR Components
15 | -------------
16 | 
17 | .. automodule:: pyvex.stmt
18 | .. automodule:: pyvex.expr
19 | .. automodule:: pyvex.const
20 | .. automodule:: pyvex.enums
21 | 
22 | Lifting System
23 | --------------
24 | 
25 | .. automodule:: pyvex.data_ref
26 | .. automodule:: pyvex.lifting
27 | .. automodule:: pyvex.lifting.lift_function
28 | .. automodule:: pyvex.lifting.libvex
29 | .. automodule:: pyvex.lifting.lifter
30 | .. automodule:: pyvex.lifting.post_processor
31 | .. automodule:: pyvex.lifting.util.irsb_postprocess
32 | .. automodule:: pyvex.lifting.util
33 | .. automodule:: pyvex.lifting.util.syntax_wrapper
34 | .. automodule:: pyvex.lifting.util.vex_helper
35 | .. automodule:: pyvex.lifting.util.lifter_helper
36 | .. automodule:: pyvex.lifting.util.instr_helper
37 | 
38 | Builtin IR Processors
39 | ---------------------
40 | 
41 | .. automodule:: pyvex.lifting.zerodivision
42 | 
43 | Errors
44 | ------
45 | 
46 | .. automodule:: pyvex.errors
47 | 
48 | Utilities
49 | ---------
50 | 
51 | .. automodule:: pyvex.utils
52 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # For the full list of built-in configuration values, see the documentation:
 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
 5 | 
 6 | import datetime
 7 | 
 8 | # -- Project information -----------------------------------------------------
 9 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
10 | 
11 | project = "pyvex"
12 | project_copyright = f"{datetime.datetime.now().year}, The angr Project contributors"
13 | author = "The angr Project"
14 | 
15 | # -- General configuration ---------------------------------------------------
16 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
17 | 
18 | extensions = [
19 |     "sphinx.ext.autodoc",
20 |     "sphinx.ext.autosummary",
21 |     "sphinx.ext.coverage",
22 |     "sphinx.ext.napoleon",
23 |     "sphinx.ext.todo",
24 |     "sphinx.ext.viewcode",
25 |     "sphinx_autodoc_typehints",
26 |     "myst_parser",
27 | ]
28 | 
29 | templates_path = ["_templates"]
30 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
31 | 
32 | # -- Options for autodoc -----------------------------------------------------
33 | # https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html#configuration
34 | autoclass_content = "class"
35 | autodoc_default_options = {
36 |     "members": True,
37 |     "member-order": "bysource",
38 |     "show-inheritance": True,
39 |     "special-members": "__init__",
40 |     "undoc-members": True,
41 | }
42 | autodoc_inherit_docstrings = True
43 | autodoc_typehints = "both"
44 | 
45 | # -- Options for coverage ----------------------------------------------------
46 | # https://www.sphinx-doc.org/en/master/usage/extensions/coverage.html
47 | coverage_write_headline = False
48 | 
49 | 
50 | # -- Options for HTML output -------------------------------------------------
51 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
52 | 
53 | html_theme = "furo"
54 | html_static_path = ["_static"]
55 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | Welcome to pyVEX's documentation!
 2 | =================================
 3 | 
 4 | 
 5 | .. toctree::
 6 |    :maxdepth: 2
 7 |    :caption: Contents:
 8 | 
 9 |    Quickstart <quickstart>
10 |    API <api>
11 | 
12 | 
13 | 
14 | Indices and tables
15 | ==================
16 | 
17 | * :ref:`genindex`
18 | * :ref:`modindex`
19 | * :ref:`search`
20 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | 
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | 	echo.
16 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | 	echo.installed, then set the SPHINXBUILD environment variable to point
18 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | 	echo.may add the Sphinx directory to PATH.
20 | 	echo.
21 | 	echo.If you don't have Sphinx installed, grab it from
22 | 	echo.https://www.sphinx-doc.org/
23 | 	exit /b 1
24 | )
25 | 
26 | if "%1" == "" goto help
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/docs/quickstart.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../README.md
2 |    :parser: myst_parser.sphinx_
3 | 


--------------------------------------------------------------------------------
/fuzzing/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -eu
 2 | # Copyright 2023 Google LLC
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 | ################################################################################
17 | 
18 | # Since pyvex requires a specific developer build of archinfo, install it from source
19 | cd "$SRC"/archinfo
20 | python3 -m pip install .
21 | 
22 | cd "$SRC"/pyvex
23 | python3 -m pip install .[testing]
24 | 
25 | # Generate a simple binary for the corpus
26 | echo -ne "start:\n\txor %edi, %edi\nmov \$60, %eax\nsyscall" > /tmp/corpus.s
27 | clang -Os -s /tmp/corpus.s -nostdlib -nostartfiles -m32 -o corpus
28 | zip -r "$OUT"/irsb_fuzzer_seed_corpus.zip corpus
29 | 
30 | # Build fuzzers in $OUT
31 | for fuzzer in $(find $SRC -name '*_fuzzer.py'); do
32 |   compile_python_fuzzer "$fuzzer" --add-binary="pyvex/lib/libpyvex.so:pyvex/lib"
33 | done
34 | 


--------------------------------------------------------------------------------
/fuzzing/enhanced_fdp.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | ################################################################################
16 | """
17 | Defines the EnhancedFuzzedDataProvider
18 | """
19 | from atheris import FuzzedDataProvider
20 | 
21 | 
class EnhancedFuzzedDataProvider(FuzzedDataProvider):
    """
    A FuzzedDataProvider with a few extra convenience consumers
    """

    def _consume_random_count(self) -> int:
        """
        Let the fuzzer pick a byte count bounded by what is left in the buffer
        :return: A count n with 0<=n<=remaining_bytes
        """
        upper_bound = self.remaining_bytes()
        return self.ConsumeIntInRange(0, upper_bound)

    def ConsumeRandomBytes(self) -> bytes:
        """
        Consume a fuzzer-chosen number of the remaining bytes
        :return: Between 0 and remaining_bytes bytes
        """
        count = self._consume_random_count()
        return self.ConsumeBytes(count)

    def ConsumeRemainingBytes(self) -> bytes:
        """
        :return: Everything that is still left in the buffer
        """
        leftover = self.remaining_bytes()
        return self.ConsumeBytes(leftover)

    def ConsumeRandomString(self) -> str:
        """
        Consume a string of fuzzer-chosen length that contains no surrogates
        :return: The string
        """
        count = self._consume_random_count()
        return self.ConsumeUnicodeNoSurrogates(count)

    def ConsumeRemainingString(self) -> str:
        """
        :return: Everything that is still left in the buffer, as a string without surrogates
        """
        leftover = self.remaining_bytes()
        return self.ConsumeUnicodeNoSurrogates(leftover)

    def PickValueInEnum(self, enum):
        """
        Let the fuzzer pick one of the values of an enumeration
        :param enum: An iterable of members that each expose a ``value`` attribute
        :return: The ``value`` of the chosen member
        """
        choices = [member.value for member in enum]
        return self.PickValueInList(choices)
61 | 


--------------------------------------------------------------------------------
/fuzzing/irsb_fuzzer.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | # Copyright 2023 Google LLC
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #      http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | #
 16 | ################################################################################
 17 | import re
 18 | import sys
 19 | from contextlib import contextmanager
 20 | from enum import IntEnum
 21 | from io import StringIO
 22 | 
 23 | import atheris
 24 | 
 25 | with atheris.instrument_imports(include=["pyvex"]):
 26 |     import pyvex
 27 | 
 28 | # Additional imports
 29 | from enhanced_fdp import EnhancedFuzzedDataProvider
 30 | 
 31 | register_error_msg = re.compile("Register .*? does not exist!")
 32 | 
 33 | 
 34 | @contextmanager
 35 | def nostdout():
 36 |     saved_stdout = sys.stdout
 37 |     saved_stderr = sys.stderr
 38 |     sys.stdout = StringIO()
 39 |     sys.stderr = StringIO()
 40 |     yield
 41 |     sys.stdout = saved_stdout
 42 |     sys.stderr = saved_stderr
 43 | 
 44 | 
 45 | # Save all available architectures off
 46 | available_archs = [
 47 |     pyvex.ARCH_X86,
 48 |     pyvex.ARCH_AMD64,
 49 |     pyvex.ARCH_ARM_LE,
 50 |     pyvex.ARCH_ARM_BE,
 51 |     pyvex.ARCH_ARM64_LE,
 52 |     pyvex.ARCH_ARM64_BE,
 53 |     pyvex.ARCH_PPC32,
 54 |     pyvex.ARCH_PPC64_BE,
 55 |     pyvex.ARCH_PPC64_LE,
 56 |     pyvex.ARCH_S390X,
 57 |     pyvex.ARCH_MIPS32_BE,
 58 |     pyvex.ARCH_MIPS32_LE,
 59 |     pyvex.ARCH_MIPS64_BE,
 60 |     pyvex.ARCH_MIPS64_LE,
 61 | ]
 62 | 
 63 | 
 64 | class SupportedOptLevels(IntEnum):
 65 |     """
 66 |     Enumerates the supported optimization levels within pyvex, as derived from the documentation
 67 |     """
 68 | 
 69 |     StrictUnopt = -1
 70 |     Unopt = 0
 71 |     Opt = 1
 72 |     StrictOpt = 2
 73 | 
 74 | 
 75 | def consume_random_arch(fdp: atheris.FuzzedDataProvider) -> pyvex.arches.PyvexArch:
 76 |     return fdp.PickValueInList(available_archs)
 77 | 
 78 | 
 79 | def TestOneInput(data: bytes):
 80 |     fdp = EnhancedFuzzedDataProvider(data)
 81 | 
 82 |     arch = consume_random_arch(fdp)
 83 | 
 84 |     try:
 85 |         with nostdout():
 86 |             data = fdp.ConsumeRandomBytes()
 87 |             max_bytes = fdp.ConsumeIntInRange(0, len(data))
 88 |             irsb = pyvex.lift(
 89 |                 data,
 90 |                 fdp.ConsumeInt(arch.bits),
 91 |                 arch,
 92 |                 max_bytes=fdp.ConsumeIntInRange(0, len(data)),
 93 |                 max_inst=fdp.ConsumeInt(16),
 94 |                 bytes_offset=fdp.ConsumeIntInRange(0, max_bytes),
 95 |                 opt_level=fdp.PickValueInEnum(SupportedOptLevels),
 96 |             )
 97 |             irsb.pp()
 98 |         return 0
 99 |     except pyvex.PyVEXError:
100 |         return -1
101 |     except ValueError as e:
102 |         if re.match(register_error_msg, str(e)):
103 |             return -1
104 |         raise e
105 |     except OverflowError:
106 |         return -1
107 | 
108 | 
def main():
    """
    Register ``TestOneInput`` with atheris using the command-line arguments, then start fuzzing.
    """
    argv = sys.argv
    atheris.Setup(argv, TestOneInput)
    atheris.Fuzz()
112 | 
113 | 
114 | if __name__ == "__main__":
115 |     main()
116 | 


--------------------------------------------------------------------------------
/make_ffi.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import os
  3 | import platform
  4 | import re
  5 | import subprocess
  6 | import sys
  7 | 
  8 | import cffi
  9 | 
 10 | log = logging.getLogger("cffier")
 11 | log.setLevel(logging.DEBUG)
 12 | 
 13 | 
def find_good_scan(questionable):
    """
    Filter a list of C declaration lines down to those that ``cffi.FFI().cdef`` accepts.

    The function repeatedly tries to ``cdef`` everything accepted so far plus a prefix of the
    remaining lines. On success the prefix is accepted; on failure the error message is parsed
    to shorten the prefix or to drop the offending line.

    :param questionable: List of source lines that have not been validated yet
    :return: List of lines that were accepted, in their original relative order
    :raises Exception: If cffi reports an error whose message matches none of the handled formats
    """
    known_good = []

    # Number of leading `questionable` lines included in the next attempt.
    end_line = len(questionable)

    while len(questionable):
        # A fresh FFI object per attempt, so earlier cdef calls do not affect this one.
        ffi = cffi.FFI()
        log.debug("scan - trying %d good and %d questionable", len(known_good), len(questionable))

        candidate = known_good + questionable[:end_line]
        # Index into `candidate` of the line blamed for a failure; -1 means "none identified".
        failed_line = -1

        try:
            ffi.cdef("\n".join(candidate))

            # The whole candidate parsed: accept it and move on to the remaining lines.
            known_good = candidate
            questionable = questionable[end_line:]
            end_line = len(questionable)
        except AssertionError:
            # No line information is extracted here: discard the first pending line and retry.
            questionable = questionable[1:]
            end_line = len(questionable)
        except cffi.CDefError as e:
            # The branches below pick a line number out of the message text; the exact message
            # formats are presumably those emitted by cffi -- verify against the cffi version in use.
            if "<cdef source string>" in str(e):
                failed_line = int(str(e).split("\n")[-1].split(":")[1]) - 1
            elif str(e).count(":") >= 2:
                failed_line = int(str(e).split("\n")[1].split(":")[1])
                failed_line_description = str(e).split("\n")[0]
                # The text between the first and the last double quote of the first message line.
                idx1 = failed_line_description.index('"')
                idx2 = failed_line_description.rindex('"')
                failed_reason = failed_line_description[idx1 + 1 : idx2]

                # Scan backwards from the reported line; there is no break, so the lowest
                # index whose text contains the quoted fragment ends up in failed_line.
                for i in range(failed_line, -1, -1):
                    if failed_reason in candidate[i]:
                        failed_line = i
            elif "unrecognized construct" in str(e):
                failed_line = int(str(e).split()[1][:-1]) - 1
            elif "end of input" in str(e):
                # No line to blame: retry with one pending line fewer.
                end_line -= 1
            else:
                raise Exception("Unknown error")
        except cffi.FFIError as e:
            if str(e).count(":") >= 2:
                failed_line = int(str(e).split("\n")[0].split(":")[1]) - 1
            else:
                raise Exception("Unknown error")

        if failed_line != -1:
            # Next attempt: only the pending lines that precede the blamed line.
            end_line = failed_line - len(known_good)

        if end_line == 0:
            # Nothing pending would be included in the next attempt, so skip the first
            # pending line and start over with the rest.
            questionable = questionable[1:]
            end_line = len(questionable)
    return known_good
 67 | 
 68 | 
 69 | def doit(vex_path):
 70 |     cpplist = ["cl", "cpp"]
 71 |     cpp = os.getenv("CPP")
 72 |     if cpp:
 73 |         cpplist.insert(0, cpp)
 74 |     if platform.system() == "Darwin":
 75 |         cpplist.insert(0, "clang")
 76 | 
 77 |     errs = []
 78 |     for cpp in cpplist:
 79 |         cmd = [cpp, "-I" + vex_path, os.path.join("pyvex_c", "pyvex.h")]
 80 |         if cpp in ("cl", "clang", "gcc", "cc", "clang++", "g++"):
 81 |             cmd.append("-E")
 82 |         try:
 83 |             p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 84 |             header, stderr = p.communicate()
 85 |             try:
 86 |                 header = header.decode("utf-8")
 87 |                 stderr = stderr.decode("utf-8")
 88 |             except UnicodeDecodeError:
 89 |                 # They don't have to be unicode on Windows
 90 |                 pass
 91 | 
 92 |             if not header.strip() or p.returncode != 0:
 93 |                 errs.append((" ".join(cmd), p.returncode, stderr))
 94 |                 continue
 95 |             else:
 96 |                 break
 97 |         except OSError:
 98 |             errs.append((" ".join(cmd), -1, "does not exist"))
 99 |             continue
100 |     else:
101 |         log.warning("failed commands:\n" + "\n".join("{} ({}) -- {}".format(*e) for e in errs))
102 |         raise Exception(
103 |             "Couldn't process pyvex headers."
104 |             + 'Please set CPP environmental variable to local path of "cpp".'
105 |             + 'Note that "cpp" and "g++" are different.'
106 |         )
107 |     # header = vex_pp + pyvex_pp
108 | 
109 |     linesep = "\r\n" if "\r\n" in header else "\n"
110 |     ffi_text = linesep.join(
111 |         line
112 |         for line in header.split(linesep)
113 |         if "#" not in line and line.strip() != "" and "jmp_buf" not in line and not ("=" in line and ";" in line)
114 |     )
115 |     ffi_text = re.sub(r"\{\s*\} NoOp;", "{ int DONOTUSE; } NoOp;", ffi_text)
116 |     ffi_text = re.sub(r"__attribute__\s*\(.*\)", "", ffi_text)
117 |     ffi_text = re.sub(r"__declspec\s*\([^\)]*\)", "", ffi_text)
118 |     ffi_text = ffi_text.replace("__const", "const")
119 |     ffi_text = ffi_text.replace("__inline", "")
120 |     ffi_text = ffi_text.replace("__w64", "")
121 |     ffi_text = ffi_text.replace("__cdecl", "")
122 |     ffi_text = ffi_text.replace("__int64", "long")
123 |     ffi_lines = ffi_text.split(linesep)
124 | 
125 |     good = find_good_scan(ffi_lines)
126 |     good += ["extern VexControl vex_control;"]
127 | 
128 |     with open("pyvex/vex_ffi.py", "w") as fp:
129 |         fp.write('ffi_str = """' + "\n".join(good) + '"""\n')
130 |         fp.write("guest_offsets = " + repr(get_guest_offsets(vex_path)) + "\n")
131 | 
132 | 
def get_guest_offsets(vex_path):
    """
    Read the ``#define OFFSET_<arch>_<reg> <value>`` lines of ``libvex_guest_offsets.h``.

    :param vex_path: Directory that contains ``libvex_guest_offsets.h``
    :return: Dict mapping ``(arch, lower-cased register name)`` to the integer offset
    """
    header_path = os.path.join(vex_path, "libvex_guest_offsets.h")
    offsets = {}
    with open(header_path) as header:
        # Only lines beginning with "#define" are considered; everything else is ignored.
        defines = (text for text in header if text.startswith("#define"))
        for text in defines:
            _, macro, raw_value = text.split()
            # Base 0 lets int() honour prefixes such as 0x.
            offset = int(raw_value, 0)
            assert macro.startswith("OFFSET_")
            # Split at most twice so that underscores inside the register name survive.
            _, arch, reg = macro.split("_", 2)
            offsets[(arch, reg.lower())] = offset
    return offsets
145 | 
146 | 
147 | if __name__ == "__main__":
148 |     logging.basicConfig(level=logging.DEBUG)
149 |     doit(sys.argv[1])
150 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools>=59", "wheel", "cffi >= 1.0.3"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [project]
 6 | name = "pyvex"
 7 | description = "A Python interface to libVEX and VEX IR"
 8 | license = { text = "BSD-2-Clause" }
 9 | classifiers = [
10 |     "Programming Language :: Python :: 3",
11 |     "Programming Language :: Python :: 3 :: Only",
12 |     "Programming Language :: Python :: 3.10",
13 |     "Programming Language :: Python :: 3.11",
14 |     "Programming Language :: Python :: 3.12",
15 |     "Programming Language :: Python :: 3.13",
16 | ]
17 | requires-python = ">=3.10"
18 | dependencies = [
19 |     "bitstring",
20 |     "cffi>=1.0.3;implementation_name == 'cpython'",
21 | ]
22 | dynamic = ["version"]
23 | 
24 | [project.readme]
25 | file = "README.md"
26 | content-type = "text/markdown"
27 | 
28 | [project.urls]
29 | Homepage = "https://api.angr.io/projects/pyvex/en/latest/"
30 | Repository = "https://github.com/angr/pyvex"
31 | 
32 | [project.optional-dependencies]
33 | docs = [
34 |     "furo",
35 |     "myst-parser",
36 |     "sphinx",
37 |     "sphinx-autodoc-typehints",
38 | ]
39 | fuzzing = [
40 |     "atheris>=2.3.0",
41 | ]
42 | testing = [
43 |     "pytest",
44 |     "pytest-xdist",
45 | ]
46 | 
47 | [tool.setuptools]
48 | include-package-data = true
49 | license-files = ["LICENSE", "pyvex_c/LICENSE"]
50 | 
51 | [tool.setuptools.packages.find]
52 | exclude = ["tests*"]
53 | namespaces = false
54 | 
55 | [tool.setuptools.dynamic]
56 | version = { attr = "pyvex.__version__" }
57 | 
58 | [tool.setuptools.package-data]
59 | pyvex = ["py.typed", "lib/*", "include/*"]
60 | 
61 | [tool.black]
62 | line-length = 120
63 | target-version = ['py310']
64 | force-exclude = '''
65 | /(
66 |   vex
67 | )/
68 | '''
69 | 
70 | [tool.ruff]
71 | line-length = 120
72 | 
73 | [tool.ruff.lint]
74 | select = [
75 |   "E",
76 |   "F",
77 |   "I",
78 |   "TID",
79 | ]
80 | 


--------------------------------------------------------------------------------
/pyvex/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | PyVEX provides an interface that translates binary code into the VEX intermediate representation (IR).
 3 | For an introduction to VEX, take a look here: https://docs.angr.io/advanced-topics/ir
 4 | """
 5 | 
 6 | __version__ = "9.2.160.dev0"
 7 | 
 8 | from . import const, expr, stmt
 9 | from .arches import (
10 |     ARCH_AMD64,
11 |     ARCH_ARM64_BE,
12 |     ARCH_ARM64_LE,
13 |     ARCH_ARM_BE,
14 |     ARCH_ARM_BE_LE,
15 |     ARCH_ARM_LE,
16 |     ARCH_MIPS32_BE,
17 |     ARCH_MIPS32_LE,
18 |     ARCH_MIPS64_BE,
19 |     ARCH_MIPS64_LE,
20 |     ARCH_PPC32,
21 |     ARCH_PPC64_BE,
22 |     ARCH_PPC64_LE,
23 |     ARCH_RISCV64_LE,
24 |     ARCH_S390X,
25 |     ARCH_X86,
26 | )
27 | from .block import IRSB, IRTypeEnv
28 | from .const import get_type_size, get_type_spec_size, tag_to_const_class
29 | from .enums import (
30 |     IRCallee,
31 |     IRRegArray,
32 |     VEXObject,
33 |     default_vex_archinfo,
34 |     get_enum_from_int,
35 |     get_int_from_enum,
36 |     irop_enums_to_ints,
37 |     vex_endness_from_string,
38 | )
39 | from .errors import PyVEXError
40 | from .expr import get_op_retty
41 | from .lifting import lift, lifters
42 | from .native import ffi, pvc
43 | 
44 | # aliases....
45 | IRStmt = stmt
46 | IRExpr = expr
47 | IRConst = const
48 | 
49 | 
50 | __all__ = [
51 |     "const",
52 |     "expr",
53 |     "stmt",
54 |     "IRSB",
55 |     "IRTypeEnv",
56 |     "get_type_size",
57 |     "get_type_spec_size",
58 |     "irop_enums_to_ints",
59 |     "tag_to_const_class",
60 |     "IRCallee",
61 |     "IRRegArray",
62 |     "VEXObject",
63 |     "default_vex_archinfo",
64 |     "get_enum_from_int",
65 |     "get_int_from_enum",
66 |     "vex_endness_from_string",
67 |     "PyVEXError",
68 |     "get_op_retty",
69 |     "lift",
70 |     "lifters",
71 |     "ffi",
72 |     "pvc",
73 |     "IRStmt",
74 |     "IRExpr",
75 |     "IRConst",
76 |     "ARCH_X86",
77 |     "ARCH_AMD64",
78 |     "ARCH_ARM_BE",
79 |     "ARCH_ARM_BE_LE",
80 |     "ARCH_ARM_LE",
81 |     "ARCH_ARM64_LE",
82 |     "ARCH_ARM64_BE",
83 |     "ARCH_PPC32",
84 |     "ARCH_PPC64_BE",
85 |     "ARCH_PPC64_LE",
86 |     "ARCH_S390X",
87 |     "ARCH_MIPS32_BE",
88 |     "ARCH_MIPS32_LE",
89 |     "ARCH_MIPS64_BE",
90 |     "ARCH_MIPS64_LE",
91 |     "ARCH_RISCV64_LE",
92 | ]
93 | 


--------------------------------------------------------------------------------
/pyvex/arches.py:
--------------------------------------------------------------------------------
 1 | from ._register_info import REGISTER_OFFSETS
 2 | from .enums import default_vex_archinfo, vex_endness_from_string
 3 | from .types import Register
 4 | from .vex_ffi import guest_offsets
 5 | 
 6 | 
 7 | class PyvexArch:
 8 |     """
 9 |     An architecture definition for use with pyvex - usable version.
10 |     """
11 | 
12 |     def __init__(self, name: str, bits: int, memory_endness: str, instruction_endness: str = "Iend_BE"):
13 |         self.name = name
14 |         self.bits = bits
15 |         self.memory_endness = memory_endness
16 |         self.instruction_endness = instruction_endness
17 |         self.byte_width = 8
18 |         self.register_list: list[Register] = []
19 |         self.registers: dict[str, tuple[int, int]] = {}
20 |         self.vex_arch = {
21 |             "X86": "VexArchX86",
22 |             "AMD64": "VexArchAMD64",
23 |             "ARM": "VexArchARM",
24 |             "ARM64": "VexArchARM64",
25 |             "PPC32": "VexArchPPC32",
26 |             "PPC64": "VexArchPPC64",
27 |             "S390X": "VexArchS390X",
28 |             "MIPS32": "VexArchMIPS32",
29 |             "MIPS64": "VexArchMIPS64",
30 |             "RISCV64": "VexArchRISCV64",
31 |         }[name]
32 |         self.ip_offset = guest_offsets[
33 |             (
34 |                 self.vex_name_small,
35 |                 {
36 |                     "X86": "eip",
37 |                     "AMD64": "rip",
38 |                     "ARM": "r15t",
39 |                     "ARM64": "pc",
40 |                     "PPC32": "cia",
41 |                     "PPC64": "cia",
42 |                     "S390X": "ia",
43 |                     "MIPS32": "pc",
44 |                     "MIPS64": "pc",
45 |                     "RISCV64": "pc",
46 |                 }[name],
47 |             )
48 |         ]
49 |         self.vex_archinfo = default_vex_archinfo()
50 |         if memory_endness == "Iend_BE":
51 |             self.vex_archinfo["endness"] = vex_endness_from_string("VexEndnessBE")
52 | 
53 |     def __repr__(self):
54 |         return f"<PyvexArch {self.name}>"
55 | 
56 |     @property
57 |     def vex_name_small(self):
58 |         return self.vex_arch[7:].lower()
59 | 
60 |     def translate_register_name(self, offset, size=None):  # pylint: disable=unused-argument
61 |         for (arch, reg), offset2 in guest_offsets.items():
62 |             if arch == self.vex_name_small and offset2 == offset:
63 |                 return reg
64 |         for (arch, reg), offset2 in REGISTER_OFFSETS.items():
65 |             if arch == self.vex_name_small and offset2 == offset:
66 |                 return reg
67 |         return str(offset)
68 | 
69 |     def get_register_offset(self, name: str) -> int:
70 |         arch_reg_tuple = (self.vex_name_small, name)
71 |         if arch_reg_tuple in guest_offsets:
72 |             return guest_offsets[arch_reg_tuple]
73 |         elif arch_reg_tuple in REGISTER_OFFSETS:
74 |             return REGISTER_OFFSETS[arch_reg_tuple]
75 |         else:
76 |             raise KeyError(f"Unknown register {name} for architecture {self.name}")
77 | 
78 | 
79 | ARCH_X86 = PyvexArch("X86", 32, "Iend_LE")
80 | ARCH_AMD64 = PyvexArch("AMD64", 64, "Iend_LE")
81 | ARCH_ARM_LE = PyvexArch("ARM", 32, "Iend_LE", instruction_endness="Iend_LE")
82 | ARCH_ARM_BE_LE = PyvexArch("ARM", 32, "Iend_BE", instruction_endness="Iend_LE")
83 | ARCH_ARM_BE = PyvexArch("ARM", 32, "Iend_LE")
84 | ARCH_ARM64_LE = PyvexArch("ARM64", 64, "Iend_LE", instruction_endness="Iend_LE")
85 | ARCH_ARM64_BE = PyvexArch("ARM64", 64, "Iend_BE")
86 | ARCH_PPC32 = PyvexArch("PPC32", 32, "Iend_BE")
87 | ARCH_PPC64_BE = PyvexArch("PPC64", 64, "Iend_BE")
88 | ARCH_PPC64_LE = PyvexArch("PPC64", 64, "Iend_LE")
89 | ARCH_S390X = PyvexArch("S390X", 64, "Iend_BE")
90 | ARCH_MIPS32_BE = PyvexArch("MIPS32", 32, "Iend_BE")
91 | ARCH_MIPS32_LE = PyvexArch("MIPS32", 32, "Iend_LE")
92 | ARCH_MIPS64_BE = PyvexArch("MIPS64", 64, "Iend_BE")
93 | ARCH_MIPS64_LE = PyvexArch("MIPS64", 64, "Iend_LE")
94 | ARCH_RISCV64_LE = PyvexArch("RISCV64", 64, "Iend_LE", instruction_endness="Iend_LE")
95 | 


--------------------------------------------------------------------------------
/pyvex/const.py:
--------------------------------------------------------------------------------
  1 | # pylint:disable=missing-class-docstring,raise-missing-from,not-callable
  2 | import re
  3 | from abc import ABC
  4 | 
  5 | from .enums import VEXObject, get_enum_from_int
  6 | from .errors import PyVEXError
  7 | from .native import ffi, pvc
  8 | 
  9 | 
 10 | # IRConst hierarchy
 11 | class IRConst(VEXObject, ABC):
 12 |     __slots__ = ["_value"]
 13 | 
 14 |     type: str
 15 |     size: int
 16 |     tag: str
 17 |     c_constructor = None
 18 |     _value: int
 19 | 
 20 |     def pp(self):
 21 |         print(str(self))
 22 | 
 23 |     @property
 24 |     def value(self) -> int:
 25 |         return self._value
 26 | 
 27 |     @staticmethod
 28 |     def _from_c(c_const):
 29 |         if c_const[0] == ffi.NULL:
 30 |             return None
 31 | 
 32 |         tag = get_enum_from_int(c_const.tag)
 33 | 
 34 |         try:
 35 |             return tag_to_const_class(tag)._from_c(c_const)
 36 |         except KeyError:
 37 |             raise PyVEXError("Unknown/unsupported IRConstTag %s\n" % tag)
 38 | 
 39 |     _translate = _from_c
 40 | 
 41 |     @classmethod
 42 |     def _to_c(cls, const):
 43 |         # libvex throws an exception when constructing a U1 with a value other than 0 or 1
 44 |         if const.tag == "Ico_U1" and const.value not in (0, 1):
 45 |             raise PyVEXError("Invalid U1 value: %d" % const.value)
 46 | 
 47 |         try:
 48 |             return cls.c_constructor(const.value)
 49 |         except KeyError:
 50 |             raise PyVEXError("Unknown/unsupported IRConstTag %s]n" % const.tag)
 51 | 
 52 |     def __eq__(self, other):
 53 |         if not isinstance(other, type(self)):
 54 |             return False
 55 |         return self._value == other._value
 56 | 
 57 |     def __hash__(self):
 58 |         return hash((type(self), self._value))
 59 | 
 60 | 
 61 | class U1(IRConst):
 62 |     __slots__: list[str] = []
 63 | 
 64 |     type = "Ity_I1"
 65 |     size = 1
 66 |     tag = "Ico_U1"
 67 |     op_format = "1"
 68 |     c_constructor = pvc.IRConst_U1
 69 | 
 70 |     def __init__(self, value):
 71 |         self._value = value
 72 | 
 73 |     def __str__(self):
 74 |         return "%d" % self.value
 75 | 
 76 |     @staticmethod
 77 |     def _from_c(c_const):
 78 |         return U1(c_const.Ico.U1)
 79 | 
 80 | 
 81 | class U8(IRConst):
 82 |     __slots__: list[str] = []
 83 | 
 84 |     type = "Ity_I8"
 85 |     size = 8
 86 |     tag = "Ico_U8"
 87 |     op_format = "8"
 88 |     c_constructor = pvc.IRConst_U8
 89 | 
 90 |     def __init__(self, value):
 91 |         self._value = value
 92 | 
 93 |     def __str__(self):
 94 |         return "0x%02x" % self.value
 95 | 
 96 |     @staticmethod
 97 |     def _from_c(c_const):
 98 |         return _U8_POOL[c_const.Ico.U8]
 99 | 
100 | 
101 | _U8_POOL = [U8(i) for i in range(256)]
102 | 
103 | 
class U16(IRConst):
    """16-bit integer constant (tag ``Ico_U16``, type ``Ity_I16``)."""

    __slots__: list[str] = []

    # VEX-facing metadata for this constant kind.
    tag = "Ico_U16"
    type = "Ity_I16"
    op_format = "16"
    size = 16
    c_constructor = pvc.IRConst_U16

    def __init__(self, value):
        self._value = value

    def __str__(self):
        return "0x%04x" % self.value

    @staticmethod
    def _from_c(c_const):
        """
        Convert the ``U16`` member of the C constant.

        Values below 1024 and values from 0xFC00 upwards come from the shared pool; anything in between gets a
        fresh instance.
        """
        val = c_const.Ico.U16
        if val >= 0xFC00:
            # The high range is stored after the first 1024 pool entries.
            return _U16_POOL[val - 0xFC00 + 1024]
        if val < 1024:
            return _U16_POOL[val]
        return U16(val)


# Pool layout: entries 0..1023 hold values 0..1023, followed by the values 0xFC00..0xFFFF.
_U16_POOL = [U16(i) for i in (*range(1024), *range(0xFC00, 0xFFFF + 1))]
130 | 
131 | 
class U32(IRConst):
    """32-bit integer constant (tag ``Ico_U32``, type ``Ity_I32``)."""

    __slots__: list[str] = []

    # VEX-facing metadata for this constant kind.
    tag = "Ico_U32"
    type = "Ity_I32"
    op_format = "32"
    size = 32
    c_constructor = pvc.IRConst_U32

    def __init__(self, value: int):
        self._value = value

    def __str__(self):
        return "0x%08x" % self.value

    @staticmethod
    def _from_c(c_const):
        """
        Convert the ``U32`` member of the C constant.

        Values below 1024 and values from 0xFFFFFC00 upwards come from the shared pool; anything in between gets
        a fresh instance.
        """
        val = c_const.Ico.U32
        if val >= 0xFFFFFC00:
            # The high range is stored after the first 1024 pool entries.
            return _U32_POOL[val - 0xFFFFFC00 + 1024]
        if val < 1024:
            return _U32_POOL[val]
        return U32(val)


# Pool layout: entries 0..1023 hold values 0..1023, followed by the values 0xFFFFFC00..0xFFFFFFFF.
_U32_POOL = [U32(i) for i in (*range(1024), *range(0xFFFFFC00, 0xFFFFFFFF + 1))]
158 | 
159 | 
class U64(IRConst):
    """64-bit integer constant (tag ``Ico_U64``, type ``Ity_I64``)."""

    __slots__: list[str] = []

    # VEX-facing metadata for this constant kind.
    tag = "Ico_U64"
    type = "Ity_I64"
    op_format = "64"
    size = 64
    c_constructor = pvc.IRConst_U64

    def __init__(self, value):
        self._value = value

    def __str__(self):
        return "0x%016x" % self.value

    @staticmethod
    def _from_c(c_const):
        """
        Convert the ``U64`` member of the C constant.

        Values below 1024 and values from 0xFFFFFFFFFFFFFC00 upwards come from the shared pool; anything in
        between gets a fresh instance.
        """
        val = c_const.Ico.U64
        if val >= 0xFFFFFFFFFFFFFC00:
            # The high range is stored after the first 1024 pool entries.
            return _U64_POOL[val - 0xFFFFFFFFFFFFFC00 + 1024]
        if val < 1024:
            return _U64_POOL[val]
        return U64(val)


# Pool layout: entries 0..1023 hold values 0..1023, followed by the top 1024 values of the 64-bit range.
_U64_POOL = [U64(i) for i in (*range(1024), *range(0xFFFFFFFFFFFFFC00, 0xFFFFFFFFFFFFFFFF + 1))]
186 | 
# Integer Type Imagination
# Maps a bit width to the integer constant class of that width; seeded with the predefined classes and extended
# on demand by vex_int_class.
class_cache = {1: U1, 8: U8, 16: U16, 32: U32, 64: U64}


def vex_int_class(size):
    """
    Return the integer constant class for the given bit width, creating and caching one if none exists yet.

    :param size:    Width of the integer type, in bits.
    :return:        The cached class for ``size``; for a width not seen before, a new ``IRConst`` subclass named
                    ``U<size>`` with ``type``, ``tag``, ``op_format`` and ``size`` filled in.
    """
    try:
        return class_cache[size]
    except KeyError:

        class VexInt(IRConst):
            type = "Ity_I%d" % size
            tag = "Ico_U%d" % size
            op_format = str(size)

            def __init__(self, value):
                IRConst.__init__(self)
                self._value = value

            def __str__(self):
                return f"(0x{self.value:x} :: {self.type})"

        # Give generated classes the same `size` attribute the predefined classes carry. It is assigned here
        # rather than in the class body because `size = size` inside the body could not see this function's
        # argument.
        VexInt.size = size
        VexInt.__name__ = "U%d" % size
        class_cache[size] = VexInt
        return VexInt
211 | 
212 | 
class F32(IRConst):
    """32-bit floating point constant (tag ``Ico_F32``, type ``Ity_F32``)."""

    __slots__: list[str] = []

    # VEX-facing metadata for this constant kind.
    tag = "Ico_F32"
    type = "Ity_F32"
    op_format = "F32"
    size = 32
    c_constructor = pvc.IRConst_F32

    def __init__(self, value):
        self._value = value

    def __str__(self):
        return "%f" % self.value

    @staticmethod
    def _from_c(c_const):
        """Wrap the ``F32`` member of the C constant's ``Ico`` union."""
        raw = c_const.Ico.F32
        return F32(raw)
231 | 
232 | 
class F32i(IRConst):
    """Constant with tag ``Ico_F32i`` and type ``Ity_F32``."""

    __slots__: list[str] = []

    # VEX-facing metadata for this constant kind.
    tag = "Ico_F32i"
    type = "Ity_F32"
    op_format = "F32"
    size = 32
    c_constructor = pvc.IRConst_F32i

    def __init__(self, value):
        self._value = value

    def __str__(self):
        return "%f" % self.value

    @staticmethod
    def _from_c(c_const):
        """Wrap the ``F32`` member of the C constant's ``Ico`` union."""
        # NOTE(review): this reads the F32 member rather than an F32i member - confirm that is intended.
        raw = c_const.Ico.F32
        return F32i(raw)
251 | 
252 | 
class F64(IRConst):
    """64-bit floating point constant (tag ``Ico_F64``, type ``Ity_F64``)."""

    __slots__: list[str] = []

    # VEX-facing metadata for this constant kind.
    tag = "Ico_F64"
    type = "Ity_F64"
    op_format = "F64"
    size = 64
    c_constructor = pvc.IRConst_F64

    def __init__(self, value):
        self._value = value

    def __str__(self):
        return "%f" % self.value

    @staticmethod
    def _from_c(c_const):
        """Wrap the ``F64`` member of the C constant's ``Ico`` union."""
        raw = c_const.Ico.F64
        return F64(raw)
271 | 
272 | 
class F64i(IRConst):
    """Constant with tag ``Ico_F64i`` and type ``Ity_F64``."""

    __slots__: list[str] = []

    # VEX-facing metadata for this constant kind.
    tag = "Ico_F64i"
    type = "Ity_F64"
    op_format = "F64"
    size = 64
    c_constructor = pvc.IRConst_F64i

    def __init__(self, value):
        self._value = value

    def __str__(self):
        return "%f" % self.value

    @staticmethod
    def _from_c(c_const):
        """Wrap the ``F64`` member of the C constant's ``Ico`` union."""
        # NOTE(review): this reads the F64 member rather than an F64i member - confirm that is intended.
        raw = c_const.Ico.F64
        return F64i(raw)
291 | 
292 | 
class V128(IRConst):
    """128-bit vector constant (tag ``Ico_V128``, type ``Ity_V128``)."""

    __slots__: list[str] = []

    # VEX-facing metadata for this constant kind.
    tag = "Ico_V128"
    type = "Ity_V128"
    op_format = "V128"
    size = 128
    c_constructor = pvc.IRConst_V128

    def __init__(self, value):
        self._value = value

    def __str__(self):
        return "%x" % self.value

    @staticmethod
    def _from_c(c_const):
        """
        Expand the compact C representation into the full 128-bit value.

        vex doesn't store a full 128 bit constant; it stores 1 bit per 8 bits of data, and each bit stands for a
        byte whose 8 bits all equal that bit.
        """
        mask = c_const.Ico.V128
        expanded = 0
        for byte_idx in range(16):
            if (mask >> byte_idx) & 1:
                expanded |= 0xFF << (byte_idx * 8)
        return V128(expanded)
318 | 
319 | 
class V256(IRConst):
    """256-bit vector constant (tag ``Ico_V256``, type ``Ity_V256``)."""

    __slots__: list[str] = []

    # VEX-facing metadata for this constant kind.
    tag = "Ico_V256"
    type = "Ity_V256"
    op_format = "V256"
    size = 256
    c_constructor = pvc.IRConst_V256

    def __init__(self, value):
        self._value = value

    def __str__(self):
        return "%x" % self.value

    @staticmethod
    def _from_c(c_const):
        """
        Expand the compact C representation into the full 256-bit value.

        Each of the 32 low bits of the C value stands for one byte of the result: a set bit becomes 0xFF at that
        byte position, a clear bit becomes 0x00.
        """
        mask = c_const.Ico.V256
        expanded = 0
        for byte_idx in range(32):
            if (mask >> byte_idx) & 1:
                expanded |= 0xFF << (byte_idx * 8)
        return V256(expanded)
344 | 
345 | 
# Constant classes shipped with this module, plus lookup tables keyed by their VEX type and tag strings.
predefined_types = [U1, U8, U16, U32, U64, F32, F32i, F64, F64i, V128, V256]
# Several classes share a type string (F32/F32i, F64/F64i); the class listed later wins in this map.
predefined_types_map = {const_cls.type: const_cls for const_cls in predefined_types}
predefined_classes_map = {const_cls.tag: const_cls for const_cls in predefined_types}
349 | 
# precompiled regexes
int_ty_re = re.compile(r"Ity_I\d+")
int_tag_re = re.compile(r"Ico_U\d+")
tag_size_re = re.compile(r"Ico_[UFV](?P<size>\d+)i?")


def is_int_ty(ty):
    """Return True if ``ty`` starts with an integer type name such as ``Ity_I32``."""
    return int_ty_re.match(ty) is not None


def is_int_tag(tag):
    """Return True if ``tag`` starts with an integer constant tag such as ``Ico_U32``."""
    return int_tag_re.match(tag) is not None


def get_tag_size(tag):
    """
    Return the size, in bits, encoded in a constant tag, e.g. Ico_U16 -> 16.

    :param tag: The constant tag string.
    :return:    The number embedded in the tag, as an int.
    :raises ValueError: If the tag does not match the expected pattern.
    """
    found = tag_size_re.match(tag)
    if found:
        return int(found.group("size"))
    raise ValueError("Tag %s does not have size" % tag)
371 | 
372 | 
type_str_re = re.compile(r"Ity_[IFDV](?P<size>\d+)")
type_tag_str_re = re.compile(r"[IFDV]?(?P<size>\d+)[SU]?")


def get_type_size(ty):
    """
    Returns the size, in BITS, of a VEX type specifier
    e.g., Ity_I16 -> 16

    :param ty:  The VEX type string.
    :return:    The number embedded in the type name, as an int.
    :raises ValueError: If the string does not match the expected pattern.
    """
    found = type_str_re.match(ty)
    if found:
        return int(found.group("size"))
    raise ValueError("Type %s does not have size" % ty)


def get_type_spec_size(ty):
    """
    Get the width of a "type specifier"
    like I16U
    or F16
    or just 16
    (Yes, this really just takes the int out.  If we must special-case, do it here.)

    :param ty:  The type specifier string.
    :return:    The number embedded in the specifier, as an int.
    :raises ValueError: If the string does not match the expected pattern.
    """
    found = type_tag_str_re.match(ty)
    if found:
        return int(found.group("size"))
    raise ValueError("Type specifier %s does not have size" % ty)
405 | 
406 | 
def ty_to_const_class(ty):
    """
    Return the constant class for a VEX type string.

    Predefined types are served from ``predefined_types_map``; other integer types are resolved through
    ``vex_int_class`` using the width embedded in the name.

    :param ty:  The VEX type string, e.g. ``Ity_I32``.
    :return:    The matching constant class.
    :raises ValueError: If the type is neither predefined nor an integer type.
    """
    try:
        return predefined_types_map[ty]
    except KeyError:
        if not is_int_ty(ty):
            raise ValueError("Type %s does not exist" % ty)
        return vex_int_class(get_type_size(ty))
416 | 
417 | 
def tag_to_const_class(tag):
    """
    Return the constant class for a constant tag string.

    Predefined tags are served from ``predefined_classes_map``; other integer tags are resolved through
    ``vex_int_class`` using the width embedded in the tag.

    :param tag: The constant tag string, e.g. ``Ico_U32``.
    :return:    The matching constant class.
    :raises ValueError: If the tag is neither predefined nor an integer tag.
    """
    try:
        return predefined_classes_map[tag]
    except KeyError:
        if not is_int_tag(tag):
            raise ValueError("Tag %s does not exist" % tag)
        return vex_int_class(get_tag_size(tag))
427 | 


--------------------------------------------------------------------------------
/pyvex/const_val.py:
--------------------------------------------------------------------------------
 1 | class ConstVal:
 2 |     """
 3 |     A constant value object. Indicates a constant value assignment to a VEX tmp variable.
 4 | 
 5 |     :ivar tmp:          The tmp variable being assigned to.
 6 |     :ivar value:        The value of the tmp variable.
 7 |     :ivar stmt_idx:     The IRSB statement index containing the data access
 8 |     """
 9 | 
10 |     __slots__ = (
11 |         "tmp",
12 |         "value",
13 |         "stmt_idx",
14 |     )
15 | 
16 |     def __init__(self, tmp: int, value: int, stmt_idx: int):
17 |         self.tmp = tmp
18 |         self.value = value
19 |         self.stmt_idx = stmt_idx
20 | 
21 |     def __repr__(self):
22 |         return f"<ConstVal {self.tmp} = {self.value:#x} @ {self.stmt_idx}>"
23 | 
24 |     @classmethod
25 |     def from_c(cls, r):
26 |         return cls(r.tmp, r.value, r.stmt_idx)
27 | 


--------------------------------------------------------------------------------
/pyvex/data_ref.py:
--------------------------------------------------------------------------------
 1 | def data_ref_type_str(dref_enum):
 2 |     """
 3 |     Translate an ``enum DataRefTypes`` value into a string representation.
 4 |     """
 5 |     if dref_enum == 0x9000:
 6 |         return "unknown"
 7 |     elif dref_enum == 0x9001:
 8 |         return "integer"
 9 |     elif dref_enum == 0x9002:
10 |         return "fp"
11 |     elif dref_enum == 0x9003:
12 |         return "integer(store)"
13 |     else:
14 |         return "INVALID"
15 | 
16 | 
class DataRef:
    """
    Describes a single data access found in an IRSB.

    :ivar data_addr:    The address of the data being accessed
    :ivar data_size:    The size of the data being accessed, in bytes
    :ivar data_type:    The type of the data, a DataRefTypes enum.
    :ivar stmt_idx:     The IRSB statement index containing the data access
    :ivar ins_addr:     The address of the instruction performing the data access
    """

    __slots__ = ("data_addr", "data_size", "data_type", "stmt_idx", "ins_addr")

    def __init__(self, data_addr, data_size, data_type, stmt_idx, ins_addr):
        self.data_addr, self.data_size, self.data_type = data_addr, data_size, data_type
        self.stmt_idx, self.ins_addr = stmt_idx, ins_addr

    @property
    def data_type_str(self):
        """
        The data ref type rendered as a string by ``data_ref_type_str``.
        """
        type_name = data_ref_type_str(self.data_type)
        return type_name

    def __repr__(self):
        fields = (
            self.data_addr,
            data_ref_type_str(self.data_type),
            self.data_size,
            self.ins_addr,
            self.stmt_idx,
        )
        return "<DataRef accessing %#x %s:%d at %#x:%d>" % fields

    @classmethod
    def from_c(cls, r):
        """Build an instance from an object exposing the native record's fields (its size lives in ``r.size``)."""
        fields = (r.data_addr, r.size, r.data_type, r.stmt_idx, r.ins_addr)
        return cls(*fields)
56 | 


--------------------------------------------------------------------------------
/pyvex/enums.py:
--------------------------------------------------------------------------------
  1 | from typing import Any
  2 | 
  3 | from .native import ffi, pvc
  4 | from .utils import stable_hash
  5 | 
  6 | 
  7 | class VEXObject:
  8 |     """
  9 |     The base class for Vex types.
 10 |     """
 11 | 
 12 |     __slots__: list[str] = []
 13 | 
 14 |     def __eq__(self, other):
 15 |         if not isinstance(other, type(self)):
 16 |             return False
 17 |         # compare values in slots
 18 |         for slot in self.__slots__:
 19 |             if getattr(self, slot) != getattr(other, slot):
 20 |                 return False
 21 |         return True
 22 | 
 23 |     def __hash__(self):
 24 |         values = [getattr(self, slot) for slot in self.__slots__]
 25 |         for i, lst_val in enumerate(values):
 26 |             if isinstance(lst_val, list):
 27 |                 values[i] = tuple(lst_val)
 28 |         return stable_hash(tuple([type(self)] + values))
 29 | 
 30 | 
 31 | class IRCallee(VEXObject):
 32 |     """
 33 |     Describes a helper function to call.
 34 |     """
 35 | 
 36 |     __slots__ = ["regparms", "name", "mcx_mask"]
 37 | 
 38 |     def __init__(self, regparms, name, mcx_mask):
 39 |         VEXObject.__init__(self)
 40 |         self.regparms = regparms
 41 |         self.name = name
 42 |         self.mcx_mask = mcx_mask
 43 | 
 44 |     def __str__(self):
 45 |         return str(self.name)
 46 | 
 47 |     @staticmethod
 48 |     def _from_c(c_callee):
 49 |         return IRCallee(
 50 |             c_callee.regparms,
 51 |             ffi.string(c_callee.name).decode(),
 52 |             # NO. #int(ffi.cast("unsigned long long", c_callee.addr)),
 53 |             c_callee.mcx_mask,
 54 |         )
 55 | 
 56 |     @staticmethod
 57 |     def _to_c(callee):  # pylint: disable=unused-argument
 58 |         raise TypeError(
 59 |             "This doesn't work! Please invent a way to get the correct address for the named function from pyvex_c."
 60 |         )
 61 |         # c_callee = pvc.mkIRCallee(callee.regparms,
 62 |         #                          callee.name.encode(),
 63 |         #                          ffi.cast("void *", callee.addr))
 64 |         # c_callee.mcx_mask = callee.mcx_mask
 65 |         # return c_callee
 66 | 
 67 | 
 68 | class IRRegArray(VEXObject):
 69 |     """
 70 |     A section of the guest state that we want te be able to index at run time, so as to be able to describe indexed or
 71 |     rotating register files on the guest.
 72 | 
 73 |     :ivar int base:     The offset into the state that this array starts
 74 |     :ivar str elemTy:   The types of the elements in this array, as VEX enum strings
 75 |     :ivar int nElems:   The number of elements in this array
 76 |     """
 77 | 
 78 |     __slots__ = ["base", "elemTy", "nElems"]
 79 | 
 80 |     def __init__(self, base, elemTy, nElems):
 81 |         VEXObject.__init__(self)
 82 |         self.base = base
 83 |         self.elemTy = elemTy
 84 |         self.nElems = nElems
 85 | 
 86 |     def __str__(self):
 87 |         return "%s:%sx%d" % (self.base, self.elemTy[4:], self.nElems)
 88 | 
 89 |     @staticmethod
 90 |     def _from_c(c_arr):
 91 |         return IRRegArray(c_arr.base, ints_to_enums[c_arr.elemTy], c_arr.nElems)
 92 | 
 93 |     @staticmethod
 94 |     def _to_c(arr):
 95 |         return pvc.mkIRRegArray(arr.base, get_int_from_enum(arr.elemTy), arr.nElems)
 96 | 
 97 | 
# Lookup tables between enum names and their integer values; filled in by _add_enum.
ints_to_enums: dict[int, str] = {}
enums_to_ints: dict[str, int] = {}
# Subset of enums_to_ints holding only the names that start with "Iop_".
irop_enums_to_ints: dict[str, int] = {}
# Enum names whose integer key may be taken over by a later registration without raising an error.
will_be_overwritten = ["Ircr_GT", "Ircr_LT"]


def get_enum_from_int(i):
    """
    Return the enum name registered for the integer ``i``.

    :raises KeyError: If no enum is registered under ``i``.
    """
    return ints_to_enums[i]


def get_int_from_enum(e):
    """
    Return the integer registered for the enum name ``e``.

    :raises KeyError: If ``e`` is not a registered enum name.
    """
    return enums_to_ints[e]
110 | 
111 | 
# Next candidate integer for enums registered without an explicit value.
_add_enum_counter = 0


def _add_enum(s, i=None):  # TODO get rid of this
    """
    Register the enum name ``s`` under the integer ``i`` in the lookup tables.

    :param s:   The enum name.
    :param i:   The integer value; when None, the lowest unused integer at or above the running counter is taken.
    :raises ValueError: If ``i`` is already taken by an enum that is not listed in ``will_be_overwritten``.
    """
    global _add_enum_counter  # pylint: disable=global-statement
    if i is None:
        candidate = _add_enum_counter
        while candidate in ints_to_enums:
            candidate += 1
        i = candidate
        _add_enum_counter = candidate + 1  # Update for the next iteration
    if i in ints_to_enums and ints_to_enums[i] not in will_be_overwritten:
        raise ValueError("Enum with intkey %d already present" % i)
    enums_to_ints[s] = i
    ints_to_enums[i] = s
    if s.startswith("Iop_"):
        irop_enums_to_ints[s] = i
129 | 
130 | 
# Import-time registration: every attribute of pvc whose name starts with an ASCII uppercase letter and whose
# value is an int is recorded as an enum under that name and value.
for attr in dir(pvc):
    if attr[0] in "ABCDEFGHIJKLMNOPQRSTUVWXYZ" and hasattr(pvc, attr) and isinstance(getattr(pvc, attr), int):
        _add_enum(attr, getattr(pvc, attr))
134 | 
135 | 
def vex_endness_from_string(endness_str):
    """
    Look up the attribute of ``pvc`` named ``endness_str`` (e.g. "VexEndnessLE") and return its value.

    :raises AttributeError: If ``pvc`` has no attribute with that name.
    """
    return getattr(pvc, endness_str)
138 | 
139 | 
def default_vex_archinfo() -> dict[str, Any]:
    """
    Build a fresh archinfo dictionary populated with default values.

    :return: A new dict on every call: little-endian endness, ``x86_cr0`` set to 0xFFFFFFFF, an empty cache
             description, and every remaining field set to 0.
    """
    hwcache_info = {
        "num_levels": 0,
        "num_caches": 0,
        "caches": None,
        "icaches_maintain_coherence": True,
    }
    archinfo: dict[str, Any] = {
        "hwcaps": 0,
        "endness": vex_endness_from_string("VexEndnessLE"),
        "ppc_icache_line_szB": 0,
        "ppc_dcbz_szB": 0,
        "ppc_dcbzl_szB": 0,
        "arm64_dMinLine_lg2_szB": 0,
        "arm64_iMinLine_lg2_szB": 0,
        "hwcache_info": hwcache_info,
        "x86_cr0": 0xFFFFFFFF,
    }
    return archinfo
157 | 


--------------------------------------------------------------------------------
/pyvex/errors.py:
--------------------------------------------------------------------------------
 1 | class PyVEXError(Exception):
 2 |     pass
 3 | 
 4 | 
 5 | class SkipStatementsError(PyVEXError):
 6 |     pass
 7 | 
 8 | 
 9 | #
10 | # Exceptions and notifications that post-processors can raise
11 | #
12 | 
13 | 
class LiftingException(Exception):
    """Base class for the exceptions and notifications used while lifting."""


class NeedStatementsNotification(LiftingException):
    """
    A post-processor may raise a NeedStatementsNotification if it needs to work with statements, but the current IRSB
    is generated without any statement available (skip_stmts=True). The lifter will re-lift the current block with
    skip_stmts=False upon catching a NeedStatementsNotification, and re-run the post-processors.

    It's worth noting that if a post-processor always raises this notification for every basic block without statements,
    it will essentially disable the skipping statement optimization, and it is bad for performance (especially for
    CFGFast, which heavily relies on this optimization). Post-processor authors are encouraged to at least filter the
    IRSBs based on available properties (jumpkind, next, etc.). If a post-processor must work with statements for the
    majority of IRSBs, the author should implement it in PyVEX in C for the sake of a better performance.
    """
32 | 


--------------------------------------------------------------------------------
/pyvex/lifting/__init__.py:
--------------------------------------------------------------------------------
 1 | from .gym import AARCH64Spotter, AMD64Spotter, ARMSpotter, X86Spotter
 2 | from .libvex import LIBVEX_SUPPORTED_ARCHES, LibVEXLifter
 3 | from .lift_function import lift, lifters, register
 4 | from .lifter import Lifter
 5 | from .post_processor import Postprocessor
 6 | from .zerodivision import ZeroDivisionPostProcessor
 7 | 
# Import-time lifter registration. LibVEXLifter is registered first for every libvex-supported architecture,
# then the gym spotters are registered for the architectures they cover.
for arch in LIBVEX_SUPPORTED_ARCHES:
    register(LibVEXLifter, arch)
register(AARCH64Spotter, "AARCH64")
register(ARMSpotter, "ARM")
register(ARMSpotter, "ARMEL")
register(ARMSpotter, "ARMHF")
register(ARMSpotter, "ARMCortexM")
register(AMD64Spotter, "AMD64")
register(X86Spotter, "X86")

# Names exported by ``from pyvex.lifting import *``.
__all__ = ["Lifter", "Postprocessor", "lift", "register", "lifters", "ZeroDivisionPostProcessor"]
19 | 


--------------------------------------------------------------------------------
/pyvex/lifting/gym/README.md:
--------------------------------------------------------------------------------
1 | # The Gym
2 | 
3 | This is where we're putting non-libvex lifters that we feel should be included with the pyvex distribution.
4 | 
5 | These will probably be mostly "spotters", which correct for gaps in libvex's instruction support.
6 | 
7 | 
8 | 


--------------------------------------------------------------------------------
/pyvex/lifting/gym/__init__.py:
--------------------------------------------------------------------------------
1 | from .aarch64_spotter import AARCH64Spotter
2 | from .arm_spotter import ARMSpotter
3 | from .x86_spotter import AMD64Spotter, X86Spotter
4 | 
5 | __all__ = ("ARMSpotter", "AARCH64Spotter", "X86Spotter", "AMD64Spotter")
6 | 


--------------------------------------------------------------------------------
/pyvex/lifting/gym/aarch64_spotter.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from pyvex.lifting.util.instr_helper import Instruction
 4 | from pyvex.lifting.util.lifter_helper import GymratLifter
 5 | 
 6 | log = logging.getLogger(__name__)
 7 | 
 8 | 
class Aarch64Instruction(Instruction):  # pylint: disable=abstract-method
    """Common base class for the AArch64 instructions defined in this module."""

    # NOTE: WARNING: There is no MRS, MSR, SYSL in VEX's ARM implementation
    # You must use straight nasty hacks instead.
    pass
13 | 
14 | 
class Instruction_SYSL(Aarch64Instruction):
    """SYSL instruction; lifting it only emits a debug log message."""

    name = "SYSL"
    # 32-character pattern: fixed bits given as 0/1, letters mark variable fields.
    bin_format = "1101010100101qqqnnnnmmmmppprrrrr"

    def compute_result(self):  # pylint: disable=arguments-differ
        # The instruction is ignored on purpose; only its address is logged.
        log.debug("Ignoring SYSL instruction at %#x.", self.addr)
21 | 
22 | 
class Instruction_MSR(Aarch64Instruction):
    """MSR instruction; lifting it only emits a debug log message."""

    name = "MSR"
    # 32-character pattern: fixed bits given as 0/1, letters mark variable fields.
    bin_format = "11010101000ioqqqnnnnmmmmppprrrrr"

    def compute_result(self):  # pylint: disable=arguments-differ
        # The instruction is ignored on purpose; only its address is logged.
        log.debug("Ignoring MSR instruction at %#x.", self.addr)
29 | 
30 | 
class Instruction_MRS(Aarch64Instruction):
    """MRS instruction; lifting it only emits a debug log message."""

    name = "MRS"
    # 32-character pattern: fixed bits given as 0/1, letters mark variable fields.
    # NOTE(review): the letter "p" labels two separate bit groups here - confirm that is intended.
    bin_format = "110101010011opppnnnnmmmmppprrrrr"

    def compute_result(self):  # pylint: disable=arguments-differ
        # The instruction is ignored on purpose; only its address is logged.
        log.debug("Ignoring MRS instruction at %#x.", self.addr)
37 | 
38 | 
class AARCH64Spotter(GymratLifter):
    """Lifter covering the AArch64 MRS, MSR and SYSL instructions defined in this module."""

    instrs = [Instruction_MRS, Instruction_MSR, Instruction_SYSL]
41 | 


--------------------------------------------------------------------------------
/pyvex/lifting/gym/x86_spotter.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | 
  3 | from pyvex.lifting.util import GymratLifter, Instruction, JumpKind, Type
  4 | 
  5 | log = logging.getLogger(__name__)
  6 | 
  7 | # pylint: disable=missing-class-docstring
  8 | 
  9 | 
class Instruction_SWAPGS(Instruction):
    """SWAPGS instruction; recognised by its encoding but lifted as a no-op."""

    name = "SWAPGS"
    bin_format = "000011110000000111111000"  # 0f 01 f8

    def compute_result(self, *args):
        # Intentionally empty: no IR is produced for this instruction.
        pass  # TODO check for priv mode
 16 | 
 17 | 
class Instruction_SYSRET(Instruction):
    """SYSRET instruction, lifted as a dirty-helper call followed by a return-kind jump to the helper's result."""

    name = "SYSRET"
    bin_format = "010010000000111100000111"  # 48 0f 07

    def compute_result(self, *args):
        # The helper name is prefixed with the lower-cased architecture name; the helper takes no arguments
        # and yields a 64-bit value that is used as the jump target.
        result = self.dirty(Type.int_64, "%sg_dirtyhelper_SYSRET" % self.arch.name.lower(), ())
        self.jump(None, result, JumpKind.Ret)
 25 | 
 26 | 
class Instruction_IRETQ(Instruction):
    """IRETQ instruction, lifted as a dirty-helper call followed by a return-kind jump to the helper's result."""

    name = "IRETQ"
    bin_format = "0100100011001111"  # 48 cf

    def compute_result(self, *args):
        # The helper name is prefixed with the lower-cased architecture name; the helper takes no arguments
        # and yields a 64-bit value that is used as the jump target.
        result = self.dirty(Type.int_64, "%sg_dirtyhelper_IRETQ" % self.arch.name.lower(), ())
        self.jump(None, result, JumpKind.Ret)
 34 | 
 35 | 
class Instruction_RDMSR(Instruction):
    """RDMSR instruction, lifted as a dirty-helper call whose 64-bit result is split across eax/edx."""

    name = "RDMSR"
    bin_format = "0000111100110010"  # 0f 32

    def compute_result(self, *args):
        # ecx is passed to the helper as its only argument.
        ecx = self.get("ecx", Type.int_32)
        result = self.dirty(Type.int_64, "%sg_dirtyhelper_RDMSR" % self.arch.name.lower(), (ecx,))
        # High half goes to edx, low half to eax.
        edx = result.narrow_high(Type.int_32)
        eax = result.narrow_low(Type.int_32)
        if self.arch.bits == 32:
            self.put(eax, "eax")
            self.put(edx, "edx")
        else:
            # Otherwise the halves are widened to 64 bits and written to the full registers.
            self.put(eax.widen_unsigned(Type.int_64), "rax")
            self.put(edx.widen_unsigned(Type.int_64), "rdx")
 51 | 
 52 | 
class Instruction_XGETBV(Instruction):
    """XGETBV instruction, lifted as a dirty-helper call whose 64-bit result is split across eax/edx."""

    name = "XGETBV"
    bin_format = "000011110000000111010000"  # 0f 01 d0

    def compute_result(self, *args):
        # ecx is passed to the helper as its only argument.
        ecx = self.get("ecx", Type.int_32)
        result = self.dirty(Type.int_64, "%sg_dirtyhelper_XGETBV" % self.arch.name.lower(), (ecx,))
        # High half goes to edx, low half to eax.
        edx = result.narrow_high(Type.int_32)
        eax = result.narrow_low(Type.int_32)
        if self.arch.bits == 32:
            self.put(eax, "eax")
            self.put(edx, "edx")
        else:
            # Otherwise the halves are widened to 64 bits and written to the full registers.
            self.put(eax.widen_unsigned(Type.int_64), "rax")
            self.put(edx.widen_unsigned(Type.int_64), "rdx")
 68 | 
 69 | 
class Instruction_AAM(Instruction):
    """AAM with an arbitrary 8-bit immediate base: AH receives AL // base and AL receives AL % base."""

    name = "AAM"
    # Opcode byte followed by the 8-bit immediate, captured as field "i".
    bin_format = "11010100iiiiiiii"

    # From https://www.felixcloutier.com/x86/aam
    def compute_result(self):  # pylint: disable=arguments-differ
        # self.data["i"] holds the immediate as a string of binary digits.
        base = self.constant(int(self.data["i"], 2), Type.int_8)
        temp_al = self.get("al", Type.int_8)
        temp_ah = temp_al // base
        temp_al = temp_al % base
        self.put(temp_ah, "ah")
        self.put(temp_al, "al")
        log.debug(
            "The generalized AAM instruction is not supported by VEX, and is handled specially by pyvex."
            " It has no flag handling at present.  See pyvex/lifting/gym/x86_spotter.py for details"
        )

    # TODO: Flags
 88 | 
 89 | 
class Instruction_AAD(Instruction):
    """AAD with an arbitrary 8-bit immediate base: AL receives (AL + AH * base) & 0xFF and AH is cleared."""

    name = "AAD"
    # Opcode byte followed by the 8-bit immediate, captured as field "i".
    bin_format = "11010101iiiiiiii"

    # From https://www.felixcloutier.com/x86/aad
    def compute_result(self):  # pylint: disable=arguments-differ
        # self.data["i"] holds the immediate as a string of binary digits.
        base = self.constant(int(self.data["i"], 2), Type.int_8)
        temp_al = self.get("al", Type.int_8)
        temp_ah = self.get("ah", Type.int_8)
        temp_al = (temp_al + (temp_ah * base)) & 0xFF
        temp_ah = self.constant(0, Type.int_8)
        self.put(temp_ah, "ah")
        self.put(temp_al, "al")
        log.debug(
            "The generalized AAD instruction is not supported by VEX, and is handled specially by pyvex."
            " It has no flag handling at present.  See pyvex/lifting/gym/x86_spotter.py for details"
        )

    # TODO: Flags
109 | 
110 | 
class AMD64Spotter(GymratLifter):
    """Lifter for the instructions listed in ``instrs``: the X86Spotter set plus SWAPGS, IRETQ and SYSRET."""

    instrs = [
        Instruction_RDMSR,
        Instruction_XGETBV,
        Instruction_AAD,
        Instruction_AAM,
        Instruction_SWAPGS,
        Instruction_IRETQ,
        Instruction_SYSRET,
    ]
121 | 
122 | 
class X86Spotter(GymratLifter):
    """Lifter for the instructions listed in ``instrs``: RDMSR, XGETBV, AAD and AAM."""

    instrs = [
        Instruction_RDMSR,
        Instruction_XGETBV,
        Instruction_AAD,
        Instruction_AAM,
    ]
130 | 


--------------------------------------------------------------------------------
/pyvex/lifting/libvex.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import threading
  3 | from typing import TYPE_CHECKING
  4 | 
  5 | from pyvex.errors import LiftingException
  6 | from pyvex.native import ffi, pvc
  7 | from pyvex.types import CLiftSource, LibvexArch
  8 | 
  9 | from .lift_function import Lifter
 10 | 
 11 | log = logging.getLogger("pyvex.lifting.libvex")
 12 | 
 13 | _libvex_lock = threading.Lock()
 14 | 
# Architecture names for which LibVEXLifter gets registered (see pyvex/lifting/__init__.py).
LIBVEX_SUPPORTED_ARCHES = {
    "X86",
    "AMD64",
    "MIPS32",
    "MIPS64",
    "ARM",
    "ARMEL",
    "ARMHF",
    "ARMCortexM",
    "AARCH64",
    "PPC32",
    "PPC64",
    "S390X",
    "RISCV64",
}

# Caps applied by LibVEXLifter._lift: a missing or larger max_inst / max_bytes is replaced by these values.
VEX_MAX_INSTRUCTIONS = 99
VEX_MAX_BYTES = 5000
 33 | 
 34 | 
class VexRegisterUpdates:
    """
    Integer constants passed to ``pvc.vex_lift`` as its register-update control argument.

    NOTE(review): presumably mirrors libvex's ``VexRegisterUpdates`` enum - confirm against the libvex headers.
    """

    VexRegUpd_INVALID = 0x700
    VexRegUpdSpAtMemAccess = 0x701
    VexRegUpdUnwindregsAtMemAccess = 0x702
    VexRegUpdAllregsAtMemAccess = 0x703
    VexRegUpdAllregsAtEachInsn = 0x704
    VexRegUpdLdAllregsAtEachInsn = 0x705
 42 | 
 43 | 
 44 | class LibVEXLifter(Lifter):
 45 |     __slots__ = ()
 46 | 
 47 |     REQUIRE_DATA_C = True
 48 | 
 49 |     @staticmethod
 50 |     def get_vex_log():
 51 |         return bytes(ffi.buffer(pvc.msg_buffer, pvc.msg_current_size)).decode() if pvc.msg_buffer != ffi.NULL else None
 52 | 
 53 |     def _lift(self):
 54 |         if TYPE_CHECKING:
 55 |             assert isinstance(self.irsb.arch, LibvexArch)
 56 |             assert isinstance(self.data, CLiftSource)
 57 |         try:
 58 |             _libvex_lock.acquire()
 59 | 
 60 |             pvc.log_level = log.getEffectiveLevel()
 61 |             vex_arch = getattr(pvc, self.irsb.arch.vex_arch, None)
 62 |             assert vex_arch is not None
 63 | 
 64 |             if self.bytes_offset is None:
 65 |                 self.bytes_offset = 0
 66 | 
 67 |             if self.max_bytes is None or self.max_bytes > VEX_MAX_BYTES:
 68 |                 max_bytes = VEX_MAX_BYTES
 69 |             else:
 70 |                 max_bytes = self.max_bytes
 71 | 
 72 |             if self.max_inst is None or self.max_inst > VEX_MAX_INSTRUCTIONS:
 73 |                 max_inst = VEX_MAX_INSTRUCTIONS
 74 |             else:
 75 |                 max_inst = self.max_inst
 76 | 
 77 |             strict_block_end = self.strict_block_end
 78 |             if strict_block_end is None:
 79 |                 strict_block_end = True
 80 | 
 81 |             if self.cross_insn_opt:
 82 |                 px_control = VexRegisterUpdates.VexRegUpdUnwindregsAtMemAccess
 83 |             else:
 84 |                 px_control = VexRegisterUpdates.VexRegUpdLdAllregsAtEachInsn
 85 | 
 86 |             self.irsb.arch.vex_archinfo["hwcache_info"]["caches"] = ffi.NULL
 87 |             lift_r = pvc.vex_lift(
 88 |                 vex_arch,
 89 |                 self.irsb.arch.vex_archinfo,
 90 |                 self.data + self.bytes_offset,
 91 |                 self.irsb.addr,
 92 |                 max_inst,
 93 |                 max_bytes,
 94 |                 self.opt_level,
 95 |                 self.traceflags,
 96 |                 self.allow_arch_optimizations,
 97 |                 strict_block_end,
 98 |                 1 if self.collect_data_refs else 0,
 99 |                 1 if self.load_from_ro_regions else 0,
100 |                 1 if self.const_prop else 0,
101 |                 px_control,
102 |                 self.bytes_offset,
103 |             )
104 |             log_str = self.get_vex_log()
105 |             if lift_r == ffi.NULL:
106 |                 raise LiftingException("libvex: unknown error" if log_str is None else log_str)
107 |             else:
108 |                 if log_str is not None:
109 |                     log.debug(log_str)
110 | 
111 |             self.irsb._from_c(lift_r, skip_stmts=self.skip_stmts)
112 |             if self.irsb.size == 0:
113 |                 log.debug("raising lifting exception")
114 |                 raise LiftingException("libvex: could not decode any instructions @ 0x%x" % self.addr)
115 |         finally:
116 |             _libvex_lock.release()
117 |             self.irsb.arch.vex_archinfo["hwcache_info"]["caches"] = None
118 | 


--------------------------------------------------------------------------------
/pyvex/lifting/lift_function.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | from collections import defaultdict
  3 | from typing import DefaultDict
  4 | 
  5 | from pyvex import const
  6 | from pyvex.block import IRSB
  7 | from pyvex.const import vex_int_class
  8 | from pyvex.errors import LiftingException, NeedStatementsNotification, PyVEXError, SkipStatementsError
  9 | from pyvex.expr import Const
 10 | from pyvex.native import ffi
 11 | from pyvex.types import LiftSource, PyLiftSource
 12 | 
 13 | from .lifter import Lifter
 14 | from .post_processor import Postprocessor
 15 | 
# Module-level logger for the lifting dispatch code.
log = logging.getLogger(__name__)

# Registries populated by register(): architecture name -> list of registered classes, kept in
# registration order. lift() looks both up by arch.name.
lifters: DefaultDict[str, list[type[Lifter]]] = defaultdict(list)
postprocessors: DefaultDict[str, list[type[Postprocessor]]] = defaultdict(list)
 20 | 
 21 | 
 22 | def lift(
 23 |     data: LiftSource,
 24 |     addr,
 25 |     arch,
 26 |     max_bytes=None,
 27 |     max_inst=None,
 28 |     bytes_offset=0,
 29 |     opt_level=1,
 30 |     traceflags=0,
 31 |     strict_block_end=True,
 32 |     inner=False,
 33 |     skip_stmts=False,
 34 |     collect_data_refs=False,
 35 |     cross_insn_opt=True,
 36 |     load_from_ro_regions=False,
 37 |     const_prop=False,
 38 | ):
 39 |     """
 40 |     Recursively lifts blocks using the registered lifters and postprocessors. Tries each lifter in the order in
 41 |     which they are registered on the data to lift.
 42 | 
 43 |     If a lifter raises a LiftingException on the data, it is skipped.
 44 |     If it succeeds and returns a block with a jumpkind of Ijk_NoDecode, all of the lifters are tried on the rest
 45 |     of the data and if they work, their output is appended to the first block.
 46 | 
 47 |     :param arch:            The arch to lift the data as.
 48 |     :param addr:            The starting address of the block. Effects the IMarks.
 49 |     :param data:            The bytes to lift as either a python string of bytes or a cffi buffer object.
 50 |     :param max_bytes:       The maximum number of bytes to lift. If set to None, no byte limit is used.
 51 |     :param max_inst:        The maximum number of instructions to lift. If set to None, no instruction limit is used.
 52 |     :param bytes_offset:    The offset into `data` to start lifting at.
 53 |     :param opt_level:       The level of optimization to apply to the IR, -1 through 2. -1 is the strictest
 54 |                             unoptimized level, 0 is unoptimized but will perform some lookahead/lookbehind
 55 |                             optimizations, 1 performs constant propogation, and 2 performs loop unrolling,
 56 |                             which honestly doesn't make much sense in the context of pyvex. The default is 1.
 57 |     :param traceflags:      The libVEX traceflags, controlling VEX debug prints.
 58 | 
 59 |     .. note:: Explicitly specifying the number of instructions to lift (`max_inst`) may not always work
 60 |               exactly as expected. For example, on MIPS, it is meaningless to lift a branch or jump
 61 |               instruction without its delay slot. VEX attempts to Do The Right Thing by possibly decoding
 62 |               fewer instructions than requested. Specifically, this means that lifting a branch or jump
 63 |               on MIPS as a single instruction (`max_inst=1`) will result in an empty IRSB, and subsequent
 64 |               attempts to run this block will raise `SimIRSBError('Empty IRSB passed to SimIRSB.')`.
 65 | 
 66 |     .. note:: If no instruction and byte limit is used, pyvex will continue lifting the block until the block
 67 |               ends properly or until it runs out of data to lift.
 68 |     """
 69 |     if max_bytes is not None and max_bytes <= 0:
 70 |         raise PyVEXError("Cannot lift block with no data (max_bytes <= 0)")
 71 | 
 72 |     if not data:
 73 |         raise PyVEXError("Cannot lift block with no data (data is empty)")
 74 | 
 75 |     if isinstance(data, str):
 76 |         raise TypeError("Cannot pass unicode string as data to lifter")
 77 | 
 78 |     py_data: PyLiftSource | None
 79 |     if isinstance(data, (bytes, bytearray, memoryview)):
 80 |         py_data = data
 81 |         c_data = None
 82 |         allow_arch_optimizations = False
 83 |     else:
 84 |         if max_bytes is None:
 85 |             raise PyVEXError("Cannot lift block with ffi pointer and no size (max_bytes is None)")
 86 |         c_data = data
 87 |         py_data = None
 88 |         allow_arch_optimizations = True
 89 | 
 90 |     # In order to attempt to preserve the property that
 91 |     # VEX lifts the same bytes to the same IR at all times when optimizations are disabled
 92 |     # we hack off all of VEX's non-IROpt optimizations when opt_level == -1.
 93 |     # This is intended to enable comparisons of the lifted IR between code that happens to be
 94 |     # found in different contexts.
 95 |     if opt_level < 0:
 96 |         allow_arch_optimizations = False
 97 |         opt_level = 0
 98 | 
 99 |     for lifter in lifters[arch.name]:
100 |         try:
101 |             u_data: LiftSource = data
102 |             if lifter.REQUIRE_DATA_C:
103 |                 if c_data is None:
104 |                     assert py_data is not None
105 |                     if isinstance(py_data, (bytearray, memoryview)):
106 |                         u_data = ffi.from_buffer(ffi.BVoidP, py_data)
107 |                     else:
108 |                         u_data = ffi.from_buffer(ffi.BVoidP, py_data + b"\0" * 8)
109 |                     max_bytes = min(len(py_data), max_bytes) if max_bytes is not None else len(py_data)
110 |                 else:
111 |                     u_data = c_data
112 |                 skip = 0
113 |             elif lifter.REQUIRE_DATA_PY:
114 |                 if bytes_offset and arch.name.startswith("ARM") and (addr & 1) == 1:
115 |                     skip = bytes_offset - 1
116 |                 else:
117 |                     skip = bytes_offset
118 |                 if py_data is None:
119 |                     assert c_data is not None
120 |                     if max_bytes is None:
121 |                         log.debug("Cannot create py_data from c_data when no max length is given")
122 |                         continue
123 |                     u_data = ffi.buffer(c_data + skip, max_bytes)[:]
124 |                 else:
125 |                     if max_bytes is None:
126 |                         u_data = py_data[skip:]
127 |                     else:
128 |                         u_data = py_data[skip : skip + max_bytes]
129 |             else:
130 |                 raise RuntimeError(
131 |                     "Incorrect lifter configuration. What type of data does %s expect?" % lifter.__class__
132 |                 )
133 | 
134 |             try:
135 |                 final_irsb = lifter(arch, addr).lift(
136 |                     u_data,
137 |                     bytes_offset - skip,
138 |                     max_bytes,
139 |                     max_inst,
140 |                     opt_level,
141 |                     traceflags,
142 |                     allow_arch_optimizations,
143 |                     strict_block_end,
144 |                     skip_stmts,
145 |                     collect_data_refs=collect_data_refs,
146 |                     cross_insn_opt=cross_insn_opt,
147 |                     load_from_ro_regions=load_from_ro_regions,
148 |                     const_prop=const_prop,
149 |                 )
150 |             except SkipStatementsError:
151 |                 assert skip_stmts is True
152 |                 final_irsb = lifter(arch, addr).lift(
153 |                     u_data,
154 |                     bytes_offset - skip,
155 |                     max_bytes,
156 |                     max_inst,
157 |                     opt_level,
158 |                     traceflags,
159 |                     allow_arch_optimizations,
160 |                     strict_block_end,
161 |                     skip_stmts=False,
162 |                     collect_data_refs=collect_data_refs,
163 |                     cross_insn_opt=cross_insn_opt,
164 |                     load_from_ro_regions=load_from_ro_regions,
165 |                     const_prop=const_prop,
166 |                 )
167 |             break
168 |         except LiftingException as ex:
169 |             log.debug("Lifting Exception: %s", str(ex))
170 |             continue
171 |     else:
172 |         final_irsb = IRSB.empty_block(
173 |             arch,
174 |             addr,
175 |             size=0,
176 |             nxt=Const(const.vex_int_class(arch.bits)(addr)),
177 |             jumpkind="Ijk_NoDecode",
178 |         )
179 |         final_irsb.invalidate_direct_next()
180 |         return final_irsb
181 | 
182 |     if final_irsb.size > 0 and final_irsb.jumpkind == "Ijk_NoDecode":
183 |         # We have decoded a few bytes before we hit an undecodeable instruction.
184 | 
185 |         # Determine if this is an intentional NoDecode, like the ud2 instruction on AMD64
186 |         nodecode_addr_expr = final_irsb.next
187 |         if type(nodecode_addr_expr) is Const:
188 |             nodecode_addr = nodecode_addr_expr.con.value
189 |             next_irsb_start_addr = addr + final_irsb.size
190 |             if nodecode_addr != next_irsb_start_addr:
191 |                 # The last instruction of the IRSB has a non-zero length. This is an intentional NoDecode.
192 |                 # The very last instruction has been decoded
193 |                 final_irsb.jumpkind = "Ijk_NoDecode"
194 |                 final_irsb.next = final_irsb.next
195 |                 final_irsb.invalidate_direct_next()
196 |                 return final_irsb
197 | 
198 |         # Decode more bytes
199 |         if skip_stmts:
200 |             # When gymrat will be invoked, we will merge future basic blocks to the current basic block. In this case,
201 |             # statements are usually required.
202 |             # TODO: In the future, we may further optimize it to handle cases where getting statements in gymrat is not
203 |             # TODO: required.
204 |             return lift(
205 |                 data,
206 |                 addr,
207 |                 arch,
208 |                 max_bytes=max_bytes,
209 |                 max_inst=max_inst,
210 |                 bytes_offset=bytes_offset,
211 |                 opt_level=opt_level,
212 |                 traceflags=traceflags,
213 |                 strict_block_end=strict_block_end,
214 |                 skip_stmts=False,
215 |                 collect_data_refs=collect_data_refs,
216 |                 load_from_ro_regions=load_from_ro_regions,
217 |                 const_prop=const_prop,
218 |             )
219 | 
220 |         next_addr = addr + final_irsb.size
221 |         if max_bytes is not None:
222 |             max_bytes -= final_irsb.size
223 |         if isinstance(data, (bytes, bytearray, memoryview)):
224 |             data_left = data[final_irsb.size :]
225 |         else:
226 |             data_left = data + final_irsb.size
227 |         if max_inst is not None:
228 |             max_inst -= final_irsb.instructions
229 |         if (max_bytes is None or max_bytes > 0) and (max_inst is None or max_inst > 0) and data_left:
230 |             more_irsb = lift(
231 |                 data_left,
232 |                 next_addr,
233 |                 arch,
234 |                 max_bytes=max_bytes,
235 |                 max_inst=max_inst,
236 |                 bytes_offset=bytes_offset,
237 |                 opt_level=opt_level,
238 |                 traceflags=traceflags,
239 |                 strict_block_end=strict_block_end,
240 |                 inner=True,
241 |                 skip_stmts=False,
242 |                 collect_data_refs=collect_data_refs,
243 |                 load_from_ro_regions=load_from_ro_regions,
244 |                 const_prop=const_prop,
245 |             )
246 |             if more_irsb.size:
247 |                 # Successfully decoded more bytes
248 |                 final_irsb.extend(more_irsb)
249 |         elif max_bytes == 0:
250 |             # We have no more bytes left. Mark the jumpkind of the IRSB as Ijk_Boring
251 |             if final_irsb.size > 0 and final_irsb.jumpkind == "Ijk_NoDecode":
252 |                 final_irsb.jumpkind = "Ijk_Boring"
253 |                 final_irsb.next = Const(vex_int_class(arch.bits)(final_irsb.addr + final_irsb.size))
254 | 
255 |     if not inner:
256 |         for postprocessor in postprocessors[arch.name]:
257 |             try:
258 |                 postprocessor(final_irsb).postprocess()
259 |             except NeedStatementsNotification as e:
260 |                 # The post-processor cannot work without statements. Re-lift the current block with skip_stmts=False
261 |                 if not skip_stmts:
262 |                     # sanity check
263 |                     # Why does the post-processor raise NeedStatementsNotification when skip_stmts is False?
264 |                     raise TypeError(
265 |                         "Bad post-processor %s: "
266 |                         "NeedStatementsNotification is raised when statements are available." % postprocessor.__class__
267 |                     ) from e
268 | 
269 |                 # Re-lift the current IRSB
270 |                 return lift(
271 |                     data,
272 |                     addr,
273 |                     arch,
274 |                     max_bytes=max_bytes,
275 |                     max_inst=max_inst,
276 |                     bytes_offset=bytes_offset,
277 |                     opt_level=opt_level,
278 |                     traceflags=traceflags,
279 |                     strict_block_end=strict_block_end,
280 |                     inner=inner,
281 |                     skip_stmts=False,
282 |                     collect_data_refs=collect_data_refs,
283 |                     load_from_ro_regions=load_from_ro_regions,
284 |                     const_prop=const_prop,
285 |                 )
286 |             except LiftingException:
287 |                 continue
288 | 
289 |     return final_irsb
290 | 
291 | 
def register(lifter, arch_name):
    """
    Adds a Lifter or Postprocessor class to pyvex's registries for one architecture.

    Lifters are tried in the order in which they were registered, and Postprocessors are run in
    registration order. A class that derives from both bases is added to both registries.

    :param lifter:       The Lifter or Postprocessor to register
    :vartype lifter:     :class:`Lifter` or :class:`Postprocessor`
    :param arch_name:    The architecture name under which the class is stored.
    """
    registrations = (
        (Lifter, lifters, "Registering lifter %s for architecture %s."),
        (Postprocessor, postprocessors, "Registering postprocessor %s for architecture %s."),
    )
    for base, registry, message in registrations:
        if not issubclass(lifter, base):
            continue
        log.debug(message, lifter.__name__, arch_name)
        registry[arch_name].append(lifter)
306 | 


--------------------------------------------------------------------------------
/pyvex/lifting/lifter.py:
--------------------------------------------------------------------------------
  1 | from pyvex.block import IRSB
  2 | from pyvex.types import Arch, LiftSource
  3 | 
  4 | # pylint:disable=attribute-defined-outside-init
  5 | 
  6 | 
  7 | class Lifter:
  8 |     __slots__ = (
  9 |         "data",
 10 |         "bytes_offset",
 11 |         "opt_level",
 12 |         "traceflags",
 13 |         "allow_arch_optimizations",
 14 |         "strict_block_end",
 15 |         "collect_data_refs",
 16 |         "max_inst",
 17 |         "max_bytes",
 18 |         "skip_stmts",
 19 |         "irsb",
 20 |         "arch",
 21 |         "addr",
 22 |         "cross_insn_opt",
 23 |         "load_from_ro_regions",
 24 |         "const_prop",
 25 |         "disasm",
 26 |         "dump_irsb",
 27 |     )
 28 | 
 29 |     """
 30 |     A lifter is a class of methods for processing a block.
 31 | 
 32 |     :ivar data:             The bytes to lift as either a python string of bytes or a cffi buffer object.
 33 |     :ivar bytes_offset:     The offset into `data` to start lifting at.
 34 |     :ivar max_bytes:        The maximum number of bytes to lift. If set to None, no byte limit is used.
 35 |     :ivar max_inst:         The maximum number of instructions to lift. If set to None, no instruction limit is used.
 36 |     :ivar opt_level:        The level of optimization to apply to the IR, 0-2. Most likely will be ignored in any lifter
 37 |                             other then LibVEX.
 38 |     :ivar traceflags:       The libVEX traceflags, controlling VEX debug prints. Most likely will be ignored in any
 39 |                             lifter other than LibVEX.
 40 |     :ivar allow_arch_optimizations:   Should the LibVEX lifter be allowed to perform lift-time preprocessing
 41 |                             optimizations (e.g., lookback ITSTATE optimization on THUMB)
 42 |                             Most likely will be ignored in any lifter other than LibVEX.
 43 |     :ivar strict_block_end: Should the LibVEX arm-thumb split block at some instructions, for example CB{N}Z.
 44 |     :ivar skip_stmts:       Should LibVEX ignore statements.
 45 |     """
 46 |     REQUIRE_DATA_C = False
 47 |     REQUIRE_DATA_PY = False
 48 | 
 49 |     def __init__(self, arch: Arch, addr: int):
 50 |         self.arch: Arch = arch
 51 |         self.addr: int = addr
 52 | 
 53 |     def lift(
 54 |         self,
 55 |         data: LiftSource,
 56 |         bytes_offset: int | None = None,
 57 |         max_bytes: int | None = None,
 58 |         max_inst: int | None = None,
 59 |         opt_level: int | float = 1,
 60 |         traceflags: int | None = None,
 61 |         allow_arch_optimizations: bool | None = None,
 62 |         strict_block_end: bool | None = None,
 63 |         skip_stmts: bool = False,
 64 |         collect_data_refs: bool = False,
 65 |         cross_insn_opt: bool = True,
 66 |         load_from_ro_regions: bool = False,
 67 |         const_prop: bool = False,
 68 |         disasm: bool = False,
 69 |         dump_irsb: bool = False,
 70 |     ):
 71 |         """
 72 |         Wrapper around the `_lift` method on Lifters. Should not be overridden in child classes.
 73 | 
 74 |         :param data:                The bytes to lift as either a python string of bytes or a cffi buffer object.
 75 |         :param bytes_offset:        The offset into `data` to start lifting at.
 76 |         :param max_bytes:           The maximum number of bytes to lift. If set to None, no byte limit is used.
 77 |         :param max_inst:            The maximum number of instructions to lift. If set to None, no instruction limit is
 78 |                                     used.
 79 |         :param opt_level:           The level of optimization to apply to the IR, 0-2. Most likely will be ignored in
 80 |                                     any lifter other then LibVEX.
 81 |         :param traceflags:          The libVEX traceflags, controlling VEX debug prints. Most likely will be ignored in
 82 |                                     any lifter other than LibVEX.
 83 |         :param allow_arch_optimizations:   Should the LibVEX lifter be allowed to perform lift-time preprocessing
 84 |                                     optimizations (e.g., lookback ITSTATE optimization on THUMB) Most likely will be
 85 |                                     ignored in any lifter other than LibVEX.
 86 |         :param strict_block_end:    Should the LibVEX arm-thumb split block at some instructions, for example CB{N}Z.
 87 |         :param skip_stmts:          Should the lifter skip transferring IRStmts from C to Python.
 88 |         :param collect_data_refs:   Should the LibVEX lifter collect data references in C.
 89 |         :param cross_insn_opt:      If cross-instruction-boundary optimizations are allowed or not.
 90 |         :param disasm:              Should the GymratLifter generate disassembly during lifting.
 91 |         :param dump_irsb:           Should the GymratLifter log the lifted IRSB.
 92 |         """
 93 |         irsb: IRSB = IRSB.empty_block(self.arch, self.addr)
 94 |         self.data = data
 95 |         self.bytes_offset = bytes_offset
 96 |         self.opt_level = opt_level
 97 |         self.traceflags = traceflags
 98 |         self.allow_arch_optimizations = allow_arch_optimizations
 99 |         self.strict_block_end = strict_block_end
100 |         self.collect_data_refs = collect_data_refs
101 |         self.max_inst = max_inst
102 |         self.max_bytes = max_bytes
103 |         self.skip_stmts = skip_stmts
104 |         self.irsb = irsb
105 |         self.cross_insn_opt = cross_insn_opt
106 |         self.load_from_ro_regions = load_from_ro_regions
107 |         self.const_prop = const_prop
108 |         self.disasm = disasm
109 |         self.dump_irsb = dump_irsb
110 |         self._lift()
111 |         return self.irsb
112 | 
113 |     def _lift(self):
114 |         """
115 |         Lifts the data using the information passed into _lift. Should be overridden in child classes.
116 | 
117 |         Should set the lifted IRSB to self.irsb.
118 |         If a lifter raises a LiftingException on the data, this signals that the lifter cannot lift this data and arch
119 |         and the lifter is skipped.
120 |         If a lifter can lift any amount of data, it should lift it and return the lifted block with a jumpkind of
121 |         Ijk_NoDecode, signalling to pyvex that other lifters should be used on the undecodable data.
122 | 
123 |         """
124 |         raise NotImplementedError()
125 | 


--------------------------------------------------------------------------------
/pyvex/lifting/post_processor.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # The post-processor base class
 3 | #
 4 | 
 5 | 
 6 | class Postprocessor:
 7 |     def __init__(self, irsb):
 8 |         self.irsb = irsb
 9 | 
10 |     def postprocess(self):
11 |         """
12 |         Modify the irsb
13 | 
14 |         All of the postprocessors will be used in the order that they are registered
15 |         """
16 |         pass
17 | 


--------------------------------------------------------------------------------
/pyvex/lifting/util/__init__.py:
--------------------------------------------------------------------------------
 1 | from .instr_helper import Instruction
 2 | from .lifter_helper import GymratLifter, ParseError
 3 | from .syntax_wrapper import VexValue
 4 | from .vex_helper import JumpKind, Type
 5 | 
 6 | __all__ = [
 7 |     "Type",
 8 |     "JumpKind",
 9 |     "VexValue",
10 |     "ParseError",
11 |     "Instruction",
12 |     "GymratLifter",
13 |     "ParseError",
14 | ]
15 | 


--------------------------------------------------------------------------------
/pyvex/lifting/util/lifter_helper.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | from typing import TYPE_CHECKING
  3 | 
  4 | import bitstring
  5 | 
  6 | from pyvex.const import vex_int_class
  7 | from pyvex.errors import LiftingException
  8 | from pyvex.lifting.lifter import Lifter
  9 | 
 10 | from .vex_helper import IRSBCustomizer, JumpKind
 11 | 
 12 | if TYPE_CHECKING:
 13 |     from .instr_helper import Instruction
 14 | 
# Module-level logger used by the Gymrat lifter helpers below.
log = logging.getLogger(__name__)
 16 | 
 17 | 
 18 | def is_empty(bitstrm):
 19 |     try:
 20 |         bitstrm.peek(1)
 21 |         return False
 22 |     except bitstring.ReadError:
 23 |         return True
 24 | 
 25 | 
 26 | class ParseError(Exception):
 27 |     pass
 28 | 
 29 | 
 30 | class GymratLifter(Lifter):
 31 |     """
 32 |     This is a base class for lifters that use Gymrat.
 33 |     For most architectures, all you need to do is subclass this, and set the property "instructions"
 34 |     to be a list of classes that define each instruction.
 35 |     By default, a lifter will decode instructions by attempting to instantiate every class until one works.
 36 |     This will use an IRSBCustomizer, which will, if it succeeds, add the appropriate VEX instructions to a pyvex IRSB.
 37 |     pyvex, when lifting a block of code for this architecture, will call the method "lift", which will produce the IRSB
 38 |     of the lifted code.
 39 |     """
 40 | 
 41 |     __slots__ = (
 42 |         "bitstrm",
 43 |         "errors",
 44 |         "thedata",
 45 |         "disassembly",
 46 |     )
 47 | 
 48 |     REQUIRE_DATA_PY = True
 49 |     instrs: list[type["Instruction"]]
 50 | 
 51 |     def __init__(self, arch, addr):
 52 |         super().__init__(arch, addr)
 53 |         self.bitstrm = None
 54 |         self.errors = None
 55 |         self.thedata = None
 56 |         self.disassembly = None
 57 | 
 58 |     def create_bitstrm(self):
 59 |         self.bitstrm = bitstring.ConstBitStream(bytes=self.thedata)
 60 | 
 61 |     def _decode_next_instruction(self, addr):
 62 |         # Try every instruction until one works
 63 |         for possible_instr in self.instrs:
 64 |             try:
 65 |                 log.debug("Trying %s", possible_instr.name)
 66 |                 return possible_instr(self.bitstrm, self.irsb.arch, addr)
 67 |             # a ParserError signals that this instruction did not match
 68 |             # we need to try other instructions, so we ignore this error
 69 |             except ParseError:
 70 |                 pass  # l.exception(repr(possible_instr))
 71 |             # if we are out of input, ignore.
 72 |             # there may be other, shorter instructions that still match,
 73 |             # so we continue with the loop
 74 |             except (bitstring.ReadError, bitstring.InterpretError):
 75 |                 pass
 76 | 
 77 |         # If no instruction matches, log an error
 78 |         errorstr = "Unknown instruction at bit position %d" % self.bitstrm.bitpos
 79 |         log.debug(errorstr)
 80 |         log.debug("Address: %#08x" % addr)
 81 | 
 82 |     def decode(self):
 83 |         try:
 84 |             self.create_bitstrm()
 85 |             count = 0
 86 |             disas = []
 87 |             addr = self.irsb.addr
 88 |             log.debug("Starting block at address: " + hex(addr))
 89 |             bytepos = self.bitstrm.bytepos
 90 | 
 91 |             while not is_empty(self.bitstrm):
 92 |                 instr = self._decode_next_instruction(addr)
 93 |                 if not instr:
 94 |                     break
 95 |                 disas.append(instr)
 96 |                 log.debug("Matched " + instr.name)
 97 |                 addr += self.bitstrm.bytepos - bytepos
 98 |                 bytepos = self.bitstrm.bytepos
 99 |                 count += 1
100 |             return disas
101 |         except Exception as e:
102 |             self.errors = str(e)
103 |             log.exception(f"Error decoding block at offset {bytepos:#x} (address {addr:#x}):")
104 |             raise
105 | 
106 |     def _lift(self):
107 |         self.thedata = (
108 |             self.data[: self.max_bytes]
109 |             if isinstance(self.data, (bytes, bytearray, memoryview))
110 |             else self.data[: self.max_bytes].encode()
111 |         )
112 |         log.debug(repr(self.thedata))
113 |         instructions = self.decode()
114 | 
115 |         if self.disasm:
116 |             self.disassembly = [instr.disassemble() for instr in instructions]
117 |         self.irsb.jumpkind = JumpKind.Invalid
118 |         irsb_c = IRSBCustomizer(self.irsb)
119 |         log.debug("Decoding complete.")
120 |         for i, instr in enumerate(instructions[: self.max_inst]):
121 |             log.debug("Lifting instruction %s", instr.name)
122 |             instr(irsb_c, instructions[:i], instructions[i + 1 :])
123 |             if irsb_c.irsb.jumpkind != JumpKind.Invalid:
124 |                 break
125 |             if (i + 1) == self.max_inst:  # if we are on our last iteration
126 |                 instr.jump(None, irsb_c.irsb.addr + irsb_c.irsb.size)
127 |                 break
128 |         else:
129 |             if len(irsb_c.irsb.statements) == 0:
130 |                 raise LiftingException("Could not decode any instructions")
131 |             irsb_c.irsb.jumpkind = JumpKind.NoDecode
132 |             dst = irsb_c.irsb.addr + irsb_c.irsb.size
133 |             dst_ty = vex_int_class(irsb_c.irsb.arch.bits).type
134 |             irsb_c.irsb.next = irsb_c.mkconst(dst, dst_ty)
135 |         log.debug(str(self.irsb))
136 |         if self.dump_irsb:
137 |             self.irsb.pp()
138 |         return self.irsb
139 | 
140 |     def pp_disas(self):
141 |         disasstr = ""
142 |         insts = self.disassemble()
143 |         for addr, name, args in insts:
144 |             args_str = ",".join(str(a) for a in args)
145 |             disasstr += f"{addr:#08x}:\t{name} {args_str}\n"
146 |         print(disasstr)
147 | 
148 |     def error(self):
149 |         return self.errors
150 | 
151 |     def disassemble(self):
152 |         if self.disassembly is None:
153 |             self.lift(self.data, disasm=True)
154 |         return self.disassembly
155 | 


--------------------------------------------------------------------------------
/pyvex/lifting/util/syntax_wrapper.py:
--------------------------------------------------------------------------------
  1 | import functools
  2 | from typing import Union
  3 | 
  4 | from pyvex.const import get_type_size
  5 | from pyvex.expr import Const, IRExpr, RdTmp
  6 | 
  7 | from .vex_helper import IRSBCustomizer, Type
  8 | 
  9 | 
 10 | def checkparams(rhstype=None):
 11 |     def decorator(fn):
 12 |         @functools.wraps(fn)
 13 |         def inner_decorator(self, *args, **kwargs):
 14 |             irsb_cs = {a.irsb_c for a in list(args) + [self] if isinstance(a, VexValue)}  # pylint: disable=no-member
 15 |             assert len(irsb_cs) == 1, "All VexValues must belong to the same irsb_c"
 16 |             args = list(args)
 17 |             for idx, arg in enumerate(args):
 18 |                 if isinstance(arg, int):
 19 |                     thetype = rhstype if rhstype else self.ty
 20 |                     args[idx] = VexValue.Constant(self.irsb_c, arg, thetype)
 21 |                 elif not isinstance(arg, VexValue):
 22 |                     raise Exception("Cannot convert param %s" % str(arg))
 23 |             args = tuple(args)
 24 |             return fn(self, *args, **kwargs)
 25 | 
 26 |         return inner_decorator
 27 | 
 28 |     return decorator
 29 | 
 30 | 
 31 | def vvifyresults(f):
 32 |     @functools.wraps(f)
 33 |     def decor(self, *args, **kwargs):
 34 |         returned = f(self, *args, **kwargs)
 35 |         assert isinstance(returned, RdTmp) or isinstance(returned, Const)
 36 |         return VexValue(self.irsb_c, returned)
 37 | 
 38 |     return decor
 39 | 
 40 | 
 41 | class VexValue:
 42 |     def __init__(self, irsb_c: "IRSBCustomizer", rdt: "Union[RdTmp, Const]", signed=False):
 43 |         self.irsb_c = irsb_c
 44 |         self.ty = self.irsb_c.get_type(rdt)
 45 |         self.rdt = rdt
 46 |         self.width = get_type_size(self.ty)
 47 |         self._is_signed = signed
 48 | 
 49 |     @property
 50 |     def value(self):
 51 |         if isinstance(self.rdt, Const):
 52 |             return self.rdt.con.value
 53 |         else:
 54 |             raise ValueError("Non-constant VexValue has no value property")
 55 | 
 56 |     @property
 57 |     def signed(self):
 58 |         return VexValue(self.irsb_c, self.rdt, True)
 59 | 
 60 |     @vvifyresults
 61 |     def widen_unsigned(self, ty):
 62 |         return self.irsb_c.op_widen_int_unsigned(self.rdt, ty)
 63 | 
 64 |     @vvifyresults
 65 |     def cast_to(self, ty, signed=False, high=False):
 66 |         return self.irsb_c.cast_to(self.rdt, ty, signed=signed, high=high)
 67 | 
 68 |     @vvifyresults
 69 |     def widen_signed(self, ty):
 70 |         return self.irsb_c.op_widen_int_signed(self.rdt, ty)
 71 | 
 72 |     @vvifyresults
 73 |     def narrow_high(self, ty):
 74 |         return self.irsb_c.op_narrow_int(self.rdt, ty, high_half=True)
 75 | 
 76 |     @vvifyresults
 77 |     def narrow_low(self, ty):
 78 |         return self.irsb_c.op_narrow_int(self.rdt, ty, high_half=False)
 79 | 
 80 |     # TODO at some point extend this to Vex nonconstants
 81 |     def __getitem__(self, idx):
 82 |         def getb(i):
 83 |             return VexValue(self.irsb_c, self.irsb_c.get_bit(self.rdt, i))
 84 | 
 85 |         def makeconstant(x):
 86 |             return VexValue.Constant(self.irsb_c, x, Type.int_8).rdt
 87 | 
 88 |         if not isinstance(idx, slice):
 89 |             actualindex = slice(idx).indices(self.width)[1]
 90 |             return getb(makeconstant(actualindex))
 91 |         else:
 92 |             return [getb(makeconstant(i)) for i in range(*idx.indices(self.width))]
 93 | 
 94 |     def __setitem__(self, idx, bval):
 95 |         setted = self.set_bit(idx, bval)
 96 |         self.__init__(setted.irsb_c, setted.rdt)
 97 | 
 98 |     @checkparams(rhstype=Type.int_8)
 99 |     @vvifyresults
100 |     def set_bit(self, idx, bval):
101 |         return self.irsb_c.set_bit(self.rdt, idx.rdt, bval.rdt)
102 | 
103 |     @checkparams()
104 |     @vvifyresults
105 |     def set_bits(self, idxsandvals):
106 |         return self.irsb_c.set_bits(self.rdt, [(i.cast_to(Type.int_8).rdt, b.rdt) for i, b in idxsandvals])
107 | 
108 |     @checkparams()
109 |     @vvifyresults
110 |     def ite(self, iftrue, iffalse):
111 |         onebitcond = self.cast_to(Type.int_1)
112 |         return self.irsb_c.ite(onebitcond.rdt, iftrue.rdt, iffalse.rdt)
113 | 
114 |     @checkparams()
115 |     @vvifyresults
116 |     def sar(self, right):
117 |         """
118 |         `v.sar(r)` should do arithmetic shift right of `v` by `r`
119 | 
120 |         :param right:VexValue value to shift by
121 |         :return: VexValue - result of a shift
122 |         """
123 |         return self.irsb_c.op_sar(self.rdt, right.rdt)
124 | 
125 |     @checkparams()
126 |     @vvifyresults
127 |     def __add__(self, right):
128 |         return self.irsb_c.op_add(self.rdt, right.rdt)
129 | 
130 |     @checkparams()
131 |     def __radd__(self, left):
132 |         return self + left
133 | 
134 |     @checkparams()
135 |     @vvifyresults
136 |     def __sub__(self, right):
137 |         return self.irsb_c.op_sub(self.rdt, right.rdt)
138 | 
139 |     @checkparams()
140 |     def __rsub__(self, left):
141 |         return left - self
142 | 
143 |     @checkparams()
144 |     @vvifyresults
145 |     def __div__(self, right):
146 |         if self._is_signed:
147 |             return self.irsb_c.op_sdiv(self.rdt, right.rdt)
148 |         else:
149 |             return self.irsb_c.op_udiv(self.rdt, right.rdt)
150 | 
151 |     @checkparams()
152 |     def __rdiv__(self, left):
153 |         return left // self
154 | 
155 |     @checkparams()
156 |     def __floordiv__(self, right):  # Note: nonprimitive
157 |         return self.__div__(right)
158 | 
159 |     @checkparams()
160 |     def __rfloordiv__(self, left):
161 |         return left // self
162 | 
163 |     @checkparams()
164 |     def __truediv__(self, right):  # Note: nonprimitive
165 |         return self / right
166 | 
167 |     @checkparams()
168 |     def __rtruediv__(self, left):
169 |         return left.__truediv__(self)
170 | 
171 |     @checkparams()
172 |     @vvifyresults
173 |     def __and__(self, right):
174 |         return self.irsb_c.op_and(self.rdt, right.rdt)
175 | 
176 |     @checkparams()
177 |     def __rand__(self, left):
178 |         return left & self
179 | 
180 |     @checkparams()
181 |     @vvifyresults
182 |     def __eq__(self, right):
183 |         return self.irsb_c.op_cmp_eq(self.rdt, right.rdt)
184 | 
185 |     @checkparams()
186 |     @vvifyresults
187 |     def __ne__(self, other):
188 |         return self.irsb_c.op_cmp_ne(self.rdt, other.rdt)
189 | 
190 |     @checkparams()
191 |     @vvifyresults
192 |     def __invert__(self):
193 |         return self.irsb_c.op_not(self.rdt)
194 | 
195 |     @checkparams()
196 |     @vvifyresults
197 |     def __le__(self, right):
198 |         if self._is_signed:
199 |             return self.irsb_c.op_cmp_sle(self.rdt, right.rdt)
200 |         else:
201 |             return self.irsb_c.op_cmp_ule(self.rdt, right.rdt)
202 | 
203 |     @checkparams()
204 |     @vvifyresults
205 |     def __gt__(self, other):
206 |         if self._is_signed:
207 |             return self.irsb_c.op_cmp_sgt(self.rdt, other.rdt)
208 |         else:
209 |             return self.irsb_c.op_cmp_ugt(self.rdt, other.rdt)
210 | 
211 |     @checkparams()
212 |     @vvifyresults
213 |     def __ge__(self, right):
214 |         if self._is_signed:
215 |             return self.irsb_c.op_cmp_sge(self.rdt, right.rdt)
216 |         else:
217 |             return self.irsb_c.op_cmp_uge(self.rdt, right.rdt)
218 | 
219 |     @checkparams(rhstype=Type.int_8)
220 |     @vvifyresults
221 |     def __lshift__(self, right):  # TODO put better type inference in irsb_c so we can have rlshift
222 |         """
223 |         logical shift left
224 |         """
225 |         return self.irsb_c.op_shl(self.rdt, right.rdt)
226 | 
227 |     @checkparams()
228 |     @vvifyresults
229 |     def __lt__(self, right):
230 |         if self._is_signed:
231 |             return self.irsb_c.op_cmp_slt(self.rdt, right.rdt)
232 |         else:
233 |             return self.irsb_c.op_cmp_ult(self.rdt, right.rdt)
234 | 
235 |     @checkparams()
236 |     @vvifyresults
237 |     def __mod__(self, right):  # Note: nonprimitive
238 |         return self.irsb_c.op_mod(self.rdt, right.rdt)
239 | 
240 |     @checkparams()
241 |     def __rmod__(self, left):
242 |         return left % self
243 | 
244 |     @checkparams()
245 |     @vvifyresults
246 |     def __mul__(self, right):
247 |         if self._is_signed:
248 |             return self.irsb_c.op_smul(self.rdt, right.rdt)
249 |         else:
250 |             return self.irsb_c.op_umul(self.rdt, right.rdt)
251 | 
252 |     @checkparams()
253 |     def __rmul__(self, left):
254 |         return left * self
255 | 
256 |     @checkparams()
257 |     @vvifyresults
258 |     def __neg__(self):  # Note: nonprimitive
259 |         if not self._is_signed:
260 |             raise Exception("Number is unsigned, cannot change sign!")
261 |         else:
262 |             return self.rdt * -1
263 | 
264 |     @checkparams()
265 |     @vvifyresults
266 |     def __or__(self, right):
267 |         return self.irsb_c.op_or(self.rdt, right.rdt)
268 | 
269 |     def __ror__(self, left):
270 |         return self | left
271 | 
272 |     @checkparams()
273 |     @vvifyresults
274 |     def __pos__(self):
275 |         return self
276 | 
277 |     @checkparams(rhstype=Type.int_8)
278 |     @vvifyresults
279 |     def __rshift__(self, right):
280 |         """
281 |         logical shift right
282 |         """
283 |         return self.irsb_c.op_shr(self.rdt, right.rdt)
284 | 
285 |     @checkparams()
286 |     def __rlshift__(self, left):
287 |         return left << self
288 | 
289 |     @checkparams()
290 |     def __rrshift__(self, left):
291 |         return left >> self
292 | 
293 |     @checkparams()
294 |     @vvifyresults
295 |     def __xor__(self, right):
296 |         return self.irsb_c.op_xor(self.rdt, right.rdt)
297 | 
298 |     def __rxor__(self, left):
299 |         return self ^ left
300 | 
301 |     @classmethod
302 |     def Constant(cls, irsb_c, val, ty):
303 |         """
304 |         Creates a constant as a VexValue
305 |         :param irsb_c: The IRSBCustomizer to use
306 |         :param val: The value, as an integer
307 |         :param ty: The type of the resulting VexValue
308 |         :return: a VexValue
309 |         """
310 |         assert not (isinstance(val, VexValue) or isinstance(val, IRExpr))
311 |         rdt = irsb_c.mkconst(val, ty)
312 |         return cls(irsb_c, rdt)
313 | 


--------------------------------------------------------------------------------
/pyvex/lifting/util/vex_helper.py:
--------------------------------------------------------------------------------
  1 | import copy
  2 | import re
  3 | 
  4 | from pyvex.const import U1, get_type_size, ty_to_const_class, vex_int_class
  5 | from pyvex.enums import IRCallee
  6 | from pyvex.expr import ITE, Binop, CCall, Const, Get, Load, RdTmp, Unop
  7 | from pyvex.stmt import Dirty, Exit, IMark, NoOp, Put, Store, WrTmp
  8 | 
  9 | 
class JumpKind:
    """
    Symbolic names for the `Ijk_*` jump-kind strings that are stored in an IRSB's `jumpkind`.
    """

    Boring = "Ijk_Boring"
    Call = "Ijk_Call"
    Ret = "Ijk_Ret"
    Segfault = "Ijk_SigSEGV"
    Exit = "Ijk_Exit"
    Syscall = "Ijk_Sys_syscall"
    Sysenter = "Ijk_Sys_sysenter"
    Invalid = "Ijk_INVALID"
    NoDecode = "Ijk_NoDecode"
 20 | 
 21 | 
 22 | class TypeMeta(type):
 23 |     typemeta_re = re.compile(r"int_(?P<size>\d+)$")
 24 | 
 25 |     def __getattr__(self, name):
 26 |         match = self.typemeta_re.match(name)
 27 |         if match:
 28 |             width = int(match.group("size"))
 29 |             return vex_int_class(width).type
 30 |         else:
 31 |             return type.__getattr__(name)
 32 | 
 33 | 
class Type(metaclass=TypeMeta):
    """
    Namespace of VEX type-name strings.

    The floating point, decimal and vector types are listed explicitly below. Integer types are not
    listed: names of the form ``int_<N>`` (e.g. ``Type.int_8``) are resolved on access by
    ``TypeMeta.__getattr__``.
    """

    # Python 2 style metaclass declaration; under Python 3 this is just an ordinary class attribute and
    # the `metaclass=` keyword in the class statement is what actually applies TypeMeta.
    __metaclass__ = TypeMeta

    ieee_float_16 = "Ity_F16"
    ieee_float_32 = "Ity_F32"
    ieee_float_64 = "Ity_F64"
    ieee_float_128 = "Ity_F128"
    decimal_float_32 = "Ity_D32"
    decimal_float_64 = "Ity_D64"
    decimal_float_128 = "Ity_D128"
    simd_vector_128 = "Ity_V128"
    simd_vector_256 = "Ity_V256"
 47 | 
 48 | def get_op_format_from_const_ty(ty):
 49 |     return ty_to_const_class(ty).op_format
 50 | 
 51 | 
 52 | def make_format_op_generator(fmt_string):
 53 |     """
 54 |     Return a function which generates an op format (just a string of the vex instruction)
 55 | 
 56 |     Functions by formatting the fmt_string with the types of the arguments
 57 |     """
 58 | 
 59 |     def gen(arg_types):
 60 |         converted_arg_types = list(map(get_op_format_from_const_ty, arg_types))
 61 |         op = fmt_string.format(arg_t=converted_arg_types)
 62 |         return op
 63 | 
 64 |     return gen
 65 | 
 66 | 
 67 | def mkbinop(fstring):
 68 |     return lambda self, expr_a, expr_b: self.op_binary(make_format_op_generator(fstring))(expr_a, expr_b)
 69 | 
 70 | 
 71 | def mkunop(fstring):
 72 |     return lambda self, expr_a: self.op_unary(make_format_op_generator(fstring))(expr_a)
 73 | 
 74 | 
 75 | def mkcmpop(fstring_fragment, signedness=""):
 76 |     def cmpop(self, expr_a, expr_b):
 77 |         ty = self.get_type(expr_a)
 78 |         fstring = f"Iop_Cmp{fstring_fragment}{{arg_t[0]}}{signedness}"
 79 |         retval = mkbinop(fstring)(self, expr_a, expr_b)
 80 |         return self.cast_to(retval, ty)
 81 | 
 82 |     return cmpop
 83 | 
 84 | 
 85 | class IRSBCustomizer:
 86 |     op_add = mkbinop("Iop_Add{arg_t[0]}")
 87 |     op_sub = mkbinop("Iop_Sub{arg_t[0]}")
 88 |     op_umul = mkbinop("Iop_Mul{arg_t[0]}")
 89 |     op_smul = mkbinop("Iop_MullS{arg_t[0]}")
 90 |     op_sdiv = mkbinop("Iop_DivS{arg_t[0]}")
 91 |     op_udiv = mkbinop("Iop_DivU{arg_t[0]}")
 92 | 
 93 |     # Custom operation that does not exist in libVEX
 94 |     op_mod = mkbinop("Iop_Mod{arg_t[0]}")
 95 | 
 96 |     op_or = mkbinop("Iop_Or{arg_t[0]}")
 97 |     op_and = mkbinop("Iop_And{arg_t[0]}")
 98 |     op_xor = mkbinop("Iop_Xor{arg_t[0]}")
 99 | 
100 |     op_shr = mkbinop("Iop_Shr{arg_t[0]}")  # Shift Right (logical)
101 |     op_shl = mkbinop("Iop_Shl{arg_t[0]}")  # Shift Left (logical)
102 | 
103 |     op_sar = mkbinop("Iop_Sar{arg_t[0]}")  # Shift Arithmetic Right operation
104 | 
105 |     op_not = mkunop("Iop_Not{arg_t[0]}")
106 | 
107 |     op_cmp_eq = mkcmpop("EQ")
108 |     op_cmp_ne = mkcmpop("NE")
109 |     op_cmp_slt = mkcmpop("LT", "S")
110 |     op_cmp_sle = mkcmpop("LE", "S")
111 |     op_cmp_ult = mkcmpop("LT", "U")
112 |     op_cmp_ule = mkcmpop("LE", "U")
113 |     op_cmp_sge = mkcmpop("GE", "S")
114 |     op_cmp_uge = mkcmpop("GE", "U")
115 |     op_cmp_sgt = mkcmpop("GT", "S")
116 |     op_cmp_ugt = mkcmpop("GT", "U")
117 | 
118 |     def __init__(self, irsb):
119 |         self.arch = irsb.arch
120 |         self.irsb = irsb
121 | 
122 |     def get_type(self, rdt):
123 |         return rdt.result_type(self.irsb.tyenv)
124 | 
125 |     # Statements (no return value)
126 |     def _append_stmt(self, stmt):
127 |         self.irsb.statements += [stmt]
128 | 
129 |     def imark(self, int_addr, int_length, int_delta=0):
130 |         self._append_stmt(IMark(int_addr, int_length, int_delta))
131 | 
132 |     def get_reg(self, regname):  # TODO move this into the lifter
133 |         return self.arch.registers[regname][0]
134 | 
135 |     def put(self, expr_val, tuple_reg):
136 |         self._append_stmt(Put(copy.copy(expr_val), tuple_reg))
137 | 
138 |     def store(self, addr, expr):
139 |         self._append_stmt(Store(copy.copy(addr), copy.copy(expr), self.arch.memory_endness))
140 | 
141 |     def noop(self):
142 |         self._append_stmt(NoOp())
143 | 
144 |     def add_exit(self, guard, dst, jk, ip):
145 |         """
146 |         Add an exit out of the middle of an IRSB.
147 |         (e.g., a conditional jump)
148 |         :param guard: An expression, the exit is taken if true
149 |         :param dst: the destination of the exit (a Const)
150 |         :param jk: the JumpKind of this exit (probably Ijk_Boring)
151 |         :param ip: The address of this exit's source
152 |         """
153 |         self.irsb.statements.append(Exit(guard, dst.con, jk, ip))
154 | 
155 |     # end statements
156 | 
157 |     def goto(self, addr):
158 |         self.irsb.next = addr
159 |         self.irsb.jumpkind = JumpKind.Boring
160 | 
161 |     def ret(self, addr):
162 |         self.irsb.next = addr
163 |         self.irsb.jumpkind = JumpKind.Ret
164 | 
165 |     def call(self, addr):
166 |         self.irsb.next = addr
167 |         self.irsb.jumpkind = JumpKind.Call
168 | 
169 |     def _add_tmp(self, t):
170 |         return self.irsb.tyenv.add(t)
171 | 
172 |     def _rdtmp(self, tmp):
173 |         return RdTmp.get_instance(tmp)
174 | 
175 |     def _settmp(self, expr):
176 |         ty = self.get_type(expr)
177 |         tmp = self._add_tmp(ty)
178 |         self._append_stmt(WrTmp(tmp, expr))
179 |         return self._rdtmp(tmp)
180 | 
181 |     def rdreg(self, reg, ty):
182 |         return self._settmp(Get(reg, ty))
183 | 
184 |     def load(self, addr, ty):
185 |         return self._settmp(Load(self.arch.memory_endness, ty, copy.copy(addr)))
186 | 
187 |     def op_ccall(self, retty, funcstr, args):
188 |         return self._settmp(CCall(retty, IRCallee(len(args), funcstr, 0xFFFF), args))
189 | 
190 |     def dirty(self, retty, funcstr, args):
191 |         if retty is None:
192 |             tmp = 0xFFFFFFFF
193 |         else:
194 |             tmp = self._add_tmp(retty)
195 |         self._append_stmt(Dirty(IRCallee(len(args), funcstr, 0xFFFF), Const(U1(1)), args, tmp, None, None, None, None))
196 |         return self._rdtmp(tmp)
197 | 
198 |     def ite(self, condrdt, iftruerdt, iffalserdt):
199 |         return self._settmp(ITE(copy.copy(condrdt), copy.copy(iffalserdt), copy.copy(iftruerdt)))
200 | 
201 |     def mkconst(self, val, ty):
202 |         cls = ty_to_const_class(ty)
203 |         return Const(cls(val))
204 | 
205 |     # Operations
206 |     def op_generic(self, Operation, op_generator):
207 |         def instance(*args):  # Note: The args here are all RdTmps
208 |             for arg in args:
209 |                 assert isinstance(arg, RdTmp) or isinstance(arg, Const)
210 |             arg_types = [self.get_type(arg) for arg in args]
211 |             # two operations should never share the same argument instances, copy them here to ensure that
212 |             args = [copy.copy(a) for a in args]
213 |             op = Operation(op_generator(arg_types), args)
214 |             msg = "operation needs to be well typed: " + str(op)
215 |             assert op.typecheck(self.irsb.tyenv), msg + "\ntypes: " + str(self.irsb.tyenv)
216 |             return self._settmp(op)
217 | 
218 |         return instance
219 | 
220 |     def op_binary(self, op_format_str):
221 |         return self.op_generic(Binop, op_format_str)
222 | 
223 |     def op_unary(self, op_format_str):
224 |         return self.op_generic(Unop, op_format_str)
225 | 
226 |     def cast_to(self, rdt, tydest, signed=False, high=False):
227 |         goalwidth = get_type_size(tydest)
228 |         rdtwidth = self.get_rdt_width(rdt)
229 | 
230 |         if rdtwidth > goalwidth:
231 |             return self.op_narrow_int(rdt, tydest, high_half=high)
232 |         elif rdtwidth < goalwidth:
233 |             return self.op_widen_int(rdt, tydest, signed=signed)
234 |         else:
235 |             return rdt
236 | 
237 |     def op_to_one_bit(self, rdt):
238 |         rdtty = self.get_type(rdt)
239 |         if rdtty not in [Type.int_64, Type.int_32]:
240 |             rdt = self.op_widen_int_unsigned(rdt, Type.int_32)
241 |         onebit = self.op_narrow_int(rdt, Type.int_1)
242 |         return onebit
243 | 
244 |     def op_narrow_int(self, rdt, tydest, high_half=False):
245 |         op_name = "{op}{high}to{dest}".format(
246 |             op="Iop_{arg_t[0]}", high="HI" if high_half else "", dest=get_op_format_from_const_ty(tydest)
247 |         )
248 |         return self.op_unary(make_format_op_generator(op_name))(rdt)
249 | 
250 |     def op_widen_int(self, rdt, tydest, signed=False):
251 |         op_name = "{op}{sign}to{dest}".format(
252 |             op="Iop_{arg_t[0]}", sign="S" if signed else "U", dest=get_op_format_from_const_ty(tydest)
253 |         )
254 |         return self.op_unary(make_format_op_generator(op_name))(rdt)
255 | 
256 |     def op_widen_int_signed(self, rdt, tydest):
257 |         return self.op_widen_int(rdt, tydest, signed=True)
258 | 
259 |     def op_widen_int_unsigned(self, rdt, tydest):
260 |         return self.op_widen_int(rdt, tydest, signed=False)
261 | 
262 |     def get_msb(self, tmp, ty):
263 |         width = get_type_size(ty)
264 |         return self.get_bit(tmp, width - 1)
265 | 
266 |     def get_bit(self, rdt, idx):
267 |         shifted = self.op_shr(rdt, idx)
268 |         bit = self.op_extract_lsb(shifted)
269 |         return bit
270 | 
271 |     def op_extract_lsb(self, rdt):
272 |         bitmask = self.mkconst(1, self.get_type(rdt))
273 |         return self.op_and(bitmask, rdt)
274 | 
275 |     def set_bit(self, rdt, idx, bval):
276 |         currbit = self.get_bit(rdt, idx)
277 |         areequalextrabits = self.op_xor(bval, currbit)
278 |         one = self.mkconst(1, self.get_type(areequalextrabits))
279 |         areequal = self.op_and(areequalextrabits, one)
280 |         shifted = self.op_shl(areequal, idx)
281 |         return self.op_xor(rdt, shifted)
282 | 
283 |     def set_bits(self, rdt, idxsandvals):
284 |         ty = self.get_type(rdt)
285 |         if all([isinstance(idx, Const) for idx, _ in idxsandvals]):
286 |             relevantbits = self.mkconst(sum([1 << idx.con.value for idx, _ in idxsandvals]), ty)
287 |         else:
288 |             relevantbits = self.mkconst(0, ty)
289 |             for idx, _ in idxsandvals:
290 |                 shifted = self.op_shl(self.mkconst(1, ty), idx)
291 |                 relevantbits = self.op_or(relevantbits, shifted)
292 |         setto = self.mkconst(0, ty)
293 |         for idx, bval in idxsandvals:
294 |             bvalbit = self.op_extract_lsb(bval)
295 |             shifted = self.op_shl(bvalbit, idx)
296 |             setto = self.op_or(setto, shifted)
297 |         shouldflip = self.op_and(self.op_xor(setto, rdt), relevantbits)
298 |         return self.op_xor(rdt, shouldflip)
299 | 
300 |     def get_rdt_width(self, rdt):
301 |         return rdt.result_size(self.irsb.tyenv)
302 | 


--------------------------------------------------------------------------------
/pyvex/lifting/zerodivision.py:
--------------------------------------------------------------------------------
 1 | import copy
 2 | 
 3 | from pyvex import const, expr, stmt
 4 | 
 5 | from .post_processor import Postprocessor
 6 | 
 7 | 
 8 | class ZeroDivisionPostProcessor(Postprocessor):
 9 |     """
10 |     A postprocessor for adding zero-division checks to VEX.
11 | 
12 |     For "div rcx", will turn:
13 | 
14 |               00 | ------ IMark(0x8000, 3, 0) ------
15 |               01 | t0 = GET:I64(rcx)
16 |               02 | t1 = GET:I64(rax)
17 |               03 | t2 = GET:I64(rdx)
18 |               04 | t3 = 64HLto128(t2,t1)
19 |               05 | t4 = DivModU128to64(t3,t0)
20 |               06 | t5 = 128to64(t4)
21 |               07 | PUT(rax) = t5
22 |               08 | t6 = 128HIto64(t4)
23 |               09 | PUT(rdx) = t6
24 |               NEXT: PUT(rip) = 0x0000000000008003; Ijk_Boring
25 | 
26 |     into:
27 | 
28 |               00 | ------ IMark(0x8000, 3, 0) ------
29 |               01 | t0 = GET:I64(rcx)
30 |               02 | t4 = GET:I64(rax)
31 |               03 | t5 = GET:I64(rdx)
32 |               04 | t3 = 64HLto128(t5,t4)
33 |               05 | t9 = CmpEQ(t0,0x0000000000000000)
34 |               06 | if (t9) { PUT(pc) = 0x8000; Ijk_SigFPE_IntDiv }
35 |               07 | t2 = DivModU128to64(t3,t0)
36 |               08 | t6 = 128to64(t2)
37 |               09 | PUT(rax) = t6
38 |               10 | t7 = 128HIto64(t2)
39 |               11 | PUT(rdx) = t7
40 |               NEXT: PUT(rip) = 0x0000000000008003; Ijk_Boring
41 |     """
42 | 
43 |     def postprocess(self):
44 |         if self.irsb.statements is None:
45 |             # This is an optimized IRSB. We cannot really post-process it.
46 |             return
47 | 
48 |         insertions = []
49 |         last_ip = 0
50 |         for i, s in enumerate(self.irsb.statements):
51 |             if s.tag == "Ist_IMark":
52 |                 last_ip = s.addr
53 |             if s.tag == "Ist_WrTmp" and s.data.tag == "Iex_Binop" and ("Div" in s.data.op or "Mod" in s.data.op):
54 |                 arg_size = s.data.args[1].result_size(self.irsb.tyenv)
55 |                 cmp_args = [copy.copy(s.data.args[1]), expr.Const(const.vex_int_class(arg_size)(0))]
56 |                 cmp_tmp = self.irsb.tyenv.add("Ity_I1")
57 |                 insertions.append((i, stmt.WrTmp(cmp_tmp, expr.Binop("Iop_CmpEQ%d" % arg_size, cmp_args))))
58 |                 insertions.append(
59 |                     (
60 |                         i,
61 |                         stmt.Exit(
62 |                             expr.RdTmp.get_instance(cmp_tmp),
63 |                             const.vex_int_class(self.irsb.arch.bits)(last_ip),
64 |                             "Ijk_SigFPE_IntDiv",
65 |                             self.irsb.offsIP,
66 |                         ),
67 |                     )
68 |                 )
69 | 
70 |         for i, s in reversed(insertions):
71 |             self.irsb.statements.insert(i, s)
72 | 


--------------------------------------------------------------------------------
/pyvex/native.py:
--------------------------------------------------------------------------------
 1 | import hashlib
 2 | import os
 3 | import pickle
 4 | import sys
 5 | import tempfile
 6 | from typing import Any
 7 | 
 8 | import cffi
 9 | 
10 | from .vex_ffi import ffi_str as _ffi_str
11 | 
# Module-wide cffi instance: _parse_ffi_str() fills in its declarations and _find_c_lib() uses it to
# open the native library.
ffi = cffi.FFI()
13 | 
14 | 
def _locate_lib(module: str, library: str) -> str:
    """
    Resolve the path of a native library.

    The library is first looked for relative to the directory of this file. pkg_resources is imported
    and consulted only when that file does not exist, because importing pkg_resources is slow.

    :param module:  Module name handed to pkg_resources for the fallback lookup.
    :param library: Library path relative to this file's directory.
    :return:        The full path of the native library.
    """
    candidate = os.path.join(os.path.dirname(__file__), library)
    if not os.path.isfile(candidate):
        import pkg_resources  # pylint:disable=import-outside-toplevel

        return pkg_resources.resource_filename(module, os.path.join("lib", library))
    return candidate
30 | 
31 | 
def _parse_ffi_str():
    """
    Load the C declarations in `_ffi_str` into the module-level `ffi` object.

    Parsing is skipped when a cache file for this exact declaration string (keyed by its md5 digest)
    exists in the temporary directory; otherwise the declarations are parsed with `ffi.cdef` and the
    parser state is cached for later runs. The cache is purely an optimisation: a cache file that
    cannot be read or decoded is ignored, and a failure to write the cache is not an error.
    """
    hash_ = hashlib.md5(_ffi_str.encode("utf-8")).hexdigest()
    cache_location = os.path.join(tempfile.gettempdir(), f"pyvex_ffi_parser_cache.{hash_}")

    if os.path.isfile(cache_location):
        # SECURITY NOTE(review): this unpickles a file with a predictable name from the temporary
        # directory. Unpickling can execute arbitrary code, so on systems where other users can write
        # to that directory the cache should be moved to a per-user location or to a safer format.
        try:
            with open(cache_location, "rb") as f:
                cache = pickle.loads(f.read())
            declarations = cache["_declarations"]
            int_constants = cache["_int_constants"]
        except (
            OSError,
            EOFError,
            pickle.UnpicklingError,
            AttributeError,
            ImportError,
            IndexError,
            KeyError,
            TypeError,
            ValueError,
        ):
            # Truncated, corrupt or otherwise unusable cache: fall through and parse from scratch
            pass
        else:
            # Only touch the parser once both entries were read, so it is never left half-initialised
            ffi._parser._declarations = declarations
            ffi._parser._int_constants = int_constants
            return

    ffi.cdef(_ffi_str)
    # cache the result
    cache = {
        "_declarations": ffi._parser._declarations,
        "_int_constants": ffi._parser._int_constants,
    }
    # atomically write cache: write to a temporary file first, then move it into place
    temp_file_name = None
    try:
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            temp_file_name = temp_file.name
            temp_file.write(pickle.dumps(cache))
        os.replace(temp_file_name, cache_location)
    except OSError:
        # Caching is best effort (e.g. read-only or full temp dir); do not leave the partial file behind
        if temp_file_name is not None:
            try:
                os.unlink(temp_file_name)
            except OSError:
                pass
54 | 
55 | 
def _find_c_lib():
    """
    Locate, open and initialise the pyvex C library.

    :return: The cffi library handle
    :raises ImportError: if the library's `vex_init` reports failure
    """
    # The shared library's file name depends on the platform
    platform = sys.platform
    if platform == "darwin":
        library_file = "libpyvex.dylib"
    elif platform in ("win32", "cygwin"):
        library_file = "pyvex.dll"
    else:
        library_file = "libpyvex.so"
    pyvex_path = _locate_lib(__name__, os.path.join("lib", library_file))

    # The declarations (parsed or taken from the cache) have to be in place before the library is opened
    _parse_ffi_str()

    # NOTE(review): the original comment here mentions RTLD_GLOBAL for sim_unicorn.so, but no flags
    # are passed to dlopen - confirm whether that is still intended
    handle = ffi.dlopen(pyvex_path)
    if not handle.vex_init():
        raise ImportError("libvex failed to initialize")

    # Listing the attributes makes cffi look up every definition right away
    dir(handle)
    return handle
75 | 
76 | 
# Handle to the loaded pyvex C library; the library is located, opened and initialised as a side
# effect of importing this module.
pvc: Any = _find_c_lib()  # This should be properly typed, but this seems non trivial
78 | 


--------------------------------------------------------------------------------
/pyvex/py.typed:
--------------------------------------------------------------------------------
1 | partial
2 | 


--------------------------------------------------------------------------------
/pyvex/types.py:
--------------------------------------------------------------------------------
 1 | from typing import TYPE_CHECKING, Any, Protocol, Union, runtime_checkable
 2 | 
 3 | from cffi.api import FFI
 4 | 
 5 | 
class Register(Protocol):
    """
    A register. Pyvex should probably not have this dependency.

    Structural type: any object exposing the attribute below satisfies it.
    """

    # The register's name
    name: str
12 | 
13 | 
class Arch(Protocol):
    """
    An architecture description.

    Structural type listing the attributes and methods that an architecture object has to provide.
    """

    # Name of the architecture
    name: str
    # NOTE(review): presumably the register-file offset of the instruction pointer - confirm
    ip_offset: int
    # NOTE(review): presumably the word size in bits - confirm
    bits: int
    # Endness identifiers for instruction fetches and for memory accesses
    instruction_endness: str
    memory_endness: str
    # NOTE(review): presumably the number of bits per byte - confirm
    byte_width: int
    # All registers of the architecture
    register_list: list[Register]
    # Maps a register name to a pair of ints
    # NOTE(review): presumably (offset, size) - confirm against the arch implementation
    registers: dict[str, tuple[int, int]]

    def translate_register_name(self, offset: int, size: int | None = None) -> str | None: ...

    def get_register_offset(self, name: str) -> int: ...
31 | 
32 | 
@runtime_checkable
class LibvexArch(Protocol):
    """
    The description for an architecture that is usable with libvex

    Being runtime-checkable, `isinstance(obj, LibvexArch)` can be used to test whether an object
    provides the attributes below.
    """

    # Name of the architecture as understood by libvex
    vex_arch: str
    # Additional architecture information, keyed by string
    vex_archinfo: dict[str, Any]
41 | 
42 | 
# Python-side byte containers that can be lifted from
PyLiftSource = Union[bytes, bytearray, memoryview]
# cffi's CData type is only referenced while type checking; at runtime the alias is None
if TYPE_CHECKING:
    CLiftSource = FFI.CData
else:
    CLiftSource = None
# Anything that can be passed as the data to lift: a Python buffer or a cffi object
LiftSource = Union[PyLiftSource, CLiftSource]
49 | 


--------------------------------------------------------------------------------
/pyvex/utils.py:
--------------------------------------------------------------------------------
 1 | import struct
 2 | from collections.abc import Callable
 3 | from typing import Any
 4 | 
# Use the `_md5` module when it can be imported and fall back to hashlib otherwise; either way only
# the `md5()` constructor is used (see stable_hash).
# NOTE(review): presumably `_md5` is preferred to avoid hashlib's overhead - confirm
try:
    import _md5 as md5lib
except ImportError:
    import hashlib as md5lib


# Splits a 16-byte md5 digest into four unsigned integers.
# NOTE(review): "4I" has no byte-order prefix, so the host's native byte order is used and the
# unpacked values differ between little- and big-endian machines - confirm this is acceptable.
md5_unpacker = struct.Struct("4I")
12 | 
13 | 
def stable_hash(t: tuple) -> int:
    """
    Hash a tuple by serialising it with `_dump_tuple` and taking the first unpacked integer of the
    md5 digest of that serialisation.
    """
    digest = md5lib.md5(_dump_tuple(t)).digest()
    first_word, *_rest = md5_unpacker.unpack(digest)
    return first_word  # 32 bits
18 | 
19 | 
def _dump_tuple(t: tuple) -> bytes:
    """
    Serialise a tuple to bytes for hashing.

    Every item is followed by a 0xf0 separator byte. None contributes only the separator; items whose
    exact type has a dumper in `_DUMP_BY_TYPE` use it; anything else contributes its `hash()` truncated
    to 64 bits and packed little-endian.
    """
    pieces = []
    for item in t:
        if item is not None:
            dumper = _DUMP_BY_TYPE.get(type(item))
            if dumper is None:
                pieces.append(struct.pack("<Q", hash(item) & 0xFFFF_FFFF_FFFF_FFFF))
            else:
                pieces.append(dumper(item))
        pieces.append(b"\xf0")
    return b"".join(pieces)
31 | 
32 | 
def _dump_str(t: str) -> bytes:
    """
    Serialise a string for hashing.

    UTF-8 is used so that non-ASCII text can be hashed instead of raising UnicodeEncodeError; for pure
    ASCII strings the bytes are identical to an ASCII encoding, so their hashes are unchanged.
    """
    return t.encode("utf-8")
35 | 
36 | 
def _dump_int(t: int) -> bytes:
    """
    Serialise an integer for hashing.

    The magnitude is packed little-endian into the smallest of 2, 4 or 8 bytes that holds it. Larger
    magnitudes are split into 64-bit chunks, least significant first, each serialised by the same
    rules. Negative numbers are prefixed with ``b"-"``.
    """
    sign = b"-" if t < 0 else b""
    magnitude = abs(t)

    for limit, fmt in ((0xFFFF, "<H"), (0xFFFF_FFFF, "<I"), (0xFFFF_FFFF_FFFF_FFFF, "<Q")):
        if magnitude <= limit:
            return sign + struct.pack(fmt, magnitude)

    chunks = []
    while magnitude > 0:
        chunks.append(_dump_int(magnitude & 0xFFFF_FFFF_FFFF_FFFF))
        magnitude >>= 64
    return sign + b"".join(chunks)
52 | 
53 | 
def _dump_type(t: type) -> bytes:
    """
    Serialise a class for hashing by its `__name__`.

    UTF-8 is used so that classes with non-ASCII names do not raise UnicodeEncodeError; for ASCII
    names the bytes are identical to an ASCII encoding.
    """
    return t.__name__.encode("utf-8")
56 | 
57 | 
# Serialisers used by _dump_tuple, selected by the exact type of each item (subclasses do not match;
# items of any other type fall back to hash()).
_DUMP_BY_TYPE: dict[type, Callable[[Any], bytes]] = {
    tuple: _dump_tuple,
    str: _dump_str,
    int: _dump_int,
    type: _dump_type,
}
64 | 


--------------------------------------------------------------------------------
/pyvex_c/Makefile:
--------------------------------------------------------------------------------
 1 | UNAME := $(shell uname)
 2 | ifeq ($(UNAME), Darwin)
 3 | 	LIBRARY_FILE=libpyvex.dylib
 4 | 	STATIC_LIBRARY_FILE=libpyvex.a
 5 | 	LDFLAGS=-Wl,-install_name,@rpath/$(LIBRARY_FILE)
 6 | endif
 7 | ifeq ($(UNAME), Linux)
 8 | 	LIBRARY_FILE=libpyvex.so
 9 | 	STATIC_LIBRARY_FILE=libpyvex.a
10 | 	LDFLAGS=-Wl,-soname,$(LIBRARY_FILE)
11 | endif
12 | ifeq ($(UNAME), FreeBSD)
13 | 	LIBRARY_FILE=libpyvex.so
14 | 	STATIC_LIBRARY_FILE=libpyvex.a
15 | 	LDFLAGS=-Wl,-soname,$(LIBRARY_FILE)
16 | endif
17 | ifeq ($(UNAME), NetBSD)
18 | 	LIBRARY_FILE=libpyvex.so
19 | 	STATIC_LIBRARY_FILE=libpyvex.a
20 | 	LDFLAGS=-Wl,-soname,$(LIBRARY_FILE)
21 | endif
22 | ifeq ($(UNAME), OpenBSD)
23 | 	LIBRARY_FILE=libpyvex.so
24 | 	LDFLAGS=-Wl,-soname,$(LIBRARY_FILE) -L/usr/local/lib -lvex
25 | endif
26 | ifeq ($(findstring MINGW,$(UNAME)), MINGW)
27 | 	LIBRARY_FILE=pyvex.dll
28 | 	STATIC_LIBRARY_FILE=libpyvex.a
29 | 	LDFLAGS=
30 | endif
31 | 
32 | # deeply evil
33 | # https://www.cmcrossroads.com/article/gnu-make-meets-file-names-spaces-them
34 | sp =$(null) $(null)
35 | qs = $(subst ?,$(sp),$1)
36 | sq = $(subst $(sp),?,$1)
37 | 
38 | CC?=gcc
39 | AR=ar
40 | INCFLAGS=-I "$(VEX_INCLUDE_PATH)"
41 | CFLAGS=-g -O2 -Wall -shared -fPIC -std=c99 $(INCFLAGS)
42 | 
43 | OBJECTS=pyvex.o logging.o analysis.o postprocess.o
44 | HEADERS=pyvex.h
45 | 
46 | all: $(LIBRARY_FILE) $(STATIC_LIBRARY_FILE)
47 | 
48 | %.o: %.c
49 | 	$(CC) -c $(CFLAGS) $<
50 | 
51 | $(LIBRARY_FILE): $(OBJECTS) $(HEADERS) $(call sq,$(VEX_LIB_PATH)/libvex.a)
52 | 	$(CC) $(CFLAGS) -o $(LIBRARY_FILE) $(OBJECTS) "$(VEX_LIB_PATH)/libvex.a" $(LDFLAGS)
53 | 
54 | $(STATIC_LIBRARY_FILE): $(OBJECTS) $(HEADERS) $(call sq,$(VEX_LIB_PATH)/libvex.a)
55 | 	$(AR) rcs $(STATIC_LIBRARY_FILE) $(OBJECTS)
56 | 
57 | clean:
58 | 	rm -f $(LIBRARY_FILE) $(STATIC_LIBRARY_FILE) *.o
59 | 


--------------------------------------------------------------------------------
/pyvex_c/Makefile-msvc:
--------------------------------------------------------------------------------
 1 | CC=cl
 2 | INCFLAGS=/I "$(VEX_INCLUDE_PATH)"
 3 | CFLAGS=/LD /O2 $(INCFLAGS)
 4 | LDFLAGS=/link /DEF:pyvex.def
 5 | 
 6 | pyvex.dll: postprocess.c analysis.c pyvex.c logging.c "$(VEX_LIB_FILE)" pyvex.h pyvex.def
 7 | 	$(CC) $(CFLAGS) pyvex.c postprocess.c analysis.c logging.c "$(VEX_LIB_FILE)" $(LDFLAGS)
 8 | 
 9 | clean:
10 | 	del pyvex.dll pyvex.lib pyvex.exp pyvex.obj logging.obj
11 | 


--------------------------------------------------------------------------------
/pyvex_c/README:
--------------------------------------------------------------------------------
1 | To generate the list of exports for windows:
2 | 
3 | grep -E -o -h -r "pvc\.[a-zA-Z0-9_]+" | cut -c 5- | sort -u
4 | 
5 | Then remove Ity_I8 and add vex_lift (called from __init__ where we can't use the name pvc to reference it) and sizeofIRType (called from... the unicorn compatibility layer I think?)
6 | 


--------------------------------------------------------------------------------
/pyvex_c/e4c_lite.h:
--------------------------------------------------------------------------------
/*
 * exceptions4c lightweight version 1.0
 *
 * Copyright (c) 2014 Guillermo Calvo
 * Licensed under the GNU Lesser General Public License
 */

#ifndef EXCEPTIONS4C_LITE
#define EXCEPTIONS4C_LITE

#include <stddef.h>
#include <setjmp.h>

/* Maximum number of nested `try` blocks */
#ifndef E4C_MAX_FRAMES
# define E4C_MAX_FRAMES 16
#endif

/* Maximum length (in bytes) of an exception message */
#ifndef E4C_MESSAGE_SIZE
# define E4C_MESSAGE_SIZE 128
#endif

/* Exception handling keywords: try/catch/finally/throw */
/* Define E4C_NOKEYWORDS before including this header to keep these short
 * names out of the macro namespace and use the E4C_* spellings instead. */
#ifndef E4C_NOKEYWORDS
# define try E4C_TRY
# define catch(type) E4C_CATCH(type)
# define finally E4C_FINALLY
# define throw(type, message) E4C_THROW(type, message)
#endif

/* Represents an exception type */
struct e4c_exception_type{
	const char * name;                             /* type name (E4C_DEFINE_EXCEPTION stringizes the identifier) */
	const char * default_message;                  /* default message text for this type */
	const struct e4c_exception_type * supertype;   /* parent type in the exception hierarchy */
};

/* Declarations and definitions of exception types */
#define E4C_DECLARE_EXCEPTION(name) extern const struct e4c_exception_type name
#define E4C_DEFINE_EXCEPTION(name, default_message, supertype) const struct e4c_exception_type name = { #name, default_message, &supertype }

/* Predefined exception types */
E4C_DECLARE_EXCEPTION(RuntimeException);
E4C_DECLARE_EXCEPTION(NullPointerException);

/* Represents an instance of an exception type */
struct e4c_exception{
	char message[E4C_MESSAGE_SIZE];            /* message text, stored inline */
	const char * file;                         /* source file (see E4C_INFO) */
	int line;                                  /* source line (see E4C_INFO) */
	const struct e4c_exception_type * type;    /* type of this exception */
};

/* Retrieve current thrown exception */
#define E4C_EXCEPTION e4c.err

/* Returns whether current exception is of a given type */
#define E4C_IS_INSTANCE_OF(t) ( e4c.err.type == &t || e4c_extends(e4c.err.type, &t) )

/* Implementation details */
/* E4C_TRY expands to an if/while/if chain built on e4c_try(), setjmp() and
 * e4c_hook(); E4C_CATCH and E4C_FINALLY continue that chain as `else if`
 * branches, and the current frame's stage selects which body runs on each
 * pass of the while loop. */
#define E4C_TRY if(e4c_try(E4C_INFO) && setjmp(e4c.jump[e4c.frames - 1]) >= 0) while(e4c_hook(0)) if(e4c.frame[e4c.frames].stage == e4c_trying)
#define E4C_CATCH(type) else if(e4c.frame[e4c.frames].stage == e4c_catching && E4C_IS_INSTANCE_OF(type) && e4c_hook(1))
#define E4C_FINALLY else if(e4c.frame[e4c.frames].stage == e4c_finalizing)
#define E4C_THROW(type, message) e4c_throw(&type, E4C_INFO, message)
/* E4C_INFO supplies the file/line arguments; with NDEBUG defined no source
 * location is passed. */
#ifndef NDEBUG
# define E4C_INFO __FILE__, __LINE__
#else
# define E4C_INFO NULL, 0
#endif

/* Stages a frame can be in; tested by the macros above. */
enum e4c_stage{e4c_beginning, e4c_trying, e4c_catching, e4c_finalizing, e4c_done};
/* Global context: one jmp_buf per frame, the current exception, per-frame
 * stage/uncaught bookkeeping, and the frame count. */
extern struct e4c_context{jmp_buf jump[E4C_MAX_FRAMES]; struct e4c_exception err; struct{unsigned char stage; unsigned char uncaught;} frame[E4C_MAX_FRAMES + 1]; int frames;} e4c;
extern int e4c_try(const char * file, int line);
extern int e4c_hook(int is_catch);
extern int e4c_extends(const struct e4c_exception_type * child, const struct e4c_exception_type * parent);
extern void e4c_throw(const struct e4c_exception_type * exception_type, const char * file, int line, const char * message);

# endif
80 | 


--------------------------------------------------------------------------------
/pyvex_c/logging.c:
--------------------------------------------------------------------------------
 1 | // This code is GPLed by Yan Shoshitaishvili
 2 | 
 3 | #include <stdio.h>
 4 | #include <stdlib.h>
 5 | #include <stdarg.h>
 6 | 
 7 | #include "logging.h"
 8 | 
 9 | int log_level = 50;
10 | 
11 | void pyvex_debug(const char *fmt, ...)
12 | {
13 | 	if (log_level > 10) return;
14 | 
15 | 	fprintf(stderr, "[[pyvex_c]]\tDEBUG:\t");
16 | 	va_list args;
17 | 	va_start(args,fmt);
18 | 	vfprintf(stderr, fmt, args);
19 | 	va_end(args);
20 | 
21 | 	fflush(stdout);
22 | }
23 | 
24 | void pyvex_info(const char *fmt, ...)
25 | {
26 | 	if (log_level > 20) return;
27 | 
28 | 	fprintf(stderr, "[[pyvex_c]]\tINFO:\t");
29 | 	va_list args;
30 | 	va_start(args, fmt);
31 | 	vfprintf(stderr, fmt, args);
32 | 	va_end(args);
33 | 
34 | 	fflush(stdout);
35 | }
36 | 
37 | void pyvex_error(const char *fmt, ...)
38 | {
39 | 	if (log_level > 40) return;
40 | 
41 | 	fprintf(stderr, "[[pyvex_c]]\tERROR:\t");
42 | 	va_list args;
43 | 	va_start(args,fmt);
44 | 	vfprintf(stderr, fmt,args);
45 | 	va_end(args);
46 | 
47 | 	fflush(stderr);
48 | }
49 | 


--------------------------------------------------------------------------------
/pyvex_c/logging.h:
--------------------------------------------------------------------------------
// This code is GPLed by Yan Shoshitaishvili

#ifndef __COMMON_H
#define __COMMON_H

// Current verbosity threshold; a message is printed only when log_level is
// at or below the level of the function used (debug 10, info 20, error 40).
extern int log_level;

// printf-style logging helpers; each prefixes the message with
// "[[pyvex_c]]" and its level name and writes to stderr.
void pyvex_debug(const char *, ...);
void pyvex_info(const char *, ...);
void pyvex_error(const char *, ...);

#endif
13 | 


--------------------------------------------------------------------------------
/pyvex_c/postprocess.c:
--------------------------------------------------------------------------------
  1 | #include <libvex.h>
  2 | #include <libvex_guest_arm.h>
  3 | #include <stddef.h>
  4 | 
  5 | #include "pyvex_internal.h"
  6 | 
//
// Jumpkind fixes for ARM
//
// If PC is moved to LR, then this should be an Ijk_Call
//
// Example:
// MOV LR, PC
// MOV PC, R8
//
// Note that the value of PC is directly used in IRStatements, i.e
// instead of having:
//   t0 = GET:I32(pc)
//   PUT(lr) = t0
// we have:
//   PUT(lr) = 0x10400
// The only case (that I've seen so far) where a temporary variable
// is assigned to LR is:
//   t2 = ITE(cond, t0, t1)
//   PUT(lr) = t2
//
// How it works: the function walks the statements once, tracking known
// constant values of temporaries and registers, until it reaches the first
// PUT to LR. If the value stored there equals the address right after this
// block, either the last Exit statement or the block's default exit is
// rewritten to Ijk_Call. Blocks whose jumpkind is not Ijk_Boring are left
// untouched.
//
// Note: irsb_insts is currently not used in the decision (the check that
// referenced it is commented out below).
//
void arm_post_processor_determine_calls(
	Addr irsb_addr,  // Address of this IRSB
	Int irsb_size,  // Size of this IRSB
	Int irsb_insts,  // Number of instructions
	IRSB *irsb) {

// Offset to the link register
#define ARM_OFFB_LR      offsetof(VexGuestARMState,guest_R14)
// The maximum number of tmps
#define MAX_TMP 		 1000
// The maximum offset of registers
#define MAX_REG_OFFSET	 1000
// Dummy value
#define DUMMY 0xffeffeff

	if (irsb->jumpkind != Ijk_Boring) {
		return;
	}

	// Emulated CPU context
	// An entry equal to DUMMY means "value unknown".
	Addr tmps[MAX_TMP + 1];
	Addr regs[MAX_REG_OFFSET + 1];

	// Initialize context
	Int i;

	for (i = 0; i <= MAX_TMP; ++i) {
		tmps[i] = DUMMY;
	}

	for (i = 0; i <= MAX_REG_OFFSET; ++i) {
		regs[i] = DUMMY;
	}

	Int lr_store_pc = 0;
	Int inst_ctr = 0;  // counts IMarks; only incremented, never read
	Int has_exit = 0;
	IRStmt *other_exit = NULL;  // the last Exit statement in the block, if any
	Addr next_irsb_addr = (irsb_addr & (~1)) + irsb_size; // Clear the least significant bit
	Int is_thumb_mode = irsb_addr & 1;

    // if we pop {..,lr,...}; b xxx, I bet this isn't a boring jump!
    for (i = 0; i < irsb->stmts_used; ++i) {
		IRStmt *stmt = irsb->stmts[i];
		if (stmt->tag == Ist_Exit){
		    // HACK: FIXME: BLCC and friends set the default exit to Ijk_Boring
		    // Yet, the call is there, and it's just fine.
		    // We assume if the block has an exit AND lr stores PC, we're probably
		    // doing one of those fancy BL-ish things.
		    // Should work for BCC and friends though
		    has_exit = 1;
		    other_exit = stmt;
		}
    }


	for (i = 0; i < irsb->stmts_used; ++i) {
		IRStmt *stmt = irsb->stmts[i];

		if (stmt->tag == Ist_Put) {
			// LR is modified just before the last instruction of the block...
			if (stmt->Ist.Put.offset == ARM_OFFB_LR /*&& inst_ctr == irsb_insts - 1*/) {
				// ... by a constant, so test whether it is the address of the next IRSB
				if (stmt->Ist.Put.data->tag == Iex_Const) {
					IRConst *con = stmt->Ist.Put.data->Iex.Const.con;
					if (get_value_from_const_expr(con) == next_irsb_addr) {
						lr_store_pc = 1;
					} else {
						lr_store_pc = 0;
					}
				} else if (stmt->Ist.Put.data->tag == Iex_RdTmp) {
					Int tmp = stmt->Ist.Put.data->Iex.RdTmp.tmp;
					if (tmp <= MAX_TMP && next_irsb_addr == tmps[tmp]) {
						lr_store_pc = 1;
					} else {
						lr_store_pc = 0;
					}
				}
				// Only the first PUT to LR is examined; the scan stops here.
				break;
			}
		    else {
				// PUT to any other register: remember its value if it is known.
				Int reg_offset = stmt->Ist.Put.offset;
				if (reg_offset <= MAX_REG_OFFSET) {
					IRExpr *data = stmt->Ist.Put.data;
					if (data->tag == Iex_Const) {
						regs[reg_offset] = get_value_from_const_expr(stmt->Ist.Put.data->Iex.Const.con);
					} else if (data->tag == Iex_RdTmp) {
						Int tmp = data->Iex.RdTmp.tmp;
						if (tmp <= MAX_TMP && tmps[tmp] != DUMMY) {
							regs[reg_offset] = tmps[tmp];
						}
					} else if (data->tag == Iex_Get) {
						Int src_reg = data->Iex.Get.offset;
						if (src_reg <= MAX_REG_OFFSET && regs[src_reg] != DUMMY) {
							regs[reg_offset] = regs[src_reg];
						}
					}
				}
			}
		}
		else if (stmt->tag == Ist_WrTmp && stmt->Ist.WrTmp.tmp <= MAX_TMP) {
			// The PC value may propagate through the block, and since
			// LR is modified at the end of the block, the PC value have
			// to be incremented in order to match the address of the
			// next IRSB. So the only propagation ways that can lead to
			// a function call are:
			//
			//   - Iop_Add* operations (even "sub r0, #-4" is compiled
			//   as "add r0, #4")
			//   - Iop_And*, Iop_Or*, Iop_Xor*, Iop_Sh*, Iop_Not* (there
			//   may be some tricky and twisted ways to increment PC)
			//
			Int tmp_dst = stmt->Ist.WrTmp.tmp;
			if (stmt->Ist.WrTmp.data->tag == Iex_Binop) {
				IRExpr* data = stmt->Ist.WrTmp.data;
				Addr op0 = DUMMY, op1 = DUMMY;
				// Extract op0
				if (data->Iex.Binop.arg1->tag == Iex_Const) {
					op0 = get_value_from_const_expr(data->Iex.Binop.arg1->Iex.Const.con);
				} else if (data->Iex.Binop.arg1->tag == Iex_RdTmp) {
					Int tmp = data->Iex.Binop.arg1->Iex.RdTmp.tmp;
					if (tmp <= MAX_TMP && tmps[tmp] != DUMMY) {
						op0 = tmps[tmp];
					}
				}
				// Extract op1
				if (data->Iex.Binop.arg2->tag == Iex_Const) {
					op1 = get_value_from_const_expr(data->Iex.Binop.arg2->Iex.Const.con);
				} else if (data->Iex.Binop.arg2->tag == Iex_RdTmp) {
					Int tmp = data->Iex.Binop.arg2->Iex.RdTmp.tmp;
					if (tmp <= MAX_TMP && tmps[tmp] != DUMMY) {
						op1 = tmps[tmp];
					}
				}
				if (op0 != DUMMY && op1 != DUMMY) {
					// Both operands are loaded. Perform calculation.
					switch (data->Iex.Binop.op) {
					case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
						tmps[tmp_dst] = op0 + op1;
						break;
					case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
						tmps[tmp_dst] = op0 - op1;
						break;
					case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
						tmps[tmp_dst] = op0 & op1;
						break;
					case Iop_Or8: case Iop_Or16: case Iop_Or32: case Iop_Or64:
						tmps[tmp_dst] = op0 | op1;
						break;
					case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
						tmps[tmp_dst] = op0 ^ op1;
						break;
					case Iop_Shl8: case Iop_Shl16: case Iop_Shl32: case Iop_Shl64:
						tmps[tmp_dst] = op0 << op1;
						break;
					// Shr and Sar are both evaluated with the same >> on Addr operands.
					case Iop_Shr8: case Iop_Shr16: case Iop_Shr32: case Iop_Shr64:
					case Iop_Sar8: case Iop_Sar16: case Iop_Sar32: case Iop_Sar64:
						tmps[tmp_dst] = op0 >> op1;
						break;
					default:
						// Unsupported operation
						break;
					}
				}
			} else if (stmt->Ist.WrTmp.data->tag == Iex_Get) {
				Int reg_offset = stmt->Ist.WrTmp.data->Iex.Get.offset;
				if (reg_offset <= MAX_REG_OFFSET && regs[reg_offset] != DUMMY) {
					tmps[tmp_dst] = regs[reg_offset];
				}
			} else if (stmt->Ist.WrTmp.data->tag == Iex_ITE) {
				// Parse iftrue and iffalse
				// The condition itself is not evaluated: iffalse is recorded
				// first, and a known iftrue value then overwrites it.
				IRExpr *data = stmt->Ist.WrTmp.data;
				if (data->Iex.ITE.iffalse->tag == Iex_Const) {
					tmps[tmp_dst] = get_value_from_const_expr(data->Iex.ITE.iffalse->Iex.Const.con);
				} else if (data->Iex.ITE.iffalse->tag == Iex_RdTmp) {
					Int tmp = data->Iex.ITE.iffalse->Iex.RdTmp.tmp;
					if (tmp <= MAX_TMP && tmps[tmp] != DUMMY) {
						tmps[tmp_dst] = tmps[tmp];
					}
				}
				if (data->Iex.ITE.iftrue->tag == Iex_Const) {
					tmps[tmp_dst] = get_value_from_const_expr(data->Iex.ITE.iftrue->Iex.Const.con);
				} else if (data->Iex.ITE.iftrue->tag == Iex_RdTmp) {
					Int tmp = data->Iex.ITE.iftrue->Iex.RdTmp.tmp;
					if (tmp <= MAX_TMP && tmps[tmp] != DUMMY) {
						tmps[tmp_dst] = tmps[tmp];
					}
				}
			} else if (stmt->Ist.WrTmp.data->tag == Iex_RdTmp) {
				IRExpr *data = stmt->Ist.WrTmp.data;
				Int tmp = data->Iex.RdTmp.tmp;
				if (tmp <= MAX_TMP && tmps[tmp] != DUMMY) {
					tmps[tmp_dst] = tmps[tmp];
				}
			} else if (stmt->Ist.WrTmp.data->tag == Iex_Const) {
				IRConst *con = stmt->Ist.WrTmp.data->Iex.Const.con;
				tmps[tmp_dst] = get_value_from_const_expr(con);
			}
		}
		else if (stmt->tag == Ist_IMark) {
			inst_ctr++;
		}
	}

	if (lr_store_pc) {
		if (has_exit &&  // It has a non-default exit
			other_exit->Ist.Exit.jk == Ijk_Boring &&  // The non-default exit is a Boring jump
			get_value_from_const_expr(other_exit->Ist.Exit.dst) != next_irsb_addr + is_thumb_mode // The non-default exit is not skipping
																			  // the last instruction
		) {
			// Fix the not-default exit
			other_exit->Ist.Exit.jk = Ijk_Call;
		}
		else if (!has_exit || other_exit->Ist.Exit.jk != Ijk_Call) {
			//Fix the default exit
			irsb->jumpkind = Ijk_Call;
		}
	}

// Undefine all defined values
#undef ARM_OFFB_LR
#undef MAX_TMP
#undef MAX_REG_OFFSET
#undef DUMMY
}
252 | 
253 | 
254 | //
255 | // Unconditional branch fixes for MIPS32
256 | //
257 | // Handle unconditional branches
258 | // `beq $zero, $zero, xxxx`
259 | // It is translated to
260 | //
261 | // 15 | ------ IMark(0x401684, 4, 0) ------
262 | // 16 | t0 = CmpEQ32(0x00000000, 0x00000000)
263 | // 17 | PUT(128) = 0x00401688
264 | // 18 | ------ IMark(0x401688, 4, 0) ------
265 | // 19 | if (t0) goto {Ijk_Boring} 0x401684
266 | // 20 | PUT(128) = 0x0040168c
267 | // 21 | t4 = GET:I32(128)
268 | // NEXT: PUT(128) = t4; Ijk_Boring
269 | //
270 | void mips32_post_processor_fix_unconditional_exit(
271 | 	IRSB *irsb) {
272 | 
273 | #define INVALID		0xffff
274 | 
275 | 	Int i;
276 | 	Int tmp_exit = INVALID, exit_stmt_idx = INVALID;
277 | 	IRConst *dst = NULL;
278 | 
279 | 	for (i = irsb->stmts_used - 1; i >= 0; --i) {
280 | 		IRStmt *stmt = irsb->stmts[i];
281 | 		if (tmp_exit == INVALID) {
282 | 			// Looking for the Exit statement
283 | 			if (stmt->tag == Ist_Exit &&
284 | 					stmt->Ist.Exit.jk == Ijk_Boring &&
285 | 					stmt->Ist.Exit.guard->tag == Iex_RdTmp) {
286 | 				tmp_exit = stmt->Ist.Exit.guard->Iex.RdTmp.tmp;
287 | 				dst = stmt->Ist.Exit.dst;
288 | 				exit_stmt_idx = i;
289 | 			}
290 | 		}
291 | 		else if (stmt->tag == Ist_WrTmp && stmt->Ist.WrTmp.tmp == tmp_exit) {
292 | 			// Looking for the WrTmp statement
293 | 			IRExpr *data = stmt->Ist.WrTmp.data;
294 | 			if (data->tag == Iex_Binop &&
295 | 				data->Iex.Binop.op == Iop_CmpEQ32 &&
296 | 				data->Iex.Binop.arg1->tag == Iex_Const &&
297 | 				data->Iex.Binop.arg2->tag == Iex_Const &&
298 | 				get_value_from_const_expr(data->Iex.Binop.arg1->Iex.Const.con) ==
299 | 					get_value_from_const_expr(data->Iex.Binop.arg2->Iex.Const.con)) {
300 | 						// We found it
301 | 
302 | 						// Update the statements
303 | 						Int j;
304 | 						for (j = exit_stmt_idx; j < irsb->stmts_used - 1; ++j) {
305 | 							irsb->stmts[j] = irsb->stmts[j + 1];
306 | 						}
307 | 						irsb->stmts_used -= 1;
308 | 						// Update the default of the IRSB
309 | 						irsb->next = IRExpr_Const(dst);
310 | 			}
311 | 			break;
312 | 		}
313 | 	}
314 | 
315 | #undef INVALID
316 | }
317 | 
318 | void irsb_insert(IRSB *irsb, IRStmt* stmt, Int i) {
319 |     addStmtToIRSB(irsb, stmt);
320 | 
321 | 	IRStmt *in_air = irsb->stmts[irsb->stmts_used - 1];
322 | 	for (Int j = irsb->stmts_used - 1; j > i; j--) {
323 |         irsb->stmts[j] = irsb->stmts[j-1];
324 | 	}
325 | 	irsb->stmts[i] = in_air;
326 | }
327 | 
328 | void zero_division_side_exits(IRSB *irsb) {
329 | 	Int i;
330 | 	Addr lastIp = -1;
331 | 	IRType addrTy = typeOfIRExpr(irsb->tyenv, irsb->next);
332 | 	IRConstTag addrConst = addrTy == Ity_I32 ? Ico_U32 : addrTy == Ity_I16 ? Ico_U16 : Ico_U64;
333 | 	IRType argty;
334 | 	IRTemp cmptmp;
335 | 
336 | 	for (i = 0; i < irsb->stmts_used; i++) {
337 | 		IRStmt *stmt = irsb->stmts[i];
338 | 		switch (stmt->tag) {
339 | 			case Ist_IMark:
340 | 				lastIp = stmt->Ist.IMark.addr;
341 | 				continue;
342 | 			case Ist_WrTmp:
343 | 				if (stmt->Ist.WrTmp.data->tag != Iex_Binop) {
344 | 					continue;
345 | 				}
346 | 
347 | 				switch (stmt->Ist.WrTmp.data->Iex.Binop.op) {
348 | 					case Iop_DivU32:
349 | 					case Iop_DivS32:
350 | 					case Iop_DivU32E:
351 | 					case Iop_DivS32E:
352 | 					case Iop_DivModU64to32:
353 | 					case Iop_DivModS64to32:
354 | 						argty = Ity_I32;
355 | 						break;
356 | 
357 | 					case Iop_DivU64:
358 | 					case Iop_DivS64:
359 | 					case Iop_DivU64E:
360 | 					case Iop_DivS64E:
361 | 					case Iop_DivModU128to64:
362 | 					case Iop_DivModS128to64:
363 | 					case Iop_DivModS64to64:
364 | 						argty = Ity_I64;
365 | 						break;
366 | 
367 | 					// TODO YIKES
368 | 					//case Iop_DivF32:
369 | 					//	argty = Ity_F32;
370 | 
371 | 					//case Iop_DivF64:
372 | 					//case Iop_DivF64r32:
373 | 					//	argty = Ity_F64;
374 | 
375 | 					//case Iop_DivF128:
376 | 					//	argty = Ity_F128;
377 | 
378 | 					//case Iop_DivD64:
379 | 					//	argty = Ity_D64;
380 | 
381 | 					//case Iop_DivD128:
382 | 					//	argty = Ity_D128;
383 | 
384 | 					//case Iop_Div32Fx4:
385 | 					//case Iop_Div32F0x4:
386 | 					//case Iop_Div64Fx2:
387 | 					//case Iop_Div64F0x2:
388 | 					//case Iop_Div64Fx4:
389 | 					//case Iop_Div32Fx8:
390 | 
391 | 					default:
392 | 						continue;
393 | 				}
394 | 
395 | 				cmptmp = newIRTemp(irsb->tyenv, Ity_I1);
396 | 				irsb_insert(irsb, IRStmt_WrTmp(cmptmp, IRExpr_Binop(argty == Ity_I32 ? Iop_CmpEQ32 : Iop_CmpEQ64, stmt->Ist.WrTmp.data->Iex.Binop.arg2, IRExpr_Const(argty == Ity_I32 ? IRConst_U32(0) : IRConst_U64(0)))), i);
397 | 				i++;
398 | 				IRConst *failAddr = IRConst_U64(lastIp); // ohhhhh boy this is a hack
399 | 				failAddr->tag = addrConst;
400 | 				irsb_insert(irsb, IRStmt_Exit(IRExpr_RdTmp(cmptmp), Ijk_SigFPE_IntDiv, failAddr, irsb->offsIP), i);
401 | 				i++;
402 | 				break;
403 | 
404 | 		default:
405 | 			continue;
406 | 		}
407 | 	}
408 | }
409 | 
410 | 


--------------------------------------------------------------------------------
/pyvex_c/pyvex.c:
--------------------------------------------------------------------------------
  1 | /*
  2 | This is shamelessly ripped from Vine, because those guys have very very strange language preferences.
  3 | Vine is Copyright (C) 2006-2009, BitBlaze Team.
  4 | 
  5 | You can redistribute and modify it under the terms of the GNU GPL,
  6 | version 2 or later, but it is made available WITHOUT ANY WARRANTY.
  7 | See the top-level README file for more details.
  8 | 
  9 | For more information about Vine and other BitBlaze software, see our
 10 | web site at: http://bitblaze.cs.berkeley.edu/
 11 | */
 12 | 
 13 | //======================================================================
 14 | //
 15 | // This file provides the interface to VEX that allows block by block
 16 | // translation from binary to VEX IR.
 17 | //
 18 | //======================================================================
 19 | 
 20 | #include <stdio.h>
 21 | #include <stdlib.h>
 22 | #include <string.h>
 23 | #include <setjmp.h>
 24 | #include <stddef.h>
 25 | #include <libvex.h>
 26 | 
 27 | #include "pyvex.h"
 28 | #include "pyvex_internal.h"
 29 | #include "logging.h"
 30 | 
 31 | //======================================================================
 32 | //
 33 | // Globals
 34 | //
 35 | //======================================================================
 36 | 
 37 | // Some info required for translation
 38 | VexArchInfo         vai_host;
 39 | VexGuestExtents     vge;
 40 | VexTranslateArgs    vta;
 41 | VexTranslateResult  vtr;
 42 | VexAbiInfo	        vbi;
 43 | VexControl          vc;
 44 | 
 45 | // Log message buffer, from vex itself
 46 | char *msg_buffer = NULL;
 47 | size_t msg_capacity = 0, msg_current_size = 0;
 48 | 
 49 | jmp_buf jumpout;
 50 | 
 51 | //======================================================================
 52 | //
 53 | // Functions needed for the VEX translation
 54 | //
 55 | //======================================================================
 56 | 
// Failure callback handed to LibVEX_Init(). It never returns: control jumps
// back to the most recent setjmp(jumpout) with the value 1.
#ifdef _MSC_VER
__declspec(noreturn)
#else
__attribute__((noreturn))
#endif
static void failure_exit(void) {
	longjmp(jumpout, 1);
}
 65 | 
 66 | static void log_bytes(const HChar* bytes, SizeT nbytes) {
 67 | 	if (msg_buffer == NULL) {
 68 | 		msg_buffer = malloc(nbytes);
 69 | 		msg_capacity = nbytes;
 70 | 	}
 71 | 	if (nbytes + msg_current_size > msg_capacity) {
 72 | 		do {
 73 | 			msg_capacity *= 2;
 74 | 		} while (nbytes + msg_current_size > msg_capacity);
 75 | 		msg_buffer = realloc(msg_buffer, msg_capacity);
 76 | 	}
 77 | 
 78 | 	memcpy(&msg_buffer[msg_current_size], bytes, nbytes);
 79 | 	msg_current_size += nbytes;
 80 | }
 81 | 
 82 | void clear_log() {
 83 | 	if (msg_buffer != NULL) {
 84 | 			free(msg_buffer);
 85 | 			msg_buffer = NULL;
 86 | 			msg_capacity = 0;
 87 | 			msg_current_size = 0;
 88 | 	}
 89 | }
 90 | 
// Installed as vta.chase_into_ok in vex_init(). Always answers False,
// whatever the arguments.
static Bool chase_into_ok(void *closureV, Addr addr64) {
	return False;
}
 94 | 
// Installed as vta.needs_self_check in vex_init(). Always returns 0 and
// ignores its arguments.
static UInt needs_self_check(void *callback_opaque, VexRegisterUpdates* pxControl, const VexGuestExtents *guest_extents) {
	return 0;
}
 98 | 
// Placeholder whose address fills the vta.disp_cp_* fields in vex_init()
// (marked "Not used" there). Returns NULL.
static void *dispatch(void) {
	return NULL;
}
102 | 
103 | 
104 | //----------------------------------------------------------------------
105 | // Initializes VEX
106 | // It must be called before using VEX for translation to Valgrind IR
107 | //----------------------------------------------------------------------
108 | int vex_init() {
109 | 	static int initialized = 0;
110 | 	pyvex_debug("Initializing VEX.\n");
111 | 
112 | 	if (initialized) {
113 | 		pyvex_debug("VEX already initialized.\n");
114 | 		return 1;
115 | 	}
116 | 	initialized = 1;
117 | 
118 | 	// Initialize VEX
119 | 	LibVEX_default_VexControl(&vc);
120 | 	LibVEX_default_VexArchInfo(&vai_host);
121 | 	LibVEX_default_VexAbiInfo(&vbi);
122 | 
123 | 	vc.iropt_verbosity              = 0;
124 | 	vc.iropt_level                  = 0;    // No optimization by default
125 | 	//vc.iropt_precise_memory_exns    = False;
126 | 	vc.iropt_unroll_thresh          = 0;
127 | 	vc.guest_max_insns              = 1;    // By default, we vex 1 instruction at a time
128 | 	vc.guest_chase_thresh           = 0;
129 | 	vc.arm64_allow_reordered_writeback = 0;
130 | 	vc.x86_optimize_callpop_idiom = 0;
131 | 	vc.strict_block_end = 0;
132 | 	vc.special_instruction_support = 0;
133 | 
134 | 	pyvex_debug("Calling LibVEX_Init()....\n");
135 | 	if (setjmp(jumpout) == 0) {
136 |         // the 0 is the debug level
137 |         LibVEX_Init(&failure_exit, &log_bytes, 0, &vc);
138 |         pyvex_debug("LibVEX_Init() done....\n");
139 |     } else {
140 |         pyvex_debug("LibVEX_Init() failed catastrophically...\n");
141 |         return 0;
142 |     }
143 | 
144 | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
145 | 	vai_host.endness = VexEndnessLE;
146 | #else
147 | 	vai_host.endness = VexEndnessBE;
148 | #endif
149 | 
150 | 	// various settings to make stuff work
151 | 	// ... former is set to 'unspecified', but gets set in vex_inst for archs which care
152 | 	// ... the latter two are for dealing with gs and fs in VEX
153 | 	vbi.guest_stack_redzone_size = 0;
154 | 	vbi.guest_amd64_assume_fs_is_const = True;
155 | 	vbi.guest_amd64_assume_gs_is_const = True;
156 | 
157 | 	//------------------------------------
158 | 	// options for instruction translation
159 | 
160 | 	//
161 | 	// Architecture info
162 | 	//
163 | 	vta.arch_guest          = VexArch_INVALID; // to be assigned later
164 | #if __amd64__ || _WIN64
165 | 	vta.arch_host = VexArchAMD64;
166 | #elif __i386__ || _WIN32
167 | 	vta.arch_host = VexArchX86;
168 | #elif __arm__
169 | 	vta.arch_host = VexArchARM;
170 | 	vai_host.hwcaps = 7;
171 | #elif __aarch64__
172 | 	vta.arch_host = VexArchARM64;
173 | #elif __s390x__
174 | 	vta.arch_host = VexArchS390X;
175 | 	vai_host.hwcaps = VEX_HWCAPS_S390X_LDISP;
176 | #elif defined(__powerpc__) && defined(__NetBSD__)
177 | #  if defined(__LONG_WIDTH__) && (__LONG_WIDTH__ == 32)
178 | 	vta.arch_host = VexArchPPC32;
179 | #  endif
180 | #elif defined(__powerpc__)
181 |         vta.arch_host = VexArchPPC64;
182 | #elif defined(__riscv)
183 | #  if defined(__riscv_xlen) && (__riscv_xlen == 64)
184 | 	vta.arch_host = VexArchRISCV64;
185 | #  endif
186 | #else
187 | #error "Unsupported host arch"
188 | #endif
189 | 
190 | 	vta.archinfo_host = vai_host;
191 | 
192 | 	//
193 | 	// The actual stuff to vex
194 | 	//
195 | 	vta.guest_bytes         = NULL;             // Set in vex_insts
196 | 	vta.guest_bytes_addr    = 0;                // Set in vex_insts
197 | 
198 | 	//
199 | 	// callbacks
200 | 	//
201 | 	vta.callback_opaque     = NULL;             // Used by chase_into_ok, but never actually called
202 | 	vta.chase_into_ok       = chase_into_ok;    // Always returns false
203 | 	vta.preamble_function   = NULL;
204 | 	vta.instrument1         = NULL;
205 | 	vta.instrument2         = NULL;
206 | 	vta.finaltidy	    	= NULL;
207 | 	vta.needs_self_check	= needs_self_check;
208 | 
209 | 	vta.disp_cp_chain_me_to_slowEP = (void *)dispatch; // Not used
210 | 	vta.disp_cp_chain_me_to_fastEP = (void *)dispatch; // Not used
211 | 	vta.disp_cp_xindir = (void *)dispatch; // Not used
212 | 	vta.disp_cp_xassisted = (void *)dispatch; // Not used
213 | 
214 | 	vta.guest_extents       = &vge;
215 | 	vta.host_bytes          = NULL;           // Buffer for storing the output binary
216 | 	vta.host_bytes_size     = 0;
217 | 	vta.host_bytes_used     = NULL;
218 | 	// doesn't exist? vta.do_self_check       = False;
219 | 	vta.traceflags          = 0;                // Debug verbosity
220 | 	//vta.traceflags          = -1;                // Debug verbosity
221 |     return 1;
222 | }
223 | 
224 | // Prepare the VexArchInfo struct
225 | static void vex_prepare_vai(VexArch arch, VexArchInfo *vai) {
226 | 	switch (arch) {
227 | 		case VexArchX86:
228 | 			vai->hwcaps =   VEX_HWCAPS_X86_MMXEXT |
229 | 							VEX_HWCAPS_X86_SSE1 |
230 | 							VEX_HWCAPS_X86_SSE2 |
231 | 							VEX_HWCAPS_X86_SSE3 |
232 | 							VEX_HWCAPS_X86_LZCNT;
233 | 			break;
234 | 		case VexArchAMD64:
235 | 			vai->hwcaps =   VEX_HWCAPS_AMD64_SSE3 |
236 | 							VEX_HWCAPS_AMD64_CX16 |
237 | 							VEX_HWCAPS_AMD64_LZCNT |
238 | 							VEX_HWCAPS_AMD64_AVX |
239 | 							VEX_HWCAPS_AMD64_RDTSCP |
240 | 							VEX_HWCAPS_AMD64_BMI |
241 | 							VEX_HWCAPS_AMD64_AVX2;
242 | 			break;
243 | 		case VexArchARM:
244 | 			vai->hwcaps = VEX_ARM_ARCHLEVEL(8) |
245 | 							VEX_HWCAPS_ARM_NEON |
246 | 							VEX_HWCAPS_ARM_VFP3;
247 | 			break;
248 | 		case VexArchARM64:
249 | 			vai->hwcaps = 0;
250 | 			vai->arm64_dMinLine_lg2_szB = 6;
251 | 			vai->arm64_iMinLine_lg2_szB = 6;
252 | 			break;
253 | 		case VexArchPPC32:
254 | 			vai->hwcaps =   VEX_HWCAPS_PPC32_F |
255 | 							VEX_HWCAPS_PPC32_V |
256 | 							VEX_HWCAPS_PPC32_FX |
257 | 							VEX_HWCAPS_PPC32_GX |
258 | 							VEX_HWCAPS_PPC32_VX |
259 | 							VEX_HWCAPS_PPC32_DFP |
260 | 							VEX_HWCAPS_PPC32_ISA2_07;
261 | 			vai->ppc_icache_line_szB = 32; // unsure if correct
262 | 			break;
263 | 		case VexArchPPC64:
264 | 			vai->hwcaps =   VEX_HWCAPS_PPC64_V |
265 | 							VEX_HWCAPS_PPC64_FX |
266 | 							VEX_HWCAPS_PPC64_GX |
267 | 							VEX_HWCAPS_PPC64_VX |
268 | 							VEX_HWCAPS_PPC64_DFP |
269 | 							VEX_HWCAPS_PPC64_ISA2_07;
270 | 			vai->ppc_icache_line_szB = 64; // unsure if correct
271 | 			break;
272 | 		case VexArchS390X:
273 | 			vai->hwcaps = 0;
274 | 			break;
275 | 		case VexArchMIPS32:
276 | 		case VexArchMIPS64:
277 | 			vai->hwcaps = VEX_PRID_COMP_CAVIUM;
278 | 			break;
279 | 		case VexArchRISCV64:
280 | 			vai->hwcaps = 0;
281 | 			break;
282 | 		default:
283 | 			pyvex_error("Invalid arch in vex_prepare_vai.\n");
284 | 			break;
285 | 	}
286 | }
287 | 
288 | // Prepare the VexAbiInfo: set the stack red-zone size for the guest arches handled in the switch below.
289 | static void vex_prepare_vbi(VexArch arch, VexAbiInfo *vbi) {
290 | 	// only setting the guest_stack_redzone_size for now
291 | 	// this attribute is only specified by the X86, AMD64 and PPC64 ABIs
292 | 
293 | 	switch (arch) {
294 | 		case VexArchX86:
295 | 			vbi->guest_stack_redzone_size = 0;
296 | 			break;
297 | 		case VexArchAMD64:
298 | 			vbi->guest_stack_redzone_size = 128;
299 | 			break;
300 | 		case VexArchPPC64:
301 | 			vbi->guest_stack_redzone_size = 288;
302 | 			break;
303 | 		default: // every other arch: guest_stack_redzone_size keeps whatever value *vbi already holds
304 | 			break;
305 | 	}
306 | }
307 | 
308 | VEXLiftResult _lift_r; // shared result storage: vex_lift() fills it and returns its address, so each call overwrites the previous result
309 | 
310 | //----------------------------------------------------------------------
311 | // Main entry point. Do a lift: translate the guest bytes at insn_start into an IRSB and fill _lift_r from it.
312 | // Returns &_lift_r on success, or NULL when LibVEX_Lift yields no IRSB or control comes back through jumpout.
313 | VEXLiftResult *vex_lift(
314 | 		VexArch guest,
315 | 		VexArchInfo archinfo,
316 | 		unsigned char *insn_start,
317 | 		unsigned long long insn_addr,
318 | 		unsigned int max_insns,
319 | 		unsigned int max_bytes,
320 | 		int opt_level,
321 | 		int traceflags,
322 | 		int allow_arch_optimizations,
323 | 		int strict_block_end,
324 | 		int collect_data_refs,
325 | 		int load_from_ro_regions,
326 | 		int const_prop,
327 | 		VexRegisterUpdates px_control,
328 | 		unsigned int lookback) {
329 | 	VexRegisterUpdates pxControl = px_control; // local copy whose address is handed to LibVEX_Lift
330 | 
331 | 	vex_prepare_vai(guest, &archinfo); // archinfo was passed by value, so only this local copy is updated
332 | 	vex_prepare_vbi(guest, &vbi);
333 | 
334 | 	pyvex_debug("Guest arch: %d\n", guest);
335 | 	pyvex_debug("Guest arch hwcaps: %08x\n", archinfo.hwcaps);
336 | 
337 | 	vta.archinfo_guest = archinfo;
338 | 	vta.arch_guest = guest;
339 | 	vta.abiinfo_both = vbi; // Set the vbi value
340 | 
341 | 	vta.guest_bytes         = (UChar *)(insn_start);  // Ptr to actual bytes of start of instruction
342 | 	vta.guest_bytes_addr    = (Addr64)(insn_addr);
343 | 	vta.traceflags          = traceflags;
344 | 
345 | 	vc.guest_max_bytes     = max_bytes;
346 | 	vc.guest_max_insns     = max_insns;
347 | 	vc.iropt_level         = opt_level;
348 | 	vc.lookback_amount     = lookback;
349 | 
350 | 	// Gate all of these on one flag, they depend on the arch
351 | 	vc.arm_allow_optimizing_lookback = allow_arch_optimizations;
352 | 	vc.arm64_allow_reordered_writeback = allow_arch_optimizations;
353 | 	vc.x86_optimize_callpop_idiom = allow_arch_optimizations;
354 | 
355 | 	vc.strict_block_end = strict_block_end;
356 | 
357 | 	clear_log();
358 | 
359 | 	// Do the actual translation
360 | 	if (setjmp(jumpout) == 0) { // direct-call path; a longjmp to jumpout lands in the else branch at the bottom
361 | 		LibVEX_Update_Control(&vc);
362 | 		_lift_r.is_noop_block = False; // reset these per-lift fields before lifting
363 | 		_lift_r.data_ref_count = 0;
364 | 		_lift_r.const_val_count = 0;
365 | 		_lift_r.irsb = LibVEX_Lift(&vta, &vtr, &pxControl);
366 | 		if (!_lift_r.irsb) {
367 | 			// Lifting failed
368 | 			return NULL;
369 | 		}
370 | 		remove_noops(_lift_r.irsb);
371 | 		if (guest == VexArchMIPS32) {
372 | 			// This post processor may potentially remove statements.
373 | 			// Call it before we get exit statements and such.
374 | 			mips32_post_processor_fix_unconditional_exit(_lift_r.irsb);
375 | 		}
376 | 		get_exits_and_inst_addrs(_lift_r.irsb, &_lift_r);
377 | 		get_default_exit_target(_lift_r.irsb, &_lift_r);
378 | 		if (guest == VexArchARM && _lift_r.insts > 0) { // needs at least one instruction because inst_addrs[0] is read below
379 | 			arm_post_processor_determine_calls(_lift_r.inst_addrs[0], _lift_r.size, _lift_r.insts, _lift_r.irsb);
380 | 		}
381 | 		zero_division_side_exits(_lift_r.irsb);
382 | 		get_is_noop_block(_lift_r.irsb, &_lift_r);
383 | 		if (collect_data_refs || const_prop) { // execute_irsb only runs when one of its two outputs was requested
384 | 			execute_irsb(_lift_r.irsb, &_lift_r, guest, (Bool)load_from_ro_regions, (Bool)collect_data_refs, (Bool)const_prop);
385 | 		}
386 | 		return &_lift_r;
387 | 	} else {
388 | 		return NULL; // setjmp returned non-zero, i.e. something longjmp'd back to jumpout
389 | 	}
390 | }
391 | 


--------------------------------------------------------------------------------
/pyvex_c/pyvex.def:
--------------------------------------------------------------------------------
 1 | LIBRARY pyvex.dll
 2 | 
 3 | EXPORTS
 4 |   IRConst_F32
 5 |   IRConst_F32i
 6 |   IRConst_F64
 7 |   IRConst_F64i
 8 |   IRConst_U1
 9 |   IRConst_U16
10 |   IRConst_U32
11 |   IRConst_U64
12 |   IRConst_U8
13 |   IRConst_V128
14 |   IRConst_V256
15 |   IRExpr_Binder
16 |   IRExpr_Binop
17 |   IRExpr_CCall
18 |   IRExpr_Const
19 |   IRExpr_GSPTR
20 |   IRExpr_Get
21 |   IRExpr_GetI
22 |   IRExpr_ITE
23 |   IRExpr_Load
24 |   IRExpr_Qop
25 |   IRExpr_RdTmp
26 |   IRExpr_Triop
27 |   IRExpr_Unop
28 |   IRExpr_VECRET
29 |   emptyIRSB
30 |   emptyIRTypeEnv
31 |   log_level
32 |   mkIRCallee
33 |   mkIRExprVec_0
34 |   mkIRExprVec_1
35 |   mkIRExprVec_2
36 |   mkIRExprVec_3
37 |   mkIRExprVec_4
38 |   mkIRExprVec_5
39 |   mkIRExprVec_6
40 |   mkIRExprVec_7
41 |   mkIRExprVec_8
42 |   mkIRRegArray
43 |   msg_buffer
44 |   msg_current_size
45 |   newIRTemp
46 |   typeOfIRExpr
47 |   typeOfIRLoadGOp
48 |   typeOfPrimop
49 |   clear_log
50 |   vex_lift
51 |   vex_init
52 |   register_readonly_region
53 |   deregister_all_readonly_regions
54 |   register_initial_register_value
55 |   reset_initial_register_values
56 |   sizeofIRType
57 | 


--------------------------------------------------------------------------------
/pyvex_c/pyvex.h:
--------------------------------------------------------------------------------
 1 | // This code is GPLed by Yan Shoshitaishvili
 2 | 
 3 | #ifndef __VEXIR_H
 4 | #define __VEXIR_H
 5 | 
 6 | #include <libvex.h>
 7 | 
 8 | // Some info required for translation
 9 | extern int log_level;
10 | extern VexTranslateArgs    vta;
11 | 
12 | extern char *msg_buffer;
13 | extern size_t msg_current_size;
14 | void clear_log(void);
15 | 
16 | //
17 | // Initializes VEX. This function must be called before vex_lift
18 | // can be used.
19 | //
20 | int vex_init(void);
21 | 
22 | typedef struct _ExitInfo { // element type of VEXLiftResult.exits
23 | 	Int stmt_idx; // presumably the index of the exit statement within the IRSB — confirm in get_exits_and_inst_addrs
24 | 	Addr ins_addr; // presumably the address of the instruction containing the exit — confirm
25 | 	IRStmt *stmt; // pointer to a VEX statement; presumably the exit statement itself — confirm
26 | } ExitInfo;
27 | 
28 | typedef enum { // type of the DataRef.data_type field
29 | 	Dt_Unknown = 0x9000, // explicit base value; the enumerators below count up from it
30 | 	Dt_Integer, // 0x9001
31 | 	Dt_FP, // 0x9002
32 | 	Dt_StoreInteger // 0x9003
33 | } DataRefTypes;
34 | 
35 | typedef struct _DataRef { // element type of VEXLiftResult.data_refs
36 | 	Addr data_addr; // presumably the address of the referenced data — confirm in execute_irsb
37 | 	Int size; // presumably the size of the access — confirm
38 | 	DataRefTypes data_type; // one of the Dt_* enumerators
39 | 	Int stmt_idx; // presumably the index of the referencing statement — confirm
40 | 	Addr ins_addr; // presumably the address of the referencing instruction — confirm
41 | } DataRef;
42 | 
43 | typedef struct _ConstVal { // element type of VEXLiftResult.const_vals
44 | 	Int tmp; // presumably the IR temporary the constant belongs to — confirm in execute_irsb
45 | 	Int stmt_idx; // presumably the index of the statement involved — confirm
46 | 	ULong value;  // 64-bit max
47 | } ConstVal;
48 | 
49 | #define MAX_EXITS 400
50 | #define MAX_DATA_REFS 2000
51 | #define MAX_CONST_VALS 1000
52 | 
53 | typedef struct _VEXLiftResult { // what vex_lift() returns a pointer to
54 | 	IRSB* irsb; // assigned from LibVEX_Lift's return value in vex_lift
55 | 	Int size; // vex_lift passes this on as irsb_size to the ARM post processor
56 | 	Bool is_noop_block; // reset to False at the start of every lift
57 | 	// Conditional exits
58 | 	Int exit_count; // presumably the number of used entries in exits — confirm
59 | 	ExitInfo exits[MAX_EXITS];
60 | 	// The default exit
61 | 	Int is_default_exit_constant;
62 | 	Addr default_exit;
63 | 	// Instruction addresses
64 | 	Int insts; // instruction count; vex_lift passes it on as irsb_insts
65 | 	Addr inst_addrs[200]; // vex_lift uses inst_addrs[0] as the block address; the capacity 200 has no named constant
66 | 	// Data references
67 | 	Int data_ref_count; // reset to 0 at the start of every lift
68 | 	DataRef data_refs[MAX_DATA_REFS];
69 | 	// Constant propagation
70 | 	Int const_val_count; // reset to 0 at the start of every lift
71 | 	ConstVal const_vals[MAX_CONST_VALS];
72 | } VEXLiftResult;
73 | 
74 | VEXLiftResult *vex_lift( // returns a pointer to shared result storage, or NULL on failure (see pyvex.c)
75 | 		VexArch guest,
76 | 		VexArchInfo archinfo, // passed by value; the definition fills in per-arch fields on its own copy
77 | 		unsigned char *insn_start, // pointer to the guest bytes to lift
78 | 		unsigned long long insn_addr, // guest address assigned to those bytes
79 | 		unsigned int max_insns,
80 | 		unsigned int max_bytes,
81 | 		int opt_level,
82 | 		int traceflags,
83 | 		int allow_arch_optimizations, // single flag gating the ARM, ARM64 and x86 specific optimisations
84 | 		int strict_block_end,
85 | 		int collect_data_refs, // execute_irsb runs only when this or const_prop is non-zero
86 | 		int load_from_ro_regions,
87 | 		int const_prop,
88 | 		VexRegisterUpdates px_control,
89 | 		unsigned int lookback_amount); // named `lookback` in the definition
90 | 
91 | Bool register_readonly_region(ULong start, ULong size, unsigned char* content);
92 | void deregister_all_readonly_regions(void); // explicit (void): an empty list would not declare a prototype in C
93 | Bool register_initial_register_value(UInt offset, UInt size, ULong value);
94 | Bool reset_initial_register_values(void); // explicit (void), matching clear_log(void) and vex_init(void) in this header
95 | 
96 | #endif
97 | 


--------------------------------------------------------------------------------
/pyvex_c/pyvex_internal.h:
--------------------------------------------------------------------------------
 1 | #include "pyvex.h"
 2 | 
 3 | void arm_post_processor_determine_calls(Addr irsb_addr, Int irsb_size, Int irsb_insts, IRSB *irsb); // vex_lift calls this for ARM guests only
 4 | void mips32_post_processor_fix_unconditional_exit(IRSB *irsb); // vex_lift calls this for MIPS32 guests, before exits are collected
 5 | 
 6 | void remove_noops(IRSB* irsb); // first pass vex_lift runs on a freshly lifted IRSB
 7 | void zero_division_side_exits(IRSB* irsb);
 8 | void get_exits_and_inst_addrs(IRSB *irsb, VEXLiftResult *lift_r); // the next three write their findings into *lift_r
 9 | void get_default_exit_target(IRSB *irsb, VEXLiftResult *lift_r);
10 | void get_is_noop_block(IRSB *irsb, VEXLiftResult *lift_r);
11 | void execute_irsb(IRSB *irsb, VEXLiftResult *lift_r, VexArch guest, Bool load_from_ro_regions, Bool collect_data_refs, Bool const_prop);
12 | Addr get_value_from_const_expr(IRConst* con);
13 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=no-name-in-module,import-error,missing-class-docstring
  2 | import glob
  3 | import multiprocessing
  4 | import os
  5 | import platform
  6 | import shutil
  7 | import subprocess
  8 | import sys
  9 | from distutils.command.build import build as st_build
 10 | from distutils.util import get_platform
 11 | 
 12 | from setuptools import setup
 13 | from setuptools.command.develop import develop as st_develop
 14 | from setuptools.command.sdist import sdist as st_sdist
 15 | from setuptools.errors import LibError
 16 | 
 17 | PROJECT_DIR = os.path.dirname(os.path.realpath(__file__))
 18 | LIB_DIR = os.path.join(PROJECT_DIR, "pyvex", "lib")
 19 | INCLUDE_DIR = os.path.join(PROJECT_DIR, "pyvex", "include")
 20 | 
 21 | 
 22 | if sys.platform in ("win32", "cygwin"):
 23 |     LIBRARY_FILE = "pyvex.dll"
 24 |     STATIC_LIBRARY_FILE = "pyvex.lib"
 25 | elif sys.platform == "darwin":
 26 |     LIBRARY_FILE = "libpyvex.dylib"
 27 |     STATIC_LIBRARY_FILE = "libpyvex.a"
 28 | else:
 29 |     LIBRARY_FILE = "libpyvex.so"
 30 |     STATIC_LIBRARY_FILE = "libpyvex.a"
 31 | 
 32 | 
 33 | VEX_LIB_NAME = "vex"  # can also be vex-amd64-linux
 34 | VEX_PATH = os.path.abspath(os.path.join(PROJECT_DIR, "vex"))
 35 | 
 36 | 
 37 | def _build_vex():
 38 |     if len(os.listdir(VEX_PATH)) == 0:
 39 |         raise LibError(
 40 |             "vex submodule not cloned correctly, aborting.\nThis may be fixed with `git submodule update --init`"
 41 |         )
 42 | 
 43 |     e = os.environ.copy()
 44 |     e["MULTIARCH"] = "1"
 45 |     e["DEBUG"] = "1"
 46 | 
 47 |     if sys.platform == "win32":
 48 |         cmd = ["nmake", "/f", "Makefile-msvc", "all"]
 49 |     elif shutil.which("gmake") is not None:
 50 |         cmd = ["gmake", "-f", "Makefile-gcc", "-j", str(multiprocessing.cpu_count()), "all"]
 51 |     else:
 52 |         cmd = ["make", "-f", "Makefile-gcc", "-j", str(multiprocessing.cpu_count()), "all"]
 53 | 
 54 |     try:
 55 |         subprocess.run(cmd, cwd=VEX_PATH, env=e, check=True)
 56 |     except FileNotFoundError as err:
 57 |         raise LibError("Couldn't find " + cmd[0] + " in PATH") from err
 58 |     except subprocess.CalledProcessError as err:
 59 |         raise LibError("Error while building libvex: " + str(err)) from err
 60 | 
 61 | 
 62 | def _build_pyvex():
 63 |     e = os.environ.copy()
 64 |     e["VEX_LIB_PATH"] = VEX_PATH
 65 |     e["VEX_INCLUDE_PATH"] = os.path.join(VEX_PATH, "pub")
 66 |     e["VEX_LIB_FILE"] = os.path.join(VEX_PATH, "libvex.lib")
 67 | 
 68 |     if sys.platform == "win32":
 69 |         cmd = ["nmake", "/f", "Makefile-msvc"]
 70 |     elif shutil.which("gmake") is not None:
 71 |         cmd = ["gmake", "-f", "Makefile", "-j", str(multiprocessing.cpu_count())]
 72 |     else:
 73 |         cmd = ["make", "-f", "Makefile", "-j", str(multiprocessing.cpu_count())]
 74 | 
 75 |     try:
 76 |         subprocess.run(cmd, cwd="pyvex_c", env=e, check=True)
 77 |     except FileNotFoundError as err:
 78 |         raise LibError("Couldn't find " + cmd[0] + " in PATH") from err
 79 |     except subprocess.CalledProcessError as err:
 80 |         raise LibError("Error while building libpyvex: " + str(err)) from err
 81 | 
 82 | 
 83 | def _shuffle_files():
 84 |     shutil.rmtree(LIB_DIR, ignore_errors=True)
 85 |     shutil.rmtree(INCLUDE_DIR, ignore_errors=True)
 86 |     os.mkdir(LIB_DIR)
 87 |     os.mkdir(INCLUDE_DIR)
 88 | 
 89 |     pyvex_c_dir = os.path.join(PROJECT_DIR, "pyvex_c")
 90 | 
 91 |     shutil.copy(os.path.join(pyvex_c_dir, LIBRARY_FILE), LIB_DIR)
 92 |     shutil.copy(os.path.join(pyvex_c_dir, STATIC_LIBRARY_FILE), LIB_DIR)
 93 |     shutil.copy(os.path.join(pyvex_c_dir, "pyvex.h"), INCLUDE_DIR)
 94 |     for f in glob.glob(os.path.join(VEX_PATH, "pub", "*")):
 95 |         shutil.copy(f, INCLUDE_DIR)
 96 | 
 97 | 
 98 | def _clean_bins():
 99 |     shutil.rmtree(LIB_DIR, ignore_errors=True)
100 |     shutil.rmtree(INCLUDE_DIR, ignore_errors=True)
101 | 
102 | 
103 | def _build_ffi():
104 |     sys.path.append(".")  # PEP 517 doesn't include . in sys.path
105 |     import make_ffi  # pylint: disable=import-outside-toplevel
106 | 
107 |     sys.path.pop()
108 | 
109 |     make_ffi.doit(os.path.join(VEX_PATH, "pub"))
110 | 
111 | 
112 | class build(st_build):  # build command extended with the native libVEX/libpyvex steps
113 |     def run(self, *args):
114 |         self.execute(_build_vex, (), msg="Building libVEX")
115 |         self.execute(_build_pyvex, (), msg="Building libpyvex")
116 |         self.execute(_shuffle_files, (), msg="Copying libraries and headers")
117 |         self.execute(_build_ffi, (), msg="Creating CFFI defs file")
118 |         super().run(*args)  # the stock build runs only after the four steps above
119 | 
120 | 
121 | class develop(st_develop):  # develop command that runs the custom "build" command first
122 |     def run(self):
123 |         self.run_command("build")
124 |         super().run()
125 | 
126 | 
127 | class sdist(st_sdist):  # sdist command that first removes LIB_DIR and INCLUDE_DIR via _clean_bins
128 |     def run(self, *args):
129 |         self.execute(_clean_bins, (), msg="Removing binaries")
130 |         super().run(*args)
131 | 
132 | 
133 | cmdclass = {  # command overrides handed to setup() at the bottom of the file
134 |     "build": build,
135 |     "develop": develop,
136 |     "sdist": sdist,
137 | }
138 | 
139 | try:  # the editable_wheel override is optional: a ModuleNotFoundError on import is tolerated below
140 |     from setuptools.command.editable_wheel import editable_wheel as st_editable_wheel
141 | 
142 |     class editable_wheel(st_editable_wheel):  # runs the custom "build" command before the stock editable_wheel
143 |         def run(self):
144 |             self.run_command("build")
145 |             super().run()
146 | 
147 |     cmdclass["editable_wheel"] = editable_wheel
148 | except ModuleNotFoundError:
149 |     pass
150 | 
151 | if "bdist_wheel" in sys.argv and "--plat-name" not in sys.argv:
152 |     sys.argv.append("--plat-name")
153 |     name = get_platform()
154 |     if "linux" in name:
155 |         sys.argv.append("manylinux2014_" + platform.machine())
156 |     else:
157 |         # https://www.python.org/dev/peps/pep-0425/
158 |         sys.argv.append(name.replace(".", "_").replace("-", "_"))
159 | 
160 | setup(cmdclass=cmdclass)
161 | 


--------------------------------------------------------------------------------
/tests/test_arm_postprocess.py:
--------------------------------------------------------------------------------
  1 | import pyvex
  2 | 
  3 | 
  4 | ##########################
  5 | ### ARM Postprocessing ###
  6 | ##########################
  7 | def test_arm_postprocess_call():
  8 |     for i in range(3):  # every case below is lifted at opt_level 0, 1 and 2
  9 |         # Thumb
 10 | 
 11 |         # push  {r7}
 12 |         # add   r7, sp, #0
 13 |         # mov.w r1, #6
 14 |         # mov   r0, pc
 15 |         # add.w lr, r0, r1
 16 |         # b.w   10408
 17 |         irsb = pyvex.IRSB(
 18 |             data=(b"\x80\xb4" b"\x00\xaf" b"\x4f\xf0\x06\x01" b"\x78\x46" b"\x00\xeb\x01\x0e" b"\xff\xf7\xec\xbf"),
 19 |             mem_addr=0x1041F,
 20 |             arch=pyvex.ARCH_ARM_LE,
 21 |             num_inst=6,
 22 |             bytes_offset=1,
 23 |             opt_level=i,
 24 |         )
 25 |         assert irsb.jumpkind == "Ijk_Call"
 26 | 
 27 |         # mov   lr, pc
 28 |         # b.w   10408
 29 |         irsb = pyvex.IRSB(
 30 |             data=(b"\xfe\x46" b"\xe9\xe7"),
 31 |             mem_addr=0x10431,
 32 |             arch=pyvex.ARCH_ARM_LE,
 33 |             num_inst=2,
 34 |             bytes_offset=1,
 35 |             opt_level=i,
 36 |         )
 37 |         assert irsb.jumpkind == "Ijk_Call"
 38 | 
 39 |         # add   r2, pc, #0
 40 |         # add.w lr, r2, #6
 41 |         # ldr.w pc, [pc, #52]
 42 |         irsb = pyvex.IRSB(
 43 |             data=(b"\x00\xa2" b"\x02\xf1\x06\x0e" b"\xdf\xf8\x34\xf0"),
 44 |             mem_addr=0x10435,
 45 |             arch=pyvex.ARCH_ARM_LE,
 46 |             num_inst=3,
 47 |             bytes_offset=1,
 48 |             opt_level=i,
 49 |         )
 50 |         assert irsb.jumpkind == "Ijk_Call"
 51 | 
 52 |         # ldr   r0, [pc, #48]
 53 |         # mov   r1, pc
 54 |         # add.w r2, r1, #4
 55 |         # add.w r3, r2, #4
 56 |         # add.w r4, r3, #4
 57 |         # add.w lr, r4, #4
 58 |         # mov   pc, r0
 59 |         irsb = pyvex.IRSB(
 60 |             data=(
 61 |                 b"\x0c\x48"
 62 |                 b"\x79\x46"
 63 |                 b"\x01\xf1\x04\x02"
 64 |                 b"\x02\xf1\x04\x03"
 65 |                 b"\x03\xf1\x04\x04"
 66 |                 b"\x04\xf1\x04\x0e"
 67 |                 b"\x87\x46"
 68 |             ),
 69 |             mem_addr=0x1043F,
 70 |             arch=pyvex.ARCH_ARM_LE,
 71 |             num_inst=7,
 72 |             bytes_offset=1,
 73 |             opt_level=i,
 74 |         )
 75 |         assert irsb.jumpkind == "Ijk_Call"
 76 | 
 77 |         # eor.w r0, r0, r0
 78 |         # mov   lr, pc
 79 |         # b.n   10460
 80 |         irsb = pyvex.IRSB(
 81 |             data=(b"\x80\xea\x00\x00" b"\x86\x46" b"\x01\xe0"),
 82 |             mem_addr=0x10455,
 83 |             arch=pyvex.ARCH_ARM_LE,
 84 |             num_inst=3,
 85 |             bytes_offset=1,
 86 |             opt_level=i,
 87 |         )
 88 |         assert irsb.jumpkind == "Ijk_Boring"
 89 | 
 90 |         # Thumb compiled with optimizations (gcc -O2)
 91 | 
 92 |         # mov.w r1, #6
 93 |         # mov   r0, pc
 94 |         # add.w lr, r0, r1
 95 |         # b.w   104bc
 96 |         irsb = pyvex.IRSB(
 97 |             data=(b"\x4f\xf0\x06\x01" b"\x78\x46" b"\x00\xeb\x01\x0e" b"\x00\xf0\xc5\xb8"),
 98 |             mem_addr=0x10325,
 99 |             arch=pyvex.ARCH_ARM_LE,
100 |             num_inst=4,
101 |             bytes_offset=1,
102 |             opt_level=i,
103 |         )
104 |         assert irsb.jumpkind == "Ijk_Call"
105 | 
106 |         # ldr   r0, [pc, #56]
107 |         # mov   r1, pc
108 |         # add.w r2, r1, #4
109 |         # add.w r3, r2, #4
110 |         # add.w r4, r3, #4
111 |         # add.w lr, r4, #4
112 |         # mov   pc, r0
113 |         irsb = pyvex.IRSB(
114 |             data=(
115 |                 b"\x0e\x48"
116 |                 b"\x79\x46"
117 |                 b"\x01\xf1\x04\x02"
118 |                 b"\x02\xf1\x04\x03"
119 |                 b"\x03\xf1\x04\x04"
120 |                 b"\x04\xf1\x04\x0e"
121 |                 b"\x87\x46"
122 |             ),
123 |             mem_addr=0x10333,
124 |             arch=pyvex.ARCH_ARM_LE,
125 |             num_inst=7,
126 |             bytes_offset=1,
127 |             opt_level=i,
128 |         )
129 |         assert irsb.jumpkind == "Ijk_Call"
130 | 
131 |         # add   r2, pc, #0
132 |         # add.w lr, r2, #6
133 |         # ldr.w pc, [pc, #28]
134 |         irsb = pyvex.IRSB(
135 |             data=(b"\x00\xa2" b"\x02\xf1\x06\x0e" b"\xdf\xf8\x1c\xf0"),
136 |             mem_addr=0x10349,
137 |             arch=pyvex.ARCH_ARM_LE,
138 |             num_inst=3,
139 |             bytes_offset=1,
140 |             opt_level=i,
141 |         )
142 |         assert irsb.jumpkind == "Ijk_Call"
143 | 
144 |         # mov   lr, pc
145 |         # b.w   104bc
146 |         irsb = pyvex.IRSB(
147 |             data=(b"\xfe\x46" b"\xb2\xe0"),
148 |             mem_addr=0x10353,
149 |             arch=pyvex.ARCH_ARM_LE,
150 |             num_inst=2,
151 |             bytes_offset=1,
152 |             opt_level=i,
153 |         )
154 |         assert irsb.jumpkind == "Ijk_Call"
155 | 
156 |         # eor.w r0, r0, r0
157 |         # mov   lr, pc
158 |         # b.n   10362
159 |         irsb = pyvex.IRSB(
160 |             data=(b"\x80\xea\x00\x00" b"\x86\x46" b"\x01\xe0"),
161 |             mem_addr=0x10357,
162 |             arch=pyvex.ARCH_ARM_LE,
163 |             num_inst=3,
164 |             bytes_offset=1,
165 |             opt_level=i,
166 |         )
167 |         assert irsb.jumpkind == "Ijk_Boring"
168 | 
169 |         # ARM compiled with optimizations (gcc -O2)
170 | 
171 |         # mov   r1, #4
172 |         # mov   r0, pc
173 |         # add   lr, r0, r1
174 |         # ldr   pc, [pc, #56]
175 |         irsb = pyvex.IRSB(
176 |             data=(b"\x04\x10\xa0\xe3" b"\x0f\x00\xa0\xe1" b"\x01\xe0\x80\xe0" b"\x38\xf0\x9f\xe5"),
177 |             mem_addr=0x10298,
178 |             arch=pyvex.ARCH_ARM_LE,
179 |             num_inst=4,
180 |             opt_level=i,
181 |         )
182 |         assert irsb.jumpkind == "Ijk_Call"
183 | 
184 |         # add   r1, pc, #0
185 |         # add   r2, r1, #4
186 |         # add   r3, r2, #4
187 |         # add   r4, r3, #4
188 |         # add   lr, r4, #4
189 |         # b     10414
190 |         irsb = pyvex.IRSB(
191 |             data=(
192 |                 b"\x00\x10\x8f\xe2"
193 |                 b"\x04\x20\x81\xe2"
194 |                 b"\x04\x30\x82\xe2"
195 |                 b"\x04\x40\x83\xe2"
196 |                 b"\x04\xe0\x84\xe2"
197 |                 b"\x54\x00\x00\xea"
198 |             ),
199 |             mem_addr=0x102A8,
200 |             arch=pyvex.ARCH_ARM_LE,
201 |             num_inst=6,
202 |             opt_level=i,
203 |         )
204 |         assert irsb.jumpkind == "Ijk_Call"
205 | 
206 |         # mov   lr, pc
207 |         # b     10414
208 |         irsb = pyvex.IRSB(
209 |             data=(b"\x0f\xe0\xa0\xe1" b"\x52\x00\x00\xea"),
210 |             mem_addr=0x102C0,
211 |             arch=pyvex.ARCH_ARM_LE,
212 |             num_inst=2,
213 |             opt_level=i,
214 |         )
215 |         assert irsb.jumpkind == "Ijk_Call"
216 | 
217 |         # eor   r0, r0, r0
218 |         # mov   lr, r0
219 |         # b     102d8
220 |         irsb = pyvex.IRSB(
221 |             data=(b"\x00\x00\x20\xe0" b"\x00\xe0\xa0\xe1" b"\x00\x00\x00\xea"),
222 |             mem_addr=0x102C8,
223 |             arch=pyvex.ARCH_ARM_LE,
224 |             num_inst=3,
225 |             opt_level=i,
226 |         )
227 |         assert irsb.jumpkind == "Ijk_Boring"
228 | 
229 |         # ARM
230 | 
231 |         # push  {fp}
232 |         # add   fp, sp, #0
233 |         # mov   r1, #4
234 |         # mov   r0, pc
235 |         # add   lr, r0, r1
236 |         # ldr   pc, [pc, #68]
237 |         irsb = pyvex.IRSB(
238 |             data=(
239 |                 b"\x04\xb0\x2d\xe5"
240 |                 b"\x00\xb0\x8d\xe2"
241 |                 b"\x04\x10\xa0\xe3"
242 |                 b"\x0f\x00\xa0\xe1"
243 |                 b"\x01\xe0\x80\xe0"
244 |                 b"\x44\xf0\x9f\xe5"
245 |             ),
246 |             mem_addr=0x103E8,
247 |             arch=pyvex.ARCH_ARM_LE,
248 |             num_inst=6,
249 |             opt_level=i,
250 |         )
251 |         assert irsb.jumpkind == "Ijk_Call"
252 | 
253 |         # add   r1, pc, #0
254 |         # add   r2, r1, #4
255 |         # add   r3, r2, #4
256 |         # add   r4, r3, #4
257 |         # add   lr, r4, #4
258 |         # b     103c4
259 |         irsb = pyvex.IRSB(
260 |             data=(
261 |                 b"\x00\x10\x8f\xe2"
262 |                 b"\x04\x20\x81\xe2"
263 |                 b"\x04\x30\x82\xe2"
264 |                 b"\x04\x40\x83\xe2"
265 |                 b"\x04\xe0\x84\xe2"
266 |                 b"\x54\xff\xff\xea"
267 |             ),
268 |             mem_addr=0x10400,
269 |             arch=pyvex.ARCH_ARM_LE,
270 |             num_inst=6,
271 |             opt_level=i,
272 |         )
273 |         assert irsb.jumpkind == "Ijk_Call"
274 | 
275 |         # mov   lr, pc
276 |         # b     103c4
277 |         irsb = pyvex.IRSB(
278 |             data=(b"\x0f\xe0\xa0\xe1" b"\xe8\xff\xff\xea"),
279 |             mem_addr=0x10418,
280 |             arch=pyvex.ARCH_ARM_LE,
281 |             num_inst=2,
282 |             opt_level=i,
283 |         )
284 |         assert irsb.jumpkind == "Ijk_Call"
285 | 
286 |         # eor   r0, r0, r0
287 |         # mov   lr, r0
288 |         # b     10430
289 |         irsb = pyvex.IRSB(
290 |             data=(b"\x00\x00\x20\xe0" b"\x00\xe0\xa0\xe1" b"\x00\x00\x00\xea"),
291 |             mem_addr=0x10420,
292 |             arch=pyvex.ARCH_ARM_LE,
293 |             num_inst=3,
294 |             opt_level=i,
295 |         )
296 |         assert irsb.jumpkind == "Ijk_Boring"
297 | 
298 |         # From a "real thing" compiled with armc
299 |         # ARM:
300 |         #
301 |         irsb = pyvex.IRSB(
302 |             data=(
303 |                 b"H\x10\x9b\xe5"
304 |                 b"\x0b\x00\xa0\xe1"
305 |                 b"\x04 \x91\xe5"
306 |                 b"\x04\xe0\x8f\xe2"
307 |                 b"\x01\x10\x82\xe0"
308 |                 b"\x01\xf0\xa0\xe1"
309 |             ),
310 |             mem_addr=0x264B4C,
311 |             arch=pyvex.ARCH_ARM_LE,
312 |             num_inst=6,
313 |             opt_level=i,
314 |         )
315 |         assert irsb.jumpkind == "Ijk_Call"
316 | 
317 |         # 400000  str     lr, [sp,#-0x4]!
318 |         # 400004  mov     r1, #0xa
319 |         # 400008  cmp     r0, r1
320 |         # 40000c  blne    #FunctionB
321 |         irsb = pyvex.IRSB(
322 |             data=bytes.fromhex("04e02de50a10a0e3010050e10100001b"),
323 |             mem_addr=0x400000,
324 |             arch=pyvex.ARCH_ARM_LE,
325 |             num_inst=4,
326 |             opt_level=i,
327 |         )
328 |         assert len(irsb.exit_statements) == 1
329 |         assert irsb.exit_statements[0][2].jumpkind == "Ijk_Call"
330 |         assert irsb.jumpkind == "Ijk_Boring"
331 | 
332 | 
333 | def test_arm_postprocess_ret():
334 |     for i in range(3):  # the first two cases below are lifted at opt_level 0, 1 and 2
335 |         # e91ba8f0
336 |         # ldmdb  R11, {R4,R11,SP,PC}
337 |         irsb = pyvex.IRSB(
338 |             data=b"\xe9\x1b\xa8\xf0",
339 |             mem_addr=0xED4028,
340 |             arch=pyvex.ARCH_ARM_BE_LE,
341 |             num_inst=1,
342 |             opt_level=i,
343 |         )
344 |         assert irsb.jumpkind == "Ijk_Ret"
345 | 
346 |         # e91badf0  NOTE(review): the data below is e91ba8f0 again, not e91badf0 — confirm which was intended
347 |         # ldmdb  R11, {R4-R8,R10,R11,SP,PC}
348 |         irsb = pyvex.IRSB(
349 |             data=b"\xe9\x1b\xa8\xf0",
350 |             mem_addr=0x4D4028,
351 |             arch=pyvex.ARCH_ARM_BE_LE,
352 |             num_inst=1,
353 |             opt_level=i,
354 |         )
355 |         assert irsb.jumpkind == "Ijk_Ret"
356 | 
357 |         # 00a89de8
358 |         # ldmfd SP, {R11,SP,PC}
359 |         # Fixed by Fish in the VEX fork, commit 43c78f608490f9a5c71c7fca87c04759c1b93741
360 |         irsb = pyvex.IRSB(
361 |             data=b"\x00\xa8\x9d\xe8",
362 |             mem_addr=0xC800B57C,
363 |             arch=pyvex.ARCH_ARM_BE,
364 |             num_inst=1,
365 |             opt_level=1,  # NOTE(review): fixed at 1 although this sits inside the `for i` loop — confirm intended
366 |         )
367 |         assert irsb.jumpkind == "Ijk_Ret"
368 | 
369 | 
370 | if __name__ == "__main__":
371 |     test_arm_postprocess_call()
372 |     test_arm_postprocess_ret()
373 | 


--------------------------------------------------------------------------------
/tests/test_gym.py:
--------------------------------------------------------------------------------
 1 | # pylint: disable=missing-class-docstring
 2 | import unittest
 3 | 
 4 | import pyvex
 5 | 
 6 | 
 7 | class Tests(unittest.TestCase):
 8 |     def test_x86_aam(self):
 9 |         irsb = pyvex.lift(b"\xd4\x0b", 0, pyvex.ARCH_X86)
10 |         self.assertEqual(irsb.jumpkind, "Ijk_Boring")
11 |         self.assertEqual(irsb.size, 2)
12 | 
13 |     def test_x86_aad(self):
14 |         irsb = pyvex.lift(b"\xd5\x0b", 0, pyvex.ARCH_X86)
15 |         self.assertEqual(irsb.jumpkind, "Ijk_Boring")
16 |         self.assertEqual(irsb.size, 2)
17 | 
18 |     def test_x86_xgetbv(self):
19 |         irsb = pyvex.lift(b"\x0f\x01\xd0", 0, pyvex.ARCH_X86)
20 |         self.assertEqual(irsb.jumpkind, "Ijk_Boring")
21 |         self.assertEqual(irsb.size, 3)
22 | 
23 |     def test_x86_rdmsr(self):
24 |         irsb = pyvex.lift(b"\x0f\x32", 0, pyvex.ARCH_X86)
25 |         self.assertEqual(irsb.jumpkind, "Ijk_Boring")
26 |         self.assertEqual(irsb.size, 2)
27 | 
28 | 
29 | if __name__ == "__main__":
30 |     unittest.main()
31 | 


--------------------------------------------------------------------------------
/tests/test_irsb_property_caching.py:
--------------------------------------------------------------------------------
 1 | # pylint: disable=missing-class-docstring,no-self-use
 2 | import unittest
 3 | 
 4 | import pyvex
 5 | 
 6 | 
 7 | class TestCacheInvalidationOnExtend(unittest.TestCase):
 8 |     def test_cache_invalidation_on_extend(self):
 9 |         b = pyvex.block.IRSB(b"\x50", 0, pyvex.ARCH_X86)
10 |         assert b.size == 1
11 |         assert b.instructions == 1
12 |         toappend = pyvex.block.IRSB(b"\x51", 0, pyvex.ARCH_X86)
13 |         toappend.jumpkind = "Ijk_Invalid"
14 |         toappend._direct_next = None  # Invalidate the cache because I manually changed the jumpkind
15 |         assert not toappend.direct_next
16 |         b.extend(toappend)
17 |         assert b.size == 2
18 |         assert b.instructions == 2
19 |         assert not b.direct_next
20 | 
21 | 
22 | if __name__ == "__main__":
23 |     unittest.main()
24 | 


--------------------------------------------------------------------------------
/tests/test_lift.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | import pyvex
 4 | from pyvex import IRSB, ffi, lift
 5 | from pyvex.errors import PyVEXError
 6 | from pyvex.lifting.util import GymratLifter, Instruction, JumpKind
 7 | 
 8 | 
 9 | # pylint: disable=R0201
10 | # pylint: disable=C0115
class TestLift(unittest.TestCase):
    """Lifting regression tests: partial gymrat input, exit-limit relifting and ``max_bytes``."""

    def test_partial_lift(self):
        """This tests that gymrat correctly handles the case where an
        instruction is longer than the remaining input.
        """

        class NOP(Instruction):
            name = "nop"
            bin_format = "0000111100001111"

            def compute_result(self, *args):
                pass

        class NOPLifter(GymratLifter):
            instrs = [NOP]

        lifter = NOPLifter(pyvex.ARCH_AMD64, 0)
        # this should not throw an exception
        block = lifter.lift("\x0f\x0fa")
        # Only the complete two-byte NOP is lifted; the trailing byte is reported as NoDecode.
        assert block.size == 2
        assert block.instructions == 1
        assert block.jumpkind == JumpKind.NoDecode

    def test_skipstmts_toomanyexits(self):
        """Check that ``skip_stmts=True`` is abandoned once a block exceeds ``IRSB.MAX_EXITS``."""
        # https://github.com/angr/pyvex/issues/153

        # Register the restore *before* lowering the limit, so that a failing
        # assertion below cannot leak the modified class attribute into the
        # tests that run afterwards.
        self.addCleanup(setattr, IRSB, "MAX_EXITS", IRSB.MAX_EXITS)
        IRSB.MAX_EXITS = 32

        bytes_ = bytes.fromhex(
            "0DF1B00B2EAB94E8030008938BE803000DF1C0089AE8030083E"
            "80300019B0DF1F00A339AE669E26193E8030085E8030098E803"
            "0083E80300069B95E8030088E80300A26993E803004A9200236"
            "3622362A361E362A36238AC029A069484E8030012AC09982993"
            "28932B9303C885E8030092E8030084E803009AE8030082E8030"
            "02A460A9D26993E910B9941910D9942910C992A93409548AD43"
            "9194E803008AE8030027983F9927913F909BE803000DF5887B2"
            "69335938BE803000DF58C7B089903C98BE8030098E8030084E8"
            "030095E8030088E803004B993391329394E8030034933793369"
            "3069C059B4C93049B4E9350ABCDF834C1CDF83CE185E8030094"
            "E803004B9683E8030015A94498C4F7E2EA "
        )
        arch = pyvex.ARCH_ARM_LE
        # Lifting only a short prefix of the buffer (bytes_[:34]) will not cause
        # any problem. Statements should be skipped as expected
        b = IRSB(bytes_[:34], 0xC6951, arch, opt_level=1, bytes_offset=5, skip_stmts=True)
        assert len(b.exit_statements) > 0
        assert not b.has_statements

        # Lifting the entire block will cause the number of exit statements go
        # beyond the limit (currently 32). PyVEX will
        # automatically relift this block without skipping the statements
        b = IRSB(bytes_, 0xC6951, arch, opt_level=1, bytes_offset=5, skip_stmts=True)
        assert b.statements is not None
        assert len(b.exit_statements) > 32

    def test_max_bytes(self):
        """Check how ``max_bytes`` bounds the lifted size for ``bytes`` and cffi buffer input."""
        data = bytes.fromhex("909090909090c3")
        arch = pyvex.ARCH_X86
        # With a bytes object the size is capped by both max_bytes and len(data).
        assert lift(data, 0x1000, arch, max_bytes=None).size == len(data)
        assert lift(data, 0x1000, arch, max_bytes=len(data) - 1).size == len(data) - 1
        assert lift(data, 0x1000, arch, max_bytes=len(data) + 1).size == len(data)

        # With a cffi buffer, omitting max_bytes is expected to raise PyVEXError.
        data2 = ffi.from_buffer(data)
        self.assertRaises(PyVEXError, lift, data2, 0x1000, arch)
        assert lift(data2, 0x1000, arch, max_bytes=len(data)).size == len(data)
        assert lift(data2, 0x1000, arch, max_bytes=len(data) - 1).size == len(data) - 1
80 | 
81 | 
# Allow running this file directly as a script via unittest's own runner.
if __name__ == "__main__":
    unittest.main()
84 | 


--------------------------------------------------------------------------------
/tests/test_mips32_postprocess.py:
--------------------------------------------------------------------------------
 1 | import pyvex
 2 | 
 3 | 
 4 | def test_mips32_unconditional_jumps():
 5 |     # 0040000c: 10000002 ; <input:28> beq $zero, $zero, LABEL_ELSE_IF
 6 |     # 00400010: 00000000 ; <input:31> sll $zero, $zero, 0
 7 |     # 00400014: 08100012 ; <input:34> j LABEL_DONE
 8 |     # 00400018: <LABEL_ELSE_IF> ; <input:37> LABEL_ELSE_IF:
 9 |     irsb = pyvex.IRSB(
10 |         data=(b"\x10\x00\x00\x02" b"\x00\x00\x00\x00"),
11 |         mem_addr=0x40000C,
12 |         arch=pyvex.ARCH_MIPS32_BE,
13 |         num_inst=2,
14 |         opt_level=0,
15 |     )
16 |     assert type(irsb.next) is pyvex.expr.Const
17 |     assert irsb.next.con.value == 0x400018
18 | 
19 | 
# Allow running this test directly as a script, without a test runner.
if __name__ == "__main__":
    test_mips32_unconditional_jumps()
22 | 


--------------------------------------------------------------------------------
/tests/test_s390x_exrl.py:
--------------------------------------------------------------------------------
 1 | import pyvex
 2 | 
 3 | 
 4 | def test_s390x_exrl():
 5 |     arch = pyvex.ARCH_S390X
 6 |     irsb = pyvex.lift(
 7 |         b"\xc6\x10\x00\x00\x00\x04"  # exrl %r1,0x400408
 8 |         b"\x07\xfe"  # br %r14
 9 |         b"\xd7\x00\x20\x00\x30\x00"  # xc 0(0,%r2),0(%r3)
10 |         b"\x7d\xa7",  # padding
11 |         0x400400,
12 |         arch,
13 |     )
14 |     irsb_str = str(irsb)
15 | 
16 |     # check last_execute_target, only top 6 bytes are relevant
17 |     assert "0xd700200030000000" in irsb_str
18 |     assert "s390x_dirtyhelper_EX" in irsb_str
19 |     assert "{ PUT(ia) = 0x400400; Ijk_Boring }" in irsb_str
20 |     assert "------ IMark(0x400406, 2, 0) ------" in irsb_str
21 |     assert irsb.jumpkind == "Ijk_Ret"
22 | 
23 | 
# Allow running this test directly as a script, without a test runner.
if __name__ == "__main__":
    test_s390x_exrl()
26 | 


--------------------------------------------------------------------------------
/tests/test_s390x_lochi.py:
--------------------------------------------------------------------------------
 1 | import pyvex
 2 | 
 3 | 
 4 | def test_s390x_lochi():
 5 |     arch = pyvex.ARCH_S390X
 6 |     irsb = pyvex.lift(b"\xec\x18\xab\xcd\x00\x42", 0x400400, arch)  # lochi %r1,0xabcd,8
 7 |     irsb_str = str(irsb)
 8 | 
 9 |     assert "s390_calculate_cond(0x0000000000000008" in irsb_str
10 |     assert "PUT(r1_32) = 0xffffabcd" in irsb_str
11 |     assert irsb.jumpkind in "Ijk_Boring"
12 | 
13 | 
# Allow running this test directly as a script, without a test runner.
if __name__ == "__main__":
    test_s390x_lochi()
16 | 


--------------------------------------------------------------------------------
/tests/test_s390x_vl.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import pyvex
 3 | 
 4 | 
 5 | def test_s390x_vl():
 6 |     arch = pyvex.ARCH_S390X
 7 |     irsb = pyvex.lift(b"\xe7\x40\x90\xa8\x00\x06", 0x11C6C9E, arch)  # vl %v4, 0xa8(%r9)
 8 |     irsb_str = str(irsb)
 9 | 
10 |     assert "GET:I64(r9)" in irsb_str
11 |     assert "Add64(0x00000000000000a8" in irsb_str
12 |     assert "LDbe:V128" in irsb_str
13 |     assert "PUT(v4) =" in irsb_str
14 |     assert irsb.jumpkind == "Ijk_Boring"
15 | 
16 | 
# Allow running this test directly as a script, without a test runner.
if __name__ == "__main__":
    test_s390x_vl()
19 | 


--------------------------------------------------------------------------------
/tests/test_spotter.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | import pyvex
  4 | import pyvex.lifting
  5 | from pyvex.lifting import register
  6 | from pyvex.lifting.util import GymratLifter, Instruction, Type
  7 | 
# Path "../../binaries/tests" joined onto the directory of this file (after resolving symlinks).
# NOTE(review): not referenced elsewhere in this file — confirm whether it is still needed.
test_location = str(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../binaries/tests"))
  9 | 
 10 | 
 11 | class Instruction_IMAGINARY(Instruction):
 12 |     bin_format = bin(0x0F0B)[2:].zfill(16)
 13 |     name = "IMAGINARY"
 14 | 
 15 |     def compute_result(self):
 16 |         a = self.constant(10, Type.int_27)
 17 |         b = self.constant(20, Type.int_27)
 18 |         a + b
 19 | 
 20 | 
class ImaginarySpotter(GymratLifter):
    """Gymrat lifter whose only known instruction is Instruction_IMAGINARY."""

    instrs = [Instruction_IMAGINARY]


# Register the lifter under the "X86" architecture name.
register(ImaginarySpotter, "X86")
 26 | 
# Expected textual form of the block lifted in test_basic; both sides are
# stripped of surrounding whitespace before being compared.
basic_goal = """
IRSB {
   t0:Ity_I27

   00 | ------ IMark(0x1, 2, 0) ------
   01 | t0 = Add27((0xa :: Ity_I27),(0x14 :: Ity_I27))
   NEXT: PUT(eip) = 0x00000003; Ijk_Boring
}
"""
 36 | 
 37 | 
 38 | def test_basic():
 39 |     b = pyvex.block.IRSB(b"\x0f\x0b", 1, pyvex.ARCH_X86)
 40 |     assert str(b).strip() == basic_goal.strip()
 41 | 
 42 | 
 43 | def test_embedded():
 44 |     b = pyvex.block.IRSB(b"\x50" * 3 + b"\x0f\x0b" + b"\x50" * 6, 1, pyvex.ARCH_X86)
 45 |     for i, stmt in enumerate(b.statements):
 46 |         if type(stmt) is pyvex.stmt.IMark and stmt.addr == 0x4 and stmt.len == 2 and stmt.delta == 0:
 47 |             imaginary_trans_stmt = b.statements[i + 1]
 48 |             assert type(imaginary_trans_stmt) is pyvex.stmt.WrTmp
 49 |             addexpr = imaginary_trans_stmt.data
 50 |             assert type(addexpr) is pyvex.expr.Binop
 51 |             assert addexpr.op == "Iop_Add27"
 52 |             arg1, arg2 = addexpr.args
 53 |             assert type(arg1) is pyvex.expr.Const
 54 |             assert arg1.con.value == 10
 55 |             assert type(arg2) is pyvex.expr.Const
 56 |             assert arg2.con.value == 20
 57 |             return
 58 |     assert False, "Could not find matching IMark"
 59 | 
 60 | 
 61 | class Instruction_MSR(Instruction):
 62 |     bin_format = bin(0x8808F380)[2:].zfill(32)
 63 |     name = "MSR.W"
 64 | 
 65 |     def compute_result(self):
 66 |         a = self.constant(10, Type.int_27)
 67 |         b = self.constant(20, Type.int_27)
 68 |         a + b
 69 | 
 70 | 
 71 | class Instruction_CPSIEI(Instruction):
 72 |     bin_format = bin(0xB662)[2:].zfill(16)
 73 |     name = "CPSIE I"
 74 | 
 75 |     def compute_result(self):
 76 |         a = self.constant(10, Type.int_27)
 77 |         b = self.constant(20, Type.int_27)
 78 |         a + b
 79 | 
 80 | 
 81 | class Instruction_CPSIEF(Instruction):
 82 |     bin_format = bin(0xB661)[2:].zfill(16)
 83 |     name = "CPSIE F"
 84 | 
 85 |     def compute_result(self):
 86 |         a = self.constant(10, Type.int_27)
 87 |         b = self.constant(20, Type.int_27)
 88 |         a + b
 89 | 
 90 | 
class CortexSpotter(GymratLifter):
    """Gymrat lifter that knows the MSR.W, CPSIE I and CPSIE F spotter instructions."""

    instrs = [Instruction_MSR, Instruction_CPSIEI, Instruction_CPSIEF]


# Register the lifter under the "ARMEL" architecture name.
register(CortexSpotter, "ARMEL")
 96 | 
 97 | 
 98 | def test_tmrs():
 99 |     arch = pyvex.ARCH_ARM_LE
100 |     ins = b"\xef\xf3\x08\x82"
101 |     b = pyvex.block.IRSB(ins, 1, arch)
102 |     assert b.jumpkind == "Ijk_Boring"
103 |     assert isinstance(b.statements[1].data, pyvex.expr.Get)
104 |     assert arch.translate_register_name(b.statements[1].data.offset) in ["sp", "r13"]
105 |     assert isinstance(b.statements[2], pyvex.stmt.Put)
106 | 
107 | 
def test_tmsr():
    """Lift the four bytes 82 f3 08 88 on little-endian ARM (opt_level=3) and check the first statements.

    Statement 1 must read (``Get``) the register named "r2", and statement 2
    must be a ``Put``.
    """
    arm = pyvex.ARCH_ARM_LE
    encoding = b"\x82\xf3\x08\x88"
    block = pyvex.block.IRSB(encoding, 1, arm, opt_level=3)
    assert block.jumpkind == "Ijk_Boring"

    source = block.statements[1].data
    assert isinstance(source, pyvex.expr.Get)
    assert arm.translate_register_name(source.offset) == "r2"
    assert isinstance(block.statements[2], pyvex.stmt.Put)
116 | 
117 | 
if __name__ == "__main__":
    # Run every test of this file, in definition order, when executed as a script.
    for _test in (test_basic, test_embedded, test_tmrs, test_tmsr):
        _test()
123 | 


--------------------------------------------------------------------------------
/tests/test_ud2.py:
--------------------------------------------------------------------------------
 1 | import pyvex
 2 | 
 3 | 
 4 | def test_ud2():
 5 |     # On x86 and amd64, ud2 is a valid 2-byte instruction that means "undefined instruction". Upon decoding a basic
 6 |     # block that ends with ud2, we should treat it as an explicit NoDecode, instead of skipping the instruction and
 7 |     # resume lifting.
 8 | 
 9 |     b = pyvex.block.IRSB(b"\x90\x90\x0f\x0b\x90\x90", 0x20, pyvex.ARCH_AMD64)
10 |     assert b.jumpkind == "Ijk_NoDecode"
11 |     assert b.next.con.value == 0x22
12 |     assert b.size == 4
13 | 
14 | 
# Allow running this test directly as a script, without a test runner.
if __name__ == "__main__":
    test_ud2()
17 | 


--------------------------------------------------------------------------------