├── src └── bytecode │ ├── py.typed │ ├── utils.py │ ├── __init__.py │ ├── flags.py │ └── bytecode.py ├── doc ├── requirements.txt ├── index.rst ├── todo.rst ├── byteplay_codetransformer.rst ├── usage.rst ├── Makefile ├── make.bat ├── cfg.rst ├── conf.py └── changelog.rst ├── codecov.yml ├── MANIFEST.in ├── .github ├── dependabot.yml ├── FUNDING.yml └── workflows │ ├── docs.yml │ ├── frameworks.yml │ ├── cis.yml │ └── release.yml ├── TODO.rst ├── scripts └── frameworks │ └── boto3 │ ├── run.sh │ └── setup.sh ├── .gitignore ├── .pre-commit-config.yaml ├── .coveragerc ├── tests ├── util_annotation.py ├── cell_free_vars_cases.py ├── test_code.py ├── frameworks │ ├── sitecustomize.py │ ├── function.py │ └── module.py ├── __init__.py ├── test_flags.py ├── exception_handling_cases.py ├── long_lines_example.py ├── test_misc.py └── test_instr.py ├── .readthedocs.yaml ├── tox.ini ├── COPYING ├── README.rst └── pyproject.toml /src/bytecode/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doc/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx>=4 2 | sphinx-rtd-theme>=1 3 | sphinx-tabs -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | # .codecov.yml: 2 | coverage: 3 | fixes: 4 | - "__init__.py::bytecode/__init__.py" 5 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include COPYING 2 | include MANIFEST.in 3 | include README.rst 4 | include tox.ini 5 | 6 | include doc/conf.py doc/make.bat doc/Makefile 7 | include doc/*.rst 8 | -------------------------------------------------------------------------------- /.github/dependabot.yml: 
-------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | # Maintain dependencies for GitHub Actions 4 | - package-ecosystem: "github-actions" 5 | directory: "/" 6 | schedule: 7 | interval: "weekly" -------------------------------------------------------------------------------- /TODO.rst: -------------------------------------------------------------------------------- 1 | Python 3.12 support 2 | =================== 3 | 4 | - LOAD_ATTR changes follow changes made to LOAD_GLOBAL 5 | - update tests 6 | 7 | * ConcreteBytecode.to_code(): better error reporting on bugs in the code 8 | -------------------------------------------------------------------------------- /src/bytecode/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from typing import Final 3 | 4 | PY312: Final[bool] = sys.version_info >= (3, 12) 5 | PY313: Final[bool] = sys.version_info >= (3, 13) 6 | PY314: Final[bool] = sys.version_info >= (3, 14) 7 | -------------------------------------------------------------------------------- /scripts/frameworks/boto3/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -eu 2 | 3 | set -e 4 | set -u 5 | 6 | PREFIX=${1}-${2} 7 | PY=${2} 8 | 9 | cd ${PREFIX}/boto3 10 | source ${PREFIX}/.venv/bin/activate 11 | python scripts/ci/run-tests 12 | deactivate 13 | cd - 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | *.swp 3 | MANIFEST 4 | build 5 | dist 6 | 7 | # generated by setuptools-scm 8 | src/bytecode/version.py 9 | 10 | # generated by tox 11 | .tox/ 12 | bytecode.egg-info/ 13 | 14 | .mypy_cache 15 | .dmypy.json 16 | .spyproject 17 | .idea/ 18 | .vscode/ 19 | .coverage 20 | coverage.xml 21 | .pytest_cache 22 | .cache 23 | .venv 24 | 
-------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | default_language_version: 3 | python: python3.11 4 | repos: 5 | - repo: https://github.com/astral-sh/ruff-pre-commit 6 | # Ruff version. 7 | rev: v0.1.5 8 | hooks: 9 | # Run the linter. 10 | - id: ruff 11 | # Run the formatter. 12 | - id: ruff-format 13 | - repo: https://github.com/pre-commit/mirrors-mypy 14 | rev: v1.7.0 15 | hooks: 16 | - id: mypy 17 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | 4 | [paths] 5 | source = 6 | src 7 | */site-packages 8 | 9 | [report] 10 | # Regexes for lines to exclude from consideration 11 | exclude_lines = 12 | # Have to re-enable the standard pragma 13 | pragma: no cover 14 | 15 | # Don't complain if tests don't hit defensive assertion code: 16 | raise NotImplementedError 17 | pass 18 | 19 | # Don't complain about ellipsis in overload 20 | \.\.\. 21 | -------------------------------------------------------------------------------- /scripts/frameworks/boto3/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -eu 2 | 3 | set -e 4 | set -u 5 | 6 | PREFIX=${1}-${2} 7 | PY=${2} 8 | 9 | # Clone boto3 10 | test -d ${PREFIX}/boto3 || git clone --depth=1 https://github.com/boto/boto3.git ${PREFIX}/boto3 11 | 12 | # Create venv 13 | python$PY -m venv ${PREFIX}/.venv 14 | source ${PREFIX}/.venv/bin/activate 15 | 16 | # Install bytecode 17 | pip install setuptools wheel 18 | pip install -e . 
19 | 20 | # Install dependencies 21 | cd ${PREFIX}/boto3 22 | python scripts/ci/install 23 | cd - 24 | 25 | deactivate 26 | -------------------------------------------------------------------------------- /tests/util_annotation.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import textwrap 4 | import types 5 | 6 | 7 | def get_code(source, *, filename="", function=False): 8 | source = textwrap.dedent(source).strip() 9 | code = compile(source, filename, "exec") 10 | if function: 11 | sub_code = [ 12 | const for const in code.co_consts if isinstance(const, types.CodeType) 13 | ] 14 | if len(sub_code) != 1: 15 | raise ValueError("unable to find function code") 16 | code = sub_code[0] 17 | return code 18 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [MatthieuDartiailh] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 13 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See 
https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-20.04 11 | tools: 12 | python: "3.9" 13 | 14 | # Build documentation in the doc/ directory with Sphinx 15 | sphinx: 16 | configuration: doc/conf.py 17 | 18 | # Enable epub output 19 | formats: 20 | - epub 21 | 22 | # Optionally declare the Python requirements required to build your docs 23 | python: 24 | install: 25 | - requirements: doc/requirements.txt 26 | - method: pip 27 | path: . 28 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py3, py38, py39, py310, py311, py312, py313, py314, fmt, docs 3 | isolated_build = true 4 | 5 | [testenv] 6 | deps= 7 | pytest 8 | pytest-cov 9 | pytest-subtests 10 | commands = pytest --cov bytecode --cov-report=xml -v tests 11 | 12 | [testenv:fmt] 13 | basepython = python3 14 | deps= 15 | ruff check 16 | commands = 17 | ruff src/bytecode tests 18 | ruff format --check src/bytecode tests 19 | 20 | [testenv:lint] 21 | basepython = python3 22 | deps= 23 | ruff 24 | mypy 25 | pytest 26 | commands = 27 | ruff check src/bytecode tests 28 | ruff format --check src/bytecode tests 29 | mypy src tests 30 | 31 | [testenv:docs] 32 | basepython = python3 33 | deps= 34 | -r doc/requirements.txt 35 | commands = 36 | pip install . 
37 | sphinx-build doc docs_output -W -b html 38 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Documentation building 2 | on: 3 | schedule: 4 | - cron: "0 0 * * 3" 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | paths: 12 | - .github/workflows/docs.yml 13 | - "src/**" 14 | - "doc/**" 15 | - pyproject.toml 16 | 17 | jobs: 18 | docs: 19 | name: Docs building 20 | runs-on: ubuntu-latest 21 | steps: 22 | - uses: actions/checkout@v6 23 | - name: Get history and tags for SCM versioning to work 24 | run: | 25 | git fetch --prune --unshallow 26 | git fetch --depth=1 origin +refs/tags/*:refs/tags/* 27 | - name: Set up Python 28 | uses: actions/setup-python@v6 29 | with: 30 | python-version: '3.x' 31 | - name: Install dependencies 32 | run: | 33 | python -m pip install --upgrade pip 34 | python -m pip install tox 35 | - name: Build documentation 36 | env: 37 | TOXENV: docs 38 | run: | 39 | tox 40 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | ******** 2 | bytecode 3 | ******** 4 | 5 | ``bytecode`` is a Python module to generate and modify bytecode. 6 | 7 | * `bytecode project homepage at GitHub 8 | <https://github.com/MatthieuDartiailh/bytecode>`_ (code, bugs) 9 | * `bytecode documentation 10 | <https://bytecode.readthedocs.io/en/latest/>`_ (this documentation) 11 | * `Download latest bytecode release at the Python Cheeseshop (PyPI) 12 | <https://pypi.python.org/pypi/bytecode>`_ 13 | 14 | 15 | Table Of Contents 16 | ================= 17 | 18 | .. toctree:: 19 | :maxdepth: 3 20 | 21 | usage 22 | cfg 23 | api 24 | byteplay_codetransformer 25 | changelog 26 | todo 27 | 28 | 29 | See also 30 | ======== 31 | 32 | * `codetransformer 33 | `_ 34 | * `byteplay 35 | `_ 36 | * `byteasm 37 | `_: an "assembler" for Python 3 38 | bytecodes. 
39 | * `BytecodeAssembler `_ 40 | * `PEP 511 -- API for code transformers 41 | `_ 42 | 43 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright Contributors to the bytecode project. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a 5 | copy of this software and associated documentation files (the 6 | "Software"), to deal in the Software without restriction, including 7 | without limitation the rights to use, copy, modify, merge, publish, 8 | distribute, sublicense, and/or sell copies of the Software, and to 9 | permit persons to whom the Software is furnished to do so, subject to 10 | the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included 13 | in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 18 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 19 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 20 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 21 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.github/workflows/frameworks.yml: -------------------------------------------------------------------------------- 1 | name: Frameworks tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | paths: 11 | - .github/workflows/cis.yml 12 | - "src/**" 13 | - "tests/frameworks/*" 14 | - pyproject.toml 15 | - tox.ini 16 | 17 | jobs: 18 | boto3: 19 | name: boto3 with Python ${{ matrix.python-version }} 20 | runs-on: ubuntu-latest 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | python-version: ["3.11", "3.12", "3.13", "3.14"] 25 | 26 | steps: 27 | - uses: actions/checkout@v6 28 | with: 29 | fetch-depth: 0 30 | 31 | - name: Set up Python 32 | uses: actions/setup-python@v6 33 | with: 34 | python-version: ${{ matrix.python-version }}-dev 35 | 36 | - name: Setup 37 | run: bash scripts/frameworks/boto3/setup.sh /tmp/boto3 ${{ matrix.python-version }} 38 | 39 | - name: Run 40 | env: 41 | PYTHONPATH: ${{ github.workspace }}/tests/frameworks/ 42 | run: bash scripts/frameworks/boto3/run.sh /tmp/boto3 ${{ matrix.python-version }} 43 | -------------------------------------------------------------------------------- /doc/todo.rst: -------------------------------------------------------------------------------- 1 | TODO list 2 | ========= 3 | 4 | * Remove Bytecode.cellvars and Bytecode.freevars? 5 | * Remove Bytecode.first_lineno? Compute it on conversions. 6 | * Add instruction constants/enums? Example:: 7 | 8 | from bytecode import instructions as i 9 | 10 | bytecode = Bytecode([i.LOAD_NAME('print'), 11 | i.LOAD_CONST('Hello World!'), 12 | i.CALL_FUNCTION(1), 13 | i.POP_TOP(), 14 | i.LOAD_CONST(None), 15 | i.RETURN_VALUE()]) 16 | 17 | Should we support instructions without parentheses for instructions with no 18 | parameter? 
Example with POP_TOP and RETURN_VALUE:: 19 | 20 | from bytecode import instructions as i 21 | 22 | bytecode = Bytecode([i.LOAD_NAME('print'), 23 | i.LOAD_CONST('Hello World!'), 24 | i.CALL_FUNCTION(1), 25 | i.POP_TOP, 26 | i.LOAD_CONST(None), 27 | i.RETURN_VALUE]) 28 | 29 | 30 | * Nicer API for function arguments in bytecode object? Bytecode has argcount, 31 | kwonlyargcount and argnames. 4 types of parameters: indexed, ``*args``, 32 | ``**kwargs`` and ``*, kwonly=3``. See inspect.signature() 33 | -------------------------------------------------------------------------------- /.github/workflows/cis.yml: -------------------------------------------------------------------------------- 1 | name: Continuous Integration 2 | on: 3 | schedule: 4 | - cron: "0 0 * * 3" 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | paths: 12 | - .github/workflows/cis.yml 13 | - "src/**" 14 | - "tests/*" 15 | - pyproject.toml 16 | - tox.ini 17 | 18 | jobs: 19 | lint: 20 | name: Lint code 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v6 24 | - name: Set up Python 25 | uses: actions/setup-python@v6 26 | with: 27 | python-version: "3.12" 28 | - name: Install tools 29 | run: | 30 | python -m pip install --upgrade pip 31 | python -m pip install tox 32 | - name: Linting 33 | env: 34 | TOXENV: lint 35 | run: | 36 | tox 37 | 38 | tests: 39 | name: Unit tests 40 | runs-on: ubuntu-latest 41 | strategy: 42 | fail-fast: false 43 | matrix: 44 | include: 45 | - python-version: "3.11" 46 | toxenv: py311 47 | - python-version: "3.12" 48 | toxenv: py312 49 | - python-version: "3.13" 50 | toxenv: py313 51 | - python-version: "3.14" 52 | toxenv: py314 53 | steps: 54 | - uses: actions/checkout@v6 55 | - name: Get history and tags for SCM versioning to work 56 | run: | 57 | git fetch --prune --unshallow 58 | git fetch --depth=1 origin +refs/tags/*:refs/tags/* 59 | - name: Set up Python ${{ matrix.python-version }} 60 | uses: actions/setup-python@v6 61 
| with: 62 | python-version: ${{ matrix.python-version }} 63 | - name: Install dependencies 64 | run: | 65 | python -m pip install --upgrade pip 66 | python -m pip install tox 67 | - name: Test 68 | env: 69 | TOXENV: ${{ matrix.toxenv }} 70 | run: | 71 | tox 72 | - name: Upload coverage to Codecov 73 | uses: codecov/codecov-action@v5 74 | if: github.event_name != 'schedule' 75 | with: 76 | token: ${{ secrets.CODECOV_TOKEN }} 77 | name: codecov-umbrella 78 | fail_ci_if_error: true 79 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ******** 2 | bytecode 3 | ******** 4 | 5 | .. image:: https://img.shields.io/pypi/v/bytecode.svg 6 | :alt: Latest release on the Python Cheeseshop (PyPI) 7 | :target: https://pypi.python.org/pypi/bytecode 8 | 9 | .. image:: https://github.com/MatthieuDartiailh/bytecode/workflows/Continuous%20Integration/badge.svg 10 | :target: https://github.com/MatthieuDartiailh/bytecode/actions 11 | :alt: Continuous integration 12 | 13 | .. image:: https://github.com/MatthieuDartiailh/bytecode/workflows/Documentation%20building/badge.svg 14 | :target: https://github.com/MatthieuDartiailh/bytecode/actions 15 | :alt: Documentation building 16 | 17 | .. image:: https://img.shields.io/codecov/c/github/MatthieuDartiailh/bytecode/master.svg 18 | :alt: Code coverage of bytecode on codecov.io 19 | :target: https://codecov.io/github/MatthieuDartiailh/bytecode 20 | 21 | .. image:: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json 22 | :target: https://github.com/astral-sh/ruff 23 | :alt: Ruff 24 | 25 | ``bytecode`` is a Python module to generate and modify bytecode. 
26 | 27 | * `bytecode project homepage at GitHub 28 | <https://github.com/MatthieuDartiailh/bytecode>`_ (code, bugs) 29 | * `bytecode documentation 30 | <https://bytecode.readthedocs.io/en/latest/>`_ 31 | * `Download latest bytecode release at the Python Cheeseshop (PyPI) 32 | <https://pypi.python.org/pypi/bytecode>`_ 33 | 34 | Install bytecode: ``python3 -m pip install bytecode``. It requires Python 3.11 35 | or newer. The latest release that supports Python 3.7 and 3.6 is 0.13.0. 36 | The latest release that supports Python 3.5 is 0.12.0. For Python 2.7 support, 37 | have a look at `dead-bytecode `_ 38 | instead. 39 | 40 | Example executing ``print('Hello World!')``: 41 | 42 | .. code:: python 43 | 44 | from bytecode import Instr, Bytecode 45 | 46 | bytecode = Bytecode([Instr("LOAD_GLOBAL", (True, 'print')), 47 | Instr("LOAD_CONST", 'Hello World!'), 48 | Instr("CALL", 1), 49 | Instr("POP_TOP"), 50 | Instr("LOAD_CONST", None), 51 | Instr("RETURN_VALUE")]) 52 | code = bytecode.to_code() 53 | exec(code) 54 | -------------------------------------------------------------------------------- /tests/cell_free_vars_cases.py: -------------------------------------------------------------------------------- 1 | # Functions making heavy use of cell and free vars to test bytecode round tripping 2 | # capabilities. 
3 | 4 | 5 | def simple_cellvar(): # a cellvar in f 6 | a = 1 7 | 8 | def g(): # a freevar in g 9 | return a 10 | 11 | return g 12 | 13 | 14 | def cellvar_share_name(a=1): # a cellvar in f, but stored as varname 15 | def g(): # a freevar in g 16 | return a 17 | 18 | return g 19 | 20 | 21 | def cellvar_shared_and_unshared(a=1): # a, b cellvar in f, but a stored as varname 22 | b = 1 23 | 24 | def g(): # a, b freevar in g 25 | return a + b 26 | 27 | return g 28 | 29 | 30 | class A: 31 | a = 1 32 | 33 | def f(self): 34 | return 1 35 | 36 | 37 | def class_loadderef(): 38 | a = 1 39 | 40 | class B(A): 41 | b = a 42 | 43 | return B.b 44 | 45 | 46 | # NOTE aliasing super such that there is no LOAD_GLOBAL super cause the omission of 47 | # the required implicit __class__ cell which breaks the subsequent call 48 | # Under Python 3.11 the creation of cellvars is made explicit by MAKE_CELL 49 | 50 | 51 | def class_super(): 52 | class B(A): 53 | def f(self): 54 | super().f() 55 | 56 | return B().f 57 | 58 | 59 | def test_freevar(): 60 | class Foo: 61 | r = 0 62 | 63 | @classmethod 64 | def bar(cls, k): 65 | class Snafu(k): 66 | def do_debug(self, arg): 67 | cls.r += 1 68 | return super().d(arg) 69 | 70 | return Snafu 71 | 72 | 73 | # NOTE this is not really a cell var case but it ensures proper 74 | # placements of CACHE vs labels 75 | _localedirs: dict = {} 76 | _default_localedir = "" 77 | 78 | 79 | def bindtextdomain(domain="", localedir=None): 80 | global _localedirs 81 | if localedir is not None: 82 | _localedirs[domain] = localedir 83 | return _localedirs.get(domain, _default_localedir) 84 | 85 | 86 | TEST_CASES = [ 87 | simple_cellvar, 88 | cellvar_share_name, 89 | cellvar_shared_and_unshared, 90 | class_super, 91 | class_loadderef, 92 | bindtextdomain, 93 | test_freevar, 94 | ] 95 | 96 | if __name__ == "__main__": 97 | import dis 98 | import inspect 99 | 100 | for f in TEST_CASES: 101 | print("--------------------------------------------------------------") 102 | for 
line in inspect.getsourcelines(f)[0]: # type: ignore 103 | print(line.rstrip()) 104 | print() 105 | dis.dis(f.__code__) 106 | print() 107 | -------------------------------------------------------------------------------- /tests/test_code.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from bytecode import Bytecode, ConcreteBytecode, ControlFlowGraph 4 | 5 | from . import TestCase, get_code 6 | 7 | 8 | class CodeTests(TestCase): 9 | """Check that bytecode.from_code(code).to_code() returns code.""" 10 | 11 | def check(self, source, function=False): 12 | ref_code = get_code(source, function=function) 13 | 14 | code = ConcreteBytecode.from_code(ref_code).to_code() 15 | self.assertCodeObjectEqual(ref_code, code) 16 | 17 | code = Bytecode.from_code(ref_code).to_code() 18 | self.assertCodeObjectEqual(ref_code, code) 19 | 20 | bytecode = Bytecode.from_code(ref_code) 21 | blocks = ControlFlowGraph.from_bytecode(bytecode) 22 | code = blocks.to_bytecode().to_code() 23 | self.assertCodeObjectEqual(ref_code, code) 24 | 25 | def test_loop(self): 26 | self.check( 27 | """ 28 | for x in range(1, 10): 29 | x += 1 30 | if x == 3: 31 | continue 32 | x -= 1 33 | if x > 7: 34 | break 35 | x = 0 36 | print(x) 37 | """ 38 | ) 39 | 40 | def test_varargs(self): 41 | self.check( 42 | """ 43 | def func(a, b, *varargs): 44 | pass 45 | """, 46 | function=True, 47 | ) 48 | 49 | def test_kwargs(self): 50 | self.check( 51 | """ 52 | def func(a, b, **kwargs): 53 | pass 54 | """, 55 | function=True, 56 | ) 57 | 58 | def test_kwonlyargs(self): 59 | self.check( 60 | """ 61 | def func(*, arg, arg2): 62 | pass 63 | """, 64 | function=True, 65 | ) 66 | 67 | # Added because Python 3.10 added some special behavior with respect to 68 | # generators in term of stack size 69 | def test_generator_func(self): 70 | self.check( 71 | """ 72 | def func(arg, arg2): 73 | yield 74 | """, 75 | function=True, 76 | ) 77 | 78 | def test_async_func(self): 79 
| self.check( 80 | """ 81 | async def func(arg, arg2): 82 | pass 83 | """, 84 | function=True, 85 | ) 86 | 87 | def test_async_gen(self): 88 | self.check( 89 | """ 90 | async def async_stream(): 91 | yield 42 92 | 93 | async def func(): 94 | async for _ in async_stream(): 95 | pass 96 | """, 97 | function=True, 98 | ) 99 | 100 | 101 | if __name__ == "__main__": 102 | unittest.main() # pragma: no cover 103 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "bytecode" 3 | description = "Python module to generate and modify bytecode" 4 | readme = "README.rst" 5 | requires-python = ">=3.11" 6 | license = { file = "COPYING" } 7 | authors = [{ name = "Victor Stinner", email = "victor.stinner@gmail.com" }] 8 | maintainers = [{ name = "Matthieu C. Dartiailh", email = "m.dartiailh@gmail.com" }] 9 | classifiers = [ 10 | "Development Status :: 4 - Beta", 11 | "Intended Audience :: Developers", 12 | "License :: OSI Approved :: MIT License", 13 | "Natural Language :: English", 14 | "Operating System :: OS Independent", 15 | "Programming Language :: Python :: 3", 16 | "Programming Language :: Python :: 3.11", 17 | "Programming Language :: Python :: 3.12", 18 | "Programming Language :: Python :: 3.13", 19 | "Programming Language :: Python :: 3.14", 20 | "Topic :: Software Development :: Libraries :: Python Modules", 21 | ] 22 | dependencies = ["typing_extensions;python_version<'3.10'"] 23 | dynamic = ["version"] 24 | 25 | 26 | [project.urls] 27 | homepage = "https://github.com/MatthieuDartiailh/bytecode" 28 | documentation = "https://bytecode.readthedocs.io/en/latest/" 29 | repository = "https://github.com/MatthieuDartiailh/bytecode" 30 | changelog = "https://github.com/MatthieuDartiailh/bytecode/blob/main/doc/changelog.rst" 31 | 32 | 33 | [build-system] 34 | requires = ["setuptools>=61.2", "wheel", 
"setuptools_scm[toml]>=3.4.3"] 35 | build-backend = "setuptools.build_meta" 36 | 37 | [dependency-groups] 38 | dev = [ 39 | "mypy>=1.16.1", 40 | "pytest>=8", 41 | "pytest-cov>=6", 42 | "ruff>=0.12.0", 43 | ] 44 | test = [ 45 | "pytest>=8", 46 | "pytest-cov", 47 | ] 48 | 49 | [tool.setuptools_scm] 50 | write_to = "src/bytecode/version.py" 51 | write_to_template = """ 52 | # This file is auto-generated by setuptools-scm do NOT edit it. 53 | 54 | from collections import namedtuple 55 | 56 | #: A namedtuple of the version info for the current release. 57 | _version_info = namedtuple("_version_info", "major minor micro status") 58 | 59 | parts = "{version}".split(".", 3) 60 | version_info = _version_info( 61 | int(parts[0]), 62 | int(parts[1]), 63 | int(parts[2]), 64 | parts[3] if len(parts) == 4 else "", 65 | ) 66 | 67 | # Remove everything but the 'version_info' from this module. 68 | del namedtuple, _version_info, parts 69 | 70 | __version__ = "{version}" 71 | """ 72 | 73 | [tool.ruff] 74 | src = ["src"] 75 | extend-exclude = ["tests/instruments/hardware/nifpga/scope_based"] 76 | line-length = 88 77 | 78 | [tool.ruff.lint] 79 | select = ["B", "C", "E", "F", "W", "B9", "I", "C90", "RUF"] 80 | extend-ignore = ["E203", "E266", "E501", "F403", "F401", "RUF012"] 81 | 82 | [tool.ruff.lint.isort] 83 | combine-as-imports = true 84 | extra-standard-library = ["opcode"] 85 | 86 | [tool.ruff.lint.mccabe] 87 | max-complexity = 43 88 | 89 | [tool.mypy] 90 | follow_imports = "normal" 91 | strict_optional = true 92 | 93 | [tool.pytest.ini_options] 94 | minversion = "6.0" 95 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Build and upload wheels 2 | on: 3 | workflow_dispatch: 4 | schedule: 5 | - cron: '0 0 * * 3' 6 | push: 7 | tags: 8 | - '*' 9 | 10 | jobs: 11 | build_sdist: 12 | name: Build sdist 13 | runs-on: ubuntu-latest 14 
| steps: 15 | - name: Checkout 16 | uses: actions/checkout@v6 17 | - name: Get history and tags for SCM versioning to work 18 | run: | 19 | git fetch --prune --unshallow 20 | git fetch --depth=1 origin +refs/tags/*:refs/tags/* 21 | - name: Setup Python 22 | uses: actions/setup-python@v6 23 | with: 24 | python-version: '3.x' 25 | - name: Build sdist 26 | run: | 27 | pip install --upgrade pip 28 | pip install wheel build 29 | python -m build . -s 30 | - name: Test sdist 31 | run: | 32 | pip install pytest 33 | pip install dist/*.tar.gz 34 | python -X dev -m pytest tests 35 | - name: Store artifacts 36 | uses: actions/upload-artifact@v6 37 | with: 38 | name: cibw-sdist 39 | path: dist/* 40 | 41 | build_wheel: 42 | name: Build wheel 43 | runs-on: ubuntu-latest 44 | steps: 45 | - name: Checkout 46 | uses: actions/checkout@v6 47 | - name: Get history and tags for SCM versioning to work 48 | run: | 49 | git fetch --prune --unshallow 50 | git fetch --depth=1 origin +refs/tags/*:refs/tags/* 51 | - name: Setup Python 52 | uses: actions/setup-python@v6 53 | with: 54 | python-version: '3.x' 55 | - name: Build wheels 56 | run: | 57 | pip install --upgrade pip 58 | pip install wheel build 59 | python -m build . 
-w 60 | - name: Test wheel 61 | run: | 62 | pip install pytest 63 | pip install dist/*.whl 64 | python -X dev -m pytest tests 65 | - name: Store artifacts 66 | uses: actions/upload-artifact@v6 67 | with: 68 | name: cibw-wheel 69 | path: dist/*.whl 70 | 71 | publish: 72 | if: github.event_name == 'push' 73 | needs: [build_wheel, build_sdist] 74 | runs-on: ubuntu-latest 75 | environment: 76 | name: pypi 77 | url: https://pypi.org/p/bytecode 78 | permissions: 79 | id-token: write 80 | steps: 81 | - name: Download all the dists 82 | uses: actions/download-artifact@v7.0.0 83 | with: 84 | pattern: cibw-* 85 | path: dist 86 | merge-multiple: true 87 | 88 | - uses: pypa/gh-action-pypi-publish@release/v1 89 | 90 | github-release: 91 | name: >- 92 | Sign the Python 🐍 distribution 📦 with Sigstore 93 | and create a GitHub Release 94 | runs-on: ubuntu-latest 95 | needs: 96 | - publish 97 | 98 | permissions: 99 | contents: write 100 | id-token: write 101 | 102 | steps: 103 | - name: Download all the dists 104 | uses: actions/download-artifact@v7.0.0 105 | with: 106 | pattern: cibw-* 107 | path: dist 108 | merge-multiple: true 109 | - name: Sign the dists with Sigstore 110 | uses: sigstore/gh-action-sigstore-python@v3.2.0 111 | with: 112 | inputs: >- 113 | ./dist/*.tar.gz 114 | ./dist/*.whl 115 | - name: Create GitHub Release 116 | env: 117 | GITHUB_TOKEN: ${{ github.token }} 118 | run: >- 119 | gh release create 120 | '${{ github.ref_name }}' 121 | --repo '${{ github.repository }}' 122 | --generate-notes 123 | - name: Upload artifact signatures to GitHub Release 124 | env: 125 | GITHUB_TOKEN: ${{ github.token }} 126 | run: >- 127 | gh release upload 128 | '${{ github.ref_name }}' dist/** 129 | --repo '${{ github.repository }}' -------------------------------------------------------------------------------- /tests/frameworks/sitecustomize.py: -------------------------------------------------------------------------------- 1 | import atexit 2 | import dis 3 | import io 4 | from 
datetime import timedelta 5 | from time import monotonic as time 6 | from types import CodeType, ModuleType 7 | 8 | from module import BaseModuleWatchdog # type: ignore 9 | 10 | from bytecode import Bytecode, ControlFlowGraph, Instr 11 | 12 | _original_exec = exec 13 | 14 | 15 | def dump_last_traceback_frame(exc, file=None): 16 | tb = exc.__traceback__ 17 | # Get the last frame. This is where we expect the most useful debugging 18 | # information to be 19 | while tb.tb_next is not None: 20 | tb = tb.tb_next 21 | 22 | # Inspect the locals 23 | _locals = tb.tb_frame.f_locals 24 | if w := max(len(_) for _ in _locals) + 2 if _locals else 0 > 0: 25 | print(title := " Locals from last frame ".center(w * 2, "="), file=file) 26 | for name, value in _locals.items(): 27 | print(f"{name:>{w}} = {value}", file=file) 28 | print("=" * len(title), file=file) 29 | 30 | 31 | class BytecodeError(Exception): 32 | def __init__(self, message, code, exc=None): 33 | stream = io.StringIO() 34 | print(message, file=stream) 35 | if exc is not None: 36 | dump_last_traceback_frame(exc, file=stream) 37 | dis.dis(code, file=stream, depth=0, show_caches=True) 38 | super().__init__(stream.getvalue()) 39 | 40 | 41 | class ModuleCodeCollector(BaseModuleWatchdog): 42 | def __init__(self): 43 | super().__init__() 44 | 45 | # Count how many code objects we've recompiled 46 | self.count = 0 47 | self.stopwatch = 0 48 | 49 | # Replace the built-in exec function with our own in the pytest globals 50 | try: 51 | import _pytest.assertion.rewrite as par 52 | 53 | par.exec = self._exec 54 | except ImportError: 55 | pass 56 | 57 | def transform( 58 | self, code: CodeType, _module: ModuleType, root: bool = True 59 | ) -> CodeType: 60 | # Round-trip the code object through the library 61 | try: 62 | start = time() 63 | 64 | abstract_code = Bytecode.from_code(code) 65 | except Exception as e: 66 | msg = f"Failed to convert {code} from {_module} into abstract code" 67 | raise BytecodeError(msg, code, e) from e 68 
| 69 | try: 70 | for instr in abstract_code: 71 | if isinstance(instr, Instr) and isinstance(instr.arg, CodeType): 72 | instr.arg = self.transform(instr.arg, _module, root=False) 73 | 74 | cfg = ControlFlowGraph.from_bytecode(abstract_code) 75 | 76 | recompiled_code = cfg.to_code() 77 | 78 | # Check we can still disassemble the code 79 | dis.dis(recompiled_code, file=io.StringIO()) 80 | 81 | if root: 82 | # Only time the root code objects because of the recursion 83 | self.stopwatch += time() - start 84 | 85 | self.count += 1 86 | 87 | return recompiled_code 88 | except Exception as e: 89 | msg = f"Failed to recompile {code} from {_module}" 90 | raise BytecodeError(msg, code, e) from e 91 | 92 | def after_import(self, _module: ModuleType) -> None: 93 | pass 94 | 95 | def _exec(self, _object, _globals=None, _locals=None, **kwargs): 96 | # The pytest module loader doesn't implement a get_code method so we 97 | # need to intercept the loading of test modules by wrapping around the 98 | # exec built-in function. 
99 | new_object = ( 100 | self.transform(_object, None) 101 | if isinstance(_object, CodeType) and _object.co_name == "" 102 | else _object 103 | ) 104 | 105 | # Execute the module before calling the after_import hook 106 | _original_exec(new_object, _globals, _locals, **kwargs) 107 | 108 | @classmethod 109 | def uninstall(cls) -> None: 110 | # Restore the original exec function 111 | try: 112 | import _pytest.assertion.rewrite as par 113 | 114 | par.exec = _original_exec # type: ignore 115 | except ImportError: 116 | pass 117 | 118 | # Proof of work 119 | print( 120 | f"Recompiled {cls._instance.count} code objects in {timedelta(seconds=cls._instance.stopwatch)}" 121 | ) 122 | 123 | return super().uninstall() 124 | 125 | 126 | print("Collecting module code objects") 127 | ModuleCodeCollector.install() 128 | 129 | 130 | @atexit.register 131 | def _(): 132 | ModuleCodeCollector.uninstall() 133 | -------------------------------------------------------------------------------- /doc/byteplay_codetransformer.rst: -------------------------------------------------------------------------------- 1 | ++++++++++++++++++++++++++++++++++++++++++++ 2 | Comparison with byteplay and codetransformer 3 | ++++++++++++++++++++++++++++++++++++++++++++ 4 | 5 | History of the bytecode API design 6 | ================================== 7 | 8 | The design of the bytecode module started with a single use case: reimplement 9 | the CPython peephole optimizer (implemented in C) in pure Python. The design of 10 | the API required many iterations to get the current API. 11 | 12 | bytecode now has a clear separation between concrete instructions using integer 13 | arguments and abstract instructions which use Python objects for arguments. 14 | Jump targets are labels or basic blocks. And the control flow graph abstraction 15 | is now an API well separated from the regular abstract bytecode which is a 16 | simple list of instructions. 
17 | 18 | 19 | byteplay and codetransformer 20 | ============================ 21 | 22 | The `byteplay <https://github.com/serprex/byteplay>`_ and `codetransformer 23 | <https://pypi.org/project/codetransformer/>`_ are clear inspirations for the 24 | design of the bytecode API. Sadly, the byteplay and codetransformer APIs have design 25 | issues (at least for my specific use cases). 26 | 27 | 28 | Free and cell variables 29 | ----------------------- 30 | 31 | Converting a code object to bytecode and then back to code must not modify the 32 | code object. It is an important requirement. 33 | 34 | The LOAD_DEREF instruction supports free variables and cell variables. byteplay 35 | and codetransformer use a simple string for the variable name. When the 36 | bytecode is converted to a code object, they check if the variable is a free 37 | variable, or fall back to a cell variable. 38 | 39 | The CPython code base contains a corner case: code having a free variable and a 40 | cell variable with the same name. The heuristic produces invalid code which 41 | can lead to a crash. 42 | 43 | bytecode uses :class:`FreeVar` and :class:`CellVar` classes to tag the type of 44 | the variable. Trying to use a simple string raises a :exc:`TypeError` in the 45 | :class:`Instr` constructor. 46 | 47 | .. note:: 48 | It's possible to fix this issue in byteplay and codetransformer, maybe even 49 | while keeping support for simple strings for free/cell variables for backward 50 | compatibility. 51 | 52 | 53 | Line numbers 54 | ------------ 55 | 56 | codetransformer internally uses a dictionary mapping offsets to line numbers. 57 | It is updated when the ``.steal()`` method is used. 58 | 59 | byteplay uses a pseudo-instruction ``SetLineno`` to set the current line number 60 | of the following instructions. These pseudo-instructions must be handled 61 | when you modify the bytecode, especially when instructions are moved. 62 | 63 | In FAT Python, some optimizations move instructions but their line numbers must 64 | be kept.
That's also why Python 3.6 was modified to support negative line 65 | number deltas in ``code.co_lnotab``. 66 | 67 | bytecode has a different design: line numbers are stored directly inside 68 | instructions (:attr:`Instr.lineno` attribute). Moving an instruction keeps 69 | the line number information by design. 70 | 71 | bytecode also supports the pseudo-instruction :class:`SetLineno`. It was added 72 | to simplify functions emitting bytecode. It's not used when an existing code 73 | object is converted to bytecode. 74 | 75 | 76 | Jump targets 77 | ------------ 78 | 79 | In codetransformer, a jump target is an instruction. Jump targets are computed 80 | when the bytecode is converted to a code object. 81 | 82 | byteplay and bytecode use labels. Jump targets are computed when the abstract 83 | bytecode is converted to a code object. 84 | 85 | .. note:: 86 | A loop is needed in the conversion from bytecode to code: if the jump target 87 | is larger than 2**16, the size of the jump instruction changes (from 3 to 6 88 | bytes). So other jump targets must be recomputed. 89 | 90 | bytecode handles this corner case. byteplay and codetransformer don't, but 91 | it should be easy to fix them. 92 | 93 | 94 | Control flow graph 95 | ------------------ 96 | 97 | The peephole optimizer has strong requirements on the control flow: an 98 | optimization must not modify two instructions which are part of two different 99 | basic blocks. Otherwise, the optimizer produces invalid code. 100 | 101 | bytecode provides a control flow graph API for this use case. 102 | 103 | byteplay and codetransformer don't. 104 | 105 | 106 | Functions or methods 107 | -------------------- 108 | 109 | This point is a matter of taste. 110 | 111 | In bytecode, instructions are objects with methods like 112 | :meth:`~Instr.is_final`, :meth:`~Instr.has_cond_jump`, etc. 113 | 114 | The byteplay project uses functions taking an instruction as a parameter.
115 | -------------------------------------------------------------------------------- /doc/usage.rst: -------------------------------------------------------------------------------- 1 | ************** 2 | Bytecode Usage 3 | ************** 4 | 5 | Installation 6 | ============ 7 | 8 | Install bytecode:: 9 | 10 | python3 -m pip install bytecode 11 | 12 | ``bytecode`` requires Python 3.8 or newer. 13 | 14 | 15 | Hello World 16 | =========== 17 | 18 | Abstract bytecode 19 | ----------------- 20 | 21 | Example using abstract bytecode to execute ``print('Hello World!')``:: 22 | 23 | from bytecode import Instr, Bytecode 24 | 25 | bytecode = Bytecode([Instr("LOAD_GLOBAL", (True, 'print')), 26 | Instr("LOAD_CONST", 'Hello World!'), 27 | Instr("CALL", 1), 28 | Instr("POP_TOP"), 29 | Instr("LOAD_CONST", None), 30 | Instr("RETURN_VALUE")]) 31 | code = bytecode.to_code() 32 | exec(code) 33 | 34 | Output:: 35 | 36 | Hello World! 37 | 38 | 39 | Concrete bytecode 40 | ----------------- 41 | 42 | Example using concrete bytecode to execute ``print('Hello World!')``:: 43 | 44 | from bytecode import ConcreteInstr, ConcreteBytecode 45 | 46 | bytecode = ConcreteBytecode() 47 | bytecode.names = ['print'] 48 | bytecode.consts = ['Hello World!', None] 49 | bytecode.extend([ConcreteInstr("LOAD_GLOBAL", 1), 50 | ConcreteInstr("LOAD_CONST", 0), 51 | ConcreteInstr("CALL", 1), 52 | ConcreteInstr("POP_TOP"), 53 | ConcreteInstr("LOAD_CONST", 1), 54 | ConcreteInstr("RETURN_VALUE")]) 55 | code = bytecode.to_code() 56 | exec(code) 57 | 58 | Output:: 59 | 60 | Hello World! 61 | 62 | 63 | Setting the compiler flags 64 | -------------------------- 65 | 66 | Bytecode, ConcreteBytecode and ControlFlowGraph instances all have a flags 67 | attribute which is an instance of the CompilerFlags enum. The value can be 68 | manipulated like any other binary flag.
69 | 70 | Setting the OPTIMIZED flag:: 71 | 72 | from bytecode import Bytecode, CompilerFlags 73 | 74 | bytecode = Bytecode() 75 | bytecode.flags |= CompilerFlags.OPTIMIZED 76 | 77 | Clearing the OPTIMIZED flag:: 78 | 79 | from bytecode import Bytecode, CompilerFlags 80 | 81 | bytecode = Bytecode() 82 | bytecode.flags &= ~CompilerFlags.OPTIMIZED 83 | 84 | 85 | The flags can be updated based on the instructions stored in the code object 86 | using the ``update_flags`` method. 87 | 88 | 89 | Simple loop 90 | =========== 91 | 92 | Bytecode of ``for x in (1, 2, 3): print(x)``: 93 | 94 | .. tabs:: 95 | 96 | .. group-tab:: Python >= 3.8 97 | 98 | .. code:: python 99 | 100 | from bytecode import Label, Instr, Bytecode 101 | 102 | loop_start = Label() 103 | loop_done = Label() 104 | loop_exit = Label() 105 | code = Bytecode( 106 | [ 107 | # Python 3.8 removed SETUP_LOOP 108 | Instr("LOAD_CONST", (1, 2, 3)), 109 | Instr("GET_ITER"), 110 | loop_start, 111 | Instr("FOR_ITER", loop_exit), 112 | Instr("STORE_NAME", "x"), 113 | Instr("LOAD_GLOBAL", (True, "print")), 114 | Instr("LOAD_NAME", "x"), 115 | Instr("CALL", 1), 116 | Instr("POP_TOP"), 117 | Instr("JUMP_BACKWARD", loop_start), 118 | # Python 3.8 removed the need to manually manage blocks in loops 119 | # This is now handled internally by the interpreter 120 | loop_exit, 121 | Instr("END_FOR"), 122 | Instr("LOAD_CONST", None), 123 | Instr("RETURN_VALUE"), 124 | ] 125 | ) 126 | 127 | # The conversion to a Python code object resolves jump targets: 128 | # abstract labels are replaced with concrete offsets 129 | code = code.to_code() 130 | exec(code) 131 | 132 | Output:: 133 | 134 | 1 135 | 2 136 | 3 137 | 138 | 139 | ..
_ex-cond-jump: 140 | 141 | Conditional jump 142 | ================ 143 | 144 | Bytecode of the Python code ``print('yes' if test else 'no')``:: 145 | 146 | from bytecode import Label, Instr, Bytecode 147 | 148 | label_else = Label() 149 | label_print = Label() 150 | bytecode = Bytecode([Instr('LOAD_GLOBAL', (True, 'print')), 151 | Instr('LOAD_NAME', 'test'), 152 | Instr('POP_JUMP_IF_FALSE', label_else), 153 | Instr('LOAD_CONST', 'yes'), 154 | Instr('JUMP_FORWARD', label_print), 155 | label_else, 156 | Instr('LOAD_CONST', 'no'), 157 | label_print, 158 | Instr('CALL', 1), 159 | Instr('LOAD_CONST', None), 160 | Instr('RETURN_VALUE')]) 161 | code = bytecode.to_code() 162 | 163 | test = 0 164 | exec(code) 165 | 166 | test = 1 167 | exec(code) 168 | 169 | Output:: 170 | 171 | no 172 | yes 173 | 174 | .. note:: 175 | Instructions are only indented for readability. 176 | -------------------------------------------------------------------------------- /tests/frameworks/function.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | from collections.abc import Iterator 3 | from os.path import abspath 4 | from types import FunctionType, ModuleType 5 | from typing import Any, Dict, Optional, Protocol, Tuple, Type, Union, cast 6 | 7 | from module import origin # type: ignore 8 | 9 | FunctionContainerType = Union[ 10 | type, property, classmethod, staticmethod, Tuple, ModuleType 11 | ] 12 | 13 | ContainerKey = Union[str, int, Type[staticmethod], Type[classmethod]] 14 | 15 | CONTAINER_TYPES = (type, property, classmethod, staticmethod) 16 | 17 | 18 | def set_cell_contents(cell, contents): # type: ignore[misc] 19 | cell.cell_contents = contents 20 | 21 | 22 | class FullyNamed(Protocol): 23 | """A fully named object.""" 24 | 25 | __name__ = None # type: Optional[str] 26 | __fullname__ = None # type: Optional[str] 27 | 28 | 29 | class FullyNamedFunction(FullyNamed): 30 | """A fully named function object.""" 31 | 32 
| def __call__(self, *args, **kwargs): 33 | pass 34 | 35 | 36 | class ContainerIterator(Iterator, FullyNamedFunction): 37 | """Wrapper around different types of function containers. 38 | 39 | A container comes with an origin, i.e. a parent container and a position 40 | within it in the form of a key. 41 | """ 42 | 43 | def __init__( 44 | self, 45 | container, # type: FunctionContainerType 46 | origin=None, # type: Optional[Union[Tuple[ContainerIterator, ContainerKey], Tuple[FullyNamedFunction, str]]] 47 | ): 48 | # type: (...) -> None 49 | if isinstance(container, (type, ModuleType)): 50 | self._iter = iter(container.__dict__.items()) 51 | self.__name__ = container.__name__ 52 | 53 | elif isinstance(container, tuple): 54 | self._iter = iter(enumerate(_.cell_contents for _ in container)) # type: ignore[arg-type] 55 | self.__name__ = "" 56 | 57 | elif isinstance(container, property): 58 | self._iter = iter( 59 | (m, getattr(container, a)) 60 | for m, a in { 61 | ("getter", "fget"), 62 | ("setter", "fset"), 63 | ("deleter", "fdel"), 64 | } 65 | ) 66 | assert container.fget is not None 67 | self.__name__ = container.fget.__name__ 68 | 69 | elif isinstance(container, (classmethod, staticmethod)): 70 | self._iter = iter([(type(container), container.__func__)]) # type: ignore[list-item] 71 | self.__name__ = None 72 | 73 | else: 74 | raise TypeError("Unsupported container type: %s", type(container)) 75 | 76 | self._container = container 77 | 78 | if origin is not None and origin[0].__fullname__ is not None: 79 | origin_fullname = origin[0].__fullname__ 80 | self.__fullname__ = ( 81 | ".".join((origin_fullname, self.__name__)) 82 | if self.__name__ 83 | else origin_fullname 84 | ) 85 | else: 86 | self.__fullname__ = self.__name__ 87 | 88 | def __iter__(self): 89 | # type: () -> Iterator[Tuple[ContainerKey, Any]] 90 | return self._iter 91 | 92 | def __next__(self): 93 | # type: () -> Tuple[ContainerKey, Any] 94 | return next(self._iter) 95 | 96 | next = __next__ 97 | 98 | 99 
def _collect_functions(module):
    # type: (ModuleType) -> Dict[str, FullyNamedFunction]
    """Collect functions from a given module.

    Starting from the module, containers (classes, properties, class/static
    methods and closures) are explored with an explicit work list. A function
    is kept only when the absolute path of its code object's file equals
    ``origin(module)``.

    Returns:
        A dict mapping dotted names to function objects. A function is
        registered under its ``__name__`` and, when the key it was found under
        is a string, under that key as well. The first time a function is
        met, its ``__fullname__`` attribute is set.
    """
    assert isinstance(module, ModuleType)

    # NOTE(review): presumably the path of the module's source file -- confirm
    # against ``module.origin``.
    path = origin(module)
    containers = deque([ContainerIterator(module)])
    functions = {}
    # Containers are tracked by identity so that each one is explored once.
    seen_containers = set()
    seen_functions = set()

    while containers:
        c = containers.pop()

        if id(c._container) in seen_containers:
            continue
        seen_containers.add(id(c._container))

        for k, o in c:
            code = getattr(o, "__code__", None) if isinstance(o, FunctionType) else None
            if code is not None and abspath(code.co_filename) == path:
                if o not in seen_functions:
                    seen_functions.add(o)
                    o = cast(FullyNamedFunction, o)
                    o.__fullname__ = (
                        ".".join((c.__fullname__, o.__name__ or ""))
                        if c.__fullname__
                        else o.__name__
                    )

                # Register the function under the key it was found with (when
                # that key is a name) as well as under its own name.
                for name in (k, o.__name__) if isinstance(k, str) else (o.__name__,):
                    fullname = (
                        ".".join((c.__fullname__, name)) if c.__fullname__ else name
                    )
                    functions[fullname] = o

                # Functions captured in the closure are explored as well.
                try:
                    if o.__closure__:
                        containers.append(
                            ContainerIterator(o.__closure__, origin=(o, ""))
                        )
                except AttributeError:
                    pass

            elif isinstance(o, CONTAINER_TYPES):
                # Skip properties whose getter is not a plain function.
                if isinstance(o, property) and not isinstance(o.fget, FunctionType):
                    continue
                containers.append(ContainerIterator(o, origin=(c, k)))

    return functions


class FunctionDiscovery(dict):
    """Discover all function objects in a module.

    The instance is a dict holding the entries returned by
    ``_collect_functions`` for the given module.
    """

    def __init__(self, module):
        # type: (ModuleType) -> None
        super(FunctionDiscovery, self).__init__()
        self._module = module

        for fname, function in _collect_functions(module).items():
            self[fname] = function
-------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
# Every target below names a command rather than a file.
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf latexpdfja text man texinfo info changes linkcheck doctest gettext xml pseudoxml

# Print the list of available targets.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo " html to make standalone HTML files"
	@echo " dirhtml to make HTML files named index.html in directories"
	@echo " singlehtml to make a single large HTML file"
	@echo " pickle to make pickle files"
	@echo " json to make JSON files"
	@echo " htmlhelp to make HTML files and a HTML help project"
	@echo " qthelp to make HTML files and a qthelp project"
	@echo " devhelp to make HTML files and a Devhelp project"
	@echo " epub to make an epub"
	@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo " latexpdf to make LaTeX files and run them through pdflatex"
	@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo " text to make text files"
	@echo " man to make manual pages"
	@echo " texinfo to make Texinfo files"
	@echo " info to make Texinfo files and run them through makeinfo"
	@echo " gettext to make PO message catalogs"
	@echo " changes to make an overview of all changed/added/deprecated items"
	@echo " xml to make Docutils-native XML files"
	@echo " pseudoxml to make pseudoxml-XML files for display purposes"
	@echo " linkcheck to check all external links for integrity"
	@echo " doctest to run all doctests embedded in the documentation (if enabled)"

# Remove everything below the build directory.
clean:
	rm -rf $(BUILDDIR)/*

# Build the standalone HTML pages.
html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

# Build HTML pages named index.html in per-page directories.
dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/bytecode.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/bytecode.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/bytecode" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/bytecode" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 
# Build the LaTeX sources, then run the generated Makefile to produce PDFs.
latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

# Same as latexpdf, through the all-pdf-ja target (platex and dvipdfmx).
latexpdfja:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through platex and dvipdfmx..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."

# Build the Texinfo sources, then run the generated Makefile's info target.
# $(MAKE) is used for the sub-make, as in latexpdf, so that the same make
# program and its flags are used for the recursive call.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

# The i18n builder uses its own option set (I18NSPHINXOPTS).
gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."
163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. 
gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 
92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\bytecode.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\bytecode.ghc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 
155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 
def format_bytecode(
    bytecode: Union[Bytecode, ConcreteBytecode, ControlFlowGraph],
    *,
    lineno: bool = False,
) -> str:
    """Return a human readable listing of ``bytecode``.

    Parameters
    ----------
    bytecode : Bytecode | ConcreteBytecode | ControlFlowGraph
        Object to format. Concrete bytecode is listed by byte offset and
        followed by its exception table (if any); abstract bytecode is listed
        by instruction index under its labels; a control flow graph is listed
        block by block.
    lineno : bool, optional
        When True, prefix each row with the instruction index and, whenever
        the source line changes, with ``L.<line>``.

    Returns
    -------
    str
        The listing, without a trailing newline.

    Raises
    ------
    TypeError
        If ``bytecode`` is not one of the supported classes.

    """
    # Validate first: reading ``first_lineno`` on an arbitrary object would
    # otherwise raise AttributeError before the type could be reported.
    if not isinstance(bytecode, (Bytecode, ConcreteBytecode, ControlFlowGraph)):
        raise TypeError("unknown bytecode class")

    try_begins: List[TryBegin] = []
    cur_lineno = bytecode.first_lineno
    prev_lineno = None

    # One nesting level of a try region. The same string is used to indent on
    # TryBegin and to dedent on TryEnd so both operations stay symmetric.
    try_indent = "  "

    def format_line(index, line):
        nonlocal prev_lineno
        if not lineno:
            return line
        if cur_lineno != prev_lineno:
            prev_lineno = cur_lineno
            return "L.% 3s % 3s: %s" % (cur_lineno, index, line)
        # Pad by the width of the "L.nnn " prefix so index columns line up.
        return "      % 3s: %s" % (index, line)

    def format_target(target, labels) -> str:
        # Labels are keyed by the Label itself, blocks by their id().
        key = target if isinstance(target, Label) else id(target)
        try:
            return "<%s>" % labels[key]
        except (KeyError, TypeError):
            # Unknown target, or no label mapping available at all.
            return "<error>"

    def format_instr(instr, labels=None):
        arg = instr._arg
        if arg is UNSET:
            return instr.name
        if isinstance(arg, (Label, BasicBlock)):
            arg = format_target(arg, labels)
        else:
            arg = repr(arg)
        return "%s %s" % (instr.name, arg)

    def format_try_begin(instr: TryBegin, labels: dict) -> str:
        line = "TryBegin %s -> %s [%s]" % (
            len(try_begins),
            format_target(instr.target, labels),
            instr.stack_depth,
        ) + (" last_i" if instr.push_lasti else "")

        # Track the seen try begin so the matching TryEnd can refer to it
        try_begins.append(instr)

        return line

    def format_try_end(instr: TryEnd) -> str:
        i = try_begins.index(instr.entry) if instr.entry in try_begins else ""
        return "TryEnd (%s)" % i

    buffer = StringIO()
    indent = " " * 4

    if isinstance(bytecode, ConcreteBytecode):
        offset = 0
        for c_instr in bytecode:
            if c_instr.lineno is not None:
                cur_lineno = c_instr.lineno
            text = format_instr(c_instr)
            if lineno:
                line = format_line(offset, text)
            else:
                line = "% 3s %s" % (offset, text)
            buffer.write(line + "\n")

            # Only real instructions occupy space in the code string.
            if isinstance(c_instr, ConcreteInstr):
                offset += c_instr.size

        if bytecode.exception_table:
            buffer.write("\n")
            buffer.write("Exception table:\n")
            for entry in bytecode.exception_table:
                buffer.write(
                    f"{entry.start_offset} to {entry.stop_offset} -> "
                    f"{entry.target} [{entry.stack_depth}]"
                    + (" lasti" if entry.push_lasti else "")
                    + "\n"
                )

    elif isinstance(bytecode, Bytecode):
        labels: dict[Label, str] = {
            instr: "label_instr%s" % index
            for index, instr in enumerate(bytecode)
            if isinstance(instr, Label)
        }

        for index, instr in enumerate(bytecode):
            if isinstance(instr, Label):
                line = "%s:" % labels[instr]
                # Separate labelled sections, except at the very top.
                if index != 0:
                    buffer.write("\n")
            elif isinstance(instr, TryBegin):
                line = indent + format_line(index, format_try_begin(instr, labels))
                indent += try_indent
            elif isinstance(instr, TryEnd):
                indent = indent[: -len(try_indent)]
                line = indent + format_line(index, format_try_end(instr))
            else:
                if instr.lineno is not None:
                    cur_lineno = instr.lineno
                line = indent + format_line(index, format_instr(instr, labels))
            buffer.write(line + "\n")
        buffer.write("\n")

    else:
        # ControlFlowGraph: blocks are numbered from 1 and keyed by id().
        cfg_labels = {
            id(block): "block%s" % block_index
            for block_index, block in enumerate(bytecode, 1)
        }

        for block in bytecode:
            buffer.write("%s:\n" % cfg_labels[id(block)])
            seen_instr = False
            for index, instr in enumerate(block):
                if isinstance(instr, TryBegin):
                    line = indent + format_line(
                        index, format_try_begin(instr, cfg_labels)
                    )
                    indent += try_indent
                elif isinstance(instr, TryEnd):
                    # A TryEnd opening a block closes a region whose indent was
                    # not applied in this block: only dedent once a real
                    # instruction has been printed.
                    if seen_instr:
                        indent = indent[: -len(try_indent)]
                    line = indent + format_line(index, format_try_end(instr))
                else:
                    if isinstance(instr, Instr):
                        seen_instr = True
                    if instr.lineno is not None:
                        cur_lineno = instr.lineno
                    line = indent + format_line(index, format_instr(instr, cfg_labels))
                buffer.write(line + "\n")
            if block.next_block is not None:
                buffer.write(indent + "-> %s\n" % cfg_labels[id(block.next_block)])
            buffer.write("\n")

    # Drop the final newline written by the loops above.
    return buffer.getvalue()[:-1]


def dump_bytecode(
    bytecode: Union[Bytecode, ConcreteBytecode, ControlFlowGraph],
    *,
    lineno: bool = False,
) -> None:
    """Print the listing produced by :func:`format_bytecode`."""
    print(format_bytecode(bytecode, lineno=lineno))
text = "%s(%r, lineno=%s)" % (cls_name, instr.name, instr.lineno) 47 | else: 48 | text = "%s(%r)" % (cls_name, instr.name) 49 | else: 50 | text = labels[instr] 51 | instr_list.append(text) 52 | return "[%s]" % ",\n ".join(instr_list) 53 | 54 | 55 | def dump_bytecode(code, lineno=False): 56 | """ 57 | Use this function to write unit tests: copy/paste its output to 58 | write a self.assertBlocksEqual() check. 59 | """ 60 | print() 61 | 62 | if isinstance(code, (Bytecode, ConcreteBytecode)): 63 | is_concrete = isinstance(code, ConcreteBytecode) 64 | if is_concrete: 65 | block = list(code) 66 | else: 67 | block = code 68 | 69 | indent = " " * 8 70 | labels = {} 71 | for index, instr in enumerate(block): 72 | if isinstance(instr, Label): 73 | name = "label_instr%s" % index 74 | labels[instr] = name 75 | 76 | if is_concrete: 77 | name = "ConcreteBytecode" 78 | print(indent + "code = %s()" % name) 79 | if code.argcount: 80 | print(indent + "code.argcount = %s" % code.argcount) 81 | if code.posonlyargcount: 82 | print(indent + "code.posonlyargcount = %s" % code.posonlyargcount) 83 | if code.kwonlyargcount: 84 | print(indent + "code.kwargonlycount = %s" % code.kwonlyargcount) 85 | print(indent + "code.flags = %#x" % code.flags) 86 | if code.consts: 87 | print(indent + "code.consts = %r" % code.consts) 88 | if code.names: 89 | print(indent + "code.names = %r" % code.names) 90 | if code.varnames: 91 | print(indent + "code.varnames = %r" % code.varnames) 92 | 93 | for name in sorted(labels.values()): 94 | print(indent + "%s = Label()" % name) 95 | 96 | if is_concrete: 97 | text = indent + "code.extend(" 98 | indent = " " * len(text) 99 | else: 100 | text = indent + "code = Bytecode(" 101 | indent = " " * len(text) 102 | 103 | lines = _format_instr_list(code, labels, lineno).splitlines() 104 | last_line = len(lines) - 1 105 | for index, line in enumerate(lines): 106 | if index == 0: 107 | print(text + lines[0]) 108 | elif index == last_line: 109 | print(indent + line + ")") 110 
| else: 111 | print(indent + line) 112 | 113 | print() 114 | else: 115 | assert isinstance(code, ControlFlowGraph) 116 | labels = {} 117 | for block_index, block in enumerate(code): 118 | labels[id(block)] = "code[%s]" % block_index 119 | 120 | for block_index, block in enumerate(code): 121 | text = _format_instr_list(block, labels, lineno) 122 | if block_index != len(code) - 1: 123 | text += "," 124 | print(text) 125 | print() 126 | 127 | 128 | def get_code(source, *, filename="", function=False): 129 | source = textwrap.dedent(source).strip() 130 | code = compile(source, filename, "exec") 131 | if function: 132 | sub_code = [ 133 | const for const in code.co_consts if isinstance(const, types.CodeType) 134 | ] 135 | if len(sub_code) == 0: 136 | raise ValueError("unable to find function code") 137 | code = sub_code[-1] 138 | return code 139 | 140 | 141 | def disassemble(source, *, filename="", function=False): 142 | code = get_code(source, filename=filename, function=function) 143 | return Bytecode.from_code(code) 144 | 145 | 146 | class TestCase(unittest.TestCase): 147 | def assertInstructionListEqual(self, l1, l2): 148 | # DO not check location information 149 | for i1, i2 in zip(l1, l2, strict=True): 150 | if isinstance(i1, Instr): 151 | self.assertEqual(i1.name, i2.name) 152 | if not isinstance(i1.arg, Label): 153 | self.assertEqual(i1.arg, i2.arg) 154 | else: 155 | self.assertIs(l1.index(i1.arg), l2.index(i2.arg)) 156 | self.assertEqual(i1.lineno, i2.lineno) 157 | else: 158 | assert type(i1) is type(i2) 159 | 160 | def assertCodeObjectEqual(self, code1: types.CodeType, code2: types.CodeType): 161 | self.assertEqual(code1.co_stacksize, code2.co_stacksize) 162 | self.assertEqual(code1.co_firstlineno, code2.co_firstlineno) 163 | self.assertSequenceEqual(code1.co_cellvars, code2.co_cellvars) 164 | self.assertSequenceEqual(code1.co_freevars, code2.co_freevars) 165 | self.assertSetEqual(set(code1.co_varnames), set(code2.co_varnames)) 166 | 
self.assertSequenceEqual(code1.co_exceptiontable, code2.co_exceptiontable) 167 | # We do not compare linetables because CPython does not always optimize 168 | # the packing of the table 169 | self.assertSequenceEqual(list(code1.co_positions()), list(code2.co_positions())) 170 | self.assertEqual(code1.co_qualname, code2.co_qualname) 171 | 172 | # If names or consts have been re-ordered compared the output of dis.instructions 173 | if PY312 and ( 174 | code1.co_consts != code2.co_consts 175 | or code1.co_names != code2.co_names 176 | or code1.co_varnames != code2.co_varnames 177 | ): 178 | instrs1 = list(dis.get_instructions(code1)) 179 | instrs2 = list(dis.get_instructions(code2)) 180 | self.assertEqual(len(instrs1), len(instrs2)) 181 | for i1, i2 in zip(instrs1, instrs2, strict=False): 182 | self.assertEqual(i1.opcode, i2.opcode) 183 | if isinstance(i1.argval, types.CodeType): 184 | pass 185 | else: 186 | self.assertEqual(i1.argval, i2.argval) 187 | else: 188 | self.assertSequenceEqual(code1.co_code, code2.co_code) 189 | 190 | self.assertEqual(code1.co_flags, code2.co_flags) 191 | 192 | def assertBlocksEqual(self, code, *expected_blocks): 193 | self.assertEqual(len(code), len(expected_blocks)) 194 | 195 | for block1, block2 in zip(code, expected_blocks, strict=False): 196 | self.assertInstructionListEqual(list(block1), block2) 197 | -------------------------------------------------------------------------------- /doc/cfg.rst: -------------------------------------------------------------------------------- 1 | ************************ 2 | Control Flow Graph (CFG) 3 | ************************ 4 | 5 | To analyze or optimize existing code, ``bytecode`` provides a 6 | :class:`ControlFlowGraph` class which is a `control flow graph (CFG) 7 | `_. 8 | 9 | The control flow graph is used to perform the stack depth analysis when 10 | converting to code. Because it is better at identifying dead code than CPython 11 | it can lead to reduced stack size. 
12 | 13 | Example 14 | ======= 15 | 16 | Dump the control flow graph of the :ref:`conditional jump example 17 | <ex-cond-jump>`:: 18 | 19 | from bytecode import Label, Instr, Bytecode, ControlFlowGraph, dump_bytecode 20 | 21 | label_else = Label() 22 | label_print = Label() 23 | bytecode = Bytecode([Instr('LOAD_GLOBAL', (True, 'print')), 24 | Instr('LOAD_NAME', 'test'), 25 | Instr('POP_JUMP_IF_FALSE', label_else), 26 | Instr('LOAD_CONST', 'yes'), 27 | Instr('JUMP_FORWARD', label_print), 28 | label_else, 29 | Instr('LOAD_CONST', 'no'), 30 | label_print, 31 | Instr('CALL', 1), 32 | Instr('LOAD_CONST', None), 33 | Instr('RETURN_VALUE')]) 34 | 35 | blocks = ControlFlowGraph.from_bytecode(bytecode) 36 | dump_bytecode(blocks) 37 | 38 | Output:: 39 | 40 | block1: 41 | LOAD_GLOBAL (True, 'print') 42 | LOAD_NAME 'test' 43 | POP_JUMP_IF_FALSE <block3> 44 | -> block2 45 | 46 | block2: 47 | LOAD_CONST 'yes' 48 | JUMP_FORWARD <block4> 49 | 50 | block3: 51 | LOAD_CONST 'no' 52 | -> block4 53 | 54 | block4: 55 | CALL 1 56 | LOAD_CONST None 57 | RETURN_VALUE 58 | 59 | We get 4 blocks: 60 | 61 | * block #1 is the start block and ends with ``POP_JUMP_IF_FALSE`` conditional 62 | jump and is followed by the block #2 63 | * block #2 ends with ``JUMP_FORWARD`` unconditional jump 64 | * block #3 does not contain a jump and is followed by the block #4 65 | * block #4 is the final block 66 | 67 | The start block is always the first block. 68 | 69 | 70 | Analyze the control flow graph 71 | ============================== 72 | 73 | The ``bytecode`` module provides two ways to iterate on blocks: 74 | 75 | * iterate on the basic blocks as a sequential list 76 | * browse the graph by following jumps and links to next blocks 77 | 78 | Iterate on basic blocks 79 | ----------------------- 80 | 81 | Iterating on basic blocks is as simple as this loop:: 82 | 83 | for block in blocks: 84 | ...
85 | 86 | Example of a ``display_blocks()`` function:: 87 | 88 | from bytecode import UNSET, Label, Instr, Bytecode, BasicBlock, ControlFlowGraph 89 | 90 | def display_blocks(blocks): 91 | for block in blocks: 92 | print("Block #%s" % (1 + blocks.get_block_index(block))) 93 | for instr in block: 94 | if isinstance(instr.arg, BasicBlock): 95 | arg = "<block #%s>" % (1 + blocks.get_block_index(instr.arg)) 96 | elif instr.arg is not UNSET: 97 | arg = repr(instr.arg) 98 | else: 99 | arg = '' 100 | print(" %s %s" % (instr.name, arg)) 101 | 102 | if block.next_block is not None: 103 | print(" => <block #%s>" 104 | % (1 + blocks.get_block_index(block.next_block))) 105 | 106 | print() 107 | 108 | label_else = Label() 109 | label_print = Label() 110 | bytecode = Bytecode([Instr('LOAD_GLOBAL', (True, 'print')), 111 | Instr('LOAD_NAME', 'test'), 112 | Instr('POP_JUMP_IF_FALSE', label_else), 113 | Instr('LOAD_CONST', 'yes'), 114 | Instr('JUMP_FORWARD', label_print), 115 | label_else, 116 | Instr('LOAD_CONST', 'no'), 117 | label_print, 118 | Instr('CALL', 1), 119 | Instr('LOAD_CONST', None), 120 | Instr('RETURN_VALUE')]) 121 | 122 | blocks = ControlFlowGraph.from_bytecode(bytecode) 123 | display_blocks(blocks) 124 | 125 | Output:: 126 | 127 | Block #1 128 | LOAD_GLOBAL (True, 'print') 129 | LOAD_NAME 'test' 130 | POP_JUMP_IF_FALSE <block #3> 131 | => <block #2> 132 | 133 | Block #2 134 | LOAD_CONST 'yes' 135 | JUMP_FORWARD <block #4> 136 | 137 | Block #3 138 | LOAD_CONST 'no' 139 | => <block #4> 140 | 141 | Block #4 142 | CALL 1 143 | LOAD_CONST None 144 | RETURN_VALUE 145 | 146 | .. note:: 147 | :class:`SetLineno` is not handled in the example to keep it simple. 148 | 149 | 150 | Browse the graph 151 | ---------------- 152 | 153 | A recursive function is a simple way to browse the control flow graph.
154 | 155 | Example of a recursive ``display_block()`` function:: 156 | 157 | from bytecode import UNSET, Label, Instr, Bytecode, BasicBlock, ControlFlowGraph 158 | 159 | def display_block(blocks, block, seen=None): 160 | # avoid loop: remember which blocks were already seen 161 | if seen is None: 162 | seen = set() 163 | if id(block) in seen: 164 | return 165 | seen.add(id(block)) 166 | 167 | # display instructions of the block 168 | print("Block #%s" % (1 + blocks.get_block_index(block))) 169 | for instr in block: 170 | if isinstance(instr.arg, BasicBlock): 171 | arg = "<block #%s>" % (1 + blocks.get_block_index(instr.arg)) 172 | elif instr.arg is not UNSET: 173 | arg = repr(instr.arg) 174 | else: 175 | arg = '' 176 | print(" %s %s" % (instr.name, arg)) 177 | 178 | # is the block followed directly by another block? 179 | if block.next_block is not None: 180 | print(" => <block #%s>" 181 | % (1 + blocks.get_block_index(block.next_block))) 182 | 183 | print() 184 | 185 | # display the next block 186 | if block.next_block is not None: 187 | display_block(blocks, block.next_block, seen) 188 | 189 | # display the block linked by jump (if any) 190 | target_block = block.get_jump() 191 | if target_block is not None: 192 | display_block(blocks, target_block, seen) 193 | 194 | label_else = Label() 195 | label_print = Label() 196 | bytecode = Bytecode([Instr('LOAD_GLOBAL', (True, 'print')), 197 | Instr('LOAD_NAME', 'test'), 198 | Instr('POP_JUMP_IF_FALSE', label_else), 199 | Instr('LOAD_CONST', 'yes'), 200 | Instr('JUMP_FORWARD', label_print), 201 | label_else, 202 | Instr('LOAD_CONST', 'no'), 203 | label_print, 204 | Instr('CALL', 1), 205 | Instr('LOAD_CONST', None), 206 | Instr('RETURN_VALUE')]) 207 | 208 | blocks = ControlFlowGraph.from_bytecode(bytecode) 209 | display_block(blocks, blocks[0]) 210 | 211 | Output:: 212 | 213 | Block #1 214 | LOAD_GLOBAL (True, 'print') 215 | LOAD_NAME 'test' 216 | POP_JUMP_IF_FALSE <block #3> 217 | => <block #2> 218 | 219 | Block #2 220 | LOAD_CONST 'yes' 221 | JUMP_FORWARD <block #4> 222 |
223 | Block #4 224 | CALL 1 225 | LOAD_CONST None 226 | RETURN_VALUE 227 | 228 | Block #3 229 | LOAD_CONST 'no' 230 | => <block #4> 231 | 232 | Block numbers are not displayed in sequential order: block #4 is displayed 233 | before block #3. 234 | 235 | .. note:: 236 | Dead code (unreachable blocks) is not displayed by ``display_block``. 237 | -------------------------------------------------------------------------------- /src/bytecode/flags.py: -------------------------------------------------------------------------------- 1 | import opcode as _opcode 2 | from enum import IntFlag 3 | from typing import Optional 4 | 5 | # alias to keep the 'bytecode' variable free 6 | import bytecode as _bytecode 7 | 8 | from .instr import DUAL_ARG_OPCODES, RESUME_OPCODE, CellVar, FreeVar 9 | from .utils import PY312, PY313, PY314 10 | 11 | 12 | class CompilerFlags(IntFlag): 13 | """Possible values of the co_flags attribute of Code object. 14 | 15 | Note: We do not rely on inspect values here as some of them are missing and 16 | furthermore would be version dependent.
def infer_flags(
    bytecode: "_bytecode.Bytecode |_bytecode.ConcreteBytecode |_bytecode.ControlFlowGraph",
    is_async: bool | None = None,
) -> "CompilerFlags":
    """Infer the proper flags for a bytecode based on the instructions.

    Because the bytecode does not have enough context to guess if a function
    is asynchronous the algorithm tries to be conservative and will never turn
    a previously async code into a sync one.

    Parameters
    ----------
    bytecode : Bytecode | ConcreteBytecode | ControlFlowGraph
        Bytecode for which to infer the proper flags
    is_async : bool | None, optional
        Force the code to be marked as asynchronous if True, prevent it from
        being marked as asynchronous if False and simply infer the best
        solution based on the opcode and the existing flag if None.

    Returns
    -------
    CompilerFlags
        The inferred flags. ``bytecode.flags`` itself is not modified.

    Raises
    ------
    ValueError
        If ``bytecode`` is not a supported object, if a YIELD_VALUE is not
        followed by a RESUME instruction, or if the requested or pre-existing
        async state contradicts the instructions found.

    """
    flags = CompilerFlags(0)
    if not isinstance(
        bytecode,
        (_bytecode.Bytecode, _bytecode.ConcreteBytecode, _bytecode.ControlFlowGraph),
    ):
        msg = (
            "Expected a Bytecode, ConcreteBytecode or ControlFlowGraph instance not %s"
        )
        # Wrap in a tuple so that tuple arguments are formatted, not unpacked.
        raise ValueError(msg % (bytecode,))

    instructions = (
        bytecode._get_instructions()
        if isinstance(bytecode, _bytecode.ControlFlowGraph)
        else bytecode
    )

    # Items that can appear in an instruction stream without being opcodes.
    pseudo_instrs = (
        _bytecode.SetLineno,
        _bytecode.Label,
        _bytecode.TryBegin,
        _bytecode.TryEnd,
    )

    # Iterate over the instructions and inspect the arguments
    is_concrete = isinstance(bytecode, _bytecode.ConcreteBytecode)
    optimized = True
    # Concrete bytecode declares its cell and free variables explicitly:
    # either kind is enough to rule out NOFREE.
    has_free = bool(bytecode.cellvars or bytecode.freevars) if is_concrete else False
    known_async = False
    known_generator = False
    possible_generator = False
    instr_iter = iter(instructions)
    for instr in instr_iter:
        if isinstance(instr, pseudo_instrs):
            continue
        opcode = instr.opcode
        if opcode in UNOPTIMIZED_OPCODES:
            optimized = False
        elif opcode in ASYNC_OPCODES:
            known_async = True
        elif opcode == YIELD_VALUE_OPCODE:
            # The RESUME following the yield tells which construct emitted it.
            # Consume the shared iterator up to the next real instruction.
            ni = next(instr_iter, None)
            while isinstance(ni, pseudo_instrs):
                ni = next(instr_iter, None)
            if ni is None or ni.opcode != RESUME_OPCODE:
                raise ValueError(
                    "YIELD_VALUE must be followed by a RESUME instruction"
                )
            # Both low bits set: the yield implements an await.
            if (ni.arg & 3) != 3:
                known_generator = True
            else:
                known_async = True
        elif opcode in GENERATOR_LIKE_OPCODES:
            possible_generator = True
        elif opcode in _opcode.hasfree:
            has_free = True
        elif (
            not is_concrete
            and opcode in DUAL_ARG_OPCODES
            and (isinstance(instr.arg[0], CellVar) or isinstance(instr.arg[1], CellVar))
        ):
            has_free = True
        elif (
            PY313
            and opcode in _opcode.haslocal
            and isinstance(instr.arg, (CellVar, FreeVar))
        ):
            has_free = True

    # Identify optimized code
    if optimized:
        flags |= CompilerFlags.OPTIMIZED

    # Check for free variables
    if not has_free:
        flags |= CompilerFlags.NOFREE

    # Copy flags for which we cannot infer the right value
    flags |= bytecode.flags & (
        CompilerFlags.NEWLOCALS
        | CompilerFlags.VARARGS
        | CompilerFlags.VARKEYWORDS
        | CompilerFlags.NESTED
    )

    # If performing inference or forcing an async behavior, first inspect
    # the flags since this is the only way to identify iterable coroutines
    if is_async in (None, True):
        if (
            bytecode.flags & CompilerFlags.COROUTINE
            or bytecode.flags & CompilerFlags.ASYNC_GENERATOR
        ):
            if known_generator:
                flags |= CompilerFlags.ASYNC_GENERATOR
            else:
                flags |= CompilerFlags.COROUTINE
        elif bytecode.flags & CompilerFlags.ITERABLE_COROUTINE:
            if known_async:
                msg = (
                    "The ITERABLE_COROUTINE flag is set but bytecode that "
                    "can only be used in async functions have been "
                    "detected. Please unset that flag before performing "
                    "inference."
                )
                raise ValueError(msg)
            flags |= CompilerFlags.ITERABLE_COROUTINE

        # If the code was not asynchronous before determine if it should now be
        # asynchronous based on the opcode and the is_async argument.
        else:
            if known_async:
                # YIELD_FROM is not allowed in async generator
                if known_generator:
                    flags |= CompilerFlags.ASYNC_GENERATOR
                else:
                    flags |= CompilerFlags.COROUTINE

            elif known_generator or possible_generator:
                if is_async:
                    if known_generator:
                        flags |= CompilerFlags.ASYNC_GENERATOR
                    else:
                        flags |= CompilerFlags.COROUTINE
                else:
                    flags |= CompilerFlags.GENERATOR

            elif is_async:
                flags |= CompilerFlags.COROUTINE

    # If the code should not be asynchronous, check first it is possible and
    # next set the GENERATOR flag if relevant
    else:
        if known_async:
            raise ValueError(
                "The is_async argument is False but bytecodes "
                "that can only be used in async functions have "
                "been detected."
            )

        if known_generator or possible_generator:
            flags |= CompilerFlags.GENERATOR

    flags |= bytecode.flags & CompilerFlags.FUTURE_GENERATOR_STOP

    return flags
class FlagsTests(unittest.TestCase):
    """Tests for flag inference (``infer_flags`` / ``update_flags``)."""

    # (opcode name, RESUME argument, flag expected once the code is async).
    # YIELD_VALUE is used for normal await flow in Py 3.11+ when followed
    # by a RESUME whose lowest two bits are set to 3
    ASYNC_YIELD_CASES = (
        ("YIELD_VALUE", 1, CompilerFlags.ASYNC_GENERATOR),
        ("YIELD_VALUE", 2, CompilerFlags.ASYNC_GENERATOR),
        ("YIELD_VALUE", 3, CompilerFlags.COROUTINE),
    )

    @staticmethod
    def _concrete(name):
        """Build a concrete instruction, with the argument required on 3.12+."""
        return ConcreteInstr(name, 0) if PY312 else ConcreteInstr(name)

    def test_type_validation_on_inference(self):
        with self.assertRaises(ValueError):
            infer_flags(1)

    def test_flag_inference(self):
        # Check no loss of non-infered flags
        code = ControlFlowGraph()
        code.flags |= (
            CompilerFlags.NEWLOCALS
            | CompilerFlags.VARARGS
            | CompilerFlags.VARKEYWORDS
            | CompilerFlags.NESTED
            | CompilerFlags.FUTURE_GENERATOR_STOP
        )
        code.update_flags()
        for f in (
            CompilerFlags.NEWLOCALS,
            CompilerFlags.VARARGS,
            CompilerFlags.VARKEYWORDS,
            CompilerFlags.NESTED,
            CompilerFlags.NOFREE,
            CompilerFlags.OPTIMIZED,
            CompilerFlags.FUTURE_GENERATOR_STOP,
        ):
            with self.subTest(flag=f):
                self.assertTrue(bool(code.flags & f))

        # Infer optimized and nofree
        code = Bytecode()
        flags = infer_flags(code)
        self.assertTrue(bool(flags & CompilerFlags.OPTIMIZED))
        self.assertTrue(bool(flags & CompilerFlags.NOFREE))
        code.append(Instr("STORE_NAME", "a"))
        flags = infer_flags(code)
        self.assertFalse(bool(flags & CompilerFlags.OPTIMIZED))
        self.assertTrue(bool(flags & CompilerFlags.NOFREE))
        code.append(Instr("STORE_DEREF", FreeVar("b")))
        code.update_flags()
        self.assertFalse(bool(code.flags & CompilerFlags.OPTIMIZED))
        self.assertFalse(bool(code.flags & CompilerFlags.NOFREE))

    def test_function_rountrip(self):
        for f in FLAG_INFERENCE_TEST_CASES:
            for cls in (Bytecode, ConcreteBytecode):
                with self.subTest(f"Testing {f.__name__} with {cls}"):
                    b = cls.from_code(f.__code__)
                    existing = copy(b.flags)
                    b.update_flags()
                    # NOTE: as far as I can tell NOFREE is not used by CPython anymore
                    # it shows up nowhere in the interpreter logic and only exist in
                    # dis and inspect...
                    self.assertEqual(
                        existing & ~CompilerFlags.NOFREE,
                        b.flags & ~CompilerFlags.NOFREE,
                    )

    def test_async_gen_no_flag_is_async_None(self):
        # Test inference in the absence of any flag set on the bytecode

        # Infer generator
        code = ConcreteBytecode()
        code.append(self._concrete("YIELD_VALUE"))
        code.append(ConcreteInstr("RESUME", 1))
        code.update_flags()
        self.assertTrue(bool(code.flags & CompilerFlags.GENERATOR))

        # Infer coroutine
        code = ConcreteBytecode()
        code.append(ConcreteInstr("GET_AWAITABLE", 0))
        code.update_flags()
        self.assertTrue(bool(code.flags & CompilerFlags.COROUTINE))

        # Infer coroutine or async generator
        for i, r, expected in self.ASYNC_YIELD_CASES:
            # Label by RESUME argument too: the opcode is the same everywhere.
            with self.subTest(instr=i, resume=r):
                code = ConcreteBytecode()
                code.append(ConcreteInstr("GET_AWAITABLE", 0))
                code.append(self._concrete(i))
                code.append(ConcreteInstr("RESUME", r))
                code.update_flags()
                self.assertTrue(bool(code.flags & expected))

    def test_async_gen_no_flag_is_async_True(self):
        # Test inference when we request an async function

        # Force coroutine
        code = ConcreteBytecode()
        code.update_flags(is_async=True)
        self.assertTrue(bool(code.flags & CompilerFlags.COROUTINE))

        # Infer coroutine or async generator
        for i, r, expected in self.ASYNC_YIELD_CASES:
            with self.subTest(instr=i, resume=r):
                code = ConcreteBytecode()
                code.append(self._concrete(i))
                code.append(ConcreteInstr("RESUME", r))
                code.update_flags(is_async=True)
                self.assertEqual(code.flags & expected, expected)

    def test_async_gen_no_flag_is_async_False(self):
        # Test inference when we request a non-async function

        # Infer generator
        code = ConcreteBytecode()
        code.append(self._concrete("YIELD_VALUE"))
        code.append(ConcreteInstr("RESUME", 1))
        code.flags = CompilerFlags(CompilerFlags.COROUTINE)
        code.update_flags(is_async=False)
        self.assertTrue(bool(code.flags & CompilerFlags.GENERATOR))

        # Abort on coroutine
        code = ConcreteBytecode()
        code.append(ConcreteInstr("GET_AWAITABLE", 0))
        code.flags = CompilerFlags(CompilerFlags.COROUTINE)
        with self.assertRaises(ValueError):
            code.update_flags(is_async=False)

    def test_async_gen_flags(self):
        # Test inference in the presence of pre-existing flags

        for is_async in (None, True):
            # Infer generator
            code = ConcreteBytecode()
            code.append(self._concrete("YIELD_VALUE"))
            code.append(ConcreteInstr("RESUME", 1))
            for f, expected in (
                (CompilerFlags.COROUTINE, CompilerFlags.ASYNC_GENERATOR),
                (CompilerFlags.ASYNC_GENERATOR, CompilerFlags.ASYNC_GENERATOR),
                (CompilerFlags.ITERABLE_COROUTINE, CompilerFlags.ITERABLE_COROUTINE),
            ):
                with self.subTest(is_async=is_async, flag=f):
                    code.flags = CompilerFlags(f)
                    code.update_flags(is_async=is_async)
                    self.assertTrue(bool(code.flags & expected))

            # Crash on ITERABLE_COROUTINE with async bytecode
            code = ConcreteBytecode()
            code.append(ConcreteInstr("GET_AWAITABLE", 0))
            code.flags = CompilerFlags(CompilerFlags.ITERABLE_COROUTINE)
            with self.assertRaises(ValueError):
                code.update_flags(is_async=is_async)
nested_try_finally(): 90 | try: 91 | a = 1 92 | try: 93 | b = 2 94 | finally: 95 | e = min(1, 2) 96 | c = 3 97 | finally: 98 | d = min(1, 2) 99 | 100 | return a 101 | 102 | 103 | # This case exhibits several pitfalls: 104 | # - a TryBegin appears in the block as a reraise requiring to create an artificial 105 | # TryBegin/TryEnd pair 106 | # - complex exit conditions through jumps 107 | # - TryEnd following a non conditional jump 108 | def nested_try_with_looping_construct(): 109 | try: 110 | try: 111 | a = 1 112 | finally: 113 | b = min(1, 2) 114 | 115 | while a: 116 | c = 0 117 | if min(5, 6): 118 | break 119 | finally: 120 | c = 3 121 | 122 | return a 123 | 124 | 125 | # Test converting from bytecode to concrete in the presence of extended arg 126 | # which means the number of instruction before generating extended arg is not 127 | # the offset. 128 | # Here if we ignore this we end with wrong start/stop value in the table 129 | def try_except_with_extended_arg(): 130 | a = [1] 131 | b = [(1, 2), (3, 4)] 132 | for x in a: 133 | if a[0] is b[1]: 134 | try: 135 | a.append(b.index((a[0], 2))) 136 | except BrokenPipeError: 137 | sys.stdout.write(str(a)) 138 | sys.stdout.flush() 139 | else: 140 | c = 1 141 | d = 2 142 | b.append(a.append((c, d))) 143 | sys.stdout.write(str(b)) 144 | sys.stdout.flush() 145 | 146 | 147 | # Here extended arg can lead to omitting a TryEnd because we went over the offset 148 | # value at which we expected it. 149 | def try_except_with_extended_arg2(): 150 | a = list(range(10)) 151 | 152 | with contextlib.nullcontext() as selector: 153 | while a.pop(): 154 | # timeout = self._remaining_time(endtime) 155 | if sys is not None and sys.hexversion < 0: 156 | sys.stdout.write(a) 157 | raise RuntimeError("test") 158 | 159 | for key in sys.version_info: 160 | # Dead code for the execution but help trigger the bug this test 161 | # is meant to avoid regressing. 
162 | if key is sys.stdin: 163 | chunk = a[self._input_offset : self._input_offset + _PIPE_BUF] 164 | try: 165 | self._input_offset += os.write(key.fd, chunk) 166 | except BrokenPipeError: 167 | selector.unregister(key.fileobj) 168 | key.fileobj.close() 169 | else: 170 | if self._input_offset >= len(self._input): 171 | selector.unregister(key.fileobj) 172 | key.fileobj.close() 173 | 174 | 175 | def try_except_in_except(): 176 | try: 177 | a = 1 178 | except Exception: 179 | d = 4 180 | try: 181 | b = 2 182 | except Exception: 183 | return min(1, 2) 184 | c = 3 185 | 186 | return a 187 | 188 | 189 | def try_finally_in_except(): 190 | try: 191 | a = min(1, 2) 192 | except Exception: 193 | try: 194 | b = min(3, 4) 195 | finally: 196 | c = 1 197 | return c 198 | return a 199 | 200 | 201 | def try_except_in_else(): 202 | try: 203 | a = min(1, 2) 204 | except Exception: 205 | a = 1 206 | else: 207 | try: 208 | b = min(3, 4) 209 | except Exception: 210 | b = 1 211 | return b 212 | 213 | return a 214 | 215 | 216 | def try_finally_in_else(): 217 | try: 218 | a = "a" 219 | except ValueError as e: 220 | return 221 | else: 222 | try: 223 | pass 224 | finally: 225 | a = "a" 226 | 227 | 228 | def try_except_in_finally(): 229 | try: 230 | a = min(1, 2) 231 | finally: 232 | try: 233 | a = max(1, 2) 234 | except Exception: 235 | a = 1 236 | 237 | return a 238 | 239 | 240 | def try_finally_in_finally(): 241 | a = 0 242 | try: 243 | a = min(1, 2) 244 | finally: 245 | try: 246 | a = max(1, 2) 247 | finally: 248 | a = min(a, 1) 249 | 250 | return a 251 | 252 | 253 | def try_except_group(): 254 | try: 255 | a = 1 256 | except* ValueError: 257 | b = min(1, 2) 258 | return a 259 | 260 | 261 | def with_no_store(): 262 | with contextlib.nullcontext("1"): 263 | a = "1" 264 | return a 265 | 266 | 267 | def with_store(): 268 | with contextlib.nullcontext("1") as b: 269 | a = "1" 270 | return a 271 | 272 | 273 | def try_with(): 274 | try: 275 | with contextlib.nullcontext("1"): 276 | a = "1" 
277 | except Exception: 278 | return min("1", "2") 279 | 280 | return a 281 | 282 | 283 | def with_try(): 284 | with contextlib.nullcontext("1"): 285 | try: 286 | b = "1" 287 | except Exception: 288 | return min("1", "2") 289 | 290 | return b 291 | 292 | 293 | async def async_with_no_store(): 294 | async with contextlib.nullcontext(): 295 | a = "1" 296 | return a 297 | 298 | 299 | async def async_with_store(): 300 | async with contextlib.nullcontext() as b: 301 | a = "1" 302 | return a 303 | 304 | 305 | async def try_async_with(): 306 | try: 307 | async with contextlib.nullcontext(1): 308 | a = "1" 309 | except Exception: 310 | return min("1", "2") 311 | 312 | return a 313 | 314 | 315 | async def async_with_try(): 316 | async with contextlib.nullcontext(1): 317 | try: 318 | b = "1" 319 | except Exception: 320 | return min(1.0, 2.0) 321 | 322 | return b 323 | 324 | 325 | TEST_CASES = [ 326 | try_except, 327 | try_multi_except, 328 | try_finally, 329 | try_except_else, 330 | try_except_finally, 331 | try_except_else_finally, 332 | nested_try, 333 | nested_try_finally, 334 | nested_try_with_looping_construct, 335 | try_except_in_except, 336 | try_except_in_else, 337 | try_except_in_finally, 338 | try_finally_in_except, 339 | try_finally_in_else, 340 | try_finally_in_finally, 341 | try_except_with_extended_arg, 342 | try_except_with_extended_arg2, 343 | with_no_store, 344 | with_store, 345 | try_with, 346 | with_try, 347 | async_with_no_store, 348 | async_with_store, 349 | try_async_with, 350 | async_with_try, 351 | ] 352 | 353 | TEST_CASES.insert(0, try_except_group) # type: ignore 354 | 355 | 356 | if __name__ == "__main__": 357 | import dis 358 | import inspect 359 | 360 | for f in TEST_CASES: 361 | print("--------------------------------------------------------------") 362 | for l in inspect.getsourcelines(f)[0]: 363 | print(l.rstrip()) 364 | print() 365 | dis.dis(f) 366 | print() 367 | 
"""Import-hook helpers: a chained loader and a meta-path watchdog base class.

``_ImportHookChainedLoader`` wraps the loader that would normally load a
module so that callbacks can run after the module is executed and code
transformers can rewrite the module's code object before it is executed.
``BaseModuleWatchdog`` is a ``sys.meta_path`` finder that installs such a
chained loader on every spec it resolves.
"""

import abc
import sys
import typing as t
from importlib._bootstrap import _init_module_attrs  # type: ignore
from importlib.abc import Loader
from importlib.machinery import ModuleSpec
from importlib.util import find_spec
from types import CodeType, ModuleType

# Signature of a code transformer: receives the module's code object and the
# module being executed, returns the (possibly rewritten) code object.
TransformerType = t.Callable[[CodeType, ModuleType], CodeType]


def find_loader(fullname: str) -> t.Optional[Loader]:
    """Return the loader of the spec found for *fullname*, or ``None``.

    ``None`` is returned both when no spec is found and when the spec has no
    ``loader`` attribute.
    """
    return getattr(find_spec(fullname), "loader", None)


def is_namespace_spec(spec: ModuleSpec) -> bool:
    """Return ``True`` if *spec* has no origin but has search locations."""
    return spec.origin is None and spec.submodule_search_locations is not None


class _ImportHookChainedLoader:
    """Loader wrapper that runs callbacks and code transformers.

    Attribute access that is not satisfied by the wrapper itself is proxied
    to the wrapped ``loader``.
    """

    def __init__(
        self, loader: t.Optional[Loader], spec: t.Optional[ModuleSpec] = None
    ) -> None:
        """Wrap *loader* (possibly ``None``) for the optional *spec*."""
        self.loader = loader
        self.spec = spec

        # Both registries are keyed by an arbitrary object so that registering
        # again under the same key replaces the previous entry.
        self.callbacks: t.Dict[t.Any, t.Callable[[ModuleType], None]] = {}
        self.transformers: t.Dict[t.Any, TransformerType] = {}

        # A missing loader is generally an indication of a namespace package.
        # Only expose create_module/exec_module when the wrapped loader has
        # them (or when there is no loader at all).
        if loader is None or hasattr(loader, "create_module"):
            self.create_module = self._create_module
        if loader is None or hasattr(loader, "exec_module"):
            self.exec_module = self._exec_module

    def __getattr__(self, name):
        """Proxy any other attribute access to the underlying loader.

        Raises:
            AttributeError: if *name* is ``"loader"`` (the wrapper was never
                initialised) or the wrapped loader lacks the attribute.
        """
        # __getattr__ only runs when normal lookup failed. If "loader" itself
        # is missing (instance created through __new__, e.g. by copy/pickle),
        # reading self.loader below would re-enter this method forever and
        # surface as RecursionError instead of AttributeError.
        if name == "loader":
            raise AttributeError(name)
        return getattr(self.loader, name)

    def namespace_module(self, spec: ModuleSpec) -> ModuleType:
        """Create the module object for the namespace package *spec*.

        On return both ``spec.loader`` and ``module.__loader__`` are this
        wrapper, and ``self.loader`` is whatever loader
        ``_init_module_attrs`` assigned to the spec.
        """
        module = ModuleType(spec.name)
        # Pretend that we do not have a loader (this would be self), to
        # allow _init_module_attrs to create the appropriate NamespaceLoader
        # for the namespace module.
        spec.loader = None

        _init_module_attrs(spec, module, override=True)

        # Chain the loaders
        self.loader = spec.loader
        module.__loader__ = spec.loader = self  # type: ignore[assignment]

        return module

    def add_callback(
        self, key: t.Any, callback: t.Callable[[ModuleType], None]
    ) -> None:
        """Register *callback* under *key*, replacing any previous entry."""
        self.callbacks[key] = callback

    def add_transformer(self, key: t.Any, transformer: TransformerType) -> None:
        """Register *transformer* under *key*, replacing any previous entry."""
        self.transformers[key] = transformer

    def call_back(self, module: ModuleType) -> None:
        """Invoke every registered callback with *module*."""
        if module.__name__ == "pkg_resources":
            # DEV: pkg_resources support to prevent errors such as
            # NotImplementedError: Can't perform this operation for unregistered
            # loader type
            module.register_loader_type(
                _ImportHookChainedLoader, module.DefaultProvider
            )

        for callback in self.callbacks.values():
            callback(module)

    def load_module(self, fullname: str) -> t.Optional[ModuleType]:
        """Load *fullname* through the wrapped loader, then run callbacks.

        Without a wrapped loader the module is built from ``self.spec`` as a
        namespace module and stored in ``sys.modules``; if there is no spec
        either, ``None`` is returned and no callback runs.
        """
        if self.loader is None:
            if self.spec is None:
                return None
            sys.modules[self.spec.name] = module = self.namespace_module(self.spec)
        else:
            module = self.loader.load_module(fullname)

        self.call_back(module)

        return module

    def _create_module(self, spec):
        """Delegate module creation, handling namespace specs ourselves."""
        if self.loader is not None:
            return self.loader.create_module(spec)

        if is_namespace_spec(spec):
            return self.namespace_module(spec)

        return None

    def _exec_module(self, module: ModuleType) -> None:
        """Execute *module* with transformers applied, then run callbacks."""
        _get_code = getattr(self.loader, "get_code", None)
        if _get_code is not None:
            # Shadow the loader's get_code with a wrapper so that every
            # registered transformer sees the code object before the wrapped
            # loader executes it.
            def get_code(_loader, fullname):
                code = _get_code(fullname)

                for callback in self.transformers.values():
                    code = callback(code, module)

                return code

            self.loader.get_code = get_code.__get__(self.loader, type(self.loader))  # type: ignore[union-attr]

        if self.loader is None:
            # No loader to delegate to: only namespace modules are handled,
            # by registering them in sys.modules.
            spec = getattr(module, "__spec__", None)
            if spec is not None and is_namespace_spec(spec):
                sys.modules[spec.name] = module
        else:
            self.loader.exec_module(module)

        self.call_back(module)


class BaseModuleWatchdog:
    """Base module watchdog.

    Invokes ``after_import`` every time a new module is imported.
    """

    # The installed instance, if any. The attribute is inherited, which is
    # why is_installed() also checks the exact type.
    _instance: t.Optional["BaseModuleWatchdog"] = None

    def __init__(self) -> None:
        """Create the watchdog and register with pkg_resources if loaded."""
        # Names currently being resolved by this finder; used to avoid
        # re-entering ourselves when we call find_spec/find_loader.
        self._finding: t.Set[str] = set()

        # DEV: pkg_resources support to prevent errors such as
        # NotImplementedError: Can't perform this operation for unregistered
        # loader type
        pkg_resources = sys.modules.get("pkg_resources")
        if pkg_resources is not None:
            pkg_resources.register_loader_type(
                _ImportHookChainedLoader, pkg_resources.DefaultProvider
            )

    def _add_to_meta_path(self) -> None:
        """Insert this finder at the front of ``sys.meta_path``."""
        sys.meta_path.insert(0, self)  # type: ignore[arg-type]

    @classmethod
    def _find_in_meta_path(cls) -> t.Optional[int]:
        """Return the index of the first finder of exactly this class."""
        for i, meta_path in enumerate(sys.meta_path):
            if type(meta_path) is cls:
                return i
        return None

    @classmethod
    def _remove_from_meta_path(cls) -> None:
        """Remove the first finder of exactly this class.

        Raises:
            RuntimeError: if no such finder is in ``sys.meta_path``.
        """
        i = cls._find_in_meta_path()

        if i is None:
            raise RuntimeError("%s is not installed" % cls.__name__)

        sys.meta_path.pop(i)

    def after_import(self, module: ModuleType) -> None:
        """Hook called with each imported module; subclasses must override."""
        raise NotImplementedError()

    def transform(self, code: CodeType, _module: ModuleType) -> CodeType:
        """Hook to rewrite a module's code object; returns it unchanged here."""
        return code

    def find_module(
        self, fullname: str, path: t.Optional[str] = None
    ) -> t.Optional[Loader]:
        """Return a chained loader for *fullname*, or ``None``.

        ``None`` is returned when no loader is found or when this finder is
        already resolving *fullname*.
        """
        if fullname in self._finding:
            return None

        self._finding.add(fullname)

        try:
            original_loader = find_loader(fullname)
            if original_loader is not None:
                loader = (
                    _ImportHookChainedLoader(original_loader)
                    if not isinstance(original_loader, _ImportHookChainedLoader)
                    else original_loader
                )

                # Keyed by the watchdog class: one callback/transformer per
                # watchdog type on a given loader.
                loader.add_callback(type(self), self.after_import)
                loader.add_transformer(type(self), self.transform)

                return t.cast(Loader, loader)

        finally:
            self._finding.remove(fullname)

        return None

    def find_spec(
        self,
        fullname: str,
        path: t.Optional[str] = None,
        target: t.Optional[ModuleType] = None,
    ) -> t.Optional[ModuleSpec]:
        """Return the spec for *fullname* with its loader chained, or ``None``.

        ``None`` is returned when the lookup fails, finds nothing, or this
        finder is already resolving *fullname*.
        """
        if fullname in self._finding:
            return None

        self._finding.add(fullname)

        try:
            try:
                # Best effort
                spec = find_spec(fullname)
            except Exception:
                return None

            if spec is None:
                return None

            loader = getattr(spec, "loader", None)

            if not isinstance(loader, _ImportHookChainedLoader):
                spec.loader = t.cast(Loader, _ImportHookChainedLoader(loader, spec))

            t.cast(_ImportHookChainedLoader, spec.loader).add_callback(
                type(self), self.after_import
            )
            t.cast(_ImportHookChainedLoader, spec.loader).add_transformer(
                type(self), self.transform
            )

            return spec

        finally:
            self._finding.remove(fullname)

    @classmethod
    def _check_installed(cls) -> None:
        """Raise ``RuntimeError`` unless this watchdog class is installed."""
        if not cls.is_installed():
            raise RuntimeError("%s is not installed" % cls.__name__)

    @classmethod
    def install(cls) -> None:
        """Install the module watchdog.

        Raises:
            RuntimeError: if this watchdog class is already installed.
        """
        if cls.is_installed():
            raise RuntimeError("%s is already installed" % cls.__name__)

        cls._instance = cls()
        cls._instance._add_to_meta_path()

    @classmethod
    def is_installed(cls) -> bool:
        """Check whether this module watchdog class is installed."""
        return cls._instance is not None and type(cls._instance) is cls

    @classmethod
    def uninstall(cls) -> None:
        """Uninstall the module watchdog.

        This will uninstall only the most recently installed instance of this
        class.

        Raises:
            RuntimeError: if this watchdog class is not installed.
        """
        cls._check_installed()
        cls._remove_from_meta_path()

        cls._instance = None
45 | master_doc = "index" 46 | 47 | # General information about the project. 48 | project = "bytecode" 49 | copyright = "Contributors to the bytecode project" 50 | 51 | # The version info for the project you're documenting, acts as replacement for 52 | # |version| and |release|, also used in various other places throughout the 53 | # built documents. 54 | # 55 | # The short X.Y version. 56 | # The full version, including alpha/beta/rc tags. 57 | version = release = __version__ 58 | 59 | 60 | # The language for content autogenerated by Sphinx. Refer to documentation 61 | # for a list of supported languages. 62 | # language = None 63 | 64 | # There are two options for replacing |today|: either, you set today to some 65 | # non-false value, then it is used: 66 | # today = '' 67 | # Else, today_fmt is used as the format for a strftime call. 68 | # today_fmt = '%B %d, %Y' 69 | 70 | # List of patterns, relative to source directory, that match files and 71 | # directories to ignore when looking for source files. 72 | exclude_patterns = ["_build"] 73 | 74 | # The reST default role (used for this markup: `text`) to use for all 75 | # documents. 76 | # default_role = None 77 | 78 | # If true, '()' will be appended to :func: etc. cross-reference text. 79 | # add_function_parentheses = True 80 | 81 | # If true, the current module name will be prepended to all description 82 | # unit titles (such as .. function::). 83 | # add_module_names = True 84 | 85 | # If true, sectionauthor and moduleauthor directives will be shown in the 86 | # output. They are ignored by default. 87 | # show_authors = False 88 | 89 | # The name of the Pygments (syntax highlighting) style to use. 90 | pygments_style = "sphinx" 91 | 92 | # A list of ignored prefixes for module index sorting. 93 | # modindex_common_prefix = [] 94 | 95 | # If true, keep warnings as "system message" paragraphs in the built documents. 
96 | # keep_warnings = False 97 | 98 | 99 | # -- Options for HTML output ---------------------------------------------- 100 | 101 | # The theme to use for HTML and HTML Help pages. See the documentation for 102 | # a list of builtin themes. 103 | html_theme = "sphinx_rtd_theme" 104 | 105 | # Theme options are theme-specific and customize the look and feel of a theme 106 | # further. For a list of options available for each theme, see the 107 | # documentation. 108 | # html_theme_options = {} 109 | 110 | # Add any paths that contain custom themes here, relative to this directory. 111 | # html_theme_path = [] 112 | 113 | # The name for this set of Sphinx documents. If None, it defaults to 114 | # " v documentation". 115 | # html_title = None 116 | 117 | # A shorter title for the navigation bar. Default is the same as html_title. 118 | # html_short_title = None 119 | 120 | # The name of an image file (relative to this directory) to place at the top 121 | # of the sidebar. 122 | # html_logo = None 123 | 124 | # The name of an image file (within the static path) to use as favicon of the 125 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 126 | # pixels large. 127 | # html_favicon = None 128 | 129 | # Add any paths that contain custom static files (such as style sheets) here, 130 | # relative to this directory. They are copied after the builtin static files, 131 | # so a file named "default.css" will overwrite the builtin "default.css". 132 | # html_static_path = ["_static"] 133 | 134 | # Add any extra paths that contain custom files (such as robots.txt or 135 | # .htaccess) here, relative to this directory. These files are copied 136 | # directly to the root of the documentation. 137 | # html_extra_path = [] 138 | 139 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 140 | # using the given strftime format. 
141 | # html_last_updated_fmt = '%b %d, %Y' 142 | 143 | # If true, SmartyPants will be used to convert quotes and dashes to 144 | # typographically correct entities. 145 | # html_use_smartypants = True 146 | 147 | # Custom sidebar templates, maps document names to template names. 148 | # html_sidebars = {} 149 | 150 | # Additional templates that should be rendered to pages, maps page names to 151 | # template names. 152 | # html_additional_pages = {} 153 | 154 | # If false, no module index is generated. 155 | # html_domain_indices = True 156 | 157 | # If false, no index is generated. 158 | # html_use_index = True 159 | 160 | # If true, the index is split into individual pages for each letter. 161 | # html_split_index = False 162 | 163 | # If true, links to the reST sources are added to the pages. 164 | # html_show_sourcelink = True 165 | 166 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 167 | # html_show_sphinx = True 168 | 169 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 170 | # html_show_copyright = True 171 | 172 | # If true, an OpenSearch description file will be output, and all pages will 173 | # contain a tag referring to it. The value of this option must be the 174 | # base URL from which the finished HTML is served. 175 | # html_use_opensearch = '' 176 | 177 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 178 | # html_file_suffix = None 179 | 180 | # Output file base name for HTML help builder. 181 | htmlhelp_basename = "bytecodedoc" 182 | 183 | 184 | # -- Options for LaTeX output --------------------------------------------- 185 | 186 | latex_elements = { 187 | # The paper size ('letterpaper' or 'a4paper'). 188 | #'papersize': 'letterpaper', 189 | # The font size ('10pt', '11pt' or '12pt'). 190 | #'pointsize': '10pt', 191 | # Additional stuff for the LaTeX preamble. 192 | #'preamble': '', 193 | } 194 | 195 | # Grouping the document tree into LaTeX files. 
List of tuples 196 | # (source start file, target name, title, 197 | # author, documentclass [howto, manual, or own class]). 198 | latex_documents = [ 199 | ("index", "bytecode.tex", "bytecode Documentation", "Victor Stinner", "manual"), 200 | ] 201 | 202 | # The name of an image file (relative to this directory) to place at the top of 203 | # the title page. 204 | # latex_logo = None 205 | 206 | # For "manual" documents, if this is true, then toplevel headings are parts, 207 | # not chapters. 208 | # latex_use_parts = False 209 | 210 | # If true, show page references after internal links. 211 | # latex_show_pagerefs = False 212 | 213 | # If true, show URL addresses after external links. 214 | # latex_show_urls = False 215 | 216 | # Documents to append as an appendix to all manuals. 217 | # latex_appendices = [] 218 | 219 | # If false, no module index is generated. 220 | # latex_domain_indices = True 221 | 222 | 223 | # -- Options for manual page output --------------------------------------- 224 | 225 | # One entry per manual page. List of tuples 226 | # (source start file, name, description, authors, manual section). 227 | man_pages = [ 228 | ( 229 | "index", 230 | "bytecode", 231 | "bytecode Documentation", 232 | ["Victor Stinner", "Matthieu C. Dartiailh"], 233 | 1, 234 | ) 235 | ] 236 | 237 | # If true, show URL addresses after external links. 238 | # man_show_urls = False 239 | 240 | 241 | # -- Options for Texinfo output ------------------------------------------- 242 | 243 | # Grouping the document tree into Texinfo files. List of tuples 244 | # (source start file, target name, title, author, 245 | # dir menu entry, description, category) 246 | texinfo_documents = [ 247 | ( 248 | "index", 249 | "bytecode", 250 | "bytecode Documentation", 251 | "Victor Stinner", 252 | "bytecode", 253 | "Python module to generate and modify bytecode", 254 | "Miscellaneous", 255 | ), 256 | ] 257 | 258 | # Documents to append as an appendix to all manuals. 
259 | # texinfo_appendices = [] 260 | 261 | # If false, no module index is generated. 262 | # texinfo_domain_indices = True 263 | 264 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 265 | # texinfo_show_urls = 'footnote' 266 | 267 | # If true, do not generate a @detailmenu in the "Top" node's menu. 268 | # texinfo_no_detailmenu = False 269 | -------------------------------------------------------------------------------- /tests/long_lines_example.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # fmt: off 3 | def long_lines(): 4 | a = 1 5 | b = 1 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 6 | 7 | # Notice the huge space here to the next instruction (we want to hit some odd conditions 8 | # in the line table generation, which is why this sample file has such long lines). 
9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 
| 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | 516 | 517 | 518 | 519 | 520 | 521 | 522 | 523 | 524 | 525 | 526 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | 541 | 542 | 543 | 544 | 545 | 546 | 547 | 548 | 549 | 550 | 551 | 552 | 553 | 554 | 555 | 556 | 557 | 558 | 559 | 560 | 561 | 562 | 563 | 564 | 565 | 566 | 567 | 568 | 569 | 570 | 571 | 572 | 573 | 574 | 575 | 576 | 577 | 578 | 579 | 580 | 581 | 582 | 583 | 584 | 585 | 586 | 587 | 588 | 589 | 590 | 591 | 592 | 593 | 594 | 595 | 596 | 597 | 598 | 599 | 600 | 601 | 602 | 603 | 604 | 605 | 606 | 607 | 608 | 609 | 610 | 611 | 612 | 613 | 614 | 615 | 616 | 617 | 618 | 619 | 620 | 621 | 622 | 623 | 624 | 625 | 626 | 627 | 628 | 629 | 630 | 631 | 632 | 633 | 634 | 635 | 636 | 637 | 638 | 639 | 640 | 641 | 642 | 643 | 644 | 645 | 646 | 647 | 648 | 649 | 650 | 651 | 652 | 653 | 654 | 655 | 656 | 657 | 658 | 659 | 660 | 661 | 662 | 663 | 664 | 665 | 666 | 667 | 668 | 669 | 670 | 671 | 672 | 673 | 674 | 675 | 676 | 677 | 678 | 679 | 680 | 681 | 682 | 683 | 684 | 685 | 686 | 687 | 688 | 689 | 690 | 
691 | 692 | 693 | 694 | 695 | 696 | 697 | 698 | 699 | 700 | 701 | 702 | 703 | 704 | 705 | 706 | 707 | 708 | 709 | 710 | 711 | 712 | 713 | 714 | 715 | 716 | 717 | 718 | 719 | 720 | 721 | 722 | 723 | 724 | 725 | 726 | 727 | 728 | 729 | 730 | 731 | 732 | 733 | 734 | 735 | 736 | 737 | 738 | 739 | 740 | 741 | 742 | 743 | 744 | 745 | 746 | 747 | 748 | 749 | 750 | 751 | 752 | 753 | 754 | 755 | 756 | 757 | 758 | 759 | 760 | 761 | 762 | 763 | 764 | 765 | 766 | 767 | 768 | 769 | 770 | 771 | 772 | 773 | 774 | 775 | 776 | 777 | 778 | 779 | 780 | 781 | 782 | 783 | 784 | 785 | 786 | 787 | 788 | 789 | 790 | 791 | 792 | 793 | 794 | 795 | 796 | 797 | 798 | 799 | 800 | 801 | 802 | 803 | 804 | 805 | 806 | 807 | 808 | 809 | 810 | 811 | 812 | 813 | 814 | 815 | 816 | 817 | 818 | 819 | 820 | 821 | 822 | 823 | 824 | 825 | 826 | 827 | 828 | 829 | 830 | 831 | 832 | 833 | 834 | 835 | 836 | 837 | 838 | 839 | 840 | 841 | 842 | 843 | 844 | 845 | 846 | 847 | 848 | 849 | 850 | 851 | 852 | 853 | 854 | 855 | 856 | 857 | 858 | 859 | 860 | 861 | 862 | 863 | 864 | 865 | 866 | 867 | 868 | 869 | 870 | 871 | 872 | 873 | 874 | 875 | 876 | 877 | 878 | 879 | 880 | 881 | 882 | 883 | 884 | 885 | 886 | 887 | 888 | 889 | 890 | 891 | 892 | 893 | 894 | 895 | 896 | 897 | 898 | 899 | 900 | 901 | 902 | 903 | 904 | 905 | 906 | 907 | 908 | 909 | 910 | 911 | 912 | 913 | 914 | 915 | 916 | 917 | 918 | 919 | 920 | 921 | 922 | 923 | 924 | 925 | 926 | 927 | 928 | 929 | 930 | 931 | 932 | 933 | 934 | 935 | 936 | 937 | 938 | 939 | 940 | 941 | 942 | 943 | 944 | 945 | 946 | 947 | 948 | 949 | 950 | 951 | 952 | 953 | 954 | 955 | 956 | 957 | 958 | 959 | 960 | 961 | 962 | 963 | 964 | 965 | 966 | 967 | 968 | 969 | 970 | 971 | 972 | 973 | 974 | 975 | 976 | 977 | 978 | 979 | 980 | 981 | 982 | 983 | 984 | 985 | 986 | 987 | 988 | 989 | 990 | 991 | 992 | 993 | 994 | 995 | 996 | 997 | 998 | 999 | 1000 | 1001 | 1002 | 1003 | 1004 | 1005 | 1006 | 1007 | 1008 | 1009 | 1010 | 1011 | 1012 | 1013 | 1014 | 1015 | 1016 | 1017 | 1018 | 1019 | 1020 
| 1021 | 1022 | 1023 | 1024 | 1025 | 1026 | 1027 | 1028 | 1029 | 1030 | 1031 | 1032 | 1033 | 1034 | 1035 | 1036 | 1037 | 1038 | 1039 | 1040 | 1041 | 1042 | 1043 | 1044 | 1045 | 1046 | 1047 | 1048 | c = 1 if b > 1 else 2 if b > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 1049 | 1050 | d = 1 if c > 1 else 2 if c > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 1051 | e = d + 1 1052 | return e 1053 | # fmt: on 1054 | -------------------------------------------------------------------------------- /src/bytecode/bytecode.py: -------------------------------------------------------------------------------- 1 | # alias to keep the 'bytecode' variable free 2 | import types 3 | from abc import abstractmethod 4 | from typing import ( 5 | Any, 6 | Dict, 7 | Generic, 8 | Iterator, 9 | List, 10 | Optional, 11 | Sequence, 12 | SupportsIndex, 13 | TypeVar, 14 | Union, 15 | overload, 16 | ) 17 | 18 | import bytecode as _bytecode 19 | from bytecode.flags import CompilerFlags, infer_flags 20 | from bytecode.instr import ( 21 | _UNSET, 22 | UNSET, 23 | BaseInstr, 24 | Instr, 25 | Label, 26 | SetLineno, 27 | TryBegin, 28 | TryEnd, 29 | ) 30 | 31 | 32 | class 
BaseBytecode: 33 | def __init__(self) -> None: 34 | self.argcount = 0 35 | self.posonlyargcount = 0 36 | self.kwonlyargcount = 0 37 | self.first_lineno = 1 38 | self.name = "" 39 | self.qualname = self.name 40 | self.filename = "" 41 | self.docstring: Union[str, None, _UNSET] = UNSET 42 | # We cannot recreate cellvars/freevars from instructions because of super() 43 | # special-case, which involves an implicit __class__ cell/free variable 44 | # We could try to detect it. 45 | # CPython itself breaks if one aliases super so we could maybe make it work 46 | # but it will require careful design and will be done later in the future. 47 | self.cellvars: List[str] = [] 48 | self.freevars: List[str] = [] 49 | self._flags: CompilerFlags = CompilerFlags(0) 50 | 51 | def _copy_attr_from(self, bytecode: "BaseBytecode") -> None: 52 | self.argcount = bytecode.argcount 53 | self.posonlyargcount = bytecode.posonlyargcount 54 | self.kwonlyargcount = bytecode.kwonlyargcount 55 | self.flags = bytecode.flags 56 | self.first_lineno = bytecode.first_lineno 57 | self.name = bytecode.name 58 | self.qualname = bytecode.qualname 59 | self.filename = bytecode.filename 60 | self.docstring = bytecode.docstring 61 | self.cellvars = list(bytecode.cellvars) 62 | self.freevars = list(bytecode.freevars) 63 | 64 | def __eq__(self, other: Any) -> bool: 65 | if type(self) is not type(other): 66 | return False 67 | 68 | if self.argcount != other.argcount: 69 | return False 70 | if self.posonlyargcount != other.posonlyargcount: 71 | return False 72 | if self.kwonlyargcount != other.kwonlyargcount: 73 | return False 74 | if self.flags != other.flags: 75 | return False 76 | if self.first_lineno != other.first_lineno: 77 | return False 78 | if self.filename != other.filename: 79 | return False 80 | if self.name != other.name: 81 | return False 82 | if self.qualname != other.qualname: 83 | return False 84 | if self.docstring != other.docstring: 85 | return False 86 | if self.cellvars != other.cellvars: 87 
| return False 88 | if self.freevars != other.freevars: 89 | return False 90 | if self.compute_stacksize() != other.compute_stacksize(): 91 | return False 92 | 93 | return True 94 | 95 | @property 96 | def flags(self) -> CompilerFlags: 97 | return self._flags 98 | 99 | @flags.setter 100 | def flags(self, value: CompilerFlags) -> None: 101 | if not isinstance(value, CompilerFlags): 102 | value = CompilerFlags(value) 103 | self._flags = value 104 | 105 | def update_flags(self, *, is_async: Optional[bool] = None) -> None: 106 | # infer_flags reasonably only accept concrete subclasses 107 | self.flags = infer_flags(self, is_async) # type: ignore 108 | 109 | @abstractmethod 110 | def compute_stacksize(self, *, check_pre_and_post: bool = True) -> int: 111 | raise NotImplementedError 112 | 113 | 114 | T = TypeVar("T", bound="_BaseBytecodeList") 115 | U = TypeVar("U") 116 | 117 | 118 | class _BaseBytecodeList(BaseBytecode, list, Generic[U]): 119 | """List subclass providing type stable slicing and copying.""" 120 | 121 | @overload 122 | def __getitem__(self, index: SupportsIndex) -> U: ... 123 | 124 | @overload 125 | def __getitem__(self: T, index: slice) -> T: ... 
126 | 127 | def __getitem__(self, index): 128 | value = super().__getitem__(index) 129 | if isinstance(index, slice): 130 | value = type(self)(value) 131 | value._copy_attr_from(self) 132 | 133 | return value 134 | 135 | def copy(self: T) -> T: 136 | # This is a list subclass and works 137 | new = type(self)(super().copy()) # type: ignore 138 | new._copy_attr_from(self) 139 | return new 140 | 141 | def legalize(self) -> None: 142 | """Check that all the element of the list are valid and remove SetLineno.""" 143 | lineno_pos = [] 144 | set_lineno = None 145 | current_lineno = self.first_lineno 146 | 147 | for pos, instr in enumerate(self): 148 | if isinstance(instr, SetLineno): 149 | set_lineno = instr.lineno 150 | lineno_pos.append(pos) 151 | continue 152 | # Filter out other pseudo instructions 153 | if not isinstance(instr, BaseInstr): 154 | continue 155 | if set_lineno is not None: 156 | instr.lineno = set_lineno 157 | elif instr.lineno is UNSET: 158 | instr.lineno = current_lineno 159 | elif instr.lineno is not None: 160 | current_lineno = instr.lineno 161 | 162 | for i in reversed(lineno_pos): 163 | del self[i] 164 | 165 | def __iter__(self) -> Iterator[U]: 166 | instructions = super().__iter__() 167 | for instr in instructions: 168 | self._check_instr(instr) 169 | yield instr 170 | 171 | def _check_instr(self, instr): 172 | raise NotImplementedError() 173 | 174 | 175 | V = TypeVar("V") 176 | 177 | 178 | class _InstrList(List[V]): 179 | # Providing a stricter typing for this helper whose use is limited to the __eq__ 180 | # implementation is more effort than it is worth. 
181 | def _flat(self) -> List: 182 | instructions: List = [] 183 | labels = {} 184 | jumps = [] 185 | try_begins: Dict[TryBegin, int] = {} 186 | try_jumps = [] 187 | 188 | offset = 0 189 | instr: Any 190 | for index, instr in enumerate(self): 191 | if isinstance(instr, Label): 192 | instructions.append("label_instr%s" % index) 193 | labels[instr] = offset 194 | elif isinstance(instr, TryBegin): 195 | try_begins.setdefault(instr, len(try_begins)) 196 | assert isinstance(instr.target, Label) 197 | try_jumps.append((instr.target, len(instructions))) 198 | instructions.append(instr) 199 | elif isinstance(instr, TryEnd): 200 | instructions.append(("TryEnd", try_begins[instr.entry])) 201 | else: 202 | if isinstance(instr, Instr) and isinstance(instr.arg, Label): 203 | target_label = instr.arg 204 | instr = _bytecode.ConcreteInstr( 205 | instr.name, 0, location=instr.location 206 | ) 207 | jumps.append((target_label, instr)) 208 | instructions.append(instr) 209 | offset += 1 210 | 211 | for target_label, instr in jumps: 212 | instr.arg = labels[target_label] 213 | 214 | for target_label, index in try_jumps: 215 | instr = instructions[index] 216 | assert isinstance(instr, TryBegin) 217 | instructions[index] = ( 218 | "TryBegin", 219 | try_begins[instr], 220 | labels[target_label], 221 | instr.push_lasti, 222 | ) 223 | 224 | return instructions 225 | 226 | def __eq__(self, other: Any) -> bool: 227 | if not isinstance(other, _InstrList): 228 | other = _InstrList(other) 229 | 230 | return self._flat() == other._flat() 231 | 232 | 233 | class Bytecode( 234 | _InstrList[Union[Instr, Label, TryBegin, TryEnd, SetLineno]], 235 | _BaseBytecodeList[Union[Instr, Label, TryBegin, TryEnd, SetLineno]], 236 | ): 237 | def __init__( 238 | self, 239 | instructions: Sequence[Union[Instr, Label, TryBegin, TryEnd, SetLineno]] = (), 240 | ) -> None: 241 | BaseBytecode.__init__(self) 242 | self.argnames: List[str] = [] 243 | for instr in instructions: 244 | self._check_instr(instr) 245 | 
self.extend(instructions) 246 | 247 | def __iter__(self) -> Iterator[Union[Instr, Label, TryBegin, TryEnd, SetLineno]]: 248 | instructions = super().__iter__() 249 | seen_try_begin = False 250 | for instr in instructions: 251 | self._check_instr(instr) 252 | if isinstance(instr, TryBegin): 253 | if seen_try_begin: 254 | raise RuntimeError("TryBegin pseudo instructions cannot be nested.") 255 | seen_try_begin = True 256 | elif isinstance(instr, TryEnd): 257 | seen_try_begin = False 258 | yield instr 259 | 260 | def _check_instr(self, instr: Any) -> None: 261 | if not isinstance(instr, (Label, SetLineno, Instr, TryBegin, TryEnd)): 262 | raise ValueError( 263 | "Bytecode must only contain Label, " 264 | "SetLineno, and Instr objects, " 265 | "but %s was found" % type(instr).__name__ 266 | ) 267 | 268 | def _copy_attr_from(self, bytecode: BaseBytecode) -> None: 269 | super()._copy_attr_from(bytecode) 270 | if isinstance(bytecode, Bytecode): 271 | self.argnames = bytecode.argnames 272 | 273 | @staticmethod 274 | def from_code( 275 | code: types.CodeType, 276 | prune_caches: bool = True, 277 | conserve_exception_block_stackdepth: bool = False, 278 | ) -> "Bytecode": 279 | concrete = _bytecode.ConcreteBytecode.from_code(code) 280 | return concrete.to_bytecode( 281 | prune_caches=prune_caches, 282 | conserve_exception_block_stackdepth=conserve_exception_block_stackdepth, 283 | ) 284 | 285 | def compute_stacksize(self, *, check_pre_and_post: bool = True) -> int: 286 | cfg = _bytecode.ControlFlowGraph.from_bytecode(self) 287 | return cfg.compute_stacksize(check_pre_and_post=check_pre_and_post) 288 | 289 | def to_code( 290 | self, 291 | compute_jumps_passes: Optional[int] = None, 292 | stacksize: Optional[int] = None, 293 | *, 294 | check_pre_and_post: bool = True, 295 | compute_exception_stack_depths: bool = True, 296 | ) -> types.CodeType: 297 | # Prevent reconverting the concrete bytecode to bytecode and cfg to do the 298 | # calculation if we need to do it. 
299 | if stacksize is None or compute_exception_stack_depths: 300 | cfg = _bytecode.ControlFlowGraph.from_bytecode(self) 301 | stacksize = cfg.compute_stacksize( 302 | check_pre_and_post=check_pre_and_post, 303 | compute_exception_stack_depths=compute_exception_stack_depths, 304 | ) 305 | self = cfg.to_bytecode() 306 | compute_exception_stack_depths = False # avoid redoing everything 307 | bc = self.to_concrete_bytecode( 308 | compute_jumps_passes=compute_jumps_passes, 309 | compute_exception_stack_depths=compute_exception_stack_depths, 310 | ) 311 | return bc.to_code( 312 | stacksize=stacksize, 313 | compute_exception_stack_depths=compute_exception_stack_depths, 314 | ) 315 | 316 | def to_concrete_bytecode( 317 | self, 318 | compute_jumps_passes: Optional[int] = None, 319 | compute_exception_stack_depths: bool = True, 320 | ) -> "_bytecode.ConcreteBytecode": 321 | converter = _bytecode._ConvertBytecodeToConcrete(self) 322 | return converter.to_concrete_bytecode( 323 | compute_jumps_passes=compute_jumps_passes, 324 | compute_exception_stack_depths=compute_exception_stack_depths, 325 | ) 326 | -------------------------------------------------------------------------------- /doc/changelog.rst: -------------------------------------------------------------------------------- 1 | ChangeLog 2 | ========= 3 | 4 | unreleased: Version 0.18.0 5 | -------------------------- 6 | 7 | - drop support for Python 3.9 and 3.10 PR #180 8 | 9 | Bugfixes: 10 | 11 | - Fix handling of END_ASYNC_FOR which is a backward jump PR #179 12 | 13 | 03-09-2025: Version 0.17.0 14 | -------------------------- 15 | 16 | New features: 17 | 18 | - Add support for Python 3.14 PR #166 19 | 20 | Support for Python 3.14, comes with a number of changes reflecting changes in 21 | CPython bytecode itself: 22 | 23 | - introduced an enum for BINARY_OP argument which now supports subscribe. 
24 | When disassembling the enum is always used, when creating bytecode from 25 | scratch integer values are coerced into the right enum member. 26 | - support BUILD_TEMPLATE, BUILD_INTERPOLATION, LOAD_SMALL_INT, LOAD_FAST_BORROW 27 | and LOAD_FAST_BORROW_LOAD_FAST_BORROW 28 | - LOAD_COMMON_CONSTANT, LOAD_SPECIAL whose argument is described using dedicated 29 | enums CommonConstant, SpecialMethod 30 | - CONVERT_VALUE (FORMAT_VALUE in Python < 3.13) now use the FormatValue enum. 31 | When disassembling the enum is always used, when creating bytecode from 32 | scratch integer values are coerced into the right enum member. 33 | 34 | Bugfixes: 35 | 36 | - properly set the next_block attribute of the new block created by 37 | ControlFlowGraph.split_block. PR #170 38 | 39 | 2025-04-14: Version 0.16.2 40 | -------------------------- 41 | 42 | Bugfixes: 43 | 44 | - fix ControlFlowGraph dead block detection by accounting for fall-through 45 | edges. PR #161 46 | 47 | 2025-01-21: Version 0.16.1 48 | -------------------------- 49 | 50 | Bugfixes: 51 | 52 | - fix flag inference for async code PR #157 53 | 54 | 55 | 2024-10-28: Version 0.16.0 56 | -------------------------- 57 | 58 | New features: 59 | 60 | - Add support for Python 3.13 PR #146 61 | 62 | Support for Python 3.13, comes with a number of changes reflecting changes in 63 | CPython bytecode itself: 64 | 65 | - handle the ability of comparison to cast with new enum variants: 66 | LT_CAST, LE_CAST, etc 67 | - allow LOAD_FAST to access free and cell vars 68 | 69 | Bugfixes: 70 | 71 | - Properly handle TryEnd with no matching TryBegin in stack size computation on 72 | the CFG PR #149 73 | - Ensure that empty or small (one-instruction) try blocks are handled without 74 | problems when compiling and de-compiling abstract code for CPython 3.11 and 75 | later. 
PR #145 76 | 77 | 2023-10-13: Version 0.15.1 78 | -------------------------- 79 | 80 | Bugfixes: 81 | 82 | - Disallow creating an instruction targeting a pseudo/instrumented opcode PR #133 83 | - Fixes encoding of 0 as a varint PR #132 84 | - Correct spelling of "INTRINSIC" in several places; this affected 85 | some ops in Python 3.12. PR #131 86 | 87 | 2023-09-01: Version 0.15.0 88 | -------------------------- 89 | 90 | New features: 91 | 92 | - Add support for Python 3.12 PR #122 93 | 94 | Support for Python 3.12, comes with a number of changes reflecting changes in 95 | CPython bytecode itself: 96 | 97 | - handle the ability of ``LOAD_ATTR`` to replace ``LOAD_METHOD`` 98 | As a consequence the argument is now a ``tuple[bool, str]`` 99 | - similarly ``LOAD_SUPER_ATTR`` which uses the 2 lowest bits as flag takes 100 | a ``tuple[bool, bool, str]`` as argument 101 | - ``POP_JUMP_IF_*`` instructions are undirected in Python 3.12 102 | - ``YIELD_VALUE`` now takes an argument 103 | - Support for ``CALL_INTRINSIC_1/2`` led to the addition of 2 new enums to 104 | represent the argument 105 | 106 | 2023-05-24: Version 0.14.2 107 | -------------------------- 108 | 109 | Bugfixes: 110 | 111 | - allow to convert a CFG, for which stack sizes have not been computed, to Bytecode 112 | even in the presence of mergeable TryBegin/TryEnd PR #120 113 | - remove spurious TryEnd leftover when going from CFG to Bytecode PR #120 114 | 115 | 116 | 2023-04-04: Version 0.14.1 117 | -------------------------- 118 | 119 | Bugfixes: 120 | 121 | - allow to disassemble code containing ``EXTENDED_ARG`` targeting a ``NOP`` PR #117 122 | 123 | 124 | 2022-11-30: Version 0.14.0 125 | -------------------------- 126 | 127 | New features: 128 | 129 | - Removed the peephole optimizer PR #107 130 | 131 | Basically changes in Python 3.11 made it hard to port and the maintenance cost 132 | exceeded the perceived use. It could be re-added if there is a demand for it. 
133 | 134 | - Add support for Python 3.11 PR #107 135 | 136 | Support for Python 3.11, comes with a number of changes reflecting changes in 137 | CPython bytecode itself: 138 | 139 | - support for the exception table in ``ConcreteBytecode`` 140 | - support for pseudo-instruction ``TryBegin`` and ``TryEnd`` describing the 141 | exception table in ``Bytecode`` and ``ControlFlowGraph`` 142 | - new keyword arguments in conversion methods related to computations required 143 | for the exception table 144 | - handling of CACHE opcode at the ``ConcreteBytecode`` level 145 | - handling of the ability of ``LOAD_GLOBAL`` to push NULL (the argument is 146 | now a ``tuple[bool, str]``) 147 | - support for end_lineno and column offsets in instructions 148 | - support for ``co_qualname`` (as ``qualname`` on bytecode objects) 149 | 150 | and a number of internal changes related to changes in the internal bytecode 151 | representation. 152 | 153 | - Add type annotations and make types stricter PR #105 154 | In particular, ConcreteInstr does not inherit from Instr anymore and one 155 | cannot use ConcreteInstr in Bytecode objects. This is saner than before. 156 | 157 | Bugfixes: 158 | 159 | - Removed ``EXC_MATCH`` from the ``Compare`` enumeration starting with Python 160 | 3.9. The new ``JUMP_IF_NOT_EXC_MATCH`` opcode should be used instead. 161 | 162 | - Removed ``IN``, ``NOT_IN``, ``IS``, ``IS_NOT`` from the ``Compare`` 163 | enumeration starting with Python 3.9. The new ``CONTAINS_OP`` and ``IS_OP`` 164 | opcodes should be used instead. 165 | 166 | - Add proper pre and post stack effects to all opcodes (up to Python 3.11) 167 | PR #106 #107 168 | 169 | Maintenance: 170 | 171 | - Make the install process PEP517 compliant PR #97 172 | - Drop support for Python 3.6 and 3.7 PR #100 173 | 174 | 175 | 2021-10-04: Version 0.13.0 176 | -------------------------- 177 | 178 | New features: 179 | 180 | - Add support for Python 3.10 new encoding of line number.
This support is 181 | minimal in the sense that we still systematically assign a line number 182 | while the new format allows bytecode with absolutely no line number. PR #72 183 | 184 | 185 | Bugfixes: 186 | 187 | - Fix handling of RERAISE (introduced in 3.9) when creating a ControlFlowGraph; 188 | previously it was not considered final. PR #72 189 | 190 | - Fix line table assembly in Python 3.10. PR #85 191 | 192 | 193 | 2021-02-02: Version 0.12.0 194 | -------------------------- 195 | 196 | New features: 197 | 198 | - All calculations of stacksize now check for stack underflow to avoid segfault at 199 | runtime PR #69 200 | 201 | Bugfixes: 202 | 203 | - Fix recursion limitations when compiling bytecode with numerous basic 204 | blocks. PR #57 205 | - Fix handling of line offsets. Issue #67, PR #71 206 | 207 | API changes: 208 | 209 | - Forbid an :class:`Instr` to hold an EXTENDED_ARG op_code PR #65 210 | - Forbid the use of :class:`ConcreteInstr` in :class:`Bytecode` and 211 | :class:`ControlFlowGraph` PR #65 212 | This is motivated by the extra complexity that handling possible EXTENDED_ARG 213 | instructions in those representations would bring (stack computation, etc) 214 | - Always remove EXTENDED_ARG when converting :class:`ConcreteBytecode` to 215 | :class:`Bytecode` PR #65 216 | This is equivalent to saying that the :class:`ConcreteBytecode` converted to 217 | :class:`Bytecode` was generated by :meth:`ConcreteBytecode.from_code` 218 | with extended_args=False 219 | - :class:`Instr` now has a new method :meth:`Instr.pre_and_post_stack_effect` 220 | for checking the prerequisite stack size of an operation PR #69 221 | - :meth:`_compute_stack_size` now uses :meth:`Instr.pre_and_post_stack_effect` 222 | to compute the stack size to reject code that will lead to runtime segfault 223 | caused by stack underflow PR #69 224 | 225 | 226 | 2020-03-02: Version 0.11.0 227 | -------------------------- 228 | 229 | New features: 230 | 231 | - The :func:`infer_flags` can now
be used to forcibly mark a function as 232 | asynchronous or not. 233 | 234 | Bugfixes: 235 | 236 | - Fix a design flaw in the flag inference mechanism that could very easily 237 | lead to invalid flags configuration PR #56 238 | 239 | 240 | 2020-02-02: Version 0.10.0 241 | -------------------------- 242 | 243 | New features: 244 | 245 | - Slices and copy of :class:`Bytecode`, :class:`ConcreteBytecode` and 246 | :class:`BasicBlock` are now of the same type as the original container. PR #52 247 | - :class:`Bytecode`, :class:`ConcreteBytecode`, :class:`BasicBlock` and 248 | :class:`ControlFlowGraph` have a new :meth:`legalize` method validating 249 | their content and removing SetLineno. PR #52 250 | - Modify the implementation of :code:`const_key` to avoid manual 251 | synchronizations with :code:`_PyCode_ConstantKey` in CPython codebase and 252 | allow the use of arbitrary Python objects as constants of nested code 253 | objects. #54 254 | 255 | API changes: 256 | 257 | - Add :class:`Compare` enum to public API. PR #53 258 | 259 | 260 | 2019-12-01: Version 0.9.0 261 | ------------------------- 262 | 263 | New features: 264 | 265 | - Add support for released version of Python 3.8 and update documentation. 266 | 267 | 268 | 2019-02-18: Version 0.8.0 269 | ------------------------- 270 | 271 | New features: 272 | 273 | - Add support for Python 3.7 PR #29 274 | - Add preliminary support for Python 3.8-dev PR #41 275 | - Allow to use any Python object as constants to enable aggressive 276 | optimizations PR #34 277 | 278 | API changes: 279 | 280 | - `stack_effect` is now a method of :class:`Instr` and not as property anymore. PR #29 281 | 282 | Bugfixes: 283 | 284 | - Avoid throwing `OverflowError` when applying `stack_effect` on valid :class:`Instr` 285 | objects. 
PR #43, PR #44 286 | 287 | 288 | 2018-04-15: Version 0.7.0 289 | ------------------------- 290 | 291 | New features: 292 | 293 | - Add `compute_jumps_passes` optional argument to :meth:`Bytecode.to_code` and 294 | to :meth:`Bytecode.to_concrete_bytecode` to control the number of passes 295 | performed to compute jump targets. In theory the required number is only 296 | bounded by the size of the code, but usually the algorithm converges quickly 297 | (< 10 iterations). 298 | 299 | Bugfixes: 300 | 301 | - proper handling of `EXTENDED_ARG` without arguments PR #28: 302 | 303 | `EXTENDED_ARG` are once again removed but their presence is recorded to avoid 304 | having issues with offsets in jumps. Similarly when round tripping code 305 | through :class:`ConcreteBytecode` the `EXTENDED_ARG` without args are 306 | preserved while if going through :class:`Bytecode` they are removed. 307 | 308 | 309 | 2018-03-24: Version 0.6 310 | ----------------------- 311 | 312 | * Add stack depth computation based on control flow graph analysis 313 | * Add higher level flags handling using IntFlag enum and inference function 314 | * Add an instructions argument to ConcreteBytecode, and validate its value 315 | * Do not delete `EXTENDED_ARG` instructions that have no arg 316 | 317 | 318 | 2017-01-05: Version 0.5 319 | ----------------------- 320 | 321 | * Add the new bytecode format of Python 3.6. 322 | * Remove the ``BaseInstr`` class which became useless. It was replaced 323 | with the :class:`Instr` class. 324 | * Documentation: Add a comparison with byteplay and codetransformer. 325 | * Remove the BaseInstr class: Instr becomes the new base class. 326 | * Fix PEP 8 issues and check PEP 8 on Travis CI. 327 | 328 | 329 | 2016-04-12: Version 0.4 330 | ----------------------- 331 | 332 | Peephole optimizer: 333 | 334 | * Reenable optimization on ``JUMP_IF_TRUE_OR_POP`` jumping to 335 | ``POP_JUMP_IF_FALSE``.
336 | 337 | 338 | 2016-03-02: Version 0.3 339 | ----------------------- 340 | 341 | New features: 342 | 343 | - Add :meth:`ControlFlowGraph.get_block_index` method 344 | 345 | API changes: 346 | 347 | - Rename ``Block`` class to :class:`BasicBlock` 348 | - Rename ``BytecodeBlocks`` class to :class:`ControlFlowGraph` 349 | - Rename ``BaseInstr.op`` to :attr:`BaseInstr.opcode` 350 | - Rename ``BaseBytecode.kw_only_argcount`` attribute to 351 | :attr:`BaseBytecode.kwonlyargcount`, name closer to the Python code object 352 | attribute (``co_kwonlyargcount``) 353 | - :class:`Instr` constructor and its :meth:`~BaseInstr.set` method now 354 | validates the argument type 355 | - Add :class:`Compare` enum, used for ``COMPARE_OP`` argument of :class:`Instr` 356 | - Remove *lineno* parameter from the :meth:`BaseInstr.set` method 357 | - Add :class:`CellVar` and :class:`FreeVar` classes: instructions having 358 | a cell or free variable now require a :class:`CellVar` or :class:`FreeVar` 359 | instance rather than a simple string (``str``). This change is required 360 | to handle correctly code with duplicated variable names in cell and free 361 | variables. 362 | - :class:`ControlFlowGraph`: remove undocumented ``to_concrete_bytecode()`` 363 | and ``to_code()`` methods 364 | 365 | Bugfixes: 366 | 367 | - Fix support of :class:`SetLineno` 368 | 369 | Peephole optimizer: 370 | 371 | - Better code for LOAD_CONST x n + BUILD_LIST + UNPACK_SEQUENCE: rewrite 372 | LOAD_CONST in the reverse order instead of using ROT_TWO and ROT_THREE. 373 | This optimization supports more than 3 items. 374 | - Remove JUMP_ABSOLUTE pointing to the following code. It can occur 375 | after dead code was removed. 376 | - Remove NOP instructions 377 | - Bugfix: catch IndexError when trying to get the next instruction. 378 | 379 | 380 | 2016-02-29: Version 0.2 381 | ----------------------- 382 | 383 | - Again, the API is deeply reworked. 
384 | - The project now has documentation: 385 | `bytecode documentation <https://bytecode.readthedocs.io/>`_ 386 | - Fix bug #1: support jumps larger than 2^16. 387 | - Add a new bytecode.peephole_opt module: a peephole 388 | optimizer, code based on peephole optimizer of CPython 3.6 which is 389 | implemented in C 390 | - Add :func:`dump_bytecode` function to ease debugging. 391 | - :class:`Instr`: 392 | 393 | * Add :meth:`Instr.is_final` method 394 | * Add :meth:`Instr.copy` and :meth:`ConcreteInstr.copy` methods 395 | * :class:`Instr` now uses variable name instead of integer for cell and 396 | free variables. 397 | * Rename ``Instr.is_jump`` to :meth:`Instr.has_jump` 398 | 399 | 400 | - :class:`ConcreteInstr` is now mutable 401 | - Redesign the :class:`BytecodeBlocks` class: 402 | 403 | - :class:`Block` no longer has a label attribute: jump targets are now 404 | directly blocks 405 | - Rename ``BytecodeBlocks.add_label()`` method to 406 | :meth:`BytecodeBlocks.split_block` 407 | - Labels are no longer allowed in blocks 408 | - :meth:`BytecodeBlocks.from_bytecode` now splits blocks after final 409 | instructions (:meth:`Instr.is_final`) and after conditional jumps 410 | (:meth:`Instr.is_cond_jump`). It helps the peephole optimizer to 411 | respect the control flow and to remove dead code. 412 | 413 | - Rework API to convert bytecode classes: 414 | 415 | - BytecodeBlocks: Remove ``to_concrete_bytecode()`` and ``to_code()`` 416 | methods. Now you first have to convert blocks to bytecode using 417 | :meth:`~BytecodeBlocks.to_bytecode`.
418 | - Remove ``Bytecode.to_bytecode_blocks()`` method, replaced with 419 | :meth:`BytecodeBlocks.from_bytecode` 420 | - Remove ``ConcreteBytecode.to_concrete_bytecode()`` and 421 | ``Bytecode.to_bytecode()`` methods which did nothing (return ``self``) 422 | 423 | - Fix :class:`ConcreteBytecode` for code with no constant (empty list of 424 | constants) 425 | - Fix argnames in :meth:`ConcreteBytecode.to_bytecode`: use CO_VARARGS and 426 | CO_VARKEYWORDS flags to count the number of arguments 427 | - Fix const_key() to compare correctly constants equal but of different types 428 | and special cases like ``-0.0`` and ``+0.0`` 429 | 430 | 431 | 2016-02-26: Version 0.1 432 | ----------------------- 433 | 434 | - Rewrite completely the API! 435 | 436 | 437 | 2016-02-23: Release 0.0 438 | ----------------------- 439 | 440 | - First public release 441 | -------------------------------------------------------------------------------- /tests/test_misc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import contextlib 3 | import io 4 | import sys 5 | import textwrap 6 | import unittest 7 | 8 | import bytecode 9 | from bytecode import BasicBlock, Bytecode, ControlFlowGraph, Instr, Label 10 | from bytecode.utils import PY312, PY313, PY314 11 | 12 | from . 
def check_dump_bytecode(self, code, expected, lineno=None):
    """Dump *code* and compare what was printed with *expected*.

    ``bytecode.dump_bytecode`` writes to standard output, so the output is
    captured by temporarily redirecting ``sys.stdout`` to an in-memory buffer.

    Parameters:
        code: object handed to ``bytecode.dump_bytecode``.
        expected: exact text the dump must produce.
        lineno: when true, the dump is requested with ``lineno=True``;
            ``None`` (the default) or any other false value requests the
            plain dump.
    """
    # The buffer receives stdout (not stderr), hence the name.
    with contextlib.redirect_stdout(io.StringIO()) as stdout:
        # Test truthiness rather than "is not None" so that an explicit
        # lineno=False does not silently enable line numbers.
        if lineno:
            bytecode.dump_bytecode(code, lineno=True)
        else:
            bytecode.dump_bytecode(code)

    self.assertMultiLineEqual(stdout.getvalue(), expected)
self.check_dump_bytecode(code, expected[1:].rstrip(" ")) 110 | 111 | # with line numbers 112 | if PY314: 113 | expected = f""" 114 | L. 1 0: RESUME 0 115 | L. 2 1: LOAD_FAST_BORROW 'test' 116 | 2: LOAD_SMALL_INT 1 117 | 3: COMPARE_OP {enum_repr} 118 | 4: POP_JUMP_IF_FALSE 119 | 5: NOT_TAKEN 120 | L. 3 6: LOAD_SMALL_INT 1 121 | 7: RETURN_VALUE 122 | 123 | label_instr8: 124 | L. 4 9: LOAD_FAST_BORROW 'test' 125 | 10: LOAD_SMALL_INT 2 126 | 11: COMPARE_OP {enum_repr} 127 | 12: POP_JUMP_IF_FALSE 128 | 13: NOT_TAKEN 129 | L. 5 14: LOAD_SMALL_INT 2 130 | 15: RETURN_VALUE 131 | 132 | label_instr16: 133 | L. 6 17: LOAD_SMALL_INT 3 134 | 18: RETURN_VALUE 135 | 136 | """ 137 | elif PY312: 138 | expected = f""" 139 | L. 1 0: RESUME 0 140 | L. 2 1: LOAD_FAST 'test' 141 | 2: LOAD_CONST 1 142 | 3: COMPARE_OP {enum_repr} 143 | 4: POP_JUMP_IF_FALSE 144 | L. 3 5: RETURN_CONST 1 145 | 146 | label_instr6: 147 | L. 4 7: LOAD_FAST 'test' 148 | 8: LOAD_CONST 2 149 | 9: COMPARE_OP {enum_repr} 150 | 10: POP_JUMP_IF_FALSE 151 | L. 5 11: RETURN_CONST 2 152 | 153 | label_instr12: 154 | L. 6 13: RETURN_CONST 3 155 | 156 | """ 157 | else: 158 | expected = f""" 159 | L. 1 0: RESUME 0 160 | L. 2 1: LOAD_FAST 'test' 161 | 2: LOAD_CONST 1 162 | 3: COMPARE_OP {enum_repr} 163 | 4: POP_JUMP_FORWARD_IF_FALSE 164 | L. 3 5: LOAD_CONST 1 165 | 6: RETURN_VALUE 166 | 167 | label_instr7: 168 | L. 4 8: LOAD_FAST 'test' 169 | 9: LOAD_CONST 2 170 | 10: COMPARE_OP {enum_repr} 171 | 11: POP_JUMP_FORWARD_IF_FALSE 172 | L. 5 12: LOAD_CONST 2 173 | 13: RETURN_VALUE 174 | 175 | label_instr14: 176 | L. 
6 15: LOAD_CONST 3 177 | 16: RETURN_VALUE 178 | 179 | """ 180 | self.check_dump_bytecode(code, expected[1:].rstrip(" "), lineno=True) 181 | 182 | def test_bytecode_broken_label(self): 183 | label = Label() 184 | code = Bytecode([Instr("JUMP_FORWARD", label)]) 185 | 186 | expected = " JUMP_FORWARD \n\n" 187 | self.check_dump_bytecode(code, expected) 188 | 189 | def test_blocks_broken_jump(self): 190 | block = BasicBlock() 191 | code = ControlFlowGraph() 192 | code[0].append(Instr("JUMP_FORWARD", block)) 193 | 194 | expected = textwrap.dedent( 195 | """ 196 | block1: 197 | JUMP_FORWARD 198 | 199 | """ 200 | ).lstrip("\n") 201 | self.check_dump_bytecode(code, expected) 202 | 203 | def test_bytecode_blocks(self): 204 | source = """ 205 | def func(test): 206 | if test == 1: 207 | return 1 208 | elif test == 2: 209 | return 2 210 | return 3 211 | """ 212 | code = disassemble(source, function=True) 213 | code = ControlFlowGraph.from_bytecode(code) 214 | 215 | # without line numbers 216 | enum_repr = "" if PY313 else "" 217 | if PY314: 218 | expected = textwrap.dedent( 219 | f""" 220 | block1: 221 | RESUME 0 222 | LOAD_FAST_BORROW 'test' 223 | LOAD_SMALL_INT 1 224 | COMPARE_OP {enum_repr} 225 | POP_JUMP_IF_FALSE 226 | -> block2 227 | 228 | block2: 229 | NOT_TAKEN 230 | LOAD_SMALL_INT 1 231 | RETURN_VALUE 232 | 233 | block3: 234 | LOAD_FAST_BORROW 'test' 235 | LOAD_SMALL_INT 2 236 | COMPARE_OP {enum_repr} 237 | POP_JUMP_IF_FALSE 238 | -> block4 239 | 240 | block4: 241 | NOT_TAKEN 242 | LOAD_SMALL_INT 2 243 | RETURN_VALUE 244 | 245 | block5: 246 | LOAD_SMALL_INT 3 247 | RETURN_VALUE 248 | 249 | """ 250 | ) 251 | elif PY312: 252 | expected = textwrap.dedent( 253 | f""" 254 | block1: 255 | RESUME 0 256 | LOAD_FAST 'test' 257 | LOAD_CONST 1 258 | COMPARE_OP {enum_repr} 259 | POP_JUMP_IF_FALSE 260 | -> block2 261 | 262 | block2: 263 | RETURN_CONST 1 264 | 265 | block3: 266 | LOAD_FAST 'test' 267 | LOAD_CONST 2 268 | COMPARE_OP {enum_repr} 269 | POP_JUMP_IF_FALSE 270 | -> block4 
271 | 272 | block4: 273 | RETURN_CONST 2 274 | 275 | block5: 276 | RETURN_CONST 3 277 | 278 | """ 279 | ) 280 | else: 281 | expected = textwrap.dedent( 282 | f""" 283 | block1: 284 | RESUME 0 285 | LOAD_FAST 'test' 286 | LOAD_CONST 1 287 | COMPARE_OP {enum_repr} 288 | POP_JUMP_FORWARD_IF_FALSE 289 | -> block2 290 | 291 | block2: 292 | LOAD_CONST 1 293 | RETURN_VALUE 294 | 295 | block3: 296 | LOAD_FAST 'test' 297 | LOAD_CONST 2 298 | COMPARE_OP {enum_repr} 299 | POP_JUMP_FORWARD_IF_FALSE 300 | -> block4 301 | 302 | block4: 303 | LOAD_CONST 2 304 | RETURN_VALUE 305 | 306 | block5: 307 | LOAD_CONST 3 308 | RETURN_VALUE 309 | 310 | """ 311 | ) 312 | self.check_dump_bytecode(code, expected.lstrip()) 313 | 314 | # with line numbers 315 | if PY314: 316 | expected = textwrap.dedent( 317 | f""" 318 | block1: 319 | L. 1 0: RESUME 0 320 | L. 2 1: LOAD_FAST_BORROW 'test' 321 | 2: LOAD_SMALL_INT 1 322 | 3: COMPARE_OP {enum_repr} 323 | 4: POP_JUMP_IF_FALSE 324 | -> block2 325 | 326 | block2: 327 | 0: NOT_TAKEN 328 | L. 3 1: LOAD_SMALL_INT 1 329 | 2: RETURN_VALUE 330 | 331 | block3: 332 | L. 4 0: LOAD_FAST_BORROW 'test' 333 | 1: LOAD_SMALL_INT 2 334 | 2: COMPARE_OP {enum_repr} 335 | 3: POP_JUMP_IF_FALSE 336 | -> block4 337 | 338 | block4: 339 | 0: NOT_TAKEN 340 | L. 5 1: LOAD_SMALL_INT 2 341 | 2: RETURN_VALUE 342 | 343 | block5: 344 | L. 6 0: LOAD_SMALL_INT 3 345 | 1: RETURN_VALUE 346 | 347 | """ 348 | ) 349 | elif PY312: 350 | expected = textwrap.dedent( 351 | f""" 352 | block1: 353 | L. 1 0: RESUME 0 354 | L. 2 1: LOAD_FAST 'test' 355 | 2: LOAD_CONST 1 356 | 3: COMPARE_OP {enum_repr} 357 | 4: POP_JUMP_IF_FALSE 358 | -> block2 359 | 360 | block2: 361 | L. 3 0: RETURN_CONST 1 362 | 363 | block3: 364 | L. 4 0: LOAD_FAST 'test' 365 | 1: LOAD_CONST 2 366 | 2: COMPARE_OP {enum_repr} 367 | 3: POP_JUMP_IF_FALSE 368 | -> block4 369 | 370 | block4: 371 | L. 5 0: RETURN_CONST 2 372 | 373 | block5: 374 | L. 
6 0: RETURN_CONST 3 375 | 376 | """ 377 | ) 378 | else: 379 | expected = textwrap.dedent( 380 | f""" 381 | block1: 382 | L. 1 0: RESUME 0 383 | L. 2 1: LOAD_FAST 'test' 384 | 2: LOAD_CONST 1 385 | 3: COMPARE_OP {enum_repr} 386 | 4: POP_JUMP_FORWARD_IF_FALSE 387 | -> block2 388 | 389 | block2: 390 | L. 3 0: LOAD_CONST 1 391 | 1: RETURN_VALUE 392 | 393 | block3: 394 | L. 4 0: LOAD_FAST 'test' 395 | 1: LOAD_CONST 2 396 | 2: COMPARE_OP {enum_repr} 397 | 3: POP_JUMP_FORWARD_IF_FALSE 398 | -> block4 399 | 400 | block4: 401 | L. 5 0: LOAD_CONST 2 402 | 1: RETURN_VALUE 403 | 404 | block5: 405 | L. 6 0: LOAD_CONST 3 406 | 1: RETURN_VALUE 407 | 408 | """ 409 | ) 410 | self.check_dump_bytecode(code, expected.lstrip(), lineno=True) 411 | 412 | def test_concrete_bytecode(self): 413 | source = """ 414 | def func(test): 415 | if test == 1: 416 | return 1 417 | elif test == 2: 418 | return 2 419 | return 3 420 | """ 421 | code = disassemble(source, function=True) 422 | code = code.to_concrete_bytecode() 423 | 424 | # without line numbers 425 | if PY314: 426 | # COMPARE_OP use the 4 lowest bits as a cache 427 | expected = """ 428 | 0 RESUME 0 429 | 2 LOAD_FAST_BORROW 0 430 | 4 LOAD_SMALL_INT 1 431 | 6 COMPARE_OP 88 432 | 8 CACHE 0 433 | 10 POP_JUMP_IF_FALSE 3 434 | 12 CACHE 0 435 | 14 NOT_TAKEN 436 | 16 LOAD_SMALL_INT 1 437 | 18 RETURN_VALUE 438 | 20 LOAD_FAST_BORROW 0 439 | 22 LOAD_SMALL_INT 2 440 | 24 COMPARE_OP 88 441 | 26 CACHE 0 442 | 28 POP_JUMP_IF_FALSE 3 443 | 30 CACHE 0 444 | 32 NOT_TAKEN 445 | 34 LOAD_SMALL_INT 2 446 | 36 RETURN_VALUE 447 | 38 LOAD_SMALL_INT 3 448 | 40 RETURN_VALUE 449 | """ 450 | elif PY313: 451 | # COMPARE_OP use the 4 lowest bits as a cache 452 | expected = """ 453 | 0 RESUME 0 454 | 2 LOAD_FAST 0 455 | 4 LOAD_CONST 1 456 | 6 COMPARE_OP 88 457 | 8 CACHE 0 458 | 10 POP_JUMP_IF_FALSE 1 459 | 12 CACHE 0 460 | 14 RETURN_CONST 1 461 | 16 LOAD_FAST 0 462 | 18 LOAD_CONST 2 463 | 20 COMPARE_OP 88 464 | 22 CACHE 0 465 | 24 POP_JUMP_IF_FALSE 1 466 | 26 CACHE 0 
467 | 28 RETURN_CONST 2 468 | 30 RETURN_CONST 3 469 | """ 470 | 471 | elif PY312: 472 | # COMPARE_OP use the 4 lowest bits as a cache 473 | expected = """ 474 | 0 RESUME 0 475 | 2 LOAD_FAST 0 476 | 4 LOAD_CONST 1 477 | 6 COMPARE_OP 40 478 | 8 CACHE 0 479 | 10 POP_JUMP_IF_FALSE 1 480 | 12 RETURN_CONST 1 481 | 14 LOAD_FAST 0 482 | 16 LOAD_CONST 2 483 | 18 COMPARE_OP 40 484 | 20 CACHE 0 485 | 22 POP_JUMP_IF_FALSE 1 486 | 24 RETURN_CONST 2 487 | 26 RETURN_CONST 3 488 | """ 489 | else: 490 | expected = """ 491 | 0 RESUME 0 492 | 2 LOAD_FAST 0 493 | 4 LOAD_CONST 1 494 | 6 COMPARE_OP 2 495 | 8 CACHE 0 496 | 10 CACHE 0 497 | 12 POP_JUMP_FORWARD_IF_FALSE 2 498 | 14 LOAD_CONST 1 499 | 16 RETURN_VALUE 500 | 18 LOAD_FAST 0 501 | 20 LOAD_CONST 2 502 | 22 COMPARE_OP 2 503 | 24 CACHE 0 504 | 26 CACHE 0 505 | 28 POP_JUMP_FORWARD_IF_FALSE 2 506 | 30 LOAD_CONST 2 507 | 32 RETURN_VALUE 508 | 34 LOAD_CONST 3 509 | 36 RETURN_VALUE 510 | """ 511 | self.check_dump_bytecode(code, expected.lstrip("\n")) 512 | 513 | # with line numbers 514 | if PY314: 515 | expected = """ 516 | L. 1 0: RESUME 0 517 | L. 2 2: LOAD_FAST_BORROW 0 518 | 4: LOAD_SMALL_INT 1 519 | 6: COMPARE_OP 88 520 | 8: CACHE 0 521 | 10: POP_JUMP_IF_FALSE 3 522 | 12: CACHE 0 523 | 14: NOT_TAKEN 524 | L. 3 16: LOAD_SMALL_INT 1 525 | 18: RETURN_VALUE 526 | L. 4 20: LOAD_FAST_BORROW 0 527 | 22: LOAD_SMALL_INT 2 528 | 24: COMPARE_OP 88 529 | 26: CACHE 0 530 | 28: POP_JUMP_IF_FALSE 3 531 | 30: CACHE 0 532 | 32: NOT_TAKEN 533 | L. 5 34: LOAD_SMALL_INT 2 534 | 36: RETURN_VALUE 535 | L. 6 38: LOAD_SMALL_INT 3 536 | 40: RETURN_VALUE 537 | """ 538 | elif PY313: 539 | expected = """ 540 | L. 1 0: RESUME 0 541 | L. 2 2: LOAD_FAST 0 542 | 4: LOAD_CONST 1 543 | 6: COMPARE_OP 88 544 | 8: CACHE 0 545 | 10: POP_JUMP_IF_FALSE 1 546 | 12: CACHE 0 547 | L. 3 14: RETURN_CONST 1 548 | L. 4 16: LOAD_FAST 0 549 | 18: LOAD_CONST 2 550 | 20: COMPARE_OP 88 551 | 22: CACHE 0 552 | 24: POP_JUMP_IF_FALSE 1 553 | 26: CACHE 0 554 | L. 
def test_type_validation(self):
    """dump_bytecode() raises TypeError for an object of an unrelated class."""

    class NotBytecode:
        # The only attribute the stand-in object provides.
        first_lineno = 1

    impostor = NotBytecode()
    self.assertRaises(TypeError, bytecode.dump_bytecode, impostor)
class SetLinenoTests(TestCase):
    """Checks for SetLineno objects."""

    def test_lineno(self):
        """The constructor argument is exposed through ``lineno``."""
        marker = SetLineno(1)
        self.assertEqual(marker.lineno, 1)

    def test_equality(self):
        """A SetLineno equals only another SetLineno with the same line."""
        one = SetLineno(1)
        self.assertNotEqual(one, 1)
        self.assertEqual(one, SetLineno(1))
        self.assertNotEqual(one, SetLineno(2))


class VariableTests(TestCase):
    """Checks str(), repr() and equality of CellVar and FreeVar."""

    def test_str(self):
        """str() gives back the variable name."""
        for kind in (CellVar, FreeVar):
            self.assertEqual(str(kind("a")), "a")

    def test_repr(self):
        """repr() mentions both the variable name and the class name."""
        for kind in (CellVar, FreeVar):
            text = repr(kind("_a_x_a_"))
            self.assertIn("_a_x_a_", text)
            self.assertIn(kind.__name__, text)

    def test_eq(self):
        """Variables are equal only when both kind and name match."""
        free_a, free_b = FreeVar("a"), FreeVar("b")
        cell_a, cell_b = CellVar("a"), CellVar("b")

        cases = [
            (free_a, free_a, True),
            (free_a, free_b, False),
            (free_a, cell_a, False),
            (cell_a, cell_a, True),
            (cell_a, cell_b, False),
        ]
        for left, right, same in cases:
            verify = self.assertEqual if same else self.assertNotEqual
            verify(left, right)
def test_constructor(self):
    """Instr() rejects malformed line numbers and instruction names."""
    # Line number of the wrong type, then of an invalid value.
    self.assertRaises(TypeError, Instr, "NOP", lineno="x")
    self.assertRaises(ValueError, Instr, "NOP", lineno=-1)

    # Name of the wrong type, then an unknown name.
    self.assertRaises(TypeError, Instr, 1)
    self.assertRaises(ValueError, Instr, "xxx")


def test_repr(self):
    """repr() of an Instr mentions its name, line number and argument."""
    # Instruction without argument.
    text = repr(Instr("NOP", lineno=10))
    for fragment in ("NOP", "10", "lineno"):
        self.assertIn(fragment, text)

    # Instruction with an argument.
    text = repr(Instr("LOAD_FAST", "_x_", lineno=10))
    for fragment in ("LOAD_FAST", "lineno", "10", "arg", "_x_"):
        self.assertIn(fragment, text)


def test_reject_pseudo_opcode(self):
    """Instr() refuses the opcode name selected for the running Python.

    Nothing is checked when neither PY314 nor PY312 is set.
    """
    if PY314:
        rejected_name = "INSTRUMENTED_END_FOR"
    elif PY312:
        rejected_name = "LOAD_METHOD"
    else:
        return

    with self.assertRaises(ValueError) as caught:
        Instr(rejected_name, "x")
    self.assertIn("is an instrumented or pseudo opcode", str(caught.exception))
UNCONDITIONAL_JUMP, 1.0) 166 | Instr(UNCONDITIONAL_JUMP, label) 167 | Instr(UNCONDITIONAL_JUMP, block) 168 | 169 | # hasfree 170 | self.assertRaises(TypeError, Instr, "STORE_DEREF", "x") 171 | Instr("STORE_DEREF", CellVar("x")) 172 | Instr("STORE_DEREF", FreeVar("x")) 173 | 174 | # haslocal 175 | self.assertRaises(TypeError, Instr, "LOAD_FAST", 1) 176 | Instr("LOAD_FAST", "x") 177 | 178 | # hasname 179 | self.assertRaises(TypeError, Instr, "LOAD_NAME", 1) 180 | Instr("LOAD_NAME", "x") 181 | 182 | # hasconst 183 | self.assertRaises(ValueError, Instr, "LOAD_CONST") # UNSET 184 | self.assertRaises(ValueError, Instr, "LOAD_CONST", label) 185 | self.assertRaises(ValueError, Instr, "LOAD_CONST", block) 186 | Instr("LOAD_CONST", 1.0) 187 | Instr("LOAD_CONST", object()) 188 | 189 | # hascompare 190 | self.assertRaises(TypeError, Instr, "COMPARE_OP", 1) 191 | Instr("COMPARE_OP", Compare.EQ) 192 | 193 | # HAVE_ARGUMENT 194 | self.assertRaises(ValueError, Instr, CALL, -1) 195 | self.assertRaises(TypeError, Instr, CALL, 3.0) 196 | Instr(CALL, 3) 197 | 198 | # test maximum argument 199 | self.assertRaises(ValueError, Instr, CALL, 2147483647 + 1) 200 | instr = Instr(CALL, 2147483647) 201 | self.assertEqual(instr.arg, 2147483647) 202 | 203 | # not HAVE_ARGUMENT 204 | self.assertRaises(ValueError, Instr, "NOP", 0) 205 | Instr("NOP") 206 | 207 | # Instructions using a bitflag in their oparg 208 | for name in (opcode.opname[op] for op in BITFLAG_OPCODES): 209 | self.assertRaises(TypeError, Instr, name, "arg") 210 | self.assertRaises(TypeError, Instr, name, ("arg",)) 211 | self.assertRaises(TypeError, Instr, name, ("", "arg")) 212 | self.assertRaises(TypeError, Instr, name, (False, 1)) 213 | if opcode.opmap[name] in FORMAT_VALUE_OPS: 214 | Instr(name, (True, FormatValue.ASCII)) 215 | else: 216 | Instr(name, (True, "arg")) 217 | 218 | # Instructions using 2 bitflag in their oparg 219 | for name in (opcode.opname[op] for op in BITFLAG2_OPCODES): 220 | self.assertRaises(TypeError, 
Instr, name, "arg") 221 | self.assertRaises(TypeError, Instr, name, ("arg",)) 222 | self.assertRaises(TypeError, Instr, name, ("", True, "arg")) 223 | self.assertRaises(TypeError, Instr, name, (True, "", "arg")) 224 | self.assertRaises(TypeError, Instr, name, (False, True, 1)) 225 | Instr(name, (False, True, "arg")) 226 | 227 | # Instructions packing 2 args in their oparg 228 | for name in (opcode.opname[op] for op in DUAL_ARG_OPCODES): 229 | self.assertRaises(TypeError, Instr, name, "arg") 230 | self.assertRaises(TypeError, Instr, name, ("arg",)) 231 | self.assertRaises(TypeError, Instr, name, ("", True)) 232 | Instr(name, ("arg1", "arg2")) 233 | 234 | for name in [opcode.opname[i] for i in INTRINSIC_1OP]: 235 | self.assertRaises(TypeError, Instr, name, 1) 236 | Instr(name, Intrinsic1Op.INTRINSIC_PRINT) 237 | 238 | for name in [opcode.opname[i] for i in INTRINSIC_2OP]: 239 | self.assertRaises(TypeError, Instr, name, 1) 240 | Instr(name, Intrinsic2Op.INTRINSIC_PREP_RERAISE_STAR) 241 | 242 | for name in (opcode.opname[op] for op in BINARY_OPS): 243 | Instr(name, BinaryOp.ADD) 244 | Instr(name, BinaryOp.ADD.value) 245 | self.assertRaises(TypeError, Instr, name, "") 246 | 247 | for name in (opcode.opname[op] for op in SPECIAL_OPS): 248 | Instr(name, SpecialMethod.EXIT) 249 | self.assertRaises(TypeError, Instr, name, SpecialMethod.EXIT.value) 250 | 251 | for name in (opcode.opname[op] for op in COMMON_CONSTANT_OPS): 252 | Instr(name, CommonConstant.BUILTIN_ALL) 253 | self.assertRaises( 254 | TypeError, 255 | Instr, 256 | name, 257 | CommonConstant.BUILTIN_ALL.value, 258 | ) 259 | 260 | for name in (opcode.opname[op] for op in SMALL_INT_OPS): 261 | Instr(name, 1) 262 | self.assertRaises(ValueError, Instr, name, 256) 263 | 264 | for op, name in ((op, opcode.opname[op]) for op in FORMAT_VALUE_OPS): 265 | if op in BITFLAG_OPCODES: 266 | Instr(name, (True, FormatValue.STR)) 267 | Instr(name, (False, FormatValue.STR)) 268 | self.assertRaises(TypeError, Instr, name, True, 
def test_require_arg(self):
    """require_arg() is true for CALL and false for NOP."""
    self.assertTrue(Instr(CALL, 3).require_arg())
    self.assertFalse(Instr("NOP").require_arg())


def test_attr(self):
    """Attributes mirror the constructor arguments and validate new values."""
    load = Instr("LOAD_CONST", 3, lineno=5)
    self.assertEqual(load.name, "LOAD_CONST")
    self.assertEqual(load.opcode, opcode.opmap["LOAD_CONST"])
    self.assertEqual(load.arg, 3)
    self.assertEqual(load.lineno, 5)

    # Assignments that must be refused: (expected error, attribute, value).
    refused = (
        (ValueError, "lineno", -1),
        (TypeError, "lineno", 1.0),
        (TypeError, "name", 5),
        (TypeError, "opcode", 1.0),
        (ValueError, "opcode", -1),
        (ValueError, "opcode", 255),
    )
    for error, attribute, value in refused:
        with self.assertRaises(error):
            setattr(load, attribute, value)

    # arg accepts these values, but the attribute cannot be deleted.
    load.arg = -8
    load.arg = object()
    with self.assertRaises(AttributeError):
        del load.arg

    # An instruction built without argument reports UNSET.
    bare = Instr("RETURN_VALUE")
    self.assertIs(bare.arg, UNSET)


def test_modify_op(self):
    """Assigning ``opcode`` updates ``name`` as well."""
    target_opcode = opcode.opmap["LOAD_FAST"]
    instr = Instr("LOAD_NAME", "x")
    instr.opcode = target_opcode
    self.assertEqual(instr.name, "LOAD_FAST")
    self.assertEqual(instr.opcode, target_opcode)


def test_extended_arg(self):
    """A LOAD_CONST argument of 0x1234ABCD is stored unchanged."""
    big_value = 0x1234ABCD
    self.assertEqual(Instr("LOAD_CONST", big_value).arg, big_value)


def test_slots(self):
    """Setting an undeclared attribute raises AttributeError."""
    nop = Instr("NOP")
    self.assertRaises(AttributeError, setattr, nop, "myattr", 1)
def test_has_jump(self):
    """has_jump() is true for a jump to a label, false for LOAD_FAST."""
    target = Label()
    self.assertTrue(Instr(UNCONDITIONAL_JUMP, target).has_jump())
    self.assertFalse(Instr("LOAD_FAST", "x").has_jump())


def test_is_cond_jump(self):
    """is_cond_jump() is true for the conditional jump, false for LOAD_FAST."""
    target = Label()
    self.assertTrue(Instr(CONDITIONAL_JUMP, target).is_cond_jump())
    self.assertFalse(Instr("LOAD_FAST", "x").is_cond_jump())


def test_is_uncond_jump(self):
    """is_uncond_jump() is true only for the unconditional jump."""
    target = Label()
    self.assertTrue(Instr(UNCONDITIONAL_JUMP, target).is_uncond_jump())
    self.assertFalse(Instr(CONDITIONAL_JUMP, target).is_uncond_jump())


def test_const_key_not_equal(self):
    """Two LOAD_CONST built from the very same constant compare equal."""

    def func():
        pass

    samples = (
        None,
        0,
        0.0,
        b"bytes",
        "text",
        Ellipsis,
        (1, 2, 3),
        frozenset({1, 2, 3}),
        func.__code__,
        object(),
    )
    for constant in samples:
        first = Instr("LOAD_CONST", constant)
        second = Instr("LOAD_CONST", constant)
        self.assertEqual(first, second)
Instr("LOAD_CONST", complex(pos_zero, 1.0)), 392 | ) 393 | self.assertNotEqual( 394 | Instr("LOAD_CONST", complex(1.0, neg_zero)), 395 | Instr("LOAD_CONST", complex(1.0, pos_zero)), 396 | ) 397 | 398 | # tuple 399 | self.assertNotEqual(Instr("LOAD_CONST", (0,)), Instr("LOAD_CONST", (0.0,))) 400 | nested_tuple1 = (0,) 401 | nested_tuple1 = (nested_tuple1,) 402 | nested_tuple2 = (0.0,) 403 | nested_tuple2 = (nested_tuple2,) 404 | self.assertNotEqual( 405 | Instr("LOAD_CONST", nested_tuple1), Instr("LOAD_CONST", nested_tuple2) 406 | ) 407 | 408 | # frozenset 409 | self.assertNotEqual( 410 | Instr("LOAD_CONST", frozenset({0})), Instr("LOAD_CONST", frozenset({0.0})) 411 | ) 412 | 413 | def test_stack_effects(self): 414 | # Verify all opcodes are handled and that "jump=None" really returns 415 | # the max of the other cases. 416 | from bytecode.concrete import ConcreteInstr 417 | 418 | def check_pre_post(instr, jump): 419 | effect = instr.stack_effect(jump) 420 | pre, post = instr.pre_and_post_stack_effect(jump) 421 | self.assertEqual(pre + post, effect) 422 | return effect 423 | 424 | def check(instr): 425 | jump = check_pre_post(instr, jump=True) 426 | no_jump = check_pre_post(instr, jump=False) 427 | max_effect = check_pre_post(instr, jump=None) 428 | self.assertEqual(instr.stack_effect(), max_effect) 429 | self.assertEqual(max_effect, max(jump, no_jump)) 430 | 431 | if not instr.has_jump(): 432 | self.assertEqual(jump, no_jump) 433 | 434 | for name, op in opcode.opmap.items(): 435 | if PY312 and op >= opcode.MIN_INSTRUMENTED_OPCODE: 436 | continue 437 | print(name) 438 | with self.subTest(name): 439 | # Use ConcreteInstr instead of Instr because it doesn't care 440 | # what kind of argument it is constructed with. 
def test_code_object_containing_mutable_data(self):
    """A code object may carry a mutable object as a LOAD_CONST argument.

    The nested function ``g`` is rewritten so that every LOAD_CONST loads
    the same list object; calling the rebuilt function must then return
    that exact object.
    """
    from types import CodeType

    from bytecode import Bytecode, Instr

    def f():
        def g():
            # Under Python 3.12+ we need a temporary var to be sure we use
            # LOAD_CONST rather than RETURN_CONST
            a = "value"
            return a

        return g

    f_code = Bytecode.from_code(f.__code__)
    instr_load_code = None
    mutable_datum = [4, 2]

    # Locate the instruction that loads the code object of g.
    for each in f_code:
        if (
            isinstance(each, Instr)
            and each.name == "LOAD_CONST"
            and isinstance(each.arg, CodeType)
        ):
            instr_load_code = each
            break

    self.assertIsNotNone(instr_load_code)

    g_code = Bytecode.from_code(instr_load_code.arg)
    # Under Python 3.11+, the first instruction is not LOAD_CONST but RESUME
    for instr in g_code:
        # Test the loop variable itself: the previous check looked at the
        # stale ``each`` from the loop above, so it never filtered anything.
        if isinstance(instr, Instr) and instr.name == "LOAD_CONST":
            instr.arg = mutable_datum
    instr_load_code.arg = g_code.to_code()
    f.__code__ = f_code.to_code()

    self.assertIs(f()(), mutable_datum)


class CompareTests(TestCase):
    """Runs COMPARE_OP with Compare members and checks the result."""

    def test_compare_ops(self):
        """Compare 24 with 42 using each paired Compare member.

        The expected results are paired positionally with the members
        yielded by iterating ``Compare``; ``zip`` stops at the shorter of
        the two. When PY313 is set, each member is also exercised with 16
        added to its value (presumably the bool-cast variant - TODO confirm).
        """
        from bytecode import Bytecode, Instr

        def f():
            pass

        params = zip(
            iter(Compare), (True, True, False, True, False, False), strict=False
        )
        cast_variants = (False, True) if PY313 else (False,)
        for cmp, expected in params:
            for cast in cast_variants:
                # Label with both values so the cast and non-cast runs of
                # the same member are reported as distinct subtests.
                with self.subTest(cmp=cmp, cast=cast):
                    operation = Compare(cmp + (16 if cast else 0))
                    bcode = Bytecode(
                        [
                            Instr("RESUME", 0),
                            Instr("LOAD_CONST", 24),
                            Instr("LOAD_CONST", 42),
                            Instr("COMPARE_OP", operation),
                            Instr("RETURN_VALUE"),
                        ]
                    )
                    bcode.update_flags()
                    f.__code__ = bcode.to_code()
                    self.assertIs(f(), expected)