├── asttokens ├── py.typed ├── astroid_compat.py ├── __init__.py ├── line_numbers.py ├── util.py ├── asttokens.py └── mark_tokens.py ├── tests ├── __init__.py ├── testdata │ ├── python3 │ │ └── astroid │ │ │ ├── absimport.py │ │ │ ├── recursion.py │ │ │ ├── email.py │ │ │ ├── __init__.py │ │ │ ├── notall.py │ │ │ ├── all.py │ │ │ ├── descriptor_crash.py │ │ │ ├── suppliermodule_test.py │ │ │ ├── format.py │ │ │ ├── noendingnewline.py │ │ │ ├── clientmodule_test.py │ │ │ ├── nonregr.py │ │ │ ├── module.py │ │ │ └── module2.py │ ├── python2 │ │ └── astroid │ │ │ ├── recursion.py │ │ │ ├── email.py │ │ │ ├── __init__.py │ │ │ ├── absimport.py │ │ │ ├── notall.py │ │ │ ├── all.py │ │ │ ├── descriptor_crash.py │ │ │ ├── suppliermodule_test.py │ │ │ ├── format.py │ │ │ ├── noendingnewline.py │ │ │ ├── clientmodule_test.py │ │ │ ├── nonregr.py │ │ │ ├── module.py │ │ │ └── module2.py │ └── README.md ├── context.py ├── test_astroid.py ├── test_line_numbers.py ├── test_tokenless.py ├── test_util.py ├── test_asttokens.py ├── tools.py └── test_mark_tokens.py ├── .coveragerc ├── MANIFEST.in ├── docs ├── user-guide.rst ├── requirements.in ├── api-index.rst ├── Makefile ├── index.rst ├── conf.py └── requirements.txt ├── .editorconfig ├── .gitignore ├── setup.py ├── tox.ini ├── .readthedocs.yaml ├── pyproject.toml ├── Makefile ├── setup.cfg ├── .github └── workflows │ └── build-and-test.yml ├── README.rst ├── LICENSE └── .pylintrc /asttokens/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | include = asttokens/* 4 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst LICENSE 2 | recursive-include tests *.py 3 | -------------------------------------------------------------------------------- /tests/testdata/python3/astroid/absimport.py: -------------------------------------------------------------------------------- 1 | 2 | import email 3 | from email import message 4 | -------------------------------------------------------------------------------- /tests/testdata/python2/astroid/recursion.py: -------------------------------------------------------------------------------- 1 | """ For issue #25 """ 2 | class Base(object): 3 | pass -------------------------------------------------------------------------------- /tests/testdata/python3/astroid/recursion.py: -------------------------------------------------------------------------------- 1 | """ For issue #25 """ 2 | class Base(object): 3 | pass -------------------------------------------------------------------------------- /tests/testdata/python2/astroid/email.py: -------------------------------------------------------------------------------- 1 | """fake email module to test absolute import doesn't grab this one""" 2 | -------------------------------------------------------------------------------- /tests/testdata/python3/astroid/email.py: -------------------------------------------------------------------------------- 1 | """fake email module to test absolute import doesn't grab this one""" 2 | -------------------------------------------------------------------------------- /tests/testdata/python2/astroid/__init__.py: -------------------------------------------------------------------------------- 1 | __revision__="$Id: __init__.py,v 1.1 2005-06-13 20:55:20 syt Exp $" 2 | -------------------------------------------------------------------------------- /tests/testdata/python3/astroid/__init__.py: -------------------------------------------------------------------------------- 1 | __revision__="$Id: __init__.py,v 1.1 2005-06-13 20:55:20 syt Exp $" 2 | -------------------------------------------------------------------------------- /docs/user-guide.rst: -------------------------------------------------------------------------------- 1 | User Guide 2 | ========== 3 | 4 | .. include:: ../README.rst 5 | :start-after: Start of user-guide 6 | -------------------------------------------------------------------------------- /tests/testdata/python2/astroid/absimport.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import email 3 | from email import message 4 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | [*] 2 | indent_size = 2 3 | indent_style = space 4 | 5 | insert_final_newline = true 6 | trim_trailing_whitespace = true 7 | -------------------------------------------------------------------------------- /tests/testdata/python2/astroid/notall.py: -------------------------------------------------------------------------------- 1 | name = 'a' 2 | _bla = 2 3 | other = 'o' 4 | class Aaa: pass 5 | 6 | def func(): print('yo') 7 | 8 | -------------------------------------------------------------------------------- /tests/testdata/python3/astroid/notall.py: -------------------------------------------------------------------------------- 1 | 2 | name = 'a' 3 | _bla = 2 4 | other = 'o' 5 | class Aaa: pass 6 | 7 | def func(): print('yo') 8 | 9 | -------------------------------------------------------------------------------- /tests/testdata/README.md: -------------------------------------------------------------------------------- 1 | Everything in astroid subdirectories comes from the astroid library's test suite, which cover a 2 | variety of Python syntax constructs. 3 | -------------------------------------------------------------------------------- /tests/context.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from os.path import abspath, dirname 3 | sys.path.insert(0, dirname(dirname(abspath(__file__)))) 4 | 5 | import asttokens 6 | 7 | __all__ = ["asttokens"] 8 | -------------------------------------------------------------------------------- /tests/testdata/python2/astroid/all.py: -------------------------------------------------------------------------------- 1 | 2 | name = 'a' 3 | _bla = 2 4 | other = 'o' 5 | class Aaa: pass 6 | 7 | def func(): print 'yo' 8 | 9 | __all__ = 'Aaa', '_bla', 'name' 10 | -------------------------------------------------------------------------------- /tests/testdata/python3/astroid/all.py: -------------------------------------------------------------------------------- 1 | 2 | name = 'a' 3 | _bla = 2 4 | other = 'o' 5 | class Aaa: pass 6 | 7 | def func(): print('yo') 8 | 9 | __all__ = 'Aaa', '_bla', 'name' 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /asttokens/version.py 2 | 3 | .coverage 4 | *.py[co] 5 | 6 | # Building and packaging 7 | build/ 8 | dist/ 9 | *.egg-info/ 10 | docs/_build/ 11 | .eggs 12 | .tox 13 | .vscode -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """A setuptools based setup module. 2 | 3 | See: 4 | https://packaging.python.org/en/latest/distributing.html 5 | https://github.com/pypa/sampleproject 6 | """ 7 | 8 | from setuptools import setup 9 | 10 | if __name__ == "__main__": 11 | setup() 12 | -------------------------------------------------------------------------------- /docs/requirements.in: -------------------------------------------------------------------------------- 1 | # After updating, or to pick up newer versions, run: 2 | # env/bin/pip-compile -o docs/requirements.txt docs/requirements.in 3 | # To bring venv up-to-date, run: 4 | # env/bin/pip-sync docs/requirements.txt 5 | sphinx 6 | sphinx_rtd_theme 7 | readthedocs-sphinx-search 8 | -------------------------------------------------------------------------------- /tests/testdata/python2/astroid/descriptor_crash.py: -------------------------------------------------------------------------------- 1 | 2 | import urllib 3 | 4 | class Page(object): 5 | _urlOpen = staticmethod(urllib.urlopen) 6 | 7 | def getPage(self, url): 8 | handle = self._urlOpen(url) 9 | data = handle.read() 10 | handle.close() 11 | return data 12 | -------------------------------------------------------------------------------- /tests/testdata/python3/astroid/descriptor_crash.py: -------------------------------------------------------------------------------- 1 | 2 | import urllib 3 | 4 | class Page(object): 5 | _urlOpen = staticmethod(urllib.urlopen) 6 | 7 | def getPage(self, url): 8 | handle = self._urlOpen(url) 9 | data = handle.read() 10 | handle.close() 11 | return data 12 | -------------------------------------------------------------------------------- /tests/testdata/python2/astroid/suppliermodule_test.py: -------------------------------------------------------------------------------- 1 | """ file suppliermodule.py """ 2 | 3 | class NotImplemented(Exception): 4 | pass 5 | 6 | class Interface: 7 | def get_value(self): 8 | raise NotImplemented() 9 | 10 | def set_value(self, value): 11 | raise NotImplemented() 12 | 13 | class DoNothing : pass 14 | -------------------------------------------------------------------------------- /tests/testdata/python3/astroid/suppliermodule_test.py: -------------------------------------------------------------------------------- 1 | """ file suppliermodule.py """ 2 | 3 | class NotImplemented(Exception): 4 | pass 5 | 6 | class Interface: 7 | def get_value(self): 8 | raise NotImplemented() 9 | 10 | def set_value(self, value): 11 | raise NotImplemented() 12 | 13 | class DoNothing : pass 14 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # Tox (https://tox.wiki/) is a tool for running tests 2 | # in multiple virtualenvs. This configuration file will run the 3 | # test suite on all supported python versions. To use it, "pip install tox" 4 | # and then run "tox" from this directory. 5 | 6 | [tox] 7 | envlist = py{38,39,310,311,312,313,py3} 8 | 9 | [testenv] 10 | commands = pytest {posargs} 11 | deps = 12 | .[test] 13 | -------------------------------------------------------------------------------- /docs/api-index.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ===================================== 3 | 4 | .. automodule:: asttokens 5 | 6 | ASTTokens 7 | --------- 8 | .. autoclass:: asttokens.ASTTokens 9 | :members: 10 | 11 | ASTText 12 | --------- 13 | .. autoclass:: asttokens.ASTText 14 | :members: 15 | 16 | LineNumbers 17 | ----------- 18 | .. autoclass:: asttokens.LineNumbers 19 | :members: 20 | 21 | util 22 | ---- 23 | .. automodule:: asttokens.util 24 | :members: Token, token_repr, visit_tree, walk, replace 25 | -------------------------------------------------------------------------------- /tests/testdata/python2/astroid/format.py: -------------------------------------------------------------------------------- 1 | """A multiline string 2 | """ 3 | 4 | function('aeozrijz\ 5 | earzer', hop) 6 | # XXX write test 7 | x = [i for i in range(5) 8 | if i % 4] 9 | 10 | fonction(1, 11 | 2, 12 | 3, 13 | 4) 14 | 15 | def definition(a, 16 | b, 17 | c): 18 | return a + b + c 19 | 20 | class debile(dict, 21 | object): 22 | pass 23 | 24 | if aaaa: pass 25 | else: 26 | aaaa,bbbb = 1,2 27 | aaaa,bbbb = bbbb,aaaa 28 | # XXX write test 29 | hop = \ 30 | aaaa 31 | 32 | 33 | __revision__.lower(); 34 | 35 | -------------------------------------------------------------------------------- /tests/testdata/python3/astroid/format.py: -------------------------------------------------------------------------------- 1 | """A multiline string 2 | """ 3 | 4 | function('aeozrijz\ 5 | earzer', hop) 6 | # XXX write test 7 | x = [i for i in range(5) 8 | if i % 4] 9 | 10 | fonction(1, 11 | 2, 12 | 3, 13 | 4) 14 | 15 | def definition(a, 16 | b, 17 | c): 18 | return a + b + c 19 | 20 | class debile(dict, 21 | object): 22 | pass 23 | 24 | if aaaa: pass 25 | else: 26 | aaaa,bbbb = 1,2 27 | aaaa,bbbb = bbbb,aaaa 28 | # XXX write test 29 | hop = \ 30 | aaaa 31 | 32 | 33 | __revision__.lower(); 34 | 35 | -------------------------------------------------------------------------------- /asttokens/astroid_compat.py: -------------------------------------------------------------------------------- 1 | try: 2 | from astroid import nodes as astroid_node_classes 3 | 4 | # astroid_node_classes should be whichever module has the NodeNG class 5 | from astroid.nodes import NodeNG 6 | from astroid.nodes import BaseContainer 7 | except Exception: 8 | try: 9 | from astroid import node_classes as astroid_node_classes 10 | from astroid.node_classes import NodeNG 11 | from astroid.node_classes import _BaseContainer as BaseContainer 12 | except Exception: # pragma: no cover 13 | astroid_node_classes = None 14 | NodeNG = None 15 | BaseContainer = None 16 | 17 | 18 | __all__ = ["astroid_node_classes", "NodeNG", "BaseContainer"] 19 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the version of Python and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.11" 12 | 13 | # Build documentation in the docs/ directory with Sphinx 14 | sphinx: 15 | configuration: docs/conf.py 16 | 17 | # We recommend specifying your dependencies to enable reproducible builds: 18 | # https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 19 | python: 20 | install: 21 | - requirements: docs/requirements.txt 22 | - method: pip 23 | path: . 24 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = asttokens 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /tests/testdata/python2/astroid/noendingnewline.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | class TestCase(unittest.TestCase): 5 | 6 | def setUp(self): 7 | unittest.TestCase.setUp(self) 8 | 9 | 10 | def tearDown(self): 11 | unittest.TestCase.tearDown(self) 12 | 13 | def testIt(self): 14 | self.a = 10 15 | self.xxx() 16 | 17 | 18 | def xxx(self): 19 | if False: 20 | pass 21 | print 'a' 22 | 23 | if False: 24 | pass 25 | pass 26 | 27 | if False: 28 | pass 29 | print 'rara' 30 | 31 | 32 | if __name__ == '__main__': 33 | print 'test2' 34 | unittest.main() 35 | 36 | 37 | -------------------------------------------------------------------------------- /tests/testdata/python3/astroid/noendingnewline.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | class TestCase(unittest.TestCase): 5 | 6 | def setUp(self): 7 | unittest.TestCase.setUp(self) 8 | 9 | 10 | def tearDown(self): 11 | unittest.TestCase.tearDown(self) 12 | 13 | def testIt(self): 14 | self.a = 10 15 | self.xxx() 16 | 17 | 18 | def xxx(self): 19 | if False: 20 | pass 21 | print('a') 22 | 23 | if False: 24 | pass 25 | pass 26 | 27 | if False: 28 | pass 29 | print('rara') 30 | 31 | 32 | if __name__ == '__main__': 33 | print('test2') 34 | unittest.main() 35 | 36 | 37 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=44", "wheel", "setuptools_scm[toml]>=3.4.3"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.setuptools_scm] 6 | write_to = "asttokens/version.py" 7 | write_to_template = "__version__ = \"{version}\"\n" 8 | 9 | [tool.mypy] 10 | show_error_codes=true 11 | warn_unused_ignores=true 12 | disallow_untyped_defs=true 13 | disallow_untyped_calls=true 14 | no_implicit_reexport=true 15 | 16 | [[tool.mypy.overrides]] 17 | module = "tests.*" 18 | disallow_untyped_defs=false 19 | disallow_untyped_calls=false 20 | ignore_missing_imports=true 21 | 22 | [[tool.mypy.overrides]] 23 | module = ["astroid", "astroid.manager", "astroid.node_classes", "astroid.nodes", "astroid.nodes.utils"] 24 | ignore_missing_imports = true 25 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | dist: 2 | @echo Build python distribution 3 | @echo "(If no 'build' module, install with 'python -m pip install build setuptools_scm')" 4 | python -m build 5 | 6 | publish: 7 | @echo "Publish to PyPI at https://pypi.python.org/pypi/asttokens/" 8 | @VER=`python -c 'import setuptools_scm; print(setuptools_scm.get_version())'`; \ 9 | echo "Version in setup.py is $$VER"; \ 10 | echo "Git tag is `git describe --tags`"; \ 11 | echo "Run this manually: twine upload dist/asttokens-$$VER*" 12 | 13 | docs: 14 | @echo Build documentation in docs/_build/html 15 | source env/bin/activate ; PYTHONPATH=$(abspath .) $(MAKE) -C docs html 16 | 17 | clean: 18 | python setup.py clean 19 | source env/bin/activate ; PYTHONPATH=$(abspath .) $(MAKE) -C docs clean 20 | 21 | .PHONY: dist publish docs clean 22 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. asttokens documentation master file, created by 2 | sphinx-quickstart on Mon Aug 7 11:16:41 2023. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | ASTTokens Documentation 7 | ======================= 8 | 9 | The ``asttokens`` module annotates Python abstract syntax trees (ASTs) with the positions of tokens 10 | and text in the source code that generated them. This is helpful for tools that make source code 11 | transformations. 12 | 13 | .. toctree:: 14 | :maxdepth: 2 15 | 16 | user-guide 17 | api-index 18 | 19 | License 20 | ------- 21 | Copyright 2023, Grist Labs, Inc. Licensed under the Apache License, Version 2.0. 22 | 23 | Indices and tables 24 | ================== 25 | 26 | * :ref:`genindex` 27 | * :ref:`modindex` 28 | * :ref:`search` 29 | -------------------------------------------------------------------------------- /tests/testdata/python2/astroid/clientmodule_test.py: -------------------------------------------------------------------------------- 1 | """ docstring for file clientmodule.py """ 2 | from data.suppliermodule_test import Interface as IFace, DoNothing 3 | 4 | class Toto: pass 5 | 6 | class Ancestor: 7 | """ Ancestor method """ 8 | __implements__ = (IFace,) 9 | 10 | def __init__(self, value): 11 | local_variable = 0 12 | self.attr = 'this method shouldn\'t have a docstring' 13 | self.__value = value 14 | 15 | def get_value(self): 16 | """ nice docstring ;-) """ 17 | return self.__value 18 | 19 | def set_value(self, value): 20 | self.__value = value 21 | return 'this method shouldn\'t have a docstring' 22 | 23 | class Specialization(Ancestor): 24 | TYPE = 'final class' 25 | top = 'class' 26 | 27 | def __init__(self, value, _id): 28 | Ancestor.__init__(self, value) 29 | self._id = _id 30 | self.relation = DoNothing() 31 | self.toto = Toto() 32 | 33 | -------------------------------------------------------------------------------- /tests/testdata/python3/astroid/clientmodule_test.py: -------------------------------------------------------------------------------- 1 | """ docstring for file clientmodule.py """ 2 | from data.suppliermodule_test import Interface as IFace, DoNothing 3 | 4 | class Toto: pass 5 | 6 | class Ancestor: 7 | """ Ancestor method """ 8 | __implements__ = (IFace,) 9 | 10 | def __init__(self, value): 11 | local_variable = 0 12 | self.attr = 'this method shouldn\'t have a docstring' 13 | self.__value = value 14 | 15 | def get_value(self): 16 | """ nice docstring ;-) """ 17 | return self.__value 18 | 19 | def set_value(self, value): 20 | self.__value = value 21 | return 'this method shouldn\'t have a docstring' 22 | 23 | class Specialization(Ancestor): 24 | TYPE = 'final class' 25 | top = 'class' 26 | 27 | def __init__(self, value, _id): 28 | Ancestor.__init__(self, value) 29 | self._id = _id 30 | self.relation = DoNothing() 31 | self.toto = Toto() 32 | 33 | -------------------------------------------------------------------------------- /asttokens/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Grist Labs, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ 16 | This module enhances the Python AST tree with token and source code information, sufficent to 17 | detect the source text of each AST node. This is helpful for tools that make source code 18 | transformations. 19 | """ 20 | 21 | from .line_numbers import LineNumbers 22 | from .asttokens import ASTText, ASTTokens, supports_tokenless 23 | 24 | __all__ = ['ASTText', 'ASTTokens', 'LineNumbers', 'supports_tokenless'] 25 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | project = 'asttokens' 10 | copyright = '2023, Grist Labs' 11 | author = 'Grist Labs' 12 | 13 | # -- General configuration --------------------------------------------------- 14 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 15 | 16 | extensions = [ 17 | 'sphinx.ext.autodoc', 18 | 'sphinx.ext.viewcode', 19 | 'sphinx_rtd_theme', 20 | ] 21 | 22 | templates_path = ['_templates'] 23 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 24 | 25 | 26 | 27 | # -- Options for HTML output ------------------------------------------------- 28 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 29 | 30 | html_theme = "sphinx_rtd_theme" 31 | -------------------------------------------------------------------------------- /tests/testdata/python3/astroid/nonregr.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | try: 4 | enumerate = enumerate 5 | except NameError: 6 | 7 | def enumerate(iterable): 8 | """emulates the python2.3 enumerate() function""" 9 | i = 0 10 | for val in iterable: 11 | yield i, val 12 | i += 1 13 | 14 | def toto(value): 15 | for k, v in value: 16 | print(v.get('yo')) 17 | 18 | 19 | import imp 20 | fp, mpath, desc = imp.find_module('optparse',a) 21 | s_opt = imp.load_module('std_optparse', fp, mpath, desc) 22 | 23 | class OptionParser(s_opt.OptionParser): 24 | 25 | def parse_args(self, args=None, values=None, real_optparse=False): 26 | if real_optparse: 27 | pass 28 | ## return super(OptionParser, self).parse_args() 29 | else: 30 | import optcomp 31 | optcomp.completion(self) 32 | 33 | 34 | class Aaa(object): 35 | """docstring""" 36 | def __init__(self): 37 | self.__setattr__('a','b') 38 | pass 39 | 40 | def one_public(self): 41 | """docstring""" 42 | pass 43 | 44 | def another_public(self): 45 | """docstring""" 46 | pass 47 | 48 | class Ccc(Aaa): 49 | """docstring""" 50 | 51 | class Ddd(Aaa): 52 | """docstring""" 53 | pass 54 | 55 | class Eee(Ddd): 56 | """docstring""" 57 | pass 58 | -------------------------------------------------------------------------------- /tests/testdata/python2/astroid/nonregr.py: -------------------------------------------------------------------------------- 1 | from __future__ import generators, print_function 2 | 3 | try: 4 | enumerate = enumerate 5 | except NameError: 6 | 7 | def enumerate(iterable): 8 | """emulates the python2.3 enumerate() function""" 9 | i = 0 10 | for val in iterable: 11 | yield i, val 12 | i += 1 13 | 14 | def toto(value): 15 | for k, v in value: 16 | print(v.get('yo')) 17 | 18 | 19 | import imp 20 | fp, mpath, desc = imp.find_module('optparse',a) 21 | s_opt = imp.load_module('std_optparse', fp, mpath, desc) 22 | 23 | class OptionParser(s_opt.OptionParser): 24 | 25 | def parse_args(self, args=None, values=None, real_optparse=False): 26 | if real_optparse: 27 | pass 28 | ## return super(OptionParser, self).parse_args() 29 | else: 30 | import optcomp 31 | optcomp.completion(self) 32 | 33 | 34 | class Aaa(object): 35 | """docstring""" 36 | def __init__(self): 37 | self.__setattr__('a','b') 38 | pass 39 | 40 | def one_public(self): 41 | """docstring""" 42 | pass 43 | 44 | def another_public(self): 45 | """docstring""" 46 | pass 47 | 48 | class Ccc(Aaa): 49 | """docstring""" 50 | 51 | class Ddd(Aaa): 52 | """docstring""" 53 | pass 54 | 55 | class Eee(Ddd): 56 | """docstring""" 57 | pass 58 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.11 3 | # by the following command: 4 | # 5 | # pip-compile --config=pyproject.toml --output-file=docs/requirements.txt docs/requirements.in 6 | # 7 | alabaster==0.7.13 8 | # via sphinx 9 | babel==2.12.1 10 | # via sphinx 11 | certifi==2024.7.4 12 | # via requests 13 | charset-normalizer==3.2.0 14 | # via requests 15 | docutils==0.18.1 16 | # via 17 | # sphinx 18 | # sphinx-rtd-theme 19 | idna==3.7 20 | # via requests 21 | imagesize==1.4.1 22 | # via sphinx 23 | jinja2==3.1.6 24 | # via sphinx 25 | markupsafe==2.1.3 26 | # via jinja2 27 | packaging==23.1 28 | # via sphinx 29 | pygments==2.16.1 30 | # via sphinx 31 | readthedocs-sphinx-search==0.3.2 32 | # via -r requirements.in 33 | requests==2.32.4 34 | # via sphinx 35 | snowballstemmer==2.2.0 36 | # via sphinx 37 | sphinx==6.2.1 38 | # via 39 | # -r requirements.in 40 | # sphinx-rtd-theme 41 | # sphinxcontrib-jquery 42 | sphinx-rtd-theme==1.2.2 43 | # via -r requirements.in 44 | sphinxcontrib-applehelp==1.0.4 45 | # via sphinx 46 | sphinxcontrib-devhelp==1.0.2 47 | # via sphinx 48 | sphinxcontrib-htmlhelp==2.0.1 49 | # via sphinx 50 | sphinxcontrib-jquery==4.1 51 | # via sphinx-rtd-theme 52 | sphinxcontrib-jsmath==1.0.1 53 | # via sphinx 54 | sphinxcontrib-qthelp==1.0.3 55 | # via sphinx 56 | sphinxcontrib-serializinghtml==1.1.5 57 | # via sphinx 58 | urllib3==2.5.0 59 | # via requests 60 | -------------------------------------------------------------------------------- /tests/test_astroid.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import astroid 3 | 4 | from asttokens import ASTTokens 5 | from asttokens.astroid_compat import astroid_node_classes 6 | from astroid.manager import AstroidManager 7 | from . import test_mark_tokens 8 | 9 | 10 | class TestAstroid(test_mark_tokens.TestMarkTokens): 11 | 12 | is_astroid_test = True 13 | astroid_version = int(astroid.__version__.split('.')[0]) 14 | module = astroid 15 | 16 | nodes_classes = astroid_node_classes.NodeNG 17 | context_classes = [ 18 | (astroid.Name, astroid.DelName, astroid.AssignName), 19 | (astroid.Attribute, astroid.DelAttr, astroid.AssignAttr), 20 | ] 21 | 22 | @staticmethod 23 | def iter_fields(node): 24 | """ 25 | Yield a tuple of ``(fieldname, value)`` for each field 26 | that is present on *node*. 27 | 28 | Similar to ast.iter_fields, but for astroid and ignores context 29 | """ 30 | fields = node._astroid_fields + node._other_fields 31 | for field in fields: 32 | if field == 'ctx': 33 | continue 34 | if field == 'doc' and 'doc_node' in fields: 35 | continue 36 | yield field, getattr(node, field) 37 | 38 | @staticmethod 39 | def create_asttokens(source): 40 | manager = AstroidManager() 41 | builder = astroid.builder.AstroidBuilder(manager) 42 | try: 43 | tree = builder.string_build(source) 44 | except AttributeError as e: 45 | raise AstroidTreeException(str(e)) 46 | return ASTTokens(source, tree=tree) 47 | 48 | 49 | class AstroidTreeException(Exception): 50 | pass 51 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | license_files = LICENSE 3 | name = asttokens 4 | author = Dmitry Sagalovskiy, Grist Labs 5 | author_email = dmitry@getgrist.com 6 | license = Apache 2.0 7 | description = Annotate AST trees with source code positions 8 | keywords = code, ast, parse, tokenize, refactor 9 | url = https://github.com/gristlabs/asttokens 10 | project_urls = 11 | Documentation = https://asttokens.readthedocs.io/en/latest/index.html 12 | Issue Tracker = https://github.com/gristlabs/asttokens/issues 13 | long_description = file: README.rst 14 | classifiers = 15 | Development Status :: 5 - Production/Stable 16 | Intended Audience :: Developers 17 | Topic :: Software Development :: Libraries :: Python Modules 18 | Topic :: Software Development :: Code Generators 19 | Topic :: Software Development :: Compilers 20 | Topic :: Software Development :: Interpreters 21 | Topic :: Software Development :: Pre-processors 22 | Environment :: Console 23 | Operating System :: OS Independent 24 | Programming Language :: Python :: 3.9 25 | Programming Language :: Python :: 3.10 26 | Programming Language :: Python :: 3.11 27 | Programming Language :: Python :: 3.12 28 | Programming Language :: Python :: 3.13 29 | Programming Language :: Python :: 3.14 30 | Programming Language :: Python :: Implementation :: CPython 31 | Programming Language :: Python :: Implementation :: PyPy 32 | 33 | [options] 34 | packages = asttokens 35 | setup_requires = setuptools>=44; setuptools_scm[toml]>=3.4.3 36 | python_requires = >=3.8 37 | 38 | [options.extras_require] 39 | astroid = 40 | astroid >=2, <5 41 | test = 42 | astroid >=2, <5 43 | pytest < 9.0 44 | pytest-cov 45 | pytest-xdist 46 | 47 | [options.package_data] 48 | asttokens = py.typed 49 | 50 | [tool:pytest] 51 | addopts = --disable-warnings --ignore=tests/testdata --strict-markers -m 'not slow' 52 | markers = 53 | slow: marks tests as slow (deselected by default) 54 | -------------------------------------------------------------------------------- /tests/testdata/python3/astroid/module.py: -------------------------------------------------------------------------------- 1 | """test module for astroid 2 | """ 3 | 4 | __revision__ = '$Id: module.py,v 1.2 2005-11-02 11:56:54 syt Exp $' 5 | from astroid.node_classes import Name as NameNode 6 | from astroid import modutils 7 | from astroid.utils import * 8 | import os.path 9 | MY_DICT = {} 10 | 11 | def global_access(key, val): 12 | """function test""" 13 | local = 1 14 | MY_DICT[key] = val 15 | for i in val: 16 | if i: 17 | del MY_DICT[i] 18 | continue 19 | else: 20 | break 21 | else: 22 | return 23 | 24 | 25 | class YO: 26 | """hehe""" 27 | a = 1 28 | 29 | def __init__(self): 30 | try: 31 | self.yo = 1 32 | except ValueError as ex: 33 | pass 34 | except (NameError, TypeError): 35 | raise XXXError() 36 | except: 37 | raise 38 | 39 | 40 | 41 | class YOUPI(YO): 42 | class_attr = None 43 | 44 | def __init__(self): 45 | self.member = None 46 | 47 | def method(self): 48 | """method test""" 49 | global MY_DICT 50 | try: 51 | MY_DICT = {} 52 | local = None 53 | autre = [a for (a, b) in MY_DICT if b] 54 | if b in autre: 55 | return 56 | else: 57 | if a in autre: 58 | return 'hehe' 59 | global_access(local, val=autre) 60 | finally: 61 | return local 62 | 63 | def static_method(): 64 | """static method test""" 65 | assert MY_DICT, '???' 66 | static_method = staticmethod(static_method) 67 | 68 | def class_method(cls): 69 | """class method test""" 70 | exec(a, b) 71 | class_method = classmethod(class_method) 72 | 73 | 74 | def four_args(a, b, c, d): 75 | """four arguments (was nested_args)""" 76 | while 1: 77 | if a: 78 | break 79 | a += +1 80 | else: 81 | b += -2 82 | if c: 83 | d = ((a) and (b)) or (c) 84 | else: 85 | c = ((a) and (b)) or (d) 86 | list(map(lambda x, y: (y, x), a)) 87 | redirect = four_args 88 | 89 | -------------------------------------------------------------------------------- /tests/testdata/python2/astroid/module.py: -------------------------------------------------------------------------------- 1 | """test module for astroid 2 | """ 3 | 4 | __revision__ = '$Id: module.py,v 1.2 2005-11-02 11:56:54 syt Exp $' 5 | from astroid.node_classes import Name as NameNode 6 | from astroid import modutils 7 | from astroid.utils import * 8 | import os.path 9 | MY_DICT = {} 10 | 11 | def global_access(key, val): 12 | """function test""" 13 | local = 1 14 | MY_DICT[key] = val 15 | for i in val: 16 | if i: 17 | del MY_DICT[i] 18 | continue 19 | else: 20 | break 21 | else: 22 | return local 23 | 24 | 25 | class YO: 26 | """hehe""" 27 | a = 1 28 | 29 | def __init__(self): 30 | try: 31 | self.yo = 1 32 | except ValueError, ex: 33 | pass 34 | except (NameError, TypeError): 35 | raise XXXError() 36 | except: 37 | raise 38 | 39 | 40 | 41 | class YOUPI(YO): 42 | class_attr = None 43 | 44 | def __init__(self): 45 | self.member = None 46 | 47 | def method(self): 48 | """method test""" 49 | global MY_DICT 50 | try: 51 | MY_DICT = {} 52 | local = None 53 | autre = [a for (a, b) in MY_DICT if b] 54 | if b in autre: 55 | return b 56 | else: 57 | if a in autre: 58 | return a 59 | global_access(local, val=autre) 60 | finally: 61 | return local 62 | 63 | def static_method(): 64 | """static method test""" 65 | assert MY_DICT, '???' 66 | static_method = staticmethod(static_method) 67 | 68 | def class_method(cls): 69 | """class method test""" 70 | exec a in b 71 | class_method = classmethod(class_method) 72 | 73 | 74 | def four_args(a, b, c, d): 75 | """four arguments (was nested_args)""" 76 | pass 77 | while 1: 78 | if a: 79 | break 80 | a += +1 81 | else: 82 | b += -2 83 | if c: 84 | d = ((a) and (b)) or (c) 85 | else: 86 | c = ((a) and (b)) or (d) 87 | map(lambda x, y: (y, x), a) 88 | redirect = four_args 89 | 90 | -------------------------------------------------------------------------------- /tests/testdata/python2/astroid/module2.py: -------------------------------------------------------------------------------- 1 | from data.module import YO, YOUPI 2 | import data 3 | 4 | 5 | class Specialization(YOUPI, YO): 6 | pass 7 | 8 | 9 | 10 | class Metaclass(type): 11 | pass 12 | 13 | 14 | 15 | class Interface: 16 | pass 17 | 18 | 19 | 20 | class MyIFace(Interface): 21 | pass 22 | 23 | 24 | 25 | class AnotherIFace(Interface): 26 | pass 27 | 28 | 29 | 30 | class MyException(Exception): 31 | pass 32 | 33 | 34 | 35 | class MyError(MyException): 36 | pass 37 | 38 | 39 | 40 | class AbstractClass(object): 41 | 42 | def to_override(self, whatever): 43 | raise NotImplementedError() 44 | 45 | def return_something(self, param): 46 | if param: 47 | return 'toto' 48 | return 49 | 50 | 51 | 52 | class Concrete0: 53 | __implements__ = MyIFace 54 | 55 | 56 | 57 | class Concrete1: 58 | __implements__ = (MyIFace, AnotherIFace) 59 | 60 | 61 | 62 | class Concrete2: 63 | __implements__ = (MyIFace, AnotherIFace) 64 | 65 | 66 | 67 | class Concrete23(Concrete1): 68 | pass 69 | 70 | del YO.member 71 | del YO 72 | [SYN1, SYN2] = (Concrete0, Concrete1) 73 | assert '1' 74 | b = (1) | (((2) & (3)) ^ (8)) 75 | bb = ((1) | (two)) | (6) 76 | ccc = ((one) & (two)) & (three) 77 | dddd = ((x) ^ (o)) ^ (r) 78 | exec 'c = 3' 79 | exec 'c = 3' in {}, {} 80 | 81 | def raise_string(a=2, *args, **kwargs): 82 | raise Exception, 'yo' 83 | yield 'coucou' 84 | yield 85 | a = (b) + (2) 86 | c = (b) * (2) 87 | c = (b) / (2) 88 | c = (b) // (2) 89 | c = (b) - (2) 90 | c = (b) % (2) 91 | c = (b) ** (2) 92 | c = (b) << (2) 93 | c = (b) >> (2) 94 | c = ~b 95 | c = not b 96 | d = [c] 97 | e = d[:] 98 | e = d[a:b:c] 99 | raise_string(*args, **kwargs) 100 | print >> stream, 'bonjour' 101 | print >> stream, 'salut', 102 | 103 | def make_class(any, base=data.module.YO, *args, **kwargs): 104 | """check base is correctly resolved to Concrete0""" 105 | 106 | 107 | class Aaaa(base): 108 | """dynamic class""" 109 | 110 | 111 | return Aaaa 112 | from os.path import abspath 113 | import os as myos 114 | 115 | 116 | class A: 117 | pass 118 | 119 | 120 | 121 | class A(A): 122 | pass 123 | 124 | 125 | def generator(): 126 | """A generator.""" 127 | yield 128 | 129 | def not_a_generator(): 130 | """A function that contains generator, but is not one.""" 131 | 132 | def generator(): 133 | yield 134 | genl = lambda : (yield) 135 | 136 | def with_metaclass(meta, *bases): 137 | return meta('NewBase', bases, {}) 138 | 139 | 140 | class NotMetaclass(with_metaclass(Metaclass)): 141 | pass 142 | 143 | 144 | -------------------------------------------------------------------------------- /tests/testdata/python3/astroid/module2.py: -------------------------------------------------------------------------------- 1 | from data.module import YO, YOUPI 2 | import data 3 | 4 | 5 | class Specialization(YOUPI, YO): 6 | pass 7 | 8 | 9 | 10 | class Metaclass(type): 11 | pass 12 | 13 | 14 | 15 | class Interface: 16 | pass 17 | 18 | 19 | 20 | class MyIFace(Interface): 21 | pass 22 | 23 | 24 | 25 | class AnotherIFace(Interface): 26 | pass 27 | 28 | 29 | 30 | class MyException(Exception): 31 | pass 32 | 33 | 34 | 35 | class MyError(MyException): 36 | pass 37 | 38 | 39 | 40 | class AbstractClass(object): 41 | 42 | def to_override(self, whatever): 43 | raise NotImplementedError() 44 | 45 | def return_something(self, param): 46 | if param: 47 | return 'toto' 48 | return 49 | 50 | 51 | 52 | class Concrete0: 53 | __implements__ = MyIFace 54 | 55 | 56 | 57 | class Concrete1: 58 | __implements__ = (MyIFace, AnotherIFace) 59 | 60 | 61 | 62 | class Concrete2: 63 | __implements__ = (MyIFace, AnotherIFace) 64 | 65 | 66 | 67 | class Concrete23(Concrete1): 68 | pass 69 | 70 | del YO.member 71 | del YO 72 | [SYN1, SYN2] = (Concrete0, Concrete1) 73 | assert repr(1) 74 | b = (1) | (((2) & (3)) ^ (8)) 75 | bb = ((1) | (two)) | (6) 76 | ccc = ((one) & (two)) & (three) 77 | dddd = ((x) ^ (o)) ^ (r) 78 | exec('c = 3') 79 | exec('c = 3', {}, {}) 80 | 81 | def raise_string(a=2, *args, **kwargs): 82 | raise Exception('yo') 83 | yield 'coucou' 84 | yield 85 | a = (b) + (2) 86 | c = (b) * (2) 87 | c = (b) / (2) 88 | c = (b) // (2) 89 | c = (b) - (2) 90 | c = (b) % (2) 91 | c = (b) ** (2) 92 | c = (b) << (2) 93 | c = (b) >> (2) 94 | c = ~b 95 | c = not b 96 | d = [c] 97 | e = d[:] 98 | e = d[a:b:c] 99 | raise_string(*args, **kwargs) 100 | print('bonjour', file=stream) 101 | print('salut', end=' ', file=stream) 102 | 103 | def make_class(any, base=data.module.YO, *args, **kwargs): 104 | """check base is correctly resolved to Concrete0""" 105 | 106 | 107 | class Aaaa(base): 108 | """dynamic class""" 109 | 110 | 111 | return Aaaa 112 | from os.path import abspath 113 | import os as myos 114 | 115 | 116 | class A: 117 | pass 118 | 119 | 120 | 121 | class A(A): 122 | pass 123 | 124 | 125 | def generator(): 126 | """A generator.""" 127 | yield 128 | 129 | def not_a_generator(): 130 | """A function that contains generator, but is not one.""" 131 | 132 | def generator(): 133 | yield 134 | genl = lambda : (yield) 135 | 136 | def with_metaclass(meta, *bases): 137 | return meta('NewBase', bases, {}) 138 | 139 | 140 | class NotMetaclass(with_metaclass(Metaclass)): 141 | pass 142 | 143 | 144 | -------------------------------------------------------------------------------- /tests/test_line_numbers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | from .context import asttokens 4 | 5 | class TestLineNumbers(unittest.TestCase): 6 | 7 | def test_line_numbers(self): 8 | ln = asttokens.LineNumbers("Hello\nworld\nThis\n\nis\n\na test.\n") 9 | self.assertEqual(ln.line_to_offset(1, 0), 0) 10 | self.assertEqual(ln.line_to_offset(1, 5), 5) 11 | self.assertEqual(ln.line_to_offset(2, 0), 6) 12 | self.assertEqual(ln.line_to_offset(2, 5), 11) 13 | self.assertEqual(ln.line_to_offset(3, 0), 12) 14 | self.assertEqual(ln.line_to_offset(4, 0), 17) 15 | self.assertEqual(ln.line_to_offset(5, 0), 18) 16 | self.assertEqual(ln.line_to_offset(6, 0), 21) 17 | self.assertEqual(ln.line_to_offset(7, 0), 22) 18 | self.assertEqual(ln.line_to_offset(7, 7), 29) 19 | self.assertEqual(ln.offset_to_line(0), (1, 0)) 20 | self.assertEqual(ln.offset_to_line(5), (1, 5)) 21 | self.assertEqual(ln.offset_to_line(6), (2, 0)) 22 | self.assertEqual(ln.offset_to_line(11), (2, 5)) 23 | self.assertEqual(ln.offset_to_line(12), (3, 0)) 24 | self.assertEqual(ln.offset_to_line(17), (4, 0)) 25 | self.assertEqual(ln.offset_to_line(18), (5, 0)) 26 | self.assertEqual(ln.offset_to_line(21), (6, 0)) 27 | self.assertEqual(ln.offset_to_line(22), (7, 0)) 28 | self.assertEqual(ln.offset_to_line(29), (7, 7)) 29 | 30 | # Test that out-of-bounds inputs still return something sensible. 31 | self.assertEqual(ln.line_to_offset(6, 19), 30) 32 | self.assertEqual(ln.line_to_offset(100, 99), 30) 33 | self.assertEqual(ln.line_to_offset(2, -1), 6) 34 | self.assertEqual(ln.line_to_offset(-1, 99), 0) 35 | self.assertEqual(ln.offset_to_line(30), (8, 0)) 36 | self.assertEqual(ln.offset_to_line(100), (8, 0)) 37 | self.assertEqual(ln.offset_to_line(-100), (1, 0)) 38 | 39 | def test_unicode(self): 40 | ln = asttokens.LineNumbers("фыва\nячсм") 41 | self.assertEqual(ln.line_to_offset(1, 0), 0) 42 | self.assertEqual(ln.line_to_offset(1, 4), 4) 43 | self.assertEqual(ln.line_to_offset(2, 0), 5) 44 | self.assertEqual(ln.line_to_offset(2, 4), 9) 45 | 46 | self.assertEqual(ln.offset_to_line(0), (1, 0)) 47 | self.assertEqual(ln.offset_to_line(4), (1, 4)) 48 | self.assertEqual(ln.offset_to_line(5), (2, 0)) 49 | self.assertEqual(ln.offset_to_line(9), (2, 4)) 50 | 51 | def test_utf8_offsets(self): 52 | ln = asttokens.LineNumbers("фыва\nф.в.") 53 | self.assertEqual(ln.from_utf8_col(1, 0), 0) 54 | self.assertEqual(ln.from_utf8_col(1, 2), 1) 55 | self.assertEqual(ln.from_utf8_col(1, 3), 1) 56 | self.assertEqual(ln.from_utf8_col(1, 6), 3) 57 | self.assertEqual(ln.from_utf8_col(1, 8), 4) 58 | self.assertEqual(ln.from_utf8_col(2, 0), 0) 59 | self.assertEqual(ln.from_utf8_col(2, 2), 1) 60 | self.assertEqual(ln.from_utf8_col(2, 3), 2) 61 | self.assertEqual(ln.from_utf8_col(2, 4), 2) 62 | self.assertEqual(ln.from_utf8_col(2, 5), 3) 63 | self.assertEqual(ln.from_utf8_col(2, 6), 4) 64 | -------------------------------------------------------------------------------- /asttokens/line_numbers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Grist Labs, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import bisect 16 | import re 17 | from typing import Dict, List, Tuple 18 | 19 | _line_start_re = re.compile(r'^', re.M) 20 | 21 | class LineNumbers: 22 | """ 23 | Class to convert between character offsets in a text string, and pairs (line, column) of 1-based 24 | line and 0-based column numbers, as used by tokens and AST nodes. 25 | 26 | This class expects unicode for input and stores positions in unicode. But it supports 27 | translating to and from utf8 offsets, which are used by ast parsing. 28 | """ 29 | def __init__(self, text): 30 | # type: (str) -> None 31 | # A list of character offsets of each line's first character. 32 | self._line_offsets = [m.start(0) for m in _line_start_re.finditer(text)] 33 | self._text = text 34 | self._text_len = len(text) 35 | self._utf8_offset_cache = {} # type: Dict[int, List[int]] # maps line num to list of char offset for each byte in line 36 | 37 | def from_utf8_col(self, line, utf8_column): 38 | # type: (int, int) -> int 39 | """ 40 | Given a 1-based line number and 0-based utf8 column, returns a 0-based unicode column. 41 | """ 42 | offsets = self._utf8_offset_cache.get(line) 43 | if offsets is None: 44 | end_offset = self._line_offsets[line] if line < len(self._line_offsets) else self._text_len 45 | line_text = self._text[self._line_offsets[line - 1] : end_offset] 46 | 47 | offsets = [i for i,c in enumerate(line_text) for byte in c.encode('utf8')] 48 | offsets.append(len(line_text)) 49 | self._utf8_offset_cache[line] = offsets 50 | 51 | return offsets[max(0, min(len(offsets)-1, utf8_column))] 52 | 53 | def line_to_offset(self, line, column): 54 | # type: (int, int) -> int 55 | """ 56 | Converts 1-based line number and 0-based column to 0-based character offset into text. 57 | """ 58 | line -= 1 59 | if line >= len(self._line_offsets): 60 | return self._text_len 61 | elif line < 0: 62 | return 0 63 | else: 64 | return min(self._line_offsets[line] + max(0, column), self._text_len) 65 | 66 | def offset_to_line(self, offset): 67 | # type: (int) -> Tuple[int, int] 68 | """ 69 | Converts 0-based character offset to pair (line, col) of 1-based line and 0-based column 70 | numbers. 71 | """ 72 | offset = max(0, min(self._text_len, offset)) 73 | line_index = bisect.bisect_right(self._line_offsets, offset) - 1 74 | return (line_index + 1, offset - self._line_offsets[line_index]) 75 | -------------------------------------------------------------------------------- /.github/workflows/build-and-test.yml: -------------------------------------------------------------------------------- 1 | name: Build and test 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | workflow_dispatch: 8 | jobs: 9 | test: 10 | runs-on: ubuntu-latest 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | python-version: 15 | - 3.8 16 | - 3.9 17 | - '3.10' 18 | - 3.11 19 | - 3.12 20 | - 3.13 21 | # As per https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu2004-Readme.md#pypy list of versions 22 | - pypy-3.9 23 | - pypy-3.10 24 | astroid-version: 25 | - '' 26 | include: 27 | # Test recent Python on Astroid v2 and v3 too (the above natural tests will 28 | # pick up the latest Astroid version for these Python versions) 29 | - python-version: '3.8' 30 | astroid-version: '<3' 31 | - python-version: '3.9' 32 | astroid-version: '<3' 33 | - python-version: '3.10' 34 | astroid-version: '<3' 35 | - python-version: '3.11' 36 | astroid-version: '<3' 37 | - python-version: '3.12' 38 | astroid-version: '<3' 39 | - python-version: '3.13' 40 | astroid-version: '<3' 41 | - python-version: 'pypy-3.10' 42 | astroid-version: '<3' 43 | - python-version: '3.10' 44 | astroid-version: '<4' 45 | - python-version: '3.12' 46 | astroid-version: '<4' 47 | - python-version: '3.14' 48 | 49 | env: 50 | COVERALLS_PARALLEL: true 51 | 52 | steps: 53 | - uses: actions/checkout@v3 54 | 55 | - name: Set up Python ${{ matrix.python-version }} 56 | uses: actions/setup-python@v5 57 | with: 58 | python-version: ${{ matrix.python-version }} 59 | allow-prereleases: true 60 | 61 | - name: Install dependencies 62 | run: | 63 | pip install --upgrade coveralls "pytest<9.0" setuptools setuptools_scm pep517 64 | pip install .[test] 'astroid${{ matrix.astroid-version }}' 65 | 66 | - name: Mypy testing 67 | run: | 68 | # Not an exact mypy version, as we need 0.942 for pypy-3.8 support, but it's not available on 3.5 69 | pip install "mypy>=1.10" 70 | python -m mypy asttokens tests/*.py 71 | 72 | - name: Fast tests with coverage 73 | run: | 74 | pytest --cov -n auto -m "not slow" 75 | coverage report -m 76 | 77 | - name: Slow tests without coverage 78 | run: | 79 | pytest -n auto -m slow 80 | 81 | - name: Collect coverage results 82 | env: 83 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 84 | run: | 85 | pip3 install --upgrade coveralls 86 | python3 -m coveralls --service=github 87 | 88 | finish: 89 | needs: test 90 | runs-on: ubuntu-latest 91 | steps: 92 | - name: Coveralls Finished 93 | uses: AndreMiras/coveralls-python-action@v20201129 94 | with: 95 | parallel-finished: true 96 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ASTTokens 2 | ========= 3 | 4 | .. image:: https://img.shields.io/pypi/v/asttokens.svg 5 | :target: https://pypi.python.org/pypi/asttokens/ 6 | .. image:: https://img.shields.io/pypi/pyversions/asttokens.svg 7 | :target: https://pypi.python.org/pypi/asttokens/ 8 | .. image:: https://github.com/gristlabs/asttokens/actions/workflows/build-and-test.yml/badge.svg 9 | :target: https://github.com/gristlabs/asttokens/actions/workflows/build-and-test.yml 10 | .. image:: https://readthedocs.org/projects/asttokens/badge/?version=latest 11 | :target: https://asttokens.readthedocs.io/en/latest/index.html 12 | .. image:: https://coveralls.io/repos/github/gristlabs/asttokens/badge.svg 13 | :target: https://coveralls.io/github/gristlabs/asttokens 14 | 15 | .. Start of user-guide 16 | 17 | The ``asttokens`` module annotates Python abstract syntax trees (ASTs) with the positions of tokens 18 | and text in the source code that generated them. 19 | 20 | It makes it possible for tools that work with logical AST nodes to find the particular text that 21 | resulted in those nodes, for example for automated refactoring or highlighting. 22 | 23 | Installation 24 | ------------ 25 | asttokens is available on PyPI: https://pypi.python.org/pypi/asttokens/:: 26 | 27 | pip install asttokens 28 | 29 | The code is on GitHub: https://github.com/gristlabs/asttokens. 30 | 31 | The API Reference is here: https://asttokens.readthedocs.io/en/latest/api-index.html. 32 | 33 | Usage 34 | ----- 35 | 36 | ASTTokens can annotate both trees built by `ast `_, 37 | AND those built by `astroid `_. 38 | 39 | Here's an example: 40 | 41 | .. code-block:: python 42 | 43 | import asttokens, ast 44 | source = "Robot('blue').walk(steps=10*n)" 45 | atok = asttokens.ASTTokens(source, parse=True) 46 | 47 | Once the tree has been marked, nodes get ``.first_token``, ``.last_token`` attributes, and 48 | the ``ASTTokens`` object offers helpful methods: 49 | 50 | .. code-block:: python 51 | 52 | attr_node = next(n for n in ast.walk(atok.tree) if isinstance(n, ast.Attribute)) 53 | print(atok.get_text(attr_node)) 54 | start, end = attr_node.last_token.startpos, attr_node.last_token.endpos 55 | print(atok.text[:start] + 'RUN' + atok.text[end:]) 56 | 57 | Which produces this output: 58 | 59 | .. code-block:: text 60 | 61 | Robot('blue').walk 62 | Robot('blue').RUN(steps=10*n) 63 | 64 | The ``ASTTokens`` object also offers methods to walk and search the list of tokens that make up 65 | the code (or a particular AST node), which is more useful and powerful than dealing with the text 66 | directly. 67 | 68 | 69 | Contribute 70 | ---------- 71 | 72 | To contribute: 73 | 74 | 1. Fork this repository, and clone your fork. 75 | 2. Install the package with test dependencies (ideally in a virtualenv) with:: 76 | 77 | pip install -e '.[test]' 78 | 79 | 3. Run tests in your current interpreter with the command ``pytest`` or ``python -m pytest``. 80 | 4. Run tests across all supported interpreters with the ``tox`` command. You will need to have the interpreters installed separately. We recommend ``pyenv`` for that. Use ``tox -p auto`` to run the tests in parallel. 81 | 5. By default certain tests which take a very long time to run are skipped, but they are run in CI. 82 | These are marked using the ``pytest`` marker ``slow`` and can be run on their own with ``pytest -m slow`` or as part of the full suite with ``pytest -m ''``. 83 | -------------------------------------------------------------------------------- /tests/test_tokenless.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import sys 3 | import unittest 4 | 5 | from asttokens import ASTText, supports_tokenless 6 | from asttokens.util import fstring_positions_work 7 | 8 | source = """ 9 | x = 1 10 | if x > 0: 11 | for i in range(10): 12 | print(i) 13 | else: 14 | print('negative') 15 | 16 | def foo(bar): 17 | pass 18 | 19 | print(f"{xx + 22} is negative {1.23:.2f} {'a'!r} {yy =} {aa:{bb}}") 20 | 21 | import a 22 | import b as c, d.e as f 23 | from foo.bar import baz as spam 24 | """ 25 | 26 | fstring_node_dumps = [ 27 | ast.dump(ast.parse(s).body[0].value) # type: ignore 28 | for s in ["xx", "yy", "aa", "bb", "xx + 22", "22", "1.23", "'a'"] 29 | ] 30 | 31 | 32 | def is_fstring_internal_node(node): 33 | """ 34 | Returns True if the given node is an internal node in an f-string. 35 | Only applies for nodes parsed from the source above. 36 | """ 37 | return ast.dump(node) in fstring_node_dumps 38 | 39 | 40 | def is_fstring_format_spec(node): 41 | """ 42 | Returns True if the given node is a format specifier in an f-string. 43 | Only applies for nodes parsed from the source above. 44 | """ 45 | return ( 46 | isinstance(node, ast.JoinedStr) 47 | and len(node.values) == 1 48 | and ( 49 | ( 50 | isinstance(node.values[0], ast.Constant) 51 | and node.values[0].value in ['.2f'] 52 | ) or ( 53 | isinstance(node.values[0], ast.FormattedValue) 54 | and isinstance(node.values[0].value, ast.Name) 55 | and node.values[0].value.id == 'bb' 56 | ) 57 | ) 58 | ) 59 | 60 | 61 | @unittest.skipUnless(supports_tokenless(), "Python version does not support not using tokens") 62 | class TestTokenless(unittest.TestCase): 63 | def test_get_text_tokenless(self): 64 | atok = ASTText(source) 65 | 66 | for node in ast.walk(atok.tree): 67 | if not isinstance(node, (ast.arguments, ast.arg)): 68 | self.check_node(atok, node) 69 | self.assertTrue(supports_tokenless(node), node) 70 | 71 | # Check that we didn't need to fall back to using tokens 72 | self.assertIsNone(atok._asttokens) 73 | 74 | has_tokens = False 75 | for node in ast.walk(atok.tree): 76 | self.check_node(atok, node) 77 | 78 | if isinstance(node, ast.arguments): 79 | has_tokens = True 80 | 81 | self.assertEqual(atok._asttokens is not None, has_tokens) 82 | 83 | # Now we have started using tokens as fallback 84 | self.assertIsNotNone(atok._asttokens) 85 | self.assertTrue(has_tokens) 86 | 87 | def check_node(self, atok, node): 88 | if not hasattr(node, 'lineno'): 89 | self.assertEqual(ast.get_source_segment(source, node), None) 90 | atok_text = atok.get_text(node) 91 | if not isinstance(node, (ast.arg, ast.arguments)): 92 | self.assertEqual(atok_text, source if isinstance(node, ast.Module) else '', node) 93 | return 94 | 95 | for padded in [True, False]: 96 | ast_text = ast.get_source_segment(source, node, padded=padded) 97 | atok_text = atok.get_text(node, padded=padded) 98 | if ast_text: 99 | if sys.version_info < (3, 12) and ( 100 | ast_text.startswith("f") and isinstance(node, (ast.Constant, ast.FormattedValue)) 101 | or is_fstring_format_spec(node) 102 | or (not fstring_positions_work() and is_fstring_internal_node(node)) 103 | ): 104 | self.assertEqual(atok_text, "", node) 105 | else: 106 | self.assertEqual(atok_text, ast_text, node) 107 | self.assertEqual( 108 | atok.get_text_positions(node, padded=False), 109 | ( 110 | (node.lineno, node.col_offset), 111 | (node.end_lineno, node.end_col_offset), 112 | ), 113 | ) 114 | 115 | def test_nested_fstrings(self): 116 | f1 = 'f"a {1+2} b {3+4} c"' 117 | f2 = "f'd {" + f1 + "} e'" 118 | f3 = "f'''{" + f2 + "}{" + f1 + "}'''" 119 | f4 = 'f"""{' + f3 + '}"""' 120 | s = 'f = ' + f4 121 | atok = ASTText(s) 122 | self.assertEqual(atok.get_text(atok.tree), s) 123 | n4 = atok.tree.body[0].value 124 | n3 = n4.values[0].value 125 | n2 = n3.values[0].value 126 | n1 = n2.values[1].value 127 | self.assertEqual(atok.get_text(n4), f4) 128 | if fstring_positions_work(): 129 | self.assertEqual(atok.get_text(n3), f3) 130 | self.assertEqual(atok.get_text(n2), f2) 131 | self.assertEqual(atok.get_text(n1), f1) 132 | else: 133 | self.assertEqual(atok.get_text(n3), '') 134 | self.assertEqual(atok.get_text(n2), '') 135 | self.assertEqual(atok.get_text(n1), '') 136 | 137 | 138 | class TestFstringPositionsWork(unittest.TestCase): 139 | def test_fstring_positions_work(self): 140 | self.assertEqual( 141 | fstring_positions_work() and supports_tokenless(), 142 | sys.version_info >= (3, 10, 6) and 'pypy' not in sys.version.lower(), 143 | ) 144 | -------------------------------------------------------------------------------- /tests/test_util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import ast 3 | import io 4 | import sys 5 | import token 6 | import unittest 7 | 8 | import astroid 9 | import pytest 10 | 11 | from .context import asttokens 12 | from .tools import get_node_name 13 | 14 | 15 | class TestUtil(unittest.TestCase): 16 | 17 | def print_timing(self): 18 | # pylint: disable=no-self-use 19 | # Test the implementation of asttokens.util.walk, which uses the same approach as 20 | # visit_tree(). This doesn't run as a normal unittest, but if you'd like to see timings, e.g. 21 | # after experimenting with the implementation, run this to see them: 22 | # 23 | # nosetests -i print_timing -s tests.test_util 24 | # 25 | import timeit 26 | import textwrap 27 | setup = textwrap.dedent( 28 | ''' 29 | import ast, asttokens 30 | source = "foo(bar(1 + 2), 'hello' + ', ' + 'world')" 31 | atok = asttokens.ASTTokens(source, parse=True) 32 | ''') 33 | print("ast", sorted(timeit.repeat( 34 | setup=setup, number=10000, 35 | stmt='len(list(ast.walk(atok.tree)))'))) 36 | print("util", sorted(timeit.repeat( 37 | setup=setup, number=10000, 38 | stmt='len(list(asttokens.util.walk(atok.tree)))'))) 39 | 40 | 41 | source = "foo(bar(1 + 2), 'hello' + ', ' + 'world')" 42 | 43 | def test_walk_ast(self): 44 | atok = asttokens.ASTTokens(self.source, parse=True) 45 | 46 | def view(node): 47 | return "%s:%s" % (get_node_name(node), atok.get_text(node)) 48 | 49 | scan = [view(n) for n in asttokens.util.walk(atok.tree)] 50 | self.assertEqual(scan, [ 51 | "Module:foo(bar(1 + 2), 'hello' + ', ' + 'world')", 52 | "Expr:foo(bar(1 + 2), 'hello' + ', ' + 'world')", 53 | "Call:foo(bar(1 + 2), 'hello' + ', ' + 'world')", 54 | 'Name:foo', 55 | 'Call:bar(1 + 2)', 56 | 'Name:bar', 57 | 'BinOp:1 + 2', 58 | 'Constant:1', 59 | 'Constant:2', 60 | "BinOp:'hello' + ', ' + 'world'", 61 | "BinOp:'hello' + ', '", 62 | "Constant:'hello'", 63 | "Constant:', '", 64 | "Constant:'world'" 65 | ]) 66 | 67 | def test_walk_astroid(self): 68 | atok = asttokens.ASTTokens(self.source, tree=astroid.builder.parse(self.source)) 69 | 70 | def view(node): 71 | return "%s:%s" % (get_node_name(node), atok.get_text(node)) 72 | 73 | scan = [view(n) for n in asttokens.util.walk(atok.tree)] 74 | self.assertEqual(scan, [ 75 | "Module:foo(bar(1 + 2), 'hello' + ', ' + 'world')", 76 | "Expr:foo(bar(1 + 2), 'hello' + ', ' + 'world')", 77 | "Call:foo(bar(1 + 2), 'hello' + ', ' + 'world')", 78 | 'Name:foo', 79 | 'Call:bar(1 + 2)', 80 | 'Name:bar', 81 | 'BinOp:1 + 2', 82 | 'Const:1', 83 | 'Const:2', 84 | "BinOp:'hello' + ', ' + 'world'", 85 | "BinOp:'hello' + ', '", 86 | "Const:'hello'", 87 | "Const:', '", 88 | "Const:'world'" 89 | ]) 90 | 91 | 92 | def test_replace(self): 93 | self.assertEqual(asttokens.util.replace("this is a test", [(0, 4, "X"), (8, 9, "THE")]), 94 | "X is THE test") 95 | self.assertEqual(asttokens.util.replace("this is a test", []), "this is a test") 96 | self.assertEqual(asttokens.util.replace("this is a test", [(7,7," NOT")]), "this is NOT a test") 97 | 98 | source = "foo(bar(1 + 2), 'hello' + ', ' + 'world')" 99 | atok = asttokens.ASTTokens(source, parse=True) 100 | names = [n for n in asttokens.util.walk(atok.tree) if isinstance(n, ast.Name)] 101 | strings = [n for n in asttokens.util.walk(atok.tree) if isinstance(n, ast.Constant) and isinstance(n.value, str)] 102 | repl1 = [atok.get_text_range(n) + ('TEST',) for n in names] 103 | repl2 = [atok.get_text_range(n) + ('val',) for n in strings] 104 | self.assertEqual(asttokens.util.replace(source, repl1 + repl2), 105 | "TEST(TEST(1 + 2), val + val + val)") 106 | self.assertEqual(asttokens.util.replace(source, repl2 + repl1), 107 | "TEST(TEST(1 + 2), val + val + val)") 108 | 109 | 110 | def test_expect_token(): 111 | atok = asttokens.ASTTokens("a", parse=True) 112 | tok = atok.tokens[0] 113 | with pytest.raises(ValueError): 114 | asttokens.util.expect_token(tok, token.OP) 115 | 116 | 117 | def test_combine_tokens(): 118 | from tokenize import TokenInfo, generate_tokens, ERRORTOKEN, OP, NUMBER, NAME 119 | from asttokens.util import combine_tokens, patched_generate_tokens 120 | 121 | text = "℘·2=1" 122 | original_tokens = [] 123 | for tok in generate_tokens(io.StringIO(text).readline): 124 | original_tokens.append(tok) 125 | if tok.type == OP: 126 | break 127 | 128 | correct_tokens = [ 129 | TokenInfo(NAME, string='℘·2', start=(1, 0), end=(1, 3), line='℘·2=1'), 130 | TokenInfo(OP, string='=', start=(1, 3), end=(1, 4), line='℘·2=1'), 131 | ] 132 | if sys.version_info >= (3, 12): 133 | # The tokenizing bug was fixed in 3.12, so the original tokens are correct, 134 | # rather than starting with false ERRORTOKENs. 135 | assert original_tokens == correct_tokens 136 | else: 137 | assert original_tokens == [ 138 | TokenInfo(ERRORTOKEN, string='℘', start=(1, 0), end=(1, 1), line='℘·2=1'), 139 | TokenInfo(ERRORTOKEN, string='·', start=(1, 1), end=(1, 2), line='℘·2=1'), 140 | TokenInfo(NUMBER, string='2', start=(1, 2), end=(1, 3), line='℘·2=1'), 141 | TokenInfo(OP, string='=', start=(1, 3), end=(1, 4), line='℘·2=1'), 142 | ] 143 | assert combine_tokens(original_tokens[:1]) == [ 144 | TokenInfo(NAME, string='℘', start=(1, 0), end=(1, 1), line='℘·2=1'), 145 | ] 146 | assert combine_tokens(original_tokens[:2]) == [ 147 | TokenInfo(NAME, string='℘·', start=(1, 0), end=(1, 2), line='℘·2=1'), 148 | ] 149 | assert combine_tokens(original_tokens[:3]) == [ 150 | TokenInfo(NAME, string='℘·2', start=(1, 0), end=(1, 3), line='℘·2=1'), 151 | ] 152 | 153 | assert list(patched_generate_tokens(iter(original_tokens))) == correct_tokens 154 | assert list(patched_generate_tokens(iter(original_tokens[:-1]))) == [ 155 | TokenInfo(NAME, string='℘·2', start=(1, 0), end=(1, 3), line='℘·2=1'), 156 | ] 157 | 158 | 159 | if __name__ == "__main__": 160 | unittest.main() 161 | -------------------------------------------------------------------------------- /tests/test_asttokens.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import ast 3 | import token 4 | import tokenize 5 | import unittest 6 | from .context import asttokens 7 | 8 | class TestASTTokens(unittest.TestCase): 9 | 10 | def assertTokenizing(self, generate_tokens): 11 | source = "import re # comment\n\nfoo = 'bar'\n" 12 | atok = asttokens.ASTTokens(source, tokens=generate_tokens(source)) 13 | self.assertEqual(atok.text, source) 14 | self.assertEqual([str(t) for t in atok.tokens], [ 15 | "NAME:'import'", 16 | "NAME:'re'", 17 | "COMMENT:'# comment'", 18 | "NEWLINE:'\\n'", 19 | "NL:'\\n'", 20 | "NAME:'foo'", 21 | "OP:'='", 22 | 'STRING:"\'bar\'"', 23 | "NEWLINE:'\\n'", 24 | "ENDMARKER:''" 25 | ]) 26 | 27 | self.assertEqual(atok.tokens[5].type, token.NAME) 28 | self.assertEqual(atok.tokens[5].string, 'foo') 29 | self.assertEqual(atok.tokens[5].index, 5) 30 | self.assertEqual(atok.tokens[5].startpos, 22) 31 | self.assertEqual(atok.tokens[5].endpos, 25) 32 | 33 | def test_tokenizing(self): 34 | # Test that we produce meaningful tokens on initialization. 35 | self.assertTokenizing(generate_tokens=lambda x: None) 36 | 37 | def test_given_existing_tokens(self): 38 | # type: () -> None 39 | # Test that we process a give list of tokens on initialization. 40 | 41 | self.was_called = False 42 | 43 | def generate_tokens(source): 44 | def tokens_iter(): 45 | # force nonlocal into scope 46 | for token in asttokens.util.generate_tokens(source): 47 | yield token 48 | self.was_called = True 49 | return tokens_iter() 50 | 51 | self.assertTokenizing(generate_tokens) 52 | 53 | self.assertTrue(self.was_called, "Should have used tokens from given iterable") 54 | 55 | 56 | def test_token_methods(self): 57 | # Test the methods that deal with tokens: prev/next_token, get_token, get_token_from_offset. 58 | source = "import re # comment\n\nfoo = 'bar'\n" 59 | atok = asttokens.ASTTokens(source) 60 | self.assertEqual(str(atok.tokens[3]), "NEWLINE:'\\n'") 61 | self.assertEqual(str(atok.tokens[4]), "NL:'\\n'") 62 | self.assertEqual(str(atok.tokens[5]), "NAME:'foo'") 63 | self.assertEqual(str(atok.tokens[6]), "OP:'='") 64 | self.assertEqual(atok.prev_token(atok.tokens[5]), atok.tokens[3]) 65 | self.assertEqual(atok.prev_token(atok.tokens[5], include_extra=True), atok.tokens[4]) 66 | self.assertEqual(atok.next_token(atok.tokens[5]), atok.tokens[6]) 67 | self.assertEqual(atok.next_token(atok.tokens[1]), atok.tokens[3]) 68 | self.assertEqual(atok.next_token(atok.tokens[1], include_extra=True), atok.tokens[2]) 69 | 70 | self.assertEqual(atok.get_token_from_offset(21), atok.tokens[4]) 71 | self.assertEqual(atok.get_token_from_offset(22), atok.tokens[5]) 72 | self.assertEqual(atok.get_token_from_offset(23), atok.tokens[5]) 73 | self.assertEqual(atok.get_token_from_offset(24), atok.tokens[5]) 74 | self.assertEqual(atok.get_token_from_offset(25), atok.tokens[5]) 75 | self.assertEqual(atok.get_token_from_offset(26), atok.tokens[6]) 76 | 77 | self.assertEqual(atok.get_token(2, 0), atok.tokens[4]) 78 | self.assertEqual(atok.get_token(3, 0), atok.tokens[5]) 79 | self.assertEqual(atok.get_token(3, 1), atok.tokens[5]) 80 | self.assertEqual(atok.get_token(3, 2), atok.tokens[5]) 81 | self.assertEqual(atok.get_token(3, 3), atok.tokens[5]) 82 | self.assertEqual(atok.get_token(3, 4), atok.tokens[6]) 83 | 84 | self.assertEqual(list(atok.token_range(atok.tokens[4], atok.tokens[6], include_extra=True)), 85 | atok.tokens[4:7]) 86 | 87 | # Verify that find_token works, including for non-coding tokens. 88 | self.assertEqual(atok.find_token(atok.tokens[3], token.NAME, 'foo'), atok.tokens[5]) 89 | self.assertEqual(atok.find_token(atok.tokens[3], token.NAME, 'foo', reverse=True), 90 | atok.tokens[9]) 91 | self.assertEqual(atok.find_token(atok.tokens[3], token.NAME, reverse=True), atok.tokens[1]) 92 | self.assertEqual(atok.find_token(atok.tokens[5], tokenize.COMMENT), atok.tokens[9]) 93 | self.assertEqual(atok.find_token(atok.tokens[5], tokenize.COMMENT, reverse=True), 94 | atok.tokens[2]) 95 | self.assertEqual(atok.find_token(atok.tokens[5], token.NEWLINE), atok.tokens[8]) 96 | self.assertFalse(token.ISEOF(atok.find_token(atok.tokens[5], tokenize.NEWLINE).type)) 97 | self.assertEqual(atok.find_token(atok.tokens[5], tokenize.NL), atok.tokens[9]) 98 | self.assertTrue(token.ISEOF(atok.find_token(atok.tokens[5], tokenize.NL).type)) 99 | 100 | def test_unicode_offsets(self): 101 | # ast modules provides utf8 offsets, while tokenize uses unicode offsets. Make sure we 102 | # translate correctly. 103 | source = "foo('фыва',a,b)\n" 104 | atok = asttokens.ASTTokens(source) 105 | self.assertEqual([str(t) for t in atok.tokens], [ 106 | "NAME:'foo'", 107 | "OP:'('", 108 | 'STRING:"%s"' % repr('фыва').lstrip('u'), 109 | "OP:','", 110 | "NAME:'a'", 111 | "OP:','", 112 | "NAME:'b'", 113 | "OP:')'", 114 | "NEWLINE:'\\n'", 115 | "ENDMARKER:''" 116 | ]) 117 | self.assertEqual(atok.tokens[2].startpos, 4) 118 | self.assertEqual(atok.tokens[2].endpos, 10) # Counting characters, not bytes 119 | self.assertEqual(atok.tokens[4].startpos, 11) 120 | self.assertEqual(atok.tokens[4].endpos, 12) 121 | self.assertEqual(atok.tokens[6].startpos, 13) 122 | self.assertEqual(atok.tokens[6].endpos, 14) 123 | 124 | root = ast.parse(source) 125 | 126 | # Verify that ast parser produces offsets as we expect. This is just to inform the 127 | # implementation. 128 | string_node = next(n for n in ast.walk(root) if isinstance(n, ast.Constant)) 129 | self.assertEqual(string_node.lineno, 1) 130 | self.assertEqual(string_node.col_offset, 4) 131 | 132 | a_node = next(n for n in ast.walk(root) if isinstance(n, ast.Name) and n.id == 'a') 133 | self.assertEqual((a_node.lineno, a_node.col_offset), (1, 15)) # Counting bytes, not chars. 134 | 135 | b_node = next(n for n in ast.walk(root) if isinstance(n, ast.Name) and n.id == 'b') 136 | self.assertEqual((b_node.lineno, b_node.col_offset), (1, 17)) 137 | 138 | # Here we verify that we use correct offsets (translating utf8 to unicode offsets) when 139 | # extracting text ranges. 140 | atok.mark_tokens(root) 141 | self.assertEqual(atok.get_text(string_node), "'фыва'") 142 | self.assertEqual(atok.get_text(a_node), "a") 143 | self.assertEqual(atok.get_text(b_node), "b") 144 | 145 | def test_coding_declaration(self): 146 | """ASTTokens should be able to parse a string with a coding declaration.""" 147 | # In Python 2, a unicode string with a coding declaration is a SyntaxError, but we should be 148 | # able to parse a byte string with a coding declaration (as long as its utf-8 compatible). 149 | atok = asttokens.ASTTokens(str("# coding: ascii\n1\n"), parse=True) 150 | self.assertEqual([str(t) for t in atok.tokens], [ 151 | "COMMENT:'# coding: ascii'", 152 | "NL:'\\n'", 153 | "NUMBER:'1'", 154 | "NEWLINE:'\\n'", 155 | "ENDMARKER:''" 156 | ]) 157 | 158 | 159 | def test_filename(): 160 | filename = "myfile.py" 161 | atok = asttokens.ASTTokens("a", parse=True, filename=filename) 162 | assert filename == atok.filename 163 | 164 | 165 | def test_doesnt_have_location(): 166 | atok = asttokens.ASTTokens("a", parse=True) 167 | 168 | # Testing the documentation that says: 169 | # "Returns (0, 0) for nodes (like `Load`) that don't correspond 170 | # to any particular text." 171 | context = atok.tree.body[0].value.ctx 172 | assert isinstance(context, ast.Load) 173 | assert atok.get_text_range(context) == (0, 0) 174 | assert atok.get_text(context) == "" 175 | 176 | # This actually also applies to non-nodes 177 | assert atok.get_text_range(None) == (0, 0) 178 | assert atok.get_text(None) == "" 179 | 180 | 181 | if __name__ == "__main__": 182 | unittest.main() 183 | -------------------------------------------------------------------------------- /tests/tools.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import io 3 | import os 4 | import re 5 | import sys 6 | 7 | import astroid 8 | 9 | from asttokens import util, supports_tokenless, ASTText 10 | from asttokens.util import is_ellipsis, is_expr_stmt 11 | 12 | 13 | def get_fixture_path(*path_parts): 14 | python_dir = 'python%s' % sys.version_info[0] 15 | return os.path.join(os.path.dirname(__file__), "testdata", python_dir, *path_parts) 16 | 17 | def read_fixture(*path_parts): 18 | with io.open(get_fixture_path(*path_parts), "r", newline="\n") as f: 19 | return f.read() 20 | 21 | 22 | def collect_nodes_preorder(root): 23 | """Returns a list of all nodes using pre-order traversal (i.e. parent before children).""" 24 | nodes = [] 25 | def append(node, par_value): # pylint: disable=unused-argument 26 | nodes.append(node) 27 | return (None, None) 28 | util.visit_tree(root, append, None) 29 | return nodes 30 | 31 | def get_node_name(node): 32 | name = node.__class__.__name__ 33 | return 'Constant' if name in ('Num', 'Str', 'NameConstant') else name 34 | 35 | 36 | class MarkChecker(object): 37 | """ 38 | Helper tool to parse and mark an AST tree, with useful methods for verifying it. 39 | """ 40 | def __init__(self, atok): 41 | self.atok = atok 42 | self.all_nodes = collect_nodes_preorder(self.atok.tree) 43 | self.atext = ASTText(atok.text, atok.tree, atok.filename) 44 | 45 | def get_nodes_at(self, line, col): 46 | """Returns all nodes that start with the token at the given position.""" 47 | token = self.atok.get_token(line, col) 48 | return [n for n in self.all_nodes if n.first_token == token] 49 | 50 | def view_node(self, node): 51 | """Returns a representation of a node and its text, such as "Call:foo()". """ 52 | return "%s:%s" % (get_node_name(node), self.atok.get_text(node)) 53 | 54 | def view_nodes_at(self, line, col): 55 | """ 56 | Returns a set of all node representations for nodes that start at the given position. 57 | E.g. {"Call:foo()", "Name:foo"} 58 | """ 59 | return {self.view_node(n) for n in self.get_nodes_at(line, col)} 60 | 61 | def view_node_types_at(self, line, col): 62 | """ 63 | Returns a set of all node types for nodes that start at the given position. 64 | E.g. {"Call", "Name"} 65 | """ 66 | return {n.__class__.__name__ for n in self.get_nodes_at(line, col)} 67 | 68 | def verify_all_nodes(self, test_case): 69 | """ 70 | Generically test atok.get_text() on the ast tree: for each statement and expression in the 71 | tree, we extract the text, parse it, and see if it produces an equivalent tree. Returns the 72 | number of nodes that were tested this way. 73 | """ 74 | test_case.longMessage = True 75 | 76 | if supports_tokenless() and not test_case.is_astroid_test: 77 | num_supported = sum(supports_tokenless(n) for n in self.all_nodes) 78 | num_nodes = len(self.all_nodes) 79 | test_case.assertGreater(num_supported / num_nodes, 0.5, (num_supported, num_nodes)) 80 | 81 | tested_nodes = 0 82 | for node in self.all_nodes: 83 | # slices currently only get the correct tokens/text for ast, not astroid. 84 | if util.is_slice(node) and test_case.is_astroid_test: 85 | continue 86 | 87 | text = self.atok.get_text(node) 88 | self.check_get_text_tokenless(node, test_case, text) 89 | 90 | if not ( 91 | util.is_stmt(node) or 92 | util.is_expr(node) or 93 | util.is_module(node)): 94 | continue 95 | 96 | # await is not allowed outside async functions below 3.7 97 | # parsing again would give a syntax error 98 | if 'await' in text and 'async def' not in text and sys.version_info < (3, 7): 99 | continue 100 | 101 | # `elif:` is really just `else: if:` to the AST, 102 | # so get_text can return text starting with elif when given an If node. 103 | # This is generally harmless and there's probably no good alternative, 104 | # but in isolation it's invalid syntax 105 | text = re.sub(r'^(\s*)elif(\W)', r'\1if\2', text, re.MULTILINE) 106 | 107 | rebuilt_node = test_case.parse_snippet(text, node) 108 | 109 | try: 110 | test_case.assert_nodes_equal(node, rebuilt_node) 111 | except AssertionError: 112 | if test_case.is_astroid_test: 113 | # This can give a more helpful failure message with a diff 114 | test_case.assertEqual( 115 | repr_tree(node), 116 | repr_tree(rebuilt_node), 117 | ) 118 | raise 119 | 120 | tested_nodes += 1 121 | 122 | return tested_nodes 123 | 124 | def check_get_text_tokenless(self, node, test_case, text): 125 | """ 126 | Check that `text` (returned from get_text()) usually returns the same text 127 | whether from `ASTTokens` or `ASTText`. 128 | """ 129 | 130 | if not supports_tokenless(): 131 | return 132 | 133 | text_tokenless = self.atext.get_text(node) 134 | if isinstance(node, ast.alias): 135 | self._check_alias_tokenless(node, test_case, text_tokenless) 136 | elif util.is_module(node): 137 | test_case.assertEqual(text_tokenless, self.atext._text) 138 | elif isinstance(node, astroid.DictUnpack): 139 | # This is a strange node that *seems* to represent just the `**` in `{**foo}` 140 | # (not `**foo` or `foo`), but text_tokenless is `foo` 141 | # while `text` is just the first token of that. 142 | # 'Fixing' either of these or making them match doesn't seem useful. 143 | return 144 | elif isinstance(node, astroid.Decorators): 145 | # Another strange node where it's not worth making the two texts match 146 | return 147 | elif supports_tokenless(node): 148 | has_lineno = getattr(node, 'lineno', None) is not None 149 | test_case.assertEqual(has_lineno, text_tokenless != '') 150 | if has_lineno: 151 | if text != text_tokenless: 152 | if ( 153 | text_tokenless.startswith(text) 154 | and test_case.is_astroid_test 155 | and ( 156 | # astroid positions can include type comments, which we can ignore. 157 | text_tokenless[len(text):].strip().startswith('# type: ') 158 | # astroid+ASTTokens doesn't correctly handle the 3.12+ type variable syntax. 159 | # Since ASTText is preferred for new Python versions, this is not a priority. 160 | or isinstance(node.parent, astroid.TypeVar) 161 | ) 162 | ): 163 | return 164 | 165 | if ( 166 | text == text_tokenless.lstrip() 167 | and isinstance(getattr(node, 'body', None), list) 168 | and len(node.body) == 1 169 | and is_expr_stmt(node.body[0]) 170 | and is_ellipsis(node.body[0].value) 171 | ): 172 | # ASTTokens doesn't include padding for compound statements where the 173 | # body is a single statement starting on the same line where the header ends. 174 | # ASTText does include padding in this case if the header spans multiple lines, 175 | # as does ast.get_source_segment(padded=True). 176 | # This is a minor difference and not worth fixing. 177 | # In practice it arises in test_sys_modules when testing files containing 178 | # function definition stubs like: 179 | # def foo( 180 | # 181 | # ): ... # (actual ellipsis) 182 | return 183 | 184 | test_case.assertEqual(text, text_tokenless) 185 | else: 186 | # _get_text_positions_tokenless can't work with nodes without lineno. 187 | # Double-check that such nodes are unusual. 188 | test_case.assertFalse(util.is_stmt(node) or util.is_expr(node)) 189 | if not test_case.is_astroid_test: 190 | with test_case.assertRaises(SyntaxError, msg=(text, ast.dump(node))): 191 | test_case.parse_snippet(text, node) 192 | 193 | def _check_alias_tokenless(self, node, test_case, text): 194 | if sys.version_info < (3, 10): 195 | # Before 3.10, aliases don't have position information 196 | test_case.assertEqual(text, '') 197 | # For 3.10+, ASTTokens.get_text often returns the wrong value for aliases. 198 | # So to verify ASTText.get_text, we instead check the general form. 199 | elif node.asname: 200 | test_case.assertEqual(text.split(), [node.name, 'as', node.asname]) 201 | else: 202 | test_case.assertEqual(text, node.name) 203 | 204 | 205 | def repr_tree(node): 206 | """ 207 | Returns a canonical string representation of an astroid node 208 | normalised to ignore the context of each node which can change when parsing 209 | substrings of source code. 210 | 211 | E.g. "a" is a Name in expression "a + 1" and is an AssignName in expression "a = 1", 212 | but we don't care about this difference when comparing structure and content. 213 | """ 214 | result = node.repr_tree() 215 | 216 | # astroid represents context in multiple ways 217 | # Convert Store and Del contexts to Load 218 | # Similarly convert Assign/Del Name/Attr to just Name/Attribute (i.e. Load) 219 | result = re.sub(r'(AssignName|DelName)(\(\s*name=)', r'Name\2', result) 220 | result = re.sub(r'(AssignAttr|DelAttr)(\(\s*attrname=)', r'Attribute\2', result) 221 | result = re.sub(r'ctx=', r'ctx=', result) 222 | 223 | # Weird bug in astroid that collapses spaces in docstrings sometimes maybe 224 | result = re.sub(r"' +\\n'", r"'\\n'", result) 225 | 226 | return result 227 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | https://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | https://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | # Specify a configuration file. 4 | #rcfile= 5 | 6 | # Python code to execute, usually for sys.path manipulation such as 7 | # pygtk.require(). 8 | #init-hook= 9 | 10 | # Profiled execution. 11 | profile=no 12 | 13 | # Add files or directories to the blacklist. They should be base names, not 14 | # paths. 15 | ignore=CVS 16 | 17 | # Pickle collected data for later comparisons. 18 | persistent=yes 19 | 20 | # List of plugins (as comma separated values of python modules names) to load, 21 | # usually to register additional checkers. 22 | load-plugins= 23 | 24 | # Deprecated. It was used to include message's id in output. Use --msg-template 25 | # instead. 26 | include-ids=no 27 | 28 | # Deprecated. It was used to include symbolic ids of messages in output. Use 29 | # --msg-template instead. 30 | symbols=no 31 | 32 | # Use multiple processes to speed up Pylint. 33 | jobs=1 34 | 35 | # Allow loading of arbitrary C extensions. Extensions are imported into the 36 | # active Python interpreter and may run arbitrary code. 37 | unsafe-load-any-extension=no 38 | 39 | # A comma-separated list of package or module names from where C extensions may 40 | # be loaded. Extensions are loading into the active Python interpreter and may 41 | # run arbitrary code 42 | extension-pkg-whitelist= 43 | 44 | # Allow optimization of some AST trees. This will activate a peephole AST 45 | # optimizer, which will apply various small optimizations. For instance, it can 46 | # be used to obtain the result of joining multiple strings with the addition 47 | # operator. Joining a lot of strings can lead to a maximum recursion error in 48 | # Pylint and this flag can prevent that. It has one side effect, the resulting 49 | # AST will be different than the one from reality. 50 | optimize-ast=no 51 | 52 | 53 | [MESSAGES CONTROL] 54 | 55 | # Only show warnings with the listed confidence levels. Leave empty to show 56 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED 57 | confidence= 58 | 59 | # Enable the message, report, category or checker with the given id(s). You can 60 | # either give multiple identifier separated by comma (,) or put this option 61 | # multiple time. See also the "--disable" option for examples. 62 | #enable= 63 | 64 | # Disable the message, report, category or checker with the given id(s). You 65 | # can either give multiple identifiers separated by comma (,) or put this 66 | # option multiple times (only on the command line, not in the configuration 67 | # file where it should appear only once).You can also use "--disable=all" to 68 | # disable everything first and then reenable specific checks. For example, if 69 | # you want to run only the similarities checker, you can use "--disable=all 70 | # --enable=similarities". If you want to run only the classes checker, but have 71 | # no Warning level messages displayed, use"--disable=all --enable=classes 72 | # --disable=W" 73 | #disable=E1608,W1627,E1601,E1603,E1602,E1605,E1604,E1607,E1606,W1621,W1620,W1623,W1622,W1625,W1624,W1609,W1608,W1607,W1606,W1605,W1604,W1603,W1602,W1601,R0914,W1639,W0403,R0913,R0912,W0102,W1640,I0021,C0326,C0103,W1638,C0330,I0020,C0111,W1618,W1619,W1630,W1626,W1637,W1634,W1635,W1610,W1611,W1612,W1613,W1614,W1615,W1616,W1617,W1632,R0902,R0903,W1633,W0704,W0703,W1628,W1629,W0201,W1636,W0212 74 | disable=invalid-name,missing-docstring 75 | 76 | 77 | [REPORTS] 78 | 79 | # Set the output format. Available formats are text, parseable, colorized, msvs 80 | # (visual studio) and html. You can also give a reporter class, eg 81 | # mypackage.mymodule.MyReporterClass. 82 | output-format=text 83 | 84 | # Put messages in a separate file for each module / package specified on the 85 | # command line instead of printing them on stdout. Reports (if any) will be 86 | # written in a file name "pylint_global.[txt|html]". 87 | files-output=no 88 | 89 | # Tells whether to display a full report or only the messages 90 | reports=yes 91 | 92 | # Python expression which should return a note less than 10 (10 is the highest 93 | # note). You have access to the variables errors warning, statement which 94 | # respectively contain the number of errors / warnings messages and the total 95 | # number of statements analyzed. This is used by the global evaluation report 96 | # (RP0004). 97 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 98 | 99 | # Add a comment according to your evaluation note. This is used by the global 100 | # evaluation report (RP0004). 101 | comment=no 102 | 103 | # Template used to display messages. This is a python new-style format string 104 | # used to format the message information. See doc for all details 105 | #msg-template= 106 | 107 | 108 | [BASIC] 109 | 110 | # Required attributes for module, separated by a comma 111 | required-attributes= 112 | 113 | # List of builtins function names that should not be used, separated by a comma 114 | bad-functions= 115 | 116 | # Good variable names which should always be accepted, separated by a comma 117 | good-names=i,j,k,ex,Run,_ 118 | 119 | # Bad variable names which should always be refused, separated by a comma 120 | bad-names=foo,bar,baz,toto,tutu,tata 121 | 122 | # Colon-delimited sets of names that determine each other's naming style when 123 | # the name regexes allow several styles. 124 | name-group= 125 | 126 | # Include a hint for the correct naming format with invalid-name 127 | include-naming-hint=no 128 | 129 | # Regular expression matching correct function names 130 | function-rgx=[a-z_][a-z0-9_]{2,30}$ 131 | 132 | # Naming hint for function names 133 | function-name-hint=[a-z_][a-z0-9_]{2,30}$ 134 | 135 | # Regular expression matching correct variable names 136 | variable-rgx=[a-z_][a-z0-9_]{2,30}$ 137 | 138 | # Naming hint for variable names 139 | variable-name-hint=[a-z_][a-z0-9_]{2,30}$ 140 | 141 | # Regular expression matching correct constant names 142 | const-rgx=(([A-Z_][a-zA-Z0-9_]*)|(__.*__))$ 143 | 144 | # Naming hint for constant names 145 | const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ 146 | 147 | # Regular expression matching correct attribute names 148 | attr-rgx=[a-z_][a-z0-9_]{2,30}$ 149 | 150 | # Naming hint for attribute names 151 | attr-name-hint=[a-z_][a-z0-9_]{2,30}$ 152 | 153 | # Regular expression matching correct argument names 154 | argument-rgx=[a-z_][a-z0-9_]{2,30}$ 155 | 156 | # Naming hint for argument names 157 | argument-name-hint=[a-z_][a-z0-9_]{2,30}$ 158 | 159 | # Regular expression matching correct class attribute names 160 | class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 161 | 162 | # Naming hint for class attribute names 163 | class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 164 | 165 | # Regular expression matching correct inline iteration names 166 | inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ 167 | 168 | # Naming hint for inline iteration names 169 | inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ 170 | 171 | # Regular expression matching correct class names 172 | class-rgx=[A-Z_][a-zA-Z0-9]+$ 173 | 174 | # Naming hint for class names 175 | class-name-hint=[A-Z_][a-zA-Z0-9]+$ 176 | 177 | # Regular expression matching correct module names 178 | module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 179 | 180 | # Naming hint for module names 181 | module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 182 | 183 | # Regular expression matching correct method names 184 | method-rgx=[a-z_][a-z0-9_]{2,30}$ 185 | 186 | # Naming hint for method names 187 | method-name-hint=[a-z_][a-z0-9_]{2,30}$ 188 | 189 | # Regular expression which should only match function or class names that do 190 | # not require a docstring. 191 | no-docstring-rgx=__.*__ 192 | 193 | # Minimum line length for functions/classes that require docstrings, shorter 194 | # ones are exempt. 195 | docstring-min-length=-1 196 | 197 | 198 | [FORMAT] 199 | 200 | # Maximum number of characters on a single line. 201 | max-line-length=100 202 | 203 | # Regexp for a line that is allowed to be longer than the limit. 204 | ignore-long-lines=^\s*(# )??$ 205 | 206 | # Allow the body of an if to be on the same line as the test if there is no 207 | # else. 208 | single-line-if-stmt=no 209 | 210 | # List of optional constructs for which whitespace checking is disabled 211 | no-space-check=trailing-comma,dict-separator 212 | 213 | # Maximum number of lines in a module 214 | max-module-lines=1000 215 | 216 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 217 | # tab). 218 | indent-string=' ' 219 | 220 | # Number of spaces of indent required inside a hanging or continued line. 221 | indent-after-paren=2 222 | 223 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 224 | expected-line-ending-format= 225 | 226 | 227 | [LOGGING] 228 | 229 | # Logging modules to check that the string format arguments are in logging 230 | # function parameter format 231 | logging-modules=logging 232 | 233 | 234 | [MISCELLANEOUS] 235 | 236 | # List of note tags to take in consideration, separated by a comma. 237 | notes=FIXME,XXX 238 | 239 | 240 | [SIMILARITIES] 241 | 242 | # Minimum lines number of a similarity. 243 | min-similarity-lines=3 244 | 245 | # Ignore comments when computing similarities. 246 | ignore-comments=yes 247 | 248 | # Ignore docstrings when computing similarities. 249 | ignore-docstrings=yes 250 | 251 | # Ignore imports when computing similarities. 252 | ignore-imports=no 253 | 254 | 255 | [SPELLING] 256 | 257 | # Spelling dictionary name. Available dictionaries: none. To make it working 258 | # install python-enchant package. 259 | spelling-dict= 260 | 261 | # List of comma separated words that should not be checked. 262 | spelling-ignore-words= 263 | 264 | # A path to a file that contains private dictionary; one word per line. 265 | spelling-private-dict-file= 266 | 267 | # Tells whether to store unknown words to indicated private dictionary in 268 | # --spelling-private-dict-file option instead of raising a message. 269 | spelling-store-unknown-words=no 270 | 271 | 272 | [TYPECHECK] 273 | 274 | # Tells whether missing members accessed in mixin class should be ignored. A 275 | # mixin class is detected if its name ends with "mixin" (case insensitive). 276 | ignore-mixin-members=no 277 | 278 | # List of module names for which member attributes should not be checked 279 | # (useful for modules/projects where namespaces are manipulated during runtime 280 | # and thus existing member attributes cannot be deduced by static analysis 281 | ignored-modules= 282 | 283 | # List of classes names for which member attributes should not be checked 284 | # (useful for classes with attributes dynamically set). 285 | ignored-classes=SQLObject 286 | 287 | # When zope mode is activated, add a predefined set of Zope acquired attributes 288 | # to generated-members. 289 | zope=no 290 | 291 | # List of members which are set dynamically and missed by pylint inference 292 | # system, and so shouldn't trigger E0201 when accessed. Python regular 293 | # expressions are accepted. 294 | generated-members=REQUEST,acl_users,aq_parent,_fields,_make,simplify 295 | 296 | 297 | [VARIABLES] 298 | 299 | # Tells whether we should check for unused import in __init__ files. 300 | init-import=no 301 | 302 | # A regular expression matching the name of dummy variables (i.e. expectedly 303 | # not used). 304 | dummy-variables-rgx=_$|dummy|_*|ignore 305 | 306 | # List of additional names supposed to be defined in builtins. Remember that 307 | # you should avoid to define new builtins when possible. 308 | additional-builtins= 309 | 310 | # List of strings which can identify a callback function by name. A callback 311 | # name must start or end with one of those strings. 312 | callbacks=cb_,_cb 313 | 314 | 315 | [CLASSES] 316 | 317 | # List of interface methods to ignore, separated by a comma. This is used for 318 | # instance to not check methods defines in Zope's Interface base class. 319 | ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by 320 | 321 | # List of method names used to declare (i.e. assign) instance attributes. 322 | defining-attr-methods=__init__,__new__,setUp 323 | 324 | # List of valid names for the first argument in a class method. 325 | valid-classmethod-first-arg=cls 326 | 327 | # List of valid names for the first argument in a metaclass class method. 328 | valid-metaclass-classmethod-first-arg=mcs 329 | 330 | # List of member names, which should be excluded from the protected access 331 | # warning. 332 | exclude-protected=_asdict,_fields,_replace,_source,_make 333 | 334 | 335 | [DESIGN] 336 | 337 | # Maximum number of arguments for function / method 338 | max-args=5 339 | 340 | # Argument names that match this expression will be ignored. Default to name 341 | # with leading underscore 342 | ignored-argument-names=_.* 343 | 344 | # Maximum number of locals for function / method body 345 | max-locals=15 346 | 347 | # Maximum number of return / yield for function / method body 348 | max-returns=6 349 | 350 | # Maximum number of branch for function / method body 351 | max-branches=12 352 | 353 | # Maximum number of statements in function / method body 354 | max-statements=50 355 | 356 | # Maximum number of parents for a class (see R0901). 357 | max-parents=7 358 | 359 | # Maximum number of attributes for a class (see R0902). 360 | max-attributes=7 361 | 362 | # Minimum number of public methods for a class (see R0903). 363 | min-public-methods=2 364 | 365 | # Maximum number of public methods for a class (see R0904). 366 | max-public-methods=100 367 | 368 | 369 | [IMPORTS] 370 | 371 | # Deprecated modules which should not be used, separated by a comma 372 | deprecated-modules=regsub,TERMIOS,Bastion,rexec 373 | 374 | # Create a graph of every (i.e. internal and external) dependencies in the 375 | # given file (report RP0402 must not be disabled) 376 | import-graph= 377 | 378 | # Create a graph of external dependencies in the given file (report RP0402 must 379 | # not be disabled) 380 | ext-import-graph= 381 | 382 | # Create a graph of internal dependencies in the given file (report RP0402 must 383 | # not be disabled) 384 | int-import-graph= 385 | 386 | 387 | [EXCEPTIONS] 388 | 389 | # Exceptions that will emit a warning when being caught. Defaults to 390 | # "Exception" 391 | overgeneral-exceptions=Exception 392 | -------------------------------------------------------------------------------- /asttokens/util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Grist Labs, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import ast 16 | import collections 17 | import io 18 | import sys 19 | import token 20 | import tokenize 21 | from abc import ABCMeta 22 | from ast import Module, expr, AST 23 | from functools import lru_cache 24 | from typing import ( 25 | Callable, 26 | Dict, 27 | Iterable, 28 | Iterator, 29 | List, 30 | Optional, 31 | Tuple, 32 | Union, 33 | cast, 34 | Any, 35 | TYPE_CHECKING, 36 | Type, 37 | ) 38 | 39 | if TYPE_CHECKING: # pragma: no cover 40 | from .astroid_compat import NodeNG 41 | 42 | # Type class used to expand out the definition of AST to include fields added by this library 43 | # It's not actually used for anything other than type checking though! 44 | class EnhancedAST(AST): 45 | # Additional attributes set by mark_tokens 46 | first_token = None # type: Token 47 | last_token = None # type: Token 48 | lineno = 0 # type: int 49 | end_lineno = 0 # type : int 50 | end_col_offset = 0 # type : int 51 | 52 | AstNode = Union[EnhancedAST, NodeNG] 53 | 54 | TokenInfo = tokenize.TokenInfo 55 | 56 | 57 | def token_repr(tok_type, string): 58 | # type: (int, Optional[str]) -> str 59 | """Returns a human-friendly representation of a token with the given type and string.""" 60 | # repr() prefixes unicode with 'u' on Python2 but not Python3; strip it out for consistency. 61 | return '%s:%s' % (token.tok_name[tok_type], repr(string).lstrip('u')) 62 | 63 | 64 | class Token(collections.namedtuple('Token', 'type string start end line index startpos endpos')): 65 | """ 66 | TokenInfo is an 8-tuple containing the same 5 fields as the tokens produced by the tokenize 67 | module, and 3 additional ones useful for this module: 68 | 69 | - [0] .type Token type (see token.py) 70 | - [1] .string Token (a string) 71 | - [2] .start Starting (row, column) indices of the token (a 2-tuple of ints) 72 | - [3] .end Ending (row, column) indices of the token (a 2-tuple of ints) 73 | - [4] .line Original line (string) 74 | - [5] .index Index of the token in the list of tokens that it belongs to. 75 | - [6] .startpos Starting character offset into the input text. 76 | - [7] .endpos Ending character offset into the input text. 77 | """ 78 | def __str__(self): 79 | # type: () -> str 80 | return token_repr(self.type, self.string) 81 | 82 | 83 | def match_token(token, tok_type, tok_str=None): 84 | # type: (Token, int, Optional[str]) -> bool 85 | """Returns true if token is of the given type and, if a string is given, has that string.""" 86 | return token.type == tok_type and (tok_str is None or token.string == tok_str) 87 | 88 | 89 | def expect_token(token, tok_type, tok_str=None): 90 | # type: (Token, int, Optional[str]) -> None 91 | """ 92 | Verifies that the given token is of the expected type. If tok_str is given, the token string 93 | is verified too. If the token doesn't match, raises an informative ValueError. 94 | """ 95 | if not match_token(token, tok_type, tok_str): 96 | raise ValueError("Expected token %s, got %s on line %s col %s" % ( 97 | token_repr(tok_type, tok_str), str(token), 98 | token.start[0], token.start[1] + 1)) 99 | 100 | 101 | def is_non_coding_token(token_type): 102 | # type: (int) -> bool 103 | """ 104 | These are considered non-coding tokens, as they don't affect the syntax tree. 105 | """ 106 | return token_type in (token.NL, token.COMMENT, token.ENCODING) 107 | 108 | 109 | def generate_tokens(text): 110 | # type: (str) -> Iterator[TokenInfo] 111 | """ 112 | Generates standard library tokens for the given code. 113 | """ 114 | # tokenize.generate_tokens is technically an undocumented API for Python3, but allows us to use the same API as for 115 | # Python2. See https://stackoverflow.com/a/4952291/328565. 116 | # FIXME: Remove cast once https://github.com/python/typeshed/issues/7003 gets fixed 117 | return tokenize.generate_tokens(cast(Callable[[], str], io.StringIO(text).readline)) 118 | 119 | 120 | def iter_children_func(node): 121 | # type: (AST) -> Callable 122 | """ 123 | Returns a function which yields all direct children of a AST node, 124 | skipping children that are singleton nodes. 125 | The function depends on whether ``node`` is from ``ast`` or from the ``astroid`` module. 126 | """ 127 | return iter_children_astroid if hasattr(node, 'get_children') else iter_children_ast 128 | 129 | 130 | def iter_children_astroid(node, include_joined_str=False): 131 | # type: (NodeNG, bool) -> Union[Iterator, List] 132 | if not include_joined_str and is_joined_str(node): 133 | return [] 134 | 135 | return node.get_children() 136 | 137 | 138 | SINGLETONS = {c for n, c in ast.__dict__.items() if isinstance(c, type) and 139 | issubclass(c, (ast.expr_context, ast.boolop, ast.operator, ast.unaryop, ast.cmpop))} 140 | 141 | 142 | def iter_children_ast(node, include_joined_str=False): 143 | # type: (AST, bool) -> Iterator[Union[AST, expr]] 144 | if not include_joined_str and is_joined_str(node): 145 | return 146 | 147 | if isinstance(node, ast.Dict): 148 | # override the iteration order: instead of , , 149 | # yield keys and values in source order (key1, value1, key2, value2, ...) 150 | for (key, value) in zip(node.keys, node.values): 151 | if key is not None: 152 | yield key 153 | yield value 154 | return 155 | 156 | for child in ast.iter_child_nodes(node): 157 | # Skip singleton children; they don't reflect particular positions in the code and break the 158 | # assumptions about the tree consisting of distinct nodes. Note that collecting classes 159 | # beforehand and checking them in a set is faster than using isinstance each time. 160 | if child.__class__ not in SINGLETONS: 161 | yield child 162 | 163 | 164 | stmt_class_names = {n for n, c in ast.__dict__.items() 165 | if isinstance(c, type) and issubclass(c, ast.stmt)} 166 | expr_class_names = ({n for n, c in ast.__dict__.items() 167 | if isinstance(c, type) and issubclass(c, ast.expr)} | 168 | {'AssignName', 'DelName', 'Const', 'AssignAttr', 'DelAttr'}) 169 | 170 | # These feel hacky compared to isinstance() but allow us to work with both ast and astroid nodes 171 | # in the same way, and without even importing astroid. 172 | def is_expr(node): 173 | # type: (AstNode) -> bool 174 | """Returns whether node is an expression node.""" 175 | return node.__class__.__name__ in expr_class_names 176 | 177 | def is_stmt(node): 178 | # type: (AstNode) -> bool 179 | """Returns whether node is a statement node.""" 180 | return node.__class__.__name__ in stmt_class_names 181 | 182 | def is_module(node): 183 | # type: (AstNode) -> bool 184 | """Returns whether node is a module node.""" 185 | return node.__class__.__name__ == 'Module' 186 | 187 | def is_joined_str(node): 188 | # type: (AstNode) -> bool 189 | """Returns whether node is a JoinedStr node, used to represent f-strings.""" 190 | # At the moment, nodes below JoinedStr have wrong line/col info, and trying to process them only 191 | # leads to errors. 192 | return node.__class__.__name__ == 'JoinedStr' 193 | 194 | 195 | def is_expr_stmt(node): 196 | # type: (AstNode) -> bool 197 | """Returns whether node is an `Expr` node, which is a statement that is an expression.""" 198 | return node.__class__.__name__ == 'Expr' 199 | 200 | 201 | 202 | CONSTANT_CLASSES: Tuple[Type, ...] = (ast.Constant,) 203 | try: 204 | from astroid.nodes import Const 205 | CONSTANT_CLASSES += (Const,) 206 | except ImportError: # pragma: no cover 207 | # astroid is not available 208 | pass 209 | 210 | def is_constant(node): 211 | # type: (AstNode) -> bool 212 | """Returns whether node is a Constant node.""" 213 | return isinstance(node, CONSTANT_CLASSES) 214 | 215 | 216 | def is_ellipsis(node): 217 | # type: (AstNode) -> bool 218 | """Returns whether node is an Ellipsis node.""" 219 | return is_constant(node) and node.value is Ellipsis # type: ignore 220 | 221 | 222 | def is_starred(node): 223 | # type: (AstNode) -> bool 224 | """Returns whether node is a starred expression node.""" 225 | return node.__class__.__name__ == 'Starred' 226 | 227 | 228 | def is_slice(node): 229 | # type: (AstNode) -> bool 230 | """Returns whether node represents a slice, e.g. `1:2` in `x[1:2]`""" 231 | # Before 3.9, a tuple containing a slice is an ExtSlice, 232 | # but this was removed in https://bugs.python.org/issue34822 233 | return ( 234 | node.__class__.__name__ in ('Slice', 'ExtSlice') 235 | or ( 236 | node.__class__.__name__ == 'Tuple' 237 | and any(map(is_slice, cast(ast.Tuple, node).elts)) 238 | ) 239 | ) 240 | 241 | 242 | def is_empty_astroid_slice(node): 243 | # type: (AstNode) -> bool 244 | return ( 245 | node.__class__.__name__ == "Slice" 246 | and not isinstance(node, ast.AST) 247 | and node.lower is node.upper is node.step is None 248 | ) 249 | 250 | 251 | # Sentinel value used by visit_tree(). 252 | _PREVISIT = object() 253 | 254 | def visit_tree(node, previsit, postvisit): 255 | # type: (Module, Callable[[AstNode, Optional[Token]], Tuple[Optional[Token], Optional[Token]]], Optional[Callable[[AstNode, Optional[Token], Optional[Token]], None]]) -> None 256 | """ 257 | Scans the tree under the node depth-first using an explicit stack. It avoids implicit recursion 258 | via the function call stack to avoid hitting 'maximum recursion depth exceeded' error. 259 | 260 | It calls ``previsit()`` and ``postvisit()`` as follows: 261 | 262 | * ``previsit(node, par_value)`` - should return ``(par_value, value)`` 263 | ``par_value`` is as returned from ``previsit()`` of the parent. 264 | 265 | * ``postvisit(node, par_value, value)`` - should return ``value`` 266 | ``par_value`` is as returned from ``previsit()`` of the parent, and ``value`` is as 267 | returned from ``previsit()`` of this node itself. The return ``value`` is ignored except 268 | the one for the root node, which is returned from the overall ``visit_tree()`` call. 269 | 270 | For the initial node, ``par_value`` is None. ``postvisit`` may be None. 271 | """ 272 | if not postvisit: 273 | postvisit = lambda node, pvalue, value: None 274 | 275 | iter_children = iter_children_func(node) 276 | done = set() 277 | ret = None 278 | stack = [(node, None, _PREVISIT)] # type: List[Tuple[AstNode, Optional[Token], Union[Optional[Token], object]]] 279 | while stack: 280 | current, par_value, value = stack.pop() 281 | if value is _PREVISIT: 282 | assert current not in done # protect againt infinite loop in case of a bad tree. 283 | done.add(current) 284 | 285 | pvalue, post_value = previsit(current, par_value) 286 | stack.append((current, par_value, post_value)) 287 | 288 | # Insert all children in reverse order (so that first child ends up on top of the stack). 289 | ins = len(stack) 290 | for n in iter_children(current): 291 | stack.insert(ins, (n, pvalue, _PREVISIT)) 292 | else: 293 | ret = postvisit(current, par_value, cast(Optional[Token], value)) 294 | return ret 295 | 296 | 297 | def walk(node, include_joined_str=False): 298 | # type: (AST, bool) -> Iterator[Union[Module, AstNode]] 299 | """ 300 | Recursively yield all descendant nodes in the tree starting at ``node`` (including ``node`` 301 | itself), using depth-first pre-order traversal (yieling parents before their children). 302 | 303 | This is similar to ``ast.walk()``, but with a different order, and it works for both ``ast`` and 304 | ``astroid`` trees. Also, as ``iter_children()``, it skips singleton nodes generated by ``ast``. 305 | 306 | By default, ``JoinedStr`` (f-string) nodes and their contents are skipped 307 | because they previously couldn't be handled. Set ``include_joined_str`` to True to include them. 308 | """ 309 | iter_children = iter_children_func(node) 310 | done = set() 311 | stack = [node] 312 | while stack: 313 | current = stack.pop() 314 | assert current not in done # protect againt infinite loop in case of a bad tree. 315 | done.add(current) 316 | 317 | yield current 318 | 319 | # Insert all children in reverse order (so that first child ends up on top of the stack). 320 | # This is faster than building a list and reversing it. 321 | ins = len(stack) 322 | for c in iter_children(current, include_joined_str): 323 | stack.insert(ins, c) 324 | 325 | 326 | def replace(text, replacements): 327 | # type: (str, List[Tuple[int, int, str]]) -> str 328 | """ 329 | Replaces multiple slices of text with new values. This is a convenience method for making code 330 | modifications of ranges e.g. as identified by ``ASTTokens.get_text_range(node)``. Replacements is 331 | an iterable of ``(start, end, new_text)`` tuples. 332 | 333 | For example, ``replace("this is a test", [(0, 4, "X"), (8, 9, "THE")])`` produces 334 | ``"X is THE test"``. 335 | """ 336 | p = 0 337 | parts = [] 338 | for (start, end, new_text) in sorted(replacements): 339 | parts.append(text[p:start]) 340 | parts.append(new_text) 341 | p = end 342 | parts.append(text[p:]) 343 | return ''.join(parts) 344 | 345 | 346 | class NodeMethods: 347 | """ 348 | Helper to get `visit_{node_type}` methods given a node's class and cache the results. 349 | """ 350 | def __init__(self): 351 | # type: () -> None 352 | self._cache = {} # type: Dict[Union[ABCMeta, type], Callable[[AstNode, Token, Token], Tuple[Token, Token]]] 353 | 354 | def get(self, obj, cls): 355 | # type: (Any, Union[ABCMeta, type]) -> Callable 356 | """ 357 | Using the lowercase name of the class as node_type, returns `obj.visit_{node_type}`, 358 | or `obj.visit_default` if the type-specific method is not found. 359 | """ 360 | method = self._cache.get(cls) 361 | if not method: 362 | name = "visit_" + cls.__name__.lower() 363 | method = getattr(obj, name, obj.visit_default) 364 | self._cache[cls] = method 365 | return method 366 | 367 | 368 | def patched_generate_tokens(original_tokens): 369 | # type: (Iterable[TokenInfo]) -> Iterator[TokenInfo] 370 | """ 371 | Fixes tokens yielded by `tokenize.generate_tokens` to handle more non-ASCII characters in identifiers. 372 | Workaround for https://github.com/python/cpython/issues/68382. 373 | Should only be used when tokenizing a string that is known to be valid syntax, 374 | because it assumes that error tokens are not actually errors. 375 | Combines groups of consecutive NAME, NUMBER, and/or ERRORTOKEN tokens into a single NAME token. 376 | """ 377 | group = [] # type: List[tokenize.TokenInfo] 378 | for tok in original_tokens: 379 | if ( 380 | tok.type in (tokenize.NAME, tokenize.ERRORTOKEN, tokenize.NUMBER) 381 | # Only combine tokens if they have no whitespace in between 382 | and (not group or group[-1].end == tok.start) 383 | ): 384 | group.append(tok) 385 | else: 386 | for combined_token in combine_tokens(group): 387 | yield combined_token 388 | group = [] 389 | yield tok 390 | for combined_token in combine_tokens(group): 391 | yield combined_token 392 | 393 | def combine_tokens(group): 394 | # type: (List[tokenize.TokenInfo]) -> List[tokenize.TokenInfo] 395 | if not any(tok.type == tokenize.ERRORTOKEN for tok in group) or len({tok.line for tok in group}) != 1: 396 | return group 397 | return [ 398 | tokenize.TokenInfo( 399 | type=tokenize.NAME, 400 | string="".join(t.string for t in group), 401 | start=group[0].start, 402 | end=group[-1].end, 403 | line=group[0].line, 404 | ) 405 | ] 406 | 407 | 408 | def last_stmt(node): 409 | # type: (AstNode) -> AstNode 410 | """ 411 | If the given AST node contains multiple statements, return the last one. 412 | Otherwise, just return the node. 413 | """ 414 | child_stmts = [ 415 | child for child in iter_children_func(node)(node) 416 | if is_stmt(child) or type(child).__name__ in ( 417 | "excepthandler", 418 | "ExceptHandler", 419 | "match_case", 420 | "MatchCase", 421 | "TryExcept", 422 | "TryFinally", 423 | ) 424 | ] 425 | if child_stmts: 426 | return last_stmt(child_stmts[-1]) 427 | return node 428 | 429 | 430 | 431 | @lru_cache(maxsize=None) 432 | def fstring_positions_work(): 433 | # type: () -> bool 434 | """ 435 | The positions attached to nodes inside f-string FormattedValues have some bugs 436 | that were fixed in Python 3.9.7 in https://github.com/python/cpython/pull/27729. 437 | This checks for those bugs more concretely without relying on the Python version. 438 | Specifically this checks: 439 | - Values with a format spec or conversion 440 | - Repeated (i.e. identical-looking) expressions 441 | - f-strings implicitly concatenated over multiple lines. 442 | - Multiline, triple-quoted f-strings. 443 | """ 444 | source = """( 445 | f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}" 446 | f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}" 447 | f"{x + y + z} {x} {y} {z} {z} {z!a} {z:z}" 448 | f''' 449 | {s} {t} 450 | {u} {v} 451 | ''' 452 | )""" 453 | tree = ast.parse(source) 454 | name_nodes = [node for node in ast.walk(tree) if isinstance(node, ast.Name)] 455 | name_positions = [(node.lineno, node.col_offset) for node in name_nodes] 456 | positions_are_unique = len(set(name_positions)) == len(name_positions) 457 | correct_source_segments = all( 458 | ast.get_source_segment(source, node) == node.id 459 | for node in name_nodes 460 | ) 461 | return positions_are_unique and correct_source_segments 462 | 463 | def annotate_fstring_nodes(tree): 464 | # type: (ast.AST) -> None 465 | """ 466 | Add a special attribute `_broken_positions` to nodes inside f-strings 467 | if the lineno/col_offset cannot be trusted. 468 | """ 469 | if sys.version_info >= (3, 12): 470 | # f-strings were weirdly implemented until https://peps.python.org/pep-0701/ 471 | # In Python 3.12, inner nodes have sensible positions. 472 | return 473 | for joinedstr in walk(tree, include_joined_str=True): 474 | if not isinstance(joinedstr, ast.JoinedStr): 475 | continue 476 | for part in joinedstr.values: 477 | # The ast positions of the FormattedValues/Constant nodes span the full f-string, which is weird. 478 | setattr(part, '_broken_positions', True) # use setattr for mypy 479 | 480 | if isinstance(part, ast.FormattedValue): 481 | if not fstring_positions_work(): 482 | for child in walk(part.value): 483 | setattr(child, '_broken_positions', True) 484 | 485 | if part.format_spec: # this is another JoinedStr 486 | # Again, the standard positions span the full f-string. 487 | setattr(part.format_spec, '_broken_positions', True) 488 | -------------------------------------------------------------------------------- /asttokens/asttokens.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Grist Labs, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import abc 16 | import ast 17 | import bisect 18 | import sys 19 | import token 20 | from ast import Module 21 | from typing import Iterable, Iterator, List, Optional, Tuple, Any, cast, TYPE_CHECKING 22 | 23 | from .line_numbers import LineNumbers 24 | from .util import ( 25 | Token, match_token, is_non_coding_token, patched_generate_tokens, last_stmt, 26 | annotate_fstring_nodes, generate_tokens, is_module, is_stmt 27 | ) 28 | 29 | if TYPE_CHECKING: # pragma: no cover 30 | from .util import AstNode, TokenInfo 31 | 32 | 33 | class ASTTextBase(metaclass=abc.ABCMeta): 34 | def __init__(self, source_text: str, filename: str) -> None: 35 | self._filename = filename 36 | 37 | # Decode source after parsing to let Python 2 handle coding declarations. 38 | # (If the encoding was not utf-8 compatible, then even if it parses correctly, 39 | # we'll fail with a unicode error here.) 40 | source_text = str(source_text) 41 | 42 | self._text = source_text 43 | self._line_numbers = LineNumbers(source_text) 44 | 45 | @abc.abstractmethod 46 | def get_text_positions(self, node, padded): 47 | # type: (AstNode, bool) -> Tuple[Tuple[int, int], Tuple[int, int]] 48 | """ 49 | Returns two ``(lineno, col_offset)`` tuples for the start and end of the given node. 50 | If the positions can't be determined, or the nodes don't correspond to any particular text, 51 | returns ``(1, 0)`` for both. 52 | 53 | ``padded`` corresponds to the ``padded`` argument to ``ast.get_source_segment()``. 54 | This means that if ``padded`` is True, the start position will be adjusted to include 55 | leading whitespace if ``node`` is a multiline statement. 56 | """ 57 | raise NotImplementedError # pragma: no cover 58 | 59 | def get_text_range(self, node, padded=True): 60 | # type: (AstNode, bool) -> Tuple[int, int] 61 | """ 62 | Returns the (startpos, endpos) positions in source text corresponding to the given node. 63 | Returns (0, 0) for nodes (like `Load`) that don't correspond to any particular text. 64 | 65 | See ``get_text_positions()`` for details on the ``padded`` argument. 66 | """ 67 | start, end = self.get_text_positions(node, padded) 68 | return ( 69 | self._line_numbers.line_to_offset(*start), 70 | self._line_numbers.line_to_offset(*end), 71 | ) 72 | 73 | def get_text(self, node, padded=True): 74 | # type: (AstNode, bool) -> str 75 | """ 76 | Returns the text corresponding to the given node. 77 | Returns '' for nodes (like `Load`) that don't correspond to any particular text. 78 | 79 | See ``get_text_positions()`` for details on the ``padded`` argument. 80 | """ 81 | start, end = self.get_text_range(node, padded) 82 | return self._text[start: end] 83 | 84 | 85 | class ASTTokens(ASTTextBase): 86 | """ 87 | ASTTokens maintains the text of Python code in several forms: as a string, as line numbers, and 88 | as tokens, and is used to mark and access token and position information. 89 | 90 | ``source_text`` must be a unicode or UTF8-encoded string. If you pass in UTF8 bytes, remember 91 | that all offsets you'll get are to the unicode text, which is available as the ``.text`` 92 | property. 93 | 94 | If ``parse`` is set, the ``source_text`` will be parsed with ``ast.parse()``, and the resulting 95 | tree marked with token info and made available as the ``.tree`` property. 96 | 97 | If ``tree`` is given, it will be marked and made available as the ``.tree`` property. In 98 | addition to the trees produced by the ``ast`` module, ASTTokens will also mark trees produced 99 | using ``astroid`` library . 100 | 101 | If only ``source_text`` is given, you may use ``.mark_tokens(tree)`` to mark the nodes of an AST 102 | tree created separately. 103 | """ 104 | 105 | def __init__(self, source_text, parse=False, tree=None, filename='', tokens=None): 106 | # type: (Any, bool, Optional[Module], str, Optional[Iterable[TokenInfo]]) -> None 107 | super(ASTTokens, self).__init__(source_text, filename) 108 | 109 | self._tree = ast.parse(source_text, filename) if parse else tree 110 | 111 | # Tokenize the code. 112 | if tokens is None: 113 | tokens = generate_tokens(self._text) 114 | self._tokens = list(self._translate_tokens(tokens)) 115 | 116 | # Extract the start positions of all tokens, so that we can quickly map positions to tokens. 117 | self._token_offsets = [tok.startpos for tok in self._tokens] 118 | 119 | if self._tree: 120 | self.mark_tokens(self._tree) 121 | 122 | def mark_tokens(self, root_node): 123 | # type: (Module) -> None 124 | """ 125 | Given the root of the AST or Astroid tree produced from source_text, visits all nodes marking 126 | them with token and position information by adding ``.first_token`` and 127 | ``.last_token`` attributes. This is done automatically in the constructor when ``parse`` or 128 | ``tree`` arguments are set, but may be used manually with a separate AST or Astroid tree. 129 | """ 130 | # The hard work of this class is done by MarkTokens 131 | from .mark_tokens import MarkTokens # to avoid import loops 132 | MarkTokens(self).visit_tree(root_node) 133 | 134 | def _translate_tokens(self, original_tokens): 135 | # type: (Iterable[TokenInfo]) -> Iterator[Token] 136 | """ 137 | Translates the given standard library tokens into our own representation. 138 | """ 139 | for index, tok in enumerate(patched_generate_tokens(original_tokens)): 140 | tok_type, tok_str, start, end, line = tok 141 | yield Token(tok_type, tok_str, start, end, line, index, 142 | self._line_numbers.line_to_offset(start[0], start[1]), 143 | self._line_numbers.line_to_offset(end[0], end[1])) 144 | 145 | @property 146 | def text(self): 147 | # type: () -> str 148 | """The source code passed into the constructor.""" 149 | return self._text 150 | 151 | @property 152 | def tokens(self): 153 | # type: () -> List[Token] 154 | """The list of tokens corresponding to the source code from the constructor.""" 155 | return self._tokens 156 | 157 | @property 158 | def tree(self): 159 | # type: () -> Optional[Module] 160 | """The root of the AST tree passed into the constructor or parsed from the source code.""" 161 | return self._tree 162 | 163 | @property 164 | def filename(self): 165 | # type: () -> str 166 | """The filename that was parsed""" 167 | return self._filename 168 | 169 | def get_token_from_offset(self, offset): 170 | # type: (int) -> Token 171 | """ 172 | Returns the token containing the given character offset (0-based position in source text), 173 | or the preceeding token if the position is between tokens. 174 | """ 175 | return self._tokens[bisect.bisect(self._token_offsets, offset) - 1] 176 | 177 | def get_token(self, lineno, col_offset): 178 | # type: (int, int) -> Token 179 | """ 180 | Returns the token containing the given (lineno, col_offset) position, or the preceeding token 181 | if the position is between tokens. 182 | """ 183 | # TODO: add test for multibyte unicode. We need to translate offsets from ast module (which 184 | # are in utf8) to offsets into the unicode text. tokenize module seems to use unicode offsets 185 | # but isn't explicit. 186 | return self.get_token_from_offset(self._line_numbers.line_to_offset(lineno, col_offset)) 187 | 188 | def get_token_from_utf8(self, lineno, col_offset): 189 | # type: (int, int) -> Token 190 | """ 191 | Same as get_token(), but interprets col_offset as a UTF8 offset, which is what `ast` uses. 192 | """ 193 | return self.get_token(lineno, self._line_numbers.from_utf8_col(lineno, col_offset)) 194 | 195 | def next_token(self, tok, include_extra=False): 196 | # type: (Token, bool) -> Token 197 | """ 198 | Returns the next token after the given one. If include_extra is True, includes non-coding 199 | tokens from the tokenize module, such as NL and COMMENT. 200 | """ 201 | i = tok.index + 1 202 | if not include_extra: 203 | while is_non_coding_token(self._tokens[i].type): 204 | i += 1 205 | return self._tokens[i] 206 | 207 | def prev_token(self, tok, include_extra=False): 208 | # type: (Token, bool) -> Token 209 | """ 210 | Returns the previous token before the given one. If include_extra is True, includes non-coding 211 | tokens from the tokenize module, such as NL and COMMENT. 212 | """ 213 | i = tok.index - 1 214 | if not include_extra: 215 | while is_non_coding_token(self._tokens[i].type): 216 | i -= 1 217 | return self._tokens[i] 218 | 219 | def find_token(self, start_token, tok_type, tok_str=None, reverse=False): 220 | # type: (Token, int, Optional[str], bool) -> Token 221 | """ 222 | Looks for the first token, starting at start_token, that matches tok_type and, if given, the 223 | token string. Searches backwards if reverse is True. Returns ENDMARKER token if not found (you 224 | can check it with `token.ISEOF(t.type)`). 225 | """ 226 | t = start_token 227 | advance = self.prev_token if reverse else self.next_token 228 | while not match_token(t, tok_type, tok_str) and not token.ISEOF(t.type): 229 | t = advance(t, include_extra=True) 230 | return t 231 | 232 | def token_range(self, 233 | first_token, # type: Token 234 | last_token, # type: Token 235 | include_extra=False, # type: bool 236 | ): 237 | # type: (...) -> Iterator[Token] 238 | """ 239 | Yields all tokens in order from first_token through and including last_token. If 240 | include_extra is True, includes non-coding tokens such as tokenize.NL and .COMMENT. 241 | """ 242 | for i in range(first_token.index, last_token.index + 1): 243 | if include_extra or not is_non_coding_token(self._tokens[i].type): 244 | yield self._tokens[i] 245 | 246 | def get_tokens(self, node, include_extra=False): 247 | # type: (AstNode, bool) -> Iterator[Token] 248 | """ 249 | Yields all tokens making up the given node. If include_extra is True, includes non-coding 250 | tokens such as tokenize.NL and .COMMENT. 251 | """ 252 | return self.token_range(node.first_token, node.last_token, include_extra=include_extra) 253 | 254 | def get_text_positions(self, node, padded): 255 | # type: (AstNode, bool) -> Tuple[Tuple[int, int], Tuple[int, int]] 256 | """ 257 | Returns two ``(lineno, col_offset)`` tuples for the start and end of the given node. 258 | If the positions can't be determined, or the nodes don't correspond to any particular text, 259 | returns ``(1, 0)`` for both. 260 | 261 | ``padded`` corresponds to the ``padded`` argument to ``ast.get_source_segment()``. 262 | This means that if ``padded`` is True, the start position will be adjusted to include 263 | leading whitespace if ``node`` is a multiline statement. 264 | """ 265 | if not hasattr(node, 'first_token'): 266 | return (1, 0), (1, 0) 267 | 268 | start = node.first_token.start 269 | end = node.last_token.end 270 | if padded and any(match_token(t, token.NEWLINE) for t in self.get_tokens(node)): 271 | # Set col_offset to 0 to include leading indentation for multiline statements. 272 | start = (start[0], 0) 273 | 274 | return start, end 275 | 276 | 277 | class ASTText(ASTTextBase): 278 | """ 279 | Supports the same ``get_text*`` methods as ``ASTTokens``, 280 | but uses the AST to determine the text positions instead of tokens. 281 | This is faster than ``ASTTokens`` as it requires less setup work. 282 | 283 | It also (sometimes) supports nodes inside f-strings, which ``ASTTokens`` doesn't. 284 | 285 | Some node types and/or Python versions are not supported. 286 | In these cases the ``get_text*`` methods will fall back to using ``ASTTokens`` 287 | which incurs the usual setup cost the first time. 288 | If you want to avoid this, check ``supports_tokenless(node)`` before calling ``get_text*`` methods. 289 | """ 290 | def __init__(self, source_text, tree=None, filename=''): 291 | # type: (Any, Optional[Module], str) -> None 292 | super(ASTText, self).__init__(source_text, filename) 293 | 294 | self._tree = tree 295 | if self._tree is not None: 296 | annotate_fstring_nodes(self._tree) 297 | 298 | self._asttokens = None # type: Optional[ASTTokens] 299 | 300 | @property 301 | def tree(self): 302 | # type: () -> Module 303 | if self._tree is None: 304 | self._tree = ast.parse(self._text, self._filename) 305 | annotate_fstring_nodes(self._tree) 306 | return self._tree 307 | 308 | @property 309 | def asttokens(self): 310 | # type: () -> ASTTokens 311 | if self._asttokens is None: 312 | self._asttokens = ASTTokens( 313 | self._text, 314 | tree=self.tree, 315 | filename=self._filename, 316 | ) 317 | return self._asttokens 318 | 319 | def _get_text_positions_tokenless(self, node, padded): 320 | # type: (AstNode, bool) -> Tuple[Tuple[int, int], Tuple[int, int]] 321 | """ 322 | Version of ``get_text_positions()`` that doesn't use tokens. 323 | """ 324 | if is_module(node): 325 | # Modules don't have position info, so just return the range of the whole text. 326 | # The token-using method does something different, but its behavior seems weird and inconsistent. 327 | # For example, in a file with only comments, it only returns the first line. 328 | # It's hard to imagine a case when this matters. 329 | return (1, 0), self._line_numbers.offset_to_line(len(self._text)) 330 | 331 | if getattr(node, 'lineno', None) is None: 332 | return (1, 0), (1, 0) 333 | 334 | assert node # tell mypy that node is not None, which we allowed up to here for compatibility 335 | 336 | decorators = getattr(node, 'decorator_list', []) 337 | if not decorators: 338 | # Astroid uses node.decorators.nodes instead of node.decorator_list. 339 | decorators_node = getattr(node, 'decorators', None) 340 | decorators = getattr(decorators_node, 'nodes', []) 341 | if decorators: 342 | # Function/Class definition nodes are marked by AST as starting at def/class, 343 | # not the first decorator. This doesn't match the token-using behavior, 344 | # or inspect.getsource(), and just seems weird. 345 | start_node = decorators[0] 346 | else: 347 | start_node = node 348 | 349 | start_lineno = start_node.lineno 350 | end_node = last_stmt(node) 351 | 352 | # Include leading indentation for multiline statements. 353 | # This doesn't mean simple statements that happen to be on multiple lines, 354 | # but compound statements where inner indentation matters. 355 | # So we don't just compare node.lineno and node.end_lineno, 356 | # we check for a contained statement starting on a different line. 357 | if padded and ( 358 | start_lineno != end_node.lineno 359 | or ( 360 | # Astroid docstrings aren't treated as separate statements. 361 | # So to handle function/class definitions with a docstring but no other body, 362 | # we just check that the node is a statement with a docstring 363 | # and spanning multiple lines in the simple, literal sense. 364 | start_lineno != node.end_lineno 365 | and getattr(node, "doc_node", None) 366 | and is_stmt(node) 367 | ) 368 | ): 369 | start_col_offset = 0 370 | else: 371 | start_col_offset = self._line_numbers.from_utf8_col(start_lineno, start_node.col_offset) 372 | 373 | start = (start_lineno, start_col_offset) 374 | 375 | # To match the token-using behaviour, we exclude trailing semicolons and comments. 376 | # This means that for blocks containing multiple statements, we have to use the last one 377 | # instead of the actual node for end_lineno and end_col_offset. 378 | end_lineno = cast(int, end_node.end_lineno) 379 | end_col_offset = cast(int, end_node.end_col_offset) 380 | end_col_offset = self._line_numbers.from_utf8_col(end_lineno, end_col_offset) 381 | end = (end_lineno, end_col_offset) 382 | 383 | return start, end 384 | 385 | def get_text_positions(self, node, padded): 386 | # type: (AstNode, bool) -> Tuple[Tuple[int, int], Tuple[int, int]] 387 | """ 388 | Returns two ``(lineno, col_offset)`` tuples for the start and end of the given node. 389 | If the positions can't be determined, or the nodes don't correspond to any particular text, 390 | returns ``(1, 0)`` for both. 391 | 392 | ``padded`` corresponds to the ``padded`` argument to ``ast.get_source_segment()``. 393 | This means that if ``padded`` is True, the start position will be adjusted to include 394 | leading whitespace if ``node`` is a multiline statement. 395 | """ 396 | if getattr(node, "_broken_positions", None): 397 | # This node was marked in util.annotate_fstring_nodes as having untrustworthy lineno/col_offset. 398 | return (1, 0), (1, 0) 399 | 400 | if supports_tokenless(node): 401 | return self._get_text_positions_tokenless(node, padded) 402 | 403 | return self.asttokens.get_text_positions(node, padded) 404 | 405 | 406 | # Node types that _get_text_positions_tokenless doesn't support. 407 | # These initial values are missing lineno. 408 | _unsupported_tokenless_types = ("arguments", "Arguments", "withitem") # type: Tuple[str, ...] 409 | if sys.version_info[:2] == (3, 8): 410 | # _get_text_positions_tokenless works incorrectly for these types due to bugs in Python 3.8. 411 | _unsupported_tokenless_types += ("arg", "Starred") 412 | # no lineno in 3.8 413 | _unsupported_tokenless_types += ("Slice", "ExtSlice", "Index", "keyword") 414 | 415 | 416 | def supports_tokenless(node=None): 417 | # type: (Any) -> bool 418 | """ 419 | Returns True if the Python version and the node (if given) are supported by 420 | the ``get_text*`` methods of ``ASTText`` without falling back to ``ASTTokens``. 421 | See ``ASTText`` for why this matters. 422 | 423 | The following cases are not supported: 424 | 425 | - PyPy 426 | - ``ast.arguments`` / ``astroid.Arguments`` 427 | - ``ast.withitem`` 428 | - ``astroid.Comprehension`` 429 | - ``astroid.AssignName`` inside ``astroid.Arguments`` or ``astroid.ExceptHandler`` 430 | - The following nodes in Python 3.8 only: 431 | - ``ast.arg`` 432 | - ``ast.Starred`` 433 | - ``ast.Slice`` 434 | - ``ast.ExtSlice`` 435 | - ``ast.Index`` 436 | - ``ast.keyword`` 437 | """ 438 | return ( 439 | type(node).__name__ not in _unsupported_tokenless_types 440 | and not ( 441 | # astroid nodes 442 | not isinstance(node, ast.AST) and node is not None and ( 443 | ( 444 | type(node).__name__ == "AssignName" 445 | and type(node.parent).__name__ in ("Arguments", "ExceptHandler") 446 | ) 447 | ) 448 | ) 449 | and 'pypy' not in sys.version.lower() 450 | ) 451 | -------------------------------------------------------------------------------- /asttokens/mark_tokens.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Grist Labs, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import ast 16 | import numbers 17 | import sys 18 | import token 19 | from ast import Module 20 | from typing import Callable, List, Union, cast, Optional, Tuple, TYPE_CHECKING 21 | 22 | from . import util 23 | from .asttokens import ASTTokens 24 | from .astroid_compat import astroid_node_classes as nc, BaseContainer as AstroidBaseContainer 25 | 26 | if TYPE_CHECKING: 27 | from .util import AstNode 28 | 29 | 30 | # Mapping of matching braces. To find a token here, look up token[:2]. 31 | _matching_pairs_left = { 32 | (token.OP, '('): (token.OP, ')'), 33 | (token.OP, '['): (token.OP, ']'), 34 | (token.OP, '{'): (token.OP, '}'), 35 | } 36 | 37 | _matching_pairs_right = { 38 | (token.OP, ')'): (token.OP, '('), 39 | (token.OP, ']'): (token.OP, '['), 40 | (token.OP, '}'): (token.OP, '{'), 41 | } 42 | 43 | 44 | class MarkTokens: 45 | """ 46 | Helper that visits all nodes in the AST tree and assigns .first_token and .last_token attributes 47 | to each of them. This is the heart of the token-marking logic. 48 | """ 49 | def __init__(self, code): 50 | # type: (ASTTokens) -> None 51 | self._code = code 52 | self._methods = util.NodeMethods() 53 | self._iter_children = None # type: Optional[Callable] 54 | 55 | def visit_tree(self, node): 56 | # type: (Module) -> None 57 | self._iter_children = util.iter_children_func(node) 58 | util.visit_tree(node, self._visit_before_children, self._visit_after_children) 59 | 60 | def _visit_before_children(self, node, parent_token): 61 | # type: (AstNode, Optional[util.Token]) -> Tuple[Optional[util.Token], Optional[util.Token]] 62 | col = getattr(node, 'col_offset', None) 63 | token = self._code.get_token_from_utf8(node.lineno, col) if col is not None else None 64 | 65 | if not token and util.is_module(node): 66 | # We'll assume that a Module node starts at the start of the source code. 67 | token = self._code.get_token(1, 0) 68 | 69 | # Use our own token, or our parent's if we don't have one, to pass to child calls as 70 | # parent_token argument. The second value becomes the token argument of _visit_after_children. 71 | return (token or parent_token, token) 72 | 73 | def _visit_after_children(self, node, parent_token, token): 74 | # type: (AstNode, Optional[util.Token], Optional[util.Token]) -> None 75 | # This processes the node generically first, after all children have been processed. 76 | 77 | # Get the first and last tokens that belong to children. Note how this doesn't assume that we 78 | # iterate through children in order that corresponds to occurrence in source code. This 79 | # assumption can fail (e.g. with return annotations). 80 | first = token 81 | last = None 82 | for child in cast(Callable, self._iter_children)(node): 83 | # astroid slices have especially wrong positions, we don't want them to corrupt their parents. 84 | if util.is_empty_astroid_slice(child): 85 | continue 86 | if not first or child.first_token.index < first.index: 87 | first = child.first_token 88 | if not last or child.last_token.index > last.index: 89 | last = child.last_token 90 | 91 | # If we don't have a first token from _visit_before_children, and there were no children, then 92 | # use the parent's token as the first token. 93 | first = first or parent_token 94 | 95 | # If no children, set last token to the first one. 96 | last = last or first 97 | 98 | # Statements continue to before NEWLINE. This helps cover a few different cases at once. 99 | if util.is_stmt(node): 100 | last = self._find_last_in_stmt(cast(util.Token, last)) 101 | 102 | # Capture any unmatched brackets. 103 | first, last = self._expand_to_matching_pairs(cast(util.Token, first), cast(util.Token, last), node) 104 | 105 | # Give a chance to node-specific methods to adjust. 106 | nfirst, nlast = self._methods.get(self, node.__class__)(node, first, last) 107 | 108 | if (nfirst, nlast) != (first, last): 109 | # If anything changed, expand again to capture any unmatched brackets. 110 | nfirst, nlast = self._expand_to_matching_pairs(nfirst, nlast, node) 111 | 112 | node.first_token = nfirst 113 | node.last_token = nlast 114 | 115 | def _find_last_in_stmt(self, start_token): 116 | # type: (util.Token) -> util.Token 117 | t = start_token 118 | while (not util.match_token(t, token.NEWLINE) and 119 | not util.match_token(t, token.OP, ';') and 120 | not token.ISEOF(t.type)): 121 | t = self._code.next_token(t, include_extra=True) 122 | return self._code.prev_token(t) 123 | 124 | def _expand_to_matching_pairs(self, first_token, last_token, node): 125 | # type: (util.Token, util.Token, AstNode) -> Tuple[util.Token, util.Token] 126 | """ 127 | Scan tokens in [first_token, last_token] range that are between node's children, and for any 128 | unmatched brackets, adjust first/last tokens to include the closing pair. 129 | """ 130 | # We look for opening parens/braces among non-child tokens (i.e. tokens between our actual 131 | # child nodes). If we find any closing ones, we match them to the opens. 132 | to_match_right = [] # type: List[Tuple[int, str]] 133 | to_match_left = [] 134 | for tok in self._code.token_range(first_token, last_token): 135 | tok_info = tok[:2] 136 | if to_match_right and tok_info == to_match_right[-1]: 137 | to_match_right.pop() 138 | elif tok_info in _matching_pairs_left: 139 | to_match_right.append(_matching_pairs_left[tok_info]) 140 | elif tok_info in _matching_pairs_right: 141 | to_match_left.append(_matching_pairs_right[tok_info]) 142 | 143 | # Once done, extend `last_token` to match any unclosed parens/braces. 144 | for match in reversed(to_match_right): 145 | last = self._code.next_token(last_token) 146 | # Allow for trailing commas or colons (allowed in subscripts) before the closing delimiter 147 | while any(util.match_token(last, token.OP, x) for x in (',', ':')): 148 | last = self._code.next_token(last) 149 | # Now check for the actual closing delimiter. 150 | if util.match_token(last, *match): 151 | last_token = last 152 | 153 | # And extend `first_token` to match any unclosed opening parens/braces. 154 | for match in to_match_left: 155 | first = self._code.prev_token(first_token) 156 | if util.match_token(first, *match): 157 | first_token = first 158 | 159 | return (first_token, last_token) 160 | 161 | #---------------------------------------------------------------------- 162 | # Node visitors. Each takes a preliminary first and last tokens, and returns the adjusted pair 163 | # that will actually be assigned. 164 | 165 | def visit_default(self, node, first_token, last_token): 166 | # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] 167 | # pylint: disable=no-self-use 168 | # By default, we don't need to adjust the token we computed earlier. 169 | return (first_token, last_token) 170 | 171 | def handle_comp(self, open_brace, node, first_token, last_token): 172 | # type: (str, AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] 173 | # For list/set/dict comprehensions, we only get the token of the first child, so adjust it to 174 | # include the opening brace (the closing brace will be matched automatically). 175 | before = self._code.prev_token(first_token) 176 | util.expect_token(before, token.OP, open_brace) 177 | return (before, last_token) 178 | 179 | def visit_comprehension(self, 180 | node, # type: AstNode 181 | first_token, # type: util.Token 182 | last_token, # type: util.Token 183 | ): 184 | # type: (...) -> Tuple[util.Token, util.Token] 185 | # The 'comprehension' node starts with 'for' but we only get first child; we search backwards 186 | # to find the 'for' keyword. 187 | first = self._code.find_token(first_token, token.NAME, 'for', reverse=True) 188 | return (first, last_token) 189 | 190 | def visit_if(self, node, first_token, last_token): 191 | # type: (util.Token, util.Token, util.Token) -> Tuple[util.Token, util.Token] 192 | while first_token.string not in ('if', 'elif'): 193 | first_token = self._code.prev_token(first_token) 194 | return first_token, last_token 195 | 196 | def handle_attr(self, node, first_token, last_token): 197 | # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] 198 | # Attribute node has ".attr" (2 tokens) after the last child. 199 | dot = self._code.find_token(last_token, token.OP, '.') 200 | name = self._code.next_token(dot) 201 | util.expect_token(name, token.NAME) 202 | return (first_token, name) 203 | 204 | visit_attribute = handle_attr 205 | visit_assignattr = handle_attr 206 | visit_delattr = handle_attr 207 | 208 | def handle_def(self, node, first_token, last_token): 209 | # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] 210 | # With astroid, nodes that start with a doc-string can have an empty body, in which case we 211 | # need to adjust the last token to include the doc string. 212 | if not node.body and (getattr(node, 'doc_node', None) or getattr(node, 'doc', None)): # type: ignore[union-attr] 213 | last_token = self._code.find_token(last_token, token.STRING) 214 | 215 | # Include @ from decorator 216 | if first_token.index > 0: 217 | prev = self._code.prev_token(first_token) 218 | if util.match_token(prev, token.OP, '@'): 219 | first_token = prev 220 | return (first_token, last_token) 221 | 222 | visit_classdef = handle_def 223 | visit_functiondef = handle_def 224 | 225 | def handle_following_brackets(self, node, last_token, opening_bracket): 226 | # type: (AstNode, util.Token, str) -> util.Token 227 | # This is for calls and subscripts, which have a pair of brackets 228 | # at the end which may contain no nodes, e.g. foo() or bar[:]. 229 | # We look for the opening bracket and then let the matching pair be found automatically 230 | # Remember that last_token is at the end of all children, 231 | # so we are not worried about encountering a bracket that belongs to a child. 232 | first_child = next(cast(Callable, self._iter_children)(node)) 233 | call_start = self._code.find_token(first_child.last_token, token.OP, opening_bracket) 234 | if call_start.index > last_token.index: 235 | last_token = call_start 236 | return last_token 237 | 238 | def visit_call(self, node, first_token, last_token): 239 | # type: (util.Token, util.Token, util.Token) -> Tuple[util.Token, util.Token] 240 | last_token = self.handle_following_brackets(node, last_token, '(') 241 | 242 | # Handling a python bug with decorators with empty parens, e.g. 243 | # @deco() 244 | # def ... 245 | if util.match_token(first_token, token.OP, '@'): 246 | first_token = self._code.next_token(first_token) 247 | return (first_token, last_token) 248 | 249 | def visit_matchclass(self, node, first_token, last_token): 250 | # type: (util.Token, util.Token, util.Token) -> Tuple[util.Token, util.Token] 251 | last_token = self.handle_following_brackets(node, last_token, '(') 252 | return (first_token, last_token) 253 | 254 | def visit_subscript(self, 255 | node, # type: AstNode 256 | first_token, # type: util.Token 257 | last_token, # type: util.Token 258 | ): 259 | # type: (...) -> Tuple[util.Token, util.Token] 260 | last_token = self.handle_following_brackets(node, last_token, '[') 261 | return (first_token, last_token) 262 | 263 | def visit_slice(self, node, first_token, last_token): 264 | # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] 265 | # consume `:` tokens to the left and right. In Python 3.9, Slice nodes are 266 | # given a col_offset, (and end_col_offset), so this will always start inside 267 | # the slice, even if it is the empty slice. However, in 3.8 and below, this 268 | # will only expand to the full slice if the slice contains a node with a 269 | # col_offset. So x[:] will only get the correct tokens in 3.9, but x[1:] and 270 | # x[:1] will even on earlier versions of Python. 271 | while True: 272 | prev = self._code.prev_token(first_token) 273 | if prev.string != ':': 274 | break 275 | first_token = prev 276 | while True: 277 | next_ = self._code.next_token(last_token) 278 | if next_.string != ':': 279 | break 280 | last_token = next_ 281 | return (first_token, last_token) 282 | 283 | def handle_bare_tuple(self, node, first_token, last_token): 284 | # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] 285 | # A bare tuple doesn't include parens; if there is a trailing comma, make it part of the tuple. 286 | maybe_comma = self._code.next_token(last_token) 287 | if util.match_token(maybe_comma, token.OP, ','): 288 | last_token = maybe_comma 289 | return (first_token, last_token) 290 | 291 | # In Python3.8 parsed tuples include parentheses when present. 292 | def handle_tuple_nonempty(self, node, first_token, last_token): 293 | # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] 294 | assert isinstance(node, ast.Tuple) or isinstance(node, AstroidBaseContainer) 295 | # It's a bare tuple if the first token belongs to the first child. The first child may 296 | # include extraneous parentheses (which don't create new nodes), so account for those too. 297 | child = node.elts[0] 298 | if TYPE_CHECKING: 299 | child = cast(AstNode, child) 300 | child_first, child_last = self._gobble_parens(child.first_token, child.last_token, True) 301 | if first_token == child_first: 302 | return self.handle_bare_tuple(node, first_token, last_token) 303 | return (first_token, last_token) 304 | 305 | def visit_tuple(self, node, first_token, last_token): 306 | # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] 307 | assert isinstance(node, ast.Tuple) or isinstance(node, AstroidBaseContainer) 308 | if not node.elts: 309 | # An empty tuple is just "()", and we need no further info. 310 | return (first_token, last_token) 311 | return self.handle_tuple_nonempty(node, first_token, last_token) 312 | 313 | def _gobble_parens(self, first_token, last_token, include_all=False): 314 | # type: (util.Token, util.Token, bool) -> Tuple[util.Token, util.Token] 315 | # Expands a range of tokens to include one or all pairs of surrounding parentheses, and 316 | # returns (first, last) tokens that include these parens. 317 | while first_token.index > 0: 318 | prev = self._code.prev_token(first_token) 319 | next = self._code.next_token(last_token) 320 | if util.match_token(prev, token.OP, '(') and util.match_token(next, token.OP, ')'): 321 | first_token, last_token = prev, next 322 | if include_all: 323 | continue 324 | break 325 | return (first_token, last_token) 326 | 327 | def visit_str(self, node, first_token, last_token): 328 | # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] 329 | return self.handle_str(first_token, last_token) 330 | 331 | def visit_joinedstr(self, 332 | node, # type: AstNode 333 | first_token, # type: util.Token 334 | last_token, # type: util.Token 335 | ): 336 | # type: (...) -> Tuple[util.Token, util.Token] 337 | if sys.version_info < (3, 12): 338 | # Older versions don't tokenize the contents of f-strings 339 | return self.handle_str(first_token, last_token) 340 | 341 | last = first_token 342 | while True: 343 | if util.match_token(last, getattr(token, "FSTRING_START")): 344 | # Python 3.12+ has tokens for the start (e.g. `f"`) and end (`"`) 345 | # of the f-string. We can't just look for the next FSTRING_END 346 | # because f-strings can be nested, e.g. f"{f'{x}'}", so we need 347 | # to treat this like matching balanced parentheses. 348 | count = 1 349 | while count > 0: 350 | last = self._code.next_token(last) 351 | # mypy complains about token.FSTRING_START and token.FSTRING_END. 352 | if util.match_token(last, getattr(token, "FSTRING_START")): 353 | count += 1 354 | elif util.match_token(last, getattr(token, "FSTRING_END")): 355 | count -= 1 356 | last_token = last 357 | last = self._code.next_token(last_token) 358 | elif util.match_token(last, token.STRING): 359 | # Similar to handle_str, we also need to handle adjacent strings. 360 | last_token = last 361 | last = self._code.next_token(last_token) 362 | else: 363 | break 364 | return (first_token, last_token) 365 | 366 | def visit_bytes(self, node, first_token, last_token): 367 | # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] 368 | return self.handle_str(first_token, last_token) 369 | 370 | def handle_str(self, first_token, last_token): 371 | # type: (util.Token, util.Token) -> Tuple[util.Token, util.Token] 372 | # Multiple adjacent STRING tokens form a single string. 373 | last = self._code.next_token(last_token) 374 | while util.match_token(last, token.STRING): 375 | last_token = last 376 | last = self._code.next_token(last_token) 377 | return (first_token, last_token) 378 | 379 | def handle_num(self, 380 | node, # type: AstNode 381 | value, # type: Union[complex, int, numbers.Number] 382 | first_token, # type: util.Token 383 | last_token, # type: util.Token 384 | ): 385 | # type: (...) -> Tuple[util.Token, util.Token] 386 | # A constant like '-1' gets turned into two tokens; this will skip the '-'. 387 | while util.match_token(last_token, token.OP): 388 | last_token = self._code.next_token(last_token) 389 | 390 | if isinstance(value, complex): 391 | # A complex number like -2j cannot be compared directly to 0 392 | # A complex number like 1-2j is expressed as a binary operation 393 | # so we don't need to worry about it 394 | value = value.imag 395 | 396 | # This makes sure that the - is included 397 | if value < 0 and first_token.type == token.NUMBER: # type: ignore[operator] 398 | first_token = self._code.prev_token(first_token) 399 | return (first_token, last_token) 400 | 401 | def visit_num(self, node, first_token, last_token): 402 | # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] 403 | n = node.n # type: ignore[union-attr] # ast.Num has been removed in python 3.14 404 | assert isinstance(n, (complex, int, numbers.Number)) 405 | return self.handle_num(node, n, first_token, last_token) 406 | 407 | def visit_const(self, node, first_token, last_token): 408 | # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] 409 | assert isinstance(node, ast.Constant) or isinstance(node, nc.Const) 410 | if isinstance(node.value, numbers.Number): 411 | return self.handle_num(node, node.value, first_token, last_token) 412 | elif isinstance(node.value, (str, bytes)): 413 | return self.visit_str(node, first_token, last_token) 414 | return (first_token, last_token) 415 | 416 | visit_constant = visit_const 417 | 418 | def visit_keyword(self, node, first_token, last_token): 419 | # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] 420 | # Until python 3.9 (https://bugs.python.org/issue40141), 421 | # ast.keyword nodes didn't have line info. Astroid has lineno None. 422 | assert isinstance(node, ast.keyword) or isinstance(node, nc.Keyword) 423 | if node.arg is not None and getattr(node, 'lineno', None) is None: 424 | equals = self._code.find_token(first_token, token.OP, '=', reverse=True) 425 | name = self._code.prev_token(equals) 426 | util.expect_token(name, token.NAME, node.arg) 427 | first_token = name 428 | return (first_token, last_token) 429 | 430 | def visit_starred(self, node, first_token, last_token): 431 | # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] 432 | # Astroid has 'Starred' nodes (for "foo(*bar)" type args), but they need to be adjusted. 433 | if not util.match_token(first_token, token.OP, '*'): 434 | star = self._code.prev_token(first_token) 435 | if util.match_token(star, token.OP, '*'): 436 | first_token = star 437 | return (first_token, last_token) 438 | 439 | def visit_assignname(self, node, first_token, last_token): 440 | # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] 441 | # Astroid may turn 'except' clause into AssignName, but we need to adjust it. 442 | if util.match_token(first_token, token.NAME, 'except'): 443 | colon = self._code.find_token(last_token, token.OP, ':') 444 | first_token = last_token = self._code.prev_token(colon) 445 | return (first_token, last_token) 446 | 447 | # Async nodes should typically start with the word 'async' 448 | # but Python < 3.7 doesn't put the col_offset there 449 | # AsyncFunctionDef is slightly different because it might have 450 | # decorators before that, which visit_functiondef handles 451 | def handle_async(self, node, first_token, last_token): 452 | # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] 453 | if not first_token.string == 'async': 454 | first_token = self._code.prev_token(first_token) 455 | return (first_token, last_token) 456 | 457 | visit_asyncfor = handle_async 458 | visit_asyncwith = handle_async 459 | 460 | def visit_asyncfunctiondef(self, 461 | node, # type: AstNode 462 | first_token, # type: util.Token 463 | last_token, # type: util.Token 464 | ): 465 | # type: (...) -> Tuple[util.Token, util.Token] 466 | if util.match_token(first_token, token.NAME, 'def'): 467 | # Include the 'async' token 468 | first_token = self._code.prev_token(first_token) 469 | return self.visit_functiondef(node, first_token, last_token) 470 | -------------------------------------------------------------------------------- /tests/test_mark_tokens.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import ast 3 | import inspect 4 | import io 5 | import os 6 | import pytest 7 | import re 8 | import sys 9 | import textwrap 10 | import token 11 | from typing import List 12 | import unittest 13 | from time import time 14 | 15 | import astroid 16 | from asttokens import util, ASTTokens 17 | 18 | from . import tools 19 | 20 | try: 21 | from astroid.nodes.utils import Position as AstroidPosition 22 | except Exception: 23 | AstroidPosition = () 24 | 25 | 26 | class TestMarkTokens(unittest.TestCase): 27 | maxDiff = None 28 | 29 | # We use the same test cases to test both nodes produced by the built-in `ast` module, and by 30 | # the `astroid` library. The latter derives TestAstroid class from TestMarkTokens. For checks 31 | # that differ between them, .is_astroid_test allows to distinguish. 32 | is_astroid_test = False 33 | astroid_version = None # type: int | None 34 | module = ast 35 | 36 | def create_mark_checker(self, source, verify=True): 37 | atok = self.create_asttokens(source) 38 | checker = tools.MarkChecker(atok) 39 | 40 | # The last token should always be an ENDMARKER 41 | # None of the nodes should contain that token 42 | assert atok.tokens[-1].type == token.ENDMARKER 43 | if atok.text: # except for empty files 44 | for node in checker.all_nodes: 45 | assert node.last_token.type != token.ENDMARKER 46 | 47 | if verify: 48 | checker.verify_all_nodes(self) 49 | return checker 50 | 51 | @staticmethod 52 | def create_asttokens(source): 53 | return ASTTokens(source, parse=True) 54 | 55 | def print_timing(self): 56 | # Print the timing of mark_tokens(). This doesn't normally run as a unittest, but if you'd like 57 | # to see timings, e.g. while optimizing the implementation, run this to see them: 58 | # 59 | # nosetests -m print_timing -s tests.test_mark_tokens tests.test_astroid 60 | # 61 | # pylint: disable=no-self-use 62 | import timeit 63 | print("mark_tokens", sorted(timeit.repeat( 64 | setup=textwrap.dedent( 65 | ''' 66 | import ast, asttokens 67 | source = "foo(bar(1 + 2), 'hello' + ', ' + 'world')" 68 | atok = asttokens.ASTTokens(source) 69 | tree = ast.parse(source) 70 | '''), 71 | stmt='atok.mark_tokens(tree)', 72 | repeat=3, 73 | number=1000))) 74 | 75 | 76 | def test_mark_tokens_simple(self): 77 | source = tools.read_fixture('astroid', 'module.py') 78 | m = self.create_mark_checker(source) 79 | 80 | # Line 14 is: [indent 4] MY_DICT[key] = val 81 | self.assertEqual(m.view_nodes_at(14, 4), { 82 | "Name:MY_DICT", 83 | "Subscript:MY_DICT[key]", 84 | "Assign:MY_DICT[key] = val" 85 | }) 86 | 87 | # Line 35 is: [indent 12] raise XXXError() 88 | self.assertEqual(m.view_nodes_at(35, 12), {'Raise:raise XXXError()'}) 89 | self.assertEqual(m.view_nodes_at(35, 18), {'Call:XXXError()', 'Name:XXXError'}) 90 | 91 | # Line 53 is: [indent 12] autre = [a for (a, b) in MY_DICT if b] 92 | self.assertEqual(m.view_nodes_at(53, 20), {'ListComp:[a for (a, b) in MY_DICT if b]'}) 93 | self.assertEqual(m.view_nodes_at(53, 21), {'Name:a'}) 94 | if self.is_astroid_test: 95 | self.assertEqual(m.view_nodes_at(53, 23), {'Comprehension:for (a, b) in MY_DICT if b'}) 96 | else: 97 | self.assertEqual(m.view_nodes_at(53, 23), {'comprehension:for (a, b) in MY_DICT if b'}) 98 | 99 | # Line 59 is: [indent 12] global_access(local, val=autre) 100 | self.assertEqual(m.view_node_types_at(59, 12), {'Name', 'Call', 'Expr'}) 101 | self.assertEqual(m.view_nodes_at(59, 26), {'Name:local'}) 102 | if self.is_astroid_test: 103 | self.assertEqual(m.view_nodes_at(59, 33), {'Keyword:val=autre'}) 104 | else: 105 | self.assertEqual(m.view_nodes_at(59, 33), {'keyword:val=autre'}) 106 | self.assertEqual(m.view_nodes_at(59, 37), {'Name:autre'}) 107 | 108 | def test_mark_tokens_multiline(self): 109 | source = ( 110 | """( # line1 111 | a, # line2 112 | b + # line3 113 | c + # line4 114 | d # line5 115 | )""") 116 | m = self.create_mark_checker(source) 117 | 118 | self.assertIn('Name:a', m.view_nodes_at(2, 0)) 119 | self.assertEqual(m.view_nodes_at(3, 0), { 120 | 'Name:b', 121 | 'BinOp:b + # line3\n c', 122 | 'BinOp:b + # line3\n c + # line4\n d', 123 | }) 124 | 125 | all_text = {m.atok.get_text(node) for node in m.all_nodes} 126 | self.assertEqual(all_text, { 127 | source, 128 | 'a', 'b', 'c', 'd', 129 | # All other expressions preserve newlines and comments but are parenthesized. 130 | 'b + # line3\n c', 131 | 'b + # line3\n c + # line4\n d', 132 | }) 133 | self.assertIn('Tuple:' + source, m.view_nodes_at(1, 0)) 134 | 135 | 136 | def verify_fixture_file(self, path): 137 | source = tools.read_fixture(path) 138 | m = self.create_mark_checker(source, verify=False) 139 | tested_nodes = m.verify_all_nodes(self) 140 | 141 | exp_index = 1 + (3 if self.is_astroid_test else 0) 142 | if not self.is_astroid_test: 143 | # For ast on Python 3.9, slices are expressions, we handle them and test them. 144 | if issubclass(ast.Slice, ast.expr): 145 | exp_index += 1 146 | else: 147 | # Astroid v3 and v4 have some changes from v2 148 | if self.astroid_version in (3, 4): 149 | exp_index += 1 150 | exp_tested_nodes = self.expect_tested_nodes[path][exp_index] 151 | self.assertEqual(tested_nodes, exp_tested_nodes) 152 | 153 | 154 | # There is not too much need to verify these counts. The main reason is: if we find that some 155 | # change reduces the count by a lot, it's a red flag that the test is now covering fewer nodes. 156 | expect_tested_nodes = { 157 | # AST | Astroid 158 | # Py2 Py3 Py3+slice | Py2 Py3+v2 Py3+v3 159 | 'astroid/__init__.py': ( 4, 4, 4, 4, 4, 4, ), 160 | 'astroid/absimport.py': ( 4, 3, 3, 4, 3, 3, ), 161 | 'astroid/all.py': ( 21, 23, 23, 21, 23, 23, ), 162 | 'astroid/clientmodule_test.py': ( 75, 67, 67, 69, 69, 69, ), 163 | 'astroid/descriptor_crash.py': ( 30, 28, 28, 30, 30, 30, ), 164 | 'astroid/email.py': ( 3, 3, 3, 1, 1, 1, ), 165 | 'astroid/format.py': ( 64, 61, 61, 62, 62, 62, ), 166 | 'astroid/module.py': ( 185, 174, 174, 171, 171, 173, ), 167 | 'astroid/module2.py': ( 248, 253, 255, 240, 253, 253, ), 168 | 'astroid/noendingnewline.py': ( 57, 59, 59, 57, 63, 63, ), 169 | 'astroid/notall.py': ( 15, 17, 17, 15, 17, 17, ), 170 | 'astroid/recursion.py': ( 6, 6, 6, 4, 4, 4, ), 171 | 'astroid/suppliermodule_test.py': ( 20, 17, 17, 18, 18, 18, ), 172 | } 173 | 174 | # This set of methods runs verifications for the variety of syntax constructs used in the 175 | # fixture test files. 176 | # pylint: disable=multiple-statements 177 | def test_fixture1(self): self.verify_fixture_file('astroid/__init__.py') 178 | def test_fixture2(self): self.verify_fixture_file('astroid/absimport.py') 179 | def test_fixture3(self): self.verify_fixture_file('astroid/all.py') 180 | def test_fixture4(self): self.verify_fixture_file('astroid/clientmodule_test.py') 181 | def test_fixture5(self): self.verify_fixture_file('astroid/descriptor_crash.py') 182 | def test_fixture6(self): self.verify_fixture_file('astroid/email.py') 183 | def test_fixture7(self): self.verify_fixture_file('astroid/format.py') 184 | def test_fixture8(self): self.verify_fixture_file('astroid/module.py') 185 | def test_fixture9(self): self.verify_fixture_file('astroid/module2.py') 186 | def test_fixture10(self): self.verify_fixture_file('astroid/noendingnewline.py') 187 | def test_fixture11(self): self.verify_fixture_file('astroid/notall.py') 188 | def test_fixture12(self): self.verify_fixture_file('astroid/recursion.py') 189 | def test_fixture13(self): self.verify_fixture_file('astroid/suppliermodule_test.py') 190 | 191 | 192 | def test_deep_recursion(self): 193 | # This testcase has 1050 strings joined with '+', which causes naive recursions to fail with 194 | # 'maximum recursion depth exceeded' error. We actually handle it just fine, but we can't use 195 | # to_source() on it because it chokes on recursion depth. So we test individual nodes. 196 | source = tools.read_fixture('astroid/joined_strings.py') 197 | 198 | if self.is_astroid_test: 199 | if getattr(astroid, '__version__', '1') >= '2': 200 | # Astroid 2 no longer supports this; see 201 | # https://github.com/PyCQA/astroid/issues/557#issuecomment-396004274 202 | self.skipTest('astroid-2.0 does not support this') 203 | 204 | # Astroid < 2 does support this with optimize_ast set to True 205 | astroid.MANAGER.optimize_ast = True 206 | try: 207 | m = self.create_mark_checker(source, verify=False) 208 | finally: 209 | astroid.MANAGER.optimize_ast = False 210 | 211 | self.assertEqual(len(m.all_nodes), 4) # This is the result of astroid's optimization 212 | self.assertEqual(m.view_node_types_at(1, 0), {'Module', 'Assign', 'AssignName'}) 213 | const = next(n for n in m.all_nodes if isinstance(n, astroid.nodes.Const)) 214 | # TODO: Astroid's optimization makes it impossible to get the right start-end information 215 | # for the combined node. So this test fails. To avoid it, don't set 'optimize_ast=True'. To 216 | # fix it, astroid would probably need to record the info from the nodes it's combining. Or 217 | # astroid could avoid the need for the optimization by using an explicit stack like we do. 218 | #self.assertEqual(m.atok.get_text_range(const), (5, len(source) - 1)) 219 | else: 220 | m = self.create_mark_checker(source, verify=False) 221 | self.assertEqual(len(m.all_nodes), 2104) 222 | self.assertEqual(m.view_node(m.all_nodes[-1]), 223 | "Constant:'F1akOFFiRIgPHTZksKBAgMCLGTdGNIAAQgKfDAcgZbj0odOnUA8GBAA7'") 224 | self.assertEqual(m.view_node(m.all_nodes[-2]), 225 | "Constant:'Ii0uLDAxLzI0Mh44U0gxMDI5JkM0JjU3NDY6Kjc5Njo7OUE8Ozw+Oz89QTxA'") 226 | self.assertEqual(m.view_node(m.all_nodes[1053]), 227 | "Constant:'R0lGODlhigJnAef/AAABAAEEAAkCAAMGAg0GBAYJBQoMCBMODQ4QDRITEBkS'") 228 | self.assertEqual(m.view_node(m.all_nodes[1052]), 229 | "BinOp:'R0lGODlhigJnAef/AAABAAEEAAkCAAMGAg0GBAYJBQoMCBMODQ4QDRITEBkS'\r\n" + 230 | " +'CxsSEhkWDhYYFQ0aJhkaGBweGyccGh8hHiIkIiMmGTEiHhQoPSYoJSkqKDcp'") 231 | 232 | binop = next(n for n in m.all_nodes if n.__class__.__name__ == 'BinOp') 233 | self.assertTrue(m.atok.get_text(binop).startswith("'R0l")) 234 | self.assertTrue(m.atok.get_text(binop).endswith("AA7'")) 235 | 236 | assign = next(n for n in m.all_nodes if n.__class__.__name__ == 'Assign') 237 | self.assertTrue(m.atok.get_text(assign).startswith("x = (")) 238 | self.assertTrue(m.atok.get_text(assign).endswith(")")) 239 | 240 | def test_slices(self): 241 | # Make sure we don't fail on parsing slices of the form `foo[4:]`. 242 | source = "(foo.Area_Code, str(foo.Phone)[:3], str(foo.Phone)[3:], foo[:], bar[::2, :], bar2[:, ::2], [a[:]][::-1])" 243 | m = self.create_mark_checker(source) 244 | self.assertIn("Tuple:" + source, m.view_nodes_at(1, 0)) 245 | self.assertEqual(m.view_nodes_at(1, 1), 246 | { "Attribute:foo.Area_Code", "Name:foo" }) 247 | self.assertEqual(m.view_nodes_at(1, 16), 248 | { "Subscript:str(foo.Phone)[:3]", "Call:str(foo.Phone)", "Name:str"}) 249 | self.assertEqual(m.view_nodes_at(1, 36), 250 | { "Subscript:str(foo.Phone)[3:]", "Call:str(foo.Phone)", "Name:str"}) 251 | # Slice and ExtSlice nodes are wrong, and in particular placed with parents. They are not very 252 | # important, so we skip them here. 253 | self.assertEqual({n for n in m.view_nodes_at(1, 56) if 'Slice:' not in n}, 254 | { "Subscript:foo[:]", "Name:foo" }) 255 | self.assertEqual({n for n in m.view_nodes_at(1, 64) if 'Slice:' not in n and 'Tuple:' not in n}, 256 | { "Subscript:bar[::2, :]", "Name:bar" }) 257 | 258 | def test_adjacent_strings(self): 259 | source = """ 260 | foo = 'x y z' \\ 261 | '''a b c''' "u v w" 262 | bar = ('x y z' # comment2 263 | 'a b c' # comment3 264 | 'u v w' 265 | ) 266 | """ 267 | m = self.create_mark_checker(source) 268 | node_name = 'Const' if self.is_astroid_test else 'Constant' 269 | self.assertEqual(m.view_nodes_at(2, 6), { 270 | node_name + ":'x y z' \\\n'''a b c''' \"u v w\"" 271 | }) 272 | self.assertEqual(m.view_nodes_at(4, 7), { 273 | node_name + ":'x y z' # comment2\n 'a b c' # comment3\n 'u v w'" 274 | }) 275 | 276 | 277 | def test_print_function(self): 278 | # This testcase imports print as function (using from __future__). Check that we can parse it. 279 | # verify_all_nodes doesn't work on Python 2 because the print() call parsed in isolation 280 | # is viewed as a Print node since it doesn't see the future import 281 | source = tools.read_fixture('astroid/nonregr.py') 282 | m = self.create_mark_checker(source, verify=True) 283 | 284 | # Line 16 is: [indent 8] print(v.get('yo')) 285 | self.assertEqual(m.view_nodes_at(16, 8), 286 | { "Call:print(v.get('yo'))", "Expr:print(v.get('yo'))", "Name:print" }) 287 | self.assertEqual(m.view_nodes_at(16, 14), {"Call:v.get('yo')", "Attribute:v.get", "Name:v"}) 288 | 289 | # To make sure we can handle various hard cases, we include tests for issues reported for a 290 | # similar project here: https://bitbucket.org/plas/thonny 291 | 292 | def test_nonascii(self): 293 | # Test of https://bitbucket.org/plas/thonny/issues/162/weird-range-marker-crash-with-non-ascii 294 | # Only on PY3 because Py2 doesn't support unicode identifiers. 295 | for source in ( 296 | "℘·2=1+a℘·b+a℘·2b", # example from https://github.com/python/cpython/issues/68382 297 | "sünnikuupäev=str((18+int(isikukood[0:1])-1)//2)+isikukood[1:3]", 298 | "sünnikuupaev=str((18+int(isikukood[0:1])-1)//2)+isikukood[1:3]"): 299 | m = self.create_mark_checker(source) 300 | self.assertEqual(m.view_nodes_at(1, 0), { 301 | "Module:%s" % source, 302 | "Assign:%s" % source, 303 | "%s:%s" % ("AssignName" if self.is_astroid_test else "Name", source.split("=")[0]), 304 | }) 305 | 306 | def test_bytes_smoke(self): 307 | const = 'Const' if self.is_astroid_test else ( 308 | 'Constant' 309 | if sys.version_info >= (3, 8) 310 | else 'Bytes' 311 | ) 312 | 313 | for source in ( 314 | 'b"123abd"', 315 | r"b'\x12\x3a\xbc'", 316 | ): 317 | expected = { 318 | "Module:" + source, 319 | const + ":" + source, 320 | "Expr:" + source, 321 | } 322 | 323 | m = self.create_mark_checker(source) 324 | self.assertEqual(m.view_nodes_at(1, 1), expected) 325 | m.verify_all_nodes(self) 326 | 327 | 328 | if sys.version_info[0:2] >= (3, 6): 329 | # f-strings are only supported in Python36. We don't handle them fully, for a couple of 330 | # reasons: parsed AST nodes are not annotated with correct line and col_offset (see 331 | # https://bugs.python.org/issue29051), and there are confusingly two levels of tokenizing. 332 | # Meanwhile, we only parse to the level of JoinedStr, and verify that. 333 | def test_fstrings(self): 334 | for source in ( 335 | '(f"He said his name is {name!r}.",)', 336 | "f'{function(kwarg=24)}'", 337 | '''(f'{f"""{f"{val!r}"}"""}')''', 338 | 'a = f"""result: {value:{width}.{precision}}"""', 339 | """[f"abc {a['x']} def"]""", 340 | "def t():\n return f'{function(kwarg=24)}'"): 341 | self.create_mark_checker(source) 342 | 343 | def test_adjacent_joined_strings(self): 344 | source = """ 345 | foo = f'x y z' \\ 346 | f'''a b c''' f"u v w" 347 | bar = ('x y z' # comment2 348 | 'a b c' # comment3 349 | f'u v w' 350 | ) 351 | """ 352 | m = self.create_mark_checker(source) 353 | self.assertEqual(m.view_nodes_at(2, 6), { 354 | "JoinedStr:f'x y z' \\\nf'''a b c''' f\"u v w\"" 355 | }) 356 | self.assertEqual(m.view_nodes_at(4, 7), { 357 | "JoinedStr:'x y z' # comment2\n 'a b c' # comment3\n f'u v w'" 358 | }) 359 | 360 | if sys.version_info >= (3, 12): 361 | def test_fstrings_3_12_plus(self): 362 | const = 'Const' if self.is_astroid_test else 'Constant' 363 | 364 | m = self.create_mark_checker( 365 | 'x = (f"Wobble {f"{func(kwarg=f"{boo!r}")}"!r}.",)', 366 | ) 367 | 368 | self.assertEqual(m.view_nodes_at(1, 6), { 369 | 'JoinedStr:f"Wobble {f"{func(kwarg=f"{boo!r}")}"!r}."', 370 | }) 371 | 372 | # TODO: Nodes within an f-string currently don't have tokens attached so 373 | # we don't get their text ranges (hence no text after the colon in the 374 | # below assertion). Ideally we should update the mark-tokens logic to 375 | # attach tokens to the nodes within f-strings, at which point this test 376 | # should be updated with the relevant node texts. 377 | node, = m.get_nodes_at(1, 6) 378 | self.assertEqual( 379 | [ 380 | const + ':', 381 | 'FormattedValue:', 382 | const + ':', 383 | ], 384 | [m.view_node(x) for x in node.values], 385 | "Wrong children within JoinedStr", 386 | ) 387 | 388 | m.verify_all_nodes(self) 389 | 390 | def test_splat(self): 391 | # See https://bitbucket.org/plas/thonny/issues/151/debugger-crashes-when-encountering-a-splat 392 | source = textwrap.dedent(""" 393 | arr = [1,2,3,4,5] 394 | def print_all(a, b, c, d, e): 395 | print(a, b, c, d ,e) 396 | print_all(*arr) 397 | """) 398 | m = self.create_mark_checker(source) 399 | self.assertEqual(m.view_nodes_at(5, 0), 400 | { "Expr:print_all(*arr)", "Call:print_all(*arr)", "Name:print_all" }) 401 | self.assertEqual(m.view_nodes_at(5, 10), { "Starred:*arr" }) 402 | self.assertEqual(m.view_nodes_at(5, 11), { "Name:arr" }) 403 | 404 | 405 | def test_paren_attr(self): 406 | # See https://bitbucket.org/plas/thonny/issues/123/attribute-access-on-parenthesized 407 | source = "(x).foo()" 408 | m = self.create_mark_checker(source) 409 | self.assertEqual(m.view_nodes_at(1, 1), {"Name:x"}) 410 | self.assertEqual(m.view_nodes_at(1, 0), 411 | {"Module:(x).foo()", "Expr:(x).foo()", "Call:(x).foo()", "Attribute:(x).foo"}) 412 | 413 | def test_conditional_expr(self): 414 | # See https://bitbucket.org/plas/thonny/issues/108/ast-marker-crashes-with-conditional 415 | source = "a = True if True else False\nprint(a)" 416 | m = self.create_mark_checker(source) 417 | name_a = 'AssignName:a' if self.is_astroid_test else 'Name:a' 418 | const_true = ('Const:True' if self.is_astroid_test else 419 | 'Constant:True') 420 | self.assertEqual(m.view_nodes_at(1, 0), 421 | {name_a, "Assign:a = True if True else False", "Module:" + source}) 422 | self.assertEqual(m.view_nodes_at(1, 4), 423 | {const_true, 'IfExp:True if True else False'}) 424 | self.assertEqual(m.view_nodes_at(2, 0), {"Name:print", "Call:print(a)", "Expr:print(a)"}) 425 | 426 | def test_calling_lambdas(self): 427 | # See https://bitbucket.org/plas/thonny/issues/96/calling-lambdas-crash-the-debugger 428 | source = "y = (lambda x: x + 1)(2)" 429 | m = self.create_mark_checker(source) 430 | self.assertEqual(m.view_nodes_at(1, 4), {'Call:(lambda x: x + 1)(2)'}) 431 | self.assertEqual(m.view_nodes_at(1, 15), {'BinOp:x + 1', 'Name:x'}) 432 | if self.is_astroid_test: 433 | self.assertEqual(m.view_nodes_at(1, 0), {'AssignName:y', 'Assign:'+source, 'Module:'+source}) 434 | else: 435 | self.assertEqual(m.view_nodes_at(1, 0), {'Name:y', 'Assign:' + source, 'Module:' + source}) 436 | 437 | def test_comprehensions(self): 438 | # See https://bitbucket.org/plas/thonny/issues/8/range-marker-doesnt-work-correctly-with 439 | for source in ( 440 | "[(key, val) for key, val in ast.iter_fields(node)]", 441 | "((key, val) for key, val in ast.iter_fields(node))", 442 | "{(key, val) for key, val in ast.iter_fields(node)}", 443 | "{key: val for key, val in ast.iter_fields(node)}", 444 | "[[c for c in key] for key, val in ast.iter_fields(node)]"): 445 | self.create_mark_checker(source) 446 | 447 | def test_trailing_commas(self): 448 | # Make sure we handle trailing commas on comma-separated structures (e.g. tuples, sets, etc.) 449 | for source in ( 450 | "(a,b,)", 451 | "[c,d,]", 452 | "{e,f,}", 453 | "{h:1,i:2,}"): 454 | self.create_mark_checker(source) 455 | 456 | def test_tuples(self): 457 | def get_tuples(code): 458 | m = self.create_mark_checker(code) 459 | return [m.atok.get_text(n) for n in m.all_nodes if n.__class__.__name__ == "Tuple"] 460 | 461 | self.assertEqual(get_tuples("a,"), ["a,"]) 462 | self.assertEqual(get_tuples("(a,)"), ["(a,)"]) 463 | self.assertEqual(get_tuples("(a),"), ["(a),"]) 464 | self.assertEqual(get_tuples("((a),)"), ["((a),)"]) 465 | self.assertEqual(get_tuples("(a,),"), ["(a,),", "(a,)"]) 466 | self.assertEqual(get_tuples("((a,),)"), ["((a,),)", "(a,)"]) 467 | self.assertEqual(get_tuples("()"), ["()"]) 468 | self.assertEqual(get_tuples("(),"), ["(),", "()"]) 469 | self.assertEqual(get_tuples("((),)"), ["((),)", "()"]) 470 | self.assertEqual(get_tuples("((),(a,))"), ["((),(a,))", "()", "(a,)"]) 471 | self.assertEqual(get_tuples("((),(a,),)"), ["((),(a,),)", "()", "(a,)"]) 472 | self.assertEqual(get_tuples("((),(a,),),"), ["((),(a,),),", "((),(a,),)", "()", "(a,)"]) 473 | self.assertEqual(get_tuples('((foo, bar),)'), ['((foo, bar),)', '(foo, bar)']) 474 | self.assertEqual(get_tuples('(foo, bar),'), ['(foo, bar),', '(foo, bar)']) 475 | self.assertEqual(get_tuples('def foo(a=()): ((x, (y,)),) = ((), (a,),),'), [ 476 | '()', '((x, (y,)),)', '(x, (y,))', '(y,)', '((), (a,),),', '((), (a,),)', '()', '(a,)']) 477 | self.assertEqual(get_tuples('def foo(a=()): ((x, (y,)),) = [(), [a,],],'), [ 478 | '()', '((x, (y,)),)', '(x, (y,))', '(y,)', '[(), [a,],],', '()']) 479 | 480 | def test_dict_order(self): 481 | # Make sure we iterate over dict keys/values in source order. 482 | # See https://github.com/gristlabs/asttokens/issues/31 483 | source = 'f({1: (2), 3: 4}, object())' 484 | self.create_mark_checker(source) 485 | 486 | def test_del_dict(self): 487 | # See https://bitbucket.org/plas/thonny/issues/24/try-del-from-dictionary-in-debugging-mode 488 | source = "x = {4:5}\ndel x[4]" 489 | m = self.create_mark_checker(source) 490 | self.assertEqual(m.view_nodes_at(1, 4), {'Dict:{4:5}'}) 491 | if self.is_astroid_test: 492 | self.assertEqual(m.view_nodes_at(1, 5), {'Const:4'}) 493 | else: 494 | self.assertEqual(m.view_nodes_at(1, 5), {'Constant:4'}) 495 | self.assertEqual(m.view_nodes_at(2, 0), {'Delete:del x[4]'}) 496 | self.assertEqual(m.view_nodes_at(2, 4), {'Name:x', 'Subscript:x[4]'}) 497 | 498 | def test_bad_tokenless_types(self): 499 | # Cases where _get_text_positions_tokenless is incorrect in 3.8. 500 | source = textwrap.dedent(""" 501 | def foo(*, name: str): # keyword-only argument with type annotation 502 | pass 503 | 504 | f(*(x)) # ast.Starred with parentheses 505 | """) 506 | self.create_mark_checker(source) 507 | 508 | def test_return_annotation(self): 509 | # See https://bitbucket.org/plas/thonny/issues/9/range-marker-crashes-on-function-return 510 | source = textwrap.dedent(""" 511 | def liida_arvud(x: int, y: int) -> int: 512 | return x + y 513 | """) 514 | m = self.create_mark_checker(source) 515 | self.assertEqual(m.view_nodes_at(2, 0), 516 | {'FunctionDef:def liida_arvud(x: int, y: int) -> int:\n return x + y'}) 517 | if self.is_astroid_test: 518 | self.assertEqual(m.view_nodes_at(2, 16), {'Arguments:x: int, y: int', 'AssignName:x'}) 519 | else: 520 | self.assertEqual(m.view_nodes_at(2, 16), {'arguments:x: int, y: int', 'arg:x: int'}) 521 | self.assertEqual(m.view_nodes_at(2, 19), {'Name:int'}) 522 | self.assertEqual(m.view_nodes_at(2, 35), {'Name:int'}) 523 | self.assertEqual(m.view_nodes_at(3, 2), {'Return:return x + y'}) 524 | 525 | def test_keyword_arg_only(self): 526 | # See https://bitbucket.org/plas/thonny/issues/52/range-marker-fails-with-ridastrip-split 527 | source = "f(x=1)\ng(a=(x),b=[y])" 528 | m = self.create_mark_checker(source) 529 | self.assertEqual(m.view_nodes_at(1, 0), 530 | {'Name:f', 'Call:f(x=1)', 'Expr:f(x=1)', 'Module:' + source}) 531 | self.assertEqual(m.view_nodes_at(2, 0), 532 | {'Name:g', 'Call:g(a=(x),b=[y])', 'Expr:g(a=(x),b=[y])'}) 533 | self.assertEqual(m.view_nodes_at(2, 11), {'Name:y'}) 534 | if self.is_astroid_test: 535 | self.assertEqual(m.view_nodes_at(1, 2), {'Keyword:x=1'}) 536 | self.assertEqual(m.view_nodes_at(1, 4), {'Const:1'}) 537 | self.assertEqual(m.view_nodes_at(2, 2), {'Keyword:a=(x)'}) 538 | self.assertEqual(m.view_nodes_at(2, 8), {'Keyword:b=[y]'}) 539 | else: 540 | self.assertEqual(m.view_nodes_at(1, 2), {'keyword:x=1'}) 541 | self.assertEqual(m.view_nodes_at(1, 4), {'Constant:1'}) 542 | self.assertEqual(m.view_nodes_at(2, 2), {'keyword:a=(x)'}) 543 | self.assertEqual(m.view_nodes_at(2, 8), {'keyword:b=[y]'}) 544 | 545 | def test_decorators(self): 546 | # See https://bitbucket.org/plas/thonny/issues/49/range-marker-fails-with-decorators 547 | source = textwrap.dedent(""" 548 | @deco1 549 | def f(): 550 | pass 551 | @deco2(a=1) 552 | def g(x): 553 | pass 554 | 555 | @deco3() 556 | def g(x): 557 | pass 558 | 559 | @deco4 560 | class C: 561 | pass 562 | """) 563 | m = self.create_mark_checker(source) 564 | # The `arguments` node has bogus positions here (and whenever there are no arguments). We 565 | # don't let that break our test because it's unclear if it matters to anything anyway. 566 | self.assertIn('FunctionDef:@deco1\ndef f():\n pass', m.view_nodes_at(2, 0)) 567 | self.assertEqual(m.view_nodes_at(2, 1), {'Name:deco1'}) 568 | if self.is_astroid_test: 569 | self.assertEqual(m.view_nodes_at(5, 0), { 570 | 'FunctionDef:@deco2(a=1)\ndef g(x):\n pass', 571 | 'Decorators:@deco2(a=1)' 572 | }) 573 | else: 574 | self.assertEqual(m.view_nodes_at(5, 0), {'FunctionDef:@deco2(a=1)\ndef g(x):\n pass'}) 575 | self.assertEqual(m.view_nodes_at(5, 1), {'Name:deco2', 'Call:deco2(a=1)'}) 576 | 577 | self.assertEqual(m.view_nodes_at(9, 1), {'Name:deco3', 'Call:deco3()'}) 578 | 579 | def test_with(self): 580 | source = "with foo: pass" 581 | m = self.create_mark_checker(source) 582 | self.assertEqual(m.view_node_types_at(1, 0), {"Module", "With"}) 583 | self.assertEqual(m.view_nodes_at(1, 0), { 584 | "Module:with foo: pass", 585 | "With:with foo: pass", 586 | }) 587 | 588 | source = textwrap.dedent( 589 | ''' 590 | def f(x): 591 | with A() as a: 592 | log(a) 593 | with B() as b, C() as c: log(b, c) 594 | log(x) 595 | ''') 596 | # verification fails on Python2 which turns `with X, Y` turns into `with X: with Y`. 597 | m = self.create_mark_checker(source, verify=True) 598 | self.assertEqual(m.view_nodes_at(5, 4), { 599 | 'With:with B() as b, C() as c: log(b, c)' 600 | }) 601 | self.assertEqual(m.view_nodes_at(3, 2), { 602 | 'With: with A() as a:\n log(a)\n with B() as b, C() as c: log(b, c)' 603 | }) 604 | with_nodes = [n for n in m.all_nodes if n.__class__.__name__ == 'With'] 605 | self.assertEqual({m.view_node(n) for n in with_nodes}, { 606 | 'With:with B() as b, C() as c: log(b, c)', 607 | 'With: with A() as a:\n log(a)\n with B() as b, C() as c: log(b, c)', 608 | }) 609 | 610 | def test_one_line_if_elif(self): 611 | source = """ 612 | if 1: a 613 | elif 2: b 614 | """ 615 | self.create_mark_checker(source) 616 | 617 | 618 | def test_statements_with_semicolons(self): 619 | source = """ 620 | a; b; c( 621 | 17 622 | ); d # comment1; comment2 623 | if 2: a; b; # comment3 624 | if a: 625 | if b: c; d # comment4 626 | """ 627 | m = self.create_mark_checker(source) 628 | self.assertEqual( 629 | [m.atok.get_text(n) for n in m.all_nodes if util.is_stmt(n)], 630 | ['a', 'b', 'c(\n 17\n)', 'd', 'if 2: a; b', 'a', 'b', 631 | 'if a:\n if b: c; d', 'if b: c; d', 'c', 'd']) 632 | 633 | 634 | def test_complex_numbers(self): 635 | source = """ 636 | 1 637 | -1 638 | j # not a complex number, just a name 639 | 1j 640 | -1j 641 | 1+2j 642 | 3-4j 643 | 1j-1j-1j-1j 644 | """ 645 | self.create_mark_checker(source) 646 | 647 | def test_parens_around_func(self): 648 | source = textwrap.dedent( 649 | ''' 650 | foo() 651 | (foo)() 652 | (lambda: 0)() 653 | (lambda: ())() 654 | (foo)((1)) 655 | (lambda: ())((2)) 656 | x = (obj.attribute.get_callback() or default_callback)() 657 | ''') 658 | m = self.create_mark_checker(source) 659 | self.assertEqual(m.view_nodes_at(2, 0), {"Name:foo", "Expr:foo()", "Call:foo()"}) 660 | self.assertEqual(m.view_nodes_at(3, 1), {"Name:foo"}) 661 | self.assertEqual(m.view_nodes_at(3, 0), {"Expr:(foo)()", "Call:(foo)()"}) 662 | self.assertEqual(m.view_nodes_at(4, 0), {"Expr:(lambda: 0)()", "Call:(lambda: 0)()"}) 663 | self.assertEqual(m.view_nodes_at(5, 0), {"Expr:(lambda: ())()", "Call:(lambda: ())()"}) 664 | self.assertEqual(m.view_nodes_at(6, 0), {"Expr:(foo)((1))", "Call:(foo)((1))"}) 665 | self.assertEqual(m.view_nodes_at(7, 0), {"Expr:(lambda: ())((2))", "Call:(lambda: ())((2))"}) 666 | self.assertEqual(m.view_nodes_at(8, 4), 667 | {"Call:(obj.attribute.get_callback() or default_callback)()"}) 668 | self.assertIn('BoolOp:obj.attribute.get_callback() or default_callback', m.view_nodes_at(8, 5)) 669 | 670 | def test_complex_slice_and_parens(self): 671 | source = 'f((x)[:, 0])' 672 | self.create_mark_checker(source) 673 | 674 | @pytest.mark.slow 675 | def test_sys_modules(self): 676 | """ 677 | Verify all nodes on source files obtained from sys.modules. 678 | 679 | This can take a long time as there are many modules, 680 | so is marked as a slow test and must be explicitly selected 681 | for running. 682 | """ 683 | from .test_astroid import AstroidTreeException 684 | 685 | modules = list(sys.modules.values()) 686 | 687 | start = time() 688 | for module in modules: 689 | # Don't let this test (which runs twice) take longer than 13 minutes 690 | # to avoid the travis build time limit of 30 minutes 691 | if time() - start > 13 * 60: 692 | break 693 | 694 | if 'annotationlib' == module.__name__: 695 | break 696 | 697 | try: 698 | filename = inspect.getsourcefile(module) 699 | except Exception: # some modules raise weird errors 700 | continue 701 | 702 | if not filename: 703 | continue 704 | 705 | filename = os.path.abspath(filename) 706 | print(filename) 707 | try: 708 | with io.open(filename) as f: 709 | source = f.read() 710 | except OSError: 711 | continue 712 | 713 | if self.is_astroid_test and ( 714 | # Astroid fails with a syntax error if a type comment is on its own line 715 | re.search(r'^\s*# type: ', source, re.MULTILINE) 716 | ): 717 | print('Skipping', filename) 718 | continue 719 | 720 | try: 721 | self.create_mark_checker(source) 722 | except AstroidTreeException: 723 | # Astroid sometimes fails with errors like: 724 | # AttributeError: 'TreeRebuilder' object has no attribute 'visit_typealias' 725 | # See https://github.com/gristlabs/asttokens/actions/runs/6015907789/job/16318767911?pr=110 726 | # Should be fixed in the next astroid release: 727 | # https://github.com/pylint-dev/pylint/issues/8782#issuecomment-1669967220 728 | # Note that this exception is raised before asttokens is even involved, 729 | # it's purely an astroid bug that we can safely ignore. 730 | continue 731 | 732 | def test_dict_merge(self): 733 | self.create_mark_checker("{**{}}") 734 | 735 | def test_async_def(self): 736 | self.create_mark_checker(""" 737 | async def foo(): 738 | pass 739 | 740 | @decorator 741 | async def foo(): 742 | pass 743 | """) 744 | 745 | def test_async_for_and_with(self): 746 | # Can't verify all nodes because in < 3.7 747 | # async for/with outside of a function is invalid syntax 748 | m = self.create_mark_checker(""" 749 | async def foo(): 750 | async for x in y: pass 751 | async with x as y: pass 752 | """, verify=False) 753 | assert m.view_nodes_at(3, 2) == {"AsyncFor:async for x in y: pass"} 754 | assert m.view_nodes_at(4, 2) == {"AsyncWith:async with x as y: pass"} 755 | 756 | def test_await(self): 757 | # Can't verify all nodes because in astroid 758 | # await outside of an async function is invalid syntax 759 | m = self.create_mark_checker(""" 760 | async def foo(): 761 | await bar 762 | """, verify=False) 763 | assert m.view_nodes_at(3, 2) == {"Await:await bar", "Expr:await bar"} 764 | 765 | if sys.version_info >= (3, 8): 766 | def test_assignment_expressions(self): 767 | # From https://www.python.org/dev/peps/pep-0572/ 768 | self.create_mark_checker(""" 769 | # Handle a matched regex 770 | if (match := pattern.search(data)) is not None: 771 | # Do something with match 772 | pass 773 | 774 | # A loop that can't be trivially rewritten using 2-arg iter() 775 | while chunk := file.read(8192): 776 | process(chunk) 777 | 778 | # Reuse a value that's expensive to compute 779 | [y := f(x), y**2, y**3] 780 | 781 | # Share a subexpression between a comprehension filter clause and its output 782 | filtered_data = [y for x in data if (y := f(x)) is not None] 783 | 784 | y0 = (y1 := f(x)) # Valid, though discouraged 785 | 786 | foo(x=(y := f(x))) # Valid, though probably confusing 787 | 788 | def foo(answer=(p := 42)): # Valid, though not great style 789 | ... 790 | 791 | def foo(answer: (p := 42) = 5): # Valid, but probably never useful 792 | ... 793 | 794 | lambda: (x := 1) # Valid, but unlikely to be useful 795 | 796 | (x := lambda: 1) # Valid 797 | 798 | lambda line: (m := re.match(pattern, line)) and m.group(1) # Valid 799 | 800 | if any((comment := line).startswith('#') for line in lines): 801 | print("First comment:", comment) 802 | 803 | if all((nonblank := line).strip() == '' for line in lines): 804 | print("All lines are blank") 805 | 806 | partial_sums = [total := total + v for v in values] 807 | """) 808 | 809 | if sys.version_info >= (3, 10): 810 | def test_match_case(self): 811 | m = self.create_mark_checker(""" 812 | if 0: 813 | match x: 814 | case ast.BinOp(): 815 | if z: 816 | pass 817 | case cls(a,b) if y: 818 | pass 819 | case _: 820 | match y: 821 | case 1: 822 | pass 823 | case _: 824 | pass 825 | """) 826 | self.assertEqual(m.view_nodes_at(10, 6), { 827 | 'Match:' 828 | ' match y:\n' 829 | ' case 1:\n' 830 | ' pass\n' 831 | ' case _:\n' 832 | ' pass', 833 | }) 834 | 835 | def parse_snippet(self, text, node): 836 | """ 837 | Returns the parsed AST tree for the given text, handling issues with indentation and newlines 838 | when text is really an extracted part of larger code. 839 | """ 840 | # If text is indented, it's a statement, and we need to put in a scope for indents to be valid 841 | # (using textwrap.dedent is insufficient because some lines may not indented, e.g. comments or 842 | # multiline strings). If text is an expression but has newlines, we parenthesize it to make it 843 | # parsable. 844 | # For expressions and statements, we add a dummy statement '_' before it because if it's just a 845 | # string contained in an astroid.Const or astroid.Expr it will end up in the doc attribute and be 846 | # a pain to extract for comparison 847 | # For starred expressions, e.g. `*args`, we wrap it in a function call to make it parsable. 848 | # For slices, e.g. `x:`, we wrap it in an indexing expression to make it parsable. 849 | indented = re.match(r'^[ \t]+\S', text) 850 | if indented: 851 | return self.module.parse('def dummy():\n' + text).body[0].body[0] 852 | if util.is_starred(node): 853 | return self.module.parse('f(' + text + ')').body[0].value.args[0] 854 | if util.is_slice(node): 855 | return self.module.parse('a[' + text + ']').body[0].value.slice 856 | if util.is_expr(node): 857 | return self.module.parse('_\n(' + text + ')').body[1].value 858 | if util.is_module(node): 859 | return self.module.parse(text) 860 | return self.module.parse('_\n' + text).body[1] 861 | 862 | def test_assert_nodes_equal(self): 863 | """ 864 | Checks that assert_nodes_equal actually fails when given different nodes 865 | """ 866 | 867 | def check(s1, s2): 868 | n1 = self.module.parse(s1) 869 | n2 = self.module.parse(s2) 870 | with self.assertRaises(AssertionError): 871 | self.assert_nodes_equal(n1, n2) 872 | 873 | check('a', 'b') 874 | check('a*b', 'a+b') 875 | check('a*b', 'b*a') 876 | check('(a and b) or c', 'a and (b or c)') 877 | check('a = 1', 'a = 2') 878 | check('a = 1', 'a += 1') 879 | check('a *= 1', 'a += 1') 880 | check('[a for a in []]', '[a for a in ()]') 881 | check("for x in y: pass", "for x in y: fail") 882 | check("1", "1.0") 883 | check("foo(a, b, *d, c=2, **e)", 884 | "foo(a, b, *d, c=2.0, **e)") 885 | check("foo(a, b, *d, c=2, **e)", 886 | "foo(a, b, *d, c=2)") 887 | check('def foo():\n """xxx"""\n None', 888 | 'def foo():\n """xx"""\n None') 889 | 890 | nodes_classes = ast.AST 891 | context_classes = [ast.expr_context] # type: List[util.AstNode] 892 | iter_fields = staticmethod(ast.iter_fields) 893 | 894 | def assert_nodes_equal(self, t1, t2): 895 | # Ignore the context of each node which can change when parsing 896 | # substrings of source code. We just want equal structure and contents. 897 | for context_classes_group in self.context_classes: 898 | if isinstance(t1, context_classes_group): 899 | self.assertIsInstance(t2, context_classes_group) 900 | break 901 | else: 902 | self.assertEqual(type(t1), type(t2)) 903 | 904 | if isinstance(t1, AstroidPosition): 905 | # Ignore the lineno/col_offset etc. from astroid 906 | return 907 | 908 | if isinstance(t1, (list, tuple)): 909 | self.assertEqual(len(t1), len(t2)) 910 | for vc1, vc2 in zip(t1, t2): 911 | self.assert_nodes_equal(vc1, vc2) 912 | elif isinstance(t1, self.nodes_classes): 913 | self.assert_nodes_equal( 914 | list(self.iter_fields(t1)), 915 | list(self.iter_fields(t2)), 916 | ) 917 | else: 918 | # Weird bug in astroid that collapses spaces in docstrings sometimes maybe 919 | if self.is_astroid_test and isinstance(t1, str): 920 | t1 = re.sub(r'^ +$', '', t1, flags=re.MULTILINE) 921 | t2 = re.sub(r'^ +$', '', t2, flags=re.MULTILINE) 922 | 923 | self.assertEqual(t1, t2) 924 | 925 | def test_list_comprehension(self): 926 | source = '[x for x in range(2)]' 927 | m = self.create_mark_checker(source) 928 | 929 | def test_dict_comprehension(self): 930 | source = '{x: 2 * x for x in range(2)}' 931 | m = self.create_mark_checker(source) 932 | assert 'DictComp' in str(m.view_nodes_at(0, 0)) 933 | 934 | --------------------------------------------------------------------------------