├── tests ├── __init__.py ├── conftest.py ├── test_regression.py ├── test_loop.sh ├── test_utils.py ├── test_baron.py ├── test_future.py ├── test_inner_formatting_grouper.py ├── test_grouper.py ├── test_spliter.py └── test_render.py ├── requirements.txt ├── baron ├── setup.cfg ├── __init__.py ├── dumper.py ├── helpers.py ├── future.py ├── token.py ├── spliter.py ├── baron.py ├── formatting_grouper.py ├── indentation_marker.py ├── grouper.py ├── tokenizer.py ├── utils.py ├── inner_formatting_grouper.py ├── grammator_imports.py └── parser.py ├── requirements-docs.txt ├── .env ├── setup.cfg ├── .gitignore ├── docs ├── grammar_diff │ ├── more_atoms.png │ ├── raise_from.png │ ├── star_expr.png │ ├── typed_args.png │ ├── yield_from.png │ ├── exec_function.png │ ├── print_function.png │ ├── new_lambda_grammar.png │ ├── nonlocal_statement.png │ ├── function_return_type.png │ ├── no_more_list_for_rule.png │ ├── ellipsis_in_from_import.png │ ├── new_grammar_for_if_cond.png │ ├── star_expr_in_expr_list.png │ ├── no_more_backquote_syntax.png │ ├── star_expr_in_testlist_comp.png │ ├── testlist_start_expressiong.png │ ├── class_inherit_is_arglist_now.png │ ├── no_more_commat_in_execption_close.png │ ├── remove_old_list_comprehension_syntax.png │ └── ellipsis_is_first_class_now_not_needed_anymore.png ├── loop.sh ├── grammar-python-2.7-3.6-diff-1.png ├── grammar-python-2.7-3.6-diff-2.png ├── grammar-python-2.7-3.6-diff-3.png ├── basics.rst ├── index.rst ├── advanced.rst ├── make.bat ├── Makefile ├── technical.rst └── conf.py ├── MANIFEST.in ├── .travis.yml ├── tox.ini ├── setup.py ├── add_new_grammar.md ├── grammar ├── reference_grammar_python2 ├── reference_grammar_python3.3 ├── reference_grammar_python3.4 ├── reference_grammar_python3.7 ├── reference_grammar_python3.6 ├── baron_grammar └── reference_grammar_python3.5 ├── README.md ├── LICENSE └── CHANGELOG /tests/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | rply 2 | -------------------------------------------------------------------------------- /baron/setup.cfg: -------------------------------------------------------------------------------- 1 | [wheel] 2 | universal = 1 3 | -------------------------------------------------------------------------------- /requirements-docs.txt: -------------------------------------------------------------------------------- 1 | ipython 2 | matplotlib 3 | sphinx 4 | -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | #!/bin/zsh 2 | source $(/bin/readlink -f ${0%/*})/ve/bin/activate 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [wheel] 2 | universal = 1 3 | 4 | [metadata] 5 | license_file = LICENSE 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | *.swo 4 | __pycache__ 5 | .coverage 6 | .pytest_cache/ 7 | baron.egg-info/ 8 | -------------------------------------------------------------------------------- /docs/grammar_diff/more_atoms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar_diff/more_atoms.png -------------------------------------------------------------------------------- /docs/grammar_diff/raise_from.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar_diff/raise_from.png 
-------------------------------------------------------------------------------- /docs/grammar_diff/star_expr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar_diff/star_expr.png -------------------------------------------------------------------------------- /docs/grammar_diff/typed_args.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar_diff/typed_args.png -------------------------------------------------------------------------------- /docs/grammar_diff/yield_from.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar_diff/yield_from.png -------------------------------------------------------------------------------- /docs/grammar_diff/exec_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar_diff/exec_function.png -------------------------------------------------------------------------------- /docs/loop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | while true; do clear; make html; sleep 0.1; inotifywait -e modify *.rst; done 4 | -------------------------------------------------------------------------------- /docs/grammar-python-2.7-3.6-diff-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar-python-2.7-3.6-diff-1.png -------------------------------------------------------------------------------- /docs/grammar-python-2.7-3.6-diff-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar-python-2.7-3.6-diff-2.png 
-------------------------------------------------------------------------------- /docs/grammar-python-2.7-3.6-diff-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar-python-2.7-3.6-diff-3.png -------------------------------------------------------------------------------- /docs/grammar_diff/print_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar_diff/print_function.png -------------------------------------------------------------------------------- /docs/grammar_diff/new_lambda_grammar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar_diff/new_lambda_grammar.png -------------------------------------------------------------------------------- /docs/grammar_diff/nonlocal_statement.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar_diff/nonlocal_statement.png -------------------------------------------------------------------------------- /docs/grammar_diff/function_return_type.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar_diff/function_return_type.png -------------------------------------------------------------------------------- /docs/grammar_diff/no_more_list_for_rule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar_diff/no_more_list_for_rule.png -------------------------------------------------------------------------------- /docs/grammar_diff/ellipsis_in_from_import.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar_diff/ellipsis_in_from_import.png -------------------------------------------------------------------------------- /docs/grammar_diff/new_grammar_for_if_cond.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar_diff/new_grammar_for_if_cond.png -------------------------------------------------------------------------------- /docs/grammar_diff/star_expr_in_expr_list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar_diff/star_expr_in_expr_list.png -------------------------------------------------------------------------------- /docs/grammar_diff/no_more_backquote_syntax.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar_diff/no_more_backquote_syntax.png -------------------------------------------------------------------------------- /docs/grammar_diff/star_expr_in_testlist_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar_diff/star_expr_in_testlist_comp.png -------------------------------------------------------------------------------- /docs/grammar_diff/testlist_start_expressiong.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar_diff/testlist_start_expressiong.png -------------------------------------------------------------------------------- /docs/grammar_diff/class_inherit_is_arglist_now.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar_diff/class_inherit_is_arglist_now.png 
-------------------------------------------------------------------------------- /docs/grammar_diff/no_more_commat_in_execption_close.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar_diff/no_more_commat_in_execption_close.png -------------------------------------------------------------------------------- /docs/grammar_diff/remove_old_list_comprehension_syntax.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar_diff/remove_old_list_comprehension_syntax.png -------------------------------------------------------------------------------- /docs/grammar_diff/ellipsis_is_first_class_now_not_needed_anymore.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyCQA/baron/HEAD/docs/grammar_diff/ellipsis_is_first_class_now_not_needed_anymore.png -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.md CHANGELOG LICENSE 2 | include tox.ini 3 | 4 | exclude *.txt 5 | 6 | recursive-include docs * 7 | 8 | graft tests 9 | 10 | prune docs/_build 11 | prune grammar 12 | 13 | reverse-exclude */__pycache__/* 14 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | install: "pip install -r requirements.txt" 3 | python: 4 | - "2.7" 5 | - "3.4" 6 | - "3.5" 7 | - "3.6" 8 | - "3.7" 9 | script: "py.test tests" 10 | notifications: 11 | irc: "chat.freenode.net#baron" 12 | -------------------------------------------------------------------------------- /baron/__init__.py: -------------------------------------------------------------------------------- 
from .render import RenderWalker


def dumps(tree, strict=False):
    """Render a Baron FST *tree* back into its source code string.

    :param tree: the FST (list of dict nodes) produced by ``parse``.
    :param strict: forwarded to :class:`RenderWalker`; when True the
                   walker validates the tree while rendering.
    :return: the reconstructed source code as a string.
    """
    return Dumper(strict=strict).dump(tree)


class Dumper(RenderWalker):
    """RenderWalker that concatenates every rendered string and constant.

    The accumulator is kept in ``self._dump`` rather than ``self.dump``:
    the previous implementation assigned the accumulator to ``self.dump``,
    which shadowed the ``dump()`` method on the instance, so a second
    ``dump()`` call on the same ``Dumper`` raised ``TypeError`` (a string
    is not callable).  Using a private attribute makes instances safely
    reusable without changing the public interface.
    """

    def before_string(self, string, key):
        # Hook called by RenderWalker for every string leaf of the tree.
        self._dump += string

    def before_constant(self, constant, key):
        # Hook called by RenderWalker for every constant leaf of the tree.
        self._dump += constant

    def dump(self, tree):
        """Walk *tree* and return the accumulated source text."""
        self._dump = ''
        self.walk(tree)
        return self._dump
from baron import parse, dumps, tokenize


# Regression tests: each test below pins a previously fixed bug so it
# cannot silently come back.

def test_regression_trailing_comment_after_colon():
    # A comment directly after the ":" of a def used to crash the parser.
    assert parse("def a(): # pouf\n pass")


def test_regression_trailing_comment_after_colon_no_space():
    # Same as above, but with no space between ":" and "#".
    assert parse("def a():# pouf\n pass")


def test_regression_trailing_comment_after_colon_dump():
    # Round-trip: dumping the parsed tree must reproduce the input exactly.
    code = "def a(): # pouf\n pass\n"
    assert dumps(parse(code)) == code


def test_regression_trailing_comment_after_colon_no_space_dump():
    # Round-trip variant without a space before the trailing comment.
    code = "def a():# pouf\n pass\n"
    assert dumps(parse(code)) == code


def test_comment_in_middle_of_ifelseblock():
    # A dedented comment between the "if" suite and the "else" keyword
    # must survive a parse/dump round-trip.
    code = 'if a:\n pass\n# comment\nelse:\n pass\n'
    assert dumps(parse(code)) == code


def test_new_float_notation():
    # "a._" is attribute access on "a", not a PEP 515 underscore-in-number
    # literal; the tokenizer must not merge the trailing "_" into a float.
    # [:-1] drops the ENDMARKER token.
    code = 'a._'
    assert tokenize(code)[:-1] == [('NAME', 'a'), ('DOT', '.'), ('NAME', '_')]
def has_print_function(tokens):
    """Return True if *tokens* contain a ``from __future__ import
    print_function`` statement.

    Handles plain imports, ``as`` aliases, comma-separated name lists and
    parenthesized import lists.  *tokens* is a sequence of
    ``(TYPE, value, ...)`` tuples; only the first two fields are read.

    Index accesses are bounds-checked so a truncated token stream returns
    False instead of raising IndexError (well-formed streams always end
    with an ENDMARKER, so behavior there is unchanged).
    """
    p = 0
    while p < len(tokens):
        # Look for the exact prefix: FROM, NAME "__future__", IMPORT.
        if tokens[p][0] != 'FROM':
            p += 1
            continue
        if p + 2 >= len(tokens):
            # Truncated input: not enough tokens left to form the statement.
            break
        if tokens[p + 1][0:2] != ('NAME', '__future__'):
            p += 1
            continue
        if tokens[p + 2][0] != 'IMPORT':
            p += 1
            continue

        current = p + 3
        # ignore LEFT_PARENTHESIS token of a parenthesized import list
        if current < len(tokens) and tokens[current][0] == 'LEFT_PARENTHESIS':
            current += 1

        # Walk the imported names: NAME [AS NAME] (COMMA NAME [AS NAME])*
        while current < len(tokens) and tokens[current][0] == 'NAME':
            if tokens[current][1] == 'print_function':
                return True

            # ignore AS and NAME tokens if present (skip "as alias ,"),
            # anyway, ignore the COMMA token before the next NAME
            if current + 1 < len(tokens) and tokens[current + 1][0] == 'AS':
                current += 4
            else:
                current += 2
        p += 1

    return False


def replace_print_by_name(tokens):
    """Return a copy of *tokens* where every PRINT token is replaced by a
    ``('NAME', 'print')`` token, so ``print`` parses as a regular name.

    The input list is not mutated; callers must use the return value.
    """
    def is_print(token):
        return token[0] == 'PRINT'

    return [('NAME', 'print') if is_print(x) else x for x in tokens]
long_description_content_type="text/markdown", 13 | author_email='cortex@worlddomination.be', 14 | url='https://github.com/PyCQA/baron', 15 | install_requires=['rply'], 16 | packages=['baron'], 17 | license='lgplv3+', 18 | scripts=[], 19 | keywords='ast fst refactoring syntax tree', 20 | classifiers=[ 21 | 'Development Status :: 3 - Alpha', 22 | 'Intended Audience :: Developers', 23 | 'Programming Language :: Python :: 2', 24 | 'Programming Language :: Python :: 2.7', 25 | 'Programming Language :: Python :: 3', 26 | 'Programming Language :: Python :: 3.2', 27 | 'Programming Language :: Python :: 3.3', 28 | 'License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)', 29 | 'Topic :: Software Development', 30 | 'Topic :: Software Development :: Code Generators', 31 | 'Topic :: Software Development :: Libraries', 32 | ], 33 | ) 34 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from baron.grammator import generate_parse 4 | from baron.dumper import dumps 5 | from baron.baron import parse as baron_parse 6 | from baron.utils import python_version 7 | 8 | if python_version == 3: 9 | from itertools import zip_longest 10 | else: 11 | from itertools import izip_longest 12 | zip_longest = izip_longest 13 | 14 | parse = generate_parse(False) 15 | 16 | 17 | def parse_simple(tokens, result): 18 | if not tokens or tokens[-1][0] != "ENDL": 19 | tokens += [('ENDL', '\n')] 20 | assert parse(tokens + [('ENDMARKER', ''), None]) == (result + [{"type": "endl", "value": "\n", "formatting": [], "indent": ""}]) 21 | 22 | 23 | def parse_multi(tokens, result): 24 | assert parse(tokens + [('ENDMARKER', ''), None]) == result 25 | 26 | 27 | def check_dumps(source_code): 28 | try: 29 | open("/tmp/c", "w").write(source_code) 30 | open("/tmp/d", "w").write(dumps(baron_parse(source_code))) 31 | except Exception as e: 32 | 
# -*- coding: utf-8 -*-

from baron import (parse, BaronError, ParsingError,
                   GroupingError, UntreatedError)
import pytest


# Error-handling tests: each invalid input must raise the specific
# exception class AND its BaronError base, so callers can catch either.

def test_dummy_parse():
    # Smoke test: a bare name must parse without raising.
    parse("pouet")


def test_error_parsing_error():
    # An unclosed parenthesis is a grammar-level failure.
    with pytest.raises(ParsingError):
        parse("(")
    with pytest.raises(BaronError):
        parse("(")


def test_error_unexpected_formatting():
    # Leading indentation on the first line, then a dedented line.
    with pytest.raises(ParsingError):
        parse(" a\nb")
    with pytest.raises(BaronError):
        parse(" a\nb")


def test_error_grouping():
    # An unclosed "(" spanning an indented continuation line.
    with pytest.raises(GroupingError):
        parse(" (a\n b")
    with pytest.raises(BaronError):
        parse(" (a\n b")


def test_error_untreated_error():
    # "?" is not a token the spliter knows about.
    with pytest.raises(UntreatedError):
        parse("?")
    with pytest.raises(BaronError):
        parse("?")


def test_missing_quote_yields_error():
    # An unterminated string literal, with and without a trailing newline.
    with pytest.raises(UntreatedError):
        parse("'")
    with pytest.raises(UntreatedError):
        parse("'\n")
    with pytest.raises(BaronError):
        parse("'\n")


def test_error_bad_indentation():
    """Regression test case.

    It shows a discrepancy between python2 and python3 in string
    comparisons with None.
    """
    with pytest.raises(ParsingError):
        parse("def fun():\nass")
    with pytest.raises(BaronError):
        parse("def fun():\nass")
from rply.token import BaseBox


class BaronToken(BaseBox):
    """
    Represents a syntactically relevant piece of text.

    :param name: A string describing the kind of text represented.
    :param value: The actual text represented.
    :param hidden_tokens_before: Formatting tokens (spaces, comments,
        endls) collected before this token; translated eagerly to FST
        dict nodes.
    :param hidden_tokens_after: Same as above, for formatting collected
        after this token.
    """
    def __init__(self, name, value, hidden_tokens_before=None, hidden_tokens_after=None):
        self.name = name
        self.value = value
        # Formatting tokens are converted to their FST dict form up front,
        # so render() and the grammar only ever see dicts.
        self.hidden_tokens_before = list(map(self._translate_tokens_to_ast_node, hidden_tokens_before if hidden_tokens_before else []))
        self.hidden_tokens_after = list(map(self._translate_tokens_to_ast_node, hidden_tokens_after if hidden_tokens_after else []))

    def _translate_tokens_to_ast_node(self, token):
        """Convert one raw token tuple into an FST dict node.

        Raw tokens look like ``(TYPE, value[, formatting[, extra]])``;
        the dict shape emitted depends on the token type and arity.
        """
        if token[0] == "ENDL":
            return {
                "type": token[0].lower(),
                "value": token[1],
                # token[3], when present, carries the indentation token of
                # the following line; default to no indent otherwise.
                "indent": token[3][0][1] if len(token) == 4 and token[3] else "",
                "formatting": list(map(self._translate_tokens_to_ast_node, token[2]) if len(token) >= 3 else []),
            }
        if len(token) >= 3:
            # Token with its own nested formatting list: recurse into it.
            return {
                "type": token[0].lower(),
                "value": token[1],
                "formatting": list(map(self._translate_tokens_to_ast_node, token[2]) if len(token) >= 3 else []),
            }
        if token[0] == "COMMENT":
            # A bare COMMENT still gets an (empty) formatting list.
            return {
                "type": token[0].lower(),
                "value": token[1],
                "formatting": [],
            }
        # Any other two-field token: plain type/value node.
        return {
            "type": token[0].lower(),
            "value": token[1],
        }

    def __repr__(self):
        return "Token(%r, %r, %s, %s)" % (self.name, self.value, self.hidden_tokens_before, self.hidden_tokens_after)

    def __eq__(self, other):
        # NOTE(review): defining __eq__ without __hash__ makes instances
        # unhashable on Python 3 -- confirm tokens are never used as
        # dict keys or set members.
        if not isinstance(other, BaronToken):
            return NotImplemented
        return self.name == other.name and self.value == other.value

    def render(self):
        """Reassemble the source text: surrounding formatting + value.

        endl formatting nodes contribute their trailing indent in
        addition to their newline value.
        """
        before = "".join([(x["indent"] if x["type"] == "endl" else "") + x["value"] for x in self.hidden_tokens_before])
        after = "".join([(x["indent"] if x["type"] == "endl" else "") + x["value"] for x in self.hidden_tokens_after])
        return before + self.value + after

    def gettokentype(self):
        """
        Returns the type or name of the token.
        """
        return self.name

    def getstr(self):
        """
        Returns the string represented by this token.
        """
        return self.value
"from __future__ import (a as c, b as d, print_function as e)" 58 | assert print_token_is_a_function(code) 59 | 60 | 61 | def test_print_no_future_in_parenthesis_as(): 62 | code = "from __future__ import (a as c, b as d)" 63 | assert not print_token_is_a_function(code) 64 | 65 | 66 | def test_print_future_second(): 67 | code = """from __future__ import a, b as e 68 | from __future__ import c, print_function""" 69 | assert print_token_is_a_function(code) 70 | 71 | 72 | def test_auto_print_as_name(): 73 | code = "from __future__ import print_function\nprint(a)" 74 | assert print_is_parsed_as_a_function(parse(code)) 75 | 76 | 77 | def test_auto_print_as_print(): 78 | code = "print(a)" 79 | assert not print_is_parsed_as_a_function(parse(code)) 80 | 81 | 82 | def test_print_as_name(): 83 | code = "print(a)" 84 | assert print_is_parsed_as_a_function(parse(code, True)) 85 | 86 | 87 | def test_print_as_print(): 88 | code = "print(a)" 89 | assert not print_is_parsed_as_a_function(parse(code, False)) 90 | 91 | 92 | def test_replace_print_token(): 93 | tokens = [('PRINT', 'print'), ('LEFT_PARENTHESIS', '('), ('NAME', 'A'), ('RIGHT_PARENTHESIS', ')'), ('ENDMARKER', '')] 94 | after = [('NAME', 'print'), ('LEFT_PARENTHESIS', '('), ('NAME', 'A'), ('RIGHT_PARENTHESIS', ')'), ('ENDMARKER', '')] 95 | assert after == replace_print_by_name(tokens) 96 | -------------------------------------------------------------------------------- /baron/spliter.py: -------------------------------------------------------------------------------- 1 | import string 2 | from .utils import FlexibleIterator, BaronError 3 | 4 | 5 | def split(sequence): 6 | return list(split_generator(sequence)) 7 | 8 | 9 | class UntreatedError(BaronError): 10 | pass 11 | 12 | 13 | def split_generator(sequence): 14 | iterator = FlexibleIterator(sequence) 15 | 16 | # Pay attention that if a next() call fails, a StopIteration error 17 | # is raised. 
This coincidently is the same error used by python to 18 | # understand that a function using yield has finished processing. 19 | # It's not a bad thing, but it must be kept in mind. 20 | while not iterator.end(): 21 | not_found = True 22 | 23 | if iterator.next_in("#"): 24 | not_found = False 25 | result = iterator.grab(lambda iterator: (iterator.show_next() not in "\r\n")) 26 | yield result 27 | 28 | for section in ("'", '"'): 29 | if iterator.next_starts_with(section * 3): 30 | not_found = False 31 | result = next(iterator) 32 | result += next(iterator) 33 | result += next(iterator) 34 | result += iterator.grab_string(lambda iterator: not iterator.next_starts_with(section * 3)) 35 | # This next() call can fail if no closing quote exists. We 36 | # still want to yield so we catch it. 37 | try: 38 | result += next(iterator) 39 | result += next(iterator) 40 | result += next(iterator) 41 | except StopIteration: 42 | pass 43 | yield result 44 | elif iterator.next_in(section): 45 | not_found = False 46 | result = next(iterator) 47 | result += iterator.grab_string(lambda iterator: iterator.show_next() not in section) 48 | # This next() call can fail if no closing quote exists. We 49 | # still want to yield so we catch it. 50 | try: 51 | result += next(iterator) 52 | except StopIteration: 53 | pass 54 | yield result 55 | 56 | for section in (string.ascii_letters + "_" + "1234567890", " \t"): 57 | if iterator.next_in(section): 58 | not_found = False 59 | yield iterator.grab(lambda iterator: iterator.show_next() in section) 60 | 61 | for one in "@,.;()=*:+-/^%&<>|\r\n~[]{}!``\\": 62 | if iterator.next_in(one): 63 | not_found = False 64 | yield next(iterator) 65 | 66 | if iterator.show_next().__repr__().startswith("'\\x"): 67 | # guys, seriously, how do you manage to put this shit in your code? 68 | # I mean, I don't even know how this is possible! 
def _parse(tokens, print_function):
    """Parse a token stream into a FST, retrying with the other parser.

    Tries the parser matching *print_function* first; on a ParsingError,
    retries once with the opposite parser (this covers the case where
    print_function detection was wrong). A ParsingError raised by the
    retry propagates to the caller; any other exception is reported on
    stderr with a bug-report message and None is implicitly returned,
    preserving the historical behavior.
    """
    parser = parse_tokens if not print_function else parse_tokens_print_function
    try:
        try:
            return parser(tokens)
        except ParsingError:
            # swap parsers for print_function situation where I failed to find it
            parser = parse_tokens if print_function else parse_tokens_print_function
            return parser(tokens)
    except ParsingError:
        # fix: the exception was bound "as e" but never used
        raise
    except Exception as e:
        import sys
        import traceback
        traceback.print_exc(file=sys.stderr)
        sys.stderr.write("%s\n" % e)
        sys.stderr.write("\nBaron has failed to parse this input. If this is valid python code (and by that I mean that the python binary successfully parse this code without any syntax error) (also consider that python does not yet parse python 3 code integrally) it would be kind if you can extract a snippet of your code that make Baron fails and open a bug here: https://github.com/PyCQA/baron/issues\n\nSorry for the inconvenience.")
inner_group(space_grouped) 75 | indentation_marked = mark_indentation(inner_grouped) 76 | return indentation_marked 77 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to Baron's documentation! 2 | ================================= 3 | 4 | Introduction 5 | ------------ 6 | 7 | Baron is a Full Syntax Tree (FST) for Python. It represents source code 8 | as a structured tree, easily parsable by a computer. By opposition to 9 | an `Abstract Syntax Tree 10 | `_ (AST) which drops 11 | syntax information in the process of its creation (like empty lines, 12 | comments, formatting), a FST keeps everything and guarantees the 13 | operation :file:`fst_to_code(code_to_fst(source_code)) == source_code`. 14 | 15 | If you want to understand why this is important, read this: 16 | https://github.com/PyCQA/baron#why-is-this-important 17 | 18 | Github (code, bug tracker, etc.) 19 | -------------------------------- 20 | 21 | https://github.com/PyCQA/baron 22 | 23 | Installation 24 | ------------ 25 | 26 | :: 27 | 28 | pip install baron 29 | 30 | RedBaron 31 | -------- 32 | 33 | There is a good chance that you'll want to use `RedBaron 34 | `_ instead of using Baron directly. 35 | Think of Baron as the "bytecode of python source code" and RedBaron as 36 | some sort of usable layer on top of it, a bit like dom/jQuery or 37 | html/Beautifulsoup. 38 | 39 | Basic usage 40 | ----------- 41 | 42 | .. ipython:: python 43 | :suppress: 44 | 45 | import sys 46 | sys.path.append("..") 47 | 48 | .. 
ipython:: python 49 | 50 | from baron import parse, dumps 51 | 52 | source_code = "a = 1" 53 | fst = parse(source_code) 54 | fst 55 | generated_source_code = dumps(fst) 56 | generated_source_code 57 | source_code == generated_source_code 58 | 59 | 60 | Financial support 61 | ----------------- 62 | 63 | Baron and RedBaron are a very advanced piece of engineering that requires a lot 64 | of time of concentration to work on. Until the end of 2018, the development 65 | has been a full volunteer work mostly done by [Bram](https://github.com/psycojoker), 66 | but now, to reach the next level and bring those projects to the stability and 67 | quality you expect, we need your support. 68 | 69 | You can join our contributors and sponsors on our transparent 70 | [OpenCollective](https://opencollective.com/redbaron), every contribution will 71 | count and will be mainly used to work on the projects stability and quality but 72 | also on continuing, on the side, the R&D side of those projects. 73 | 74 | Our supporters 75 | ~~~~~~~~~~~~~~ 76 | 77 | .. image:: https://opencollective.com/redbaron/tiers/i-like-this,-keep-going!/badge.svg?label=I like this, keep going!&color=brightgreen 78 | .. image:: https://opencollective.com/redbaron/tiers/it-looks-cool!/badge.svg?label=It looks cool!&color=brightgreen 79 | .. image:: https://opencollective.com/redbaron/tiers/oh-god,-that-saved-me-so-much-time!/badge.svg?label=Oh god, that saved me so much time!&color=brightgreen 80 | 81 | \ 82 | 83 | .. image:: https://opencollective.com/redbaron/tiers/i-like-this,-keep-going!.svg?avatarHeight=36&width=600 84 | 85 | Become our first sponsor! 86 | 87 | .. image:: https://opencollective.com/redbaron/tiers/long-term-sponsor.svg?avatarHeight=36&width=600 88 | 89 | Table of content 90 | ---------------- 91 | 92 | .. 
toctree:: 93 | :maxdepth: 2 94 | 95 | basics 96 | advanced 97 | technical 98 | 99 | 100 | Indices and tables 101 | ================== 102 | 103 | * :ref:`genindex` 104 | * :ref:`modindex` 105 | * :ref:`search` 106 | 107 | -------------------------------------------------------------------------------- /baron/formatting_grouper.py: -------------------------------------------------------------------------------- 1 | from .utils import FlexibleIterator, BaronError 2 | 3 | 4 | class UnExpectedSpaceToken(BaronError): 5 | pass 6 | 7 | 8 | PRIORITY_ORDER = ( 9 | "IMPORT", 10 | "ENDL", 11 | ) 12 | 13 | BOTH = ( 14 | "SEMICOLON", 15 | "AS", 16 | "IMPORT", 17 | "DOUBLE_STAR", 18 | "DOT", 19 | "LEFT_SQUARE_BRACKET", 20 | "LEFT_PARENTHESIS", 21 | "STAR", 22 | "SLASH", 23 | "PERCENT", 24 | "DOUBLE_SLASH", 25 | "PLUS", 26 | "MINUS", 27 | "AT", 28 | "LEFT_SHIFT", 29 | "RIGHT_SHIFT", 30 | "AMPER", 31 | "CIRCUMFLEX", 32 | "VBAR", 33 | "LESS", 34 | "GREATER", 35 | "EQUAL_EQUAL", 36 | "LESS_EQUAL", 37 | "GREATER_EQUAL", 38 | "NOT_EQUAL", 39 | "IN", 40 | "IS", 41 | "NOT", 42 | "AND", 43 | "OR", 44 | "IF", 45 | "ELSE", 46 | "EQUAL", 47 | "PLUS_EQUAL", 48 | "MINUS_EQUAL", 49 | "STAR_EQUAL", 50 | "AT_EQUAL", 51 | "SLASH_EQUAL", 52 | "PERCENT_EQUAL", 53 | "AMPER_EQUAL", 54 | "VBAR_EQUAL", 55 | "CIRCUMFLEX_EQUAL", 56 | "LEFT_SHIFT_EQUAL", 57 | "RIGHT_SHIFT_EQUAL", 58 | "DOUBLE_STAR_EQUAL", 59 | "DOUBLE_SLASH_EQUAL", 60 | "ENDL", 61 | "COMMA", 62 | "FOR", 63 | "COLON", 64 | "BACKQUOTE", 65 | "RIGHT_ARROW", 66 | "FROM", 67 | ) 68 | 69 | STRING = ( 70 | "STRING", 71 | "RAW_STRING", 72 | "INTERPOLATED_STRING", 73 | "INTERPOLATED_RAW_STRING", 74 | "UNICODE_STRING", 75 | "UNICODE_RAW_STRING", 76 | "BINARY_STRING", 77 | "BINARY_RAW_STRING", 78 | ) 79 | 80 | GROUP_SPACE_BEFORE = BOTH + ( 81 | "RIGHT_PARENTHESIS", 82 | "COMMENT", 83 | ) + STRING 84 | 85 | GROUP_SPACE_AFTER = BOTH + ( 86 | "TILDE", 87 | "RETURN", 88 | "YIELD", 89 | "WITH", 90 | "DEL", 91 | "ASSERT", 92 | "RAISE", 93 | "EXEC", 94 | 
"GLOBAL", 95 | "NONLOCAL", 96 | "PRINT", 97 | "INDENT", 98 | "WHILE", 99 | "ELIF", 100 | "EXCEPT", 101 | "DEF", 102 | "CLASS", 103 | "LAMBDA", 104 | ) 105 | 106 | 107 | def less_prioritary_than(a, b): 108 | if b not in PRIORITY_ORDER: 109 | return False 110 | 111 | if a not in PRIORITY_ORDER: 112 | return True 113 | 114 | return PRIORITY_ORDER.index(a) < PRIORITY_ORDER.index(b) 115 | 116 | 117 | def group(sequence): 118 | return list(group_generator(sequence)) 119 | 120 | 121 | def group_generator(sequence): 122 | iterator = FlexibleIterator(sequence) 123 | 124 | while not iterator.end(): 125 | current = next(iterator) 126 | 127 | if current is None: 128 | return 129 | 130 | if current[0] == "SPACE" and iterator.show_next() and iterator.show_next()[0] in GROUP_SPACE_BEFORE: 131 | new_current = next(iterator) 132 | current = (new_current[0], new_current[1], [current]) 133 | 134 | if current[0] in GROUP_SPACE_AFTER + STRING and\ 135 | (iterator.show_next() and iterator.show_next()[0] == "SPACE") and\ 136 | (not iterator.show_next(2) or (iterator.show_next(2) and not less_prioritary_than(current[0], iterator.show_next(2)[0]))): 137 | 138 | # do not be greedy when you are grouping on strings 139 | if current[0] in STRING and iterator.show_next(2) and iterator.show_next(2)[0] in GROUP_SPACE_BEFORE: 140 | yield current 141 | continue 142 | 143 | after_space = next(iterator) 144 | current = (current[0], current[1], current[2] if len(current) > 2 else [], [after_space]) 145 | 146 | # in case of "def a(): # comment\n pass" 147 | # not really happy about this solution but that avoid a broken release 148 | if current[0] == "COLON" and iterator.show_next() and iterator.show_next()[0] == "COMMENT": 149 | comment = next(iterator) 150 | current = (current[0], current[1], ((current[2]) if len(current) > 2 else []), ((current[3]) if len(current) > 3 else []) + [comment]) 151 | 152 | yield current 153 | -------------------------------------------------------------------------------- 
def transform_tabs_to_spaces(string):
    """Expand every tab character in *string* to 8 spaces."""
    return string.replace("\t", " " * 8)


def get_space(node):
    """Return the space formatting of *node*, tabs expanded to spaces.

    If the node carries no third formatting item - like in a plain
    ('ENDL', '\\n') node - None is returned as a flag value. This is
    maybe not the best behavior but it seems to work for now.
    """
    formatting = node[3] if len(node) >= 4 else []
    if not formatting:
        return None
    return transform_tabs_to_spaces(formatting[0][1])
100 | """ Return s1 > s2 by taking into account None values. 101 | 102 | None is always smaller than any string. 103 | 104 | None > "string" works in python2 but not in python3. This function 105 | makes it work in python3 too. 106 | """ 107 | if s1 is None: 108 | return False 109 | elif s2 is None: 110 | return True 111 | else: 112 | return s1 > s2 113 | -------------------------------------------------------------------------------- /baron/grouper.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | import re 4 | from .utils import FlexibleIterator 5 | 6 | to_group = ( 7 | ("+", "="), 8 | ("-", "="), 9 | ("*", "="), 10 | ("/", "="), 11 | ("%", "="), 12 | ("&", "="), 13 | ("|", "="), 14 | ("^", "="), 15 | ("@", "="), 16 | ("/", "/"), 17 | ("*", "*"), 18 | ("<", "<"), 19 | (">", ">"), 20 | ("=", "="), 21 | ("!", "="), 22 | ("<", ">"), 23 | ("<", "="), 24 | (">", "="), 25 | ("**", "="), 26 | ("//", "="), 27 | ("<<", "="), 28 | (">>", "="), 29 | ("\r", "\n"), 30 | (".", "."), 31 | ("..", "."), 32 | ("-", ">"), 33 | ) 34 | 35 | to_group_keys, _ = list(zip(*to_group)) 36 | 37 | 38 | def group(sequence): 39 | return list(group_generator(sequence)) 40 | 41 | 42 | def match_on_next(regex, iterator): 43 | return iterator.show_next() and re.match(regex, iterator.show_next()) 44 | 45 | 46 | def group_generator(sequence): 47 | iterator = FlexibleIterator(sequence) 48 | current = None 49 | while True: 50 | if iterator.end(): 51 | return 52 | 53 | current = next(iterator) 54 | if current in to_group_keys and matching_found(to_group, current, iterator.show_next()): 55 | current += next(iterator) 56 | if current in to_group_keys and matching_found(to_group, current, iterator.show_next()): 57 | current += next(iterator) 58 | if current in list('uUfFrRbB') and str(iterator.show_next()).startswith(('"', "'")): 59 | current += next(iterator) 60 | if str(current).lower() in ["ur", "br", "fr", "rf"] and 
str(iterator.show_next()).startswith(('"', "'")): 61 | current += next(iterator) 62 | if any([re.match(x, current) for x in (r'^\d+[eE]$', r'^\d+\.\d*[eE]$', r'^\.\d+[eE]$')]): 63 | current += next(iterator) 64 | current += next(iterator) 65 | 66 | # It's required in a case where I have something like that: 67 | # ['123.123e', '[+-]', '123'] 68 | assert re.match(r'^\d+[eE][-+]?\d+[jJ]?$', current) or re.match(r'^\d*.\d*[eE][-+]?\d+[jJ]?$', current) 69 | 70 | if current == "\\" and iterator.show_next() in ('\n', '\r\n'): 71 | current += next(iterator) 72 | if re.match(r'^\s+$', str(iterator.show_next())): 73 | current += next(iterator) 74 | 75 | if current == "\\" and iterator.show_next() == "\r" and iterator.show_next(2) == "\n": 76 | current += next(iterator) 77 | current += next(iterator) 78 | if re.match(r'^\s+$', str(iterator.show_next())): 79 | current += next(iterator) 80 | 81 | if re.match(r'^\s+$', current) and iterator.show_next() == "\\": 82 | current += next(iterator) 83 | current += next(iterator) 84 | if iterator.show_next() == "\n": 85 | current += next(iterator) 86 | if re.match(r'^\s+$', str(iterator.show_next())): 87 | current += next(iterator) 88 | 89 | if (re.match(r'^[_\d]+$', current) and match_on_next(r'^\.$', iterator)) or\ 90 | (current == "." 
and match_on_next(r'^\d+[_\d]*([jJ]|[eE]\d*)?$', iterator)): 91 | current += next(iterator) 92 | 93 | if match_on_next(r'^[_\d]*[jJ]?$', iterator) and match_on_next(r'^[_\d]*[jJ]?$', iterator).group(): 94 | current += next(iterator) 95 | 96 | if re.match(r'^\d+\.$', current) and match_on_next(r'^\d*[eE]\d*$', iterator): 97 | current += next(iterator) 98 | 99 | if re.match(r'^\d+\.?[eE]$', current) and match_on_next(r'^\d+$', iterator): 100 | current += next(iterator) 101 | 102 | if re.match(r'^\d*\.?\d*[eE]$', current) and not re.match('[eE]', current) and match_on_next(r'^[-+]$', iterator) and iterator.show_next(2) and re.match(r'^\d+$', iterator.show_next(2)): 103 | current += next(iterator) 104 | current += next(iterator) 105 | 106 | # edge case where 2 dots follow themselves but not 3 (an ellipsis) 107 | if current == "..": 108 | yield "." 109 | yield "." 110 | continue 111 | 112 | yield current 113 | 114 | 115 | def matching_found(to_group, current, target): 116 | return target in [x[1] for x in to_group if x[0] == current] 117 | -------------------------------------------------------------------------------- /add_new_grammar.md: -------------------------------------------------------------------------------- 1 | # How to modify what Baron can parse 2 | 3 | This is a todo list of things to do to allows baron to parse new syntax. 4 | 5 | This is the full version, for minor things like adding a new binary operator (like the "@" for matrix multiplication) this is not needed. 
6 | 7 | # Checklists 8 | 9 | ### Preparation 10 | 11 | - [ ] first of all start by comparing [the grammar from python 2.7](https://docs.python.org/2/reference/grammar.html) with the [targeted version](https://docs.python.org/3.7/reference/grammar.html) (also available in https://github.com/PyCQA/baron/tree/master/grammar) 12 | - [ ] check the reference page here https://baron.readthedocs.io/en/latest/grammar.html to see if things are already planned 13 | - [ ] look at [baron's grammar](https://github.com/PyCQA/baron/blob/master/grammar/baron_grammar) to check if it's not colyding with something already done (very low chance) 14 | - [ ] does the lexer needs to be modified? This is the case for new keywords and new statements 15 | - [ ] be mentally prepared that you'll need to write tests for everything 16 | 17 | ### Modification 18 | 19 | Lexer: 20 | 21 | - [ ] if you need to modify the lexer, stars with it, check all the lexer steps (found here: https://github.com/PyCQA/baron/blob/master/baron/baron.py#L69, the correct line might change in the futur, it's the tokenize function) 22 | - `split` only needs to be modified if python ever introduce new character like "?" for example 23 | - `group` is if 2 characters needs to be merged like "?" 
and "=" 24 | - `_tokenize` is for new token, obviously, like new keywords or new grouped characters 25 | - `space_group` will need to be modified for new keywords or statement, it's quite tricky, it's to group space on neighbour tokens (they will be unfold during grammar parsing) following the general rules of "a node needs to be responsible for its formatting" 26 | - `inner_group` is a variation of the previous one, it's for the case of tokens between `() [] {}` 27 | - `mark_indentation` is to handle inserting `IDENT`/`DEDENT` tokens, it very unlikely you'll ever need to work on this one except if python includes new statements (like the `with` statement) 28 | 29 | - [ ] have tests for everything regarding the lexer (if possible in a TDD fashion) 30 | 31 | Grammar: 32 | 33 | The hardest part is going to be to correctly design the extension of the tree with new or by modifying existing nodes (if needed). 34 | 35 | Before anything: RedBaron (and not Baron) is an API design project to make writing code that analyse and modify source code as easy as possible, Baron is here to support this task, this mean that this a tree designed to be intuitive to human, no easy to handle for interpreters. 36 | 37 | Therefor, when you design a modification or an addition to the tree, you need to answer to the question: what will be the easiest to handle and the more intuitive for humans. 38 | 39 | Here are some general advices: 40 | 41 | - when that makes sens, prefer flat structure with lower number of nodes instead of sub nodes. 
For example: for the "async" keyword, extend the related nodes instead of creating a subnode 42 | - prefer lists other single-child series of branches of a tree, for example, the python code "a.b.c.d" shouldn't be structured as "d->c->b->a" like in ast.py but as "[a, b, c, d]" 43 | - uses attributes and nodes name as close as possible to python keywords and what is used in the python community (and close to the grammar) 44 | 45 | Regarding the implementation: 46 | 47 | - [ ] try to find the good file in which to put your code, the name and content should be enough for that https://github.com/PyCQA/baron/tree/master/baron 48 | - [ ] write/update tests for everything regarding producing the new additions to the tree 49 | - [ ] implement the new grammar (if relevant) 50 | - [ ] modifying the rendering tree in [render.py](https://github.com/PyCQA/baron/blob/master/baron/render.py) 51 | - [ ] write rendering and, if needed, rendering after modification, tests for everything here https://github.com/PyCQA/baron/blob/master/tests/test_dumper.py 52 | 53 | And you should be good, congratz if you reached this point! 
54 | 55 | ### Completion, documentation 56 | 57 | - [ ] modify the reference page https://baron.readthedocs.io/en/latest/grammar.html 58 | - [ ] [modify baron's grammar](https://github.com/PyCQA/baron/blob/master/grammar/baron_grammar) 59 | - [ ] consider implementing the new additions in [RedBaron](https://github.com/pycqa/redbaron) 60 | - [ ] udpate CHANGELOG 61 | -------------------------------------------------------------------------------- /baron/tokenizer.py: -------------------------------------------------------------------------------- 1 | import re 2 | from .utils import BaronError 3 | 4 | 5 | class UnknowItem(BaronError): 6 | pass 7 | 8 | 9 | KEYWORDS = ("and", "as", "assert", "break", "class", "continue", "def", "del", 10 | "elif", "else", "except", "exec", "finally", "for", "from", 11 | "global", "nonlocal", "if", "import", "in", "is", "lambda", "not", 12 | "or", "pass", "print", "raise", "return", "try", "while", "with", 13 | "yield") 14 | 15 | TOKENS = ( 16 | (r'[a-zA-Z_]\w*', 'NAME'), 17 | (r'0', 'INT'), 18 | (r'[-+]?\d+[eE][-+]?\d+[jJ]', 'FLOAT_EXPONANT_COMPLEX'), 19 | (r'[-+]?\d+.\d?[eE][-+]?\d+[jJ]', 'FLOAT_EXPONANT_COMPLEX'), 20 | (r'[-+]?\d?.\d+[eE][-+]?\d+[jJ]', 'FLOAT_EXPONANT_COMPLEX'), 21 | (r'\d+[eE][-+]?\d*', 'FLOAT_EXPONANT'), 22 | (r'\d+\.\d*[eE][-+]?\d*', 'FLOAT_EXPONANT'), 23 | (r'\.\d+[eE][-+]?\d*', 'FLOAT_EXPONANT'), 24 | (r'\d*\.\d+[jJ]', 'COMPLEX'), 25 | (r'\d+\.[jJ]', 'COMPLEX'), 26 | (r'\d+[jJ]', 'COMPLEX'), 27 | (r'\d+\.', 'FLOAT'), 28 | (r'\d*[_\d]*\.[_\d]+[lL]?', 'FLOAT'), 29 | (r'\d+[_\d]+\.[_\d]*[lL]?', 'FLOAT'), 30 | (r'\.', 'DOT'), 31 | (r'[1-9]+[_\d]*[lL]', 'LONG'), 32 | (r'[1-9]+[_\d]*', 'INT'), 33 | (r'0[xX][\d_a-fA-F]+[lL]?', 'HEXA'), 34 | (r'(0[oO][0-7]+)|(0[0-7_]*)[lL]?', 'OCTA'), 35 | (r'0[bB][01_]+[lL]?', 'BINARY'), 36 | (r'\(', 'LEFT_PARENTHESIS'), 37 | (r'\)', 'RIGHT_PARENTHESIS'), 38 | (r':', 'COLON'), 39 | (r',', 'COMMA'), 40 | (r';', 'SEMICOLON'), 41 | (r'@', 'AT'), 42 | (r'\+', 'PLUS'), 43 | (r'-', 
'MINUS'), 44 | (r'\*', 'STAR'), 45 | (r'/', 'SLASH'), 46 | (r'\|', 'VBAR'), 47 | (r'&', 'AMPER'), 48 | (r'@', 'AT'), 49 | (r'<', 'LESS'), 50 | (r'>', 'GREATER'), 51 | (r'=', 'EQUAL'), 52 | (r'%', 'PERCENT'), 53 | (r'\[', 'LEFT_SQUARE_BRACKET'), 54 | (r'\]', 'RIGHT_SQUARE_BRACKET'), 55 | (r'\{', 'LEFT_BRACKET'), 56 | (r'\}', 'RIGHT_BRACKET'), 57 | (r'`', 'BACKQUOTE'), 58 | (r'==', 'EQUAL_EQUAL'), 59 | (r'<>', 'NOT_EQUAL'), 60 | (r'!=', 'NOT_EQUAL'), 61 | (r'<=', 'LESS_EQUAL'), 62 | (r'>=', 'GREATER_EQUAL'), 63 | (r'~', 'TILDE'), 64 | (r'\^', 'CIRCUMFLEX'), 65 | (r'<<', 'LEFT_SHIFT'), 66 | (r'>>', 'RIGHT_SHIFT'), 67 | (r'\*\*', 'DOUBLE_STAR'), 68 | (r'\+=', 'PLUS_EQUAL'), 69 | (r'-=', 'MINUS_EQUAL'), 70 | (r'@=', 'AT_EQUAL'), 71 | (r'\*=', 'STAR_EQUAL'), 72 | (r'/=', 'SLASH_EQUAL'), 73 | (r'%=', 'PERCENT_EQUAL'), 74 | (r'&=', 'AMPER_EQUAL'), 75 | (r'\|=', 'VBAR_EQUAL'), 76 | (r'\^=', 'CIRCUMFLEX_EQUAL'), 77 | (r'<<=', 'LEFT_SHIFT_EQUAL'), 78 | (r'>>=', 'RIGHT_SHIFT_EQUAL'), 79 | (r'\.\.\.', 'ELLIPSIS'), 80 | (r'->', 'RIGHT_ARROW'), 81 | (r'\*\*=', 'DOUBLE_STAR_EQUAL'), 82 | (r'//', 'DOUBLE_SLASH'), 83 | (r'//=', 'DOUBLE_SLASH_EQUAL'), 84 | (r'\n', 'ENDL'), 85 | (r'\r\n', 'ENDL'), 86 | (r'#.*', 'COMMENT'), 87 | (r'(\s|\\\n|\\\r\n)+', 'SPACE'), 88 | (r'["\'](.|\n|\r)*["\']', 'STRING'), 89 | (r'[uU]["\'](.|\n|\r)*["\']', 'UNICODE_STRING'), 90 | (r'[fF]["\'](.|\n|\r)*["\']', 'INTERPOLATED_STRING'), 91 | (r'[rR]["\'](.|\n|\r)*["\']', 'RAW_STRING'), 92 | (r'[bB]["\'](.|\n|\r)*["\']', 'BINARY_STRING'), 93 | (r'[uU][rR]["\'](.|\n|\r)*["\']', 'UNICODE_RAW_STRING'), 94 | (r'[bB][rR]["\'](.|\n|\r)*["\']', 'BINARY_RAW_STRING'), 95 | (r'[fF][rR]["\'](.|\n|\r)*["\']', 'INTERPOLATED_RAW_STRING'), 96 | (r'[rR][fF]["\'](.|\n|\r)*["\']', 'INTERPOLATED_RAW_STRING'), 97 | ) 98 | 99 | 100 | TOKENS = [(re.compile('^' + x[0] + '$'), x[1]) for x in TOKENS] 101 | 102 | 103 | def tokenize(sequence, print_function=False): 104 | return list(tokenize_generator(sequence, print_function)) 105 | 
def tokenize_generator(sequence, print_function=False):
    """Yield a (TOKEN_NAME, value) pair for each item of *sequence*.

    Keywords are uppercased into their own token types; every other item
    is matched against the TOKENS regex table. A final ('ENDMARKER', '')
    token followed by a bare None sentinel is emitted so consumers can
    detect the end of the stream.

    Raises UnknowItem when an item matches no token pattern.
    """
    # Bug fix: print_function was previously dropped here
    # (tokenize_current_keywords was called with no argument), so
    # "print"/"exec" were always tokenized as keywords even in
    # print_function mode, making the filtering in
    # tokenize_current_keywords dead code.
    current_keywords = tokenize_current_keywords(print_function)

    for item in sequence:
        if item in current_keywords:
            yield (item.upper(), item)
            continue

        for candidate, token_name in TOKENS:
            if candidate.match(item):
                yield (token_name, item)
                break
        else:
            raise UnknowItem("Can't find a matching token for this item: '%s'" % item)
    yield ('ENDMARKER', '')
    yield
newline_regex = re.compile("(\r\n|\n|\r)")


def is_newline(text):
    """Return a truthy match object if *text* starts with a newline sequence."""
    return newline_regex.match(text)


def split_on_newlines(text):
    """Yield *text* split into segments, keeping each newline as its own item.

    A leading or trailing newline produces an empty string segment next
    to it; text containing no newline is yielded back unchanged.
    """
    # The previous implementation guarded on "if not newlines:", but
    # re.finditer always returns a (truthy) iterator object, so that
    # branch was dead code; the loop below handles the no-newline case
    # naturally by yielding the whole text once.
    current_position = 0
    for newline in newline_regex.finditer(text):
        yield text[current_position:newline.start(1)]
        yield text[newline.start(1):newline.end(1)]
        current_position = newline.end(1)
    yield text[current_position:]
self, other: not other < self), 119 | ('__ge__', lambda self, other: not self < other)], 120 | '__le__': [('__ge__', lambda self, other: other <= self), 121 | ('__lt__', lambda self, other: not other <= self), 122 | ('__gt__', lambda self, other: not self <= other)], 123 | '__gt__': [('__lt__', lambda self, other: other > self), 124 | ('__ge__', lambda self, other: not other > self), 125 | ('__le__', lambda self, other: not self > other)], 126 | '__ge__': [('__le__', lambda self, other: other >= self), 127 | ('__gt__', lambda self, other: not other >= self), 128 | ('__lt__', lambda self, other: not self >= other)] 129 | } 130 | roots = set(dir(cls)) & set(convert) 131 | if not roots: 132 | raise ValueError('must define at least one ordering operation: < > <= >=') # noqa 133 | root = max(roots) # prefer __lt__ to __le__ to __gt__ to __ge__ 134 | for opname, opfunc in convert[root]: 135 | if opname not in roots: 136 | opfunc.__name__ = opname 137 | opfunc.__doc__ = getattr(int, opname).__doc__ 138 | setattr(cls, opname, opfunc) 139 | return cls 140 | -------------------------------------------------------------------------------- /docs/advanced.rst: -------------------------------------------------------------------------------- 1 | Advanced Usage 2 | ============== 3 | 4 | The topics presented here are less often needed but are still very useful. 5 | 6 | Locate a Node 7 | ------------- 8 | 9 | Since Baron produces a tree, a path is sufficient to locate univocally 10 | a node in the tree. A common task where a path is involved is when 11 | translating a position in a file (a line and a column) into a node of 12 | the FST. 13 | 14 | Baron provides 2 helper functions for that: 15 | 16 | * :file:`position_to_node(fst, line, column)` 17 | * :file:`position_to_path(fst, line, column)` 18 | 19 | Both take a FST tree as first argument, then the line number and the 20 | column number. Line and column numbers **start at 1**, like in a text 21 | editor. 
22 | 23 | :file:`position_to_node` returns an FST node. This is okay if you only 24 | want to know which node it is but not enough to locate the node in the 25 | tree. Indeed, there can be mutiple identical nodes within the tree. 26 | 27 | That's where :file:`position_to_path` is useful. It returns a list of 28 | int and strings which represent either the key to take in a Node or the 29 | index in a ListNode. For example: :file:`["target", "value", 0]`) 30 | 31 | Let's first see the difference between the two functions: 32 | 33 | .. ipython:: python 34 | 35 | from baron import parse 36 | from baron.path import position_to_node, position_to_path 37 | from baron.helpers import show_node 38 | 39 | some_code = """from baron import parse\nfrom baron.helpers import show_node\nfst = parse("a = 1")\nshow_node(fst)""" 40 | print some_code 41 | 42 | tree = parse(some_code) 43 | 44 | node = position_to_node(tree, (3, 8)) 45 | show_node(node) 46 | path = position_to_path(tree, (3, 8)) 47 | path 48 | 49 | The first one gives the node and the second one the node's path in the 50 | tree. The latter tells you that to get to the node, you must take the 51 | 4th index of the root ListNode, followed twice by the "value" key of 52 | first the "assignment" Node and next the "atomtrailers" Node. Finally, 53 | take the 0th index in the resulting ListNode: 54 | 55 | .. ipython:: python 56 | 57 | show_node(tree[4]["value"]["value"][0]) 58 | 59 | Neat. This is so common that there is a function to do that: 60 | 61 | .. ipython:: python 62 | 63 | from baron.path import path_to_node 64 | 65 | show_node(path_to_node(tree, path)) 66 | 67 | With the two above, that's a total of three functions to locate a node. 68 | 69 | You can also locate easily a "constant" node like a left parenthesis in 70 | a :file:`funcdef` node: 71 | 72 | .. 
ipython:: python 73 | 74 | from baron.path import position_to_path 75 | 76 | fst = parse("a(1)") 77 | 78 | position_to_path(fst, (1, 1)) 79 | position_to_path(fst, (1, 2)) 80 | position_to_path(fst, (1, 3)) 81 | position_to_path(fst, (1, 4)) 82 | 83 | By the way, out of bound positions are handled gracefully: 84 | 85 | .. ipython:: python 86 | 87 | print(position_to_node(fst, (-1, 1))) 88 | print(position_to_node(fst, (1, 0))) 89 | print(position_to_node(fst, (1, 5))) 90 | print(position_to_node(fst, (2, 4))) 91 | 92 | 93 | Bounding Box 94 | ------------ 95 | 96 | Sometimes you want to know what are the left most and right most 97 | position of a rendered node or part of it. It is not a trivial task 98 | since you do not know easily each rendered line's length. That's why 99 | baron provides two helpers: 100 | 101 | * :file:`node_to_bounding_box(fst)` 102 | * :file:`path_to_bounding_box(fst, path)` 103 | 104 | Examples are worth a thousand words so: 105 | 106 | .. ipython:: python 107 | 108 | from baron.path import node_to_bounding_box, path_to_bounding_box 109 | from baron import dumps 110 | 111 | fst = parse("a(1)\nb(2)") 112 | 113 | fst 114 | print dumps(fst) 115 | node_to_bounding_box(fst) 116 | path_to_bounding_box(fst, []) 117 | 118 | fst[0] 119 | print dumps(fst[0]) 120 | node_to_bounding_box(fst[0]) 121 | path_to_bounding_box(fst, [0]) 122 | 123 | fst[0]["value"] 124 | print dumps(fst[0]["value"]) 125 | node_to_bounding_box(fst[1]) 126 | path_to_bounding_box(fst, [1]) 127 | 128 | fst[0]["value"][1] 129 | print dumps(fst[0]["value"][1]) 130 | node_to_bounding_box(fst[0]["value"][1]) 131 | path_to_bounding_box(fst, [0, "value", 1]) 132 | 133 | fst[0]["value"][1]["value"] 134 | print dumps(fst[0]["value"][1]["value"]) 135 | node_to_bounding_box(fst[0]["value"][1]["value"]) 136 | path_to_bounding_box(fst, [0, "value", 1, "value"]) 137 | 138 | The bounding box's `top_left` and `bottom_right` positions follow the 139 | same convention as for when locating a node: 
the line and column start 140 | at 1. 141 | 142 | As you can see, the major difference between the two functions is that 143 | :file:`node_to_bounding_box` will always give a left position of 144 | :file:`(1, 1)` since it considers you want the bounding box of the whole 145 | node while :file:`path_to_bounding_box` takes the location of the node 146 | in the fst into account. 147 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. linkcheck to check all external links for integrity 37 | echo. 
doctest to run all doctests embedded in the documentation if enabled 38 | goto end 39 | ) 40 | 41 | if "%1" == "clean" ( 42 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 43 | del /q /s %BUILDDIR%\* 44 | goto end 45 | ) 46 | 47 | if "%1" == "html" ( 48 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 49 | if errorlevel 1 exit /b 1 50 | echo. 51 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 52 | goto end 53 | ) 54 | 55 | if "%1" == "dirhtml" ( 56 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 57 | if errorlevel 1 exit /b 1 58 | echo. 59 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 60 | goto end 61 | ) 62 | 63 | if "%1" == "singlehtml" ( 64 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 68 | goto end 69 | ) 70 | 71 | if "%1" == "pickle" ( 72 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished; now you can process the pickle files. 76 | goto end 77 | ) 78 | 79 | if "%1" == "json" ( 80 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished; now you can process the JSON files. 84 | goto end 85 | ) 86 | 87 | if "%1" == "htmlhelp" ( 88 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can run HTML Help Workshop with the ^ 92 | .hhp project file in %BUILDDIR%/htmlhelp. 93 | goto end 94 | ) 95 | 96 | if "%1" == "qthelp" ( 97 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 98 | if errorlevel 1 exit /b 1 99 | echo. 
100 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 101 | .qhcp project file in %BUILDDIR%/qthelp, like this: 102 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Baron.qhcp 103 | echo.To view the help file: 104 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Baron.ghc 105 | goto end 106 | ) 107 | 108 | if "%1" == "devhelp" ( 109 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 110 | if errorlevel 1 exit /b 1 111 | echo. 112 | echo.Build finished. 113 | goto end 114 | ) 115 | 116 | if "%1" == "epub" ( 117 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 118 | if errorlevel 1 exit /b 1 119 | echo. 120 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 121 | goto end 122 | ) 123 | 124 | if "%1" == "latex" ( 125 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 129 | goto end 130 | ) 131 | 132 | if "%1" == "text" ( 133 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The text files are in %BUILDDIR%/text. 137 | goto end 138 | ) 139 | 140 | if "%1" == "man" ( 141 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 145 | goto end 146 | ) 147 | 148 | if "%1" == "texinfo" ( 149 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 150 | if errorlevel 1 exit /b 1 151 | echo. 152 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 153 | goto end 154 | ) 155 | 156 | if "%1" == "gettext" ( 157 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 158 | if errorlevel 1 exit /b 1 159 | echo. 160 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 
161 | goto end 162 | ) 163 | 164 | if "%1" == "changes" ( 165 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 166 | if errorlevel 1 exit /b 1 167 | echo. 168 | echo.The overview file is in %BUILDDIR%/changes. 169 | goto end 170 | ) 171 | 172 | if "%1" == "linkcheck" ( 173 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 174 | if errorlevel 1 exit /b 1 175 | echo. 176 | echo.Link check complete; look for any errors in the above output ^ 177 | or in %BUILDDIR%/linkcheck/output.txt. 178 | goto end 179 | ) 180 | 181 | if "%1" == "doctest" ( 182 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 183 | if errorlevel 1 exit /b 1 184 | echo. 185 | echo.Testing of doctests in the sources finished, look at the ^ 186 | results in %BUILDDIR%/doctest/output.txt. 187 | goto end 188 | ) 189 | 190 | :end 191 | -------------------------------------------------------------------------------- /baron/inner_formatting_grouper.py: -------------------------------------------------------------------------------- 1 | from .utils import FlexibleIterator, BaronError 2 | 3 | 4 | class UnExpectedFormattingToken(BaronError): 5 | pass 6 | 7 | 8 | class GroupingError(BaronError): 9 | pass 10 | 11 | 12 | GROUP_THOSE = ( 13 | "ENDL", 14 | 15 | # TODO test those 2 16 | "COMMENT", 17 | "SPACE", 18 | ) 19 | 20 | ENTER_GROUPING_MODE = ( 21 | "LEFT_PARENTHESIS", 22 | "LEFT_BRACKET", 23 | "LEFT_SQUARE_BRACKET", 24 | ) 25 | 26 | QUIT_GROUPING_MODE = ( 27 | "RIGHT_PARENTHESIS", 28 | "RIGHT_BRACKET", 29 | "RIGHT_SQUARE_BRACKET", 30 | ) 31 | 32 | GROUP_ON = ( 33 | "COMMA", 34 | "COLON", 35 | 36 | # TODO test everything bellow 37 | "STRING", 38 | "RAW_STRING", 39 | "INTERPOLATED_STRING", 40 | "INTERPOLATED_RAW_STRING", 41 | "BINARY_STRING", 42 | "BINARY_RAW_STRING", 43 | "UNICODE_STRING", 44 | "UNICODE_RAW_STRING", 45 | 46 | "AS", 47 | "IMPORT", 48 | "DOUBLE_STAR", 49 | "DOT", 50 | "LEFT_SQUARE_BRACKET", 51 | "STAR", 52 | "SLASH", 53 | "PERCENT", 54 | "DOUBLE_SLASH", 
def append_to_token_after(token, to_append_list):
    # Return a copy of ``token`` with ``to_append_list`` appended to its
    # trailing ("after") formatting slot.  Tokens are tuples of 2, 3 or 4
    # items: (name, value[, before_formatting[, after_formatting]]).
    if len(token) == 2:
        return (token[0], token[1], [], to_append_list)
    elif len(token) == 3:
        return (token[0], token[1], token[2], to_append_list)
    elif len(token) == 4:
        return (token[0], token[1], token[2], token[3] + to_append_list)


def append_to_token_before(token, to_append_list):
    # Return a copy of ``token`` with ``to_append_list`` prepended to its
    # leading ("before") formatting slot (same token shapes as above).
    if len(token) == 2:
        return (token[0], token[1], to_append_list, [])
    elif len(token) == 3:
        return (token[0], token[1], to_append_list + token[2], [])
    elif len(token) == 4:
        return (token[0], token[1], to_append_list + token[2], token[3])


def group(sequence):
    """Eagerly run group_generator() and return the grouped token list."""
    return list(group_generator(sequence))


def fail_on_bad_token(token, debug_file_content, in_grouping_mode):
    # Accept silently any token that grouping is allowed on; otherwise
    # raise a GroupingError carrying the last few reconstructed source
    # lines so the failure is locatable in the original input.
    # NOTE(review): ``token`` can be None here if the sequence ends while
    # formatting tokens are pending (see caller) — ``token[0]`` would then
    # raise TypeError; presumably unreachable for well-formed input.
    if token[0] in GROUP_ON:
        return

    debug_file_content += _append_to_debug_file_content(token)

    # Render the last 8 lines of the reconstructed source, numbered.
    debug_file_content = debug_file_content.split("\n")
    debug_file_content = list(zip(range(1, len(debug_file_content) + 1), debug_file_content))
    debug_file_content = debug_file_content[-8:]
    debug_file_content = "\n".join(["%4s %s" % (x[0], x[1]) for x in debug_file_content])
    raise GroupingError("Fail to group formatting tokens, here:\n%s <----\n\n'%s' should have been in: %s\n\nCurrent value of 'in_grouping_mode': %s" % (debug_file_content, token, ', '.join(sorted(GROUP_ON)), in_grouping_mode))


def _append_to_debug_file_content(token):
    # Reconstruct the source text a token stands for: its "before"
    # formatting (slot 2, if present) + its value + its "after" formatting
    # (slot 3, if present).  Used only to build debug/error output.
    before_debug = "".join(map(lambda x: x[1], token[2] if len(token) >= 3 else []))
    after_debug = "".join(map(lambda x: x[1], token[3] if len(token) >= 4 else []))
    return before_debug + token[1] + after_debug


def group_generator(sequence):
    """Attach formatting tokens found inside brackets to their neighbours.

    Yields the tokens of ``sequence``; ENDL/COMMENT/SPACE tokens that
    appear between (), [] or {} (where newlines are not syntactically
    significant) are folded into the "before" or "after" formatting slot
    of an adjacent significant token instead of being yielded on their
    own.
    """
    iterator = FlexibleIterator(sequence)
    current = None, None
    # Nesting depth of (), [] and {}.  Regrouping only happens while > 0.
    in_grouping_mode = 0
    # Reconstructed source so far; only used for error messages.
    debug_file_content = ""

    while True:
        if iterator.end():
            return

        current = next(iterator)
        debug_file_content += _append_to_debug_file_content(current)

        # Track bracket nesting depth.
        if current[0] in ENTER_GROUPING_MODE:
            in_grouping_mode += 1
        elif current[0] in QUIT_GROUPING_MODE:
            in_grouping_mode -= 1
            assert in_grouping_mode >= 0

        if in_grouping_mode:
            if current[0] in GROUP_THOSE:
                # A run of formatting tokens inside brackets: collect it
                # and attach it *before* the next significant token.
                to_group = [current]
                while iterator.show_next() and iterator.show_next()[0] in GROUP_THOSE:
                    to_group.append(next(iterator))
                    debug_file_content += _append_to_debug_file_content(to_group[-1])

                # XXX don't remember how (:() but I can end up finding a
                # DEDENT/INDENT token in this situation and I don't want to
                # group on it. Need to do test for that.
                # NOTE(review): show_next() may be None here if the
                # sequence ends inside brackets — would raise TypeError;
                # confirm whether that is reachable.
                if iterator.show_next()[0] in ("INDENT", "DEDENT"):
                    yield next(iterator)

                fail_on_bad_token(iterator.show_next(), debug_file_content, in_grouping_mode)

                current = append_to_token_before(next(iterator), to_group)

                # The token we just grouped onto may itself open or close
                # a bracket: keep the depth counter in sync.
                if current[0] in ENTER_GROUPING_MODE:
                    in_grouping_mode += 1
                # TODO test
                if current[0] in QUIT_GROUPING_MODE:
                    in_grouping_mode -= 1
                    assert in_grouping_mode >= 0
                    # A closing bracket: yield it right away so trailing
                    # formatting (now possibly outside brackets again) is
                    # not absorbed into it.
                    yield current
                    continue

            # Significant token inside brackets: absorb any formatting
            # tokens that directly follow it into its "after" slot.
            if current[0] in GROUP_ON:
                while iterator.show_next() and iterator.show_next()[0] in GROUP_THOSE:
                    debug_file_content += _append_to_debug_file_content(iterator.show_next())
                    current = append_to_token_after(current, [next(iterator)])

        yield current
16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 
58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Baron.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Baron.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Baron" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Baron" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 
110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 
154 | -------------------------------------------------------------------------------- /grammar/reference_grammar_python2: -------------------------------------------------------------------------------- 1 | # Grammar for Python 2 | 3 | # Note: Changing the grammar specified in this file will most likely 4 | # require corresponding changes in the parser module 5 | # (../Modules/parsermodule.c). If you can't make the changes to 6 | # that module yourself, please co-ordinate the required changes 7 | # with someone who can; ask around on python-dev for help. Fred 8 | # Drake will probably be listening there. 9 | 10 | # NOTE WELL: You should also follow all the steps listed in PEP 306, 11 | # "How to Change Python's Grammar" 12 | 13 | # Start symbols for the grammar: 14 | # single_input is a single interactive statement; 15 | # file_input is a module or sequence of commands read from an input file; 16 | # eval_input is the input for the eval() and input() functions. 17 | # NB: compound_stmt in single_input is followed by extra NEWLINE! 
18 | single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE 19 | file_input: (NEWLINE | stmt)* ENDMARKER 20 | eval_input: testlist NEWLINE* ENDMARKER 21 | 22 | decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE 23 | decorators: decorator+ 24 | decorated: decorators (classdef | funcdef) 25 | funcdef: 'def' NAME parameters ':' suite 26 | parameters: '(' [varargslist] ')' 27 | varargslist: ((fpdef ['=' test] ',')* 28 | ('*' NAME [',' '**' NAME] | '**' NAME) | 29 | fpdef ['=' test] (',' fpdef ['=' test])* [',']) 30 | fpdef: NAME | '(' fplist ')' 31 | fplist: fpdef (',' fpdef)* [','] 32 | 33 | stmt: simple_stmt | compound_stmt 34 | simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE 35 | small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | 36 | import_stmt | global_stmt | exec_stmt | assert_stmt) 37 | expr_stmt: testlist (augassign (yield_expr|testlist) | 38 | ('=' (yield_expr|testlist))*) 39 | augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | 40 | '<<=' | '>>=' | '**=' | '//=') 41 | # For normal assignments, additional restrictions enforced by the interpreter 42 | print_stmt: 'print' ( [ test (',' test)* [','] ] | 43 | '>>' test [ (',' test)+ [','] ] ) 44 | del_stmt: 'del' exprlist 45 | pass_stmt: 'pass' 46 | flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt 47 | break_stmt: 'break' 48 | continue_stmt: 'continue' 49 | return_stmt: 'return' [testlist] 50 | yield_stmt: yield_expr 51 | raise_stmt: 'raise' [test [',' test [',' test]]] 52 | import_stmt: import_name | import_from 53 | import_name: 'import' dotted_as_names 54 | import_from: ('from' ('.'* dotted_name | '.'+) 55 | 'import' ('*' | '(' import_as_names ')' | import_as_names)) 56 | import_as_name: NAME ['as' NAME] 57 | dotted_as_name: dotted_name ['as' NAME] 58 | import_as_names: import_as_name (',' import_as_name)* [','] 59 | dotted_as_names: dotted_as_name (',' dotted_as_name)* 60 | dotted_name: NAME ('.' 
NAME)* 61 | global_stmt: 'global' NAME (',' NAME)* 62 | exec_stmt: 'exec' expr ['in' test [',' test]] 63 | assert_stmt: 'assert' test [',' test] 64 | 65 | compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated 66 | if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] 67 | while_stmt: 'while' test ':' suite ['else' ':' suite] 68 | for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] 69 | try_stmt: ('try' ':' suite 70 | ((except_clause ':' suite)+ 71 | ['else' ':' suite] 72 | ['finally' ':' suite] | 73 | 'finally' ':' suite)) 74 | with_stmt: 'with' with_item (',' with_item)* ':' suite 75 | with_item: test ['as' expr] 76 | # NB compile.c makes sure that the default except clause is last 77 | except_clause: 'except' [test [('as' | ',') test]] 78 | suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT 79 | 80 | # Backward compatibility cruft to support: 81 | # [ x for x in lambda: True, lambda: False if x() ] 82 | # even while also allowing: 83 | # lambda x: 5 if x else 2 84 | # (But not a mix of the two) 85 | testlist_safe: old_test [(',' old_test)+ [',']] 86 | old_test: or_test | old_lambdef 87 | old_lambdef: 'lambda' [varargslist] ':' old_test 88 | 89 | test: or_test ['if' or_test 'else' test] | lambdef 90 | lambdef: 'lambda' [varargslist] ':' test 91 | or_test: and_test ('or' and_test)* 92 | and_test: not_test ('and' not_test)* 93 | not_test: 'not' not_test | comparison 94 | comparison: expr (comp_op expr)* 95 | comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' 96 | expr: xor_expr ('|' xor_expr)* 97 | xor_expr: and_expr ('^' and_expr)* 98 | and_expr: shift_expr ('&' shift_expr)* 99 | shift_expr: arith_expr (('<<'|'>>') arith_expr)* 100 | arith_expr: term (('+'|'-') term)* 101 | term: factor (('*'|'/'|'%'|'//') factor)* 102 | factor: ('+'|'-'|'~') factor | power 103 | power: atom trailer* ['**' factor] 104 | atom: ('(' [yield_expr|testlist_comp] ')' | 105 | '[' 
#!/usr/bin/python
# -*- coding:utf-8 -*-

"""Tests for baron.inner_formatting_grouper.group.

Each test feeds a token sequence (tuples of name, value and optional
before/after formatting lists) to ``group`` and asserts the exact
regrouped output.
"""

from baron.inner_formatting_grouper import group


def test_empty():
    # An empty token sequence stays empty.
    assert group([]) == []


def test_some_stuff():
    # Outside any bracket, tokens pass through unchanged.
    assert group([
        ('INT', '1'),
        ('PLUS', '+', [('SPACE', ' ')], [('SPACE', ' ')]),
        ('INT', '2')
    ]) == [
        ('INT', '1'),
        ('PLUS', '+', [('SPACE', ' ')], [('SPACE', ' ')]),
        ('INT', '2')
    ]


def test_parenthesis():
    # An ENDL inside parentheses is attached after the opening paren.
    assert group([
        ('LEFT_PARENTHESIS', '('),
        ('ENDL', '\n'),
        ('RIGHT_PARENTHESIS', ')'),
    ]) == [
        ('LEFT_PARENTHESIS', '(', [], [('ENDL', '\n')]),
        ('RIGHT_PARENTHESIS', ')'),
    ]


def test_parenthesis_one_space():
    # Existing "before" formatting on the paren is preserved.
    assert group([
        ('LEFT_PARENTHESIS', '(', [('SPACE', ' ')]),
        ('ENDL', '\n'),
        ('RIGHT_PARENTHESIS', ')'),
    ]) == [
        ('LEFT_PARENTHESIS', '(', [('SPACE', ' ')], [('ENDL', '\n')]),
        ('RIGHT_PARENTHESIS', ')'),
    ]


def test_parenthesis_two_space():
    # Existing "after" formatting is extended, not replaced.
    assert group([
        ('LEFT_PARENTHESIS', '(', [('SPACE', ' ')], [('SPACE', ' ')]),
        ('ENDL', '\n'),
        ('RIGHT_PARENTHESIS', ')'),
    ]) == [
        ('LEFT_PARENTHESIS', '(', [('SPACE', ' ')], [('SPACE', ' '), ('ENDL', '\n')]),
        ('RIGHT_PARENTHESIS', ')'),
    ]


def test_two_parenthesis():
    # Consecutive formatting tokens are grouped together.
    assert group([
        ('LEFT_PARENTHESIS', '('),
        ('ENDL', '\n'),
        ('ENDL', '\n'),
        ('RIGHT_PARENTHESIS', ')'),
    ]) == [
        ('LEFT_PARENTHESIS', '(', [], [('ENDL', '\n'), ('ENDL', '\n')]),
        ('RIGHT_PARENTHESIS', ')'),
    ]


def test_two_parenthesis_comma():
    # Formatting attaches to the nearest preceding groupable token.
    assert group([
        ('LEFT_PARENTHESIS', '('),
        ('ENDL', '\n'),
        ('COMMA', ','),
        ('ENDL', '\n'),
        ('RIGHT_PARENTHESIS', ')'),
    ]) == [
        ('LEFT_PARENTHESIS', '(', [], [('ENDL', '\n'), ]),
        ('COMMA', ',', [], [('ENDL', '\n')]),
        ('RIGHT_PARENTHESIS', ')'),
    ]


def test_tuple_one():
    # Formatting around a NAME attaches to surrounding punctuation, never
    # to the NAME itself.
    assert group([
        ('LEFT_PARENTHESIS', '('),
        ('ENDL', '\n'),
        ('NAME', 'a'),
        ('ENDL', '\n'),
        ('COMMA', ','),
        ('ENDL', '\n'),
        ('NAME', 'a'),
        ('ENDL', '\n'),
        ('RIGHT_PARENTHESIS', ')'),
    ]) == [
        ('LEFT_PARENTHESIS', '(', [], [('ENDL', '\n'), ]),
        ('NAME', 'a'),
        ('COMMA', ',', [('ENDL', '\n')], [('ENDL', '\n')]),
        ('NAME', 'a'),
        ('RIGHT_PARENTHESIS', ')', [('ENDL', '\n')], []),
    ]


def test_set_one():
    # Same behaviour inside braces (set literal).
    assert group([
        ('LEFT_BRACKET', '{'),
        ('ENDL', '\n'),
        ('NAME', 'a'),
        ('ENDL', '\n'),
        ('COMMA', ','),
        ('ENDL', '\n'),
        ('NAME', 'a'),
        ('ENDL', '\n'),
        ('RIGHT_BRACKET', '}'),
    ]) == [
        ('LEFT_BRACKET', '{', [], [('ENDL', '\n'), ]),
        ('NAME', 'a'),
        ('COMMA', ',', [('ENDL', '\n')], [('ENDL', '\n')]),
        ('NAME', 'a'),
        ('RIGHT_BRACKET', '}', [('ENDL', '\n')], []),
    ]


def test_list_one():
    # Same behaviour inside square brackets (list literal).
    assert group([
        ('LEFT_SQUARE_BRACKET', '['),
        ('ENDL', '\n'),
        ('NAME', 'a'),
        ('ENDL', '\n'),
        ('COMMA', ','),
        ('ENDL', '\n'),
        ('NAME', 'a'),
        ('ENDL', '\n'),
        ('RIGHT_SQUARE_BRACKET', ']'),
    ]) == [
        ('LEFT_SQUARE_BRACKET', '[', [], [('ENDL', '\n'), ]),
        ('NAME', 'a'),
        ('COMMA', ',', [('ENDL', '\n')], [('ENDL', '\n')]),
        ('NAME', 'a'),
        ('RIGHT_SQUARE_BRACKET', ']', [('ENDL', '\n')], []),
    ]


def test_dict_one():
    # COLON groups formatting too (dict literal).
    assert group([
        ('LEFT_BRACKET', '{'),
        ('ENDL', '\n'),
        ('NAME', 'a'),
        ('ENDL', '\n'),
        ('COLON', ':'),
        ('ENDL', '\n'),
        ('NAME', 'a'),
        ('ENDL', '\n'),
        ('COMMA', ','),
        ('ENDL', '\n'),
        ('NAME', 'a'),
        ('ENDL', '\n'),
        ('RIGHT_BRACKET', '}'),
    ]) == [
        ('LEFT_BRACKET', '{', [], [('ENDL', '\n')]),
        ('NAME', 'a'),
        ('COLON', ':', [('ENDL', '\n')], [('ENDL', '\n')]),
        ('NAME', 'a'),
        ('COMMA', ',', [('ENDL', '\n')], [('ENDL', '\n')]),
        ('NAME', 'a'),
        ('RIGHT_BRACKET', '}', [('ENDL', '\n')], []),
    ]


def test_number_backslash():
    # Outside brackets, a SPACE token is left untouched.
    assert group([
        ('INT', '3'),
        ('SPACE', '\\'),
    ]) == [
        ('INT', '3'),
        ('SPACE', '\\'),
    ]


def test_number_backslash_newline():
    # A backslash-continuation SPACE is also left untouched.
    assert group([
        ('INT', '3'),
        ('SPACE', '\\\n'),
    ]) == [
        ('INT', '3'),
        ('SPACE', '\\\n'),
    ]


def test_nested_grouping_after_endl():
    """
    (b
       [0])
    """
    # Formatting before a nested opening bracket attaches before it.
    assert group([
        ('LEFT_PARENTHESIS', '('),
        ('NAME', 'b'),
        ('ENDL', '\n'),
        ('SPACE', ' '),
        ('LEFT_SQUARE_BRACKET', '['),
        ('INT', '0'),
        ('RIGHT_SQUARE_BRACKET', ']'),
        ('RIGHT_PARENTHESIS', ')'),
    ]) == [
        ('LEFT_PARENTHESIS', '('),
        ('NAME', 'b'),
        ('LEFT_SQUARE_BRACKET', '[', [('ENDL', '\n'), ('SPACE', ' ')], []),
        ('INT', '0'),
        ('RIGHT_SQUARE_BRACKET', ']'),
        ('RIGHT_PARENTHESIS', ')'),
    ]


def test_equal():
    """
    (a = b)
    """
    # EQUAL absorbs surrounding spaces inside parentheses.
    assert group([
        ('LEFT_PARENTHESIS', '('),
        ('NAME', 'a'),
        ('SPACE', ' '),
        ('EQUAL', '='),
        ('SPACE', ' '),
        ('RIGHT_PARENTHESIS', ')'),
    ]) == [
        ('LEFT_PARENTHESIS', '('),
        ('NAME', 'a'),
        ('EQUAL', '=', [('SPACE', ' ')], [('SPACE', ' ')]),
        ('RIGHT_PARENTHESIS', ')'),
    ]
9 | 10 | # NOTE WELL: You should also follow all the steps listed in PEP 306, 11 | # "How to Change Python's Grammar" 12 | 13 | # Start symbols for the grammar: 14 | # single_input is a single interactive statement; 15 | # file_input is a module or sequence of commands read from an input file; 16 | # eval_input is the input for the eval() functions. 17 | # NB: compound_stmt in single_input is followed by extra NEWLINE! 18 | single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE 19 | file_input: (NEWLINE | stmt)* ENDMARKER 20 | eval_input: testlist NEWLINE* ENDMARKER 21 | 22 | decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE 23 | decorators: decorator+ 24 | decorated: decorators (classdef | funcdef) 25 | funcdef: 'def' NAME parameters ['->' test] ':' suite 26 | parameters: '(' [typedargslist] ')' 27 | typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' 28 | ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] 29 | | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) 30 | tfpdef: NAME [':' test] 31 | varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' 32 | ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] 33 | | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) 34 | vfpdef: NAME 35 | 36 | stmt: simple_stmt | compound_stmt 37 | simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE 38 | small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | 39 | import_stmt | global_stmt | nonlocal_stmt | assert_stmt) 40 | expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | 41 | ('=' (yield_expr|testlist_star_expr))*) 42 | testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] 43 | augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | 44 | '<<=' | '>>=' | '**=' | '//=') 45 | # For normal assignments, additional restrictions enforced by the interpreter 46 | del_stmt: 'del' exprlist 47 | pass_stmt: 'pass' 48 | flow_stmt: 
break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt 49 | break_stmt: 'break' 50 | continue_stmt: 'continue' 51 | return_stmt: 'return' [testlist] 52 | yield_stmt: yield_expr 53 | raise_stmt: 'raise' [test ['from' test]] 54 | import_stmt: import_name | import_from 55 | import_name: 'import' dotted_as_names 56 | # note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS 57 | import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) 58 | 'import' ('*' | '(' import_as_names ')' | import_as_names)) 59 | import_as_name: NAME ['as' NAME] 60 | dotted_as_name: dotted_name ['as' NAME] 61 | import_as_names: import_as_name (',' import_as_name)* [','] 62 | dotted_as_names: dotted_as_name (',' dotted_as_name)* 63 | dotted_name: NAME ('.' NAME)* 64 | global_stmt: 'global' NAME (',' NAME)* 65 | nonlocal_stmt: 'nonlocal' NAME (',' NAME)* 66 | assert_stmt: 'assert' test [',' test] 67 | 68 | compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated 69 | if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] 70 | while_stmt: 'while' test ':' suite ['else' ':' suite] 71 | for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] 72 | try_stmt: ('try' ':' suite 73 | ((except_clause ':' suite)+ 74 | ['else' ':' suite] 75 | ['finally' ':' suite] | 76 | 'finally' ':' suite)) 77 | with_stmt: 'with' with_item (',' with_item)* ':' suite 78 | with_item: test ['as' expr] 79 | # NB compile.c makes sure that the default except clause is last 80 | except_clause: 'except' [test ['as' NAME]] 81 | suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT 82 | 83 | test: or_test ['if' or_test 'else' test] | lambdef 84 | test_nocond: or_test | lambdef_nocond 85 | lambdef: 'lambda' [varargslist] ':' test 86 | lambdef_nocond: 'lambda' [varargslist] ':' test_nocond 87 | or_test: and_test ('or' and_test)* 88 | and_test: not_test ('and' not_test)* 89 | not_test: 'not' not_test | comparison 90 | 
comparison: expr (comp_op expr)* 91 | # <> isn't actually a valid comparison operator in Python. It's here for the 92 | # sake of a __future__ import described in PEP 401 93 | comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' 94 | star_expr: '*' expr 95 | expr: xor_expr ('|' xor_expr)* 96 | xor_expr: and_expr ('^' and_expr)* 97 | and_expr: shift_expr ('&' shift_expr)* 98 | shift_expr: arith_expr (('<<'|'>>') arith_expr)* 99 | arith_expr: term (('+'|'-') term)* 100 | term: factor (('*'|'/'|'%'|'//') factor)* 101 | factor: ('+'|'-'|'~') factor | power 102 | power: atom trailer* ['**' factor] 103 | atom: ('(' [yield_expr|testlist_comp] ')' | 104 | '[' [testlist_comp] ']' | 105 | '{' [dictorsetmaker] '}' | 106 | NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False') 107 | testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) 108 | trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME 109 | subscriptlist: subscript (',' subscript)* [','] 110 | subscript: test | [test] ':' [test] [sliceop] 111 | sliceop: ':' [test] 112 | exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] 113 | testlist: test (',' test)* [','] 114 | dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) | 115 | (test (comp_for | (',' test)* [','])) ) 116 | 117 | classdef: 'class' NAME ['(' [arglist] ')'] ':' suite 118 | 119 | arglist: (argument ',')* (argument [','] 120 | |'*' test (',' argument)* [',' '**' test] 121 | |'**' test) 122 | # The reason that keywords are test nodes instead of NAME is that using NAME 123 | # results in an ambiguity. ast.c makes sure it's a NAME. 
124 | argument: test [comp_for] | test '=' test # Really [keyword '='] test 125 | comp_iter: comp_for | comp_if 126 | comp_for: 'for' exprlist 'in' or_test [comp_iter] 127 | comp_if: 'if' test_nocond [comp_iter] 128 | 129 | # not used in grammar, but may appear in "node" passed from Parser to Compiler 130 | encoding_decl: NAME 131 | 132 | yield_expr: 'yield' [yield_arg] 133 | yield_arg: 'from' test | testlist 134 | -------------------------------------------------------------------------------- /grammar/reference_grammar_python3.4: -------------------------------------------------------------------------------- 1 | # Grammar for Python 2 | 3 | # Note: Changing the grammar specified in this file will most likely 4 | # require corresponding changes in the parser module 5 | # (../Modules/parsermodule.c). If you can't make the changes to 6 | # that module yourself, please co-ordinate the required changes 7 | # with someone who can; ask around on python-dev for help. Fred 8 | # Drake will probably be listening there. 9 | 10 | # NOTE WELL: You should also follow all the steps listed at 11 | # https://docs.python.org/devguide/grammar.html 12 | 13 | # Start symbols for the grammar: 14 | # single_input is a single interactive statement; 15 | # file_input is a module or sequence of commands read from an input file; 16 | # eval_input is the input for the eval() functions. 17 | # NB: compound_stmt in single_input is followed by extra NEWLINE! 
18 | single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE 19 | file_input: (NEWLINE | stmt)* ENDMARKER 20 | eval_input: testlist NEWLINE* ENDMARKER 21 | 22 | decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE 23 | decorators: decorator+ 24 | decorated: decorators (classdef | funcdef) 25 | funcdef: 'def' NAME parameters ['->' test] ':' suite 26 | parameters: '(' [typedargslist] ')' 27 | typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' 28 | ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] 29 | | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) 30 | tfpdef: NAME [':' test] 31 | varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' 32 | ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] 33 | | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) 34 | vfpdef: NAME 35 | 36 | stmt: simple_stmt | compound_stmt 37 | simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE 38 | small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | 39 | import_stmt | global_stmt | nonlocal_stmt | assert_stmt) 40 | expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | 41 | ('=' (yield_expr|testlist_star_expr))*) 42 | testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] 43 | augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | 44 | '<<=' | '>>=' | '**=' | '//=') 45 | # For normal assignments, additional restrictions enforced by the interpreter 46 | del_stmt: 'del' exprlist 47 | pass_stmt: 'pass' 48 | flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt 49 | break_stmt: 'break' 50 | continue_stmt: 'continue' 51 | return_stmt: 'return' [testlist] 52 | yield_stmt: yield_expr 53 | raise_stmt: 'raise' [test ['from' test]] 54 | import_stmt: import_name | import_from 55 | import_name: 'import' dotted_as_names 56 | # note below: the ('.' | '...') is necessary because '...' 
is tokenized as ELLIPSIS 57 | import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) 58 | 'import' ('*' | '(' import_as_names ')' | import_as_names)) 59 | import_as_name: NAME ['as' NAME] 60 | dotted_as_name: dotted_name ['as' NAME] 61 | import_as_names: import_as_name (',' import_as_name)* [','] 62 | dotted_as_names: dotted_as_name (',' dotted_as_name)* 63 | dotted_name: NAME ('.' NAME)* 64 | global_stmt: 'global' NAME (',' NAME)* 65 | nonlocal_stmt: 'nonlocal' NAME (',' NAME)* 66 | assert_stmt: 'assert' test [',' test] 67 | 68 | compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated 69 | if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] 70 | while_stmt: 'while' test ':' suite ['else' ':' suite] 71 | for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] 72 | try_stmt: ('try' ':' suite 73 | ((except_clause ':' suite)+ 74 | ['else' ':' suite] 75 | ['finally' ':' suite] | 76 | 'finally' ':' suite)) 77 | with_stmt: 'with' with_item (',' with_item)* ':' suite 78 | with_item: test ['as' expr] 79 | # NB compile.c makes sure that the default except clause is last 80 | except_clause: 'except' [test ['as' NAME]] 81 | suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT 82 | 83 | test: or_test ['if' or_test 'else' test] | lambdef 84 | test_nocond: or_test | lambdef_nocond 85 | lambdef: 'lambda' [varargslist] ':' test 86 | lambdef_nocond: 'lambda' [varargslist] ':' test_nocond 87 | or_test: and_test ('or' and_test)* 88 | and_test: not_test ('and' not_test)* 89 | not_test: 'not' not_test | comparison 90 | comparison: expr (comp_op expr)* 91 | # <> isn't actually a valid comparison operator in Python. 
It's here for the 92 | # sake of a __future__ import described in PEP 401 93 | comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' 94 | star_expr: '*' expr 95 | expr: xor_expr ('|' xor_expr)* 96 | xor_expr: and_expr ('^' and_expr)* 97 | and_expr: shift_expr ('&' shift_expr)* 98 | shift_expr: arith_expr (('<<'|'>>') arith_expr)* 99 | arith_expr: term (('+'|'-') term)* 100 | term: factor (('*'|'/'|'%'|'//') factor)* 101 | factor: ('+'|'-'|'~') factor | power 102 | power: atom trailer* ['**' factor] 103 | atom: ('(' [yield_expr|testlist_comp] ')' | 104 | '[' [testlist_comp] ']' | 105 | '{' [dictorsetmaker] '}' | 106 | NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False') 107 | testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) 108 | trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME 109 | subscriptlist: subscript (',' subscript)* [','] 110 | subscript: test | [test] ':' [test] [sliceop] 111 | sliceop: ':' [test] 112 | exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] 113 | testlist: test (',' test)* [','] 114 | dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) | 115 | (test (comp_for | (',' test)* [','])) ) 116 | 117 | classdef: 'class' NAME ['(' [arglist] ')'] ':' suite 118 | 119 | arglist: (argument ',')* (argument [','] 120 | |'*' test (',' argument)* [',' '**' test] 121 | |'**' test) 122 | # The reason that keywords are test nodes instead of NAME is that using NAME 123 | # results in an ambiguity. ast.c makes sure it's a NAME. 
from .utils import create_node_from_token


def include_imports(pg):
    """Register the import-related grammar productions on *pg*.

    *pg* is a parser generator (rply-style): each ``@pg.production`` call
    below registers a callback for one grammar rule.  Every callback
    receives ``pack``, the tuple of matched symbols, and returns baron's
    JSON-like node dicts.  Tokens carry ``hidden_tokens_before`` /
    ``hidden_tokens_after`` (surrounding formatting), which the callbacks
    stash into ``*_formatting`` keys so the source can be re-rendered
    byte-for-byte.
    """
    # An import statement is a kind of small statement; just unwrap it.
    @pg.production("small_stmt : import")
    @pg.production("small_stmt : from_import")
    def separator(pack):
        (statement,) = pack
        return statement

    # "import a.b, c as d" — formatting around the IMPORT keyword is kept.
    @pg.production("import : IMPORT dotted_as_names")
    def importeu(pack):
        (import_, dotted_as_names) = pack
        return {
            "type": "import",
            "value": dotted_as_names,
            "first_formatting": import_.hidden_tokens_before,
            "second_formatting": import_.hidden_tokens_after
        }

    # "from x import y" — three formatting slots: after FROM, before and
    # after IMPORT.
    @pg.production("from_import : FROM dotted_name IMPORT from_import_target")
    def from_import_with_space(pack):
        (from_, dotted_name, import_, from_import_target) = pack
        return {
            "type": "from_import",
            "targets": from_import_target,
            "first_formatting": from_.hidden_tokens_after,
            "second_formatting": import_.hidden_tokens_before,
            "third_formatting": import_.hidden_tokens_after,
            "value": dotted_name
        }

    @pg.production("from_import_target : name_as_names")
    def from_import_target_name_as_names(pack):
        (name_as_names,) = pack
        return name_as_names

    # Parenthesised target list: the parentheses are emitted as explicit
    # nodes (with their hidden formatting flattened into the list) so the
    # dumper can reproduce them.
    @pg.production("from_import_target : LEFT_PARENTHESIS name_as_names RIGHT_PARENTHESIS")
    def from_import_parenthesis(pack):
        (left_parenthesis, name_as_names, right_parenthesis) = pack
        return left_parenthesis.hidden_tokens_before +\
            [{"type": "left_parenthesis", "value": "("}] +\
            left_parenthesis.hidden_tokens_after +\
            name_as_names +\
            right_parenthesis.hidden_tokens_before +\
            [{"type": "right_parenthesis", "value": ")"}] +\
            right_parenthesis.hidden_tokens_after

    # "from x import *"
    @pg.production("from_import_target : STAR")
    def from_import_star(pack):
        (star,) = pack
        return [{
            "type": "star",
            "value": "*",
            "first_formatting": star.hidden_tokens_before,
            "second_formatting": star.hidden_tokens_after
        }]

    # name_as_names is a flat list built by concatenation (left recursion).
    @pg.production("name_as_names : name_as_names name_as_name")
    def name_as_names_name_as_name(pack):
        (name_as_names, name_as_name) = pack
        return name_as_names + name_as_name

    @pg.production("name_as_names : name_as_name")
    def name_as_names(pack):
        (name_as_name,) = pack
        return name_as_name

    # "y as z" inside a from-import.
    @pg.production("name_as_name : NAME AS NAME")
    def name_as_name_name_as_name(pack):
        (name, as_, name2) = pack
        return [{
            "type": "name_as_name",
            "value": name.value,
            "first_formatting": as_.hidden_tokens_before,
            "second_formatting": as_.hidden_tokens_after,
            "target": name2.value
        }]

    # Bare "y" (no alias): target is the empty string here ...
    @pg.production("name_as_name : NAME")
    def name_as_name_name(pack):
        (name,) = pack
        return [{
            "type": "name_as_name",
            "value": name.value,
            "target": "",
            "first_formatting": [],
            "second_formatting": []
        }]

    # ... but in the NAME SPACE variant target is None.
    # NOTE(review): "" vs None for a missing target looks inconsistent —
    # confirm downstream consumers (dumper/render) accept both before
    # unifying.  The trailing SPACE is emitted as its own node.
    @pg.production("name_as_name : NAME SPACE")
    def name_as_name_name_space(pack):
        (name, space) = pack
        return [{
            "type": "name_as_name",
            "target": None,
            "value": name.value,
            "first_formatting": [],
            "second_formatting": []
        }] + [create_node_from_token(space)]

    # Commas between imported names pass through as pre-built comma nodes.
    @pg.production("name_as_name : comma")
    def name_as_name_comma_space(pack):
        (comma,) = pack
        return [comma]

    # dotted_as_names is likewise a flat list with comma nodes in-line.
    @pg.production("dotted_as_names : dotted_as_names comma dotted_as_name")
    def dotted_as_names_dotted_as_names_dotted_as_name(pack):
        (dotted_as_names, comma, dotted_as_names2) = pack
        return dotted_as_names + [comma] + dotted_as_names2

    @pg.production("dotted_as_names : dotted_as_name")
    def dotted_as_names_dotted_as_name(pack):
        (dotted_as_name,) = pack
        return dotted_as_name

    # "a.b as c" in a plain import.
    @pg.production("dotted_as_name : dotted_name AS NAME")
    def dotted_as_name_as(pack):
        (dotted_name, as_, name) = pack
        return [{
            "type": "dotted_as_name",
            "value": dotted_name,
            "first_formatting": as_.hidden_tokens_before,
            "second_formatting": as_.hidden_tokens_after,
            "target": name.value,
        }]

    # "a.b" without an alias: empty-string target, no formatting.
    @pg.production("dotted_as_name : dotted_as_name")
    def dotted_as_name(pack):
        (dotted_name,) = pack
        return [{
            "type": "dotted_as_name",
            "value": dotted_name,
            "first_formatting": [],
            "second_formatting": [],
            "target": ""
        }]

    # A dotted name is a concatenated list of NAME / SPACE / DOT / ELLIPSIS
    # element nodes, again built by left recursion.
    @pg.production("dotted_name : dotted_name dotted_name_element")
    def dotted_name_elements_element(pack):
        (dotted_name, dotted_name_element) = pack
        return dotted_name + dotted_name_element

    @pg.production("dotted_name : dotted_name_element")
    def dotted_name_element(pack):
        (dotted_name_element,) = pack
        return dotted_name_element

    @pg.production("dotted_name_element : NAME")
    @pg.production("dotted_name_element : SPACE")
    def dotted_name(pack):
        (token,) = pack
        return [create_node_from_token(token)]

    @pg.production("dotted_name_element : DOT")
    def dotted_name_dot(pack):
        (dot,) = pack
        return [{
            "type": "dot",
            "first_formatting": dot.hidden_tokens_before,
            "second_formatting": dot.hidden_tokens_after,
        }]

    # "..." in relative imports ("from ... import x"); tokenized as ELLIPSIS.
    @pg.production("dotted_name_element : ELLIPSIS")
    def dotted_name_dot_dot_dot(pack):
        ellipsis = pack[0]
        return [{
            "type": "ellipsis",
            "first_formatting": ellipsis.hidden_tokens_before,
            "second_formatting": ellipsis.hidden_tokens_after,
        }]
NEWLINE 39 | small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | 40 | import_stmt | global_stmt | nonlocal_stmt | assert_stmt) 41 | expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | 42 | ('=' (yield_expr|testlist_star_expr))*) 43 | annassign: ':' test ['=' test] 44 | testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] 45 | augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | 46 | '<<=' | '>>=' | '**=' | '//=') 47 | # For normal and annotated assignments, additional restrictions enforced by the interpreter 48 | del_stmt: 'del' exprlist 49 | pass_stmt: 'pass' 50 | flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt 51 | break_stmt: 'break' 52 | continue_stmt: 'continue' 53 | return_stmt: 'return' [testlist] 54 | yield_stmt: yield_expr 55 | raise_stmt: 'raise' [test ['from' test]] 56 | import_stmt: import_name | import_from 57 | import_name: 'import' dotted_as_names 58 | # note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS 59 | import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) 60 | 'import' ('*' | '(' import_as_names ')' | import_as_names)) 61 | import_as_name: NAME ['as' NAME] 62 | dotted_as_name: dotted_name ['as' NAME] 63 | import_as_names: import_as_name (',' import_as_name)* [','] 64 | dotted_as_names: dotted_as_name (',' dotted_as_name)* 65 | dotted_name: NAME ('.' 
NAME)* 66 | global_stmt: 'global' NAME (',' NAME)* 67 | nonlocal_stmt: 'nonlocal' NAME (',' NAME)* 68 | assert_stmt: 'assert' test [',' test] 69 | 70 | compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt 71 | async_stmt: 'async' (funcdef | with_stmt | for_stmt) 72 | if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] 73 | while_stmt: 'while' test ':' suite ['else' ':' suite] 74 | for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] 75 | try_stmt: ('try' ':' suite 76 | ((except_clause ':' suite)+ 77 | ['else' ':' suite] 78 | ['finally' ':' suite] | 79 | 'finally' ':' suite)) 80 | with_stmt: 'with' with_item (',' with_item)* ':' suite 81 | with_item: test ['as' expr] 82 | # NB compile.c makes sure that the default except clause is last 83 | except_clause: 'except' [test ['as' NAME]] 84 | suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT 85 | 86 | test: or_test ['if' or_test 'else' test] | lambdef 87 | test_nocond: or_test | lambdef_nocond 88 | lambdef: 'lambda' [varargslist] ':' test 89 | lambdef_nocond: 'lambda' [varargslist] ':' test_nocond 90 | or_test: and_test ('or' and_test)* 91 | and_test: not_test ('and' not_test)* 92 | not_test: 'not' not_test | comparison 93 | comparison: expr (comp_op expr)* 94 | comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' 95 | star_expr: '*' expr 96 | expr: xor_expr ('|' xor_expr)* 97 | xor_expr: and_expr ('^' and_expr)* 98 | and_expr: shift_expr ('&' shift_expr)* 99 | shift_expr: arith_expr (('<<'|'>>') arith_expr)* 100 | arith_expr: term (('+'|'-') term)* 101 | term: factor (('*'|'@'|'/'|'%'|'//') factor)* 102 | factor: ('+'|'-'|'~') factor | power 103 | power: atom_expr ['**' factor] 104 | atom_expr: ['await'] atom trailer* 105 | atom: ('(' [yield_expr|testlist_comp] ')' | 106 | '[' [testlist_comp] ']' | 107 | '{' [dictorsetmaker] '}' | 108 | NAME | NUMBER | STRING+ | '...' 
| 'None' | 'True' | 'False') 109 | testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) 110 | trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME 111 | subscriptlist: subscript (',' subscript)* [','] 112 | subscript: test | [test] ':' [test] [sliceop] 113 | sliceop: ':' [test] 114 | exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] 115 | testlist: test (',' test)* [','] 116 | dictorsetmaker: ( ((test ':' test | '**' expr) 117 | (comp_for | (',' (test ':' test | '**' expr))* [','])) | 118 | ((test | star_expr) 119 | (comp_for | (',' (test | star_expr))* [','])) ) 120 | 121 | classdef: 'class' NAME ['(' [arglist] ')'] ':' suite 122 | 123 | arglist: argument (',' argument)* [','] 124 | 125 | # The reason that keywords are test nodes instead of NAME is that using NAME 126 | # results in an ambiguity. ast.c makes sure it's a NAME. 127 | # "test '=' test" is really "keyword '=' test", but we have no such token. 128 | # These need to be in a single rule to avoid grammar that is ambiguous 129 | # to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, 130 | # we explicitly match '*' here, too, to give it proper precedence. 131 | # Illegal combinations and orderings are blocked in ast.c: 132 | # multiple (test comp_for) arguments are blocked; keyword unpackings 133 | # that precede iterable unpackings are blocked; etc. 
import errno
import os
import json
import stat
import tempfile
import warnings

from .token import BaronToken

from rply import ParserGenerator
from rply.parser import LRParser
from rply.parsergenerator import LRTable
from rply.errors import ParserGeneratorWarning
from rply.grammar import Grammar
from .utils import BaronError


class ParsingError(BaronError):
    """Raised when the LR parser meets a token it cannot shift or reduce."""
    pass


class BaronParserGenerator(ParserGenerator):
    # Subclasses rply's generator only to return a BaronLRParser and to use
    # baron-specific cache handling; the table construction mirrors rply's.
    def build(self):
        """Build the LR table (from a disk cache when possible) and return
        a :class:`BaronLRParser` driving it."""
        g = Grammar(self.tokens)

        # Precedence levels start at 1; 0 means "no precedence" in rply.
        for level, (assoc, terms) in enumerate(self.precedence, 1):
            for term in terms:
                g.set_precedence(term, assoc, level)

        for prod_name, syms, func, precedence in self.productions:
            g.add_production(prod_name, syms, func, precedence)

        g.set_start()

        # Surface grammar hygiene problems as warnings, not errors.
        for unused_term in g.unused_terminals():
            warnings.warn(
                "Token %r is unused" % unused_term,
                ParserGeneratorWarning,
                stacklevel=2
            )
        for unused_prod in g.unused_productions():
            warnings.warn(
                "Production %r is not reachable" % unused_prod,
                ParserGeneratorWarning,
                stacklevel=2
            )

        g.build_lritems()
        g.compute_first()
        g.compute_follow()

        # Cache file name embeds the grammar hash so a changed grammar
        # never reuses a stale table.
        # win32 temp directories are already per-user
        if os.name == "nt":
            cache_file = os.path.join(
                tempfile.gettempdir(),
                "rply-%s-%s-%s.json" % (self.VERSION, self.cache_id, self.compute_grammar_hash(g))
            )
        else:
            # On POSIX the temp dir is shared, so the uid is part of the
            # name to keep users' caches separate.
            cache_file = os.path.join(
                tempfile.gettempdir(),
                "rply-%s-%s-%s-%s.json" % (self.VERSION, os.getuid(), self.cache_id, self.compute_grammar_hash(g))
            )
        table = None
        if os.path.exists(cache_file):
            with open(cache_file) as f:
                try:
                    data = json.load(f)
                except Exception:
                    # Corrupt cache: drop it and rebuild from scratch.
                    os.remove(cache_file)
                    data = None

                if data is not None:
                    # Only trust a cache file we own with 0600 permissions
                    # (a world-writable temp dir is attacker-reachable).
                    stat_result = os.fstat(f.fileno())
                    if (
                        os.name == "nt" or (
                            stat_result.st_uid == os.getuid()
                            and stat.S_IMODE(stat_result.st_mode) == 0o0600
                        )
                    ):
                        if self.data_is_valid(g, data):
                            table = LRTable.from_cache(g, data)

        if table is None:
            table = LRTable.from_grammar(g)
            try:
                # O_EXCL: lose the creation race silently rather than
                # clobber a file another process is writing.
                fd = os.open(cache_file, os.O_RDWR | os.O_CREAT | os.O_EXCL, 0o0600)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise
            else:
                with os.fdopen(fd, "w") as f:
                    json.dump(self.serialize_table(table), f)
        # meh :(
        # if table.sr_conflicts:
        #     warnings.warn(
        #         "%d shift/reduce conflict%s" % (len(table.sr_conflicts), "s" if len(table.sr_conflicts) > 1 else ""),
        #         ParserGeneratorWarning,
        #         stacklevel=2,
        #     )
        # if table.rr_conflicts:
        #     warnings.warn(
        #         "%d reduce/reduce conflict%s" % (len(table.rr_conflicts), "s" if len(table.rr_conflicts) > 1 else ""),
        #         ParserGeneratorWarning,
        #         stacklevel=2,
        #     )
        return BaronLRParser(table, self.error_handler)


class BaronLRParser(LRParser):
    # Overrides rply's parse loop to accumulate the rendered source that has
    # been consumed so far, enabling a precise "here is where it broke"
    # error message.
    def parse(self, tokenizer, state=None):
        """Run the LR automaton over *tokenizer*'s tokens and return the
        final parse result; raises :class:`ParsingError` with a source
        excerpt on failure."""
        lookahead = None
        lookaheadstack = []

        statestack = [0]
        symstack = [BaronToken("$end", "$end")]

        current_state = 0

        # Everything rendered so far, used only for error reporting.
        parsed_file_content = ""

        while True:
            # States with a single possible action reduce without looking
            # at the next token.
            if self.lr_table.default_reductions[current_state]:
                t = self.lr_table.default_reductions[current_state]
                current_state = self._reduce_production(t, symstack, statestack, state)
                continue

            if lookahead is None:
                if lookaheadstack:
                    lookahead = lookaheadstack.pop()
                else:
                    try:
                        lookahead = next(tokenizer)
                    except StopIteration:
                        lookahead = None

                if lookahead is None:
                    # End of input: synthesize the $end sentinel token.
                    lookahead = BaronToken("$end", "$end")
                else:
                    parsed_file_content += lookahead.render()

            ltype = lookahead.gettokentype()
            if ltype in self.lr_table.lr_action[current_state]:
                t = self.lr_table.lr_action[current_state][ltype]
                if t > 0:
                    # Positive entry: shift the lookahead and go to state t.
                    statestack.append(t)
                    current_state = t
                    symstack.append(lookahead)
                    lookahead = None
                    continue
                elif t < 0:
                    # Negative entry: reduce by production -t.
                    current_state = self._reduce_production(t, symstack, statestack, state)
                    continue
                else:
                    # Zero: accept — the result is the single remaining symbol.
                    n = symstack[-1]
                    return n
            else:
                # No action for this token: build a contextual error showing
                # the last 8 lines of consumed source and the expected tokens.
                debug_output = parsed_file_content.split("\n")
                debug_output = list(zip(range(1, len(debug_output) + 1), debug_output))
                debug_output = debug_output[-8:]
                debug_output = "\n".join(["%4s %s" % (x[0], x[1]) for x in debug_output])
                debug_output += "<---- here"
                debug_output = "Error, got an unexpected token %s here:\n\n" % ltype + debug_output
                debug_output += "\n\nThe token %s should be one of those: %s" % (ltype, ", ".join(sorted(self.lr_table.lr_action[current_state].keys())))
                debug_output += "\n\nBaron has failed to parse this input. If this is valid python code (and by that I mean that the python binary successfully parse this code without any syntax error) (also consider that baron does not yet parse python 3 code integrally) it would be kind if you can extract a snippet of your code that make Baron fails and open a bug here: https://github.com/PyCQA/baron/issues\n\nSorry for the inconvenience."
                raise ParsingError(debug_output)
(annassign | augassign (yield_expr|testlist) | 42 | ('=' (yield_expr|testlist_star_expr))*) 43 | annassign: ':' test ['=' test] 44 | testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] 45 | augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | 46 | '<<=' | '>>=' | '**=' | '//=') 47 | # For normal and annotated assignments, additional restrictions enforced by the interpreter 48 | del_stmt: 'del' exprlist 49 | pass_stmt: 'pass' 50 | flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt 51 | break_stmt: 'break' 52 | continue_stmt: 'continue' 53 | return_stmt: 'return' [testlist] 54 | yield_stmt: yield_expr 55 | raise_stmt: 'raise' [test ['from' test]] 56 | import_stmt: import_name | import_from 57 | import_name: 'import' dotted_as_names 58 | # note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS 59 | import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) 60 | 'import' ('*' | '(' import_as_names ')' | import_as_names)) 61 | import_as_name: NAME ['as' NAME] 62 | dotted_as_name: dotted_name ['as' NAME] 63 | import_as_names: import_as_name (',' import_as_name)* [','] 64 | dotted_as_names: dotted_as_name (',' dotted_as_name)* 65 | dotted_name: NAME ('.' 
NAME)* 66 | global_stmt: 'global' NAME (',' NAME)* 67 | nonlocal_stmt: 'nonlocal' NAME (',' NAME)* 68 | assert_stmt: 'assert' test [',' test] 69 | 70 | compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt 71 | async_stmt: ASYNC (funcdef | with_stmt | for_stmt) 72 | if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] 73 | while_stmt: 'while' test ':' suite ['else' ':' suite] 74 | for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] 75 | try_stmt: ('try' ':' suite 76 | ((except_clause ':' suite)+ 77 | ['else' ':' suite] 78 | ['finally' ':' suite] | 79 | 'finally' ':' suite)) 80 | with_stmt: 'with' with_item (',' with_item)* ':' suite 81 | with_item: test ['as' expr] 82 | # NB compile.c makes sure that the default except clause is last 83 | except_clause: 'except' [test ['as' NAME]] 84 | suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT 85 | 86 | test: or_test ['if' or_test 'else' test] | lambdef 87 | test_nocond: or_test | lambdef_nocond 88 | lambdef: 'lambda' [varargslist] ':' test 89 | lambdef_nocond: 'lambda' [varargslist] ':' test_nocond 90 | or_test: and_test ('or' and_test)* 91 | and_test: not_test ('and' not_test)* 92 | not_test: 'not' not_test | comparison 93 | comparison: expr (comp_op expr)* 94 | # <> isn't actually a valid comparison operator in Python. 
It's here for the 95 | # sake of a __future__ import described in PEP 401 (which really works :-) 96 | comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' 97 | star_expr: '*' expr 98 | expr: xor_expr ('|' xor_expr)* 99 | xor_expr: and_expr ('^' and_expr)* 100 | and_expr: shift_expr ('&' shift_expr)* 101 | shift_expr: arith_expr (('<<'|'>>') arith_expr)* 102 | arith_expr: term (('+'|'-') term)* 103 | term: factor (('*'|'@'|'/'|'%'|'//') factor)* 104 | factor: ('+'|'-'|'~') factor | power 105 | power: atom_expr ['**' factor] 106 | atom_expr: [AWAIT] atom trailer* 107 | atom: ('(' [yield_expr|testlist_comp] ')' | 108 | '[' [testlist_comp] ']' | 109 | '{' [dictorsetmaker] '}' | 110 | NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False') 111 | testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) 112 | trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME 113 | subscriptlist: subscript (',' subscript)* [','] 114 | subscript: test | [test] ':' [test] [sliceop] 115 | sliceop: ':' [test] 116 | exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] 117 | testlist: test (',' test)* [','] 118 | dictorsetmaker: ( ((test ':' test | '**' expr) 119 | (comp_for | (',' (test ':' test | '**' expr))* [','])) | 120 | ((test | star_expr) 121 | (comp_for | (',' (test | star_expr))* [','])) ) 122 | 123 | classdef: 'class' NAME ['(' [arglist] ')'] ':' suite 124 | 125 | arglist: argument (',' argument)* [','] 126 | 127 | # The reason that keywords are test nodes instead of NAME is that using NAME 128 | # results in an ambiguity. ast.c makes sure it's a NAME. 129 | # "test '=' test" is really "keyword '=' test", but we have no such token. 130 | # These need to be in a single rule to avoid grammar that is ambiguous 131 | # to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, 132 | # we explicitly match '*' here, too, to give it proper precedence. 
133 | # Illegal combinations and orderings are blocked in ast.c: 134 | # multiple (test comp_for) arguments are blocked; keyword unpackings 135 | # that precede iterable unpackings are blocked; etc. 136 | argument: ( test [comp_for] | 137 | test '=' test | 138 | '**' test | 139 | '*' test ) 140 | 141 | comp_iter: comp_for | comp_if 142 | comp_for: [ASYNC] 'for' exprlist 'in' or_test [comp_iter] 143 | comp_if: 'if' test_nocond [comp_iter] 144 | 145 | # not used in grammar, but may appear in "node" passed from Parser to Compiler 146 | encoding_decl: NAME 147 | 148 | yield_expr: 'yield' [yield_arg] 149 | yield_arg: 'from' test | testlist 150 | -------------------------------------------------------------------------------- /grammar/baron_grammar: -------------------------------------------------------------------------------- 1 | # Grammar for Baron 2 | # this file is a REFERENCE on what grammar baron is supposed to be implementing 3 | # to be a mixed between python2 and python3 grammar 4 | 5 | # IT IS NOT USED BY BARON DIRECTLY 6 | 7 | # Start symbols for the grammar: 8 | # single_input is a single interactive statement; 9 | # file_input is a module or sequence of commands read from an input file; 10 | # eval_input is the input for the eval() functions. 11 | # NB: compound_stmt in single_input is followed by extra NEWLINE! 
12 | single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE 13 | file_input: (NEWLINE | stmt)* ENDMARKER 14 | eval_input: testlist NEWLINE* ENDMARKER 15 | 16 | decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE 17 | decorators: decorator+ 18 | decorated: decorators (classdef | funcdef | async_funcdef) 19 | 20 | async_funcdef: 'async' funcdef 21 | funcdef: 'def' NAME parameters ['->' test] ':' suite 22 | 23 | parameters: '(' [typedargslist] ')' 24 | typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [ 25 | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] 26 | | '**' tfpdef [',']]] 27 | | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] 28 | | '**' tfpdef [',']) 29 | tfpdef: NAME [':' test] 30 | varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ 31 | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] 32 | | '**' vfpdef [',']]] 33 | | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] 34 | | '**' vfpdef [','] 35 | ) 36 | vfpdef: NAME 37 | 38 | stmt: simple_stmt | compound_stmt 39 | simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE 40 | small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | 41 | import_stmt | global_stmt | nonlocal_stmt | exec_stmt | assert_stmt) 42 | expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | 43 | ('=' (yield_expr|testlist_star_expr))*) 44 | annassign: ':' test ['=' test] 45 | testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] 46 | augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | 47 | '<<=' | '>>=' | '**=' | '//=') 48 | # For normal assignments, additional restrictions enforced by the interpreter 49 | print_stmt: 'print' ( [ test (',' test)* [','] ] | 50 | '>>' test [ (',' test)+ [','] ] ) 51 | del_stmt: 'del' exprlist 52 | pass_stmt: 'pass' 53 | flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt 54 | break_stmt: 'break' 55 | 
continue_stmt: 'continue' 56 | return_stmt: 'return' [testlist] 57 | yield_stmt: yield_expr 58 | raise_stmt: 'raise' [test [(',' test [',' test] | 'from' test)] 59 | import_stmt: import_name | import_from 60 | import_name: 'import' dotted_as_names 61 | # note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS 62 | import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) 63 | 'import' ('*' | '(' import_as_names ')' | import_as_names)) 64 | import_as_name: NAME ['as' NAME] 65 | dotted_as_name: dotted_name ['as' NAME] 66 | import_as_names: import_as_name (',' import_as_name)* [','] 67 | dotted_as_names: dotted_as_name (',' dotted_as_name)* 68 | dotted_name: NAME ('.' NAME)* 69 | global_stmt: 'global' NAME (',' NAME)* 70 | nonlocal_stmt: 'nonlocal' NAME (',' NAME)* 71 | exec_stmt: 'exec' expr ['in' test [',' test]] 72 | assert_stmt: 'assert' test [',' test] 73 | 74 | compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt 75 | async_stmt: 'async' (funcdef | with_stmt | for_stmt) 76 | if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] 77 | while_stmt: 'while' test ':' suite ['else' ':' suite] 78 | for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] 79 | try_stmt: ('try' ':' suite 80 | ((except_clause ':' suite)+ 81 | ['else' ':' suite] 82 | ['finally' ':' suite] | 83 | 'finally' ':' suite)) 84 | with_stmt: 'with' with_item (',' with_item)* ':' suite 85 | with_item: test ['as' expr] 86 | # NB compile.c makes sure that the default except clause is last 87 | except_clause: 'except' [test [('as' | ',') test]] 88 | suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT 89 | 90 | # Backward compatibility cruft to support: 91 | # [ x for x in lambda: True, lambda: False if x() ] 92 | # even while also allowing: 93 | # lambda x: 5 if x else 2 94 | # (But not a mix of the two) 95 | testlist_safe: old_test [(',' old_test)+ [',']] 96 | old_test: or_test | 
old_lambdef 97 | old_lambdef: 'lambda' [varargslist] ':' old_test 98 | 99 | test: or_test ['if' or_test 'else' test] | lambdef 100 | lambdef: 'lambda' [varargslist] ':' test 101 | or_test: and_test ('or' and_test)* 102 | and_test: not_test ('and' not_test)* 103 | not_test: 'not' not_test | comparison 104 | comparison: expr (comp_op expr)* 105 | comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' 106 | expr: xor_expr ('|' xor_expr)* 107 | xor_expr: and_expr ('^' and_expr)* 108 | and_expr: shift_expr ('&' shift_expr)* 109 | shift_expr: arith_expr (('<<'|'>>') arith_expr)* 110 | arith_expr: term (('+'|'-') term)* 111 | term: factor (('*'|'@'|'/'|'%'|'//') factor)* 112 | factor: ('+'|'-'|'~') factor | power 113 | power: atom_expr ['**' factor] 114 | atom_expr: ['await'] atom trailer* 115 | atom: ('(' [yield_expr|testlist_comp] ')' | 116 | '[' [listmaker] ']' | 117 | '{' [dictorsetmaker] '}' | 118 | '`' testlist1 '`' | 119 | NAME | NUMBER | STRING+ | '...') 120 | testlist_comp: test ( comp_for | (',' test)* [','] ) 121 | listmaker: test ( list_for | (',' test)* [','] ) 122 | trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME 123 | subscriptlist: subscript (',' subscript)* [','] 124 | subscript: test | [test] ':' [test] [sliceop] 125 | sliceop: ':' [test] 126 | exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] 127 | testlist: test (',' test)* [','] 128 | dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) | 129 | (test (comp_for | (',' test)* [','])) ) 130 | 131 | classdef: 'class' NAME ['(' [(testlist | arglist)] ')'] ':' suite 132 | 133 | arglist: (argument ',')* (argument [','] 134 | |'*' test (',' argument)* [',' '**' test] 135 | |'**' test) 136 | # The reason that keywords are test nodes instead of NAME is that using NAME 137 | # results in an ambiguity. ast.c makes sure it's a NAME. 
138 | argument: test [comp_for] | test '=' test 139 | 140 | list_iter: list_for | list_if 141 | list_for: 'for' exprlist 'in' testlist_safe [list_iter] 142 | list_if: 'if' old_test [list_iter] 143 | 144 | comp_iter: comp_for | comp_if 145 | sync_comp_for: 'for' exprlist 'in' or_test [comp_iter] 146 | comp_for: ['async'] sync_comp_for 147 | comp_if: 'if' old_test [comp_iter] 148 | 149 | testlist1: test (',' test)* 150 | 151 | # not used in grammar, but may appear in "node" passed from Parser to Compiler 152 | encoding_decl: NAME 153 | 154 | yield_expr: 'yield' [yield_arg] 155 | yield_arg: 'from' test | testlist 156 | -------------------------------------------------------------------------------- /grammar/reference_grammar_python3.5: -------------------------------------------------------------------------------- 1 | # Grammar for Python 2 | 3 | # Note: Changing the grammar specified in this file will most likely 4 | # require corresponding changes in the parser module 5 | # (../Modules/parsermodule.c). If you can't make the changes to 6 | # that module yourself, please co-ordinate the required changes 7 | # with someone who can; ask around on python-dev for help. Fred 8 | # Drake will probably be listening there. 9 | 10 | # NOTE WELL: You should also follow all the steps listed at 11 | # https://docs.python.org/devguide/grammar.html 12 | 13 | # Start symbols for the grammar: 14 | # single_input is a single interactive statement; 15 | # file_input is a module or sequence of commands read from an input file; 16 | # eval_input is the input for the eval() functions. 17 | # NB: compound_stmt in single_input is followed by extra NEWLINE! 
18 | single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE 19 | file_input: (NEWLINE | stmt)* ENDMARKER 20 | eval_input: testlist NEWLINE* ENDMARKER 21 | 22 | decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE 23 | decorators: decorator+ 24 | decorated: decorators (classdef | funcdef | async_funcdef) 25 | 26 | async_funcdef: ASYNC funcdef 27 | funcdef: 'def' NAME parameters ['->' test] ':' suite 28 | 29 | parameters: '(' [typedargslist] ')' 30 | typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' 31 | ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] 32 | | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) 33 | tfpdef: NAME [':' test] 34 | varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' 35 | ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] 36 | | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) 37 | vfpdef: NAME 38 | 39 | stmt: simple_stmt | compound_stmt 40 | simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE 41 | small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | 42 | import_stmt | global_stmt | nonlocal_stmt | assert_stmt) 43 | expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | 44 | ('=' (yield_expr|testlist_star_expr))*) 45 | testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] 46 | augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | 47 | '<<=' | '>>=' | '**=' | '//=') 48 | # For normal assignments, additional restrictions enforced by the interpreter 49 | del_stmt: 'del' exprlist 50 | pass_stmt: 'pass' 51 | flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt 52 | break_stmt: 'break' 53 | continue_stmt: 'continue' 54 | return_stmt: 'return' [testlist] 55 | yield_stmt: yield_expr 56 | raise_stmt: 'raise' [test ['from' test]] 57 | import_stmt: import_name | import_from 58 | import_name: 'import' dotted_as_names 59 | # note below: the ('.' 
| '...') is necessary because '...' is tokenized as ELLIPSIS 60 | import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) 61 | 'import' ('*' | '(' import_as_names ')' | import_as_names)) 62 | import_as_name: NAME ['as' NAME] 63 | dotted_as_name: dotted_name ['as' NAME] 64 | import_as_names: import_as_name (',' import_as_name)* [','] 65 | dotted_as_names: dotted_as_name (',' dotted_as_name)* 66 | dotted_name: NAME ('.' NAME)* 67 | global_stmt: 'global' NAME (',' NAME)* 68 | nonlocal_stmt: 'nonlocal' NAME (',' NAME)* 69 | assert_stmt: 'assert' test [',' test] 70 | 71 | compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt 72 | async_stmt: ASYNC (funcdef | with_stmt | for_stmt) 73 | if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] 74 | while_stmt: 'while' test ':' suite ['else' ':' suite] 75 | for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] 76 | try_stmt: ('try' ':' suite 77 | ((except_clause ':' suite)+ 78 | ['else' ':' suite] 79 | ['finally' ':' suite] | 80 | 'finally' ':' suite)) 81 | with_stmt: 'with' with_item (',' with_item)* ':' suite 82 | with_item: test ['as' expr] 83 | # NB compile.c makes sure that the default except clause is last 84 | except_clause: 'except' [test ['as' NAME]] 85 | suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT 86 | 87 | test: or_test ['if' or_test 'else' test] | lambdef 88 | test_nocond: or_test | lambdef_nocond 89 | lambdef: 'lambda' [varargslist] ':' test 90 | lambdef_nocond: 'lambda' [varargslist] ':' test_nocond 91 | or_test: and_test ('or' and_test)* 92 | and_test: not_test ('and' not_test)* 93 | not_test: 'not' not_test | comparison 94 | comparison: expr (comp_op expr)* 95 | # <> isn't actually a valid comparison operator in Python. 
It's here for the 96 | # sake of a __future__ import described in PEP 401 (which really works :-) 97 | comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' 98 | star_expr: '*' expr 99 | expr: xor_expr ('|' xor_expr)* 100 | xor_expr: and_expr ('^' and_expr)* 101 | and_expr: shift_expr ('&' shift_expr)* 102 | shift_expr: arith_expr (('<<'|'>>') arith_expr)* 103 | arith_expr: term (('+'|'-') term)* 104 | term: factor (('*'|'@'|'/'|'%'|'//') factor)* 105 | factor: ('+'|'-'|'~') factor | power 106 | power: atom_expr ['**' factor] 107 | atom_expr: [AWAIT] atom trailer* 108 | atom: ('(' [yield_expr|testlist_comp] ')' | 109 | '[' [testlist_comp] ']' | 110 | '{' [dictorsetmaker] '}' | 111 | NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False') 112 | testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) 113 | trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME 114 | subscriptlist: subscript (',' subscript)* [','] 115 | subscript: test | [test] ':' [test] [sliceop] 116 | sliceop: ':' [test] 117 | exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] 118 | testlist: test (',' test)* [','] 119 | dictorsetmaker: ( ((test ':' test | '**' expr) 120 | (comp_for | (',' (test ':' test | '**' expr))* [','])) | 121 | ((test | star_expr) 122 | (comp_for | (',' (test | star_expr))* [','])) ) 123 | 124 | classdef: 'class' NAME ['(' [arglist] ')'] ':' suite 125 | 126 | arglist: argument (',' argument)* [','] 127 | 128 | # The reason that keywords are test nodes instead of NAME is that using NAME 129 | # results in an ambiguity. ast.c makes sure it's a NAME. 130 | # "test '=' test" is really "keyword '=' test", but we have no such token. 131 | # These need to be in a single rule to avoid grammar that is ambiguous 132 | # to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, 133 | # we explicitly match '*' here, too, to give it proper precedence. 
#!/usr/bin/python
# -*- coding:Utf-8 -*-

"""Tests for baron.grouper.

Each test feeds a source snippet through ``split`` and then ``group`` and
checks both intermediate results: ``split`` cuts the raw text into minimal
pieces, ``group`` re-assembles the pieces that form a single logical token
(multi-character operators, number literals, string prefixes, ...).
"""

from baron.grouper import group
from baron.spliter import split


def grouper_test(input_, split_output, group_output):
    """Assert that *input_* splits into *split_output* and groups into *group_output*."""
    assert split(input_) == split_output
    assert group(split_output) == group_output


def test_empty():
    grouper_test("", [], [])


def test_one():
    grouper_test('a', ['a'], ['a'])


def test_random():
    # Plain letters are left untouched by the grouper.
    assert group(list("abcdef")) == list("abcdef")


# --- augmented-assignment and comparison operators ---

def test_add_egual():
    grouper_test("+=", ["+", "="], ["+="])


def test_add_add():
    # "++" is NOT a python operator, so it must stay as two tokens.
    grouper_test("++", ["+", "+"], ["+", "+"])


def test_add_egual_double():
    grouper_test("+=+=", ["+", "=", "+", "="], ["+=", "+="])


def test_add_egual_random():
    grouper_test(" qsd+=qsd", [' ', 'qsd', '+', '=', 'qsd'], [' ', 'qsd', '+=', 'qsd'])


def test_minus_egual():
    grouper_test("-=", ["-", "="], ["-="])


def test_mult_egual():
    grouper_test("*=", ["*", "="], ["*="])


def test_div_egual():
    grouper_test("/=", ["/", "="], ["/="])


def test_modulo_egual():
    grouper_test("%=", ["%", "="], ["%="])


def test_amper_egual():
    grouper_test("&=", ["&", "="], ["&="])


def test_bar_egual():
    grouper_test("|=", ["|", "="], ["|="])


def test_power_egual():
    grouper_test("^=", ["^", "="], ["^="])


def test_less_less():
    grouper_test("<<", ["<", "<"], ["<<"])


def test_more_more():
    grouper_test(">>", [">", ">"], [">>"])


def test_egual_egual():
    grouper_test("==", ["=", "="], ["=="])


def test_different():
    grouper_test("!=", ["!", "="], ["!="])


def test_inferior_egual():
    grouper_test(">=", [">", "="], [">="])


def test_superior_egual():
    grouper_test("<=", ["<", "="], ["<="])


def test_different_old_style():
    # Python 2 legacy "not equal" operator.
    grouper_test("<>", ["<", ">"], ["<>"])


def test_power_power_egual():
    grouper_test("**=", ["*", "*", "="], ["**="])


def test_div_div_egual():
    grouper_test("//=", ["/", "/", "="], ["//="])


def test_less_less_egual():
    grouper_test("<<=", ["<", "<", "="], ["<<="])


def test_more_more_egual():
    grouper_test(">>=", [">", ">", "="], [">>="])


def test_decorator():
    # "@" stays separate from the decorator name.
    grouper_test("@pouet", ["@", "pouet"], ["@", "pouet"])


def test_endl():
    # Windows line ending is a single logical token.
    grouper_test("\r\n", ["\r", "\n"], ["\r\n"])


# --- string prefixes (raw/unicode/binary) merge with the string literal ---

def test_raw_string():
    grouper_test("r'pouet'", ["r", "'pouet'"], ["r'pouet'"])
    grouper_test("R'pouet'", ["R", "'pouet'"], ["R'pouet'"])


def test_unicode_string():
    grouper_test("u'pouet'", ["u", "'pouet'"], ["u'pouet'"])
    grouper_test("U'pouet'", ["U", "'pouet'"], ["U'pouet'"])


def test_binary_string():
    grouper_test("b'pouet'", ["b", "'pouet'"], ["b'pouet'"])
    grouper_test("B'pouet'", ["B", "'pouet'"], ["B'pouet'"])


def test_binary_raw_string():
    grouper_test("br'pouet'", ["br", "'pouet'"], ["br'pouet'"])
    grouper_test("Br'pouet'", ["Br", "'pouet'"], ["Br'pouet'"])
    grouper_test("bR'pouet'", ["bR", "'pouet'"], ["bR'pouet'"])
    grouper_test("BR'pouet'", ["BR", "'pouet'"], ["BR'pouet'"])


def test_unicode_raw_string():
    grouper_test("ur'pouet'", ["ur", "'pouet'"], ["ur'pouet'"])
    grouper_test("Ur'pouet'", ["Ur", "'pouet'"], ["Ur'pouet'"])
    grouper_test("uR'pouet'", ["uR", "'pouet'"], ["uR'pouet'"])
    grouper_test("UR'pouet'", ["UR", "'pouet'"], ["UR'pouet'"])


# --- numbers: exponents, floats, complex, underscores ---

def test_exponant():
    grouper_test("1e+123", ['1e', '+', '123'], ['1e+123'])
    grouper_test("1e-123", ['1e', '-', '123'], ['1e-123'])
    grouper_test("1.1e+123", ['1', '.', '1e', '+', '123'], ['1.1e+123'])
    grouper_test("1.1e-123", ['1', '.', '1e', '-', '123'], ['1.1e-123'])
    grouper_test(".1e+123", ['.', '1e', '+', '123'], ['.1e+123'])
    grouper_test(".1e-123", ['.', '1e', '-', '123'], ['.1e-123'])


def test_endl_with_backslash():
    # Line continuation: backslash + newline group together.
    grouper_test("\\\n", ['\\', '\n'], ['\\\n'])


def test_space_endl_with_backslash():
    grouper_test(" \\\n ", [' ', '\\', '\n', ' '], [' \\\n '])
    grouper_test(" \\\npouet", [' ', '\\', '\n', 'pouet'], [' \\\n', 'pouet'])


def test_number_with_backslash():
    grouper_test("3\\\n", ['3', '\\', '\n'], ['3', '\\\n'])


def test_regression():
    grouper_test("0x045e: ", ['0x045e', ':', ' '], ['0x045e', ':', ' '])
    grouper_test("180.\n", ['180', '.', '\n'], ['180.', '\n'])


def test_backslash_window_endl():
    grouper_test("\\\r\n", ['\\', '\r', '\n'], ['\\\r\n'])


def test_regression_float():
    grouper_test('1.', ['1', '.'], ['1.'])
    grouper_test('.1', ['.', '1'], ['.1'])
    grouper_test('1.1', ['1', '.', '1'], ['1.1'])
    grouper_test('7.629e-6', ['7', '.', '629e', '-', '6'], ['7.629e-6'])


def test_complex():
    grouper_test(".1j", ['.', '1j'], ['.1j'])
    grouper_test(".1J", ['.', '1J'], ['.1J'])
    grouper_test("1.j", ['1', '.', 'j'], ['1.j'])
    grouper_test("1.J", ['1', '.', 'J'], ['1.J'])
    grouper_test("1.1j", ['1', '.', '1j'], ['1.1j'])
    grouper_test("1.1J", ['1', '.', '1J'], ['1.1J'])
    grouper_test("1J", ['1J'], ['1J'])
    grouper_test("1e-1j", ['1e', '-', '1j'], ['1e-1j'])
    grouper_test("1e1j", ['1e1j'], ['1e1j'])


def test_float_exponant():
    grouper_test("1E1", ['1E1'], ['1E1'])
    grouper_test("1E-2", ['1E', '-', '2'], ['1E-2'])
    grouper_test("1E+2", ['1E', '+', '2'], ['1E+2'])
    grouper_test("1.E+2", ['1', '.', 'E', '+', '2'], ['1.E+2'])
    grouper_test("1.E-2", ['1', '.', 'E', '-', '2'], ['1.E-2'])
    grouper_test("1.E2", ['1', '.', 'E2'], ['1.E2'])
    grouper_test("1e1", ['1e1'], ['1e1'])
    grouper_test("1e-2", ['1e', '-', '2'], ['1e-2'])
    grouper_test("1e+2", ['1e', '+', '2'], ['1e+2'])
    grouper_test("1.e+2", ['1', '.', 'e', '+', '2'], ['1.e+2'])
    grouper_test("1.e-2", ['1', '.', 'e', '-', '2'], ['1.e-2'])
    grouper_test("1.e2", ['1', '.', 'e2'], ['1.e2'])
    grouper_test(".3e55", ['.', '3e55'], ['.3e55'])


def test_float_with_underscores():
    # PEP 515 underscores in numeric literals.
    grouper_test(".098_765", ['.', '098_765'], ['.098_765'])
    grouper_test("123.098_765", ['123', '.', '098_765'], ['123.098_765'])
    grouper_test("123_456.098", ['123_456', '.', '098'], ['123_456.098'])
    grouper_test("123_456.098_765", ['123_456', '.', '098_765'], ['123_456.098_765'])


def test_arrow():
    # Return-annotation arrow.
    grouper_test("->", ['-', '>'], ['->'])


def test_dot_dot():
    # Two dots are NOT an ellipsis: keep them apart.
    grouper_test("..", ['.', '.'], ['.', '.'])


def test_dot_dot_dot():
    grouper_test("...", ['.', '.', '.'], ['...'])


def test_split_float_notation():
    # "a._" is an attribute access, not a float: no grouping.
    grouper_test("a._", ["a", ".", "_"], ["a", ".", "_"])
44 | 45 | Contributing 46 | ============ 47 | 48 | If you want to implement new grammar elements for newer python versions, here 49 | are the documented steps for that: 50 | https://github.com/PyCQA/baron/blob/master/add_new_grammar.md 51 | 52 | Also note that reviewing most grammar modifications takes several hours of 53 | advanced focusing (we can't really afford bugs here) so don't despair if you PR 54 | seems to be hanging around, sorry for that :/ 55 | 56 | And thanks in advance for your work! 57 | 58 | Financial support 59 | ================= 60 | 61 | Baron and RedBaron are a very advanced piece of engineering that requires a lot 62 | of time of concentration to work on. Until the end of 2018, the development 63 | has been a full volunteer work mostly done by [Bram](https://github.com/psycojoker), 64 | but now, to reach the next level and bring those projects to the stability and 65 | quality you expect, we need your support. 66 | 67 | You can join our contributors and sponsors on our transparent 68 | [OpenCollective](https://opencollective.com/redbaron), every contribution will 69 | count and will be mainly used to work on the projects stability and quality but 70 | also on continuing, on the side, the R&D side of those projects. 71 | 72 | Our supporters 73 | -------------- 74 | 75 | 76 | [![badge with number of supporters at tier I like this, keep going!](https://opencollective.com/redbaron/tiers/i-like-this,-keep-going!/badge.svg) 77 | ![badge with number of supporters at tier it looks cool!](https://opencollective.com/redbaron/tiers/it-looks-cool!/badge.svg) 78 | ![badge with number of supporters at tier Oh god, that saved me so much time!](https://opencollective.com/redbaron/tiers/oh-god,-that-saved-me-so-much-time!/badge.svg)](https://opencollective.com/redbaron/tiers/) 79 | 80 | 81 | Why is this important? 
82 | ====================== 83 | 84 | The usage of a FST might not be obvious at first sight so let's consider a 85 | series of problems to illustrate it. Let's say that you want to write a program that will: 86 | 87 | * rename a variable in a source file... without clashing with things that are not a variable (example: stuff inside a string) 88 | * inline a function/method 89 | * extract a function/method from a series of lines of code 90 | * split a class into several classes 91 | * split a file into several modules 92 | * convert your whole code base from one ORM to another 93 | * do custom refactoring operations not implemented by IDE/rope 94 | * implement the class browser of smalltalk for python (the whole one where you can edit the code of the methods, not just showing code) 95 | 96 | It is very likely that you will end up with the awkward feeling of writing 97 | clumsy weak code that is very likely to break because you didn't think about 98 | all the annoying special cases and the formatting keeps bothering you. You may 99 | end up playing with [ast.py](https://docs.python.org/3/library/ast.html) until 100 | you realize that it removes too much information to be suitable for those 101 | situations. You will probably ditch this task as simply too complicated and 102 | really not worth the effort. You are missing a good abstraction that will take 103 | care of all of the code structure and formatting for you so you can concentrate 104 | on your task. 105 | 106 | The FST tries to be this abstraction. With it you can now work on a tree which 107 | represents your code with its formatting. Moreover, since it is the exact 108 | representation of your code, modifying it and converting it back to a string 109 | will give you back your code only modified where you have modified the tree. 
110 | 111 | Said in another way, what I'm trying to achieve with Baron is a paradigm change in 112 | which writing code that will modify code is now a realistic task that is worth 113 | the price (I'm not saying a simple task, but a realistic one: it's still a 114 | complex task). 115 | 116 | Other 117 | ----- 118 | 119 | Having a FST (or at least a good abstraction built on it) also makes it easier 120 | to do code generation and code analysis while those two operations are already 121 | quite feasible (using [ast.py](https://docs.python.org/3/library/ast.html) 122 | and a templating engine for example). 123 | 124 | Some technical details 125 | ====================== 126 | 127 | Baron produces a FST in the form of JSON (and by JSON I mean Python lists 128 | and dicts that can be dumped into JSON) for maximum interoperability. 129 | 130 | Baron FST is quite similar to Python AST with some modifications to be more 131 | intuitive to humans, since Python AST has been made for CPython interpreter. 132 | 133 | Since playing directly with JSON is a bit raw I'm going to build an abstraction 134 | on top of it that will look like BeautifulSoup/jQuery. 135 | 136 | State of the project 137 | ==================== 138 | 139 | Currently, Baron has been tested on the top 100 projects and the FST converts 140 | back exactly into the original source code. So, it can be considered quite 141 | stable, but it is far away from having been battle tested. 142 | 143 | Since the project is very young and no one is already using it except my 144 | project, I'm open to changes of the FST nodes but I will quickly become 145 | conservative once it gets some adoption and will probably accept to 146 | modify it only once or twice in the future with clear indications on how to 147 | migrate. 148 | 149 | Baron is supporting python 2 grammar and up to python 3.7 grammar. 150 | 151 | Tests 152 | ===== 153 | Run either `py.test tests/` or `nosetests` in the baron directory. 
154 | 155 | Community 156 | ========= 157 | 158 | You can reach us on [irc.freenode.net#baron](https://webchat.freenode.net/?channels=%23baron) or [irc.freenode.net##python-code-quality](https://webchat.freenode.net/?channels=%23%23python-code-quality). 159 | 160 | Code of Conduct 161 | =============== 162 | 163 | As a member of [PyCQA](https://github.com/PyCQA), Baron follows its [Code of Conduct](http://meta.pycqa.org/en/latest/code-of-conduct.html). 164 | 165 | Misc 166 | ==== 167 | [Old blog post announcing the project.](http://worlddomination.be/blog/2013/the-baron-project-part-1-what-and-why.html) Not that much up to date. 168 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 
26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. 
You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 
108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 
145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 
166 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | 0.10.1 (2021-12-08) 5 | ----------------- 6 | 7 | - bug fix: in "a._" the "._" part was incorrectly recognized as a float, by bram 8 | 9 | 0.10 (2021-12-08) 10 | ----------------- 11 | 12 | - bug fix: baron is now able to parse "class A(b, c=d): pass" by bram 13 | - some project cleaned and integration of tox with good pratices like flake8 and check-manifest 14 | - bug fix for missing edge case in inner formatting by EhsanKia https://github.com/PyCQA/baron/pull/156 15 | - complet support for float with underscores in them by tamentis https://github.com/PyCQA/baron/pull/157 16 | - bug fix for failure of parsing of "{**a}" by wavenator https://github.com/PyCQA/baron/pull/161 17 | 18 | 0.9 (2019-02-01) 19 | ---------------- 20 | 21 | First version of full python 3.7 grammar support. 22 | 23 | - BREAKING CHANGE: annotations are now member of {def,list,dict}_argument to flatten the data structure 24 | - add support for ... 
in from import by bram 25 | - add support for return annotation by bram 26 | - add support for exec function by bram 27 | - add support for variable annotation https://github.com/PyCQA/baron/pull/145 by scottbelden and additional work by bram 28 | - add support for *var expressions in tuple assignment by bram 29 | - add support for raise from https://github.com/PyCQA/baron/pull/120 by odcinek with additional work by bram 30 | - add support for arglist usage in class definition inheritence by bram 31 | - bug fix by https://github.com/PyCQA/baron/pull/126/commits/91e839a228293698cc755a7f28afeca2669cb66e kyleatmakrs 32 | 33 | 0.8 (2018-10-29) 34 | ---------------- 35 | 36 | - add typed parameters support https://github.com/PyCQA/baron/pull/140 by Scott Belden and and additional work by bram 37 | 38 | 0.7 (2018-08-21) 39 | ---------------- 40 | 41 | - fix line continuation https://github.com/PyCQA/baron/pull/92 by ibizaman 42 | - handle corrupt cache file situation https://github.com/PyCQA/baron/pull/76 by ryu2 43 | - fix special crashing edge case in indentation marker https://github.com/PyCQA/bar by Ahuge 44 | - fixed incorrect tokenization case "d*e-1". 
Fixes #85 https://github.com/PyCQA/baron/pull/107 by boxed 45 | - fix endl handling inside groupings by kyleatmakrs (extracted from https://github.com/PyCQA/baron/pull/126) 46 | 47 | Python 3: 48 | - python 3 parsing extracted from https://github.com/PyCQA/baron/pull/126 49 | - support ellipsis https://github.com/PyCQA/baron/pull/121 by odcinek 50 | - support matrix operator https://github.com/PyCQA/baron/pull/117 by odcinek 51 | - support f-strings https://github.com/PyCQA/baron/pull/110 by odcinek 52 | - support numeric literals https://github.com/PyCQA/baron/pull/111 by odcinek 53 | - support nonlocal statement https://github.com/PyCQA/baron/pull/112 by odcinek 54 | - support keyword only markers https://github.com/PyCQA/baron/pull/108 by boxed 55 | - support yield from statement https://github.com/PyCQA/baron/pull/113 by odcinek and additional work by bram 56 | - support async/await statements https://github.com/PyCQA/baron/pull/114 by odcinek and additional work by bram 57 | 58 | 0.6.6 (2017-06-12) 59 | ------------------ 60 | 61 | - fix situation where a deindented comment between a if and elif/else broken 62 | parsing, see https://github.com/PyCQA/baron/issues/87 63 | - around 35-40% to 75% parsing speed improvment on big files by duncf 64 | https://github.com/PyCQA/baron/pull/99 65 | 66 | 0.6.5 (2017-01-26) 67 | ------------------ 68 | 69 | - fix previous regression fix was broken 70 | 71 | 0.6.4 (2017-01-14) 72 | ------------------ 73 | 74 | - fix regression in case a comment follow the ":" of a if/def/other 75 | 76 | 0.6.3 (2017-01-02) 77 | ------------------ 78 | 79 | - group formatting at start of file or preceded by space with comment 80 | 81 | 0.6.2 (2016-03-18) 82 | ------------------ 83 | 84 | - fix race condition when generating parser cache file 85 | - make all user-facing errors inherit from the same BaronError class 86 | - fix: dotted_name and float_exponant_complex were missing from 87 | nodes_rendering_order 88 | 89 | 0.6.1 (2015-01-31) 90 | 
------------------ 91 | 92 | - fix: the string was having a greedy behavior on grouping the string tokens 93 | surrounding it (for string chains), this ends up creating an inconsistancy in 94 | the way string was grouped in general 95 | - fix: better number parsing handling, everything isn't fixed yet 96 | - make all (expected) errors inherit from the same BaronError class 97 | - fix: parsing fails correctly if a quoted string is not closed 98 | 99 | 0.6 (2014-12-11) 100 | ---------------- 101 | 102 | - FST structure modification: def_argument_tuple is no more and all arguments 103 | now have a coherent structure: 104 | * def_argument node name attribute has been renamed to target, like in assign 105 | * target attribute now points to a dict, not to a string 106 | * old name -> string are now target -> name_node 107 | * def_argument_tuple is now a def_argument where target points to a tuple 108 | * this specific tuple will only has name and comma and tuple members (no more 109 | def_argument for name) 110 | - new node: long, before int and long where merged but that was causing problems 111 | 112 | 0.5 (2014-11-10) 113 | ---------------- 114 | 115 | - rename "funcdef" node to "def" node to be way more intuitive. 116 | 117 | 0.4 (2014-09-29) 118 | ---------------- 119 | 120 | - new rendering type in the nodes_rendering_order dictionary: string. This 121 | remove an ambiguity where a key could be pointing to a dict or a string, thus 122 | forcing third party tools to do guessing. 
123 | 124 | 0.3.1 (2014-09-04) 125 | ------------------ 126 | 127 | - setup.py wasn't working if wheel wasn't used because the CHANGELOG file 128 | wasn't included in the MANIFEST.in 129 | 130 | 0.3 (2014-08-21) 131 | ---------------- 132 | 133 | - path becomes a simple list and is easier to deal with 134 | - bounding box allows you to know the left most and right most position 135 | of a node see https://baron.readthedocs.io/en/latest/#bounding-box 136 | - redbaron is classified as supporting python3 137 | https://github.com/PyCQA/baron/pull/51 138 | - ensure than when a key is a string, it's empty value is an empty string and 139 | not None to avoid breaking libs that use introspection to guess the type of 140 | the key 141 | - key renaming in the FST: "delimiteur" -> "delimiter" 142 | - name_as_name and dotted_as_name node don't have the "as" key anymore as it 143 | was useless (it can be deduce from the state of the "target" key) 144 | - dotted_name node doesn't exist anymore, its existance was unjustified. In 145 | import, from_import and decorator node, it has been replaced from a key to a 146 | dict (with only a list inside of it) to a simple list. 
147 | - dumps now accept a strict boolean argument to check the validity of the FST 148 | on dumping, but this isn't that much a public feature and should probably be 149 | changed of API in the futur 150 | - name_as_name and dotted_as_name empty value for target is now an empty string 151 | and not None since this is a string type key 152 | - boundingbox now includes the newlines at the end of a node 153 | - all raised exceptions inherit from a common base exception to ease try/catch 154 | constructions 155 | - Position's left and right functions become properties and thus 156 | attributes 157 | - Position objects can be compared to other Position objects or any 158 | iterables 159 | - make_position and make_bounding_box functions are deleted in favor of 160 | always using the corresponding class' constructor 161 | 162 | 0.2 (2014-06-11) 163 | ---------------- 164 | 165 | - Baron now provides documentation on https://baron.readthedocs.io 166 | - feature: baron now run in python3 (*but* doesn't implement the full python3 167 | grammar yet) by Pierre Penninckx https://github.com/ibizaman 168 | - feature: drop the usage of ast.py to find print_function, this allow any 169 | version of python to parse any other version of python also by Pierre 170 | Penninckx 171 | - fix: rare bug where a comment end up being confused as an indentation level 172 | - 2 new helpers: show_file and show_node, see https://baron.readthedocs.io/en/latest/#show-file 173 | and https://baron.readthedocs.io/en/latest/#show-node 174 | - new dictionary that provides the informations on how to render a FST node: 175 | nodes_rendering_order see https://baron.readthedocs.io/en/latest/#rendering-the-fst 176 | - new utilities to find a node, see https://baron.readthedocs.io/en/latest/#locate-a-node 177 | - new generic class that provide templates to work on the FST see 178 | https://baron.readthedocs.io/en/latest/#rendering-the-fst 179 | 180 | 0.1.3 (2014-04-13) 181 | ------------------ 182 | 183 | - 
set sugar syntaxic notation wasn't handled by the dumper (apparently no one 184 | use this on pypi top 100) 185 | 186 | 0.1.2 (2014-04-08) 187 | ------------------ 188 | 189 | - baron.dumps now accept a single FST node, it was only working with a list of 190 | FST nodes 191 | - don't add a endl node at the end if not present in the input string 192 | - de-uniformise call_arguments and function_arguments node, this is just 193 | creating more problems that anything else 194 | - fix https://github.com/PyCQA/redbaron/issues/4 195 | - fix the fact that baron can't parse "{1,}" (but "{1}" is working) 196 | 197 | 0.1.1 (2014-03-23) 198 | ------------------ 199 | 200 | - It appears that I don't know how to write MANIFEST.in correctly 201 | 202 | 203 | 0.1 (2014-03-22) 204 | ---------------- 205 | 206 | - Init 207 | -------------------------------------------------------------------------------- /docs/technical.rst: -------------------------------------------------------------------------------- 1 | Rendering the FST 2 | ================= 3 | 4 | This section is quite advanced and you will maybe never need to use 5 | what is in here. But if you want to process the whole rendered fst or 6 | part of it as a chunk, please read along since several helpers are 7 | provided. 8 | 9 | Understanding core rendering 10 | ---------------------------- 11 | 12 | Baron renders the FST back into source code by following the 13 | instructions given by the :file:`nodes_rendering_order` dictionary. It 14 | gives, for every FST node, the order in which the node components must be 15 | rendered and the nature of those components. 16 | 17 | .. 
ipython:: python 18 | 19 | from baron import nodes_rendering_order, parse 20 | from baron.helpers import show_node 21 | 22 | nodes_rendering_order["name"] 23 | show_node(parse("a_name")[0]) 24 | nodes_rendering_order["tuple"] 25 | show_node(parse("(a_name,another_name,yet_another_name)")[0]) 26 | nodes_rendering_order["comma"] 27 | 28 | For a "name" node, it is a list containing a unique component stored in a tuple 29 | but it can contain multiple ones like for a "tuple" node. 30 | 31 | To render a node, you just need to render each element of the list, one 32 | by one, in the given order. As you can see, they are all formatted as 33 | a 3-tuple. The first column is the type which is one of the following: 34 | 35 | .. ipython:: python 36 | 37 | from baron.render import node_types 38 | 39 | node_types 40 | 41 | With the exception of the "constant" node, the second column contains the key 42 | of the FST node which must be rendered. The first column explains how that key 43 | must be rendered. We'll see the third column later. 44 | 45 | * A :file:`node` node is one of the nodes in the 46 | :file:`nodes_rendering_order` we just introduced, it is rendered by 47 | following the rules mentionned here. This is indeed a recursive 48 | definition. 49 | * A :file:`key` node is a branch of the tree that contains another node (a 50 | python dictionary). 51 | * A :file:`string` node is a leaf of the tree that contains a variable value, 52 | like the name of a function. 53 | former case, it is rendered by rendering its content. 54 | * A :file:`list` node is like a :file:`key` node but can contain 0, 1 or 55 | several other nodes stored in a python list. For example, Baron root node is 56 | a :file:`list` node since a python program is a list of statements. It is 57 | rendered by rendering each of its elements in order. 58 | * A :file:`formatting` node is similar in behaviour to a :file:`list` 59 | node but contains only formatting nodes. 
This is basically where Baron 60 | distinguish itself from other ASTs. 61 | * A :file:`constant` node is a leaf of the FST tree. The second column 62 | always contain a string which is outputted directly. Compared to 63 | a :file:`string` node, the :file:`constant` node is 64 | identical for every instance of the nodes (e.g. the left parenthesis 65 | character :file:`(` in a function call node or the :file:`def` keyword 66 | of a function definition) while the :file:`string` node's value can 67 | change (e.g. the name of the function in a function definition node). 68 | * A :file:`bool` node is a node used exclusively for conditional 69 | rendering. It's exact use will be explained later on with the tuple's 70 | third column but the main point for now is to know that they are never 71 | rendered. 72 | 73 | 74 | Walkthrough 75 | ~~~~~~~~~~ 76 | 77 | Let's see all this is in action by rendering a "lambda" node. First, the 78 | root node is always a "list" node and since we are only parsing one 79 | statement, the root node contains our "lambda" node at index 0: 80 | 81 | .. ipython:: python 82 | 83 | fst = parse("lambda x, y = 1: x + y") 84 | 85 | fst[0]["type"] 86 | 87 | For reference, you can find the (long) FST produced by the lambda node 88 | at the end of this section. 89 | 90 | Now, let's see how to render a "lambda" node: 91 | 92 | .. ipython:: python 93 | 94 | nodes_rendering_order["lambda"] 95 | 96 | Okay, first the string constant "lambda", then a first_formatting node 97 | which represents the space between the string "lambda" and the variable 98 | "x". 99 | 100 | .. ipython:: python 101 | 102 | fst[0]["first_formatting"] 103 | 104 | The "first_formatting" contains a list whose unique element is a "space" 105 | node. 106 | 107 | .. ipython:: python 108 | 109 | fst[0]["first_formatting"][0] 110 | 111 | nodes_rendering_order["space"] 112 | 113 | Which in turn is rendered by printing the value of the string of the space 114 | node. 115 | 116 | .. 
ipython:: python 117 | 118 | fst[0]["first_formatting"][0]["value"] 119 | 120 | So far we have outputted "lambda ". Tedious but exhaustive. 121 | 122 | We have exhausted the "first_formatting" node so we go back up the tree. 123 | Next is the "list" node representing the arguments: 124 | 125 | .. ipython:: python 126 | 127 | fst[0]["arguments"] 128 | 129 | Rendering a "list" node is done one element at a time. First 130 | a "def_argument", then a "comma" and again a "def_argument". 131 | 132 | .. ipython:: python 133 | 134 | fst[0]["arguments"][0] 135 | 136 | nodes_rendering_order["def_argument"] 137 | 138 | The first "def_argument" is rendered by first outputting the content of 139 | a name "string" node: 140 | 141 | .. ipython:: python 142 | 143 | fst[0]["arguments"][0]["name"] 144 | 145 | Now, we have outputted "lambda x". At first glance we could say we 146 | should render the second element of the "def_argument" node but as we'll 147 | see in the next section, it is not the case because of the third column 148 | of the tuple. 149 | 150 | For reference, the FST of the lambda node: 151 | 152 | .. ipython:: python 153 | 154 | show_node(fst[0]) 155 | 156 | Dependent rendering 157 | ~~~~~~~~~~~~~~~~~~~ 158 | 159 | Sometimes, some node elements must not be outputted. In our 160 | "def_argument" example, all but the first are conditional. They are only 161 | rendered if the FST's "value" node exists and is not empty. Let's 162 | compare the two "def_arguments" FST nodes: 163 | 164 | .. ipython:: python 165 | 166 | fst[0]["arguments"][0] 167 | 168 | fst[0]["arguments"][2] 169 | 170 | nodes_rendering_order[fst[0]["arguments"][2]["type"]] 171 | 172 | The "value" is empty for the former "def_argument" but not for the 173 | latter because it has a default value of "= 1". 174 | 175 | .. 
ipython:: python 176 | 177 | from baron import dumps 178 | 179 | dumps(fst[0]["arguments"][0]) 180 | 181 | dumps(fst[0]["arguments"][2]) 182 | 183 | The rule here is that the third column of a node is one of: 184 | 185 | * True, it is always rendered; 186 | * False, it is never rendered; 187 | * A string, it is rendered conditionnally. It is not rendered if the key it references is either empty or False. It also must reference an existing key. In our example above, it references the existing "value" key which is empty in the first case and not empty in the second. 188 | 189 | This is how "bool" nodes are never outputted: their third column is 190 | always False. 191 | 192 | We will conclude here now that we have seen an example of every aspect 193 | of FST rendering. Understanding everything is not required to use Baron 194 | since several helpers like :file:`render`, :file:`RenderWalker` or 195 | :file:`dumps` handle all the complexity under the hood. 196 | 197 | Render Helper 198 | ------------- 199 | 200 | Baron provides a render function helper which walks recursively the 201 | :file:`nodes_rendering_order` dictionnary for you: 202 | 203 | .. autofunction:: baron.render.render 204 | 205 | RenderWalker Helper 206 | ------------------- 207 | 208 | But even easier, Baron provides a walker class whose job is to walk the 209 | fst while rendering it and to call user-provided callbacks at each step: 210 | 211 | .. 
autoclass:: baron.render.RenderWalker 212 | 213 | Internally, Baron uses the :file:`RenderWalker` for multiple tasks like 214 | for the :file:`dumps` function: 215 | 216 | :: 217 | 218 | from baron.render import RenderWalker 219 | 220 | def dumps(tree): 221 | return Dumper().dump(tree) 222 | 223 | class Dumper(RenderWalker): 224 | def before_constant(self, constant, key): 225 | self.dump += constant 226 | 227 | def before_string(self, string, key): 228 | self.dump += string 229 | 230 | def dump(self, tree): 231 | self.dump = '' 232 | self.walk(tree) 233 | return self.dump 234 | 235 | As you can see it is quite simple since it only needs the 236 | :file:`before_constant` and the :file:`before_string` methods with the same 237 | exact code. 238 | 239 | PathWalker Helper 240 | ----------------- 241 | 242 | If while walking you need to know the current path of the node, then you 243 | should subclass :file:`PathWalker` instead: 244 | 245 | .. autoclass:: baron.path.PathWalker 246 | 247 | Here is a succint example of what you should expect when using the 248 | :file:`PathWalker`: 249 | 250 | .. ipython:: python 251 | 252 | from baron.path import PathWalker 253 | 254 | fst = parse("a = 1") 255 | 256 | class PathWalkerPrinter(PathWalker): 257 | def before(self, key_type, item, render_key): 258 | super(PathWalkerPrinter, self).before(key_type, item, render_key) 259 | print(self.current_path) 260 | 261 | def after(self, key_type, item, render_key): 262 | print(self.current_path) 263 | super(PathWalkerPrinter, self).after(key_type, item, render_key) 264 | 265 | walker = PathWalkerPrinter() 266 | walker.walk(fst) 267 | 268 | Like in the example, don't forget to call the before and after methods 269 | of the parent class. Furthermore, you need to respect the order 270 | specified above, that is: 271 | 272 | * Calling :file:`super().before()` should be done before your code using 273 | the :file:`self.path` attribute. 
274 | * Calling :file:`super().after()` should be done after your code using 275 | the :file:`self.path` attribute. 276 | 277 | -------------------------------------------------------------------------------- /tests/test_spliter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding:Utf-8 -*- 3 | 4 | 5 | from baron.spliter import split, UntreatedError 6 | from baron.utils import python_version 7 | import pytest 8 | 9 | 10 | def test_empty(): 11 | assert split("") == [] 12 | 13 | 14 | def test_print(): 15 | assert split("print") == ["print"] 16 | 17 | 18 | def test_print_space(): 19 | assert split("print ") == ['print', ' '] 20 | 21 | 22 | def test_import(): 23 | assert split("import pouet") == ["import", " ", "pouet"] 24 | 25 | 26 | def test_from_import(): 27 | assert split(" from zob import pouet ") == [" ", "from", " ", "zob", " ", "import", " ", "pouet", " "] 28 | 29 | 30 | def test_from_import_as(): 31 | assert split("from a import b as c") == ["from", " ", "a", " ", "import", " ", "b", " ", "as", " ", "c"] 32 | 33 | 34 | def test_underscore_variable(): 35 | assert split("some_variable") == ["some_variable"] 36 | 37 | 38 | def test_different_case(): 39 | assert split("AbCd cDeF") == ["AbCd", " ", "cDeF"] 40 | 41 | 42 | def test_decorator(): 43 | assert split("@pouet") == ["@", "pouet"] 44 | 45 | 46 | def test_tab_n_space(): 47 | assert split(" ") == [" "] 48 | 49 | 50 | def test_several_spaces(): 51 | assert split(" ") == [" "] 52 | 53 | 54 | def test_numbers(): 55 | assert split("1234") == ["1234"] 56 | 57 | 58 | def test_several_numbers(): 59 | assert split("12 34") == ["12", " ", "34"] 60 | 61 | 62 | def test_comma(): 63 | assert split(",") == [","] 64 | 65 | 66 | def test_comma_with_words(): 67 | assert split("a, b") == ["a", ",", " ", "b"] 68 | 69 | 70 | def test_dot(): 71 | assert split(".") == ["."] 72 | 73 | 74 | def test_dot_with_word(): 75 | assert split("a.b") == ["a", ".", "b"] 76 
# --- spliter behaviour on delimiters, operators and statements --------------
# Each test feeds a small snippet to baron.spliter.split() and checks the
# exact list of lexical chunks produced: whitespace is preserved as
# standalone items and multi-character operators are emitted one character
# at a time (e.g. "**" -> "*", "*").
#
# NOTE: test_colon / test_colon_word were renamed to test_semicolon /
# test_semicolon_word — the snippets under test are semicolons, not colons.


def test_dot_with_words():
    assert split("a.b.c") == ["a", ".", "b", ".", "c"]


def test_semicolon():
    assert split(";") == [";"]


def test_semicolon_word():
    assert split("pouet;") == ["pouet", ";"]


def test_assign():
    assert split("a = b") == ["a", " ", "=", " ", "b"]


def test_call():
    assert split("function()") == ["function", "(", ")"]


def test_call_with_arg():
    assert split("function(a)") == ["function", "(", "a", ")"]


def test_call_with_args():
    assert split("function(a, b, c)") == ["function", "(", "a", ",", " ", "b", ",", " ", "c", ")"]


def test_call_with_kwarg():
    assert split("function(a=b)") == ["function", "(", "a", "=", "b", ")"]


def test_call_with_kwargs():
    assert split("function(a=b, c= d)") == ["function", "(", "a", "=", "b", ",", " ", "c", "=", " ", "d", ")"]


def test_call_with_start_args():
    assert split("function(*args)") == ["function", "(", "*", "args", ")"]


def test_call_with_start_kwargs():
    # "**" comes out as two separate "*" chunks at this stage.
    assert split("function(**kwargs)") == ["function", "(", "*", "*", "kwargs", ")"]


def test_function_def():
    assert split("def pouet(): pass") == ["def", " ", "pouet", "(", ")", ":", " ", "pass"]


def test_addition():
    assert split("a + 2") == ["a", " ", "+", " ", "2"]


def test_substraction():
    assert split("a - 2") == ["a", " ", "-", " ", "2"]


def test_multiplication():
    assert split("a * 2") == ["a", " ", "*", " ", "2"]


def test_division():
    assert split("a/2") == ["a", "/", "2"]


def test_power():
    assert split("a**2") == ["a", "*", "*", "2"]


def test_modulo():
    assert split("a % 2") == ["a", " ", "%", " ", "2"]


def test_binary_stuff():
    assert split("a^2") == ["a", "^", "2"]
    assert split("a&2") == ["a", "&", "2"]
    assert split("a|2") == ["a", "|", "2"]
    assert split("a << 2") == ["a", " ", "<", "<", " ", "2"]
    assert split("a >> 2") == ["a", " ", ">", ">", " ", "2"]


def test_operator_assign():
    # Augmented assignments are also emitted one character at a time.
    assert split("a += 2") == ["a", " ", "+", "=", " ", "2"]
    assert split("a -= 2") == ["a", " ", "-", "=", " ", "2"]
    assert split("a *= 2") == ["a", " ", "*", "=", " ", "2"]
    assert split("a /= 2") == ["a", " ", "/", "=", " ", "2"]
    assert split("a %= 2") == ["a", " ", "%", "=", " ", "2"]
    assert split("a &= 2") == ["a", " ", "&", "=", " ", "2"]
    assert split("a |= 2") == ["a", " ", "|", "=", " ", "2"]
    assert split("a ^= 2") == ["a", " ", "^", "=", " ", "2"]
    assert split("a //= 2") == ["a", " ", "/", "/", "=", " ", "2"]
    assert split("a **= 2") == ["a", " ", "*", "*", "=", " ", "2"]
    assert split("a <<= 2") == ["a", " ", "<", "<", "=", " ", "2"]
    assert split("a >>= 2") == ["a", " ", ">", ">", "=", " ", "2"]


def test_factor():
    assert split("~a") == ["~", "a"]


def test_del_pouet():
    assert split("del pouet") == ["del", " ", "pouet"]


def test_pass():
    assert split("pass") == ["pass"]


def test_break():
    assert split("break") == ["break"]


def test_continue():
    assert split("continue") == ["continue"]


def test_return():
    assert split("return") == ["return"]


def test_return_stuff():
    assert split("return stuff") == ["return", " ", "stuff"]


def test_yield():
    assert split("yield") == ["yield"]


def test_yield_stuff():
    assert split("yield stuff") == ["yield", " ", "stuff"]


def test_raise():
    assert split("raise") == ["raise"]


def test_raise_stuff():
    assert split("raise Exception()") == ["raise", " ", "Exception", "(", ")"]


def test_global_stuff():
    assert split("global stuff") == ["global", " ", "stuff"]


def test_exec():
    assert split("exec") == ["exec"]


def test_exec_stuff():
    assert split("exec stuff") == ["exec", " ", "stuff"]


def test_assert():
    assert split("assert") == ["assert"]


def test_assert_stuff():
    assert split("assert stuff") == ["assert", " ", "stuff"]


def test_line_end():
    assert split("\n") == ["\n"]


def test_line_end_windows():
    # "\r" and "\n" stay separate chunks.
    assert split("\r\n") == ["\r", "\n"]


def test_if():
    assert split("if ab: pass") == ["if", " ", "ab", ":", " ", "pass"]


def test_if_elif_else():
    assert split("if a:\n pass\nelif b:\n pass\nelse: \n pass") == ["if", " ", "a", ":", "\n", " ", "pass", "\n", "elif", " ", "b", ":", "\n", " ", "pass", "\n", "else", ":", " ", "\n", " ", "pass"]


def test_while():
    assert split("while a: pass") == ["while", " ", "a", ":", " ", "pass"]


def test_lambda():
    assert split("lambda x: x + 1") == ["lambda", " ", "x", ":", " ", "x", " ", "+", " ", "1"]


def test_for():
    assert split("for a in b: pass") == ["for", " ", "a", " ", "in", " ", "b", ":", " ", "pass"]


def test_empty_list():
    assert split("[]") == ["[", "]"]


def test_list():
    assert split("[a, b, c]") == ["[", "a", ",", " ", "b", ",", " ", "c", "]"]


def test_empty_dict():
    assert split("{}") == ["{", "}"]


def test_dict():
    assert split("{a: b, c: d}") == ["{", "a", ":", " ", "b", ",", " ", "c", ":", " ", "d", "}"]


def test_not():
    # Not valid Python, but split() works at the character level.
    assert split("!a") == ["!", "a"]


def test_not_equal():
    assert split("a != b") == ["a", " ", "!", "=", " ", "b"]
| def test_backquote(): 290 | assert split("`a`") == ["`", "a", "`"] 291 | 292 | 293 | def test_number_in_var(): 294 | assert split("a1") == ["a1"] 295 | 296 | 297 | def test_comment(): 298 | assert split("# a b c d") == ["# a b c d"] 299 | 300 | 301 | def test_comments(): 302 | assert split("# a b c d\n# pouet") == ["# a b c d", "\n", "# pouet"] 303 | 304 | 305 | def test_empty_string(): 306 | assert split("''") == ["''"] 307 | 308 | 309 | def test_string(): 310 | assert split("'pouet pouet'") == ["'pouet pouet'"] 311 | 312 | 313 | def test_empty_string_other_quotes(): 314 | assert split('"pouet pouet"') == ['"pouet pouet"'] 315 | 316 | 317 | def test_multi_string(): 318 | assert split("'''pouet pouet'''") == ["'''pouet pouet'''"] 319 | 320 | 321 | def test_multi_string_other_quotes(): 322 | assert split('"""pouet pouet"""') == ['"""pouet pouet"""'] 323 | 324 | 325 | def test_missing_quote_yields_error(): 326 | with pytest.raises(UntreatedError): 327 | split("'") 328 | 329 | with pytest.raises(UntreatedError): 330 | split("'''") 331 | 332 | with pytest.raises(UntreatedError): 333 | split('"') 334 | 335 | with pytest.raises(UntreatedError): 336 | split('"""') 337 | 338 | 339 | def test_escape(): 340 | assert split("\\\\") == ["\\", "\\"] 341 | 342 | 343 | def test_escape_in_string(): 344 | assert split("'\\\\'") == ["'\\\\'"] 345 | 346 | 347 | def test_other_escape_string(): 348 | assert split("'\\\\'") == ["'\\\\'"] 349 | 350 | 351 | def test_hexa(): 352 | assert split("0x7F") == ["0x7F"] 353 | 354 | 355 | def test_multi_string_with_same_quotes_in(): 356 | assert split('"""pouet " "" pouet"""') == ['"""pouet " "" pouet"""'] 357 | 358 | 359 | def test_comment_backslash(): 360 | assert split('# pouet \\\npouet') == ["# pouet \\", "\n", "pouet"] 361 | 362 | 363 | def test_backslash_in_comment(): 364 | assert split("# pouet \\t pouet\npouet") == ["# pouet \\t pouet", "\n", "pouet"] 365 | 366 | 367 | def test_regression(): 368 | assert 
split("(r'[\"\\'](.|\n|\r)*[\"\\']', 'STRING'),") == ["(", "r", "'[\"\\'](.|\n|\r)*[\"\\']'", ",", " ", "'STRING'", ")", ","] 369 | 370 | 371 | # TODO: make this test pass in python3 also 372 | # requires to remove dependency on ast.py 373 | if python_version == 2: 374 | def test_remove_crap(): 375 | assert split("\x0c\xef\xbb\xbf") == [] 376 | 377 | 378 | def test_split_float_notation(): 379 | assert split("a._") == ["a", ".", "_"] 380 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Baron documentation build configuration file, created by 4 | # sphinx-quickstart on Sat May 10 02:16:20 2014. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | #sys.path.insert(0, os.path.abspath('.')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # If your documentation needs a minimal Sphinx version, state it here. 24 | #needs_sphinx = '1.0' 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be extensions 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
# -- General configuration ---------------------------------------------------

# Sphinx extension modules: the built-in 'sphinx.ext.*' ones plus the
# IPython directives used to execute the examples embedded in the docs.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.doctest',
    'sphinx.ext.todo',
    'IPython.sphinxext.ipython_directive',
    'IPython.sphinxext.ipython_console_highlighting',
]

# Paths (relative to this directory) containing page templates.
templates_path = ['_templates']

# Suffix of the reST source files.
source_suffix = '.rst'

# Document holding the root toctree.
master_doc = 'index'

# General information about the project.
project = u'Baron'
copyright = u'2014, Laurent Peuch'

# version: the short X.Y string; release: the full version string,
# including any alpha/beta/rc tags. Both substitute |version| / |release|.
version = '0.6'
release = '0.6'

# Patterns, relative to the source directory, ignored when looking for
# source files.
exclude_patterns = ['_build']

# Pygments (syntax highlighting) style.
pygments_style = 'sphinx'


# -- Options for HTML output -------------------------------------------------

# Built-in theme used for the HTML and HTML Help pages.
html_theme = 'default'

# Custom static files; copied after the built-in ones, so e.g. a local
# "default.css" would override the theme's.
html_static_path = ['_static']

# Output file base name for the HTML help builder.
htmlhelp_basename = 'Barondoc'


# -- Options for LaTeX output ------------------------------------------------

# All LaTeX knobs ('papersize', 'pointsize', 'preamble', ...) are left at
# their Sphinx defaults.
latex_elements = {
}

# Grouping of the document tree into LaTeX files. One tuple per document:
# (source start file, target name, title, author, documentclass).
latex_documents = [
    ('index', 'Baron.tex', u'Baron Documentation',
     u'Laurent Peuch', 'manual'),
]


# -- Options for manual page output ------------------------------------------

# One entry per manual page:
# (source start file, name, description, authors, manual section).
man_pages = [
    ('index', 'baron', u'Baron Documentation',
     [u'Laurent Peuch'], 1)
]


# -- Options for Texinfo output ----------------------------------------------

# Grouping of the document tree into Texinfo files:
# (source start file, target name, title, author, dir menu entry,
#  description, category).
texinfo_documents = [
    ('index', 'Baron', u'Baron Documentation',
     u'Laurent Peuch', 'Baron', 'One line description of project.',
     'Miscellaneous'),
]


# -- Options for Epub output -------------------------------------------------

# Bibliographic Dublin Core metadata.
epub_title = u'Baron'
epub_author = u'Laurent Peuch'
epub_publisher = u'Laurent Peuch'
epub_copyright = u'2014, Laurent Peuch'

# Allow duplicate toc entries.
291 | #epub_tocdup = True 292 | -------------------------------------------------------------------------------- /tests/test_render.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from baron import parse 3 | from baron.render import render, node_types, nodes_rendering_order, RenderWalker 4 | 5 | 6 | @pytest.fixture(params=nodes_rendering_order.keys()) 7 | def dictionnary_node(request): 8 | return nodes_rendering_order[request.param] 9 | 10 | 11 | def test_render_crap(): 12 | with pytest.raises(NotImplementedError): 13 | render("crap") 14 | 15 | 16 | def test_dictionnary_key_validity(dictionnary_node): 17 | for key_type, _, _ in dictionnary_node: 18 | assert key_type in node_types 19 | 20 | 21 | def test_dictionnary_dependent_validity(dictionnary_node): 22 | keys = set([t[1] for t in dictionnary_node]) 23 | for key_type, _, dependent in dictionnary_node: 24 | assert isinstance(dependent, bool) \ 25 | or (isinstance(dependent, str) and dependent in keys) \ 26 | or (isinstance(dependent, list) and all([d in keys for d in dependent])) 27 | 28 | if key_type == 'bool': 29 | assert dependent is False 30 | 31 | 32 | def test_render_dictionnary_bad_type(): 33 | nodes_rendering_order['bad_type'] = [('wtf', 'hello', True)] 34 | with pytest.raises(NotImplementedError) as e: 35 | list(render({'type': 'bad_type'})) 36 | assert str(e.value) == "Unknown key type \"wtf\" in \"bad_type\" node" 37 | 38 | 39 | def test_render_dictionnary_bad_bool_dependency(): 40 | nodes_rendering_order['bad_bool_dependency'] = [('bool', True, True)] 41 | with pytest.raises(NotImplementedError) as e: 42 | list(render({'type': 'bad_bool_dependency'})) 43 | assert str(e.value) == "Bool keys are only used for dependency, they cannot be rendered. 
Please set the \"('bool', True, True)\"'s dependent key in \"bad_bool_dependency\" node to False" 44 | 45 | 46 | def test_render_dictionnary_bad_bool_dependency2(): 47 | nodes_rendering_order['bad_bool_dependency2'] = [('bool', False, 'other_key')] 48 | with pytest.raises(NotImplementedError) as e: 49 | list(render({'type': 'bad_bool_dependency2'})) 50 | assert str(e.value) == "Bool keys are only used for dependency, they cannot be rendered. Please set the \"('bool', False, 'other_key')\"'s dependent key in \"bad_bool_dependency2\" node to False" 51 | 52 | 53 | class RenderWalkerTester(RenderWalker): 54 | def __init__(self, steps): 55 | super(RenderWalkerTester, self).__init__(strict=True) 56 | self.steps = steps 57 | 58 | def before(self, *args): 59 | super(RenderWalkerTester, self).before(*args) 60 | return self.process_test('>', *args) 61 | 62 | def after(self, *args): 63 | stop = self.process_test('<', *args) 64 | super(RenderWalkerTester, self).after(*args) 65 | return stop 66 | 67 | def process_test(self, direction, node_type, node, render_key): 68 | _direction, _node_type, _node, _render_key, _stop = self.steps.pop(0) 69 | target = (_direction, _node_type, _node, _render_key) 70 | if node_type in ['constant', 'string']: 71 | assert target == (direction, node_type, node, render_key) 72 | elif "type" in node: 73 | assert target == (direction, node_type, node["type"], render_key) 74 | else: 75 | assert target == (direction, node_type, node.__class__.__name__, render_key) 76 | return _stop 77 | 78 | 79 | # flake8: noqa 80 | def test_walk_stop(): 81 | node = parse("a = 1") 82 | walker = RenderWalkerTester([ 83 | ('>', 'node', 'assignment', 0, False), 84 | ('>', 'key', 'name', 'target', False), 85 | ('>', 'string', 'a', 'value', True), 86 | ('<', 'key', 'name', 'target', False), 87 | ('<', 'node', 'assignment', 0, False), 88 | ]) 89 | 90 | walker.walk(node) 91 | 92 | 93 | def test_walk_assignment(): 94 | node = parse("a = 1") 95 | walker = RenderWalkerTester([ 96 
| ('>', 'node', 'assignment', 0, False), 97 | ('>', 'key', 'name', 'target', False), 98 | ('>', 'string', 'a', 'value', False), 99 | ('<', 'string', 'a', 'value', False), 100 | ('<', 'key', 'name', 'target', False), 101 | ('>', 'formatting', 'list', 'first_formatting', False), 102 | ('>', 'node', 'space', 0, False), 103 | ('>', 'string', ' ', 'value', False), 104 | ('<', 'string', ' ', 'value', False), 105 | ('<', 'node', 'space', 0, False), 106 | ('<', 'formatting', 'list', 'first_formatting', False), 107 | ('>', 'constant', '=', '=', False), 108 | ('<', 'constant', '=', '=', False), 109 | ('>', 'formatting', 'list', 'second_formatting', False), 110 | ('>', 'node', 'space', 0, False), 111 | ('>', 'string', ' ', 'value', False), 112 | ('<', 'string', ' ', 'value', False), 113 | ('<', 'node', 'space', 0, False), 114 | ('<', 'formatting', 'list', 'second_formatting', False), 115 | ('>', 'key', 'int', 'value', False), 116 | ('>', 'string', '1', 'value', False), 117 | ('<', 'string', '1', 'value', False), 118 | ('<', 'key', 'int', 'value', False), 119 | ('<', 'node', 'assignment', 0, False), 120 | ]) 121 | 122 | walker.walk(node) 123 | 124 | 125 | def test_walk_funcdef_with_leading_space(): 126 | node = parse("""\ 127 | 128 | @deco 129 | def fun(arg1): 130 | pass 131 | """) 132 | walker = RenderWalkerTester([ 133 | ('>', 'node', 'endl', 0, False), 134 | ('>', 'formatting', 'list', 'formatting', False), 135 | ('<', 'formatting', 'list', 'formatting', False), 136 | ('>', 'string', '\n', 'value', False), 137 | ('<', 'string', '\n', 'value', False), 138 | ('>', 'string', '', 'indent', False), 139 | ('<', 'string', '', 'indent', False), 140 | ('<', 'node', 'endl', 0, False), 141 | ('>', 'node', 'def', 1, False), 142 | ('>', 'list', 'list', 'decorators', False), 143 | ('>', 'node', 'decorator', 0, False), 144 | ('>', 'constant', '@', '@', False), 145 | ('<', 'constant', '@', '@', False), 146 | ('>', 'key', 'dotted_name', 'value', False), 147 | ('>', 'list', 'list', 'value', 
False), 148 | ('>', 'node', 'name', 0, False), 149 | ('>', 'string', 'deco', 'value', False), 150 | ('<', 'string', 'deco', 'value', False), 151 | ('<', 'node', 'name', 0, False), 152 | ('<', 'list', 'list', 'value', False), 153 | ('<', 'key', 'dotted_name', 'value', False), 154 | ('<', 'node', 'decorator', 0, False), 155 | ('>', 'node', 'endl', 1, False), 156 | ('>', 'formatting', 'list', 'formatting', False), 157 | ('<', 'formatting', 'list', 'formatting', False), 158 | ('>', 'string', '\n', 'value', False), 159 | ('<', 'string', '\n', 'value', False), 160 | ('>', 'string', '', 'indent', False), 161 | ('<', 'string', '', 'indent', False), 162 | ('<', 'node', 'endl', 1, False), 163 | ('<', 'list', 'list', 'decorators', False), 164 | ('>', 'constant', 'def', 'def', False), 165 | ('<', 'constant', 'def', 'def', False), 166 | ('>', 'formatting', 'list', 'first_formatting', False), 167 | ('>', 'node', 'space', 0, False), 168 | ('>', 'string', ' ', 'value', False), 169 | ('<', 'string', ' ', 'value', False), 170 | ('<', 'node', 'space', 0, False), 171 | ('<', 'formatting', 'list', 'first_formatting', False), 172 | ('>', 'string', 'fun', 'name', False), 173 | ('<', 'string', 'fun', 'name', False), 174 | ('>', 'formatting', 'list', 'second_formatting', False), 175 | ('<', 'formatting', 'list', 'second_formatting', False), 176 | ('>', 'constant', '(', '(', False), 177 | ('<', 'constant', '(', '(', False), 178 | ('>', 'formatting', 'list', 'third_formatting', False), 179 | ('<', 'formatting', 'list', 'third_formatting', False), 180 | ('>', 'list', 'list', 'arguments', False), 181 | ('>', 'node', 'def_argument', 0, False), 182 | ('>', 'key', 'name', 'target', False), 183 | ('>', 'string', 'arg1', 'value', False), 184 | ('<', 'string', 'arg1', 'value', False), 185 | ('<', 'key', 'name', 'target', False), 186 | ('<', 'node', 'def_argument', 0, False), 187 | ('<', 'list', 'list', 'arguments', False), 188 | ('>', 'formatting', 'list', 'fourth_formatting', False), 189 | ('<', 
'formatting', 'list', 'fourth_formatting', False), 190 | ('>', 'constant', ')', ')', False), 191 | ('<', 'constant', ')', ')', False), 192 | ('>', 'formatting', 'list', 'fifth_formatting', False), 193 | ('<', 'formatting', 'list', 'fifth_formatting', False), 194 | ('>', 'constant', ':', ':', False), 195 | ('<', 'constant', ':', ':', False), 196 | ('>', 'formatting', 'list', 'sixth_formatting', False), 197 | ('<', 'formatting', 'list', 'sixth_formatting', False), 198 | ('>', 'list', 'list', 'value', False), 199 | ('>', 'node', 'endl', 0, False), 200 | ('>', 'formatting', 'list', 'formatting', False), 201 | ('<', 'formatting', 'list', 'formatting', False), 202 | ('>', 'string', '\n', 'value', False), 203 | ('<', 'string', '\n', 'value', False), 204 | ('>', 'string', ' ', 'indent', False), 205 | ('<', 'string', ' ', 'indent', False), 206 | ('<', 'node', 'endl', 0, False), 207 | ('>', 'node', 'pass', 1, False), 208 | ('>', 'string', 'pass', 'type', False), 209 | ('<', 'string', 'pass', 'type', False), 210 | ('<', 'node', 'pass', 1, False), 211 | ('>', 'node', 'endl', 2, False), 212 | ('>', 'formatting', 'list', 'formatting', False), 213 | ('<', 'formatting', 'list', 'formatting', False), 214 | ('>', 'string', '\n', 'value', False), 215 | ('<', 'string', '\n', 'value', False), 216 | ('>', 'string', '', 'indent', False), 217 | ('<', 'string', '', 'indent', False), 218 | ('<', 'node', 'endl', 2, False), 219 | ('<', 'list', 'list', 'value', False), 220 | ('<', 'node', 'def', 1, False), 221 | ]) 222 | 223 | walker.walk(node) 224 | --------------------------------------------------------------------------------