├── tests ├── __init__.py ├── antlr_py │ ├── Expr.tokens │ ├── ExprLexer.tokens │ ├── __init__.py │ ├── ExprVisitor.py │ ├── ExprLexer.py │ └── ExprParser.py ├── Expr.g4 ├── test_visitor.py ├── test_ast_node.py ├── json │ └── test_marshalling │ │ └── test_marshalling.json ├── test_marshalling.py ├── test_base_node_registry.py └── test_expr_ast.py ├── MANIFEST.in ├── antlr_ast ├── __init__.py ├── inputstream.py ├── marshalling.py └── ast.py ├── requirements.txt ├── .travis.yml ├── CHANGELOG.md ├── setup.py ├── README.md └── LICENSE /tests/__init__.py: -------------------------------------------------------------------------------- 1 | from . import antlr_py as grammar 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include requirements.txt 3 | -------------------------------------------------------------------------------- /antlr_ast/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.8.1" 2 | 3 | from . import ast 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | antlr4-python3-runtime~=4.7.2 2 | 3 | # test deps 4 | pytest~=3.7.4 5 | codecov~=2.0.15 6 | pytest-cov~=2.5.1 7 | -------------------------------------------------------------------------------- /tests/antlr_py/Expr.tokens: -------------------------------------------------------------------------------- 1 | T__0=1 2 | T__1=2 3 | T__2=3 4 | T__3=4 5 | INT=5 6 | NOT=6 7 | WS=7 8 | '+'=1 9 | '-'=2 10 | '('=3 11 | ')'=4 12 | 'not'=6 13 | -------------------------------------------------------------------------------- /tests/antlr_py/ExprLexer.tokens: -------------------------------------------------------------------------------- 1 | T__0=1 2 | T__1=2 3 | T__2=3 4 | T__3=4 5 | INT=5 6 | NOT=6 7 | WS=7 8 | '+'=1 9 | '-'=2 10 | '('=3 11 | ')'=4 12 | 'not'=6 13 | -------------------------------------------------------------------------------- /tests/antlr_py/__init__.py: -------------------------------------------------------------------------------- 1 | from .ExprLexer import ExprLexer as Lexer 2 | from .ExprParser import ExprParser as Parser 3 | from .ExprVisitor import ExprVisitor as Visitor 4 | -------------------------------------------------------------------------------- /tests/Expr.g4: -------------------------------------------------------------------------------- 1 | grammar Expr; 2 | 3 | expr: left=expr op=('+'|'-') right=expr #BinaryExpr 4 | | NOT expr #NotExpr 5 | | INT #Integer 6 | | '(' expr ')' #SubExpr 7 | ; 8 | 9 | INT : [0-9]+ ; // match integers 10 | NOT : 'not' ; 11 | 12 | WS : [ \t]+ -> skip ; // toss out whitespace 13 | -------------------------------------------------------------------------------- /tests/test_visitor.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from antlr_ast.ast import get_transformer_method_name 4 | 5 | 6 | @pytest.mark.parametrize( 7 | "text, result", 8 | [ 9 | ("Test", "visit_Test"), 10 | ("teSt", "visit_TeSt"), 11 | ("Test_method", "visit_Test_method"), 12 | ("test_method", "visit_Test_method"), 13 | ("Test_Method", "visit_Test_Method"), 14 | ], 15 | ) 16 | def test_upper_first(text, result): 17 | assert get_transformer_method_name(text) == result 18 | 
-------------------------------------------------------------------------------- /tests/test_ast_node.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from antlr_ast.ast import AliasNode, parse_field_spec 4 | 5 | 6 | def test_double_field(): 7 | class Test(AliasNode): 8 | _fields_spec = ["x=a", "x=b"] 9 | 10 | assert Test._fields == ("x",) 11 | 12 | 13 | @pytest.mark.parametrize("field_spec_str, field_spec", [ 14 | ("x", ("x", ["x"])), 15 | ("x=a", ("x", ["a"])), 16 | ("x=a.b", ("x", ["a", "b"])), 17 | ("x = a", ("x", ["a"])), 18 | ("x= a ", ("x", ["a"])), 19 | ]) 20 | def test_spec_parse(field_spec_str, field_spec): 21 | spec = parse_field_spec(field_spec_str) 22 | 23 | assert spec.name == field_spec[0] 24 | assert spec.origin == field_spec[1] 25 | assert spec == field_spec 26 | -------------------------------------------------------------------------------- /tests/json/test_marshalling/test_marshalling.json: -------------------------------------------------------------------------------- 1 | { 2 | "@type": "NotExpr", 3 | "@fields": [ 4 | "op", 5 | "expr" 6 | ], 7 | "@position": { 8 | "line_start": 1, 9 | "column_start": 0, 10 | "line_end": 1, 11 | "column_end": 4 12 | }, 13 | "@text": "not2", 14 | "field_references": { 15 | "NOT": 0, 16 | "expr": 1 17 | }, 18 | "label_references": {}, 19 | "children": [ 20 | "not", 21 | { 22 | "@type": "Integer", 23 | "@fields": [ 24 | "INT" 25 | ], 26 | "@position": { 27 | "line_start": 1, 28 | "column_start": 4, 29 | "line_end": 1, 30 | "column_end": 4 31 | }, 32 | "@text": "2", 33 | "field_references": { 34 | "INT": 0 35 | }, 36 | "label_references": {}, 37 | "children": [ 38 | "2" 39 | ] 40 | } 41 | ] 42 | } 43 | -------------------------------------------------------------------------------- /tests/test_marshalling.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import pytest 3 | 4 | from json import dumps, JSONDecoder, load, loads 5 | from antlr_ast.marshalling import AstEncoder, get_decoder 6 | 7 | 8 | @pytest.fixture 9 | def ast(): 10 | return importlib.import_module("tests.test_expr_ast") 11 | 12 | 13 | def test_marshalling(ast): 14 | # Given 15 | code = "not 2" 16 | correct_json = load(open("tests/json/test_marshalling/test_marshalling.json")) 17 | 18 | # When 19 | ast_tree = ast.parse(code) 20 | json_tree = dumps(ast_tree, cls=AstEncoder) 21 | 22 | # Then 23 | assert loads(json_tree) == correct_json 24 | ast_tree = JSONDecoder(object_hook=get_decoder()).decode(json_tree) 25 | assert isinstance(ast_tree, ast.AstNode) 26 | assert isinstance(ast_tree.children_by_field["NOT"], str) 27 | assert isinstance(ast_tree.NOT, str) 28 | assert isinstance(ast_tree.children_by_field["expr"], ast.AstNode) 29 | assert isinstance(ast_tree.expr, ast.AstNode) 30 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - '3.5' 4 | install: 5 | - pip install -r requirements.txt 6 | - pip install . 
7 | script: py.test --cov=antlr_ast 8 | after_success: codecov 9 | deploy: 10 | provider: pypi 11 | user: datacamp 12 | password: 13 | secure: XgR1FFi6xknnCmDfvtcTTlyiIINk30mi33td364W3uVGKw/fAc5J01tTwOwbauuYtSyWPCp6GzX4sMTl40m6wSB/dbCzZxb7PxDxtfQyBBXeBBIOuJeRfo5nm+u1M1X33qmT9aidcCAv23TW3NakjnCIeDiCn1Ga3xhiwP/11zfnfQfEZlMgoXi6XTZmqAQj5OnbFF1CYEcO5LSipfx+sdqndYwMGR3iVxwrApExlY96XXMv9MSZXiSPcSk+mpgbdElwRaHw2PN43o2G9R6IMsuTRFijA0TnKlmA73jsd+tP5oAVRngLtZnzAhe2YwlGdmojnd0PROOyd1moBzq0hU/WJYm1fM1ipBG/iVSRgV3CDljMvItx5B9TMjeKcOhisAx/SMw0XNdB3DsqyaMVhUB6fGNlSj0AlrGEULggD8xi+gsK4SRf4F2hfADZR8vLyIx1rrTnXaINqhEG4Vu64g0PyodhXVqiAwcKxDq2Y4ywmilHGh6f3R708hBeNKa87a6vcCB/DqevH+7X2a8bfWU+uDQLFb5yM6rtopeWYovvB7lLZI2zb/dVXr/QhUZryIafs3Vb+/5lawmtcgGhU8Fk6r/Q8vFTXwBiE//uOQrURiUVhF7YUgFeXwOwpvqwtVJmPMyirteo2DUTsxtnuRT8b9McSJC4wUx0wi+eHKo= 14 | on: 15 | tags: true 16 | distributions: sdist bdist_wheel 17 | repo: datacamp/antlr-ast 18 | skip_upload_docs: true 19 | -------------------------------------------------------------------------------- /tests/antlr_py/ExprVisitor.py: -------------------------------------------------------------------------------- 1 | # Generated from tests/Expr.g4 by ANTLR 4.7 2 | from antlr4 import * 3 | if __name__ is not None and "." in __name__: 4 | from .ExprParser import ExprParser 5 | else: 6 | from ExprParser import ExprParser 7 | 8 | # This class defines a complete generic visitor for a parse tree produced by ExprParser. 9 | 10 | class ExprVisitor(ParseTreeVisitor): 11 | 12 | # Visit a parse tree produced by ExprParser#Integer. 13 | def visitInteger(self, ctx:ExprParser.IntegerContext): 14 | return self.visitChildren(ctx) 15 | 16 | 17 | # Visit a parse tree produced by ExprParser#SubExpr. 18 | def visitSubExpr(self, ctx:ExprParser.SubExprContext): 19 | return self.visitChildren(ctx) 20 | 21 | 22 | # Visit a parse tree produced by ExprParser#BinaryExpr. 23 | def visitBinaryExpr(self, ctx:ExprParser.BinaryExprContext): 24 | return self.visitChildren(ctx) 25 | 26 | 27 | # Visit a parse tree produced by ExprParser#NotExpr. 
28 | def visitNotExpr(self, ctx:ExprParser.NotExprContext): 29 | return self.visitChildren(ctx) 30 | 31 | 32 | 33 | del ExprParser -------------------------------------------------------------------------------- /antlr_ast/inputstream.py: -------------------------------------------------------------------------------- 1 | from operator import methodcaller 2 | 3 | from antlr4 import InputStream 4 | 5 | 6 | class CaseTransformInputStream(InputStream): 7 | """Support case insensitive languages 8 | https://github.com/antlr/antlr4/blob/master/doc/case-insensitive-lexing.md#custom-character-streams-approach 9 | """ 10 | UPPER = "upper" 11 | LOWER = "lower" 12 | 13 | def __init__(self, *args, transform=None, **kwargs): 14 | if transform is None: 15 | self.transform = lambda x: x 16 | elif transform == self.UPPER: 17 | self.transform = methodcaller("upper") 18 | elif transform == self.LOWER: 19 | self.transform = methodcaller("lower") 20 | elif callable(transform): 21 | self.transform = transform 22 | else: 23 | raise ValueError("Invalid transform") 24 | 25 | super().__init__(*args, **kwargs) 26 | 27 | def _loadString(self): 28 | self._index = 0 29 | 30 | self.data = [ord(self.transform(c)) for c in self.strdata] 31 | self._size = len(self.data) 32 | 33 | def __repr__(self): 34 | return "<{} {}>".format(self.__class__.__name__, self.transform) 35 | -------------------------------------------------------------------------------- /tests/test_base_node_registry.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from antlr_ast.ast import BaseNodeRegistry 4 | 5 | 6 | def test_base_node_registry_get_cls(): 7 | # Given 8 | base_node_registry = BaseNodeRegistry() 9 | 10 | # When 11 | cls1 = base_node_registry.get_cls("cls1", ("field1",)) 12 | cls1_2 = base_node_registry.get_cls("cls1", ("field1", "field2")) 13 | 14 | # Then 15 | assert set(cls1._fields) == {"field1", "field2"} 16 | assert set(cls1_2._fields) == {"field1", "field2"} 17 | 18 | 19 | def test_base_node_registry_isinstance(): 20 | # Given 21 | base_node_registry = BaseNodeRegistry() 22 | 23 | # When 24 | Cls1 = base_node_registry.get_cls("cls1", ("field1",)) 25 | Cls1_2 = base_node_registry.get_cls("cls1", ("field1", "field2")) 26 | Cls2 = base_node_registry.get_cls("cls2", ("field_a", "field_b")) 27 | 28 | cls1_obj = Cls1([], [], []) 29 | cls1_2_obj = Cls1_2([], [], []) 30 | cls2_obj = Cls2([], [], []) 31 | 32 | # Then 33 | assert isinstance(cls1_obj, type(cls1_2_obj)) 34 | assert base_node_registry.isinstance(cls1_obj, "cls1") 35 | assert base_node_registry.isinstance(cls1_2_obj, "cls1") 36 | assert base_node_registry.isinstance(cls2_obj, "cls2") 37 | assert not base_node_registry.isinstance(cls1_obj, "cls2") 38 | assert not base_node_registry.isinstance(cls1_2_obj, "cls2") 39 | assert not base_node_registry.isinstance(cls2_obj, "cls1") 40 | 41 | assert not base_node_registry.isinstance(cls2_obj, "cls3") 42 | 43 | with pytest.raises( 44 | TypeError, match="This function can only be used for BaseNode objects" 45 | ): 46 | base_node_registry.isinstance([], "cls1") 47 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 
4 | 5 | ## v0.8.1 6 | 7 | - Fix get_cls in BaseNodeRegistry, now updates fields of classes already in the registry 8 | 9 | ## v0.8.0 10 | 11 | - Support for Terminal nodes in `get_text` 12 | - Add optional text attribute to BaseNode 13 | - Fix marshalling to recursively transform children 14 | 15 | ## v0.7.0 16 | 17 | - Return `None` if `get_position` has no better result 18 | - Add type signatures 19 | - Improve lexer errors 20 | 21 | ## v0.6.0 22 | 23 | - Support `get_position` for Terminal nodes 24 | 25 | ## v0.5.0 26 | 27 | - Rewrite as a staged approach 28 | - autodetection of ANTLR fields and labels: Unshaped doesn’t exist anymore 29 | - more isolated steps: enables serialization (and possibility to let ANTLR parser run in a separate service) 30 | - more powerful definition of reshaped node (= AliasNode) fields using tree paths 31 | - easier definition of transforms (simplification & AliasNodes) (no ANTLR API knowledge needed, no other visiting in transformation methods) 32 | - more shared code 33 | 34 | ## v0.4.2 35 | 36 | ### Added 37 | 38 | - Add parameter to `parse` function to set a custom error listener (or remove the default listener) 39 | 40 | ## v0.4.1 41 | 42 | - Fix setup.py 43 | 44 | ## v0.4.0 45 | 46 | ## Changed 47 | 48 | - Better package structure 49 | 50 | ## v0.3.0 51 | 52 | ### Added 53 | 54 | - Helper to handle case sensitivity during lexing of ANTLR grammar 55 | 56 | ### Changed 57 | 58 | - The fields for AstNode subclasses are now defined in `_fields_spec` instead of `_fields` so `_fields` is now compatible with how the `ast` module defines it. 59 | - `parse()` doesn't accept a visitor but returns the parsed input. 60 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import re 4 | import ast 5 | from os import path 6 | from setuptools import setup 7 | 8 | PACKAGE_NAME = "antlr_ast" 9 | REQUIREMENT_NAMES = ["antlr4-python3-runtime"] 10 | 11 | HERE = path.abspath(path.dirname(__file__)) 12 | VERSION_FILE = path.join(HERE, PACKAGE_NAME, "__init__.py") 13 | REQUIREMENTS_FILE = path.join(HERE, "requirements.txt") 14 | README_FILE = path.join(HERE, "README.md") 15 | 16 | with open(VERSION_FILE, encoding="utf-8") as fp: 17 | _version_re = re.compile(r"__version__\s+=\s+(.*)") 18 | VERSION = str(ast.literal_eval(_version_re.search(fp.read()).group(1))) 19 | with open(REQUIREMENTS_FILE, encoding="utf-8") as fp: 20 | req_txt = fp.read() 21 | _requirements_re_template = r"^({}(?:\s*[~<>=]+\s*\S*)?)\s*(?:#.*)?$" 22 | REQUIREMENTS = [ 23 | re.search(_requirements_re_template.format(requirement), req_txt, re.M).group(0) 24 | for requirement in REQUIREMENT_NAMES 25 | ] 26 | with open(README_FILE, encoding="utf-8") as fp: 27 | README = fp.read() 28 | 29 | setup( 30 | name=PACKAGE_NAME.replace("_", "-"), 31 | version=VERSION, 32 | packages=[PACKAGE_NAME], 33 | install_requires=REQUIREMENTS, 34 | description="AST shaping for antlr parsers", 35 | long_description=README, 36 | long_description_content_type="text/markdown", 37 | author="Michael Chow", 38 | author_email="michael@datacamp.com", 39 | maintainer="Jeroen Hermans", 40 | maintainer_email="content-engineering@datacamp.com", 41 | url="https://github.com/datacamp/antlr-ast", 42 | classifiers=[ 43 | "Programming Language :: Python :: 3", 44 | "License :: OSI Approved :: GNU Affero General Public License v3", 45 | "Operating System :: OS Independent", 46 | ], 47 | ) 48 | 
-------------------------------------------------------------------------------- /antlr_ast/marshalling.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | from json import JSONEncoder, JSONDecoder 3 | 4 | from antlr_ast.ast import BaseNode, BaseNodeRegistry, Terminal 5 | 6 | 7 | class AstEncoder(JSONEncoder): 8 | """JSON encoder for BaseNodes""" 9 | 10 | def default(self, o): 11 | if isinstance(o, Terminal): 12 | encoded = str(o) 13 | elif isinstance(o, BaseNode): 14 | encoded = { 15 | "@type": o.__class__.__name__, 16 | "@fields": o._fields, 17 | "@position": o.get_position(), 18 | "@text": o.get_text(), 19 | "field_references": o._field_references, 20 | "label_references": o._label_references, 21 | "children": o.children, 22 | } 23 | else: 24 | encoded = o 25 | return encoded 26 | 27 | 28 | def decode_ast(registry, ast_json): 29 | """JSON decoder for BaseNodes""" 30 | if ast_json.get("@type"): 31 | subclass = registry.get_cls(ast_json["@type"], tuple(ast_json["@fields"])) 32 | children = [ 33 | decode_ast(registry, child) if isinstance(child, dict) else child 34 | for child in ast_json["children"] 35 | ] 36 | return subclass( 37 | children, 38 | ast_json["field_references"], 39 | ast_json["label_references"], 40 | position=ast_json.get("@position", None), 41 | text=ast_json.get("@text", None), 42 | ) 43 | else: 44 | return ast_json 45 | 46 | 47 | def get_decoder(registry=None): 48 | """Get a JSON decoding hook that shares a dynamic node registry between decoding calls""" 49 | if registry is None: 50 | registry = BaseNodeRegistry() 51 | return partial(decode_ast, registry) 52 | -------------------------------------------------------------------------------- /tests/antlr_py/ExprLexer.py: -------------------------------------------------------------------------------- 1 | # Generated from tests/Expr.g4 by ANTLR 4.7 2 | from antlr4 import * 3 | from io import StringIO 4 | from typing.io import TextIO 5 | import sys 6 | 7 | 8 | def serializedATN(): 9 | with StringIO() as buf: 10 | buf.write("\3\u608b\ua72a\u8133\ub9ed\u417c\u3be7\u7786\u5964\2\t") 11 | buf.write(")\b\1\4\2\t\2\4\3\t\3\4\4\t\4\4\5\t\5\4\6\t\6\4\7\t\7") 12 | buf.write("\4\b\t\b\3\2\3\2\3\3\3\3\3\4\3\4\3\5\3\5\3\6\6\6\33\n") 13 | buf.write("\6\r\6\16\6\34\3\7\3\7\3\7\3\7\3\b\6\b$\n\b\r\b\16\b%") 14 | buf.write("\3\b\3\b\2\2\t\3\3\5\4\7\5\t\6\13\7\r\b\17\t\3\2\4\3\2") 15 | buf.write("\62;\4\2\13\13\"\"\2*\2\3\3\2\2\2\2\5\3\2\2\2\2\7\3\2") 16 | buf.write("\2\2\2\t\3\2\2\2\2\13\3\2\2\2\2\r\3\2\2\2\2\17\3\2\2\2") 17 | buf.write("\3\21\3\2\2\2\5\23\3\2\2\2\7\25\3\2\2\2\t\27\3\2\2\2\13") 18 | buf.write("\32\3\2\2\2\r\36\3\2\2\2\17#\3\2\2\2\21\22\7-\2\2\22\4") 19 | buf.write("\3\2\2\2\23\24\7/\2\2\24\6\3\2\2\2\25\26\7*\2\2\26\b\3") 20 | buf.write("\2\2\2\27\30\7+\2\2\30\n\3\2\2\2\31\33\t\2\2\2\32\31\3") 21 | buf.write("\2\2\2\33\34\3\2\2\2\34\32\3\2\2\2\34\35\3\2\2\2\35\f") 22 | buf.write("\3\2\2\2\36\37\7p\2\2\37 \7q\2\2 !\7v\2\2!\16\3\2\2\2") 23 | buf.write("\"$\t\3\2\2#\"\3\2\2\2$%\3\2\2\2%#\3\2\2\2%&\3\2\2\2&") 24 | buf.write("\'\3\2\2\2\'(\b\b\2\2(\20\3\2\2\2\5\2\34%\3\b\2\2") 25 | return buf.getvalue() 26 | 27 | 28 | class ExprLexer(Lexer): 29 | 30 | atn = ATNDeserializer().deserialize(serializedATN()) 31 | 32 | decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ] 33 | 34 | T__0 = 1 35 | T__1 = 2 36 | T__2 = 3 37 | T__3 = 4 38 | INT = 5 39 | NOT = 6 40 | WS = 7 41 | 42 | channelNames = [ u"DEFAULT_TOKEN_CHANNEL", u"HIDDEN" ] 43 | 44 | 
modeNames = [ "DEFAULT_MODE" ] 45 | 46 | literalNames = [ "", 47 | "'+'", "'-'", "'('", "')'", "'not'" ] 48 | 49 | symbolicNames = [ "", 50 | "INT", "NOT", "WS" ] 51 | 52 | ruleNames = [ "T__0", "T__1", "T__2", "T__3", "INT", "NOT", "WS" ] 53 | 54 | grammarFileName = "Expr.g4" 55 | 56 | def __init__(self, input=None, output:TextIO = sys.stdout): 57 | super().__init__(input, output) 58 | self.checkVersion("4.7") 59 | self._interp = LexerATNSimulator(self, self.atn, self.decisionsToDFA, PredictionContextCache()) 60 | self._actions = None 61 | self._predicates = None 62 | 63 | 64 | -------------------------------------------------------------------------------- /tests/test_expr_ast.py: -------------------------------------------------------------------------------- 1 | from antlr_ast.ast import ( 2 | AliasNode, 3 | BaseNode as AstNode, # used in other tests 4 | parse as parse_ast, 5 | process_tree, 6 | BaseNodeTransformer, 7 | Terminal, 8 | ) 9 | from antlr_ast.inputstream import CaseTransformInputStream 10 | from antlr_ast.marshalling import AstEncoder, get_decoder 11 | 12 | from . import grammar 13 | import json 14 | 15 | 16 | class SubExpr(AliasNode): 17 | _fields_spec = ["expression=expr"] 18 | 19 | 20 | class BinaryExpr(AliasNode): 21 | _fields_spec = ["left", "right", "op"] 22 | 23 | 24 | class NotExpr(AliasNode): 25 | _fields_spec = ["op=NOT", "expr"] 26 | 27 | 28 | class Transformer(BaseNodeTransformer): 29 | def visit_BinaryExpr(self, node): 30 | return BinaryExpr.from_spec(node) 31 | 32 | def visit_SubExpr(self, node): 33 | return SubExpr.from_spec(node) 34 | 35 | def visit_NotExpr(self, node): 36 | return NotExpr.from_spec(node) 37 | 38 | 39 | def parse(text, start="expr", **kwargs): 40 | antlr_tree = parse_ast( 41 | grammar, text, start, transform=CaseTransformInputStream.LOWER, **kwargs 42 | ) 43 | simple_tree = process_tree(antlr_tree, transformer_cls=Transformer) 44 | 45 | return simple_tree 46 | 47 | 48 | def test_binary(): 49 | node = parse("1 + 2") 50 | assert isinstance(node, BinaryExpr) 51 | assert node.left == "1" 52 | assert node.right == "2" 53 | assert node.op == "+" 54 | 55 | 56 | def test_not(): 57 | node = parse("not 2") 58 | assert isinstance(node, NotExpr) 59 | assert node.expr == "2" 60 | 61 | 62 | def test_subexpr(): 63 | node = parse("(1 + 1)") 64 | assert isinstance(node, SubExpr) 65 | assert isinstance(node.expression, BinaryExpr) 66 | assert isinstance(node.expression.left, Terminal) 67 | 68 | 69 | def test_fields(): 70 | assert NotExpr._fields == ("op", "expr") 71 | not_expr = parse("not 2") 72 | assert not_expr._fields == ("op", "expr") 73 | 74 | assert SubExpr._fields == ("expression",) 75 | sub_expr = parse("(1 + 1)") 76 | assert sub_expr._fields == ("expression",) 77 | 78 | 79 | # Speaker --------------------------------------------------------------------- 80 | 81 | from antlr_ast.ast import Speaker 82 | 83 | 84 | def test_speaker_default(): 85 | speaker = Speaker( 86 | nodes={"BinaryExpr": "binary expression"}, fields={"left": "left part"} 87 | ) 88 | 89 | node = parse("1 + 1") 90 | str_tmp = "The {field_name} of the {node_name}" 91 | 92 | assert speaker.describe(node, str_tmp, "left") == str_tmp.format( 93 | field_name="left part", node_name="binary expression" 94 | ) 95 | 96 | 97 | def test_speaker_node_cfg(): 98 | node_cnfg = {"name": "binary expression", "fields": {"left": "left part"}} 99 | 100 | speaker = Speaker( 101 | nodes={"BinaryExpr": node_cnfg}, fields={"left": "should not occur!"} 102 | ) 103 | 104 | node = parse("1 + 1") 105 | str_tmp = 
"The {field_name} of the {node_name}" 106 | 107 | assert speaker.describe(node, str_tmp, "left") == str_tmp.format( 108 | field_name="left part", node_name="binary expression" 109 | ) 110 | 111 | 112 | # BaseNode.get_position ------------------------------------------------------- 113 | 114 | 115 | def test_get_position(): 116 | # Given 117 | code = "1 + (2 + 2)" 118 | correct_position = { 119 | "line_start": 1, 120 | "column_start": 4, 121 | "line_end": 1, 122 | "column_end": 10, 123 | } 124 | 125 | # When 126 | result = parse(code) 127 | position = result.right.get_position() 128 | 129 | # Then 130 | assert len(position) == len(correct_position) 131 | for item in correct_position.items(): 132 | assert item in position.items() 133 | 134 | 135 | def test_terminal_get_position(): 136 | # Given 137 | code = "(2 + 2) + 1" 138 | correct_position = { 139 | "line_start": 1, 140 | "column_start": 10, 141 | "line_end": 1, 142 | "column_end": 10, 143 | } 144 | 145 | # When 146 | result = parse(code) 147 | position = result.right.get_position() 148 | 149 | # Then 150 | assert len(position) == len(correct_position) 151 | for item in correct_position.items(): 152 | assert item in position.items() 153 | 154 | 155 | def test_terminal_get_text_input_stream(): 156 | # Given 157 | code = "(2 + 2) + 894654" 158 | 159 | # When 160 | result = parse(code) 161 | text = result.get_text() 162 | text_right = result.right.get_text() 163 | text_left = result.left.get_text() 164 | text_left_expr_left = result.left.expr.left.get_text() 165 | 166 | # Then 167 | assert text == "(2+2)+894654" 168 | assert text_right == "894654" 169 | assert text_left == "(2+2)" 170 | assert text_left_expr_left == "2" 171 | 172 | 173 | def test_terminal_get_text_from_position(): 174 | # Given 175 | code = "(2 + 2) + 894654" 176 | 177 | # When 178 | result = parse(code) 179 | text = result.get_text(code) 180 | text_right = result.right.get_text(code) 181 | text_left = result.left.get_text(code) 182 | text_left_expr_left = result.left.expr.left.get_text(code) 183 | 184 | # Then 185 | assert text == "(2 + 2) + 894654" 186 | assert text_right == "894654" 187 | assert text_left == "(2 + 2)" 188 | assert text_left_expr_left == "2" 189 | 190 | 191 | def test_text_none_and_self_text(): 192 | # Given 193 | code = "not 2" 194 | ast_tree = parse(code) 195 | json_tree = json.dumps(ast_tree, cls=AstEncoder) 196 | from_json_ast_tree = json.JSONDecoder(object_hook=get_decoder()).decode(json_tree) 197 | 198 | # When 199 | text = from_json_ast_tree.get_text() 200 | 201 | # Then 202 | assert text == "not2" 203 | 204 | 205 | def test_no_position(): 206 | # Given 207 | code = "!" 208 | 209 | # When 210 | result = parse(code) 211 | position = result.get_position() 212 | 213 | assert position is None 214 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # antlr-ast 2 | 3 | [![Build Status](https://travis-ci.org/datacamp/antlr-ast.svg?branch=master)](https://travis-ci.org/datacamp/antlr-ast) 4 | [![codecov](https://codecov.io/gh/datacamp/antlr-ast/branch/master/graph/badge.svg)](https://codecov.io/gh/datacamp/antlr-ast) 5 | 6 | This package allows you to use ANTLR grammars and use the parser output to generate an abstract syntax tree (AST). 7 | 8 | ## Install 9 | 10 | ```bash 11 | pip install antlr-ast 12 | ``` 13 | 14 | **Note:** this package is not python2 compatible. 
15 |
16 | ## Running Tests
17 |
18 | ```bash
19 | # may need:
20 | # pip install pytest
21 | py.test
22 | ```
23 |
24 | ## Usage
25 |
26 | Using `antlr-ast` involves four steps:
27 |
28 | 1. Using ANTLR to define a grammar and to generate the necessary Python files to parse this grammar
29 | 2. Using `parse` to get the ANTLR runtime output based on the generated grammar files
30 | 3. Using `process_tree` on the output of the previous step
31 |    1. A `BaseAstVisitor` (customisable by providing a subclass) transforms the ANTLR output to a serializable tree of `BaseNode`s,
32 |       dynamically created based on the rules in the ANTLR grammar
33 |    2. A `BaseNodeTransformer` subclass can be used to transform each kind of node
34 |    3. The simplify option can be used to shorten paths in the tree by skipping nodes that only have a single descendant
35 | 4. Using the resulting tree
36 |
37 | The next sections go into more detail about these steps.
38 |
39 | To visualize the process of creating and transforming these parse trees, you can use [this ast-viewer](https://github.com/datacamp/ast-viewer).
40 |
41 | ### Using ANTLR
42 |
43 | **Note: For this part of the tutorial, you need to know how to parse code with ANTLR.**
44 | See the ANTLR [getting started guide](https://github.com/antlr/antlr4/blob/4.7.2/doc/getting-started.md) if you have never installed ANTLR.
45 | The [ANTLR Mega Tutorial](https://tomassetti.me/antlr-mega-tutorial/#python-setup) has useful Python examples.
46 |
47 | [This page explains how to write ANTLR parser rules](https://github.com/antlr/antlr4/blob/master/doc/parser-rules.md).
48 | The rule definition below is an example with descriptive names for important ANTLR parser grammar elements:
49 |
50 | ```g4
51 | rule_name: rule_element? rule_element_label='literal' #RuleAlternativeLabel
52 |     | TOKEN+ #RuleAlternativeLabel
53 |     ;
54 | ```
55 |
56 | Rule element and alternative labels are optional.
57 | `+`, `*`, `?`, `|` and `()` have the same meaning as in RegEx.
58 |
59 | Below, we'll use a simple grammar to explain how `antlr-ast` works.
60 | This grammar can be found in `/tests/Expr.g4`.
61 |
62 | ```g4
63 | grammar Expr;
64 |
65 | // parser
66 |
67 | expr: left=expr op=('+'|'-') right=expr #BinaryExpr
68 |     | NOT expr #NotExpr
69 |     | INT #Integer
70 |     | '(' expr ')' #SubExpr
71 |     ;
72 |
73 | // lexer
74 |
75 | INT : [0-9]+ ; // match integers
76 | NOT : 'not' ;
77 |
78 | WS : [ \t]+ -> skip ; // toss out whitespace
79 | ```
80 |
81 | ANTLR can use the grammar above to generate a parser in a number of languages.
82 | To generate a Python parser, you can use the following command:
83 |
84 | ```bash
85 | antlr4 -Dlanguage=Python3 -visitor /tests/Expr.g4
86 | ```
87 |
88 | This will generate a number of files in the `/tests/` directory, including a lexer (`ExprLexer.py`),
89 | a parser (`ExprParser.py`), and a visitor (`ExprVisitor.py`).
90 |
91 | You can import and use these directly in Python. For example, from the root of this repo:
92 |
93 | ```python
94 | from tests import ExprVisitor
95 | ```
96 |
97 | To easily use the generated files, they are put in the `antlr_py` package.
98 | The `__init__.py` file exports the generated files under an alias that doesn't include the name of the grammar.
99 |
100 | ### Base nodes
101 |
102 | A `BaseNode` subclass has fields for all rule elements and labels for all rule element labels in its corresponding grammar rule.
103 | Both fields and labels are available as properties on `BaseNode` instances.
104 | Labels take precedence over fields if the names would collide.
105 |
106 | The name of a `BaseNode` is the name of the corresponding ANTLR grammar rule, but starting with an uppercase character.
107 | If rule alternative labels are specified for an ANTLR rule, these are used instead of the rule name.
108 |
109 | ### Transforming nodes
110 |
111 | Typically, there is no 1-to-1 mapping between ANTLR rules and the concepts of a language: the rule hierarchy is more nested.
112 | Transformations can be used to make the initial tree of BaseNodes based on ANTLR rules more similar to an AST.
113 |
114 | #### Transformer
115 |
116 | The `BaseNodeTransformer` will walk over the tree from the root node to the leaf nodes.
117 | When visiting a node, it is possible to transform it.
118 | The tree is updated with the transformed node before continuing the walk over the tree.
119 |
120 | To define a node transform, add a static method to the `BaseNodeTransformer` subclass passed to `process_tree`.
121 |
122 | - The name of the method you should define follows this pattern: `visit_<BaseNode name>`,
123 |   where `<BaseNode name>` should be replaced by the name of the `BaseNode` subclass to transform.
124 | - The method should return the transformed node.
125 |
126 | This is a simple example:
127 |
128 | ```python
129 | class Transformer(BaseNodeTransformer):
130 |     @staticmethod
131 |     def visit_My_antlr_rule(node):
132 |         return node.name_of_part
133 | ```
134 |
135 | #### Custom nodes
136 |
137 | A custom node can represent a part of the parsed language, a type of node present in an AST.
138 |
139 | To make it easy to return a custom node, you can define `AliasNode` subclasses.
140 | Normally, fields of `AliasNode`s are like symlinks to navigate the tree of `BaseNode`s.
141 |
142 | Instances of custom nodes are created from a `BaseNode`.
143 | Fields and labels of the source `BaseNode` are also available on the `AliasNode`.
144 | If an `AliasNode` field name collides with these, it takes precedence when accessing that property.
145 |
146 | This is what a custom node looks like:
147 |
148 | ```python
149 | class NotExpr(AliasNode):
150 |     _fields_spec = ["expr", "op=NOT"]
151 | ```
152 |
153 | This code defines a custom node, `NotExpr`, with an `expr` and an `op` field.
154 |
155 | ##### Field specs
156 |
157 | The `_fields_spec` class property is a list that defines the fields the custom node should have.
158 |
159 | This is how a field spec in this list is used when creating a custom node from a `BaseNode` (the source node):
160 |
161 | - If a field spec does not exist on the source node, it is set to `None`
162 | - If multiple field specs define the same field, the first one that isn't `None` is used
163 | - If a field spec is just a name, it is copied from the source node
164 | - If a field spec is an assignment, the left side is the name of the field on the `AliasNode`
165 |   and the right side is the path that should be taken starting in the source node to get the node
166 |   that should be the value for the field on the custom node.
167 |   Parts of this path are separated using `.`
168 |
169 | ##### Connecting to the transformer
170 |
171 | To use this custom node, add a method to the transformer:
172 |
173 | ```python
174 | class Transformer(BaseNodeTransformer):
175 |     # ...
176 |
177 |     # here the BaseNode name is the same as the custom node name
178 |     # but that isn't required
179 |     @staticmethod
180 |     def visit_NotExpr(node):
181 |         return NotExpr.from_spec(node)
182 | ```
183 |
184 | Instead of defining methods on the transformer class to use custom nodes, it's possible to do this automatically:
185 |
186 | ```python
187 | Transformer.bind_alias_nodes(alias_nodes)
188 | ```
189 |
190 | To make this work, the `AliasNode` classes in the list should have a `_rules` class property
191 | with a list of the `BaseNode` names they should transform.
192 |
193 | This is the result:
194 |
195 | ```python
196 | class NotExpr(AliasNode):
197 |     _fields_spec = ["expr", "op=NOT"]
198 |     _rules = ["NotExpr"]
199 |
200 | class Transformer(BaseNodeTransformer):
201 |     pass
202 |
203 | alias_nodes = [NotExpr]
204 | Transformer.bind_alias_nodes(alias_nodes)
205 | ```
206 |
207 | An item in `_rules` can also be a tuple.
208 | In that case, the first item in the tuple is a `BaseNode` name
209 | and the second item is the name of a class method of the custom node.
210 |
211 | It's not useful in the example above, but it is equivalent to this:
212 |
213 | ```python
214 | class NotExpr(AliasNode):
215 |     _fields_spec = ["expr", "op=NOT"]
216 |     _rules = [("NotExpr", "from_not")]
217 |
218 |     @classmethod
219 |     def from_not(cls, node):
220 |         return cls.from_spec(node)
221 |
222 | class Transformer(BaseNodeTransformer):
223 |     pass
224 |
225 | alias_nodes = [NotExpr]
226 | Transformer.bind_alias_nodes(alias_nodes)
227 | ```
228 |
229 | ### Using the final tree
230 |
231 | It's easy to use a tree that has a mix of `AliasNode`s and dynamic `BaseNode`s:
232 | the whole tree is just a nested Python object, as the sketch below shows.
233 |
234 | When searching nodes in a tree, the priority of nodes can be taken into account.
235 | By default, `BaseNode`s have priority 2 and `AliasNode`s have priority 1.
236 |
237 | Code that works with these trees can be affected by changes in the grammar, the transforms and the custom nodes.
238 | The grammar is the most likely to change.
239 |
240 | To make grammar updates have no impact on your code, don't rely on `BaseNode`s.
241 | You can still check whether the `AliasNode` parent node of a `BaseNode` has the correct fields set
242 | and search for nested `AliasNode`s in a subtree.
243 |
244 | If you do rely on `BaseNode`s, your code could break when `AliasNode`s are added that replace some of them,
245 | if a field name collides with a field name on a `BaseNode` you use.
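
As a minimal end-to-end sketch (using the `Expr` grammar and the `parse` helper defined in `tests/test_expr_ast.py` of this repo; the asserted values mirror the tests in that file):

```python
from tests.test_expr_ast import parse, BinaryExpr, NotExpr

node = parse("1 + 2")               # runs parse + process_tree, returns an AliasNode
assert isinstance(node, BinaryExpr)
assert (node.left, node.op, node.right) == ("1", "+", "2")

node = parse("not 2")
assert isinstance(node, NotExpr)
assert node.expr == "2"             # fields from _fields_spec are plain attributes
print(node.get_position())          # dict with line/column start and end
```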
246 | -------------------------------------------------------------------------------- /tests/antlr_py/ExprParser.py: -------------------------------------------------------------------------------- 1 | # Generated from tests/Expr.g4 by ANTLR 4.7 2 | # encoding: utf-8 3 | from antlr4 import * 4 | from io import StringIO 5 | from typing.io import TextIO 6 | import sys 7 | 8 | def serializedATN(): 9 | with StringIO() as buf: 10 | buf.write("\3\u608b\ua72a\u8133\ub9ed\u417c\u3be7\u7786\u5964\3\t") 11 | buf.write("\27\4\2\t\2\3\2\3\2\3\2\3\2\3\2\3\2\3\2\3\2\5\2\r\n\2") 12 | buf.write("\3\2\3\2\3\2\7\2\22\n\2\f\2\16\2\25\13\2\3\2\2\3\2\3\2") 13 | buf.write("\2\3\3\2\3\4\2\30\2\f\3\2\2\2\4\5\b\2\1\2\5\6\7\b\2\2") 14 | buf.write("\6\r\5\2\2\5\7\r\7\7\2\2\b\t\7\5\2\2\t\n\5\2\2\2\n\13") 15 | buf.write("\7\6\2\2\13\r\3\2\2\2\f\4\3\2\2\2\f\7\3\2\2\2\f\b\3\2") 16 | buf.write("\2\2\r\23\3\2\2\2\16\17\f\6\2\2\17\20\t\2\2\2\20\22\5") 17 | buf.write("\2\2\7\21\16\3\2\2\2\22\25\3\2\2\2\23\21\3\2\2\2\23\24") 18 | buf.write("\3\2\2\2\24\3\3\2\2\2\25\23\3\2\2\2\4\f\23") 19 | return buf.getvalue() 20 | 21 | 22 | class ExprParser ( Parser ): 23 | 24 | grammarFileName = "Expr.g4" 25 | 26 | atn = ATNDeserializer().deserialize(serializedATN()) 27 | 28 | decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ] 29 | 30 | sharedContextCache = PredictionContextCache() 31 | 32 | literalNames = [ "", "'+'", "'-'", "'('", "')'", "", 33 | "'not'" ] 34 | 35 | symbolicNames = [ "", "", "", "", 36 | "", "INT", "NOT", "WS" ] 37 | 38 | RULE_expr = 0 39 | 40 | ruleNames = [ "expr" ] 41 | 42 | EOF = Token.EOF 43 | T__0=1 44 | T__1=2 45 | T__2=3 46 | T__3=4 47 | INT=5 48 | NOT=6 49 | WS=7 50 | 51 | def __init__(self, input:TokenStream, output:TextIO = sys.stdout): 52 | super().__init__(input, output) 53 | self.checkVersion("4.7") 54 | self._interp = ParserATNSimulator(self, self.atn, self.decisionsToDFA, self.sharedContextCache) 55 | self._predicates = None 56 | 57 | 58 | 59 | class ExprContext(ParserRuleContext): 60 | 61 | def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): 62 | super().__init__(parent, invokingState) 63 | self.parser = parser 64 | 65 | 66 | def getRuleIndex(self): 67 | return ExprParser.RULE_expr 68 | 69 | 70 | def copyFrom(self, ctx:ParserRuleContext): 71 | super().copyFrom(ctx) 72 | 73 | 74 | class IntegerContext(ExprContext): 75 | 76 | def __init__(self, parser, ctx:ParserRuleContext): # actually a ExprParser.ExprContext 77 | super().__init__(parser) 78 | self.copyFrom(ctx) 79 | 80 | def INT(self): 81 | return self.getToken(ExprParser.INT, 0) 82 | 83 | def enterRule(self, listener:ParseTreeListener): 84 | if hasattr( listener, "enterInteger" ): 85 | listener.enterInteger(self) 86 | 87 | def exitRule(self, listener:ParseTreeListener): 88 | if hasattr( listener, "exitInteger" ): 89 | listener.exitInteger(self) 90 | 91 | def accept(self, visitor:ParseTreeVisitor): 92 | if hasattr( visitor, "visitInteger" ): 93 | return visitor.visitInteger(self) 94 | else: 95 | return visitor.visitChildren(self) 96 | 97 | 98 | class SubExprContext(ExprContext): 99 | 100 | def __init__(self, parser, ctx:ParserRuleContext): # actually a ExprParser.ExprContext 101 | super().__init__(parser) 102 | self.copyFrom(ctx) 103 | 104 | def expr(self): 105 | return self.getTypedRuleContext(ExprParser.ExprContext,0) 106 | 107 | 108 | def enterRule(self, listener:ParseTreeListener): 109 | if hasattr( listener, "enterSubExpr" ): 110 | listener.enterSubExpr(self) 111 | 112 | def exitRule(self, 
listener:ParseTreeListener): 113 | if hasattr( listener, "exitSubExpr" ): 114 | listener.exitSubExpr(self) 115 | 116 | def accept(self, visitor:ParseTreeVisitor): 117 | if hasattr( visitor, "visitSubExpr" ): 118 | return visitor.visitSubExpr(self) 119 | else: 120 | return visitor.visitChildren(self) 121 | 122 | 123 | class BinaryExprContext(ExprContext): 124 | 125 | def __init__(self, parser, ctx:ParserRuleContext): # actually a ExprParser.ExprContext 126 | super().__init__(parser) 127 | self.left = None # ExprContext 128 | self.op = None # Token 129 | self.right = None # ExprContext 130 | self.copyFrom(ctx) 131 | 132 | def expr(self, i:int=None): 133 | if i is None: 134 | return self.getTypedRuleContexts(ExprParser.ExprContext) 135 | else: 136 | return self.getTypedRuleContext(ExprParser.ExprContext,i) 137 | 138 | 139 | def enterRule(self, listener:ParseTreeListener): 140 | if hasattr( listener, "enterBinaryExpr" ): 141 | listener.enterBinaryExpr(self) 142 | 143 | def exitRule(self, listener:ParseTreeListener): 144 | if hasattr( listener, "exitBinaryExpr" ): 145 | listener.exitBinaryExpr(self) 146 | 147 | def accept(self, visitor:ParseTreeVisitor): 148 | if hasattr( visitor, "visitBinaryExpr" ): 149 | return visitor.visitBinaryExpr(self) 150 | else: 151 | return visitor.visitChildren(self) 152 | 153 | 154 | class NotExprContext(ExprContext): 155 | 156 | def __init__(self, parser, ctx:ParserRuleContext): # actually a ExprParser.ExprContext 157 | super().__init__(parser) 158 | self.copyFrom(ctx) 159 | 160 | def NOT(self): 161 | return self.getToken(ExprParser.NOT, 0) 162 | def expr(self): 163 | return self.getTypedRuleContext(ExprParser.ExprContext,0) 164 | 165 | 166 | def enterRule(self, listener:ParseTreeListener): 167 | if hasattr( listener, "enterNotExpr" ): 168 | listener.enterNotExpr(self) 169 | 170 | def exitRule(self, listener:ParseTreeListener): 171 | if hasattr( listener, "exitNotExpr" ): 172 | listener.exitNotExpr(self) 173 | 174 | def accept(self, visitor:ParseTreeVisitor): 175 | if hasattr( visitor, "visitNotExpr" ): 176 | return visitor.visitNotExpr(self) 177 | else: 178 | return visitor.visitChildren(self) 179 | 180 | 181 | 182 | def expr(self, _p:int=0): 183 | _parentctx = self._ctx 184 | _parentState = self.state 185 | localctx = ExprParser.ExprContext(self, self._ctx, _parentState) 186 | _prevctx = localctx 187 | _startState = 0 188 | self.enterRecursionRule(localctx, 0, self.RULE_expr, _p) 189 | self._la = 0 # Token type 190 | try: 191 | self.enterOuterAlt(localctx, 1) 192 | self.state = 10 193 | self._errHandler.sync(self) 194 | token = self._input.LA(1) 195 | if token in [ExprParser.NOT]: 196 | localctx = ExprParser.NotExprContext(self, localctx) 197 | self._ctx = localctx 198 | _prevctx = localctx 199 | 200 | self.state = 3 201 | self.match(ExprParser.NOT) 202 | self.state = 4 203 | self.expr(3) 204 | pass 205 | elif token in [ExprParser.INT]: 206 | localctx = ExprParser.IntegerContext(self, localctx) 207 | self._ctx = localctx 208 | _prevctx = localctx 209 | self.state = 5 210 | self.match(ExprParser.INT) 211 | pass 212 | elif token in [ExprParser.T__2]: 213 | localctx = ExprParser.SubExprContext(self, localctx) 214 | self._ctx = localctx 215 | _prevctx = localctx 216 | self.state = 6 217 | self.match(ExprParser.T__2) 218 | self.state = 7 219 | self.expr(0) 220 | self.state = 8 221 | self.match(ExprParser.T__3) 222 | pass 223 | else: 224 | raise NoViableAltException(self) 225 | 226 | self._ctx.stop = self._input.LT(-1) 227 | self.state = 17 228 | 
self._errHandler.sync(self) 229 | _alt = self._interp.adaptivePredict(self._input,1,self._ctx) 230 | while _alt!=2 and _alt!=ATN.INVALID_ALT_NUMBER: 231 | if _alt==1: 232 | if self._parseListeners is not None: 233 | self.triggerExitRuleEvent() 234 | _prevctx = localctx 235 | localctx = ExprParser.BinaryExprContext(self, ExprParser.ExprContext(self, _parentctx, _parentState)) 236 | localctx.left = _prevctx 237 | self.pushNewRecursionContext(localctx, _startState, self.RULE_expr) 238 | self.state = 12 239 | if not self.precpred(self._ctx, 4): 240 | from antlr4.error.Errors import FailedPredicateException 241 | raise FailedPredicateException(self, "self.precpred(self._ctx, 4)") 242 | self.state = 13 243 | localctx.op = self._input.LT(1) 244 | _la = self._input.LA(1) 245 | if not(_la==ExprParser.T__0 or _la==ExprParser.T__1): 246 | localctx.op = self._errHandler.recoverInline(self) 247 | else: 248 | self._errHandler.reportMatch(self) 249 | self.consume() 250 | self.state = 14 251 | localctx.right = self.expr(5) 252 | self.state = 19 253 | self._errHandler.sync(self) 254 | _alt = self._interp.adaptivePredict(self._input,1,self._ctx) 255 | 256 | except RecognitionException as re: 257 | localctx.exception = re 258 | self._errHandler.reportError(self, re) 259 | self._errHandler.recover(self, re) 260 | finally: 261 | self.unrollRecursionContexts(_parentctx) 262 | return localctx 263 | 264 | 265 | 266 | def sempred(self, localctx:RuleContext, ruleIndex:int, predIndex:int): 267 | if self._predicates == None: 268 | self._predicates = dict() 269 | self._predicates[0] = self.expr_sempred 270 | pred = self._predicates.get(ruleIndex, None) 271 | if pred is None: 272 | raise Exception("No predicate with index:" + str(ruleIndex)) 273 | else: 274 | return pred(localctx, predIndex) 275 | 276 | def expr_sempred(self, localctx:ExprContext, predIndex:int): 277 | if predIndex == 0: 278 | return self.precpred(self._ctx, 4) 279 | 280 | 281 | 282 | 283 | 284 | -------------------------------------------------------------------------------- /antlr_ast/ast.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import inspect 3 | 4 | from typing import Dict, Optional, List, Union, Type, Any, Callable 5 | 6 | from functools import reduce 7 | from collections import OrderedDict, namedtuple 8 | 9 | from ast import AST, NodeTransformer 10 | 11 | from antlr4.Token import CommonToken 12 | from antlr4 import CommonTokenStream, ParseTreeVisitor, ParserRuleContext, RuleContext 13 | from antlr4.tree.Tree import ErrorNode, TerminalNodeImpl, ParseTree 14 | 15 | from antlr_ast.inputstream import CaseTransformInputStream 16 | from antlr4.error.ErrorListener import ErrorListener, ConsoleErrorListener 17 | 18 | 19 | def parse( 20 | grammar, 21 | text: str, 22 | start: str, 23 | strict=False, 24 | transform: Union[str, Callable] = None, 25 | error_listener: ErrorListener = None, 26 | ) -> ParseTree: 27 | input_stream = CaseTransformInputStream(text, transform=transform) 28 | 29 | lexer = grammar.Lexer(input_stream) 30 | lexer.removeErrorListeners() 31 | lexer.addErrorListener(LexerErrorListener()) 32 | 33 | token_stream = CommonTokenStream(lexer) 34 | parser = grammar.Parser(token_stream) 35 | parser.buildParseTrees = True # default 36 | 37 | if strict: 38 | error_listener = StrictErrorListener() 39 | 40 | if error_listener is not None and error_listener is not True: 41 | parser.removeErrorListeners() 42 | if error_listener: 43 | parser.addErrorListener(error_listener) 44 | 45 | 
return getattr(parser, start)() 46 | 47 | 48 | def process_tree( 49 | antlr_tree: ParseTree, 50 | base_visitor_cls: Type["BaseAstVisitor"] = None, 51 | transformer_cls: Type["BaseNodeTransformer"] = None, 52 | simplify=True, 53 | ) -> "BaseNode": 54 | cls_registry = BaseNodeRegistry() 55 | 56 | if not base_visitor_cls: 57 | base_visitor_cls = BaseAstVisitor 58 | elif not issubclass(base_visitor_cls, BaseAstVisitor): 59 | raise ValueError("base_visitor_cls must be a BaseAstVisitor subclass") 60 | tree = base_visitor_cls(cls_registry).visit(antlr_tree) 61 | 62 | if transformer_cls is not None: 63 | if not issubclass(transformer_cls, BaseNodeTransformer): 64 | raise ValueError("transformer_cls must be a BaseNodeTransformer subclass") 65 | tree = transformer_cls(cls_registry).visit(tree) 66 | 67 | if simplify: 68 | tree = simplify_tree(tree, unpack_lists=False) 69 | 70 | return tree 71 | 72 | 73 | # TODO use protowhat dump + DumpConfig 74 | # duplicated in ast-viewer (also for Python) 75 | # structure vs to_json()? 76 | def dump_node(node, node_class=AST): 77 | if isinstance(node, node_class): 78 | fields = OrderedDict() 79 | for name in node._fields: 80 | attr = getattr(node, name, None) 81 | if attr is not None: 82 | fields[name] = dump_node(attr, node_class=node_class) 83 | return {"type": node.__class__.__name__, "data": fields} 84 | elif isinstance(node, list): 85 | return [dump_node(x, node_class=node_class) for x in node] 86 | else: 87 | return node 88 | 89 | 90 | FieldSpec = namedtuple("FieldSpec", ["name", "origin"]) 91 | 92 | 93 | def parse_field_spec(spec: str) -> FieldSpec: 94 | # parse mapping for = and . # old: and indices [] ----- 95 | name, *origin = [part.strip() for part in spec.split("=")] 96 | origin = name if not origin else origin[0] 97 | origin = origin.split(".") 98 | return FieldSpec(name, origin) 99 | 100 | 101 | class AstNodeMeta(type): 102 | @property 103 | def _fields(cls): 104 | od = OrderedDict([(parse_field_spec(el).name, None) for el in cls._fields_spec]) 105 | return tuple(od) 106 | 107 | 108 | # Speaker class --------------------------------------------------------------- 109 | 110 | 111 | class Speaker: 112 | def __init__(self, **cfg): 113 | """Initialize speaker instance, for a set of AST nodes. 114 | 115 | Arguments: 116 | nodes: dictionary of node names, and their human friendly names. 117 | Each entry for a node may also be a dictionary containing 118 | name: human friendly name, fields: a dictionary to override 119 | the field names for that node. 120 | fields: dictionary of human friendly field names, used as a default 121 | for each node. 
122 | """ 123 | self.node_names = cfg["nodes"] 124 | self.field_names = cfg.get("fields", {}) 125 | 126 | def describe(self, node, fmt="{node_name}", field=None, **kwargs): 127 | cls_name = node.__class__.__name__ 128 | def_field_name = ( 129 | self.field_names.get(field) or field.replace("_", " ") if field else "" 130 | ) 131 | 132 | node_cfg = self.node_names.get(cls_name, cls_name) 133 | node_name, field_names = self.get_info(node_cfg) 134 | 135 | d = { 136 | "node": node, 137 | "field_name": field_names.get(field, def_field_name), 138 | "node_name": node_name.format(node=node), 139 | } 140 | 141 | return fmt.format(**d, **kwargs) 142 | 143 | @staticmethod 144 | def get_info(node_cfg): 145 | """Return a tuple with the verbal name of a node, and a dict of field names.""" 146 | 147 | node_cfg = node_cfg if isinstance(node_cfg, dict) else {"name": node_cfg} 148 | 149 | return node_cfg.get("name"), node_cfg.get("fields", {}) 150 | 151 | 152 | # Error Listener ------------------------------------------------------------------ 153 | 154 | 155 | # from antlr4.error.Errors import RecognitionException 156 | 157 | 158 | class AntlrException(Exception): 159 | def __init__(self, msg, orig): 160 | self.msg, self.orig = msg, orig 161 | 162 | 163 | class StrictErrorListener(ErrorListener): 164 | # The recognizer will be the parser instance 165 | def syntaxError(self, recognizer, badSymbol, line, col, msg, e): 166 | msg = "line {line}:{col} {msg}".format( 167 | badSymbol=badSymbol, line=line, col=col, msg=msg 168 | ) 169 | raise AntlrException(msg, e) 170 | 171 | def reportAmbiguity( 172 | self, recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs 173 | ): 174 | return 175 | # raise Exception("TODO") 176 | 177 | def reportAttemptingFullContext( 178 | self, recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs 179 | ): 180 | return 181 | # raise Exception("TODO") 182 | 183 | def reportContextSensitivity( 184 | self, recognizer, dfa, startIndex, stopIndex, prediction, configs 185 | ): 186 | return 187 | # raise Exception("TODO") 188 | 189 | 190 | class LexerErrorListener(ConsoleErrorListener): 191 | def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e): 192 | if isinstance(e.input, CaseTransformInputStream): 193 | msg = msg + " " + repr(e.input) 194 | super().syntaxError(recognizer, offendingSymbol, line, column, msg, e) 195 | 196 | 197 | # Parse Tree Visitor ---------------------------------------------------------- 198 | # TODO: visitor inheritance not really needed, but indicates compatibility 199 | # TODO: make general node (Terminal) accessible in class property (.subclasses)? 
200 | 201 | IndexReferences = Dict[str, Union[int, List[int]]] 202 | 203 | 204 | class BaseNode(AST): 205 | """AST is subclassed so we can use Python ast module visiting and walking on the custom AST""" 206 | 207 | def __init__( 208 | self, 209 | children: list, 210 | field_references: IndexReferences, 211 | label_references: IndexReferences, 212 | ctx: Optional[ParserRuleContext] = None, 213 | position: Optional[dict] = None, 214 | text: Optional[str] = None, 215 | ): 216 | self.children = children 217 | 218 | self._field_references = field_references 219 | self.children_by_field = materialize(self._field_references, self.children) 220 | 221 | self._label_references = label_references 222 | self.children_by_label = materialize(self._label_references, self.children) 223 | 224 | self._ctx = ctx 225 | self.position = position 226 | self.text = text 227 | 228 | _fields = () 229 | 230 | # whether to descend for selection (greater descends into lower) 231 | _priority = 2 232 | 233 | # getattr: return None or raise for nonexistent attr 234 | # in Transformer conditionals: 235 | # - getattr(obj, attr, None) works with both 236 | # - hasattr(obj, attr) if strict 237 | # - obj.attr if not strict 238 | _strict = False 239 | 240 | @classmethod 241 | def create( 242 | cls, 243 | ctx: ParserRuleContext, 244 | children: Optional[list] = None, 245 | registry: Optional["BaseNodeRegistry"] = None, 246 | ) -> "BaseNode": 247 | if registry is None: 248 | registry = BaseNodeRegistry() 249 | if children is None: 250 | children = ctx.children 251 | 252 | field_names = get_field_names(ctx) 253 | children_by_field = get_field_references(ctx, field_names) 254 | 255 | label_names = get_label_names(ctx) 256 | children_by_label = get_field_references(ctx, label_names) 257 | 258 | cls_name = type(ctx).__name__.split("Context")[0] 259 | subclass = registry.get_cls(cls_name, tuple(field_names)) 260 | 261 | return subclass(children, children_by_field, children_by_label, ctx) 262 | 263 | @classmethod 264 | def create_cls(cls, cls_name: str, field_names: tuple) -> Type["BaseNode"]: 265 | return type(cls_name, (cls,), {"_fields": field_names}) 266 | 267 | def __getattr__(self, name): 268 | try: 269 | result = self.children_by_label.get(name) or self.children_by_field[name] 270 | except KeyError: 271 | if self._strict: 272 | raise AttributeError( 273 | "{}.{} is invalid.".format(self.__class__.__name__, name) 274 | ) 275 | else: 276 | result = None 277 | 278 | return result 279 | 280 | @classmethod 281 | def combine(cls, *fields: "BaseNode") -> List["BaseNode"]: 282 | """Combine fields 283 | 284 | Creates a list field from other fields 285 | Filters None and combines other elements in a flat list 286 | Use in transformer methods. 287 | """ 288 | result = reduce(cls.extend_node_list, fields, []) 289 | 290 | return result 291 | 292 | @staticmethod 293 | def extend_node_list( 294 | acc: List["BaseNode"], new: Union[List["BaseNode"], "BaseNode"] 295 | ) -> List["BaseNode"]: 296 | """Extend accumulator with Node(s) from new""" 297 | if new is None: 298 | new = [] 299 | elif not isinstance(new, list): 300 | new = [new] 301 | return acc + new 302 | 303 | def get_text(self, full_text: str = None) -> Optional[str]: 304 | # TODO implement as __str__? 
305 | # + easy to combine with str/Terminal 306 | # + use Python instead of custom interface 307 | # (-) very different from repr / json 308 | text = None 309 | if isinstance(self._ctx, (TerminalNodeImpl, RuleContext)): 310 | if full_text is None: 311 | text = self._ctx.getText() 312 | elif getattr(self._ctx, "start", None) and getattr(self._ctx, "stop", None): 313 | text = full_text[self._ctx.start.start : self._ctx.stop.stop + 1] 314 | elif ( 315 | getattr(self._ctx, "symbol", None) 316 | and getattr(self._ctx.symbol, "start", None) 317 | and getattr(self._ctx.symbol, "stop", None) 318 | ): 319 | text = full_text[self._ctx.symbol.start : self._ctx.symbol.stop + 1] 320 | if text is None and self.text: 321 | text = self.text 322 | 323 | return text 324 | 325 | def get_position(self) -> Optional[Dict[str, int]]: 326 | position = None 327 | ctx = self._ctx 328 | if ctx is not None: 329 | if isinstance(ctx, TerminalNodeImpl): 330 | position = { 331 | "line_start": ctx.symbol.line, 332 | "column_start": ctx.symbol.column, 333 | "line_end": ctx.symbol.line, 334 | "column_end": ctx.symbol.column 335 | + (ctx.symbol.stop - ctx.symbol.start), 336 | } 337 | elif getattr(ctx, "start", None) and getattr(ctx, "stop", None): 338 | position = { 339 | "line_start": ctx.start.line, 340 | "column_start": ctx.start.column, 341 | "line_end": ctx.stop.line, 342 | "column_end": ctx.stop.column + (ctx.stop.stop - ctx.stop.start), 343 | } 344 | 345 | return position or self.position 346 | 347 | def __repr__(self): 348 | return str({**self.children_by_field, **self.children_by_label}) 349 | 350 | 351 | # TODO: 352 | AstNode = BaseNode 353 | 354 | 355 | class Terminal(BaseNode): 356 | """This is a thin node wrapper for a string. 357 | 358 | The node is transparent when not in debug mode. 359 | In debug mode, it keeps the link to the corresponding ANTLR node. 360 | """ 361 | 362 | _fields = tuple(["value"]) 363 | DEBUG = True 364 | DEBUG_INSTANCES = [] 365 | 366 | def __new__(cls, *args, **kwargs): 367 | instance = super().__new__(cls, *args, **kwargs) 368 | if cls.DEBUG: 369 | cls.DEBUG_INSTANCES.append(instance) 370 | return instance 371 | else: 372 | return args[0][0] 373 | 374 | @classmethod 375 | def from_text(cls, text: str, ctx: Optional[ParserRuleContext] = None): 376 | return cls([text], {"value": 0}, {}, ctx) 377 | 378 | def __eq__(self, other): 379 | return self.value == other 380 | 381 | def __str__(self): 382 | # currently just used for better formatting in debugger 383 | return self.value 384 | 385 | def __repr__(self): 386 | return "'{}'".format(self.value) 387 | 388 | 389 | class AliasNode(BaseNode, metaclass=AstNodeMeta): 390 | # TODO: look at AstNode methods 391 | # defines class properties 392 | # - as a property name to copy from ANTLR nodes 393 | # - as a property name defined in terms of (nested) ANTLR node properties 394 | # the field will be set to the first definition that is not undefined 395 | _fields_spec = [] 396 | 397 | _fields = AstNodeMeta._fields 398 | 399 | # Defines which ANTLR nodes to convert to this node. 
Elements can be: 400 | # - a string: uses AstNode._from_fields as visitor 401 | # - a tuple ('node_name', 'ast_node_class_method_name'): uses ast_node_class_method_name as visitor 402 | # subclasses use _bind_to_visitor to create visit methods for the nodes in _rules on the ParseTreeVisitor 403 | # using this information 404 | _rules = [] 405 | 406 | _priority = 1 407 | 408 | _strict = True 409 | 410 | def __init__(self, node: BaseNode, fields: Optional[Dict[str, Any]] = None): 411 | # TODO: keep reference to node? 412 | # TODO: **fields? (easier notation, but hard to name future arguments 413 | super().__init__( 414 | node.children, node._field_references, node._label_references, node._ctx 415 | ) 416 | 417 | fields = fields or {} 418 | for field, value in fields.items(): 419 | if field not in self._fields: 420 | warnings.warn("Key not in fields: {}".format(field)) 421 | setattr(self, field, value) 422 | 423 | @classmethod 424 | def from_spec(cls, node: BaseNode) -> "AliasNode": 425 | # TODO: no fields_spec argument as before 426 | field_dict = {} 427 | for field_spec in cls._fields_spec: 428 | name, path = parse_field_spec(field_spec) 429 | 430 | # _fields_spec can contain field multiple times 431 | # e.g. x=a and x=b 432 | if field_dict.get(name): 433 | # or / elif behaviour 434 | continue 435 | 436 | # get node ----- 437 | field_dict[name] = cls.get_path(node, path) 438 | return cls(node, field_dict) 439 | 440 | @classmethod 441 | def get_path(cls, node: BaseNode, path: List[str]): 442 | # TODO: can be defined on FieldNode too 443 | result = node 444 | for i in range(len(path)): 445 | result = getattr(result, path[i], None) 446 | if result is None: 447 | break 448 | 449 | return result 450 | 451 | @classmethod 452 | def bind_to_transformer( 453 | cls, 454 | transformer_cls: Type["BaseNodeTransformer"], 455 | default_transform_method: str = "from_spec", 456 | ): 457 | for rule in cls._rules: 458 | if isinstance(rule, str): 459 | cls_method = default_transform_method 460 | else: 461 | rule, cls_method = rule[:2] 462 | transformer_method = cls.get_transformer(cls_method) 463 | bind_to_transformer(transformer_cls, rule, transformer_method) 464 | 465 | @classmethod 466 | def get_transformer(cls, method_name: str): 467 | """Get method to bind to visitor""" 468 | transform_function = getattr(cls, method_name) 469 | assert callable(transform_function) 470 | 471 | def transformer_method(self, node): 472 | kwargs = {} 473 | if inspect.signature(transform_function).parameters.get("helper"): 474 | kwargs["helper"] = self.helper 475 | return transform_function(node, **kwargs) 476 | 477 | return transformer_method 478 | 479 | 480 | class BaseNodeRegistry: 481 | def __init__(self): 482 | self.dynamic_node_classes = {} 483 | 484 | def get_cls(self, cls_name: str, field_names: tuple) -> Type[BaseNode]: 485 | """""" 486 | if cls_name not in self.dynamic_node_classes: 487 | self.dynamic_node_classes[cls_name] = BaseNode.create_cls( 488 | cls_name, field_names 489 | ) 490 | else: 491 | existing_cls = self.dynamic_node_classes[cls_name] 492 | all_fields = tuple(set(existing_cls._fields) | set(field_names)) 493 | if len(all_fields) > len(existing_cls._fields): 494 | existing_cls._fields = all_fields 495 | 496 | return self.dynamic_node_classes[cls_name] 497 | 498 | def isinstance(self, instance: BaseNode, class_name: str) -> bool: 499 | """Check if a BaseNode is an instance of a registered dynamic class""" 500 | if isinstance(instance, BaseNode): 501 | klass = self.dynamic_node_classes.get(class_name, None) 
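            # Rough usage sketch (class names depend on the grammar): a node built from a
            # BinaryExprContext via BaseNode.create(ctx, ..., registry) is given the dynamic
            # "BinaryExpr" class, so registry.isinstance(node, "BinaryExpr") is True.
            # Unknown class names fall through to the `return False` branch; anything that
            # is not a BaseNode raises TypeError in the else branch below.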
502 | if klass: 503 | return isinstance(instance, klass) 504 | # Not an instance of a class in the registry 505 | return False 506 | else: 507 | raise TypeError("This function can only be used for BaseNode objects") 508 | 509 | 510 | # TODO: test: if 'visit' in method, it has to be as 'visit_' 511 | class BaseNodeTransformer(NodeTransformer): 512 | def __init__(self, registry: BaseNodeRegistry): 513 | self.helper = TransformerHelper(registry) 514 | 515 | def visit(self, node: BaseNode): 516 | # TODO: I think transform_ + node.__class__.__name__ would be better/clearer then 517 | # as the node methods don't need to do any visiting (which is completely done by visit and generic_visit) 518 | method = "visit_" + type(node).__name__ 519 | transformer = getattr(self, method, None) 520 | 521 | if transformer is None: 522 | return self.generic_visit(node) 523 | else: 524 | alias = transformer(node) 525 | if isinstance(alias, AliasNode) or alias == node: 526 | # this prevents infinite recursion and visiting 527 | # AliasNodes with a name that is also the name of a BaseNode 528 | if isinstance(alias, BaseNode): 529 | self.generic_visit(alias) 530 | else: 531 | # visit BaseNode (e.g. result of Transformer method) 532 | if isinstance(alias, list): 533 | # Transformer method can return array instead of node 534 | alias = [ 535 | self.visit(el) if isinstance(el, BaseNode) else el 536 | for el in alias 537 | ] # TODO: test 538 | elif isinstance(alias, BaseNode): 539 | alias = self.visit(alias) 540 | 541 | return alias 542 | 543 | def visit_Terminal(self, terminal: Terminal) -> Terminal: 544 | """Handle Terminal the same as other non-node types""" 545 | return terminal 546 | 547 | @classmethod 548 | def bind_alias_nodes(cls, alias_classes: List[Type[AliasNode]]): 549 | for item in alias_classes: 550 | if getattr(item, "_rules", None) is not None: 551 | item.bind_to_transformer(cls) 552 | 553 | 554 | def bind_to_transformer( 555 | transformer_cls: Type[BaseNodeTransformer], 556 | rule_name: str, 557 | transformer_method: Callable, 558 | ): 559 | """Assign AST node class constructors to parse tree visitors.""" 560 | setattr(transformer_cls, get_transformer_method_name(rule_name), transformer_method) 561 | 562 | 563 | def get_transformer_method_name(rule_name: str) -> str: 564 | return "visit_{}".format(rule_name[0].upper() + rule_name[1:]) 565 | 566 | 567 | class TransformerHelper: 568 | def __init__(self, registry: BaseNodeRegistry): 569 | self.registry = registry 570 | 571 | def isinstance(self, *args): 572 | return self.registry.isinstance(*args) 573 | 574 | 575 | def get_alias_nodes(items) -> List[Type[AstNode]]: 576 | return list( 577 | filter( 578 | lambda item: inspect.isclass(item) and issubclass(item, AliasNode), items 579 | ) 580 | ) 581 | 582 | 583 | def simplify_tree(tree, unpack_lists=True, in_list=False): 584 | """Recursively unpack single-item lists and objects where fields and labels only reference a single child 585 | 586 | :param tree: the tree to simplify (mutating!) 587 | :param unpack_lists: whether single-item lists should be replaced by that item 588 | :param in_list: this is used to prevent unpacking a node in a list as AST visit can't handle nested lists 589 | """ 590 | # TODO: copy (or (de)serialize)? outside this function? 
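    # Sketch of the simplification (node shapes below are hypothetical, based on the
    # bundled Expr grammar):
    #   SubExpr with only its `expr` field set  -> replaced by that `expr` value
    #   [Integer] (single-item list)            -> replaced by Integer (when unpack_lists=True)
    #   AliasNode instances are kept as-is, but their fields are simplified recursively.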
591 | if isinstance(tree, BaseNode) and not isinstance(tree, Terminal): 592 | used_fields = [field for field in tree._fields if getattr(tree, field, False)] 593 | if len(used_fields) == 1: 594 | result = getattr(tree, used_fields[0]) 595 | else: 596 | result = None 597 | if ( 598 | len(used_fields) != 1 599 | or isinstance(tree, AliasNode) 600 | or (in_list and isinstance(result, list)) 601 | ): 602 | result = tree 603 | for field in tree._fields: 604 | old_value = getattr(tree, field, None) 605 | if old_value: 606 | setattr( 607 | result, 608 | field, 609 | simplify_tree(old_value, unpack_lists=unpack_lists), 610 | ) 611 | return result 612 | assert result is not None 613 | elif isinstance(tree, list) and len(tree) == 1 and unpack_lists: 614 | result = tree[0] 615 | else: 616 | if isinstance(tree, list): 617 | result = [ 618 | simplify_tree(el, unpack_lists=unpack_lists, in_list=True) 619 | for el in tree 620 | ] 621 | else: 622 | result = tree 623 | return result 624 | 625 | return simplify_tree(result, unpack_lists=unpack_lists) 626 | 627 | 628 | class BaseAstVisitor(ParseTreeVisitor): 629 | """Visitor that creates a high level tree 630 | 631 | ~ ANTLR tree serializer 632 | + automatic node creation using field and label detection 633 | + alias nodes can work on tree without (ANTLR) visitor 634 | 635 | Used from BaseAstVisitor: visitTerminal, visitErrorNode 636 | 637 | TODO: 638 | - [done] support labels 639 | - [done] make compatible with AST: _fields = () (should only every child once) 640 | - [done] include child_index to filter unique elements + order 641 | - [done] memoize dynamic classes, to have list + make instance checks work 642 | - [done] tree simplification as part of AliasNode 643 | - [done] flatten nested list (see select with dynamic clause ordering) 644 | - combine terminals / error nodes 645 | - serialize highlight info 646 | - [done] make compatible with AstNode & AstModule in protowhat (+ shellwhat usage: bashlex + osh parser) 647 | - combining fields & labels dicts needed? 648 | - use exact ANTLR names in _rules (capitalize name without changing other casing) 649 | - add labels to _fields if not overlapping with fields from rules 650 | - [done] eliminate overhead of alias parsing (store ref to child index, get children on alias access) 651 | - [necessary?] grammar must use lexer or grammar rules for elements that should be in the tree 652 | and literals for elements that cannot 653 | currently: 654 | - Use AliasNode to add labels to _fields, define custom fields and omit fields 655 | - Use Transformer to replace a node by a combination of fields 656 | - [rejected] alternative dynamic class naming: 657 | - pass parse start to visitor constructor, use as init for self.current_node 658 | - set self.current_node to field.__name__ before self.visit_field 659 | - use self.current_node to create dynamic classes 660 | (does not use #RuleAlias names in grammar) 661 | (other approach: transforming returned dict, needs more work for arrays + top level) 662 | 663 | Higher order visitor (or integrated) 664 | - [alternative] allow node aliases (~ AstNode._rules) by dynamically creating a class inheriting from the dynamic node class 665 | (multiple inheritance if node is alias for multiple nodes, class has combined _fields for AST compatibility 666 | - [alternative] allow field aliases using .aliases property with defaultdict(list) (~ AstNode._fields_spec) 667 | - dynamic fields? 
(~ visit_path) 668 | 669 | test code in parse: 670 | tree = parse_ast(grammar, sql_text, start, **kwargs) 671 | field_tree = BaseAstVisitor().visit(tree) 672 | alias_tree = AliasVisitor(Transformer()).visit(field_tree) 673 | 674 | import ast 675 | nodes = [el for el in ast.walk(field_tree)] 676 | import json 677 | json_str = json.dumps(field_tree, default=lambda o: o.to_json()) 678 | """ 679 | 680 | def __init__(self, registry: BaseNodeRegistry): 681 | self.registry = registry 682 | 683 | def visitChildren( 684 | self, node: ParserRuleContext, predicate=None, simplify=False 685 | ) -> BaseNode: 686 | # children is None if all parts of a grammar rule are optional and absent 687 | children = [self.visit(child) for child in node.children or []] 688 | 689 | instance = BaseNode.create(node, children, self.registry) 690 | 691 | return instance 692 | 693 | def visitTerminal(self, ctx: ParserRuleContext) -> Terminal: 694 | """Converts case insensitive keywords and identifiers to lowercase""" 695 | text = ctx.getText() 696 | return Terminal.from_text(text, ctx) 697 | 698 | def visitErrorNode(self, node: ErrorNode): 699 | return None 700 | 701 | 702 | # ANTLR helpers 703 | 704 | 705 | def get_field(ctx: ParserRuleContext, field: str): 706 | """Helper to get the value of a field""" 707 | # field can be a string or a node attribute 708 | if isinstance(field, str): 709 | field = getattr(ctx, field, None) 710 | # when not alias needs to be called 711 | if callable(field): 712 | field = field() 713 | # when alias set on token, need to go from CommonToken -> Terminal Node 714 | elif isinstance(field, CommonToken): 715 | # giving a name to lexer rules sets it to a token, 716 | # rather than the terminal node corresponding to that token 717 | # so we need to find it in children 718 | field = next( 719 | filter(lambda c: getattr(c, "symbol", None) is field, ctx.children) 720 | ) 721 | return field 722 | 723 | 724 | def get_field_references( 725 | ctx: ParserRuleContext, field_names: List[str], simplify=False 726 | ) -> Dict[str, Any]: 727 | """ 728 | Create a mapping from fields to corresponding child indices 729 | :param ctx: ANTLR node 730 | :param field_names: list of strings 731 | :param simplify: if True, omits fields with empty lists or None 732 | this makes it easy to detect nodes that only use a single field 733 | but it requires more work to combine fields that can be empty 734 | :return: mapping str -> int | int[] 735 | """ 736 | field_dict = {} 737 | for field_name in field_names: 738 | field = get_field(ctx, field_name) 739 | if ( 740 | not simplify 741 | or field is not None 742 | and (not isinstance(field, list) or len(field) > 0) 743 | ): 744 | if isinstance(field, list): 745 | value = [ctx.children.index(el) for el in field] 746 | elif field is not None: 747 | value = ctx.children.index(field) 748 | else: 749 | value = None 750 | field_dict[field_name] = value 751 | return field_dict 752 | 753 | 754 | def materialize(reference_dict: IndexReferences, source: List[Any]) -> Dict[str, Any]: 755 | """ 756 | Replace indices by actual elements in a reference mapping 757 | :param reference_dict: mapping str -> int | int[] 758 | :param source: list of elements 759 | :return: mapping str -> element | element[] 760 | """ 761 | materialized_dict = {} 762 | for field in reference_dict: 763 | reference = reference_dict[field] 764 | if isinstance(reference, list): 765 | materialized_dict[field] = [source[index] for index in reference] 766 | elif reference is not None: 767 | materialized_dict[field] = 
source[reference] 768 | else: 769 | materialized_dict[field] = None 770 | return materialized_dict 771 | 772 | 773 | def get_field_names(ctx: ParserRuleContext) -> List[str]: 774 | """Get fields defined in an ANTLR context for a parser rule""" 775 | # this does not include labels and literals, only rule names and token names 776 | # TODO: check ANTLR parser template for full exclusion list 777 | fields = [ 778 | field 779 | for field in type(ctx).__dict__ 780 | if not field.startswith("__") 781 | and field not in ["accept", "enterRule", "exitRule", "getRuleIndex", "copyFrom"] 782 | ] 783 | return fields 784 | 785 | 786 | def get_label_names(ctx: ParserRuleContext) -> List[str]: 787 | """Get labels defined in an ANTLR context for a parser rule""" 788 | labels = [ 789 | label 790 | for label in ctx.__dict__ 791 | if not label.startswith("_") 792 | and label 793 | not in [ 794 | "children", 795 | "exception", 796 | "invokingState", 797 | "parentCtx", 798 | "parser", 799 | "start", 800 | "stop", 801 | ] 802 | ] 803 | return labels 804 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU AFFERO GENERAL PUBLIC LICENSE 2 | Version 3, 19 November 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU Affero General Public License is a free, copyleft license for 11 | software and other kinds of works, specifically designed to ensure 12 | cooperation with the community in the case of network server software. 13 | 14 | The licenses for most software and other practical works are designed 15 | to take away your freedom to share and change the works. By contrast, 16 | our General Public Licenses are intended to guarantee your freedom to 17 | share and change all versions of a program--to make sure it remains free 18 | software for all its users. 19 | 20 | When we speak of free software, we are referring to freedom, not 21 | price. Our General Public Licenses are designed to make sure that you 22 | have the freedom to distribute copies of free software (and charge for 23 | them if you wish), that you receive source code or can get it if you 24 | want it, that you can change the software or use pieces of it in new 25 | free programs, and that you know you can do these things. 26 | 27 | Developers that use our General Public Licenses protect your rights 28 | with two steps: (1) assert copyright on the software, and (2) offer 29 | you this License which gives you legal permission to copy, distribute 30 | and/or modify the software. 31 | 32 | A secondary benefit of defending all users' freedom is that 33 | improvements made in alternate versions of the program, if they 34 | receive widespread use, become available for other developers to 35 | incorporate. Many developers of free software are heartened and 36 | encouraged by the resulting cooperation. However, in the case of 37 | software used on network servers, this result may fail to come about. 38 | The GNU General Public License permits making a modified version and 39 | letting the public access it on a server without ever releasing its 40 | source code to the public. 41 | 42 | The GNU Affero General Public License is designed specifically to 43 | ensure that, in such cases, the modified source code becomes available 44 | to the community. 
It requires the operator of a network server to 45 | provide the source code of the modified version running there to the 46 | users of that server. Therefore, public use of a modified version, on 47 | a publicly accessible server, gives the public access to the source 48 | code of the modified version. 49 | 50 | An older license, called the Affero General Public License and 51 | published by Affero, was designed to accomplish similar goals. This is 52 | a different license, not a version of the Affero GPL, but Affero has 53 | released a new version of the Affero GPL which permits relicensing under 54 | this license. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | TERMS AND CONDITIONS 60 | 61 | 0. Definitions. 62 | 63 | "This License" refers to version 3 of the GNU Affero General Public License. 64 | 65 | "Copyright" also means copyright-like laws that apply to other kinds of 66 | works, such as semiconductor masks. 67 | 68 | "The Program" refers to any copyrightable work licensed under this 69 | License. Each licensee is addressed as "you". "Licensees" and 70 | "recipients" may be individuals or organizations. 71 | 72 | To "modify" a work means to copy from or adapt all or part of the work 73 | in a fashion requiring copyright permission, other than the making of an 74 | exact copy. The resulting work is called a "modified version" of the 75 | earlier work or a work "based on" the earlier work. 76 | 77 | A "covered work" means either the unmodified Program or a work based 78 | on the Program. 79 | 80 | To "propagate" a work means to do anything with it that, without 81 | permission, would make you directly or secondarily liable for 82 | infringement under applicable copyright law, except executing it on a 83 | computer or modifying a private copy. Propagation includes copying, 84 | distribution (with or without modification), making available to the 85 | public, and in some countries other activities as well. 86 | 87 | To "convey" a work means any kind of propagation that enables other 88 | parties to make or receive copies. Mere interaction with a user through 89 | a computer network, with no transfer of a copy, is not conveying. 90 | 91 | An interactive user interface displays "Appropriate Legal Notices" 92 | to the extent that it includes a convenient and prominently visible 93 | feature that (1) displays an appropriate copyright notice, and (2) 94 | tells the user that there is no warranty for the work (except to the 95 | extent that warranties are provided), that licensees may convey the 96 | work under this License, and how to view a copy of this License. If 97 | the interface presents a list of user commands or options, such as a 98 | menu, a prominent item in the list meets this criterion. 99 | 100 | 1. Source Code. 101 | 102 | The "source code" for a work means the preferred form of the work 103 | for making modifications to it. "Object code" means any non-source 104 | form of a work. 105 | 106 | A "Standard Interface" means an interface that either is an official 107 | standard defined by a recognized standards body, or, in the case of 108 | interfaces specified for a particular programming language, one that 109 | is widely used among developers working in that language. 
110 | 111 | The "System Libraries" of an executable work include anything, other 112 | than the work as a whole, that (a) is included in the normal form of 113 | packaging a Major Component, but which is not part of that Major 114 | Component, and (b) serves only to enable use of the work with that 115 | Major Component, or to implement a Standard Interface for which an 116 | implementation is available to the public in source code form. A 117 | "Major Component", in this context, means a major essential component 118 | (kernel, window system, and so on) of the specific operating system 119 | (if any) on which the executable work runs, or a compiler used to 120 | produce the work, or an object code interpreter used to run it. 121 | 122 | The "Corresponding Source" for a work in object code form means all 123 | the source code needed to generate, install, and (for an executable 124 | work) run the object code and to modify the work, including scripts to 125 | control those activities. However, it does not include the work's 126 | System Libraries, or general-purpose tools or generally available free 127 | programs which are used unmodified in performing those activities but 128 | which are not part of the work. For example, Corresponding Source 129 | includes interface definition files associated with source files for 130 | the work, and the source code for shared libraries and dynamically 131 | linked subprograms that the work is specifically designed to require, 132 | such as by intimate data communication or control flow between those 133 | subprograms and other parts of the work. 134 | 135 | The Corresponding Source need not include anything that users 136 | can regenerate automatically from other parts of the Corresponding 137 | Source. 138 | 139 | The Corresponding Source for a work in source code form is that 140 | same work. 141 | 142 | 2. Basic Permissions. 143 | 144 | All rights granted under this License are granted for the term of 145 | copyright on the Program, and are irrevocable provided the stated 146 | conditions are met. This License explicitly affirms your unlimited 147 | permission to run the unmodified Program. The output from running a 148 | covered work is covered by this License only if the output, given its 149 | content, constitutes a covered work. This License acknowledges your 150 | rights of fair use or other equivalent, as provided by copyright law. 151 | 152 | You may make, run and propagate covered works that you do not 153 | convey, without conditions so long as your license otherwise remains 154 | in force. You may convey covered works to others for the sole purpose 155 | of having them make modifications exclusively for you, or provide you 156 | with facilities for running those works, provided that you comply with 157 | the terms of this License in conveying all material for which you do 158 | not control copyright. Those thus making or running the covered works 159 | for you must do so exclusively on your behalf, under your direction 160 | and control, on terms that prohibit them from making any copies of 161 | your copyrighted material outside their relationship with you. 162 | 163 | Conveying under any other circumstances is permitted solely under 164 | the conditions stated below. Sublicensing is not allowed; section 10 165 | makes it unnecessary. 166 | 167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 
168 | 169 | No covered work shall be deemed part of an effective technological 170 | measure under any applicable law fulfilling obligations under article 171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 172 | similar laws prohibiting or restricting circumvention of such 173 | measures. 174 | 175 | When you convey a covered work, you waive any legal power to forbid 176 | circumvention of technological measures to the extent such circumvention 177 | is effected by exercising rights under this License with respect to 178 | the covered work, and you disclaim any intention to limit operation or 179 | modification of the work as a means of enforcing, against the work's 180 | users, your or third parties' legal rights to forbid circumvention of 181 | technological measures. 182 | 183 | 4. Conveying Verbatim Copies. 184 | 185 | You may convey verbatim copies of the Program's source code as you 186 | receive it, in any medium, provided that you conspicuously and 187 | appropriately publish on each copy an appropriate copyright notice; 188 | keep intact all notices stating that this License and any 189 | non-permissive terms added in accord with section 7 apply to the code; 190 | keep intact all notices of the absence of any warranty; and give all 191 | recipients a copy of this License along with the Program. 192 | 193 | You may charge any price or no price for each copy that you convey, 194 | and you may offer support or warranty protection for a fee. 195 | 196 | 5. Conveying Modified Source Versions. 197 | 198 | You may convey a work based on the Program, or the modifications to 199 | produce it from the Program, in the form of source code under the 200 | terms of section 4, provided that you also meet all of these conditions: 201 | 202 | a) The work must carry prominent notices stating that you modified 203 | it, and giving a relevant date. 204 | 205 | b) The work must carry prominent notices stating that it is 206 | released under this License and any conditions added under section 207 | 7. This requirement modifies the requirement in section 4 to 208 | "keep intact all notices". 209 | 210 | c) You must license the entire work, as a whole, under this 211 | License to anyone who comes into possession of a copy. This 212 | License will therefore apply, along with any applicable section 7 213 | additional terms, to the whole of the work, and all its parts, 214 | regardless of how they are packaged. This License gives no 215 | permission to license the work in any other way, but it does not 216 | invalidate such permission if you have separately received it. 217 | 218 | d) If the work has interactive user interfaces, each must display 219 | Appropriate Legal Notices; however, if the Program has interactive 220 | interfaces that do not display Appropriate Legal Notices, your 221 | work need not make them do so. 222 | 223 | A compilation of a covered work with other separate and independent 224 | works, which are not by their nature extensions of the covered work, 225 | and which are not combined with it such as to form a larger program, 226 | in or on a volume of a storage or distribution medium, is called an 227 | "aggregate" if the compilation and its resulting copyright are not 228 | used to limit the access or legal rights of the compilation's users 229 | beyond what the individual works permit. Inclusion of a covered work 230 | in an aggregate does not cause this License to apply to the other 231 | parts of the aggregate. 232 | 233 | 6. Conveying Non-Source Forms. 
234 | 235 | You may convey a covered work in object code form under the terms 236 | of sections 4 and 5, provided that you also convey the 237 | machine-readable Corresponding Source under the terms of this License, 238 | in one of these ways: 239 | 240 | a) Convey the object code in, or embodied in, a physical product 241 | (including a physical distribution medium), accompanied by the 242 | Corresponding Source fixed on a durable physical medium 243 | customarily used for software interchange. 244 | 245 | b) Convey the object code in, or embodied in, a physical product 246 | (including a physical distribution medium), accompanied by a 247 | written offer, valid for at least three years and valid for as 248 | long as you offer spare parts or customer support for that product 249 | model, to give anyone who possesses the object code either (1) a 250 | copy of the Corresponding Source for all the software in the 251 | product that is covered by this License, on a durable physical 252 | medium customarily used for software interchange, for a price no 253 | more than your reasonable cost of physically performing this 254 | conveying of source, or (2) access to copy the 255 | Corresponding Source from a network server at no charge. 256 | 257 | c) Convey individual copies of the object code with a copy of the 258 | written offer to provide the Corresponding Source. This 259 | alternative is allowed only occasionally and noncommercially, and 260 | only if you received the object code with such an offer, in accord 261 | with subsection 6b. 262 | 263 | d) Convey the object code by offering access from a designated 264 | place (gratis or for a charge), and offer equivalent access to the 265 | Corresponding Source in the same way through the same place at no 266 | further charge. You need not require recipients to copy the 267 | Corresponding Source along with the object code. If the place to 268 | copy the object code is a network server, the Corresponding Source 269 | may be on a different server (operated by you or a third party) 270 | that supports equivalent copying facilities, provided you maintain 271 | clear directions next to the object code saying where to find the 272 | Corresponding Source. Regardless of what server hosts the 273 | Corresponding Source, you remain obligated to ensure that it is 274 | available for as long as needed to satisfy these requirements. 275 | 276 | e) Convey the object code using peer-to-peer transmission, provided 277 | you inform other peers where the object code and Corresponding 278 | Source of the work are being offered to the general public at no 279 | charge under subsection 6d. 280 | 281 | A separable portion of the object code, whose source code is excluded 282 | from the Corresponding Source as a System Library, need not be 283 | included in conveying the object code work. 284 | 285 | A "User Product" is either (1) a "consumer product", which means any 286 | tangible personal property which is normally used for personal, family, 287 | or household purposes, or (2) anything designed or sold for incorporation 288 | into a dwelling. In determining whether a product is a consumer product, 289 | doubtful cases shall be resolved in favor of coverage. For a particular 290 | product received by a particular user, "normally used" refers to a 291 | typical or common use of that class of product, regardless of the status 292 | of the particular user or of the way in which the particular user 293 | actually uses, or expects or is expected to use, the product. 
A product 294 | is a consumer product regardless of whether the product has substantial 295 | commercial, industrial or non-consumer uses, unless such uses represent 296 | the only significant mode of use of the product. 297 | 298 | "Installation Information" for a User Product means any methods, 299 | procedures, authorization keys, or other information required to install 300 | and execute modified versions of a covered work in that User Product from 301 | a modified version of its Corresponding Source. The information must 302 | suffice to ensure that the continued functioning of the modified object 303 | code is in no case prevented or interfered with solely because 304 | modification has been made. 305 | 306 | If you convey an object code work under this section in, or with, or 307 | specifically for use in, a User Product, and the conveying occurs as 308 | part of a transaction in which the right of possession and use of the 309 | User Product is transferred to the recipient in perpetuity or for a 310 | fixed term (regardless of how the transaction is characterized), the 311 | Corresponding Source conveyed under this section must be accompanied 312 | by the Installation Information. But this requirement does not apply 313 | if neither you nor any third party retains the ability to install 314 | modified object code on the User Product (for example, the work has 315 | been installed in ROM). 316 | 317 | The requirement to provide Installation Information does not include a 318 | requirement to continue to provide support service, warranty, or updates 319 | for a work that has been modified or installed by the recipient, or for 320 | the User Product in which it has been modified or installed. Access to a 321 | network may be denied when the modification itself materially and 322 | adversely affects the operation of the network or violates the rules and 323 | protocols for communication across the network. 324 | 325 | Corresponding Source conveyed, and Installation Information provided, 326 | in accord with this section must be in a format that is publicly 327 | documented (and with an implementation available to the public in 328 | source code form), and must require no special password or key for 329 | unpacking, reading or copying. 330 | 331 | 7. Additional Terms. 332 | 333 | "Additional permissions" are terms that supplement the terms of this 334 | License by making exceptions from one or more of its conditions. 335 | Additional permissions that are applicable to the entire Program shall 336 | be treated as though they were included in this License, to the extent 337 | that they are valid under applicable law. If additional permissions 338 | apply only to part of the Program, that part may be used separately 339 | under those permissions, but the entire Program remains governed by 340 | this License without regard to the additional permissions. 341 | 342 | When you convey a copy of a covered work, you may at your option 343 | remove any additional permissions from that copy, or from any part of 344 | it. (Additional permissions may be written to require their own 345 | removal in certain cases when you modify the work.) You may place 346 | additional permissions on material, added by you to a covered work, 347 | for which you have or can give appropriate copyright permission. 
348 | 349 | Notwithstanding any other provision of this License, for material you 350 | add to a covered work, you may (if authorized by the copyright holders of 351 | that material) supplement the terms of this License with terms: 352 | 353 | a) Disclaiming warranty or limiting liability differently from the 354 | terms of sections 15 and 16 of this License; or 355 | 356 | b) Requiring preservation of specified reasonable legal notices or 357 | author attributions in that material or in the Appropriate Legal 358 | Notices displayed by works containing it; or 359 | 360 | c) Prohibiting misrepresentation of the origin of that material, or 361 | requiring that modified versions of such material be marked in 362 | reasonable ways as different from the original version; or 363 | 364 | d) Limiting the use for publicity purposes of names of licensors or 365 | authors of the material; or 366 | 367 | e) Declining to grant rights under trademark law for use of some 368 | trade names, trademarks, or service marks; or 369 | 370 | f) Requiring indemnification of licensors and authors of that 371 | material by anyone who conveys the material (or modified versions of 372 | it) with contractual assumptions of liability to the recipient, for 373 | any liability that these contractual assumptions directly impose on 374 | those licensors and authors. 375 | 376 | All other non-permissive additional terms are considered "further 377 | restrictions" within the meaning of section 10. If the Program as you 378 | received it, or any part of it, contains a notice stating that it is 379 | governed by this License along with a term that is a further 380 | restriction, you may remove that term. If a license document contains 381 | a further restriction but permits relicensing or conveying under this 382 | License, you may add to a covered work material governed by the terms 383 | of that license document, provided that the further restriction does 384 | not survive such relicensing or conveying. 385 | 386 | If you add terms to a covered work in accord with this section, you 387 | must place, in the relevant source files, a statement of the 388 | additional terms that apply to those files, or a notice indicating 389 | where to find the applicable terms. 390 | 391 | Additional terms, permissive or non-permissive, may be stated in the 392 | form of a separately written license, or stated as exceptions; 393 | the above requirements apply either way. 394 | 395 | 8. Termination. 396 | 397 | You may not propagate or modify a covered work except as expressly 398 | provided under this License. Any attempt otherwise to propagate or 399 | modify it is void, and will automatically terminate your rights under 400 | this License (including any patent licenses granted under the third 401 | paragraph of section 11). 402 | 403 | However, if you cease all violation of this License, then your 404 | license from a particular copyright holder is reinstated (a) 405 | provisionally, unless and until the copyright holder explicitly and 406 | finally terminates your license, and (b) permanently, if the copyright 407 | holder fails to notify you of the violation by some reasonable means 408 | prior to 60 days after the cessation. 
409 | 410 | Moreover, your license from a particular copyright holder is 411 | reinstated permanently if the copyright holder notifies you of the 412 | violation by some reasonable means, this is the first time you have 413 | received notice of violation of this License (for any work) from that 414 | copyright holder, and you cure the violation prior to 30 days after 415 | your receipt of the notice. 416 | 417 | Termination of your rights under this section does not terminate the 418 | licenses of parties who have received copies or rights from you under 419 | this License. If your rights have been terminated and not permanently 420 | reinstated, you do not qualify to receive new licenses for the same 421 | material under section 10. 422 | 423 | 9. Acceptance Not Required for Having Copies. 424 | 425 | You are not required to accept this License in order to receive or 426 | run a copy of the Program. Ancillary propagation of a covered work 427 | occurring solely as a consequence of using peer-to-peer transmission 428 | to receive a copy likewise does not require acceptance. However, 429 | nothing other than this License grants you permission to propagate or 430 | modify any covered work. These actions infringe copyright if you do 431 | not accept this License. Therefore, by modifying or propagating a 432 | covered work, you indicate your acceptance of this License to do so. 433 | 434 | 10. Automatic Licensing of Downstream Recipients. 435 | 436 | Each time you convey a covered work, the recipient automatically 437 | receives a license from the original licensors, to run, modify and 438 | propagate that work, subject to this License. You are not responsible 439 | for enforcing compliance by third parties with this License. 440 | 441 | An "entity transaction" is a transaction transferring control of an 442 | organization, or substantially all assets of one, or subdividing an 443 | organization, or merging organizations. If propagation of a covered 444 | work results from an entity transaction, each party to that 445 | transaction who receives a copy of the work also receives whatever 446 | licenses to the work the party's predecessor in interest had or could 447 | give under the previous paragraph, plus a right to possession of the 448 | Corresponding Source of the work from the predecessor in interest, if 449 | the predecessor has it or can get it with reasonable efforts. 450 | 451 | You may not impose any further restrictions on the exercise of the 452 | rights granted or affirmed under this License. For example, you may 453 | not impose a license fee, royalty, or other charge for exercise of 454 | rights granted under this License, and you may not initiate litigation 455 | (including a cross-claim or counterclaim in a lawsuit) alleging that 456 | any patent claim is infringed by making, using, selling, offering for 457 | sale, or importing the Program or any portion of it. 458 | 459 | 11. Patents. 460 | 461 | A "contributor" is a copyright holder who authorizes use under this 462 | License of the Program or a work on which the Program is based. The 463 | work thus licensed is called the contributor's "contributor version". 
464 | 465 | A contributor's "essential patent claims" are all patent claims 466 | owned or controlled by the contributor, whether already acquired or 467 | hereafter acquired, that would be infringed by some manner, permitted 468 | by this License, of making, using, or selling its contributor version, 469 | but do not include claims that would be infringed only as a 470 | consequence of further modification of the contributor version. For 471 | purposes of this definition, "control" includes the right to grant 472 | patent sublicenses in a manner consistent with the requirements of 473 | this License. 474 | 475 | Each contributor grants you a non-exclusive, worldwide, royalty-free 476 | patent license under the contributor's essential patent claims, to 477 | make, use, sell, offer for sale, import and otherwise run, modify and 478 | propagate the contents of its contributor version. 479 | 480 | In the following three paragraphs, a "patent license" is any express 481 | agreement or commitment, however denominated, not to enforce a patent 482 | (such as an express permission to practice a patent or covenant not to 483 | sue for patent infringement). To "grant" such a patent license to a 484 | party means to make such an agreement or commitment not to enforce a 485 | patent against the party. 486 | 487 | If you convey a covered work, knowingly relying on a patent license, 488 | and the Corresponding Source of the work is not available for anyone 489 | to copy, free of charge and under the terms of this License, through a 490 | publicly available network server or other readily accessible means, 491 | then you must either (1) cause the Corresponding Source to be so 492 | available, or (2) arrange to deprive yourself of the benefit of the 493 | patent license for this particular work, or (3) arrange, in a manner 494 | consistent with the requirements of this License, to extend the patent 495 | license to downstream recipients. "Knowingly relying" means you have 496 | actual knowledge that, but for the patent license, your conveying the 497 | covered work in a country, or your recipient's use of the covered work 498 | in a country, would infringe one or more identifiable patents in that 499 | country that you have reason to believe are valid. 500 | 501 | If, pursuant to or in connection with a single transaction or 502 | arrangement, you convey, or propagate by procuring conveyance of, a 503 | covered work, and grant a patent license to some of the parties 504 | receiving the covered work authorizing them to use, propagate, modify 505 | or convey a specific copy of the covered work, then the patent license 506 | you grant is automatically extended to all recipients of the covered 507 | work and works based on it. 508 | 509 | A patent license is "discriminatory" if it does not include within 510 | the scope of its coverage, prohibits the exercise of, or is 511 | conditioned on the non-exercise of one or more of the rights that are 512 | specifically granted under this License. 
You may not convey a covered 513 | work if you are a party to an arrangement with a third party that is 514 | in the business of distributing software, under which you make payment 515 | to the third party based on the extent of your activity of conveying 516 | the work, and under which the third party grants, to any of the 517 | parties who would receive the covered work from you, a discriminatory 518 | patent license (a) in connection with copies of the covered work 519 | conveyed by you (or copies made from those copies), or (b) primarily 520 | for and in connection with specific products or compilations that 521 | contain the covered work, unless you entered into that arrangement, 522 | or that patent license was granted, prior to 28 March 2007. 523 | 524 | Nothing in this License shall be construed as excluding or limiting 525 | any implied license or other defenses to infringement that may 526 | otherwise be available to you under applicable patent law. 527 | 528 | 12. No Surrender of Others' Freedom. 529 | 530 | If conditions are imposed on you (whether by court order, agreement or 531 | otherwise) that contradict the conditions of this License, they do not 532 | excuse you from the conditions of this License. If you cannot convey a 533 | covered work so as to satisfy simultaneously your obligations under this 534 | License and any other pertinent obligations, then as a consequence you may 535 | not convey it at all. For example, if you agree to terms that obligate you 536 | to collect a royalty for further conveying from those to whom you convey 537 | the Program, the only way you could satisfy both those terms and this 538 | License would be to refrain entirely from conveying the Program. 539 | 540 | 13. Remote Network Interaction; Use with the GNU General Public License. 541 | 542 | Notwithstanding any other provision of this License, if you modify the 543 | Program, your modified version must prominently offer all users 544 | interacting with it remotely through a computer network (if your version 545 | supports such interaction) an opportunity to receive the Corresponding 546 | Source of your version by providing access to the Corresponding Source 547 | from a network server at no charge, through some standard or customary 548 | means of facilitating copying of software. This Corresponding Source 549 | shall include the Corresponding Source for any work covered by version 3 550 | of the GNU General Public License that is incorporated pursuant to the 551 | following paragraph. 552 | 553 | Notwithstanding any other provision of this License, you have 554 | permission to link or combine any covered work with a work licensed 555 | under version 3 of the GNU General Public License into a single 556 | combined work, and to convey the resulting work. The terms of this 557 | License will continue to apply to the part which is the covered work, 558 | but the work with which it is combined will remain governed by version 559 | 3 of the GNU General Public License. 560 | 561 | 14. Revised Versions of this License. 562 | 563 | The Free Software Foundation may publish revised and/or new versions of 564 | the GNU Affero General Public License from time to time. Such new versions 565 | will be similar in spirit to the present version, but may differ in detail to 566 | address new problems or concerns. 567 | 568 | Each version is given a distinguishing version number. 
If the 569 | Program specifies that a certain numbered version of the GNU Affero General 570 | Public License "or any later version" applies to it, you have the 571 | option of following the terms and conditions either of that numbered 572 | version or of any later version published by the Free Software 573 | Foundation. If the Program does not specify a version number of the 574 | GNU Affero General Public License, you may choose any version ever published 575 | by the Free Software Foundation. 576 | 577 | If the Program specifies that a proxy can decide which future 578 | versions of the GNU Affero General Public License can be used, that proxy's 579 | public statement of acceptance of a version permanently authorizes you 580 | to choose that version for the Program. 581 | 582 | Later license versions may give you additional or different 583 | permissions. However, no additional obligations are imposed on any 584 | author or copyright holder as a result of your choosing to follow a 585 | later version. 586 | 587 | 15. Disclaimer of Warranty. 588 | 589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 597 | 598 | 16. Limitation of Liability. 599 | 600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 608 | SUCH DAMAGES. 609 | 610 | 17. Interpretation of Sections 15 and 16. 611 | 612 | If the disclaimer of warranty and limitation of liability provided 613 | above cannot be given local legal effect according to their terms, 614 | reviewing courts shall apply local law that most closely approximates 615 | an absolute waiver of all civil liability in connection with the 616 | Program, unless a warranty or assumption of liability accompanies a 617 | copy of the Program in return for a fee. 618 | 619 | END OF TERMS AND CONDITIONS 620 | 621 | How to Apply These Terms to Your New Programs 622 | 623 | If you develop a new program, and you want it to be of the greatest 624 | possible use to the public, the best way to achieve this is to make it 625 | free software which everyone can redistribute and change under these terms. 626 | 627 | To do so, attach the following notices to the program. It is safest 628 | to attach them to the start of each source file to most effectively 629 | state the exclusion of warranty; and each file should have at least 630 | the "copyright" line and a pointer to where the full notice is found. 
631 | 632 | 633 | Copyright (C) 634 | 635 | This program is free software: you can redistribute it and/or modify 636 | it under the terms of the GNU Affero General Public License as published 637 | by the Free Software Foundation, either version 3 of the License, or 638 | (at your option) any later version. 639 | 640 | This program is distributed in the hope that it will be useful, 641 | but WITHOUT ANY WARRANTY; without even the implied warranty of 642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 643 | GNU Affero General Public License for more details. 644 | 645 | You should have received a copy of the GNU Affero General Public License 646 | along with this program. If not, see . 647 | 648 | Also add information on how to contact you by electronic and paper mail. 649 | 650 | If your software can interact with users remotely through a computer 651 | network, you should also make sure that it provides a way for users to 652 | get its source. For example, if your program is a web application, its 653 | interface could display a "Source" link that leads users to an archive 654 | of the code. There are many ways you could offer source, and different 655 | solutions will be better for different programs; see section 13 for the 656 | specific requirements. 657 | 658 | You should also get your employer (if you work as a programmer) or school, 659 | if any, to sign a "copyright disclaimer" for the program, if necessary. 660 | For more information on this, and how to apply and follow the GNU AGPL, see 661 | . 662 | --------------------------------------------------------------------------------