├── javalang ├── test │ ├── __init__.py │ ├── source │ │ └── package-info │ │ │ ├── NoAnnotationNoJavadoc.java │ │ │ ├── AnnotationOnly.java │ │ │ ├── JavadocOnly.java │ │ │ ├── AnnotationJavadoc.java │ │ │ └── JavadocAnnotation.java │ ├── test_javadoc.py │ ├── test_util.py │ ├── test_package_declaration.py │ ├── test_tokenizer.py │ └── test_java_8_syntax.py ├── __init__.py ├── parse.py ├── ast.py ├── javadoc.py ├── util.py ├── tree.py ├── tokenizer.py └── parser.py ├── requirements.txt ├── MANIFEST.in ├── .gitignore ├── .travis.yml ├── LICENSE.txt ├── setup.py └── README.rst /javalang/test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | six 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | dist 3 | *.egg-info 4 | /build 5 | /env 6 | /.eggs 7 | -------------------------------------------------------------------------------- /javalang/test/source/package-info/NoAnnotationNoJavadoc.java: -------------------------------------------------------------------------------- 1 | package org.javalang.test; -------------------------------------------------------------------------------- /javalang/test/source/package-info/AnnotationOnly.java: -------------------------------------------------------------------------------- 1 | @Package 2 | package org.javalang.test; -------------------------------------------------------------------------------- /.travis.yml: 
-------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | - "3.6" 5 | - "3.7" 6 | - "3.8" 7 | script: python setup.py test 8 | -------------------------------------------------------------------------------- /javalang/test/source/package-info/JavadocOnly.java: -------------------------------------------------------------------------------- 1 | /** 2 | Test that includes java doc first but no annotation 3 | */ 4 | package org.javalang.test; -------------------------------------------------------------------------------- /javalang/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from . import parser 3 | from . import parse 4 | from . import tokenizer 5 | from . import javadoc 6 | 7 | 8 | __version__ = "0.13.0" 9 | -------------------------------------------------------------------------------- /javalang/test/source/package-info/AnnotationJavadoc.java: -------------------------------------------------------------------------------- 1 | @Package 2 | /** 3 | Test that includes java doc first but no annotation 4 | */ 5 | package org.javalang.test; -------------------------------------------------------------------------------- /javalang/test/source/package-info/JavadocAnnotation.java: -------------------------------------------------------------------------------- 1 | /** 2 | Test that includes java doc first but no annotation 3 | */ 4 | @Package 5 | package org.javalang.test; -------------------------------------------------------------------------------- /javalang/test/test_javadoc.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from .. 
import javadoc 4 | 5 | 6 | class TestJavadoc(unittest.TestCase): 7 | def test_empty_comment(self): 8 | javadoc.parse('/** */') 9 | javadoc.parse('/***/') 10 | javadoc.parse('/**\n *\n */') 11 | javadoc.parse('/**\n *\n *\n */') 12 | 13 | if __name__ == "__main__": 14 | unittest.main() 15 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Christopher Thunes 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 
20 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | 4 | setup( 5 | name = "javalang", 6 | packages = ["javalang"], 7 | version = "0.13.0", 8 | author = "Chris Thunes", 9 | author_email = "cthunes@brewtab.com", 10 | url = "http://github.com/c2nes/javalang", 11 | description = "Pure Python Java parser and tools", 12 | classifiers = [ 13 | "Programming Language :: Python", 14 | "Development Status :: 4 - Beta", 15 | "Operating System :: OS Independent", 16 | "License :: OSI Approved :: MIT License", 17 | "Intended Audience :: Developers", 18 | "Topic :: Software Development :: Libraries" 19 | ], 20 | long_description = """\ 21 | ======== 22 | javalang 23 | ======== 24 | 25 | javalang is a pure Python library for working with Java source 26 | code. javalang provies a lexer and parser targeting Java 8. The 27 | implementation is based on the Java language spec available at 28 | http://docs.oracle.com/javase/specs/jls/se8/html/. 
def _parser_for(source):
    """Tokenize ``source`` and wrap the resulting token stream in a ``Parser``."""
    return Parser(tokenize(source))


def _terminated(text):
    """Return ``text`` ending in ``;``, appending one only when it is missing."""
    return text if text.endswith(';') else text + ';'


def _with_empty_body(sig):
    """Return ``sig`` with one trailing ``;`` (if present) replaced by ``{ }``."""
    stem = sig[:-1] if sig.endswith(';') else sig
    return stem + '{ }'


def parse_expression(exp):
    """Parse ``exp`` as a single expression.

    A terminating ``;`` is appended when ``exp`` does not already end with
    one.  Returns the result of ``Parser.parse_expression()``.
    """
    return _parser_for(_terminated(exp)).parse_expression()


def parse_member_signature(sig):
    """Parse ``sig`` as a member declaration.

    A terminating ``;`` is appended when ``sig`` does not already end with
    one.  Returns the result of ``Parser.parse_member_declaration()``.
    """
    return _parser_for(_terminated(sig)).parse_member_declaration()


def parse_constructor_signature(sig):
    """Parse a constructor signature as a member declaration.

    An empty body (``{ }``) is added to the signature, replacing a trailing
    ``;`` if there is one.  Returns the result of
    ``Parser.parse_member_declaration()``.
    """
    return _parser_for(_with_empty_body(sig)).parse_member_declaration()


def parse_type(s):
    """Parse ``s`` as a type and return the result of ``Parser.parse_type()``."""
    return _parser_for(s).parse_type()


def parse_type_signature(sig):
    """Parse a class or interface signature.

    An empty body (``{ }``) is added to the signature, replacing a trailing
    ``;`` if there is one.  Returns the result of
    ``Parser.parse_class_or_interface_declaration()``.
    """
    return _parser_for(_with_empty_body(sig)).parse_class_or_interface_declaration()


def parse(s):
    """Parse ``s`` with ``Parser.parse()`` and return its result."""
    return _parser_for(s).parse()
11 | self.assertEqual(next(i), 1) 12 | self.assertEqual(next(i), 2) 13 | 14 | self.assertEqual(i.last(), 2) 15 | 16 | self.assertEqual(i.look(), 3) 17 | self.assertEqual(i.last(), 3) 18 | 19 | self.assertEqual(i.look(1), 4) 20 | self.assertEqual(i.look(2), 5) 21 | self.assertEqual(i.look(3), 6) 22 | self.assertEqual(i.look(4), 7) 23 | 24 | self.assertEqual(i.last(), 7) 25 | 26 | i.push_marker() 27 | self.assertEqual(next(i), 3) 28 | self.assertEqual(next(i), 4) 29 | self.assertEqual(next(i), 5) 30 | i.pop_marker(True) # reset 31 | 32 | self.assertEqual(i.look(), 3) 33 | self.assertEqual(next(i), 3) 34 | 35 | i.push_marker() #1 36 | self.assertEqual(next(i), 4) 37 | self.assertEqual(next(i), 5) 38 | i.push_marker() #2 39 | self.assertEqual(next(i), 6) 40 | self.assertEqual(next(i), 7) 41 | i.push_marker() #3 42 | self.assertEqual(next(i), 8) 43 | self.assertEqual(next(i), 9) 44 | i.pop_marker(False) #3 45 | self.assertEqual(next(i), 10) 46 | i.pop_marker(True) #2 47 | self.assertEqual(next(i), 6) 48 | self.assertEqual(next(i), 7) 49 | self.assertEqual(next(i), 8) 50 | i.pop_marker(False) #1 51 | self.assertEqual(next(i), 9) 52 | 53 | try: 54 | with i: 55 | self.assertEqual(next(i), 10) 56 | self.assertEqual(next(i), 11) 57 | raise Exception() 58 | except: 59 | self.assertEqual(next(i), 10) 60 | self.assertEqual(next(i), 11) 61 | 62 | with i: 63 | self.assertEqual(next(i), 12) 64 | self.assertEqual(next(i), 13) 65 | self.assertEqual(next(i), 14) 66 | 67 | 68 | if __name__=="__main__": 69 | unittest.main() 70 | -------------------------------------------------------------------------------- /javalang/test/test_package_declaration.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from pkg_resources import resource_string 4 | from .. 
# From my reading of the spec (http://docs.oracle.com/javase/specs/jls/se7/html/jls-7.html) the
# allowed order is javadoc, optional annotation, package declaration
class PackageInfo(unittest.TestCase):
    """Check which annotations/documentation end up on the parsed package.

    Every test parses one fixture under ``source/package-info/`` and inspects
    ``ast.package``.  The assertions use ``assertEqual`` / ``assertTrue`` /
    ``assertFalse``; the ``failUnless`` / ``failIf`` aliases used previously
    were removed from ``unittest`` in Python 3.12.
    """

    def testPackageDeclarationOnly(self):
        source_file = "source/package-info/NoAnnotationNoJavadoc.java"
        ast = self.get_ast(source_file)

        self.assertEqual(ast.package.name, "org.javalang.test")
        self.assertFalse(ast.package.annotations)
        self.assertFalse(ast.package.documentation)

    def testAnnotationOnly(self):
        source_file = "source/package-info/AnnotationOnly.java"
        ast = self.get_ast(source_file)

        self.assertEqual(ast.package.name, "org.javalang.test")
        self.assertTrue(ast.package.annotations)
        self.assertFalse(ast.package.documentation)

    def testJavadocOnly(self):
        source_file = "source/package-info/JavadocOnly.java"
        ast = self.get_ast(source_file)

        self.assertEqual(ast.package.name, "org.javalang.test")
        self.assertFalse(ast.package.annotations)
        self.assertTrue(ast.package.documentation)

    def testAnnotationThenJavadoc(self):
        source_file = "source/package-info/AnnotationJavadoc.java"
        ast = self.get_ast(source_file)

        self.assertEqual(ast.package.name, "org.javalang.test")
        self.assertTrue(ast.package.annotations)
        # A Javadoc comment placed after the annotation is expected NOT to be
        # recorded as the package documentation.
        self.assertFalse(ast.package.documentation)

    def testJavadocThenAnnotation(self):
        source_file = "source/package-info/JavadocAnnotation.java"
        ast = self.get_ast(source_file)

        self.assertEqual(ast.package.name, "org.javalang.test")
        self.assertTrue(ast.package.annotations)
        self.assertTrue(ast.package.documentation)

    def get_ast(self, filename):
        """Load the fixture ``filename`` (relative to this module) and parse it."""
        source = resource_string(__name__, filename)
        ast = parse.parse(source)

        return ast
@six.add_metaclass(MetaNode)
class Node(object):
    """Base class for syntax-tree nodes.

    ``attrs`` names the attributes a node carries.  The ``MetaNode``
    metaclass prepends the ``attrs`` of every base class, so a subclass only
    declares the attributes it adds.
    """

    attrs = ()

    def __init__(self, **kwargs):
        """Set every name in ``attrs`` from ``kwargs``, defaulting to ``None``.

        Raises:
            ValueError: if ``kwargs`` contains a name that is not in ``attrs``.
        """
        values = kwargs.copy()

        for attr_name in self.attrs:
            value = values.pop(attr_name, None)
            setattr(self, attr_name, value)

        if values:
            raise ValueError('Extraneous arguments')

    def __eq__(self, other):
        """Return True when ``other`` has exactly this type and equal ``attrs`` values.

        This was previously spelled ``__equals__``, a name Python never
        invokes, so ``==`` (as used by ``filter``) fell back to identity.
        """
        if type(other) is not type(self):
            return False

        for attr in self.attrs:
            if getattr(other, attr) != getattr(self, attr):
                return False

        return True

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``.
        return not self.__eq__(other)

    # Old spelling kept so explicit ``node.__equals__(other)`` calls still work.
    __equals__ = __eq__

    # Defining ``__eq__`` would make instances unhashable on Python 3.  Nodes
    # are mutable, so identity hashing is kept to stay backward compatible
    # with code that stores nodes in sets or uses them as dict keys.
    # NOTE(review): structurally equal nodes can therefore hash differently.
    __hash__ = object.__hash__

    def __repr__(self):
        attr_values = []
        for attr in sorted(self.attrs):
            attr_values.append('%s=%s' % (attr, getattr(self, attr)))
        return '%s(%s)' % (type(self).__name__, ', '.join(attr_values))

    def __iter__(self):
        """Iterate over the ``(path, node)`` pairs produced by ``walk_tree(self)``."""
        return walk_tree(self)

    def filter(self, pattern):
        """Yield the ``(path, node)`` pairs of this tree that match ``pattern``.

        A pair matches when ``pattern`` is a type and ``node`` is an instance
        of it, or when ``node == pattern``.
        """
        for path, node in self:
            if ((isinstance(pattern, type) and isinstance(node, pattern)) or
                    (node == pattern)):
                yield path, node

    @property
    def children(self):
        """List of this node's attribute values, in ``attrs`` order."""
        return [getattr(self, attr_name) for attr_name in self.attrs]

    @property
    def position(self):
        """The ``_position`` attribute if one has been set, otherwise ``None``."""
        if hasattr(self, "_position"):
            return self._position
class DocBlock(object):
    """Container for the pieces of a parsed Javadoc comment.

    ``add_block`` files each tag into a dedicated field where one exists
    (``params``, ``throws``/``exceptions``, ``return_doc``, ``authors``,
    ``deprecated``) and always records it in ``tags`` as well.
    """

    def __init__(self):
        self.description = ''
        self.return_doc = None
        self.params = []

        self.authors = []
        self.deprecated = False

        # @exception and @throw are equivalent, so both names share one dict
        self.throws = {}
        self.exceptions = self.throws

        self.tags = {}

    def add_block(self, name, value):
        """Record one block tag called ``name`` whose text is ``value``."""
        text = value.strip()

        if name in ('param', 'throws', 'exception'):
            # The first whitespace-delimited word is the subject (parameter
            # or exception name); whatever follows is its description.
            pieces = text.split(None, 1)
            if len(pieces) == 2:
                subject, description = pieces
            else:
                subject, description = text, ''

            if name == 'param':
                self.params.append((subject, join(description)))
            else:
                self.throws[subject] = join(description)

        elif name == 'return':
            self.return_doc = text

        elif name == 'author':
            self.authors.append(text)

        elif name == 'deprecated':
            self.deprecated = True

        # Every tag, recognised or not, is also kept under its raw name.
        self.tags.setdefault(name, []).append(text)
class LookAheadIterator(object):
    """Iterator wrapper that supports look-ahead and nested rewind markers.

    Values peeked at with ``look()`` are buffered in ``look_ahead`` and handed
    out by later ``next()`` calls.  While at least one marker is active, every
    value returned by ``next()`` is recorded so that ``pop_marker(True)`` can
    make those values available again.  Used as a context manager, the
    iterator pushes a marker on entry and rewinds on exit only when the block
    raised.
    """

    def __init__(self, iterable):
        self.iterable = iter(iterable)
        self.look_ahead = list()   # values fetched but not yet consumed
        self.markers = list()      # stack of lists of values consumed since each marker
        self.default = None        # returned by look() past the end of the input
        self.value = None          # most recent value returned by next()/look()

    def __iter__(self):
        return self

    def set_default(self, value):
        """Set the value ``look()`` returns when looking past the end."""
        self.default = value

    def next(self):
        # Python 2 iterator protocol.
        return self.__next__()

    def __next__(self):
        if self.look_ahead:
            self.value = self.look_ahead.pop(0)
        else:
            self.value = next(self.iterable)

        if self.markers:
            # Remember the value so the innermost marker can replay it.
            self.markers[-1].append(self.value)

        return self.value

    def look(self, i=0):
        """ Look ahead of the iterable by some number of values without
        advancing past them.

        If the requested look ahead is past the end of the iterable then the
        default value (None unless changed with set_default()) is returned.

        """

        look_ahead = self.look_ahead

        # Buffer one value at a time: values already pulled from the
        # underlying iterator must be kept even if it runs out before
        # position i is reached, otherwise they would be lost for good.
        while len(look_ahead) <= i:
            try:
                look_ahead.append(next(self.iterable))
            except StopIteration:
                return self.default

        self.value = look_ahead[i]
        return self.value

    def last(self):
        """Return the value most recently returned by next() or look()."""
        return self.value

    def __enter__(self):
        self.push_marker()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Reset the iterator if there was an error
        if exc_type or exc_val or exc_tb:
            self.pop_marker(True)
        else:
            self.pop_marker(False)

    def push_marker(self):
        """ Push a marker on to the marker stack """
        self.markers.append(list())

    def pop_marker(self, reset):
        """ Pop a marker off of the marker stack. If reset is True then the
        iterator will be returned to the state it was in before the
        corresponding call to push_marker().

        """

        marker = self.markers.pop()

        if reset:
            # Make the values available to be read again
            marker.extend(self.look_ahead)
            self.look_ahead = marker
        elif self.markers:
            # Otherwise, reassign the values to the top marker
            self.markers[-1].extend(marker)
        else:
            # If there are not more markers in the stack then discard the values
            pass
If reset is True then the 154 | iterator will be returned to the state it was in before the 155 | corresponding call to push_marker(). 156 | 157 | """ 158 | 159 | saved = self.saved_markers.pop() 160 | 161 | if reset: 162 | self.marker = saved 163 | elif self.saved_markers: 164 | self.saved_markers[-1] = saved 165 | 166 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | 2 | ======== 3 | javalang 4 | ======== 5 | 6 | .. image:: https://travis-ci.org/c2nes/javalang.svg?branch=master 7 | :target: https://travis-ci.org/c2nes/javalang 8 | 9 | .. image:: https://badge.fury.io/py/javalang.svg 10 | :target: https://badge.fury.io/py/javalang 11 | 12 | javalang is a pure Python library for working with Java source 13 | code. javalang provides a lexer and parser targeting Java 8. The 14 | implementation is based on the Java language spec available at 15 | http://docs.oracle.com/javase/specs/jls/se8/html/. 16 | 17 | The following gives a very brief introduction to using javalang. 18 | 19 | --------------- 20 | Getting Started 21 | --------------- 22 | 23 | .. code-block:: python 24 | 25 | >>> import javalang 26 | >>> tree = javalang.parse.parse("package javalang.brewtab.com; class Test {}") 27 | 28 | This will return a ``CompilationUnit`` instance. This object is the root of a 29 | tree which may be traversed to extract different information about the 30 | compilation unit, 31 | 32 | .. code-block:: python 33 | 34 | >>> tree.package.name 35 | u'javalang.brewtab.com' 36 | >>> tree.types[0] 37 | ClassDeclaration 38 | >>> tree.types[0].name 39 | u'Test' 40 | 41 | The string passed to ``javalang.parse.parse()`` must represent a complete unit 42 | which simply means it should represent a complete, valid Java source file. 
Other 43 | methods in the ``javalang.parse`` module allow for some smaller code snippets to 44 | be parsed without providing an entire compilation unit. 45 | 46 | Working with the syntax tree 47 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 48 | 49 | ``CompilationUnit`` is a subclass of ``javalang.ast.Node``, as are its 50 | descendants in the tree. The ``javalang.tree`` module defines the different 51 | types of ``Node`` subclasses, each of which represents the different syntactic 52 | elements you will find in Java code. For more detail on what node types are 53 | available, see the ``javalang/tree.py`` source file until the documentation is 54 | complete. 55 | 56 | ``Node`` instances support iteration, 57 | 58 | .. code-block:: python 59 | 60 | >>> for path, node in tree: 61 | ... print path, node 62 | ... 63 | () CompilationUnit 64 | (CompilationUnit,) PackageDeclaration 65 | (CompilationUnit, [ClassDeclaration]) ClassDeclaration 66 | 67 | This iteration can also be filtered by type, 68 | 69 | .. code-block:: python 70 | 71 | >>> for path, node in tree.filter(javalang.tree.ClassDeclaration): 72 | ... print path, node 73 | ... 74 | (CompilationUnit, [ClassDeclaration]) ClassDeclaration 75 | 76 | --------------- 77 | Component Usage 78 | --------------- 79 | 80 | Internally, the ``javalang.parse.parse`` method is a simple method which creates 81 | a token stream for the input, initializes a new ``javalang.parser.Parser`` 82 | instance with the given token stream, and then invokes the parser's ``parse()`` 83 | method, returning the resulting ``CompilationUnit``. These components may 84 | also be used individually. 85 | 86 | Tokenizer 87 | ^^^^^^^^^ 88 | 89 | The tokenizer/lexer may be invoked directly by calling ``javalang.tokenizer.tokenize``, 90 | 91 | .. code-block:: python 92 | 93 | >>> javalang.tokenizer.tokenize('System.out.println("Hello " + "world");') 94 | 95 | 96 | This returns a generator which provides a stream of ``JavaToken`` objects. 
Each 97 | token carries position (line, column) and value information, 98 | 99 | .. code-block:: python 100 | 101 | >>> tokens = list(javalang.tokenizer.tokenize('System.out.println("Hello " + "world");')) 102 | >>> tokens[6].value 103 | u'"Hello "' 104 | >>> tokens[6].position 105 | (1, 19) 106 | 107 | The tokens are not directly instances of ``JavaToken``, but are instead 108 | instances of subclasses which identify their general type, 109 | 110 | .. code-block:: python 111 | 112 | >>> type(tokens[6]) 113 | 114 | >>> type(tokens[7]) 115 | 116 | 117 | 118 | **NOTE:** The shift operators ``>>`` and ``>>>`` are represented by multiple 119 | ``>`` tokens. This is because multiple ``>`` may appear in a row when closing 120 | nested generic parameter/arguments lists. This ambiguity is instead resolved by 121 | the parser. 122 | 123 | Parser 124 | ^^^^^^ 125 | 126 | To parse snippets of code, a parser may be used directly, 127 | 128 | .. code-block:: python 129 | 130 | >>> tokens = javalang.tokenizer.tokenize('System.out.println("Hello " + "world");') 131 | >>> parser = javalang.parser.Parser(tokens) 132 | >>> parser.parse_expression() 133 | MethodInvocation 134 | 135 | The parse methods are designed for incremental parsing so they will not restart 136 | at the beginning of the token stream. Attempting to call a parse method more 137 | than once will result in a ``JavaSyntaxError`` exception. 138 | 139 | Invoking the incorrect parse method will also result in a ``JavaSyntaxError`` 140 | exception, 141 | 142 | .. 
code-block:: python 143 | 144 | >>> tokens = javalang.tokenizer.tokenize('System.out.println("Hello " + "world");') 145 | >>> parser = javalang.parser.Parser(tokens) 146 | >>> parser.parse_type_declaration() 147 | Traceback (most recent call last): 148 | File "", line 1, in 149 | File "javalang/parser.py", line 336, in parse_type_declaration 150 | return self.parse_class_or_interface_declaration() 151 | File "javalang/parser.py", line 353, in parse_class_or_interface_declaration 152 | self.illegal("Expected type declaration") 153 | File "javalang/parser.py", line 122, in illegal 154 | raise JavaSyntaxError(description, at) 155 | javalang.parser.JavaSyntaxError 156 | 157 | The ``javalang.parse`` module also provides convenience methods for parsing more 158 | common types of code snippets. 159 | -------------------------------------------------------------------------------- /javalang/test/test_tokenizer.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from .. 
import tokenizer 3 | 4 | 5 | class TestTokenizer(unittest.TestCase): 6 | 7 | def test_tokenizer_annotation(self): 8 | # Given 9 | code = " @Override" 10 | 11 | # When 12 | tokens = list(tokenizer.tokenize(code)) 13 | 14 | # Then 15 | self.assertEqual(len(tokens), 2) 16 | self.assertEqual(tokens[0].value, "@") 17 | self.assertEqual(tokens[1].value, "Override") 18 | self.assertEqual(type(tokens[0]), tokenizer.Annotation) 19 | self.assertEqual(type(tokens[1]), tokenizer.Identifier) 20 | 21 | def test_tokenizer_javadoc(self): 22 | # Given 23 | code = "/**\n" \ 24 | " * See {@link BlockTokenSecretManager#setKeys(ExportedBlockKeys)}\n" \ 25 | " */" 26 | 27 | # When 28 | tokens = list(tokenizer.tokenize(code)) 29 | 30 | # Then 31 | self.assertEqual(len(tokens), 0) 32 | 33 | def test_tokenize_ignore_errors(self): 34 | # Given 35 | # character '#' was supposed to trigger an error of unknown token with a single line of javadoc 36 | code = " * See {@link BlockTokenSecretManager#setKeys(ExportedBlockKeys)}" 37 | 38 | # When 39 | tokens = list(tokenizer.tokenize(code, ignore_errors=True)) 40 | 41 | # Then 42 | self.assertEqual(len(tokens), 11) 43 | 44 | def test_tokenize_line_comment_eof(self): 45 | # Given 46 | code = " // This line comment at the end of the file has no newline" 47 | 48 | # When 49 | tokens = list(tokenizer.tokenize(code)) 50 | 51 | # Then 52 | self.assertEqual(len(tokens), 0) 53 | 54 | def test_tokenize_comment_line_with_period(self): 55 | # Given 56 | code = " * all of the servlets resistant to cross-site scripting attacks." 
57 | 58 | # When 59 | tokens = list(tokenizer.tokenize(code)) 60 | 61 | # Then 62 | self.assertEqual(len(tokens), 13) 63 | 64 | def test_tokenize_integer_at_end(self): 65 | # Given 66 | code = "nextKey = new BlockKey(serialNo, System.currentTimeMillis() + 3" 67 | 68 | # When 69 | tokens = list(tokenizer.tokenize(code, ignore_errors=True)) 70 | 71 | # Then 72 | self.assertEqual(len(tokens), 14) 73 | 74 | def test_tokenize_float_at_end(self): 75 | # Given 76 | code = "nextKey = new BlockKey(serialNo, System.currentTimeMillis() + 3.0" 77 | 78 | # When 79 | tokens = list(tokenizer.tokenize(code, ignore_errors=True)) 80 | 81 | # Then 82 | self.assertEqual(len(tokens), 14) 83 | 84 | def test_tokenize_hex_integer_at_end(self): 85 | # Given 86 | code = "nextKey = new BlockKey(serialNo, System.currentTimeMillis() + 0x3" 87 | 88 | # When 89 | tokens = list(tokenizer.tokenize(code, ignore_errors=True)) 90 | 91 | # Then 92 | self.assertEqual(len(tokens), 14) 93 | 94 | def test_tokenize_token_position_after_comment(self): 95 | # Given 96 | code = """ 97 | public int function() { 98 | int a = 10; 99 | // some comment 100 | int b = 10; 101 | } 102 | """ 103 | 104 | # When 105 | tokens = list(tokenizer.tokenize(code)) 106 | 107 | # Then 108 | # both token 6 and 11 are the "int" tokens of line 2 and 4 109 | self.assertEqual(tokens[6].position[1], 5) 110 | self.assertEqual(tokens[6].position[1], tokens[11].position[1]) 111 | 112 | def test_tokenize_hex_float_integer_at_end(self): 113 | # Given 114 | code = "nextKey = new BlockKey(serialNo, System.currentTimeMillis() + 0x3.2p2" 115 | 116 | # When 117 | tokens = list(tokenizer.tokenize(code, ignore_errors=True)) 118 | 119 | # Then 120 | self.assertEqual(len(tokens), 14) 121 | 122 | def test_string_delim_within_comment(self): 123 | 124 | # Given 125 | code = "* Returns 0 if it can't find the end \ 126 | if (*itr == '\r') { \ 127 | int status;" 128 | 129 | # When 130 | tokens = list(tokenizer.tokenize(code, ignore_errors=True)) 131 | 
132 | # Then 133 | self.assertEqual(len(tokens), 8) 134 | 135 | def test_inline_comment_position(self): 136 | # Columns 137 | # 11111111112 138 | # 12345678901234567890 139 | code = "int /* comment */ j;" 140 | tokens = list(tokenizer.tokenize(code)) 141 | 142 | int_token = tokens[0] 143 | j_token = tokens[1] 144 | semi_token = tokens[2] 145 | 146 | self.assertEqual(int_token.position.line, 1) 147 | self.assertEqual(int_token.position.column, 1) 148 | 149 | self.assertEqual(j_token.position.line, 1) 150 | self.assertEqual(j_token.position.column, 19) 151 | 152 | self.assertEqual(semi_token.position.line, 1) 153 | self.assertEqual(semi_token.position.column, 20) 154 | 155 | def test_multiline_inline_comment(self): 156 | code = """int /* 157 | hello 158 | world 159 | */ j;""" 160 | 161 | tokens = list(tokenizer.tokenize(code)) 162 | token_int = tokens[0] 163 | token_j = tokens[1] 164 | 165 | self.assertEqual(token_int.position.line, 1) 166 | self.assertEqual(token_int.position.column, 1) 167 | 168 | self.assertEqual(token_j.position.line, 4) 169 | self.assertEqual(token_j.position.column, 4) 170 | 171 | def test_multiline_inline_comment_end_of_input(self): 172 | code = """int /* 173 | hello 174 | world 175 | */""" 176 | 177 | tokens = list(tokenizer.tokenize(code)) 178 | token_int = tokens[0] 179 | 180 | self.assertEqual(token_int.position.line, 1) 181 | self.assertEqual(token_int.position.column, 1) 182 | 183 | def test_column_starts_at_one(self): 184 | code = """int j; 185 | int k; 186 | """ 187 | token = list(tokenizer.tokenize(code)) 188 | self.assertEqual(token[0].position.column, 1) 189 | self.assertEqual(token[3].position.column, 1) 190 | 191 | if __name__=="__main__": 192 | unittest.main() 193 | -------------------------------------------------------------------------------- /javalang/tree.py: -------------------------------------------------------------------------------- 1 | 2 | from .ast import Node 3 | 4 | # 
# ------------------------------------------------------------------------------

# Every class below declares an ``attrs`` tuple naming the attributes that the
# node type adds.  How ``Node`` consumes ``attrs`` (and merges it with the
# tuples of base classes) is defined in ``.ast`` and is not visible here.

class CompilationUnit(Node):
    attrs = ("package", "imports", "types")

class Import(Node):
    attrs = ("path", "static", "wildcard")

class Documented(Node):
    attrs = ("documentation",)

class Declaration(Node):
    attrs = ("modifiers", "annotations")

class TypeDeclaration(Declaration, Documented):
    """Base for type declarations; exposes filtered views of ``body``."""

    attrs = ("name", "body")

    @property
    def fields(self):
        """Return the ``FieldDeclaration`` items found in ``self.body``."""
        return [decl for decl in self.body if isinstance(decl, FieldDeclaration)]

    @property
    def methods(self):
        """Return the ``MethodDeclaration`` items found in ``self.body``."""
        return [decl for decl in self.body if isinstance(decl, MethodDeclaration)]

    @property
    def constructors(self):
        """Return the ``ConstructorDeclaration`` items found in ``self.body``."""
        return [decl for decl in self.body if isinstance(decl, ConstructorDeclaration)]

class PackageDeclaration(Declaration, Documented):
    attrs = ("name",)

class ClassDeclaration(TypeDeclaration):
    attrs = ("type_parameters", "extends", "implements")

class EnumDeclaration(TypeDeclaration):
    """Enum declaration.

    The member views read ``self.body.declarations`` instead of iterating
    ``self.body`` directly (presumably ``body`` is an ``EnumBody``, whose
    ``attrs`` in this module are ``("constants", "declarations")`` -- confirm
    against the parser).
    """

    attrs = ("implements",)

    @property
    def fields(self):
        """Return the ``FieldDeclaration`` items in ``self.body.declarations``."""
        return [decl for decl in self.body.declarations if isinstance(decl, FieldDeclaration)]

    @property
    def methods(self):
        """Return the ``MethodDeclaration`` items in ``self.body.declarations``."""
        return [decl for decl in self.body.declarations if isinstance(decl, MethodDeclaration)]

    @property
    def constructors(self):
        """Return the ``ConstructorDeclaration`` items in ``self.body.declarations``.

        Overridden for consistency with ``fields`` and ``methods``: the
        inherited property iterates ``self.body`` itself rather than its
        ``declarations`` list.
        """
        return [decl for decl in self.body.declarations if isinstance(decl, ConstructorDeclaration)]

class InterfaceDeclaration(TypeDeclaration):
    attrs = ("type_parameters", "extends",)

class AnnotationDeclaration(TypeDeclaration):
    attrs = ()

# ------------------------------------------------------------------------------

class Type(Node):
    attrs = ("name", "dimensions",)

class BasicType(Type):
    attrs = ()

class ReferenceType(Type):
    attrs = ("arguments", "sub_type")

class TypeArgument(Node):
    attrs = ("type", "pattern_type")
------------------------------------------------------------------------------ 71 | 72 | class TypeParameter(Node): 73 | attrs = ("name", "extends") 74 | 75 | # ------------------------------------------------------------------------------ 76 | 77 | class Annotation(Node): 78 | attrs = ("name", "element") 79 | 80 | class ElementValuePair(Node): 81 | attrs = ("name", "value") 82 | 83 | class ElementArrayValue(Node): 84 | attrs = ("values",) 85 | 86 | # ------------------------------------------------------------------------------ 87 | 88 | class Member(Documented): 89 | attrs = () 90 | 91 | class MethodDeclaration(Member, Declaration): 92 | attrs = ("type_parameters", "return_type", "name", "parameters", "throws", "body") 93 | 94 | class FieldDeclaration(Member, Declaration): 95 | attrs = ("type", "declarators") 96 | 97 | class ConstructorDeclaration(Declaration, Documented): 98 | attrs = ("type_parameters", "name", "parameters", "throws", "body") 99 | 100 | # ------------------------------------------------------------------------------ 101 | 102 | class ConstantDeclaration(FieldDeclaration): 103 | attrs = () 104 | 105 | class ArrayInitializer(Node): 106 | attrs = ("initializers",) 107 | 108 | class VariableDeclaration(Declaration): 109 | attrs = ("type", "declarators") 110 | 111 | class LocalVariableDeclaration(VariableDeclaration): 112 | attrs = () 113 | 114 | class VariableDeclarator(Node): 115 | attrs = ("name", "dimensions", "initializer") 116 | 117 | class FormalParameter(Declaration): 118 | attrs = ("type", "name", "varargs") 119 | 120 | class InferredFormalParameter(Node): 121 | attrs = ('name',) 122 | 123 | # ------------------------------------------------------------------------------ 124 | 125 | class Statement(Node): 126 | attrs = ("label",) 127 | 128 | class IfStatement(Statement): 129 | attrs = ("condition", "then_statement", "else_statement") 130 | 131 | class WhileStatement(Statement): 132 | attrs = ("condition", "body") 133 | 134 | class 
DoStatement(Statement): 135 | attrs = ("condition", "body") 136 | 137 | class ForStatement(Statement): 138 | attrs = ("control", "body") 139 | 140 | class AssertStatement(Statement): 141 | attrs = ("condition", "value") 142 | 143 | class BreakStatement(Statement): 144 | attrs = ("goto",) 145 | 146 | class ContinueStatement(Statement): 147 | attrs = ("goto",) 148 | 149 | class ReturnStatement(Statement): 150 | attrs = ("expression",) 151 | 152 | class ThrowStatement(Statement): 153 | attrs = ("expression",) 154 | 155 | class SynchronizedStatement(Statement): 156 | attrs = ("lock", "block") 157 | 158 | class TryStatement(Statement): 159 | attrs = ("resources", "block", "catches", "finally_block") 160 | 161 | class SwitchStatement(Statement): 162 | attrs = ("expression", "cases") 163 | 164 | class BlockStatement(Statement): 165 | attrs = ("statements",) 166 | 167 | class StatementExpression(Statement): 168 | attrs = ("expression",) 169 | 170 | # ------------------------------------------------------------------------------ 171 | 172 | class TryResource(Declaration): 173 | attrs = ("type", "name", "value") 174 | 175 | class CatchClause(Statement): 176 | attrs = ("parameter", "block") 177 | 178 | class CatchClauseParameter(Declaration): 179 | attrs = ("types", "name") 180 | 181 | # ------------------------------------------------------------------------------ 182 | 183 | class SwitchStatementCase(Node): 184 | attrs = ("case", "statements") 185 | 186 | class ForControl(Node): 187 | attrs = ("init", "condition", "update") 188 | 189 | class EnhancedForControl(Node): 190 | attrs = ("var", "iterable") 191 | 192 | # ------------------------------------------------------------------------------ 193 | 194 | class Expression(Node): 195 | attrs = () 196 | 197 | class Assignment(Expression): 198 | attrs = ("expressionl", "value", "type") 199 | 200 | class TernaryExpression(Expression): 201 | attrs = ("condition", "if_true", "if_false") 202 | 203 | class 
BinaryOperation(Expression): 204 | attrs = ("operator", "operandl", "operandr") 205 | 206 | class Cast(Expression): 207 | attrs = ("type", "expression") 208 | 209 | class MethodReference(Expression): 210 | attrs = ("expression", "method", "type_arguments") 211 | 212 | class LambdaExpression(Expression): 213 | attrs = ('parameters', 'body') 214 | 215 | # ------------------------------------------------------------------------------ 216 | 217 | class Primary(Expression): 218 | attrs = ("prefix_operators", "postfix_operators", "qualifier", "selectors") 219 | 220 | class Literal(Primary): 221 | attrs = ("value",) 222 | 223 | class This(Primary): 224 | attrs = () 225 | 226 | class MemberReference(Primary): 227 | attrs = ("member",) 228 | 229 | class Invocation(Primary): 230 | attrs = ("type_arguments", "arguments") 231 | 232 | class ExplicitConstructorInvocation(Invocation): 233 | attrs = () 234 | 235 | class SuperConstructorInvocation(Invocation): 236 | attrs = () 237 | 238 | class MethodInvocation(Invocation): 239 | attrs = ("member",) 240 | 241 | class SuperMethodInvocation(Invocation): 242 | attrs = ("member",) 243 | 244 | class SuperMemberReference(Primary): 245 | attrs = ("member",) 246 | 247 | class ArraySelector(Expression): 248 | attrs = ("index",) 249 | 250 | class ClassReference(Primary): 251 | attrs = ("type",) 252 | 253 | class VoidClassReference(ClassReference): 254 | attrs = () 255 | 256 | # ------------------------------------------------------------------------------ 257 | 258 | class Creator(Primary): 259 | attrs = ("type",) 260 | 261 | class ArrayCreator(Creator): 262 | attrs = ("dimensions", "initializer") 263 | 264 | class ClassCreator(Creator): 265 | attrs = ("constructor_type_arguments", "arguments", "body") 266 | 267 | class InnerClassCreator(Creator): 268 | attrs = ("constructor_type_arguments", "arguments", "body") 269 | 270 | # ------------------------------------------------------------------------------ 271 | 272 | class EnumBody(Node): 273 
| attrs = ("constants", "declarations") 274 | 275 | class EnumConstantDeclaration(Declaration, Documented): 276 | attrs = ("name", "arguments", "body") 277 | 278 | class AnnotationMethod(Declaration): 279 | attrs = ("name", "return_type", "dimensions", "default") 280 | 281 | -------------------------------------------------------------------------------- /javalang/test/test_java_8_syntax.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from pkg_resources import resource_string 4 | from .. import parse, parser, tree 5 | 6 | 7 | def setup_java_class(content_to_add): 8 | """ returns an example java class with the 9 | given content_to_add contained within a method. 10 | """ 11 | template = """ 12 | public class Lambda { 13 | 14 | public static void main(String args[]) { 15 | %s 16 | } 17 | } 18 | """ 19 | return template % content_to_add 20 | 21 | 22 | def filter_type_in_method(clazz, the_type, method_name): 23 | """ yields the result of filtering the given class for the given 24 | type inside the given method identified by its name. 25 | """ 26 | for path, node in clazz.filter(the_type): 27 | for p in reversed(path): 28 | if isinstance(p, tree.MethodDeclaration): 29 | if p.name == method_name: 30 | yield path, node 31 | 32 | 33 | class LambdaSupportTest(unittest.TestCase): 34 | 35 | """ Contains tests for java 8 lambda syntax. """ 36 | 37 | def assert_contains_lambda_expression_in_m( 38 | self, clazz, method_name='main'): 39 | """ asserts that the given tree contains a method with the supplied 40 | method name containing a lambda expression. 41 | """ 42 | matches = list(filter_type_in_method( 43 | clazz, tree.LambdaExpression, method_name)) 44 | if not matches: 45 | self.fail('No matching lambda expression found.') 46 | return matches 47 | 48 | def test_lambda_support_no_parameters_no_body(self): 49 | """ tests support for lambda with no parameters and no body. 
""" 50 | self.assert_contains_lambda_expression_in_m( 51 | parse.parse(setup_java_class("() -> {};"))) 52 | 53 | def test_lambda_support_no_parameters_expression_body(self): 54 | """ tests support for lambda with no parameters and an 55 | expression body. 56 | """ 57 | test_classes = [ 58 | setup_java_class("() -> 3;"), 59 | setup_java_class("() -> null;"), 60 | setup_java_class("() -> { return 21; };"), 61 | setup_java_class("() -> { System.exit(1); };"), 62 | ] 63 | for test_class in test_classes: 64 | clazz = parse.parse(test_class) 65 | self.assert_contains_lambda_expression_in_m(clazz) 66 | 67 | def test_lambda_support_no_parameters_complex_expression(self): 68 | """ tests support for lambda with no parameters and a 69 | complex expression body. 70 | """ 71 | code = """ 72 | () -> { 73 | if (true) return 21; 74 | else 75 | { 76 | int result = 21; 77 | return result / 2; 78 | } 79 | };""" 80 | self.assert_contains_lambda_expression_in_m( 81 | parse.parse(setup_java_class(code))) 82 | 83 | def test_parameter_no_type_expression_body(self): 84 | """ tests support for lambda with parameters with inferred types. """ 85 | test_classes = [ 86 | setup_java_class("(bar) -> bar + 1;"), 87 | setup_java_class("bar -> bar + 1;"), 88 | setup_java_class("x -> x.length();"), 89 | setup_java_class("y -> { y.boom(); };"), 90 | ] 91 | for test_class in test_classes: 92 | clazz = parse.parse(test_class) 93 | self.assert_contains_lambda_expression_in_m(clazz) 94 | 95 | def test_parameter_with_type_expression_body(self): 96 | """ tests support for lambda with parameters with formal types. 
""" 97 | test_classes = [ 98 | setup_java_class("(int foo) -> { return foo + 2; };"), 99 | setup_java_class("(String s) -> s.length();"), 100 | setup_java_class("(int foo) -> foo + 1;"), 101 | setup_java_class("(Thread th) -> { th.start(); };"), 102 | setup_java_class("(String foo, String bar) -> " 103 | "foo + bar;"), 104 | ] 105 | for test_class in test_classes: 106 | clazz = parse.parse(test_class) 107 | self.assert_contains_lambda_expression_in_m(clazz) 108 | 109 | def test_parameters_with_no_type_expression_body(self): 110 | """ tests support for multiple lambda parameters 111 | that are specified without their types. 112 | """ 113 | self.assert_contains_lambda_expression_in_m( 114 | parse.parse(setup_java_class("(x, y) -> x + y;"))) 115 | 116 | def test_parameters_with_mixed_inferred_and_declared_types(self): 117 | """ this tests that lambda type specification mixing is considered 118 | invalid as per the specifications. 119 | """ 120 | with self.assertRaises(parser.JavaSyntaxError): 121 | parse.parse(setup_java_class("(x, int y) -> x+y;")) 122 | 123 | def test_parameters_inferred_types_with_modifiers(self): 124 | """ this tests that lambda inferred type parameters with modifiers are 125 | considered invalid as per the specifications. 126 | """ 127 | with self.assertRaises(parser.JavaSyntaxError): 128 | parse.parse(setup_java_class("(x, final y) -> x+y;")) 129 | 130 | def test_invalid_parameters_are_invalid(self): 131 | """ this tests that invalid lambda parameters are are 132 | considered invalid as per the specifications. 133 | """ 134 | with self.assertRaises(parser.JavaSyntaxError): 135 | parse.parse(setup_java_class("(a b c) -> {};")) 136 | 137 | def test_cast_works(self): 138 | """ this tests that a cast expression works as expected. """ 139 | parse.parse(setup_java_class("String x = (String) A.x() ;")) 140 | 141 | 142 | class MethodReferenceSyntaxTest(unittest.TestCase): 143 | 144 | """ Contains tests for java 8 method reference syntax. 
""" 145 | 146 | def assert_contains_method_reference_expression_in_m( 147 | self, clazz, method_name='main'): 148 | """ asserts that the given class contains a method with the supplied 149 | method name containing a method reference. 150 | """ 151 | matches = list(filter_type_in_method( 152 | clazz, tree.MethodReference, method_name)) 153 | if not matches: 154 | self.fail('No matching method reference found.') 155 | return matches 156 | 157 | def test_method_reference(self): 158 | """ tests that method references are supported. """ 159 | self.assert_contains_method_reference_expression_in_m( 160 | parse.parse(setup_java_class("String::length;"))) 161 | 162 | def test_method_reference_to_the_new_method(self): 163 | """ test support for method references to 'new'. """ 164 | self.assert_contains_method_reference_expression_in_m( 165 | parse.parse(setup_java_class("String::new;"))) 166 | 167 | def test_method_reference_to_the_new_method_with_explict_type(self): 168 | """ test support for method references to 'new' with an 169 | explicit type. 170 | """ 171 | self.assert_contains_method_reference_expression_in_m( 172 | parse.parse(setup_java_class("String:: new;"))) 173 | 174 | def test_method_reference_from_super(self): 175 | """ test support for method references from 'super'. """ 176 | self.assert_contains_method_reference_expression_in_m( 177 | parse.parse(setup_java_class("super::toString;"))) 178 | 179 | def test_method_reference_from_super_with_identifier(self): 180 | """ test support for method references from Identifier.super. """ 181 | self.assert_contains_method_reference_expression_in_m( 182 | parse.parse(setup_java_class("String.super::toString;"))) 183 | 184 | @unittest.expectedFailure 185 | def test_method_reference_explicit_type_arguments_for_generic_type(self): 186 | """ currently there is no support for method references 187 | for an explicit type. 
188 | """ 189 | self.assert_contains_method_reference_expression_in_m( 190 | parse.parse(setup_java_class("List::size;"))) 191 | 192 | def test_method_reference_explicit_type_arguments(self): 193 | """ test support for method references with an explicit type. 194 | """ 195 | self.assert_contains_method_reference_expression_in_m( 196 | parse.parse(setup_java_class("Arrays:: sort;"))) 197 | 198 | @unittest.expectedFailure 199 | def test_method_reference_from_array_type(self): 200 | """ currently there is no support for method references 201 | from a primary type. 202 | """ 203 | self.assert_contains_method_reference_expression_in_m( 204 | parse.parse(setup_java_class("int[]::new;"))) 205 | 206 | 207 | class InterfaceSupportTest(unittest.TestCase): 208 | 209 | """ Contains tests for java 8 interface extensions. """ 210 | 211 | def test_interface_support_static_methods(self): 212 | parse.parse(""" 213 | interface Foo { 214 | void foo(); 215 | 216 | static Foo create() { 217 | return new Foo() { 218 | @Override 219 | void foo() { 220 | System.out.println("foo"); 221 | } 222 | }; 223 | } 224 | } 225 | """) 226 | 227 | def test_interface_support_default_methods(self): 228 | parse.parse(""" 229 | interface Foo { 230 | default void foo() { 231 | System.out.println("foo"); 232 | } 233 | } 234 | """) 235 | 236 | 237 | def main(): 238 | unittest.main() 239 | 240 | if __name__ == '__main__': 241 | main() 242 | -------------------------------------------------------------------------------- /javalang/tokenizer.py: -------------------------------------------------------------------------------- 1 | import re 2 | import unicodedata 3 | from collections import namedtuple 4 | 5 | import six 6 | 7 | 8 | class LexerError(Exception): 9 | pass 10 | 11 | Position = namedtuple('Position', ['line', 'column']) 12 | 13 | class JavaToken(object): 14 | def __init__(self, value, position=None, javadoc=None): 15 | self.value = value 16 | self.position = position 17 | self.javadoc = javadoc 18 | 
class JavaToken(object):
    """Base class of all lexer tokens.

    Stores the token text (``value``), an optional ``position`` (indexed as
    ``position[0]`` = line, ``position[1]`` = column by ``__repr__``) and an
    optional ``javadoc`` payload.
    """

    def __init__(self, value, position=None, javadoc=None):
        self.value = value
        self.position = position
        self.javadoc = javadoc

    def __repr__(self):
        if self.position:
            return '%s "%s" line %d, position %d' % (
                self.__class__.__name__, self.value, self.position[0], self.position[1]
                )
        else:
            return '%s "%s"' % (self.__class__.__name__, self.value)

    def __str__(self):
        return repr(self)

    def __eq__(self, other):
        # Deliberate guard: tokens must be compared through their ``value``
        # or their type, never with ``==``.
        raise Exception("Direct comparison not allowed")

class EndOfInput(JavaToken):
    pass

class Keyword(JavaToken):
    VALUES = set(['abstract', 'assert', 'boolean', 'break', 'byte', 'case',
                  'catch', 'char', 'class', 'const', 'continue', 'default',
                  'do', 'double', 'else', 'enum', 'extends', 'final',
                  'finally', 'float', 'for', 'goto', 'if', 'implements',
                  'import', 'instanceof', 'int', 'interface', 'long', 'native',
                  'new', 'package', 'private', 'protected', 'public', 'return',
                  'short', 'static', 'strictfp', 'super', 'switch',
                  'synchronized', 'this', 'throw', 'throws', 'transient', 'try',
                  'void', 'volatile', 'while'])


class Modifier(Keyword):
    VALUES = set(['abstract', 'default', 'final', 'native', 'private',
                  'protected', 'public', 'static', 'strictfp', 'synchronized',
                  'transient', 'volatile'])

class BasicType(Keyword):
    VALUES = set(['boolean', 'byte', 'char', 'double',
                  'float', 'int', 'long', 'short'])

class Literal(JavaToken):
    pass

class Integer(Literal):
    pass

# Derives from Integer (previously Literal) so that it is consistent with the
# octal/binary/hex variants and ``isinstance(tok, Integer)`` matches every
# integer literal.  It remains a Literal through Integer.
class DecimalInteger(Integer):
    pass

class OctalInteger(Integer):
    pass

class BinaryInteger(Integer):
    pass

class HexInteger(Integer):
    pass

class FloatingPoint(Literal):
    pass

class DecimalFloatingPoint(FloatingPoint):
    pass

class HexFloatingPoint(FloatingPoint):
    pass

class Boolean(Literal):
    VALUES = set(["true", "false"])

class Character(Literal):
    pass

class String(Literal):
    pass
class Null(Literal): 94 | pass 95 | 96 | class Separator(JavaToken): 97 | VALUES = set(['(', ')', '{', '}', '[', ']', ';', ',', '.']) 98 | 99 | class Operator(JavaToken): 100 | MAX_LEN = 4 101 | VALUES = set(['>>>=', '>>=', '<<=', '%=', '^=', '|=', '&=', '/=', 102 | '*=', '-=', '+=', '<<', '--', '++', '||', '&&', '!=', 103 | '>=', '<=', '==', '%', '^', '|', '&', '/', '*', '-', 104 | '+', ':', '?', '~', '!', '<', '>', '=', '...', '->', '::']) 105 | 106 | # '>>>' and '>>' are excluded so that >> becomes two tokens and >>> becomes 107 | # three. This is done because we can not distinguish the operators >> and 108 | # >>> from the closing of multipel type parameter/argument lists when 109 | # lexing. The job of potentially recombining these symbols is left to the 110 | # parser 111 | 112 | INFIX = set(['||', '&&', '|', '^', '&', '==', '!=', '<', '>', '<=', '>=', 113 | '<<', '>>', '>>>', '+', '-', '*', '/', '%']) 114 | 115 | PREFIX = set(['++', '--', '!', '~', '+', '-']) 116 | 117 | POSTFIX = set(['++', '--']) 118 | 119 | ASSIGNMENT = set(['=', '+=', '-=', '*=', '/=', '&=', '|=', '^=', '%=', 120 | '<<=', '>>=', '>>>=']) 121 | 122 | LAMBDA = set(['->']) 123 | 124 | METHOD_REFERENCE = set(['::',]) 125 | 126 | def is_infix(self): 127 | return self.value in self.INFIX 128 | 129 | def is_prefix(self): 130 | return self.value in self.PREFIX 131 | 132 | def is_postfix(self): 133 | return self.value in self.POSTFIX 134 | 135 | def is_assignment(self): 136 | return self.value in self.ASSIGNMENT 137 | 138 | 139 | class Annotation(JavaToken): 140 | pass 141 | 142 | class Identifier(JavaToken): 143 | pass 144 | 145 | 146 | class JavaTokenizer(object): 147 | 148 | IDENT_START_CATEGORIES = set(['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Pc', 'Sc']) 149 | 150 | IDENT_PART_CATEGORIES = set(['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Mc', 'Mn', 'Nd', 'Nl', 'Pc', 'Sc']) 151 | 152 | def __init__(self, data, ignore_errors=False): 153 | self.data = data 154 | self.ignore_errors = ignore_errors 155 | 
self.errors = [] 156 | 157 | # Rows and columns both start at 1 158 | self.current_line = 1 159 | self.start_of_line = -1 160 | 161 | self.operators = [set() for i in range(0, Operator.MAX_LEN)] 162 | 163 | for v in Operator.VALUES: 164 | self.operators[len(v) - 1].add(v) 165 | 166 | self.whitespace_consumer = re.compile(r'[^\s]') 167 | 168 | self.javadoc = None 169 | 170 | 171 | def reset(self): 172 | self.i = 0 173 | self.j = 0 174 | 175 | def consume_whitespace(self): 176 | match = self.whitespace_consumer.search(self.data, self.i + 1) 177 | 178 | if not match: 179 | self.i = self.length 180 | return 181 | 182 | i = match.start() 183 | 184 | start_of_line = self.data.rfind('\n', self.i, i) 185 | 186 | if start_of_line != -1: 187 | self.start_of_line = start_of_line 188 | self.current_line += self.data.count('\n', self.i, i) 189 | 190 | self.i = i 191 | 192 | def read_string(self): 193 | delim = self.data[self.i] 194 | 195 | state = 0 196 | j = self.i + 1 197 | length = self.length 198 | 199 | while True: 200 | if j >= length: 201 | self.error('Unterminated character/string literal') 202 | break 203 | 204 | if state == 0: 205 | if self.data[j] == '\\': 206 | state = 1 207 | elif self.data[j] == delim: 208 | break 209 | 210 | elif state == 1: 211 | if self.data[j] in 'btnfru"\'\\': 212 | state = 0 213 | elif self.data[j] in '0123': 214 | state = 2 215 | elif self.data[j] in '01234567': 216 | state = 3 217 | else: 218 | self.error('Illegal escape character', self.data[j]) 219 | 220 | elif state == 2: 221 | # Possibly long octal 222 | if self.data[j] in '01234567': 223 | state = 3 224 | elif self.data[j] == '\\': 225 | state = 1 226 | elif self.data[j] == delim: 227 | break 228 | 229 | elif state == 3: 230 | state = 0 231 | 232 | if self.data[j] == '\\': 233 | state = 1 234 | elif self.data[j] == delim: 235 | break 236 | 237 | j += 1 238 | 239 | self.j = j + 1 240 | 241 | def try_operator(self): 242 | for l in range(min(self.length - self.i, Operator.MAX_LEN), 0, 
-1): 243 | if self.data[self.i:self.i + l] in self.operators[l - 1]: 244 | self.j = self.i + l 245 | return True 246 | return False 247 | 248 | def read_comment(self): 249 | if self.data[self.i + 1] == '/': 250 | terminator, accept_eof = '\n', True 251 | else: 252 | terminator, accept_eof = '*/', False 253 | 254 | i = self.data.find(terminator, self.i + 2) 255 | 256 | if i != -1: 257 | i += len(terminator) 258 | elif accept_eof: 259 | i = self.length 260 | else: 261 | self.error('Unterminated block comment') 262 | partial_comment = self.data[self.i:] 263 | self.i = self.length 264 | return partial_comment 265 | 266 | comment = self.data[self.i:i] 267 | start_of_line = self.data.rfind('\n', self.i, i) 268 | 269 | if start_of_line != -1: 270 | self.start_of_line = start_of_line 271 | self.current_line += self.data.count('\n', self.i, i) 272 | 273 | self.i = i 274 | 275 | return comment 276 | 277 | def read_decimal_float_or_integer(self): 278 | orig_i = self.i 279 | self.j = self.i 280 | 281 | self.read_decimal_integer() 282 | 283 | if self.j >= len(self.data) or self.data[self.j] not in '.eEfFdD': 284 | return DecimalInteger 285 | 286 | if self.data[self.j] == '.': 287 | self.i = self.j + 1 288 | self.read_decimal_integer() 289 | 290 | if self.j < len(self.data) and self.data[self.j] in 'eE': 291 | self.j = self.j + 1 292 | 293 | if self.j < len(self.data) and self.data[self.j] in '-+': 294 | self.j = self.j + 1 295 | 296 | self.i = self.j 297 | self.read_decimal_integer() 298 | 299 | if self.j < len(self.data) and self.data[self.j] in 'fFdD': 300 | self.j = self.j + 1 301 | 302 | self.i = orig_i 303 | return DecimalFloatingPoint 304 | 305 | def read_hex_integer_or_float(self): 306 | orig_i = self.i 307 | self.j = self.i + 2 308 | 309 | self.read_hex_integer() 310 | 311 | if self.j >= len(self.data) or self.data[self.j] not in '.pP': 312 | return HexInteger 313 | 314 | if self.data[self.j] == '.': 315 | self.j = self.j + 1 316 | 
# JavaTokenizer members: numeric-literal readers, separator check and input
# decoding.  ``self.i`` is the start index of the token being read and
# ``self.j`` is moved to the index just past its last character.

def read_hex_integer_or_float(self):
    """Scan a ``0x``-prefixed literal starting at ``self.i``.

    Returns ``HexInteger`` or ``HexFloatingPoint`` and leaves ``self.j`` just
    past the literal; ``self.i`` is restored before returning.
    """
    orig_i = self.i
    self.j = self.i + 2

    self.read_hex_integer()

    if self.j >= len(self.data) or self.data[self.j] not in '.pP':
        return HexInteger

    if self.data[self.j] == '.':
        self.j = self.j + 1
        self.read_digits('0123456789abcdefABCDEF')

    if self.j < len(self.data) and self.data[self.j] in 'pP':
        self.j = self.j + 1
    else:
        self.error('Invalid hex float literal')

    if self.j < len(self.data) and self.data[self.j] in '-+':
        self.j = self.j + 1

    # read_decimal_integer() starts at self.i, so point it at the exponent.
    self.i = self.j
    self.read_decimal_integer()

    if self.j < len(self.data) and self.data[self.j] in 'fFdD':
        self.j = self.j + 1

    self.i = orig_i
    return HexFloatingPoint

def read_digits(self, digits):
    """Advance ``self.j`` over a run of characters from ``digits``.

    Underscores between digits are skipped, but ``self.j`` only moves past
    them once another digit follows.  If the character that stopped the scan
    is ``l`` or ``L`` it is consumed as well.
    """
    tmp_i = 0   # underscores seen since the last accepted digit
    c = None    # last character examined; stays None at end of input

    while self.j + tmp_i < len(self.data):
        c = self.data[self.j + tmp_i]

        if c in digits:
            self.j += 1 + tmp_i
            tmp_i = 0
        elif c == '_':
            tmp_i += 1
        else:
            break

    # ``c`` is None when the scan starts at end of input (e.g. "1." or "0x"
    # at EOF); ``None in 'lL'`` would raise TypeError.
    if c is not None and c in 'lL':
        self.j += 1

def read_decimal_integer(self):
    """Read decimal digits starting at ``self.i``."""
    self.j = self.i
    self.read_digits('0123456789')

def read_hex_integer(self):
    """Read hex digits after a two-character prefix at ``self.i``."""
    self.j = self.i + 2
    self.read_digits('0123456789abcdefABCDEF')

def read_bin_integer(self):
    """Read binary digits after a two-character prefix at ``self.i``."""
    self.j = self.i + 2
    self.read_digits('01')

def read_octal_integer(self):
    """Read octal digits after the leading ``0`` at ``self.i``."""
    self.j = self.i + 1
    self.read_digits('01234567')

def read_integer_or_float(self, c, c_next):
    """Dispatch on the first two characters and return the token class."""
    if c == '0' and c_next in 'xX':
        return self.read_hex_integer_or_float()
    elif c == '0' and c_next in 'bB':
        self.read_bin_integer()
        return BinaryInteger
    elif c == '0' and c_next in '01234567':
        self.read_octal_integer()
        return OctalInteger
    else:
        return self.read_decimal_float_or_integer()

def try_separator(self):
    """Return True and set ``self.j`` if ``self.data[self.i]`` is a separator."""
    if self.data[self.i] in Separator.VALUES:
        self.j = self.i + 1
        return True
    return False

def decode_data(self):
    """Return ``self.data`` as text, decoding byte input if necessary.

    Reports an error through ``self.error`` when no codec succeeds.
    """
    # Encodings to try in order
    codecs = ['utf_8', 'iso-8859-1']

    # If data is already unicode don't try to redecode
    if isinstance(self.data, six.text_type):
        return self.data

    for codec in codecs:
        try:
            data = self.data.decode(codec)
            return data
        except UnicodeDecodeError:
            pass

    self.error('Could not decode input data')
def pre_tokenize(self):
    """Translate unicode escapes in the input before tokenizing.

    The text returned by ``self.decode_data()`` is scanned for a backslash
    followed by one or more ``u`` characters and exactly four hexadecimal
    digits; each such escape is replaced by the character it denotes. A
    backslash followed by any other character is skipped together with
    that character, so an escaped backslash never starts an escape.

    An escape whose digit group is not four hexadecimal digits is reported
    through ``self.error('Invalid unicode escape', ...)``. If that call
    returns (errors are being collected rather than raised), the malformed
    text is kept unchanged and scanning resumes after the ``u`` marker.
    Input that ends right after the ``u`` marker is likewise kept as is.

    The result is stored in ``self.data`` and its length in
    ``self.length``.
    """
    data = self.decode_data()
    length = len(data)
    hex_digits = '0123456789abcdefABCDEF'

    pieces = []
    copied_to = 0  # everything before this index is already in ``pieces``
    pos = 0

    while True:
        backslash = data.find('\\', pos)

        if backslash == -1 or backslash + 1 >= length:
            break

        if data[backslash + 1] != 'u':
            # Not an escape marker: skip the backslash and the character
            # it escapes as one unit.
            pos = backslash + 2
            continue

        # Any number of 'u' characters may follow the backslash.
        digits_at = backslash + 1
        while digits_at < length and data[digits_at] == 'u':
            digits_at += 1

        if digits_at >= length:
            # Input ended inside the marker; leave the text untouched.
            break

        code = data[digits_at:digits_at + 4]

        if len(code) == 4 and all(ch in hex_digits for ch in code):
            pieces.append(data[copied_to:backslash])
            pieces.append(six.unichr(int(code, 16)))
            copied_to = pos = digits_at + 4
        else:
            # int(code, 16) alone would also accept truncated, signed or
            # whitespace-padded groups, so the digits are checked first.
            self.error('Invalid unicode escape', code)
            pos = digits_at

    pieces.append(data[copied_to:])

    self.data = ''.join(pieces)
    self.length = len(self.data)
def error(self, message, char=None):
    """Record a lexer error and raise it unless errors are being ignored.

    The message is extended with the offending text, the current line
    number and the stripped source line around ``self.i``.

    :param message: short description of the problem.
    :param char: offending text; when empty or omitted, the character at
        ``self.j`` is used (an empty string if ``self.j`` is past the end
        of the input).
    :raises LexerError: when ``self.ignore_errors`` is false. The error is
        appended to ``self.errors`` in either case.
    """
    data = self.data

    # Provide additional information in the error message
    line_start = data.rfind('\n', 0, self.i) + 1
    line_end = data.find('\n', self.i)
    if line_end == -1:
        # No newline after the error position: the line runs to the end
        # of the input. Slicing with -1 would drop its last character.
        line_end = len(data)
    line = data[line_start:line_end].strip()

    if not char:
        # A one-character slice never raises, unlike indexing, when
        # self.j is at or beyond the end of the input.
        char = data[self.j:self.j + 1]

    message = u'%s at "%s", line %s: %s' % (message, char, self.current_line, line)
    lexer_error = LexerError(message)
    self.errors.append(lexer_error)

    if not self.ignore_errors:
        raise lexer_error
output.append(' ' * indent) 601 | 602 | if token.value == '{': 603 | indent += 4 604 | output.append(' {\n') 605 | output.append(' ' * indent) 606 | 607 | elif token.value == '}': 608 | closed_block = True 609 | 610 | elif token.value == ',': 611 | output.append(', ') 612 | 613 | elif isinstance(token, (Literal, Keyword, Identifier)): 614 | if ident_last: 615 | # If the last token was a literla/keyword/identifer put a space in between 616 | output.append(' ') 617 | ident_last = True 618 | output.append(token.value) 619 | 620 | elif isinstance(token, Operator): 621 | output.append(' ' + token.value + ' ') 622 | 623 | elif token.value == ';': 624 | output.append(';\n') 625 | output.append(' ' * indent) 626 | 627 | else: 628 | output.append(token.value) 629 | 630 | ident_last = isinstance(token, (Literal, Keyword, Identifier)) 631 | 632 | if closed_block: 633 | output.append('\n}') 634 | 635 | output.append('\n') 636 | 637 | return ''.join(output) 638 | -------------------------------------------------------------------------------- /javalang/parser.py: -------------------------------------------------------------------------------- 1 | import six 2 | 3 | from . import util 4 | from . 
def parse_debug(method):
    """Decorator that optionally traces calls to a parser method.

    When the module-level ``ENABLE_DEBUG_SUPPORT`` flag is false at
    decoration time, ``method`` is returned unchanged. Otherwise it is
    wrapped so that every call maintains ``self.recursion_depth`` and,
    while ``self.debug`` is true, prints one line on entry and one on exit
    (including the message of any exception passing through).

    The wrapper copies the wrapped method's name and docstring, so
    decorated parser methods stay identifiable in tracebacks and
    introspection.
    """
    if not ENABLE_DEBUG_SUPPORT:
        return method

    # Imported here so the block does not depend on a module-level import.
    import functools

    @functools.wraps(method)
    def _method(self):
        if not hasattr(self, 'recursion_depth'):
            self.recursion_depth = 0

        if self.debug:
            depth = "%02d" % (self.recursion_depth,)
            token = six.text_type(self.tokens.look())
            start_value = self.tokens.look().value
            name = method.__name__
            sep = ("-" * self.recursion_depth)
            e_message = ""

            print("%s %s> %s(%s)" % (depth, sep, name, token))

            self.recursion_depth += 1

            try:
                r = method(self)

            except JavaSyntaxError as e:
                e_message = e.description
                raise

            except Exception as e:
                e_message = six.text_type(e)
                raise

            finally:
                # Runs on success and on failure, so the exit line is
                # always printed and the depth always restored.
                token = six.text_type(self.tokens.last())
                print("%s <%s %s(%s, %s) %s" %
                      (depth, sep, name, start_value, token, e_message))
                self.recursion_depth -= 1
        else:
            self.recursion_depth += 1
            try:
                r = method(self)
            finally:
                self.recursion_depth -= 1

        return r

    return _method
def accept(self, *accepts):
    """Consume one token per expectation and return the last token's value.

    Each expectation is either a string, which must equal the token's
    ``value``, or a class, of which the token must be an instance.
    Expectations of any other kind consume a token without checking it.

    :raises JavaParserError: if no expectations are given.

    A token that does not satisfy its expectation is reported through
    ``self.illegal``, with that token passed as the error location.
    """
    if not accepts:
        raise JavaParserError("Missing acceptable values")

    token = None

    for accept in accepts:
        token = next(self.tokens)

        # The offending token has already been consumed at this point, so
        # it is handed to illegal() explicitly; illegal()'s own fallback
        # would report the token that follows it.
        if isinstance(accept, six.string_types):
            if token.value != accept:
                self.illegal("Expected '%s'" % (accept,), token)
        elif isinstance(accept, type) and not isinstance(token, accept):
            self.illegal("Expected %s" % (accept.__name__,), token)

    return token.value
def is_annotation_declaration(self, i=0):
    """ Returns true if the position is the start of an annotation
        declaration, i.e. an annotation token followed by 'interface'
        (as opposed to an annotation application)

    """

    return (isinstance(self.tokens.look(i), Annotation)
            and self.tokens.look(i + 1).value == 'interface')
@parse_debug
def parse_import_declaration(self):
    """Parse one ``import`` statement and return a ``tree.Import`` node.

    Accepts an optional ``static`` keyword, a dotted sequence of
    identifiers and an optional trailing ``.*``; the statement must end
    with ``;``. The identifiers are joined with ``.`` to form the path.
    """
    self.accept('import')

    is_static = False
    if self.try_accept('static'):
        is_static = True

    segments = [self.parse_identifier()]
    wildcard = False

    # Each '.' is followed either by '*' (which ends the path) or by
    # another identifier.
    while self.try_accept('.'):
        if self.try_accept('*'):
            wildcard = True
            break
        segments.append(self.parse_identifier())

    self.accept(';')

    return tree.Import(path='.'.join(segments),
                       static=is_static,
                       wildcard=wildcard)
@parse_debug
def parse_normal_class_declaration(self):
    """Parse a ``class`` declaration and return a ``tree.ClassDeclaration``.

    After the ``class`` keyword and the class name, type parameters, an
    ``extends`` type and an ``implements`` type list are each parsed only
    when present (and are ``None`` otherwise); the class body follows.
    """
    self.accept('class')
    class_name = self.parse_identifier()

    generics = self.parse_type_parameters() if self.would_accept('<') else None
    superclass = self.parse_type() if self.try_accept('extends') else None
    interfaces = self.parse_type_list() if self.try_accept('implements') else None

    members = self.parse_class_body()

    return tree.ClassDeclaration(name=class_name,
                                 type_parameters=generics,
                                 extends=superclass,
                                 implements=interfaces,
                                 body=members)
@parse_debug
def parse_reference_type(self):
    """Parse a possibly qualified, possibly generic reference type.

    Returns the outermost ``tree.ReferenceType``. Each further segment
    after a ``.`` is stored as the ``sub_type`` of the segment before it,
    and type arguments are attached to the segment they follow.
    """
    root = tree.ReferenceType()
    node = root
    more_segments = True

    while more_segments:
        node.name = self.parse_identifier()

        if self.would_accept('<'):
            node.arguments = self.parse_type_arguments()

        more_segments = self.try_accept('.')
        if more_segments:
            # Chain a fresh node for the segment that follows the dot.
            node.sub_type = tree.ReferenceType()
            node = node.sub_type

    return root
@parse_debug
def parse_type_list(self):
    """Parse a comma-separated list of types and return them as a list.

    A basic type must be followed by at least one ``[]`` pair; a reference
    type may stand alone. Any further ``[]`` pairs are added to the type's
    ``dimensions``.
    """
    collected = []
    expect_more = True

    while expect_more:
        if self.would_accept(BasicType):
            entry = self.parse_basic_type()
            # The first array dimension is mandatory for a basic type.
            self.accept('[', ']')
            entry.dimensions = [None]
        else:
            entry = self.parse_reference_type()
            entry.dimensions = []

        entry.dimensions += self.parse_array_dimension()
        collected.append(entry)

        expect_more = self.try_accept(',')

    return collected
@parse_debug
def parse_array_dimension(self):
    """Consume consecutive ``[]`` pairs and return one ``None`` per pair.

    The result is an empty list when no ``[]`` pair follows.
    """
    dimensions = []

    while self.try_accept('[', ']'):
        dimensions.append(None)

    return dimensions
@parse_debug
def parse_annotation(self):
    """Parse an annotation application and return a ``tree.Annotation``.

    After ``@`` and the qualified annotation name, an optional
    parenthesised element is parsed; the element stays ``None`` when the
    parentheses are absent or empty.
    """
    self.accept('@')
    annotation_name = self.parse_qualified_identifier()

    element = None
    if self.try_accept('('):
        if not self.would_accept(')'):
            element = self.parse_annotation_element()
        self.accept(')')

    return tree.Annotation(name=annotation_name,
                           element=element)
@parse_debug
def parse_class_body_declaration(self):
    """Parse one entry of a class body.

    Returns ``None`` for a stray ``;``, the parsed block for a static or
    instance initializer, and otherwise whatever
    ``parse_member_declaration`` returns.
    """
    if self.try_accept(';'):
        return None

    if self.would_accept('static', '{'):
        # Static initializer: drop the keyword, then parse the block.
        self.accept('static')
        return self.parse_block()

    if self.would_accept('{'):
        return self.parse_block()

    return self.parse_member_declaration()
@parse_debug
def parse_method_or_field_rest(self):
    """Parse what follows the type and name of a class member.

    An opening parenthesis selects the method form, handled by
    ``parse_method_declarator_rest``; anything else is parsed as field
    declarators, which must be terminated by ``;``.
    """
    if self.would_accept('('):
        return self.parse_method_declarator_rest()

    rest = self.parse_field_declarators_rest()
    self.accept(';')
    return rest
@parse_debug
def parse_void_method_declarator_rest(self):
    """Parse the parameters, ``throws`` clause and body of a void method.

    Returns a ``tree.MethodDeclaration``. ``throws`` is ``None`` without a
    ``throws`` clause; ``body`` is ``None`` when the declaration ends with
    ``;`` instead of a block.
    """
    parameters = self.parse_formal_parameters()

    thrown = None
    if self.try_accept('throws'):
        thrown = self.parse_qualified_identifier_list()

    block = None
    if self.would_accept('{'):
        block = self.parse_block()
    else:
        self.accept(';')

    return tree.MethodDeclaration(parameters=parameters,
                                  throws=thrown,
                                  body=block)
method.name = method_name 942 | 943 | else: 944 | method_return_type = self.parse_type() 945 | method_name = self.parse_identifier() 946 | 947 | method = self.parse_method_declarator_rest() 948 | 949 | method_return_type.dimensions += method.return_type.dimensions 950 | method.return_type = method_return_type 951 | method.name = method_name 952 | 953 | method._position = token.position 954 | method.type_parameters = type_parameters 955 | return method 956 | 957 | # ------------------------------------------------------------------------------ 958 | # -- Interface body -- 959 | 960 | @parse_debug 961 | def parse_interface_body(self): 962 | declarations = list() 963 | 964 | self.accept('{') 965 | while not self.would_accept('}'): 966 | declaration = self.parse_interface_body_declaration() 967 | 968 | if declaration: 969 | declarations.append(declaration) 970 | self.accept('}') 971 | 972 | return declarations 973 | 974 | @parse_debug 975 | def parse_interface_body_declaration(self): 976 | if self.try_accept(';'): 977 | return None 978 | 979 | modifiers, annotations, javadoc = self.parse_modifiers() 980 | 981 | declaration = self.parse_interface_member_declaration() 982 | declaration.modifiers = modifiers 983 | declaration.annotations = annotations 984 | declaration.documentation = javadoc 985 | 986 | return declaration 987 | 988 | @parse_debug 989 | def parse_interface_member_declaration(self): 990 | declaration = None 991 | 992 | token = self.tokens.look() 993 | if self.would_accept('class'): 994 | declaration = self.parse_normal_class_declaration() 995 | elif self.would_accept('interface'): 996 | declaration = self.parse_normal_interface_declaration() 997 | elif self.would_accept('enum'): 998 | declaration = self.parse_enum_declaration() 999 | elif self.is_annotation_declaration(): 1000 | declaration = self.parse_annotation_type_declaration() 1001 | elif self.would_accept('<'): 1002 | declaration = self.parse_interface_generic_method_declarator() 1003 | elif 
self.try_accept('void'): 1004 | method_name = self.parse_identifier() 1005 | declaration = self.parse_void_interface_method_declarator_rest() 1006 | declaration.name = method_name 1007 | else: 1008 | declaration = self.parse_interface_method_or_field_declaration() 1009 | 1010 | declaration._position = token.position 1011 | 1012 | return declaration 1013 | 1014 | @parse_debug 1015 | def parse_interface_method_or_field_declaration(self): 1016 | java_type = self.parse_type() 1017 | name = self.parse_identifier() 1018 | member = self.parse_interface_method_or_field_rest() 1019 | 1020 | if isinstance(member, tree.MethodDeclaration): 1021 | java_type.dimensions += member.return_type.dimensions 1022 | member.name = name 1023 | member.return_type = java_type 1024 | else: 1025 | member.declarators[0].name = name 1026 | member.type = java_type 1027 | 1028 | return member 1029 | 1030 | @parse_debug 1031 | def parse_interface_method_or_field_rest(self): 1032 | rest = None 1033 | 1034 | if self.would_accept('('): 1035 | rest = self.parse_interface_method_declarator_rest() 1036 | else: 1037 | rest = self.parse_constant_declarators_rest() 1038 | self.accept(';') 1039 | 1040 | return rest 1041 | 1042 | @parse_debug 1043 | def parse_constant_declarators_rest(self): 1044 | array_dimension, initializer = self.parse_constant_declarator_rest() 1045 | declarators = [tree.VariableDeclarator(dimensions=array_dimension, 1046 | initializer=initializer)] 1047 | 1048 | while self.try_accept(','): 1049 | declarator = self.parse_constant_declarator() 1050 | declarators.append(declarator) 1051 | 1052 | return tree.ConstantDeclaration(declarators=declarators) 1053 | 1054 | @parse_debug 1055 | def parse_constant_declarator_rest(self): 1056 | array_dimension = self.parse_array_dimension() 1057 | self.accept('=') 1058 | initializer = self.parse_variable_initializer() 1059 | 1060 | return (array_dimension, initializer) 1061 | 1062 | @parse_debug 1063 | def parse_constant_declarator(self): 1064 | name 
= self.parse_identifier() 1065 | additional_dimension, initializer = self.parse_constant_declarator_rest() 1066 | 1067 | return tree.VariableDeclarator(name=name, 1068 | dimensions=additional_dimension, 1069 | initializer=initializer) 1070 | 1071 | @parse_debug 1072 | def parse_interface_method_declarator_rest(self): 1073 | parameters = self.parse_formal_parameters() 1074 | array_dimension = self.parse_array_dimension() 1075 | throws = None 1076 | body = None 1077 | 1078 | if self.try_accept('throws'): 1079 | throws = self.parse_qualified_identifier_list() 1080 | 1081 | if self.would_accept('{'): 1082 | body = self.parse_block() 1083 | else: 1084 | self.accept(';') 1085 | 1086 | return tree.MethodDeclaration(parameters=parameters, 1087 | throws=throws, 1088 | body=body, 1089 | return_type=tree.Type(dimensions=array_dimension)) 1090 | 1091 | @parse_debug 1092 | def parse_void_interface_method_declarator_rest(self): 1093 | parameters = self.parse_formal_parameters() 1094 | throws = None 1095 | body = None 1096 | 1097 | if self.try_accept('throws'): 1098 | throws = self.parse_qualified_identifier_list() 1099 | 1100 | if self.would_accept('{'): 1101 | body = self.parse_block() 1102 | else: 1103 | self.accept(';') 1104 | 1105 | return tree.MethodDeclaration(parameters=parameters, 1106 | throws=throws, 1107 | body=body) 1108 | 1109 | @parse_debug 1110 | def parse_interface_generic_method_declarator(self): 1111 | type_parameters = self.parse_type_parameters() 1112 | return_type = None 1113 | method_name = None 1114 | 1115 | if not self.try_accept('void'): 1116 | return_type = self.parse_type() 1117 | 1118 | method_name = self.parse_identifier() 1119 | method = self.parse_interface_method_declarator_rest() 1120 | method.name = method_name 1121 | method.return_type = return_type 1122 | method.type_parameters = type_parameters 1123 | 1124 | return method 1125 | 1126 | # ------------------------------------------------------------------------------ 1127 | # -- Parameters and 
# variables --

@parse_debug
def parse_formal_parameters(self):
    """Parse a parenthesised formal parameter list.

    Returns a list of ``tree.FormalParameter`` nodes (empty for ``()``).
    Parsing of parameters stops after a varargs parameter.
    """
    formal_parameters = list()

    self.accept('(')

    if self.try_accept(')'):
        return formal_parameters

    while True:
        modifiers, annotations = self.parse_variable_modifiers()

        token = self.tokens.look()
        parameter_type = self.parse_type()
        varargs = False

        if self.try_accept('...'):
            varargs = True

        parameter_name = self.parse_identifier()
        # Dimensions may also follow the parameter name (``int a[]``).
        parameter_type.dimensions += self.parse_array_dimension()

        parameter = tree.FormalParameter(modifiers=modifiers,
                                         annotations=annotations,
                                         type=parameter_type,
                                         name=parameter_name,
                                         varargs=varargs)

        parameter._position = token.position
        formal_parameters.append(parameter)

        if varargs:
            # varargs parameter must be the last
            break

        if not self.try_accept(','):
            break

    self.accept(')')

    return formal_parameters

@parse_debug
def parse_variable_modifiers(self):
    """Parse any run of ``final`` and annotations before a variable.

    Returns ``(modifiers, annotations)`` where ``modifiers`` is a set of
    strings and ``annotations`` is a list of annotation nodes.
    """
    modifiers = set()
    annotations = list()

    while True:
        token = self.tokens.look()
        if self.try_accept('final'):
            modifiers.add('final')
        elif self.is_annotation():
            annotation = self.parse_annotation()
            annotation._position = token.position
            annotations.append(annotation)
        else:
            break

    return modifiers, annotations

@parse_debug
def parse_variable_declators(self):
    """Misspelled alias of :meth:`parse_variable_declarators`.

    The previous body called ``parse_variable_declator``, a name that is
    not defined among the parser methods shown here, so any call would
    have failed.  The name is kept for backward compatibility and simply
    delegates to the correctly spelled method.
    """
    return self.parse_variable_declarators()

@parse_debug
def parse_variable_declarators(self):
    """Parse one or more comma-separated variable declarators."""
    declarators = list()

    while True:
        declarator = self.parse_variable_declarator()
        declarators.append(declarator)

        if not self.try_accept(','):
            break

    return declarators

@parse_debug
def parse_variable_declarator(self):
    """Parse ``name[]... [= initializer]`` into a ``tree.VariableDeclarator``."""
    identifier = self.parse_identifier()
    array_dimension, initializer = self.parse_variable_declarator_rest()

    return tree.VariableDeclarator(name=identifier,
                                   dimensions=array_dimension,
                                   initializer=initializer)

@parse_debug
def parse_variable_declarator_rest(self):
    """Parse ``[]... [= initializer]`` and return ``(dimensions, initializer)``.

    ``initializer`` is ``None`` when no ``=`` follows.
    """
    array_dimension = self.parse_array_dimension()
    initializer = None

    if self.try_accept('='):
        initializer = self.parse_variable_initializer()

    return (array_dimension, initializer)

@parse_debug
def parse_variable_initializer(self):
    """Parse either an array initializer (``{...}``) or an expression."""
    if self.would_accept('{'):
        return self.parse_array_initializer()
    else:
        return self.parse_expression()

@parse_debug
def parse_array_initializer(self):
    """Parse ``{ a, b, ... }`` into a ``tree.ArrayInitializer``.

    Accepts the empty forms ``{}`` and ``{,}`` as well as a trailing
    comma before the closing brace.
    """
    array_initializer = tree.ArrayInitializer(initializers=list())

    self.accept('{')

    if self.try_accept(','):
        self.accept('}')
        return array_initializer

    if self.try_accept('}'):
        return array_initializer

    while True:
        initializer = self.parse_variable_initializer()
        array_initializer.initializers.append(initializer)

        # Every element is followed by ',' unless it is the last one.
        if not self.would_accept('}'):
            self.accept(',')

        if self.try_accept('}'):
            return array_initializer

# ------------------------------------------------------------------------------
# -- Blocks and statements --

@parse_debug
def parse_block(self):
    """Parse ``{ ... }`` and return the list of contained statements."""
    statements = list()

    self.accept('{')

    while not self.would_accept('}'):
        statement = self.parse_block_statement()
        statements.append(statement)
    self.accept('}')

    return statements

@parse_debug
def parse_block_statement(self):
    """Parse one statement inside a block.

    Decides by look-ahead whether the statement is a local class or
    interface declaration, a local variable declaration or an ordinary
    statement.  When look-ahead alone cannot tell, a variable
    declaration is attempted first and a statement is the fallback.
    """
    if self.would_accept(Identifier, ':'):
        # Labeled statement
        return self.parse_statement()

    if self.would_accept('synchronized'):
        return self.parse_statement()

    token = None
    found_annotations = False
    i = 0

    # Look past annotations and modifiers. If we find a modifier that is not
    # 'final' then the statement must be a class or interface declaration
    while True:
        token = self.tokens.look(i)

        if isinstance(token, Modifier):
            if not token.value == 'final':
                return self.parse_class_or_interface_declaration()

        elif self.is_annotation(i):
            found_annotations = True

            # Skip '@' and the (possibly qualified) annotation name.
            i += 2
            while self.tokens.look(i).value == '.':
                i += 2

            if self.tokens.look(i).value == '(':
                # Skip the balanced annotation argument list.
                parens = 1
                i += 1

                while parens > 0:
                    token = self.tokens.look(i)
                    if token.value == '(':
                        parens += 1
                    elif token.value == ')':
                        parens -= 1
                    i += 1

            # i already points at the token after the annotation, so the
            # generic increment below must not run for this branch.
            continue

        else:
            break

        i += 1

    if token.value in ('class', 'enum', 'interface', '@'):
        return self.parse_class_or_interface_declaration()

    if found_annotations or isinstance(token, BasicType):
        statement = self.parse_local_variable_declaration_statement()
        statement._position = token.position
        return statement

    # At this point, if the block statement is a variable definition the next
    # token MUST be an identifier, so if it isn't we can conclude the block
    # statement is a normal statement
    if not isinstance(token, Identifier):
        return self.parse_statement()

    # We can't easily determine the statement type. Try parsing as a variable
    # declaration first and fall back to a statement
    try:
        with self.tokens:
            statement = self.parse_local_variable_declaration_statement()
            statement._position = token.position
            return statement
    except JavaSyntaxError:
        return self.parse_statement()

@parse_debug
def parse_local_variable_declaration_statement(self):
    """Parse ``[modifiers] Type declarators ;`` into a local declaration."""
    modifiers, annotations = self.parse_variable_modifiers()
    java_type = self.parse_type()
    declarators = self.parse_variable_declarators()
    self.accept(';')

    var = tree.LocalVariableDeclaration(modifiers=modifiers,
                                        annotations=annotations,
                                        type=java_type,
                                        declarators=declarators)
    return var

@parse_debug
def parse_statement(self):
    """Parse a single statement and return the matching tree node.

    The branch is selected by the leading token; anything that is not a
    recognised statement keyword is parsed as an expression statement.
    Every returned node gets ``_position`` from the statement's first
    token.
    """
    token = self.tokens.look()
    if self.would_accept('{'):
        block = self.parse_block()
        statement = tree.BlockStatement(statements=block)
        statement._position = token.position
        return statement

    elif self.try_accept(';'):
        # Empty statement.
        statement = tree.Statement()
        statement._position = token.position
        return statement

    elif self.would_accept(Identifier, ':'):
        identifier = self.parse_identifier()
        self.accept(':')

        # The label is attached to the statement that follows it.
        statement = self.parse_statement()
        statement.label = identifier
        statement._position = token.position

        return statement

    elif self.try_accept('if'):
        condition = self.parse_par_expression()
        then = self.parse_statement()
        else_statement = None

        if self.try_accept('else'):
            else_statement = self.parse_statement()

        statement = tree.IfStatement(condition=condition,
                                     then_statement=then,
                                     else_statement=else_statement)
        statement._position = token.position
        return statement

    elif self.try_accept('assert'):
        condition = self.parse_expression()
        value = None

        if self.try_accept(':'):
            value = self.parse_expression()

        self.accept(';')

        statement = tree.AssertStatement(condition=condition, value=value)
        statement._position = token.position
        return statement

    elif self.try_accept('switch'):
        switch_expression = self.parse_par_expression()
        self.accept('{')
        switch_block = self.parse_switch_block_statement_groups()
        self.accept('}')

        statement = tree.SwitchStatement(expression=switch_expression, cases=switch_block)
        statement._position = token.position
        return statement

    elif self.try_accept('while'):
        condition = self.parse_par_expression()
        action = self.parse_statement()

        statement = tree.WhileStatement(condition=condition, body=action)
        statement._position = token.position
        return statement

    elif self.try_accept('do'):
        action = self.parse_statement()
        self.accept('while')
        condition = self.parse_par_expression()
        self.accept(';')

        statement = tree.DoStatement(condition=condition, body=action)
        statement._position = token.position
        return statement

    elif self.try_accept('for'):
        self.accept('(')
        for_control = self.parse_for_control()
        self.accept(')')
        for_statement = self.parse_statement()

        statement = tree.ForStatement(control=for_control, body=for_statement)
        statement._position = token.position
        return statement

    elif self.try_accept('break'):
        label = None

        if self.would_accept(Identifier):
            label = self.parse_identifier()

        self.accept(';')

        statement = tree.BreakStatement(goto=label)
        statement._position = token.position
        return statement

    elif self.try_accept('continue'):
        label = None

        if self.would_accept(Identifier):
            label = self.parse_identifier()

        self.accept(';')

        statement = tree.ContinueStatement(goto=label)
        statement._position = token.position
        return statement

    elif self.try_accept('return'):
        value = None

        if not self.would_accept(';'):
            value = self.parse_expression()

        self.accept(';')

        statement = tree.ReturnStatement(expression=value)
        statement._position = token.position
        return statement

    elif self.try_accept('throw'):
        value = self.parse_expression()
        self.accept(';')

        statement = tree.ThrowStatement(expression=value)
        statement._position = token.position
        return statement

    elif self.try_accept('synchronized'):
        lock = self.parse_par_expression()
        block = self.parse_block()

        statement = tree.SynchronizedStatement(lock=lock, block=block)
        statement._position = token.position
        return statement

    elif self.try_accept('try'):
        resource_specification = None
        block = None
        catches = None
        finally_block = None

        if self.would_accept('{'):
            block = self.parse_block()

            if self.would_accept('catch'):
                catches = self.parse_catches()

            if self.try_accept('finally'):
                finally_block = self.parse_block()

            # A plain try needs at least one catch or a finally.
            if catches is None and finally_block is None:
                self.illegal("Expected catch/finally block")

        else:
            # try-with-resources: catch and finally are both optional.
            resource_specification = self.parse_resource_specification()
            block = self.parse_block()

            if self.would_accept('catch'):
                catches = self.parse_catches()

            if self.try_accept('finally'):
                finally_block = self.parse_block()

        statement = tree.TryStatement(resources=resource_specification,
                                      block=block,
                                      catches=catches,
                                      finally_block=finally_block)
        statement._position = token.position
        return statement

    else:
        expression = self.parse_expression()
        self.accept(';')

        statement = tree.StatementExpression(expression=expression)
        statement._position = token.position
        return statement

# ------------------------------------------------------------------------------
# -- Try / catch --

@parse_debug
def parse_catches(self):
    """Parse one or more consecutive ``catch`` clauses into a list."""
    catches = list()

    while True:
        catch = self.parse_catch_clause()
        catches.append(catch)

        if not self.would_accept('catch'):
            break

    return catches

@parse_debug
def parse_catch_clause(self):
    """Parse ``catch ([modifiers] T1 | T2 ... name) { ... }``.

    Returns a ``tree.CatchClause`` whose parameter lists every
    alternative exception type.
    """
    self.accept('catch', '(')

    modifiers, annotations = self.parse_variable_modifiers()
    catch_parameter = tree.CatchClauseParameter(types=list())
    # Keep the parsed modifiers/annotations instead of discarding them.
    # Set as plain attributes so this works whether or not the node
    # class declares them.
    # NOTE(review): presumably CatchClauseParameter is a Declaration with
    # these fields - confirm against tree.py.
    catch_parameter.modifiers = modifiers
    catch_parameter.annotations = annotations

    while True:
        catch_type = self.parse_qualified_identifier()
        catch_parameter.types.append(catch_type)

        if not self.try_accept('|'):
            break
    catch_parameter.name = self.parse_identifier()

    self.accept(')')
    block = self.parse_block()

    return tree.CatchClause(parameter=catch_parameter, block=block)

@parse_debug
def parse_resource_specification(self):
    """Parse the ``( resource; ... )`` part of try-with-resources.

    Resources are separated by ``;`` and a trailing ``;`` before ``)``
    is accepted.  Returns the list of resources.
    """
    resources = list()

    self.accept('(')

    while True:
        resource = self.parse_resource()
        resources.append(resource)

        if not self.would_accept(')'):
            self.accept(';')

        if self.try_accept(')'):
            break

    return resources

@parse_debug
def parse_resource(self):
    """Parse one ``[modifiers] Type name = expression`` resource."""
    modifiers, annotations = self.parse_variable_modifiers()
    reference_type = self.parse_reference_type()
    reference_type.dimensions = self.parse_array_dimension()
    name = self.parse_identifier()
    reference_type.dimensions += self.parse_array_dimension()
    self.accept('=')
    value = self.parse_expression()

    return tree.TryResource(modifiers=modifiers,
                            annotations=annotations,
                            type=reference_type,
                            name=name,
                            value=value)

# ------------------------------------------------------------------------------
# -- Switch and for statements ---

@parse_debug
def parse_switch_block_statement_groups(self):
    """Parse all ``case``/``default`` groups of a switch body."""
    statement_groups = list()

    while self.tokens.look().value in ('case', 'default'):
        statement_group = self.parse_switch_block_statement_group()
        statement_groups.append(statement_group)

    return statement_groups

@parse_debug
def parse_switch_block_statement_group(self):
    """Parse consecutive labels plus the statements that follow them.

    Returns a ``tree.SwitchStatementCase``.  ``default`` labels
    contribute no entry to the ``case`` list.
    """
    labels = list()
    statements = list()

    while True:
        case_type = self.tokens.next().value
        case_value = None

        if case_type == 'case':
            # A bare identifier (e.g. an enum constant) is kept as a
            # plain name rather than parsed as an expression.
            if self.would_accept(Identifier, ':'):
                case_value = self.parse_identifier()
            else:
                case_value = self.parse_expression()

            labels.append(case_value)
        elif not case_type == 'default':
            self.illegal("Expected switch case")

        self.accept(':')

        if self.tokens.look().value not in ('case', 'default'):
            break

    while self.tokens.look().value not in ('case', 'default', '}'):
        statement = self.parse_block_statement()
        statements.append(statement)

    return tree.SwitchStatementCase(case=labels, statements=statements)

@parse_debug
def parse_for_control(self):
    """Parse the contents of ``for ( ... )``.

    Returns whatever the variable-declaring form yields when that parses
    (enhanced or classic); otherwise a ``tree.ForControl`` built from
    expression lists.
    """
    # Try for_var_control and fall back to normal three part for control

    try:
        with self.tokens:
            return self.parse_for_var_control()
    except JavaSyntaxError:
        pass

    init = None
    if not self.would_accept(';'):
        init = self.parse_for_init_or_update()

    self.accept(';')

    condition = None
    if not self.would_accept(';'):
        condition = self.parse_expression()

    self.accept(';')

    update = None
    if not self.would_accept(')'):
        update = self.parse_for_init_or_update()

    return tree.ForControl(init=init,
                           condition=condition,
                           update=update)

@parse_debug
def parse_for_var_control(self):
    """Parse a ``for`` header that starts with a variable declaration.

    Returns ``tree.EnhancedForControl`` for ``Type name : iterable`` and
    ``tree.ForControl`` for the classic three-part form.
    """
    modifiers, annotations = self.parse_variable_modifiers()
    var_type = self.parse_type()
    var_name = self.parse_identifier()
    var_type.dimensions += self.parse_array_dimension()

    var = tree.VariableDeclaration(modifiers=modifiers,
                                   annotations=annotations,
                                   type=var_type)

    rest = self.parse_for_var_control_rest()

    if isinstance(rest, tree.Expression):
        var.declarators = [tree.VariableDeclarator(name=var_name)]
        return tree.EnhancedForControl(var=var,
                                       iterable=rest)
    else:
        declarators, condition, update = rest
        # The first declarator was created without its name.
        declarators[0].name = var_name
        var.declarators = declarators
        return tree.ForControl(init=var,
                               condition=condition,
                               update=update)

@parse_debug
def parse_for_var_control_rest(self):
    """Parse what follows ``Type name`` in a ``for`` header.

    Returns the iterable expression for the enhanced form, or a tuple
    ``(declarators, condition, update)`` for the classic form.
    """
    if self.try_accept(':'):
        expression = self.parse_expression()
        return expression

    declarators = None
    if not self.would_accept(';'):
        declarators = self.parse_for_variable_declarator_rest()
    else:
        declarators = [tree.VariableDeclarator()]
    self.accept(';')

    condition = None
    if not self.would_accept(';'):
        condition = self.parse_expression()
    self.accept(';')

    update = None
    if not self.would_accept(')'):
        update = self.parse_for_init_or_update()

    return (declarators, condition, update)

@parse_debug
def parse_for_variable_declarator_rest(self):
    """Parse ``[= init] {, declarator}`` after the first loop variable.

    The first declarator in the returned list has no name set here.
    """
    initializer = None

    if self.try_accept('='):
        initializer = self.parse_variable_initializer()

    declarators = [tree.VariableDeclarator(initializer=initializer)]

    while self.try_accept(','):
        declarator = self.parse_variable_declarator()
        declarators.append(declarator)

    return declarators

@parse_debug
def parse_for_init_or_update(self):
    """Parse a comma-separated list of expressions."""
    expressions = list()

    while True:
        expression = self.parse_expression()
        expressions.append(expression)

        if not self.try_accept(','):
            break

    return expressions

# ------------------------------------------------------------------------------
# -- Expressions --

@parse_debug
def parse_expression(self):
    """Parse an expression, including a trailing assignment if present.

    The value side is parsed recursively, so chained assignments nest to
    the right.
    """
    expressionl = self.parse_expressionl()
    assignment_type = None
    assignment_expression = None

    if self.tokens.look().value in Operator.ASSIGNMENT:
        assignment_type = self.tokens.next().value
        assignment_expression = self.parse_expression()
        return tree.Assignment(expressionl=expressionl,
                               type=assignment_type,
                               value=assignment_expression)
    else:
        return expressionl

@parse_debug
def parse_expressionl(self):
    """Parse a ternary, single-parameter lambda, method reference or
    plain binary expression.
    """
    expression_2 = self.parse_expression_2()
    true_expression = None
    false_expression = None

    if self.try_accept('?'):
        true_expression = self.parse_expression()
        self.accept(':')
        false_expression = self.parse_expressionl()

        return tree.TernaryExpression(condition=expression_2,
                                      if_true=true_expression,
                                      if_false=false_expression)
    if self.would_accept('->'):
        # The expression parsed so far is the lambda's only parameter.
        body = self.parse_lambda_method_body()
        return tree.LambdaExpression(parameters=[expression_2],
                                     body=body)
    if self.try_accept('::'):
        method_reference, type_arguments = self.parse_method_reference()
        return tree.MethodReference(
            expression=expression_2,
            method=method_reference,
            type_arguments=type_arguments)
    return expression_2

@parse_debug
def parse_expression_2(self):
    """Parse a unary expression followed by any infix/instanceof chain."""
    expression_3 = self.parse_expression_3()
    token = self.tokens.look()
    if token.value in Operator.INFIX or token.value == 'instanceof':
        parts = self.parse_expression_2_rest()
        parts.insert(0, expression_3)
        return self.build_binary_operation(parts)

    return expression_3

@parse_debug
def parse_expression_2_rest(self):
    """Collect alternating operators and operands into a flat list.

    ``instanceof`` contributes the string ``'instanceof'`` followed by
    the parsed type.
    """
    parts = list()

    token = self.tokens.look()
    while token.value in Operator.INFIX or token.value == 'instanceof':
        if self.try_accept('instanceof'):
            comparison_type = self.parse_type()
            parts.extend(('instanceof', comparison_type))
        else:
            operator = self.parse_infix_operator()
            expression = self.parse_expression_3()
            parts.extend((operator, expression))

        token = self.tokens.look()

    return parts

# ------------------------------------------------------------------------------
# -- Expression operators --

@parse_debug
def parse_expression_3(self):
    """Parse prefix operators, then a lambda, cast or primary expression
    with its selectors and postfix operators.
    """
    prefix_operators = list()
    while self.tokens.look().value in Operator.PREFIX:
        prefix_operators.append(self.tokens.next().value)

    if self.would_accept('('):
        # A '(' may start a lambda, a cast or a parenthesised expression;
        # try them in that order, rolling the tokens back on failure.
        # NOTE(review): prefix operators collected above are not attached
        # to the lambda/cast results - confirm whether that is intended.
        try:
            with self.tokens:
                lambda_exp = self.parse_lambda_expression()
                if lambda_exp:
                    return lambda_exp
        except JavaSyntaxError:
            pass
        try:
            with self.tokens:
                self.accept('(')
                cast_target = self.parse_type()
                self.accept(')')
                expression = self.parse_expression_3()

                return tree.Cast(type=cast_target,
                                 expression=expression)
        except JavaSyntaxError:
            pass

    primary = self.parse_primary()
    primary.prefix_operators = prefix_operators
    if getattr(primary, "selectors", None) is None:
        primary.selectors = list()
    primary.postfix_operators = list()

    token = self.tokens.look()
    # Compare against the two selector-start tokens explicitly.  A
    # substring test against '[.' raises TypeError for a non-string
    # value and is true for the empty string.
    while token.value in ('[', '.'):
        selector = self.parse_selector()
        selector._position = token.position
        primary.selectors.append(selector)

        token = self.tokens.look()

    while token.value in Operator.POSTFIX:
        primary.postfix_operators.append(self.tokens.next().value)
        token = self.tokens.look()

    return primary

@parse_debug
def parse_method_reference(self):
    """Parse the part of a method reference after ``::``.

    Returns ``(method_reference, type_arguments)``; ``type_arguments``
    is an empty list when none are given.
    """
    type_arguments = list()
    if self.would_accept('<'):
        type_arguments = self.parse_nonwildcard_type_arguments()
    if self.would_accept('new'):
        method_reference = tree.MemberReference(member=self.accept('new'))
    else:
        method_reference = self.parse_expression()
    return method_reference, type_arguments

@parse_debug
def parse_lambda_expression(self):
    """Parse a parenthesised-parameter lambda expression.

    ``(a, b) -> ...`` yields inferred parameters; every other form is
    parsed as a formal parameter list.
    """
    lambda_expr = None
    parameters = None
    if self.would_accept('(', Identifier, ','):
        self.accept('(')
        parameters = []
        while not self.would_accept(')'):
            parameters.append(tree.InferredFormalParameter(
                name=self.parse_identifier()))
            self.try_accept(',')
        self.accept(')')
    else:
        parameters = self.parse_formal_parameters()
    body = self.parse_lambda_method_body()
    return tree.LambdaExpression(parameters=parameters,
                                 body=body)

@parse_debug
def parse_lambda_method_body(self):
    """Consume ``->`` and parse the lambda body (block or expression)."""
    if self.accept('->'):
        if self.would_accept('{'):
            return self.parse_block()
        else:
            return self.parse_expression()

@parse_debug
def parse_infix_operator(self):
    """Consume one infix operator and return it as a string.

    Consecutive ``>`` tokens are combined into ``>>`` or ``>>>``.
    """
    operator = self.accept(Operator)

    if not operator in Operator.INFIX:
        self.illegal("Expected infix operator")

    if operator == '>' and self.try_accept('>'):
        operator = '>>'

        # Only a third '>' directly after '>>' makes '>>>'.
        if self.try_accept('>'):
            operator = '>>>'

    return operator

# ------------------------------------------------------------------------------
# -- Primary expressions --

@parse_debug
def parse_primary(self):
    """Parse a primary expression.

    Covers literals, parenthesised expressions, ``this``/``super``
    forms, ``new`` creators, explicit generic invocations, (qualified)
    identifiers with their suffix, and ``.class`` references on basic
    types and ``void``.  Reports an error via ``self.illegal`` when
    nothing matches.
    """
    token = self.tokens.look()

    if isinstance(token, Literal):
        literal = self.parse_literal()
        literal._position = token.position
        return literal

    elif token.value == '(':
        return self.parse_par_expression()

    elif self.try_accept('this'):
        arguments = None

        if self.would_accept('('):
            arguments = self.parse_arguments()
            return tree.ExplicitConstructorInvocation(arguments=arguments)

        return tree.This()
    elif self.would_accept('super', '::'):
        # NOTE(review): this returns the raw token, not a tree node -
        # confirm that callers handling 'super::' expect a token.
        self.accept('super')
        return token
    elif self.try_accept('super'):
        super_suffix = self.parse_super_suffix()
        return super_suffix

    elif self.try_accept('new'):
        return self.parse_creator()

    elif token.value == '<':
        type_arguments = self.parse_nonwildcard_type_arguments()

        if self.try_accept('this'):
            arguments = self.parse_arguments()
            return tree.ExplicitConstructorInvocation(type_arguments=type_arguments,
                                                      arguments=arguments)
        else:
            invocation = self.parse_explicit_generic_invocation_suffix()
            invocation._position = token.position
            invocation.type_arguments = type_arguments

            return invocation

    elif isinstance(token, Identifier):
        qualified_identifier = [self.parse_identifier()]

        while self.would_accept('.', Identifier):
            self.accept('.')
            identifier = self.parse_identifier()
            qualified_identifier.append(identifier)

        identifier_suffix = self.parse_identifier_suffix()

        if isinstance(identifier_suffix, (tree.MemberReference, tree.MethodInvocation)):
            # Take the last identifier as the member and leave the rest for the qualifier
            identifier_suffix.member = qualified_identifier.pop()

        elif isinstance(identifier_suffix, tree.ClassReference):
            identifier_suffix.type = tree.ReferenceType(name=qualified_identifier.pop())

        identifier_suffix._position = token.position
        identifier_suffix.qualifier = '.'.join(qualified_identifier)

        return identifier_suffix

    elif isinstance(token, BasicType):
        base_type = self.parse_basic_type()
        base_type.dimensions = self.parse_array_dimension()
        self.accept('.', 'class')

        return tree.ClassReference(type=base_type)

    elif self.try_accept('void'):
        self.accept('.', 'class')
        return tree.VoidClassReference()

    self.illegal("Expected expression")

@parse_debug
def parse_literal(self):
    """Consume a literal token and wrap its value in ``tree.Literal``."""
    literal = self.accept(Literal)
    return tree.Literal(value=literal)

@parse_debug
def parse_par_expression(self):
    """Parse ``( expression )`` and return the inner expression."""
    self.accept('(')
    expression = self.parse_expression()
    self.accept(')')

    return expression

@parse_debug
def parse_arguments(self):
    """Parse a parenthesised argument list into a list of expressions."""
    expressions = list()

    self.accept('(')

    if self.try_accept(')'):
        return expressions

    while True:
        expression = self.parse_expression()
        expressions.append(expression)

        if not self.try_accept(','):
            break

    self.accept(')')

    return expressions

@parse_debug
def parse_super_suffix(self):
    """Parse what follows ``super``.

    Returns a super method invocation, a super constructor invocation
    or a super member reference, depending on whether a member name
    and/or an argument list is present.
    """
    identifier = None
    type_arguments = None
    arguments = None

    if self.try_accept('.'):
        if self.would_accept('<'):
            type_arguments = self.parse_nonwildcard_type_arguments()

        identifier = self.parse_identifier()

        if self.would_accept('('):
            arguments = self.parse_arguments()
    else:
        arguments = self.parse_arguments()

    # ``arguments`` may be an empty list, so test against None.
    if identifier and arguments is not None:
        return tree.SuperMethodInvocation(member=identifier,
                                          arguments=arguments,
                                          type_arguments=type_arguments)
    elif arguments is not None:
        return tree.SuperConstructorInvocation(arguments=arguments)
    else:
        return tree.SuperMemberReference(member=identifier)

@parse_debug
def parse_explicit_generic_invocation_suffix(self):
    """Parse what follows explicit type arguments: ``super ...`` or
    ``name(arguments)``.
    """
    identifier = None
    arguments = None
    if self.try_accept('super'):
        return self.parse_super_suffix()
    else:
        identifier = self.parse_identifier()
        arguments = self.parse_arguments()
        return tree.MethodInvocation(member=identifier,
                                     arguments=arguments)

# ------------------------------------------------------------------------------
# -- Creators --

@parse_debug
def parse_creator(self):
    """Parse what follows ``new``: an array creator or a class creator."""
    constructor_type_arguments = None

    if self.would_accept(BasicType):
        # Basic types can only be created as arrays.
        created_name = self.parse_basic_type()
        rest = self.parse_array_creator_rest()
        rest.type = created_name
        return rest

    if self.would_accept('<'):
        constructor_type_arguments = self.parse_nonwildcard_type_arguments()

    created_name = self.parse_created_name()

    if self.would_accept('['):
        if constructor_type_arguments:
            self.illegal("Array creator not allowed with generic constructor type arguments")

        rest = self.parse_array_creator_rest()
        rest.type = created_name
        return rest
    else:
        arguments, body = self.parse_class_creator_rest()
        return tree.ClassCreator(constructor_type_arguments=constructor_type_arguments,
                                 type=created_name,
                                 arguments=arguments,
                                 body=body)

@parse_debug
def parse_created_name(self):
    """Parse a dotted type name with optional type arguments per segment.

    Returns the head ``tree.ReferenceType``; later segments are chained
    through ``sub_type``.
    """
    created_name = tree.ReferenceType()
    tail = created_name

    while True:
        tail.name = self.parse_identifier()

        if self.would_accept('<'):
            tail.arguments = self.parse_type_arguments_or_diamond()

        if self.try_accept('.'):
            tail.sub_type = tree.ReferenceType()
            tail = tail.sub_type
        else:
            break

    return created_name

@parse_debug
def parse_class_creator_rest(self):
    """Parse constructor arguments and an optional anonymous class body.

    Returns ``(arguments, class_body)``; ``class_body`` is ``None`` when
    no ``{`` follows.
    """
    arguments = self.parse_arguments()
    class_body = None

    if self.would_accept('{'):
        class_body = self.parse_class_body()

    return (arguments, class_body)

@parse_debug
def parse_array_creator_rest(self):
    """Parse the dimension part of an array creation expression.

    ``[]... {init}`` yields a creator with an initializer; otherwise the
    sized dimensions (expressions) are collected, followed by any
    remaining empty ``[]`` pairs.
    """
    if self.would_accept('[', ']'):
        array_dimension = self.parse_array_dimension()
        array_initializer = self.parse_array_initializer()

        return tree.ArrayCreator(dimensions=array_dimension,
                                 initializer=array_initializer)

    else:
        array_dimensions = list()

        while self.would_accept('[') and not self.would_accept('[', ']'):
            self.accept('[')
            expression = self.parse_expression()
            array_dimensions.append(expression)
            self.accept(']')

        array_dimensions += self.parse_array_dimension()
        return tree.ArrayCreator(dimensions=array_dimensions)

@parse_debug
def parse_identifier_suffix(self):
    if self.try_accept('[', ']'):
        array_dimension = [None] + self.parse_array_dimension()
        self.accept('.', 'class')
        return tree.ClassReference(type=tree.Type(dimensions=array_dimension))

    elif self.would_accept('('):
        arguments = self.parse_arguments()
        return 
tree.MethodInvocation(arguments=arguments) 2187 | 2188 | elif self.try_accept('.', 'class'): 2189 | return tree.ClassReference() 2190 | 2191 | elif self.try_accept('.', 'this'): 2192 | return tree.This() 2193 | 2194 | elif self.would_accept('.', '<'): 2195 | next(self.tokens) 2196 | return self.parse_explicit_generic_invocation() 2197 | 2198 | elif self.try_accept('.', 'new'): 2199 | type_arguments = None 2200 | 2201 | if self.would_accept('<'): 2202 | type_arguments = self.parse_nonwildcard_type_arguments() 2203 | 2204 | inner_creator = self.parse_inner_creator() 2205 | inner_creator.constructor_type_arguments = type_arguments 2206 | 2207 | return inner_creator 2208 | 2209 | elif self.would_accept('.', 'super', '('): 2210 | self.accept('.', 'super') 2211 | arguments = self.parse_arguments() 2212 | return tree.SuperConstructorInvocation(arguments=arguments) 2213 | 2214 | else: 2215 | return tree.MemberReference() 2216 | 2217 | @parse_debug 2218 | def parse_explicit_generic_invocation(self): 2219 | type_arguments = self.parse_nonwildcard_type_arguments() 2220 | 2221 | token = self.tokens.look() 2222 | 2223 | invocation = self.parse_explicit_generic_invocation_suffix() 2224 | invocation._position = token.position 2225 | invocation.type_arguments = type_arguments 2226 | 2227 | return invocation 2228 | 2229 | @parse_debug 2230 | def parse_inner_creator(self): 2231 | identifier = self.parse_identifier() 2232 | type_arguments = None 2233 | 2234 | if self.would_accept('<'): 2235 | type_arguments = self.parse_nonwildcard_type_arguments_or_diamond() 2236 | 2237 | java_type = tree.ReferenceType(name=identifier, 2238 | arguments=type_arguments) 2239 | 2240 | arguments, class_body = self.parse_class_creator_rest() 2241 | 2242 | return tree.InnerClassCreator(type=java_type, 2243 | arguments=arguments, 2244 | body=class_body) 2245 | 2246 | @parse_debug 2247 | def parse_selector(self): 2248 | if self.try_accept('['): 2249 | expression = self.parse_expression() 2250 | 
self.accept(']') 2251 | return tree.ArraySelector(index=expression) 2252 | 2253 | elif self.try_accept('.'): 2254 | 2255 | token = self.tokens.look() 2256 | if isinstance(token, Identifier): 2257 | identifier = self.tokens.next().value 2258 | arguments = None 2259 | 2260 | if self.would_accept('('): 2261 | arguments = self.parse_arguments() 2262 | 2263 | return tree.MethodInvocation(member=identifier, 2264 | arguments=arguments) 2265 | else: 2266 | return tree.MemberReference(member=identifier) 2267 | elif self.would_accept('super', '::'): 2268 | self.accept('super') 2269 | return token 2270 | elif self.would_accept('<'): 2271 | return self.parse_explicit_generic_invocation() 2272 | elif self.try_accept('this'): 2273 | return tree.This() 2274 | elif self.try_accept('super'): 2275 | return self.parse_super_suffix() 2276 | elif self.try_accept('new'): 2277 | type_arguments = None 2278 | 2279 | if self.would_accept('<'): 2280 | type_arguments = self.parse_nonwildcard_type_arguments() 2281 | 2282 | inner_creator = self.parse_inner_creator() 2283 | inner_creator.constructor_type_arguments = type_arguments 2284 | 2285 | return inner_creator 2286 | 2287 | self.illegal("Expected selector") 2288 | 2289 | # ------------------------------------------------------------------------------ 2290 | # -- Enum and annotation body -- 2291 | 2292 | @parse_debug 2293 | def parse_enum_body(self): 2294 | constants = list() 2295 | body_declarations = list() 2296 | 2297 | self.accept('{') 2298 | 2299 | if not self.try_accept(','): 2300 | while not (self.would_accept(';') or self.would_accept('}')): 2301 | constant = self.parse_enum_constant() 2302 | constants.append(constant) 2303 | 2304 | if not self.try_accept(','): 2305 | break 2306 | 2307 | if self.try_accept(';'): 2308 | while not self.would_accept('}'): 2309 | declaration = self.parse_class_body_declaration() 2310 | 2311 | if declaration: 2312 | body_declarations.append(declaration) 2313 | 2314 | self.accept('}') 2315 | 2316 | return 
tree.EnumBody(constants=constants, 2317 | declarations=body_declarations) 2318 | 2319 | @parse_debug 2320 | def parse_enum_constant(self): 2321 | annotations = list() 2322 | javadoc = None 2323 | constant_name = None 2324 | arguments = None 2325 | body = None 2326 | 2327 | next_token = self.tokens.look() 2328 | if next_token: 2329 | javadoc = next_token.javadoc 2330 | 2331 | if self.would_accept(Annotation): 2332 | annotations = self.parse_annotations() 2333 | 2334 | constant_name = self.parse_identifier() 2335 | 2336 | if self.would_accept('('): 2337 | arguments = self.parse_arguments() 2338 | 2339 | if self.would_accept('{'): 2340 | body = self.parse_class_body() 2341 | 2342 | return tree.EnumConstantDeclaration(annotations=annotations, 2343 | name=constant_name, 2344 | arguments=arguments, 2345 | body=body, 2346 | documentation=javadoc) 2347 | 2348 | @parse_debug 2349 | def parse_annotation_type_body(self): 2350 | declarations = None 2351 | 2352 | self.accept('{') 2353 | declarations = self.parse_annotation_type_element_declarations() 2354 | self.accept('}') 2355 | 2356 | return declarations 2357 | 2358 | @parse_debug 2359 | def parse_annotation_type_element_declarations(self): 2360 | declarations = list() 2361 | 2362 | while not self.would_accept('}'): 2363 | declaration = self.parse_annotation_type_element_declaration() 2364 | declarations.append(declaration) 2365 | 2366 | return declarations 2367 | 2368 | @parse_debug 2369 | def parse_annotation_type_element_declaration(self): 2370 | modifiers, annotations, javadoc = self.parse_modifiers() 2371 | declaration = None 2372 | 2373 | token = self.tokens.look() 2374 | if self.would_accept('class'): 2375 | declaration = self.parse_normal_class_declaration() 2376 | elif self.would_accept('interface'): 2377 | declaration = self.parse_normal_interface_declaration() 2378 | elif self.would_accept('enum'): 2379 | declaration = self.parse_enum_declaration() 2380 | elif self.is_annotation_declaration(): 2381 | declaration = 
self.parse_annotation_type_declaration() 2382 | else: 2383 | attribute_type = self.parse_type() 2384 | attribute_name = self.parse_identifier() 2385 | declaration = self.parse_annotation_method_or_constant_rest() 2386 | self.accept(';') 2387 | 2388 | if isinstance(declaration, tree.AnnotationMethod): 2389 | declaration.name = attribute_name 2390 | declaration.return_type = attribute_type 2391 | else: 2392 | declaration.declarators[0].name = attribute_name 2393 | declaration.type = attribute_type 2394 | 2395 | declaration._position = token.position 2396 | declaration.modifiers = modifiers 2397 | declaration.annotations = annotations 2398 | declaration.documentation = javadoc 2399 | 2400 | return declaration 2401 | 2402 | @parse_debug 2403 | def parse_annotation_method_or_constant_rest(self): 2404 | if self.try_accept('('): 2405 | self.accept(')') 2406 | 2407 | array_dimension = self.parse_array_dimension() 2408 | default = None 2409 | 2410 | if self.try_accept('default'): 2411 | default = self.parse_element_value() 2412 | 2413 | return tree.AnnotationMethod(dimensions=array_dimension, 2414 | default=default) 2415 | else: 2416 | return self.parse_constant_declarators_rest() 2417 | 2418 | def parse(tokens, debug=False): 2419 | parser = Parser(tokens) 2420 | parser.set_debug(debug) 2421 | return parser.parse() 2422 | --------------------------------------------------------------------------------