├── indentml ├── __init__.py ├── formatter.py ├── indexedlist.py └── parser.py ├── setup.cfg ├── LICENSE ├── test ├── test_formatter.py ├── test_indexedlist.py └── test_parser.py ├── .gitignore ├── setup.py └── README.md /indentml/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | # This flag says that the code is written to work on both Python 2 and Python 3 | # 3. If at all possible, it is good practice to do this. If you cannot, you 4 | # will need to generate wheels for each Python version that you support. 5 | universal=0 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016-2017 Ilya V. Schurov and contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /test/test_formatter.py: -------------------------------------------------------------------------------- 1 | # (c) Ilya V. Schurov, 2016 2 | # Available under MIT license (see LICENSE file in the root folder) 3 | 4 | import unittest 5 | from textwrap import dedent 6 | 7 | from indentml.formatter import DummyXMLFormatter, parse_and_format 8 | 9 | 10 | class TestDummyXMLFormatter(unittest.TestCase): 11 | def test_dummy_xml_formatter1(self): 12 | doc = dedent(r""" 13 | \tag 14 | Hello 15 | \othertag 16 | I'm indentml 17 | How are you? 18 | I'm fine""") 19 | 20 | obtained = parse_and_format(doc, DummyXMLFormatter, 21 | allowed_tags={'tag', 'othertag'}) 22 | expected = dedent("HelloI'm indentml" 23 | "How are you?I'm fine") 24 | 25 | self.assertEqual(obtained, expected) 26 | 27 | def test_dummy_xml_formatter2(self): 28 | doc = dedent(r""" 29 | \image \src http://example.com \width 100% 30 | Some image""") 31 | obtained = parse_and_format(doc, DummyXMLFormatter, 32 | allowed_tags={'image', 'src', 'width'}) 33 | expected = dedent("http://example.com" 34 | "100%Some image") 35 | self.assertEqual(obtained, expected) 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | 
class QqFormatter(object):
    """
    Base class for formatters that turn a tag tree into a string.

    Subclasses add ``handle_<tagname>(tag)`` methods; each one returns the
    text produced for tags with that name. When a tag has no dedicated
    handler and the subclass defines ``handle__fallback(tag)``, the fallback
    is used; otherwise the tag contributes an empty string.
    """

    # Prefix shared by every per-tag handler method.
    _HANDLER_PREFIX = "handle_"
    # Reserved catch-all handler; its name does not denote a real tag.
    _FALLBACK_HANDLER = "handle__fallback"
    # Docstring marker declaring extra tags a handler relies on.
    _USES_TAGS_RE = re.compile(r"Uses tags:(.+)")

    def __init__(self, root: "QqTag" = None, allowed_tags=None):
        """
        :param root: tree to be formatted (may be assigned later)
        :param allowed_tags: set of tag names; an empty set if omitted
        """
        self.root = root
        self.allowed_tags = allowed_tags or set()

    def uses_tags(self):
        """
        Collect the names of all tags this formatter knows about.

        A tag is known if there is a ``handle_<tag>`` method, or if it is
        listed in a ``Uses tags: a, b, c`` docstring line of any handler
        or of the ``preprocess`` method. The reserved fallback handler is
        not reported as a tag, and empty names (e.g. produced by a
        trailing comma) are ignored.

        :return: set of tag names
        """
        prefix = self._HANDLER_PREFIX
        alltags = set()
        members = inspect.getmembers(self, predicate=inspect.ismethod)
        for name, method in members:
            is_handler = name.startswith(prefix)
            if not is_handler and name != 'preprocess':
                continue

            tag_name = name[len(prefix):] if is_handler else ""
            if tag_name and name != self._FALLBACK_HANDLER:
                alltags.add(tag_name)

            doc = method.__doc__
            if not doc:
                continue
            for line in doc.splitlines():
                m = self._USES_TAGS_RE.search(line)
                if m:
                    declared = (tag.strip() for tag in m.group(1).split(","))
                    alltags.update(tag for tag in declared if tag)
        return alltags

    def format(self, content) -> str:
        """
        :param content: could be QqTag or any iterable of strings and tags;
            ``None`` is formatted as an empty string
        :return: str: concatenated text of all children
        """
        if content is None:
            return ""

        return "".join(
            child if isinstance(child, str) else self.handle(child)
            for child in content
        )

    def handle(self, tag):
        """
        Dispatch ``tag`` to ``handle_<tag.name>``, then to the fallback
        handler if it exists; return an empty string if neither is defined.
        """
        handler = getattr(self, self._HANDLER_PREFIX + tag.name, None)
        if handler is None:
            handler = getattr(self, self._FALLBACK_HANDLER, None)
        if handler is None:
            return ""
        return handler(tag)

    def do_format(self):
        """Format the tree stored in ``self.root``."""
        return self.format(self.root)


class DummyXMLFormatter(QqFormatter):
    """
    Formatter that renders every tag as an XML-like element named after it.
    """

    def handle__fallback(self, tag):
        # The element must be closed, otherwise nested tags cannot be
        # told apart from siblings in the output. Content is not escaped.
        return "<{name}>{content}</{name}>".format(
            name=tag.name, content=self.format(tag)
        )


def parse_and_format(doc: str,
                     formatter_factory,
                     allowed_tags=None) -> str:
    """
    Parse ``doc`` and format the resulting tree.

    :param doc: source text
    :param formatter_factory: callable without arguments that returns
        a formatter (e.g. a ``QqFormatter`` subclass)
    :param allowed_tags: tag names passed to the parser; if ``None``,
        the result of ``formatter.uses_tags()`` is used
    :return: str: output of ``formatter.do_format()``
    """
    formatter = formatter_factory()
    if allowed_tags is None:
        allowed_tags = formatter.uses_tags()

    parser = QqParser(allowed_tags=allowed_tags)
    formatter.root = parser.parse(doc)

    return formatter.do_format()
64 | self.assertTrue(q.is_consistent()) 65 | 66 | del q[3] 67 | self.assertTrue(q.is_consistent()) 68 | 69 | del q[0] 70 | self.assertTrue(q.is_consistent()) 71 | 72 | del q[1] 73 | self.assertTrue(q.is_consistent()) 74 | 75 | del q[0] 76 | self.assertTrue(q.is_consistent()) 77 | self.assertEqual(q, []) 78 | 79 | def test_setitem(self): 80 | q = IndexedList(['a', 'b'], {'a': 123}, 'a', 123, ['a', 'b', 'c'], 81 | ['b', 123], ['a'], {'b': 321}) 82 | q[0] = 2 83 | self.assertEqual( 84 | q._directory, { 85 | 'b': SortedList([5, 7]), 86 | 'a': SortedList([1, 4, 6]), 87 | str: SortedList([0, 2, 3]) 88 | }) 89 | 90 | self.assertTrue(q.is_consistent()) 91 | 92 | q[2] = ['b', 'c', 'd'] 93 | self.assertEqual( 94 | q._directory, { 95 | 'b': SortedList([2, 5, 7]), 96 | 'a': SortedList([1, 4, 6]), 97 | str: SortedList([0, 3]) 98 | }) 99 | self.assertTrue(q.is_consistent()) 100 | 101 | q[1] = ['cd', 'efg', 12] 102 | self.assertEqual( 103 | q._directory, { 104 | 'a': SortedList([4, 6]), 105 | 'b': SortedList([2, 5, 7]), 106 | 'cd': SortedList([1]), 107 | str: SortedList([0, 3]) 108 | }) 109 | self.assertTrue(q.is_consistent()) 110 | 111 | def test_insert(self): 112 | q = IndexedList(['a', 'b'], {'a': 123}, 'a', 123, ['a', 'b', 'c'], 113 | ['b', 123], ['a'], {'b': 321}) 114 | q.insert(2, 'b') 115 | self.assertEqual( 116 | q._directory, { 117 | 'a': SortedList([0, 1, 5, 7]), 118 | 'b': SortedList([6, 8]), 119 | str: SortedList([2, 3, 4]) 120 | }) 121 | self.assertTrue(q.is_consistent()) 122 | 123 | q.insert(0, ['b', 123]) 124 | self.assertEqual( 125 | q._directory, { 126 | 'a': SortedList([1, 2, 6, 8]), 127 | 'b': SortedList([0, 7, 9]), 128 | str: SortedList([3, 4, 5]) 129 | }) 130 | self.assertTrue(q.is_consistent()) 131 | 132 | 133 | if __name__ == '__main__': 134 | unittest.main() 135 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """A setuptools 
based setup module. 2 | 3 | See: 4 | https://packaging.python.org/en/latest/distributing.html 5 | https://github.com/pypa/sampleproject 6 | """ 7 | 8 | # Always prefer setuptools over distutils 9 | from setuptools import setup, find_packages 10 | # To use a consistent encoding 11 | from codecs import open 12 | from os import path 13 | 14 | here = path.abspath(path.dirname(__file__)) 15 | 16 | # Get the long description from the README file 17 | # with open(path.join(here, 'README.md'), encoding='utf-8') as f: 18 | # long_description = f.read() 19 | long_description = r"""**indentml** is a simple general-purpose indent-based 20 | language suitable to describe tree-like structures. 21 | """ 22 | setup( 23 | name='indentml', 24 | 25 | # Versions should comply with PEP440. For a discussion on single-sourcing 26 | # the version across setup.py and the project code, see 27 | # https://packaging.python.org/en/latest/single_source_version.html 28 | version='0.2.3.post1', 29 | 30 | description=('indentml is a simple general-purpose indent-based language' 31 | ' suitable to describe tree-like structures'), 32 | long_description=long_description, 33 | 34 | # The project's main homepage. 35 | url='https://github.com/ischurov/indentml', 36 | 37 | # Author details 38 | author='Ilya V. Schurov', 39 | author_email='ilya@schurov.com', 40 | 41 | # Choose your license 42 | license='MIT', 43 | 44 | # See https://pypi.python.org/pypi?%3Aaction=list_classifiers 45 | classifiers=[ 46 | # How mature is this project? 
Common values are 47 | # 3 - Alpha 48 | # 4 - Beta 49 | # 5 - Production/Stable 50 | 'Development Status :: 3 - Alpha', 51 | 52 | # Indicate who your project is intended for 53 | 'Intended Audience :: Developers', 54 | 'Topic :: Software Development :: Build Tools', 55 | 'Topic :: Software Development :: Documentation', 56 | 57 | # Pick your license as you wish (should match "license" above) 58 | 'License :: OSI Approved :: MIT License', 59 | 60 | # Specify the Python versions you support here. In particular, ensure 61 | # that you indicate whether you support Python 2, Python 3 or both. 62 | 'Programming Language :: Python :: 3', 63 | 'Programming Language :: Python :: 3.6', 64 | 'Programming Language :: Python :: 3.7', 65 | 'Programming Language :: Python :: 3.8', 66 | 'Programming Language :: Python :: 3.9', 67 | 'Programming Language :: Python :: 3.10', 68 | ], 69 | 70 | # What does your project relate to? 71 | keywords='indent-based, markup', 72 | 73 | # You can just specify the packages manually here if your project is 74 | # simple. Or you can use find_packages(). 75 | packages=find_packages(exclude=['test']), 76 | 77 | # Alternatively, if you want to distribute just a my_module.py, uncomment 78 | # this: 79 | # py_modules=["my_module"], 80 | 81 | # List run-time dependencies here. These will be installed by pip when 82 | # your project is installed. For an analysis of "install_requires" vs pip's 83 | # requirements files see: 84 | # https://packaging.python.org/en/latest/requirements.html 85 | install_requires=['sortedcontainers'], 86 | 87 | # List additional groups of dependencies here (e.g. development 88 | # dependencies). 
class IndexedList(MutableSequence[T]):
    """
    IndexedList is a mixture of list and dictionary.
    Every element in IndexedList has a key and one can perform fast search by key.

    The key is calculated in the following way depending on the element's type:

    - ``str``: key is the type ``str`` itself (this is a special case)
    - object with a ``qqkey`` attribute: key is the result of ``.qqkey()``
    - sequence (e.g. ``list``): key is its first element or ``None`` if it is empty
    - mapping (e.g. ``dict``): if it has only one record, its key is the key,
      otherwise ``collections.abc.Mapping`` is the key
    - any other object: key is the type ``str``

    The main purpose of this class is to provide effective BeautifulSoup-style navigation over the s-expression-like
    data structures.

    Internally ``_container`` holds the elements and ``_directory`` maps every key
    to the sorted positions of the elements that have this key.
    """

    def __init__(self, *iterable) -> None:
        # A single sequence argument (this includes a single ``str``) is
        # taken as the collection of elements rather than as one element.
        if len(iterable) == 1 and isinstance(iterable[0], Sequence):
            iterable = iterable[0]
        self._container: List[T] = list(iterable)
        self._directory = defaultdict(SortedList)
        self.update_directory()

    def _normalize_index(self, i) -> int:
        """
        Translate ``i`` into a non-negative position of an existing element.

        :raises TypeError: for slices (only integer positions are indexed)
        :raises IndexError: if the position is out of range
        """
        if isinstance(i, slice):
            raise TypeError(
                "IndexedList does not support slice assignment or deletion")
        size = len(self._container)
        position = i + size if i < 0 else i
        if not 0 <= position < size:
            raise IndexError("IndexedList index out of range")
        return position

    def _shift_indexes(self, start, delta):
        """Add ``delta`` to every recorded position that is >= ``start``."""
        for places in self._directory.values():
            cut = places.bisect_left(start)
            if cut == len(places):
                continue
            # Take the tail out first and re-add it shifted, so that the
            # sorted list is never modified while it is being iterated.
            moved = [index + delta for index in places[cut:]]
            del places[cut:]
            places.update(moved)

    def _discard_index(self, i, item):
        """Forget that ``item`` is stored at position ``i``."""
        key = self.get_key(item)
        places = self._directory[key]
        places.remove(i)
        if not places:
            # do not keep empty entries for keys that are no longer present
            del self._directory[key]

    def __delitem__(self, i):
        i = self._normalize_index(i)
        self._discard_index(i, self._container[i])
        # everything that was after the deleted element moves one step left
        self._shift_indexes(i, -1)
        del self._container[i]

    @overload
    def __getitem__(self, idx: int) -> T: ...

    @overload
    def __getitem__(self, s: slice) -> Sequence[T]: ...

    def __getitem__(self, i):
        return self._container[i]

    def __len__(self):
        return len(self._container)

    def __setitem__(self, i, item):
        i = self._normalize_index(i)
        self._discard_index(i, self._container[i])
        self._container[i] = item
        self.add_index(i, item)

    def insert(self, i, x):
        """
        Insert ``x`` before position ``i``.

        Like ``list.insert``, a position beyond the end appends and
        a negative position counts from the end (clamped to the beginning),
        so the recorded position always equals the actual one.
        """
        size = len(self._container)
        if i < 0:
            i = max(i + size, 0)
        elif i > size:
            i = size
        # everything at or after the insertion point moves one step right
        self._shift_indexes(i, 1)
        self._container.insert(i, x)
        self.add_index(i, x)

    def __str__(self):
        return str(self._container)

    def __repr__(self):
        return "IndexedList(%s)" % repr(self._container)

    def find_index(self, key):
        """
        Return the position of the first element with the given key.

        :raises IndexError: if there is no such element
        """
        # ``.get`` avoids creating an empty entry in the defaultdict
        places = self._directory.get(key)
        if not places:
            raise IndexError("no element with key %r" % (key,))
        return places[0]

    def find_all_indexes(self, key):
        """Return sorted positions of all elements with the given key."""
        return self._directory.get(key, [])

    def find_all(self, key):
        """Return the list of all elements with the given key."""
        return [self._container[i] for i in self.find_all_indexes(key)]

    def find(self, key):
        """
        Return the first element with the given key.

        :raises IndexError: if there is no such element
        """
        return self._container[self.find_index(key)]

    def update_directory(self):
        """Rebuild the key directory from scratch."""
        self._directory.clear()
        for i, item in enumerate(self._container):
            self.add_index(i, item)

    def add_index(self, i, item):
        """Record that ``item`` is stored at position ``i``."""
        key = self.get_key(item)
        self._directory[key].add(i)

    def is_consistent(self):
        """
        Check that the directory describes the container exactly:
        every element is registered under its key at its position and
        there are no other (stale) positions in the directory.
        """
        recorded = sum(len(places) for places in self._directory.values())
        if recorded != len(self._container):
            return False
        return all(i in self.find_all_indexes(self.get_key(el))
                   for i, el in enumerate(self._container))

    def __eq__(self, other):
        if isinstance(other, IndexedList):
            return self._container == other._container
        elif isinstance(other, Sequence):
            return self._container == other
        else:
            return False

    def clear(self):
        self._container.clear()
        self._directory.clear()

    @staticmethod
    def get_key(item):
        """
        Calculate the key of ``item`` (see the class docstring for the rules).
        """
        if isinstance(item, str):
            return str
        # Look the method up instead of catching AttributeError around the
        # call: an AttributeError raised inside qqkey() must not be hidden.
        qqkey = getattr(item, "qqkey", None)
        if qqkey is not None:
            return qqkey()
        if isinstance(item, Sequence):
            return item[0] if item else None
        if isinstance(item, Mapping):
            if len(item) == 1:
                return list(item)[0]
            return Mapping
        return str
**Tag beginning character.** This character is used to mark the beginning of any tag. By default, it is a backslash `\` 59 | (like in LaTeX), but can be configured to any other character. If you need to enter this character literally, you have 60 | to escape it with the same character (like `\\`). You can also escape other special characters listed below with the *tag beginning character*. 61 | 2. Opening and closing brackets: `{`, `}`, and `[`, `]`, used to indicate the content that belongs to *inline tags*, see below. 62 | 3. Tabs are forbidden at the beginning of the line in **indentml** (just like in YAML). 63 | 64 | #### Block tags 65 | Block tags are typed at the beginning of the line, after several spaces that mark the *indent* of a tag. 66 | A block tag starts with the *tag beginning character* and ends with a whitespace or newline character. All the lines below the block tag 67 | whose indent is greater than the tag's indent are appended to the tag. When the indent decreases, the tag is closed. E.g. 68 | 69 | \tag 70 | Hello 71 | \othertag 72 | I'm indentml 73 | How are you? 74 | I'm fine 75 | 76 | will be translated into the following XML tree: 77 | 78 | <tag>Hello 79 | <othertag>I'm indentml 80 | </othertag>How are you? 81 | </tag>I'm fine 82 | 83 | The rest of the line where a block tag begins is attached to that tag as well. 84 | 85 | #### Inline tags 86 | Inline tags start with the *tag beginning character* and end with a bracket: `{` or `[`. The type of bracket affects the 87 | processing. The tag's content is everything between its opening bracket and the corresponding closing bracket. 88 | It can spread over several lines. 89 | 90 | Brackets (of the same kind) inside the tag should be either balanced or escaped. 91 | 92 | For example, 93 | 94 | This is \tag{with some {brackets} inside} 95 | 96 | is valid markup: the contents of tag `tag` will be `with some {brackets} inside`. 97 | 98 | #### Allowed tags 99 | Only those tags are processed that are explicitly *allowed*. 
There are two sets defined: allowed block tags and allowed inline tags. 100 | Sequences that look like tags but are not in the appropriate set are treated as plain text. 101 | 102 | #### Indents and whitespaces 103 | The indent of the first line after the block tag is the *base indent* of this tag. All lines that belong to the tag will be stripped 104 | from the left by the number of leading whitespaces that corresponds to the base indent. The remaining whitespace will be preserved. 105 | 106 | For example: 107 | 108 | \pythoncode 109 | for i in range(1, 10): 110 | print(i) 111 | 112 | Here the contents of `pythoncode` tag is 113 | 114 | for i in range(1, 10): 115 | print(i) 116 | 117 | Note four whitespaces before `print`. 118 | 119 | If a line has an indent that is less than the *base indent*, it MUST be equal to the indent of one of the open block tags. Then 120 | all the tags up to that one (including that one) will be closed. 121 | 122 | For example, the following is forbidden: 123 | 124 | \code 125 | some string with indent 4 126 | some string with indent 2 127 | 128 | It is possible to use any indent values but multiples of 4 are recommended (like [PEP-8](https://www.python.org/dev/peps/pep-0008/)). 129 | -------------------------------------------------------------------------------- /test/test_parser.py: -------------------------------------------------------------------------------- 1 | # (c) Ilya V. 
Schurov, 2016 2 | # Available under MIT license (see LICENSE file in the root folder) 3 | 4 | import unittest 5 | from textwrap import dedent 6 | 7 | from indentml.parser import QqTag, QqParser 8 | 9 | 10 | class TestQqTagMethods(unittest.TestCase): 11 | def test_create_qqtag(self): 12 | q = QqTag({'a': 'b'}) 13 | self.assertEqual(q.name, 'a') 14 | self.assertEqual(q.value, 'b') 15 | 16 | q = QqTag('a', [ 17 | QqTag('b', 'hello'), 18 | QqTag('c', 'world'), 19 | QqTag('b', 'this'), 20 | QqTag('--+-', [ 21 | QqTag('b', 'way'), 22 | "this" 23 | ])]) 24 | 25 | self.assertEqual(q.name, 'a') 26 | # self.assertEqual(q._children, IndexedList([QqTag('b', ['hello']), QqTag('c', ['world']), QqTag('b', ['this']), 27 | # QqTag('--+-', [QqTag('b', ['way']), 'this'])])) 28 | # self.assertEqual(eval(repr(q)), q) 29 | self.assertEqual(q.as_list(), 30 | ['a', ['b', 'hello'], ['c', 'world'], 31 | ['b', 'this'], ['--+-', ['b', 'way'], 'this']]) 32 | 33 | def test_qqtag_accessors(self): 34 | q = QqTag('a', [ 35 | QqTag('b', 'hello'), 36 | QqTag('c', 'world'), 37 | QqTag('b', 'this'), 38 | QqTag('--+-', [ 39 | QqTag('b', 'way'), 40 | "this" 41 | ])]) 42 | 43 | self.assertEqual(q.b_.value, 'hello') 44 | self.assertEqual(q.c_.value, 'world') 45 | self.assertEqual([b.as_list() for b in q('b')], [['b', 'hello'], 46 | ['b', 'this']]) 47 | self.assertEqual(q.find('--+-').b_.value, 'way') 48 | self.assertEqual(q[0].value, 'hello') 49 | self.assertEqual(q[1].value, 'world') 50 | self.assertEqual(q[3][0].value, 'way') 51 | 52 | def test_qqtag_backlinks(self): 53 | q = QqTag('a', [ 54 | QqTag('b', 'hello'), 55 | QqTag('c', 'world'), 56 | QqTag('b', 'this'), 57 | QqTag('--+-', [ 58 | QqTag('b', 'way'), 59 | "this" 60 | ])]) 61 | self.assertTrue(q._is_consistent()) 62 | new_tag = QqTag({'qqq': 'bbb'}) 63 | q.append_child(new_tag) 64 | self.assertEqual(new_tag.idx, 4) 65 | del q[0] 66 | self.assertEqual(new_tag.idx, 3) 67 | self.assertTrue(q._is_consistent()) 68 | 69 | other_tag = QqTag({'other': 
['some', 'values']}) 70 | q.insert(2, other_tag) 71 | self.assertEqual(other_tag.idx, 2) 72 | self.assertEqual(new_tag.idx, 4) 73 | 74 | third_tag = QqTag({'this': 'hi'}) 75 | q[3] = third_tag 76 | self.assertEqual(third_tag.idx, 3) 77 | self.assertTrue(q._is_consistent()) 78 | 79 | def test_qqtag_prev_next(self): 80 | q = QqTag('a', [ 81 | QqTag('b', 'hello'), 82 | QqTag('c', 'world'), 83 | QqTag('b', 'this'), 84 | QqTag('--+-', [ 85 | QqTag('b', 'way'), 86 | "this" 87 | ])]) 88 | 89 | self.assertEqual(q.c_.prev().value, 'hello') 90 | self.assertEqual(q.b_.next().value, 'world') 91 | self.assertEqual(q.c_.next().value, 'this') 92 | 93 | def test_qqtag_insert(self): 94 | z = QqTag("z") 95 | w = QqTag("w") 96 | q = QqTag('a', [ 97 | "Hello", 98 | "World", 99 | z, 100 | "This"]) 101 | self.assertEqual(q[2], z) 102 | q.insert(0, w) 103 | self.assertEqual(q[0], w) 104 | self.assertEqual(q[3], z) 105 | self.assertEqual(q[3].idx, 3) 106 | self.assertTrue(q._is_consistent()) 107 | 108 | def test_qqtag_del(self): 109 | z = QqTag("z") 110 | w = QqTag("w") 111 | q = QqTag('a', [ 112 | "Hello", 113 | "World", 114 | z, 115 | "This", 116 | w 117 | ]) 118 | del q[2] 119 | self.assertTrue(q._is_consistent()) 120 | self.assertEqual(q[3], w) 121 | self.assertEqual(q[3].idx, 3) 122 | self.assertEqual(q.as_list(), 123 | ["a", "Hello", "World", "This", ["w"]]) 124 | 125 | 126 | class TestQqParser(unittest.TestCase): 127 | def test_block_tags1(self): 128 | doc = r"""Hello 129 | \tag 130 | World 131 | """ 132 | parser = QqParser(allowed_tags={'tag'}) 133 | tree = parser.parse(doc) 134 | print(tree.as_list()) 135 | self.assertEqual(tree[0], "Hello") 136 | 137 | self.assertEqual(tree.tag_.name, 'tag') 138 | self.assertEqual(tree.tag_.value, 'World') 139 | 140 | def test_block_tags_nested(self): 141 | doc = r"""Hello 142 | \tag 143 | World 144 | \othertag 145 | This 146 | Is 147 | A test 148 | The end 149 | 150 | Blank line before the end 151 | """ 152 | parser = 
QqParser(allowed_tags={'tag', 'othertag'}) 153 | tree = parser.parse(doc) 154 | print(tree.as_list()) 155 | self.assertEqual(tree[0], "Hello") 156 | self.assertEqual(tree.tag_[0], "World") 157 | self.assertEqual(tree.tag_.othertag_._children, ["This\nIs"]) 158 | self.assertEqual(tree.tag_[2], 'A test') 159 | self.assertEqual(tree[2], 'The end\n\nBlank line before the end') 160 | self.assertEqual(tree.tag_.parent, tree) 161 | self.assertEqual(tree.tag_.othertag_.parent, tree.tag_) 162 | 163 | def test_block_additional_indent(self): 164 | doc = r"""Hello 165 | \tag 166 | First 167 | Second 168 | Third 169 | End""" 170 | parser = QqParser(allowed_tags={'tag'}) 171 | tree = parser.parse(doc) 172 | self.assertEqual(tree.tag_._children, 173 | ['First\n Second\nThird']) 174 | 175 | def test_match_bracket(self): 176 | doc = dedent("""\ 177 | hello { world { 178 | some test } { 179 | okay { } 180 | this is a test }} test 181 | """) 182 | parser = QqParser() 183 | parser.parse_init(doc) 184 | start = parser.position(0, 6) 185 | stop = parser.position(None, 0) 186 | out = parser.match_bracket(start, stop) 187 | self.assertEqual(out.clipped_line(stop), 188 | "} test\n") 189 | 190 | def test_inline_tag_contents(self): 191 | doc = dedent("""\ 192 | haha \\tag{this}{ 193 | that}[another]{this 194 | }[okay test] stop 195 | """) 196 | parser = QqParser(allowed_tags={"tag"}) 197 | parser.parse_init(doc) 198 | start = parser.position(0, 0) 199 | stop = parser.position(None, 0) 200 | tag_position, tag, type, after = parser.locate_tag(start, stop) 201 | self.assertEqual(start.clipped_line(tag_position), "haha ") 202 | self.assertEqual(tag, "tag") 203 | self.assertEqual(type, "inline") 204 | 205 | items = parser.inline_tag_contents(after, stop) 206 | contents = ["".join(item['start'].lines_before(item['stop'])) 207 | for item in items] 208 | self.assertEqual(contents, 209 | ["this", "\n that", 210 | "another", "this\n ", "okay test"]) 211 | self.assertEqual([item['type'] for item in 
items], 212 | ['{', '{', '[', '{', '[']) 213 | 214 | def test_scan_after_attribute_tag(self): 215 | doc = dedent("""\ 216 | test \\tag this \\tag{inline \\tag{} \\tag}q \\tag 217 | other tag 218 | """) 219 | parser = QqParser(allowed_tags={"tag"}) 220 | parser.parse_init(doc) 221 | start = parser.position(0, 0) 222 | stop = parser.position(None, 0) 223 | tag_position, tag, type, after = parser.locate_tag(start, stop) 224 | 225 | start = after.copy() 226 | before = parser.scan_after_attribute_tag(start, stop) 227 | self.assertEqual(start.clipped_line(before), 228 | 'this \\tag{inline \\tag{} \\tag}q ') 229 | 230 | def test_scan_after_attribute_tag2(self): 231 | doc = dedent("""\ 232 | test \\tag this \\tag{inline \\tag{} \\tag}\\tag 233 | other tag 234 | """) 235 | parser = QqParser(allowed_tags={"tag"}) 236 | parser.parse_init(doc) 237 | start = parser.position(0, 0) 238 | stop = parser.position(None, 0) 239 | tag_positoin, tag, type, after = parser.locate_tag(start, stop) 240 | 241 | start = after.copy() 242 | before = parser.scan_after_attribute_tag(start, stop) 243 | self.assertEqual(start.clipped_line(before), 244 | 'this \\tag{inline \\tag{} \\tag}') 245 | 246 | def test_inline_tag1(self): 247 | doc = r"""Hello, \tag{inline} tag! 248 | """ 249 | parser = QqParser(allowed_tags={'tag'}) 250 | tree = parser.parse(doc) 251 | self.assertEqual(tree[0], 'Hello, ') 252 | self.assertEqual(tree.tag_.value, 'inline') 253 | self.assertEqual(tree[2], ' tag!') 254 | 255 | def test_inline_tag2(self): 256 | doc = r"""Hello, \othertag{\tag{inline} tag}! 
257 | """ 258 | parser = QqParser(allowed_tags={'tag', 'othertag'}) 259 | tree = parser.parse(doc) 260 | # self.assertEqual(tree._othertag._tag.value, 'inline') 261 | self.assertEqual(tree.as_list(), ['_root', 'Hello, ', 262 | ['othertag', ['tag', 'inline'], 263 | ' tag'], '!']) 264 | 265 | def test_inline_tag3(self): 266 | doc = r"""Hello, \tag{ 267 | this is a continuation of inline tag on the next line 268 | 269 | the next one\othertag{okay}} 270 | """ 271 | parser = QqParser(allowed_tags={'tag', 'othertag'}) 272 | tree = parser.parse(doc) 273 | self.assertEqual(tree.as_list(), [ 274 | '_root', 'Hello, ', 275 | [ 276 | 'tag', 277 | ('\nthis is a continuation of inline tag on the next line' 278 | '\n\nthe next one'), 279 | [ 280 | 'othertag', 281 | 'okay' 282 | ] 283 | ], '' 284 | ]) 285 | 286 | def test_inline_tag4(self): 287 | doc = r"Hello, \tag{I'm [your{taggy}] tag} okay" 288 | parser = QqParser(allowed_tags={'tag', 'othertag'}) 289 | tree = parser.parse(doc) 290 | self.assertEqual(tree.as_list(), [ 291 | '_root', 'Hello, ', 292 | [ 293 | 'tag', 294 | "I'm [your{taggy}] tag" 295 | ], 296 | " okay" 297 | ]) 298 | 299 | def test_block_and_inline_tags(self): 300 | doc = r"""Hello, 301 | \tag 302 | I'm your \othertag{tag} 303 | \tag 304 | { 305 | \tag 306 | { 307 | this \tag{is a {a test} 308 | okay} 309 | } 310 | } 311 | """ 312 | parser = QqParser(allowed_tags={'tag', 'othertag'}) 313 | tree = parser.parse(doc) 314 | self.assertEqual(tree.as_list(), [ 315 | '_root', 'Hello,\n', 316 | [ 317 | 'tag', 318 | "I'm your ", 319 | ['othertag', 'tag'], 320 | '\n', 321 | [ 322 | 'tag', 323 | '{\n', 324 | [ 325 | 'tag', 326 | '{\nthis ', 327 | [ 328 | 'tag', 329 | 'is a {a test}\nokay', 330 | ], 331 | '\n' 332 | ], 333 | '}\n' 334 | ], 335 | '}\n' 336 | ] 337 | ]) 338 | 339 | def test_sameline_tags(self): 340 | self.maxDiff = None 341 | doc = r""" Hello! 
342 | \h1 Intro to qqmbr 343 | 344 | \h2 Fresh documentation system 345 | 346 | **qqmbr** is a documentation system intended to be extremely simple and extremely extensible. 347 | It was written to allow writing rich content that can be compiled into different formats. 348 | One source, multiple media: HTML, XML, LaTeX, PDF, eBooks, any other. Look below to see it in action. 349 | 350 | \h3 This is nice level-3 header 351 | 352 | Some paragraph text. See also \ref{sec:another} (reference to different header). 353 | 354 | There are LaTeX formulas here: 355 | 356 | \eq 357 | x^2 + y^2 = z^2 358 | 359 | `\\eq` is a qqtag. It is better than tag, because it is auto-closing (look at the indent, like Python). 360 | 361 | Here is formula with the label: 362 | 363 | \equation \label eq:Fermat 364 | x^n + y^n = z^n, \quad n>2 365 | 366 | Several formulas with labels: 367 | 368 | \gather 369 | \item \label eq:2x2 370 | 2\times 2 = 4 371 | \item \label eq:3x3 372 | 3\times 3 = 9 373 | 374 | We can reference formula \eqref{eq:Fermat} and \eqref{eq:2x2} just like we referenced header before. 375 | 376 | \h3 Another level-3 header \label sec:another 377 | 378 | Here is the header we referenced. 379 | 380 | \h3 More interesting content 381 | 382 | \figure 383 | \source http://example.com/somefig.png 384 | \caption Some figure 385 | \width 500px 386 | 387 | \question 388 | Do you like qqmbr? 389 | \quiz 390 | \choice \correct false 391 | No. 392 | \comment You didn't even try! 393 | \choice \correct true 394 | Yes, i like it very much! 395 | \comment And so do I! 
396 | """ 397 | parser = QqParser( 398 | allowed_tags={'h1', 'h2', 'h3', 'eq', 'equation', 'label', 399 | 'gather', 'inlne', 'item', 'ref', 'eqref', 400 | 'source', 'caption', 'width', 'question', 401 | 'quiz', 'choice', 402 | 'comment', 'correct', 'figure'}) 403 | tree = parser.parse(doc) 404 | print(tree.as_list()) 405 | self.assertEqual(tree.as_list(), ['_root', 406 | 'Hello!', 407 | ['h1', 'Intro to qqmbr'], 408 | '', 409 | ['h2', 410 | 'Fresh documentation system'], 411 | '\n**qqmbr** is a documentation system intended to be extremely simple and extremely extensible.\nIt was written to allow writing rich content that can be compiled into different formats.\nOne source, multiple media: HTML, XML, LaTeX, PDF, eBooks, any other. Look below to see it in action.\n', 412 | ['h3', 413 | 'This is nice level-3 header'], 414 | '\nSome paragraph text. See also ', 415 | ['ref', 'sec:another'], 416 | ' (reference to different header).\n\nThere are LaTeX formulas here:\n', 417 | ['eq', 418 | 'x^2 + y^2 = z^2'], 419 | '\n`\\eq` is a qqtag. 
It is better than tag, because it is auto-closing (look at the indent, like Python).\n\nHere is formula with the label:\n', 420 | ['equation', 421 | ['label', 'eq:Fermat'], 422 | 'x^n + y^n = z^n, \\quad n>2'], 423 | '\nSeveral formulas with labels:\n', 424 | ['gather', 425 | ['item', 426 | ['label', 'eq:2x2'], 427 | '2\\times 2 = 4'], 428 | ['item', 429 | ['label', 'eq:3x3'], 430 | '3\\times 3 = 9']], 431 | '\nWe can reference formula ', 432 | ['eqref', 'eq:Fermat'], 433 | ' and ', 434 | ['eqref', 'eq:2x2'], 435 | ' just like we referenced header before.\n', 436 | ['h3', 437 | 'Another level-3 header ', 438 | ['label', 439 | 'sec:another']], 440 | '\nHere is the header we referenced.\n', 441 | ['h3', 442 | 'More interesting content'], 443 | '', 444 | ['figure', 445 | ['source', 446 | 'http://example.com/somefig.png'], 447 | ['caption', 448 | 'Some figure'], 449 | ['width', '500px']], 450 | '', 451 | ['question', 452 | 'Do you like qqmbr?', 453 | ['quiz', 454 | ['choice', 455 | ['correct', 'false'], 456 | 'No.', 457 | ['comment', 458 | "You didn't even try!"]], 459 | ['choice', 460 | ['correct', 'true'], 461 | 'Yes, i like it very much!', 462 | ['comment', 463 | 'And so do I!']]]]]) 464 | 465 | def test_inline_tag_at_the_beginning_of_the_line(self): 466 | doc = r"""\tag 467 | some content here here and here and we have some inline 468 | \tag{here and \othertag{there}} 469 | """ 470 | parser = QqParser(allowed_tags={'tag', 'othertag'}) 471 | tree = parser.parse(doc) 472 | self.assertEqual(tree.as_list(), ['_root', ['tag','some content ' 473 | 'here here and ' 474 | 'here and we ' 475 | 'have some ' 476 | 'inline\n', 477 | ['tag', 'here and ',['othertag', 478 | 'there']], 479 | '']]) 480 | 481 | def test_alias2tag(self): 482 | doc = r"""\# Heading 1 483 | \## Heading 2 484 | Hello 485 | """ 486 | parser = QqParser(allowed_tags={'h1', 'h2'}, 487 | alias2tag={"#": 'h1', "##": 'h2'}) 488 | tree = parser.parse(doc) 489 | self.assertEqual(tree.as_list(), 490 | ["_root", 
["h1", "Heading 1"], 491 | ["h2", "Heading 2"], "Hello"]) 492 | 493 | def test_non_allowed_tag_with_bracket(self): 494 | doc = r"""Hello \inlinetag{some \forbiddentag{here} okay} this""" 495 | parser = QqParser(allowed_tags={'inlinetag'}) 496 | tree = parser.parse(doc) 497 | self.assertEqual(tree.as_list(), ["_root", "Hello ", ["inlinetag", "some \\forbiddentag{here} okay"], " this"]) 498 | 499 | def test_escape_unescape(self): 500 | doc = r"""Hello 501 | \sometag test 502 | \\sometag test 503 | \sometag 504 | \ here we are 505 | we are here 506 | some \inline{tag with \{ curve bracket inside} okay 507 | some \inline[square bracket \[ inside] okay 508 | """ 509 | parser = QqParser(allowed_tags={'sometag', 'inline'}) 510 | tree = parser.parse(doc) 511 | self.assertEqual(tree.as_list(), [ 512 | "_root", "Hello", 513 | ["sometag", "test"], 514 | "\\sometag test", 515 | ["sometag", " here we are\nwe are here"], 516 | "some ", 517 | ["inline", "tag with { curve bracket inside"], 518 | " okay\nsome ", 519 | ["inline", 520 | ["_item", "square bracket [ inside"]], 521 | " okay" 522 | ]) 523 | 524 | def test_square_bracket_inline(self): 525 | doc = r"Some inline \tag[with][multiple][arguments]" 526 | parser = QqParser(allowed_tags={"tag"}) 527 | tree = parser.parse(doc) 528 | self.assertEqual(tree.as_list(), 529 | [ 530 | "_root", "Some inline ", 531 | ["tag", 532 | ["_item", "with"], 533 | ["_item", "multiple"], 534 | ["_item", "arguments"] 535 | ] 536 | ]) 537 | 538 | def test_mixed_brackets_inline(self): 539 | doc = r"Some inline \tag[with]{multiple}[arguments]" 540 | parser = QqParser(allowed_tags={"tag"}) 541 | tree = parser.parse(doc) 542 | self.assertEqual(tree.as_list(), 543 | [ 544 | "_root", "Some inline ", 545 | ["tag", 546 | ["_item", "with"], 547 | "multiple", 548 | ["_item", "arguments"] 549 | ] 550 | ]) 551 | 552 | def test_multiline_inline_with_attribute(self): 553 | doc = "\\tag{hello \\tag world \n this is \n a \\tag test}" 554 | parser = 
QqParser(allowed_tags={"tag"}) 555 | tree = parser.parse(doc) 556 | self.assertEqual(tree.as_list(), 557 | ['_root', 558 | ['tag', 'hello ', 559 | ['tag', 'world \n this is \n a'], 560 | ['tag', 'test']]]) 561 | 562 | def test_multiple_arguments2(self): 563 | doc = r"""\proof 564 | By \ref[existence 565 | and uniqueness theorem\nonumber][thm:4:eu] there exists 566 | """ 567 | parser = QqParser(allowed_tags={"proof", "ref", "nonumber"}) 568 | tree = parser.parse(doc) 569 | print(tree.as_list()) 570 | self.assertEqual(tree.as_list(), 571 | ['_root', ['proof', 'By ', 572 | ['ref', 573 | ['_item', 574 | 'existence\nand uniqueness theorem', 575 | ['nonumber']], 576 | ['_item', 577 | 'thm:4:eu']], 578 | ' there exists']]) 579 | 580 | def test_empty_square_bracket_tag(self): 581 | doc = r"""\blocktag 582 | Some \empty[ 583 | 584 | ] tag 585 | """ 586 | parser = QqParser(allowed_tags={'blocktag', 'empty'}) 587 | tree = parser.parse(doc) 588 | self.assertEqual(tree.as_list(),["_root", ['blocktag', 'Some ', 589 | ['empty', 590 | ['_item', '\n']], 591 | ' tag']]) 592 | 593 | def test_blocktag_inside_inlinetag(self): 594 | doc = r"""\blocktag Some \inlinetag[Hello \blocktag test]""" 595 | parser = QqParser(allowed_tags={'inlinetag', 'blocktag'}) 596 | tree = parser.parse(doc) 597 | 598 | self.assertEqual(tree.as_list(), ['_root', ['blocktag', 'Some ', 599 | ['inlinetag', 600 | ['_item', 'Hello ', 601 | ['blocktag', 'test']]]]]) 602 | 603 | doc = r"""Some \inlinetag[Hello \blocktag test 604 | \blocktag another test]""" 605 | parser = QqParser(allowed_tags={'inlinetag', 'blocktag'}) 606 | tree = parser.parse(doc) 607 | print(tree.as_list()) 608 | self.assertEqual(tree.as_list(), 609 | ['_root', 'Some ', ['inlinetag', 610 | ['_item', 'Hello ', 611 | ['blocktag', 'test'], 612 | ['blocktag', 613 | 'another test']]]]) 614 | 615 | def test_inlinetag_with_multiple_arguments(self): 616 | doc = r"""\blocktag Some \inlinetag[Hello][world]""" 617 | parser = 
QqParser(allowed_tags={'inlinetag', 'blocktag'}) 618 | tree = parser.parse(doc) 619 | self.assertEqual(tree.as_list(), 620 | ["_root", ["blocktag", "Some ", 621 | ["inlinetag", ["_item", "Hello"], 622 | ["_item", "world"]]]]) 623 | 624 | def test_end_with_empty_line(self): 625 | doc = dedent(r""" 626 | \tag 627 | 628 | """) 629 | parser = QqParser(allowed_tags={"tag"}) 630 | tree = parser.parse(doc) 631 | 632 | def test_as_etree(self): 633 | doc = dedent(r""" 634 | \tag 635 | some content 636 | \tag 637 | other content 638 | more text here 639 | """) 640 | parser = QqParser(allowed_tags={"tag"}) 641 | tree = parser.parse(doc) 642 | self.assertEqual(et.tostring(tree.as_etree()), 643 | b'<_root>some content' 644 | b'other content' 645 | b'more text here' 646 | b'') 647 | 648 | def test_newline(self): 649 | doc = dedent(r""" 650 | \tag 651 | Hello 652 | 653 | Stop. 654 | """) 655 | parser = QqParser(allowed_tags={'tag'}) 656 | tree = parser.parse(doc) 657 | self.assertEqual(tree.as_list(), 658 | ['_root', '', ['tag', 'Hello'], 659 | '\nStop.']) 660 | 661 | def test_children_tags(self): 662 | doc = dedent(r""" 663 | \tag 664 | some content 665 | \tag 666 | other content 667 | more text here 668 | \tag 669 | some other tag 670 | """) 671 | parser = QqParser(allowed_tags={"tag"}) 672 | tree = parser.parse(doc) 673 | children = (list(tree.tag_.children_tags())) 674 | self.assertEqual(children[0].as_list(), ["tag", "other content"]) 675 | self.assertEqual(children[1].as_list(), ["tag", "some other tag"]) 676 | 677 | def test_blank_line_after_tag(self): 678 | doc = dedent(r""" 679 | \tag 680 | 681 | otherline 682 | \tag 683 | othertag 684 | """) 685 | parser = QqParser(allowed_tags={"tag"}) 686 | tree = parser.parse(doc) 687 | self.assertEqual(tree.as_list(), 688 | ['_root', '', 689 | ['tag', '\notherline', ['tag', 'othertag']]]) 690 | -------------------------------------------------------------------------------- /indentml/parser.py: 
class QqError(Exception):
    """Error raised by the indentml tree and parser code."""


class QqTag(MutableSequence):
    """
    A named node of the indentml tree: a tag name plus an ordered list
    of children, each child being either a string or another QqTag.

    Navigation over direct children (never recursive):

    - ``tag.find('subtag')`` returns the first child named ``subtag``
      or None.
    - ``tag.subtag_`` (note the trailing underscore) is a shortcut for
      ``tag.find_or_empty('subtag')``: it returns an empty QqTag when
      there is no such child, so lookups can be chained.
    - ``tag.find_all('subtag')`` returns an adopter QqTag named ``_``
      holding all children named ``subtag``.
    - ``tag('subtag')`` is a shortcut for ``tag.find_all('subtag')``.

    A tag whose only child is a string is called *simple*; its
    ``.value`` is that string.
    """

    def __init__(self, name, children=None, parent=None, idx=None,
                 adopt=False):
        """
        :param name: tag name, or a one-item dict ``{name: children}``
        :param children: None, a single str/int/float, or a sequence
        :param parent: parent tag, if any
        :param idx: position of this tag among its parent's children
        :param adopt: if True, the tag does not register itself as
            the parent of its children
        :raises QqError: if ``children`` is of an unsupported type
        """
        if isinstance(name, dict) and len(name) == 1:
            # Dict shorthand. ``idx`` and ``adopt`` are honoured here;
            # the earlier recursive call silently dropped them.
            ((name, children),) = name.items()

        self.name = name
        self.parent = parent
        self.idx = idx
        # tag has to know its place in the list of parent's children
        # to be able to navigate to previous / next siblings

        self.adopter = adopt
        # tag is called 'adopter' if it does not register itself as
        # a parent of its children
        # TODO: write test for adoption

        self._children: IndexedList[Union[str, "QqTag"]]

        if children is None:
            self._children = IndexedList()
        elif isinstance(children, (str, int, float)):
            self._children = IndexedList([children])
        elif isinstance(children, Sequence):
            self._children = IndexedList(children)
        else:
            raise QqError(
                "I don't know what to do with children " + str(children)
            )

        if not adopt:
            for i, child in enumerate(self):
                if isinstance(child, QqTag):
                    child.parent = self
                    child.idx = i

    def __repr__(self):
        if self.parent is None:
            return "QqTag(%s, %s)" % (
                repr(self.name),
                repr(self._children),
            )
        return "QqTag(%s, %s, parent = %s)" % (
            repr(self.name),
            repr(self._children),
            repr(self.parent.name),
        )

    def __str__(self):
        return "{%s : %s}" % (self.name, self._children)

    def __eq__(self, other):
        """Tags are equal when their ``as_list()`` forms are equal."""
        if not isinstance(other, QqTag):
            return False
        return self.as_list() == other.as_list()

    @property
    def is_simple(self):
        """
        True if the tag has exactly one child and it is a string.
        """
        return len(self) == 1 and isinstance(self[0], str)

    @property
    def value(self):
        """
        The only (string) child of a simple tag.

        :raises QqError: if the tag is not simple
        """
        if self.is_simple:
            return self[0]
        raise QqError(
            "More than one child, value is not defined, QqTag: "
            + str(self)
        )

    @value.setter
    def value(self, value):
        if self.is_simple:
            self[0] = value
        else:
            raise QqError("More than one child, cannot set value")

    def qqkey(self):
        """Return the tag name (the key used to look the tag up)."""
        return self.name

    def __getattr__(self, attr):
        # ``tag.name_`` is a shortcut for ``tag.find_or_empty('name')``.
        # Dunder names are excluded: protocol probes such as
        # ``getattr(tag, '__deepcopy__', None)`` must see AttributeError,
        # otherwise they receive an empty tag and try to call it.
        if attr.endswith("_") and not attr.startswith("__"):
            return self.find_or_empty(attr[:-1])
        raise AttributeError("Attribute " + attr + " not found")

    def __bool__(self):
        """A tag is truthy iff it has at least one child."""
        return bool(self._children)

    def find(self, key: str) -> Optional["QqTag"]:
        """
        Returns the direct child with the given key if it exists,
        otherwise returns None
        :param key: key
        :return: QqTag or None
        """
        if key in self._children._directory:
            return self._children.find(key)
        return None

    def find_or_empty(self, key: str) -> "QqTag":
        """
        The same as find, but returns an empty QqTag named ``_``
        if it finds nothing
        :param key: key
        :return: QqTag
        """
        if key in self._children._directory:
            return self._children.find(key)
        return QqTag("_")

    def find_all(self, key: str) -> "QqTag":
        """
        Returns an adopter QqTag named ``_`` holding every direct child
        with the given key (the children keep their real parent).
        """
        return QqTag("_", self._children.find_all(key), adopt=True)

    def __call__(self, key):
        return self.find_all(key)

    def as_list(self) -> list:
        """
        Returns the nested-list form ``[name, child, child, ...]``,
        with child tags converted recursively.
        """
        return [self.name] + [
            child.as_list() if isinstance(child, QqTag) else child
            for child in self
        ]

    def insert(self, idx: int, child) -> None:
        """
        Inserts child before position idx (``list.insert`` semantics)
        and, unless self is an adopter, keeps parent / idx links of the
        affected child tags up to date.
        """
        size = len(self._children)
        # Clamp exactly like list.insert does, so that the index
        # recorded on the child is its real position.
        if idx < 0:
            idx = max(size + idx, 0)
        elif idx > size:
            idx = size
        self._children.insert(idx, child)
        if self.adopter:
            # Adopted children belong to another parent: their
            # parent / idx must not be touched.
            return
        if isinstance(child, QqTag):
            child.parent = self
            child.idx = idx
        for sibling in self._children[idx + 1:]:
            if isinstance(sibling, QqTag):
                sibling.idx += 1

    def __delitem__(self, idx: int):
        size = len(self._children)
        if isinstance(idx, int) and -size <= idx < 0:
            # Work with the absolute position, otherwise the slice
            # below would pick the wrong siblings to renumber.
            idx += size
        del self._children[idx]
        if not self.adopter:
            for sibling in self._children[idx:]:
                if isinstance(sibling, QqTag):
                    sibling.idx -= 1

    def append_child(self, child):
        """Appends child (string or tag) as the last child."""
        self.insert(len(self), child)

    def _is_consistent(self):
        """
        Checks that every child tag points back to self and stores its
        own position.

        :raises QqError: if self is an adopter
        """
        if self.adopter:
            raise QqError("Adopter cannot be checked for consistency")
        for i, child in enumerate(self):
            # Identity, not ``!=``: an equal-looking but different
            # parent is still an inconsistency.
            if isinstance(child, QqTag) and (
                child.parent is not self or child.idx != i
            ):
                return False
        return True

    def append_line(self, line: str) -> None:
        """
        Appends line if it is not empty

        :param line:
        """
        if line:
            self._children.append(line)

    @overload
    def __getitem__(self, idx: int) -> "QqTag":
        ...

    @overload
    def __getitem__(self, s: slice) -> Sequence["QqTag"]:
        ...

    def __getitem__(self, idx):
        return self._children[idx]

    def __setitem__(self, idx: int, child: Union[str, "QqTag"]):
        self._children[idx] = child
        # Only tags carry parent / idx; string children (e.g. the
        # ``value`` setter) must not be touched.
        if not self.adopter and isinstance(child, QqTag):
            size = len(self._children)
            if isinstance(idx, int) and -size <= idx < 0:
                idx += size
            child.parent = self
            child.idx = idx

    def __iter__(self):
        return iter(self._children)

    def __len__(self):
        return len(self._children)

    def children_tags(self) -> Iterator["QqTag"]:
        """
        Returns iterator over all children that are QqTags

        :return:
        """
        return (tag for tag in self if isinstance(tag, QqTag))

    @property
    def text_content(self):
        """Concatenation of all direct string children."""
        return "".join(child for child in self if isinstance(child, str))

    def exists(self, key):
        """
        Returns True if a child with given key exists
        :param key:
        :return:
        """
        return key in self._children._directory

    def get(self, key: str, default_value: Optional[str] = None
            ) -> Optional[str]:
        """
        Returns the value of a direct child with a given key.
        If it does not exist or is not simple,
        returns default value (default: None)
        :param key: key
        :param default_value: what to return if there is no
            such key or the corresponding child is not simple
        :return: the value of a child
        """
        tag = self.find(key)
        if tag and tag.is_simple:
            return tag.value
        return default_value

    def ancestor_path(self):
        r"""
        Returns list of ancestors for self, starting with self.

        Example:

            \tag
                \othertag
                    \thirdtag

        thirdtag.ancestor_path() == [thirdtag, othertag, tag, _root]

        :return: list
        """
        tag = self
        path = [tag]
        while tag.parent is not None:
            tag = tag.parent
            path.append(tag)
        return path

    def get_eve(self):
        """
        Returns ancestor which is a direct child of a root
        (the last but one element of ``ancestor_path()``)

        :return:
        """
        return self.ancestor_path()[-2]

    def next(self):
        """Returns the next sibling, or None if there is none."""
        if (
            self.parent is None
            or self.idx is None
            or self.idx == len(self.parent) - 1
        ):
            return None
        return self.parent[self.idx + 1]

    def prev(self):
        """Returns the previous sibling, or None if there is none."""
        if self.parent is None or self.idx is None or self.idx == 0:
            return None
        return self.parent[self.idx - 1]

    def clear(self):
        """Removes all children."""
        self._children.clear()

    def extend_children(self, children):
        """Appends every item of children with ``append_child``."""
        for child in children:
            self.append_child(child)

    def children_values(self, strings="raise", not_simple="raise"):
        """
        Make a list of .value applied to all children instances

        :param strings: one of 'raise', 'keep', 'none', 'skip'
        :param not_simple: one of 'raise', 'keep', 'none', 'skip'

        What to do if string or not simple tag occurs:
        - 'raise': raise an exception
        - 'keep': keep the item (strings are stripped, tags kept as is)
        - 'none': replace with None
        - 'skip': skip this item
        :return: list of values
        :raises QqError: for 'raise' mode, see above
        """
        assert strings in ["raise", "keep", "none", "skip"]
        assert not_simple in ["raise", "keep", "none", "skip"]
        values = []
        for child in self:
            if isinstance(child, str):
                if strings == "raise":
                    raise QqError(
                        "string does not have value (set strings option"
                        " to 'keep', 'none' or 'skip' to workaround)"
                    )
                if strings == "keep":
                    values.append(child.strip())
                elif strings == "none":
                    values.append(None)
                # strings == 'skip': nothing to do
                continue

            # QqTag assumed
            if child.is_simple:
                values.append(child.value)
                continue
            # child is not simple
            if not_simple == "raise":
                raise QqError(
                    (
                        "Child {} is not simple. Use not_simple option "
                        "to tweak the behavior"
                    ).format(child)
                )
            if not_simple == "none":
                values.append(None)
            elif not_simple == "keep":
                values.append(child)
            # not_simple == 'skip': nothing to do
        return values

    @property
    def itemized(self) -> bool:
        """
        Returns True if all children are '_item's
        :return: bool
        """
        return len(self.find_all("_item")) == len(self)

    def itemize(self):
        """
        If all children are '_item's, returns self. Otherwise returns
        a new tag with the same name whose only child is an adopter
        '_item' holding self's children.
        :return: QqTag
        """
        if self.itemized:
            return self
        return QqTag(self.name, [QqTag("_item", self, adopt=True)])

    def unitemized(self):
        """
        If self is simple (only one child and it is string), return self
        If self's only child is "_item", return it
        :return: QqTag
        :raises QqError: in any other case
        """
        # TODO testme

        if self.is_simple:
            return self
        if (
            len(self) == 1
            and isinstance(self[0], QqTag)
            and self[0].name == "_item"
        ):
            return self[0]
        raise QqError("Can't unitemize tag " + str(self))

    def process_include_tags(self, parser, includedir, follow=True):
        """
        Recursively processes include tags (as defined by parser.include)
        Reads files from includedir

        Builds and returns a new tree instead of modifying self

        :param parser: parser providing ``include`` and ``parse_file``
        :param includedir: directory the included paths are joined to
        :param follow: follow include directives in included files
                       recursively
        :return: processed tree
        """

        # TODO FIXME Sanity checks for includedir

        newtree = QqTag(self.name)
        for child in self:
            if isinstance(child, str):
                newtree.append_child(child)
            elif child.name == parser.include:
                include_path = child.value
                # FROM: https://www.guyrutenberg.com/2013/12/06/
                # preventing-directory-traversal-in-python/
                include_path = os.path.normpath(
                    "/" + include_path
                ).lstrip("/")
                # END FROM

                include_parsed = parser.parse_file(
                    os.path.join(includedir, include_path)
                )
                if follow:
                    include_parsed = include_parsed.process_include_tags(
                        parser, includedir, follow
                    )
                newtree.extend_children(include_parsed)
            else:
                newtree.append_child(
                    child.process_include_tags(parser, includedir, follow)
                )
        return newtree

    def as_etree(self):
        """
        Converts the tag to an ``xml.etree.ElementTree.Element``:
        string children become text / tail, child tags become
        sub-elements.
        """
        tree = Element(self.name)
        chunk = []
        for child in self:
            if isinstance(child, str):
                chunk.append(child)
            else:
                append_text(tree, "".join(chunk))
                chunk.clear()
                tree.append(child.as_etree())
        if chunk:
            append_text(tree, "".join(chunk))
        return tree

    def escape(self, line: str, tbcharacter="\\") -> str:
        """
        Prefixes the tag-beginning character and all brackets in line
        with tbcharacter.
        """
        # tbcharacter goes first, so that escapes added for brackets
        # are not escaped again
        for char in [tbcharacter, "[", "{", "}", "]"]:
            line = line.replace(char, tbcharacter + char)
        return line

    def serialize(
        self, tbcharacter="\\", tabs=4, escape_brackets=True
    ) -> List[str]:
        """
        Serializes the tree to a list of indentml source lines.

        :param tbcharacter: tag-beginning character
        :param tabs: number of spaces per nesting level
        :param escape_brackets: escape brackets as well as tbcharacter
        :return: list of lines; the last one always ends with newline
        """
        lines = []
        if self.name != "_root":
            lines.append(tbcharacter + self.name + "\n")
            prefix = " " * tabs
        else:
            prefix = ""
        last = len(self) - 1
        for i, child in enumerate(self):
            if isinstance(child, str):
                if escape_brackets:
                    line = self.escape(child, tbcharacter)
                else:
                    line = child.replace(tbcharacter, tbcharacter * 2)
                # NOTE(review): a loop over line.split("\n") used to
                # compute an escaped ``subline`` here but never used
                # it; it is dropped. Multi-line strings are therefore
                # still emitted as one item and a leading space of the
                # first child is not escaped -- confirm the intent.
                followed_by_tag = i < last and isinstance(
                    self[i + 1], QqTag
                )
                lines.append(
                    prefix + line + ("\n" if followed_by_tag else "")
                )
            else:
                # escape_brackets is passed down so that nested tags
                # follow the caller's choice
                lines.extend(
                    prefix + line
                    for line in child.serialize(
                        tbcharacter, tabs, escape_brackets
                    )
                )
        if lines and not lines[-1].endswith("\n"):
            lines[-1] = lines[-1] + "\n"
        return lines


def append_text(tree, text):
    """
    Appends text to an etree element: to the tail of its last
    sub-element if there is one, otherwise to the element's own text.

    :return: the same element
    """
    children = list(tree)
    if children:
        last = children[-1]
        last.tail = text if last.tail is None else last.tail + text
    else:
        tree.text = text if tree.text is None else tree.text + text
    return tree


def dedent(line, indent):
    """
    Removes exactly ``indent`` leading spaces from line.

    :raises QqError: if line does not start with that many spaces
    """
    if line[:indent] == " " * indent:
        return line[indent:]
    raise QqError("Can't dedent line {} by {}".format(repr(line), indent))
def get_indent(s, empty_to_none=False):
    """
    Returns the number of leading spaces of line s.

    :param s: a line
    :param empty_to_none: if True, whitespace-only lines give None
    :return: int (or None, see above)
    :raises QqError: if the leading whitespace of s contains a tab
    """
    if not s.strip() and empty_to_none:
        return None
    beginning = re.match(r"\s*", s).group(0)
    if "\t" in beginning:
        raise QqError(
            "No tabs allowed in QqDoc at the beginning "
            "of line! Line: " + s
        )
    return len(re.match(r" *", s).group(0))


@total_ordering
class Position(object):
    """
    A (line, offset) cursor into a list of lines.

    Positions are ordered and compared by (line, offset) only; the
    ``lines`` list itself does not take part in comparisons. Since
    ``__eq__`` is defined without ``__hash__``, positions are not
    hashable.
    """

    def __init__(self, line, offset, lines):
        """
        :param line: line number; None means "one past the last line"
        :param offset: offset inside the line
        :param lines: the list of lines the position refers to
        """
        self.line = len(lines) if line is None else line
        self.offset = offset
        self.lines = lines

    def __lt__(self, other):
        # NotImplemented lets Python raise the regular TypeError for
        # foreign types instead of an AttributeError from here
        if not isinstance(other, Position):
            return NotImplemented
        return (self.line, self.offset) < (other.line, other.offset)

    def __eq__(self, other):
        # NotImplemented makes ``pos == None`` evaluate to False
        # instead of failing on a missing attribute
        if not isinstance(other, Position):
            return NotImplemented
        return (self.line, self.offset) == (other.line, other.offset)

    def nextchar(self):
        """
        Returns a new position one character further; moves to the
        start of the next line when the end of the line is passed.
        """
        new = self.copy()
        new.offset += 1
        if new.offset >= len(new.lines[new.line]):
            new = new.nextline()
        return new

    def prevchar(self):
        """
        Returns a new position one character back; moves to the last
        character of the previous line when the offset was 0.
        """
        new = self.copy()
        new.offset -= 1
        if new.offset < 0:
            new.line -= 1
            new.offset = len(new.getline) - 1
        return new

    def prevline(self):
        """Returns the position at the start of the previous line."""
        return Position(line=self.line - 1, offset=0, lines=self.lines)

    def nextline(self):
        """Returns the position at the start of the next line."""
        return Position(line=self.line + 1, offset=0, lines=self.lines)

    def copy(self):
        """Returns a new Position sharing the same ``lines`` list."""
        return Position(
            line=self.line, offset=self.offset, lines=self.lines
        )

    def __str__(self):
        return "Position: line_number: {}, offset: {}, line: {}".format(
            self.line, self.offset, get(self.lines, self.line)
        )

    def __repr__(self):
        return "Position(line={}, offset={})".format(
            self.line, self.offset
        )

    def lines_before(self, stop):
        """
        Returns the list of line pieces between self and stop: the
        first one starts at self's offset, the last one is clipped
        at stop.
        """
        pos = self
        out = []
        while pos < stop:
            out.append(pos.clipped_line(stop))
            pos = pos.nextline()
        return out

    def clipped_line(self, stop):
        """
        Returns the current line from self's offset on, clipped before
        stop if stop is not on a later line

        :param stop: Position
        :return: str
        """
        end = None if stop.line > self.line else stop.offset
        return self.getline[self.offset:end]

    @property
    def getline(self):
        """The line the position is on."""
        return self.lines[self.line]

    @property
    def getchar(self):
        """The character the position points to."""
        return self.getline[self.offset]

    def get_end_of_line(self):
        """Returns the position just past the last char of the line."""
        return Position(self.line, len(self.getline), self.lines)

    def get_start_of_line(self):
        """Returns the position at offset 0 of the same line."""
        return Position(self.line, 0, self.lines)


def get(s, i, default=None):
    """
    Returns s[i], or default if i is negative or past the end of s.
    """
    if i < 0 or i >= len(s):
        return default
    return s[i]


def first_nonspace_idx(line, start=0, stop=None):
    """
    Returns the index of the first non-whitespace character of
    line[start:stop] (``stop`` if there is none).
    """
    if stop is None:
        stop = len(line)
    m = re.match(r"\s*", line[start:stop])
    return start + m.end(0)
self.tag_regex 677 | + r"(?= |{}|{{|\[|$)".format(self.command_regex) 678 | ) 679 | 680 | def is_allowed_tag(self, tag: str, inline=False): 681 | if inline: 682 | return tag in self.allowed_inline_tags 683 | else: 684 | return tag in self.allowed_tags 685 | 686 | def escape_line(self, s): 687 | """ 688 | Replaces '\\' and '\ ' with special stub 689 | :param s: a line 690 | :return: escaped line 691 | """ 692 | s = s.replace(self.tb_char * 2, self.escape_stub + "COMMAND_&") 693 | s = s.replace(self.tb_char + " ", self.escape_stub + "SPACE_&") 694 | s = s.replace( 695 | self.tb_char + "{", self.escape_stub + "OPEN_CURVE_&" 696 | ) 697 | s = s.replace( 698 | self.tb_char + "[", self.escape_stub + "OPEN_SQUARE_&" 699 | ) 700 | s = s.replace( 701 | self.tb_char + "}", self.escape_stub + "CLOSE_CURVE_&" 702 | ) 703 | s = s.replace( 704 | self.tb_char + "]", self.escape_stub + "CLOSE_SQUARE_&" 705 | ) 706 | 707 | return s 708 | 709 | def unescape_line(self, s): 710 | """ 711 | Replaces special stub's inserted by ``escape_line()`` 712 | with '\' and ' ' 713 | 714 | Note: this is **NOT** an inverse of escape_line. 
715 | 716 | :param s: a line 717 | :return: unescaped line 718 | """ 719 | s = s.replace(self.escape_stub + "SPACE_&", " ") 720 | s = s.replace(self.escape_stub + "COMMAND_&", self.tb_char) 721 | s = s.replace(self.escape_stub + "OPEN_CURVE_&", "{") 722 | s = s.replace(self.escape_stub + "OPEN_SQUARE_&", "[") 723 | s = s.replace(self.escape_stub + "CLOSE_CURVE_&", "}") 724 | s = s.replace(self.escape_stub + "CLOSE_SQUARE_&", "]") 725 | 726 | return s 727 | 728 | def position(self, line, offset): 729 | return Position(line=line, offset=offset, lines=self._lines) 730 | 731 | def parse_init(self, text: Union[str, Sequence[str]]): 732 | """ 733 | :param text: 734 | :return: 735 | """ 736 | if isinstance(text, str): 737 | lines = text.splitlines(keepends=True) 738 | else: 739 | lines = text 740 | 741 | lines = [self.escape_line(line) for line in lines] 742 | 743 | self._lines = lines 744 | 745 | # basic indent is indent of first non-empty line, if any 746 | basicindent = next( 747 | (get_indent(line) for line in lines if line.strip()), 0 748 | ) 749 | 750 | self._indents = [] 751 | 752 | # we want to replace all Nones with indent of next non-empty string 753 | # to do so, first, let us group all indents 754 | 755 | indents, nums = zip( 756 | *[ 757 | (indent, sum(1 for _ in g)) 758 | for indent, g in groupby( 759 | get_indent(line, empty_to_none=True) for line in lines 760 | ) 761 | ] 762 | ) 763 | 764 | for i, (indent, num) in enumerate(zip(indents, nums)): 765 | if indent is None: 766 | indent = get(indents, i + 1, basicindent) 767 | self._indents.extend([indent] * num) 768 | 769 | def parse(self, lines: Union[str, Sequence[str]]): 770 | self.parse_init(lines) 771 | start = self.position(0, 0) 772 | stop = self.position(None, 0) 773 | tags = self.parse_fragment( 774 | start, stop, current_indent=get_indent(self._lines[0]) 775 | ) 776 | return QqTag("_root", tags) 777 | 778 | def append_chunk_and_clear( 779 | self, tags, chunk, stripeol=False, ignoreempty=False 780 | 
): 781 | joined = "".join(chunk) 782 | if stripeol and joined and joined[-1] == "\n": 783 | joined = joined[:-1] 784 | if joined or (not ignoreempty and chunk): 785 | # empty chunk is not the same as chunk with empty line 786 | tags.append(self.unescape_line(joined)) 787 | chunk.clear() 788 | 789 | def parse_fragment( 790 | self, start, stop, current_indent, merge_lines=False 791 | ): 792 | 793 | tags = [] 794 | 795 | pos = start.copy() 796 | chunk = [] 797 | 798 | while pos < stop: 799 | # loop invariant: everything before pos is appended to tags 800 | # or chunk 801 | 802 | line = pos.clipped_line(stop) 803 | if not line.strip(): 804 | if line and line[-1] == "\n": 805 | chunk.append("\n") 806 | pos = pos.nextline() 807 | continue 808 | if pos.offset == 0: 809 | line = dedent(line, current_indent) 810 | pos.offset = current_indent 811 | blockmode = True 812 | else: 813 | blockmode = False 814 | 815 | if ( 816 | not merge_lines 817 | and blockmode 818 | and line.strip() 819 | and line[0] == self.tb_char 820 | ): 821 | # possibly block tag line 822 | m = self.blocktag_rc.match(line) 823 | if m: 824 | tag = m.group(1) 825 | tag = self.alias2tag.get(tag, tag) 826 | if self.is_allowed_tag(tag): 827 | newstart_pos = current_indent + first_nonspace_idx( 828 | line, m.end(1) 829 | ) 830 | newstop_line, tag_contents_indent = self.block_tag_stop_line_indent( 831 | pos.line, stop.line 832 | ) 833 | parsed_content = self.parse_fragment( 834 | self.position(pos.line, newstart_pos), 835 | self.position(newstop_line, 0), 836 | tag_contents_indent, 837 | ) 838 | self.append_chunk_and_clear( 839 | tags, chunk, stripeol=True 840 | ) 841 | tags.append(QqTag(tag, children=parsed_content)) 842 | pos = self.position(newstop_line, 0) 843 | continue 844 | 845 | tag_position, tag, ttype, after = self.locate_tag(pos, stop) 846 | if tag is not None: 847 | chunk.append(pos.clipped_line(tag_position)) 848 | self.append_chunk_and_clear(tags, chunk, ignoreempty=True) 849 | if ttype == "block": 
850 | next_bt_position = self.scan_after_attribute_tag( 851 | after, stop, merge_lines=merge_lines 852 | ) 853 | new_stop = self.find_first_nonspace_character_before( 854 | next_bt_position, after 855 | ).nextchar() 856 | parsed_content = self.parse_fragment( 857 | after, new_stop, current_indent 858 | ) 859 | tags.append(QqTag(tag, children=parsed_content)) 860 | pos = next_bt_position.copy() 861 | continue 862 | if ttype == "inline": 863 | items = self.inline_tag_contents(after, stop) 864 | parsed_items = [] 865 | 866 | for item in items: 867 | parsed_content = self.parse_fragment( 868 | item["start"], 869 | item["stop"], 870 | current_indent, 871 | merge_lines=True, 872 | ) 873 | if item["type"] == "{": 874 | parsed_items.extend(parsed_content) 875 | else: # item['type'] == '[' 876 | parsed_items.append( 877 | QqTag("_item", children=parsed_content) 878 | ) 879 | tags.append(QqTag(tag, children=parsed_items)) 880 | pos = items[-1]["stop"].nextchar() 881 | continue 882 | 883 | chunk.append(line) 884 | pos = pos.nextline() 885 | 886 | self.append_chunk_and_clear(tags, chunk, stripeol=True) 887 | return tags 888 | 889 | def find_first_nonspace_character_before( 890 | self, start: Position, stop: Position 891 | ): 892 | # FIXME: stop is not used: why? 893 | line = "".join( 894 | reversed(start.get_start_of_line().clipped_line(start)) 895 | ) 896 | m = re.match(r"\s*", line) 897 | 898 | return self.position(start.line, start.offset - m.end(0) - 1) 899 | 900 | def block_tag_stop_line_indent(self, start_line, stop_line): 901 | tag_indent = self._indents[start_line] 902 | if stop_line <= start_line + 1: 903 | # don't have more lines 904 | # e.g. 
905 | # \tag rest of line 906 | # EOF 907 | # indent is of no importance, so set it to -1 908 | return start_line + 1, -1 909 | 910 | contents_indent = self._indents[start_line + 1] 911 | if contents_indent <= tag_indent: 912 | # tag is already closed 913 | # like 914 | # \tag rest of line 915 | # something 916 | return start_line + 1, -1 917 | 918 | last_tag_line, last_tag_indent = next( 919 | ( 920 | (i, indent) 921 | for i, indent in enumerate( 922 | islice(self._indents, start_line + 2, stop_line), 923 | start_line + 2, 924 | ) 925 | if indent < contents_indent 926 | ), 927 | (stop_line, tag_indent), 928 | ) 929 | 930 | if last_tag_indent > tag_indent: 931 | raise QqError( 932 | "Incorrect indent at line {}: ".format(last_tag_line) 933 | + self._lines[last_tag_line] 934 | ) 935 | return last_tag_line, contents_indent 936 | 937 | def locate_tag(self, start: Position, stop: Position): 938 | """ 939 | locates inline or block tag on line 940 | beginning with given position pos 941 | 942 | does not propogate on the following lines 943 | 944 | :param start: position to start with 945 | :param stop: position to stop 946 | :return: (tag_position: Position of first tag character (\\) 947 | tag: tag name, 948 | type: 'block' or 'inline', 949 | after: Position of first non-space character after tag 950 | (if it is block tag) or simply first character after 951 | tag (if it is inline tag) 952 | """ 953 | line = start.clipped_line(stop) 954 | 955 | for m in self.anytag_rc.finditer(line): 956 | tag = m.group(1) 957 | tag_position = self.position( 958 | start.line, start.offset + m.start(0) 959 | ) 960 | after = self.position( 961 | start.line, 962 | start.offset + first_nonspace_idx(line, m.end(1)), 963 | ) 964 | next_char = get(line, m.end(1)) 965 | if next_char not in ["{", "["]: 966 | if self.is_allowed_tag(tag): 967 | return tag_position, tag, "block", after 968 | else: 969 | if self.is_allowed_tag(tag, inline=True): 970 | return tag_position, tag, "inline", after 971 | 
return min(start.get_end_of_line(), stop), None, None, None 972 | 973 | def inline_tag_contents(self, start: Position, stop: Position): 974 | """ 975 | Finds the contents of inline tag: 976 | 977 | :param start: 978 | :param stop: 979 | :return: a list of dicts {'type': '[' or '{', 980 | 'start': Position, 981 | 'stop': Position} 982 | """ 983 | items = [] 984 | pos = start 985 | while pos < stop and pos.getchar in ["[", "{"]: 986 | type_ = pos.getchar 987 | end = self.match_bracket(pos, stop) 988 | items.append( 989 | {"type": type_, "start": pos.nextchar(), "stop": end} 990 | ) 991 | pos = end.nextchar() 992 | return items 993 | 994 | def match_bracket(self, start: Position, stop: Position) -> Position: 995 | """ 996 | Finds the matching closing bracket 997 | :param start: start position, its value should be '[' or '{' 998 | :param stop: stop position 999 | :return: position of matching closing bracket 1000 | """ 1001 | open_bracket = self._lines[start.line][start.offset] 1002 | assert open_bracket in ["[", "{"] 1003 | pos = start.copy() 1004 | counter = 0 1005 | # open bracket counter 1006 | closing_bracket = {"[": "]", "{": "}"}[open_bracket] 1007 | bracket_rc = re.compile( 1008 | re.escape(open_bracket) + "|" + re.escape(closing_bracket) 1009 | ) 1010 | 1011 | while pos < stop: 1012 | line = pos.clipped_line(stop) 1013 | for m in bracket_rc.finditer(line): 1014 | if ( 1015 | self.position(pos.line, pos.offset + m.start(0)) 1016 | >= stop 1017 | ): 1018 | raise QqError( 1019 | "No closing bracket found: " 1020 | "start: {}, stop: {}".format(start, stop) 1021 | ) 1022 | if m.group(0) == open_bracket: 1023 | counter += 1 1024 | else: 1025 | counter -= 1 1026 | if counter == 0: 1027 | return self.position( 1028 | pos.line, pos.offset + m.start(0) 1029 | ) 1030 | pos = pos.nextline() 1031 | raise QqError( 1032 | "No closing bracket found: " 1033 | "start: {}, stop: {}".format(start, stop) 1034 | ) 1035 | 1036 | def scan_after_attribute_tag( 1037 | self, start: 
Position, stop: Position, merge_lines=False 1038 | ): 1039 | """ 1040 | scans the rest of line / fragment after block tag found inline 1041 | looking for another block tag 1042 | skipping every inline tag with its contents 1043 | 1044 | :param start: first character to scan 1045 | :param stop: where to stop 1046 | :param merge_lines: look for several lines 1047 | :return: (Position of the first character of next block tag or EOL, 1048 | Position of the first non-space character after block tag 1049 | or None if EOL found) 1050 | """ 1051 | if not merge_lines: 1052 | stop = min(stop, start.nextline()) 1053 | # looking only for current line 1054 | 1055 | pos = start.copy() 1056 | ret = start.copy() 1057 | 1058 | while pos < stop: 1059 | tag_position, tag, type_, after = self.locate_tag(pos, stop) 1060 | if tag is None: 1061 | pos = pos.nextline() 1062 | ret = tag_position 1063 | continue 1064 | if type_ == "block": 1065 | return tag_position 1066 | else: 1067 | contents = self.inline_tag_contents(after, stop) 1068 | pos = contents[-1]["stop"].nextchar() 1069 | ret = min(pos.get_end_of_line(), stop) 1070 | 1071 | return ret 1072 | 1073 | def parse_file(self, filename): 1074 | with open(filename) as f: 1075 | lines = f.readlines() 1076 | return self.parse(lines) 1077 | --------------------------------------------------------------------------------