├── indentml
├── __init__.py
├── formatter.py
├── indexedlist.py
└── parser.py
├── setup.cfg
├── LICENSE
├── test
├── test_formatter.py
├── test_indexedlist.py
└── test_parser.py
├── .gitignore
├── setup.py
└── README.md
/indentml/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | # Setting universal=1 would declare that the code runs unmodified on both
3 | # Python 2 and Python 3, so that a single wheel serves both. This project
4 | # supports Python 3 only (see README.md), so the flag is turned off.
5 | universal=0
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016-2017 Ilya V. Schurov and contributors
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/test/test_formatter.py:
--------------------------------------------------------------------------------
1 | # (c) Ilya V. Schurov, 2016
2 | # Available under MIT license (see LICENSE file in the root folder)
3 |
4 | import unittest
5 | from textwrap import dedent
6 |
7 | from indentml.formatter import DummyXMLFormatter, parse_and_format
8 |
9 |
class TestDummyXMLFormatter(unittest.TestCase):
    """
    Checks the string produced by ``parse_and_format`` when the document is
    rendered with ``DummyXMLFormatter``.
    """

    def test_dummy_xml_formatter1(self):
        """Document with the tags ``tag`` and ``othertag`` plus plain text."""
        # NOTE(review): no indentation is visible inside this sample document
        # in this copy of the file -- confirm the literal against the
        # repository before relying on it.
        doc = dedent(r"""
\tag
Hello
\othertag
I'm indentml
How are you?
I'm fine""")

        obtained = parse_and_format(doc, DummyXMLFormatter,
                                    allowed_tags={'tag', 'othertag'})
        # NOTE(review): the expected value contains no markup at all;
        # presumably the XML tags were lost from this literal -- verify.
        expected = dedent("HelloI'm indentml"
                          "How are you?I'm fine")

        self.assertEqual(obtained, expected)

    def test_dummy_xml_formatter2(self):
        """Several tags (``image``, ``src``, ``width``) on a single line."""
        doc = dedent(r"""
\image \src http://example.com \width 100%
Some image""")
        obtained = parse_and_format(doc, DummyXMLFormatter,
                                    allowed_tags={'image', 'src', 'width'})
        # NOTE(review): same concern as above -- the expected value carries
        # no markup; verify against the repository.
        expected = dedent("http://example.com"
                          "100%Some image")
        self.assertEqual(obtained, expected)
36 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 | #Ipython Notebook
103 | .ipynb_checkpoints
104 |
105 | # IDEA
106 | .idea/
107 |
--------------------------------------------------------------------------------
/indentml/formatter.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | import re
3 |
4 | from indentml.parser import QqTag, QqParser
5 |
6 |
class QqFormatter(object):
    """
    Base formatter: walks a tree of tags and renders it into a string.

    Custom formatters inherit from this class and define ``handle_<name>``
    methods, one per tag name.  A tag without a dedicated handler is passed
    to ``handle__fallback`` when the subclass defines it and is rendered as
    an empty string otherwise.
    """

    def __init__(self, root: "QqTag" = None, allowed_tags=None):
        """
        :param root: tree rendered by :meth:`do_format`; it may also be
            assigned later through the ``root`` attribute
        :param allowed_tags: tag names stored on the instance (an empty set
            when omitted)
        """
        # The annotation is quoted so that it is never evaluated at runtime.
        self.root = root
        self.allowed_tags = allowed_tags or set()

    def uses_tags(self):
        """
        Collect the tag names this formatter declares.

        A name is collected for every ``handle_<name>`` method, plus every
        name listed after ``Uses tags:`` (comma-separated) in the docstring
        of a handler or of a ``preprocess`` method.

        :return: set of tag names
        """
        prefix = "handle_"
        alltags = set()
        for name, method in inspect.getmembers(self,
                                               predicate=inspect.ismethod):
            is_handler = name.startswith(prefix)
            if not is_handler and name != 'preprocess':
                continue
            if is_handler:
                alltags.add(name[len(prefix):])
            for line in (method.__doc__ or "").splitlines():
                m = re.search(r"Uses tags:(.+)", line)
                if m:
                    # Skip blank entries left by stray commas, e.g.
                    # "Uses tags: a, b," -- an empty string is not a tag.
                    alltags.update(
                        tag.strip() for tag in m.group(1).split(",")
                        if tag.strip())
        return alltags

    def format(self, content) -> str:
        """
        :param content: could be QqTag or any iterable of QqTags and strings;
            ``None`` is rendered as an empty string
        :return: str: strings are copied as is, every other item is rendered
            by :meth:`handle`, and the pieces are concatenated
        """
        if content is None:
            return ""
        return "".join(
            child if isinstance(child, str) else self.handle(child)
            for child in content)

    def handle(self, tag):
        """
        Render a single tag with the most specific handler available.

        :param tag: object with a ``name`` attribute
        :return: result of ``handle_<name>`` if defined, else of
            ``handle__fallback`` if defined, else an empty string
        """
        handler = getattr(self, 'handle_' + tag.name, None)
        if handler is None:
            handler = getattr(self, 'handle__fallback', None)
        if handler is None:
            return ""
        return handler(tag)

    def do_format(self):
        """
        :return: str: rendering of ``self.root``
        """
        return self.format(self.root)
65 |
66 |
class DummyXMLFormatter(QqFormatter):
    """
    Formatter that renders every tag as an XML-like element.

    It defines no tag-specific handlers, so each tag goes through the
    fallback and becomes ``<name>content</name>``.
    """

    def handle__fallback(self, tag):
        """
        :param tag: tag to render; its ``name`` becomes the element name and
            the tag itself is passed to ``format`` to render its content
        :return: str: the rendered content wrapped in an opening and a
            closing element
        """
        # The closing element must start with "</" to be well-formed.
        return "<{name}>{content}</{name}>".format(
            name=tag.name, content=self.format(tag)
        )
72 |
73 |
def parse_and_format(doc: str,
                     formatter_factory,
                     allowed_tags=None) -> str:
    """
    Parse *doc* and render the resulting tree in one step.

    :param doc: source text of the document
    :param formatter_factory: callable without arguments that returns a
        formatter (for example, a formatter class)
    :param allowed_tags: tag names given to the parser; when ``None``, the
        names reported by the formatter's ``uses_tags()`` are used
    :return: str: output of the formatter's ``do_format()``
    """
    fmt = formatter_factory()
    tags = fmt.uses_tags() if allowed_tags is None else allowed_tags
    # The parsed tree becomes the formatter's root before rendering.
    fmt.root = QqParser(allowed_tags=tags).parse(doc)
    return fmt.do_format()
87 |
--------------------------------------------------------------------------------
/test/test_indexedlist.py:
--------------------------------------------------------------------------------
1 | # (c) Ilya V. Schurov, 2016
2 | # Available under MIT license (see LICENSE file in the root folder)
3 |
4 | import unittest
5 | from sortedcontainers import SortedList
6 |
7 | from indentml.indexedlist import IndexedList
8 |
9 |
class TestIndexedlistMethods(unittest.TestCase):
    """
    Tests for IndexedList.  Most tests mutate a list and compare its internal
    ``_directory`` (key -> sorted positions) with the expected mapping.
    """

    def test_creating_indexedlist1(self):
        """A single sequence argument supplies the elements."""
        q = IndexedList([['a', 'b'], ['a', 'd']])
        # Both elements are lists starting with 'a'.
        self.assertEqual(list(q._directory['a']), [0, 1])
        self.assertTrue(q.is_consistent())

    def test_creating_indexedlist2(self):
        """Several positional arguments of mixed types become the elements."""
        q = IndexedList(['a', 'b'], {'a': 123}, 'a', 123, ['a', 'b', 'c'],
                        ['b', 123], ['a'], {'b': 321})
        self.assertEqual(
            repr(q),
            "IndexedList([['a', 'b'], {'a': 123}, 'a', 123, ['a', 'b', 'c'], ['b', 123], ['a'], {'b': 321}])"
        )
        # repr() must round-trip through eval().
        self.assertEqual(eval(repr(q)), q)
        # The string 'a' and the number 123 are both filed under the
        # ``str`` key (positions 2 and 3).
        self.assertEqual(
            q._directory, {
                'b': SortedList([5, 7]),
                'a': SortedList([0, 1, 4, 6]),
                str: SortedList([2, 3])
            })
        self.assertTrue(q.is_consistent())

    def test_delitem(self):
        """Deleting an element shifts the stored positions after it."""
        q = IndexedList(['a', 'b'], {'a': 123}, 'a', 123, ['a', 'b', 'c'],
                        ['b', 123], ['a'], {'b': 321})

        # Each deletion below acts on the list left by the previous one.
        del q[0]
        self.assertEqual(
            q._directory, {
                'b': SortedList([4, 6]),
                'a': SortedList([0, 3, 5]),
                str: SortedList([1, 2])
            })
        self.assertTrue(q.is_consistent())

        del q[2]
        self.assertEqual(
            q._directory, {
                'b': SortedList([3, 5]),
                'a': SortedList([0, 2, 4]),
                str: SortedList([1])
            })
        self.assertTrue(q.is_consistent())

        del q[3]
        self.assertEqual(q._directory, {
            'b': SortedList([4]),
            'a': SortedList([0, 2, 3]),
            str: SortedList([1])
        })

        self.assertTrue(q.is_consistent())

        # Remove the remaining elements one by one; only consistency is
        # checked from here on.
        del q[0]
        self.assertTrue(q.is_consistent())

        del q[3]
        self.assertTrue(q.is_consistent())

        del q[0]
        self.assertTrue(q.is_consistent())

        del q[1]
        self.assertTrue(q.is_consistent())

        del q[0]
        self.assertTrue(q.is_consistent())
        self.assertEqual(q, [])

    def test_setitem(self):
        """Replacing an element moves its position to the new key."""
        q = IndexedList(['a', 'b'], {'a': 123}, 'a', 123, ['a', 'b', 'c'],
                        ['b', 123], ['a'], {'b': 321})
        # Position 0 moves from key 'a' to key ``str``.
        q[0] = 2
        self.assertEqual(
            q._directory, {
                'b': SortedList([5, 7]),
                'a': SortedList([1, 4, 6]),
                str: SortedList([0, 2, 3])
            })

        self.assertTrue(q.is_consistent())

        # Position 2 moves from key ``str`` to key 'b'.
        q[2] = ['b', 'c', 'd']
        self.assertEqual(
            q._directory, {
                'b': SortedList([2, 5, 7]),
                'a': SortedList([1, 4, 6]),
                str: SortedList([0, 3])
            })
        self.assertTrue(q.is_consistent())

        # Position 1 moves from key 'a' to the new key 'cd'.
        q[1] = ['cd', 'efg', 12]
        self.assertEqual(
            q._directory, {
                'a': SortedList([4, 6]),
                'b': SortedList([2, 5, 7]),
                'cd': SortedList([1]),
                str: SortedList([0, 3])
            })
        self.assertTrue(q.is_consistent())

    def test_insert(self):
        """Inserting an element shifts the stored positions at and after it."""
        q = IndexedList(['a', 'b'], {'a': 123}, 'a', 123, ['a', 'b', 'c'],
                        ['b', 123], ['a'], {'b': 321})
        # The string 'b' is filed under ``str``, not under 'b'.
        q.insert(2, 'b')
        self.assertEqual(
            q._directory, {
                'a': SortedList([0, 1, 5, 7]),
                'b': SortedList([6, 8]),
                str: SortedList([2, 3, 4])
            })
        self.assertTrue(q.is_consistent())

        # Inserting at the front shifts every stored position by one.
        q.insert(0, ['b', 123])
        self.assertEqual(
            q._directory, {
                'a': SortedList([1, 2, 6, 8]),
                'b': SortedList([0, 7, 9]),
                str: SortedList([3, 4, 5])
            })
        self.assertTrue(q.is_consistent())
131 |
132 |
133 | if __name__ == '__main__':
134 | unittest.main()
135 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | """A setuptools based setup module.
2 |
3 | See:
4 | https://packaging.python.org/en/latest/distributing.html
5 | https://github.com/pypa/sampleproject
6 | """
7 |
8 | # Always prefer setuptools over distutils
9 | from setuptools import setup, find_packages
10 | # To use a consistent encoding
11 | from codecs import open
12 | from os import path
13 |
14 | here = path.abspath(path.dirname(__file__))
15 |
16 | # Reading the long description from README.md is disabled (see the commented-out code below); a short inline text is used instead.
17 | # with open(path.join(here, 'README.md'), encoding='utf-8') as f:
18 | # long_description = f.read()
19 | long_description = r"""**indentml** is a simple general-purpose indent-based
20 | language suitable to describe tree-like structures.
21 | """
22 | setup(
23 | name='indentml',
24 |
25 | # Versions should comply with PEP440. For a discussion on single-sourcing
26 | # the version across setup.py and the project code, see
27 | # https://packaging.python.org/en/latest/single_source_version.html
28 | version='0.2.3.post1',
29 |
30 | description=('indentml is a simple general-purpose indent-based language'
31 | ' suitable to describe tree-like structures'),
32 | long_description=long_description,
33 |
34 | # The project's main homepage.
35 | url='https://github.com/ischurov/indentml',
36 |
37 | # Author details
38 | author='Ilya V. Schurov',
39 | author_email='ilya@schurov.com',
40 |
41 | # Choose your license
42 | license='MIT',
43 |
44 | # See https://pypi.python.org/pypi?%3Aaction=list_classifiers
45 | classifiers=[
46 | # How mature is this project? Common values are
47 | # 3 - Alpha
48 | # 4 - Beta
49 | # 5 - Production/Stable
50 | 'Development Status :: 3 - Alpha',
51 |
52 | # Indicate who your project is intended for
53 | 'Intended Audience :: Developers',
54 | 'Topic :: Software Development :: Build Tools',
55 | 'Topic :: Software Development :: Documentation',
56 |
57 | # Pick your license as you wish (should match "license" above)
58 | 'License :: OSI Approved :: MIT License',
59 |
60 | # Specify the Python versions you support here. In particular, ensure
61 | # that you indicate whether you support Python 2, Python 3 or both.
62 | 'Programming Language :: Python :: 3',
63 | 'Programming Language :: Python :: 3.6',
64 | 'Programming Language :: Python :: 3.7',
65 | 'Programming Language :: Python :: 3.8',
66 | 'Programming Language :: Python :: 3.9',
67 | 'Programming Language :: Python :: 3.10',
68 | ],
69 |
70 | # What does your project relate to?
71 | keywords='indent-based, markup',
72 |
73 | # You can just specify the packages manually here if your project is
74 | # simple. Or you can use find_packages().
75 | packages=find_packages(exclude=['test']),
76 |
77 | # Alternatively, if you want to distribute just a my_module.py, uncomment
78 | # this:
79 | # py_modules=["my_module"],
80 |
81 | # List run-time dependencies here. These will be installed by pip when
82 | # your project is installed. For an analysis of "install_requires" vs pip's
83 | # requirements files see:
84 | # https://packaging.python.org/en/latest/requirements.html
85 | install_requires=['sortedcontainers'],
86 |
87 | # List additional groups of dependencies here (e.g. development
88 | # dependencies). You can install these using the following syntax,
89 | # for example:
90 | # $ pip install -e .[dev,test]
91 | # extras_require={
92 | # 'html': ['yattag', 'mako', 'fuzzywuzzy', 'matplotlib', 'flask',
93 | # 'beautifulsoup4'],
94 | # },
95 |
96 | # If there are data files included in your packages that need to be
97 | # installed, specify them here. If using Python 2.6 or less, then these
98 | # have to be included in MANIFEST.in as well.
99 | # package_data={
100 | # 'sample': ['package_data.dat'],
101 | # },
102 |
103 | # Although 'package_data' is the preferred approach, in some case you may
104 | # need to place data files outside of your packages. See:
105 | # http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa
106 | # In this case, 'data_file' will be installed into '/my_data'
107 | # data_files=[('my_data', ['data/data_file'])],
108 |
109 | # To provide executable scripts, use entry points in preference to the
110 | # "scripts" keyword. Entry points provide cross-platform support and allow
111 | # pip to create the appropriate form of executable for the target platform.
112 | # entry_points={
113 | # 'console_scripts': [
114 | # 'qqmathbook=qqmbr.qqmathbook:main',
115 | # ],
116 | # },
117 | )
118 |
--------------------------------------------------------------------------------
/indentml/indexedlist.py:
--------------------------------------------------------------------------------
1 | # (c) Ilya V. Schurov, 2016
2 | # Available under MIT license (see LICENSE file in the root folder)
3 |
4 | from collections.abc import Mapping
5 | from collections import defaultdict
6 | from sortedcontainers import SortedList
7 | from typing import (TypeVar, Sequence, MutableSequence,
8 | List, Union, overload)
9 |
T = TypeVar("T")


class IndexedList(MutableSequence[T]):
    """
    IndexedList is a mixture of list and dictionary.
    Every element in IndexedList has a key and one can perform fast search
    by key.

    The key is calculated in the following way (see :meth:`get_key`):

    - ``str``: key is the type ``str`` itself (this is a special case)
    - object with a ``.qqkey()`` method: key is the value it returns
    - sequence: key is the first element, or ``None`` if it is empty
    - mapping: if it has only one record, its key is the key, otherwise
      ``collections.abc.Mapping`` is the key
    - any other object: key is the type ``str``

    The main purpose of this class is to provide effective
    BeautifulSoup-style navigation over the s-expression-like data
    structures.

    Internally ``_container`` holds the elements and ``_directory`` maps
    every key to the sorted positions of the elements having that key.
    """

    def __init__(self, *iterable) -> None:
        """
        :param iterable: the elements; a single argument that is itself a
            sequence is taken as the collection of elements
        """
        if len(iterable) == 1 and isinstance(iterable[0], Sequence):
            iterable = iterable[0]
        self._container: List[T] = list(iterable)
        self._directory = defaultdict(SortedList)
        self.update_directory()

    def _checked_index(self, i):
        """
        Translate *i* into a non-negative position inside the container.

        :raises TypeError: if *i* is a slice (not supported for assignment
            and deletion)
        :raises IndexError: if *i* is out of range
        """
        if isinstance(i, slice):
            raise TypeError("IndexedList does not support slice "
                            "assignment or deletion")
        size = len(self._container)
        if i < 0:
            # The directory stores non-negative positions only.
            i += size
        if not 0 <= i < size:
            raise IndexError("IndexedList index out of range")
        return i

    def _shift_indexes(self, start, delta):
        """Add *delta* to every stored position that is >= *start*."""
        for places in self._directory.values():
            # Take a snapshot first: the sorted list must not be modified
            # while it is being iterated.
            moved = [index for index in places if index >= start]
            for index in moved:
                places.remove(index)
            for index in moved:
                places.add(index + delta)

    def __delitem__(self, i):
        """Delete the element at position *i* (negative values allowed)."""
        i = self._checked_index(i)
        self._directory[self.get_key(self._container[i])].remove(i)
        self._shift_indexes(i + 1, -1)
        del self._container[i]

    @overload
    def __getitem__(self, idx: int) -> T: ...

    @overload
    def __getitem__(self, s: slice) -> Sequence[T]: ...

    def __getitem__(self, i):
        """Return the element (or, for a slice, the list of elements) at *i*."""
        return self._container[i]

    def __len__(self):
        return len(self._container)

    def __setitem__(self, i, item):
        """Replace the element at position *i* and re-index that position."""
        i = self._checked_index(i)
        self._directory[self.get_key(self._container[i])].remove(i)
        self._container[i] = item
        self.add_index(i, item)

    def insert(self, i, x):
        """
        Insert *x* before position *i*.

        Like ``list.insert``, a negative *i* counts from the end and an
        out-of-range *i* is clamped to the nearest end of the list.
        """
        size = len(self._container)
        if i < 0:
            i = max(0, i + size)
        elif i > size:
            i = size
        self._shift_indexes(i, 1)
        self._container.insert(i, x)
        self.add_index(i, x)

    def __str__(self):
        return str(self._container)

    def __repr__(self):
        return "IndexedList(%s)" % repr(self._container)

    def find_index(self, key):
        """
        :return: position of the first element with the given key
        :raises IndexError: if there is no such element
        """
        # Go through find_all_indexes(): indexing the defaultdict directly
        # would create an empty entry for every key that is looked up.
        return self.find_all_indexes(key)[0]

    def find_all_indexes(self, key):
        """
        :return: sorted positions of all elements with the given key (empty
            if there are none)
        """
        return self._directory.get(key, [])

    def find_all(self, key):
        """
        :return: list of all elements with the given key, in list order
        """
        return [self._container[i] for i in self.find_all_indexes(key)]

    def find(self, key):
        """
        :return: first element with the given key
        :raises IndexError: if there is no such element
        """
        return self._container[self.find_index(key)]

    def update_directory(self):
        """Rebuild the whole directory from the container."""
        self._directory.clear()
        for i, item in enumerate(self._container):
            self.add_index(i, item)

    def add_index(self, i, item):
        """Record position *i* under the key of *item*."""
        key = self.get_key(item)
        self._directory[key].add(i)

    def is_consistent(self):
        """
        :return: True if every element's position is recorded under the
            element's key
        """
        for i, el in enumerate(self._container):
            if i not in self.find_all_indexes(self.get_key(el)):
                return False
        return True

    def __eq__(self, other):
        """Compare by elements with another IndexedList or any sequence."""
        if isinstance(other, IndexedList):
            return self._container == other._container
        elif isinstance(other, Sequence):
            return self._container == other
        else:
            return False

    def clear(self):
        """Remove all elements and all directory entries."""
        self._container.clear()
        self._directory.clear()

    @staticmethod
    def get_key(item):
        """
        Calculate the key of *item* (see the class description for rules).
        """
        if isinstance(item, str):
            return str
        try:
            return item.qqkey()
        except AttributeError:
            if isinstance(item, Sequence):
                if item:
                    return item[0]
                else:
                    return None
            elif isinstance(item, Mapping):
                if len(item) == 1:
                    return list(item)[0]
                else:
                    return Mapping
            else:
                return str
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # indentml
2 | ## General-purpose indent-based markup language
3 |
4 | **indentml** (previously known as *MLQQ*) is a simple general-purpose indent-based markup language designed to represent tree-like structures in human-readable way. It is similar to *YAML* but simpler.
5 |
6 | ### Install
7 |
8 | pip install indentml
9 |
10 | Currently only Python 3 is supported.
11 |
12 | ### Code sample
13 |
14 | \topic
15 | \id dicts-objects
16 | \heading \lang en
17 | Dicts / objects
18 | \description \lang en
19 | The standard structure to store elements accessible by arbitrary keys
20 | (mapping) in Python is called a dict (dictionary) and in JavaScript
21 | — object.
22 | \compare
23 | \id dict-object-creation
24 | \what \lang en
25 | Creation of dictionary / object
26 | \python
27 | my_dict = {'a': 10, 'b': 20}
28 | my_dict['a']
29 | my_dict.a # error
30 | \js
31 | var my_obj = {a: 10, b: 20};
32 | my_obj['a']
33 | my_obj.a
34 | \comment \lang en
35 | You can access elements of an object either with square brackets
36 | or with dot-notation.
37 |
38 | \figure
39 | \source http://example.com/somefig.png
40 | \caption Some figure
41 | \width 500px
42 |
43 | \question
44 | Do you like qqmbr?
45 | \quiz
46 | \choice
47 | No.
48 | \comment You didn't even try!
49 | \choice \correct
50 | Yes, i like it very much!
51 | \comment And so do I!
52 |
53 |
54 | ### Syntax
55 | #### Special characters
56 | The following characters have special meaning in **indentml**:
57 |
58 | 1. **Tag beginning character.** This character is used to mark the beginning of any tag. By default, it is a backslash `\`
59 | (like in LaTeX), but can be configured to any other character. If you need to enter this character literally, you have
60 | to escape it with the same character (like `\\`). You can also escape other special characters listed below with *tag beginning character*.
61 | 2. Opening and closing brackets: `{`, `}`, and `[`, `]`, used to indicate the content that belongs to *inline tags*, see below.
62 | 3. Tabs are forbidden at the beginning of the line in **indentml** (just like in YAML).
63 |
64 | #### Block tags
65 | Block tags are typed at the beginning of the line, after several spaces that mark *indent* of a tag.
66 | Block tag starts with *tag beginning character* and ends with the whitespace or newline character. All the lines below the block tag
67 | whose indent is greater than the tag's indent are appended to the tag. When the indent decreases, the tag is closed. E.g.
68 |
69 | \tag
70 | Hello
71 | \othertag
72 | I'm indentml
73 | How are you?
74 | I'm fine
75 |
76 | will be translated into the following XML tree:
77 |
78 | <tag>Hello
79 | <othertag>I'm indentml
80 | </othertag>How are you?
81 | </tag>I'm fine
82 |
83 | The rest of the line on which a block tag begins is attached to that tag as well.
84 |
85 | #### Inline tags
86 | Inline tags are started with *tag beginning character* and ended by bracket: `{` or `[`. Type of bracket affects the
87 | processing. The tag's content is everything between its opening bracket and the corresponding closing bracket.
88 | It can spread over several lines.
89 |
90 | Brackets (of the same kind) inside the tag should be either balanced or escaped.
91 |
92 | For example,
93 |
94 | This is \tag{with some {brackets} inside}
95 |
96 | is valid markup: the contents of tag `tag` will be `with some {brackets} inside`.
97 |
98 | #### Allowed tags
99 | Only those tags are processed that are explicitly *allowed*. There are two sets defined: allowed block tags and allowed inline tags.
100 | The sequences that look like tags but are not in the appropriate set are considered as simple text.
101 |
102 | #### Indents and whitespaces
103 | Indent of the first line after the block tag is a *base indent* of this tag. All lines that belong to tag will be stripped
104 | from the left by the number of leading whitespaces that corresponds to the base indent. The rest of whitespaces will be preserved.
105 |
106 | For example:
107 |
108 | \pythoncode
109 | for i in range(1, 10):
110 | print(i)
111 |
112 | Here the contents of `pythoncode` tag is
113 |
114 | for i in range(1, 10):
115 | print(i)
116 |
117 | Note four whitespaces before `print`.
118 |
119 | If a line has an indent that is less than the *base indent*, it MUST be equal to the indent of one of the open block tags. Then
120 | all the tags up to that one (including that one) will be closed.
121 |
122 | For example, the following is forbidden:
123 |
124 | \code
125 | some string with indent 4
126 | some string with indent 2
127 |
128 | It is possible to use any indent values but multiples of 4 are recommended (like [PEP-8](https://www.python.org/dev/peps/pep-0008/)).
129 |
--------------------------------------------------------------------------------
/test/test_parser.py:
--------------------------------------------------------------------------------
1 | # (c) Ilya V. Schurov, 2016
2 | # Available under MIT license (see LICENSE file in the root folder)
3 |
4 | import unittest
5 | from textwrap import dedent
6 |
7 | from indentml.parser import QqTag, QqParser
8 |
9 |
class TestQqTagMethods(unittest.TestCase):
    """Tests for building QqTag trees, navigating them and editing them."""

    def test_create_qqtag(self):
        """Create a tag from a one-item dict and from a name with children."""
        q = QqTag({'a': 'b'})
        self.assertEqual(q.name, 'a')
        self.assertEqual(q.value, 'b')

        q = QqTag('a', [
            QqTag('b', 'hello'),
            QqTag('c', 'world'),
            QqTag('b', 'this'),
            QqTag('--+-', [
                QqTag('b', 'way'),
                "this"
            ])])

        self.assertEqual(q.name, 'a')
        # self.assertEqual(q._children, IndexedList([QqTag('b', ['hello']), QqTag('c', ['world']), QqTag('b', ['this']),
        # QqTag('--+-', [QqTag('b', ['way']), 'this'])]))
        # self.assertEqual(eval(repr(q)), q)
        # as_list() is expected to give nested lists, each starting with
        # the tag name.
        self.assertEqual(q.as_list(),
                         ['a', ['b', 'hello'], ['c', 'world'],
                          ['b', 'this'], ['--+-', ['b', 'way'], 'this']])

    def test_qqtag_accessors(self):
        """Reach children by ``name_`` attribute, call, find() and index."""
        q = QqTag('a', [
            QqTag('b', 'hello'),
            QqTag('c', 'world'),
            QqTag('b', 'this'),
            QqTag('--+-', [
                QqTag('b', 'way'),
                "this"
            ])])

        # ``q.b_`` is expected to be the first child named 'b'.
        self.assertEqual(q.b_.value, 'hello')
        self.assertEqual(q.c_.value, 'world')
        # Calling the tag with a name is expected to give all such children.
        self.assertEqual([b.as_list() for b in q('b')], [['b', 'hello'],
                                                         ['b', 'this']])
        # find() covers names that cannot be written as an attribute.
        self.assertEqual(q.find('--+-').b_.value, 'way')
        self.assertEqual(q[0].value, 'hello')
        self.assertEqual(q[1].value, 'world')
        self.assertEqual(q[3][0].value, 'way')

    def test_qqtag_backlinks(self):
        """``idx`` of a child must follow appends, deletions and inserts."""
        q = QqTag('a', [
            QqTag('b', 'hello'),
            QqTag('c', 'world'),
            QqTag('b', 'this'),
            QqTag('--+-', [
                QqTag('b', 'way'),
                "this"
            ])])
        self.assertTrue(q._is_consistent())
        new_tag = QqTag({'qqq': 'bbb'})
        q.append_child(new_tag)
        self.assertEqual(new_tag.idx, 4)
        # Deleting an earlier child moves new_tag one position to the left.
        del q[0]
        self.assertEqual(new_tag.idx, 3)
        self.assertTrue(q._is_consistent())

        other_tag = QqTag({'other': ['some', 'values']})
        # Inserting before new_tag moves it one position to the right.
        q.insert(2, other_tag)
        self.assertEqual(other_tag.idx, 2)
        self.assertEqual(new_tag.idx, 4)

        third_tag = QqTag({'this': 'hi'})
        q[3] = third_tag
        self.assertEqual(third_tag.idx, 3)
        self.assertTrue(q._is_consistent())

    def test_qqtag_prev_next(self):
        """prev() and next() are expected to give the neighbouring siblings."""
        q = QqTag('a', [
            QqTag('b', 'hello'),
            QqTag('c', 'world'),
            QqTag('b', 'this'),
            QqTag('--+-', [
                QqTag('b', 'way'),
                "this"
            ])])

        self.assertEqual(q.c_.prev().value, 'hello')
        self.assertEqual(q.b_.next().value, 'world')
        self.assertEqual(q.c_.next().value, 'this')

    def test_qqtag_insert(self):
        """Inserting at the front shifts existing children and their idx."""
        z = QqTag("z")
        w = QqTag("w")
        q = QqTag('a', [
            "Hello",
            "World",
            z,
            "This"])
        self.assertEqual(q[2], z)
        q.insert(0, w)
        self.assertEqual(q[0], w)
        self.assertEqual(q[3], z)
        self.assertEqual(q[3].idx, 3)
        self.assertTrue(q._is_consistent())

    def test_qqtag_del(self):
        """Deleting a child shifts the children after it and their idx."""
        z = QqTag("z")
        w = QqTag("w")
        q = QqTag('a', [
            "Hello",
            "World",
            z,
            "This",
            w
        ])
        del q[2]
        self.assertTrue(q._is_consistent())
        self.assertEqual(q[3], w)
        self.assertEqual(q[3].idx, 3)
        self.assertEqual(q.as_list(),
                         ["a", "Hello", "World", "This", ["w"]])
124 |
125 |
126 | class TestQqParser(unittest.TestCase):
127 | def test_block_tags1(self):
128 | doc = r"""Hello
129 | \tag
130 | World
131 | """
132 | parser = QqParser(allowed_tags={'tag'})
133 | tree = parser.parse(doc)
134 | print(tree.as_list())
135 | self.assertEqual(tree[0], "Hello")
136 |
137 | self.assertEqual(tree.tag_.name, 'tag')
138 | self.assertEqual(tree.tag_.value, 'World')
139 |
140 | def test_block_tags_nested(self):
141 | doc = r"""Hello
142 | \tag
143 | World
144 | \othertag
145 | This
146 | Is
147 | A test
148 | The end
149 |
150 | Blank line before the end
151 | """
152 | parser = QqParser(allowed_tags={'tag', 'othertag'})
153 | tree = parser.parse(doc)
154 | print(tree.as_list())
155 | self.assertEqual(tree[0], "Hello")
156 | self.assertEqual(tree.tag_[0], "World")
157 | self.assertEqual(tree.tag_.othertag_._children, ["This\nIs"])
158 | self.assertEqual(tree.tag_[2], 'A test')
159 | self.assertEqual(tree[2], 'The end\n\nBlank line before the end')
160 | self.assertEqual(tree.tag_.parent, tree)
161 | self.assertEqual(tree.tag_.othertag_.parent, tree.tag_)
162 |
163 | def test_block_additional_indent(self):
164 | doc = r"""Hello
165 | \tag
166 | First
167 | Second
168 | Third
169 | End"""
170 | parser = QqParser(allowed_tags={'tag'})
171 | tree = parser.parse(doc)
172 | self.assertEqual(tree.tag_._children,
173 | ['First\n Second\nThird'])
174 |
175 | def test_match_bracket(self):
176 | doc = dedent("""\
177 | hello { world {
178 | some test } {
179 | okay { }
180 | this is a test }} test
181 | """)
182 | parser = QqParser()
183 | parser.parse_init(doc)
184 | start = parser.position(0, 6)
185 | stop = parser.position(None, 0)
186 | out = parser.match_bracket(start, stop)
187 | self.assertEqual(out.clipped_line(stop),
188 | "} test\n")
189 |
190 | def test_inline_tag_contents(self):
191 | doc = dedent("""\
192 | haha \\tag{this}{
193 | that}[another]{this
194 | }[okay test] stop
195 | """)
196 | parser = QqParser(allowed_tags={"tag"})
197 | parser.parse_init(doc)
198 | start = parser.position(0, 0)
199 | stop = parser.position(None, 0)
200 | tag_position, tag, type, after = parser.locate_tag(start, stop)
201 | self.assertEqual(start.clipped_line(tag_position), "haha ")
202 | self.assertEqual(tag, "tag")
203 | self.assertEqual(type, "inline")
204 |
205 | items = parser.inline_tag_contents(after, stop)
206 | contents = ["".join(item['start'].lines_before(item['stop']))
207 | for item in items]
208 | self.assertEqual(contents,
209 | ["this", "\n that",
210 | "another", "this\n ", "okay test"])
211 | self.assertEqual([item['type'] for item in items],
212 | ['{', '{', '[', '{', '['])
213 |
def test_scan_after_attribute_tag(self):
    """
    After the first tag, ``scan_after_attribute_tag`` must return the
    position just before the next tag that is not nested in brackets.
    """
    source = dedent("""\
test \\tag this \\tag{inline \\tag{} \\tag}q \\tag
other tag
""")
    qq = QqParser(allowed_tags={"tag"})
    qq.parse_init(source)
    doc_start = qq.position(0, 0)
    doc_end = qq.position(None, 0)
    _, _, _, after = qq.locate_tag(doc_start, doc_end)

    scan_from = after.copy()
    found = qq.scan_after_attribute_tag(scan_from, doc_end)
    expected = 'this \\tag{inline \\tag{} \\tag}q '
    self.assertEqual(scan_from.clipped_line(found), expected)
229 |
def test_scan_after_attribute_tag2(self):
    """
    Same as the previous scan test, but the next tag follows the closing
    brace directly, so the scanned text ends at that brace.
    """
    source = dedent("""\
test \\tag this \\tag{inline \\tag{} \\tag}\\tag
other tag
""")
    qq = QqParser(allowed_tags={"tag"})
    qq.parse_init(source)
    doc_start = qq.position(0, 0)
    doc_end = qq.position(None, 0)
    _, _, _, after = qq.locate_tag(doc_start, doc_end)

    scan_from = after.copy()
    found = qq.scan_after_attribute_tag(scan_from, doc_end)
    expected = 'this \\tag{inline \\tag{} \\tag}'
    self.assertEqual(scan_from.clipped_line(found), expected)
245 |
def test_inline_tag1(self):
    """An inline tag splits the surrounding text into separate children."""
    source = r"""Hello, \tag{inline} tag!
"""
    parsed = QqParser(allowed_tags={'tag'}).parse(source)
    self.assertEqual(parsed[0], 'Hello, ')
    self.assertEqual(parsed.tag_.value, 'inline')
    self.assertEqual(parsed[2], ' tag!')
254 |
def test_inline_tag2(self):
    """An inline tag nested inside another inline tag becomes its child."""
    source = r"""Hello, \othertag{\tag{inline} tag}!
"""
    qq = QqParser(allowed_tags={'tag', 'othertag'})
    parsed = qq.parse(source)
    expected = [
        '_root',
        'Hello, ',
        ['othertag', ['tag', 'inline'], ' tag'],
        '!',
    ]
    self.assertEqual(parsed.as_list(), expected)
264 |
def test_inline_tag3(self):
    """An inline tag may span several lines, including a blank one."""
    source = r"""Hello, \tag{
this is a continuation of inline tag on the next line

the next one\othertag{okay}}
"""
    qq = QqParser(allowed_tags={'tag', 'othertag'})
    parsed = qq.parse(source)
    tag_text = ('\nthis is a continuation of inline tag on the next line'
                '\n\nthe next one')
    expected = [
        '_root',
        'Hello, ',
        ['tag', tag_text, ['othertag', 'okay']],
        '',
    ]
    self.assertEqual(parsed.as_list(), expected)
285 |
def test_inline_tag4(self):
    """Brackets that do not belong to a tag stay in the text verbatim."""
    source = r"Hello, \tag{I'm [your{taggy}] tag} okay"
    qq = QqParser(allowed_tags={'tag', 'othertag'})
    parsed = qq.parse(source)
    expected = [
        '_root',
        'Hello, ',
        ['tag', "I'm [your{taggy}] tag"],
        " okay",
    ]
    self.assertEqual(parsed.as_list(), expected)
298 |
def test_block_and_inline_tags(self):
    """
    Parse a document mixing block tags, inline tags and stray braces and
    compare the resulting tree (as nested lists) with the expected one.
    """
    # NOTE(review): the nesting of the block tags depends on indentation
    # that is not visible here -- confirm the literal against the
    # repository.
    doc = r"""Hello,
\tag
I'm your \othertag{tag}
\tag
{
\tag
{
this \tag{is a {a test}
okay}
}
}
"""
    parser = QqParser(allowed_tags={'tag', 'othertag'})
    tree = parser.parse(doc)
    self.assertEqual(tree.as_list(), [
        '_root', 'Hello,\n',
        [
            'tag',
            "I'm your ",
            ['othertag', 'tag'],
            '\n',
            [
                'tag',
                '{\n',
                [
                    'tag',
                    '{\nthis ',
                    [
                        'tag',
                        'is a {a test}\nokay',
                    ],
                    '\n'
                ],
                '}\n'
            ],
            '}\n'
        ]
    ])
338 |
def test_sameline_tags(self):
    """
    Parse a larger sample document in which tags are followed by content
    or by further tags on the same line, and compare the whole tree (as
    nested lists) with the expected structure.
    """
    # Show the full diff if the large comparison below fails.
    self.maxDiff = None
    # NOTE(review): the block structure of this sample depends on
    # indentation that is not visible here -- confirm the literal against
    # the repository.
    doc = r""" Hello!
\h1 Intro to qqmbr

\h2 Fresh documentation system

**qqmbr** is a documentation system intended to be extremely simple and extremely extensible.
It was written to allow writing rich content that can be compiled into different formats.
One source, multiple media: HTML, XML, LaTeX, PDF, eBooks, any other. Look below to see it in action.

\h3 This is nice level-3 header

Some paragraph text. See also \ref{sec:another} (reference to different header).

There are LaTeX formulas here:

\eq
x^2 + y^2 = z^2

`\\eq` is a qqtag. It is better than tag, because it is auto-closing (look at the indent, like Python).

Here is formula with the label:

\equation \label eq:Fermat
x^n + y^n = z^n, \quad n>2

Several formulas with labels:

\gather
\item \label eq:2x2
2\times 2 = 4
\item \label eq:3x3
3\times 3 = 9

We can reference formula \eqref{eq:Fermat} and \eqref{eq:2x2} just like we referenced header before.

\h3 Another level-3 header \label sec:another

Here is the header we referenced.

\h3 More interesting content

\figure
\source http://example.com/somefig.png
\caption Some figure
\width 500px

\question
Do you like qqmbr?
\quiz
\choice \correct false
No.
\comment You didn't even try!
\choice \correct true
Yes, i like it very much!
\comment And so do I!
"""
    parser = QqParser(
        allowed_tags={'h1', 'h2', 'h3', 'eq', 'equation', 'label',
                      'gather', 'inlne', 'item', 'ref', 'eqref',
                      'source', 'caption', 'width', 'question',
                      'quiz', 'choice',
                      'comment', 'correct', 'figure'})
    tree = parser.parse(doc)
    # Debug output of the parsed tree.
    print(tree.as_list())
    self.assertEqual(tree.as_list(), ['_root',
                                      'Hello!',
                                      ['h1', 'Intro to qqmbr'],
                                      '',
                                      ['h2',
                                       'Fresh documentation system'],
                                      '\n**qqmbr** is a documentation system intended to be extremely simple and extremely extensible.\nIt was written to allow writing rich content that can be compiled into different formats.\nOne source, multiple media: HTML, XML, LaTeX, PDF, eBooks, any other. Look below to see it in action.\n',
                                      ['h3',
                                       'This is nice level-3 header'],
                                      '\nSome paragraph text. See also ',
                                      ['ref', 'sec:another'],
                                      ' (reference to different header).\n\nThere are LaTeX formulas here:\n',
                                      ['eq',
                                       'x^2 + y^2 = z^2'],
                                      '\n`\\eq` is a qqtag. It is better than tag, because it is auto-closing (look at the indent, like Python).\n\nHere is formula with the label:\n',
                                      ['equation',
                                       ['label', 'eq:Fermat'],
                                       'x^n + y^n = z^n, \\quad n>2'],
                                      '\nSeveral formulas with labels:\n',
                                      ['gather',
                                       ['item',
                                        ['label', 'eq:2x2'],
                                        '2\\times 2 = 4'],
                                       ['item',
                                        ['label', 'eq:3x3'],
                                        '3\\times 3 = 9']],
                                      '\nWe can reference formula ',
                                      ['eqref', 'eq:Fermat'],
                                      ' and ',
                                      ['eqref', 'eq:2x2'],
                                      ' just like we referenced header before.\n',
                                      ['h3',
                                       'Another level-3 header ',
                                       ['label',
                                        'sec:another']],
                                      '\nHere is the header we referenced.\n',
                                      ['h3',
                                       'More interesting content'],
                                      '',
                                      ['figure',
                                       ['source',
                                        'http://example.com/somefig.png'],
                                       ['caption',
                                        'Some figure'],
                                       ['width', '500px']],
                                      '',
                                      ['question',
                                       'Do you like qqmbr?',
                                       ['quiz',
                                        ['choice',
                                         ['correct', 'false'],
                                         'No.',
                                         ['comment',
                                          "You didn't even try!"]],
                                        ['choice',
                                         ['correct', 'true'],
                                         'Yes, i like it very much!',
                                         ['comment',
                                          'And so do I!']]]]])
464 |
def test_inline_tag_at_the_beginning_of_the_line(self):
    """
    An inline tag that opens a line inside a block tag must become a
    child of that block tag, after the preceding text.
    """
    # NOTE(review): the content lines are expected to be nested under the
    # first tag; their indentation is not visible here -- confirm against
    # the repository.
    doc = r"""\tag
some content here here and here and we have some inline
\tag{here and \othertag{there}}
"""
    parser = QqParser(allowed_tags={'tag', 'othertag'})
    tree = parser.parse(doc)
    self.assertEqual(tree.as_list(), ['_root', ['tag','some content '
                                                      'here here and '
                                                      'here and we '
                                                      'have some '
                                                      'inline\n',
                                               ['tag', 'here and ',['othertag',
                                                                    'there']],
                                               '']])
480 |
def test_alias2tag(self):
    """Aliases listed in ``alias2tag`` are parsed as the tags they map to."""
    source = r"""\# Heading 1
\## Heading 2
Hello
"""
    aliases = {"#": 'h1', "##": 'h2'}
    qq = QqParser(allowed_tags={'h1', 'h2'}, alias2tag=aliases)
    parsed = qq.parse(source)
    expected = [
        "_root",
        ["h1", "Heading 1"],
        ["h2", "Heading 2"],
        "Hello",
    ]
    self.assertEqual(parsed.as_list(), expected)
492 |
def test_non_allowed_tag_with_bracket(self):
    """A tag that is not allowed is left in the text together with its braces."""
    source = r"""Hello \inlinetag{some \forbiddentag{here} okay} this"""
    parsed = QqParser(allowed_tags={'inlinetag'}).parse(source)
    expected = [
        "_root",
        "Hello ",
        ["inlinetag", "some \\forbiddentag{here} okay"],
        " this",
    ]
    self.assertEqual(parsed.as_list(), expected)
498 |
def test_escape_unescape(self):
    """
    Escaped sequences (doubled tag character, escaped space, escaped
    opening brackets) must end up in the tree as the plain characters
    and must not be treated as markup.
    """
    # NOTE(review): the lines after the third tag are expected to be
    # nested under it; their indentation is not visible here -- confirm
    # the literal against the repository.
    doc = r"""Hello
\sometag test
\\sometag test
\sometag
\ here we are
we are here
some \inline{tag with \{ curve bracket inside} okay
some \inline[square bracket \[ inside] okay
"""
    parser = QqParser(allowed_tags={'sometag', 'inline'})
    tree = parser.parse(doc)
    self.assertEqual(tree.as_list(), [
        "_root", "Hello",
        ["sometag", "test"],
        "\\sometag test",
        ["sometag", " here we are\nwe are here"],
        "some ",
        ["inline", "tag with { curve bracket inside"],
        " okay\nsome ",
        ["inline",
         ["_item", "square bracket [ inside"]],
        " okay"
    ])
523 |
def test_square_bracket_inline(self):
    """Every square-bracket argument of an inline tag becomes an ``_item``."""
    source = r"Some inline \tag[with][multiple][arguments]"
    parsed = QqParser(allowed_tags={"tag"}).parse(source)
    items = [["_item", word] for word in ("with", "multiple", "arguments")]
    expected = ["_root", "Some inline ", ["tag"] + items]
    self.assertEqual(parsed.as_list(), expected)
537 |
def test_mixed_brackets_inline(self):
    """
    Square-bracket arguments become ``_item`` children while a
    curly-bracket argument is added as plain content.
    """
    source = r"Some inline \tag[with]{multiple}[arguments]"
    parsed = QqParser(allowed_tags={"tag"}).parse(source)
    expected_tag = [
        "tag",
        ["_item", "with"],
        "multiple",
        ["_item", "arguments"],
    ]
    expected = ["_root", "Some inline ", expected_tag]
    self.assertEqual(parsed.as_list(), expected)
551 |
def test_multiline_inline_with_attribute(self):
    """Tags without brackets inside a multi-line inline tag become its children."""
    source = "\\tag{hello \\tag world \n this is \n a \\tag test}"
    parsed = QqParser(allowed_tags={"tag"}).parse(source)
    outer = [
        'tag',
        'hello ',
        ['tag', 'world \n this is \n a'],
        ['tag', 'test'],
    ]
    self.assertEqual(parsed.as_list(), ['_root', outer])
561 |
def test_multiple_arguments2(self):
    """
    An inline tag with two square-bracket arguments, the first of which
    spans two lines and contains another tag, must yield two ``_item``
    children.
    """
    # NOTE(review): the text lines are expected to be nested under the
    # block tag; their indentation is not visible here -- confirm the
    # literal against the repository.
    doc = r"""\proof
By \ref[existence
and uniqueness theorem\nonumber][thm:4:eu] there exists
"""
    parser = QqParser(allowed_tags={"proof", "ref", "nonumber"})
    tree = parser.parse(doc)
    # Debug output of the parsed tree.
    print(tree.as_list())
    self.assertEqual(tree.as_list(),
                     ['_root', ['proof', 'By ',
                                ['ref',
                                 ['_item',
                                  'existence\nand uniqueness theorem',
                                  ['nonumber']],
                                 ['_item',
                                  'thm:4:eu']],
                                ' there exists']])
579 |
def test_empty_square_bracket_tag(self):
    """
    A square-bracket argument that contains only line breaks must still
    produce an ``_item`` child.
    """
    # NOTE(review): the lines after the block tag are expected to be
    # nested under it; their indentation is not visible here -- confirm
    # the literal against the repository.
    doc = r"""\blocktag
Some \empty[

] tag
"""
    parser = QqParser(allowed_tags={'blocktag', 'empty'})
    tree = parser.parse(doc)
    self.assertEqual(tree.as_list(),["_root", ['blocktag', 'Some ',
                                               ['empty',
                                                ['_item', '\n']],
                                               ' tag']])
592 |
def test_blocktag_inside_inlinetag(self):
    """
    Tags written without brackets inside a square-bracket argument of an
    inline tag must become children of that argument's ``_item``.
    """
    # Case 1: everything on a single line.
    doc = r"""\blocktag Some \inlinetag[Hello \blocktag test]"""
    parser = QqParser(allowed_tags={'inlinetag', 'blocktag'})
    tree = parser.parse(doc)

    self.assertEqual(tree.as_list(), ['_root', ['blocktag', 'Some ',
                                                ['inlinetag',
                                                 ['_item', 'Hello ',
                                                  ['blocktag', 'test']]]]])

    # Case 2: the argument continues on a second line.
    # NOTE(review): the indentation of the second line is not visible
    # here -- confirm the literal against the repository.
    doc = r"""Some \inlinetag[Hello \blocktag test
\blocktag another test]"""
    parser = QqParser(allowed_tags={'inlinetag', 'blocktag'})
    tree = parser.parse(doc)
    # Debug output of the parsed tree.
    print(tree.as_list())
    self.assertEqual(tree.as_list(),
                     ['_root', 'Some ', ['inlinetag',
                                         ['_item', 'Hello ',
                                          ['blocktag', 'test'],
                                          ['blocktag',
                                           'another test']]]])
614 |
def test_inlinetag_with_multiple_arguments(self):
    """An inline tag after a same-line block tag keeps all of its arguments."""
    source = r"""\blocktag Some \inlinetag[Hello][world]"""
    qq = QqParser(allowed_tags={'inlinetag', 'blocktag'})
    parsed = qq.parse(source)
    inline = ["inlinetag", ["_item", "Hello"], ["_item", "world"]]
    expected = ["_root", ["blocktag", "Some ", inline]]
    self.assertEqual(parsed.as_list(), expected)
623 |
def test_end_with_empty_line(self):
    """
    Smoke test: a document whose last tag is followed only by an empty
    line must parse without raising. The result is not inspected.
    """
    source = dedent(r"""
\tag

""")
    qq = QqParser(allowed_tags={"tag"})
    qq.parse(source)
631 |
def test_as_etree(self):
    """
    Convert a parsed tree with nested tags to an ElementTree element and
    compare its serialized form with the expected bytes.
    """
    # NOTE(review): the nesting of the sample depends on indentation that
    # is not visible here, and the expected bytes below contain no markup
    # for the nested tag elements nor a closing root tag, which looks
    # like an extraction artifact -- confirm both against the repository.
    doc = dedent(r"""
\tag
some content
\tag
other content
more text here
""")
    parser = QqParser(allowed_tags={"tag"})
    tree = parser.parse(doc)
    self.assertEqual(et.tostring(tree.as_etree()),
                     b'<_root>some content'
                     b'other content'
                     b'more text here'
                     b'')
647 |
def test_newline(self):
    """
    A blank line between the content of a tag and the following text
    must show up as a leading line break of that text, not inside the
    tag.
    """
    # NOTE(review): the first content line is expected to be nested under
    # the tag; indentation is not visible here -- confirm the literal
    # against the repository.
    doc = dedent(r"""
\tag
Hello

Stop.
""")
    parser = QqParser(allowed_tags={'tag'})
    tree = parser.parse(doc)
    self.assertEqual(tree.as_list(),
                     ['_root', '', ['tag', 'Hello'],
                      '\nStop.'])
660 |
def test_children_tags(self):
    """
    ``children_tags`` must yield only the children that are tags,
    skipping the strings between them, in document order.
    """
    # NOTE(review): the nesting of the sample depends on indentation that
    # is not visible here -- confirm the literal against the repository.
    doc = dedent(r"""
\tag
some content
\tag
other content
more text here
\tag
some other tag
""")
    parser = QqParser(allowed_tags={"tag"})
    tree = parser.parse(doc)
    # tree.tag_ is the first child of the root named "tag".
    children = (list(tree.tag_.children_tags()))
    self.assertEqual(children[0].as_list(), ["tag", "other content"])
    self.assertEqual(children[1].as_list(), ["tag", "some other tag"])
676 |
def test_blank_line_after_tag(self):
    """
    A blank line right after a block tag must be kept as a leading line
    break of the tag's content, and the nested tag must stay its child.
    """
    # NOTE(review): the nesting of the sample depends on indentation that
    # is not visible here -- confirm the literal against the repository.
    doc = dedent(r"""
\tag

otherline
\tag
othertag
""")
    parser = QqParser(allowed_tags={"tag"})
    tree = parser.parse(doc)
    self.assertEqual(tree.as_list(),
                     ['_root', '',
                      ['tag', '\notherline', ['tag', 'othertag']]])
690 |
--------------------------------------------------------------------------------
/indentml/parser.py:
--------------------------------------------------------------------------------
1 | # (c) Ilya V. Schurov, 2016 — 2021
2 | # Available under MIT license (see LICENSE file in the root folder)
3 |
4 | from collections import namedtuple
5 | from collections.abc import Sequence, MutableSequence
6 | from collections import namedtuple
7 | from indentml.indexedlist import IndexedList
8 | import re
9 | from functools import total_ordering
10 | import os
11 | from xml.etree.ElementTree import Element
12 | from itertools import islice, groupby
13 | from typing import Optional, Iterator, Union, overload, Sequence, List
14 |
15 |
class QqError(Exception):
    """Error raised by the tree and parser code in this module."""
18 |
19 |
class QqTag(MutableSequence):
    """
    A named tree node: a tag name plus an ordered list of children, each
    of which is either a string or another QqTag.

    Children are kept in an IndexedList (``self._children``). QqTag
    behaves mostly like an eTree Element and provides eTree and
    BeautifulSoup-style navigation over its *direct* children (in
    contrast with BeautifulSoup, nothing here is recursive):

    - ``tag.find('subtag')`` returns the first occurrence of a child
      with name ``subtag``, or None.
    - ``tag.subtag_`` (note the trailing underscore) is a shortcut for
      ``tag.find_or_empty('subtag')``.
    - ``tag.find_all('subtag')`` returns an adopter QqTag named ``_``
      that holds all children with name ``subtag``.
    - ``tag('subtag')`` is a shortcut for ``tag.find_all('subtag')``.

    If a QqTag has exactly one child and that child is a string, the tag
    is called *simple* and its ``.value`` is defined. (Useful for access
    to property-like subtags.)
    """

    def __init__(self, name, children=None, parent=None, idx=None,
                 adopt=False):
        """
        :param name: tag name, or a one-item dict ``{name: children}``
        :param children: None, a single str/int/float, or a sequence of
            children
        :param parent: parent tag, if any
        :param idx: position of this tag among its parent's children
        :param adopt: if True, the tag does not register itself as the
            parent of its children
        :raises QqError: if ``children`` is of an unsupported type
        """
        if isinstance(name, dict) and len(name) == 1:
            # {"name": children} shortcut; every other argument is
            # forwarded unchanged.
            ((tag_name, tag_children),) = name.items()
            self.__init__(
                tag_name, tag_children, parent=parent, idx=idx, adopt=adopt
            )
            return

        self.name = name
        self.parent = parent
        self.idx = idx
        # tag has to know its place in the list of parents children
        # to be able to navigate to previous / next siblings

        self.adopter = adopt
        # tag is called 'adopter' if it does not register itself as
        # a parent of its children
        # TODO: write test for adoption

        self._children: IndexedList[Union[str, "QqTag"]]

        if children is None:
            self._children = IndexedList()
        elif isinstance(children, (str, int, float)):
            # a single scalar becomes the only child
            self._children = IndexedList([children])
        elif isinstance(children, Sequence):
            self._children = IndexedList(children)
        else:
            raise QqError(
                "I don't know what to do with children " + str(children)
            )

        if not adopt:
            for i, child in enumerate(self):
                if isinstance(child, QqTag):
                    child.parent = self
                    child.idx = i

    def __repr__(self):
        if self.parent is None:
            return "QqTag(%s, %s)" % (
                repr(self.name),
                repr(self._children),
            )
        return "QqTag(%s, %s, parent = %s)" % (
            repr(self.name),
            repr(self._children),
            repr(self.parent.name),
        )

    def __str__(self):
        return "{%s : %s}" % (self.name, self._children)

    def __eq__(self, other):
        """Tags are equal when their ``as_list()`` forms are equal."""
        if other is None or not isinstance(other, QqTag):
            return False
        return self.as_list() == other.as_list()

    @property
    def is_simple(self):
        """
        Simple tags are those containing only one child
        and it is string
        :return: bool
        """
        return len(self) == 1 and isinstance(self[0], str)

    @property
    def value(self):
        """
        The only child of a simple tag.

        :raises QqError: if the tag is not simple
        """
        if self.is_simple:
            return self[0]
        raise QqError(
            "More than one child, value is not defined, QqTag: "
            + str(self)
        )

    @value.setter
    def value(self, value):
        if self.is_simple:
            self[0] = value
        else:
            raise QqError("More than one child, cannot set value")

    def qqkey(self):
        """Returns the tag name."""
        return self.name

    def __getattr__(self, attr):
        # Only called when normal attribute lookup fails:
        # ``tag.name_`` is a shortcut for ``tag.find_or_empty('name')``.
        if attr.endswith("_"):
            return self.find_or_empty(attr[:-1])
        raise AttributeError("Attribute " + attr + " not found")

    def __bool__(self):
        """A tag is truthy when it has at least one child."""
        return bool(self._children)

    def find(self, key: str) -> Optional["QqTag"]:
        """
        Returns direct children with the given key if it exists,
        otherwise returns None
        :param key: key
        :return: QqTag
        """
        if key in self._children._directory:
            return self._children.find(key)
        return None

    def find_or_empty(self, key: str) -> "QqTag":
        """
        The same as find, but returns empty QqTag (named ``_``) if finds
        nothing
        :param key:
        :return:
        """
        if key in self._children._directory:
            return self._children.find(key)
        return QqTag("_")

    def find_all(self, key: str) -> "QqTag":
        """
        Returns an adopter QqTag named ``_`` holding every direct child
        with the given key; the children keep their original parent.
        """
        return QqTag("_", self._children.find_all(key), adopt=True)

    def __call__(self, key):
        return self.find_all(key)

    def as_list(self) -> list:
        """
        Returns the tree as nested lists: ``[name, child, child, ...]``
        where tag children are converted recursively.
        """
        ret = [self.name]
        for child in self:
            if isinstance(child, QqTag):
                ret.append(child.as_list())
            else:
                ret.append(child)
        return ret

    def insert(self, idx: int, child) -> None:
        """
        Inserts child before position idx and keeps the ``parent`` /
        ``idx`` bookkeeping of tag children up to date.

        An adopter only stores the child: it does not own its children,
        so (like ``__delitem__``) it must not touch their bookkeeping.
        """
        self._children.insert(idx, child)
        if self.adopter:
            return
        if isinstance(child, QqTag):
            child.parent = self
            child.idx = idx
        # everything after the insertion point moved one step right
        for sibling in self._children[idx + 1 :]:
            if isinstance(sibling, QqTag):
                sibling.idx += 1

    def __delitem__(self, idx: int):
        del self._children[idx]
        if not self.adopter:
            # everything after the removed child moved one step left
            for child in self._children[idx:]:
                if isinstance(child, QqTag):
                    child.idx -= 1

    def append_child(self, child):
        """Appends child (a string or a QqTag) as the last child."""
        self.insert(len(self), child)

    def _is_consistent(self):
        """
        Returns True if every tag child refers back to this very object
        as its parent and records its own position correctly.

        :raises QqError: if called on an adopter
        """
        if self.adopter:
            raise QqError("Adopter cannot be checked for consistency")
        for i, child in enumerate(self):
            # identity, not structural equality: an equal-looking but
            # different parent object is still an inconsistency
            if isinstance(child, QqTag) and (
                child.parent is not self or child.idx != i
            ):
                return False
        return True

    def append_line(self, line: str) -> None:
        """
        Appends line if it is not empty

        :param line:
        """
        if line:
            self._children.append(line)

    @overload
    def __getitem__(self, idx: int) -> "QqTag":
        ...

    @overload
    def __getitem__(self, s: slice) -> Sequence["QqTag"]:
        ...

    def __getitem__(self, idx):
        return self._children[idx]

    def __setitem__(self, idx: int, child: Union[str, "QqTag"]):
        self._children[idx] = child
        # Strings have no parent/idx attributes; only tags are registered
        # (this is what makes the ``value`` setter work).
        if not self.adopter and isinstance(child, QqTag):
            # TODO testme
            child.parent = self
            child.idx = idx

    def __iter__(self):
        return iter(self._children)

    def __len__(self):
        return len(self._children)

    def children_tags(self) -> Iterator["QqTag"]:
        """
        Returns iterator of all children that are QqTags

        :return:
        """
        return (tag for tag in self if isinstance(tag, QqTag))

    @property
    def text_content(self):
        """Concatenation of all direct string children (tags skipped)."""
        return "".join(child for child in self if isinstance(child, str))

    def exists(self, key):
        """
        Returns True if a child with given key exists
        :param key:
        :return:
        """
        return key in self._children._directory

    def get(self, key: str, default_value: str = None) -> str:
        """
        Returns a value of a direct child with a given key.
        If it does not exist or is not simple,
        returns default value (default: None)
        :param key: key
        :param default_value: what to return if there is no
        such key or the corresponding child is not simple
        :return: the value of a child
        """
        tag = self.find(key)
        if tag and tag.is_simple:
            return tag.value
        return default_value

    def ancestor_path(self):
        r"""
        Returns list of ancestors for self.

        Example:

            \tag
                \othertag
                    \thirdtag

        thirdtag.ancestor_path == [thirdtag, othertag, tag, _root]

        :return: list
        """
        tag = self
        path = [tag]
        while tag.parent:
            tag = tag.parent
            path.append(tag)
        return path

    def get_eve(self):
        """
        Returns ancestor which is a direct child of a root

        :return:
        """
        return self.ancestor_path()[-2]

    def next(self):
        """Returns the next sibling, or None if there is none."""
        if (
            not self.parent
            or self.idx is None
            or self.idx == len(self.parent) - 1
        ):
            return None
        return self.parent[self.idx + 1]

    def prev(self):
        """Returns the previous sibling, or None if there is none."""
        if not self.parent or self.idx is None or self.idx == 0:
            return None
        return self.parent[self.idx - 1]

    def clear(self):
        """Removes all children."""
        self._children.clear()

    def extend_children(self, children):
        """Appends every item of children via ``append_child``."""
        for child in children:
            self.append_child(child)

    def children_values(self, strings="raise", not_simple="raise"):
        """
        Make a list of .value applied to all children instances

        :param strings: one of 'raise', 'keep', 'none', 'skip'
        :param not_simple: one of 'raise', 'keep', 'none', 'skip'

        What to do if string or not simple tag occurs:
        - 'raise': raise an exception
        - 'keep': keep tags/strings as is (strings are stripped)
        - 'none': replace with None
        - 'skip': skip this item
        :return: list of values
        :raises QqError: on a string / not simple child when the
            corresponding option is 'raise'
        """
        assert strings in ["raise", "keep", "none", "skip"]
        assert not_simple in ["raise", "keep", "none", "skip"]
        values = []
        for child in self:
            if isinstance(child, str):
                if strings == "raise":
                    raise QqError(
                        "string does not have value (set strings option"
                        " to 'keep', 'none' or 'skip' to workaround)"
                    )
                if strings == "keep":
                    values.append(child.strip())
                elif strings == "none":
                    values.append(None)
                # if strings == 'skip': pass
            else:  # QqTag assumed
                if child.is_simple:
                    values.append(child.value)
                    continue
                # child is not simple
                if not_simple == "raise":
                    raise QqError(
                        (
                            "Child {} is not simple. Use not_simple option "
                            "to tweak the behavior"
                        ).format(child)
                    )
                if not_simple == "none":
                    values.append(None)
                if not_simple == "keep":
                    values.append(child)
                # if not_simple == 'skip': pass
        return values

    @property
    def itemized(self) -> bool:
        """
        Returns True if all children are '_item's
        :return: bool
        """
        return len(self.find_all("_item")) == len(self)

    def itemize(self):
        """
        If all of self's children are '_item's, returns self. Otherwise
        returns a new tag with the same name whose only child is an
        '_item' (adopter) holding self's children.
        :return: QqTag
        """
        if self.itemized:
            return self
        return QqTag(self.name, [QqTag("_item", self, adopt=True)])

    def unitemized(self):
        """
        If self is simple (only one child and it is string), return self
        If self's only child is "_item", return it
        :return:
        :raises QqError: in any other case
        """
        # TODO testme

        if self.is_simple:
            return self
        if len(self) == 1 and self[0].name == "_item":
            return self[0]
        raise QqError("Can't unitemize tag " + str(self))

    def process_include_tags(self, parser, includedir, follow=True):
        """
        Recursively processes include tags (as defined by parser.include)
        Reads files from includedir

        Does not modify current tag, returns a new one instead

        :param parser:
        :param includedir:
        :param follow: follow include directives in included files
            recursively
        :return: processed tree
        """

        # TODO FIXME Sanity checks for includedir

        newtree = QqTag(self.name)
        for child in self:
            if isinstance(child, str):
                newtree.append_child(child)
            else:  # child is QqTag
                if child.name == parser.include:
                    include_path = child.value
                    # FROM: https://www.guyrutenberg.com/2013/12/06/
                    # preventing-directory-traversal-in-python/
                    include_path = os.path.normpath(
                        "/" + include_path
                    ).lstrip("/")
                    # END FROM

                    include_parsed = parser.parse_file(
                        os.path.join(includedir, include_path)
                    )
                    if follow:
                        include_parsed = include_parsed.process_include_tags(
                            parser, includedir, follow
                        )
                    # splice the children of the included tree in place
                    # of the include tag
                    newtree.extend_children(include_parsed)
                else:
                    newtree.append_child(
                        child.process_include_tags(
                            parser, includedir, follow
                        )
                    )
        return newtree

    def as_etree(self):
        """
        Converts the tag to an ``xml.etree.ElementTree.Element``: tag
        children become sub-elements, runs of string children become the
        text / tail around them.
        """
        tree = Element(self.name)
        chunk = []
        for child in self:
            if isinstance(child, str):
                chunk.append(child)
            else:
                # flush the text collected so far before the sub-element
                append_text(tree, "".join(chunk))
                chunk.clear()
                tree.append(child.as_etree())
        if chunk:
            append_text(tree, "".join(chunk))
        return tree

    def escape(self, line: str, tbcharacter="\\") -> str:
        """
        Prepends tbcharacter to every tbcharacter and to every bracket
        (``[ { } ]``) in line.
        """
        # tbcharacter itself goes first so that the escapes added for
        # brackets are not escaped again
        for char in [tbcharacter, "[", "{", "}", "]"]:
            line = line.replace(char, tbcharacter + char)
        return line

    def serialize(
        self, tbcharacter="\\", tabs=4, escape_brackets=True
    ) -> List[str]:
        """
        Serializes the tag to a list of source lines in block form.

        :param tbcharacter: character that starts a tag
        :param tabs: number of spaces used to indent the content of a tag
        :param escape_brackets: escape brackets in strings as well as
            tbcharacter (also applied to nested tags)
        :return: list of lines; the last one always ends with a newline
        """
        lines = []
        if self.name != "_root":
            lines.append(tbcharacter + self.name + "\n")
            prefix = " " * tabs
        else:
            # children of the root are not indented
            prefix = ""
        last = len(self) - 1
        for i, child in enumerate(self):
            if isinstance(child, str):
                if escape_brackets:
                    line = self.escape(child, tbcharacter)
                else:
                    line = child.replace(tbcharacter, tbcharacter * 2)
                # a tag always starts on a line of its own
                followed_by_tag = i < last and isinstance(self[i + 1], QqTag)
                sublines = line.split("\n")
                for j, subline in enumerate(sublines):
                    if i == 0 and subline.startswith(" "):
                        # protect a leading space of the first child from
                        # being read as indentation
                        subline = tbcharacter + subline
                    if j < len(sublines) - 1 or followed_by_tag:
                        postfix = "\n"
                    else:
                        postfix = ""
                    # every physical line gets the prefix, so multi-line
                    # strings stay inside their tag
                    lines.append(prefix + subline + postfix)
            else:
                lines.extend(
                    prefix + line
                    for line in child.serialize(
                        tbcharacter, tabs, escape_brackets
                    )
                )
        if lines and not lines[-1].endswith("\n"):
            lines[-1] = lines[-1] + "\n"
        return lines
498 |
499 |
def append_text(tree, text):
    """
    Append text to an ElementTree element.

    The text goes to the tail of the last sub-element if there is one,
    otherwise to the text of the element itself; existing text is
    extended, not replaced.

    :param tree: element to modify
    :param text: text to append
    :return: the same element
    """
    subelements = list(tree)
    if not subelements:
        tree.text = text if tree.text is None else tree.text + text
        return tree
    last = subelements[-1]
    last.tail = text if last.tail is None else last.tail + text
    return tree
513 |
514 |
def dedent(line, indent):
    """
    Remove exactly ``indent`` leading spaces from line.

    :param line: line to dedent
    :param indent: number of spaces to remove
    :return: line without its first ``indent`` characters
    :raises QqError: if line does not start with ``indent`` spaces
    """
    expected_prefix = " " * indent
    if line[:indent] != expected_prefix:
        raise QqError(
            "Can't dedent line {} by {}".format(repr(line), indent)
        )
    return line[indent:]
519 |
520 |
def get_indent(s, empty_to_none=False):
    """
    Return the number of leading spaces of s.

    :param s: a line
    :param empty_to_none: return None instead of a number for a line
        that contains only whitespace
    :return: number of leading spaces, or None (see above)
    :raises QqError: if the leading whitespace contains a tab
    """
    if empty_to_none and not s.strip():
        return None
    leading_whitespace = re.match(r"\s*", s).group(0)
    if "\t" in leading_whitespace:
        raise QqError(
            "No tabs allowed in QqDoc at the beginning "
            "of line! Line: " + s
        )
    # only plain spaces count as indentation
    return len(s) - len(s.lstrip(" "))
533 |
534 |
@total_ordering
class Position(object):
    """
    A (line, offset) position inside a list of lines.

    Positions are ordered by line first and offset second. Comparing a
    Position with an object of another type is not supported and yields
    ``NotImplemented`` (so ``==`` is False and ordering raises
    TypeError).
    """

    def __init__(self, line, offset, lines):
        """
        :param line: line index; None means "just past the last line"
        :param offset: character index within the line
        :param lines: the list of lines the position refers to
        """
        self.line = len(lines) if line is None else line
        self.offset = offset
        self.lines = lines

    def __lt__(self, other):
        if not isinstance(other, Position):
            return NotImplemented
        return (self.line, self.offset) < (other.line, other.offset)

    def __eq__(self, other):
        if not isinstance(other, Position):
            return NotImplemented
        return (self.line, self.offset) == (other.line, other.offset)

    def nextchar(self):
        """
        Returns the position one character further; moves to the start
        of the next line when the end of the current line is passed.
        """
        new = self.copy()
        new.offset += 1
        if new.offset >= len(new.lines[new.line]):
            new = new.nextline()
        return new

    def prevchar(self):
        """
        Returns the position one character back; moves to the last
        character of the previous line when at the start of a line.
        """
        new = self.copy()
        new.offset -= 1
        if new.offset < 0:
            new.line -= 1
            new.offset = len(new.getline) - 1
        return new

    def prevline(self):
        """Returns the position at the start of the previous line."""
        return Position(line=self.line - 1, offset=0, lines=self.lines)

    def nextline(self):
        """Returns the position at the start of the next line."""
        return Position(line=self.line + 1, offset=0, lines=self.lines)

    def copy(self):
        """Returns a new Position with the same line, offset and lines."""
        return Position(
            line=self.line, offset=self.offset, lines=self.lines
        )

    def __str__(self):
        return "Position: line_number: {}, offset: {}, line: {}".format(
            self.line, self.offset, get(self.lines, self.line)
        )

    def __repr__(self):
        return "Position(line={}, offset={})".format(
            self.line, self.offset
        )

    def lines_before(self, stop):
        """
        Returns the list of (clipped) lines between self and stop: the
        first one starts at self's offset, the last one ends before stop.

        :param stop: Position
        :return: list of strings
        """
        pos = self
        out = []
        while pos < stop:
            out.append(pos.clipped_line(stop))
            pos = pos.nextline()
        return out

    def clipped_line(self, stop):
        """
        Returns line clipped before stop

        The line starts at self's offset; it runs to the end of the line
        if stop is on a later line and to stop's offset otherwise.

        :param stop: Position
        :return: string
        """

        if stop.line > self.line:
            inline_stop_offset = None
        else:
            inline_stop_offset = stop.offset
        return self.getline[self.offset : inline_stop_offset]

    @property
    def getline(self):
        """The line this position points into."""
        return self.lines[self.line]

    @property
    def getchar(self):
        """The character this position points at."""
        return self.getline[self.offset]

    def get_end_of_line(self):
        """Returns the position just past the last character of the line."""
        return Position(self.line, len(self.getline), self.lines)

    def get_start_of_line(self):
        """Returns the position at the start of the line."""
        return Position(self.line, 0, self.lines)
621 |
622 |
def get(s, i, default=None):
    """
    Return ``s[i]`` if i is a valid non-negative index of s, otherwise
    default. Negative indices are treated as out of range.
    """
    if 0 <= i < len(s):
        return s[i]
    return default
627 |
628 |
def first_nonspace_idx(line, start=0, stop=None):
    """
    Return the index of the first non-whitespace character of
    ``line[start:stop]``, counted from the beginning of line. If the
    slice is all whitespace, the index just past the slice is returned.

    :param line: string to scan
    :param start: index to start scanning from
    :param stop: index to stop scanning at (default: end of line)
    :return: int
    """
    end = len(line) if stop is None else stop
    window = line[start:end]
    leading = re.match(r"\s*", window)
    return start + leading.end(0)
634 |
635 |
636 | class QqParser(object):
637 | """
638 | General indentml parser.
639 | """
640 |
def __init__(
    self,
    tb_char="\\",
    allowed_tags=None,
    allowed_inline_tags=None,
    alias2tag=None,
    include="_include",
):
    """
    Configure the parser.

    :param tb_char: character that starts a tag (backslash by default)
    :param allowed_tags: iterable of tag names recognised as block tags;
        it is copied, so the caller's collection is never modified
    :param allowed_inline_tags: collection of tag names recognised as
        inline tags; when omitted (or when it is the very same object
        as ``allowed_tags``) it shares the parser's block tag set
    :param alias2tag: mapping from alias to real tag name
    :param include: name of the include tag; it is always added to the
        allowed block tags
    """
    self.tb_char = tb_char
    self.command_regex = re.escape(self.tb_char)

    # Work on a private copy: ``include`` is added below and that must not
    # leak into the set owned by the caller. Copying also lets callers
    # pass any iterable (list, tuple, frozenset), not only a mutable set.
    if allowed_tags is None:
        self.allowed_tags = set()
    else:
        self.allowed_tags = set(allowed_tags)

    self.tag_regex = r"([^\s\{\[\&" + self.command_regex + "]+)"

    if allowed_inline_tags is None or allowed_inline_tags is allowed_tags:
        # keep block and inline tags aliased, as they were before copying
        self.allowed_inline_tags = self.allowed_tags
    else:
        self.allowed_inline_tags = allowed_inline_tags

    self.alias2tag = {} if alias2tag is None else alias2tag

    self.escape_stub = "&_ESCAPE_Thohhe1eieMam6Yo_"
    self.include = include
    self.allowed_tags.add(include)
    self._lines = None
    self._indents = None

    # tag at the beginning of a line: must be followed by a space,
    # another tag character or the end of the line
    self.blocktag_rc = re.compile(
        self.command_regex
        + self.tag_regex
        + r"(?= |{}|$)".format(self.command_regex)
    )
    # tag anywhere in a line: may additionally be followed by '{' or '['
    self.anytag_rc = re.compile(
        self.command_regex
        + self.tag_regex
        + r"(?= |{}|{{|\[|$)".format(self.command_regex)
    )
679 |
def is_allowed_tag(self, tag: str, inline=False):
    """
    Tell whether ``tag`` is a known tag name.

    :param tag: tag name (without the tag character)
    :param inline: check against the inline tags instead of block tags
    :return: True if the tag is in the corresponding collection
    """
    registry = self.allowed_inline_tags if inline else self.allowed_tags
    return tag in registry
685 |
def escape_line(self, s):
    r"""
    Replace escaped special characters with unique stubs.

    With ``\`` standing for ``self.tb_char``, the two-character
    sequences ``\\``, ``\ ``, ``\{``, ``\[``, ``\}`` and ``\]`` are
    replaced (in this order) by ``self.escape_stub`` followed by a
    marker naming the escaped character.

    :param s: a line
    :return: escaped line
    """
    substitutions = (
        (self.tb_char, "COMMAND_&"),
        (" ", "SPACE_&"),
        ("{", "OPEN_CURVE_&"),
        ("[", "OPEN_SQUARE_&"),
        ("}", "CLOSE_CURVE_&"),
        ("]", "CLOSE_SQUARE_&"),
    )
    for escaped_char, marker in substitutions:
        s = s.replace(self.tb_char + escaped_char, self.escape_stub + marker)
    return s
708 |
def unescape_line(self, s):
    """
    Replace the stubs inserted by ``escape_line()`` with the plain
    characters they stand for: a space, the tag character, ``{``,
    ``[``, ``}`` and ``]``.

    Note: this is **NOT** an inverse of escape_line, because the tag
    character that preceded the escaped character is not restored.

    :param s: a line
    :return: unescaped line
    """
    replacements = (
        ("SPACE_&", " "),
        ("COMMAND_&", self.tb_char),
        ("OPEN_CURVE_&", "{"),
        ("OPEN_SQUARE_&", "["),
        ("CLOSE_CURVE_&", "}"),
        ("CLOSE_SQUARE_&", "]"),
    )
    for marker, plain_char in replacements:
        s = s.replace(self.escape_stub + marker, plain_char)
    return s
727 |
def position(self, line, offset):
    """
    Create a Position bound to the lines currently held by the parser.

    :param line: line number
    :param offset: offset inside the line
    :return: new Position object
    """
    source_lines = self._lines
    return Position(line=line, offset=offset, lines=source_lines)
730 |
def parse_init(self, text: Union[str, Sequence[str]]):
    """
    Prepare the parser state for a new text.

    Splits ``text`` into lines (if it is a single string), escapes
    every line with ``escape_line()`` and stores the result in
    ``self._lines``. Then fills ``self._indents`` with one indent per
    line, where a line for which ``get_indent(..., empty_to_none=True)``
    yields ``None`` takes the indent of the group of lines that
    follows it (or the basic indent if nothing follows).

    Empty input (``""`` or an empty sequence) is accepted and results
    in empty ``self._lines`` and ``self._indents``.

    :param text: whole text as a string or a sequence of lines
    :return: None
    """
    if isinstance(text, str):
        lines = text.splitlines(keepends=True)
    else:
        lines = text

    lines = [self.escape_line(line) for line in lines]

    self._lines = lines
    self._indents = []

    if not lines:
        # Nothing to index. Without this guard the grouping below yields
        # nothing and unpacking its transposition raises ValueError.
        return

    # basic indent is indent of first non-empty line, if any
    basicindent = next(
        (get_indent(line) for line in lines if line.strip()), 0
    )

    # we want to replace all Nones with indent of next non-empty string
    # to do so, first, let us group all equal consecutive indents
    groups = [
        (indent, sum(1 for _ in g))
        for indent, g in groupby(
            get_indent(line, empty_to_none=True) for line in lines
        )
    ]
    indents = [indent for indent, _ in groups]

    for i, (indent, num) in enumerate(groups):
        if indent is None:
            # consecutive Nones form one group, so the next group (if
            # any) carries a real indent
            indent = get(indents, i + 1, basicindent)
        self._indents.extend([indent] * num)
768 |
def parse(self, lines: Union[str, Sequence[str]]):
    """
    Parse a text and return its tree.

    :param lines: whole text as a string or a sequence of lines
    :return: ``QqTag`` named ``_root`` whose children are the parsed
        top-level strings and tags; for empty input the root has no
        children
    """
    if isinstance(lines, (str, list, tuple)) and not lines:
        # Empty text: there is no first line to take the indent from,
        # so reset the state and return an empty tree right away.
        self._lines = []
        self._indents = []
        return QqTag("_root", [])

    self.parse_init(lines)
    start = self.position(0, 0)
    stop = self.position(None, 0)
    tags = self.parse_fragment(
        start, stop, current_indent=get_indent(self._lines[0])
    )
    return QqTag("_root", tags)
777 |
def append_chunk_and_clear(
    self, tags, chunk, stripeol=False, ignoreempty=False
):
    """
    Flush the accumulated text pieces into ``tags`` and empty ``chunk``.

    The pieces of ``chunk`` are concatenated, unescaped and appended to
    ``tags`` as one string. An empty result is still appended when
    ``chunk`` contained at least one piece, unless ``ignoreempty`` is set.

    :param tags: list that receives the resulting string (modified)
    :param chunk: list of string pieces; always cleared on return
    :param stripeol: drop one trailing newline from the joined text
    :param ignoreempty: do not append anything if the text is empty
    :return: None
    """
    text = "".join(chunk)
    if stripeol and text.endswith("\n"):
        text = text[:-1]

    # empty chunk is not the same as chunk with empty line
    had_pieces = bool(chunk)
    if text or (had_pieces and not ignoreempty):
        tags.append(self.unescape_line(text))
    chunk.clear()
788 |
def parse_fragment(
    self, start, stop, current_indent, merge_lines=False
):
    """
    Parse the text between ``start`` and ``stop`` into a list of
    strings and ``QqTag`` objects.

    :param start: position to start parsing from
    :param stop: position to stop at (positions are processed while
        they compare less than ``stop``)
    :param current_indent: indent passed to ``dedent()`` for every line
        that is processed from its very beginning (offset 0)
    :param merge_lines: if True, a tag at the beginning of a line is
        not treated as a block tag owning the following lines; the
        flag is also forwarded to ``scan_after_attribute_tag()``
    :return: list of text chunks (str) and QqTag instances
    """

    tags = []

    pos = start.copy()
    # pieces of plain text collected since the last tag
    chunk = []

    while pos < stop:
        # loop invariant: everything before pos is appended to tags
        # or chunk

        line = pos.clipped_line(stop)
        if not line.strip():
            # whitespace-only rest of line: keep only its line break
            if line and line[-1] == "\n":
                chunk.append("\n")
            pos = pos.nextline()
            continue
        if pos.offset == 0:
            # we are at the very beginning of a line
            line = dedent(line, current_indent)
            pos.offset = current_indent
            blockmode = True
        else:
            blockmode = False

        if (
            not merge_lines
            and blockmode
            and line.strip()
            and line[0] == self.tb_char
        ):
            # possibly block tag line
            m = self.blocktag_rc.match(line)
            if m:
                tag = m.group(1)
                # NOTE(review): aliases are resolved only in this branch;
                # tags returned by locate_tag() below are used as-is.
                # Confirm this is intended.
                tag = self.alias2tag.get(tag, tag)
                if self.is_allowed_tag(tag):
                    # offset of the first non-space character after the
                    # tag name, in coordinates of the original line
                    newstart_pos = current_indent + first_nonspace_idx(
                        line, m.end(1)
                    )
                    newstop_line, tag_contents_indent = self.block_tag_stop_line_indent(
                        pos.line, stop.line
                    )
                    parsed_content = self.parse_fragment(
                        self.position(pos.line, newstart_pos),
                        self.position(newstop_line, 0),
                        tag_contents_indent,
                    )
                    # flush the text collected before the tag
                    self.append_chunk_and_clear(
                        tags, chunk, stripeol=True
                    )
                    tags.append(QqTag(tag, children=parsed_content))
                    # continue from the line where the tag's content stops
                    pos = self.position(newstop_line, 0)
                    continue

        # look for a tag in the rest of the current line
        tag_position, tag, ttype, after = self.locate_tag(pos, stop)
        if tag is not None:
            # text between pos and the tag goes to the output first
            chunk.append(pos.clipped_line(tag_position))
            self.append_chunk_and_clear(tags, chunk, ignoreempty=True)
            if ttype == "block":
                # the content of this tag lasts until the next block
                # tag found by scan_after_attribute_tag() (or the
                # position it returns when there is none)
                next_bt_position = self.scan_after_attribute_tag(
                    after, stop, merge_lines=merge_lines
                )
                # cut the whitespace that precedes next_bt_position
                new_stop = self.find_first_nonspace_character_before(
                    next_bt_position, after
                ).nextchar()
                parsed_content = self.parse_fragment(
                    after, new_stop, current_indent
                )
                tags.append(QqTag(tag, children=parsed_content))
                pos = next_bt_position.copy()
                continue
            if ttype == "inline":
                # one item per bracketed group following the tag name
                items = self.inline_tag_contents(after, stop)
                parsed_items = []

                for item in items:
                    parsed_content = self.parse_fragment(
                        item["start"],
                        item["stop"],
                        current_indent,
                        merge_lines=True,
                    )
                    if item["type"] == "{":
                        # curly group: its content is added directly
                        parsed_items.extend(parsed_content)
                    else:  # item['type'] == '['
                        # square group: content is wrapped into _item
                        parsed_items.append(
                            QqTag("_item", children=parsed_content)
                        )
                tags.append(QqTag(tag, children=parsed_items))
                # continue right after the last closing bracket
                pos = items[-1]["stop"].nextchar()
                continue

        # no tag found: the (rest of the) line is plain text
        chunk.append(line)
        pos = pos.nextline()

    self.append_chunk_and_clear(tags, chunk, stripeol=True)
    return tags
888 |
def find_first_nonspace_character_before(
    self, start: Position, stop: Position
):
    """
    Return the position of the last non-whitespace character that
    precedes ``start`` on its line.

    If only whitespace (or nothing) precedes ``start``, the returned
    position has offset ``-1``.

    :param start: position to look backwards from
    :param stop: not used
    :return: Position on the same line as ``start``
    """
    # FIXME: stop is not used: why?
    prefix = start.get_start_of_line().clipped_line(start)
    # count whitespace at the end of the prefix by matching it reversed
    trailing_ws = re.match(r"\s*", prefix[::-1])

    return self.position(start.line, start.offset - trailing_ws.end(0) - 1)
899 |
def block_tag_stop_line_indent(self, start_line, stop_line):
    """
    Find where the content of a block tag ends and how it is indented.

    :param start_line: number of the line containing the tag
    :param stop_line: number of the line at which the search stops
    :return: tuple ``(line, indent)``: number of the first line that
        does not belong to the tag and the indent of the tag's
        content (``-1`` when the tag has no indented content)
    :raises QqError: if the line closing the tag is indented deeper
        than the tag itself
    """
    tag_indent = self._indents[start_line]
    following_line = start_line + 1

    if stop_line <= following_line:
        # don't have more lines
        # e.g.
        # \tag rest of line
        # EOF
        # indent is of no importance, so set it to -1
        return following_line, -1

    contents_indent = self._indents[following_line]
    if contents_indent <= tag_indent:
        # tag is already closed
        # like
        # \tag rest of line
        # something
        return following_line, -1

    # defaults used when no line with a smaller indent is found
    last_tag_line, last_tag_indent = stop_line, tag_indent
    candidates = enumerate(
        islice(self._indents, start_line + 2, stop_line), start_line + 2
    )
    for lineno, indent in candidates:
        if indent < contents_indent:
            last_tag_line, last_tag_indent = lineno, indent
            break

    if last_tag_indent > tag_indent:
        raise QqError(
            "Incorrect indent at line {}: ".format(last_tag_line)
            + self._lines[last_tag_line]
        )
    return last_tag_line, contents_indent
936 |
def locate_tag(self, start: Position, stop: Position):
    """
    Locate the first allowed inline or block tag on the line,
    beginning with the given position.

    Does not propagate to the following lines.

    :param start: position to start with
    :param stop: position to stop
    :return: tuple ``(tag_position, tag, type, after)`` where
        ``tag_position`` is the Position of the first tag character
        (the tag character itself), ``tag`` is the tag name, ``type``
        is ``'block'`` or ``'inline'`` and ``after`` is the Position
        of the first non-space character after the tag name.
        If no allowed tag is found, ``tag``, ``type`` and ``after``
        are None and ``tag_position`` is the end of the line or
        ``stop``, whichever comes first.
    """
    line = start.clipped_line(stop)

    for match in self.anytag_rc.finditer(line):
        name = match.group(1)
        name_end = match.end(1)
        tag_position = self.position(
            start.line, start.offset + match.start(0)
        )
        after = self.position(
            start.line,
            start.offset + first_nonspace_idx(line, name_end),
        )
        # a tag immediately followed by a bracket is an inline tag
        followed_by_bracket = get(line, name_end) in ["{", "["]
        if followed_by_bracket:
            if self.is_allowed_tag(name, inline=True):
                return tag_position, name, "inline", after
        elif self.is_allowed_tag(name):
            return tag_position, name, "block", after
    return min(start.get_end_of_line(), stop), None, None, None
972 |
def inline_tag_contents(self, start: Position, stop: Position):
    """
    Collect the consecutive bracketed groups that form the contents
    of an inline tag.

    :param start: position of the first opening bracket
    :param stop: position to stop at
    :return: a list of dicts {'type': '[' or '{',
                              'start': Position right after the
                                       opening bracket,
                              'stop': Position of the matching
                                      closing bracket}
    """
    items = []
    cursor = start
    while cursor < stop:
        bracket = cursor.getchar
        if bracket not in ["[", "{"]:
            break
        closing = self.match_bracket(cursor, stop)
        items.append(
            {"type": bracket, "start": cursor.nextchar(), "stop": closing}
        )
        cursor = closing.nextchar()
    return items
993 |
def match_bracket(self, start: Position, stop: Position) -> Position:
    """
    Find the bracket that closes the one located at ``start``,
    taking nested brackets of the same kind into account.

    :param start: start position, its value should be '[' or '{'
    :param stop: stop position
    :return: position of matching closing bracket
    :raises QqError: if no matching bracket is found before ``stop``
    """
    opener = self._lines[start.line][start.offset]
    assert opener in ["[", "{"]
    closer = {"[": "]", "{": "}"}[opener]
    bracket_rc = re.compile(re.escape(opener) + "|" + re.escape(closer))

    # number of brackets opened and not yet closed
    depth = 0
    cursor = start.copy()

    while cursor < stop:
        for found in bracket_rc.finditer(cursor.clipped_line(stop)):
            here = self.position(cursor.line, cursor.offset + found.start(0))
            if here >= stop:
                raise QqError(
                    "No closing bracket found: "
                    "start: {}, stop: {}".format(start, stop)
                )
            depth += 1 if found.group(0) == opener else -1
            if depth == 0:
                return here
        cursor = cursor.nextline()

    raise QqError(
        "No closing bracket found: "
        "start: {}, stop: {}".format(start, stop)
    )
1035 |
def scan_after_attribute_tag(
    self, start: Position, stop: Position, merge_lines=False
):
    """
    Scan the rest of the line / fragment after a block tag found inline,
    looking for another block tag and skipping every inline tag
    together with its contents.

    :param start: first character to scan
    :param stop: where to stop
    :param merge_lines: scan several lines; otherwise scanning is
        limited to the line of ``start``
    :return: Position of the first character of the next block tag;
        if there is none, the last "end of scanned text" position
        seen (end of line or ``stop``), or a copy of ``start`` when
        nothing was scanned at all
    """
    if not merge_lines:
        # looking only for current line
        stop = min(stop, start.nextline())

    cursor = start.copy()
    result = start.copy()

    while cursor < stop:
        tag_position, tag, kind, after = self.locate_tag(cursor, stop)
        if tag is None:
            # no tag on this line: remember where it ended and go on
            cursor = cursor.nextline()
            result = tag_position
            continue
        if kind == "block":
            return tag_position
        # inline tag: jump over all its bracketed contents
        contents = self.inline_tag_contents(after, stop)
        cursor = contents[-1]["stop"].nextchar()
        result = min(cursor.get_end_of_line(), stop)

    return result
1072 |
def parse_file(self, filename, encoding=None):
    """
    Read a file and parse its contents.

    :param filename: path of the file to parse
    :param encoding: text encoding used to read the file; ``None``
        (the default) keeps the platform-dependent default of
        ``open()``, pass e.g. ``"utf-8"`` for portable results
    :return: result of ``parse()`` for the lines of the file
    """
    with open(filename, encoding=encoding) as f:
        lines = f.readlines()
    return self.parse(lines)
1077 |
--------------------------------------------------------------------------------