├── .coveragerc
├── .gitignore
├── .travis.yml
├── LICENSE
├── MANIFEST.in
├── README.md
├── TexSoup
    ├── __init__.py
    ├── category.py
    ├── data.py
    ├── reader.py
    ├── tex.py
    ├── tokens.py
    └── utils.py
├── docs
    ├── .gitignore
    ├── Makefile
    ├── make.bat
    ├── requirements.txt
    └── source
    │   ├── _static
    │       ├── css
    │       │   └── theme-mod.css
    │       ├── images
    │       │   ├── android-chrome-192x192.png
    │       │   ├── android-chrome-512x512.png
    │       │   ├── apple-touch-icon.png
    │       │   ├── arrow-down-orange.svg
    │       │   ├── arrow-right-with-tail.svg
    │       │   ├── browserconfig.xml
    │       │   ├── chevron-right-orange.svg
    │       │   ├── favicon-16x16.png
    │       │   ├── favicon-32x32.png
    │       │   ├── favicon.ico
    │       │   ├── logo-dark.svg
    │       │   ├── logo-icon.svg
    │       │   ├── logo.svg
    │       │   ├── mstile-150x150.png
    │       │   ├── pytorch-x.svg
    │       │   ├── safari-pinned-tab.svg
    │       │   ├── search-icon.svg
    │       │   ├── site.webmanifest
    │       │   └── view-page-source-icon.svg
    │       └── texsoup.ai
    │   ├── _templates
    │       ├── cookie_banner.html
    │       ├── footer.html
    │       ├── landing.html
    │       ├── layout.html
    │       └── theme_variables.jinja
    │   ├── categorizer.rst
    │   ├── conf.py
    │   ├── data.rst
    │   ├── index.rst
    │   ├── main.rst
    │   ├── modification.rst
    │   ├── navigation.rst
    │   ├── parser.rst
    │   ├── quickstart.rst
    │   ├── searching.rst
    │   ├── soup.rst
    │   ├── tokenizer.rst
    │   └── utils.rst
├── examples
    ├── README.md
    ├── count_references.py
    ├── list_everything.py
    ├── resolve_imports.py
    ├── simple_conversion.py
    ├── solution_length.py
    └── structure_diagram.py
├── pytest.ini
├── setup.py
└── tests
    ├── __init__.py
    ├── config.py
    ├── samples
        ├── chikin.pdf
        ├── chikin.tex
        └── pancake.tex
    ├── test_api.py
    ├── test_load_edit_save.py
    ├── test_parser.py
    └── test_search.py


/.coveragerc:
--------------------------------------------------------------------------------
 1 | [run]
 2 | include = */TexSoup/*
 3 | omit = tests/*
 4 | 
 5 | [report]
 6 | # Regexes for lines to exclude from consideration
 7 | exclude_lines =
 8 |     # Have to re-enable the standard pragma
 9 |     pragma: no cover
10 | 
11 |     # Don't complain about missing debug-only code:
12 |     def __repr__
13 |     if self\.debug
14 | 
15 |     # Don't complain if tests don't hit defensive assertion code:
16 |     raise AssertionError
17 |     raise NotImplementedError
18 | 
19 |     # Don't complain if non-runnable code isn't run:
20 |     if 0:
21 |     if __name__ == .__main__.:
22 | 
23 | ignore_errors = True
24 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | .coveralls.yml
  2 | .idea
  3 | 
  4 | ### macOS ###
  5 | # General
  6 | .DS_Store
  7 | .AppleDouble
  8 | .LSOverride
  9 | 
 10 | # Icon must end with two \r
 11 | Icon
 12 | 
 13 | # Thumbnails
 14 | ._*
 15 | 
 16 | # Files that might appear in the root of a volume
 17 | .DocumentRevisions-V100
 18 | .fseventsd
 19 | .Spotlight-V100
 20 | .TemporaryItems
 21 | .Trashes
 22 | .VolumeIcon.icns
 23 | .com.apple.timemachine.donotpresent
 24 | 
 25 | # Directories potentially created on remote AFP share
 26 | .AppleDB
 27 | .AppleDesktop
 28 | Network Trash Folder
 29 | Temporary Items
 30 | .apdisk
 31 | 
 32 | ### Python ###
 33 | # Byte-compiled / optimized / DLL files
 34 | __pycache__/
 35 | *.py[cod]
 36 | *$py.class
 37 | 
 38 | # C extensions
 39 | *.so
 40 | 
 41 | # Distribution / packaging
 42 | .Python
 43 | develop-eggs/
 44 | dist/
 45 | downloads/
 46 | eggs/
 47 | .eggs/
 48 | lib/
 49 | lib64/
 50 | parts/
 51 | sdist/
 52 | var/
 53 | wheels/
 54 | pip-wheel-metadata/
 55 | share/python-wheels/
 56 | *.egg-info/
 57 | .installed.cfg
 58 | *.egg
 59 | MANIFEST
 60 | 
 61 | # PyInstaller
 62 | #  Usually these files are written by a python script from a template
 63 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 64 | *.manifest
 65 | *.spec
 66 | 
 67 | # Installer logs
 68 | pip-log.txt
 69 | pip-delete-this-directory.txt
 70 | 
 71 | # Unit test / coverage reports
 72 | htmlcov/
 73 | .tox/
 74 | .nox/
 75 | .coverage
 76 | .coverage.*
 77 | .cache
 78 | nosetests.xml
 79 | coverage.xml
 80 | *.cover
 81 | .hypothesis/
 82 | .pytest_cache/
 83 | 
 84 | # Translations
 85 | *.mo
 86 | *.pot
 87 | 
 88 | # Django stuff:
 89 | *.log
 90 | local_settings.py
 91 | db.sqlite3
 92 | 
 93 | # Flask stuff:
 94 | instance/
 95 | .webassets-cache
 96 | 
 97 | # Scrapy stuff:
 98 | .scrapy
 99 | 
100 | # Sphinx documentation
101 | docs/_build/
102 | 
103 | # PyBuilder
104 | target/
105 | 
106 | # Jupyter Notebook
107 | .ipynb_checkpoints
108 | 
109 | # IPython
110 | profile_default/
111 | ipython_config.py
112 | 
113 | # pyenv
114 | .python-version
115 | 
116 | # pipenv
117 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
118 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
119 | #   having no cross-platform support, pipenv may install dependencies that don’t work, or not
120 | #   install all needed dependencies.
121 | #Pipfile.lock
122 | 
123 | # celery beat schedule file
124 | celerybeat-schedule
125 | 
126 | # SageMath parsed files
127 | *.sage.py
128 | 
129 | # Environments
130 | .env
131 | .venv
132 | env/
133 | venv/
134 | ENV/
135 | env.bak/
136 | venv.bak/
137 | 
138 | # Spyder project settings
139 | .spyderproject
140 | .spyproject
141 | 
142 | # Rope project settings
143 | .ropeproject
144 | 
145 | # mkdocs documentation
146 | /site
147 | 
148 | # mypy
149 | .mypy_cache/
150 | .dmypy.json
151 | dmypy.json
152 | 
153 | # Pyre type checker
154 | .pyre/
155 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | 
 3 | sudo: false
 4 | 
 5 | python:
 6 |   - "3.4"
 7 |   - "3.5"
 8 |   - "3.6"
 9 |   - "3.7"
10 |   - "3.8"
11 | 
12 | install:
13 |   - python setup.py install
14 |   - python setup.py easy_install $(python3 -c 'import distutils.core; print(" ".join(distutils.core.run_setup("setup.py").tests_require))')
15 | 
16 | cache:
17 |   directories:
18 |   - "$HOME/.cache/pip"
19 |   - lib/python3.4/site-packages
20 |   - lib/python3.5/site-packages
21 |   - lib/python3.6/site-packages
22 |   - lib/python3.7/site-packages
23 |   - lib/python3.8/site-packages
24 | 
25 | script:
26 |   - py.test --cov
27 | 
28 | after_success:
29 |   - CI=true TRAVIS=true coveralls
30 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2016, Alvin Wan
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 | * Redistributions of source code must retain the above copyright notice, this
 8 |   list of conditions and the following disclaimer.
 9 | 
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 |   this list of conditions and the following disclaimer in the documentation
12 |   and/or other materials provided with the distribution.
13 | 
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include LICENSE
3 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <a href="https://texsoup.alvinwan.com"><img src="https://user-images.githubusercontent.com/2068077/55692228-b7f92d00-595a-11e9-93a2-90090a361d12.png" width="80px"></a>
  2 | 
  3 | # [TexSoup](https://texsoup.alvinwan.com)
  4 | 
  5 | [![PyPi Downloads per Day](https://img.shields.io/pypi/dm/texsoup.svg)](https://pypi.python.org/pypi/TexSoup/)
  6 | [![Build Status](https://travis-ci.org/alvinwan/TexSoup.svg?branch=master)](https://travis-ci.org/alvinwan/TexSoup)
  7 | [![Coverage Status](https://coveralls.io/repos/github/alvinwan/TexSoup/badge.svg?branch=master)](https://coveralls.io/github/alvinwan/TexSoup?branch=master)
  8 | 
  9 | TexSoup is a fault-tolerant, Python3 package for searching, navigating, and modifying LaTeX documents. You can skip installation and try TexSoup directly, using the [pytwiddle demo &rarr;](https://pytwiddle.com/?id=example:latex.py)
 10 | 
 11 | - [Getting Started](https://github.com/alvinwan/TexSoup#Getting-Started)
 12 | - [Installation](https://github.com/alvinwan/TexSoup#Installation)
 13 | - [API Reference](http://texsoup.alvinwan.com/docs/data.html)
 14 | 
 15 | Created by [Alvin Wan](http://alvinwan.com) + [contributors](https://github.com/alvinwan/TexSoup/graphs/contributors).
 16 | 
 17 | # Getting Started
 18 | 
 19 | To parse a $LaTeX$ document, pass an open filehandle or a string into the
 20 | `TexSoup` constructor.
 21 | 
 22 | ``` python
 23 | from TexSoup import TexSoup
 24 | soup = TexSoup("""
 25 | \begin{document}
 26 | 
 27 | \section{Hello \textit{world}.}
 28 | 
 29 | \subsection{Watermelon}
 30 | 
 31 | (n.) A sacred fruit. Also known as:
 32 | 
 33 | \begin{itemize}
 34 | \item red lemon
 35 | \item life
 36 | \end{itemize}
 37 | 
 38 | Here is the prevalence of each synonym.
 39 | 
 40 | \begin{tabular}{c c}
 41 | red lemon & uncommon \\
 42 | life & common
 43 | \end{tabular}
 44 | 
 45 | \end{document}
 46 | """)
 47 | ```
 48 | 
 49 | With the soupified $\LaTeX$, you can now search and traverse the document tree.
 50 | The code below demonstrates the basic functions that TexSoup provides.
 51 | 
 52 | ```python
 53 | >>> soup.section  # grabs the first `section`
 54 | \section{Hello \textit{world}.}
 55 | >>> soup.section.name
 56 | 'section'
 57 | >>> soup.section.string
 58 | 'Hello \\textit{world}.'
 59 | >>> soup.section.parent.name
 60 | 'document'
 61 | >>> soup.tabular
 62 | \begin{tabular}{c c}
 63 | red lemon & uncommon \\
 64 | life & common
 65 | \end{tabular}
 66 | >>> soup.tabular.args[0]
 67 | 'c c'
 68 | >>> soup.item
 69 | \item red lemon
 70 | >>> list(soup.find_all('item'))
 71 | [\item red lemon, \item life]
 72 | ```
 73 | 
 74 | For more use cases, see [the Quickstart Guide](https://texsoup.alvinwan.com/docs/quickstart.html). Or, try TexSoup [online, via pytwiddle &rarr;](https://pytwiddle.com/?id=example:latex.py)
 75 | 
 76 | Links:
 77 | 
 78 | - [Quickstart Guide: how and when to use TexSoup](http://texsoup.alvinwan.com/docs/quickstart.html)
 79 | - [Example Use Cases: counting references, resolving imports, and more](https://github.com/alvinwan/TexSoup/tree/master/examples)
 80 | 
 81 | # Installation
 82 | 
 83 | ## Pip
 84 | 
 85 | TexSoup is published via PyPi, so you can install it via `pip`. The package
 86 | name is `TexSoup`:
 87 | 
 88 | ```bash
 89 | $ pip install texsoup
 90 | ```
 91 | 
 92 | ## From source
 93 | 
 94 | Alternatively, you can install the package from source:
 95 | 
 96 | ```bash
 97 | $ git clone https://github.com/alvinwan/TexSoup.git
 98 | $ cd TexSoup
 99 | $ pip install .
100 | ```
101 | 


--------------------------------------------------------------------------------
/TexSoup/__init__.py:
--------------------------------------------------------------------------------
 1 | """TexSoup's main utility is the ``TexSoup`` function.
 2 | 
 3 | Invoke this function on a LaTeX string or file handler to obtain a parse
 4 | tree with navigation, search, and modification utilities.
 5 | """
 6 | 
 7 | from TexSoup.tex import read
 8 | from TexSoup.data import TexNode
 9 | 
10 | __version__ = '0.3.1'
11 | 
12 | 
13 | # noinspection PyPep8Naming
14 | def TexSoup(tex_code, skip_envs=(), tolerance=0):
15 |     r"""
16 |     At a high-level, parses provided Tex into a navigable, searchable
17 |     structure. This is accomplished in two steps:
18 | 
19 |     1. Tex is parsed, cleaned, and packaged.
20 |     2. Structure fed to TexNodes for a searchable, coder-friendly interface.
21 | 
22 |     :param Union[str,iterable] tex_code: the Tex source
23 |     :param Union[str] skip_envs: names of environments to skip parsing
24 |     :param int tolerance: error tolerance level (only supports 0 or 1)
25 |     :return: :class:`TexSoup.data.TexNode` object representing tex document
26 | 
27 |     >>> from TexSoup import TexSoup
28 |     >>> soup = TexSoup(r'''
29 |     ... \begin{document}
30 |     ...
31 |     ... \section{Hello \textit{world}.}
32 |     ...
33 |     ... \subsection{Watermelon}
34 |     ...
35 |     ... (n.) A sacred fruit. Also known as:
36 |     ...
37 |     ... \begin{itemize}
38 |     ... \item red lemon
39 |     ... \item life
40 |     ... \end{itemize}
41 |     ...
42 |     ... Here is the prevalence of each synonym.
43 |     ...
44 |     ... \begin{tabular}{c c}
45 |     ... red lemon & uncommon \\ \n
46 |     ... life & common
47 |     ... \end{tabular}
48 |     ...
49 |     ... \end{document}
50 |     ... ''')
51 |     >>> soup.section
52 |     \section{Hello \textit{world}.}
53 |     >>> soup.section.name
54 |     'section'
55 |     >>> soup.section.string
56 |     'Hello \\textit{world}.'
57 |     >>> soup.section.parent.name
58 |     'document'
59 |     >>> soup.tabular
60 |     \begin{tabular}{c c}
61 |     red lemon & uncommon \\ \n
62 |     life & common
63 |     \end{tabular}
64 |     >>> soup.tabular.args[0].string
65 |     'c c'
66 |     >>> soup.itemize
67 |     \begin{itemize}
68 |     \item red lemon
69 |     \item life
70 |     \end{itemize}
71 |     >>> soup.item
72 |     \item red lemon
73 |     <BLANKLINE>
74 |     >>> list(soup.find_all('item'))
75 |     [\item red lemon
76 |     , \item life
77 |     ]
78 |     >>> soup = TexSoup(r'''\textbf{'Hello'}\textit{'Y'}O\textit{'U'}''')
79 |     >>> soup.textbf.delete()
80 |     >>> 'Hello' not in repr(soup)
81 |     True
82 |     >>> soup.textit.replace_with('S')
83 |     >>> soup.textit.replace_with('U', 'P')
84 |     >>> soup
85 |     SOUP
86 |     """
87 |     parsed, src = read(tex_code, skip_envs=skip_envs, tolerance=tolerance)
88 |     return TexNode(parsed, src=src)
89 | 


--------------------------------------------------------------------------------
/TexSoup/category.py:
--------------------------------------------------------------------------------
 1 | """Categorize all characters into one of category codes."""
 2 | 
 3 | from TexSoup.utils import CC, Token, to_buffer
 4 | import string
 5 | 
 6 | 
 7 | # Core category codes
 8 | # https://www.overleaf.com/learn/latex/Table_of_TeX_category_codes
 9 | others = set(string.printable) - set(string.ascii_letters) - \
10 |     set('{}\\$&\n\r#^_~%\x00\x7d \t[]()')
11 | CATEGORY_CODES = {
12 |     CC.Escape:      '\\',
13 |     CC.GroupBegin:  '{',
14 |     CC.GroupEnd:    '}',
15 |     CC.MathSwitch:  '$',
16 |     CC.Alignment:   '&',  # not used
17 |     CC.EndOfLine:   ('\n', '\r'),
18 |     CC.Macro:       '#',  # not used
19 |     CC.Superscript: '^',  # not used
20 |     CC.Subscript:   '_',  # not used
21 |     CC.Ignored:     chr(0),
22 |     CC.Spacer:      (chr(32), chr(9)),
23 |     CC.Letter:      tuple(string.ascii_letters),  # + lots of unicode
24 |     CC.Other:       tuple(others),
25 |     CC.Active:      '~',  # not used
26 |     CC.Comment:     '%',
27 |     CC.Invalid:      chr(127),
28 | 
29 |     # custom
30 |     CC.BracketBegin: '[',
31 |     CC.BracketEnd:  ']',
32 |     CC.ParenBegin:  '(',
33 |     CC.ParenEnd:    ')'
34 | }
35 | 
36 | 
37 | @to_buffer()
38 | def categorize(text):
39 |     r"""Generator for category code tokens on text, ignoring comments.
40 | 
41 |     :param Union[str,iterator,Buffer] text: LaTeX to process
42 | 
43 |     >>> chars = list(categorize(r'\bf{}%[ello+😂'))
44 |     >>> chars[0].category
45 |     <CategoryCodes.Escape: 1>
46 |     >>> chars[1].category
47 |     <CategoryCodes.Letter: 12>
48 |     >>> chars[3].category
49 |     <CategoryCodes.GroupBegin: 2>
50 |     >>> chars[4].category
51 |     <CategoryCodes.GroupEnd: 3>
52 |     >>> chars[5].category
53 |     <CategoryCodes.Comment: 15>
54 |     >>> chars[6].category
55 |     <CategoryCodes.BracketBegin: 19>
56 |     >>> chars[-2].category
57 |     <CategoryCodes.Other: 13>
58 |     >>> chars[-1].category
59 |     <CategoryCodes.Other: 13>
60 |     >>> print(*chars)
61 |     \ b f { } % [ e l l o + 😂
62 |     >>> next(categorize(r'''
63 |     ... ''')).category
64 |     <CategoryCodes.EndOfLine: 6>
65 |     """
66 |     for position, char in enumerate(text):
67 | 
68 |         value = None
69 |         for cc, values in CATEGORY_CODES.items():
70 |             if char in values:
71 |                 value = char
72 |                 break
73 | 
74 |         if value is None:
75 |             yield Token(char, position, CC.Other)
76 |         else:
77 |             yield Token(char, position, cc)
78 | 


--------------------------------------------------------------------------------
/TexSoup/reader.py:
--------------------------------------------------------------------------------
  1 | """Parsing mechanisms should not be directly invoked publicly, as they are
  2 | subject to change."""
  3 | 
  4 | from TexSoup.utils import Token, Buffer, MixedBuffer, CharToLineOffset
  5 | from TexSoup.data import *
  6 | from TexSoup.data import arg_type
  7 | from TexSoup.tokens import (
  8 |     TC,
  9 |     tokenize,
 10 |     SKIP_ENV_NAMES,
 11 |     MATH_ENV_NAMES,
 12 |     SPECIAL_COMMANDS,
 13 | )
 14 | import functools
 15 | import string
 16 | import sys
 17 | 
 18 | 
 19 | MODE_MATH = 'mode:math'
 20 | MODE_NON_MATH = 'mode:non-math'
 21 | MODE_SPECIAL = 'mode:special'
 22 | MATH_SIMPLE_ENVS = (
 23 |     TexDisplayMathModeEnv,
 24 |     TexMathModeEnv,
 25 |     TexDisplayMathEnv,
 26 |     TexMathEnv
 27 | )
 28 | MATH_TOKEN_TO_ENV = {env.token_begin: env for env in MATH_SIMPLE_ENVS}
 29 | ARG_BEGIN_TO_ENV = {arg.token_begin: arg for arg in arg_type}
 30 | 
 31 | SIGNATURES = {
 32 |     'def': (2, 0),
 33 |     'textbf': (1, 0),
 34 |     'section': (1, 1),
 35 |     'label': (1, 0),
 36 |     'cap': (0, 0),
 37 |     'cup': (0, 0),
 38 |     'in': (0, 0),
 39 |     'notin': (0, 0),
 40 |     'infty': (0, 0),
 41 |     'noindent': (0, 0),
 42 | }
 43 | 
 44 | 
 45 | __all__ = ['read_expr', 'read_tex']
 46 | 
 47 | 
 48 | def read_tex(buf, skip_envs=(), tolerance=0):
 49 |     r"""Parse all expressions in buffer
 50 | 
 51 |     :param Buffer buf: a buffer of tokens
 52 |     :param Tuple[str] skip_envs: environments to skip parsing
 53 |     :param int tolerance: error tolerance level (only supports 0 or 1)
 54 |     :return: iterable over parsed expressions
 55 |     :rtype: Iterable[TexExpr]
 56 |     """
 57 |     while buf.hasNext():
 58 |         yield read_expr(buf,
 59 |                         skip_envs=SKIP_ENV_NAMES + skip_envs,
 60 |                         tolerance=tolerance)
 61 | 
 62 | 
 63 | def make_read_peek(f):
 64 |     r"""Make any reader into a peek function.
 65 | 
 66 |     The wrapped function still parses the next sequence of tokens in the
 67 |     buffer but rolls back the buffer position afterwards.
 68 | 
 69 |     >>> from TexSoup.category import categorize
 70 |     >>> from TexSoup.tokens import tokenize
 71 |     >>> def read(buf):
 72 |     ...     buf.forward(3)
 73 |     >>> buf = Buffer(tokenize(categorize(r'\item testing \textbf{hah}')))
 74 |     >>> buf.position
 75 |     0
 76 |     >>> make_read_peek(read)(buf)
 77 |     >>> buf.position
 78 |     0
 79 |     """
 80 |     @functools.wraps(f)
 81 |     def wrapper(buf, *args, **kwargs):
 82 |         start = buf.position
 83 |         ret = f(buf, *args, **kwargs)
 84 |         buf.backward(buf.position - start)
 85 |         return ret
 86 |     return wrapper
 87 | 
 88 | 
 89 | def read_expr(src, skip_envs=(), tolerance=0, mode=MODE_NON_MATH):
 90 |     r"""Read next expression from buffer
 91 | 
 92 |     :param Buffer src: a buffer of tokens
 93 |     :param Tuple[str] skip_envs: environments to skip parsing
 94 |     :param int tolerance: error tolerance level (only supports 0 or 1)
 95 |     :param str mode: math or not math mode
 96 |     :return: parsed expression
 97 |     :rtype: [TexExpr, Token]
 98 |     """
 99 |     c = next(src)
100 |     if c.category in MATH_TOKEN_TO_ENV.keys():
101 |         expr = MATH_TOKEN_TO_ENV[c.category]([], position=c.position)
102 |         return read_math_env(src, expr, tolerance=tolerance)
103 |     elif c.category == TC.Escape:
104 |         name, args = read_command(src, tolerance=tolerance, mode=mode)
105 |         if name == 'item':
106 |             assert mode != MODE_MATH, r'Command \item invalid in math mode.'
107 |             contents = read_item(src)
108 |             expr = TexCmd(name, contents, args, position=c.position)
109 |         # if we are in "special" mode, we do not attempt to match the `\begin`
110 |         # and `\end`
111 |         elif name == 'begin' and mode != MODE_SPECIAL:
112 |             assert args, 'Begin command must be followed by an env name.'
113 |             expr = TexNamedEnv(
114 |                 args[0].string, args=args[1:], position=c.position)
115 |             if expr.name in MATH_ENV_NAMES:
116 |                 mode = MODE_MATH
117 |             if expr.name in skip_envs:
118 |                 read_skip_env(src, expr)
119 |             else:
120 |                 read_env(src, expr, skip_envs=skip_envs,tolerance=tolerance, mode=mode)
121 |         else:
122 |             expr = TexCmd(name, args=args, position=c.position)
123 |         return expr
124 |     if c.category == TC.GroupBegin:
125 |         return read_arg(src, c, tolerance=tolerance)
126 | 
127 |     assert isinstance(c, Token)
128 |     return TexText(c)
129 | 
130 | 
131 | ################
132 | # ENVIRONMENTS #
133 | ################
134 | 
135 | 
136 | def read_item(src, tolerance=0):
137 |     r"""Read the item content. Assumes escape has just been parsed.
138 | 
139 |     There can be any number of whitespace characters between \item and the
140 |     first non-whitespace character. Any amount of whitespace between subsequent
141 |     characters is also allowed.
142 | 
143 |     \item can also take an argument.
144 | 
145 |     :param Buffer src: a buffer of tokens
146 |     :param int tolerance: error tolerance level (only supports 0 or 1)
147 |     :return: contents of the item and any item arguments
148 | 
149 |     >>> from TexSoup.category import categorize
150 |     >>> from TexSoup.tokens import tokenize
151 |     >>> def read_item_from(string, skip=2):
152 |     ...     buf = tokenize(categorize(string))
153 |     ...     _ = buf.forward(skip)
154 |     ...     return read_item(buf)
155 |     >>> read_item_from(r'\item aaa {bbb} ccc\end{itemize}')
156 |     [' aaa ', BraceGroup('bbb'), ' ccc']
157 |     >>> read_item_from(r'\item aaa \textbf{itemize}\item no')
158 |     [' aaa ', TexCmd('textbf', [BraceGroup('itemize')])]
159 |     >>> read_item_from(r'\item WITCH [nuuu] DOCTORRRR 👩🏻‍⚕️')
160 |     [' WITCH ', '[', 'nuuu', ']', ' DOCTORRRR 👩🏻‍⚕️']
161 |     >>> read_item_from(r'''\begin{itemize}
162 |     ... \item
163 |     ... \item first item
164 |     ... \end{itemize}''', skip=8)
165 |     ['\n']
166 |     >>> read_item_from(r'''\def\itemeqn{\item}''', skip=7)
167 |     []
168 |     """
169 |     extras = []
170 | 
171 |     while src.hasNext():
172 |         if src.peek().category == TC.Escape:
173 |             cmd_name, _ = make_read_peek(read_command)(
174 |                 src, 1, skip=1, tolerance=tolerance)
175 |             if cmd_name in ('end', 'item'):
176 |                 return extras
177 |         elif src.peek().category == TC.GroupEnd:
178 |             break
179 |         extras.append(read_expr(src, tolerance=tolerance))
180 |     return extras
181 | 
182 | 
183 | def unclosed_env_handler(src, expr, end):
184 |     """Handle unclosed environments.
185 | 
186 |     Currently raises an end-of-file error. In the future, this can be the hub
187 |     for unclosed-environment fault tolerance.
188 | 
189 |     :param Buffer src: a buffer of tokens
190 |     :param TexExpr expr: expression for the environment
191 |     :param int tolerance: error tolerance level (only supports 0 or 1)
192 |     :param end str: Actual end token (as opposed to expected)
193 |     """
194 |     clo = CharToLineOffset(str(src))
195 |     explanation = 'Instead got %s' % end if end else 'Reached end of file.'
196 |     line, offset = clo(src.position)
197 |     raise EOFError('[Line: %d, Offset: %d] "%s" env expecting %s. %s' % (
198 |         line, offset, expr.name, expr.end, explanation))
199 | 
200 | 
201 | def read_math_env(src, expr, tolerance=0):
202 |     r"""Read the environment from buffer.
203 | 
204 |     Advances the buffer until right after the end of the environment. Adds
205 |     parsed content to the expression automatically.
206 | 
207 |     :param Buffer src: a buffer of tokens
208 |     :param TexExpr expr: expression for the environment
209 |     :rtype: TexExpr
210 | 
211 |     >>> from TexSoup.category import categorize
212 |     >>> from TexSoup.tokens import tokenize
213 |     >>> buf = tokenize(categorize(r'\min_x \|Xw-y\|_2^2'))
214 |     >>> read_math_env(buf, TexMathModeEnv())
215 |     Traceback (most recent call last):
216 |         ...
217 |     EOFError: [Line: 0, Offset: 7] "$" env expecting $. Reached end of file.
218 |     """
219 |     contents = []
220 |     while src.hasNext() and src.peek().category != expr.token_end:
221 |         contents.append(read_expr(src, tolerance=tolerance, mode=MODE_MATH))
222 |     if not src.hasNext() or src.peek().category != expr.token_end:
223 |         unclosed_env_handler(src, expr, src.peek())
224 |     next(src)
225 |     expr.append(*contents)
226 |     return expr
227 | 
228 | 
229 | def read_skip_env(src, expr):
230 |     r"""Read the environment from buffer, WITHOUT parsing contents
231 | 
232 |     Advances the buffer until right after the end of the environment. Adds
233 |     UNparsed content to the expression automatically.
234 | 
235 |     :param Buffer src: a buffer of tokens
236 |     :param TexExpr expr: expression for the environment
237 |     :rtype: TexExpr
238 | 
239 |     >>> from TexSoup.category import categorize
240 |     >>> from TexSoup.tokens import tokenize
241 |     >>> buf = tokenize(categorize(r' \textbf{aa \end{foobar}ha'))
242 |     >>> read_skip_env(buf, TexNamedEnv('foobar'))
243 |     TexNamedEnv('foobar', [' \\textbf{aa '], [])
244 |     >>> buf = tokenize(categorize(r' \textbf{aa ha'))
245 |     >>> read_skip_env(buf, TexNamedEnv('foobar'))  #doctest:+ELLIPSIS
246 |     Traceback (most recent call last):
247 |         ...
248 |     EOFError: ...
249 |     """
250 |     def condition(s): return s.startswith('\\end{%s}' % expr.name)
251 |     contents = [src.forward_until(condition, peek=False)]
252 |     if not src.startswith('\\end{%s}' % expr.name):
253 |         unclosed_env_handler(src, expr, src.peek((0, 6)))
254 |     src.forward(5)
255 |     expr.append(*contents)
256 |     return expr
257 | 
258 | 
259 | def read_env(src, expr, skip_envs=(), tolerance=0, mode=MODE_NON_MATH):
260 |     r"""Read the environment from buffer.
261 | 
262 |     Advances the buffer until right after the end of the environment. Adds
263 |     parsed content to the expression automatically.
264 | 
265 |     :param Buffer src: a buffer of tokens
266 |     :param TexExpr expr: expression for the environment
267 |     :param int tolerance: error tolerance level (only supports 0 or 1)
268 |     :param str mode: math or not math mode
269 |     :rtype: TexExpr
270 | 
271 |     >>> from TexSoup.category import categorize
272 |     >>> from TexSoup.tokens import tokenize
273 |     >>> buf = tokenize(categorize(' tingtang \\end\n{foobar}walla'))
274 |     >>> read_env(buf, TexNamedEnv('foobar'))
275 |     TexNamedEnv('foobar', [' tingtang '], [])
276 |     >>> buf = tokenize(categorize(' tingtang \\end\n\n{foobar}walla'))
277 |     >>> read_env(buf, TexNamedEnv('foobar')) #doctest: +ELLIPSIS
278 |     Traceback (most recent call last):
279 |         ...
280 |     EOFError: [Line: 0, Offset: 1] ...
281 |     >>> buf = tokenize(categorize(' tingtang \\end\n\n{nope}walla'))
282 |     >>> read_env(buf, TexNamedEnv('foobar'), tolerance=1)  # error tolerance
283 |     TexNamedEnv('foobar', [' tingtang '], [])
284 |     """
285 |     contents = []
286 |     while src.hasNext():
287 |         if src.peek().category == TC.Escape:
288 |             name, args = make_read_peek(read_command)(
289 |                 src, skip=1, tolerance=tolerance, mode=mode)
290 |             if name == 'end':
291 |                 break
292 |         contents.append(read_expr(src, skip_envs=skip_envs, tolerance=tolerance, mode=mode))
293 |     error = not src.hasNext() or not args or args[0].string != expr.name
294 |     if error and tolerance == 0:
295 |         unclosed_env_handler(src, expr, src.peek((0, 6)))
296 |     elif not error:
297 |         src.forward(5)
298 |     expr.append(*contents)
299 |     return expr
300 | 
301 | 
302 | ############
303 | # COMMANDS #
304 | ############
305 | 
306 | 
307 | # TODO: handle macro-weirdness e.g., \def\blah[#1][[[[[[[[#2{"#1 . #2"}
308 | # TODO: add newcommand macro
309 | def read_args(src, n_required=-1, n_optional=-1, args=None, tolerance=0,
310 |         mode=MODE_NON_MATH):
311 |     r"""Read all arguments from buffer.
312 | 
313 |     This function assumes that the command name has already been parsed. By
314 |     default, LaTeX allows only up to 9 arguments of both types, optional
315 |     and required. If `n_optional` is not set, all valid bracket groups are
316 |     captured. If `n_required` is not set, all valid brace groups are
317 |     captured.
318 | 
319 |     :param Buffer src: a buffer of tokens
320 |     :param TexArgs args: existing arguments to extend
321 |     :param int n_required: Number of required arguments. If < 0, all valid
322 |                            brace groups will be captured.
323 |     :param int n_optional: Number of optional arguments. If < 0, all valid
324 |                            bracket groups will be captured.
325 |     :param int tolerance: error tolerance level (only supports 0 or 1)
326 |     :param str mode: math or not math mode
327 |     :return: parsed arguments
328 |     :rtype: TexArgs
329 | 
330 |     >>> from TexSoup.category import categorize
331 |     >>> from TexSoup.tokens import tokenize
332 |     >>> test = lambda s, *a, **k: read_args(tokenize(categorize(s)), *a, **k)
333 |     >>> test('[walla]{walla}{ba]ng}')  # 'regular' arg parse
334 |     [BracketGroup('walla'), BraceGroup('walla'), BraceGroup('ba', ']', 'ng')]
335 |     >>> test('\t[wa]\n{lla}\n\n{b[ing}')  # interspersed spacers + 2 newlines
336 |     [BracketGroup('wa'), BraceGroup('lla')]
337 |     >>> test('\t[\t{a]}bs', 2, 0)  # use char as arg, since no opt args
338 |     [BraceGroup('['), BraceGroup('a', ']')]
339 |     >>> test('\n[hue]\t[\t{a]}', 2, 1)  # check stop opt arg capture
340 |     [BracketGroup('hue'), BraceGroup('['), BraceGroup('a', ']')]
341 |     >>> test('\t\\item')
342 |     []
343 |     >>> test('   \t    \n\t \n{bingbang}')
344 |     []
345 |     >>> test('[tempt]{ing}[WITCH]{doctorrrr}', 0, 0)
346 |     []
347 |     """
348 |     args = args or TexArgs()
349 |     if n_required == 0 and n_optional == 0:
350 |         return args
351 | 
352 |     n_optional = read_arg_optional(src, args, n_optional, tolerance, mode)
353 |     n_required = read_arg_required(src, args, n_required, tolerance, mode)
354 | 
355 |     if src.hasNext() and src.peek().category == TC.BracketBegin:
356 |         n_optional = read_arg_optional(src, args, n_optional, tolerance, mode)
357 |     if src.hasNext() and src.peek().category == TC.GroupBegin:
358 |         n_required = read_arg_required(src, args, n_required, tolerance, mode)
359 |     return args
360 | 
361 | 
362 | def read_arg_optional(
363 |         src, args, n_optional=-1, tolerance=0, mode=MODE_NON_MATH):
364 |     """Read next optional argument from buffer.
365 | 
366 |     If the command has remaining optional arguments, look for:
367 | 
368 |        a. A spacer. Skip the spacer if it exists.
369 |        b. A bracket delimiter. If the optional argument is bracket-delimited,
370 |           the contents of the bracket group are used as the argument.
371 | 
372 |     :param Buffer src: a buffer of tokens
373 |     :param TexArgs args: existing arguments to extend
374 |     :param int n_optional: Number of optional arguments. If < 0, all valid
375 |                            bracket groups will be captured.
376 |     :param int tolerance: error tolerance level (only supports 0 or 1)
377 |     :param str mode: math or not math mode
378 |     :return: number of remaining optional arguments
379 |     :rtype: int
380 |     """
381 |     while n_optional != 0:
382 |         spacer = read_spacer(src)
383 |         if not (src.hasNext() and src.peek().category == TC.BracketBegin):
384 |             if spacer:
385 |                 src.backward(1)
386 |             break
387 |         args.append(read_arg(src, next(src), tolerance=tolerance, mode=mode))
388 |         n_optional -= 1
389 |     return n_optional
390 | 
391 | 
392 | def read_arg_required(
393 |         src, args, n_required=-1, tolerance=0, mode=MODE_NON_MATH):
394 |     r"""Read next required argument from buffer.
395 | 
396 |     If the command has remaining required arguments, look for:
397 | 
398 |        a. A spacer. Skip the spacer if it exists.
399 |        b. A curly-brace delimiter. If the required argument is brace-delimited,
400 |           the contents of the brace group are used as the argument.
401 |        c. Spacer or not, if a brace group is not found, simply use the next
402 |           character, unless it is a backslash, in which case use the full command name
403 | 
404 |     :param Buffer src: a buffer of tokens
405 |     :param TexArgs args: existing arguments to extend
406 |     :param int n_required: Number of required arguments. If < 0, all valid
407 |                            brace groups will be captured.
408 |     :param int tolerance: error tolerance level (only supports 0 or 1)
409 |     :param str mode: math or not math mode
410 |     :return: number of remaining optional arguments
411 |     :rtype: int
412 | 
413 |     >>> from TexSoup.category import categorize
414 |     >>> from TexSoup.tokens import tokenize
415 |     >>> buf = tokenize(categorize('{wal]la}\n{ba ng}\n'))
416 |     >>> args = TexArgs()
417 |     >>> read_arg_required(buf, args)  # 'regular' arg parse
418 |     -3
419 |     >>> args
420 |     [BraceGroup('wal', ']', 'la'), BraceGroup('ba ng')]
421 |     >>> buf.hasNext() and buf.peek().category == TC.MergedSpacer
422 |     True
423 |     """
424 |     while n_required != 0 and src.hasNext():
425 |         spacer = read_spacer(src)
426 | 
427 |         if src.hasNext() and src.peek().category == TC.GroupBegin:
428 |             args.append(read_arg(
429 |                 src, next(src), tolerance=tolerance, mode=mode))
430 |             n_required -= 1
431 |             continue
432 |         elif src.hasNext() and n_required > 0:
433 |             next_token = next(src)
434 |             if next_token.category == TC.Escape:
435 |                 name, _ = read_command(src, 0, 0, tolerance=tolerance, mode=mode)
436 |                 args.append(TexCmd(name, position=next_token.position))
437 |             else:
438 |                 args.append('{%s}' % next_token)
439 |             n_required -= 1
440 |             continue
441 | 
442 |         if spacer:
443 |             src.backward(1)
444 |         break
445 |     return n_required
446 | 
447 | 
448 | def read_arg(src, c, tolerance=0, mode=MODE_NON_MATH):
449 |     r"""Read the argument from buffer.
450 | 
451 |     Advances buffer until right before the end of the argument.
452 | 
453 |     :param Buffer src: a buffer of tokens
454 |     :param str c: argument token (starting token)
455 |     :param int tolerance: error tolerance level (only supports 0 or 1)
456 |     :param str mode: math or not math mode
457 |     :return: the parsed argument
458 |     :rtype: TexGroup
459 | 
460 |     >>> from TexSoup.category import categorize
461 |     >>> from TexSoup.tokens import tokenize
462 |     >>> s = r'''{\item\abovedisplayskip=2pt\abovedisplayshortskip=0pt~\vspace*{-\baselineskip}}'''
463 |     >>> buf = tokenize(categorize(s))
464 |     >>> read_arg(buf, next(buf))
465 |     BraceGroup(TexCmd('item'))
466 |     >>> buf = tokenize(categorize(r'{\incomplete! [complete]'))
467 |     >>> read_arg(buf, next(buf), tolerance=1)
468 |     BraceGroup(TexCmd('incomplete'), '! ', '[', 'complete', ']')
469 |     """
470 |     content = [c]
471 |     arg = ARG_BEGIN_TO_ENV[c.category]
472 |     while src.hasNext():
473 |         if src.peek().category == arg.token_end:
474 |             src.forward()
475 |             return arg(*content[1:], position=c.position)
476 |         else:
477 |             content.append(read_expr(src, tolerance=tolerance, mode=mode))
478 | 
479 |     if tolerance == 0:
480 |         clo = CharToLineOffset(str(src))
481 |         line, offset = clo(c.position)
482 |         raise TypeError(
483 |             '[Line: %d, Offset %d] Malformed argument. First and last elements '
484 |             'must match a valid argument format. In this case, TexSoup'
485 |             ' could not find matching punctuation for: %s.\n'
486 |             'Just finished parsing: %s' %
487 |             (line, offset, c, content))
488 |     return arg(*content[1:], position=c.position)
489 | 
490 | 
491 | def read_spacer(buf):
492 |     r"""Extracts the next spacer, if there is one, before non-whitespace
493 | 
494 |     Define a spacer to be a contiguous string of only whitespace, with at most
495 |     one line break.
496 | 
497 |     >>> from TexSoup.category import categorize
498 |     >>> from TexSoup.tokens import tokenize
499 |     >>> read_spacer(Buffer(tokenize(categorize('   \t    \n'))))
500 |     '   \t    \n'
501 |     >>> read_spacer(Buffer(tokenize(categorize('   \t    \n\t \n  \t\n'))))
502 |     '   \t    \n\t '
503 |     >>> read_spacer(Buffer(tokenize(categorize('{'))))
504 |     ''
505 |     >>> read_spacer(Buffer(tokenize(categorize('   \t    \na'))))
506 |     ''
507 |     >>> read_spacer(Buffer(tokenize(categorize('   \t    \n\t \n  \t\na'))))
508 |     '   \t    \n\t '
509 |     """
510 |     if buf.hasNext() and buf.peek().category == TC.MergedSpacer:
511 |         return next(buf)
512 |     return ''
513 | 
514 | 
515 | def read_command(buf, n_required_args=-1, n_optional_args=-1, skip=0,
516 |                  tolerance=0, mode=MODE_NON_MATH):
517 |     r"""Parses command and all arguments. Assumes escape has just been parsed.
518 | 
519 |     No whitespace is allowed between escape and command name. e.g.,
520 |     :code:`\ textbf` is a backslash command, then text :code:`textbf`. Only
521 |     :code:`\textbf` is the bold command.
522 | 
523 |     >>> from TexSoup.category import categorize
524 |     >>> from TexSoup.tokens import tokenize
525 |     >>> buf = Buffer(tokenize(categorize('\\sect  \t    \n\t{wallawalla}')))
526 |     >>> next(buf)
527 |     '\\'
528 |     >>> read_command(buf)
529 |     ('sect', [BraceGroup('wallawalla')])
530 |     >>> buf = Buffer(tokenize(categorize('\\sect  \t   \n\t \n{bingbang}')))
531 |     >>> _ = next(buf)
532 |     >>> read_command(buf)
533 |     ('sect', [])
534 |     >>> buf = Buffer(tokenize(categorize('\\sect{ooheeeee}')))
535 |     >>> _ = next(buf)
536 |     >>> read_command(buf)
537 |     ('sect', [BraceGroup('ooheeeee')])
538 |     >>> buf = Buffer(tokenize(categorize(r'\item aaa {bbb} ccc\end{itemize}')))
539 |     >>> read_command(buf, skip=1)
540 |     ('item', [])
541 |     >>> buf.peek()
542 |     ' aaa '
543 | 
544 |     # >>> buf = Buffer(tokenize(categorize('\\sect abcd')))
545 |     # >>> _ = next(buf)
546 |     # >>> read_command(buf)
547 |     # ('sect', ('a',))
548 |     """
549 |     for _ in range(skip):
550 |         next(buf)
551 | 
552 |     name = next(buf)
553 |     # if the command is a special one (like `newcommand`), enter "special"
554 |     # mode, in which a single `\begin` or `\end` are allowed
555 |     if name.text in SPECIAL_COMMANDS:
556 |         mode = MODE_SPECIAL
557 |     token = Token('', buf.position)
558 |     if n_required_args < 0 and n_optional_args < 0:
559 |         n_required_args, n_optional_args = SIGNATURES.get(name, (-1, -1))
560 |     args = read_args(buf, n_required_args, n_optional_args,
561 |                      tolerance=tolerance, mode=mode)
562 |     # after parsing the command, go back to normal mode
563 |     if name.text in SPECIAL_COMMANDS:
564 |         mode = MODE_NON_MATH
565 |     return name, args
566 | 


--------------------------------------------------------------------------------
/TexSoup/tex.py:
--------------------------------------------------------------------------------
 1 | from TexSoup.reader import read_expr, read_tex
 2 | from TexSoup.data import *
 3 | from TexSoup.utils import *
 4 | from TexSoup.tokens import tokenize
 5 | from TexSoup.category import categorize
 6 | import itertools
 7 | 
 8 | 
 9 | def read(tex, skip_envs=(), tolerance=0):
10 |     """Read and parse all LaTeX source.
11 | 
12 |     :param Union[str,iterable] tex: LaTeX source
13 |     :param Union[str] skip_envs: names of environments to skip parsing
14 |     :param int tolerance: error tolerance level (only supports 0 or 1)
15 |     :return TexEnv: the global environment
16 |     """
17 |     if not isinstance(tex, str):
18 |         tex = ''.join(itertools.chain(*tex))
19 |     buf = categorize(tex)
20 |     buf = tokenize(buf)
21 |     buf = read_tex(buf, skip_envs=skip_envs, tolerance=tolerance)
22 |     return TexEnv('[tex]', begin='', end='', contents=buf), tex
23 | 


--------------------------------------------------------------------------------
/TexSoup/tokens.py:
--------------------------------------------------------------------------------
  1 | """Tokenization for all input.
  2 | 
  3 | Translates string into iterable `TexSoup.utils.Buffer`, yielding one
  4 | token at a time.
  5 | """
  6 | 
  7 | from TexSoup.utils import to_buffer, Buffer, Token, CC
  8 | from TexSoup.data import arg_type
  9 | from TexSoup.category import categorize  # used for tests
 10 | from TexSoup.utils import IntEnum, TC
 11 | import itertools
 12 | import string
 13 | 
 14 | # Custom higher-level combinations of primitives
 15 | SKIP_ENV_NAMES = ('lstlisting', 'verbatim', 'verbatimtab', 'Verbatim', 'listing')
 16 | MATH_ENV_NAMES = (
 17 |     'align', 'align*', 'alignat', 'array', 'displaymath', 'eqnarray',
 18 |     'eqnarray*', 'equation', 'equation*', 'flalign', 'flalign*', 'gather',
 19 |     'gather*', 'math', 'multline', 'multline*', 'split'
 20 | )
 21 | SPECIAL_COMMANDS = {'newcommand', 'renewcommand', 'providecommand'}
 22 | BRACKETS_DELIMITERS = {
 23 |     '(', ')', '<', '>', '[', ']', '{', '}', r'\{', r'\}', '.' '|', r'\langle',
 24 |     r'\rangle', r'\lfloor', r'\rfloor', r'\lceil', r'\rceil', r'\ulcorner',
 25 |     r'\urcorner', r'\lbrack', r'\rbrack'
 26 | }
 27 | # TODO: looks like left-right do have to match
 28 | SIZE_PREFIX = ('left', 'right', 'big', 'Big', 'bigg', 'Bigg')
 29 | PUNCTUATION_COMMANDS = {command + bracket
 30 |                         for command in SIZE_PREFIX
 31 |                         for bracket in BRACKETS_DELIMITERS.union({'|', '.'})}
 32 | 
 33 | __all__ = ['tokenize']
 34 | 
 35 | 
 36 | def next_token(text, prev=None):
 37 |     r"""Returns the next possible token, advancing the iterator to the next
 38 |     position to start processing from.
 39 | 
 40 |     :param Union[str,iterator,Buffer] text: LaTeX to process
 41 |     :return str: the token
 42 | 
 43 |     >>> b = categorize(r'\textbf{Do play\textit{nice}.}   $$\min_w \|w\|_2^2$$')
 44 |     >>> print(next_token(b), next_token(b), next_token(b), next_token(b))
 45 |     \ textbf { Do play
 46 |     >>> print(next_token(b), next_token(b), next_token(b), next_token(b))
 47 |     \ textit { nice
 48 |     >>> print(next_token(b))
 49 |     }
 50 |     >>> print(next_token(categorize('.}')))
 51 |     .
 52 |     >>> next_token(b)
 53 |     '.'
 54 |     >>> next_token(b)
 55 |     '}'
 56 |     """
 57 |     while text.hasNext():
 58 |         for name, f in tokenizers:
 59 |             current_token = f(text, prev=prev)
 60 |             if current_token is not None:
 61 |                 return current_token
 62 | 
 63 | 
 64 | @to_buffer()
 65 | def tokenize(text):
 66 |     r"""Generator for LaTeX tokens on text, ignoring comments.
 67 | 
 68 |     :param Union[str,iterator,Buffer] text: LaTeX to process
 69 | 
 70 |     >>> print(*tokenize(categorize(r'\\%}')))
 71 |     \\ %}
 72 |     >>> print(*tokenize(categorize(r'\textbf{hello \\%}')))
 73 |     \ textbf { hello  \\ %}
 74 |     >>> print(*tokenize(categorize(r'\textbf{Do play \textit{nice}.}')))
 75 |     \ textbf { Do play  \ textit { nice } . }
 76 |     >>> print(*tokenize(categorize(r'\begin{tabular} 0 & 1 \\ 2 & 0 \end{tabular}')))
 77 |     \ begin { tabular }  0 & 1  \\  2 & 0  \ end { tabular }
 78 |     """
 79 |     current_token = next_token(text)
 80 |     while current_token is not None:
 81 |         assert current_token.category in TC
 82 |         yield current_token
 83 |         current_token = next_token(text, prev=current_token)
 84 | 
 85 | 
 86 | ##############
 87 | # Tokenizers #
 88 | ##############
 89 | 
 90 | tokenizers = []
 91 | 
 92 | 
 93 | def token(name):
 94 |     """Marker for a token.
 95 | 
 96 |     :param str name: Name of tokenizer
 97 |     """
 98 | 
 99 |     def wrap(f):
100 |         tokenizers.append((name, f))
101 |         return f
102 | 
103 |     return wrap
104 | 
105 | 
106 | @token('escaped_symbols')
107 | def tokenize_escaped_symbols(text, prev=None):
108 |     r"""Process an escaped symbol or a known punctuation command.
109 | 
110 |     :param Buffer text: iterator over line, with current position
111 | 
112 |     >>> tokenize_escaped_symbols(categorize(r'\\'))
113 |     '\\\\'
114 |     >>> tokenize_escaped_symbols(categorize(r'\\%'))
115 |     '\\\\'
116 |     >>> tokenize_escaped_symbols(categorize(r'\}'))
117 |     '\\}'
118 |     >>> tokenize_escaped_symbols(categorize(r'\%'))
119 |     '\\%'
120 |     >>> tokenize_escaped_symbols(categorize(r'\ '))
121 |     '\\ '
122 |     """
123 |     if text.peek().category == CC.Escape \
124 |             and text.peek(1) \
125 |             and text.peek(1).category in (
126 |                 CC.Escape, CC.GroupBegin, CC.GroupEnd, CC.MathSwitch,
127 |                 CC.Alignment, CC.EndOfLine, CC.Macro, CC.Superscript,
128 |                 CC.Subscript, CC.Spacer, CC.Active, CC.Comment, CC.Other):
129 |         result = text.forward(2)
130 |         result.category = TC.EscapedComment
131 |         return result
132 | 
133 | 
134 | @token('comment')
135 | def tokenize_line_comment(text, prev=None):
136 |     r"""Process a line comment
137 | 
138 |     :param Buffer text: iterator over line, with current position
139 | 
140 |     >>> tokenize_line_comment(categorize('%hello world\\'))
141 |     '%hello world\\'
142 |     >>> tokenize_line_comment(categorize('hello %world'))
143 |     >>> tokenize_line_comment(categorize('%}hello world'))
144 |     '%}hello world'
145 |     >>> tokenize_line_comment(categorize('%}  '))
146 |     '%}  '
147 |     >>> tokenize_line_comment(categorize('%hello\n world'))
148 |     '%hello'
149 |     >>> b = categorize(r'\\%')
150 |     >>> _ = next(b), next(b)
151 |     >>> tokenize_line_comment(b)
152 |     '%'
153 |     >>> tokenize_line_comment(categorize(r'\%'))
154 |     """
155 |     result = Token('', text.position)
156 |     if text.peek().category == CC.Comment and (
157 |             prev is None or prev.category != CC.Comment):
158 |         result += text.forward(1)
159 |         while text.hasNext() and text.peek().category != CC.EndOfLine:
160 |             result += text.forward(1)
161 |         result.category = TC.Comment
162 |         return result
163 | 
164 | 
165 | @token('math_sym_switch')
166 | def tokenize_math_sym_switch(text, prev=None):
167 |     r"""Group characters in math switches.
168 | 
169 |     :param Buffer text: iterator over line, with current position
170 | 
171 |     >>> tokenize_math_sym_switch(categorize(r'$\min_x$ \command'))
172 |     '$'
173 |     >>> tokenize_math_sym_switch(categorize(r'$$\min_x$$ \command'))
174 |     '$$'
175 |     """
176 |     if text.peek().category == CC.MathSwitch:
177 |         if text.peek(1) and text.peek(1).category == CC.MathSwitch:
178 |             result = Token(text.forward(2), text.position)
179 |             result.category = TC.DisplayMathSwitch
180 |         else:
181 |             result = Token(text.forward(1), text.position)
182 |             result.category = TC.MathSwitch
183 |         return result
184 | 
185 | 
186 | @token('math_asym_switch')
187 | def tokenize_math_asym_switch(text, prev=None):
188 |     r"""Group characters in begin-end-style math switches
189 | 
190 |     :param Buffer text: iterator over line, with current position
191 | 
192 |     >>> tokenize_math_asym_switch(categorize(r'\[asf'))
193 |     '\\['
194 |     >>> tokenize_math_asym_switch(categorize(r'\] sdf'))
195 |     '\\]'
196 |     >>> tokenize_math_asym_switch(categorize(r'[]'))
197 |     """
198 |     mapping = {
199 |         (CC.Escape, CC.BracketBegin):   TC.DisplayMathGroupBegin,
200 |         (CC.Escape, CC.BracketEnd):     TC.DisplayMathGroupEnd,
201 |         (CC.Escape, CC.ParenBegin):     TC.MathGroupBegin,
202 |         (CC.Escape, CC.ParenEnd):       TC.MathGroupEnd
203 |     }
204 |     if not text.hasNext(2):
205 |         return
206 |     key = (text.peek().category, text.peek(1).category)
207 |     if key in mapping:
208 |         result = text.forward(2)
209 |         result.category = mapping[key]
210 |         return result
211 | 
212 | 
213 | @token('line_break')
214 | def tokenize_line_break(text, prev=None):
215 |     r"""Extract LaTeX line breaks.
216 | 
217 |     >>> tokenize_line_break(categorize(r'\\aaa'))
218 |     '\\\\'
219 |     >>> tokenize_line_break(categorize(r'\aaa'))
220 |     """
221 |     if text.peek().category == CC.Escape and text.peek(1) \
222 |             and text.peek(1).category == CC.Escape:
223 |         result = text.forward(2)
224 |         result.category = TC.LineBreak
225 |         return result
226 | 
227 | 
228 | @token('ignore')
229 | def tokenize_ignore(text, prev=None):
230 |     r"""Filter out ignored or invalid characters
231 | 
232 |     >>> print(*tokenize(categorize('\x00hello')))
233 |     hello
234 |     """
235 |     while text.peek().category in (CC.Ignored, CC.Invalid):
236 |         text.forward(1)
237 | 
238 | 
239 | @token('spacers')
240 | def tokenize_spacers(text, prev=None):
241 |     r"""Combine spacers [ + line break [ + spacer]]
242 | 
243 |     >>> tokenize_spacers(categorize('\t\n{there'))
244 |     '\t\n'
245 |     >>> tokenize_spacers(categorize('\t\nthere'))
246 |     >>> tokenize_spacers(categorize('      \t     '))
247 |     '      \t     '
248 |     >>> tokenize_spacers(categorize(r' ccc'))
249 |     """
250 |     result = Token('', text.position)
251 |     while text.hasNext() and text.peek().category == CC.Spacer:
252 |         result += text.forward(1)
253 |     if text.hasNext() and text.peek().category == CC.EndOfLine:
254 |         result += text.forward(1)
255 |     while text.hasNext() and text.peek().category == CC.Spacer:
256 |         result += text.forward(1)
257 |     result.category = TC.MergedSpacer
258 | 
259 |     if text.hasNext() and text.peek().category in (CC.Letter, CC.Other):
260 |         text.backward(text.position - result.position)
261 |         return
262 | 
263 |     if result:
264 |         return result
265 | 
266 | 
267 | @token('symbols')
268 | def tokenize_symbols(text, prev=None):
269 |     r"""Process singletone symbols as standalone tokens.
270 | 
271 |     :param Buffer text: iterator over line, with current position. Escape is
272 |                         isolated if not part of escaped char
273 | 
274 |     >>> next(tokenize(categorize(r'\begin turing')))
275 |     '\\'
276 |     >>> next(tokenize(categorize(r'\bf  {turing}')))
277 |     '\\'
278 |     >>> next(tokenize(categorize(r'{]}'))).category
279 |     <TokenCode.GroupBegin: 23>
280 |     """
281 |     mapping = {
282 |         CC.Escape:          TC.Escape,
283 |         CC.GroupBegin:      TC.GroupBegin,
284 |         CC.GroupEnd:        TC.GroupEnd,
285 |         CC.BracketBegin:     TC.BracketBegin,
286 |         CC.BracketEnd:    TC.BracketEnd
287 |     }
288 |     if text.peek().category in mapping.keys():
289 |         result = text.forward(1)
290 |         result.category = mapping[result.category]
291 |         return result
292 | 
293 | 
294 | # TODO: move me to parser (should parse punctuation as arg +
295 | # store punctuation commads as macro)
296 | @token('punctuation_command_name')
297 | def tokenize_punctuation_command_name(text, prev=None):
298 |     """Process command that augments or modifies punctuation.
299 | 
300 |     This is important to the tokenization of a string, as opening or closing
301 |     punctuation is not supposed to match.
302 | 
303 |     :param Buffer text: iterator over text, with current position
304 |     """
305 |     if text.peek(-1) and text.peek(-1).category == CC.Escape:
306 |         for point in PUNCTUATION_COMMANDS:
307 |             if text.peek((0, len(point))) == point:
308 |                 result = text.forward(len(point))
309 |                 result.category = TC.PunctuationCommandName
310 |                 return result
311 | 
312 | 
313 | @token('command_name')
314 | def tokenize_command_name(text, prev=None):
315 |     r"""Extract most restrictive subset possibility for command name.
316 | 
317 |     Parser can later join allowed spacers and macros to assemble the final
318 |     command name and arguments.
319 | 
320 |     >>> b = categorize(r'\bf{')
321 |     >>> _ = next(b)
322 |     >>> tokenize_command_name(b)
323 |     'bf'
324 |     >>> b = categorize(r'\bf,')
325 |     >>> _ = next(b)
326 |     >>> tokenize_command_name(b)
327 |     'bf'
328 |     >>> b = categorize(r'\bf*{')
329 |     >>> _ = next(b)
330 |     >>> tokenize_command_name(b)
331 |     'bf*'
332 |     """
333 |     if text.peek(-1) and text.peek(-1).category == CC.Escape \
334 |             and text.peek().category == CC.Letter:
335 |         c = text.forward(1)
336 |         while text.hasNext() and text.peek().category == CC.Letter \
337 |                 or text.peek() == '*':  # TODO: what do about asterisk?
338 |             # TODO: excluded other, macro, super, sub, acttive, alignment
339 |             # although macros can make these a part of the command name
340 |             c += text.forward(1)
341 |         c.category = TC.CommandName
342 |         return c
343 | 
344 | 
345 | @token('string')
346 | def tokenize_string(text, prev=None):
347 |     r"""Process a string of text
348 | 
349 |     :param Buffer text: iterator over line, with current position
350 |     :param Union[None,iterable,str] delimiters: defines the delimiters
351 | 
352 |     >>> tokenize_string(categorize('hello'))
353 |     'hello'
354 |     >>> b = categorize(r'hello again\command')
355 |     >>> tokenize_string(b)
356 |     'hello again'
357 |     >>> print(b.peek())
358 |     \
359 |     >>> print(tokenize_string(categorize(r'0 & 1\\\command')))
360 |     0 & 1
361 |     """
362 |     result = Token('', text.position, category=TC.Text)
363 |     while text.hasNext() and text.peek().category not in (
364 |             CC.Escape,
365 |             CC.GroupBegin,
366 |             CC.GroupEnd,
367 |             CC.MathSwitch,
368 |             CC.BracketBegin,
369 |             CC.BracketEnd,
370 |             CC.Comment):
371 |         result += next(text)
372 |     return result
373 | 


--------------------------------------------------------------------------------
/TexSoup/utils.py:
--------------------------------------------------------------------------------
  1 | import bisect
  2 | import functools
  3 | 
  4 | from enum import IntEnum as IntEnumBase
  5 | 
  6 | 
  7 | ##########
  8 | # Tokens #
  9 | ##########
 10 | 
 11 | 
 12 | def IntEnum(name, keys, start=1):
 13 |     """Explicitly define key-value pairs. For Python3.4 compatibility"""
 14 |     return IntEnumBase(name,
 15 |                        [(key, index) for index, key in enumerate(keys, start=start)])
 16 | 
 17 | 
 18 | CC = IntEnum('CategoryCodes', (
 19 |     'Escape',
 20 |     'GroupBegin',
 21 |     'GroupEnd',
 22 |     'MathSwitch',
 23 |     'Alignment',
 24 |     'EndOfLine',
 25 |     'Macro',
 26 |     'Superscript',
 27 |     'Subscript',
 28 |     'Ignored',
 29 |     'Spacer',
 30 |     'Letter',
 31 |     'Other',
 32 |     'Active',
 33 |     'Comment',
 34 |     'Invalid',
 35 | 
 36 |     # custom
 37 |     'MathGroupBegin',
 38 |     'MathGroupEnd',
 39 |     'BracketBegin',
 40 |     'BracketEnd',
 41 |     'ParenBegin',
 42 |     'ParenEnd'
 43 | ))
 44 | 
 45 | 
 46 | # Only includes items that cannot cause failures
 47 | TC = IntEnum('TokenCode', (
 48 |     'Escape',
 49 |     'GroupBegin',
 50 |     'GroupEnd',
 51 |     'Comment',
 52 |     'MergedSpacer',  # whitespace allowed between <command name> and arguments
 53 |     'EscapedComment',
 54 |     'MathSwitch',
 55 |     'DisplayMathSwitch',
 56 |     'MathGroupBegin',
 57 |     'MathGroupEnd',
 58 |     'DisplayMathGroupBegin',
 59 |     'DisplayMathGroupEnd',
 60 |     'LineBreak',
 61 |     'CommandName',
 62 |     'Text',
 63 |     'BracketBegin',
 64 |     'BracketEnd',
 65 |     'ParenBegin',
 66 |     'ParenEnd',
 67 | 
 68 |     # temporary (Replace with macros support)
 69 |     'PunctuationCommandName',
 70 |     'SizeCommand',
 71 |     'Spacer'
 72 | ), start=max(CC))
 73 | 
 74 | 
 75 | class Token(str):
 76 |     """Enhanced string object with knowledge of global position."""
 77 | 
 78 |     # noinspection PyArgumentList
 79 |     def __new__(cls, text='', position=None, category=None):
 80 |         """Initializer for pseudo-string object.
 81 | 
 82 |         :param text: The original string
 83 |         :param position: Position in the original buffer
 84 |         :param category: Category of token
 85 |         """
 86 |         self = str.__new__(cls, text)
 87 |         if isinstance(text, Token):
 88 |             self.text = text.text
 89 |             self.position = text.position
 90 |             self.category = category or text.category
 91 |         else:
 92 |             self.text = text
 93 |             self.position = position
 94 |             self.category = category
 95 |         return self
 96 | 
 97 |     def __repr__(self):
 98 |         return repr(self.text)
 99 | 
100 |     def __str__(self):
101 |         return str(self.text)
102 | 
103 |     def __getattr__(self, name):
104 |         return getattr(self.text, name)
105 | 
106 |     def __eq__(self, other):
107 |         """
108 |         >>> Token('asdf', 0) == Token('asdf', 2)
109 |         True
110 |         >>> Token('asdf', 0) == Token('asd', 0)
111 |         False
112 |         """
113 |         if isinstance(other, Token):
114 |             return self.text == other.text
115 |         else:
116 |             return self.text == other
117 | 
118 |     def __hash__(self):
119 |         """
120 |         >>> hash(Token('asf')) == hash('asf')
121 |         True
122 |         """
123 |         return hash(self.text)
124 | 
125 |     def __add__(self, other):
126 |         """Implements addition in the form of TextWithPosition(...) + (obj).
127 | 
128 |         >>> t1 = Token('as', 0) + Token('df', 1)
129 |         >>> str(t1)
130 |         'asdf'
131 |         >>> t1.position
132 |         0
133 |         >>> t2 = Token('as', 1) + 'df'
134 |         >>> str(t2)
135 |         'asdf'
136 |         >>> t3 = Token(t2)
137 |         >>> t3.position
138 |         1
139 |         """
140 |         if isinstance(other, Token):
141 |             return Token(self.text + other.text, self.position, self.category)
142 |         else:
143 |             return Token(self.text + other, self.position, self.category)
144 | 
145 |     def __radd__(self, other):
146 |         """Implements addition in the form of (obj) + TextWithPosition(...).
147 | 
148 |         Note that if the first element is Token,
149 |         Token(...).__add__(...) will be used. As a result, we
150 |         can assume WLOG that `other` is a type other than Token.
151 | 
152 |         >>> t1 = Token('as', 2) + Token('dfg', 2)
153 |         >>> str(t1)
154 |         'asdfg'
155 |         >>> t1.position
156 |         2
157 |         >>> t2 = 'as' + Token('dfg', 2)
158 |         >>> str(t2)
159 |         'asdfg'
160 |         >>> t2.position
161 |         0
162 |         """
163 |         return Token(
164 |             other + self.text, self.position - len(other), self.category)
165 | 
166 |     def __iadd__(self, other):
167 |         """Implements addition in the form of TextWithPosition(...) += ...
168 | 
169 |         >>> t1 = Token('as', 0)
170 |         >>> t1 += 'df'
171 |         >>> str(t1)
172 |         'asdf'
173 |         >>> t1.position
174 |         0
175 |         """
176 |         if isinstance(other, Token):
177 |             new = Token(self.text + other.text, self.position, self.category)
178 |         else:
179 |             new = Token(self.text + other, self.position, self.category)
180 |         return new
181 | 
182 |     @classmethod
183 |     def join(cls, tokens, glue=''):
184 |         if len(tokens) > 0:
185 |             return Token(
186 |                 glue.join(t.text for t in tokens),
187 |                 tokens[0].position,
188 |                 tokens[0].category)
189 |         else:
190 |             return Token.Empty
191 | 
192 |     def __bool__(self):
193 |         return bool(self.text)
194 | 
195 |     def __contains__(self, item):
196 |         """
197 |         >>> 'rg' in Token('corgi', 0)
198 |         True
199 |         >>> 'reg' in Token('corgi', 0)
200 |         False
201 |         >>> Token('rg', 0) in Token('corgi', 0)
202 |         True
203 |         """
204 |         if isinstance(item, Token):
205 |             return item.text in self.text
206 |         return item in self.text
207 | 
208 |     def __iter__(self):
209 |         """
210 |         >>> list(Token('asdf', 0))
211 |         ['a', 's', 'd', 'f']
212 |         """
213 |         return iter(self.__iter())
214 | 
215 |     def __iter(self):
216 |         for i, c in enumerate(self.text):
217 |             yield Token(c, self.position + i, self.category)
218 | 
219 |     def __getitem__(self, i):
220 |         """Access characters in object just as with strings.
221 | 
222 |         >>> t1 = Token('asdf', 2)
223 |         >>> t1[0]
224 |         'a'
225 |         >>> t1[-1]
226 |         'f'
227 |         >>> t1[:]
228 |         'asdf'
229 |         """
230 |         if isinstance(i, int):
231 |             start = i
232 |         else:
233 |             start = i.start
234 |         if start is None:
235 |             start = 0
236 |         if start < 0:
237 |             start = len(self.text) + start
238 |         return Token(self.text[i], self.position + start, self.category)
239 | 
240 |     def strip(self, *args, **kwargs):
241 |         stripped = self.text.strip(*args, **kwargs)
242 |         offset = self.text.find(stripped)
243 |         return Token(stripped, self.position + offset, self.category)
244 | 
245 |     def lstrip(self, *args, **kwargs):
246 |         """Strip leading whitespace for text.
247 | 
248 |         >>> t = Token('  asdf  ', 2)
249 |         >>> t.lstrip()
250 |         'asdf  '
251 |         """
252 |         stripped = self.text.lstrip(*args, **kwargs)
253 |         offset = self.text.find(stripped)
254 |         return Token(stripped, self.position + offset, self.category)
255 | 
256 |     def rstrip(self, *args, **kwargs):
257 |         """Strip trailing whitespace for text.
258 | 
259 |         >>> t = Token('  asdf  ', 2)
260 |         >>> t.rstrip()
261 |         '  asdf'
262 |         """
263 |         stripped = self.text.rstrip(*args, **kwargs)
264 |         offset = self.text.find(stripped)
265 |         return Token(stripped, self.position + offset, self.category)
266 | 
267 | 
268 | Token.Empty = Token('', position=0)
269 | 
270 | 
271 | # TODO: Rename to Buffer (formerly MixedBuffer) and StringBuffer
272 | # but needs test refactoring to change defaults
273 | class Buffer:
274 |     """Converts string or iterable into a navigable iterator of strings.
275 | 
276 |     >>> b1 = Buffer("012345")
277 |     >>> next(b1)
278 |     '0'
279 |     >>> b1.forward()
280 |     '1'
281 |     >>> b1.endswith('1')
282 |     True
283 |     >>> b1.backward(2)
284 |     '01'
285 |     >>> b1.peek()
286 |     '0'
287 |     >>> b1.peek(2)
288 |     '2'
289 |     >>> b1.peek((0, 2))
290 |     '01'
291 |     >>> b1.startswith('01')
292 |     True
293 |     >>> b1[2:4]
294 |     '23'
295 |     >>> Buffer('asdf')[:10]
296 |     'asdf'
297 |     >>> def gen():
298 |     ...     for i in range(10):
299 |     ...         yield i
300 |     >>> list(gen())
301 |     [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
302 |     >>> list(Buffer(gen()))
303 |     [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
304 |     """
305 | 
306 |     def __init__(self, iterator, join=Token.join, empty=lambda: '',
307 |                  init=lambda content, index: Token(content, index)):
308 |         """Initialization for Buffer.
309 | 
310 |         :param iterator: iterator or iterable
311 |         :param func join: function to join multiple buffer elements
312 |         """
313 |         assert hasattr(iterator, '__iter__'), 'Must be an iterable.'
314 |         self.__iterator = iter(iterator)
315 |         self.__queue = []
316 |         self.__i = 0
317 |         self.__join = join
318 |         self.__init = init
319 |         self.__empty = empty
320 | 
321 |     # noinspection PyPep8Naming
322 |     def hasNext(self, n=1):
323 |         """Returns whether or not there is another element."""
324 |         return bool(self.peek(n - 1))
325 | 
326 |     def startswith(self, s):
327 |         """Check if iterator starts with s, beginning from the current
328 |         position."""
329 |         return self.peek((0, len(s))).startswith(s)
330 | 
331 |     def endswith(self, s):
332 |         """Check if iterator ends with s, ending at current position."""
333 |         return self.peek((-len(s), 0)).endswith(s)
334 | 
335 |     def forward(self, j=1):
336 |         """Move forward by j steps.
337 | 
338 |         >>> b = Buffer('abcdef')
339 |         >>> b.forward(3)
340 |         'abc'
341 |         >>> b.forward(-2)
342 |         'bc'
343 |         """
344 |         if j < 0:
345 |             return self.backward(-j)
346 |         self.__i += j
347 |         return self[self.__i - j:self.__i]
348 | 
349 |     def num_forward_until(self, condition):
350 |         """Forward until one of the provided matches is found.
351 | 
352 |         :param condition: set of valid strings
353 |         """
354 |         i, c = 0, ''
355 |         while self.hasNext() and not condition(self.peek()):
356 |             c += self.forward(1)
357 |             i += 1
358 |         assert self.backward(i) == c
359 |         return i
360 | 
361 |     def forward_until(self, condition, peek=True):
362 |         """Forward until one of the provided matches is found.
363 | 
364 |         The returned string contains all characters found before the condition
365 |         was met. In other words, the condition will be true for the remainder
366 |         of the buffer.
367 | 
368 |         :param Callable condition: lambda condition for the token to stop at
369 | 
370 |         >>> buf = Buffer(map(str, range(9)))
371 |         >>> _ = buf.forward_until(lambda x: int(x) > 3)
372 |         >>> c = buf.forward_until(lambda x: int(x) > 6)
373 |         >>> c
374 |         '456'
375 |         >>> c.position
376 |         4
377 |         """
378 |         c = self.__init(self.__empty(), self.peek().position)
379 |         while self.hasNext() and not condition(self.peek() if peek else self):
380 |             c += self.forward(1)
381 |         return c
382 | 
383 |     def backward(self, j=1):
384 |         """Move backward by j steps.
385 | 
386 |         >>> b = Buffer('abcdef')
387 |         >>> b.backward(-3)
388 |         'abc'
389 |         >>> b.backward(2)
390 |         'bc'
391 |         """
392 |         if j < 0:
393 |             return self.forward(-j)
394 |         assert self.__i - j >= 0, 'Cannot move more than %d back' % self.__i
395 |         self.__i -= j
396 |         return self[self.__i:self.__i + j]
397 | 
398 |     def peek(self, j=0):
399 |         """Peek at the next value(s), without advancing the Buffer.
400 | 
401 |         Return None if index is out of range.
402 |         """
403 |         try:
404 |             if isinstance(j, int):
405 |                 return self[self.__i + j]
406 |             return self[self.__i + j[0]:self.__i + j[1]]
407 |         except IndexError:
408 |             return None
409 | 
410 |     def __next__(self):
411 |         """Implements next."""
412 |         while self.__i >= len(self.__queue):
413 |             self.__queue.append(self.__init(
414 |                 next(self.__iterator), self.__i))
415 |         self.__i += 1
416 |         return self.__queue[self.__i - 1]
417 | 
418 |     def __getitem__(self, i):
419 |         """Supports indexing list.
420 | 
421 |         >>> b = Buffer('asdf')
422 |         >>> b[5]
423 |         Traceback (most recent call last):
424 |             ...
425 |         IndexError: list index out of range
426 |         >>> b[0]
427 |         'a'
428 |         >>> b[1:3]
429 |         'sd'
430 |         >>> b[1:]
431 |         'sdf'
432 |         >>> b[:3]
433 |         'asd'
434 |         >>> b[:]
435 |         'asdf'
436 |         """
437 |         if isinstance(i, int):
438 |             old, j = self.__i, i
439 |         else:
440 |             old, j = self.__i, i.stop
441 | 
442 |         while j is None or self.__i <= j:
443 |             try:
444 |                 next(self)
445 |             except StopIteration:
446 |                 break
447 |         self.__i = old
448 |         if isinstance(i, int):
449 |             return self.__queue[i]
450 |         return self.__join(self.__queue[i])
451 | 
452 |     def __iter__(self):
453 |         return self
454 | 
455 |     @property
456 |     def position(self):
457 |         return self.__i
458 | 
459 | 
460 | class CharToLineOffset(object):
461 |     """Utility to convert absolute position in the source file to
462 |     line_no:char_no_in_line. This can be very useful if we want to parse LaTeX
463 |     and navigate to some elements in the generated DVI/PDF via SyncTeX.
464 | 
465 |     >>> clo = CharToLineOffset('''hello
466 |     ... world
467 |     ... I scream for ice cream!''')
468 |     >>> clo(3)
469 |     (0, 3)
470 |     >>> clo(6)
471 |     (1, 0)
472 |     >>> clo(12)
473 |     (2, 0)
474 |     """
475 | 
476 |     def __init__(self, src):
477 |         self.line_break_positions = [i for i, c in enumerate(src) if c == '\n']
478 |         self.src_len = len(src)
479 | 
480 |     def __call__(self, char_pos):
481 |         line_no = bisect.bisect(self.line_break_positions, char_pos)
482 |         if line_no == 0:
483 |             char_no = char_pos
484 |         elif line_no == len(self.line_break_positions):
485 |             line_start = self.line_break_positions[-1]
486 |             char_no = min(char_pos - line_start - 1, self.src_len - line_start)
487 |         else:
488 |             char_no = char_pos - self.line_break_positions[line_no - 1] - 1
489 |         return line_no, char_no
490 | 
491 | 
492 | class MixedBuffer(Buffer):
493 | 
494 |     def __init__(self, iterator):
495 |         """Initialization for Buffer, accepting types beyond strings.
496 | 
497 |         :param iterator: iterator or iterable
498 |         :param func join: function to join multiple buffer elements
499 | 
500 |         >>> buf = MixedBuffer([324, 'adsf', lambda x: x])
501 |         >>> buf.peek()
502 |         324
503 |         """
504 |         super().__init__(iterator,
505 |                          join=lambda x: x, empty=lambda x: [],
506 |                          init=lambda content, index: content)
507 | 
508 | 
509 | ##############
510 | # Decorators #
511 | ##############
512 | 
513 | 
514 | def to_buffer(convert_in=True, convert_out=True, Buffer=Buffer):
515 |     """Decorator converting all strings and iterators/iterables into
516 |     Buffers.
517 | 
518 |     :param bool convert_in: Convert inputs where applicable to Buffers
519 |     :param bool convert_out: Convert output to a Buffer
520 |     :param type Buffer: Type of Buffer to convert into
521 |     """
522 |     def decorator(f):
523 |         @functools.wraps(f)
524 |         def wrap(*args, **kwargs):
525 |             iterator = args[0]
526 |             if convert_in:
527 |                 iterator = kwargs.get('iterator', iterator)
528 |                 if not isinstance(iterator, Buffer):
529 |                     iterator = Buffer(iterator)
530 |             output = f(iterator, *args[1:], **kwargs)
531 |             if convert_out:
532 |                 return Buffer(output)
533 |             return output
534 |         return wrap
535 |     return decorator
536 | 
537 | 
538 | def to_list(f):
539 |     """Converts generator or iterable output to list
540 | 
541 |     >>> class A:
542 |     ...     @property
543 |     ...     @to_list
544 |     ...     def a(self):
545 |     ...         for i in range(3):
546 |     ...             yield i
547 |     >>> A().a
548 |     [0, 1, 2]
549 |     """
550 |     @functools.wraps(f)
551 |     def wrapper(*args, **kwargs):
552 |         return list(f(*args, **kwargs))
553 |     return wrapper
554 | 


--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | src
2 | build
3 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SPHINXPROJ    = TexSoup
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | set SPHINXPROJ=TexSoup
13 | 
14 | if "%1" == "" goto help
15 | 
16 | %SPHINXBUILD% >NUL 2>NUL
17 | if errorlevel 9009 (
18 | 	echo.
19 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
20 | 	echo.installed, then set the SPHINXBUILD environment variable to point
21 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
22 | 	echo.may add the Sphinx directory to PATH.
23 | 	echo.
24 | 	echo.If you don't have Sphinx installed, grab it from
25 | 	echo.http://sphinx-doc.org/
26 | 	exit /b 1
27 | )
28 | 
29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
30 | goto end
31 | 
32 | :help
33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
34 | 
35 | :end
36 | popd
37 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx
2 | -e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
3 | sphinxcontrib.katex
4 | 


--------------------------------------------------------------------------------
/docs/source/_static/css/theme-mod.css:
--------------------------------------------------------------------------------
  1 | /********
  2 |  * LOGO *
  3 |  ********/
  4 | 
  5 | @media screen and (min-width: 768px) {
  6 |   .site-footer .footer-logo {
  7 |       background-size: 51px 80px;
  8 |       height: 51px;
  9 |       margin-bottom: 0;
 10 |       margin-bottom: 0;
 11 |       width: 80px;
 12 |   }
 13 | }
 14 | 
 15 | /*********
 16 |  * COLOR *
 17 |  *********/
 18 | 
 19 | html p a, html p a:link, html p a:visited {
 20 |   text-decoration-line: underline;
 21 | }
 22 | 
 23 | p a, p a:link, p a:visited {
 24 |   color:#999;
 25 | }
 26 | 
 27 | .pytorch-left-menu li.toctree-l1.current>a, .pytorch-right-menu li.toctree-l1.current>a,
 28 | .header-holder .main-menu ul li.active:after, .header-holder .main-menu ul li.active a,
 29 | ul.pytorch-breadcrumbs a, p a:hover, li a:hover, .btn:hover,
 30 | article.pytorch-article .class em.property,.anchorjs-link:hover {
 31 |   color:transparent;
 32 |   -webkit-background-clip: text !important;
 33 |   -webkit-text-fill-color: transparent;
 34 |   background: rgb(241,90,36);
 35 |   background: linear-gradient(135deg, rgba(241,90,36,1) 0%, rgba(251,176,59,1) 100%);
 36 | }
 37 | 
 38 | .pytorch-left-menu li.toctree-l1.current > a::before, .pytorch-right-menu li.toctree-l1.current > a::before {
 39 |   color: transparent;
 40 |   height: 8px;
 41 |   width: 8px;
 42 |   border-radius: 50%;
 43 |   background: rgb(241,90,36);
 44 |   background: linear-gradient(135deg, rgba(241,90,36,1) 0%, rgba(251,176,59,1) 100%);
 45 | }
 46 | 
 47 | article.pytorch-article .class dt {
 48 |   background-clip: padding-box;
 49 |   -webkit-background-clip: padding-box;
 50 |   border: solid 3px transparent;
 51 | }
 52 | 
 53 | article.pytorch-article .class dt::before {
 54 |   content: '';
 55 |   position: absolute;
 56 |   top: 0;
 57 |   right: 0;
 58 |   bottom: 0;
 59 |   left: 0;
 60 |   z-index: -1;
 61 | 
 62 |   margin-top:-3px;
 63 |   margin-left:1px;
 64 |   background: rgb(241,90,36);
 65 |   background: linear-gradient(135deg, rgba(241,90,36,1) 0%, rgba(251,176,59,1) 100%);
 66 | }
 67 | 
 68 | .pytorch-content-wrap {
 69 |   background-color:transparent;  /* allows gradients to show up as borders */
 70 | }
 71 | 
 72 | 
 73 | article.pytorch-article .attribute dt,
 74 | article.pytorch-article .function dt,
 75 | article.pytorch-article .class .attribute dt,
 76 | article.pytorch-article .class .classmethod dt,
 77 | article.pytorch-article .class .method dt,
 78 | article.pytorch-article .class .staticmethod dt {
 79 |   border-left:3px solid transparent;
 80 | }
 81 | 
 82 | article.pytorch-article .attribute dt::before,
 83 | article.pytorch-article .function dt::before,
 84 | article.pytorch-article .class .attribute dt::before,
 85 | article.pytorch-article .class .classmethod dt::before,
 86 | article.pytorch-article .class .method dt::before,
 87 | article.pytorch-article .class .staticmethod dt::before {
 88 |   margin-top:0;
 89 |   margin-left:-3px;
 90 |   background: linear-gradient(0deg, rgba(241,90,36,1) 0%, rgba(251,176,59,1) 100%);
 91 | }
 92 | 
 93 | /* HACK: why is this needed? */
 94 | article.pytorch-article .class dl.attribute dt::before {
 95 |   margin-left:-6px;
 96 |   margin-bottom: -3px;
 97 | }
 98 | 
 99 | /*********
100 |  * FIXES *
101 |  *********/
102 | 
103 | .pytorch-left-menu li.current ul,  .pytorch-right-menu li.current ul {
104 |   padding-left: 1em;
105 | }
106 | 
107 | #docs-tutorials-resources {
108 |   padding: 0;
109 |   height: 0;
110 |   margin: 0;
111 | }
112 | 
113 | .built-with {
114 |   margin-bottom:1em;
115 | }
116 | 
117 | .pytorch-menu-vertical {
118 |   padding-bottom: 1.625rem;
119 | }
120 | 
121 | .pytorch-breadcrumbs-aside img {
122 |   height: 20px;
123 | }
124 | 
125 | @media screen and (min-width: 1101px) {
126 |   .pytorch-left-menu li.toctree-l1.current>a:before, .pytorch-right-menu li.toctree-l1.current>a:before {
127 |     top:5px;
128 |   }
129 | }
130 | 


--------------------------------------------------------------------------------
/docs/source/_static/images/android-chrome-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alvinwan/TexSoup/7dabf76fdde1f0d7f0d30ede841b3b45bbb609f6/docs/source/_static/images/android-chrome-192x192.png


--------------------------------------------------------------------------------
/docs/source/_static/images/android-chrome-512x512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alvinwan/TexSoup/7dabf76fdde1f0d7f0d30ede841b3b45bbb609f6/docs/source/_static/images/android-chrome-512x512.png


--------------------------------------------------------------------------------
/docs/source/_static/images/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alvinwan/TexSoup/7dabf76fdde1f0d7f0d30ede841b3b45bbb609f6/docs/source/_static/images/apple-touch-icon.png


--------------------------------------------------------------------------------
/docs/source/_static/images/arrow-down-orange.svg:
--------------------------------------------------------------------------------
1 | <svg id="Layer_2" data-name="Layer 2" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 13 13"><defs><style>.cls-1,.cls-2{fill-rule:evenodd;}.cls-1{fill:url(#linear-gradient);}.cls-2{fill:url(#linear-gradient-2);}</style><linearGradient id="linear-gradient" x1="13.4" y1="2.52" x2="6.76" y2="12.83" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#f15a24"/><stop offset="1" stop-color="#fbb03b"/></linearGradient><linearGradient id="linear-gradient-2" x1="9.64" y1="0.09" x2="3" y2="10.4" xlink:href="#linear-gradient"/></defs><title>arrow-down-orange</title><g id="Main-Copy-6"><polygon class="cls-1" points="11.22 5.37 7.25 9.34 9.82 9.34 12.5 6.66 11.22 5.37"/><polygon class="cls-2" points="5.44 9.34 1.47 5.37 0.19 6.66 6.34 12.81 7.25 11.9 7.25 9.34 7.25 0.5 5.44 0.5 5.44 9.34"/></g></svg>


--------------------------------------------------------------------------------
/docs/source/_static/images/arrow-right-with-tail.svg:
--------------------------------------------------------------------------------
1 | <svg id="Layer_2" data-name="Layer 2" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 13 13"><defs><style>.cls-1,.cls-2{fill-rule:evenodd;}.cls-1{fill:url(#linear-gradient);}.cls-2{fill:url(#linear-gradient-2);}</style><linearGradient id="linear-gradient" x1="2.82" y1="-2.05" x2="11.43" y2="6.56" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#f15a24"/><stop offset="1" stop-color="#fbb03b"/></linearGradient><linearGradient id="linear-gradient-2" x1="0.16" y1="0.61" x2="8.77" y2="9.22" xlink:href="#linear-gradient"/></defs><title>arrow-right-with-tail</title><g id="desktop"><g id="community"><g id="newsletter"><g id="Page-1"><polygon id="Fill-1" class="cls-1" points="4.87 1.28 8.84 5.25 8.84 2.68 6.16 0 4.87 1.28"/><polygon id="Fill-2" class="cls-2" points="8.84 5.25 0 5.25 0 7.06 8.84 7.06 4.87 11.03 6.16 12.31 12.31 6.16 11.4 5.25 8.84 5.25"/></g></g></g></g></svg>


--------------------------------------------------------------------------------
/docs/source/_static/images/browserconfig.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <browserconfig>
 3 |     <msapplication>
 4 |         <tile>
 5 |             <square150x150logo src="/mstile-150x150.png"/>
 6 |             <TileColor>#ffffff</TileColor>
 7 |         </tile>
 8 |     </msapplication>
 9 | </browserconfig>
10 | 


--------------------------------------------------------------------------------
/docs/source/_static/images/chevron-right-orange.svg:
--------------------------------------------------------------------------------
1 | <svg id="Layer_2" data-name="Layer 2" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 8.2 14.4"><defs><style>.cls-1{fill:url(#linear-gradient);}</style><linearGradient id="linear-gradient" x1="-0.03" y1="7.2" x2="8.17" y2="7.2" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#f15a24"/><stop offset="1" stop-color="#fbb03b"/></linearGradient></defs><title>chevron-right-orange</title><g id="desktop"><g id="_01_Home" data-name="01_Home"><g id="community"><g id="cta"><g id="Page-1"><polygon class="cls-1" points="1.43 14.38 -0.03 13.02 5.43 7.2 -0.03 1.39 1.43 0.01 8.17 7.2 1.43 14.38"/></g></g></g></g></g></svg>


--------------------------------------------------------------------------------
/docs/source/_static/images/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alvinwan/TexSoup/7dabf76fdde1f0d7f0d30ede841b3b45bbb609f6/docs/source/_static/images/favicon-16x16.png


--------------------------------------------------------------------------------
/docs/source/_static/images/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alvinwan/TexSoup/7dabf76fdde1f0d7f0d30ede841b3b45bbb609f6/docs/source/_static/images/favicon-32x32.png


--------------------------------------------------------------------------------
/docs/source/_static/images/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alvinwan/TexSoup/7dabf76fdde1f0d7f0d30ede841b3b45bbb609f6/docs/source/_static/images/favicon.ico


--------------------------------------------------------------------------------
/docs/source/_static/images/logo-dark.svg:
--------------------------------------------------------------------------------
1 | <svg id="Layer_2" data-name="Layer 2" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 682 125"><defs><style>.cls-1{isolation:isolate;}.cls-2{fill:url(#linear-gradient);}</style><linearGradient id="linear-gradient" x1="139.85" y1="91.16" x2="25.32" y2="-23.37" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#f15a24"/><stop offset="1" stop-color="#fbb03b"/></linearGradient></defs><title>logo-dark</title><g class="cls-1"><path d="M263.12,12.83V91.28h-8.89V12.83H223.69V4.5h70v8.33Z"/><path d="M298.24,60.46c.28,14.3,8.89,24.57,22.22,24.57a34.28,34.28,0,0,0,18.46-5.28l1.67,7.92a39.09,39.09,0,0,1-20.41,5.41c-18.33,0-30.68-13.74-30.68-33,0-20,12.77-34.16,30-34.16,14,0,23.18,9.17,23.18,23.47a38.93,38.93,0,0,1-1.38,10.13l-43.05.28Zm35.83-7.64A19.83,19.83,0,0,0,334.2,50c0-9.72-5.27-16.38-15.27-16.38-11.38,0-18.74,8.75-20.13,19.3Z"/><path d="M393.49,91.28l-17.22-26.8-17.08,26.8h-9.72l21.94-32.63-20.55-31h9.58L376.27,52.4l15.55-24.71h9.58L381.13,58.37l21.94,32.91Z"/><path d="M457.35,68.79c0-10.14-8.33-14.17-21.93-18.19C423.2,47,410.15,41.71,410.15,26.3c0-13.74,11.66-23.6,27.77-23.6A47.64,47.64,0,0,1,464,10.2L460.82,18a42.09,42.09,0,0,0-22.77-7.22c-13.05,0-19.29,6.94-19.29,14.86,0,9.58,8.6,13.19,21.93,17.35,12.5,3.75,25.13,9.44,25.13,25.13,0,14.3-11.94,25-29.29,25a52.14,52.14,0,0,1-27.91-8l3.19-7.91a43.64,43.64,0,0,0,25,8C448.88,85.17,457.35,78,457.35,68.79Z"/><path d="M508.17,93.08c-17.64,0-30.69-13-30.69-33.46s13.61-33.74,31.38-33.74,30.55,13.06,30.55,33.47S525.8,93.08,508.17,93.08Zm.28-59.42c-13.47,0-22.36,10.69-22.36,25.69,0,15.55,9,26,22.49,26s22.36-10.69,22.36-25.69C530.94,44.07,521.91,33.66,508.45,33.66Z"/><path d="M563.42,27.69v41.1c0,11.1,4.45,16.1,13.75,16.1,7.36,0,14.44-3.75,19.58-9V27.69h8.6V91.28l-8.6,1.8V82.81c-6.53,6.66-14.86,10.27-21.94,10.27-12.36,0-20-7.91-20-21.79V27.69Z"/><path d="M652.42,26.3c17.22,0,28.46,13.61,28.46,32.91,0,19.71-13,33.87-31.38,33.87A40.84,40.84,0,0,1,634,89.75v33.88h-8.61V27.41l8.61-1.8v8.47C638.26,30.05,644.65,26.3,652.42,26.3ZM634,80.31c3.88,3,9.85,5,16,5,13.61,0,22.08-11,22.08-26,0-14.72-8.06-25.13-21-25.13A25.63,25.63,0,0,0,634,41.3Z"/></g><path class="cls-2" d="M1,4.18v6A84,84,0,0,0,31.27,74.93v24.7H134.63v-12H43.27v-10H85.48v-12H38.87A72,72,0,0,1,13.26,16.18H157.68a72.13,72.13,0,0,1-27,50.58l7.5,9.36a84.06,84.06,0,0,0,31.69-65.94v-6Z"/></svg>


--------------------------------------------------------------------------------
/docs/source/_static/images/logo-icon.svg:
--------------------------------------------------------------------------------
1 | <svg id="Layer_2" data-name="Layer 2" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 120 68"><defs><style>.cls-1{fill:url(#linear-gradient);}</style><linearGradient id="linear-gradient" x1="98.64" y1="61.97" x2="17.28" y2="-19.39" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#f15a24"/><stop offset="1" stop-color="#fbb03b"/></linearGradient></defs><title>logo-icon</title><path class="cls-1" d="M0,.18V4.44a59.64,59.64,0,0,0,21.49,46V68H94.93V59.46H30V52.35H60V43.82H26.91A51.07,51.07,0,0,1,8.72,8.7H111.3A51.19,51.19,0,0,1,92.16,44.63l5.33,6.66A59.75,59.75,0,0,0,120,4.44V.18Z"/></svg>


--------------------------------------------------------------------------------
/docs/source/_static/images/logo.svg:
--------------------------------------------------------------------------------
1 | <svg id="Layer_2" data-name="Layer 2" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 682 125"><defs><style>.cls-1{isolation:isolate;}.cls-2{fill:#fff;}.cls-3{fill:url(#linear-gradient);}</style><linearGradient id="linear-gradient" x1="139.85" y1="92.16" x2="25.32" y2="-22.37" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#f15a24"/><stop offset="1" stop-color="#fbb03b"/></linearGradient></defs><title>logo</title><g class="cls-1"><path class="cls-2" d="M263.12,13.83V92.28h-8.89V13.83H223.69V5.5h70v8.33Z"/><path class="cls-2" d="M298.24,61.46c.28,14.3,8.89,24.57,22.22,24.57a34.28,34.28,0,0,0,18.46-5.28l1.67,7.92a39.09,39.09,0,0,1-20.41,5.41c-18.33,0-30.68-13.74-30.68-33,0-20,12.77-34.16,30-34.16,14,0,23.18,9.17,23.18,23.47a38.93,38.93,0,0,1-1.38,10.13l-43.05.28Zm35.83-7.64A19.91,19.91,0,0,0,334.2,51c0-9.72-5.27-16.38-15.27-16.38-11.38,0-18.74,8.75-20.13,19.3Z"/><path class="cls-2" d="M393.49,92.28l-17.22-26.8-17.08,26.8h-9.72l21.94-32.63-20.55-31h9.58L376.27,53.4l15.55-24.71h9.58L381.13,59.37l21.94,32.91Z"/><path class="cls-2" d="M457.35,69.79c0-10.14-8.33-14.17-21.93-18.19C423.2,48,410.15,42.71,410.15,27.3c0-13.74,11.66-23.6,27.77-23.6A47.64,47.64,0,0,1,464,11.2L460.82,19a42.09,42.09,0,0,0-22.77-7.22c-13.05,0-19.29,6.94-19.29,14.86,0,9.58,8.6,13.19,21.93,17.35,12.5,3.75,25.13,9.44,25.13,25.13,0,14.3-11.94,25-29.29,25a52.14,52.14,0,0,1-27.91-8l3.19-7.91a43.64,43.64,0,0,0,25,8C448.88,86.17,457.35,79,457.35,69.79Z"/><path class="cls-2" d="M508.17,94.08c-17.64,0-30.69-13-30.69-33.46s13.61-33.74,31.38-33.74,30.55,13.06,30.55,33.47S525.8,94.08,508.17,94.08Zm.28-59.42c-13.47,0-22.36,10.69-22.36,25.69,0,15.55,9,26,22.49,26s22.36-10.69,22.36-25.69C530.94,45.07,521.91,34.66,508.45,34.66Z"/><path class="cls-2" d="M563.42,28.69v41.1c0,11.1,4.45,16.1,13.75,16.1,7.36,0,14.44-3.75,19.58-9V28.69h8.6V92.28l-8.6,1.8V83.81c-6.53,6.66-14.86,10.27-21.94,10.27-12.36,0-20-7.91-20-21.79V28.69Z"/><path class="cls-2" d="M652.42,27.3c17.22,0,28.46,13.61,28.46,32.91,0,19.71-13,33.87-31.38,33.87A40.84,40.84,0,0,1,634,90.75v33.88h-8.61V28.41l8.61-1.8v8.47C638.26,31.05,644.65,27.3,652.42,27.3ZM634,81.31c3.88,3,9.85,5,16,5,13.61,0,22.08-11,22.08-26,0-14.72-8.06-25.14-21-25.14A25.64,25.64,0,0,0,634,42.3Z"/></g><path class="cls-3" d="M1,5.18v6A84,84,0,0,0,31.27,75.93v24.7H134.63v-12H43.27v-10H85.48v-12H38.87A72,72,0,0,1,13.26,17.18H157.68a72.13,72.13,0,0,1-27,50.58l7.5,9.36a84.06,84.06,0,0,0,31.69-65.94v-6Z"/></svg>


--------------------------------------------------------------------------------
/docs/source/_static/images/mstile-150x150.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alvinwan/TexSoup/7dabf76fdde1f0d7f0d30ede841b3b45bbb609f6/docs/source/_static/images/mstile-150x150.png


--------------------------------------------------------------------------------
/docs/source/_static/images/pytorch-x.svg:
--------------------------------------------------------------------------------
1 | <svg id="Layer_2" data-name="Layer 2" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 200 200"><defs><style>.cls-1{fill:url(#linear-gradient);}</style><linearGradient id="linear-gradient" x1="181.55" y1="12.59" x2="15.4" y2="190.67" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#f15a24"/><stop offset="1" stop-color="#fbb03b"/></linearGradient></defs><title>pytorch-x</title><polygon class="cls-1" points="197.16 16.78 183.22 2.84 100 86.06 16.78 2.84 2.84 16.78 86.06 100 2.84 183.22 16.78 197.16 100 113.94 183.22 197.16 197.16 183.22 113.94 100 197.16 16.78"/></svg>


--------------------------------------------------------------------------------
/docs/source/_static/images/safari-pinned-tab.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" standalone="no"?>
 2 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 20010904//EN"
 3 |  "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
 4 | <svg version="1.0" xmlns="http://www.w3.org/2000/svg"
 5 |  width="960.000000pt" height="960.000000pt" viewBox="0 0 960.000000 960.000000"
 6 |  preserveAspectRatio="xMidYMid meet">
 7 | <metadata>
 8 | Created by potrace 1.11, written by Peter Selinger 2001-2013
 9 | </metadata>
10 | <g transform="translate(0.000000,960.000000) scale(0.100000,-0.100000)"
11 | fill="#000000" stroke="none">
12 | <path d="M3 7170 c3 -184 8 -355 11 -380 4 -42 15 -144 21 -200 7 -63 42 -275
13 | 60 -370 3 -14 7 -34 9 -45 39 -192 170 -623 235 -770 5 -11 19 -45 31 -75 69
14 | -170 182 -397 294 -590 49 -85 92 -157 95 -160 4 -3 22 -30 41 -60 73 -117
15 | 260 -367 370 -494 143 -167 443 -462 531 -524 10 -7 19 -23 19 -35 0 -12 0
16 | -29 0 -37 0 -8 1 -80 0 -160 0 -80 0 -154 0 -165 1 -11 0 -112 0 -225 -1 -113
17 | 0 -207 1 -210 1 -3 0 -9 -1 -15 -2 -8 -2 -538 0 -568 0 -4 1322 -6 2937 -5
18 | l2937 3 1 337 c0 217 -4 339 -10 341 -5 1 -1172 2 -2592 2 -1421 -1 -2584 2
19 | -2584 5 -9 37 -10 59 -9 145 1 36 -2 69 -5 75 -3 5 -3 10 2 10 9 0 8 83 -1
20 | 106 -3 8 -2 14 2 14 7 0 7 70 1 157 -2 23 0 45 3 48 3 3 544 6 1203 7 l1197 0
21 | -5 24 c-2 13 -1 24 2 24 6 0 5 72 -2 125 -2 17 -1 39 3 50 4 11 4 30 -1 42 -5
22 | 12 -4 25 0 28 5 3 5 21 1 40 -4 19 -5 35 -1 35 7 0 6 204 -2 225 -4 8 -2 17 2
23 | 20 5 2 6 19 2 37 l-7 33 -1320 0 c-1159 0 -1323 2 -1339 15 -89 72 -337 317
24 | -421 415 -113 131 -151 178 -184 225 -13 19 -27 37 -30 40 -3 3 -22 28 -41 55
25 | -18 28 -36 52 -39 55 -33 31 -335 548 -331 567 0 2 -18 42 -40 90 -23 49 -42
26 | 93 -43 98 -2 6 -8 21 -13 35 -35 82 -60 150 -100 268 -24 73 -46 141 -48 150
27 | -2 9 -13 51 -24 92 -26 93 -64 264 -76 340 -2 14 -6 39 -9 55 -3 17 -8 50 -11
28 | 75 -3 25 -8 56 -10 70 -2 14 -6 57 -10 95 l-6 70 4098 0 4098 -1 -1 -49 c-1
29 | -89 -28 -285 -60 -435 -3 -14 -7 -35 -9 -47 -2 -11 -6 -30 -10 -42 -3 -11 -8
30 | -32 -11 -45 -5 -29 -64 -233 -92 -321 -71 -219 -214 -544 -316 -720 -23 -38
31 | -50 -87 -61 -108 -11 -20 -23 -37 -27 -37 -5 0 -8 -4 -8 -10 0 -26 -176 -277
32 | -316 -452 -127 -157 -300 -340 -434 -458 -30 -27 -66 -58 -80 -70 -14 -12 -40
33 | -34 -57 -47 -31 -24 -42 -47 -25 -55 4 -1 29 -32 57 -68 27 -36 60 -76 71 -90
34 | 24 -28 105 -129 144 -180 14 -18 32 -41 40 -50 39 -47 86 -104 92 -113 5 -9
35 | 54 24 114 78 7 5 43 37 80 70 90 79 313 302 389 389 33 38 65 74 70 80 30 33
36 | 175 217 175 221 0 3 8 15 19 26 10 11 36 45 57 77 21 31 41 59 45 62 3 3 12
37 | 16 19 29 7 14 17 30 22 35 23 27 149 234 217 356 81 147 195 388 256 545 15
38 | 39 33 84 40 100 7 17 25 68 40 115 16 47 31 90 34 95 3 6 6 15 7 22 2 13 22
39 | 79 31 103 3 8 7 20 8 25 2 6 15 57 30 115 15 58 29 113 31 123 12 53 15 65 20
40 | 97 3 19 8 39 11 43 2 4 6 20 8 35 2 15 11 70 20 122 9 52 19 113 21 135 1 22
41 | 5 49 8 61 2 12 8 57 12 100 3 43 9 93 11 109 2 17 6 183 10 370 l5 340 -4802
42 | 0 -4802 0 5 -335z"/>
43 | </g>
44 | </svg>
45 | 


--------------------------------------------------------------------------------
/docs/source/_static/images/search-icon.svg:
--------------------------------------------------------------------------------
1 | <svg id="Layer_2" data-name="Layer 2" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 16 16"><defs><style>.cls-1{fill-rule:evenodd;fill:url(#linear-gradient);}.cls-2{fill:url(#linear-gradient-2);}</style><linearGradient id="linear-gradient" x1="12.58" y1="0.28" x2="2.41" y2="10.68" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#f15a24"/><stop offset="1" stop-color="#fbb03b"/></linearGradient><linearGradient id="linear-gradient-2" x1="19.66" y1="7.2" x2="9.48" y2="17.61" xlink:href="#linear-gradient"/></defs><title>search-icon</title><path class="cls-1" d="M6.5.25A6.25,6.25,0,1,0,12.75,6.5,6.25,6.25,0,0,0,6.5.25Zm0,11A4.75,4.75,0,1,1,11.25,6.5,4.75,4.75,0,0,1,6.5,11.25Z"/><circle class="cls-2" cx="13.5" cy="13.5" r="1.5"/></svg>


--------------------------------------------------------------------------------
/docs/source/_static/images/site.webmanifest:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "",
 3 |     "short_name": "",
 4 |     "icons": [
 5 |         {
 6 |             "src": "/android-chrome-192x192.png",
 7 |             "sizes": "192x192",
 8 |             "type": "image/png"
 9 |         },
10 |         {
11 |             "src": "/android-chrome-512x512.png",
12 |             "sizes": "512x512",
13 |             "type": "image/png"
14 |         }
15 |     ],
16 |     "theme_color": "#ffffff",
17 |     "background_color": "#ffffff",
18 |     "display": "standalone"
19 | }
20 | 


--------------------------------------------------------------------------------
/docs/source/_static/images/view-page-source-icon.svg:
--------------------------------------------------------------------------------
1 | <svg id="Layer_2" data-name="Layer 2" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 20 20"><defs><style>.cls-1{fill:url(#linear-gradient);}.cls-2{fill:url(#linear-gradient-2);}.cls-3{fill:url(#linear-gradient-3);}</style><linearGradient id="linear-gradient" x1="20.07" y1="1.75" x2="0.84" y2="17.5" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#f15a24"/><stop offset="1" stop-color="#fbb03b"/></linearGradient><linearGradient id="linear-gradient-2" x1="16.9" y1="-0.15" x2="-1.03" y2="15.29" xlink:href="#linear-gradient"/><linearGradient id="linear-gradient-3" x1="23.19" y1="5.56" x2="3.97" y2="21.32" xlink:href="#linear-gradient"/></defs><title>view-page-source-icon</title><path class="cls-1" d="M0,2.47V17.53H20V2.47ZM18.83,16.36H1.17V3.64H18.82V16.36Z"/><polygon class="cls-2" points="4.29 13.54 5.12 14.37 9.49 10 5.12 5.62 4.29 6.46 4.29 6.46 7.83 10 4.29 13.54"/><rect class="cls-3" x="10.59" y="12.9" width="5.88" height="1.17"/></svg>


--------------------------------------------------------------------------------
/docs/source/_static/texsoup.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alvinwan/TexSoup/7dabf76fdde1f0d7f0d30ede841b3b45bbb609f6/docs/source/_static/texsoup.ai


--------------------------------------------------------------------------------
/docs/source/_templates/cookie_banner.html:
--------------------------------------------------------------------------------
1 | <div class="cookie-banner-wrapper">
2 |   <div class="container">
3 |     <p class="gdpr-notice">I analyze traffic using Google Analytics, which uses cookies. By continuing to use this site, you agree to allow this usage of cookies.</p>
4 |     <img class="close-button" src="{{ pathto('_static/images/pytorch-x.svg', 1) }}">
5 |   </div>
6 | </div>
7 | 


--------------------------------------------------------------------------------
/docs/source/_templates/footer.html:
--------------------------------------------------------------------------------
 1 | <!-- Only change in this file is to change attribution from rtd to pytorch -->
 2 | 
 3 | <footer>
 4 |   {% if (theme_prev_next_buttons_location == 'bottom' or theme_prev_next_buttons_location == 'both') and (next or prev) %}
 5 |     <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
 6 |       {% if next %}
 7 |         <a href="{{ next.link|e }}" class="btn btn-neutral float-right" title="{{ next.title|striptags|e }}" accesskey="n" rel="next">{{ _('Next') }} <img src="{{ pathto('_static/images/chevron-right-orange.svg', 1) }}" class="next-page"></a>
 8 |       {% endif %}
 9 |       {% if prev %}
10 |         <a href="{{ prev.link|e }}" class="btn btn-neutral" title="{{ prev.title|striptags|e }}" accesskey="p" rel="prev"><img src="{{ pathto('_static/images/chevron-right-orange.svg', 1) }}" class="previous-page"> {{ _('Previous') }}</a>
11 |       {% endif %}
12 |     </div>
13 |   {% endif %}
14 | 
15 |   {% if theme_pytorch_project == 'tutorials' %}
16 | 
17 |     <hr class="helpful-hr hr-top">
18 |       <div class="helpful-container">
19 |         <div class="helpful-question">Was this helpful?</div>
20 |         <div class="helpful-question yes-link" data-behavior="was-this-helpful-event" data-response="yes">Yes</div>
21 |         <div class="helpful-question no-link" data-behavior="was-this-helpful-event" data-response="no">No</div>
22 |         <div class="was-helpful-thank-you">Thank you</div>
23 |       </div>
24 |     <hr class="helpful-hr hr-bottom"/>
25 | 
26 |   {% else %}
27 | 
28 |     <hr>
29 | 
30 |   {% endif %}
31 | 
32 |   <div role="contentinfo">
33 |     <p>
34 |     {%- if show_copyright %}
35 |       {%- if hasdoc('copyright') %}
36 |         {% trans path=pathto('copyright'), copyright=copyright|e %}&copy; <a href="{{ path }}">Copyright</a> {{ copyright }}.{% endtrans %}
37 |       {%- else %}
38 |         {% trans copyright=copyright|e %}&copy; Copyright {{ copyright }}.{% endtrans %}
39 |       {%- endif %}
40 |     {%- endif %}
41 | 
42 |     {%- if build_id and build_url %}
43 |       {% trans build_url=build_url, build_id=build_id %}
44 |         <span class="build">
45 |           Build
46 |           <a href="{{ build_url }}">{{ build_id }}</a>.
47 |         </span>
48 |       {% endtrans %}
49 |     {%- elif commit %}
50 |       {% trans commit=commit %}
51 |         <span class="commit">
52 |           Revision <code>{{ commit }}</code>.
53 |         </span>
54 |       {% endtrans %}
55 |     {%- elif last_updated %}
56 |       {% trans last_updated=last_updated|e %}Last updated on {{ last_updated }}.{% endtrans %}
57 |     {%- endif %}
58 | 
59 |     </p>
60 |   </div>
61 | 
62 |   {%- if show_sphinx %}
63 |     {% trans %}
64 |       <div class="built-with">
65 |         Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/pytorch/pytorch_sphinx_theme">theme</a> provided by <a href="https://pytorch.org">PyTorch</a>.
66 |       </div>
67 |     {% endtrans %}
68 |   {%- endif %}
69 | 
70 |   {%- block extrafooter %} {% endblock %}
71 | 
72 | </footer>
73 | 


--------------------------------------------------------------------------------
/docs/source/_templates/landing.html:
--------------------------------------------------------------------------------
 1 | {% extend "layout.html" %}
 2 | 
 3 | {% block mainbody %}
 4 |   <style>
 5 | .landing {
 6 |   width:100%;
 7 |   display:flex;
 8 |   flex-direction:column;
 9 |   align-items: center;
10 |   margin-bottom:5em;
11 | }
12 | .landing h1 {
13 |   font-size:4em;
14 |   max-width:17em;
15 |   text-align:center;
16 |   line-height:125%;
17 |   text-transform:none;
18 |   letter-spacing:0;
19 |   font-weight:300;
20 |   margin-top:1em;
21 |   padding:0 0.5em;
22 | }
23 | .landing h2 {
24 |   font-size:1.2em;
25 |   color:#999;
26 | }
27 | .landing p {
28 |   display:flex;
29 |   flex-direction:column;
30 | }
31 | .landing p span {
32 |   width:100%;
33 |   text-align:center;
34 | }
35 | .landing .btn {
36 |   color:transparent;
37 |   -webkit-background-clip: text !important;
38 |   -webkit-text-fill-color: transparent;
39 |   background: rgb(241,90,36);
40 |   background: linear-gradient(135deg, rgba(241,90,36,1) 0%, rgba(251,176,59,1) 100%);
41 |   font-size:1.5em;
42 | }
43 | .landing .btn-arrow {
44 |   height:0.6em;
45 |   margin-bottom:0.15em;
46 |   margin-left:0.5em;
47 | }
48 |   </style>
49 | 
50 |   <div class="landing">
51 |     <h1>Navigate, Search, and Modify LaTeX Documents in Python</h1>
52 |     <h2>Easy and reliable: No C extensions, no installation dependencies, and 100% test coverage</h2>
53 |     <p><a class="btn" href="/docs/quickstart.html">Get started <img class="btn-arrow" src="docs/_static/images/chevron-right-orange.svg"></a><span><a href="http://github.com/alvinwan/texsoup">View on Github</a></span></p>
54 |   </div>
55 | 
56 | {% endblock %}
57 | 


--------------------------------------------------------------------------------
/docs/source/_templates/layout.html:
--------------------------------------------------------------------------------
  1 | {# TEMPLATE VAR SETTINGS #}
  2 | 
  3 | {%- set extra_css_files = ["_static/css/theme-mod.css"] -%}
  4 | {%- set favicon=1 -%}
  5 | {%- set url_root = pathto('', 1) %}
  6 | {%- if url_root == '#' %}{% set url_root = '' %}{% endif %}
  7 | {%- if not embedded and docstitle %}
  8 |   {%- set titlesuffix = " &mdash; "|safe + docstitle|e %}
  9 | {%- else %}
 10 |   {%- set titlesuffix = "" %}
 11 | {%- endif %}
 12 | {%- set lang_attr = 'en' if language == None else (language | replace('_', '-')) %}
 13 | {% import 'theme_variables.jinja' as theme_variables %}
 14 | {%- set theme_pytorch_project = 'docs' -%}
 15 | 
 16 | <!DOCTYPE html>
 17 | <!--[if IE 8]><html class="no-js lt-ie9" lang="{{ lang_attr }}" > <![endif]-->
 18 | <!--[if gt IE 8]><!--> <html class="no-js" lang="{{ lang_attr }}" > <!--<![endif]-->
 19 | <head>
 20 |   <meta charset="utf-8">
 21 |   {{ metatags }}
 22 |   <meta name="viewport" content="width=device-width, initial-scale=1.0">
 23 |   {% block htmltitle %}
 24 |   <title>{{ title|striptags|e }}{{ titlesuffix }}</title>
 25 |   {% endblock %}
 26 | 
 27 |   {# FAVICON #}
 28 |   {% if favicon %}
 29 |     <link rel="apple-touch-icon" sizes="180x180" href="_static/images/apple-touch-icon.png">
 30 |     <link rel="icon" type="image/png" sizes="32x32" href="_static/images/favicon-32x32.png">
 31 |     <link rel="icon" type="image/png" sizes="16x16" href="_static/images/favicon-16x16.png">
 32 |     <link rel="manifest" href="_static/images/site.webmanifest">
 33 |     <link rel="mask-icon" href="_static/images/safari-pinned-tab.svg" color="#5bbad5">
 34 |     <meta name="msapplication-TileColor" content="#ffffff">
 35 |     <meta name="theme-color" content="#ffffff">
 36 |   {% endif %}
 37 |   {# CANONICAL URL #}
 38 |   {% if theme_canonical_url %}
 39 |     <link rel="canonical" href="{{ theme_canonical_url }}/{{ pagename }}.html"/>
 40 |   {% endif %}
 41 | 
 42 |   {# CSS #}
 43 | 
 44 |   {# OPENSEARCH #}
 45 |   {% if not embedded %}
 46 |     {% if use_opensearch %}
 47 |       <link rel="search" type="application/opensearchdescription+xml"
 48 |             title="{% trans docstitle=docstitle|e %}Search within {{ docstitle }}{% endtrans %}"
 49 |             href="{{ pathto('_static/opensearch.xml', 1) }}"/>
 50 |     {% endif %}
 51 | 
 52 |   {% endif %}
 53 | 
 54 |   <link rel="stylesheet" href="{{ pathto('_static/' + style, 1) }}" type="text/css" />
 55 |   <!-- <link rel="stylesheet" href="{{ pathto('_static/pygments.css', 1) }}" type="text/css" /> -->
 56 |   {%- for css in css_files %}
 57 |     {%- if css|attr("rel") %}
 58 |   <link rel="{{ css.rel }}" href="{{ pathto(css.filename, 1) }}" type="text/css"{% if css.title is not none %} title="{{ css.title }}"{% endif %} />
 59 |     {%- else %}
 60 |   <link rel="stylesheet" href="{{ pathto(css, 1) }}" type="text/css" />
 61 |     {%- endif %}
 62 |   {%- endfor %}
 63 |   {%- for cssfile in extra_css_files %}
 64 |     <link rel="stylesheet" href="{{ pathto(cssfile, 1) }}" type="text/css" />
 65 |   {%- endfor %}
 66 | 
 67 |   {%- block linktags %}
 68 |     {%- if hasdoc('about') %}
 69 |     <link rel="author" title="{{ _('About these documents') }}" href="{{ pathto('about') }}" />
 70 |     {%- endif %}
 71 |     {%- if hasdoc('genindex') %}
 72 |     <link rel="index" title="{{ _('Index') }}" href="{{ pathto('genindex') }}" />
 73 |     {%- endif %}
 74 |     {%- if hasdoc('search') %}
 75 |     <link rel="search" title="{{ _('Search') }}" href="{{ pathto('search') }}" />
 76 |     {%- endif %}
 77 |     {%- if hasdoc('copyright') %}
 78 |     <link rel="copyright" title="{{ _('Copyright') }}" href="{{ pathto('copyright') }}" />
 79 |     {%- endif %}
 80 |     {%- if next %}
 81 |     <link rel="next" title="{{ next.title|striptags|e }}" href="{{ next.link|e }}" />
 82 |     {%- endif %}
 83 |     {%- if prev %}
 84 |     <link rel="prev" title="{{ prev.title|striptags|e }}" href="{{ prev.link|e }}" />
 85 |     {%- endif %}
 86 |   {%- endblock %}
 87 |   {%- block extrahead %} {% endblock %}
 88 | 
 89 |   {# Keep modernizr in head - http://modernizr.com/docs/#installing #}
 90 |   <script src="{{ pathto('_static/js/modernizr.min.js', 1) }}"></script>
 91 | </head>
 92 | <div class="container-fluid header-holder tutorials-header" id="header-holder">
 93 |   <div class="container">
 94 |     <div class="header-container">
 95 |       <a class="header-logo" href="{{ theme_variables.urls['home'] }}" aria-label="PyTorch"></a>
 96 | 
 97 |       <div class="main-menu">
 98 |         <ul>
 99 |         {% for link in theme_variables.main_menu_links %}
100 |           <li {%- if theme_pytorch_project.lower() == link['name'].lower() %} class="active"{% endif %}>
101 |             <a href="{{ link['href'] }}">{{ link['name'] }}</a>
102 |           </li>
103 |         {% endfor %}
104 |         </ul>
105 |       </div>
106 | 
107 |       <a class="main-menu-open-button" href="#" data-behavior="open-mobile-menu"></a>
108 |     </div>
109 | 
110 |   </div>
111 | </div>
112 | 
113 | 
114 | <body class="pytorch-body">
115 | 
116 |   {% block extrabody %} {% endblock %}
117 | 
118 |   {% block mainbody %}
119 |     {# SIDE NAV, TOGGLES ON MOBILE #}
120 | 
121 |     <div class="table-of-contents-link-wrapper">
122 |       <span>Table of Contents</span>
123 |       <a href="#" class="toggle-table-of-contents" data-behavior="toggle-table-of-contents"></a>
124 |     </div>
125 | 
126 |     <nav data-toggle="wy-nav-shift" class="pytorch-left-menu" id="pytorch-left-menu">
127 |       <div class="pytorch-side-scroll">
128 |         <div class="pytorch-menu pytorch-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
129 |           <div class="pytorch-left-menu-search">
130 |             {% block sidebartitle %}
131 | 
132 |             {% if theme_display_version %}
133 |               {%- set nav_version = version %}
134 |               {% if READTHEDOCS and current_version %}
135 |                 {%- set nav_version = current_version %}
136 |               {% endif %}
137 |               {% if nav_version %}
138 |                 <div class="version">
139 |                   {{ nav_version }}
140 |                 </div>
141 |               {% endif %}
142 |             {% endif %}
143 | 
144 |             {% include "searchbox.html" %}
145 | 
146 |             {% endblock %}
147 |           </div>
148 | 
149 |           {% block menu %}
150 |             {#
151 |               The singlehtml builder doesn't handle this toctree call when the
152 |               toctree is empty. Skip building this for now.
153 |             #}
154 |             {% if 'singlehtml' not in builder %}
155 |               {% set global_toc = toctree(maxdepth=2,
156 |                                           collapse=theme_collapse_navigation|tobool,
157 |                                           includehidden=theme_includehidden|tobool,
158 |                                           titles_only=theme_titles_only|tobool) %}
159 |             {% endif %}
160 |             {% if global_toc %}
161 |               {{ global_toc }}
162 |             {% else %}
163 |               <!-- Local TOC -->
164 |               <div class="local-toc">{{ toc }}</div>
165 |             {% endif %}
166 |           {% endblock %}
167 |         </div>
168 |       </div>
169 |     </nav>
170 | 
171 |     <div class="pytorch-container">
172 |       <div class="pytorch-page-level-bar" id="pytorch-page-level-bar">
173 |         <div class="pytorch-breadcrumbs-wrapper">
174 |           {% include "breadcrumbs.html" %}
175 |         </div>
176 | 
177 |         <div class="pytorch-shortcuts-wrapper" id="pytorch-shortcuts-wrapper">
178 |           Shortcuts
179 |         </div>
180 |       </div>
181 | 
182 |       <section data-toggle="wy-nav-shift" id="pytorch-content-wrap" class="pytorch-content-wrap">
183 |         <div class="pytorch-content-left">
184 | 
185 |         {% if theme_pytorch_project == 'tutorials' %}
186 | 
187 |           <div class="pytorch-call-to-action-links">
188 |             <div id="tutorial-type">{{ pagename }}</div>
189 | 
190 |             <div id="google-colab-link">
191 |               <img class="call-to-action-img" src="{{ pathto('_static/images/pytorch-colab.svg', 1) }}"/>
192 |               <div class="call-to-action-desktop-view">Run in Google Colab</div>
193 |               <div class="call-to-action-mobile-view">Colab</div>
194 |             </div>
195 |             <div id="download-notebook-link">
196 |               <img class="call-to-action-notebook-img" src="{{ pathto('_static/images/pytorch-download.svg', 1) }}"/>
197 |               <div class="call-to-action-desktop-view">Download Notebook</div>
198 |               <div class="call-to-action-mobile-view">Notebook</div>
199 |             </div>
200 |             <div id="github-view-link">
201 |               <img class="call-to-action-img" src="{{ pathto('_static/images/pytorch-github.svg', 1) }}"/>
202 |               <div class="call-to-action-desktop-view">View on GitHub</div>
203 |               <div class="call-to-action-mobile-view">GitHub</div>
204 |             </div>
205 |           </div>
206 | 
207 |         {% endif %}
208 | 
209 |         {%- block content %}
210 |           {% if theme_style_external_links|tobool %}
211 |           <div class="rst-content style-external-links">
212 |           {% else %}
213 |           <div class="rst-content">
214 |           {% endif %}
215 |             <div role="main" class="main-content" itemscope="itemscope" itemtype="http://schema.org/Article">
216 |             {%- block document %}
217 |              <article itemprop="articleBody" id="pytorch-article" class="pytorch-article">
218 |               {% block body %}{% endblock %}
219 |              </article>
220 |              {% if self.comments()|trim %}
221 |              <div class="articleComments">
222 |               {% block comments %}{% endblock %}
223 |              </div>
224 |              {% endif%}
225 |             </div>
226 |             {%- endblock %}
227 |             {% include "footer.html" %}
228 |           </div>
229 |         {%- endblock %}
230 |         </div>
231 | 
232 |         <div class="pytorch-content-right" id="pytorch-content-right">
233 |           <div class="pytorch-right-menu" id="pytorch-right-menu">
234 |             <div class="pytorch-side-scroll" id="pytorch-side-scroll-right">
235 |               {{ toc }}
236 |             </div>
237 |           </div>
238 |         </div>
239 |       </section>
240 |     </div>
241 | 
242 |   {% include "versions.html" %}
243 | 
244 |   {% if not embedded %}
245 | 
246 |      {% if sphinx_version >= "1.8.0" %}
247 |        <script type="text/javascript" id="documentation_options" data-url_root="{{ pathto('', 1) }}" src="{{ pathto('_static/documentation_options.js', 1) }}"></script>
248 |        {%- for scriptfile in script_files %}
249 |          {{ js_tag(scriptfile) }}
250 |        {%- endfor %}
251 |      {% else %}
252 |        <script type="text/javascript">
253 |            var DOCUMENTATION_OPTIONS = {
254 |                URL_ROOT:'{{ url_root }}',
255 |                VERSION:'{{ release|e }}',
256 |                LANGUAGE:'{{ language }}',
257 |                COLLAPSE_INDEX:false,
258 |                FILE_SUFFIX:'{{ '' if no_search_suffix else file_suffix }}',
259 |                HAS_SOURCE:  {{ has_source|lower }},
260 |                SOURCELINK_SUFFIX: '{{ sourcelink_suffix }}'
261 |            };
262 |        </script>
263 |        {%- for scriptfile in script_files %}
264 |          <script type="text/javascript" src="{{ pathto(scriptfile, 1) }}"></script>
265 |        {%- endfor %}
266 |      {% endif %}
267 | 
268 |   {% endif %}
269 | 
270 |   <script type="text/javascript" src="{{ pathto('_static/js/vendor/popper.min.js', 1) }}"></script>
271 |   <script type="text/javascript" src="{{ pathto('_static/js/vendor/bootstrap.min.js', 1) }}"></script>
272 |   <script type="text/javascript" src="{{ pathto('_static/js/theme.js', 1) }}"></script>
273 | 
274 |   <script type="text/javascript">
275 |       jQuery(function () {
276 |           SphinxRtdTheme.Navigation.enable({{ 'true' if theme_sticky_navigation|tobool else 'false' }});
277 |       });
278 |   </script>
279 | 
280 |   {% endblock %}
281 | 
282 |   {%- block footer %} {% endblock %}
283 | 
284 |   <!-- Begin Footer -->
285 |   <div class="container-fluid docs-tutorials-resources" id="docs-tutorials-resources">
286 |   </div>
287 |   <footer class="site-footer">
288 |     <div class="container footer-container">
289 |       <div class="footer-logo-wrapper">
290 |         <a href="{{ theme_variables.urls['home'] }}" class="footer-logo"></a>
291 |       </div>
292 | 
293 |       <div class="footer-links-wrapper">
294 |       {% for column in theme_variables.footer_columns %}
295 |         <div class="footer-links-col">
296 |           <ul>
297 |             <li class="list-title">{% if column['href'] %}<a href="{{ column['href'] }}">{% endif %}{{ column['name'] }}{% if column['href'] %}</a>{% endif %}</li>
298 |             {% for link in column['links'] %}
299 |             <li><a href="{{ link['href'] }}">{{ link['name'] }}</a></li>
300 |             {% endfor %}
301 |           </ul>
302 |         </div>
303 |       {% endfor %}
304 |       </div>
305 |     </div>
306 |   </footer>
307 | 
308 |   {% include "cookie_banner.html" %}
309 | 
310 |   <!-- End Footer -->
311 | 
312 |   <!-- Begin Mobile Menu -->
313 | 
314 |   <div class="mobile-main-menu">
315 |     <div class="container-fluid">
316 |       <div class="container">
317 |         <div class="mobile-main-menu-header-container">
318 |           <a class="header-logo" href="{{ theme_variables.urls['home'] }}" aria-label="PyTorch"></a>
319 |           <a class="main-menu-close-button" href="#" data-behavior="close-mobile-menu"></a>
320 |         </div>
321 |       </div>
322 |     </div>
323 | 
324 |     <div class="mobile-main-menu-links-container">
325 |       <div class="main-menu">
326 |         <ul>
327 |         {% for link in theme_variables.main_menu_links %}
328 |           <li {%- if theme_pytorch_project.lower() == link['name'].lower() %} class="active"{% endif %}>
329 |             <a href="{{ link['href'] }}">{{ link['name'] }}</a>
330 |           </li>
331 |         {% endfor %}
332 |         </ul>
333 |       </div>
334 |     </div>
335 |   </div>
336 | 
337 |   <!-- End Mobile Menu -->
338 | 
339 |   <script type="text/javascript" src="{{ pathto('_static/js/vendor/anchor.min.js', 1) }}"></script>
340 | 
341 |   <script type="text/javascript">
342 |     $(document).ready(function() {
343 |       mobileMenu.bind();
344 |       mobileTOC.bind();
345 |       pytorchAnchors.bind();
346 |       sideMenus.bind();
347 |       scrollToAnchor.bind();
348 |       highlightNavigation.bind();
349 | 
350 |       // Add class to links that have code blocks, since we cannot create links in code blocks
351 |       $("article.pytorch-article a span.pre").each(function(e) {
352 |         $(this).closest("a").addClass("has-code");
353 |       });
354 |     })
355 |   </script>
356 | 
357 |   <!-- Google Analytics -->
358 |   <script>
359 |     (function(i, s, o, g, r, a, m)
360 |     {
361 |       i['GoogleAnalyticsObject'] = r;
362 |       i[r] = i[r] || function()
363 |       {
364 |         (i[r].q = i[r].q || []).push(arguments)
365 |       }, i[r].l = 1 * new Date();
366 |       a = s.createElement(o),
367 |         m = s.getElementsByTagName(o)[0];
368 |       a.async = 1;
369 |       a.src = g;
370 |       m.parentNode.insertBefore(a, m)
371 |     })(window, document, 'script', '//www.google-analytics.com/analytics.js', 'ga');
372 |     ga('create', 'UA-32800241-3', 'alvinwan.com');
373 |     ga('send', 'pageview');
374 |   </script>
375 | </body>
376 | </html>
377 | 


--------------------------------------------------------------------------------
/docs/source/_templates/theme_variables.jinja:
--------------------------------------------------------------------------------
 1 | {%- set urls = {
 2 |   'github': 'https://github.com/alvinwan/texsoup',
 3 |   'github_issues': 'https://github.com/alvinwan/texsoup/issues',
 4 |   'getting_started': 'https://texsoup.alvinwan.com/docs/quickstart.html',
 5 |   'docs': 'https://texsoup.alvinwan.com/docs/',
 6 |   'home': 'https://texsoup.alvinwan.com/',
 7 |   'md2py': 'https://github.com/alvinwan/md2py',
 8 |   'tex2py': 'https://github.com/alvinwan/tex2py'
 9 | }
10 | -%}
11 | {%- set main_menu_links = [
12 |   {'name': 'Home', 'href': urls['home']},
13 |   {'name': 'Docs', 'href': urls['docs']},
14 |   {'name': 'Github', 'href': urls['github'],}
15 | ]
16 | -%}
17 | {%- set footer_columns = [
18 |   {'name': 'TexSoup', 'href': urls['home'], 'links': [
19 |     {'name': 'Getting Started', 'href': urls['getting_started']}
20 |   ]},
21 |   {'name': 'Support', 'links': [
22 |     {'name': 'Docs', 'href': urls['docs']},
23 |     {'name': 'Github Issues', 'href': urls['github_issues']}
24 |   ]},
25 |   {'name': 'Related', 'links': [
26 |     {'name': 'Markdown2Python', 'href': urls['md2py']},
27 |     {'name': 'LaTeX2Python', 'href': urls['tex2py']}
28 |   ]}
29 | ]
30 | -%}
31 | 


--------------------------------------------------------------------------------
/docs/source/categorizer.rst:
--------------------------------------------------------------------------------
 1 | Categorizing Mechanics
 2 | ===================================
 3 | 
 4 | .. automodule:: TexSoup.category
 5 | 
 6 | Categorizer
 7 | -----------------------------------
 8 | 
 9 | .. autofunction:: categorize
10 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # TexSoup documentation build configuration file, created by
  5 | # sphinx-quickstart on Fri Dec 23 13:31:47 2016.
  6 | #
  7 | # This file is execfile()d with the current directory set to its
  8 | # containing dir.
  9 | #
 10 | # Note that not all possible configuration values are present in this
 11 | # autogenerated file.
 12 | #
 13 | # All configuration values have a default; values that are commented out
 14 | # serve to show the default.
 15 | 
 16 | # If extensions (or modules to document with autodoc) are in another directory,
 17 | # add these directories to sys.path here. If the directory is relative to the
 18 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 19 | #
 20 | import os
 21 | # import sys
 22 | 
 23 | # source code directory, relative to this file, for sphinx-autobuild
 24 | # sys.path.insert(0, os.path.abspath('../..'))
 25 | 
 26 | import TexSoup
 27 | 
 28 | RELEASE = os.environ.get('RELEASE', False)
 29 | 
 30 | import pytorch_sphinx_theme
 31 | 
 32 | # -- General configuration ------------------------------------------------
 33 | 
 34 | # If your documentation needs a minimal Sphinx version, state it here.
 35 | #
 36 | needs_sphinx = '1.6'
 37 | 
 38 | # Add any Sphinx extension module names here, as strings. They can be
 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 40 | # ones.
 41 | extensions = [
 42 |     'sphinx.ext.autodoc',
 43 |     'sphinx.ext.autosummary',
 44 |     'sphinx.ext.doctest',
 45 |     'sphinx.ext.intersphinx',
 46 |     'sphinx.ext.todo',
 47 |     'sphinx.ext.coverage',
 48 |     'sphinx.ext.napoleon',
 49 |     'sphinx.ext.viewcode',
 50 |     'sphinxcontrib.katex',
 51 | ]
 52 | 
 53 | # katex options
 54 | #
 55 | #
 56 | 
 57 | katex_options = r'''
 58 | delimiters : [
 59 |    {left: "$$", right: "$$", display: true},
 60 |    {left: "\\(", right: "\\)", display: false},
 61 |    {left: "\\[", right: "\\]", display: true}
 62 | ]
 63 | '''
 64 | 
 65 | napoleon_use_ivar = True
 66 | 
 67 | # Add any paths that contain templates here, relative to this directory.
 68 | templates_path = ['_templates']
 69 | 
 70 | # The suffix(es) of source filenames.
 71 | # You can specify multiple suffix as a list of string:
 72 | #
 73 | # source_suffix = ['.rst', '.md']
 74 | source_suffix = '.rst'
 75 | 
 76 | # The master toctree document.
 77 | master_doc = 'index'
 78 | 
 79 | # General information about the project.
 80 | project = 'TexSoup'
 81 | copyright = '2020, Alvin Wan'
 82 | author = 'Alvin Wan'
 83 | 
 84 | # The version info for the project you're documenting, acts as replacement for
 85 | # |version| and |release|, also used in various other places throughout the
 86 | # built documents.
 87 | #
 88 | # The short X.Y version.
 89 | # TODO: change to [:2] at v1.0
 90 | version = 'master (' + TexSoup.__version__ + ' )'
 91 | # The full version, including alpha/beta/rc tags.
 92 | # TODO: verify this works as expected
 93 | release = 'master'
 94 | 
 95 | # The language for content autogenerated by Sphinx. Refer to documentation
 96 | # for a list of supported languages.
 97 | #
 98 | # This is also used if you do content translation via gettext catalogs.
 99 | # Usually you set "language" from the command line for these cases.
100 | language = None
101 | 
102 | # List of patterns, relative to source directory, that match files and
103 | # directories to ignore when looking for source files.
104 | # This patterns also effect to html_static_path and html_extra_path
105 | exclude_patterns = []
106 | 
107 | # The name of the Pygments (syntax highlighting) style to use.
108 | pygments_style = 'sphinx'
109 | 
110 | # If true, `todo` and `todoList` produce output, else they produce nothing.
111 | todo_include_todos = True
112 | 
113 | # Disable docstring inheritance
114 | autodoc_inherit_docstrings = False
115 | 
116 | 
117 | # -- katex javascript in header
118 | #
119 | #    def setup(app):
120 | #    app.add_javascript("https://cdn.jsdelivr.net/npm/katex@0.10.0-beta/dist/katex.min.js")
121 | 
122 | 
123 | # -- Options for HTML output ----------------------------------------------
124 | #
125 | # The theme to use for HTML and HTML Help pages.  See the documentation for
126 | # a list of builtin themes.
127 | #
128 | #
129 | #
130 | 
131 | html_theme = 'pytorch_sphinx_theme'
132 | html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]
133 | 
134 | # Theme options are theme-specific and customize the look and feel of a theme
135 | # further.  For a list of options available for each theme, see the
136 | # documentation.
137 | 
138 | html_theme_options = {
139 |     'pytorch_project': 'docs',
140 |     'canonical_url': 'https://texsoup.alvinwan.com',
141 |     'collapse_navigation': False,
142 |     'display_version': True,
143 |     'logo_only': True,
144 | }
145 | 
146 | html_logo = '_static/images/logo-dark.svg'
147 | 
148 | 
149 | # Add any paths that contain custom static files (such as style sheets) here,
150 | # relative to this directory. They are copied after the builtin static files,
151 | # so a file named "default.css" will overwrite the builtin "default.css".
152 | html_static_path = ['_static']  # , '_images']
153 | 
154 | 
155 | # Called automatically by Sphinx, making this `conf.py` an "extension".
156 | def setup(app):
157 |     # NOTE: in Sphinx 1.8+ `html_css_files` is an official configuration value
158 |     # and can be moved outside of this function (and the setup(app) function
159 |     # can be deleted).
160 |     html_css_files = [
161 |         'https://cdn.jsdelivr.net/npm/katex@0.10.0-beta/dist/katex.min.css'
162 |     ]
163 | 
164 |     # In Sphinx 1.8 it was renamed to `add_css_file`, 1.7 and prior it is
165 |     # `add_stylesheet` (deprecated in 1.8).
166 |     add_css = getattr(app, 'add_css_file', app.add_stylesheet)
167 |     for css_file in html_css_files:
168 |         add_css(css_file)
169 | 
170 | 
171 | # -- Options for HTMLHelp output ------------------------------------------
172 | 
173 | # Output file base name for HTML help builder.
174 | htmlhelp_basename = 'TexSoupdoc'
175 | 
176 | 
177 | # -- Options for LaTeX output ---------------------------------------------
178 | 
179 | latex_elements = {
180 |     # The paper size ('letterpaper' or 'a4paper').
181 |     #
182 |     # 'papersize': 'letterpaper',
183 | 
184 |     # The font size ('10pt', '11pt' or '12pt').
185 |     #
186 |     # 'pointsize': '10pt',
187 | 
188 |     # Additional stuff for the LaTeX preamble.
189 |     #
190 |     # 'preamble': '',
191 | 
192 |     # Latex figure (float) alignment
193 |     #
194 |     # 'figure_align': 'htbp',
195 | }
196 | 
197 | # Grouping the document tree into LaTeX files. List of tuples
198 | # (source start file, target name, title,
199 | #  author, documentclass [howto, manual, or own class]).
200 | latex_documents = [
201 |     (master_doc, 'pytorch.tex', 'TexSoup Documentation',
202 |      'Alvin Wan', 'manual'),
203 | ]
204 | 
205 | 
206 | # -- Options for manual page output ---------------------------------------
207 | 
208 | # One entry per manual page. List of tuples
209 | # (source start file, name, description, authors, manual section).
210 | man_pages = [
211 |     (master_doc, 'TexSoup', 'TexSoup Documentation',
212 |      [author], 1)
213 | ]
214 | 
215 | 
216 | # -- Options for Texinfo output -------------------------------------------
217 | 
218 | # Grouping the document tree into Texinfo files. List of tuples
219 | # (source start file, target name, title, author,
220 | #  dir menu entry, description, category)
221 | texinfo_documents = [
222 |     (master_doc, 'TexSoup', 'TexSoup Documentation',
223 |      author, 'TexSoup', 'One line description of project.',
224 |      'Miscellaneous'),
225 | ]
226 | 
227 | 
228 | # Example configuration for intersphinx: refer to the Python standard library.
229 | intersphinx_mapping = {
230 |     'python': ('https://docs.python.org/', None),
231 |     'numpy': ('https://docs.scipy.org/doc/numpy/', None),
232 | }
233 | 
234 | # -- A patch that prevents Sphinx from cross-referencing ivar tags -------
235 | # See http://stackoverflow.com/a/41184353/3343043
236 | 
237 | from docutils import nodes
238 | from sphinx.util.docfields import TypedField
239 | from sphinx import addnodes
240 | 
241 | 
242 | def patched_make_field(self, types, domain, items, **kw):
243 |     # `kw` catches `env=None` needed for newer sphinx while maintaining
244 |     #  backwards compatibility when passed along further down!
245 | 
246 |     # type: (List, unicode, Tuple) -> nodes.field
247 |     def handle_item(fieldarg, content):
248 |         par = nodes.paragraph()
249 |         par += addnodes.literal_strong('', fieldarg)  # Patch: this line added
250 |         # par.extend(self.make_xrefs(self.rolename, domain, fieldarg,
251 |         #                           addnodes.literal_strong))
252 |         if fieldarg in types:
253 |             par += nodes.Text(' (')
254 |             # NOTE: using .pop() here to prevent a single type node to be
255 |             # inserted twice into the doctree, which leads to
256 |             # inconsistencies later when references are resolved
257 |             fieldtype = types.pop(fieldarg)
258 |             if len(fieldtype) == 1 and isinstance(fieldtype[0], nodes.Text):
259 |                 typename = u''.join(n.astext() for n in fieldtype)
260 |                 typename = typename.replace('int', 'python:int')
261 |                 typename = typename.replace('long', 'python:long')
262 |                 typename = typename.replace('float', 'python:float')
263 |                 typename = typename.replace('type', 'python:type')
264 |                 par.extend(self.make_xrefs(self.typerolename, domain, typename,
265 |                                            addnodes.literal_emphasis, **kw))
266 |             else:
267 |                 par += fieldtype
268 |             par += nodes.Text(')')
269 |         par += nodes.Text(' -- ')
270 |         par += content
271 |         return par
272 | 
273 |     fieldname = nodes.field_name('', self.label)
274 |     if len(items) == 1 and self.can_collapse:
275 |         fieldarg, content = items[0]
276 |         bodynode = handle_item(fieldarg, content)
277 |     else:
278 |         bodynode = self.list_type()
279 |         for fieldarg, content in items:
280 |             bodynode += nodes.list_item('', handle_item(fieldarg, content))
281 |     fieldbody = nodes.field_body('', bodynode)
282 |     return nodes.field('', fieldname, fieldbody)
283 | 
284 | TypedField.make_field = patched_make_field
285 | 


--------------------------------------------------------------------------------
/docs/source/data.rst:
--------------------------------------------------------------------------------
 1 | Data Structures
 2 | ===================================
 3 | 
 4 | .. automodule:: TexSoup.data
 5 | 
 6 | Node
 7 | -----------------------------------
 8 | 
 9 | .. autoclass:: TexNode()
10 |   :members:
11 | 
12 | Expressions
13 | -----------------------------------
14 | 
15 | .. autoclass:: TexExpr()
16 |   :members:
17 | 
18 | .. autoclass:: TexEnv()
19 |   :members:
20 | 
21 | .. autoclass:: TexCmd()
22 |   :members:
23 | 
24 | Groups
25 | -----------------------------------
26 | 
27 | .. autoclass:: TexGroup()
28 |   :members:
29 | 
30 | .. autoclass:: BracketGroup()
31 |   :members:
32 | 
33 | .. autoclass:: BraceGroup()
34 |   :members:
35 | 
36 | .. autoclass:: TexArgs()
37 |   :members:
38 | 
39 | Environments
40 | -----------------------------------
41 | 
42 | .. autoclass:: TexNamedEnv()
43 |   :members:
44 | 
45 | .. autoclass:: TexUnNamedEnv()
46 |   :members:
47 | 
48 | .. autoclass:: TexMathEnv()
49 |   :members:
50 | 
51 | .. autoclass:: TexDisplayMathEnv()
52 |   :members:
53 | 
54 | .. autoclass:: TexMathModeEnv()
55 |   :members:
56 | 
57 | .. autoclass:: TexDisplayMathModeEnv()
58 |   :members:
59 | 
60 | Text
61 | ----------------------------------
62 | 
63 | .. autoclass:: TexText()
64 |   :members:
65 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | .. TexSoup documentation master file, created by
 2 |    sphinx-quickstart on Sat Apr  6 22:08:46 2019.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | TexSoup documentation
 7 | ===================================
 8 | 
 9 | TexSoup is a Python3 library for pulling data from :math:`\LaTeX` files. It
10 | turns even invalid sources into a BeautifulSoup-esque structure that you can
11 | navigate, search, and modify.
12 | 
13 | .. toctree::
14 |    :maxdepth: 1
15 |    :caption: Guides
16 | 
17 |    quickstart
18 |    soup
19 |    navigation
20 |    searching
21 |    modification
22 | 
23 | 
24 | .. toctree::
25 |    :maxdepth: 1
26 |    :caption: How it Works
27 | 
28 |    categorizer
29 |    tokenizer
30 |    parser
31 | 
32 | .. toctree::
33 |    :maxdepth: 1
34 |    :caption: Package Reference
35 | 
36 |    main
37 |    data
38 |    utils
39 | 
40 | Indices and tables
41 | ==================
42 | 
43 | * :ref:`genindex`
44 | 


--------------------------------------------------------------------------------
/docs/source/main.rst:
--------------------------------------------------------------------------------
1 | Main Interface
2 | ===================================
3 | 
4 | .. automodule:: TexSoup
5 | 
6 | .. autofunction:: TexSoup
7 | 


--------------------------------------------------------------------------------
/docs/source/modification.rst:
--------------------------------------------------------------------------------
 1 | Modification
 2 | ===================================
 3 | 
 4 | You can also modify the document using the TexSoup tree, then export the changes
 5 | back to a :math:`\LaTeX` file.
 6 | 
 7 | Commands
 8 | -----------------------------------
 9 | 
10 | As mentioned in :ref:`page-soup`, you can change commands and their arguments.
11 | 
12 |     >>> soup = TexSoup(r'I am \textbf{\large Large and bold}')
13 |     >>> cmd = soup.textbf
14 |     >>> cmd.name = 'textit'
15 |     >>> cmd
16 |     \textit{\large Large and bold}
17 | 
18 | You can set :code:`.string` for any single-argument command (e.g., :code:`\section`).
19 | 
20 |     >>> cmd.string = 'corgis are the best'
21 |     >>> cmd
22 |     \textit{corgis are the best}
23 | 
24 | You can do the same for any command in math mode.
25 | 
26 |     >>> soup2 = TexSoup(r'$$\textrm{math}\sum$$')
27 |     >>> soup2.textrm.string = 'not math'
28 |     >>> soup2
29 |     $$\textrm{not math}\sum$$
30 | 
31 | You can also remove any command in-place, by calling :code:`.delete` on it.
32 | 
33 |     >>> soup2.textrm.delete()
34 |     >>> soup2
35 |     $$\sum$$
36 | 
37 | Arguments
38 | -----------------------------------
39 | 
40 | You can modify arguments just as you would a list.
41 | 
42 |     >>> cmd.args.append('{moar}')
43 |     >>> cmd
44 |     \textit{corgis are the best}{moar}
45 |     >>> cmd.args.remove('{moar}')
46 |     >>> cmd
47 |     \textit{corgis are the best}
48 |     >>> cmd.args.extend(['[moar]', '{crazy}'])
49 |     \textit{corgis are the best}[moar]{crazy}
50 |     >>> cmd.args = cmd.args[:2]
51 |     >>> cmd
52 |     \textit{corgis are the best}[moar]
53 | 
54 | Use the argument's :code:`.string` attribute to modify the argument's contents.
55 | 
56 |     >>> cmd.args[0].string = 'no'
57 |     >>> cmd
58 |     \textit{no}[moar]
59 | 
60 | Environments
61 | -----------------------------------
62 | 
63 | Use the :code:`.string` attribute to modify any environment with only text content
64 | (i.e., a verbatim or math environment).
65 | 
66 |     >>> soup = TexSoup(r'\begin{verbatim}Huehue\end{verbatim}')
67 |     >>> soup.verbatim.string = 'HUEHUE'
68 |     >>> soup
69 |     \begin{verbatim}HUEHUE\end{verbatim}
70 |     >>> soup = TexSoup(r'$$\text{math}$$')
71 |     >>> soup.text.string = ''
72 | 
73 | You can add to an environment's contents using list-like operations, like
74 | :code:`.append`, :code:`.remove`, :code:`.insert`, and :code:`.extend`.
75 | 
76 |     >>> from TexSoup import TexSoup
77 |     >>> soup = TexSoup(r'''
78 |     ... \begin{itemize}
79 |     ...     \item Hello
80 |     ...     \item Bye
81 |     ... \end{itemize}''')
82 |     >>> tmp = soup.item
83 |     >>> soup.itemize.remove(soup.item)
84 |     >>> soup.itemize
85 |     \begin{itemize}
86 |     \item Bye
87 |     \end{itemize}
88 |     >>> soup.insert(1, tmp)
89 |     >>> soup
90 |     \begin{itemize}
91 |     \item Hello
92 |     \item Bye
93 |     \end{itemize}
94 | 
95 | See :class:`TexSoup.data.TexNode` for more utilities.
96 | 


--------------------------------------------------------------------------------
/docs/source/navigation.rst:
--------------------------------------------------------------------------------
 1 | Navigation
 2 | ===================================
 3 | 
 4 | Here's the :math:`\LaTeX` document from the quickstart guide::
 5 | 
 6 |     >>> tex_doc = """
 7 |     ... \begin{document}
 8 |     ... \section{Hello \textit{world}.}
 9 |     ... \subsection{Watermelon}
10 |     ... (n.) A sacred fruit. Also known as:
11 |     ... \begin{itemize}
12 |     ...   \item red lemon
13 |     ...   \item life
14 |     ... \end{itemize}
15 |     ... Here is the prevalence of each synonym, in Table \ref{table:synonyms}.
16 |     ... \begin{tabular}{c c}\label{table:synonyms}
17 |     ...   red lemon & uncommon \\ \n
18 |     ...   life & common
19 |     ... \end{tabular}
20 |     ... \end{document}
21 |     ... """
22 |     >>> from TexSoup import TexSoup
23 |     >>> soup = TexSoup(tex_doc)
24 | 
25 | Going Down
26 | -----------------------------------
27 | 
28 | Some expressions contain content. For example, environments may contain items.
29 | TexSoup provides attributes for navigating an environment's children.
30 | 
31 | Naviate by naming the expression you want. For example, to access italicized
32 | text, use :code:`soup.textit`::
33 | 
34 |     >>> soup.textit
35 |     \textit{world}
36 | 
37 | You can use this to select expressions from a specific part of the document.
38 | For example, this retrieves the an item from an itemize environment::
39 | 
40 |     >>> soup.itemize.item
41 |     \item red lemon
42 | 
43 | 
44 | 
45 | Note accessing by name only returns the first result.
46 | 
47 |     >>> soup.item
48 |     \item red lemon
49 | 
50 | 
51 | To access *all* items, use one of the utilities from :ref:`page-search`, such
52 | as :code:`find_all`::
53 | 
54 |     >>> soup.find_all('item')
55 |     [\item red lemon
56 |       , \item life
57 |     ]
58 | 
59 | An environment's contents are accessible via a list called :code:`contents`.
60 | Note that changing this list in-place will not affect the environment::
61 | 
62 |     >>> soup.itemize.contents
63 |     [\item red lemon
64 |       , \item life
65 |     ]
66 | 
67 | There are several views into an environment's content:
68 | 
69 | - :code:`.children`: Nested Tex expressions. Does not include floating text.
70 | - :code:`.contents`: Nested Tex expressions and text. Does not contain whitespace-only text.
71 | - :code:`.expr.all`: Nested Tex expressions and text, regardless of whitespace or not. All information needed to reconstruct the original source.
72 | - :code:`.descendants`: Tex expressions nested inside of Tex expressions.
73 | - :code:`.text`: Used to "detex" a source file. Returns text from all descendants, without Tex expressions.
74 | 
75 | If a command has only one required argument, or an environment has only one
76 | child, these values are made available as a :code:`.string`.
77 | 
78 |     >>> soup.textit.string
79 |     'world'
80 | 
81 | Going Up
82 | -----------------------------------
83 | 
84 | You can access an experssion's parent with the :code:`.parent` attribute::
85 | 
86 |     >>> soup.textit.parent
87 |     \section{Hello \textit{world}.}
88 | 


--------------------------------------------------------------------------------
/docs/source/parser.rst:
--------------------------------------------------------------------------------
 1 | Parsing Mechanics
 2 | ===================================
 3 | 
 4 | .. automodule:: TexSoup.reader
 5 | 
 6 | Parser
 7 | -----------------------------------
 8 | 
 9 | .. autofunction:: read_tex
10 | .. autofunction:: read_expr
11 | .. autofunction:: read_spacer
12 | .. autofunction:: make_read_peek
13 | 
14 | Environment Parser
15 | -----------------------------------
16 | 
17 | .. autofunction:: read_item
18 | .. autofunction:: unclosed_env_handler
19 | .. autofunction:: read_math_env
20 | .. autofunction:: read_skip_env
21 | .. autofunction:: read_env
22 | 
23 | Argument Parser
24 | -----------------------------------
25 | 
26 | .. autofunction:: read_args
27 | .. autofunction:: read_arg_optional
28 | .. autofunction:: read_arg_required
29 | .. autofunction:: read_arg
30 | 
31 | Command Parser
32 | -----------------------------------
33 | 
34 | .. autofunction:: read_command
35 | 


--------------------------------------------------------------------------------
/docs/source/quickstart.rst:
--------------------------------------------------------------------------------
  1 | Quick Start
  2 | ===================================
  3 | 
  4 | The below illustrates some basic TexSoup functions.
  5 | 
  6 | How to Use
  7 | -----------------------------------
  8 | 
  9 | Here is a :math:`\LaTeX` document::
 10 | 
 11 |   >>> tex_doc = """
 12 |   ... \begin{document}
 13 |   ... \section{Hello \textit{world}.}
 14 |   ... \subsection{Watermelon}
 15 |   ... (n.) A sacred fruit. Also known as:
 16 |   ... \begin{itemize}
 17 |   ...   \item red lemon
 18 |   ...   \item life
 19 |   ... \end{itemize}
 20 |   ... Here is the prevalence of each synonym, in Table \ref{table:synonyms}.
 21 |   ... \begin{tabular}{c c}\label{table:synonyms}
 22 |   ...   red lemon & uncommon \\ \n
 23 |   ...   life & common
 24 |   ... \end{tabular}
 25 |   ... \end{document}
 26 |   ... """
 27 | 
 28 | Call :code:`TexSoup` on this string to re-represent this document as a
 29 | nested data structure::
 30 | 
 31 |   >>> from TexSoup import TexSoup
 32 |   >>> soup = TexSoup(tex_doc)
 33 |   >>> soup
 34 |   \begin{document}
 35 |   \section{Hello \textit{world}.}
 36 |   \subsection{Watermelon}
 37 |   (n.) A sacred fruit. Also known as:
 38 |   \begin{itemize}
 39 |   \item red lemon
 40 |   \item life
 41 |   \end{itemize}
 42 |   Here is the prevalence of each synonym, in Table \ref{table:synonyms}.
 43 |   \begin{tabular}{c c}\label{table:synonyms}
 44 |   red lemon & uncommon \\ \n
 45 |   life & common
 46 |   \end{tabular}
 47 |   \end{document}
 48 | 
 49 | Here are a few ways to navigate the TexSoup data structure::
 50 | 
 51 |   >>> soup.section
 52 |   \section{Hello \textit{world}.}
 53 |   >>> soup.section.name
 54 |   'section'
 55 |   >>> soup.section.string
 56 |   'Hello \\textit{world}.'
 57 |   >>> soup.section.parent.name
 58 |   'document'
 59 |   >>> soup.tabular
 60 |   \begin{tabular}{c c}\label{table:synonyms}
 61 |   red lemon & uncommon \\ \n
 62 |   life & common
 63 |   \end{tabular}
 64 |   >>> soup.tabular.args[0]
 65 |   'c c'
 66 |   >>> soup.item
 67 |   \item red lemon
 68 | 
 69 |   >>> list(soup.find_all('item'))
 70 |   [\item red lemon
 71 |     , \item life
 72 |   ]
 73 | 
 74 | One task may be to find all references. To do this, simply search for
 75 | ``\ref{<label>}``. You can even report each reference's line number::
 76 | 
 77 |   >>> soup.count(r'\ref{table:synonyms}')
 78 |   1
 79 |   >>> for cmd in soup.find_all(r'\ref{table:synonyms}'):
 80 |   ...   soup.char_pos_to_line(cmd.position)
 81 |   (8, 49)
 82 | 
 83 | Another task may be to extract all text from the page::
 84 | 
 85 |   >>> list(soup.text)
 86 |   ['Hello ', 'world', '.', 'Watermelon', '\n\n(n.) A sacred fruit. Also known as:\n\n', 'red lemon\n', 'life\n', '\n\nHere is the prevalence of each synonym.\n\n', '\nred lemon & uncommon \\\\ ', '\nlife & common\n']
 87 | 
 88 | Does this look promising? If so,
 89 | `try TexSoup online <https://repl.it/@ALVINWAN1/texsoup>`_ or read on to
 90 | install.
 91 | 
 92 | How to Install
 93 | -----------------------------------
 94 | 
 95 | TexSoup is published via PyPi, so you can install it via ``pip``. The package
 96 | name is ``TexSoup``::
 97 | 
 98 |   pip install TexSoup
 99 | 
100 | Alternatively, you can install the package from source::
101 | 
102 |   git clone https://github.com/alvinwan/TexSoup.git
103 |   cd TexSoup
104 |   python setup.py install
105 | 


--------------------------------------------------------------------------------
/docs/source/searching.rst:
--------------------------------------------------------------------------------
 1 | .. _page-search:
 2 | 
 3 | Search
 4 | ===================================
 5 | 
 6 | TexSoup supports a few search utilities, namely :code:`.find` and
 7 | :code:`.find_all`. The interface for both is identical. Here's the
 8 | :math:`\LaTeX` document from the quickstart guide::
 9 | 
10 |     >>> tex_doc = """
11 |     ... \begin{document}
12 |     ... \section{Hello \textit{world}.}
13 |     ... \subsection{Watermelon}
14 |     ... (n.) A sacred fruit. Also known as:
15 |     ... \begin{itemize}
16 |     ...   \item red lemon
17 |     ...   \item life
18 |     ... \end{itemize}
19 |     ... Here is the prevalence of each synonym, in Table \ref{table:synonyms}.
20 |     ... \begin{tabular}{c c}\label{table:synonyms}
21 |     ...   red lemon & uncommon \\ \n
22 |     ...   life & common
23 |     ... \end{tabular}
24 |     ... \end{document}
25 |     ... """
26 |     >>> from TexSoup import TexSoup
27 |     >>> soup = TexSoup(tex_doc)
28 | 
29 | Kinds of Filters
30 | -----------------------------------
31 | 
32 | The simplest way to search is using a string filter::
33 | 
34 |     >>> soup.find_all('item')
35 |     [\item red lemon
36 |       , \item life
37 |     ]
38 | 
39 | If you pass in a list, TexSoup will return results that match *any* item in
40 | that list.
41 | 
42 |     >>> soup.find_all('item', 'textit')
43 |     [\textit{world}, \item red lemon
44 |       , \item life
45 |     ]
46 | 
47 | You can also use regex compiled objects or regex strings with
48 | :code:`.search_regex`.
49 | 


--------------------------------------------------------------------------------
/docs/source/soup.rst:
--------------------------------------------------------------------------------
  1 | .. _page-soup:
  2 | 
  3 | Soup
  4 | ===================================
  5 | 
  6 | Making a Soup
  7 | -----------------------------------
  8 | 
  9 | To parse a :math:`\LaTeX` document, pass an open filehandle or a string into the
 10 | :code:`TexSoup` constructor::
 11 | 
 12 |     >>> from TexSoup import TexSoup
 13 |     >>> with open("main.tex") as f:
 14 |     ...     soup = TexSoup(f)
 15 |     >>> soup2 = TexSoup(r'\begin{document}Hello world!\end{document}')
 16 | 
 17 | Alternatively, compute the data structure only::
 18 | 
 19 |     >>> from TexSoup import read
 20 |     >>> soup3, _ = read(r'\begin{document}Hello world!\end{document}')
 21 |     >>> soup3
 22 |     [TexNamedEnv('document', ['Hello world!'], [])]
 23 | 
 24 | You can also ask TexSoup to tolerate :math:`\LaTeX` errors. In which case,
 25 | TexSoup will make a best-effort guess::
 26 | 
 27 |     >>> soup4 = TexSoup(r'\begin{itemize}\item hullo\end{enumerate}', tolerance=1)
 28 |     >>> soup4
 29 |     \begin{itemize}\item hullo\end{itemize}\end{enumerate}
 30 | 
 31 | To output the soup, you can call :code:`str()` on a :class:`TexSoup.data.TexNode` object, or any nested
 32 | data structure.
 33 | 
 34 |     >>> soup4
 35 |     \begin{itemize}\item hullo\end{itemize}\end{enumerate}
 36 |     >>> str(soup4)
 37 |     '\\begin{itemize}\\item hullo\\end{itemize}\\end{enumerate}'
 38 |     >>> soup4.item
 39 |     \item hullo
 40 |     >>> str(soup4.item)
 41 |     '\\item hullo'
 42 | 
 43 | Kinds of Objects
 44 | ------------------------------------
 45 | 
 46 | TexSoup translates a :math:`\LaTeX` document into a tree of Python objects.
 47 | There are only three *kinds* of objects: commands, environments, and
 48 | text.
 49 | 
 50 | Command
 51 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 52 | 
 53 | A :code:`TexCmd` corresponds to a command in the original document::
 54 | 
 55 |     >>> soup = TexSoup(r'I am \textbf{\large Large and bold}')
 56 |     >>> cmd = soup.textbf
 57 |     >>> cmd
 58 |     \textbf{\large Large and bold}
 59 | 
 60 | You can access the underlying data structures using :code:`.expr`.
 61 | 
 62 |     >>> cmd.expr
 63 |     TexCmd('textbf', [BraceGroup(TexCmd('large'), ' Large and bold')])
 64 | 
 65 | Every command has a name::
 66 | 
 67 |     >>> cmd.name
 68 |     'textbf'
 69 | 
 70 | You can change the command's name too. This change will be reflected when you
 71 | convert the TexSoup back to :math:`\LaTeX`::
 72 | 
 73 |     >>> cmd.name = 'textit'
 74 |     >>> cmd
 75 |     \textit{\large Large and bold}
 76 |     >>> soup
 77 |     I am \textit{\large Large and bold}
 78 | 
 79 | Commands may have any number of arguments, stored in :code:`.args` as a list.
 80 | Our command has just one argument::
 81 | 
 82 |     >>> len(cmd.args)
 83 |     1
 84 |     >>> str(cmd.args[0])
 85 |     '{\\large Large and bold}'
 86 | 
 87 | You can add, remove, and modify arguments, treating :code:`.args` as a list::
 88 | 
 89 |     >>> cmd.args.append('{moar}')  # add arguments
 90 |     >>> str(cmd.args)
 91 |     '{\\large Large and bold}{moar}'
 92 |     >>> cmd.args.remove('{\large Large and bold}')  # remove arguments
 93 |     >>> str(cmd.args)
 94 |     '{moar}'
 95 |     >>> cmd.args[0].string = 'floating'  # modify arguments
 96 |     >>> str(cmd.args)
 97 |     '{floating}'
 98 | 
 99 | All arguments are represented using TexSoup's underlying data structures::
100 | 
101 |     >>> cmd.args
102 |     [BraceGroup('floating')]
103 | 
104 | The above commands all apply to optional arguments as well. Note
105 | that all changes are reflected when we convert the soup back to :math:`\LaTeX`::
106 | 
107 |     >>> cmd.args.append('[optional]')  # add optional arg
108 |     >>> str(cmd.args)
109 |     '{floating}[optional]'
110 |     >>> cmd.args.remove('[optional]')  # remove optional arg
111 |     >>> str(cmd.args)
112 |     '{floating}'
113 |     >>> soup
114 |     I am \textit{floating}
115 | 
116 | Text
117 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
118 | 
119 | .. note: If you've just started reading from this portion of the guide, start
120 |          by defining :code:`soup = TexSoup(r'I am \textit{floating}')`.
121 | 
122 | A :code:`TexText` represents floating bits of text::
123 | 
124 |     >>> soup
125 |     I am \textit{floating}
126 |     >>> text = next(soup.contents)
127 |     >>> text
128 |     'I am '
129 |     >>> type(text)
130 |     <class 'TexSoup.data.TexText'>
131 | 
132 | You can set the :code:`.text` attribute. As before, this will be reflected
133 | when you convert the data structure back into :math:`\LaTeX`.
134 | 
135 |     >>> text.text = 'I am not '
136 |     >>> soup
137 |     I am not \textit{floating}
138 | 
139 | Environment
140 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
141 | 
142 | Environments, or :code:`TexEnv`, are split into three types:
143 | 
144 | 1. :code:`TexNamedEnv`: The typical environments you think of, with a begin
145 |    and an end, such as :code:`\begin{itemize}...\end{itemize}`.
146 | 2. :code:`TexUnNamedEnv`: Special environments such as math :code:`\(...\)`.
147 |    All math environments fall in this category.
148 | 3. :code:`TexGroup`: Unnamed environments with single-character delimiters,
149 |    like :code:`{...}`.
150 | 
151 | You can access environments by name::
152 | 
153 |     >>> soup = TexSoup(r'Haha \begin{itemize}[label=\alph]\item Huehue\end{itemize}')
154 |     >>> env = soup.itemize
155 |     >>> env
156 |     \begin{itemize}[label=\alph]\item Huehue\end{itemize}
157 | 
158 | Every environment's name can be accessed and modified using :code:`.name`::
159 | 
160 |     >>> env.name
161 |     'itemize'
162 |     >>> env.name = 'enumerate'
163 |     >>> env
164 |     \begin{enumerate}[label=\alph]\item Huehue\end{enumerate}
165 |     >>> soup
166 |     Haha \begin{enumerate}[label=\alph]\item Huehue\end{enumerate}
167 | 
168 | As with commands, environments store arguments in a list :code:`.args`::
169 | 
170 |     >>> str(env.args)
171 |     '[label=\\alph]'
172 | 
173 | Each environment will contain variable amounts of content, accessible via
174 | :code:`.contents`::
175 | 
176 |     >>> list(env.contents)
177 |     [\item Huehue]
178 | 


--------------------------------------------------------------------------------
/docs/source/tokenizer.rst:
--------------------------------------------------------------------------------
 1 | Tokenizing Mechanics
 2 | ===================================
 3 | 
 4 | .. automodule:: TexSoup.tokens
 5 | 
 6 | Tokenizer
 7 | -----------------------------------
 8 | 
 9 | .. autofunction:: tokenize
10 | .. autofunction:: next_token
11 | 
12 | Escape Tokenizer
13 | -----------------------------------
14 | 
15 | .. autofunction:: tokenize_escaped_symbols
16 | .. autofunction:: tokenize_line_comment
17 | .. autofunction:: tokenize_line_break
18 | .. autofunction:: tokenize_ignore
19 | 
20 | 
21 | Math Tokenizer
22 | -----------------------------------
23 | 
24 | .. autofunction:: tokenize_math_sym_switch
25 | .. autofunction:: tokenize_math_asym_switch
26 | .. autofunction:: tokenize_punctuation_command_name
27 | 
28 | Command Tokenizer
29 | -----------------------------------
30 | 
31 | .. autofunction:: tokenize_command_name
32 | 
33 | Text Tokenizer
34 | -----------------------------------
35 | 
36 | .. autofunction:: tokenize_symbols
37 | .. autofunction:: tokenize_string
38 | 


--------------------------------------------------------------------------------
/docs/source/utils.rst:
--------------------------------------------------------------------------------
 1 | Utilities
 2 | ===================================
 3 | 
 4 | .. automodule:: TexSoup.utils
 5 | 
 6 | Tokens
 7 | -----------------------------------
 8 | 
 9 | .. autoclass:: CC
10 |   :members:
11 | 
12 | .. autoclass:: TC
13 |   :members:
14 | 
15 | .. autoclass:: Token
16 |   :members:
17 | 
18 | .. autoclass:: CharToLineOffset
19 |   :members:
20 | 
21 | Buffer
22 | -----------------------------------
23 | 
24 | .. autoclass:: Buffer
25 |   :members:
26 | 
27 | .. autofunction:: to_buffer
28 | 


--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
 1 | # TexSoup in the Wild
 2 | 
 3 | TexSoup has a variety of practical applications, whether it be minor
 4 | conveniences or more powerful $\LaTeX$ extensions. The examples below exhibit a
 5 | few of these use cases, including simple reference counts and integration with
 6 | computer algebra systems (coming soon).
 7 | 
 8 | ## Examples
 9 | 
10 | See the `examples/` folder for example scripts and usages for TexSoup.
11 | 
12 | - [Count References](https://github.com/alvinwan/TexSoup/blob/master/examples/count_references.py)
13 | - [Solution Length](https://github.com/alvinwan/TexSoup/blob/master/examples/solution_length.py)
14 | - [Resolve Imports](https://github.com/alvinwan/TexSoup/blob/master/examples/resolve_imports.py)
15 | 
16 | ## Uses
17 | 
18 | See slightly more complex uses for TexSoup.
19 | 
20 | - [LaTex2Python](https://github.com/alvinwan/tex2py) converts $\LaTeX$ into a
21 |     document tree, organizing content by either a default or custom hierarchy.
22 | - [Tex2Ipy](https://github.com/prabhuramachandran/tex2ipy) by Prabhu Ramachandran,
23 |     converts $\LaTeX$ beamer files to Jupyter notebooks
24 | 


--------------------------------------------------------------------------------
/examples/count_references.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Count References
 3 | ---
 4 | 
 5 | This script counts the number of times each label is referenced, in a given tex
 6 | document. To use it, run
 7 | 
 8 |     python count_reference.py
 9 | 
10 | after installing TexSoup.
11 | 
12 | @author: Alvin Wan
13 | @site: alvinwan.com
14 | """
15 | 
16 | from TexSoup import TexSoup
17 | 
18 | 
19 | def count(tex):
20 |     """Extract all labels, then count the number of times each is referenced in
21 |     the provided file. Does not follow \includes.
22 |     """
23 | 
24 |     # soupify
25 |     soup = TexSoup(tex)
26 | 
27 |     # extract all unique labels
28 |     labels = set(label.string for label in soup.find_all('label'))
29 | 
30 |     # create dictionary mapping label to number of references
31 |     label_refs = {}
32 |     for label in labels:
33 |         refs = soup.find_all('\\ref{%s}' % label)
34 |         pagerefs = soup.find_all('\\pageref{%s}' % label)
35 |         label_refs[label] = len(list(refs)) + len(list(pagerefs))
36 | 
37 |     return label_refs
38 | 
39 | 
40 | if __name__ == '__main__':
41 |     counts = count(open(input('Tex file:').strip()))
42 | 
43 |     if not counts:
44 |         print('No labels found.')
45 |     else:
46 |         print(counts)
47 | 


--------------------------------------------------------------------------------
/examples/list_everything.py:
--------------------------------------------------------------------------------
 1 | """
 2 | List Everything
 3 | ---
 4 | This script creates a tree of lists from a given LaTeX document.
 5 | To use it, run
 6 | 
 7 |     python list_everything.py
 8 | 
 9 | after installing TexSoup.
10 | 
11 | @author: Simon Maenaut
12 | @e-mail: simon@ulyssis.org
13 | """
14 | import TexSoup
15 | import pprint
16 | 
17 | 
18 | def everything(tex_tree):
19 |     """
20 |     Accepts a list of Union[TexNode,Token] and returns a nested list
21 |     of strings of the entire source document.
22 |     """
23 |     result = []
24 |     for tex_code in tex_tree:
25 |         if isinstance(tex_code, TexSoup.TexEnv):
26 |             result.append([tex_code.begin + str(tex_code.args), everything(tex_code.all), tex_code.end])
27 |         elif isinstance(tex_code, TexSoup.TexCmd):
28 |             result.append(["\\" + tex_code.name + str(tex_code.args)])
29 |         elif isinstance(tex_code, TexSoup.TexText):
30 |             result.append(tex_code.text)
31 |         elif isinstance(tex_code, TexSoup.TexGroup):
32 |             result.append(["{", everything(TexSoup.TexSoup(tex_code.value).expr.all), "}"])
33 |         else:
34 |             result.append([str(tex_code)])
35 | 
36 |     return result
37 | 
38 | 
39 | # Run programme as main file
40 | if __name__ == '__main__':
41 | 
42 |     tex_file = open(input('LaTex file:').strip())
43 |     tex_soup = TexSoup.TexSoup(tex_file)
44 |     tex_text = everything(tex_soup.expr.all)
45 |     print("LaTeX Contents:\n== == ==\n\n")
46 |     pprint.pprint(tex_text)
47 | 


--------------------------------------------------------------------------------
/examples/resolve_imports.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Resolve Imports
 3 | ---
 4 | 
 5 | This script resolves imports and updates the parse tree, in place, in a given
 6 | tex document. To use it, run
 7 | 
 8 |     python resolve_imports.py
 9 | 
10 | after installing TexSoup. The result is similar to the command `latexpand`.
11 | 
12 | @author: Alvin Wan
13 | @site: alvinwan.com
14 | """
15 | 
16 | from TexSoup import TexSoup
17 | 
18 | 
19 | def resolve(tex):
20 |     """Resolve all imports and update the parse tree.
21 | 
22 |     Reads from a tex file and once finished, writes to a tex file.
23 |     """
24 | 
25 |     # soupify
26 |     soup = TexSoup(tex)
27 | 
28 |     # resolve subimports
29 |     for subimport in soup.find_all('subimport'):
30 |         path = subimport.args[0] + subimport.args[1]
31 |         subimport.replace_with(*resolve(open(path)).contents)
32 | 
33 |     # resolve imports
34 |     for _import in soup.find_all('import'):
35 |         _import.replace_with(*resolve(open(_import.args[0])).contents)
36 | 
37 |     # resolve includes
38 |     for include in soup.find_all('include'):
39 |         include.replace_with(*resolve(open(include.args[0])).contents)
40 | 
41 |     # resolve inputs
42 |     for _input in soup.find_all('input'):
43 |         _input.replace_with(*resolve(open(_input.args[0])).contents)
44 | 
45 |     return soup
46 | 
47 | 
48 | if __name__ == '__main__':
49 |     new_soup = resolve(open(input('Source Tex file:').strip()))
50 | 
51 |     with open(input('Destination Tex file:').strip(), 'w') as f:
52 |         f.write(repr(new_soup))
53 | 


--------------------------------------------------------------------------------
/examples/simple_conversion.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Simple Conversion
 3 | ---
 4 | This script converts a given LaTeX document to a json file and checks the conversion.
 5 | To use it, run
 6 | 
 7 |     python simple_conversion.py
 8 | 
 9 | after installing TexSoup.
10 | 
11 | @author: Simon Maenaut
12 | @e-mail: simon@ulyssis.org
13 | """
14 | import TexSoup
15 | import json
16 | 
17 | 
18 | def to_dictionary(tex_tree):
19 |     str_tree = []
20 |     for i in tex_tree:
21 |         if isinstance(i, list):
22 |             str_tree.append(i)
23 |         elif isinstance(i, TexSoup.TexEnv):
24 |             str_tree.append(
25 |                 {
26 |                     i.name: [
27 |                         {"begin": i.begin + str(i.args)},
28 |                         to_dictionary(i.all),
29 |                         {"end": i.end},
30 |                     ]
31 |                 }
32 |             )
33 |         elif isinstance(i, TexSoup.TexCmd):
34 |             str_tree.append({i.name: "\\" + i.name + str(i.args)})
35 |         elif isinstance(i, TexSoup.TexText):
36 |             str_tree.append(str(i.text))
37 |         elif isinstance(i, TexSoup.TexGroup):
38 |             str_tree.append(["{", to_dictionary(TexSoup.TexSoup(i.value).expr.all), "}"])
39 |         else:
40 |             str_tree.append(str(i))
41 | 
42 |     return str_tree
43 | 
44 | 
45 | def to_latex(tex_json):
46 |     if isinstance(tex_json, dict):
47 |         tex_code = "".join([to_latex(val) for val in tex_json.values()])
48 |     elif isinstance(tex_json, list):
49 |         tex_code = "".join([to_latex(val) for val in tex_json])
50 |     else:
51 |         tex_code = tex_json
52 | 
53 |     return tex_code
54 | 
55 | 
56 | # Run programme as main file.
57 | # This should always print True as output.
58 | if __name__ == '__main__':
59 | 
60 |     import os
61 | 
62 |     tex_path = input('LaTex file:').strip()
63 |     tex_text = open(tex_path).read()
64 |     tex_dict = {"latex": {"contents": to_dictionary(TexSoup.TexSoup(tex_text).expr.all)}}
65 | 
66 |     new_path = ".".join(tex_path.split(".")[:-1]) + "__tmp.json"
67 |     json.dump(tex_dict, open(new_path, "x"), indent="  ")
68 |     new_json = json.load(open(new_path))
69 |     os.remove(new_path)
70 |     new_text = to_latex(new_json)
71 | 
72 |     print(tex_text == new_text, "\n\n\n", json.dumps(new_json, indent="  "))
73 | 


--------------------------------------------------------------------------------
/examples/solution_length.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Solution Length
 3 | ---
 4 | 
 5 | This script measures the length of solutions, given the command used to denote
 6 | answers. To use it, run
 7 | 
 8 |     python solution_length.py
 9 | 
10 | after installing TexSoup.
11 | 
12 | @author: Alvin Wan
13 | @site: alvinwan.com
14 | """
15 | 
16 | from TexSoup import TexSoup
17 | 
18 | 
19 | def sollen(tex, command):
20 |     r"""Measure solution length
21 | 
22 |     :param Union[str,buffer] tex: the LaTeX source as a string or file buffer
23 |     :param str command: the command denoting a solution i.e., if the tex file
24 |         uses '\answer{<answer here>}', then the command is 'answer'.
25 |     :return int: the solution length
26 |     """
27 |     return sum(len(a.string) for a in TexSoup(tex).find_all(command))
28 | 
29 | 
30 | if __name__ == '__main__':
31 |     print(
32 |         'Solution length:',
33 |         sollen(open(input('Tex file:').strip()), input('Solution command:')))
34 | 


--------------------------------------------------------------------------------
/examples/structure_diagram.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Structure Diagram
 3 | ---
 4 | This script creates a structure diagram from a given LaTeX document.
 5 | To use it, run
 6 | 
 7 |     python structure_diagram.py
 8 | 
 9 | after installing TexSoup.
10 | 
11 | @author: Simon Maenaut
12 | @e-mail: simon@ulyssis.org
13 | """
14 | import TexSoup
15 | import textwrap
16 | 
17 | 
18 | def tex_read(tex_soup, prefix=" |- "):
19 |     result = ""
20 |     for tex_code in tex_soup:
21 |         if isinstance(tex_code, TexSoup.TexEnv):
22 |             result += tex_read((prefix + tex_code.begin + str(tex_code.args)
23 |                                 + "\n" + textwrap.indent(tex_read(tex_code.all), "\t")
24 |                                 + "\n" + prefix + tex_code.end).splitlines(), prefix="")
25 |         elif isinstance(tex_code, TexSoup.TexCmd):
26 |             result += textwrap.indent("\\" + tex_code.name + str(tex_code.args), prefix, lambda line: True)
27 |         elif isinstance(tex_code, TexSoup.TexText):
28 |             result += textwrap.indent(tex_code.text.strip(), prefix, lambda line: True)
29 |         elif isinstance(tex_code, TexSoup.TexGroup):
30 |             result += tex_read((prefix + "{" + "\n"
31 |                                 + textwrap.indent(tex_read(TexSoup.TexSoup(tex_code.value).expr.all), "\t")
32 |                                 + "\n" + prefix + "}").splitlines(), prefix="")
33 |         else:
34 |             result += textwrap.indent(str(tex_code), prefix)
35 |         if not result.endswith("\n"):
36 |             result += "\n"
37 | 
38 |     return result
39 | 
40 | 
41 | # Run programme as main file
42 | if __name__ == '__main__':
43 | 
44 |     tex_path = input('LaTex file:').strip()
45 |     tex_text = open(tex_path).read()
46 |     tex_tree = tex_read(TexSoup.TexSoup(tex_text).expr.all)
47 |     print("LaTeX Contents:\n== == ==\n\n")
48 |     print(tex_tree)
49 | 


--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | addopts = tests TexSoup --doctest-modules --cov=TexSoup -p no:cacheprovider
3 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | from setuptools import setup
 3 | from setuptools.command.test import test as test_command
 4 | 
 5 | tests_require = ['pytest', 'pytest-cov==2.5.1',
 6 |                  'coverage==4.4', 'coveralls==1.1']
 7 | install_requires = []
 8 | 
 9 | 
10 | with open("README.md", "r") as fh:
11 |     long_description = fh.read()
12 | 
13 | 
14 | class PyTest(test_command):
15 |     user_options = [('pytest-args=', 'a', "Arguments to pass to py.test")]
16 | 
17 |     def __init__(self, dist, **kw):
18 |         super().__init__(dist, **kw)
19 |         self.pytest_args = []
20 | 
21 |     def initialize_options(self):
22 |         test_command.initialize_options(self)
23 | 
24 |     def finalize_options(self):
25 |         test_command.finalize_options(self)
26 | 
27 |     def run_tests(self):
28 |         # import here, cause outside the eggs aren't loaded
29 |         import pytest
30 |         errno = pytest.main(self.pytest_args)
31 |         sys.exit(errno)
32 | 
33 | 
34 | VERSION = '0.3.1'
35 | 
36 | setup(
37 |     name="TexSoup",
38 |     version=VERSION,
39 |     author="Alvin Wan",
40 |     author_email='hi@alvinwan.com',
41 |     description=("parses valid LaTeX and provides variety of Beautiful-Soup-"
42 |                  "esque methods and Pythonic idioms for iterating over and "
43 |                  "searching the parse tree"),
44 |     long_description=long_description,
45 |     long_description_content_type="text/markdown",
46 |     license="BSD",
47 |     url="https://github.com/alvinwan/TexSoup",
48 |     packages=['TexSoup'],
49 |     cmdclass={'test': PyTest},
50 |     tests_require=tests_require,
51 |     install_requires=install_requires,
52 |     download_url='https://github.com/alvinwan/TexSoup/archive/%s.zip' % VERSION,
53 |     classifiers=[
54 |         "Topic :: Utilities",
55 |         "Intended Audience :: Developers",
56 |         "Programming Language :: Python :: 3",
57 |         "Topic :: Software Development :: Libraries",
58 |     ],
59 | )
60 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alvinwan/TexSoup/7dabf76fdde1f0d7f0d30ede841b3b45bbb609f6/tests/__init__.py


--------------------------------------------------------------------------------
/tests/config.py:
--------------------------------------------------------------------------------
 1 | from TexSoup import TexSoup
 2 | import os
 3 | import pytest
 4 | 
 5 | 
 6 | def seed(path):
 7 |     """Filepath relative to test directory"""
 8 |     return os.path.join(os.path.split(os.path.realpath(__file__))[0], path)
 9 | 
10 | 
11 | ############
12 | # FIXTURES #
13 | ############
14 | 
15 | @pytest.fixture(scope='function')
16 | def chikin():
17 |     """Instance of the chikin tex file"""
18 |     return TexSoup(open(seed('samples/chikin.tex')))
19 | 
20 | 
21 | @pytest.fixture(scope='function')
22 | def pancake():
23 |     """Content of the pancake tex file"""
24 |     with open(seed('samples/pancake.tex')) as fp:
25 |         return fp.read()
26 | 
27 | 


--------------------------------------------------------------------------------
/tests/samples/chikin.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alvinwan/TexSoup/7dabf76fdde1f0d7f0d30ede841b3b45bbb609f6/tests/samples/chikin.pdf


--------------------------------------------------------------------------------
/tests/samples/chikin.tex:
--------------------------------------------------------------------------------
 1 | \documentclass[a4paper]{article}
 2 | 
 3 | \begin{document}
 4 | 
 5 | \section{Chikin Tales}
 6 | 
 7 | \subsection{Chikin Fly}
 8 | 
 9 | Chickens don't fly. They do only the following:
10 | 
11 | \begin{itemize}
12 | \item waddle
13 | \item plop
14 | \end{itemize}
15 | 
16 | \section{Chikin Scream}
17 | 
18 | \subsection{Plopping}
19 | 
20 | Plopping involves three steps:
21 | 
22 | \begin{enumerate}
23 | \item squawk
24 | \item plop
25 | \item repeat, unless ordered to squat
26 | \end{enumerate}
27 | 
28 | \subsection{I Scream}
29 | 
30 | \end{document}
31 | 


--------------------------------------------------------------------------------
/tests/samples/pancake.tex:
--------------------------------------------------------------------------------
 1 | \documentclass[a4paper]{article}
 2 | 
 3 | \begin{document}
 4 | 
 5 | \section{Pancake}
 6 | 
 7 | \subsection{Ingredients}
 8 | \label{ingredients}
 9 | 
10 | This is a pancake recipe that at the same time serves as \LaTeX test document. Put the following ingredients in a big bowl (make sure to add flour as last ingredient -- the order of the others is not important):
11 | 
12 | \begin{description}
13 | \item[6 eggs] whithout shells but egg white and yolk together
14 | \item[1 liter milk] the fresh one if possible with normal 3.5\% fat
15 | \item[5~g salt] I hope you have a good scale or do such things often
16 | \item[2 spoons of oil] use oil that's heat resistant like sun flower oil
17 | \item[500~g flour] and pour it in while steering
18 | \end{description}
19 | 
20 | 
21 | \subsection{Baking}
22 | 
23 | Prepare the dough like discribed in section \ref{ingredients}.
24 | Heat up a pan (use some oil if needed). Take a pan and put dough in such it covers the base. Wait approximately 1 to 2 minutes until the dough is no more fluid. Turn the pancake and bake the other side as well (approximately 1 minute). Continue until you have no more dough.
25 | 
26 | 
27 | Some suggestions:
28 | \begin{itemize}
29 | 	\item Tastes wonderfully with yoghurt and sugar
30 | 	\item Some people perfer jam
31 | 	\item Cheese, bakon and eggs are also an option but if you are in favor of those, put them already into the pan when you bake the second side of the pancake.
32 | \end{itemize}
33 | 
34 | \emph{Enjoy your meal!}
35 | 
36 | \end{document}
37 | 


--------------------------------------------------------------------------------
/tests/test_api.py:
--------------------------------------------------------------------------------
  1 | from TexSoup import TexSoup
  2 | from TexSoup.utils import Token
  3 | from tests.config import chikin
  4 | import pytest
  5 | import re
  6 | 
  7 | ##############
  8 | # NAVIGATION #
  9 | ##############
 10 | 
 11 | 
 12 | if chikin:
 13 |     pass
 14 | 
 15 | 
 16 | def test_navigation_attributes(chikin):
 17 |     """Test navigation with attributes by dot notation"""
 18 |     assert str(chikin.section) == r'\section{Chikin Tales}'
 19 |     assert chikin.section.name == 'section'
 20 |     assert chikin.section.string == 'Chikin Tales'
 21 | 
 22 | 
 23 | def test_navigation_parent(chikin):
 24 |     """Test parent navigation"""
 25 |     assert chikin.section.parent.name == 'document'
 26 |     assert chikin.subsection.parent.name == 'document'
 27 | 
 28 | 
 29 | def test_navigation_children(chikin):
 30 |     """Test identification of all children"""
 31 |     assert len(list(chikin.children)) == 2
 32 |     docclass, document = chikin.children
 33 |     assert document.name == 'document'
 34 |     assert len(list(chikin.document.children)) == 7
 35 | 
 36 | 
 37 | def test_navigation_descendants(chikin):
 38 |     """Test identification of all descendants"""
 39 |     assert len(list(chikin.descendants)) == 28
 40 | 
 41 | 
 42 | def test_navigation_positions(chikin):
 43 |     assert chikin.char_pos_to_line(0) == (0, 0), '\\'
 44 |     assert chikin.char_pos_to_line(1) == (0, 1), 'documentclass'
 45 |     assert chikin.char_pos_to_line(172) == (11, 6), 'waddle'
 46 | 
 47 |     assert isinstance(chikin.itemize.children[0].contents[0], Token)
 48 | 
 49 |     # get position of first token
 50 |     waddle_pos = chikin.itemize.children[0].contents[0].position
 51 |     assert chikin.char_pos_to_line(waddle_pos) == (11, 5)
 52 | 
 53 |     # get position of item
 54 |     enumerate_first_item_pos = chikin.enumerate.children[0].name.position
 55 |     assert chikin.char_pos_to_line(enumerate_first_item_pos) == (22, 1)
 56 | 
 57 |     # get position of section
 58 |     section_pos = list(chikin.find_all('section'))[1].name.position
 59 |     assert chikin.char_pos_to_line(section_pos) == (15, 1)
 60 | 
 61 | 
 62 | ##########
 63 | # SEARCH #
 64 | ##########
 65 | 
 66 | 
 67 | def test_find_basic(chikin):
 68 |     """Find all LaTeX commands"""
 69 |     document = chikin.find('document')
 70 |     assert document.name == 'document'
 71 | 
 72 | 
 73 | def test_find_by_command(chikin):
 74 |     """Find all LaTeX blocks that match a command"""
 75 |     sections = list(chikin.find_all('section'))
 76 |     assert str(sections[0]) == r'\section{Chikin Tales}'
 77 |     assert str(sections[1]) == r'\section{Chikin Scream}'
 78 | 
 79 | 
 80 | def test_find_env():
 81 |     """Find all equations in the document"""
 82 |     soup = TexSoup(r"""\begin{equation}1+1\end{equation}""")
 83 |     equations = soup.find_all(r'\begin{equation}')
 84 |     assert len(list(equations)) > 0
 85 | 
 86 | ################
 87 | # MODIFICATION #
 88 | ################
 89 | 
 90 | 
 91 | def test_delete(chikin):
 92 |     """Delete an element from the parse tree."""
 93 |     chikin.section.delete()
 94 |     assert 'Chikin Tales' not in str(chikin)
 95 | 
 96 | 
 97 | def test_delete_arg():
 98 |     """Delete an element from an arg in the parse tree"""
 99 |     soup = TexSoup(r'\foo{\bar{\baz}}')
100 |     soup.bar.delete()
101 | 
102 | 
103 | def test_delete_env_arg():
104 |     """Delete an element from an environment arg in the parse tree"""
105 |     soup = TexSoup(r'\begin{foo}{\bar{\baz}}\end{foo}')
106 |     soup.bar.delete()
107 | 
108 | 
109 | def test_delete_token():
110 |     """Delete Token"""
111 |     soup = TexSoup(r"""
112 |     \section{one}
113 |     text
114 |     \section{two}
115 |     delete me""")
116 | 
117 |     assert 'delete me' in str(soup)
118 |     for node in soup.all:
119 |         if 'delete me' in node:
120 |             node.delete()
121 |     assert 'delete me' not in str(soup)
122 | 
123 | 
124 | def test_replace_single(chikin):
125 |     """Replace an element in the parse tree"""
126 |     chikin.section.replace_with(chikin.subsection)
127 |     assert 'Chikin Tales' not in str(chikin)
128 |     assert len(list(chikin.find_all('subsection'))) == 4
129 | 
130 | 
131 | def test_replace_multiple(chikin):
132 |     """Replace an element in the parse tree"""
133 |     chikin.section.replace_with(chikin.subsection, chikin.subsection)
134 |     assert 'Chikin Tales' not in str(chikin)
135 |     assert len(list(chikin.find_all('subsection'))) == 5
136 | 
137 | 
138 | def test_replace_in_args():
139 |     """Replace an element in an argument"""
140 |     soup = TexSoup(r'\Fig{\ref{a_label}}')
141 |     soup.ref.replace_with('2')
142 |     assert str(soup) == r'\Fig{2}'
143 | 
144 | 
145 | def test_append(chikin):
146 |     """Add a child to the parse tree"""
147 |     chikin.itemize.append('asdfghjkl')
148 |     assert 'asdfghjkl' in str(chikin.itemize)
149 | 
150 | 
151 | def test_insert(chikin):
152 |     """Add a child to the parse tree at a specific position"""
153 |     chikin.insert(0, 'asdfghjkl')
154 |     assert 'asdfghjkl' in str(chikin)
155 |     assert str(chikin[0]) == 'asdfghjkl'
156 | 
157 | 
158 | def test_change_string():
159 |     """Change argument string value"""
160 |     soup = TexSoup(r"\newtheorem{Theo}{Theorem}")
161 |     soup.newtheorem.args[0].string = soup.newtheorem.args[1].string.lower()
162 |     assert soup.newtheorem.args[0].string == 'theorem'
163 |     assert str(soup.newtheorem) == r"\newtheorem{theorem}{Theorem}"
164 |     assert str(soup) == r"\newtheorem{theorem}{Theorem}"
165 | 
166 |     soup = TexSoup(r'''
167 |     \begin{theorem}
168 |     \begin{equation}
169 |     t = s
170 |     \end{equation}
171 |     \end{theorem}
172 |     ''')
173 |     equation = soup.find('equation')
174 |     equation.name = 'eqn'
175 |     assert str(equation) == r'''\begin{eqn}
176 |     t = s
177 |     \end{eqn}'''
178 | 
179 | 
180 | def test_change_name():
181 |     """Change argument string value"""
182 |     soup = TexSoup(r"\textbf{Theo} haha")
183 |     soup.textbf.name = 'textit'
184 |     assert soup.textit
185 |     assert str(soup) == r"\textit{Theo} haha"
186 | 
187 | 
188 | def test_access_position(chikin):
189 |     """Tests that commands, arguments, environments, and strings store pos"""
190 |     clo = chikin.char_pos_to_line
191 | 
192 |     assert chikin.section.position == 52
193 |     assert chikin.section.args[0].position == 60
194 |     assert chikin.itemize.position == 150
195 |     contents = list(chikin.document.contents)
196 |     assert len(contents) > 2 and contents[2].position == 100, contents
197 | 
198 |     assert clo(chikin.section.position) == (4, 0)
199 | 
200 | 
201 | def test_math_env_change():
202 |     """Tests that commands in math environments can be found / modified"""
203 |     soup = TexSoup(r'\begin{align}\infer{A}{B}\infer{C}{D}\end{align}')
204 |     assert soup.infer is not None, repr(soup.expr)
205 |     for infer in soup.find_all('infer'):
206 |         infer.args = infer.args[::-1]
207 |     assert str(soup) == r'\begin{align}\infer{B}{A}\infer{D}{C}\end{align}'
208 | 
209 |     soup = TexSoup(r'$$\infer{A}{B}\infer{C}{D}$$')
210 |     assert soup.infer is not None, repr(soup.expr)
211 |     for infer in soup.find_all('infer'):
212 |         infer.args = infer.args[::-1]
213 |     assert str(soup) == r'$$\infer{B}{A}\infer{D}{C}$$'
214 | 
215 | 
216 | #########
217 | # TEXT #
218 | ########
219 | 
220 | def test_text(chikin):
221 |     """Get text of document"""
222 |     text = list(chikin.text)
223 |     assert 'Chikin Tales' in text
224 |     assert 'Chikin Fly' in text
225 |     assert ' waddle\n' in text
226 | 
227 | 
228 | def test_search_regex(chikin):
229 |     """Find all occurenses of a regex in the document text"""
230 |     matches = list(chikin.search_regex(r"unless[a-z ]*"))
231 |     assert len(matches) == 1
232 |     assert matches[0] == "unless ordered to squat"
233 |     assert matches[0].position == 341
234 | 
235 | 
236 | def test_search_regex_precompiled_pattern(chikin):
237 |     """Find all occurenses of a regex in the document text"""
238 |     pattern = re.compile(r"unless[a-z ]*")
239 |     matches = list(chikin.search_regex(pattern))
240 |     assert len(matches) == 1
241 |     assert matches[0] == "unless ordered to squat"
242 |     assert matches[0].position == 341
243 | 
244 | 
245 | ###########
246 | # TEXSOUP #
247 | ###########
248 | 
249 | 
250 | def test_skip_envs():
251 |     """Test envs with invalid latex are not parsed."""
252 |     with pytest.raises(TypeError):
253 |         soup = TexSoup(r"""will raise error \textbf{aaaaa""")
254 | 
255 |     # no error, ignores verbatim
256 |     TexSoup(r"""\begin{verbatim} \textbf{aaaaa \end{verbatim}""")
257 | 
258 |     # no error, customized to ignore foobar
259 |     TexSoup(r"""\begin{foobar} \textbf{aaaaa \end{foobar}""", skip_envs=('foobar',))
260 | 


--------------------------------------------------------------------------------
/tests/test_load_edit_save.py:
--------------------------------------------------------------------------------
 1 | from TexSoup import TexSoup
 2 | from tests.config import pancake
 3 | 
 4 | 
 5 | ########################
 6 | # LOAD EDIT SAVE TESTS #
 7 | ########################
 8 | 
 9 | 
10 | def test_load_save(pancake):
11 |     """Tests whether a LaTeX document can be loaded and saved."""
12 |     soup = TexSoup(pancake)
13 |     treated_pancake = str(soup)
14 |     assert treated_pancake == pancake
15 | 
16 | 
17 | def test_load_edit_save(pancake):
18 |     """Tests whether a LaTeX document can be loaded, modified and saved."""
19 |     soup = TexSoup(pancake)
20 |     emph = soup.find('emph')
21 |     emph.delete()
22 |     pancake_no_emph_soup = str(soup)
23 |     pancake_no_emph_replace = pancake.replace(r'\emph{Enjoy your meal!}', '')
24 |     assert pancake_no_emph_soup == pancake_no_emph_replace
25 | 


--------------------------------------------------------------------------------
/tests/test_parser.py:
--------------------------------------------------------------------------------
  1 | from TexSoup import TexSoup
  2 | import pytest
  3 | 
  4 | 
  5 | ###############
  6 | # BASIC TESTS #
  7 | ###############
  8 | 
  9 | 
 10 | def test_commands_only():
 11 |     """Tests that parser for commands-only string works."""
 12 |     soup = TexSoup(r"""
 13 |     \section{Chikin Tales}
 14 |     \subsection{Chikin Fly}
 15 |     """)
 16 |     children = list(soup.children)
 17 |     assert len(children) == 2
 18 |     assert str(children[0]) == r'\section{Chikin Tales}'
 19 |     assert str(children[1]) == r'\subsection{Chikin Fly}'
 20 | 
 21 | 
 22 | def test_commands_envs_only():
 23 |     """Tests that parser for commands-environments-only string works."""
 24 |     soup = TexSoup(r"""
 25 |     \section{Chikin Tales}
 26 |     \subsection{Chikin Fly}
 27 | 
 28 |     \begin{itemize}
 29 |     \item plop
 30 |     \item squat
 31 |     \end{itemize}
 32 |     """)
 33 |     children = list(soup.children)
 34 |     assert len(children) == 3
 35 |     assert str(children[0]) == r'\section{Chikin Tales}'
 36 |     assert str(children[1]) == r'\subsection{Chikin Fly}'
 37 |     itemize = children[2]
 38 |     assert itemize.name == 'itemize'
 39 |     items = list(itemize.children)
 40 |     assert len(items) == 2
 41 | 
 42 | 
 43 | def test_commands_envs_text():
 44 |     """Tests that parser for commands, environments, and strings work."""
 45 |     soup = TexSoup(r"""
 46 |     \begin{document}
 47 |     \title{Chikin}
 48 |     \date{\today}
 49 |     \section
 50 |     [Tales]{Chikin Tales}
 51 |     \subsection
 52 |     {Chikin Fly}
 53 | 
 54 |     Here is what chickens do:
 55 | 
 56 |     \begin{itemize}
 57 |     \item plop
 58 |     \item squat
 59 |     \end{itemize}
 60 |     \end{document}
 61 |     """)
 62 |     assert len(list(soup.children)) == 1
 63 |     doc = soup.children[0]
 64 |     assert doc.name == 'document'
 65 |     contents, children = list(doc.contents), list(doc.children)
 66 |     assert str(children[0]) == r'\title{Chikin}'
 67 |     assert str(children[1]) == r'\date{\today}'
 68 |     assert str(children[2]) == r'\section[Tales]{Chikin Tales}'
 69 |     assert str(children[3]) == r'\subsection{Chikin Fly}'
 70 |     assert len(children) == 5
 71 |     assert len(contents) == 6
 72 |     everything = list(doc.expr.all)
 73 |     assert len(everything) == 12
 74 | 
 75 | 
 76 | #########
 77 | # CASES #
 78 | #########
 79 | 
 80 | 
 81 | def test_text_preserved():
 82 |     """Tests that the parser preserves regular non-expression text."""
 83 |     soup = TexSoup(r"""
 84 |     \Question \textbf{Question Title}
 85 | 
 86 |     Here is what chickens do:
 87 | 
 88 |     \sol{They fly!}
 89 |     """)
 90 |     assert 'Here is what chickens do:' in str(soup)
 91 | 
 92 | 
 93 | def test_command_name_parse():
 94 |     """Tests that the name of a command is parsed correctly.
 95 | 
 96 |     Arguments can be separated from a command name by at most one line break
 97 |     and any other whitespace.
 98 |     """
 99 |     with_space_not_arg = TexSoup(r"""\item (10 points)""")
100 |     assert with_space_not_arg.item is not None
101 |     assert len(list(with_space_not_arg.item.contents)) == 1
102 |     assert with_space_not_arg.item.contents[0] == '(10 points)'
103 | 
104 |     with_space_with_arg = TexSoup(r"""\section {hula}""")
105 |     assert with_space_with_arg.section.string == 'hula'
106 | 
107 |     with_linebreak_with_arg = TexSoup(r"""\section
108 |     {hula}""")
109 |     assert with_linebreak_with_arg.section.string == 'hula'
110 | 
111 | 
112 | def test_command_env_name_parse():
113 |     """Tests that the begin/end command is parsed correctly."""
114 | 
115 |     with_space = TexSoup(r"""\begin            {itemize}\end{itemize}""")
116 |     assert len(list(with_space.contents)) == 1
117 | 
118 |     with_whitespace = TexSoup(r"""\begin
119 | {itemize}\end{itemize}""")
120 |     assert len(list(with_whitespace.contents)) == 1
121 | 
122 | 
123 | def test_commands_without_arguments():
124 |     """Tests that commands without arguments are parsed correctly."""
125 |     soup = TexSoup(r"""
126 |     \Question \textbf{Question Title}
127 | 
128 |     Here is what chickens do:
129 | 
130 |     \sol{They fly!}
131 | 
132 |     \Question
133 |     \textbf{Question 2 Title}
134 |     """)
135 |     assert len(list(soup.contents)) == 6
136 |     assert soup[0].name.strip() == 'Question'
137 |     assert len(list(soup.children)) == 5
138 |     assert list(soup.children)[0].name.strip() == 'Question'
139 | 
140 | 
141 | def test_unlabeled_environment():
142 |     """Tests that unlabeled environment is parsed and recognized.
143 | 
144 |     Check that the environment is recognized not as an argument but as an
145 |     unlabeled environment.
146 |     """
147 |     soup = TexSoup(r"""{\color{blue} \textbf{This} \textit{is} some text.}""")
148 |     assert len(list(soup.contents)) == 1, 'Environment not recognized.'
149 | 
150 | 
151 | def test_ignore_environment():
152 |     """Tests that "ignore" environments are preserved (e.g., math, verbatim)."""
153 |     soup = TexSoup(r"""
154 |     \begin{equation}\min_x \|Ax - b\|_2^2\end{equation}
155 |     \begin{verbatim}
156 |     \min_x \|Ax - b\|_2^2 + \lambda \|x\|_2^2
157 |     \end{verbatim}
158 |     $$\min_x \|Ax - b\|_2^2 + \lambda \|x\|_1^2$$
159 |     \[[0,1)\]
160 |     \begin{flalign} will break if TexSoup starts parsing math[ \end{flalign}
161 |     \begin{align*} hah [ \end{align*}
162 |     """)
163 |     verbatim = list(list(soup.children)[1].contents)[0]
164 |     assert len(list(soup.contents)) == 6, 'Special environments not recognized.'
165 |     assert str(list(soup.children)[0]) == r'\begin{equation}\min_x \|Ax - b\|_2^2\end{equation}'
166 |     # hacky workaround for odd string types
167 |     assert verbatim[0] == '\n' and verbatim[1:].startswith('   '), 'Whitespace not preserved: {}'.format(verbatim)
168 |     assert str(list(soup.children)[2]) == r'$$\min_x \|Ax - b\|_2^2 + \lambda \|x\|_1^2$$'
169 |     assert str(list(soup.children)[3]) == r'\[[0,1)\]'
170 | 
171 | 
172 | def test_inline_math():
173 |     """Tests that inline math is rendered correctly."""
174 |     soup = TexSoup(r"""
175 |     \begin{itemize}
176 |     \item This $e^{i\pi} = -1$
177 |     \item How \(e^{i\pi} + 1 = 0\)
178 |     \item Therefore!
179 |     \end{itemize}""")
180 |     assert r'$e^{i\pi} = -1$' in str(soup), 'Math environment not kept intact.'
181 |     assert r'$e^{i\pi} = -1$' in str(list(soup.itemize.children)[0]), 'Environment incorrectly associated.'
182 |     assert r'\(e^{i\pi} + 1 = 0\)' in str(soup), 'Math environment not kept intact.'
183 |     assert r'\(e^{i\pi} + 1 = 0\)' in str(list(soup.itemize.children)[1]), 'Environment incorrectly associated.'
184 | 
185 | 
186 | def test_escaped_characters():
187 |     """Tests that special characters are escaped properly.
188 |     Formerly, escaped characters would be rendered as latex commands.
189 |     """
190 |     soup = TexSoup(r"""
191 |     \begin{itemize}
192 |     \item Ice cream costs \$4-\$5 around here. \}\ [\{]
193 |     \end{itemize}""")
194 |     assert str(soup.item).strip() == r'\item Ice cream costs \$4-\$5 around here. \}\ [\{]'
195 |     assert '\\$4-\\$5' in str(soup), 'Escaped characters not properly rendered.'
196 | 
197 | 
198 | def test_newline_after_backslash():
199 |     """Tests that newlines after backslashes are preserved."""
200 |     text = 'a\\\nb'
201 |     soup = TexSoup(text)
202 |     assert str(soup) == text
203 | 
204 | 
205 | def test_math_environment_weirdness():
206 |     """Tests that math environment interacts correctly with other envs."""
207 |     soup = TexSoup(r"""\begin{a} \end{a}$ b$""")
208 |     assert '$' not in str(soup.a), 'Math env snuck into begin env.'
209 |     soup = TexSoup(r"""\begin{a} $ b$ \end{a}""")
210 |     assert '$' in str(soup.a.contents[0]), 'Math env not found in begin env'
211 |     soup = TexSoup(r"""\begin{verbatim} $ \end{verbatim}""")
212 |     assert soup.verbatim is not None
213 |     # GH48
214 |     soup = TexSoup(r"""a\\$a$""")
215 |     assert '$' in str(soup), 'Math env not correctly parsed after \\\\'
216 |     # GH55
217 |     soup = TexSoup(r"""\begin{env} text\\$formula$ \end{env}""")
218 |     assert '$' in str(soup.env), 'Math env not correctly parsed after \\\\'
219 | 
220 | 
221 | def test_tokenize_punctuation_command_names():
222 |     """Tests handling math expressions including bracket modifiers."""
223 |     # GH111 size variant
224 |     soup = TexSoup(r"""$\big(xy\big)$""")
225 |     assert str(list(soup.descendants)[1]) == r'\big(', 'wrong punctuation mark'
226 |     assert str(list(soup.descendants)[3]) == r'\big)', 'wrong punctuation mark'
227 |     # GH111 left-right variant
228 |     soup = TexSoup(r"""$\left[xy\right]$""")
229 |     assert str(list(soup.descendants)[1]) == r'\left[', 'wrong punctuation mark'
230 |     assert str(list(soup.descendants)[3]) == r'\right]', 'wrong punctuation mark'
231 |     # one sided
232 |     soup = TexSoup(r"""$\Big|$""")
233 |     assert str(list(soup.descendants)[1]) == r'\Big|', 'wrong punctuation'
234 |     # set builder
235 |     soup = TexSoup(r"""$\left\{x|y\right\}$""")
236 |     assert str(list(soup.descendants)[1]) == r'\left\{', 'wrong punctuation'
237 |     assert str(list(soup.descendants)[3]) == r'\right\}', 'wrong punctuation'
238 |     # long ones
239 |     soup = TexSoup(r"""$\big\lfloor x \big\rfloor$""")
240 |     assert str(list(soup.descendants)[1]) == r'\big\lfloor', 'wrong punctuation'
241 |     assert str(list(soup.descendants)[3]) == r'\big\rfloor', 'wrong punctuation'
242 | 
243 | 
244 | def test_item_parsing():
245 |     """Tests that item parsing is valid."""
246 |     soup = TexSoup(r"""\item aaa {\bbb} ccc""")
247 |     assert str(soup.item) == r'\item aaa {\bbb} ccc'
248 |     soup = TexSoup(r"""\begin{itemize}
249 |     \item hello $\alpha$
250 |     \end{itemize}""")
251 |     assert str(soup.item).strip() == r'\item hello $\alpha$'
252 |     soup = TexSoup(r"""\begin{itemize}
253 |     \item
254 |     \item first item
255 |     \end{itemize}""")
256 |     assert len(list(soup.item.contents)) == 0, \
257 |         "Zeroth item should have no contents"
258 |     soup = TexSoup(r"""\begin{itemize}
259 |     \item second item
260 |     \item
261 | 
262 | 
263 |     third item
264 |     with third item
265 | 
266 |     floating text
267 |     \end{itemize}""")
268 |     items = list(soup.find_all('item'))
269 |     content = items[1].contents[0]
270 |     assert 'third item' in content, 'Item does not tolerate starting line breaks (as it should)'
271 |     assert 'with' in content, 'Item does not tolerate line break in middle (as it should)'
272 |     soup = TexSoup(r"""\begin{itemize}
273 |     \item This item contains code!
274 |     \begin{lstlisting}
275 |     Code code code
276 |     \end{lstlisting}
277 |     \item hello
278 |     \end{itemize}""")
279 |     assert ' Code code code' in str(soup.item.lstlisting), 'Item does not correctly parse contained environments.'
280 |     assert '\n    Code code code\n    ' in soup.item.lstlisting.expr.contents
281 |     soup = TexSoup(r"""\begin{itemize}
282 |     \item\label{some-label} waddle
283 |     \item plop
284 |     \end{itemize}""")
285 |     assert str(soup.item.label) == r'\label{some-label}'
286 | 
287 | 
288 | def test_item_argument_parsing():
289 |     """Tests that item arguments are correctly associated with item."""
290 |     soup = TexSoup(r"""\item[marker]""")
291 |     assert str(soup.item) == r'\item[marker]'
292 | 
293 | 
294 | def test_comment_escaping():
295 |     """Tests that comments can be escaped properly."""
296 |     soup = TexSoup(r"""\caption{ 30 \%}""")
297 |     assert '%' in str(soup.caption), 'Comment not escaped properly'
298 | 
299 | 
300 | def test_comment_unparsed():
301 |     """Tests that comments are not parsed."""
302 |     soup = TexSoup(r"""\caption{30} % \caption{...""")
303 |     assert '%' not in str(soup.caption)
304 | 
305 | 
306 | def test_comment_after_escape():
307 |     """Tests that comments after escapes work."""
308 |     soup = TexSoup(r"""\documentclass{article}
309 |     \begin{document}
310 |      \\%
311 |     \end{document}
312 |     """)
313 |     assert len(list(soup.document.contents)) == 2
314 | 
315 |     soup2 = TexSoup(r"""\documentclass{article}
316 |     \begin{document}
317 | 
318 |     hi\\%
319 | 
320 | 
321 |     there
322 | 
323 |     \end{document}
324 |     hi\\%""")
325 |     assert len(list(soup2.document.contents)) == 4
326 | 
327 |     soup3 = TexSoup(r"""
328 |     \documentclass{article}
329 |     \usepackage{graphicx}
330 |     \begin{document}
331 |     \begin{equation}
332 |     \scalebox{2.0}{$x =
333 |     \begin{cases}
334 |     1, & \text{if } y=1 \\
335 |     0, & \text{otherwise}
336 |     \end{cases}$}
337 |     \end{equation}
338 |     \end{document}
339 |     """)
340 |     assert soup3.equation
341 |     assert soup3.scalebox
342 | 
343 | 
344 | def test_items_with_labels():
345 |     """Items can have labels with square brackets such as in the description
346 |     environment. See Issue #32."""
347 |     soup = TexSoup(r"""\begin{description}
348 |     \item[Python] a high-level general-purpose interpreted programming language.
349 |     \end{description}""")
350 |     assert "Python" in soup.description.item.args
351 | 
352 | 
353 | def test_multiline_args():
354 |     """Tests that macros with arguments are different lines are parsed
355 |     properly. See Issue #31."""
356 |     soup = TexSoup(r"""\mytitle{Essay title}
357 |     {Essay subheading.}""")
358 |     assert "Essay subheading." in soup.mytitle.args
359 |     # Only one newline allowed
360 |     soup = TexSoup(r"""\mytitle{Essay title}
361 | 
362 |     {Essay subheading.}""")
363 |     assert "Essay subheading." not in soup.mytitle.args
364 |     assert "Essay title" in soup.mytitle.args
365 |     soup = TexSoup(r"""\title{Arguments}
366 |     {appear}
367 |     \subtitle{everywhere}
368 |     in \LaTeX.
369 | 
370 |     \date{\today}
371 |     """)
372 |     assert "Arguments" in soup.title.args
373 |     assert "appear" in soup.title.args
374 |     assert "everywhere" in soup.subtitle.args
375 |     assert "\n    in " in list(soup.contents)
376 |     assert len(list(soup.contents)) == 6
377 | 
378 | 
379 | def test_nested_commands():
380 |     """Tests that nested commands are parsed correctly."""
381 |     soup = TexSoup(r'\emph{Some \textbf{bold} words}')
382 |     assert soup.textbf is not None
383 |     assert len(list(soup.emph.contents)) == 3
384 | 
385 | 
386 | def test_def_item():
387 |     """Tests that def with more 'complex' argument + item body parses."""
388 |     soup = TexSoup(r"""
389 |     \def\itemeqn{\item\abovedisplayskip=2pt\abovedisplayshortskip=0pt~\vspace*{-\baselineskip}}
390 |     """)
391 |     assert soup.item is not None
392 | 
393 | 
394 | def test_def_without_braces():
395 |     """Tests that def without braces around the new command parses correctly."""
396 |     soup = TexSoup(r"\def\acommandname{replacement text}")
397 |     assert len(soup.find("def").args) == 2
398 |     assert str(soup.find("def").args[0]) == r"\acommandname"
399 |     assert str(soup.find("def").args[1]) == "{replacement text}"
400 | 
401 | 
402 | def test_grouping_optional_argument():
403 |     """Tests that grouping occurs correctly"""
404 |     soup = TexSoup(r"\begin{Theorem}[The argopt contains {$]\int_\infty$} the square bracket]\end{Theorem}")
405 |     assert len(soup.Theorem.args) == 1
406 | 
407 | 
408 | def test_zero_argument_signatures():
409 |     """Tests that specific commands that do not take arguments are parsed correctly."""
410 |     soup = TexSoup(r"$\cap[\cup[\in[\notin[\infty[$")
411 |     assert len(soup.find("cap").args) == 0
412 |     assert len(soup.find("cup").args) == 0
413 |     assert len(soup.find("in").args) == 0
414 |     assert len(soup.find("notin").args) == 0
415 |     assert len(soup.find("infty").args) == 0
416 | 
417 |     soup = TexSoup(r"\begin{equation} \cup [0, \infty) \end{equation}")
418 |     assert len(soup.find("cup").args) == 0
419 | 
420 | 
421 | ##############
422 | # FORMATTING #
423 | ##############
424 | 
425 | 
426 | def test_basic_whitespace():
427 |     """Tests that basic text maintains whitespace."""
428 |     soup = TexSoup("""
429 |     Here is some text
430 |     with a line break
431 |     and awko      taco spacing
432 |     """)
433 |     assert len(str(soup).split('\n')) == 5, 'Line breaks not persisted.'
434 | 
435 | 
436 | def test_whitespace_in_command():
437 |     """Tests that whitespace in commands are maintained."""
438 |     soup = TexSoup(r"""
439 |     \begin{article}
440 |     \title {This title contains    a space}
441 |     \section {This title contains
442 |     line break}
443 |     \end{article}
444 |     """)
445 |     assert '    ' in soup.article.title.string
446 |     assert '\n' in soup.article.section.string
447 | 
448 | 
449 | def test_math_environment_whitespace():
450 |     """Tests that math environments are untouched."""
451 |     soup = TexSoup(r"""$$\lambda
452 |     \Sigma$$ But don't mind me \$3.00""")
453 |     children, contents = list(soup.children), list(soup.contents)
454 |     assert '\n' in str(children[0]), 'Whitesapce not preserved in math env.'
455 |     assert len(children) == 1 and children[0].name == '$$', 'Math env wrong'
456 |     assert r'\$' == contents[2], 'Dollar sign not escaped!'
457 |     soup = TexSoup(r"""\gamma = \beta\begin{notescaped}\gamma = \beta\end{notescaped}
458 |     \begin{equation*}\beta = \gamma\end{equation*}""")
459 |     assert str(soup.find('equation*')) == r'\begin{equation*}\beta = \gamma\end{equation*}'
460 |     assert str(soup).startswith(r'\gamma = \beta')
461 |     assert str(soup.notescaped) == r'\begin{notescaped}\gamma = \beta\end{notescaped}'
462 | 
463 | 
464 | def test_non_letter_commands():
465 |     """
466 |     Tests that non-letters are still captured as an escaped sequence
467 |     (whether valid or not).
468 |     """
469 |     for punctuation in '!@#$%^&*_+-=~`<>,./?;:|':
470 |         tex = r"""
471 |         \begin{{document}}
472 |         \lstinline{{\{} Word [a-z]+}}
473 |         \end{{document}}
474 |         """.format(punctuation)
475 |         soup = TexSoup(tex)
476 |         assert str(soup) == tex
477 | 
478 | 
479 | def test_math_environment_escape():
480 |     """Tests $ escapes in math environment."""
481 |     soup = TexSoup(r"$ \$ $")
482 |     contents = list(soup.contents)
483 |     assert r'\$' in contents[0][0], \
484 |         'Dollar sign not escaped! Contents: %s' % contents
485 | 
486 | 
487 | def test_punctuation_command_structure():
488 |     """Tests that commands for punctuation work."""
489 |     soup = TexSoup(r"""\right. \right[ \right( \right|
490 |     \right\langle \right\lfloor \right\lceil \right\ulcorner \big{ \bigg{
491 |     \Big{ \Bigg}""")
492 |     assert len(list(soup.contents)) == 12
493 |     assert len(list(soup.children)) == 12
494 | 
495 | 
496 | def test_non_punctuation_command_structure():
497 |     """Tests that normal commands do not include punctuation in the command.
498 | 
499 |     However, the asterisk is one exception.
500 |     """
501 |     soup = TexSoup(r"""\mycommand, hello""")
502 |     contents = list(soup.contents)
503 |     assert r'\mycommand' == str(contents[0]), 'Comma considered part of the command.'
504 | 
505 |     soup = TexSoup(r"""\hspace*{0.2in} hello \hspace*{2in} world""")
506 |     assert len(list(soup.contents)) == 4, '* not recognized as part of command.'
507 | 
508 | 
509 | def test_allow_unclosed_non_curly_braces():
510 |     """Tests that non-curly-brace 'delimiters' can be unclosed
511 | 
512 |     Non-curly-brace delimiters only cause parse errors when parsing arguments
513 |     for a command.
514 |     """
515 |     soup = TexSoup("[)")
516 |     assert len(list(soup.contents)) == 2
517 | 
518 |     soup = TexSoup(r"""
519 |     \documentclass{article}
520 |         \usepackage[utf8]{inputenc}
521 |     \begin{document}
522 |         \textbf{[}
523 |     \end{document}
524 |     """)
525 |     assert soup.textbf.string == '['
526 | 
527 |     soup = TexSoup("[regular text]")
528 |     contents = list(soup.contents)
529 |     assert isinstance(contents[0], str)
530 | 
531 |     soup = TexSoup("{regular text}[")
532 |     contents = list(soup.contents)
533 |     assert isinstance(contents[1], str)
534 | 
535 | 
536 | ##########
537 | # BUFFER #
538 | ##########
539 | 
540 | 
541 | def test_buffer():
542 |     from TexSoup.utils import Buffer
543 |     b = Buffer('abcdef')
544 |     assert b.forward_until(lambda s: s in 'def') == 'abc'
545 |     assert b.forward_until(lambda s: s in 'f') == 'de'
546 |     assert b.backward(5) == 'abcde'
547 |     assert b.forward_until(lambda s: s not in 'abc') == 'abc'
548 |     assert b.forward_until(lambda s: s in 'def') == ''
549 |     assert b.backward(3) == 'abc'
550 |     assert b.num_forward_until(lambda s: s in 'def') == 3
551 |     assert b.forward(3) == 'abc'
552 |     assert b.num_forward_until(lambda s: s in 'g') == 3
553 |     assert b.forward(3) == 'def'
554 |     assert b.num_forward_until(lambda s: s in 'z') == 0
555 |     assert b.backward(6) == 'abcdef'
556 |     assert b.num_forward_until(lambda s: s not in 'abc') == 3
557 | 
558 | 
559 | def test_to_buffer():
560 |     from TexSoup.utils import to_buffer
561 |     f = to_buffer(convert_out=False)(lambda x: x[:])
562 |     assert f('asdf') == 'asdf'
563 |     g = to_buffer(convert_out=False)(lambda x: x)
564 |     assert not g('').hasNext()
565 |     assert next(g('asdf')) == 'a'
566 |     h = to_buffer()(lambda x: x)
567 |     assert str(f('asdf')) == 'asdf'
568 | 
569 | ##########
570 | # ERRORS #
571 | ##########
572 | 
573 | 
574 | def test_unclosed_commands():
575 |     """Tests that unclosed commands result in an error."""
576 |     with pytest.raises(TypeError):
577 |         TexSoup(r"""\textit{hello""")
578 | 
579 |     with pytest.raises(TypeError):
580 |         TexSoup(r"""\textit{hello %}""")
581 | 
582 |     with pytest.raises(TypeError):
583 |         TexSoup(r"""\textit{hello \\%}""")
584 | 
585 | 
586 | def test_unclosed_environments():
587 |     """Tests that unclosed environment results in error."""
588 |     with pytest.raises(EOFError):
589 |         TexSoup(r"""\begin{itemize}\item haha""")
590 | 
591 | 
592 | def test_unclosed_math_environments():
593 |     """Tests that unclosed math environment results in error."""
594 |     with pytest.raises(EOFError):
595 |         TexSoup(r"""$$\min_x \|Xw-y\|_2^2""")
596 | 
597 |     with pytest.raises(EOFError):
598 |         TexSoup(r"""$\min_x \|Xw-y\|_2^2""")
599 | 
600 | 
601 | def test_arg_parse():
602 |     """Test arg parsing errors."""
603 |     from TexSoup.data import TexGroup
604 |     with pytest.raises(TypeError):
605 |         TexGroup.parse('{]')
606 | 
607 |     with pytest.raises(TypeError):
608 |         TexGroup.parse(r'\section[{')
609 | 
610 | 
611 | ###################
612 | # FAULT TOLERANCE #
613 | ###################
614 | 
615 | 
616 | def test_tolerance_env_unclosed():
617 |     """Test that unclosed envs are tolerated"""
618 |     with pytest.raises(EOFError):
619 |         TexSoup(r"""
620 |         \begin{enva}
621 |         \begin{envb}
622 |         \end{enva}
623 |         \end{envb}""")
624 | 
625 |     soup = TexSoup(r"""
626 |     \begin{enva}
627 |     \begin{envb}
628 |     \end{enva}
629 |     \end{envb}""", tolerance=1)
630 |     assert len(list(soup.enva.contents)) == 1
631 |     assert soup.end
632 | 
633 | def test_special_command():
634 |     """Test that we tolerate unclosed environments when in special mode."""
635 |     # source:
636 |     # https://github.com/alvinwan/TexSoup/issues/135#issuecomment-1705749106
637 |     texsrc = r"""
638 |     \documentclass[useAMS,usenatbib]{mnras}
639 |     \newcommand{\beq}{\begin{equation}}
640 |     \newcommand{\eeq}{\end{equation}}
641 | 
642 |     \begin{document}
643 |     \begin{itemize}
644 |     \item something
645 |     \end{itemize}
646 | 
647 |     \end{document}
648 |     """
649 |     soup = TexSoup(texsrc)
650 |     assert soup
651 | 


--------------------------------------------------------------------------------
/tests/test_search.py:
--------------------------------------------------------------------------------
 1 | from TexSoup import TexSoup
 2 | 
 3 | 
 4 | ###############
 5 | # BASIC TESTS #
 6 | ###############
 7 | 
 8 | 
 9 | #########
10 | # CASES #
11 | #########
12 | 
13 | 
14 | def test_commands_without_any_sort_arguments():
15 |     """Tests that commands without any sort argument can still be searched."""
16 |     soup = TexSoup(r"""
17 |     \Question \textbf{Question Title}
18 | 
19 |     Here is what chickens do:
20 | 
21 |     \sol{They fly!}
22 | 
23 |     \Question
24 |     \textbf{Question 2 Title}
25 |     """)
26 |     assert len(list(soup.find_all('Question'))) == 2
27 |     assert soup.find('section') is None
28 | 
29 | 
30 | def test_commands_with_one_or_more_arguments():
31 |     """Tests that commands with one or more argument can still be searched."""
32 |     soup = TexSoup(r"""
33 |     \section{Chikin Tales}
34 |     \subsection{Chikin Fly}
35 |     \section{Chikin Sequel}
36 |     """)
37 |     assert len(list(soup.find_all('section'))) == 2
38 |     assert soup.find('title') is None
39 | 
40 | 
41 | def test_list_search():
42 |     """Tests that giving a list to search returns all matches """
43 |     soup = TexSoup(r"""
44 |     \section*{Chikin Tales}
45 |     \subsection{Chikin Fly}
46 |     \section{Chikin Sequel}
47 |     """)
48 |     assert len(list(soup.find_all(['section', 'section*']))) == 2
49 | 


--------------------------------------------------------------------------------