├── .github └── workflows │ ├── codeql.yml │ ├── deploy-pypi.yml │ ├── tests-ci-js.yml │ └── tests-ci.yml ├── .gitignore ├── .readthedocs.yaml ├── LICENSE.txt ├── MANIFEST.in ├── README.rst ├── doc ├── Makefile ├── _static │ └── custom.css ├── changes.rst ├── conf.py ├── example_latex2text_custom_quotes.py ├── index.rst ├── latex2text.rst ├── latexencode.rst ├── latexnodes.nodes.rst ├── latexnodes.parsers.rst ├── latexnodes.rst ├── latexwalker.rst ├── macrospec.rst ├── new-in-pylatexenc-2.rst ├── new-in-pylatexenc-3.rst └── requirements.txt ├── js-transcrypt ├── .gitignore ├── .yarnrc.yml ├── README.md ├── generate_pylatexenc_js.py ├── import_pylatexenc_modules.py ├── libpatches │ ├── bisect.py │ ├── collections.py │ ├── customjspatches.js │ ├── functools.py │ ├── json.py │ ├── logging.js │ ├── unique_object_id.js │ └── unittest.py ├── my_test_script.py ├── mytestjscode │ ├── my_test_js_code.js │ ├── node_modules │ │ └── pylatexenc-js │ └── package.json ├── package.json ├── preprocesslib-pylatexenc.config.yaml ├── preprocesslib-tests.config.yaml ├── transcrypt_runtime_patches.js └── yarn.lock ├── poetry.lock ├── pylatexenc ├── __init__.py ├── _util.py ├── _util_support.py ├── latex2text │ ├── __init__.py │ ├── __main__.py │ ├── _defaultspecs.py │ └── _inputlatexfile.py ├── latexencode │ ├── __init__.py │ ├── __main__.py │ ├── _partial_latex_encoder.py │ ├── _rule.py │ ├── _uni2latexmap.py │ ├── _uni2latexmap_xml.py │ ├── _unicode_to_latex_encoder.py │ └── get_builtin_rules.py ├── latexnodes │ ├── __init__.py │ ├── _callablespecbase.py │ ├── _exctypes.py │ ├── _latex_recomposer.py │ ├── _latexcontextdbbase.py │ ├── _nodescollector.py │ ├── _parsedargs.py │ ├── _parsedargsinfo.py │ ├── _parsingstate.py │ ├── _parsingstatedelta.py │ ├── _token.py │ ├── _tokenreader.py │ ├── _tokenreaderbase.py │ ├── _walkerbase.py │ ├── nodes.py │ └── parsers │ │ ├── __init__.py │ │ ├── _base.py │ │ ├── _delimited.py │ │ ├── _expression.py │ │ ├── _generalnodes.py │ │ ├── _math.py │ │ 
├── _optionals.py │ │ ├── _stdarg.py │ │ └── _verbatim.py ├── latexwalker │ ├── __init__.py │ ├── __main__.py │ ├── _defaultspecs.py │ ├── _get_defaultspecs.py │ ├── _helpers.py │ ├── _legacy_py1x.py │ └── _walker.py ├── macrospec │ ├── __init__.py │ ├── _argumentsparser.py │ ├── _environmentbodyparser.py │ ├── _latexcontextdb.py │ ├── _macrocallparser.py │ ├── _pyltxenc2_argparsers │ │ ├── __init__.py │ │ ├── _base.py │ │ └── _verbatimargsparser.py │ ├── _specclasses.py │ └── _spechelpers.py └── version.py ├── pyproject.toml ├── setup.py ├── test ├── .gitignore ├── __init__.py ├── _helpers_tests.py ├── dummy │ └── readme.txt ├── test_2_latex2text.py ├── test_2_latexwalker.py ├── test_2_macrospec.py ├── test_input_1.tex ├── test_latexencode.py ├── test_latexencode_all.py ├── test_latexnodes_latex_recomposer.py ├── test_latexnodes_nodes.py ├── test_latexnodes_nodescollector.py ├── test_latexnodes_parsedargsinfo.py ├── test_latexnodes_parsers_delimited.py ├── test_latexnodes_parsers_expression.py ├── test_latexnodes_parsers_generalnodes.py ├── test_latexnodes_parsers_math.py ├── test_latexnodes_parsers_optionals.py ├── test_latexnodes_parsers_stdarg.py ├── test_latexnodes_parsers_verbatim.py ├── test_latexnodes_tokenreader.py ├── test_latexnodes_tokenreaderbase.py ├── test_macrospec_argumentsparser.py ├── test_macrospec_environmentbodyparser.py ├── test_macrospec_latexcontextdb.py ├── test_macrospec_macrocallparser.py ├── test_pylatexenc.py ├── test_util.py └── uni_chars_test_previous.txt └── tools ├── gen_l2t_from_lenc.py ├── gen_xml_dic.py ├── preprocess_lib.py ├── unicode.xml ├── unicode.xml.LICENSE └── utils_transcrypt_generate_js.py /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | schedule: 9 | - cron: "13 20 * * 3" 10 | 11 | jobs: 12 | analyze: 13 | name: Analyze 14 | runs-on: 
ubuntu-latest 15 | permissions: 16 | actions: read 17 | contents: read 18 | security-events: write 19 | 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | language: [ python ] 24 | 25 | steps: 26 | - name: Checkout 27 | uses: actions/checkout@v3 28 | 29 | - name: Initialize CodeQL 30 | uses: github/codeql-action/init@v2 31 | with: 32 | languages: ${{ matrix.language }} 33 | queries: +security-and-quality 34 | 35 | - name: Autobuild 36 | uses: github/codeql-action/autobuild@v2 37 | 38 | - name: Perform CodeQL Analysis 39 | uses: github/codeql-action/analyze@v2 40 | with: 41 | category: "/language:${{ matrix.language }}" 42 | -------------------------------------------------------------------------------- /.github/workflows/deploy-pypi.yml: -------------------------------------------------------------------------------- 1 | 2 | name: 'deploy-pypi' 3 | 4 | 5 | on: 6 | workflow_dispatch: {} 7 | 8 | jobs: 9 | deploy-pypi: 10 | 11 | runs-on: ubuntu-latest 12 | 13 | strategy: 14 | matrix: 15 | python-version: ["3.11"] 16 | poetry-version: ["1.4"] 17 | 18 | environment: env-deploy-pypi 19 | 20 | steps: 21 | - uses: actions/checkout@v3 22 | 23 | - uses: actions/setup-python@v4 24 | with: 25 | python-version: '${{ matrix.python-version }}' 26 | 27 | - name: 'Set up poetry - Run image' 28 | uses: abatilo/actions-poetry@v2 29 | with: 30 | poetry-version: '${{ matrix.poetry-version }}' 31 | 32 | - name: 'Poetry Build Package' 33 | run: poetry build 34 | 35 | - name: 'Poetry Publish package' 36 | run: 'poetry publish -p ${{ secrets.PYPI_API_TOKEN}} -u __token__' 37 | 38 | 39 | -------------------------------------------------------------------------------- /.github/workflows/tests-ci-js.yml: -------------------------------------------------------------------------------- 1 | 2 | name: 'tests-ci-js' 3 | 4 | 5 | on: 6 | push: 7 | branches: [ "main", "devel" ] 8 | pull_request: 9 | branches: [ "main", "devel" ] 10 | schedule: 11 | - cron: "13 20 * * 3" 12 | 13 | jobs: 14 | 
tests-ci-js: 15 | 16 | strategy: 17 | #fail-fast: true 18 | matrix: 19 | python-version: 20 | - "3.11" 21 | poetry-version: 22 | - "1.4" 23 | node-version: 24 | - "14" 25 | - "19" 26 | os: 27 | - 'ubuntu-latest' 28 | 29 | runs-on: '${{ matrix.os }}' 30 | 31 | steps: 32 | - uses: actions/checkout@v3 33 | 34 | # Install NodeJS 35 | - uses: actions/setup-node@v3 36 | with: 37 | node-version: '${{ matrix.node-version }}' 38 | 39 | - name: 'Setting up some node packages' 40 | run: 'npm i debug' 41 | 42 | # Poetry & Python are needed to transpile Python library 43 | - uses: actions/setup-python@v4 44 | with: 45 | python-version: '${{ matrix.python-version }}' 46 | 47 | - name: 'Set up poetry - Run image' 48 | uses: abatilo/actions-poetry@v2 49 | 50 | - name: 'Poetry Install (with buildjslib)' 51 | run: 'poetry install --with buildjslib' 52 | 53 | - name: 'Generate pylatexenc JS library' 54 | working-directory: 'js-transcrypt' 55 | run: 'poetry run python ./generate_pylatexenc_js.py --compile-tests' 56 | 57 | - name: 'Run JS tests' 58 | working-directory: 'js-transcrypt' 59 | run: 'node test-pylatexenc-js/runtests.js' 60 | -------------------------------------------------------------------------------- /.github/workflows/tests-ci.yml: -------------------------------------------------------------------------------- 1 | name: 'tests-ci' 2 | 3 | on: 4 | push: 5 | branches: [ "main", "devel" ] 6 | pull_request: 7 | branches: [ "main", "devel" ] 8 | schedule: 9 | - cron: "13 20 * * 3" 10 | 11 | jobs: 12 | tests-ci: 13 | 14 | strategy: 15 | #fail-fast: true 16 | matrix: 17 | python-version: 18 | - "3.8" 19 | - "3.9" 20 | - "3.10" 21 | - "3.11" 22 | - "3.12" 23 | - "3.13" 24 | os: 25 | - 'ubuntu-latest' 26 | include: 27 | # Deprecated system for github actions :/ 28 | # - python-version: "3.4" 29 | # os: "ubuntu-18.04" 30 | # - python-version: "2.7" 31 | # os: 'ubuntu-20.04' 32 | - python-version: "3.6" 33 | os: 'ubuntu-20.04' 34 | - python-version: "3.7" 35 | os: 'ubuntu-20.04' 
36 | 37 | runs-on: '${{ matrix.os }}' 38 | 39 | steps: 40 | - uses: actions/checkout@v3 41 | - uses: actions/setup-python@v4 42 | with: 43 | python-version: '${{ matrix.python-version }}' 44 | 45 | - name: 'Install dependencies' 46 | run: |- 47 | pip install pytest toml; if [ "${{ matrix.python-version }}" == "2.7" ]; then pip install chainmap; fi 48 | 49 | - name: 'Run tests' 50 | run: pytest 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *~ 3 | 4 | .cache 5 | 6 | *.py[cdo] 7 | pylatexenc.egg-info 8 | 9 | doc/_build 10 | 11 | dist 12 | build 13 | 14 | tools/error.log 15 | tools/output.log 16 | tools/transcryptable_output 17 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # 2 | # Configuration for readthedocs.org 3 | # 4 | 5 | # See poetry builds on RTD: 6 | # https://docs.readthedocs.io/en/stable/build-customization.html#install-dependencies-with-poetry 7 | 8 | version: 2 9 | 10 | build: 11 | 12 | os: "ubuntu-22.04" 13 | 14 | tools: 15 | python: "3.10" 16 | 17 | jobs: 18 | 19 | post_create_environment: 20 | # Install poetry 21 | # https://python-poetry.org/docs/#installing-manually 22 | - 'pip install "poetry>=1.4"' 23 | 24 | post_install: 25 | # Install dependencies with 'docs' dependency group 26 | # https://python-poetry.org/docs/managing-dependencies/#dependency-groups 27 | # VIRTUAL_ENV needs to be set manually for now. 
28 | # See https://github.com/readthedocs/readthedocs.org/pull/11152/ 29 | - 'VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH poetry install --with builddoc' 30 | 31 | 32 | sphinx: 33 | configuration: doc/conf.py 34 | builder: 'dirhtml' 35 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015-2023 Philippe Faist 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.txt 2 | include test/uni_chars_test_previous.txt test/test_input_1.tex test/dummy/readme.txt 3 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | pylatexenc 2 | ========== 3 | 4 | Simple LaTeX parser providing latex-to-unicode and unicode-to-latex conversion 5 | 6 | .. image:: https://img.shields.io/github/license/phfaist/pylatexenc.svg?style=flat 7 | :target: https://github.com/phfaist/pylatexenc/blob/master/LICENSE.txt 8 | 9 | .. image:: https://img.shields.io/pypi/v/pylatexenc.svg?style=flat 10 | :target: https://pypi.org/project/pylatexenc/ 11 | 12 | Python: ≥ 3.4 or ≥ 2.7. The library is designed to be as backwards-compatible as 13 | reasonably possible and is able to run on old python versions should it be 14 | necessary. (Use the setup.py script directly if you have python<3.7, poetry 15 | doesn't seem to work with old python versions.) 16 | 17 | **NEW (4/2023)**: *PYLATEXENC 3.0alpha* is in pre-release on PyPI. See `new features 18 | and major changes `_. 19 | The `documentation `_ is still 20 | incomplete, and the new APIs are still subject to changes. The code is meant 21 | to be as backwards compatible as is reasonably possible. Feel free to try it 22 | out & submit feedback! 23 | 24 | 25 | Unicode Text to LaTeX code 26 | -------------------------- 27 | 28 | The ``pylatexenc.latexencode`` module provides a function ``unicode_to_latex()`` 29 | which converts a unicode string into LaTeX text and escape sequences. It should 30 | recognize accented characters and most math symbols. A couple of switches allow 31 | you to alter how this function behaves. 
32 | 33 | You can also run ``latexencode`` in command-line to convert plain unicode text 34 | (from the standard input or from files given on the command line) into LaTeX 35 | code, written on to the standard output. 36 | 37 | A third party plug-in for Vim 38 | `vim-latexencode `_ 39 | by `@Konfekt `_ 40 | provides a corresponding command to operate on a given range. 41 | 42 | 43 | Parsing LaTeX code & converting to plain text (unicode) 44 | ------------------------------------------------------- 45 | 46 | The ``pylatexenc.latexwalker`` module provides a series of routines that parse 47 | the LaTeX structure of given LaTeX code and returns a logical structure of 48 | objects, which can then be used to produce output in another format such as 49 | plain text. This is not a replacement for a full (La)TeX engine, rather, this 50 | module provides a way to parse a chunk of LaTeX code as mark-up code. 51 | 52 | The ``pylatexenc.latex2text`` module builds up on top of 53 | ``pylatexenc.latexwalker`` and provides functions to convert given LaTeX code to 54 | plain text with unicode characters. 55 | 56 | You can also run ``latex2text`` in command-line to convert LaTeX input (either 57 | from the standard input, or from files given on the command line) into plain 58 | text written on the standard output. 59 | 60 | 61 | Documentation 62 | ------------- 63 | 64 | Full documentation is available at https://pylatexenc.readthedocs.io/. 65 | 66 | To build the documentation manually, run:: 67 | 68 | > poetry install --with=builddoc 69 | > cd doc/ 70 | doc> poetry run make html 71 | 72 | 73 | License 74 | ------- 75 | 76 | See LICENSE.txt (MIT License). 77 | 78 | NOTE: See copyright notice and license information for file 79 | ``tools/unicode.xml`` provided in ``tools/unicode.xml.LICENSE``. (The file 80 | ``tools/unicode.xml`` was downloaded from 81 | https://www.w3.org/2003/entities/2007xml/unicode.xml as linked from 82 | https://www.w3.org/TR/xml-entity-names/#source.) 
83 | 84 | 85 | Javascript Library 86 | ------------------ 87 | 88 | Some core parts of this library can be transcribed to JavaScript. This feature 89 | is used by (and was developed for) my `Flexible Latex-like Markup 90 | project `_. See the *js-transcrypt/* folder and 91 | its `README file `_. 92 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = pylatexenc 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /doc/_static/custom.css: -------------------------------------------------------------------------------- 1 | /*@import url('https://fonts.googleapis.com/css?family=Open+Sans:400,400i,600,600i');*/ 2 | @import url('https://fonts.googleapis.com/css2?family=Fira+Mono:wght@400;500&family=Fira+Sans:ital,wght@0,400;0,600;1,400;1,600&display=swap'); 3 | 4 | 5 | div.body p, div.body dd, div.body li, div.body blockquote { 6 | -moz-hyphens: none; 7 | hyphens: none; 8 | } 9 | 10 | div.document { 11 | margin-top: 10px; 12 | } 13 | 14 | 15 | /* SIDEBAR */ 16 | 17 | div.sphinxsidebar { 18 | font-size: 0.9rem; 19 | line-height: inherit; 20 | 21 | /*background-color: rgba(120,80,50,0.1);*/ 22 | box-shadow: 3px 5px 10px rgba(0,0,0,0.1); 23 | border-right: 2px solid rgba(120,80,50,0.1); 24 | padding-right: 10px; 25 | box-sizing: border-box; 26 | 27 | border-radius: 12px; 28 | 29 | background-color: rgba(255,255,255,0.8); 30 | } 31 | 32 | @media screen and (max-width: 875px) { 33 | div.sphinxsidebar { 34 | width: 100vw; 35 | background-color: #333; 36 | } 37 | } 38 | 39 | div.sphinxsidebar ul { 40 | list-style: square; 41 | margin-left: 10px; 42 | } 43 | div.sphinxsidebar ul > li { 44 | margin-bottom: 0.2em; 45 | } 46 | div.sphinxsidebar ul ul { 47 | list-style: url(''); 48 | } 49 | 50 | 51 | /* BODY */ 52 | 53 | div.body { 54 | padding-left: 25px; 55 | padding-right: 0px; 56 | padding-top: 1em; /*0px;*/ 57 | padding-bottom: 0px; 58 | } 59 | 60 | div.body dl { 61 | margin-bottom: 1em; 62 | } 63 | 64 | div.body li { 65 | margin-bottom: 0.2em; 66 | } 67 | div.body li > ul { 68 | margin-top: 0.2em; 69 | margin-bottom: 0.2em; 70 | } 71 | 72 | div.body section > dl, 73 | div.body .section > dl { 74 | background: rgba(120,80,50,0.05); 75 | padding: 1.5rem 2rem; 76 | border-radius: 1rem; 77 | margin-bottom: 2rem; 78 | 
box-shadow: 3px 3px 6px rgba(0,0,0,0.1); 79 | } 80 | div.body section > dl > dt, 81 | div.body .section > dl > dt { 82 | margin-bottom: 1.5em; 83 | } 84 | div.body section > dl > dd, 85 | div.body .section > dl > dd { 86 | margin-left: 0px; 87 | } 88 | 89 | div.body li > blockquote { 90 | margin-left: 0px; 91 | } 92 | 93 | 94 | 95 | .sig { 96 | /*font-family: 'Roboto', sans-serif;*/ 97 | font-family: inherit; 98 | font-weight: 350; 99 | color: rgb(100,100,100); 100 | letter-spacing: 0.4px; 101 | } 102 | .sig pre { 103 | font-family: inherit; 104 | } 105 | .sig > em.property:first-child { 106 | font-style: normal; 107 | font-variant: small-caps; 108 | display: block; 109 | } 110 | .sig-prename { 111 | font-size: 0.9em; 112 | } 113 | .sig-name { 114 | color: #000; 115 | font-weight: 550; 116 | font-size: inherit; 117 | } 118 | .sig-param { 119 | color: #000; 120 | font-size: 0.95em; 121 | } 122 | 123 | 124 | pre, tt, code { 125 | letter-spacing: -.2pt; /* condense mono/code font a bit */ 126 | } 127 | 128 | tt, code { 129 | background-color: #b7a48629; 130 | } 131 | 132 | code { 133 | color: #000; 134 | font-weight: 550; 135 | padding: 2px 3px; 136 | border-radius: 4px; 137 | } 138 | code.xref.py { 139 | font-family: inherit; 140 | letter-spacing: 0.2pt; 141 | padding: 0px 0px; 142 | border-radius: 0px; 143 | } 144 | code.xref, a code { 145 | font-weight: 550; 146 | } 147 | 148 | /* link colors */ 149 | a, a code.xref { 150 | color: rgb(117, 63, 10); 151 | } 152 | 153 | 154 | /* when we have a local TOC, don't make headers into prominent links */ 155 | a.toc-backref { 156 | color: inherit; 157 | text-decoration: inherit; 158 | } 159 | 160 | 161 | .versionmodified { 162 | color: rgb(125, 42, 109); 163 | } 164 | 165 | 166 | 167 | /* --- */ 168 | 169 | nav.contents, aside.topic, div.topic { 170 | border: 0px none; 171 | background-color: rgba(120,80,50,0.05); 172 | border-radius: 10px; 173 | padding: 20px; 174 | box-shadow: 3px 3px 6px rgba(0,0,0,0.1); 175 | } 176 | 177 
| 178 | div.admonition { 179 | border-radius: 15px; 180 | } 181 | -------------------------------------------------------------------------------- /doc/changes.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Changes 3 | ============ 4 | 5 | 6 | pylatexenc 3.0alpha 7 | =================== 8 | 9 | (Still in development.) 10 | 11 | .. toctree:: 12 | :maxdepth: 1 13 | 14 | new-in-pylatexenc-3 15 | 16 | - see in particular the :ref:`list of changes that might affect existing code 17 | ` if you're using some more 18 | advanced features of `pylatexenc`. 19 | 20 | 21 | 22 | 23 | pylatexenc 2.10 24 | =============== 25 | 26 | - Added :py:class:`pylatexenc.latexencode.PartialLatexToLatexEncoder` which can 27 | help to avoid double-escaping some existing LaTeX expressions in the string 28 | that we want to LaTeX-encode. 29 | 30 | - The :py:class:`pylatexenc.latexencode.UnicodeToTextConversionRule` now has a 31 | :py:attr:`~pylatexenc.latexencode.UnicodeToTextConversionRule.replacement_text_protection` 32 | field which can be used to override the default `replacement_text_protection` 33 | set on a :py:class:`pylatexenc.latexencode.UnicodeToTextEncoder` object. Also 34 | the `replacement_text_protection` fields accept an arbitrary callable object. 35 | 36 | - added some known macro definitions for :py:mod:`~pylatexenc.latexwalker` and 37 | :py:mod:`~pylatexenc.latex2text`, such as ``\definecolor`` and ``\textcolor``. 38 | 39 | - Bug fixes (including :issue:`57`) 40 | 41 | pylatexenc 2.9 42 | ============== 43 | 44 | - Bug fixes (including issues :issue:`49`, :issue:`51`, :pr:`52`, :pr:`53`, 45 | :pr:`54`) 46 | 47 | pylatexenc 2.8 48 | ============== 49 | 50 | - `latex2text` module: Basic support for array and matrix environments. 51 | Matrices are represented inline, in the form ``[ a b; c d ]``. 
52 | 53 | - `latexencode` bugfix (issue :issue:`44`) 54 | 55 | pylatexenc 2.7 56 | ============== 57 | 58 | - Bug fix: the parser now disambiguates ``$$`` as either a display math 59 | delimiter or two inline math delimiters as in ``$a$$b$`` (issue :issue:`43`) 60 | 61 | pylatexenc 2.6 62 | ============== 63 | 64 | - In `latex2text`: 65 | 66 | + Bug fix: default behavior of the `strict_latex_spaces` option in the 67 | :py:class:`pylatexenc.latex2text.LatexNodes2Text()` constructor 68 | 69 | + fix ``\le``, ``\ge``, ``\leqslant``, ``\geqslant`` (issue :issue:`41`) 70 | 71 | + reorganized the default latex symbol categories 72 | 73 | 74 | pylatexenc 2.5 75 | ============== 76 | 77 | - `latex2text`: Add support for ``\mathbb{}``, ``\mathbf{}`` and some friends 78 | (issue :issue:`40`) 79 | 80 | pylatexenc 2.4 81 | ============== 82 | 83 | - Bug fixes in how `latex2text` attempts to recover from parse errors in 84 | tolerant mode 85 | 86 | pylatexenc 2.3 87 | ============== 88 | 89 | - Minor bug fixes in `latex2text` 90 | 91 | 92 | pylatexenc 2.2 93 | ============== 94 | 95 | Version 2.2 brings a few minor bug fixes and improvements over version 2.1: 96 | 97 | - `pylatexenc.latex2text` supports more LaTeX symbols 98 | 99 | - `latex2text` and `latexwalker` command-line utilities accept a new `-c` option 100 | where you can directly specify LaTeX code 101 | 102 | - minor bug fixes 103 | 104 | 105 | pylatexenc 2.1 106 | ============== 107 | 108 | Version 2.1 brings a few minor bug fixes to version 2.0. 109 | 110 | 111 | pylatexenc 2.0 112 | ============== 113 | 114 | .. toctree:: 115 | :maxdepth: 1 116 | 117 | new-in-pylatexenc-2 118 | 119 | - see in particular the :ref:`list of changes that might affect existing code 120 | ` if you're using some advanced features of 121 | `pylatexenc`. 122 | 123 | 124 | pylatexenc 1.x 125 | ============== 126 | 127 | See description of updates and changes on the `github releases page 128 | `_. 
129 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # pylatexenc documentation build configuration file, created by 5 | # sphinx-quickstart on Mon Apr 24 16:32:21 2017. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | import os.path 21 | import sys 22 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 23 | 24 | import pylatexenc.version 25 | 26 | # -- General configuration ------------------------------------------------ 27 | 28 | # If your documentation needs a minimal Sphinx version, state it here. 29 | # 30 | # needs_sphinx = '1.0' 31 | 32 | # Add any Sphinx extension module names here, as strings. They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 35 | extensions = [ 36 | 'sphinx.ext.autodoc', 37 | 'sphinx.ext.intersphinx', 38 | 'sphinx.ext.mathjax', 39 | 40 | 'sphinx_issues', 41 | ] 42 | 43 | # Add any paths that contain templates here, relative to this directory. 44 | templates_path = ['_templates'] 45 | 46 | # The suffix(es) of source filenames. 47 | # You can specify multiple suffix as a list of string: 48 | # 49 | # source_suffix = ['.rst', '.md'] 50 | source_suffix = '.rst' 51 | 52 | # The master toctree document. 
53 | master_doc = 'index' 54 | 55 | # General information about the project. 56 | project = 'pylatexenc' 57 | copyright = '2023, Philippe Faist' 58 | author = 'Philippe Faist' 59 | 60 | # The version info for the project you're documenting, acts as replacement for 61 | # |version| and |release|, also used in various other places throughout the 62 | # built documents. 63 | # 64 | # The short X.Y version. 65 | version = pylatexenc.version.version_str 66 | # The full version, including alpha/beta/rc tags. 67 | release = version 68 | 69 | # The language for content autogenerated by Sphinx. Refer to documentation 70 | # for a list of supported languages. 71 | # 72 | # This is also used if you do content translation via gettext catalogs. 73 | # Usually you set "language" from the command line for these cases. 74 | #language = None 75 | 76 | # List of patterns, relative to source directory, that match files and 77 | # directories to ignore when looking for source files. 78 | # This patterns also effect to html_static_path and html_extra_path 79 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 80 | 81 | # The name of the Pygments (syntax highlighting) style to use. 82 | pygments_style = 'sphinx' 83 | 84 | # If true, `todo` and `todoList` produce output, else they produce nothing. 
85 | todo_include_todos = False 86 | 87 | 88 | 89 | #autodoc_docstring_signature = True 90 | autodoc_member_order = 'bysource' 91 | autodoc_inherit_docstrings = False 92 | 93 | # autodoc_default_options = { 94 | # 'members': True, # 'var1, var2', 95 | # #'member-order': 'bysource', 96 | # 'special-members': '__init__', 97 | # #'undoc-members': True, 98 | # #'exclude-members': '__weakref__' 99 | # } 100 | 101 | 102 | # -- Options for sphinx_issues -------------------------------------------- 103 | 104 | # GitHub repo 105 | issues_github_path = "phfaist/pylatexenc" 106 | 107 | 108 | # -- Options for HTML output ---------------------------------------------- 109 | 110 | # The theme to use for HTML and HTML Help pages. See the documentation for 111 | # a list of builtin themes. 112 | # 113 | 114 | html_theme = 'alabaster' 115 | 116 | 117 | # import guzzle_sphinx_theme 118 | # html_theme_path = guzzle_sphinx_theme.html_theme_path() 119 | # html_theme = 'guzzle_sphinx_theme' 120 | # html_style = 'custom.css' /* don't forget to add ''' @import url("guzzle.css"); ''' */ 121 | 122 | #import sphinx_bootstrap_theme 123 | #html_theme_path = sphinx_bootstrap_theme.get_html_theme_path() 124 | #html_theme = 'bootstrap' 125 | 126 | 127 | 128 | # Theme options are theme-specific and customize the look and feel of a theme 129 | # further. For a list of options available for each theme, see the 130 | # documentation. 
131 | # 132 | html_theme_options = { 133 | 'font_family': 'Fira Sans', 134 | 'font_size': '15px', 135 | 'head_font_family': 'Fira Sans', 136 | 'code_font_family': 'Fira Mono', 137 | 'github_user': 'phfaist', 138 | 'github_repo': 'pylatexenc', 139 | 'github_button': True, 140 | 'github_type': 'star', 141 | 'github_count': 'true', 142 | 143 | 'fixed_sidebar': True, 144 | 'page_width': '950px', 145 | 'sidebar_width': '220px', 146 | } 147 | html_sidebars = { 148 | '**': [ 149 | 'about.html', 150 | 'navigation.html', 151 | 'relations.html', 152 | 'searchbox.html', 153 | # 'donate.html', 154 | ] 155 | } 156 | 157 | # Add any paths that contain custom static files (such as style sheets) here, 158 | # relative to this directory. They are copied after the builtin static files, 159 | # so a file named "default.css" will overwrite the builtin "default.css". 160 | html_static_path = [ '_static' ] 161 | 162 | 163 | # -- Options for HTMLHelp output ------------------------------------------ 164 | 165 | # Output file base name for HTML help builder. 166 | htmlhelp_basename = 'pylatexencdoc' 167 | 168 | 169 | # -- Options for LaTeX output --------------------------------------------- 170 | 171 | latex_elements = { 172 | # The paper size ('letterpaper' or 'a4paper'). 173 | # 174 | # 'papersize': 'letterpaper', 175 | 176 | # The font size ('10pt', '11pt' or '12pt'). 177 | # 178 | # 'pointsize': '10pt', 179 | 180 | # Additional stuff for the LaTeX preamble. 181 | # 182 | # 'preamble': '', 183 | 184 | # Latex figure (float) alignment 185 | # 186 | # 'figure_align': 'htbp', 187 | } 188 | 189 | # Grouping the document tree into LaTeX files. List of tuples 190 | # (source start file, target name, title, 191 | # author, documentclass [howto, manual, or own class]). 
192 | latex_documents = [ 193 | (master_doc, 'pylatexenc.tex', 'pylatexenc Documentation', 194 | 'Philippe Faist', 'manual'), 195 | ] 196 | 197 | 198 | # -- Options for manual page output --------------------------------------- 199 | 200 | # One entry per manual page. List of tuples 201 | # (source start file, name, description, authors, manual section). 202 | man_pages = [ 203 | (master_doc, 'pylatexenc', 'pylatexenc Documentation', 204 | [author], 1) 205 | ] 206 | 207 | 208 | # -- Options for Texinfo output ------------------------------------------- 209 | 210 | # Grouping the document tree into Texinfo files. List of tuples 211 | # (source start file, target name, title, author, 212 | # dir menu entry, description, category) 213 | texinfo_documents = [ 214 | (master_doc, 'pylatexenc', 'pylatexenc Documentation', 215 | author, 'pylatexenc', 'One line description of project.', 216 | 'Miscellaneous'), 217 | ] 218 | 219 | 220 | 221 | 222 | # Example configuration for intersphinx: refer to the Python standard library. 
223 | intersphinx_mapping = { 224 | 'python': ('https://docs.python.org/3', None) 225 | } 226 | -------------------------------------------------------------------------------- /doc/example_latex2text_custom_quotes.py: -------------------------------------------------------------------------------- 1 | from pylatexenc import latexwalker, latex2text, macrospec 2 | 3 | # 4 | # Define macros, environments, specials for the *parser* 5 | # 6 | lw_context_db = latexwalker.get_default_latex_context_db() 7 | lw_context_db.add_context_category( 8 | 'my-quotes', 9 | prepend=True, 10 | macros=[ 11 | macrospec.MacroSpec("putindblquotes", "{"), 12 | macrospec.MacroSpec("putinquotes", "[[{"), 13 | ], 14 | environments=[ 15 | macrospec.EnvironmentSpec("indblquotes", ""), 16 | macrospec.EnvironmentSpec("inquotes", "[["), 17 | ], 18 | specials=[ 19 | macrospec.SpecialsSpec("`"), 20 | macrospec.SpecialsSpec("'"), 21 | macrospec.SpecialsSpec("``"), 22 | macrospec.SpecialsSpec("''"), 23 | ], 24 | ) 25 | 26 | # 27 | # Implement macros, environments, specials for the *conversion to text* 28 | # 29 | 30 | def _get_optional_arg(node, default, l2tobj): 31 | """Helper that returns the `node` converted to text, or `default` 32 | if the node is `None` (e.g. an optional argument that was not 33 | specified)""" 34 | if node is None: 35 | return default 36 | return l2tobj.nodelist_to_text([node]) 37 | 38 | def put_in_quotes_macro_repl(n, l2tobj): 39 | """Get the text replacement for the macro 40 | \putinquotes[open-quote][close-quote]{text}""" 41 | if not n.nodeargd: 42 | # n.nodeargd can be empty if e.g. \putinquotes was a single 43 | # token passed as an argument to a macro, 44 | # e.g. \newcommand\putinquotes... 
45 | return '' 46 | open_q_s = _get_optional_arg(n.nodeargd.argnlist[0], '“', l2tobj) 47 | close_q_s = _get_optional_arg(n.nodeargd.argnlist[1], '”', l2tobj) 48 | return (open_q_s + l2tobj.nodelist_to_text([n.nodeargd.argnlist[2]]) 49 | + close_q_s) 50 | 51 | def in_quotes_env_repl(n, l2tobj): 52 | """Get the text replacement for the {inquotes} environment""" 53 | open_q_s = _get_optional_arg(n.nodeargd.argnlist[0], '“', l2tobj) 54 | close_q_s = _get_optional_arg(n.nodeargd.argnlist[1], '”', l2tobj) 55 | return open_q_s + l2tobj.nodelist_to_text(n.nodelist) + close_q_s 56 | 57 | l2t_context_db = latex2text.get_default_latex_context_db() 58 | l2t_context_db.add_context_category( 59 | 'my-quotes', 60 | prepend=True, 61 | macros=[ 62 | latex2text.MacroTextSpec("putindblquotes", 63 | simplify_repl=r'“%(1)s”'), 64 | latex2text.MacroTextSpec("putinquotes", 65 | simplify_repl=put_in_quotes_macro_repl), 66 | ], 67 | environments=[ 68 | latex2text.EnvironmentTextSpec("indblquotes", 69 | simplify_repl=r'“%(body)s”'), 70 | latex2text.EnvironmentTextSpec("inquotes", 71 | simplify_repl=in_quotes_env_repl), 72 | ], 73 | specials=[ 74 | latex2text.SpecialsTextSpec('`', "‘"), 75 | latex2text.SpecialsTextSpec("'", "’"), 76 | latex2text.SpecialsTextSpec('``', "“"), 77 | latex2text.SpecialsTextSpec("''", "”"), 78 | ], 79 | ) 80 | 81 | 82 | # 83 | # Here is an example usage: 84 | # 85 | 86 | def custom_latex_to_text( input_latex ): 87 | # the latex parser instance with custom latex_context 88 | lw_obj = latexwalker.LatexWalker(input_latex, 89 | latex_context=lw_context_db) 90 | # parse to node list 91 | nodelist, pos, length = lw_obj.get_latex_nodes() 92 | # initialize the converter to text with custom latex_context 93 | l2t_obj = latex2text.LatexNodes2Text(latex_context=l2t_context_db) 94 | # convert to text 95 | return l2t_obj.nodelist_to_text( nodelist ) 96 | 97 | 98 | print(custom_latex_to_text( 99 | r"""\begin{inquotes}[`][']Hello, world\end{inquotes}""")) 100 | # ‘Hello, world’ 
101 | 102 | print(custom_latex_to_text(r"""\putinquotes[``]['']{Hello, world}""")) 103 | # “Hello, world” 104 | 105 | print(custom_latex_to_text(r"""\putinquotes{Hello, world}""")) 106 | # “Hello, world” 107 | 108 | print(custom_latex_to_text(r"""\putinquotes[`][']{Hello, world}""")) 109 | # ‘Hello, world’ 110 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. pylatexenc documentation master file, created by 2 | sphinx-quickstart on Mon Apr 24 16:32:21 2017. 3 | You can adapt this file completely to your liking, 4 | but it should at least contain the root `toctree` directive. 5 | 6 | Welcome to pylatexenc's documentation! 7 | ====================================== 8 | 9 | [pylatexenc version: |version|] 10 | 11 | A simple LaTeX parser providing latex-to-unicode and unicode-to-latex conversion. 12 | 13 | Quick example:: 14 | 15 | >>> from pylatexenc.latex2text import LatexNodes2Text 16 | >>> latex = r"""\textbf{Hi there!} Here is \emph{an equation}: 17 | ... \begin{equation} 18 | ... \zeta = x + i y 19 | ... \end{equation} 20 | ... where $i$ is the imaginary unit. 21 | ... """ 22 | >>> print(LatexNodes2Text().latex_to_text(latex)) 23 | Hi there! Here is an equation: 24 | 25 | ζ = x + i y 26 | 27 | where i is the imaginary unit. 28 | 29 | And the other way around:: 30 | 31 | >>> from pylatexenc.latexencode import unicode_to_latex 32 | >>> text = "À votre santé!" 33 | >>> print(unicode_to_latex(text)) 34 | \`A votre sant\'e! 35 | 36 | 37 | You can also use these utilities directly in command line, e.g.:: 38 | 39 | $ echo 'À votre santé!' | latexencode 40 | \`A votre sant\'e! 41 | 42 | 43 | Documentation 44 | ============= 45 | 46 | 47 | .. 
toctree:: 48 | :maxdepth: 2 49 | :caption: Contents: 50 | 51 | latexnodes 52 | macrospec 53 | latexwalker 54 | latex2text 55 | latexencode 56 | changes 57 | 58 | 59 | Indices and tables 60 | ================== 61 | 62 | * :ref:`genindex` 63 | * :ref:`modindex` 64 | * :ref:`search` 65 | -------------------------------------------------------------------------------- /doc/latex2text.rst: -------------------------------------------------------------------------------- 1 | `latex2text` — Simple Latex to Text Converter 2 | --------------------------------------------- 3 | 4 | .. automodule:: pylatexenc.latex2text 5 | :no-undoc-members: 6 | 7 | .. contents:: Contents: 8 | :local: 9 | 10 | 11 | Custom latex conversion rules: A simple template 12 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | Here is a short introduction on how to customize the way that 15 | :py:class:`~pylatexenc.latex2text.LatexNodes2Text` converts LaTeX constructs 16 | (macros, environments, and specials) to unicode text. You can start off with 17 | the example template below and adapt it to your needs. 18 | 19 | Macros, environments and specials are parsed as corresponding node objects by 20 | the parser (see :py:class:`pylatexenc.latexwalker.LatexMacroNode`, 21 | :py:class:`pylatexenc.latexwalker.LatexEnvironmentNode`, and 22 | :py:class:`pylatexenc.latexwalker.LatexSpecialsNode`). These node objects are 23 | then converted to unicode text by the 24 | :py:class:`~pylatexenc.latex2text.LatexNodes2Text` object. 25 | 26 | You can define new macros, environments, or specials, or override existing 27 | definitions. The definitions need to be provided twice. First, at the level of 28 | the parser using the :py:mod:`~pylatexenc.macrospec` module; the parser needs to 29 | know the argument structure of your macros, environments, and specials, along 30 | with which characters to recognize as "specials". 
Second, at the level of 31 | `latex2text`, you need to specify what the replacement strings are for the 32 | different LaTeX constructs after they have been parsed into the latex node tree 33 | by the parser. 34 | 35 | The following template is a simple illustrative example that implements the 36 | following definitions: 37 | 38 | - A new macro ``\putinquotes[`][']{text}`` that puts its mandatory argument 39 | into quotes defined by the two optional arguments. Let's say that the 40 | default quotes that are used are `````` and ``''``. Another simpler macro 41 | ``\putindblquotes{text}`` is also provided for the sake of the example. 42 | 43 | - A new environment ``\begin{inquotes}[`]['] ... \end{inquotes}`` that does 44 | the same thing as its macro equivalent. Another simpler environment 45 | ``\begin{indblquotes}...\end{indblquotes}`` is also provided for the sake of 46 | the example. 47 | 48 | - The usual LaTeX quote symbols `````, ``````, ``'``, and ``''`` for unicode 49 | quotes. (See also issue :issue:`39`) 50 | 51 | Here is the code (see also docs for :py:class:`pylatexenc.macrospec.MacroSpec`, 52 | :py:class:`pylatexenc.macrospec.EnvironmentSpec`, 53 | :py:class:`pylatexenc.macrospec.SpecialsSpec`, as well as 54 | :py:class:`pylatexenc.latex2text.MacroTextSpec`, 55 | :py:class:`pylatexenc.latex2text.EnvironmentTextSpec`, 56 | :py:class:`pylatexenc.latex2text.SpecialsTextSpec`): 57 | 58 | .. literalinclude:: example_latex2text_custom_quotes.py 59 | :language: python 60 | 61 | 62 | Latex to Text Converter Class 63 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 64 | 65 | .. autoclass:: pylatexenc.latex2text.LatexNodes2Text 66 | :members: 67 | 68 | 69 | .. autofunction:: pylatexenc.latex2text.get_default_latex_context_db 70 | 71 | 72 | 73 | Define replacement texts 74 | ~~~~~~~~~~~~~~~~~~~~~~~~ 75 | 76 | .. autoclass:: pylatexenc.latex2text.MacroTextSpec 77 | :members: 78 | 79 | .. autoclass:: pylatexenc.latex2text.EnvironmentTextSpec 80 | :members: 81 | 82 | .. 
autoclass:: pylatexenc.latex2text.SpecialsTextSpec 83 | :members: 84 | 85 | 86 | 87 | Obsolete members 88 | ~~~~~~~~~~~~~~~~ 89 | 90 | .. autofunction:: pylatexenc.latex2text.EnvDef 91 | 92 | .. autofunction:: pylatexenc.latex2text.MacroDef 93 | 94 | 95 | .. autodata:: pylatexenc.latex2text.default_env_dict 96 | :annotation: 97 | 98 | .. autodata:: pylatexenc.latex2text.default_macro_dict 99 | :annotation: 100 | 101 | .. autodata:: pylatexenc.latex2text.default_text_replacements 102 | :annotation: 103 | -------------------------------------------------------------------------------- /doc/latexencode.rst: -------------------------------------------------------------------------------- 1 | `latexencode` — Encode Unicode to LaTeX 2 | --------------------------------------- 3 | 4 | .. automodule:: pylatexenc.latexencode 5 | :no-undoc-members: 6 | 7 | .. contents:: Contents: 8 | :local: 9 | 10 | 11 | Unicode to Latex Conversion Class and Helper Function 12 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autoclass:: pylatexenc.latexencode.UnicodeToLatexEncoder 15 | :members: 16 | 17 | .. autofunction:: pylatexenc.latexencode.unicode_to_latex 18 | 19 | 20 | Specifying conversion rules 21 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 22 | 23 | .. autodata:: pylatexenc.latexencode.RULE_DICT 24 | 25 | .. autodata:: pylatexenc.latexencode.RULE_REGEX 26 | 27 | .. autodata:: pylatexenc.latexencode.RULE_CALLABLE 28 | 29 | 30 | 31 | .. autoclass:: pylatexenc.latexencode.UnicodeToLatexConversionRule 32 | :members: 33 | 34 | 35 | 36 | .. autofunction:: pylatexenc.latexencode.get_builtin_conversion_rules 37 | 38 | .. autofunction:: pylatexenc.latexencode.get_builtin_uni2latex_dict 39 | 40 | 41 | Compatibility with `pylatexenc 1.x` 42 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 43 | 44 | .. autofunction:: pylatexenc.latexencode.utf8tolatex 45 | 46 | .. 
autodata:: pylatexenc.latexencode.utf82latex 47 | -------------------------------------------------------------------------------- /doc/latexnodes.nodes.rst: -------------------------------------------------------------------------------- 1 | `latexnodes.nodes` — LaTeX Nodes Classes 2 | ======================================== 3 | 4 | .. automodule:: pylatexenc.latexnodes.nodes 5 | :no-undoc-members: 6 | :show-inheritance: 7 | 8 | .. contents:: Contents: 9 | :local: 10 | 11 | 12 | 13 | Nodes, Node Lists, and Visitors 14 | ------------------------------- 15 | 16 | 17 | .. autoclass:: pylatexenc.latexnodes.nodes.LatexNode 18 | :members: 19 | 20 | .. autoclass:: pylatexenc.latexnodes.nodes.LatexNodeList 21 | :members: 22 | 23 | .. autoclass:: pylatexenc.latexnodes.nodes.LatexNodesVisitor 24 | :members: 25 | 26 | 27 | LaTeX Node Types 28 | ---------------- 29 | 30 | .. autoclass:: pylatexenc.latexnodes.nodes.LatexCharsNode 31 | :members: 32 | :show-inheritance: 33 | 34 | .. autoclass:: pylatexenc.latexnodes.nodes.LatexGroupNode 35 | :members: 36 | :show-inheritance: 37 | 38 | .. autoclass:: pylatexenc.latexnodes.nodes.LatexCommentNode 39 | :members: 40 | :show-inheritance: 41 | 42 | .. autoclass:: pylatexenc.latexnodes.nodes.LatexMacroNode 43 | :members: 44 | :show-inheritance: 45 | 46 | .. autoclass:: pylatexenc.latexnodes.nodes.LatexEnvironmentNode 47 | :members: 48 | :show-inheritance: 49 | 50 | .. autoclass:: pylatexenc.latexnodes.nodes.LatexSpecialsNode 51 | :members: 52 | :show-inheritance: 53 | 54 | .. autoclass:: pylatexenc.latexnodes.nodes.LatexMathNode 55 | :members: 56 | :show-inheritance: 57 | 58 | 59 | -------------------------------------------------------------------------------- /doc/latexnodes.parsers.rst: -------------------------------------------------------------------------------- 1 | `latexnodes.parsers` — Latex Construct Parsers 2 | ============================================== 3 | 4 | .. 
automodule:: pylatexenc.latexnodes.parsers 5 | :members: 6 | :no-undoc-members: 7 | :show-inheritance: 8 | 9 | .. contents:: Contents: 10 | :local: 11 | 12 | 13 | Parser base class 14 | ~~~~~~~~~~~~~~~~~ 15 | 16 | .. autoclass:: LatexParserBase 17 | :members: 18 | 19 | 20 | General nodes 21 | ~~~~~~~~~~~~~ 22 | 23 | .. autoclass:: LatexGeneralNodesParser 24 | :members: 25 | :show-inheritance: 26 | 27 | .. autoclass:: LatexSingleNodeParser 28 | :members: 29 | :show-inheritance: 30 | 31 | 32 | Delimited expressions 33 | ~~~~~~~~~~~~~~~~~~~~~ 34 | 35 | .. autoclass:: LatexDelimitedExpressionParserInfo 36 | :members: 37 | 38 | .. autoclass:: LatexDelimitedExpressionParser 39 | :members: 40 | :show-inheritance: 41 | 42 | .. autoclass:: LatexDelimitedGroupParserInfo 43 | :members: 44 | :show-inheritance: 45 | 46 | .. autoclass:: LatexDelimitedGroupParser 47 | :members: 48 | :show-inheritance: 49 | 50 | .. autoclass:: LatexDelimitedMultiDelimGroupParserInfo 51 | :members: 52 | :show-inheritance: 53 | 54 | .. autoclass:: LatexDelimitedMultiDelimGroupParser 55 | :members: 56 | :show-inheritance: 57 | 58 | .. autoclass:: LatexDelimitedExpressionParserOpeningDelimiterNotFound 59 | :members: 60 | :show-inheritance: 61 | 62 | .. autoclass:: LatexMathParser 63 | :members: 64 | :show-inheritance: 65 | 66 | 67 | 68 | Single expression parser 69 | ~~~~~~~~~~~~~~~~~~~~~~~~ 70 | 71 | .. autoclass:: LatexExpressionParser 72 | :members: 73 | :show-inheritance: 74 | 75 | 76 | Optional expression parser 77 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 78 | 79 | .. autoclass:: LatexOptionalSquareBracketsParser 80 | :members: 81 | :show-inheritance: 82 | 83 | .. autoclass:: LatexOptionalCharsMarkerParser 84 | :members: 85 | :show-inheritance: 86 | 87 | .. autoclass:: LatexOptionalEmbellishmentArgsParser 88 | :members: 89 | :show-inheritance: 90 | 91 | 92 | Verbatim/literal expressions 93 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 94 | 95 | .. 
autoclass:: LatexVerbatimBaseParser 96 | :members: 97 | :show-inheritance: 98 | 99 | .. autoclass:: LatexDelimitedVerbatimParser 100 | :members: 101 | :show-inheritance: 102 | 103 | .. autoclass:: LatexVerbatimEnvironmentContentsParser 104 | :members: 105 | :show-inheritance: 106 | 107 | 108 | 109 | Typical macro arguments 110 | ~~~~~~~~~~~~~~~~~~~~~~~ 111 | 112 | .. autofunction:: get_standard_argument_parser 113 | 114 | .. autoclass:: LatexStandardArgumentParser 115 | :members: 116 | :show-inheritance: 117 | 118 | .. autoclass:: LatexCharsCommaSeparatedListParser 119 | :members: 120 | :show-inheritance: 121 | 122 | .. autoclass:: LatexCharsGroupParser 123 | :members: 124 | :show-inheritance: 125 | 126 | .. autoclass:: LatexTackOnInformationFieldMacrosParser 127 | :members: 128 | :show-inheritance: 129 | 130 | -------------------------------------------------------------------------------- /doc/latexnodes.rst: -------------------------------------------------------------------------------- 1 | `latexnodes` — LaTeX Nodes Tree and Parsers 2 | =========================================== 3 | 4 | .. automodule:: pylatexenc.latexnodes 5 | :no-undoc-members: 6 | :show-inheritance: 7 | 8 | .. contents:: Contents: 9 | :local: 10 | 11 | 12 | 13 | Parsing State 14 | ------------- 15 | 16 | .. autoclass:: ParsingState 17 | :members: 18 | 19 | .. autoclass:: ParsingStateDelta 20 | :members: 21 | 22 | .. autoclass:: ParsingStateDeltaReplaceParsingState 23 | :members: 24 | 25 | .. autoclass:: ParsingStateDeltaChained 26 | :members: 27 | 28 | .. autoclass:: ParsingStateDeltaWalkerEvent 29 | :members: 30 | 31 | .. autoclass:: ParsingStateDeltaEnterMathMode 32 | :members: 33 | 34 | .. autoclass:: ParsingStateDeltaLeaveMathMode 35 | :members: 36 | 37 | 38 | 39 | Latex Token 40 | ----------- 41 | 42 | .. autoclass:: LatexToken 43 | :members: 44 | 45 | 46 | Token Readers 47 | ------------- 48 | 49 | .. autoclass:: LatexTokenReaderBase 50 | :members: 51 | 52 | .. 
autoclass:: LatexTokenReader 53 | :members: 54 | 55 | .. autoclass:: LatexTokenListTokenReader 56 | :members: 57 | 58 | 59 | Arguments and Parsed Arguments 60 | ------------------------------ 61 | 62 | .. autoclass:: LatexArgumentSpec 63 | :members: 64 | 65 | .. autoclass:: ParsedArguments 66 | :members: 67 | 68 | .. autoclass:: ParsedArgumentsInfo 69 | :members: 70 | 71 | .. autoclass:: SingleParsedArgumentInfo 72 | :members: 73 | 74 | 75 | Nodes Collector 76 | --------------- 77 | 78 | .. autoclass:: LatexNodesCollector 79 | :members: 80 | 81 | 82 | Exception classes 83 | ----------------- 84 | 85 | .. autoclass:: LatexWalkerError 86 | :members: 87 | 88 | .. autoclass:: LatexWalkerLocatedError 89 | :members: 90 | 91 | .. autoclass:: LatexWalkerLocatedErrorFormatter 92 | :members: 93 | 94 | .. autoclass:: LatexWalkerParseError 95 | :members: 96 | 97 | .. autoclass:: LatexWalkerNodesParseError 98 | :members: 99 | 100 | .. autoclass:: LatexWalkerTokenParseError 101 | :members: 102 | 103 | .. autoclass:: LatexWalkerEndOfStream 104 | :members: 105 | 106 | 107 | Base classes 108 | ------------ 109 | 110 | .. autoclass:: CallableSpecBase 111 | :members: 112 | 113 | .. autoclass:: LatexWalkerParsingStateEventHandler 114 | :members: 115 | 116 | .. autoclass:: LatexWalkerBase 117 | :members: 118 | 119 | .. autoclass:: LatexContextDbBase 120 | :members: 121 | 122 | 123 | 124 | Node Classes 125 | ------------ 126 | 127 | .. toctree:: 128 | :maxdepth: 2 129 | 130 | latexnodes.nodes 131 | 132 | 133 | Parser Classes 134 | -------------- 135 | 136 | .. toctree:: 137 | :maxdepth: 2 138 | 139 | latexnodes.parsers 140 | -------------------------------------------------------------------------------- /doc/latexwalker.rst: -------------------------------------------------------------------------------- 1 | `latexwalker` — Calling Parsers for LaTeX Code 2 | ---------------------------------------------- 3 | 4 | .. 
automodule:: pylatexenc.latexwalker 5 | :no-undoc-members: 6 | :show-inheritance: 7 | 8 | .. contents:: Contents: 9 | :local: 10 | 11 | 12 | The main `LatexWalker` class 13 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 14 | 15 | .. autoclass:: pylatexenc.latexwalker.LatexWalker 16 | :members: 17 | 18 | 19 | .. autofunction:: pylatexenc.latexwalker.get_default_latex_context_db 20 | 21 | 22 | Exception Classes 23 | ~~~~~~~~~~~~~~~~~ 24 | 25 | .. py:class:: pylatexenc.latexwalker.LatexWalkerError 26 | 27 | Moved to :py:class:`pylatexenc.latexnodes.LatexWalkerError`. 28 | 29 | .. deprecated:: 3.0 30 | 31 | Since Pylatexenc 3.0, this class now resides in the new module 32 | :py:mod:`pylatexenc.latexnodes` as 33 | :py:class:`pylatexenc.latexnodes.LatexWalkerError`. It is aliased in 34 | `pylatexenc.latexwalker` for backwards compatibility. 35 | 36 | .. py:class:: pylatexenc.latexwalker.LatexWalkerParseError 37 | 38 | Moved to :py:class:`pylatexenc.latexnodes.LatexWalkerParseError`. 39 | 40 | .. deprecated:: 3.0 41 | 42 | Since Pylatexenc 3.0, this class now resides in the new module 43 | :py:mod:`pylatexenc.latexnodes` as 44 | :py:class:`pylatexenc.latexnodes.LatexWalkerParseError`. It is aliased in 45 | `pylatexenc.latexwalker` for backwards compatibility. 46 | 47 | .. py:class:: pylatexenc.latexwalker.LatexWalkerEndOfStream 48 | 49 | Moved to :py:class:`pylatexenc.latexnodes.LatexWalkerEndOfStream`. 50 | 51 | .. deprecated:: 3.0 52 | 53 | Since Pylatexenc 3.0, this class now resides in the new module 54 | :py:mod:`pylatexenc.latexnodes` as 55 | :py:class:`pylatexenc.latexnodes.LatexWalkerEndOfStream`. It is aliased 56 | in `pylatexenc.latexwalker` for backwards compatibility. 57 | 58 | 59 | Data Node Classes 60 | ~~~~~~~~~~~~~~~~~ 61 | 62 | .. py:class:: pylatexenc.latexwalker.LatexNode 63 | 64 | Moved to :py:class:`pylatexenc.latexnodes.nodes.LatexNode`. 65 | 66 | .. 
deprecated:: 3.0 67 | 68 | Since Pylatexenc 3.0, this class now resides in the new module 69 | :py:mod:`pylatexenc.latexnodes.nodes` as 70 | :py:class:`pylatexenc.latexnodes.nodes.LatexNode`. It is aliased in 71 | `pylatexenc.latexwalker` for backwards compatibility. 72 | 73 | .. py:class:: pylatexenc.latexwalker.LatexCharsNode 74 | 75 | Moved to :py:class:`pylatexenc.latexnodes.nodes.LatexCharsNode`. 76 | 77 | .. deprecated:: 3.0 78 | 79 | Since Pylatexenc 3.0, this class now resides in the new module 80 | :py:mod:`pylatexenc.latexnodes.nodes` as 81 | :py:class:`pylatexenc.latexnodes.nodes.LatexCharsNode`. It is aliased in 82 | `pylatexenc.latexwalker` for backwards compatibility. 83 | 84 | .. py:class:: pylatexenc.latexwalker.LatexGroupNode 85 | 86 | Moved to :py:class:`pylatexenc.latexnodes.nodes.LatexGroupNode`. 87 | 88 | .. deprecated:: 3.0 89 | 90 | Since Pylatexenc 3.0, this class now resides in the new module 91 | :py:mod:`pylatexenc.latexnodes.nodes` as 92 | :py:class:`pylatexenc.latexnodes.nodes.LatexGroupNode`. It is aliased in 93 | `pylatexenc.latexwalker` for backwards compatibility. 94 | 95 | .. py:class:: pylatexenc.latexwalker.LatexCommentNode 96 | 97 | Moved to :py:class:`pylatexenc.latexnodes.nodes.LatexCommentNode`. 98 | 99 | .. deprecated:: 3.0 100 | 101 | Since Pylatexenc 3.0, this class now resides in the new module 102 | :py:mod:`pylatexenc.latexnodes.nodes` as 103 | :py:class:`pylatexenc.latexnodes.nodes.LatexCommentNode`. It is aliased 104 | in `pylatexenc.latexwalker` for backwards compatibility. 105 | 106 | .. py:class:: pylatexenc.latexwalker.LatexMacroNode 107 | 108 | Moved to :py:class:`pylatexenc.latexnodes.nodes.LatexMacroNode`. 109 | 110 | .. deprecated:: 3.0 111 | 112 | Since Pylatexenc 3.0, this class now resides in the new module 113 | :py:mod:`pylatexenc.latexnodes.nodes` as 114 | :py:class:`pylatexenc.latexnodes.nodes.LatexMacroNode`. It is aliased in 115 | `pylatexenc.latexwalker` for backwards compatibility. 116 | 117 | .. 
py:class:: pylatexenc.latexwalker.LatexEnvironmentNode 118 | 119 | Moved to :py:class:`pylatexenc.latexnodes.nodes.LatexEnvironmentNode`. 120 | 121 | .. deprecated:: 3.0 122 | 123 | Since Pylatexenc 3.0, this class now resides in the new module 124 | :py:mod:`pylatexenc.latexnodes.nodes` as 125 | :py:class:`pylatexenc.latexnodes.nodes.LatexEnvironmentNode`. It is 126 | aliased in `pylatexenc.latexwalker` for backwards compatibility. 127 | 128 | .. py:class:: pylatexenc.latexwalker.LatexSpecialsNode 129 | 130 | Moved to :py:class:`pylatexenc.latexnodes.nodes.LatexSpecialsNode`. 131 | 132 | .. deprecated:: 3.0 133 | 134 | Since Pylatexenc 3.0, this class now resides in the new module 135 | :py:mod:`pylatexenc.latexnodes.nodes` as 136 | :py:class:`pylatexenc.latexnodes.nodes.LatexSpecialsNode`. It is aliased 137 | in `pylatexenc.latexwalker` for backwards compatibility. 138 | 139 | .. py:class:: pylatexenc.latexwalker.LatexMathNode 140 | 141 | Moved to :py:class:`pylatexenc.latexnodes.nodes.LatexMathNode`. 142 | 143 | .. deprecated:: 3.0 144 | 145 | Since Pylatexenc 3.0, this class now resides in the new module 146 | :py:mod:`pylatexenc.latexnodes.nodes` as 147 | :py:class:`pylatexenc.latexnodes.nodes.LatexMathNode`. It is aliased in 148 | `pylatexenc.latexwalker` for backwards compatibility. 149 | 150 | 151 | Parsing helpers 152 | ~~~~~~~~~~~~~~~ 153 | 154 | .. py:class:: pylatexenc.latexwalker.ParsingState 155 | 156 | .. deprecated:: 3.0 157 | 158 | Since Pylatexenc 3.0, this class now resides in the new module 159 | :py:mod:`pylatexenc.latexnodes`. It is aliased in 160 | `pylatexenc.latexwalker` for backwards compatibility. 161 | 162 | .. py:class:: pylatexenc.latexwalker.LatexToken 163 | 164 | .. deprecated:: 3.0 165 | 166 | Since Pylatexenc 3.0, this class now resides in the new module 167 | :py:mod:`pylatexenc.latexnodes`. It is aliased in 168 | `pylatexenc.latexwalker` for backwards compatibility. 
169 | 170 | 171 | 172 | Legacy Macro Definitions (for `pylatexenc 1.x`) 173 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 174 | 175 | .. autodata:: pylatexenc.latexwalker.MacrosDef 176 | 177 | .. autodata:: pylatexenc.latexwalker.default_macro_dict 178 | :annotation: 179 | 180 | 181 | -------------------------------------------------------------------------------- /doc/macrospec.rst: -------------------------------------------------------------------------------- 1 | `macrospec` — Specifying definitions for the parser 2 | --------------------------------------------------- 3 | 4 | .. automodule:: pylatexenc.macrospec 5 | :no-undoc-members: 6 | :show-inheritance: 7 | 8 | .. contents:: Contents: 9 | :local: 10 | 11 | 12 | Macro and environment definitions 13 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 14 | 15 | .. autoclass:: pylatexenc.macrospec.MacroSpec 16 | :members: 17 | :inherited-members: 18 | 19 | .. autoclass:: pylatexenc.macrospec.EnvironmentSpec 20 | :members: 21 | :inherited-members: 22 | 23 | .. autoclass:: pylatexenc.macrospec.SpecialsSpec 24 | :members: 25 | :inherited-members: 26 | 27 | 28 | .. autofunction:: pylatexenc.macrospec.std_macro 29 | 30 | .. autofunction:: pylatexenc.macrospec.std_environment 31 | 32 | .. autofunction:: pylatexenc.macrospec.std_specials 33 | 34 | 35 | Latex Context "Database" 36 | ~~~~~~~~~~~~~~~~~~~~~~~~ 37 | 38 | .. autoclass:: pylatexenc.macrospec.LatexContextDb 39 | :members: 40 | 41 | .. autoclass:: pylatexenc.macrospec.ParsingStateDeltaExtendLatexContextDb 42 | :show-inheritance: 43 | :members: 44 | 45 | 46 | Lower-level parsers for macro, environments, and specials 47 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 48 | 49 | You shouldn't have to use these directly. 50 | 51 | .. autoclass:: pylatexenc.macrospec.LatexNoArgumentsParser 52 | :show-inheritance: 53 | :members: 54 | 55 | .. autoclass:: pylatexenc.macrospec.LatexArgumentsParser 56 | :show-inheritance: 57 | :members: 58 | 59 | .. 
autoclass:: pylatexenc.macrospec.LatexEnvironmentBodyContentsParserInfo 60 | :show-inheritance: 61 | :members: 62 | 63 | .. autoclass:: pylatexenc.macrospec.LatexEnvironmentBodyContentsParser 64 | :show-inheritance: 65 | :members: 66 | 67 | .. autoclass:: pylatexenc.macrospec.LatexMacroCallParser 68 | :show-inheritance: 69 | :members: 70 | 71 | .. autoclass:: pylatexenc.macrospec.LatexEnvironmentCallParser 72 | :show-inheritance: 73 | :members: 74 | 75 | .. autoclass:: pylatexenc.macrospec.LatexSpecialsCallParser 76 | :show-inheritance: 77 | :members: 78 | 79 | 80 | 81 | 82 | Legacy (2.x) Macro arguments parsers 83 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 84 | 85 | .. autoclass:: pylatexenc.macrospec.MacroStandardArgsParser 86 | :members: 87 | 88 | .. autoclass:: pylatexenc.macrospec.ParsedMacroArgs 89 | :members: 90 | 91 | .. autoclass:: pylatexenc.macrospec.VerbatimArgsParser 92 | :show-inheritance: 93 | :members: 94 | 95 | .. autoclass:: pylatexenc.macrospec.ParsedVerbatimArgs 96 | :show-inheritance: 97 | :members: 98 | 99 | -------------------------------------------------------------------------------- /doc/new-in-pylatexenc-3.rst: -------------------------------------------------------------------------------- 1 | What's new in `pylatexenc 3` 2 | ============================ 3 | 4 | Wow, a *lot* of stuff has changed in the `latexwalker` and `macrospec` modules. 5 | There's even a new `latexnodes` module. I don't know where to start! 6 | 7 | The good news is, if you're simply using the latex-to-unicode and 8 | unicode-to-latex conversion tools, your code depending on `pylatexenc 2` should 9 | run without any changes. You might get some deprecation warnings which you can 10 | silence using python's warnings filter management (e.g., ``python -W 11 | 'ignore::DeprecationWarning'`` or using :py:func:`warnings.simplefilter`). 12 | 13 | The `latex2text` and `latexencode` modules have barely changed. 
14 | 15 | - New parsing mechanism in a new `latexnodes` module — everything gets delegated 16 | to "parser objects" that are specialized in parsing a specific construct. See 17 | :py:class:`pylatexenc.latexnodes.parsers.LatexParserBase`. 18 | 19 | - The parser has new enhanced handling of macro, environment, and specials 20 | arguments. Arguments can be named for easier lookup when traversing the node 21 | tree. 22 | 23 | - **WARNING**: While in *alpha* stage, I'm expecting that the new APIs might 24 | still change. I'll try to remain as backwards-compatible as possible with 25 | `pylatexenc 2.x` but new APIs introduced in the `3.0alphaX` versions might 26 | still change a bit until they are finalized. 27 | 28 | - Lists of latex node objects 29 | (:py:class:`~pylatexenc.latexnodes.nodes.LatexNode`) are now wrapped in a 30 | special object for node lists → 31 | :py:class:`pylatexenc.latexnodes.nodes.LatexNodeList`. 32 | 33 | - so much more ... ... 34 | 35 | - The `len` attribute in node objects is replaced by a `pos_end` attribute. The 36 | `len` attribute can still be accessed as a read-only computed attribute for 37 | compatibility with existing code using pylatexenc 2. 38 | 39 | 40 | 41 | .. _new-in-pylatexenc-3-possible-pitfall-changes: 42 | 43 | A couple things to look out for 44 | ------------------------------- 45 | 46 | - If you created a :py:class:`~pylatexenc.macrospec.LatexContextDb` database 47 | from scratch, you might suddenly get errors about unknown macros. The default 48 | initialization for unknown macro, environment and specials specification 49 | objects for :py:class:`~pylatexenc.macrospec.LatexContextDb` was, and remains, 50 | `None`. What has changed is the interpretation of this `None`: Now, the latex 51 | walker (more precisely, 52 | :py:class:`~pylatexenc.latexnodes.LatexNodesCollector`) reports an error, 53 | whereas previously, the parser would simply assume the macro doesn't accept 54 | any arguments. 
To restore the earlier behavior, simply set the spec objects 55 | for unknown macro/environment/specials in your latex context db object:: 56 | 57 | latex_context_db = macrospec.LatexContextDb() 58 | # ... 59 | latex_context_db.add_context_category( ... ) 60 | # ... 61 | latex_context_db.set_unknown_macro_spec(macrospec.MacroSpec('')) 62 | latex_context_db.set_unknown_environment_spec(macrospec.EnvironmentSpec('')) 63 | # 64 | # unknown macros and environments are now accepted and are assumed 65 | # not to take any arguments 66 | # 67 | 68 | - Node lists are now encapsulated in a 69 | :py:class:`~pylatexenc.latexnodes.nodes.LatexNodeList`. It behaves very much 70 | like a list in all respects (indexing, slicing, etc.), except that it does not 71 | satisfy ``isinstance(nodelist, list)``. If you relied on such tests, you'll 72 | need to update them to something like ``isinstance(nodelist, (LatexNodeList, 73 | list))``. 74 | -------------------------------------------------------------------------------- /doc/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx_issues 2 | -------------------------------------------------------------------------------- /js-transcrypt/.gitignore: -------------------------------------------------------------------------------- 1 | # 2 | # additional git ignores 3 | # 4 | 5 | 6 | # 7 | # preprocessed python source code output 8 | # 9 | pp-tmp 10 | 11 | # 12 | # output folders 13 | # 14 | *_js_output 15 | pylatexenc-js 16 | test-pylatexenc-js 17 | 18 | 19 | node_modules 20 | .yarn 21 | -------------------------------------------------------------------------------- /js-transcrypt/.yarnrc.yml: -------------------------------------------------------------------------------- 1 | nodeLinker: 'node-modules' 2 | -------------------------------------------------------------------------------- /js-transcrypt/README.md: 
-------------------------------------------------------------------------------- 1 | # Building a Javascript version of pylatexenc.latexnodes library via *transcrypt* 2 | 3 | You can use the fantastic [Transcrypt](http://www.transcrypt.org/) tool ([also 4 | on github](https://github.com/QQuick/Transcrypt)) for converting parts of the 5 | pylatexenc code base into JavaScript to make a JavaScript-based parser for 6 | simple LaTeX code. 7 | 8 | This procedure is very much still in alpha stage. Don't rely too much on it! 9 | 10 | To use commands listed here, make sure you installed the optional poetry 11 | dependency group "buildjslib": 12 | 13 | > poetry install --with=buildjslib 14 | 15 | 16 | ## The build script 17 | 18 | To generate the JS python sources simply run in this folder: 19 | 20 | # generates pylatexenc-js/ 21 | > poetry run ./generate_pylatexenc_js.py 22 | 23 | (Make sure you've removed the `pylatexenc-js` folder from any previous run, or 24 | pass the `--delete-target-dir` option to the generator script.) 25 | 26 | To compile the tests along with the library, in its own folder: 27 | 28 | # generates both pylatexenc-js/ and test-pylatexenc-js/ 29 | > poetry run ./generate_pylatexenc_js.py --compile-tests 30 | 31 | To run the tests using `node`, do: 32 | 33 | > node test-pylatexenc-js/runtests.js 34 | 35 | 36 | ## Steps handled by the build script 37 | 38 | These are broadly the steps that the build script will apply. 39 | 40 | ### Preprocessing the pylatexenc library in preparation for transcrypt: 41 | 42 | The script will first preprocess the pylatexenc source code to make it suitable 43 | for use with transcrypt. 
You can also do this manually with 44 | 45 | > export PYLATEXENC_SRC_DIR=/path/to/root/folder/of/pylatexenc/ 46 | > export PREPROCESS_LIB_OUTPUT_DIR=pp-tmp/ # or some other temporary folder 47 | > poetry run python ../tools/preprocess_lib.py preprocesslib-pylatexenc.config.yaml 48 | 49 | ### Run Transcrypt to generate the Javascript sources 50 | 51 | We need to enable a lot of features in transcrypt, some of which are disabled by 52 | default. The build script basically follows the following commands. 53 | 54 | Transcrypt is called with the `import_pylatexenc_modules.py` module as entry 55 | point. This python module simply imports the subset of the `pylatexenc` library 56 | that we'll be compiling to JavaScript. The command to run is essentially: 57 | 58 | > poetry run transcrypt import_pylatexenc_modules.py --dassert --dext --ecom --gen --tconv --sform --kwargs --keycheck --opov --xreex --nomin --build --anno --parent .none -u .auto -xp 'pp-tmp$libpatches' -od pylatexenc-js 59 | 60 | The JavaScript files are output in the `pylatexenc-js` folder. 61 | 62 | ### Final touches 63 | 64 | The build script will then apply some additional steps and patches: 65 | 66 | - Create a `package.json` file that defines a module, so that you can import the 67 | sources using for instance: 68 | 69 | // js code 70 | import { Symbol1 [, ...] } from './pylatexenc-js/pylatexenc.latexnodes.js' 71 | 72 | - Create a `py.js` module that exports the functions `$$kw` and `repr`, exposing 73 | the keyword-argument functionality as well as python's `repr()` function. 
def run_main():
    """
    Command-line entry point: build the JavaScript version of pylatexenc.

    Steps, in order:

    1. Parse the command-line options.
    2. With ``--delete-target-dir``, remove any existing output folder(s).
    3. Refuse to continue if an output folder that is about to be written
       already exists (the library folder always, the tests folder only with
       ``--compile-tests``).
    4. Preprocess the pylatexenc sources (and the tests, if requested) into
       the temporary folder.
    5. Run Transcrypt on the library and finalize the JS package.
    6. With ``--compile-tests``, generate the test runner script, run
       Transcrypt on it and finalize the tests package.

    Raises `RuntimeError` if a target folder already exists and
    ``--delete-target-dir`` was not given.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('--pylatexenc-js-output-dir', action='store',
                        default='pylatexenc-js',
                        help="Folder where to output generated JavaScript pylatexenc sources")

    parser.add_argument('--delete-target-dir', action='store_true', default=False,
                        help="With this option, the target directory is removed if it exists "
                        "at the beginning of the script instead of throwing an error.  Will "
                        "also remove the tests target directory if --compile-tests is given.")

    parser.add_argument('--preprocess-lib-output-dir', action='store', default='pp-tmp',
                        help="Temporary folder in which to write intermediate, "
                        "preprocessed sources to be fed into Transcrypt")

    parser.add_argument('--compile-tests', action='store_true', default=False,
                        help="Also compile the pylatexenc tests into a separate "
                        "folder (by default ./test-pylatexenc-js)")

    parser.add_argument('--test-pylatexenc-js-output-dir', action='store',
                        default='test-pylatexenc-js',
                        help="Folder where to output generated JavaScript pylatexenc "
                        "test sources. "
                        "The main entry point for the tests will be the script 'runtests.js'")

    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG)

    # The folders this run is going to write to.  The tests folder is only
    # relevant when the tests are actually compiled.
    target_dirs = [args.pylatexenc_js_output_dir]
    if args.compile_tests:
        target_dirs.append(args.test_pylatexenc_js_output_dir)

    if args.delete_target_dir:
        for target_dir in target_dirs:
            if os.path.exists(target_dir):
                shutil.rmtree(target_dir)

    os.makedirs(args.preprocess_lib_output_dir, exist_ok=True)

    # Fail early -- before any preprocessing or Transcrypt run -- if a stale
    # output folder is in the way.  This also covers the tests folder so that
    # old test output is never silently mixed with freshly generated files.
    for target_dir in target_dirs:
        if os.path.exists(target_dir):
            raise RuntimeError(
                f"Target destination ‘{target_dir}’ already exists. "
                f"Please remove it first."
            )

    # pick up pylatexenc's generation script tool

    pylatexenc_tools_dir = os.path.join(pylatexenc_src_dir, 'tools')
    logger.info(f"Using pylatexenc_tools_dir = {pylatexenc_tools_dir!r}")
    sys.path.insert(0, pylatexenc_tools_dir)

    # can only be imported once the tools folder is on sys.path
    import utils_transcrypt_generate_js

    genutils = utils_transcrypt_generate_js.GenUtils(
        pylatexenc_src_dir=pylatexenc_src_dir,
        preprocess_lib_output_dir=args.preprocess_lib_output_dir,
    )

    # preprocess both pylatexenc & the tests to prepare them for Transcrypt -->
    genutils.preprocess_pylatexenc_lib()
    if args.compile_tests:
        genutils.preprocess_lib('preprocesslib-tests.config.yaml')

    # run Transcrypt pylatexenc lib now -->
    genutils.run_transcrypt(
        'import_pylatexenc_modules.py',
        output_dir=args.pylatexenc_js_output_dir,
    )
    # final tweaks to finalize the JS package
    genutils.finalize_transcrypt_package(
        args.pylatexenc_js_output_dir,
        package_name='pylatexenc-js',
        package_version='0.0.1',
        package_description=(
            'Automatically transliterated Javascript version of the pylatexenc sources'
        ),
    )

    if args.compile_tests:

        # Generate the test runner script
        runtests_py = genutils.generate_runtests_script(
            os.path.join(pylatexenc_src_dir, 'test'),
            test_file_patterns=[
                # these are regexes that are matched as ^( <...> )[.]py$
                'test_latexnodes_.*',
                'test_macrospec_.*',
                'test_latexwalker_.*',
                'test_latexencode',
                'test_util',
            ]
        )

        # Transcrypt it
        genutils.run_transcrypt(
            runtests_py,
            add_import_paths=[
                os.path.join(args.preprocess_lib_output_dir, 'test')
            ],
            output_dir=args.test_pylatexenc_js_output_dir,
        )
        genutils.finalize_transcrypt_package(
            args.test_pylatexenc_js_output_dir,
            package_name='test-pylatexenc-js',
        )

        logger.info("Compiled the tests.  To run them, try ‘node {}/runtests.js’"
                    .format(args.test_pylatexenc_js_output_dir))

    logger.info("Done!")
def _is_mapping_like(obj):
    # Duck-typed stand-in for ``isinstance(obj, collections.abc.Mapping)``:
    # this module *is* the `collections` replacement used for the Transcrypt
    # build, so the abstract base classes are not available here.
    return isinstance(obj, (dict, ChainMap)) or hasattr(obj, 'keys')


class ChainMap:
    ''' A ChainMap groups multiple dicts (or other mappings) together
    to create a single, updateable view.

    The underlying mappings are stored in a list.  That list is public and can
    be accessed or updated using the *maps* attribute.  There is no other
    state.

    Lookups search the underlying mappings successively until a key is found.
    In contrast, writes, updates, and deletions only operate on the first
    mapping.

    This is a trimmed-down copy of the standard library class.  It does not
    derive from `MutableMapping`; the methods it offers are exactly the ones
    defined below.

    '''

    def __init__(self, *maps):
        '''Initialize a ChainMap by setting *maps* to the given mappings.
        If no mappings are provided, a single empty dictionary is used.

        '''
        self.maps = list(maps) or [{}]          # always at least one map

    def __missing__(self, key):
        'Called when *key* is in none of the maps.  Raises KeyError.'
        raise KeyError(key)

    def __getitem__(self, key):
        'Return the value for *key* from the first map that contains it.'
        for mapping in self.maps:
            if key not in mapping:
                continue
            return mapping[key]
        return self.__missing__(key)    # support subclasses that define __missing__

    def get(self, key, default=None):
        'Return ``self[key]`` if *key* is in any map, otherwise *default*.'
        return self[key] if key in self else default

    def __len__(self):
        'Number of distinct keys over all maps.'
        return len(set().union(*self.maps))

    def __iter__(self):
        'Iterate over the distinct keys; keys of the last map come first.'
        d = {}
        for mapping in reversed(self.maps):
            d.update(dict.fromkeys(mapping))
        return iter(d)

    def keys(self):
        'List of the distinct keys, in iteration order.'
        return list(self)

    def values(self):
        'List of the effective values, in iteration order.'
        return [self[key] for key in self]

    def items(self):
        'List of the effective ``(key, value)`` pairs, in iteration order.'
        return [(key, self[key]) for key in self]

    def __contains__(self, key):
        return any(key in m for m in self.maps)

    def __bool__(self):
        return any(self.maps)

    def __repr__(self):
        # Same layout as the standard library: ClassName(map0, map1, ...).
        # Plain concatenation is used on purpose to stay within the subset of
        # string formatting that is known to work in the JS build.
        return (
            self.__class__.__name__
            + '(' + ', '.join([repr(m) for m in self.maps]) + ')'
        )

    @classmethod
    def fromkeys(cls, iterable, *args):
        'Create a ChainMap with a single dict created from the iterable.'
        return cls(dict.fromkeys(iterable, *args))

    def copy(self):
        'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]'
        return self.__class__(self.maps[0].copy(), *self.maps[1:])

    def __copy__(self):
        'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]'
        return self.copy()

    def new_child(self, m=None):                # like Django's Context.push()
        '''New ChainMap with a new map followed by all previous maps.
        If no map is provided, an empty dict is used.
        '''
        if m is None:
            m = {}
        return self.__class__(m, *self.maps)

    @property
    def parents(self):                          # like Django's Context.pop()
        'New ChainMap from maps[1:].'
        return self.__class__(*self.maps[1:])

    def __setitem__(self, key, value):
        self.maps[0][key] = value

    def __delitem__(self, key):
        try:
            del self.maps[0][key]
        except KeyError:
            raise KeyError(f'Key not found in the first mapping: {key!r}')

    def popitem(self):
        'Remove and return an item pair from maps[0]. Raise KeyError if maps[0] is empty.'
        try:
            return self.maps[0].popitem()
        except KeyError:
            raise KeyError('No keys found in the first mapping.')

    def pop(self, key, *args):
        'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].'
        try:
            return self.maps[0].pop(key, *args)
        except KeyError:
            raise KeyError(f'Key not found in the first mapping: {key!r}')

    def clear(self):
        'Clear maps[0], leaving maps[1:] intact.'
        self.maps[0].clear()

    def __ior__(self, other):
        'In-place ``|=``: update maps[0] with *other*.'
        self.maps[0].update(other)
        return self

    def __or__(self, other):
        '``self | other``: copy of self whose first map is updated with *other*.'
        if not _is_mapping_like(other):
            return NotImplemented
        m = self.copy()
        m.maps[0].update(other)
        return m

    def __ror__(self, other):
        '``other | self``: single-map ChainMap of *other* overlaid with self.'
        if not _is_mapping_like(other):
            return NotImplemented
        m = dict(other)
        for child in reversed(self.maps):
            m.update(child)
        return self.__class__(m)
def loads(s, object_hook=None):
    """
    Minimal stand-in for Python's `json.loads()` for the Transcrypt build.

    Parses the JSON string `s` with the JavaScript `JSON.parse()` function.

    If `object_hook` is given, it is called with every decoded JSON *object*
    (innermost objects first) and its return value is used in place of that
    object.  Arrays, strings, numbers, booleans and null are passed through
    unchanged, which mirrors the `object_hook` contract of Python's `json`
    module.
    """
    if not object_hook:
        return JSON.parse(s)
    # JSON.parse() calls its reviver as reviver(key, value); the value is the
    # *second* argument.  Only plain objects are handed to object_hook.
    __pragma__('js', "{}", """
    var wrap_object_hook = (key, value) => {
        if (value === null || typeof value !== 'object' || Array.isArray(value)) {
            return value;
        }
        return object_hook(value);
    };""")
    return JSON.parse(s, wrap_object_hook)
const lastarg = args[args.length-1]; 25 | if (lastarg && lastarg.__kwargtrans__ === null) { this._process_kwargs(lastarg); } 26 | this._debug_fn_star('[[logging.ERROR]] !! ' + msg, ...args); 27 | }; 28 | 29 | this.critical = (msg, ...args) => { 30 | const lastarg = args[args.length-1]; 31 | if (lastarg && lastarg.__kwargtrans__ === null) { this._process_kwargs(lastarg); } 32 | this._debug_fn_star('[[logging.CRITICAL]] !! ' + msg, ...args); 33 | }; 34 | 35 | this.warning = (msg, ...args) => { 36 | const lastarg = args[args.length-1]; 37 | if (lastarg && lastarg.__kwargtrans__ === null) { this._process_kwargs(lastarg); } 38 | this._debug_fn_star('[[logging.WARNING]] !! ' + msg, ...args); 39 | }; 40 | 41 | this.info = (msg, ...args) => { 42 | const lastarg = args[args.length-1]; 43 | if (lastarg && lastarg.__kwargtrans__ === null) { this._process_kwargs(lastarg); } 44 | this._debug_fn_star(msg, ...args); 45 | }; 46 | 47 | this.debug = (msg, ...args) => { 48 | const lastarg = args[args.length-1]; 49 | if (lastarg && lastarg.__kwargtrans__ === null) { this._process_kwargs(lastarg); } 50 | this._debug_fn('logging.debug ~~ ' + msg, ...args); 51 | }; 52 | } 53 | 54 | _process_kwargs(kwargs) 55 | { 56 | if (kwargs.exc_info) { 57 | console.trace(); 58 | } 59 | } 60 | 61 | // _emit(label, sep, msg, args, log_fn) 62 | // { 63 | // let s = label + sep + _assemble_msg(msg, args); 64 | // if (log_fn !== undefined) { 65 | // log_fn(s); 66 | // } else { 67 | // console.log(s); 68 | // } 69 | // } 70 | }; 71 | 72 | function _assemble_msg(msg, args) 73 | { 74 | if (args.length) { 75 | return msg + " // " + args.map( (a) => repr(a) ).join(' ; '); 76 | } 77 | return msg; 78 | } 79 | 80 | 81 | 82 | let _logger_instances = {}; 83 | 84 | export function getLogger(scope) 85 | { 86 | let logger = _logger_instances[scope]; 87 | if (logger == null) { // null or undefined 88 | logger = new DebugLogger(scope); 89 | _logger_instances[scope] = logger; 90 | } 91 | return logger; 92 | } 93 | 94 | 
export function basicConfig() 95 | { 96 | } 97 | 98 | -------------------------------------------------------------------------------- /js-transcrypt/libpatches/unique_object_id.js: -------------------------------------------------------------------------------- 1 | 2 | // thanks https://stackoverflow.com/a/43963612/1694896 3 | 4 | export var fn_unique_object_id = (() => { 5 | let currentId = 0; 6 | const map = new WeakMap(); 7 | 8 | return (object) => { 9 | if (!map.has(object)) { 10 | map.set(object, ++currentId); 11 | } 12 | 13 | return map.get(object); 14 | }; 15 | })(); 16 | -------------------------------------------------------------------------------- /js-transcrypt/my_test_script.py: -------------------------------------------------------------------------------- 1 | # some custom JS patches are necessary ... comment out these lines to run with python 2 | #import customjspatches 3 | #customjspatches.custom_apply_patches() 4 | 5 | 6 | #import pylatexenc.latexnodes as latexnodes 7 | import pylatexenc.latexnodes.parsers as parsers 8 | from pylatexenc.macrospec import LatexContextDb, MacroSpec, EnvironmentSpec, SpecialsSpec 9 | from pylatexenc.latexwalker import LatexWalker 10 | 11 | 12 | # # --- minitest --- 13 | # from pylatexenc.latexnodes import ParsingState 14 | # ps = ParsingState(s='', enable_comments=False) 15 | # from unique_object_id import fn_unique_object_id 16 | # print("Parsing state's id is = ", fn_unique_object_id(ps), "and its repr is = ", repr(ps)) 17 | # raise StopHereThatllBeAllThanks 18 | # # --- 19 | 20 | 21 | latextext = r""" 22 | Here is some text that can contain some simple LaTeX macros, to produce 23 | for instance~\textbf{bold text} and \emph{italic text}. 24 | 25 | Two line breaks start a new paragraph. You can use inline math like 26 | \(\alpha=\sum_j\beta_j\) and display equations like 27 | \begin{align} 28 | S_1 &= I\,X\,Z\,Z\,X\ ; \nonumber\\ 29 | S_2, \ldots, S_4 &= \text{cyclical permutations of \(S_1\)}\ . 
30 | \label{eq:stabilizers} 31 | \end{align} 32 | 33 | Refer to equations with~\eqref{eq:stabilizers}, etc. ... 34 | 35 | Can we also parse citation commands like~\cite{Key1,Key2}. 36 | """ 37 | 38 | lw_context = LatexContextDb() 39 | lw_context.add_context_category( 40 | 'my-base-latex-category', 41 | macros=[ 42 | MacroSpec('textbf', '{',), 43 | MacroSpec('textit', '{',), 44 | MacroSpec('emph', '{',), 45 | MacroSpec('cite', '{',), 46 | MacroSpec('text', '{',), 47 | MacroSpec('label', '{',), 48 | MacroSpec('eqref', '{',), 49 | ], 50 | specials=[ 51 | SpecialsSpec('~'), 52 | # new paragraph 53 | SpecialsSpec('\n\n'), 54 | ], 55 | environments=[ 56 | EnvironmentSpec('align') 57 | ] 58 | ) 59 | 60 | # for \alpha, \, etc. 61 | lw_context.set_unknown_macro_spec( MacroSpec('','') ) 62 | 63 | 64 | 65 | lw = LatexWalker( 66 | latextext, 67 | latex_context=lw_context, 68 | tolerant_parsing=False 69 | ) 70 | 71 | nodes, carryover_info = lw.parse_content( parsers.LatexGeneralNodesParser() ) 72 | 73 | print("Got node list ->") 74 | print(nodes) 75 | -------------------------------------------------------------------------------- /js-transcrypt/mytestjscode/my_test_js_code.js: -------------------------------------------------------------------------------- 1 | // some custom JS patches are necessary ... 
comment out these lines to run with python 2 | import * as latexnodes from 'pylatexenc-js/pylatexenc.latexnodes.js'; 3 | import * as macrospec from 'pylatexenc-js/pylatexenc.macrospec.js'; 4 | import * as latexwalker from 'pylatexenc-js/pylatexenc.latexwalker.js'; 5 | import * as parsers from 'pylatexenc-js/pylatexenc.latexnodes.parsers.js'; 6 | 7 | // some setup code 8 | 9 | import * as customjspatches from 'pylatexenc-js/customjspatches.js'; 10 | customjspatches.custom_apply_patches(); 11 | 12 | import {__kwargtrans__, repr} from 'pylatexenc-js/org.transcrypt.__runtime__.js'; 13 | const $$kw = __kwargtrans__; 14 | 15 | 16 | 17 | const {LatexContextDb, MacroSpec, EnvironmentSpec, SpecialsSpec} = macrospec; 18 | const {LatexWalker} = latexwalker; 19 | 20 | 21 | const latextext = ` 22 | Here is some text that can contain some simple LaTeX macros, to produce 23 | for instance~\\textbf{bold text} and \\emph{italic text}. 24 | 25 | Two line breaks start a new paragraph. You can use inline math like 26 | \\(\\alpha=\\sum_j\\beta_j\\) and display equations like 27 | \\begin{align} 28 | S_1 &= I\\,X\\,Z\\,Z\\,X\\ ; \\nonumber\\\\ 29 | S_2, \\ldots, S_4 &= \\text{cyclical permutations of \\(S_1\\)}\\ . 30 | \\label{eq:stabilizers} 31 | \\end{align} 32 | 33 | Refer to equations with~\\eqref{eq:stabilizers}, etc. ... 34 | 35 | Can we also parse citation commands like~\\cite{Key1,Key2}. 
36 | `; 37 | 38 | console.log('latextext = ', latextext); 39 | 40 | 41 | const lw_context = new LatexContextDb() 42 | lw_context.add_context_category( 43 | 'my-base-latex-category', 44 | $$kw({ 45 | macros: [ 46 | new MacroSpec('textbf', '{',), 47 | new MacroSpec('textit', '{',), 48 | new MacroSpec('emph', '{',), 49 | new MacroSpec('cite', '{',), 50 | new MacroSpec('text', '{',), 51 | new MacroSpec('label', '{',), 52 | new MacroSpec('eqref', '{',), 53 | ], 54 | environments: [ 55 | new EnvironmentSpec('align') 56 | ], 57 | specials: [ 58 | new SpecialsSpec('~'), 59 | // new paragraph 60 | new SpecialsSpec('\n\n'), 61 | ], 62 | }) 63 | ) 64 | 65 | // for \alpha, \, etc. 66 | lw_context.set_unknown_macro_spec( new MacroSpec('','') ) 67 | 68 | const lw = new LatexWalker( 69 | latextext, 70 | $$kw({ 71 | latex_context: lw_context, 72 | tolerant_parsing: false 73 | }) 74 | ) 75 | 76 | const [nodes, carryover_info] = lw.parse_content( new parsers.LatexGeneralNodesParser() ) 77 | 78 | console.log("Got node list ->") 79 | console.log(repr(nodes)) 80 | console.log(nodes) 81 | 82 | -------------------------------------------------------------------------------- /js-transcrypt/mytestjscode/node_modules/pylatexenc-js: -------------------------------------------------------------------------------- 1 | ../../pylatexenc-js -------------------------------------------------------------------------------- /js-transcrypt/mytestjscode/package.json: -------------------------------------------------------------------------------- 1 | {"type": "module"} 2 | -------------------------------------------------------------------------------- /js-transcrypt/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "debug": "^4.3.4" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /js-transcrypt/preprocesslib-pylatexenc.config.yaml: 
-------------------------------------------------------------------------------- 1 | 2 | # source directory -- will be provided by the js sources generation script 3 | source_dir: $PYLATEXENC_SRC_DIR 4 | 5 | # output directory -- will create a pylatexenc/ subfolder 6 | target_dir: $PREPROCESS_LIB_OUTPUT_DIR 7 | 8 | # which modules to preprocess 9 | module_list: 10 | - 'pylatexenc.latexnodes' 11 | - 'pylatexenc.macrospec' 12 | - 'pylatexenc.latexwalker' 13 | - 'pylatexenc.latexencode' 14 | - 'pylatexenc.latexencode.get_builtin_rules' 15 | 16 | # features 17 | enabled_features: 18 | keep_future_statements: False 19 | keep_relative_imports: False 20 | keep_super_arguments: False 21 | keep_dict_with_generator: False 22 | keep_frozenset: False 23 | keep_logger_debug: False #True # speed things up by removing logger.debug() calls 24 | guards: 25 | PYTHON2_SUPPORT_CODE: False 26 | PYLATEXENC1_LEGACY_SUPPORT_CODE: False 27 | PYLATEXENC2_LEGACY_SUPPORT_CODE: False 28 | PYLATEXENC_GET_DEFAULT_SPECS_FN: False 29 | LATEXWALKER_HELPERS: False 30 | DEBUG_SET_EQ_ATTRIBUTE: False 31 | patches: 32 | UNIQUE_OBJECT_ID: | 33 | import unique_object_id 34 | fn_unique_object_id = unique_object_id.fn_unique_object_id 35 | 36 | # Always specify u2lobj= to callables (JS will silently ignore extra args) 37 | LATEXENCODE_CALLABLE_ACCEPTS_U2LOBJ_ARG: | 38 | def _callable_accepts_u2lobj_arg(fn): 39 | return True 40 | -------------------------------------------------------------------------------- /js-transcrypt/preprocesslib-tests.config.yaml: -------------------------------------------------------------------------------- 1 | # output directory -- will create a pylatexenc/ subfolder 2 | target_dir: $PREPROCESS_LIB_OUTPUT_DIR 3 | 4 | source_dir: $PYLATEXENC_SRC_DIR 5 | 6 | # which modules to preprocess 7 | module_list: 8 | - 'pylatexenc.latexnodes' 9 | - 'pylatexenc.macrospec' 10 | - 'pylatexenc.latexwalker' 11 | - 'pylatexenc.latexencode' 12 | - 'test.*' 13 | 14 | # features 15 | 
enabled_features: 16 | keep_future_statements: False 17 | keep_relative_imports: False 18 | keep_super_arguments: False 19 | keep_dict_with_generator: False 20 | keep_frozenset: False 21 | keep_logger_debug: True 22 | guards: 23 | PYTHON2_SUPPORT_CODE: False 24 | PYLATEXENC1_LEGACY_SUPPORT_CODE: False 25 | PYLATEXENC2_LEGACY_SUPPORT_CODE: False 26 | PYLATEXENC_GET_DEFAULT_SPECS_FN: False 27 | LATEXWALKER_HELPERS: False 28 | DEBUG_SET_EQ_ATTRIBUTE: False 29 | TEST_PYLATEXENC_SKIP: False 30 | patches: 31 | UNIQUE_OBJECT_ID: | 32 | import unique_object_id 33 | fn_unique_object_id = unique_object_id.fn_unique_object_id 34 | 35 | # Always specify u2lobj= to callables (JS will silently ignore extra args) 36 | LATEXENCODE_CALLABLE_ACCEPTS_U2LOBJ_ARG: | 37 | def _callable_accepts_u2lobj_arg(fn): 38 | return True 39 | -------------------------------------------------------------------------------- /js-transcrypt/transcrypt_runtime_patches.js: -------------------------------------------------------------------------------- 1 | /*** PhF/PYLATEXENC - BEGIN CUSTOM PATCHES ***/ 2 | 3 | // 4 | // Patch Transcrypt's implemnetations of some builtin object methods. 5 | // 6 | String.prototype.startswith = function (prefix, start) { 7 | //console.log("Custom startswith()! prefix = ", prefix, ", start = ", start); 8 | var pos_start = (typeof start === 'undefined' ? 
0 : start); 9 | if (prefix instanceof Array) { 10 | for (var i=0;i= width) { 32 | return this; 33 | } 34 | return fill_char.repeat(width - this.length) + this; 35 | }; 36 | String.prototype.rstrip = function(chars) { 37 | if (chars === undefined) { 38 | return this.replace (/\s*$/g, ''); 39 | } 40 | var s = this; 41 | while (s.length && chars.indexOf(s.slice(-1)) !== -1) { 42 | s = s.slice(0, -1); 43 | } 44 | return s; 45 | } 46 | // 47 | // Patch Transcrypt's __pop__() method which has a bug 48 | // (https://github.com/QQuick/Transcrypt/issues/827) 49 | // 50 | __pop__ = function (aKey, aDefault) { 51 | var result = this [aKey]; 52 | if (result !== undefined) { 53 | delete this [aKey]; 54 | return result; 55 | } else { 56 | if ( aDefault === undefined ) { 57 | throw KeyError (aKey, new Error()); 58 | } 59 | } 60 | return aDefault; 61 | } 62 | 63 | // 64 | // Check that a is not null, too, otherwise we get errors with "'__eq__' in a". 65 | // Also check for __eq__ in b object! 66 | // 67 | __eq__ = function (a, b) { 68 | if (typeof a == 'object' && a != null && '__eq__' in a) { 69 | return a.__eq__ (b); 70 | } else if (typeof b == 'object' && b != null && '__eq__' in b) { 71 | return b.__eq__ (a); 72 | } else { 73 | return a == b; 74 | } 75 | }; 76 | 77 | 78 | 79 | 80 | 81 | /*** PhF/PYLATEXENC - END CUSTOM PATCHES ***/ 82 | -------------------------------------------------------------------------------- /js-transcrypt/yarn.lock: -------------------------------------------------------------------------------- 1 | # This file is generated by running "yarn install" inside your project. 2 | # Manual changes might be lost - proceed with caution! 
3 | 4 | __metadata: 5 | version: 6 6 | cacheKey: 8 7 | 8 | "debug@npm:^4.3.4": 9 | version: 4.3.4 10 | resolution: "debug@npm:4.3.4" 11 | dependencies: 12 | ms: 2.1.2 13 | peerDependenciesMeta: 14 | supports-color: 15 | optional: true 16 | checksum: 3dbad3f94ea64f34431a9cbf0bafb61853eda57bff2880036153438f50fb5a84f27683ba0d8e5426bf41a8c6ff03879488120cf5b3a761e77953169c0600a708 17 | languageName: node 18 | linkType: hard 19 | 20 | "ms@npm:2.1.2": 21 | version: 2.1.2 22 | resolution: "ms@npm:2.1.2" 23 | checksum: 673cdb2c3133eb050c745908d8ce632ed2c02d85640e2edb3ace856a2266a813b30c613569bf3354fdf4ea7d1a1494add3bfa95e2713baa27d0c2c71fc44f58f 24 | languageName: node 25 | linkType: hard 26 | 27 | "root-workspace-0b6124@workspace:.": 28 | version: 0.0.0-use.local 29 | resolution: "root-workspace-0b6124@workspace:." 30 | dependencies: 31 | debug: ^4.3.4 32 | languageName: unknown 33 | linkType: soft 34 | -------------------------------------------------------------------------------- /pylatexenc/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # The MIT License (MIT) 3 | # 4 | # Copyright (c) 2015 Philippe Faist 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files (the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included in 14 | # all copies or substantial portions of the Software. 
15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | # THE SOFTWARE. 23 | # 24 | 25 | 26 | """ 27 | Utilities for LaTeX to/from Unicode Text conversion. 28 | 29 | Main Site: 30 | 31 | https://github.com/phfaist/pylatexenc/ 32 | 33 | """ 34 | 35 | from .version import version_str as _version_str 36 | 37 | __version__ = _version_str 38 | 39 | -------------------------------------------------------------------------------- /pylatexenc/_util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # The MIT License (MIT) 4 | # 5 | # Copyright (c) 2019 Philippe Faist 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in 15 | # all copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
class LineNumbersCalculator(object):
    r"""
    Translate character positions in a string into line and column numbers.

    The start position of every line of `s` is computed once, at construction
    time; each later query is a binary search in that list.

    Arguments:

      - `s`: the string in which positions are to be located.

      - `line_number_offset`: the number reported for the first line (added to
        the zero-based line index).

      - `first_line_column_offset`: value added to column numbers reported for
        positions on the first line.

      - `column_offset`: value added to column numbers reported for positions
        on any line other than the first.
    """
    def __init__(self, s,
                 line_number_offset=1, first_line_column_offset=0, column_offset=0):
        super(LineNumbersCalculator, self).__init__()

        self.line_number_offset = line_number_offset
        self.first_line_column_offset = first_line_column_offset
        self.column_offset = column_offset

        # The first line always starts at position 0; every further line
        # starts at the character right after a newline (this includes an
        # empty last line if `s` ends with a newline).
        line_starts = [0]
        newline_pos = s.find('\n')
        while newline_pos != -1:
            line_starts.append(newline_pos + 1)
            newline_pos = s.find('\n', newline_pos + 1)

        self._pos_new_lines = line_starts


    def pos_to_lineno_colno(self, pos, as_dict=False):
        r"""
        Compute the line and column numbers of the character position `pos`.

        The result is a tuple `(lineno, colno)`, or, if `as_dict=True`, a
        dictionary with the keys 'lineno' and 'colno'.  With the default
        offsets, lines are numbered from 1 and columns from 0, so that the
        very beginning of the string (`pos=0`) is reported as `(1,0)`.

        If `pos` is `None`, then both the line and the column number are
        reported as `None`.
        """

        if pos is None:
            lineno = None
            colno = None
        else:
            # index of the last line start that is <= pos
            line_index = bisect_right(self._pos_new_lines, pos) - 1
            assert 0 <= line_index < len(self._pos_new_lines)

            # the first line has its own column offset
            if line_index == 0:
                extra_columns = self.first_line_column_offset
            else:
                extra_columns = self.column_offset

            colno = pos - self._pos_new_lines[line_index] + extra_columns
            lineno = line_index + self.line_number_offset

        if as_dict:
            return {'lineno': lineno, 'colno': colno}
        return (lineno, colno)
pylatexenc_deprecated_3 = lambda *args: None #lgtm [py/multiple-definition] 146 | LazyDict = None #lgtm [py/multiple-definition] 147 | 148 | ### BEGIN_PYLATEXENC2_LEGACY_SUPPORT_CODE 149 | 150 | from ._util_support import ( # lgtm [py/unused-import] 151 | pylatexenc_deprecated_ver, 152 | pylatexenc_deprecated_2, 153 | pylatexenc_deprecated_3, 154 | # 155 | LazyDict 156 | ) 157 | 158 | ### END_PYLATEXENC2_LEGACY_SUPPORT_CODE 159 | -------------------------------------------------------------------------------- /pylatexenc/_util_support.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # The MIT License (MIT) 4 | # 5 | # Copyright (c) 2019 Philippe Faist 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in 15 | # all copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | # THE SOFTWARE. 24 | # 25 | 26 | 27 | # Internal module. 
class LazyDict(MutableMapping):
    r"""
    A mapping whose contents are generated on first access.

    The object is constructed with a function `generate_dict_fn` that takes no
    arguments and returns the actual dictionary.  That function is not called
    until the mapping is first read, modified, iterated over, measured, copied
    or cleared; its return value is then stored and used for all later
    operations.

    This is how legacy module-level dictionaries such as
    :py:data:`pylatexenc.latexwalker.default_macro_dict` and
    :py:data:`pylatexenc.latex2text.default_macro_dict` remain available
    without paying the cost of building them unless they are really used.
    """
    def __init__(self, generate_dict_fn):
        self._full_dict = None
        self._generate_dict_fn = generate_dict_fn

    def _ensure_instance(self):
        # Build the underlying dictionary, but only the first time around.
        if self._full_dict is None:
            self._full_dict = self._generate_dict_fn()

    def __getitem__(self, key):
        self._ensure_instance()
        return self._full_dict[key]

    def __setitem__(self, key, val):
        self._ensure_instance()
        self._full_dict[key] = val

    def __delitem__(self, key):
        self._ensure_instance()
        del self._full_dict[key]

    def __iter__(self):
        self._ensure_instance()
        return iter(self._full_dict)

    def __len__(self):
        self._ensure_instance()
        return len(self._full_dict)

    def copy(self):
        # Note: returns a copy of the underlying dictionary object, not a new
        # LazyDict instance.
        self._ensure_instance()
        return self._full_dict.copy()

    def clear(self):
        self._ensure_instance()
        return self._full_dict.clear()
def _is_path_within_directory(path, directory):
    r"""
    Return `True` if `path` is `directory` itself or is located inside it.

    Both arguments are expected to be already resolved (see
    `os.path.realpath()`).  The comparison is made against the directory name
    followed by a path separator, so that a sibling such as ``/x/in-secret``
    is not mistaken for being inside ``/x/in``.
    """
    if path == directory:
        return True
    # os.path.join(directory, '') appends a trailing separator only if the
    # directory does not already end with one (e.g. the file system root).
    return path.startswith(os.path.join(directory, ''))


def read_latex_file(tex_input_directory, strict_input, fn):
    r"""
    Read the LaTeX input file `fn`, looked up relative to
    `tex_input_directory`, and return its contents as a string.

    If the file `fn` does not exist, the names `fn + '.tex'` and then `fn +
    '.latex'` are tried.

    If `strict_input` is true, the file is only read if its fully resolved
    location is inside `tex_input_directory`; attempts to leave that
    directory (``../..`` components, symbolic links, sibling directories
    sharing a name prefix) are refused.

    This function does not raise an error if the file cannot be read.
    Instead, a warning is logged and the empty string is returned whenever
    the path is refused in strict mode, the file does not exist, or an
    `IOError` occurs while reading it.
    """

    dirfull = os.path.realpath(tex_input_directory)

    def refused_in_strict_mode(path):
        # True (after logging a warning) if strict mode forbids reading `path`
        if not strict_input or _is_path_within_directory(path, dirfull):
            return False
        logger.warning(
            "Can't access path '%s' leading outside of mandated directory "
            "[strict input mode]",
            fn
        )
        return True

    fnfull = os.path.realpath(os.path.join(tex_input_directory, fn))
    if refused_in_strict_mode(fnfull):
        return ''

    # try the known extensions if the file name is not found as given
    for ext in ('.tex', '.latex'):
        if not os.path.exists(fnfull) and os.path.exists(fnfull + ext):
            fnfull = fnfull + ext

    if strict_input:
        # The file name with its extension appended might itself be a
        # symbolic link pointing outside of the directory; resolve the final
        # name and check it again.
        fnfull = os.path.realpath(fnfull)
        if refused_in_strict_mode(fnfull):
            return ''

    if not os.path.isfile(fnfull):
        logger.warning("Error, file doesn't exist: '%s'", fn)
        return ''

    logger.debug("Reading input file %r", fnfull)

    try:
        with open(fnfull) as f:
            return f.read()
    except IOError as e:
        logger.warning("Error, can't access '%s': %s", fn, e)
        return ''
def main(argv=None):
    r"""
    Entry point of the `latexencode` command-line tool.

    Reads the input files named on the command line (or standard input if
    none are given), encodes their concatenated contents with
    `unicode_to_latex()` using the options selected on the command line, and
    writes the result to standard output.

    Arguments:

      - `argv`: the list of command-line arguments, without the program name.
        If `None`, then `sys.argv[1:]` is used.
    """

    if argv is None:
        argv = sys.argv[1:]

    # add_help=False: the '--help' option is declared explicitly below
    parser = argparse.ArgumentParser(prog='latexencode', add_help=False)
    parser.add_argument('files', metavar="FILE", nargs='*',
                        help='Input files (if none specified, read from standard input)')

    # both options write to the same destination; the default is set on the
    # first one
    parser.add_argument('--non-ascii-only', action='store_const', const=True,
                        dest='non_ascii_only', default=False)
    parser.add_argument('--no-non-ascii-only', action='store_const', const=False,
                        dest='non_ascii_only',
                        help="The option --non-ascii-only specifies that only non-ascii characters "
                        "are to be encoded into LaTeX sequences, and not characters like '$' "
                        "even though they might have a special LaTeX meaning.")

    parser.add_argument('--replacement-latex-protection',
                        choices=('braces', 'braces-all', 'braces-almost-all', 'braces-after-macro',
                                 'none'),
                        dest='replacement_latex_protection', default='braces',
                        help=r"How to protect replacement latex code from producing invalid latex code "
                        r"when concatenated in a longer string. One of 'braces', 'braces-all', "
                        r"'braces-almost-all', 'braces-after-macro', 'none'. Example: using "
                        r"choice 'braces' we avoid the invalid replacement 'a→b' -> 'a\tob' "
                        r"with instead 'a{\to}b'.")

    parser.add_argument('--unknown-char-policy',
                        choices=('keep', 'replace', 'ignore', 'fail'),
                        dest='unknown_char_policy', default='keep',
                        help="How to deal with nonascii characters with no known latex code equivalent.")

    parser.add_argument('-q', '--quiet', dest='logging_level', action='store_const',
                        const=logging.ERROR, default=logging.INFO,
                        help="Suppress warning messages")
    parser.add_argument('--version', action='version',
                        version='pylatexenc {}'.format(version_str),
                        help="Show version information and exit")
    parser.add_argument('--help', action='help',
                        help="Show this help information and exit")

    args = parser.parse_args(argv)

    logging.basicConfig()
    logging.getLogger().setLevel(args.logging_level)

    # Gather the full input in one go (an empty list of files makes fileinput
    # read from standard input).  Joining the lines avoids repeatedly
    # re-copying the accumulated string for large inputs.
    latex = ''.join(fileinput.input(files=args.files))

    result = unicode_to_latex(
        latex,
        non_ascii_only=args.non_ascii_only,
        replacement_latex_protection=args.replacement_latex_protection,
        unknown_char_policy=args.unknown_char_policy
    )

    sys.stdout.write(result)
class PartialLatexToLatexEncoder(UnicodeToLatexEncoder):
    r"""
    A unicode-to-LaTeX encoder that leaves alone some (fuzzily detected) LaTeX
    constructs already present in the input string, such as accent macros or
    inline math mode.

    Strings like titles of bibliographic entries often mix existing LaTeX
    code (inline math, accents) with unicode characters that still need to be
    encoded.  Feeding such strings to a plain
    :py:class:`UnicodeToLatexEncoder` yields ugly doubly-escaped output such
    as ``\textbackslash{}'\{e\}``.  What one wants instead is that constructs
    such as ``\'{e}`` are kept, while other characters and/or constructs (say
    '&' or '%') as well as unicode characters are encoded.

    This class is a simple, partial solution to this problem: The string is
    encoded according to the given `conversion_rules` (or the default
    conversion rules of :py:class:`UnicodeToLatexEncoder` objects), except
    that any character in `keep_latex_chars` is taken to be LaTeX code and is
    not encoded any further.

    .. versionadded:: 2.10
    """
    def __init__(self,
                 # keyword arguments:
                 keep_latex_chars=r'\${}^_',
                 conversion_rules=None,
                 **kwargs):

        if conversion_rules is None:
            conversion_rules = ['defaults']

        # Our own rule is tried first; it picks up the LaTeX constructs to
        # keep as they are.  Whatever it doesn't handle falls through to the
        # caller's (or the default) conversion rules.
        keep_latex_rule = UnicodeToLatexConversionRule(
            rule_type=RULE_CALLABLE,
            rule=self._do_partial_latex_encode_step,
            replacement_latex_protection='none'
        )

        super(PartialLatexToLatexEncoder, self).__init__(
            conversion_rules=[keep_latex_rule] + conversion_rules,
            **kwargs
        )

        self.keep_latex_chars = keep_latex_chars


    def _do_partial_latex_encode_step(self, s, pos):
        r"""
        The "callable rule" that this class installs in the underlying
        :py:class:`UnicodeToLatexEncoder`.

        If the character at position `pos` in `s` is not one of the
        `keep_latex_chars`, return `None` so that the remaining rules get a
        chance to encode it.  Otherwise, read a single LaTeX token starting
        at `pos` and return a tuple `(consumed_length, latex)` which causes
        that token (e.g., an entire macro name) to be output unchanged.
        """

        if s[pos] not in self.keep_latex_chars:
            return None

        # Read one token at this position; this way, if it is a macro, then
        # we keep the full macro.
        walker = _walker.LatexWalker(s, tolerant_parsing=False)
        parsing_state = walker.make_parsing_state()
        token_reader = walker.make_token_reader(pos=pos)
        tok = token_reader.peek_token(parsing_state=parsing_state)

        tok_end = tok.pos + tok.len

        # the token is kept exactly as it appears in the input
        return (tok_end - pos, tok.pre_space + s[tok.pos : tok_end])
def get_builtin_conversion_rules(builtin_name):
    r"""
    Look up one of the built-in sets of conversion rules by its name
    `builtin_name`.

    The following names are recognized:

    - `'defaults'`: the default conversion rules, a custom-curated list of
      unicode chars to LaTeX escapes.

    - `'unicode-xml'`: the conversion rules derived from the `unicode.xml` file
      maintained at https://www.w3.org/TR/xml-entity-names/#source by David
      Carlisle.

    A list of :py:class:`UnicodeToLatexConversionRule` objects is returned.
    The list can be given as is to the `conversion_rules=` argument of
    :py:class:`UnicodeToLatexEncoder`, or it can be included in a larger list
    that is provided to that argument.

    A `ValueError` is raised if `builtin_name` is not one of the names above.

    .. versionadded:: 2.0

       This function was introduced in `pylatexenc 2.0`.
    """
    if builtin_name == 'defaults':
        uni2latex_map = get_builtin_uni2latex_dict()
    elif builtin_name == 'unicode-xml':
        # imported here so that the module is only loaded if it is needed
        from . import _uni2latexmap_xml
        uni2latex_map = _uni2latexmap_xml.uni2latex
    else:
        raise ValueError("Unknown builtin rule set: {}".format(builtin_name))

    return [ UnicodeToLatexConversionRule(rule_type=RULE_DICT,
                                          rule=uni2latex_map) ]
IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | # THE SOFTWARE. 24 | # 25 | 26 | r""" 27 | .. versionadded:: 3.0 28 | 29 | The `latexnodes` module was introduced in `pylatexenc` 3. 30 | 31 | """ 32 | 33 | 34 | from ._exctypes import * 35 | 36 | from ._token import LatexToken 37 | 38 | from ._nodescollector import ( 39 | LatexNodesCollector 40 | ) 41 | 42 | from ._parsingstate import ( 43 | ParsingState 44 | ) 45 | 46 | from ._parsingstatedelta import ( 47 | ParsingStateDelta, 48 | ParsingStateDeltaReplaceParsingState, 49 | ParsingStateDeltaChained, 50 | ParsingStateDeltaWalkerEvent, 51 | ParsingStateDeltaEnterMathMode, 52 | ParsingStateDeltaLeaveMathMode, 53 | get_updated_parsing_state_from_delta, 54 | ) 55 | from ._parsedargs import ( 56 | LatexArgumentSpec, 57 | ParsedArguments, 58 | ) 59 | 60 | from ._tokenreaderbase import ( 61 | LatexTokenReaderBase, 62 | LatexTokenListTokenReader, 63 | ) 64 | from ._tokenreader import ( 65 | LatexTokenReader, 66 | ) 67 | 68 | from ._callablespecbase import ( 69 | CallableSpecBase 70 | ) 71 | 72 | from ._walkerbase import ( 73 | LatexWalkerParsingStateEventHandler, 74 | LatexWalkerBase, 75 | ) 76 | 77 | from ._latexcontextdbbase import ( 78 | LatexContextDbBase 79 | ) 80 | 81 | from ._parsedargsinfo import ( 82 | ParsedArgumentsInfo, 83 | SingleParsedArgumentInfo, 84 | ) 85 | 86 | from ._latex_recomposer import ( 87 | LatexNodesLatexRecomposer 88 | ) 89 | -------------------------------------------------------------------------------- /pylatexenc/latexnodes/_callablespecbase.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # The MIT License (MIT) 4 | # 5 | # Copyright (c) 2022 Philippe Faist 6 | # 7 | # Permission is hereby granted, 
class CallableSpecBase(object):
    r"""
    The base class for macro, environment, and specials spec classes (see the
    :py:mod:`pylatexenc.macrospec` module).

    As far as this :py:mod:`latexnodes` module's classes are concerned, a spec
    object is simply something that can provide a parser to parse the given
    construct (macro, environment, or specials).

    The spec object should implement :py:meth:`get_node_parser()`, and it should
    return a parser instance that can be used to parse the entire construct.

    See :py:class:`macrospec.MacroSpec` for how this is implemented in the
    :py:mod:`pylatexenc.macrospec` module.

    .. versionadded:: 3.0

       The :py:class:`CallableSpecBase` class was added in `pylatexenc 3.0`.
    """

    def get_node_parser(self, token):
        r"""
        Return a parser instance that can be used to parse the entire
        construct.  Subclasses must reimplement this method.

        The argument `token` is presumably the token that introduced the
        construct in the input (to be confirmed against the callers of this
        method).

        This base implementation always raises `NotImplementedError`.  Since
        `NotImplementedError` is a subclass of `RuntimeError`, code that
        catches `RuntimeError` continues to work.
        """
        raise NotImplementedError("Subclasses must reimplement get_node_parser()")
class LatexContextDbBase(object):
    r"""
    Abstract interface for the LaTeX context database attached to a parsing
    state.

    As far as the :py:mod:`latexnodes` module is concerned, a latex context
    database is simply an object that answers two kinds of questions: "which
    spec describes this macro / environment / specials sequence?" (the
    :py:meth:`get_***_spec()` family of methods) and "does a specials
    construct start at this position of the input?"
    (:py:meth:`test_for_specials()`).  Each answer is either a spec object,
    i.e., an instance of a :py:class:`CallableSpecBase` subclass, or `None`.

    The full machinery for specifying macro, environment, and specials
    definitions lives in the :py:mod:`pylatexenc.macrospec` module.  The
    implementations in this base class know of no definitions at all and
    always answer `None`.


    .. versionadded:: 3.0

       The :py:class:`LatexContextDbBase` class was added in `pylatexenc 3.0`.
    """

    def get_macro_spec(self, macroname):
        r"""
        Look up the spec to use to parse the macro named `macroname`.

        The `macroname` is given without the escape character (``\``).

        Implementations should return a spec object (an instance of a
        :py:class:`CallableSpecBase` subclass) whenever the macro is to be
        treated as known.  The database is free to hand out a default spec
        for macros that were never formally defined: as far as the parsers
        are concerned, any returned spec object means that the macro can be
        parsed, and they will proceed.

        Return `None` if the macro should not be considered as defined.  The
        parser then does not attempt to parse the macro and raises an error
        instead (or recovers from it in tolerant parsing mode).

        This base implementation always returns `None`.
        """
        return None

    def get_environment_spec(self, environmentname):
        r"""
        Counterpart of :py:meth:`get_macro_spec()` for environments.

        The `environmentname` is the name that appears between the curly
        braces after the ``\begin`` call.

        Implementations should return a spec object (an instance of a
        :py:class:`CallableSpecBase` subclass) whenever the environment is to
        be treated as known; a default spec may be provided for environments
        that were not formally defined.  If a spec object is returned, the
        parsers know how to parse the environment and will proceed.

        Return `None` if the environment should not be considered as defined.
        The parser then does not attempt to parse the environment and raises
        an error instead (or recovers from it in tolerant parsing mode).

        This base implementation always returns `None`.
        """
        return None

    def get_specials_spec(self, specials_chars):
        r"""
        Counterpart of :py:meth:`get_macro_spec()` for specials.

        The `specials_chars` argument is the sequence of characters for which
        we would like to know whether they form a specials construct.

        Unlike macros and environments, which are always initiated with the
        escape character ``\``, specials have no universal syntax that sets
        them apart.  The token reader therefore detects them by calling
        :py:meth:`test_for_specials()` on the input string, and this method
        is usually not called while parsing tokens.  It is only called in
        certain specific situations, such as to get the spec object
        associated with the new paragraph token ``\n\n``.

        Implementations should return the relevant spec object (an instance
        of a :py:class:`CallableSpecBase` subclass), or `None` if these
        characters are not to be considered as specials.

        This base implementation always returns `None`.
        """
        return None

    def test_for_specials(self, s, pos, parsing_state):
        r"""
        Test whether a specials construct starts in the string `s` exactly at
        position `pos`.

        For instance, if the parser tests the string
        ``"Eq.~\eqref{eq:xyz}"`` at position 3, then the latex context
        database might want to report the character ``~`` as a specials
        construct and return a specials spec for it.

        If specials characters are recognized, implementations should return
        the corresponding spec object, an instance of a
        :py:class:`CallableSpecBase` subclass.  In addition, the returned
        spec object must expose the attribute :py:attr:`specials_chars`,
        containing the sequence of characters that were recognized as
        special.

        If no specials characters are recognized at exactly the position
        `pos`, implementations should return `None`.

        This base implementation ignores its arguments and always returns
        `None`.
        """
        return None


class LatexWalkerParsingStateEventHandler(object):
    r"""
    A LatexWalker parsing state event handler.

    The LatexWalker instance calls methods on this object to determine how
    the parsing state should be updated upon certain events.  Each method
    returns a :py:class:`ParsingStateDelta` describing the update.

    Events:

    - enter math mode (:py:meth:`enter_math_mode()`)

    - exit math mode (:py:meth:`leave_math_mode()`)

    .. versionadded:: 3.0

       The :py:class:`LatexWalkerParsingStateEventHandler` class was added in
       `pylatexenc 3.0`.
    """

    def enter_math_mode(self, math_mode_delimiter=None, trigger_token=None):
        r"""
        Return the parsing state delta to apply when entering math mode.

        The delta sets the parsing state attribute `in_math_mode` to `True`
        and `math_mode_delimiter` to the given `math_mode_delimiter`.  The
        `trigger_token` argument is accepted but not used by this default
        implementation.
        """
        entering_attributes = dict(
            in_math_mode=True,
            math_mode_delimiter=math_mode_delimiter
        )
        return ParsingStateDelta(set_attributes=entering_attributes)

    def leave_math_mode(self, trigger_token=None):
        r"""
        Return the parsing state delta to apply when leaving math mode.

        The delta sets the parsing state attribute `in_math_mode` to `False`
        and resets `math_mode_delimiter` to `None`.  The `trigger_token`
        argument is accepted but not used by this default implementation.
        """
        leaving_attributes = dict(
            in_math_mode=False,
            math_mode_delimiter=None
        )
        return ParsingStateDelta(set_attributes=leaving_attributes)


# Single shared handler instance returned by the default implementation of
# LatexWalkerBase.parsing_state_event_handler().
_default_parsing_state_event_handler = LatexWalkerParsingStateEventHandler()


class LatexWalkerBase(object):
    r"""
    Base class for a latex-walker.  Essentially, this is all that the
    classes and methods in the :py:mod:`latexnodes` module need to know about
    what a LatexWalker does.

    Most methods here are hooks that raise :py:exc:`RuntimeError` until a
    subclass reimplements them.

    See also :py:class:`latexwalker.LatexWalker`.

    .. versionadded:: 3.0

       The :py:class:`LatexWalkerBase` class was added in `pylatexenc 3.0`.
    """

    def parsing_state_event_handler(self):
        r"""
        Return the object that decides how the parsing state is updated upon
        events such as entering or leaving math mode.

        The default implementation returns a module-wide shared
        :py:class:`LatexWalkerParsingStateEventHandler` instance.
        """
        return _default_parsing_state_event_handler

    def parse_content(self, parser, token_reader=None, parsing_state=None,
                      open_context=None, **kwargs):
        r"""
        Hook meant to run the given `parser` on the content provided by
        `token_reader` in the given `parsing_state`.

        NOTE(review): the exact contract (including the meaning of
        `open_context`) is defined by the concrete walker and is not visible
        from this base class.

        Must be reimplemented by subclasses; this base implementation always
        raises :py:exc:`RuntimeError`.
        """
        raise RuntimeError("LatexWalkerBase subclasses must reimplement parse_content()")

    def make_node(self, node_class, **kwargs):
        r"""
        Hook meant to create a node of type `node_class` from the given
        keyword arguments.

        Must be reimplemented by subclasses; this base implementation always
        raises :py:exc:`RuntimeError`.
        """
        raise RuntimeError("LatexWalkerBase subclasses must reimplement make_node()")

    def make_nodelist(self, nodelist, **kwargs):
        r"""
        Hook meant to create a node list object from `nodelist`.

        Must be reimplemented by subclasses; this base implementation always
        raises :py:exc:`RuntimeError`.
        """
        raise RuntimeError("LatexWalkerBase subclasses must reimplement make_nodelist()")

    def make_nodes_collector(self,
                             token_reader,
                             parsing_state,
                             **kwargs):
        r"""
        Hook meant to create a nodes collector operating on `token_reader`
        with the given `parsing_state`.

        Must be reimplemented by subclasses; this base implementation always
        raises :py:exc:`RuntimeError`.
        """
        raise RuntimeError(
            "LatexWalkerBase subclasses must reimplement make_nodes_collector()")

    def make_latex_group_parser(self, delimiters):
        r"""
        Hook meant to create a parser for a LaTeX group with the given
        `delimiters`.

        Must be reimplemented by subclasses; this base implementation always
        raises :py:exc:`RuntimeError`.
        """
        raise RuntimeError(
            "LatexWalkerBase subclasses must reimplement make_latex_group_parser()")

    def make_latex_math_parser(self, math_mode_delimiters):
        r"""
        Hook meant to create a parser for math mode content with the given
        `math_mode_delimiters`.

        Must be reimplemented by subclasses; this base implementation always
        raises :py:exc:`RuntimeError`.
        """
        raise RuntimeError(
            "LatexWalkerBase subclasses must reimplement make_latex_math_parser()")

    def check_tolerant_parsing_ignore_error(self, exc):
        r"""
        Decide whether to attempt to recover from a parsing error.

        You can inspect the exception object `exc` and decide whether or not
        recovery should be attempted (if you want to be tolerant to parsing
        errors).

        Return the exception object if it should be raised, or return `None`
        if recovery should be attempted.  The default implementation never
        recovers: it returns `exc` unchanged.
        """
        return exc

    def format_node_pos(self, node):
        r"""
        Return a human-readable description of the position of `node`.

        The default implementation returns the text
        ``'character position '`` followed by the `repr()` of `node.pos`.
        """
        return 'character position {!r}'.format(node.pos)
29 | """ 30 | 31 | from ._base import LatexParserBase 32 | 33 | from ._generalnodes import ( 34 | LatexGeneralNodesParser, 35 | LatexSingleNodeParser, 36 | ) 37 | from ._delimited import ( 38 | LatexDelimitedExpressionParserInfo, 39 | LatexDelimitedExpressionParser, 40 | LatexDelimitedGroupParserInfo, 41 | LatexDelimitedGroupParser, 42 | LatexDelimitedMultiDelimGroupParserInfo, 43 | LatexDelimitedMultiDelimGroupParser, 44 | LatexDelimitedExpressionParserOpeningDelimiterNotFound, 45 | ) 46 | from ._math import ( 47 | LatexMathParser, 48 | ) 49 | 50 | from ._expression import ( 51 | LatexExpressionParser, 52 | ) 53 | 54 | from ._optionals import ( 55 | LatexOptionalSquareBracketsParser, 56 | LatexOptionalCharsMarkerParser, 57 | ) 58 | 59 | from ._stdarg import ( 60 | LatexStandardArgumentParser, 61 | get_standard_argument_parser, 62 | LatexCharsCommaSeparatedListParser, 63 | LatexCharsGroupParser, 64 | LatexTackOnInformationFieldMacrosParser, 65 | ) 66 | 67 | 68 | from ._verbatim import ( 69 | LatexVerbatimBaseParser, 70 | LatexDelimitedVerbatimParser, 71 | LatexVerbatimEnvironmentContentsParser, 72 | ) 73 | -------------------------------------------------------------------------------- /pylatexenc/latexnodes/parsers/_base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # The MIT License (MIT) 4 | # 5 | # Copyright (c) 2022 Philippe Faist 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included 
class LatexParserBase(object):
    r"""
    The base class for :py:mod:`pylatexenc.latexnodes.parsers` parsers.

    A parser is an object designed to parse one specific type of latex
    construct, such as content enclosed in curly braces, into a node tree.

    When invoked, parser objects return a tuple
    `(nodes, parsing_state_delta)`:

    - `nodes` is the result of the parsing.  It is usually a
      :py:class:`~pylatexenc.latexnodes.LatexNodeList` instance, but it can
      also be a specific node instance, or another related object like a
      :py:class:`~pylatexenc.latexnodes.ParsedArguments` instance.

    - `parsing_state_delta` encodes any changes in the parsing state that
      should be caused by parsing the given construct.  It should be either
      `None` (no parsing state changes) or a
      :py:class:`~pylatexenc.latexnodes.ParsingStateDelta` instance.  For
      instance, if the parser encountered a ``\newcommand`` it can relay the
      corresponding state change through the `parsing_state_delta` object.

    The main functionality of the parser is implemented in the
    :py:meth:`parse()` method.

    Parser objects should be invoked via the latex walker instance, using
    `LatexWalker.parse_content()` (see :py:class:`LatexWalkerBase` and
    :py:class:`pylatexenc.latexwalker.LatexWalker`):

    .. code::

        my_latex_walker = LatexWalker(....)
        my_parser = .... # some LatexParserBase subclass

        token_reader = my_latex_walker.make_token_reader()
        parsing_state = my_latex_walker.make_parsing_state()

        # parse that specific construct:
        nodes, parsing_state_delta = my_latex_walker.parse_content(
            my_parser,
            token_reader,
            parsing_state
        )
    """

    def __init__(self):
        r"""
        Initialize the parser; simply forwards to the next initializer in the
        class hierarchy.
        """
        super(LatexParserBase, self).__init__()

    def parse(self, latex_walker, token_reader, parsing_state, **kwargs):
        r"""
        Parse the construct this parser is responsible for.

        Parser objects should not be called directly, but rather be invoked
        via the latex walker instance, using `LatexWalker.parse_content()`
        (see the class documentation).

        Subclasses should implement this method to construct the relevant
        node tree by reading tokens from the `token_reader` (use
        `token_reader.next_token()` and friends, see
        :py:class:`~pylatexenc.latexnodes.LatexTokenReaderBase`).

        Subclasses should return a tuple pair `(nodes, parsing_state_delta)`,
        where `nodes` is the node list, node, or object that resulted from
        the parsing and `parsing_state_delta` encodes any parsing state
        changes that resulted from parsing this construct (`None` if there
        are no parsing state changes).

        This base implementation always raises :py:exc:`RuntimeError`.
        """
        raise RuntimeError("LatexParserBase subclasses must reimplement parse()")

    def contents_can_be_empty(self):
        r"""
        Tell whether absorbing no tokens at all is a valid outcome for the
        thing this object is meant to parse.

        This would be the case, for instance, for group contents or for
        optional arguments; a parser for a mandatory argument would return
        `False` instead.

        This is used in certain special situations, for instance if a closing
        brace is immediately encountered after a macro that expected an
        argument (say ``\mymacro}`` --- it's an error if ``\mymacro``
        requires a mandatory argument but it's ok if it accepts an optional
        argument).  In this case, all the macro arguments' parsers need to be
        checked to see if it is okay that they have no contents.

        The default implementation returns `True`.
        """
        return True

    def __repr__(self):
        # Show only the concrete class name, wrapped in angle brackets.
        class_name = self.__class__.__name__
        return "<" + class_name + ">"
# for Py3
_basestring = str

### BEGIN_PYTHON2_SUPPORT_CODE
import sys
if sys.version_info.major == 2:
    _basestring = basestring
### END_PYTHON2_SUPPORT_CODE


class LatexMathParserInfo(LatexDelimitedExpressionParserInfo):
    r"""
    Reimplementation of the :py:class:`LatexDelimitedExpressionParserInfo`
    class for math environments, for use by :py:class:`LatexMathParser`.
    """

    @classmethod
    def is_opening_delimiter(cls, delimiters, first_token, group_parsing_state,
                             delimited_expression_parser, latex_walker, **kwargs):
        r"""
        Tell whether `first_token` opens a math mode construct that is
        acceptable for the given `delimiters`.

        The token must be of type ``'mathmode_inline'`` or
        ``'mathmode_display'``, and its `arg` must additionally be accepted
        by `check_opening_delimiter()`.  Returns `True` or `False`.
        """
        # Only math mode tokens can open a math mode construct.
        if first_token.tok not in ('mathmode_inline', 'mathmode_display'):
            return False

        delimiter_accepted = cls.check_opening_delimiter(
            delimiters=delimiters,
            parsed_opening_delimiter=first_token.arg,
            latex_walker=latex_walker
        )
        return True if delimiter_accepted else False

    @classmethod
    def get_acceptable_open_delimiter_list(cls, delimiters, group_parsing_state,
                                           delimited_expression_parser, latex_walker,
                                           **kwargs):
        r"""
        Return the list of opening delimiters that would be acceptable.

        If `delimiters` is `None`, the list consists of the opening
        delimiters of all the inline math delimiter pairs followed by those
        of all the display math delimiter pairs known to
        `group_parsing_state`.  If `delimiters` is a string, it is itself the
        only acceptable opening delimiter.  Otherwise, the first item of
        `delimiters` is the only acceptable opening delimiter.
        """
        if delimiters is None:
            all_delimiter_pairs = (
                group_parsing_state.latex_inline_math_delimiters
                + group_parsing_state.latex_display_math_delimiters
            )
            return [
                opening
                for (opening, _closing) in all_delimiter_pairs
            ]

        if isinstance(delimiters, _basestring):
            return [delimiters]

        return [delimiters[0]]


    # ---

    def initialize(self):
        r"""
        Set up the fields of this object from the opening math mode token.

        Records the type and delimiter of the math mode that is being
        opened, computes the parsing state in which the math contents are to
        be parsed, and determines the parsed delimiters.
        """
        opening_token = self.first_token

        self.math_mode_type = opening_token.tok
        self.math_mode_delimiter = opening_token.arg

        # enter math mode: the contents are parsed in the parsing state
        # obtained by applying an "enter math mode" delta to the outer state
        enter_math_mode_delta = ParsingStateDeltaEnterMathMode(
            math_mode_delimiter=self.math_mode_delimiter,
            trigger_token=opening_token
        )
        self.math_parsing_state = get_updated_parsing_state_from_delta(
            self.parsing_state,
            enter_math_mode_delta,
            self.latex_walker,
        )

        self.contents_parsing_state = self.math_parsing_state
        self.parsed_delimiters = self.get_parsed_delimiters()

    def stop_token_condition(self, token):
        r"""
        Return `True` if `token` closes the math mode that was opened, i.e.,
        if it is a token of the same math mode type whose `arg` is the
        closing delimiter in `parsed_delimiters`; return `False` otherwise.
        """
        return (
            token.tok == self.math_mode_type
            and token.arg == self.parsed_delimiters[1]
        )

    def get_matching_delimiter(self, opening_delimiter):
        r"""
        Return the closing delimiter that is expected, as recorded in the
        math parsing state.  The `opening_delimiter` argument is not
        consulted.
        """
        close_delim_info = self.math_parsing_state._math_expecting_close_delim_info
        return close_delim_info['close_delim']


    def make_group_node_and_parsing_state_delta(self, latex_walker, token_reader,
                                                nodelist, parsing_state_delta):
        r"""
        Create the :py:class:`LatexMathNode` for the math content that was
        parsed and return the pair `(math_node, parsing_state_delta)`.

        The `parsing_state_delta` is passed through unchanged.  Note that
        `nodelist` can be `None` in case of a parse error.
        """
        # Map the math mode token type to the node's display type; any other
        # token type yields an empty display type.
        displaytype = {
            'mathmode_inline': 'inline',
            'mathmode_display': 'display',
        }.get(self.math_mode_type, '')

        # As for the delimited group parser, the end position is the current
        # position of the token reader, so that the node includes the closing
        # math mode delimiter.
        math_node = latex_walker.make_node(
            nodes.LatexMathNode,
            displaytype=displaytype,
            nodelist=nodelist,
            # the node itself lives in the outer (pre-math) parsing state
            parsing_state=self.parsing_state,
            delimiters=self.parsed_delimiters,
            pos=self.first_token.pos,
            pos_end=token_reader.cur_pos(),
        )

        return math_node, parsing_state_delta


# ------------------------------------------------------------------------------

class LatexMathParser(LatexDelimitedExpressionParser):
    r"""
    Parser for math mode content.

    This is a :py:class:`LatexDelimitedExpressionParser` configured with the
    given `math_mode_delimiters` as delimiters, with
    `discard_parsing_state_delta=False`, and with
    :py:class:`LatexMathParserInfo` as parser info class.  Any additional
    keyword arguments are forwarded to the base class constructor.
    """
    def __init__(self,
                 math_mode_delimiters,
                 **kwargs):
        super(LatexMathParser, self).__init__(
            delimiters=math_mode_delimiters,
            discard_parsing_state_delta=False,
            delimited_expression_parser_info_class=LatexMathParserInfo,
            **kwargs
        )
(Originally, 32 | ``latexwalker`` was designed to extract useful text for indexing for text 33 | database searches of LaTeX content.) 34 | 35 | Simple example usage:: 36 | 37 | >>> from pylatexenc.latexwalker import LatexWalker, LatexEnvironmentNode 38 | >>> w = LatexWalker(r""" 39 | ... \textbf{Hi there!} Here is \emph{a list}: 40 | ... \begin{enumerate}[label=(i)] 41 | ... \item One 42 | ... \item Two 43 | ... \end{enumerate} 44 | ... and $x$ is a variable. 45 | ... """) 46 | >>> (nodelist, pos, len_) = w.get_latex_nodes(pos=0) 47 | >>> nodelist[0] 48 | LatexCharsNode(pos=0, len=1, chars='\n') 49 | >>> nodelist[1] 50 | LatexMacroNode(pos=1, len=18, macroname='textbf', 51 | nodeargd=ParsedMacroArgs(argnlist=[LatexGroupNode(pos=8, len=11, 52 | nodelist=[LatexCharsNode(pos=9, len=9, chars='Hi there!')], 53 | delimiters=('{', '}'))], argspec='{'), macro_post_space='') 54 | >>> nodelist[5].isNodeType(LatexEnvironmentNode) 55 | True 56 | >>> nodelist[5].environmentname 57 | 'enumerate' 58 | >>> nodelist[5].nodeargd.argspec 59 | '[' 60 | >>> nodelist[5].nodeargd.argnlist 61 | [LatexGroupNode(pos=60, len=11, nodelist=[LatexCharsNode(pos=61, len=9, 62 | chars='label=(i)')], delimiters=('[', ']'))] 63 | >>> nodelist[7].latex_verbatim() 64 | '$x$' 65 | 66 | You can also use `latexwalker` directly in command-line, producing JSON or a 67 | human-readable node tree:: 68 | 69 | $ echo '\textit{italic} text' | latexwalker --output-format=json 70 | { 71 | "nodelist": [ 72 | { 73 | "nodetype": "LatexMacroNode", 74 | "pos": 0, 75 | "len": 15, 76 | "macroname": "textit", 77 | [...] 78 | 79 | $ latexwalker --help 80 | [...] 81 | 82 | The parser can be influenced by specifying a collection of known macros and 83 | environments (the "latex context") that are specified using 84 | :py:class:`pylatexenc.macrospec.MacroSpec` and 85 | :py:class:`pylatexenc.macrospec.EnvironmentSpec` objects in a 86 | :py:class:`pylatexenc.macrospec.LatexContextDb` object. 
See the doc of the 87 | module :py:mod:`pylatexenc.macrospec` for more information. 88 | ''' 89 | 90 | from __future__ import print_function, unicode_literals 91 | 92 | 93 | import logging 94 | logger = logging.getLogger(__name__) 95 | 96 | 97 | 98 | from .. import macrospec 99 | 100 | 101 | # ------------------------------------------------------------------------------ 102 | 103 | 104 | ### BEGIN_PYLATEXENC2_LEGACY_SUPPORT_CODE 105 | from ..latexnodes._exctypes import * 106 | from ..latexnodes.nodes import * 107 | from ..latexnodes._token import LatexToken 108 | ### END_PYLATEXENC2_LEGACY_SUPPORT_CODE 109 | 110 | 111 | 112 | from ..latexnodes import ParsingState 113 | 114 | from ._walker import LatexWalker 115 | 116 | 117 | ### BEGIN_PYLATEXENC_GET_DEFAULT_SPECS_FN 118 | from ._get_defaultspecs import get_default_latex_context_db 119 | ### END_PYLATEXENC_GET_DEFAULT_SPECS_FN 120 | 121 | 122 | ### BEGIN_PYLATEXENC1_LEGACY_SUPPORT_CODE 123 | from ._legacy_py1x import ( 124 | MacrosDef, 125 | default_macro_dict, 126 | get_token, 127 | get_latex_expression, 128 | get_latex_maybe_optional_arg, 129 | get_latex_braced_group, 130 | get_latex_environment, 131 | get_latex_nodes, 132 | ) 133 | ### END_PYLATEXENC1_LEGACY_SUPPORT_CODE 134 | 135 | 136 | 137 | ### BEGIN_LATEXWALKER_HELPERS 138 | from ._helpers import ( 139 | nodelist_to_latex, 140 | put_in_braces, 141 | disp_node, 142 | make_json_encoder, 143 | ) 144 | ### END_LATEXWALKER_HELPERS 145 | -------------------------------------------------------------------------------- /pylatexenc/latexwalker/__main__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # The MIT License (MIT) 4 | # 5 | # Copyright (c) 2018 Philippe Faist 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including 
def _build_argument_parser():
    r"""
    Build the :py:mod:`argparse` parser for the ``latexwalker`` command line.
    """

    parser = argparse.ArgumentParser(prog='latexwalker', add_help=False)

    parser.add_argument('--output-format', metavar="FORMAT", dest="output_format",
                        choices=["human", "json"], default='human',
                        help='Requested output format for the node tree ("human" or "json")')
    parser.add_argument('--json-indent', metavar="NUMSPACES", dest="json_indent",
                        type=int, default=2,
                        help='Indentation in JSON output (specify number of spaces '
                        'per indentation level)')
    parser.add_argument('--json-compact', dest="json_indent", action='store_const', const=None,
                        help='Output compact JSON')

    # Hidden flags; they are accepted but the parsed value is not used below.
    parser.add_argument('--keep-inline-math', action='store_const', const=True,
                        dest='keep_inline_math', default=True,
                        help=argparse.SUPPRESS)
    parser.add_argument('--no-keep-inline-math', action='store_const', const=False,
                        dest='keep_inline_math',
                        help=argparse.SUPPRESS)

    parser.add_argument('--tolerant-parsing', action='store_const', const=True,
                        dest='tolerant_parsing', default=True,
                        help="Tolerate syntax errors when parsing, and attempt "
                        "to continue (default yes)")
    parser.add_argument('--no-tolerant-parsing', action='store_const', const=False,
                        dest='tolerant_parsing',
                        help="Do not attempt to recover from syntax errors "
                        "when parsing")

    # I'm not sure this flag is useful and if it should be exposed at all.
    # Accept it, but make it hidden.
    parser.add_argument('--strict-braces', action='store_const', const=True,
                        dest='strict_braces', default=False,
                        help=argparse.SUPPRESS)
    parser.add_argument('--no-strict-braces', action='store_const', const=False,
                        dest='strict_braces',
                        #help="Report errors for mismatching LaTeX braces (default no)"
                        help=argparse.SUPPRESS)

    parser.add_argument('-q', '--quiet', dest='logging_level', action='store_const',
                        const=logging.ERROR, default=logging.INFO,
                        help="Suppress warning messages")
    parser.add_argument('-v', '--verbose', dest='logging_level', action='store_const',
                        const=logging.DEBUG,
                        help="Verbose output")
    parser.add_argument('--version', action='version',
                        version='pylatexenc {}'.format(version_str),
                        help="Show version information and exit")
    parser.add_argument('--help', action='help',
                        help="Show this help information and exit")

    parser.add_argument('--code', '-c', action='store', default=None, metavar="LATEX_CODE",
                        help="Parse the given LATEX_CODE instead of reading "
                        "from FILE or standard input.  You cannot specify FILEs if you use this "
                        "option, and any standard input is ignored.")

    parser.add_argument('files', metavar="FILE", nargs='*',
                        help='Input files (if none specified, read from standard input)')

    return parser


def main(argv=None):
    r"""
    Entry point of the ``latexwalker`` command-line tool.

    Parses the LaTeX code given with ``--code``, or else read from the given
    FILEs (standard input if none are given), and writes the resulting node
    tree to standard output in the requested format ("human" or "json").

    :param argv: the list of command-line arguments, without the program
        name.  Defaults to ``sys.argv[1:]``.

    Exits with status 1 (via :py:func:`sys.exit`) if both ``--code`` and
    FILEs are specified.
    """

    if argv is None:
        argv = sys.argv[1:]

    args = _build_argument_parser().parse_args(argv)

    logging.basicConfig()
    logging.getLogger().setLevel(args.logging_level)
    logger = logging.getLogger(__name__)

    # Compare against None rather than testing truthiness: an explicitly
    # given empty string (--code '') must not fall back to reading the
    # standard input.
    if args.code is not None:
        if args.files:
            logger.error("Cannot specify both FILEs and --code option. "
                         "Use --help option for more information.")
            sys.exit(1)
        latex = args.code
    else:
        # With an empty file list, fileinput reads from the standard input.
        try:
            latex = ''.join(fileinput.input(files=args.files))
        finally:
            # Release the module-global fileinput state even if reading
            # failed, so that main() can safely be called again.
            fileinput.close()

    latexwalker = LatexWalker(latex,
                              tolerant_parsing=args.tolerant_parsing,
                              strict_braces=args.strict_braces)

    # only the node list is needed here; the position and length are not
    (nodelist, _, _) = latexwalker.get_latex_nodes()

    if args.output_format == 'human':
        print('\n--- NODES ---\n')
        for n in nodelist:
            disp_node(n)
        print('\n-------------\n')
        return

    if args.output_format == 'json':
        json.dump({ 'nodelist': nodelist, },
                  sys.stdout,
                  cls=make_json_encoder(latexwalker),
                  indent=args.json_indent)
        sys.stdout.write("\n")
        return

    # Not reachable through the command line, as argparse restricts the
    # possible choices for --output-format.
    raise ValueError("Invalid output format: "+args.output_format)



def run_main():
    r"""
    Debugging wrapper around :py:func:`main`.

    Runs :py:func:`main`; if it fails with anything other than
    :py:exc:`SystemExit`, prints the traceback and starts a post-mortem
    :py:mod:`pdb` session.
    """

    try:

        main()

    except SystemExit:
        raise
    except: # lgtm [py/catch-base-exception]
        # deliberate catch-all: this entry point exists for debugging
        import pdb
        import traceback
        traceback.print_exc()
        pdb.post_mortem()



if __name__ == '__main__':

    run_main() # debug
    #main()
License (MIT) 4 | # 5 | # Copyright (c) 2019 Philippe Faist 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in 15 | # all copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | # THE SOFTWARE. 24 | # 25 | 26 | 27 | # Internal module. Internal API may move, disappear or otherwise change at any 28 | # time and without notice. 29 | 30 | from __future__ import print_function, unicode_literals 31 | 32 | 33 | # don't define this function in the `_defaultspecs.py` source file because we 34 | # would like to be able to define this function without having to actually load 35 | # the entire default specs module. 36 | 37 | def get_default_latex_context_db(): 38 | r""" 39 | Return a :py:class:`pylatexenc.macrospec.LatexContextDb` instance 40 | initialized with a collection of known macros and environments. 41 | 42 | TODO: document categories. 
43 | 44 | If you want to add your own definitions, you should use the 45 | :py:meth:`pylatexenc.macrospec.LatexContextDb.add_context_category()` 46 | method. If you would like to override some definitions, use that method 47 | with the argument `prepend=True`. See docs for 48 | :py:meth:`pylatexenc.macrospec.LatexContextDb.add_context_category()`. 49 | 50 | If there are too many macro/environment definitions, or if there are some 51 | irrelevant ones, you can always filter the returned database using 52 | :py:meth:`pylatexenc.macrospec.LatexContextDb.filter_context()`. 53 | 54 | .. versionadded:: 2.0 55 | 56 | The :py:class:`pylatexenc.macrospec.LatexContextDb` class as well as this 57 | method, were all introduced in `pylatexenc 2.0`. 58 | """ 59 | 60 | from .. import macrospec 61 | from ._defaultspecs import specs 62 | 63 | db = macrospec.LatexContextDb() 64 | 65 | for cat, catspecs in specs: 66 | db.add_context_category( 67 | cat, 68 | macros=catspecs['macros'], 69 | environments=catspecs['environments'], 70 | specials=catspecs['specials'] 71 | ) 72 | 73 | db.set_unknown_macro_spec(macrospec.MacroSpec('')) 74 | db.set_unknown_environment_spec(macrospec.EnvironmentSpec('')) 75 | 76 | return db 77 | 78 | # 79 | -------------------------------------------------------------------------------- /pylatexenc/macrospec/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # The MIT License (MIT) 4 | # 5 | # Copyright (c) 2022 Philippe Faist 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the 
following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in 15 | # all copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | # THE SOFTWARE. 24 | # 25 | 26 | r""" 27 | Provides classes and helper functions to describe a LaTeX context of known 28 | macros and environments, specifying how they should be parsed by 29 | :py:mod:`pylatexenc.latexwalker`. 30 | 31 | .. versionadded:: 2.0 32 | 33 | The entire module :py:mod:`pylatexenc.macrospec` was introduced in 34 | `pylatexenc 2.0`. 
35 | """ 36 | 37 | 38 | from ._specclasses import ( 39 | CallableSpec, 40 | MacroSpec, 41 | EnvironmentSpec, 42 | SpecialsSpec, 43 | ) 44 | 45 | ### BEGIN_PYLATEXENC2_LEGACY_SUPPORT_CODE 46 | from ._spechelpers import std_macro, std_environment, std_specials 47 | ### END_PYLATEXENC2_LEGACY_SUPPORT_CODE 48 | 49 | 50 | from ._latexcontextdb import ( 51 | LatexContextDb, 52 | ParsingStateDeltaExtendLatexContextDb, 53 | ) 54 | 55 | from ._argumentsparser import ( 56 | LatexArgumentsParser, 57 | LatexNoArgumentsParser, 58 | ) 59 | 60 | from ._environmentbodyparser import ( 61 | LatexEnvironmentBodyContentsParserInfo, 62 | LatexEnvironmentBodyContentsParser 63 | ) 64 | 65 | from ._macrocallparser import ( 66 | LatexMacroCallParser, 67 | LatexEnvironmentCallParser, 68 | LatexSpecialsCallParser 69 | ) 70 | 71 | 72 | ### BEGIN_PYLATEXENC2_LEGACY_SUPPORT_CODE 73 | from ..latexnodes import ParsedArguments as ParsedMacroArgs 74 | from ._pyltxenc2_argparsers import ( 75 | MacroStandardArgsParser, 76 | ParsedVerbatimArgs, 77 | VerbatimArgsParser, 78 | ParsedLstListingArgs, 79 | LstListingArgsParser, 80 | ) 81 | ### END_PYLATEXENC2_LEGACY_SUPPORT_CODE 82 | 83 | -------------------------------------------------------------------------------- /pylatexenc/macrospec/_pyltxenc2_argparsers/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # The MIT License (MIT) 4 | # 5 | # Copyright (c) 2021 Philippe Faist 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above 
copyright notice and this permission notice shall be included in 15 | # all copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | # THE SOFTWARE. 24 | # 25 | 26 | 27 | # Internal module. Internal API may move, disappear or otherwise change at any 28 | # time and without notice. 29 | 30 | 31 | 32 | from ._base import ( 33 | MacroStandardArgsParser 34 | ) 35 | 36 | 37 | # ------------------------------------------------------------------------------ 38 | 39 | 40 | from ._verbatimargsparser import ( 41 | ParsedVerbatimArgs, 42 | VerbatimArgsParser, 43 | ParsedLstListingArgs, 44 | LstListingArgsParser, 45 | ) 46 | -------------------------------------------------------------------------------- /pylatexenc/version.py: -------------------------------------------------------------------------------- 1 | # 2 | # The MIT License (MIT) 3 | # 4 | # Copyright (c) 2021 Philippe Faist 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files (the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included in 14 | # all copies or substantial portions of the 
Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | # THE SOFTWARE. 23 | # 24 | 25 | 26 | # 27 | # Self-note: Checklist --- NOTE THESE ARE OLD INSTRUCTIONS DATING FROM 2.X VERSIONS 28 | # 29 | # 1) First some checks: 30 | # 31 | # - Set below in this file ' version_str = "X.Xb" ' (beta version for next 32 | # release) for the following tests. 33 | # 34 | # - tests pass: https://travis-ci.org/github/phfaist/pylatexenc 35 | # 36 | # - LGTM looks good: https://lgtm.com/projects/g/phfaist/pylatexenc/ 37 | # 38 | # - python package creation works: (python setup.py sdist, pip install 39 | # dist/pylatexenc-xxx.tar.gz) 40 | # 41 | # 2) update change log (doc/changes.rst) 42 | # 43 | # 3) bump version number here 44 | # 45 | # 4) git commit any remaining changes 46 | # 47 | # 5) " git tag vX.X -am '' " 48 | # 49 | # 6) " git push && git push --tags " 50 | # 51 | # 7) on github.com, fill in release details with a summary of changes etc. 52 | # 53 | # 8) create the source package for PyPI (" python3 setup.py sdist ") 54 | # 55 | # 9) upload package to PyPI (twine upload dist/pylatexenc-X.X.tar.gz -r realpypi) 56 | # 57 | 58 | 59 | # ALSO BUMP IN pyproject.toml ! 
60 | version_str = "3.0alpha000032" 61 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "pylatexenc" 3 | version = "3.0alpha000032" # ALSO BUMP IN pylatexenc/version.py 4 | description = "Simple LaTeX parser providing latex-to-unicode and unicode-to-latex conversion" 5 | authors = ["Philippe Faist "] 6 | license = "MIT" 7 | readme = "README.rst" 8 | 9 | [tool.poetry.scripts] 10 | latexwalker = 'pylatexenc.latexwalker.__main__:main' 11 | latex2text = 'pylatexenc.latex2text.__main__:main' 12 | latexencode = 'pylatexenc.latexencode.__main__:main' 13 | 14 | 15 | [tool.poetry.dependencies] 16 | #python = "^2.7 || ^3.4" # This simply creates a dependency resolution mess that doesn't work. 17 | #python = "^3.7" # hmm seems like it severely constrains other package versions 18 | python = "^3.8" 19 | 20 | [tool.poetry.dev-dependencies] 21 | pytest = ">=7.0" 22 | toml = "^0.10.2" 23 | 24 | [tool.poetry.group.builddoc] 25 | optional = true 26 | 27 | [tool.poetry.group.builddoc.dependencies] 28 | Sphinx = ">=5.0.0" 29 | sphinx-issues = ">=3.0.0" 30 | 31 | [tool.poetry.group.buildjslib] 32 | optional = true 33 | 34 | [tool.poetry.group.buildjslib.dependencies] 35 | Transcrypt = ">=3.9.0" 36 | PyYAML = ">=5.0" 37 | 38 | 39 | [build-system] 40 | requires = ["poetry-core>=1.0.0"] 41 | build-backend = "poetry.core.masonry.api" 42 | 43 | 44 | 45 | [tool.pytest.ini_options] 46 | testpaths = [ 47 | "test", 48 | ] 49 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # 2 | # The MIT License (MIT) 3 | # 4 | # Copyright (c) 2019 Philippe Faist 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files (the "Software"), 
to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included in 14 | # all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | # THE SOFTWARE. 23 | # 24 | 25 | import os 26 | import os.path 27 | #import sys 28 | 29 | from setuptools import setup, find_packages 30 | 31 | from pylatexenc.version import version_str 32 | 33 | def read(*paths): 34 | """Build a file path from *paths* and return the contents.""" 35 | with open(os.path.join(*paths), 'r') as f: 36 | return f.read() 37 | 38 | setup( 39 | name = "pylatexenc", 40 | version = version_str, 41 | 42 | # metadata for upload to PyPI 43 | author = "Philippe Faist", 44 | author_email = "philippe.faist@bluewin.ch", 45 | description = "Simple LaTeX parser providing latex-to-unicode and unicode-to-latex conversion", 46 | long_description = read("README.rst"), 47 | license = "MIT", 48 | keywords = "latex text unicode encode parse expression", 49 | url = "https://github.com/phfaist/pylatexenc", 50 | classifiers=[ 51 | 'Development Status :: 5 - Production/Stable', 52 | 'License :: OSI Approved :: MIT License', 53 | 'Programming Language :: Python', 54 | 'Programming Language :: Python :: 2', 55 | 
'Programming Language :: Python :: 2.7', 56 | 'Programming Language :: Python :: 3', 57 | 'Operating System :: MacOS :: MacOS X', 58 | 'Operating System :: Microsoft :: Windows', 59 | 'Operating System :: POSIX :: Linux', 60 | 'Intended Audience :: Developers', 61 | 'Topic :: Scientific/Engineering', 62 | 'Topic :: Text Processing :: General', 63 | 'Topic :: Text Processing :: Markup :: LaTeX', 64 | ], 65 | 66 | # files 67 | packages = find_packages(), 68 | entry_points = { 69 | 'console_scripts': [ 70 | 'latexwalker=pylatexenc.latexwalker.__main__:main', 71 | 'latex2text=pylatexenc.latex2text.__main__:main', 72 | 'latexencode=pylatexenc.latexencode.__main__:main', 73 | ], 74 | }, 75 | install_requires = [], 76 | package_data = { 77 | }, 78 | ) 79 | -------------------------------------------------------------------------------- /test/.gitignore: -------------------------------------------------------------------------------- 1 | _tmp_uni_chars_test.temp.txt 2 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phfaist/pylatexenc/6dc2ce7fcd89b7cd1536c79c800f49f09535f5e9/test/__init__.py -------------------------------------------------------------------------------- /test/dummy/readme.txt: -------------------------------------------------------------------------------- 1 | This directory is just used as base '\input{}' directory, to test the strict_input flag of LatexNodes2Text. 2 | -------------------------------------------------------------------------------- /test/test_input_1.tex: -------------------------------------------------------------------------------- 1 | \textit{hi there!} This is {\em an equation}: 2 | \begin{equation} 3 | x + y i = 0 4 | \end{equation} 5 | 6 | where $i$ is the imaginary unit. 
7 | -------------------------------------------------------------------------------- /test/test_latexencode_all.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import unicode_literals, print_function 3 | 4 | import unittest 5 | 6 | import sys 7 | import codecs 8 | import difflib 9 | import unicodedata 10 | import logging 11 | import os.path 12 | 13 | if sys.version_info.major >= 3: 14 | PY3 = True 15 | else: 16 | PY3 = False 17 | 18 | if PY3: 19 | def unicode(string): return string 20 | basestring = str 21 | unichr = chr 22 | else: 23 | range = xrange 24 | 25 | 26 | from pylatexenc.latexencode import UnicodeToLatexEncoder 27 | 28 | 29 | 30 | 31 | class TestLatexEncodeAll(unittest.TestCase): 32 | 33 | def __init__(self, *args, **kwargs): 34 | super(TestLatexEncodeAll, self).__init__(*args, **kwargs) 35 | 36 | # def test_pythonunicoderange(self): 37 | # self.assertGreater(sys.maxunicode, 0xFFFF+1, 38 | # "Your python build only supports unicode characters up to U+FFFF." 
39 | # " Tests of unicode coverage will fail.") 40 | 41 | def test_all(self): 42 | 43 | loglevel = logging.getLogger().level 44 | logging.getLogger().setLevel(logging.CRITICAL) 45 | 46 | u = UnicodeToLatexEncoder(unknown_char_policy='fail', 47 | replacement_latex_protection='braces-almost-all') 48 | 49 | def fn(x, bdir=os.path.realpath(os.path.abspath(os.path.dirname(__file__)))): 50 | return os.path.join(bdir, x) 51 | 52 | with codecs.open(fn('_tmp_uni_chars_test.temp.txt'), 'w', encoding='utf-8') as testf: 53 | 54 | for i in range(0x10FFFF): 55 | # iter over all valid unicode characters 56 | try: 57 | chrname = unicodedata.name(unichr(i)) # test if valid, i.e., it has a UNICODE NAME 58 | except ValueError: 59 | continue 60 | 61 | line = "0x%04X %-50s |%s|\n"%(i, '['+chrname+']', unichr(i)) 62 | 63 | # try to encode it using our unicode_to_latex routines 64 | try: 65 | enc = u.unicode_to_latex(line) 66 | except ValueError: 67 | continue 68 | testf.write(enc) 69 | 70 | with codecs.open(fn('uni_chars_test_previous.txt'), 'r', encoding='utf-8') as reff, \ 71 | codecs.open(fn('_tmp_uni_chars_test.temp.txt'), 'r', encoding='utf-8') as testf: 72 | a = reff.readlines() 73 | b = testf.readlines() 74 | 75 | logging.getLogger().setLevel(loglevel) 76 | logger = logging.getLogger(__name__) 77 | 78 | # only check up to the supported unicode range 79 | if sys.maxunicode < 0x10FFFF: 80 | logger.warning("Only checking up to unicode U+%X, your python build doesn't support higher", 81 | sys.maxunicode) 82 | afiltered = [ aline for aline in a 83 | if int(aline[:aline.find(' ')], 0) < sys.maxunicode ] 84 | a = afiltered 85 | 86 | s = difflib.unified_diff(a, b, 87 | fromfile='uni_chars_test_previous.txt', 88 | tofile='_tmp_uni_chars_test.temp.txt') 89 | diffmsg = "".join(list(s)).strip() 90 | if diffmsg: 91 | print(diffmsg) 92 | raise self.failureException("Unicode coverage tests failed. 
See full diff above.") 93 | 94 | 95 | if __name__ == '__main__': 96 | logging.basicConfig(level=logging.DEBUG) 97 | unittest.main() 98 | # 99 | 100 | -------------------------------------------------------------------------------- /test/test_latexnodes_parsers_optionals.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import logging 3 | 4 | 5 | from pylatexenc.latexnodes.parsers._optionals import ( 6 | LatexOptionalCharsMarkerParser, 7 | LatexOptionalSquareBracketsParser, 8 | ) 9 | 10 | from pylatexenc.latexnodes import ( 11 | LatexWalkerParseError, 12 | LatexTokenReader, 13 | LatexToken, 14 | ParsingState, 15 | ) 16 | from pylatexenc.latexnodes.nodes import * 17 | 18 | from ._helpers_tests import ( 19 | DummyWalker, 20 | DummyLatexContextDb, 21 | ) 22 | 23 | 24 | 25 | 26 | # -------------------------------------- 27 | 28 | class TestLatexOptionalCharsMarkerParser(unittest.TestCase): 29 | 30 | maxDiff = None 31 | 32 | def test_simple_chars_marker_isthere(self): 33 | 34 | latextext = r'''*more''' 35 | 36 | tr = LatexTokenReader(latextext) 37 | ps = ParsingState(s=latextext, latex_context=DummyLatexContextDb()) 38 | lw = DummyWalker() 39 | 40 | parser = LatexOptionalCharsMarkerParser('*') 41 | 42 | nodes, parsing_state_delta = lw.parse_content(parser, token_reader=tr, parsing_state=ps) 43 | 44 | self.assertEqual( 45 | nodes, 46 | LatexNodeList( 47 | [ 48 | LatexCharsNode( 49 | parsing_state=ps, 50 | chars='*', 51 | pos=0, 52 | pos_end=1, 53 | ) 54 | ], 55 | parsing_state=ps 56 | ) 57 | ) 58 | 59 | 60 | def test_simple_chars_marker_notthere(self): 61 | 62 | latextext = r'''more''' 63 | 64 | tr = LatexTokenReader(latextext) 65 | ps = ParsingState(s=latextext, latex_context=DummyLatexContextDb()) 66 | lw = DummyWalker() 67 | 68 | parser = LatexOptionalCharsMarkerParser('*') 69 | 70 | nodes, parsing_state_delta = lw.parse_content(parser, token_reader=tr, parsing_state=ps) 71 | 72 | self.assertEqual( 73 | nodes, 
74 | None, 75 | ) 76 | 77 | def test_simple_chars_marker_notthere_reqempty(self): 78 | 79 | latextext = r'''more''' 80 | 81 | tr = LatexTokenReader(latextext) 82 | ps = ParsingState(s=latextext, latex_context=DummyLatexContextDb()) 83 | lw = DummyWalker() 84 | 85 | parser = LatexOptionalCharsMarkerParser('*', return_none_instead_of_empty=False) 86 | 87 | nodes, parsing_state_delta = lw.parse_content(parser, token_reader=tr, parsing_state=ps) 88 | 89 | self.assertEqual( 90 | nodes, 91 | LatexNodeList([], parsing_state=ps, pos=0, pos_end=0), 92 | ) 93 | 94 | 95 | def test_simple_chars_marker_isthere_notlist(self): 96 | 97 | latextext = r'''*more''' 98 | 99 | tr = LatexTokenReader(latextext) 100 | ps = ParsingState(s=latextext, latex_context=DummyLatexContextDb()) 101 | lw = DummyWalker() 102 | 103 | parser = LatexOptionalCharsMarkerParser('*', return_full_node_list=False) 104 | 105 | nodes, parsing_state_delta = lw.parse_content(parser, token_reader=tr, parsing_state=ps) 106 | 107 | self.assertEqual( 108 | nodes, 109 | LatexCharsNode( 110 | parsing_state=ps, 111 | chars='*', 112 | pos=0, 113 | pos_end=1, 114 | ) 115 | ) 116 | 117 | 118 | def test_simple_chars_marker_notthere_notlist(self): 119 | 120 | latextext = r'''more''' 121 | 122 | tr = LatexTokenReader(latextext) 123 | ps = ParsingState(s=latextext, latex_context=DummyLatexContextDb()) 124 | lw = DummyWalker() 125 | 126 | parser = LatexOptionalCharsMarkerParser('*', return_full_node_list=False) 127 | 128 | nodes, parsing_state_delta = lw.parse_content(parser, token_reader=tr, parsing_state=ps) 129 | 130 | self.assertEqual( 131 | nodes, 132 | None, 133 | ) 134 | 135 | 136 | 137 | 138 | # --- 139 | 140 | if __name__ == '__main__': 141 | logging.basicConfig(level=logging.DEBUG) 142 | unittest.main() 143 | # 144 | -------------------------------------------------------------------------------- /test/test_latexnodes_tokenreaderbase.py: -------------------------------------------------------------------------------- 
1 | import unittest 2 | import logging 3 | logger = logging.getLogger(__name__) 4 | 5 | 6 | from pylatexenc.latexnodes._tokenreaderbase import ( 7 | LatexTokenReaderBase, 8 | LatexTokenListTokenReader, 9 | ) 10 | 11 | from pylatexenc.latexnodes import ( 12 | LatexWalkerEndOfStream, 13 | LatexToken, 14 | ParsingState, 15 | ) 16 | 17 | 18 | 19 | 20 | class TestTokenReaderBase(unittest.TestCase): 21 | 22 | def test_make_token(self): 23 | tb = LatexTokenReaderBase() 24 | 25 | self.assertEqual( 26 | tb.make_token(tok='char', arg='*', pos=3), 27 | LatexToken(tok='char', arg='*', pos=3) 28 | ) 29 | 30 | def test_peek_token_or_none(self): 31 | 32 | class MyTokenReader(LatexTokenReaderBase): 33 | def __init__(self, at_end=False): 34 | super(MyTokenReader, self).__init__() 35 | self.at_end = at_end 36 | 37 | def peek_token(self, parsing_state): 38 | if not self.at_end: 39 | return self.make_token(tok='char', arg='-', pos=5) 40 | raise LatexWalkerEndOfStream() 41 | 42 | ps = ParsingState() 43 | 44 | tb = MyTokenReader(False) 45 | self.assertEqual( tb.peek_token_or_none(ps), 46 | LatexToken(tok='char', arg='-', pos=5) ) 47 | tb = MyTokenReader(True) 48 | self.assertIsNone( tb.peek_token_or_none(ps) ) 49 | 50 | 51 | 52 | class TestTokenReaderTokenList(unittest.TestCase): 53 | 54 | def test_reads_tokens(self): 55 | 56 | tlist = [ 57 | LatexToken(tok='char', arg='a', pos=0, pos_end=1, pre_space=''), 58 | LatexToken(tok='char', arg='b', pos=1, pos_end=1+2, pre_space=''), 59 | LatexToken(tok='macro', arg='relax', pos=2, pos_end=2+2+len(r'\relax'), 60 | pre_space='', post_space='\t '), 61 | ] 62 | 63 | tr = LatexTokenListTokenReader(tlist) 64 | 65 | ps = ParsingState() 66 | 67 | self.assertEqual(tr.peek_token(ps), tlist[0]) 68 | self.assertEqual(tr.peek_token(ps), tlist[0]) 69 | 70 | self.assertEqual(tr.cur_pos(), tlist[0].pos) 71 | 72 | self.assertEqual(tr.next_token(ps), tlist[0]) 73 | 74 | self.assertEqual(tr.peek_token(ps), tlist[1]) 75 | self.assertEqual(tr.peek_token(ps), 
tlist[1]) 76 | 77 | self.assertEqual(tr.next_token(ps), tlist[1]) 78 | 79 | self.assertEqual(tr.next_token(ps), tlist[2]) 80 | 81 | tr.move_to_token(tlist[1]) 82 | 83 | self.assertEqual(tr.next_token(ps), tlist[1]) 84 | 85 | tr.move_past_token(tlist[0]) 86 | 87 | self.assertEqual(tr.next_token(ps), tlist[1]) 88 | 89 | self.assertEqual(tr.cur_pos(), tlist[2].pos) 90 | 91 | 92 | 93 | 94 | if __name__ == '__main__': 95 | logging.basicConfig(level=logging.DEBUG) 96 | unittest.main() 97 | # 98 | -------------------------------------------------------------------------------- /test/test_macrospec_argumentsparser.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | import logging 4 | 5 | 6 | 7 | # from pylatexenc.macrospec._argumentsparser import ( 8 | # LatexArgumentSpec, 9 | # LatexNoArgumentsParser, 10 | # LatexArgumentsParser, 11 | # ) 12 | 13 | # from pylatexenc.latexnodes import ( 14 | # LatexWalkerTokenParseError, 15 | # LatexToken, 16 | # ParsingState 17 | # ) 18 | 19 | 20 | 21 | class TestLatexArgumentsParser(unittest.TestCase): 22 | 23 | # ............. TODO, need to write good tests ................. 
24 | 25 | 26 | pass 27 | 28 | 29 | 30 | # class Test__LegacyPyltxenc2MacroArgsParserWrapper(unittest.TestCase): 31 | # def 32 | 33 | 34 | 35 | # --- 36 | 37 | if __name__ == '__main__': 38 | logging.basicConfig(level=logging.DEBUG) 39 | unittest.main() 40 | # 41 | -------------------------------------------------------------------------------- /test/test_macrospec_environmentbodyparser.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | from pylatexenc.macrospec._environmentbodyparser import ( 5 | LatexEnvironmentBodyContentsParser, 6 | ) 7 | 8 | from pylatexenc.latexnodes import ( 9 | LatexTokenReader, 10 | LatexArgumentSpec, 11 | ParsedArguments, 12 | ParsingState, 13 | ) 14 | from pylatexenc.latexnodes.nodes import * 15 | from pylatexenc.macrospec import ( 16 | LatexContextDb, 17 | MacroSpec, 18 | ParsingStateDeltaExtendLatexContextDb, 19 | ) 20 | from pylatexenc.latexwalker import LatexWalker 21 | 22 | from ._helpers_tests import ( 23 | add_not_equal_warning_to_object 24 | ) 25 | 26 | 27 | add_not_equal_warning_to_object(LatexNode) 28 | add_not_equal_warning_to_object(ParsingState) 29 | add_not_equal_warning_to_object(ParsedArguments) 30 | add_not_equal_warning_to_object(LatexArgumentSpec) 31 | 32 | 33 | class TestEnvironmentBodyContentsParser(unittest.TestCase): 34 | 35 | maxDiff = None 36 | 37 | def test_simple_1(self): 38 | 39 | # \begin{environment} 40 | latextext = r'''a+b=c\end{environment}''' 41 | 42 | tr = LatexTokenReader(latextext) 43 | lw = LatexWalker(latextext, latex_context=LatexContextDb()) 44 | ps = lw.make_parsing_state() 45 | 46 | print("ps = ", ps) 47 | 48 | parser = LatexEnvironmentBodyContentsParser('environment') 49 | nodes, parsing_state_delta = \ 50 | lw.parse_content(parser, token_reader=tr, parsing_state=ps) 51 | 52 | self.assertEqual( 53 | nodes, 54 | LatexNodeList( 55 | [ 56 | LatexCharsNode( 57 | parsing_state=ps, 58 | latex_walker=lw, 59 | chars='a+b=c', 60 | pos=0, 
61 | pos_end=5, 62 | ), 63 | ], 64 | pos=0, 65 | pos_end=5, 66 | ) 67 | ) 68 | 69 | def test_contents_and_child_parsing_state_delta(self): 70 | 71 | # \begin{enumerate} 72 | latextext = r''' 73 | \item A \textbf{\localcommand}\end{enumerate}'''.lstrip() 74 | 75 | latex_context = LatexContextDb() 76 | latex_context.add_context_category( 77 | 'main-context-category', 78 | macros=[ 79 | MacroSpec("textbf", '{') 80 | ] 81 | ) 82 | 83 | tr = LatexTokenReader(latextext) 84 | lw = LatexWalker(latextext, latex_context=latex_context, tolerant_parsing=False) 85 | ps = lw.make_parsing_state() 86 | 87 | print("ps = ", ps) 88 | 89 | ps_content_delta = ParsingStateDeltaExtendLatexContextDb( 90 | extend_latex_context=dict( 91 | macros=[ 92 | MacroSpec("item", ''), 93 | ] 94 | ) 95 | ) 96 | ps_child_delta = ParsingStateDeltaExtendLatexContextDb( 97 | extend_latex_context=dict( 98 | macros=[ 99 | MacroSpec("localcommand", ''), 100 | ] 101 | ) 102 | ) 103 | 104 | parser = LatexEnvironmentBodyContentsParser( 105 | 'enumerate', 106 | contents_parsing_state_delta=ps_content_delta, 107 | child_parsing_state_delta=ps_child_delta, 108 | ) 109 | nodes, parsing_state_delta = \ 110 | lw.parse_content(parser, token_reader=tr, parsing_state=ps) 111 | 112 | ps_content = nodes[1].parsing_state 113 | ps_child = nodes[0].parsing_state 114 | ps_child2 = nodes[2].parsing_state 115 | 116 | print("ps_content =", ps_content) 117 | print("ps_child =", ps_child) 118 | 119 | nodes_expected = LatexNodeList( 120 | [ 121 | LatexMacroNode( 122 | parsing_state=ps_child, 123 | latex_walker=lw, 124 | macroname='item', 125 | spec=ps_content.latex_context.get_macro_spec('item'), 126 | nodeargd=ParsedArguments(argnlist=LatexNodeList([]),), 127 | pos=0, 128 | pos_end=6, 129 | macro_post_space=' ', 130 | ), 131 | LatexCharsNode( 132 | parsing_state=ps_content, 133 | latex_walker=lw, 134 | chars='A ', 135 | pos=6, 136 | pos_end=8, 137 | ), 138 | LatexMacroNode( 139 | parsing_state=ps_child2, 140 | latex_walker=lw, 141 
| macroname='textbf', 142 | spec=ps.latex_context.get_macro_spec('textbf'), 143 | nodeargd=ParsedArguments( 144 | argnlist=[ 145 | LatexGroupNode( 146 | parsing_state=ps_child2, 147 | latex_walker=lw, 148 | delimiters=('{','}'), 149 | nodelist=LatexNodeList( 150 | [ 151 | LatexMacroNode( 152 | parsing_state=ps_child2, 153 | latex_walker=lw, 154 | spec=ps_child2.latex_context \ 155 | .get_macro_spec('localcommand'), 156 | macroname='localcommand', 157 | nodeargd=ParsedArguments( 158 | argnlist=LatexNodeList([]), 159 | ), 160 | pos=16, 161 | pos_end=29, 162 | macro_post_space='', 163 | ), 164 | ], 165 | pos=16, 166 | pos_end=29, 167 | ), 168 | pos=15, 169 | pos_end=30, 170 | ) 171 | ], 172 | arguments_spec_list=[ 173 | LatexArgumentSpec(argname=None, parser='{'), 174 | ], 175 | ), 176 | pos=8, 177 | pos_end=30, 178 | macro_post_space='', 179 | ), 180 | ], 181 | pos=0, 182 | pos_end=30, 183 | ) 184 | 185 | print(nodes) 186 | print(nodes_expected) 187 | 188 | # check that ps_content is the parsing state by inspecting the context db 189 | self.assertIsNotNone( ps_content.latex_context.get_macro_spec('item') ) 190 | self.assertIsNone( ps_content.latex_context.get_macro_spec('localcommand') ) 191 | 192 | # check that ps_child is the parsing state by inspecting the context db 193 | self.assertIsNone( ps_child.latex_context.get_macro_spec('item') ) 194 | self.assertIsNotNone( ps_child.latex_context.get_macro_spec('localcommand') ) 195 | self.assertIsNone( ps_child2.latex_context.get_macro_spec('item') ) 196 | self.assertIsNotNone( ps_child2.latex_context.get_macro_spec('localcommand') ) 197 | 198 | 199 | self.assertEqual( 200 | nodes, 201 | nodes_expected 202 | ) 203 | 204 | if __name__ == '__main__': 205 | unittest.main() 206 | -------------------------------------------------------------------------------- /test/test_macrospec_latexcontextdb.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | import 
class TestLatexContextDb(unittest.TestCase):
    """Tests for `LatexContextDb`; so far only `extended_with()` is covered."""

    # TODO........ need more tests here

    def test_extended_with(self):
        """
        Extend a frozen context twice and check lookups and category lists.

        The first `extended_with()` is expected to yield two categories, the
        first of which carries the autogenerated prefix; the second call is
        expected to leave the category list unchanged.
        """

        base_db = LatexContextDb()
        base_db.add_context_category(
            'base-category',
            macros=[MacroSpec('base', '{')],
            environments=[EnvironmentSpec('baseenv', '{')],
            specials=[SpecialsSpec('~')],
        )
        base_db.freeze()

        logger.debug("context's category list = %r", base_db.category_list)
        logger.debug("context's d = %r", base_db.d)
        logger.debug("context's lookup maps are = %r", base_db.lookup_chain_maps)

        # ---- first extension: only new names ----

        more_macro = MacroSpec('more', '{')
        more_env = EnvironmentSpec('moreenv', '{')
        bang_special = SpecialsSpec('!')

        db2 = base_db.extended_with(
            macros=[more_macro],
            environments=[more_env],
            specials=[bang_special],
        )

        logger.debug("context2's category list = %r", db2.category_list)
        logger.debug("context2's d = %r", db2.d)
        logger.debug("context2's lookup maps are = %r", db2.lookup_chain_maps)

        self.assertEqual(len(db2.category_list), 2)
        self.assertTrue(
            db2.category_list[0].startswith(_autogen_category_prefix)
        )

        # everything from the base context is still found ...
        self.assertEqual(db2.get_macro_spec('base'),
                         base_db.get_macro_spec('base'))
        self.assertEqual(db2.get_environment_spec('baseenv'),
                         base_db.get_environment_spec('baseenv'))
        self.assertEqual(db2.get_specials_spec('~'),
                         base_db.get_specials_spec('~'))
        self.assertEqual(db2.test_for_specials('~~~~~~~', pos=0),
                         base_db.get_specials_spec('~'))
        # ... and so are the newly added specs
        self.assertEqual(db2.get_macro_spec('more'), more_macro)
        self.assertEqual(db2.get_environment_spec('moreenv'), more_env)
        self.assertEqual(db2.get_specials_spec('!'), bang_special)
        self.assertEqual(db2.test_for_specials('!!!!!', pos=0), bang_special)

        # ---- second extension: one new macro, an override, a longer special ----

        evenmore_macro = MacroSpec('evenmore', '{')
        baseenv_override = EnvironmentSpec('baseenv', '{')  # override baseenv
        bangbang_special = SpecialsSpec('!!')

        db3 = db2.extended_with(
            macros=[evenmore_macro],
            environments=[baseenv_override],
            specials=[bangbang_special],
        )

        # extended_with() a second time shouldn't add a new category as the
        # first category is already an autogenerated one
        self.assertEqual(len(db3.category_list), 2)
        self.assertEqual(db3.category_list, db2.category_list)
        self.assertTrue(
            db3.category_list[0].startswith(_autogen_category_prefix)
        )

        logger.debug("context3's category list = %r", db3.category_list)
        logger.debug("context3's d = %r", db3.d)
        logger.debug("context3's lookup maps are = %r", db3.lookup_chain_maps)

        self.assertEqual(db3.get_macro_spec('base'),
                         base_db.get_macro_spec('base'))
        # NOTE: 'baseenv' is deliberately NOT compared against the base
        # context here -- the second extension overrides it (checked below).
        self.assertEqual(db3.get_specials_spec('~'),
                         base_db.get_specials_spec('~'))
        self.assertEqual(db3.test_for_specials('~~~~~~~', pos=0),
                         base_db.get_specials_spec('~'))
        self.assertEqual(db3.get_macro_spec('more'), more_macro)
        self.assertEqual(db3.get_macro_spec('evenmore'), evenmore_macro)
        self.assertEqual(db3.get_environment_spec('moreenv'), more_env)
        self.assertEqual(db3.get_environment_spec('baseenv'), baseenv_override)
        self.assertEqual(db3.get_specials_spec('!'), bang_special)
        self.assertEqual(db3.get_specials_spec('!!'), bangbang_special)
        self.assertEqual(db3.test_for_specials('!!!!!', pos=0), bangbang_special)
class TestHardcodedPackageVersion(unittest.TestCase):
    """
    Guard against the version declared in ``pyproject.toml`` and the one
    hard-coded in ``pylatexenc.__version__`` getting out of sync.
    """

    # thanks https://github.com/python-poetry/poetry/issues/144#issuecomment-877835259

    def test_versions_are_in_sync(self):
        """Checks if the pyproject.toml and package.__init__.py __version__ are in sync."""

        # Local import: io.open() takes an `encoding` argument on both
        # Python 2 and Python 3 (on Python 3 it is the builtin open()).
        import io

        path = os.path.join( os.path.dirname(__file__), '..', "pyproject.toml" )

        # TOML documents are UTF-8 by specification, so decode explicitly
        # instead of relying on the platform's default encoding.
        with io.open(path, encoding='utf-8') as fpp:
            pyproject = toml.loads(fpp.read())
        pyproject_version = pyproject["tool"]["poetry"]["version"]

        package_init_version = pylatexenc.__version__

        self.assertEqual(package_init_version, pyproject_version)
#
# Inspect latexencode rules to see if there are symbols that we can use for
# latex2text, too
#

# Py3 only script
import sys
assert sys.version_info > (3,0)

import unicodedata

import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


from pylatexenc import latexwalker, latex2text, latexencode #, macrospec

l2t_default_context = latex2text.get_default_latex_context_db()


def extract_symbol_node(nodelist, uni, latex):
    """
    Print a ``MacroTextSpec(...)`` line if `nodelist` is a single, argument-less
    macro that the default latex2text context does not know yet.

    An ``\\ensuremath{...}`` wrapper with exactly one argument is looked
    through: the function recurses on the contents of that argument.

    :param nodelist: the nodes obtained by parsing `latex`
    :param uni: the code point (an integer, it is passed to ``chr()``) that
        `latex` stands for
    :param latex: the LaTeX code of the rule; only used in log messages
    :return: always `None`; the result is printed to standard output, and
        every skipped case is reported through the module logger
    """

    if len(nodelist) != 1:
        # not exactly one "thing" (may also be an empty list)
        logger.warning("Got nodelist that is not exactly one node, skipping (%s): %s = %r",
                       chr(uni), latex, nodelist)
        return

    thenode = nodelist[0]

    if not thenode.isNodeType(latexwalker.LatexMacroNode):
        logger.warning("Got node that is not a macro, skipping (%s): %s = %r",
                       chr(uni), latex, thenode)
        return

    if thenode.macroname == 'ensuremath':
        # ignore, parse contents instead
        if thenode.nodeargd is None or not thenode.nodeargd.argnlist or \
           len(thenode.nodeargd.argnlist) != 1:
            logger.warning(r"\ensuremath with no arguments or wrong # of arguments (%s): %s = %r",
                           chr(uni), latex, nodelist)
            return

        argnode = thenode.nodeargd.argnlist[0]
        if argnode.isNodeType(latexwalker.LatexGroupNode):
            argnodelist = argnode.nodelist
        else:
            argnodelist = [ argnode ]

        return extract_symbol_node(argnodelist, uni, latex)

    l2t_mspec = l2t_default_context.get_macro_spec(thenode.macroname)
    if l2t_mspec is not None and l2t_mspec.macroname:
        # macro found, already known
        logger.debug("Macro found (%s): %r", chr(uni), thenode)
        return

    if thenode.nodeargd and thenode.nodeargd.argnlist:
        logger.warning(r"Macro %r for ‘%s’ is not known to latex2text but it has arguments",
                       thenode, chr(uni))
        return

    # The generated line refers to the character as \N{NAME}.  Without a
    # default, unicodedata.name() raises ValueError for code points that have
    # no name, which would abort the whole run; skip those instead.
    charname = unicodedata.name(chr(uni), None)
    if charname is None:
        logger.warning("Character U+%04X has no Unicode name, skipping: %s = %r",
                       uni, latex, thenode)
        return

    # got a symbol macro, go for it:
    print(" MacroTextSpec(%r, u'\\N{%s}'), # ‘%s’" % (
        thenode.macroname, charname, chr(uni)
    ))


for builtin_name in ('defaults', 'unicode-xml'):

    rules = latexencode.get_builtin_conversion_rules(builtin_name)

    logger.info("Reader latexencode defaults %r", builtin_name)
    print(" # Rules from latexencode defaults '%s'"%(builtin_name))

    for rule in rules:

        if rule.rule_type != latexencode.RULE_DICT:
            logger.warning("Ignoring non-dict rule type %d", rule.rule_type)
            continue

        # inspect rules for symbols that latex2text might not already be aware of
        for uni, latex in rule.rule.items():
            try:
                nodelist, _, _ = latexwalker.LatexWalker(latex, tolerant_parsing=False).get_latex_nodes()
            except latexwalker.LatexWalkerError as e:
                logger.warning("Error parsing %r (%s): %s", latex, chr(uni), e)
                continue

            extract_symbol_node(nodelist, uni, latex)
#
# mini-script to generate the pylatexenc.latexencode._uni2latexmap_xml dict mapping
#
import re
import sys

if sys.version_info.major > 2:
    # python 3
    unichr = chr

from xml.etree import ElementTree as ET


# dump dictionary into new module file in current working directory
outputfile = '_uni2latexmap_xml.py'

HEADER = """\
# -*- coding: utf-8 -*-
#
# Automatically generated from unicode.xml by gen_xml_dic.py
#

"""

# A named macro followed by whitespace at the very end of the value
_rx_macro_trailing_space = re.compile(r'\\[a-zA-Z]+\s+$')

# Values starting with one of these prefixes are left out of the mapping
# (r'\El' also covers r'\ElsevierGlyph').
_SKIPPED_PREFIXES = (r'\El', r'\ensuremath{\El')


def clean_latex_value(charord, latexval):
    """
    Return the LaTeX replacement to record for code point `charord`, or
    `None` if the entry is to be left out.

    :param charord: the code point, as an integer
    :param latexval: the text of the ``<latex>`` element; `None` when the
        element is empty
    """
    if latexval is None:
        # empty <latex/> element: ElementTree reports its text as None
        return None
    if latexval == unichr(charord):
        # "latex" representation is the same char directly
        return None
    if charord == 0x20:
        # skip space char
        return None
    if latexval.startswith(_SKIPPED_PREFIXES):
        return None
    if _rx_macro_trailing_space.search(latexval):
        # ends with named macro+space, remove space because
        # latexencode.UnicodeToLatexEncoder will handle that with
        # replacement_latex_protection
        return latexval.rstrip()
    return latexval


def collect_mappings(tree):
    """
    Walk the ``<character>`` elements below ``<charlist>`` in `tree`.

    :param tree: the parsed document (anything providing ``find()``, e.g.
        the result of ``ET.parse()`` or the root element)
    :return: a tuple ``(d, dnames)`` of two dicts keyed by code point: the
        LaTeX replacement, and the text of the ``<description>`` element
        (`None` if there is no such element)
    """
    d = {}
    dnames = {}

    for chxml in tree.find('charlist').iter('character'):
        Uid = chxml.attrib['id']
        if '-' in Uid:
            # composite/multiple characters not supported
            continue
        charord = int(Uid.lstrip('U'), 16)
        latexxml = chxml.find('latex')
        if latexxml is None:
            continue
        latexval = clean_latex_value(charord, latexxml.text)
        if latexval is None:
            continue
        d[charord] = latexval
        descxml = chxml.find('description')
        dnames[charord] = descxml.text if descxml is not None else None

    return d, dnames


def python_literal_for(latexval):
    """
    Return Python source for a string literal that evaluates to `latexval`.

    The raw form ``r'...'`` is used when it is a valid literal; a value that
    contains a single quote or a line break, or that ends with a backslash,
    cannot be written that way and falls back to ``repr()``.
    """
    if ("'" in latexval or '\n' in latexval or '\r' in latexval
            or latexval.endswith('\\')):
        return repr(latexval)
    return "r'" + latexval + "'"


def write_mapping_module(path, d):
    """Write the dict `d` as a Python module defining ``uni2latex`` to `path`."""
    with open(path, 'w') as f:
        f.write(HEADER)

        f.write("uni2latex = {\n")

        for k, v in d.items():
            f.write("0x%04X: %r,\n"%(k, v))

        f.write("}\n")


def report_missing_keys(d, dnames, known_keys):
    """
    Print, ready to be pasted into a dict, the entries of `d` whose code
    point is not among `known_keys`.
    """
    missing_keys = set(d.keys()).difference(set(known_keys))
    if not missing_keys:
        return
    print("#\n# Missing keys added from unicode.xml\n#\n")
    for k in sorted(missing_keys):
        thedef = "0x%04X: %s,"%(k, python_literal_for(d[k]))
        print("%-50s# %s [%s]"%(thedef, dnames[k], unichr(k)))


def main():
    """Generate the module from ``unicode.xml`` in the current directory."""
    d, dnames = collect_mappings(ET.parse('unicode.xml'))

    write_mapping_module(outputfile, d)

    print("Successfully generated file %s"%(outputfile))

    # Now see which characters we don't have in our default set of symbols
    # (imported here so that the module file above gets generated first)
    from pylatexenc.latexencode._uni2latexmap import uni2latex as uni2latex_defaults

    report_missing_keys(d, dnames, uni2latex_defaults.keys())


if __name__ == '__main__':
    main()
32 | 33 | Permission to copy, modify, and distribute this software and its documentation, 34 | with or without modification, for any purpose and without fee or royalty is 35 | hereby granted, provided that you include the following on ALL copies of the 36 | software and documentation or portions thereof, including modifications: 37 | 38 | - The full text of this NOTICE in a location viewable to users of the 39 | redistributed or derivative work. 40 | 41 | - Any pre-existing intellectual property disclaimers, notices, or terms and 42 | conditions. If none exist, the W3C Software Short Notice should be 43 | included (hypertext is preferred, text is permitted) within the body of 44 | any redistributed or derivative code. 45 | 46 | - Notice of any changes or modifications to the files, including the date 47 | changes were made. (We recommend you provide URIs to the location from 48 | which the code is derived.) 49 | 50 | DISCLAIMERS 51 | 52 | THIS SOFTWARE AND DOCUMENTATION IS PROVIDED "AS IS," AND COPYRIGHT HOLDERS MAKE 53 | NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 54 | TO, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT 55 | THE USE OF THE SOFTWARE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY 56 | PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS. 57 | 58 | COPYRIGHT HOLDERS WILL NOT BE LIABLE FOR ANY DIRECT, INDIRECT, SPECIAL OR 59 | CONSEQUENTIAL DAMAGES ARISING OUT OF ANY USE OF THE SOFTWARE OR DOCUMENTATION. 60 | 61 | The name and trademarks of copyright holders may NOT be used in advertising or 62 | publicity pertaining to the software without specific, written prior 63 | permission. Title to copyright in this software and any associated documentation 64 | will at all times remain with copyright holders. 
65 | 66 | NOTES 67 | 68 | This version: http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231 69 | 70 | This formulation of W3C's notice and license became active on December 31 71 | 2002. This version removes the copyright ownership notice such that this license 72 | can be used with materials other than those owned by the W3C, reflects that 73 | ERCIM is now a host of the W3C, includes references to this specific dated 74 | version of the license, and removes the ambiguous grant of "use". Otherwise, 75 | this version is the same as the previous version and is written so as to 76 | preserve the Free Software Foundation's assessment of GPL compatibility and 77 | OSI's certification under the Open Source Definition. 78 | --------------------------------------------------------------------------------