├── .github
└── workflows
│ ├── codeql.yml
│ ├── deploy-pypi.yml
│ ├── tests-ci-js.yml
│ └── tests-ci.yml
├── .gitignore
├── .readthedocs.yaml
├── LICENSE.txt
├── MANIFEST.in
├── README.rst
├── doc
├── Makefile
├── _static
│ └── custom.css
├── changes.rst
├── conf.py
├── example_latex2text_custom_quotes.py
├── index.rst
├── latex2text.rst
├── latexencode.rst
├── latexnodes.nodes.rst
├── latexnodes.parsers.rst
├── latexnodes.rst
├── latexwalker.rst
├── macrospec.rst
├── new-in-pylatexenc-2.rst
├── new-in-pylatexenc-3.rst
└── requirements.txt
├── js-transcrypt
├── .gitignore
├── .yarnrc.yml
├── README.md
├── generate_pylatexenc_js.py
├── import_pylatexenc_modules.py
├── libpatches
│ ├── bisect.py
│ ├── collections.py
│ ├── customjspatches.js
│ ├── functools.py
│ ├── json.py
│ ├── logging.js
│ ├── unique_object_id.js
│ └── unittest.py
├── my_test_script.py
├── mytestjscode
│ ├── my_test_js_code.js
│ ├── node_modules
│ │ └── pylatexenc-js
│ └── package.json
├── package.json
├── preprocesslib-pylatexenc.config.yaml
├── preprocesslib-tests.config.yaml
├── transcrypt_runtime_patches.js
└── yarn.lock
├── poetry.lock
├── pylatexenc
├── __init__.py
├── _util.py
├── _util_support.py
├── latex2text
│ ├── __init__.py
│ ├── __main__.py
│ ├── _defaultspecs.py
│ └── _inputlatexfile.py
├── latexencode
│ ├── __init__.py
│ ├── __main__.py
│ ├── _partial_latex_encoder.py
│ ├── _rule.py
│ ├── _uni2latexmap.py
│ ├── _uni2latexmap_xml.py
│ ├── _unicode_to_latex_encoder.py
│ └── get_builtin_rules.py
├── latexnodes
│ ├── __init__.py
│ ├── _callablespecbase.py
│ ├── _exctypes.py
│ ├── _latex_recomposer.py
│ ├── _latexcontextdbbase.py
│ ├── _nodescollector.py
│ ├── _parsedargs.py
│ ├── _parsedargsinfo.py
│ ├── _parsingstate.py
│ ├── _parsingstatedelta.py
│ ├── _token.py
│ ├── _tokenreader.py
│ ├── _tokenreaderbase.py
│ ├── _walkerbase.py
│ ├── nodes.py
│ └── parsers
│ │ ├── __init__.py
│ │ ├── _base.py
│ │ ├── _delimited.py
│ │ ├── _expression.py
│ │ ├── _generalnodes.py
│ │ ├── _math.py
│ │ ├── _optionals.py
│ │ ├── _stdarg.py
│ │ └── _verbatim.py
├── latexwalker
│ ├── __init__.py
│ ├── __main__.py
│ ├── _defaultspecs.py
│ ├── _get_defaultspecs.py
│ ├── _helpers.py
│ ├── _legacy_py1x.py
│ └── _walker.py
├── macrospec
│ ├── __init__.py
│ ├── _argumentsparser.py
│ ├── _environmentbodyparser.py
│ ├── _latexcontextdb.py
│ ├── _macrocallparser.py
│ ├── _pyltxenc2_argparsers
│ │ ├── __init__.py
│ │ ├── _base.py
│ │ └── _verbatimargsparser.py
│ ├── _specclasses.py
│ └── _spechelpers.py
└── version.py
├── pyproject.toml
├── setup.py
├── test
├── .gitignore
├── __init__.py
├── _helpers_tests.py
├── dummy
│ └── readme.txt
├── test_2_latex2text.py
├── test_2_latexwalker.py
├── test_2_macrospec.py
├── test_input_1.tex
├── test_latexencode.py
├── test_latexencode_all.py
├── test_latexnodes_latex_recomposer.py
├── test_latexnodes_nodes.py
├── test_latexnodes_nodescollector.py
├── test_latexnodes_parsedargsinfo.py
├── test_latexnodes_parsers_delimited.py
├── test_latexnodes_parsers_expression.py
├── test_latexnodes_parsers_generalnodes.py
├── test_latexnodes_parsers_math.py
├── test_latexnodes_parsers_optionals.py
├── test_latexnodes_parsers_stdarg.py
├── test_latexnodes_parsers_verbatim.py
├── test_latexnodes_tokenreader.py
├── test_latexnodes_tokenreaderbase.py
├── test_macrospec_argumentsparser.py
├── test_macrospec_environmentbodyparser.py
├── test_macrospec_latexcontextdb.py
├── test_macrospec_macrocallparser.py
├── test_pylatexenc.py
├── test_util.py
└── uni_chars_test_previous.txt
└── tools
├── gen_l2t_from_lenc.py
├── gen_xml_dic.py
├── preprocess_lib.py
├── unicode.xml
├── unicode.xml.LICENSE
└── utils_transcrypt_generate_js.py
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
1 | # Static analysis of the Python sources with GitHub CodeQL.
2 | name: "CodeQL"
3 |
4 | on:
5 |   push:
6 |     branches: [ "main" ]
7 |   pull_request:
8 |     branches: [ "main" ]
9 |   schedule:
10 |     # Weekly run: Wednesdays at 20:13 UTC.
11 |     - cron: "13 20 * * 3"
12 |
13 | jobs:
14 |   analyze:
15 |     name: Analyze
16 |     runs-on: ubuntu-latest
17 |     # Read-only token, except for uploading the analysis results.
18 |     permissions:
19 |       actions: read
20 |       contents: read
21 |       security-events: write
22 |
23 |     strategy:
24 |       fail-fast: false
25 |       matrix:
26 |         language: [ python ]
27 |
28 |     steps:
29 |       - name: Checkout
30 |         uses: actions/checkout@v4
31 |
32 |       - name: Initialize CodeQL
33 |         # v2 of the CodeQL action is deprecated; v3 takes the same inputs.
34 |         uses: github/codeql-action/init@v3
35 |         with:
36 |           languages: ${{ matrix.language }}
37 |           queries: +security-and-quality
38 |
39 |       - name: Autobuild
40 |         uses: github/codeql-action/autobuild@v3
41 |
42 |       - name: Perform CodeQL Analysis
43 |         uses: github/codeql-action/analyze@v3
44 |         with:
45 |           category: "/language:${{ matrix.language }}"
46 |
--------------------------------------------------------------------------------
/.github/workflows/deploy-pypi.yml:
--------------------------------------------------------------------------------
1 |
2 | name: 'deploy-pypi'
3 |
4 | # Manually triggered release: builds the package with poetry and uploads it to PyPI.
5 | on:
6 |   workflow_dispatch: {}
7 |
8 | jobs:
9 |   deploy-pypi:
10 |
11 |     runs-on: ubuntu-latest
12 |
13 |     strategy:
14 |       matrix:
15 |         python-version: ["3.11"]
16 |         poetry-version: ["1.4"]
17 |
18 |     environment: env-deploy-pypi
19 |
20 |     steps:
21 |       - uses: actions/checkout@v3
22 |
23 |       - uses: actions/setup-python@v4
24 |         with:
25 |           python-version: '${{ matrix.python-version }}'
26 |
27 |       - name: 'Set up poetry - Run image'
28 |         uses: abatilo/actions-poetry@v2
29 |         with:
30 |           poetry-version: '${{ matrix.poetry-version }}'
31 |
32 |       - name: 'Poetry Build Package'
33 |         run: poetry build
34 |
35 |       - name: 'Poetry Publish package'
36 |         # Pass the token through the environment rather than expanding it into
37 |         # the script text, so the shell never parses the secret's content.
38 |         env:
39 |           PYPI_API_TOKEN: '${{ secrets.PYPI_API_TOKEN }}'
40 |         run: 'poetry publish -u __token__ -p "$PYPI_API_TOKEN"'
41 |
--------------------------------------------------------------------------------
/.github/workflows/tests-ci-js.yml:
--------------------------------------------------------------------------------
1 |
2 | name: 'tests-ci-js'
3 |
4 | # Transpiles pylatexenc to JavaScript and runs the JS test suite under Node.
5 | on:
6 |   push:
7 |     branches: [ "main", "devel" ]
8 |   pull_request:
9 |     branches: [ "main", "devel" ]
10 |   schedule:
11 |     # Weekly run: Wednesdays at 20:13 UTC.
12 |     - cron: "13 20 * * 3"
13 |
14 | jobs:
15 |   tests-ci-js:
16 |
17 |     strategy:
18 |       #fail-fast: true
19 |       matrix:
20 |         python-version:
21 |           - "3.11"
22 |         poetry-version:
23 |           - "1.4"
24 |         node-version:
25 |           - "14"
26 |           - "19"
27 |         os:
28 |           - 'ubuntu-latest'
29 |
30 |     runs-on: '${{ matrix.os }}'
31 |
32 |     steps:
33 |       - uses: actions/checkout@v3
34 |
35 |       # Install NodeJS
36 |       - uses: actions/setup-node@v3
37 |         with:
38 |           node-version: '${{ matrix.node-version }}'
39 |
40 |       - name: 'Setting up some node packages'
41 |         run: 'npm i debug'
42 |
43 |       # Poetry & Python are needed to transpile Python library
44 |       - uses: actions/setup-python@v4
45 |         with:
46 |           python-version: '${{ matrix.python-version }}'
47 |
48 |       - name: 'Set up poetry - Run image'
49 |         uses: abatilo/actions-poetry@v2
50 |         with:
51 |           # Install the poetry version selected in the matrix; without this
52 |           # input the matrix entry above is never used.
53 |           poetry-version: '${{ matrix.poetry-version }}'
54 |
55 |       - name: 'Poetry Install (with buildjslib)'
56 |         run: 'poetry install --with buildjslib'
57 |
58 |       - name: 'Generate pylatexenc JS library'
59 |         working-directory: 'js-transcrypt'
60 |         run: 'poetry run python ./generate_pylatexenc_js.py --compile-tests'
61 |
62 |       - name: 'Run JS tests'
63 |         working-directory: 'js-transcrypt'
64 |         run: 'node test-pylatexenc-js/runtests.js'
65 |
--------------------------------------------------------------------------------
/.github/workflows/tests-ci.yml:
--------------------------------------------------------------------------------
1 | name: 'tests-ci'  # runs the pytest suite across the Python versions listed in the matrix
2 |
3 | on:
4 | push:
5 | branches: [ "main", "devel" ]
6 | pull_request:
7 | branches: [ "main", "devel" ]
8 | schedule:
9 | - cron: "13 20 * * 3"  # weekly run: Wednesdays at 20:13 UTC
10 |
11 | jobs:
12 | tests-ci:
13 |
14 | strategy:
15 | #fail-fast: true
16 | matrix:
17 | python-version:
18 | - "3.8"
19 | - "3.9"
20 | - "3.10"
21 | - "3.11"
22 | - "3.12"
23 | - "3.13"
24 | os:
25 | - 'ubuntu-latest'
26 | include:  # extra (python-version, os) combinations on top of the lists above
27 | # Deprecated system for github actions :/
28 | # - python-version: "3.4"
29 | # os: "ubuntu-18.04"
30 | # - python-version: "2.7"
31 | # os: 'ubuntu-20.04'
32 | - python-version: "3.6"
33 | os: 'ubuntu-20.04'  # NOTE(review): GitHub has retired the hosted ubuntu-20.04 image — confirm this job still gets a runner
34 | - python-version: "3.7"
35 | os: 'ubuntu-20.04'  # NOTE(review): same concern as for the 3.6 entry
36 |
37 | runs-on: '${{ matrix.os }}'
38 |
39 | steps:
40 | - uses: actions/checkout@v3
41 | - uses: actions/setup-python@v4
42 | with:
43 | python-version: '${{ matrix.python-version }}'
44 |
45 | - name: 'Install dependencies'  # the "2.7" branch in the script is dormant: 2.7 is commented out of the matrix
46 | run: |-
47 | pip install pytest toml; if [ "${{ matrix.python-version }}" == "2.7" ]; then pip install chainmap; fi
48 |
49 | - name: 'Run tests'
50 | run: pytest
51 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | *~
3 |
4 | .cache
5 |
6 | *.py[cdo]
7 | pylatexenc.egg-info
8 |
9 | doc/_build
10 |
11 | dist
12 | build
13 |
14 | tools/error.log
15 | tools/output.log
16 | tools/transcryptable_output
17 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | #
2 | # Configuration for readthedocs.org
3 | #
4 |
5 | # See poetry builds on RTD:
6 | # https://docs.readthedocs.io/en/stable/build-customization.html#install-dependencies-with-poetry
7 |
8 | version: 2
9 |
10 | build:
11 |
12 | os: "ubuntu-22.04"
13 |
14 | tools:
15 | python: "3.10"
16 |
17 | jobs:
18 |
19 | post_create_environment:
20 | # Install poetry
21 | # https://python-poetry.org/docs/#installing-manually
22 | - 'pip install "poetry>=1.4"'
23 |
24 | post_install:
25 | # Install dependencies with 'docs' dependency group
26 | # https://python-poetry.org/docs/managing-dependencies/#dependency-groups
27 | # VIRTUAL_ENV needs to be set manually for now.
28 | # See https://github.com/readthedocs/readthedocs.org/pull/11152/
29 | - 'VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH poetry install --with builddoc'
30 |
31 |
32 | sphinx:
33 | configuration: doc/conf.py
34 | builder: 'dirhtml'
35 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2015-2023 Philippe Faist
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE.txt
2 | include test/uni_chars_test_previous.txt test/test_input_1.tex test/dummy/readme.txt
3 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | pylatexenc
2 | ==========
3 |
4 | Simple LaTeX parser providing latex-to-unicode and unicode-to-latex conversion
5 |
6 | .. image:: https://img.shields.io/github/license/phfaist/pylatexenc.svg?style=flat
7 | :target: https://github.com/phfaist/pylatexenc/blob/master/LICENSE.txt
8 |
9 | .. image:: https://img.shields.io/pypi/v/pylatexenc.svg?style=flat
10 | :target: https://pypi.org/project/pylatexenc/
11 |
12 | Python: ≥ 3.4 or ≥ 2.7. The library is designed to be as backwards-compatible as
13 | reasonably possible and is able to run on old python versions should it be
14 | necessary. (Use the setup.py script directly if you have python<3.7, poetry
15 | doesn't seem to work with old python versions.)
16 |
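17 | **NEW (4/2023)**: *PYLATEXENC 3.0alpha* is in pre-release on PyPI. See `new features
18 | and major changes <https://pylatexenc.readthedocs.io/en/latest/new-in-pylatexenc-3/>`_.
19 | The `documentation <https://pylatexenc.readthedocs.io/en/latest/>`_ is still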
20 | incomplete, and the new APIs are still subject to changes. The code is meant
21 | to be as backwards compatible as is reasonably possible. Feel free to try it
22 | out & submit feedback!
23 |
24 |
25 | Unicode Text to LaTeX code
26 | --------------------------
27 |
28 | The ``pylatexenc.latexencode`` module provides a function ``unicode_to_latex()``
29 | which converts a unicode string into LaTeX text and escape sequences. It should
30 | recognize accented characters and most math symbols. A couple of switches allow
31 | you to alter how this function behaves.
32 |
33 | You can also run ``latexencode`` in command-line to convert plain unicode text
34 | (from the standard input or from files given on the command line) into LaTeX
35 | code, written on to the standard output.
36 |
37 | A third party plug-in for Vim
38 | `vim-latexencode <https://github.com/Konfekt/vim-latexencode>`_
39 | by `@Konfekt <https://github.com/Konfekt>`_
40 | provides a corresponding command to operate on a given range.
41 |
42 |
43 | Parsing LaTeX code & converting to plain text (unicode)
44 | -------------------------------------------------------
45 |
46 | The ``pylatexenc.latexwalker`` module provides a series of routines that parse
47 | the LaTeX structure of given LaTeX code and returns a logical structure of
48 | objects, which can then be used to produce output in another format such as
49 | plain text. This is not a replacement for a full (La)TeX engine, rather, this
50 | module provides a way to parse a chunk of LaTeX code as mark-up code.
51 |
52 | The ``pylatexenc.latex2text`` module builds up on top of
53 | ``pylatexenc.latexwalker`` and provides functions to convert given LaTeX code to
54 | plain text with unicode characters.
55 |
56 | You can also run ``latex2text`` in command-line to convert LaTeX input (either
57 | from the standard input, or from files given on the command line) into plain
58 | text written on the standard output.
59 |
60 |
61 | Documentation
62 | -------------
63 |
64 | Full documentation is available at https://pylatexenc.readthedocs.io/.
65 |
66 | To build the documentation manually, run::
67 |
68 | > poetry install --with=builddoc
69 | > cd doc/
70 | doc> poetry run make html
71 |
72 |
73 | License
74 | -------
75 |
76 | See LICENSE.txt (MIT License).
77 |
78 | NOTE: See copyright notice and license information for file
79 | ``tools/unicode.xml`` provided in ``tools/unicode.xml.LICENSE``. (The file
80 | ``tools/unicode.xml`` was downloaded from
81 | https://www.w3.org/2003/entities/2007xml/unicode.xml as linked from
82 | https://www.w3.org/TR/xml-entity-names/#source.)
83 |
84 |
85 | Javascript Library
86 | ------------------
87 |
88 | Some core parts of this library can be transcribed to JavaScript. This feature
89 | is used (and was developed for) my `Flexible Latex-like Markup
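90 | project <https://github.com/phfaist/flm>`_. See the *js-transcrypt/* folder and
91 | its `README file <https://github.com/phfaist/pylatexenc/blob/main/js-transcrypt/README.md>`_.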
92 |
--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = pylatexenc
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/doc/_static/custom.css:
--------------------------------------------------------------------------------
1 | /*@import url('https://fonts.googleapis.com/css?family=Open+Sans:400,400i,600,600i');*/
2 | @import url('https://fonts.googleapis.com/css2?family=Fira+Mono:wght@400;500&family=Fira+Sans:ital,wght@0,400;0,600;1,400;1,600&display=swap');
3 |
4 |
5 | div.body p, div.body dd, div.body li, div.body blockquote {
6 | -moz-hyphens: none;
7 | hyphens: none;
8 | }
9 |
10 | div.document {
11 | margin-top: 10px;
12 | }
13 |
14 |
15 | /* SIDEBAR */
16 |
17 | div.sphinxsidebar {
18 | font-size: 0.9rem;
19 | line-height: inherit;
20 |
21 | /*background-color: rgba(120,80,50,0.1);*/
22 | box-shadow: 3px 5px 10px rgba(0,0,0,0.1);
23 | border-right: 2px solid rgba(120,80,50,0.1);
24 | padding-right: 10px;
25 | box-sizing: border-box;
26 |
27 | border-radius: 12px;
28 |
29 | background-color: rgba(255,255,255,0.8);
30 | }
31 |
32 | @media screen and (max-width: 875px) {
33 | div.sphinxsidebar {
34 | width: 100vw;
35 | background-color: #333;
36 | }
37 | }
38 |
39 | div.sphinxsidebar ul {
40 | list-style: square;
41 | margin-left: 10px;
42 | }
43 | div.sphinxsidebar ul > li {
44 | margin-bottom: 0.2em;
45 | }
46 | div.sphinxsidebar ul ul {
47 | list-style: url(''); /* NOTE(review): empty url() — the marker image reference looks lost or unset; confirm the intended nested-list bullet */
48 | }
49 |
50 |
51 | /* BODY */
52 |
53 | div.body {
54 | padding-left: 25px;
55 | padding-right: 0px;
56 | padding-top: 1em; /*0px;*/
57 | padding-bottom: 0px;
58 | }
59 |
60 | div.body dl {
61 | margin-bottom: 1em;
62 | }
63 |
64 | div.body li {
65 | margin-bottom: 0.2em;
66 | }
67 | div.body li > ul {
68 | margin-top: 0.2em;
69 | margin-bottom: 0.2em;
70 | }
71 |
72 | div.body section > dl,
73 | div.body .section > dl {
74 | background: rgba(120,80,50,0.05);
75 | padding: 1.5rem 2rem;
76 | border-radius: 1rem;
77 | margin-bottom: 2rem;
78 | box-shadow: 3px 3px 6px rgba(0,0,0,0.1);
79 | }
80 | div.body section > dl > dt,
81 | div.body .section > dl > dt {
82 | margin-bottom: 1.5em;
83 | }
84 | div.body section > dl > dd,
85 | div.body .section > dl > dd {
86 | margin-left: 0px;
87 | }
88 |
89 | div.body li > blockquote {
90 | margin-left: 0px;
91 | }
92 |
93 |
94 |
95 | .sig {
96 | /*font-family: 'Roboto', sans-serif;*/
97 | font-family: inherit;
98 | font-weight: 350;
99 | color: rgb(100,100,100);
100 | letter-spacing: 0.4px;
101 | }
102 | .sig pre {
103 | font-family: inherit;
104 | }
105 | .sig > em.property:first-child {
106 | font-style: normal;
107 | font-variant: small-caps;
108 | display: block;
109 | }
110 | .sig-prename {
111 | font-size: 0.9em;
112 | }
113 | .sig-name {
114 | color: #000;
115 | font-weight: 550;
116 | font-size: inherit;
117 | }
118 | .sig-param {
119 | color: #000;
120 | font-size: 0.95em;
121 | }
122 |
123 |
124 | pre, tt, code {
125 | letter-spacing: -.2pt; /* condense mono/code font a bit */
126 | }
127 |
128 | tt, code {
129 | background-color: #b7a48629;
130 | }
131 |
132 | code {
133 | color: #000;
134 | font-weight: 550;
135 | padding: 2px 3px;
136 | border-radius: 4px;
137 | }
138 | code.xref.py {
139 | font-family: inherit;
140 | letter-spacing: 0.2pt;
141 | padding: 0px 0px;
142 | border-radius: 0px;
143 | }
144 | code.xref, a code {
145 | font-weight: 550;
146 | }
147 |
148 | /* link colors */
149 | a, a code.xref {
150 | color: rgb(117, 63, 10);
151 | }
152 |
153 |
154 | /* when we have a local TOC, don't make headers into prominent links */
155 | a.toc-backref {
156 | color: inherit;
157 | text-decoration: inherit;
158 | }
159 |
160 |
161 | .versionmodified {
162 | color: rgb(125, 42, 109);
163 | }
164 |
165 |
166 |
167 | /* --- */
168 |
169 | nav.contents, aside.topic, div.topic {
170 | border: 0px none;
171 | background-color: rgba(120,80,50,0.05);
172 | border-radius: 10px;
173 | padding: 20px;
174 | box-shadow: 3px 3px 6px rgba(0,0,0,0.1);
175 | }
176 |
177 |
178 | div.admonition {
179 | border-radius: 15px;
180 | }
181 |
--------------------------------------------------------------------------------
/doc/changes.rst:
--------------------------------------------------------------------------------
1 | ============
2 | Changes
3 | ============
4 |
5 |
6 | pylatexenc 3.0alpha
7 | ===================
8 |
9 | (Still in development.)
10 |
11 | .. toctree::
12 | :maxdepth: 1
13 |
14 | new-in-pylatexenc-3
15 |
16 | - see in particular the :ref:`list of changes that might affect existing code
17 | ` if you're using some more
18 | advanced features of `pylatexenc`.
19 |
20 |
21 |
22 |
23 | pylatexenc 2.10
24 | ===============
25 |
26 | - Added :py:class:`pylatexenc.latexencode.PartialLatexToLatexEncoder` which can
27 | help to avoid double-escaping some existing LaTeX expressions in the string
28 | that we want to LaTeX-encode.
29 |
30 | - The :py:class:`pylatexenc.latexencode.UnicodeToTextConversionRule` now has a
31 | :py:attr:`~pylatexenc.latexencode.UnicodeToTextConversionRule.replacement_text_protection`
32 | field which can be used to override the default `replacement_text_protection`
33 | set on a :py:class:`pylatexenc.latexencode.UnicodeToTextEncoder` object. Also
34 | the `replacement_text_protection` fields accept an arbitrary callable object.
35 |
36 | - added some known macro definitions for :py:mod:`~pylatexenc.latexwalker` and
37 | :py:mod:`~pylatexenc.latex2text`, such as ``\definecolor`` and ``\textcolor``.
38 |
39 | - Bug fixes (including :issue:`57`)
40 |
41 | pylatexenc 2.9
42 | ==============
43 |
44 | - Bug fixes (including issues :issue:`49`, :issue:`51`, :pr:`52`, :pr:`53`,
45 | :pr:`54`)
46 |
47 | pylatexenc 2.8
48 | ==============
49 |
50 | - `latex2text` module: Basic support for array and matrix environments.
51 | Matrices are represented inline, in the form ``[ a b; c d ]``.
52 |
53 | - `latexencode` bugfix (issue :issue:`44`)
54 |
55 | pylatexenc 2.7
56 | ==============
57 |
58 | - Bug fix: the parser now disambiguates ``$$`` as either a display math
59 | delimiter or two inline math delimiters as in ``$a$$b$`` (issue :issue:`43`)
60 |
61 | pylatexenc 2.6
62 | ==============
63 |
64 | - In `latex2text`:
65 |
66 | + Bug fix: default behavior of the `strict_latex_spaces` option in the
67 | :py:class:`pylatexenc.latex2text.LatexNodes2Text()` constructor
68 |
69 | + fix ``\le``, ``\ge``, ``\leqslant``, ``\geqslant`` (issue :issue:`41`)
70 |
71 | + reorganized the default latex symbol categories
72 |
73 |
74 | pylatexenc 2.5
75 | ==============
76 |
77 | - `latex2text`: Add support for ``\mathbb{}``, ``\mathbf{}`` and some friends
78 | (issue :issue:`40`)
79 |
80 | pylatexenc 2.4
81 | ==============
82 |
83 | - Bug fixes in how `latex2text` attempts to recover from parse errors in
84 | tolerant mode
85 |
86 | pylatexenc 2.3
87 | ==============
88 |
89 | - Minor bug fixes in `latex2text`
90 |
91 |
92 | pylatexenc 2.2
93 | ==============
94 |
95 | Version 2.2 brings a few minor bug fixes and improvements over version 2.1:
96 |
97 | - `pylatexenc.latex2text` supports more LaTeX symbols
98 |
99 | - `latex2text` and `latexwalker` command-line utilities accept a new `-c` option
100 | where you can directly specify LaTeX code
101 |
102 | - minor bug fixes
103 |
104 |
105 | pylatexenc 2.1
106 | ==============
107 |
108 | Version 2.1 brings a few minor bug fixes to version 2.0.
109 |
110 |
111 | pylatexenc 2.0
112 | ==============
113 |
114 | .. toctree::
115 | :maxdepth: 1
116 |
117 | new-in-pylatexenc-2
118 |
119 | - see in particular the :ref:`list of changes that might affect existing code
120 | ` if you're using some advanced features of
121 | `pylatexenc`.
122 |
123 |
124 | pylatexenc 1.x
125 | ==============
126 |
127 | See description of updates and changes on the `github releases page
128 | <https://github.com/phfaist/pylatexenc/releases>`_.
129 |
--------------------------------------------------------------------------------
/doc/conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # pylatexenc documentation build configuration file, created by
5 | # sphinx-quickstart on Mon Apr 24 16:32:21 2017.
6 | #
7 | # This file is execfile()d with the current directory set to its
8 | # containing dir.
9 | #
10 | # Note that not all possible configuration values are present in this
11 | # autogenerated file.
12 | #
13 | # All configuration values have a default; values that are commented out
14 | # serve to show the default.
15 |
16 | # If extensions (or modules to document with autodoc) are in another directory,
17 | # add these directories to sys.path here. If the directory is relative to the
18 | # documentation root, use os.path.abspath to make it absolute, like shown here.
19 | #
20 | import os.path
21 | import sys
22 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
23 |
24 | import pylatexenc.version
25 |
26 | # -- General configuration ------------------------------------------------
27 |
28 | # If your documentation needs a minimal Sphinx version, state it here.
29 | #
30 | # needs_sphinx = '1.0'
31 |
32 | # Add any Sphinx extension module names here, as strings. They can be
33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
34 | # ones.
35 | extensions = [
36 | 'sphinx.ext.autodoc',
37 | 'sphinx.ext.intersphinx',
38 | 'sphinx.ext.mathjax',
39 |
40 | 'sphinx_issues',
41 | ]
42 |
43 | # Add any paths that contain templates here, relative to this directory.
44 | templates_path = ['_templates']
45 |
46 | # The suffix(es) of source filenames.
47 | # You can specify multiple suffix as a list of string:
48 | #
49 | # source_suffix = ['.rst', '.md']
50 | source_suffix = '.rst'
51 |
52 | # The master toctree document.
53 | master_doc = 'index'
54 |
55 | # General information about the project.
56 | project = 'pylatexenc'
57 | copyright = '2023, Philippe Faist'
58 | author = 'Philippe Faist'
59 |
60 | # The version info for the project you're documenting, acts as replacement for
61 | # |version| and |release|, also used in various other places throughout the
62 | # built documents.
63 | #
64 | # The short X.Y version.
65 | version = pylatexenc.version.version_str
66 | # The full version, including alpha/beta/rc tags.
67 | release = version
68 |
69 | # The language for content autogenerated by Sphinx. Refer to documentation
70 | # for a list of supported languages.
71 | #
72 | # This is also used if you do content translation via gettext catalogs.
73 | # Usually you set "language" from the command line for these cases.
74 | #language = None
75 |
76 | # List of patterns, relative to source directory, that match files and
77 | # directories to ignore when looking for source files.
78 | # These patterns also affect html_static_path and html_extra_path
79 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
80 |
81 | # The name of the Pygments (syntax highlighting) style to use.
82 | pygments_style = 'sphinx'
83 |
84 | # If true, `todo` and `todoList` produce output, else they produce nothing.
85 | todo_include_todos = False
86 |
87 |
88 |
89 | #autodoc_docstring_signature = True
90 | autodoc_member_order = 'bysource'
91 | autodoc_inherit_docstrings = False
92 |
93 | # autodoc_default_options = {
94 | # 'members': True, # 'var1, var2',
95 | # #'member-order': 'bysource',
96 | # 'special-members': '__init__',
97 | # #'undoc-members': True,
98 | # #'exclude-members': '__weakref__'
99 | # }
100 |
101 |
102 | # -- Options for sphinx_issues --------------------------------------------
103 |
104 | # GitHub repo
105 | issues_github_path = "phfaist/pylatexenc"
106 |
107 |
108 | # -- Options for HTML output ----------------------------------------------
109 |
110 | # The theme to use for HTML and HTML Help pages. See the documentation for
111 | # a list of builtin themes.
112 | #
113 |
114 | html_theme = 'alabaster'
115 |
116 |
117 | # import guzzle_sphinx_theme
118 | # html_theme_path = guzzle_sphinx_theme.html_theme_path()
119 | # html_theme = 'guzzle_sphinx_theme'
120 | # html_style = 'custom.css' /* don't forget to add ''' @import url("guzzle.css"); ''' */
121 |
122 | #import sphinx_bootstrap_theme
123 | #html_theme_path = sphinx_bootstrap_theme.get_html_theme_path()
124 | #html_theme = 'bootstrap'
125 |
126 |
127 |
128 | # Theme options are theme-specific and customize the look and feel of a theme
129 | # further. For a list of options available for each theme, see the
130 | # documentation.
131 | #
132 | html_theme_options = {
133 | 'font_family': 'Fira Sans',
134 | 'font_size': '15px',
135 | 'head_font_family': 'Fira Sans',
136 | 'code_font_family': 'Fira Mono',
137 | 'github_user': 'phfaist',
138 | 'github_repo': 'pylatexenc',
139 | 'github_button': True,
140 | 'github_type': 'star',
141 | 'github_count': 'true',
142 |
143 | 'fixed_sidebar': True,
144 | 'page_width': '950px',
145 | 'sidebar_width': '220px',
146 | }
147 | html_sidebars = {
148 | '**': [
149 | 'about.html',
150 | 'navigation.html',
151 | 'relations.html',
152 | 'searchbox.html',
153 | # 'donate.html',
154 | ]
155 | }
156 |
157 | # Add any paths that contain custom static files (such as style sheets) here,
158 | # relative to this directory. They are copied after the builtin static files,
159 | # so a file named "default.css" will overwrite the builtin "default.css".
160 | html_static_path = [ '_static' ]
161 |
162 |
163 | # -- Options for HTMLHelp output ------------------------------------------
164 |
165 | # Output file base name for HTML help builder.
166 | htmlhelp_basename = 'pylatexencdoc'
167 |
168 |
169 | # -- Options for LaTeX output ---------------------------------------------
170 |
171 | latex_elements = {
172 | # The paper size ('letterpaper' or 'a4paper').
173 | #
174 | # 'papersize': 'letterpaper',
175 |
176 | # The font size ('10pt', '11pt' or '12pt').
177 | #
178 | # 'pointsize': '10pt',
179 |
180 | # Additional stuff for the LaTeX preamble.
181 | #
182 | # 'preamble': '',
183 |
184 | # Latex figure (float) alignment
185 | #
186 | # 'figure_align': 'htbp',
187 | }
188 |
189 | # Grouping the document tree into LaTeX files. List of tuples
190 | # (source start file, target name, title,
191 | # author, documentclass [howto, manual, or own class]).
192 | latex_documents = [
193 | (master_doc, 'pylatexenc.tex', 'pylatexenc Documentation',
194 | 'Philippe Faist', 'manual'),
195 | ]
196 |
197 |
198 | # -- Options for manual page output ---------------------------------------
199 |
200 | # One entry per manual page. List of tuples
201 | # (source start file, name, description, authors, manual section).
202 | man_pages = [
203 | (master_doc, 'pylatexenc', 'pylatexenc Documentation',
204 | [author], 1)
205 | ]
206 |
207 |
208 | # -- Options for Texinfo output -------------------------------------------
209 |
210 | # Grouping the document tree into Texinfo files. List of tuples
211 | # (source start file, target name, title, author,
212 | # dir menu entry, description, category)
213 | texinfo_documents = [
214 | (master_doc, 'pylatexenc', 'pylatexenc Documentation',
215 | author, 'pylatexenc', 'One line description of project.',
216 | 'Miscellaneous'),
217 | ]
218 |
219 |
220 |
221 |
222 | # Example configuration for intersphinx: refer to the Python standard library.
223 | intersphinx_mapping = {
224 | 'python': ('https://docs.python.org/3', None)
225 | }
226 |
--------------------------------------------------------------------------------
/doc/example_latex2text_custom_quotes.py:
--------------------------------------------------------------------------------
1 | from pylatexenc import latexwalker, latex2text, macrospec
2 |
3 | #
4 | # Define macros, environments, specials for the *parser*
5 | #
6 | lw_context_db = latexwalker.get_default_latex_context_db()  # parser-side context database
7 | lw_context_db.add_context_category(
8 | 'my-quotes',
9 | prepend=True,
10 | macros=[
11 | macrospec.MacroSpec("putindblquotes", "{"),
12 | macrospec.MacroSpec("putinquotes", "[[{"),  # called as \putinquotes[open-quote][close-quote]{text}
13 | ],
14 | environments=[
15 | macrospec.EnvironmentSpec("indblquotes", ""),
16 | macrospec.EnvironmentSpec("inquotes", "[["),  # presumably two optional arguments (open/close quote) — see in_quotes_env_repl
17 | ],
18 | specials=[  # quote characters, single and doubled
19 | macrospec.SpecialsSpec("`"),
20 | macrospec.SpecialsSpec("'"),
21 | macrospec.SpecialsSpec("``"),
22 | macrospec.SpecialsSpec("''"),
23 | ],
24 | )
25 |
26 | #
27 | # Implement macros, environments, specials for the *conversion to text*
28 | #
29 |
30 | def _get_optional_arg(node, default, l2tobj):
31 | """Helper that returns the `node` converted to text, or `default`
32 | if the node is `None` (e.g. an optional argument that was not
33 | specified)"""
34 | if node is None:
35 | return default
36 | return l2tobj.nodelist_to_text([node])
37 |
38 | def put_in_quotes_macro_repl(n, l2tobj):
39 | """Get the text replacement for the macro
40 | \putinquotes[open-quote][close-quote]{text}"""
41 | if not n.nodeargd:
42 | # n.nodeargd can be empty if e.g. \putinquotes was a single
43 | # token passed as an argument to a macro,
44 | # e.g. \newcommand\putinquotes...
45 | return ''
46 | open_q_s = _get_optional_arg(n.nodeargd.argnlist[0], '“', l2tobj)
47 | close_q_s = _get_optional_arg(n.nodeargd.argnlist[1], '”', l2tobj)
48 | return (open_q_s + l2tobj.nodelist_to_text([n.nodeargd.argnlist[2]])
49 | + close_q_s)
50 |
51 | def in_quotes_env_repl(n, l2tobj):
52 | """Get the text replacement for the {inquotes} environment"""
53 | open_q_s = _get_optional_arg(n.nodeargd.argnlist[0], '“', l2tobj)
54 | close_q_s = _get_optional_arg(n.nodeargd.argnlist[1], '”', l2tobj)
55 | return open_q_s + l2tobj.nodelist_to_text(n.nodelist) + close_q_s
56 |
57 | l2t_context_db = latex2text.get_default_latex_context_db()
58 | l2t_context_db.add_context_category(
59 | 'my-quotes',
60 | prepend=True,
61 | macros=[
62 | latex2text.MacroTextSpec("putindblquotes",
63 | simplify_repl=r'“%(1)s”'),
64 | latex2text.MacroTextSpec("putinquotes",
65 | simplify_repl=put_in_quotes_macro_repl),
66 | ],
67 | environments=[
68 | latex2text.EnvironmentTextSpec("indblquotes",
69 | simplify_repl=r'“%(body)s”'),
70 | latex2text.EnvironmentTextSpec("inquotes",
71 | simplify_repl=in_quotes_env_repl),
72 | ],
73 | specials=[
74 | latex2text.SpecialsTextSpec('`', "‘"),
75 | latex2text.SpecialsTextSpec("'", "’"),
76 | latex2text.SpecialsTextSpec('``', "“"),
77 | latex2text.SpecialsTextSpec("''", "”"),
78 | ],
79 | )
80 |
81 |
82 | #
83 | # Here is an example usage:
84 | #
85 |
86 | def custom_latex_to_text( input_latex ):
87 | # the latex parser instance with custom latex_context
88 | lw_obj = latexwalker.LatexWalker(input_latex,
89 | latex_context=lw_context_db)
90 | # parse to node list
91 | nodelist, pos, length = lw_obj.get_latex_nodes()
92 | # initialize the converter to text with custom latex_context
93 | l2t_obj = latex2text.LatexNodes2Text(latex_context=l2t_context_db)
94 | # convert to text
95 | return l2t_obj.nodelist_to_text( nodelist )
96 |
97 |
98 | print(custom_latex_to_text(
99 | r"""\begin{inquotes}[`][']Hello, world\end{inquotes}"""))
100 | # ‘Hello, world’
101 |
102 | print(custom_latex_to_text(r"""\putinquotes[``]['']{Hello, world}"""))
103 | # “Hello, world”
104 |
105 | print(custom_latex_to_text(r"""\putinquotes{Hello, world}"""))
106 | # “Hello, world”
107 |
108 | print(custom_latex_to_text(r"""\putinquotes[`][']{Hello, world}"""))
109 | # ‘Hello, world’
110 |
--------------------------------------------------------------------------------
/doc/index.rst:
--------------------------------------------------------------------------------
1 | .. pylatexenc documentation master file, created by
2 | sphinx-quickstart on Mon Apr 24 16:32:21 2017.
3 | You can adapt this file completely to your liking,
4 | but it should at least contain the root `toctree` directive.
5 |
6 | Welcome to pylatexenc's documentation!
7 | ======================================
8 |
9 | [pylatexenc version: |version|]
10 |
11 | A simple LaTeX parser providing latex-to-unicode and unicode-to-latex conversion.
12 |
13 | Quick example::
14 |
15 | >>> from pylatexenc.latex2text import LatexNodes2Text
16 | >>> latex = r"""\textbf{Hi there!} Here is \emph{an equation}:
17 | ... \begin{equation}
18 | ... \zeta = x + i y
19 | ... \end{equation}
20 | ... where $i$ is the imaginary unit.
21 | ... """
22 | >>> print(LatexNodes2Text().latex_to_text(latex))
23 | Hi there! Here is an equation:
24 |
25 | ζ = x + i y
26 |
27 | where i is the imaginary unit.
28 |
29 | And the other way around::
30 |
31 | >>> from pylatexenc.latexencode import unicode_to_latex
32 | >>> text = "À votre santé!"
33 | >>> print(unicode_to_latex(text))
34 | \`A votre sant\'e!
35 |
36 |
37 | You can also use these utilities directly from the command line, e.g.::
38 |
39 | $ echo 'À votre santé!' | latexencode
40 | \`A votre sant\'e!
41 |
42 |
43 | Documentation
44 | =============
45 |
46 |
47 | .. toctree::
48 | :maxdepth: 2
49 | :caption: Contents:
50 |
51 | latexnodes
52 | macrospec
53 | latexwalker
54 | latex2text
55 | latexencode
56 | changes
57 |
58 |
59 | Indices and tables
60 | ==================
61 |
62 | * :ref:`genindex`
63 | * :ref:`modindex`
64 | * :ref:`search`
65 |
--------------------------------------------------------------------------------
/doc/latex2text.rst:
--------------------------------------------------------------------------------
1 | `latex2text` — Simple Latex to Text Converter
2 | ---------------------------------------------
3 |
4 | .. automodule:: pylatexenc.latex2text
5 | :no-undoc-members:
6 |
7 | .. contents:: Contents:
8 | :local:
9 |
10 |
11 | Custom latex conversion rules: A simple template
12 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
13 |
14 | Here is a short introduction on how to customize the way that
15 | :py:class:`~pylatexenc.latex2text.LatexNodes2Text` converts LaTeX constructs
16 | (macros, environments, and specials) to unicode text. You can start off with
17 | the example template below and adapt it to your needs.
18 |
19 | Macros, environments and specials are parsed as corresponding node objects by
20 | the parser (see :py:class:`pylatexenc.latexwalker.LatexMacroNode`,
21 | :py:class:`pylatexenc.latexwalker.LatexEnvironmentNode`, and
22 | :py:class:`pylatexenc.latexwalker.LatexSpecialsNode`). These node objects are
23 | then converted to unicode text by the
24 | :py:class:`~pylatexenc.latex2text.LatexNodes2Text` object.
25 |
26 | You can define new macros, environments, or specials, or override existing
27 | definitions. The definitions need to be provided twice. First, at the level of
28 | the parser using the :py:mod:`~pylatexenc.macrospec` module; the parser needs to
29 | know the argument structure of your macros, environments, and specials, along
30 | with which characters to recognize as "specials". Second, at the level of
31 | `latex2text`, you need to specify what the replacement strings are for the
32 | different LaTeX constructs after they have been parsed into the latex node tree
33 | by the parser.
34 |
35 | The following template is a simple illustrative example that implements the
36 | following definitions:
37 |
38 | - A new macro ``\putinquotes[`][']{text}`` that puts its mandatory argument
39 | into quotes defined by the two optional arguments. Let's say that the
40 | default quotes that are used are `````` and ``''``. Another simpler macro
41 | ``\putindblquotes{text}`` is also provided for the sake of the example.
42 |
43 | - A new environment ``\begin{inquotes}[`]['] ... \end{inquotes}`` that does
44 | the same thing as its macro equivalent. Another simpler environment
45 | ``\begin{indblquotes}...\end{indblquotes}`` is also provided for the sake of
46 | the example.
47 |
48 | - The usual LaTeX quote symbols `````, ``````, ``'``, and ``''`` for unicode
49 | quotes. (See also issue :issue:`39`)
50 |
51 | Here is the code (see also docs for :py:class:`pylatexenc.macrospec.MacroSpec`,
52 | :py:class:`pylatexenc.macrospec.EnvironmentSpec`,
53 | :py:class:`pylatexenc.macrospec.SpecialsSpec`, as well as
54 | :py:class:`pylatexenc.latex2text.MacroTextSpec`,
55 | :py:class:`pylatexenc.latex2text.EnvironmentTextSpec`,
56 | :py:class:`pylatexenc.latex2text.SpecialsTextSpec`):
57 |
58 | .. literalinclude:: example_latex2text_custom_quotes.py
59 | :language: python
60 |
61 |
62 | Latex to Text Converter Class
63 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
64 |
65 | .. autoclass:: pylatexenc.latex2text.LatexNodes2Text
66 | :members:
67 |
68 |
69 | .. autofunction:: pylatexenc.latex2text.get_default_latex_context_db
70 |
71 |
72 |
73 | Define replacement texts
74 | ~~~~~~~~~~~~~~~~~~~~~~~~
75 |
76 | .. autoclass:: pylatexenc.latex2text.MacroTextSpec
77 | :members:
78 |
79 | .. autoclass:: pylatexenc.latex2text.EnvironmentTextSpec
80 | :members:
81 |
82 | .. autoclass:: pylatexenc.latex2text.SpecialsTextSpec
83 | :members:
84 |
85 |
86 |
87 | Obsolete members
88 | ~~~~~~~~~~~~~~~~
89 |
90 | .. autofunction:: pylatexenc.latex2text.EnvDef
91 |
92 | .. autofunction:: pylatexenc.latex2text.MacroDef
93 |
94 |
95 | .. autodata:: pylatexenc.latex2text.default_env_dict
96 | :annotation:
97 |
98 | .. autodata:: pylatexenc.latex2text.default_macro_dict
99 | :annotation:
100 |
101 | .. autodata:: pylatexenc.latex2text.default_text_replacements
102 | :annotation:
103 |
--------------------------------------------------------------------------------
/doc/latexencode.rst:
--------------------------------------------------------------------------------
1 | `latexencode` — Encode Unicode to LaTeX
2 | ---------------------------------------
3 |
4 | .. automodule:: pylatexenc.latexencode
5 | :no-undoc-members:
6 |
7 | .. contents:: Contents:
8 | :local:
9 |
10 |
11 | Unicode to Latex Conversion Class and Helper Function
12 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
13 |
14 | .. autoclass:: pylatexenc.latexencode.UnicodeToLatexEncoder
15 | :members:
16 |
17 | .. autofunction:: pylatexenc.latexencode.unicode_to_latex
18 |
19 |
20 | Specifying conversion rules
21 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
22 |
23 | .. autodata:: pylatexenc.latexencode.RULE_DICT
24 |
25 | .. autodata:: pylatexenc.latexencode.RULE_REGEX
26 |
27 | .. autodata:: pylatexenc.latexencode.RULE_CALLABLE
28 |
29 |
30 |
31 | .. autoclass:: pylatexenc.latexencode.UnicodeToLatexConversionRule
32 | :members:
33 |
34 |
35 |
36 | .. autofunction:: pylatexenc.latexencode.get_builtin_conversion_rules
37 |
38 | .. autofunction:: pylatexenc.latexencode.get_builtin_uni2latex_dict
39 |
40 |
41 | Compatibility with `pylatexenc 1.x`
42 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
43 |
44 | .. autofunction:: pylatexenc.latexencode.utf8tolatex
45 |
46 | .. autodata:: pylatexenc.latexencode.utf82latex
47 |
--------------------------------------------------------------------------------
/doc/latexnodes.nodes.rst:
--------------------------------------------------------------------------------
1 | `latexnodes.nodes` — LaTeX Nodes Classes
2 | ========================================
3 |
4 | .. automodule:: pylatexenc.latexnodes.nodes
5 | :no-undoc-members:
6 | :show-inheritance:
7 |
8 | .. contents:: Contents:
9 | :local:
10 |
11 |
12 |
13 | Nodes, Node Lists, and Visitors
14 | -------------------------------
15 |
16 |
17 | .. autoclass:: pylatexenc.latexnodes.nodes.LatexNode
18 | :members:
19 |
20 | .. autoclass:: pylatexenc.latexnodes.nodes.LatexNodeList
21 | :members:
22 |
23 | .. autoclass:: pylatexenc.latexnodes.nodes.LatexNodesVisitor
24 | :members:
25 |
26 |
27 | LaTeX Node Types
28 | ----------------
29 |
30 | .. autoclass:: pylatexenc.latexnodes.nodes.LatexCharsNode
31 | :members:
32 | :show-inheritance:
33 |
34 | .. autoclass:: pylatexenc.latexnodes.nodes.LatexGroupNode
35 | :members:
36 | :show-inheritance:
37 |
38 | .. autoclass:: pylatexenc.latexnodes.nodes.LatexCommentNode
39 | :members:
40 | :show-inheritance:
41 |
42 | .. autoclass:: pylatexenc.latexnodes.nodes.LatexMacroNode
43 | :members:
44 | :show-inheritance:
45 |
46 | .. autoclass:: pylatexenc.latexnodes.nodes.LatexEnvironmentNode
47 | :members:
48 | :show-inheritance:
49 |
50 | .. autoclass:: pylatexenc.latexnodes.nodes.LatexSpecialsNode
51 | :members:
52 | :show-inheritance:
53 |
54 | .. autoclass:: pylatexenc.latexnodes.nodes.LatexMathNode
55 | :members:
56 | :show-inheritance:
57 |
58 |
59 |
--------------------------------------------------------------------------------
/doc/latexnodes.parsers.rst:
--------------------------------------------------------------------------------
1 | `latexnodes.parsers` — Latex Construct Parsers
2 | ==============================================
3 |
4 | .. automodule:: pylatexenc.latexnodes.parsers
5 | :members:
6 | :no-undoc-members:
7 | :show-inheritance:
8 |
9 | .. contents:: Contents:
10 | :local:
11 |
12 |
13 | Parser base class
14 | ~~~~~~~~~~~~~~~~~
15 |
16 | .. autoclass:: LatexParserBase
17 | :members:
18 |
19 |
20 | General nodes
21 | ~~~~~~~~~~~~~
22 |
23 | .. autoclass:: LatexGeneralNodesParser
24 | :members:
25 | :show-inheritance:
26 |
27 | .. autoclass:: LatexSingleNodeParser
28 | :members:
29 | :show-inheritance:
30 |
31 |
32 | Delimited expressions
33 | ~~~~~~~~~~~~~~~~~~~~~
34 |
35 | .. autoclass:: LatexDelimitedExpressionParserInfo
36 | :members:
37 |
38 | .. autoclass:: LatexDelimitedExpressionParser
39 | :members:
40 | :show-inheritance:
41 |
42 | .. autoclass:: LatexDelimitedGroupParserInfo
43 | :members:
44 | :show-inheritance:
45 |
46 | .. autoclass:: LatexDelimitedGroupParser
47 | :members:
48 | :show-inheritance:
49 |
50 | .. autoclass:: LatexDelimitedMultiDelimGroupParserInfo
51 | :members:
52 | :show-inheritance:
53 |
54 | .. autoclass:: LatexDelimitedMultiDelimGroupParser
55 | :members:
56 | :show-inheritance:
57 |
58 | .. autoclass:: LatexDelimitedExpressionParserOpeningDelimiterNotFound
59 | :members:
60 | :show-inheritance:
61 |
62 | .. autoclass:: LatexMathParser
63 | :members:
64 | :show-inheritance:
65 |
66 |
67 |
68 | Single expression parser
69 | ~~~~~~~~~~~~~~~~~~~~~~~~
70 |
71 | .. autoclass:: LatexExpressionParser
72 | :members:
73 | :show-inheritance:
74 |
75 |
76 | Optional expression parser
77 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
78 |
79 | .. autoclass:: LatexOptionalSquareBracketsParser
80 | :members:
81 | :show-inheritance:
82 |
83 | .. autoclass:: LatexOptionalCharsMarkerParser
84 | :members:
85 | :show-inheritance:
86 |
87 | .. autoclass:: LatexOptionalEmbellishmentArgsParser
88 | :members:
89 | :show-inheritance:
90 |
91 |
92 | Verbatim/literal expressions
93 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
94 |
95 | .. autoclass:: LatexVerbatimBaseParser
96 | :members:
97 | :show-inheritance:
98 |
99 | .. autoclass:: LatexDelimitedVerbatimParser
100 | :members:
101 | :show-inheritance:
102 |
103 | .. autoclass:: LatexVerbatimEnvironmentContentsParser
104 | :members:
105 | :show-inheritance:
106 |
107 |
108 |
109 | Typical macro arguments
110 | ~~~~~~~~~~~~~~~~~~~~~~~
111 |
112 | .. autofunction:: get_standard_argument_parser
113 |
114 | .. autoclass:: LatexStandardArgumentParser
115 | :members:
116 | :show-inheritance:
117 |
118 | .. autoclass:: LatexCharsCommaSeparatedListParser
119 | :members:
120 | :show-inheritance:
121 |
122 | .. autoclass:: LatexCharsGroupParser
123 | :members:
124 | :show-inheritance:
125 |
126 | .. autoclass:: LatexTackOnInformationFieldMacrosParser
127 | :members:
128 | :show-inheritance:
129 |
130 |
--------------------------------------------------------------------------------
/doc/latexnodes.rst:
--------------------------------------------------------------------------------
1 | `latexnodes` — LaTeX Nodes Tree and Parsers
2 | ===========================================
3 |
4 | .. automodule:: pylatexenc.latexnodes
5 | :no-undoc-members:
6 | :show-inheritance:
7 |
8 | .. contents:: Contents:
9 | :local:
10 |
11 |
12 |
13 | Parsing State
14 | -------------
15 |
16 | .. autoclass:: ParsingState
17 | :members:
18 |
19 | .. autoclass:: ParsingStateDelta
20 | :members:
21 |
22 | .. autoclass:: ParsingStateDeltaReplaceParsingState
23 | :members:
24 |
25 | .. autoclass:: ParsingStateDeltaChained
26 | :members:
27 |
28 | .. autoclass:: ParsingStateDeltaWalkerEvent
29 | :members:
30 |
31 | .. autoclass:: ParsingStateDeltaEnterMathMode
32 | :members:
33 |
34 | .. autoclass:: ParsingStateDeltaLeaveMathMode
35 | :members:
36 |
37 |
38 |
39 | Latex Token
40 | -----------
41 |
42 | .. autoclass:: LatexToken
43 | :members:
44 |
45 |
46 | Token Readers
47 | -------------
48 |
49 | .. autoclass:: LatexTokenReaderBase
50 | :members:
51 |
52 | .. autoclass:: LatexTokenReader
53 | :members:
54 |
55 | .. autoclass:: LatexTokenListTokenReader
56 | :members:
57 |
58 |
59 | Arguments and Parsed Arguments
60 | ------------------------------
61 |
62 | .. autoclass:: LatexArgumentSpec
63 | :members:
64 |
65 | .. autoclass:: ParsedArguments
66 | :members:
67 |
68 | .. autoclass:: ParsedArgumentsInfo
69 | :members:
70 |
71 | .. autoclass:: SingleParsedArgumentInfo
72 | :members:
73 |
74 |
75 | Nodes Collector
76 | ---------------
77 |
78 | .. autoclass:: LatexNodesCollector
79 | :members:
80 |
81 |
82 | Exception classes
83 | -----------------
84 |
85 | .. autoclass:: LatexWalkerError
86 | :members:
87 |
88 | .. autoclass:: LatexWalkerLocatedError
89 | :members:
90 |
91 | .. autoclass:: LatexWalkerLocatedErrorFormatter
92 | :members:
93 |
94 | .. autoclass:: LatexWalkerParseError
95 | :members:
96 |
97 | .. autoclass:: LatexWalkerNodesParseError
98 | :members:
99 |
100 | .. autoclass:: LatexWalkerTokenParseError
101 | :members:
102 |
103 | .. autoclass:: LatexWalkerEndOfStream
104 | :members:
105 |
106 |
107 | Base classes
108 | ------------
109 |
110 | .. autoclass:: CallableSpecBase
111 | :members:
112 |
113 | .. autoclass:: LatexWalkerParsingStateEventHandler
114 | :members:
115 |
116 | .. autoclass:: LatexWalkerBase
117 | :members:
118 |
119 | .. autoclass:: LatexContextDbBase
120 | :members:
121 |
122 |
123 |
124 | Node Classes
125 | ------------
126 |
127 | .. toctree::
128 | :maxdepth: 2
129 |
130 | latexnodes.nodes
131 |
132 |
133 | Parser Classes
134 | --------------
135 |
136 | .. toctree::
137 | :maxdepth: 2
138 |
139 | latexnodes.parsers
140 |
--------------------------------------------------------------------------------
/doc/latexwalker.rst:
--------------------------------------------------------------------------------
1 | `latexwalker` — Calling Parsers for LaTeX Code
2 | ----------------------------------------------
3 |
4 | .. automodule:: pylatexenc.latexwalker
5 | :no-undoc-members:
6 | :show-inheritance:
7 |
8 | .. contents:: Contents:
9 | :local:
10 |
11 |
12 | The main `LatexWalker` class
13 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
14 |
15 | .. autoclass:: pylatexenc.latexwalker.LatexWalker
16 | :members:
17 |
18 |
19 | .. autofunction:: pylatexenc.latexwalker.get_default_latex_context_db
20 |
21 |
22 | Exception Classes
23 | ~~~~~~~~~~~~~~~~~
24 |
25 | .. py:class:: pylatexenc.latexwalker.LatexWalkerError
26 |
27 | Moved to :py:class:`pylatexenc.latexnodes.LatexWalkerError`.
28 |
29 | .. deprecated:: 3.0
30 |
31 | Since Pylatexenc 3.0, this class now resides in the new module
32 | :py:mod:`pylatexenc.latexnodes` as
33 | :py:class:`pylatexenc.latexnodes.LatexWalkerError`. It is aliased in
34 | `pylatexenc.latexwalker` for backwards compatibility.
35 |
36 | .. py:class:: pylatexenc.latexwalker.LatexWalkerParseError
37 |
38 | Moved to :py:class:`pylatexenc.latexnodes.LatexWalkerParseError`.
39 |
40 | .. deprecated:: 3.0
41 |
42 | Since Pylatexenc 3.0, this class now resides in the new module
43 | :py:mod:`pylatexenc.latexnodes` as
44 | :py:class:`pylatexenc.latexnodes.LatexWalkerParseError`. It is aliased in
45 | `pylatexenc.latexwalker` for backwards compatibility.
46 |
47 | .. py:class:: pylatexenc.latexwalker.LatexWalkerEndOfStream
48 |
49 | Moved to :py:class:`pylatexenc.latexnodes.LatexWalkerEndOfStream`.
50 |
51 | .. deprecated:: 3.0
52 |
53 | Since Pylatexenc 3.0, this class now resides in the new module
54 | :py:mod:`pylatexenc.latexnodes` as
55 | :py:class:`pylatexenc.latexnodes.LatexWalkerEndOfStream`. It is aliased
56 | in `pylatexenc.latexwalker` for backwards compatibility.
57 |
58 |
59 | Data Node Classes
60 | ~~~~~~~~~~~~~~~~~
61 |
62 | .. py:class:: pylatexenc.latexwalker.LatexNode
63 |
64 | Moved to :py:class:`pylatexenc.latexnodes.nodes.LatexNode`.
65 |
66 | .. deprecated:: 3.0
67 |
68 | Since Pylatexenc 3.0, this class now resides in the new module
69 | :py:mod:`pylatexenc.latexnodes.nodes` as
70 | :py:class:`pylatexenc.latexnodes.nodes.LatexNode`. It is aliased in
71 | `pylatexenc.latexwalker` for backwards compatibility.
72 |
73 | .. py:class:: pylatexenc.latexwalker.LatexCharsNode
74 |
75 | Moved to :py:class:`pylatexenc.latexnodes.nodes.LatexCharsNode`.
76 |
77 | .. deprecated:: 3.0
78 |
79 | Since Pylatexenc 3.0, this class now resides in the new module
80 | :py:mod:`pylatexenc.latexnodes.nodes` as
81 | :py:class:`pylatexenc.latexnodes.nodes.LatexCharsNode`. It is aliased in
82 | `pylatexenc.latexwalker` for backwards compatibility.
83 |
84 | .. py:class:: pylatexenc.latexwalker.LatexGroupNode
85 |
86 | Moved to :py:class:`pylatexenc.latexnodes.nodes.LatexGroupNode`.
87 |
88 | .. deprecated:: 3.0
89 |
90 | Since Pylatexenc 3.0, this class now resides in the new module
91 | :py:mod:`pylatexenc.latexnodes.nodes` as
92 | :py:class:`pylatexenc.latexnodes.nodes.LatexGroupNode`. It is aliased in
93 | `pylatexenc.latexwalker` for backwards compatibility.
94 |
95 | .. py:class:: pylatexenc.latexwalker.LatexCommentNode
96 |
97 | Moved to :py:class:`pylatexenc.latexnodes.nodes.LatexCommentNode`.
98 |
99 | .. deprecated:: 3.0
100 |
101 | Since Pylatexenc 3.0, this class now resides in the new module
102 | :py:mod:`pylatexenc.latexnodes.nodes` as
103 | :py:class:`pylatexenc.latexnodes.nodes.LatexCommentNode`. It is aliased
104 | in `pylatexenc.latexwalker` for backwards compatibility.
105 |
106 | .. py:class:: pylatexenc.latexwalker.LatexMacroNode
107 |
108 | Moved to :py:class:`pylatexenc.latexnodes.nodes.LatexMacroNode`.
109 |
110 | .. deprecated:: 3.0
111 |
112 | Since Pylatexenc 3.0, this class now resides in the new module
113 | :py:mod:`pylatexenc.latexnodes.nodes` as
114 | :py:class:`pylatexenc.latexnodes.nodes.LatexMacroNode`. It is aliased in
115 | `pylatexenc.latexwalker` for backwards compatibility.
116 |
117 | .. py:class:: pylatexenc.latexwalker.LatexEnvironmentNode
118 |
119 | Moved to :py:class:`pylatexenc.latexnodes.nodes.LatexEnvironmentNode`.
120 |
121 | .. deprecated:: 3.0
122 |
123 | Since Pylatexenc 3.0, this class now resides in the new module
124 | :py:mod:`pylatexenc.latexnodes.nodes` as
125 | :py:class:`pylatexenc.latexnodes.nodes.LatexEnvironmentNode`. It is
126 | aliased in `pylatexenc.latexwalker` for backwards compatibility.
127 |
128 | .. py:class:: pylatexenc.latexwalker.LatexSpecialsNode
129 |
130 | Moved to :py:class:`pylatexenc.latexnodes.nodes.LatexSpecialsNode`.
131 |
132 | .. deprecated:: 3.0
133 |
134 | Since Pylatexenc 3.0, this class now resides in the new module
135 | :py:mod:`pylatexenc.latexnodes.nodes` as
136 | :py:class:`pylatexenc.latexnodes.nodes.LatexSpecialsNode`. It is aliased
137 | in `pylatexenc.latexwalker` for backwards compatibility.
138 |
139 | .. py:class:: pylatexenc.latexwalker.LatexMathNode
140 |
141 | Moved to :py:class:`pylatexenc.latexnodes.nodes.LatexMathNode`.
142 |
143 | .. deprecated:: 3.0
144 |
145 | Since Pylatexenc 3.0, this class now resides in the new module
146 | :py:mod:`pylatexenc.latexnodes.nodes` as
147 | :py:class:`pylatexenc.latexnodes.nodes.LatexMathNode`. It is aliased in
148 | `pylatexenc.latexwalker` for backwards compatibility.
149 |
150 |
151 | Parsing helpers
152 | ~~~~~~~~~~~~~~~
153 |
154 | .. py:class:: pylatexenc.latexwalker.ParsingState
155 |
156 | .. deprecated:: 3.0
157 |
158 | Since Pylatexenc 3.0, this class now resides in the new module
159 | :py:mod:`pylatexenc.latexnodes`. It is aliased in
160 | `pylatexenc.latexwalker` for backwards compatibility.
161 |
162 | .. py:class:: pylatexenc.latexwalker.LatexToken
163 |
164 | .. deprecated:: 3.0
165 |
166 | Since Pylatexenc 3.0, this class now resides in the new module
167 | :py:mod:`pylatexenc.latexnodes`. It is aliased in
168 | `pylatexenc.latexwalker` for backwards compatibility.
169 |
170 |
171 |
172 | Legacy Macro Definitions (for `pylatexenc 1.x`)
173 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
174 |
175 | .. autodata:: pylatexenc.latexwalker.MacrosDef
176 |
177 | .. autodata:: pylatexenc.latexwalker.default_macro_dict
178 | :annotation:
179 |
180 |
181 |
--------------------------------------------------------------------------------
/doc/macrospec.rst:
--------------------------------------------------------------------------------
1 | `macrospec` — Specifying definitions for the parser
2 | ---------------------------------------------------
3 |
4 | .. automodule:: pylatexenc.macrospec
5 | :no-undoc-members:
6 | :show-inheritance:
7 |
8 | .. contents:: Contents:
9 | :local:
10 |
11 |
12 | Macro and environment definitions
13 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
14 |
15 | .. autoclass:: pylatexenc.macrospec.MacroSpec
16 | :members:
17 | :inherited-members:
18 |
19 | .. autoclass:: pylatexenc.macrospec.EnvironmentSpec
20 | :members:
21 | :inherited-members:
22 |
23 | .. autoclass:: pylatexenc.macrospec.SpecialsSpec
24 | :members:
25 | :inherited-members:
26 |
27 |
28 | .. autofunction:: pylatexenc.macrospec.std_macro
29 |
30 | .. autofunction:: pylatexenc.macrospec.std_environment
31 |
32 | .. autofunction:: pylatexenc.macrospec.std_specials
33 |
34 |
35 | Latex Context "Database"
36 | ~~~~~~~~~~~~~~~~~~~~~~~~
37 |
38 | .. autoclass:: pylatexenc.macrospec.LatexContextDb
39 | :members:
40 |
41 | .. autoclass:: pylatexenc.macrospec.ParsingStateDeltaExtendLatexContextDb
42 | :show-inheritance:
43 | :members:
44 |
45 |
46 | Lower-level parsers for macro, environments, and specials
47 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
48 |
49 | You shouldn't have to use these directly.
50 |
51 | .. autoclass:: pylatexenc.macrospec.LatexNoArgumentsParser
52 | :show-inheritance:
53 | :members:
54 |
55 | .. autoclass:: pylatexenc.macrospec.LatexArgumentsParser
56 | :show-inheritance:
57 | :members:
58 |
59 | .. autoclass:: pylatexenc.macrospec.LatexEnvironmentBodyContentsParserInfo
60 | :show-inheritance:
61 | :members:
62 |
63 | .. autoclass:: pylatexenc.macrospec.LatexEnvironmentBodyContentsParser
64 | :show-inheritance:
65 | :members:
66 |
67 | .. autoclass:: pylatexenc.macrospec.LatexMacroCallParser
68 | :show-inheritance:
69 | :members:
70 |
71 | .. autoclass:: pylatexenc.macrospec.LatexEnvironmentCallParser
72 | :show-inheritance:
73 | :members:
74 |
75 | .. autoclass:: pylatexenc.macrospec.LatexSpecialsCallParser
76 | :show-inheritance:
77 | :members:
78 |
79 |
80 |
81 |
82 | Legacy (2.x) Macro arguments parsers
83 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
84 |
85 | .. autoclass:: pylatexenc.macrospec.MacroStandardArgsParser
86 | :members:
87 |
88 | .. autoclass:: pylatexenc.macrospec.ParsedMacroArgs
89 | :members:
90 |
91 | .. autoclass:: pylatexenc.macrospec.VerbatimArgsParser
92 | :show-inheritance:
93 | :members:
94 |
95 | .. autoclass:: pylatexenc.macrospec.ParsedVerbatimArgs
96 | :show-inheritance:
97 | :members:
98 |
99 |
--------------------------------------------------------------------------------
/doc/new-in-pylatexenc-3.rst:
--------------------------------------------------------------------------------
1 | What's new in `pylatexenc 3`
2 | ============================
3 |
4 | Wow, a *lot* of stuff has changed in the `latexwalker` and `macrospec` modules.
5 | There's even a new `latexnodes` module. I don't know where to start!
6 |
7 | The good news is, if you're simply using the latex-to-unicode and
8 | unicode-to-latex conversion tools, your code depending on `pylatexenc 2` should
9 | run without any changes. You might get some deprecation warnings which you can
10 | silence using Python's warnings filter management (e.g., ``python -W
11 | 'ignore::DeprecationWarning'`` or using :py:func:`warnings.simplefilter`).
12 |
13 | The `latex2text` and `latexencode` modules have barely changed.
14 |
15 | - New parsing mechanism in a new `latexnodes` module — everything gets delegated
16 | to "parser objects" that are specialized in parsing a specific construct. See
17 | :py:class:`pylatexenc.latexnodes.parsers.LatexParserBase`.
18 |
19 | - The parser has new enhanced handling of macro, environment, and specials
20 | arguments. Arguments can be named for easier lookup when traversing the node
21 | tree.
22 |
23 | - **WARNING**: While in *alpha* stage, I'm expecting that the new APIs might
24 | still change. I'll try to remain as backwards-compatible as possible with
25 | `pylatexenc 2.x` but new APIs introduced in the `3.0alphaX` versions might
26 | still change a bit until they are finalized.
27 |
28 | - Lists of latex node objects
29 | (:py:class:`~pylatexenc.latexnodes.nodes.LatexNode`) are now wrapped in a
30 | special object for node lists →
31 | :py:class:`pylatexenc.latexnodes.nodes.LatexNodeList`.
32 |
33 | - so much more ... ...
34 |
35 | - The `len` attribute in node objects is replaced by a `pos_end` attribute. The
36 | `len` attribute can still be accessed as a read-only computed attribute for
37 | compatibility with existing code using pylatexenc 2.
38 |
39 |
40 |
41 | .. _new-in-pylatexenc-3-possible-pitfall-changes:
42 |
43 | A couple things to look out for
44 | -------------------------------
45 |
46 | - If you created a :py:class:`~pylatexenc.macrospec.LatexContextDb` database
47 | from scratch, you might suddenly get errors about unknown macros. The default
48 | initialization for unknown macro, environment and specials specification
49 | objects for :py:class:`~pylatexenc.macrospec.LatexContextDb` was, and remains,
50 | `None`. What has changed is the interpretation of this `None`: Now, the latex
51 | walker (more precisely,
52 | :py:class:`~pylatexenc.latexnodes.LatexNodesCollector`) reports an error,
53 | whereas previously, the parser would simply assume the macro doesn't accept
54 | any arguments. To restore the earlier behavior, simply set the spec objects
55 | for unknown macro/environment/specials in your latex context db object::
56 |
57 | latex_context_db = macrospec.LatexContextDb()
58 | # ...
59 | latex_context_db.add_context_category( ... )
60 | # ...
61 | latex_context_db.set_unknown_macro_spec(macrospec.MacroSpec(''))
62 | latex_context_db.set_unknown_environment_spec(macrospec.EnvironmentSpec(''))
63 | #
64 | # unknown macros and environments are now accepted and are assumed
65 | # not to take any arguments
66 | #
67 |
68 | - Node lists are now encapsulated in a
69 | :py:class:`~pylatexenc.latexnodes.nodes.LatexNodeList`. It behaves very much
70 | like a list in all respects (indexing, slicing, etc.), except that it does not
71 | satisfy ``isinstance(nodelist, list)``. If you relied on such tests, you'll
72 | need to update them to something like ``isinstance(nodelist, (LatexNodeList,
73 | list))``.
74 |
--------------------------------------------------------------------------------
/doc/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx_issues
2 |
--------------------------------------------------------------------------------
/js-transcrypt/.gitignore:
--------------------------------------------------------------------------------
1 | #
2 | # additional git ignores
3 | #
4 |
5 |
6 | #
7 | # preprocessed python source code output
8 | #
9 | pp-tmp
10 |
11 | #
12 | # output folders
13 | #
14 | *_js_output
15 | pylatexenc-js
16 | test-pylatexenc-js
17 |
18 |
19 | node_modules
20 | .yarn
21 |
--------------------------------------------------------------------------------
/js-transcrypt/.yarnrc.yml:
--------------------------------------------------------------------------------
1 | nodeLinker: 'node-modules'
2 |
--------------------------------------------------------------------------------
/js-transcrypt/README.md:
--------------------------------------------------------------------------------
1 | # Building a Javascript version of pylatexenc.latexnodes library via *transcrypt*
2 |
3 | You can use the fantastic [Transcrypt](http://www.transcrypt.org/) tool ([also
4 | on github](https://github.com/QQuick/Transcrypt)) for converting parts of the
5 | pylatexenc code base into JavaScript to make a JavaScript-based parser for
6 | simple LaTeX code.
7 |
8 | This procedure is very much still in alpha stage. Don't rely too much on it!
9 |
10 | To use commands listed here, make sure you installed the optional poetry
11 | dependency group "buildjslib":
12 |
13 | > poetry install --with=buildjslib
14 |
15 |
16 | ## The build script
17 |
18 | To generate the JavaScript sources from the Python code, simply run in this folder:
19 |
20 | # generates pylatexenc-js/
21 | > poetry run ./generate_pylatexenc_js.py
22 |
23 | (Make sure you've removed the `pylatexenc-js` folder from any previous run, or
24 | pass the `--delete-target-dir` option to the generator script.)
25 |
26 | To compile the tests along with the library, in its own folder:
27 |
28 | # generates both pylatexenc-js/ and test-pylatexenc-js/
29 | > poetry run ./generate_pylatexenc_js.py --compile-tests
30 |
31 | To run the tests using `node`, do:
32 |
33 | > node test-pylatexenc-js/runtests.js
34 |
35 |
36 | ## Steps handled by the build script
37 |
38 | These are broadly the steps that the build script will apply.
39 |
40 | ### Preprocessing the pylatexenc library in preparation for transcrypt:
41 |
42 | The script will first preprocess the pylatexenc source code to make it suitable
43 | for use with transcrypt. You can also do this manually with
44 |
45 | > export PYLATEXENC_SRC_DIR=/path/to/root/folder/of/pylatexenc/
46 | > export PREPROCESS_LIB_OUTPUT_DIR=pp-tmp/ # or some other temporary folder
47 | > poetry run python ../tools/preprocess_lib.py preprocesslib-pylatexenc.config.yaml
48 |
49 | ### Run Transcrypt to generate the Javascript sources
50 |
51 | We need to enable a lot of features in transcrypt, some of which are disabled by
52 | default. The build script essentially runs the commands described below.
53 |
54 | Transcrypt is called with the `import_pylatexenc_modules.py` module as entry
55 | point. This python module simply imports the subset of the `pylatexenc` library
56 | that we'll be compiling to JavaScript. The command to run is essentially:
57 |
58 | > poetry run transcrypt import_pylatexenc_modules.py --dassert --dext --ecom --gen --tconv --sform --kwargs --keycheck --opov --xreex --nomin --build --anno --parent .none -u .auto -xp 'pp-tmp$libpatches' -od pylatexenc-js
59 |
60 | The JavaScript files are output in the `pylatexenc-js` folder.
61 |
62 | ### Final touches
63 |
64 | The build script will then apply some additional steps and patches:
65 |
66 | - Create a `package.json` file that defines a module, so that you can import the
67 | sources using for instance:
68 |
69 | // js code
70 | import { Symbol1 [, ...] } from './pylatexenc-js/pylatexenc.latexnodes.js'
71 |
72 | - Create a `py.js` module that exports the functions `$$kw` and `repr`, exposing
73 | the keyword-argument functionality as well as python's `repr()` function.
74 | You can pass keywords to transcrypted functions as follows:
75 |
76 | // js code
77 | call_function_from_transcrypt(arg1, arg2, $$kw({ keywordarg1: value1,
78 | keywordarg2: value2 }))
79 |
80 | - Patch Transcrypt's internal runtime methods to add some missing support for
81 | additional functionality (see `transcrypt_runtime_patches.js`)
82 |
83 |
--------------------------------------------------------------------------------
/js-transcrypt/generate_pylatexenc_js.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path
3 | import re
4 | import sys
5 | import argparse
6 | import json
7 |
8 | import shutil
9 | import subprocess
10 |
11 | import logging
12 | logger = logging.getLogger('generate_pylatexenc_js')
13 |
14 | pylatexenc_src_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), '..'))
15 |
def _parse_command_line():
    """Declare the command-line options of the generator script and parse them."""
    cli = argparse.ArgumentParser()

    cli.add_argument(
        '--pylatexenc-js-output-dir',
        action='store',
        default='pylatexenc-js',
        help="Folder where to output generated JavaScript pylatexenc sources",
    )
    cli.add_argument(
        '--delete-target-dir',
        action='store_true',
        default=False,
        help="With this option, the target directory is removed if it exists "
        "at the beginning of the script instead of throwing an error. Will "
        "also remove the tests target directory if --compile-tests is given.",
    )
    cli.add_argument(
        '--preprocess-lib-output-dir',
        action='store',
        default='pp-tmp',
        help="Temporary folder in which to write intermediate, "
        "preprocessed sources to be fed into Transcrypt",
    )
    cli.add_argument(
        '--compile-tests',
        action='store_true',
        default=False,
        help="Also compile the pylatexenc tests into a separate "
        "folder (by default ./test-pylatexenc-js)",
    )
    cli.add_argument(
        '--test-pylatexenc-js-output-dir',
        action='store',
        default='test-pylatexenc-js',
        help="Folder where to output generated JavaScript pylatexenc "
        "test sources. "
        "The main entry point for the tests will be the script 'runtests.js'",
    )

    return cli.parse_args()


def run_main():
    """
    Entry point of the script: preprocess the pylatexenc sources, run
    Transcrypt on them and finalize the resulting JavaScript package.  With
    ``--compile-tests``, the test suite is preprocessed and compiled into its
    own output folder as well.

    Raises `RuntimeError` if the library output folder already exists (and
    ``--delete-target-dir`` was not given).
    """
    args = _parse_command_line()

    logging.basicConfig(level=logging.DEBUG)

    lib_output_dir = args.pylatexenc_js_output_dir
    tests_output_dir = args.test_pylatexenc_js_output_dir
    pp_output_dir = args.preprocess_lib_output_dir

    if args.delete_target_dir:
        # The tests folder is only cleaned up when we're about to regenerate it.
        stale_dirs = [lib_output_dir]
        if args.compile_tests:
            stale_dirs.append(tests_output_dir)
        for stale_dir in stale_dirs:
            if os.path.exists(stale_dir):
                shutil.rmtree(stale_dir)

    os.makedirs(pp_output_dir, exist_ok=True)

    if os.path.exists(lib_output_dir):
        raise RuntimeError(
            f"Target destination ‘{args.pylatexenc_js_output_dir}’ already exists. "
            f"Please remove it first."
        )

    # pick up pylatexenc's generation script tool (lives in <root>/tools/, which
    # is not an importable package, hence the sys.path manipulation)
    pylatexenc_tools_dir = os.path.join(pylatexenc_src_dir, 'tools')
    logger.info(f"Using pylatexenc_tools_dir = {pylatexenc_tools_dir!r}")
    sys.path.insert(0, pylatexenc_tools_dir)

    import utils_transcrypt_generate_js

    genutils = utils_transcrypt_generate_js.GenUtils(
        pylatexenc_src_dir=pylatexenc_src_dir,
        preprocess_lib_output_dir=pp_output_dir,
    )

    # preprocess the pylatexenc library (and, if requested, the tests) to
    # prepare them for Transcrypt
    genutils.preprocess_pylatexenc_lib()
    if args.compile_tests:
        genutils.preprocess_lib('preprocesslib-tests.config.yaml')

    # run Transcrypt on the pylatexenc library
    genutils.run_transcrypt('import_pylatexenc_modules.py', output_dir=lib_output_dir)

    # final tweaks to finalize the JS package
    genutils.finalize_transcrypt_package(
        lib_output_dir,
        package_name='pylatexenc-js',
        package_version='0.0.1',
        package_description=(
            'Automatically transliterated Javascript version of the pylatexenc sources'
        ),
    )

    if args.compile_tests:
        # these are regexes that are matched as ^( <...> )[.]py$
        test_file_patterns = [
            'test_latexnodes_.*',
            'test_macrospec_.*',
            'test_latexwalker_.*',
            'test_latexencode',
            'test_util',
        ]

        # Generate the test runner script
        runtests_py = genutils.generate_runtests_script(
            os.path.join(pylatexenc_src_dir, 'test'),
            test_file_patterns=test_file_patterns,
        )

        # Transcrypt it
        genutils.run_transcrypt(
            runtests_py,
            add_import_paths=[os.path.join(pp_output_dir, 'test')],
            output_dir=tests_output_dir,
        )
        genutils.finalize_transcrypt_package(
            tests_output_dir,
            package_name='test-pylatexenc-js',
        )

        logger.info("Compiled the tests. To run them, try ‘node {}/runtests.js’"
                    .format(args.test_pylatexenc_js_output_dir))

    logger.info("Done!")


if __name__ == '__main__':
    run_main()
135 |
--------------------------------------------------------------------------------
/js-transcrypt/import_pylatexenc_modules.py:
--------------------------------------------------------------------------------
1 |
2 | import pylatexenc
3 | #
4 | import pylatexenc.latexnodes
5 | import pylatexenc.macrospec
6 | import pylatexenc.latexwalker
7 |
8 | import pylatexenc.latexencode
9 | import pylatexenc.latexencode.get_builtin_rules
10 |
11 |
12 | # additional modules that we might need:
13 | import logging
14 | import collections
15 |
16 |
17 | # customjspatches is no longer needed, we're now directly patching the
18 | # Transcrypt runtime at JS sources generation time (see
19 | # generate_pylatexenc_js.py)
20 | #
21 | #import customjspatches #lgtm [py/unused-import]
22 |
23 |
--------------------------------------------------------------------------------
/js-transcrypt/libpatches/bisect.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | #
5 | # THIS METHOD CAN ASSUME THAT THERE ARE NO DUPLICATES IN THE LIST.
6 | #
def bisect_right(a, x):
    """
    Minimal replacement for the standard library's `bisect.bisect_right()`.

    Given a list `a` sorted in ascending order, return the index of the first
    element that is strictly greater than `x`, i.e., the position at which
    `x` could be inserted (after any equal entries) while keeping `a` sorted.
    Returns `len(a)` if no element is greater than `x`, which includes the
    case of an empty list (returns `0`).
    """
    lo = 0
    hi = len(a)

    # Invariant: a[i] <= x for all i < lo, and a[i] > x for all i >= hi.  The
    # half-open window [lo, hi) shrinks at every step, so the loop terminates;
    # it is never entered for an empty list.
    while lo < hi:
        mid = (lo + hi) // 2
        if a[mid] > x:
            hi = mid
        else: # i.e., if a[mid] <= x:
            lo = mid + 1

    return lo
36 |
--------------------------------------------------------------------------------
/js-transcrypt/libpatches/collections.py:
--------------------------------------------------------------------------------
1 |
2 | ### ChainMap
3 | # -- straight from the python sources.
4 |
5 |
6 | import logging
7 | logger = logging.getLogger(__name__)
8 |
9 |
10 |
class ChainMap:
    ''' A ChainMap groups multiple dicts (or other mappings) together
    to create a single, updateable view.

    The underlying mappings are stored in a list.  That list is public and can
    be accessed or updated using the *maps* attribute.  There is no other
    state.

    Lookups search the underlying mappings successively until a key is found.
    In contrast, writes, updates, and deletions only operate on the first
    mapping.

    This is a trimmed-down copy of the standard library class.  It does not
    inherit from `MutableMapping`, so only the methods defined here are
    available.
    '''

    def __init__(self, *maps):
        '''Initialize a ChainMap by setting *maps* to the given mappings.
        If no mappings are provided, a single empty dictionary is used.

        '''
        self.maps = list(maps) or [{}]          # always at least one map

    def __missing__(self, key):
        'Called when *key* is in none of the maps; raises KeyError.'
        raise KeyError(key)

    def __getitem__(self, key):
        'Return the value for *key* from the first map that contains it.'
        for mapping in self.maps:
            # Membership is tested explicitly instead of catching KeyError
            # from mapping[key].
            if key not in mapping:
                continue
            return mapping[key]
        return self.__missing__(key)            # support subclasses that define __missing__

    def get(self, key, default=None):
        'Return the value for *key* if present in any map, else *default*.'
        return self[key] if key in self else default

    def __len__(self):
        'Number of distinct keys over all maps.'
        return len(set().union(*self.maps))     # reuses stored hash values if possible

    def __iter__(self):
        'Iterate over the distinct keys, starting with those of the last map.'
        d = {}
        for mapping in reversed(self.maps):
            d.update(dict.fromkeys(mapping))    # reuses stored hash values if possible
        return iter(d)

    def __contains__(self, key):
        return any(key in m for m in self.maps)

    def __bool__(self):
        'True if at least one of the maps is non-empty.'
        return any(self.maps)

    def __repr__(self):
        # NOTE(review): the class name is formatted with !r and thus appears
        # quoted, e.g. "'ChainMap'([{}])".  Kept as is in case something relies
        # on this output -- confirm whether "{}({!r})" was intended.
        return "{!r}({!r})".format(self.__class__.__name__, self.maps)

    @classmethod
    def fromkeys(cls, iterable, *args):
        'Create a ChainMap with a single dict created from the iterable.'
        return cls(dict.fromkeys(iterable, *args))

    def copy(self):
        'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]'
        return self.__class__(self.maps[0].copy(), *self.maps[1:])

    def __copy__(self):
        'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]'
        return self.__class__(self.maps[0].copy(), *self.maps[1:])

    def new_child(self, m=None):                # like Django's Context.push()
        '''New ChainMap with a new map followed by all previous maps.
        If no map is provided, an empty dict is used.
        '''
        if m is None:
            m = {}
        return self.__class__(m, *self.maps)

    @property
    def parents(self):                          # like Django's Context.pop()
        'New ChainMap from maps[1:].'
        return self.__class__(*self.maps[1:])

    def __setitem__(self, key, value):
        self.maps[0][key] = value

    def __delitem__(self, key):
        try:
            del self.maps[0][key]
        except KeyError:
            raise KeyError(f'Key not found in the first mapping: {key!r}')

    def popitem(self):
        'Remove and return an item pair from maps[0]. Raise KeyError if maps[0] is empty.'
        try:
            return self.maps[0].popitem()
        except KeyError:
            raise KeyError('No keys found in the first mapping.')

    def pop(self, key, *args):
        'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].'
        try:
            return self.maps[0].pop(key, *args)
        except KeyError:
            raise KeyError(f'Key not found in the first mapping: {key!r}')

    def clear(self):
        'Clear maps[0], leaving maps[1:] intact.'
        self.maps[0].clear()

    def _flattened(self):
        'Plain dict with the merged content of all maps (earlier maps win).'
        merged = {}
        for child in reversed(self.maps):
            merged.update(child)
        return merged

    def __ior__(self, other):
        self.maps[0].update(other)
        return self

    def __or__(self, other):
        '''Return a copy of this ChainMap whose first map is updated with *other*.

        *other* must be a `dict` or a `ChainMap`; for anything else,
        `NotImplemented` is returned.  (The standard library tests against
        `collections.abc.Mapping`, which is not available to this module.)
        '''
        if isinstance(other, ChainMap):
            other = other._flattened()
        elif not isinstance(other, dict):
            return NotImplemented
        m = self.copy()
        m.maps[0].update(other)
        return m

    def __ror__(self, other):
        '''Return a single-map ChainMap holding *other* updated with this
        ChainMap's merged content.  Accepts the same types as `__or__()`.
        '''
        if isinstance(other, ChainMap):
            other = other._flattened()
        elif not isinstance(other, dict):
            return NotImplemented
        m = dict(other)
        m.update(self._flattened())
        return self.__class__(m)
140 |
141 |
142 |
143 |
--------------------------------------------------------------------------------
/js-transcrypt/libpatches/customjspatches.js:
--------------------------------------------------------------------------------
1 | export function custom_apply_patches() {
2 |
3 | console.log("Applying custom JS patches ...");
4 |
5 | String.prototype.startswith = function (prefix, start) {
6 | //console.log("Custom startswith()! prefix = ", prefix, ", start = ", start);
7 | var pos_start = (typeof start === 'undefined' ? 0 : start);
8 | if (prefix instanceof Array) {
9 | for (var i=0;i= width) {
31 | return this;
32 | }
33 | return fill_char.repeat(width - this.length) + this;
34 | };
35 |
36 |
37 | };
38 |
--------------------------------------------------------------------------------
/js-transcrypt/libpatches/functools.py:
--------------------------------------------------------------------------------
1 |
def partial(fn, *args, **kwargs):
    """
    Minimal stand-in for `functools.partial()`.

    Returns a callable which invokes `fn` with the positional arguments `args`
    followed by any positional arguments given at call time, and with the
    keyword arguments `kwargs` updated by any keyword arguments given at call
    time (call-time keywords take precedence).
    """
    def _call_with_frozen_args(*newargs, **newkwargs):
        combined_kwargs = dict(kwargs, **newkwargs)
        return fn(*args, *newargs, **combined_kwargs)

    return _call_with_frozen_args
4 |
--------------------------------------------------------------------------------
/js-transcrypt/libpatches/json.py:
--------------------------------------------------------------------------------
1 |
def loads(s, object_hook=None):
    """
    Parse the JSON string `s` using the JavaScript ``JSON.parse()`` function
    and return the result.

    If `object_hook` is given, it is called with each decoded JSON object
    (arrays, strings, numbers, booleans and null are left untouched) and its
    return value is used in place of that object.
    """
    if not object_hook:
        return JSON.parse(s)
    # JSON.parse() calls its reviver as reviver(key, value), so the wrapper
    # must take the value from the *second* argument.  Primitives, null and
    # arrays are passed through unchanged; only objects go to object_hook().
    __pragma__('js', "{}", """
    var wrap_object_hook = (key, value) => {
        if (value === null || typeof value !== 'object' || Array.isArray(value)) {
            return value;
        }
        return object_hook(value);
    };""")
    return JSON.parse(s, wrap_object_hook)
13 |
14 |
def dumps(d, indent=0):
    """
    Serialize `d` to a JSON string using the JavaScript ``JSON.stringify()``
    function.  The `indent` argument is passed on as ``JSON.stringify()``'s
    third ("space") argument.
    """
    json_text = JSON.stringify(d, None, indent)
    return json_text
17 |
18 |
def dump(d, f, indent=0):
    """
    Serialize `d` to JSON with `dumps()` and write the resulting string to the
    file-like object `f` (anything with a ``write()`` method).
    """
    json_text = dumps(d, indent)
    f.write(json_text)
21 |
22 |
23 |
--------------------------------------------------------------------------------
/js-transcrypt/libpatches/logging.js:
--------------------------------------------------------------------------------
1 | //
2 | // mini-patch logger
3 | //
4 |
5 | import { repr } from './org.transcrypt.__runtime__.js';
6 |
7 | import debug_module from 'debug';
8 |
9 | debug_module.formatters.r = (v) => repr(v);
10 |
11 |
//
// Logger object mimicking the parts of Python's `logging.Logger` interface
// used here (error, critical, warning, info, debug), implemented on top of the
// `debug` package.
//
class DebugLogger
{
    constructor(scope)
    {
        this.scope = scope;

        this._debug_fn = debug_module(this.scope);
        this._debug_fn_star = debug_module(this.scope+'*'); // always output

        // Build one logging method.  `emitter_name` names the property
        // holding the debug() instance to write to (looked up at call time),
        // and `prefix` is prepended to the message (null = message is passed
        // on untouched).
        const make_log_method = (emitter_name, prefix) => {
            return (msg, ...args) => {
                // Transcrypt passes Python keyword arguments as a trailing
                // object tagged with `__kwargtrans__ === null`.
                const maybe_kwargs = args[args.length-1];
                if (maybe_kwargs && maybe_kwargs.__kwargtrans__ === null) {
                    this._process_kwargs(maybe_kwargs);
                }
                const full_msg = (prefix === null) ? msg : (prefix + msg);
                this[emitter_name](full_msg, ...args);
            };
        };

        this.error = make_log_method('_debug_fn_star', '[[logging.ERROR]] !! ');
        this.critical = make_log_method('_debug_fn_star', '[[logging.CRITICAL]] !! ');
        this.warning = make_log_method('_debug_fn_star', '[[logging.WARNING]] !! ');
        this.info = make_log_method('_debug_fn_star', null);
        this.debug = make_log_method('_debug_fn', 'logging.debug ~~ ');
    }

    // Handle keyword arguments given to a logging call: `exc_info` prints a
    // stack trace to the console.
    _process_kwargs(kwargs)
    {
        if (kwargs.exc_info) {
            console.trace();
        }
    }
}
71 |
// Append the repr() of each entry of `args` to `msg`, separated from the
// message by " // " and from each other by " ; ".  With no arguments, `msg` is
// returned as is.
function _assemble_msg(msg, args)
{
    if (!args.length) {
        return msg;
    }
    const rendered_args = args.map( (a) => repr(a) ).join(' ; ');
    return msg + " // " + rendered_args;
}
79 |
80 |
81 |
// Registry of the logger instances created so far, keyed by scope name.  A Map
// is used rather than a plain object so that scope names such as 'constructor'
// or 'toString' cannot collide with properties inherited from
// Object.prototype.
const _logger_instances = new Map();

// Return the logger associated with `scope`, creating it on first use.
// Calling getLogger() again with the same scope returns the same instance.
export function getLogger(scope)
{
    let logger = _logger_instances.get(scope);
    if (logger === undefined) {
        logger = new DebugLogger(scope);
        _logger_instances.set(scope, logger);
    }
    return logger;
}
93 |
// Counterpart of Python's logging.basicConfig().  Accepts no arguments and
// does nothing.
export function basicConfig()
{
}
97 |
98 |
--------------------------------------------------------------------------------
/js-transcrypt/libpatches/unique_object_id.js:
--------------------------------------------------------------------------------
1 |
2 | // thanks https://stackoverflow.com/a/43963612/1694896
3 |
// Return an integer ID for the given object.  IDs are handed out sequentially
// (1, 2, 3, ...) the first time an object is seen, and the same object always
// gets the same ID back.  Objects are tracked in a WeakMap, so remembering
// their ID does not keep them alive.
export var fn_unique_object_id = (() => {
    const known_ids = new WeakMap();
    let last_id = 0;

    return (object) => {
        let object_id = known_ids.get(object);
        if (object_id === undefined) {
            object_id = ++last_id;
            known_ids.set(object, object_id);
        }
        return object_id;
    };
})();
16 |
--------------------------------------------------------------------------------
/js-transcrypt/my_test_script.py:
--------------------------------------------------------------------------------
1 | # some custom JS patches are necessary ... comment out these lines to run with python
2 | #import customjspatches
3 | #customjspatches.custom_apply_patches()
4 |
5 |
6 | #import pylatexenc.latexnodes as latexnodes
7 | import pylatexenc.latexnodes.parsers as parsers
8 | from pylatexenc.macrospec import LatexContextDb, MacroSpec, EnvironmentSpec, SpecialsSpec
9 | from pylatexenc.latexwalker import LatexWalker
10 |
11 |
12 | # # --- minitest ---
13 | # from pylatexenc.latexnodes import ParsingState
14 | # ps = ParsingState(s='', enable_comments=False)
15 | # from unique_object_id import fn_unique_object_id
16 | # print("Parsing state's id is = ", fn_unique_object_id(ps), "and its repr is = ", repr(ps))
17 | # raise StopHereThatllBeAllThanks
18 | # # ---
19 |
20 |
21 | latextext = r"""
22 | Here is some text that can contain some simple LaTeX macros, to produce
23 | for instance~\textbf{bold text} and \emph{italic text}.
24 |
25 | Two line breaks start a new paragraph. You can use inline math like
26 | \(\alpha=\sum_j\beta_j\) and display equations like
27 | \begin{align}
28 | S_1 &= I\,X\,Z\,Z\,X\ ; \nonumber\\
29 | S_2, \ldots, S_4 &= \text{cyclical permutations of \(S_1\)}\ .
30 | \label{eq:stabilizers}
31 | \end{align}
32 |
33 | Refer to equations with~\eqref{eq:stabilizers}, etc. ...
34 |
35 | Can we also parse citation commands like~\cite{Key1,Key2}.
36 | """
37 |
# Macros that take exactly one mandatory argument in braces.
single_arg_macro_names = ['textbf', 'textit', 'emph', 'cite', 'text', 'label', 'eqref']

# Set up the LaTeX context: the macros, specials and environments that the
# parser should know about.
lw_context = LatexContextDb()
lw_context.add_context_category(
    'my-base-latex-category',
    macros=[ MacroSpec(macroname, '{') for macroname in single_arg_macro_names ],
    specials=[
        SpecialsSpec('~'),
        # new paragraph
        SpecialsSpec('\n\n'),
    ],
    environments=[
        EnvironmentSpec('align'),
    ],
)

# Any other macro (\alpha, \, etc.) is accepted and assumed to take no arguments.
lw_context.set_unknown_macro_spec( MacroSpec('', '') )


# Parse the whole text as general LaTeX content.
lw = LatexWalker(latextext, latex_context=lw_context, tolerant_parsing=False)

parse_result = lw.parse_content( parsers.LatexGeneralNodesParser() )
nodes, carryover_info = parse_result

print("Got node list ->")
print(nodes)
75 |
--------------------------------------------------------------------------------
/js-transcrypt/mytestjscode/my_test_js_code.js:
--------------------------------------------------------------------------------
1 | // import the Transcrypt-generated JavaScript modules of pylatexenc
2 | import * as latexnodes from 'pylatexenc-js/pylatexenc.latexnodes.js';
3 | import * as macrospec from 'pylatexenc-js/pylatexenc.macrospec.js';
4 | import * as latexwalker from 'pylatexenc-js/pylatexenc.latexwalker.js';
5 | import * as parsers from 'pylatexenc-js/pylatexenc.latexnodes.parsers.js';
6 |
7 | // some setup code
8 |
9 | import * as customjspatches from 'pylatexenc-js/customjspatches.js';
10 | customjspatches.custom_apply_patches();
11 |
12 | import {__kwargtrans__, repr} from 'pylatexenc-js/org.transcrypt.__runtime__.js';
13 | const $$kw = __kwargtrans__;
14 |
15 |
16 |
17 | const {LatexContextDb, MacroSpec, EnvironmentSpec, SpecialsSpec} = macrospec;
18 | const {LatexWalker} = latexwalker;
19 |
20 |
21 | const latextext = `
22 | Here is some text that can contain some simple LaTeX macros, to produce
23 | for instance~\\textbf{bold text} and \\emph{italic text}.
24 |
25 | Two line breaks start a new paragraph. You can use inline math like
26 | \\(\\alpha=\\sum_j\\beta_j\\) and display equations like
27 | \\begin{align}
28 | S_1 &= I\\,X\\,Z\\,Z\\,X\\ ; \\nonumber\\\\
29 | S_2, \\ldots, S_4 &= \\text{cyclical permutations of \\(S_1\\)}\\ .
30 | \\label{eq:stabilizers}
31 | \\end{align}
32 |
33 | Refer to equations with~\\eqref{eq:stabilizers}, etc. ...
34 |
35 | Can we also parse citation commands like~\\cite{Key1,Key2}.
36 | `;
37 |
38 | console.log('latextext = ', latextext);
39 |
40 |
// Macros that take exactly one mandatory argument in braces.
const single_arg_macro_names = ['textbf', 'textit', 'emph', 'cite', 'text', 'label', 'eqref'];

// Set up the LaTeX context: the macros, environments and specials that the
// parser should know about.  Python keyword arguments are passed via $$kw().
const lw_context = new LatexContextDb();
lw_context.add_context_category(
    'my-base-latex-category',
    $$kw({
        macros: single_arg_macro_names.map( (macroname) => new MacroSpec(macroname, '{') ),
        environments: [
            new EnvironmentSpec('align'),
        ],
        specials: [
            new SpecialsSpec('~'),
            // new paragraph
            new SpecialsSpec('\n\n'),
        ],
    })
);

// Any other macro (\alpha, \, etc.) is accepted and assumed to take no arguments.
lw_context.set_unknown_macro_spec( new MacroSpec('', '') );

// Parse the whole text as general LaTeX content.
const walker_options = $$kw({
    latex_context: lw_context,
    tolerant_parsing: false
});
const lw = new LatexWalker(latextext, walker_options);

const parse_result = lw.parse_content( new parsers.LatexGeneralNodesParser() );
const [nodes, carryover_info] = parse_result;

console.log("Got node list ->");
console.log(repr(nodes));
console.log(nodes);
81 |
82 |
--------------------------------------------------------------------------------
/js-transcrypt/mytestjscode/node_modules/pylatexenc-js:
--------------------------------------------------------------------------------
1 | ../../pylatexenc-js
--------------------------------------------------------------------------------
/js-transcrypt/mytestjscode/package.json:
--------------------------------------------------------------------------------
1 | {"type": "module"}
2 |
--------------------------------------------------------------------------------
/js-transcrypt/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "dependencies": {
3 | "debug": "^4.3.4"
4 | }
5 | }
6 |
--------------------------------------------------------------------------------
/js-transcrypt/preprocesslib-pylatexenc.config.yaml:
--------------------------------------------------------------------------------
1 |
2 | # source directory -- will be provided by the js sources generation script
3 | source_dir: $PYLATEXENC_SRC_DIR
4 |
5 | # output directory -- will create a pylatexenc/ subfolder
6 | target_dir: $PREPROCESS_LIB_OUTPUT_DIR
7 |
8 | # which modules to preprocess
9 | module_list:
10 | - 'pylatexenc.latexnodes'
11 | - 'pylatexenc.macrospec'
12 | - 'pylatexenc.latexwalker'
13 | - 'pylatexenc.latexencode'
14 | - 'pylatexenc.latexencode.get_builtin_rules'
15 |
16 | # features
17 | enabled_features:
18 | keep_future_statements: False
19 | keep_relative_imports: False
20 | keep_super_arguments: False
21 | keep_dict_with_generator: False
22 | keep_frozenset: False
23 | keep_logger_debug: False #True # speed things up by removing logger.debug() calls
24 | guards:
25 | PYTHON2_SUPPORT_CODE: False
26 | PYLATEXENC1_LEGACY_SUPPORT_CODE: False
27 | PYLATEXENC2_LEGACY_SUPPORT_CODE: False
28 | PYLATEXENC_GET_DEFAULT_SPECS_FN: False
29 | LATEXWALKER_HELPERS: False
30 | DEBUG_SET_EQ_ATTRIBUTE: False
31 | patches:
32 | UNIQUE_OBJECT_ID: |
33 | import unique_object_id
34 | fn_unique_object_id = unique_object_id.fn_unique_object_id
35 |
36 | # Always specify u2lobj= to callables (JS will silently ignore extra args)
37 | LATEXENCODE_CALLABLE_ACCEPTS_U2LOBJ_ARG: |
38 | def _callable_accepts_u2lobj_arg(fn):
39 | return True
40 |
--------------------------------------------------------------------------------
/js-transcrypt/preprocesslib-tests.config.yaml:
--------------------------------------------------------------------------------
1 | # output directory -- will create a pylatexenc/ subfolder
2 | target_dir: $PREPROCESS_LIB_OUTPUT_DIR
3 |
4 | source_dir: $PYLATEXENC_SRC_DIR
5 |
6 | # which modules to preprocess
7 | module_list:
8 | - 'pylatexenc.latexnodes'
9 | - 'pylatexenc.macrospec'
10 | - 'pylatexenc.latexwalker'
11 | - 'pylatexenc.latexencode'
12 | - 'test.*'
13 |
14 | # features
15 | enabled_features:
16 | keep_future_statements: False
17 | keep_relative_imports: False
18 | keep_super_arguments: False
19 | keep_dict_with_generator: False
20 | keep_frozenset: False
21 | keep_logger_debug: True
22 | guards:
23 | PYTHON2_SUPPORT_CODE: False
24 | PYLATEXENC1_LEGACY_SUPPORT_CODE: False
25 | PYLATEXENC2_LEGACY_SUPPORT_CODE: False
26 | PYLATEXENC_GET_DEFAULT_SPECS_FN: False
27 | LATEXWALKER_HELPERS: False
28 | DEBUG_SET_EQ_ATTRIBUTE: False
29 | TEST_PYLATEXENC_SKIP: False
30 | patches:
31 | UNIQUE_OBJECT_ID: |
32 | import unique_object_id
33 | fn_unique_object_id = unique_object_id.fn_unique_object_id
34 |
35 | # Always specify u2lobj= to callables (JS will silently ignore extra args)
36 | LATEXENCODE_CALLABLE_ACCEPTS_U2LOBJ_ARG: |
37 | def _callable_accepts_u2lobj_arg(fn):
38 | return True
39 |
--------------------------------------------------------------------------------
/js-transcrypt/transcrypt_runtime_patches.js:
--------------------------------------------------------------------------------
1 | /*** PhF/PYLATEXENC - BEGIN CUSTOM PATCHES ***/
2 |
3 | //
4 | // Patch Transcrypt's implementations of some builtin object methods.
5 | //
6 | String.prototype.startswith = function (prefix, start) {
7 | //console.log("Custom startswith()! prefix = ", prefix, ", start = ", start);
8 | var pos_start = (typeof start === 'undefined' ? 0 : start);
9 | if (prefix instanceof Array) {
10 | for (var i=0;i= width) {
32 | return this;
33 | }
34 | return fill_char.repeat(width - this.length) + this;
35 | };
// Python-like str.rstrip(): without argument, remove trailing whitespace;
// otherwise remove any trailing characters that are contained in `chars`.
String.prototype.rstrip = function(chars) {
    if (chars === undefined) {
        return this.replace (/\s*$/g, '');
    }
    // Move `end` left past every trailing character found in `chars`.
    let end = this.length;
    while (end > 0 && chars.indexOf(this.charAt(end - 1)) !== -1) {
        --end;
    }
    // Nothing to strip: hand back the string itself, untouched.
    return (end === this.length) ? this : this.slice(0, end);
}
46 | //
47 | // Patch Transcrypt's __pop__() method which has a bug
48 | // (https://github.com/QQuick/Transcrypt/issues/827)
49 | //
// Remove the entry `aKey` from `this` and return its value.  If there is no
// such entry, return `aDefault` if one was given, and throw a KeyError
// otherwise.
__pop__ = function (aKey, aDefault) {
    const value = this [aKey];
    if (value === undefined) {
        // key not present
        if (aDefault === undefined) {
            throw KeyError (aKey, new Error());
        }
        return aDefault;
    }
    delete this [aKey];
    return value;
}
62 |
63 | //
64 | // Check that a is not null, too, otherwise we get errors with "'__eq__' in a".
65 | // Also check for __eq__ in b object!
66 | //
__eq__ = function (a, b) {
    // true for non-null objects that provide their own __eq__() method
    const defines_eq = (obj) => (typeof obj == 'object' && obj != null && '__eq__' in obj);

    if (defines_eq (a)) {
        return a.__eq__ (b);
    }
    if (defines_eq (b)) {
        return b.__eq__ (a);
    }
    // neither side customizes equality: fall back to JavaScript's loose ==
    return a == b;
};
76 |
77 |
78 |
79 |
80 |
81 | /*** PhF/PYLATEXENC - END CUSTOM PATCHES ***/
82 |
--------------------------------------------------------------------------------
/js-transcrypt/yarn.lock:
--------------------------------------------------------------------------------
1 | # This file is generated by running "yarn install" inside your project.
2 | # Manual changes might be lost - proceed with caution!
3 |
4 | __metadata:
5 | version: 6
6 | cacheKey: 8
7 |
8 | "debug@npm:^4.3.4":
9 | version: 4.3.4
10 | resolution: "debug@npm:4.3.4"
11 | dependencies:
12 | ms: 2.1.2
13 | peerDependenciesMeta:
14 | supports-color:
15 | optional: true
16 | checksum: 3dbad3f94ea64f34431a9cbf0bafb61853eda57bff2880036153438f50fb5a84f27683ba0d8e5426bf41a8c6ff03879488120cf5b3a761e77953169c0600a708
17 | languageName: node
18 | linkType: hard
19 |
20 | "ms@npm:2.1.2":
21 | version: 2.1.2
22 | resolution: "ms@npm:2.1.2"
23 | checksum: 673cdb2c3133eb050c745908d8ce632ed2c02d85640e2edb3ace856a2266a813b30c613569bf3354fdf4ea7d1a1494add3bfa95e2713baa27d0c2c71fc44f58f
24 | languageName: node
25 | linkType: hard
26 |
27 | "root-workspace-0b6124@workspace:.":
28 | version: 0.0.0-use.local
29 | resolution: "root-workspace-0b6124@workspace:."
30 | dependencies:
31 | debug: ^4.3.4
32 | languageName: unknown
33 | linkType: soft
34 |
--------------------------------------------------------------------------------
/pylatexenc/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # The MIT License (MIT)
3 | #
4 | # Copyright (c) 2015 Philippe Faist
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 | # THE SOFTWARE.
23 | #
24 |
25 |
26 | """
27 | Utilities for LaTeX to/from Unicode Text conversion.
28 |
29 | Main Site:
30 |
31 | https://github.com/phfaist/pylatexenc/
32 |
33 | """
34 |
35 | from .version import version_str as _version_str
36 |
# Expose the version string imported from ``.version`` under the conventional
# ``__version__`` attribute of the package.
__version__ = _version_str
38 |
39 |
--------------------------------------------------------------------------------
/pylatexenc/_util.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # The MIT License (MIT)
4 | #
5 | # Copyright (c) 2019 Philippe Faist
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
8 | # of this software and associated documentation files (the "Software"), to deal
9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in
15 | # all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | # THE SOFTWARE.
24 | #
25 |
26 |
27 | # Internal module. Internal API may move, disappear or otherwise change at any
28 | # time and without notice.
29 |
30 |
31 | import bisect
32 | bisect_right = bisect.bisect_right
33 |
34 |
35 |
36 | # ------------------------------------------------------------------------------
37 |
38 |
39 |
class LineNumbersCalculator(object):
    r"""
    Helper that converts a character position in a string into a line number
    and a column number.

    The start position of every line of `s` is recorded once at construction
    time; :py:meth:`pos_to_lineno_colno()` then looks up a position among these
    recorded line starts.

    Arguments:

    - `s`: the string in which positions are to be located.

    - `line_number_offset`: the number reported for the first line (default 1).

    - `first_line_column_offset`: value added to the column number of
      positions located on the first line.

    - `column_offset`: value added to the column number of positions located
      on any line other than the first.
    """
    def __init__(self, s,
                 line_number_offset=1, first_line_column_offset=0, column_offset=0):
        super(LineNumbersCalculator, self).__init__()

        self.line_number_offset = line_number_offset
        self.first_line_column_offset = first_line_column_offset
        self.column_offset = column_offset

        # The first line begins at the very start of the string; every
        # further line begins at the character right after a newline.
        line_starts = [0]
        newline_at = s.find('\n')
        while newline_at != -1:
            line_starts.append(newline_at + 1)
            newline_at = s.find('\n', newline_at + 1)

        self._pos_new_lines = line_starts


    def pos_to_lineno_colno(self, pos, as_dict=False):
        r"""
        Locate the position `pos` and report its line and column numbers.

        The result is the tuple `(lineno, colno)`, or, if `as_dict=True`, the
        dictionary `{'lineno': lineno, 'colno': colno}`.  With the default
        offsets, lines are counted from 1 and columns from 0, so that `pos=0`
        is reported as `(1,0)`.

        If `pos` is `None`, both the line and the column number are reported
        as `None`.
        """

        if pos is None:
            lineno, colno = None, None
        else:
            starts = self._pos_new_lines

            # index of the last recorded line start that is <= pos
            idx = bisect_right(starts, pos) - 1
            assert idx >= 0 and idx < len(starts)

            # the first line has its own column offset
            if idx == 0:
                shift = self.first_line_column_offset
            else:
                shift = self.column_offset

            colno = pos - starts[idx] + shift
            lineno = idx + self.line_number_offset

        if as_dict:
            return {'lineno': lineno, 'colno': colno}
        return (lineno, colno)
101 |
102 |
103 |
104 | # ------------------------------------------------------------------------------
105 |
106 |
class PushPropOverride(object):
    r"""
    Context manager that temporarily sets the attribute named `propname` of
    `obj` to `new_value`, and puts back the earlier value on exit.

    If `new_value` is `None`, the context manager does nothing: the attribute
    is neither read nor written, on entry or on exit.
    """
    def __init__(self, obj, propname, new_value):
        super(PushPropOverride, self).__init__()
        self.obj = obj
        self.propname = propname
        self.new_value = new_value

    def __enter__(self):
        if self.new_value is None:
            # nothing to override
            return self
        # remember the current value so that __exit__() can restore it
        self.initval = getattr(self.obj, self.propname)
        setattr(self.obj, self.propname, self.new_value)
        return self

    def __exit__(self, type, value, traceback):
        if self.new_value is None:
            # nothing was overridden on entry
            return
        setattr(self.obj, self.propname, self.initval)
124 |
125 |
126 | # ------------------------------------------------------------------------------
127 |
128 |
129 | try:
130 | from collections import ChainMap
131 | except ImportError:
132 | pass
133 | ### BEGIN_PYTHON2_SUPPORT_CODE
134 | from chainmap import ChainMap
135 | ### END_PYTHON2_SUPPORT_CODE
136 |
137 |
138 |
139 | # ------------------------------------------------------------------------------
140 |
141 |
142 |
# No-op placeholders for the deprecation helpers and for `LazyDict`.  The
# import from ``._util_support`` further down in this module rebinds all four
# names to the real implementations; the placeholders only remain in effect if
# that legacy-support section is removed from the module.
pylatexenc_deprecated_ver = lambda *args: None #lgtm [py/multiple-definition]
pylatexenc_deprecated_2 = lambda *args: None #lgtm [py/multiple-definition]
pylatexenc_deprecated_3 = lambda *args: None #lgtm [py/multiple-definition]
LazyDict = None #lgtm [py/multiple-definition]
147 |
148 | ### BEGIN_PYLATEXENC2_LEGACY_SUPPORT_CODE
149 |
150 | from ._util_support import ( # lgtm [py/unused-import]
151 | pylatexenc_deprecated_ver,
152 | pylatexenc_deprecated_2,
153 | pylatexenc_deprecated_3,
154 | #
155 | LazyDict
156 | )
157 |
158 | ### END_PYLATEXENC2_LEGACY_SUPPORT_CODE
159 |
--------------------------------------------------------------------------------
/pylatexenc/_util_support.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # The MIT License (MIT)
4 | #
5 | # Copyright (c) 2019 Philippe Faist
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
8 | # of this software and associated documentation files (the "Software"), to deal
9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in
15 | # all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | # THE SOFTWARE.
24 | #
25 |
26 |
27 | # Internal module. Internal API may move, disappear or otherwise change at any
28 | # time and without notice.
29 |
30 | from __future__ import print_function, unicode_literals
31 |
32 |
33 | try:
34 | # Python >= 3.3
35 | from collections.abc import MutableMapping
36 | except ImportError:
37 | from collections import MutableMapping
38 |
39 | import warnings
40 |
41 |
42 |
43 |
def pylatexenc_deprecated_ver(ver, msg, stacklevel=2):
    r"""
    Emit a :py:exc:`DeprecationWarning` stating that something is deprecated
    as of `pylatexenc` version `ver`.

    Leading and trailing whitespace is stripped from `msg`.  The value of
    `stacklevel` is incremented by one before being handed over to
    :py:func:`warnings.warn()`, to account for the frame of this helper.
    """
    text = "Deprecated (pylatexenc {}): {} ".format(ver, msg.strip())
    warnings.warn(text, DeprecationWarning, stacklevel=stacklevel+1)
50 |
51 |
def pylatexenc_deprecated_2(msg, stacklevel=2):
    r"""
    Emit a :py:exc:`DeprecationWarning` for a feature deprecated in
    `pylatexenc 2.0`; the warning text ends with a pointer to the
    "new in pylatexenc 2" documentation page.

    Leading and trailing whitespace is stripped from `msg`.  The value of
    `stacklevel` is incremented by one before being handed over to
    :py:func:`warnings.warn()`, to account for the frame of this helper.
    """
    template = (
        "Deprecated (pylatexenc 2.0): {} "
        "[see https://pylatexenc.readthedocs.io/en/latest/new-in-pylatexenc-2/]"
    )
    text = template.format(msg.strip())
    warnings.warn(text, DeprecationWarning, stacklevel=stacklevel+1)
60 |
def pylatexenc_deprecated_3(msg, stacklevel=2):
    r"""
    Emit a :py:exc:`DeprecationWarning` for a feature deprecated in
    `pylatexenc 3.0`; the warning text ends with a pointer to the
    "new in pylatexenc 3" documentation page.

    Leading and trailing whitespace is stripped from `msg`.  The value of
    `stacklevel` is incremented by one before being handed over to
    :py:func:`warnings.warn()`, to account for the frame of this helper.
    """
    template = (
        "Deprecated (pylatexenc 3.0): {} "
        "[see https://pylatexenc.readthedocs.io/en/latest/new-in-pylatexenc-3/]"
    )
    text = template.format(msg.strip())
    warnings.warn(text, DeprecationWarning, stacklevel=stacklevel+1)
69 |
70 |
71 |
72 | # ------------------------------------------------------------------------------
73 |
74 |
class LazyDict(MutableMapping):
    r"""
    A dictionary-like object whose contents are only generated the first time
    the object is accessed.

    The callable `generate_dict_fn` is invoked (without arguments) upon the
    first access, and the object it returns is used to serve that access and
    all later ones.

    This is used to store the legacy
    :py:data:`pylatexenc.latexwalker.default_macro_dict` as well as
    :py:data:`pylatexenc.latex2text.default_macro_dict` etc.  In this way
    these "dictionaries" remain exposed at the module level, while their data
    is loaded only if they are actually queried.
    """
    def __init__(self, generate_dict_fn):
        # `None` means that the data has not been generated yet
        self._full_dict = None
        self._generate_dict_fn = generate_dict_fn

    def _ensure_instance(self):
        # generate the underlying dictionary if this hasn't happened yet
        if self._full_dict is None:
            self._full_dict = self._generate_dict_fn()

    def __getitem__(self, key):
        self._ensure_instance()
        return self._full_dict[key]

    def __setitem__(self, key, val):
        self._ensure_instance()
        self._full_dict[key] = val

    def __delitem__(self, key):
        self._ensure_instance()
        del self._full_dict[key]

    def __iter__(self):
        self._ensure_instance()
        return iter(self._full_dict)

    def __len__(self):
        self._ensure_instance()
        return len(self._full_dict)

    def copy(self):
        # returns a copy of the underlying dictionary (not a new `LazyDict`)
        self._ensure_instance()
        return self._full_dict.copy()

    def clear(self):
        self._ensure_instance()
        return self._full_dict.clear()
121 |
122 |
123 |
124 |
--------------------------------------------------------------------------------
/pylatexenc/latex2text/_inputlatexfile.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # The MIT License (MIT)
4 | #
5 | # Copyright (c) 2021 Philippe Faist
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
8 | # of this software and associated documentation files (the "Software"), to deal
9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in
15 | # all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | # THE SOFTWARE.
24 | #
25 |
26 |
27 | # Internal module. Internal API may move, disappear or otherwise change at any
28 | # time and without notice.
29 |
30 | from __future__ import print_function, unicode_literals
31 |
32 | import os.path
33 |
34 | import logging
35 | logger = logging.getLogger(__name__)
36 |
37 |
def _is_path_within_directory(path, directory):
    # Tell whether `path` is `directory` itself or is located below it.  Whole
    # path components are compared: a bare `path.startswith(directory)` would
    # wrongly accept a sibling such as '/data/texfiles-private' for the
    # directory '/data/texfiles'.
    path = os.path.normcase(path)
    directory = os.path.normcase(directory)
    if path == directory:
        return True
    if not directory.endswith(os.sep):
        directory = directory + os.sep
    return path.startswith(directory)


def read_latex_file(tex_input_directory, strict_input, fn):
    r"""
    Read the LaTeX input file `fn`, interpreted relative to the directory
    `tex_input_directory`, and return its contents as a string.

    If the file `fn` does not exist, the names obtained by appending the
    extensions ``.tex`` and ``.latex`` are tried, in this order.

    If `strict_input` is true, then the file is only read if its resolved
    location (after following symlinks and ``..`` components) is inside
    `tex_input_directory`.

    This function does not raise an error if the file cannot be read.  Instead,
    a warning is logged and an empty string is returned if the file lies
    outside of the mandated directory in strict input mode, if it does not
    exist, or if an :py:exc:`IOError` occurs while reading it.
    """

    fnfull = os.path.realpath(os.path.join(tex_input_directory, fn))

    dirfull = None
    if strict_input:
        # make sure that the input file is strictly within dirfull, and
        # didn't escape with '../..' tricks or via symlinks.
        dirfull = os.path.realpath(tex_input_directory)
        if not _is_path_within_directory(fnfull, dirfull):
            logger.warning(
                "Can't access path '%s' leading outside of mandated directory "
                "[strict input mode]",
                fn
            )
            return ''

    # try the known extensions if the file name as given doesn't exist
    for ext in ('.tex', '.latex'):
        if not os.path.exists(fnfull) and os.path.exists(fnfull + ext):
            fnfull = fnfull + ext

    if strict_input:
        # The file name with an extension appended might itself be a symlink
        # that points outside of the directory; resolve and check again.
        fnfull = os.path.realpath(fnfull)
        if not _is_path_within_directory(fnfull, dirfull):
            logger.warning(
                "Can't access path '%s' leading outside of mandated directory "
                "[strict input mode]",
                fn
            )
            return ''

    if not os.path.isfile(fnfull):
        logger.warning("Error, file doesn't exist: '%s'", fn)
        return ''

    logger.debug("Reading input file %r", fnfull)

    try:
        with open(fnfull) as f:
            return f.read()
    except IOError as e:
        logger.warning("Error, can't access '%s': %s", fn, e)
        return ''
69 |
--------------------------------------------------------------------------------
/pylatexenc/latexencode/__main__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # The MIT License (MIT)
4 | #
5 | # Copyright (c) 2019 Philippe Faist
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
8 | # of this software and associated documentation files (the "Software"), to deal
9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in
15 | # all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | # THE SOFTWARE.
24 | #
25 |
26 |
27 | import sys
28 | import fileinput
29 | import argparse
30 | import logging
31 |
32 |
33 | from ..latexencode import unicode_to_latex
34 | from ..version import version_str
35 |
36 |
37 |
def main(argv=None):
    r"""
    Entry point of the `latexencode` command-line tool.

    The input is read from the files given on the command line (or from
    standard input if no file is given), it is encoded with
    :py:func:`unicode_to_latex()` according to the command-line options, and
    the result is written to standard output.

    The argument `argv` is the list of command-line arguments, without the
    program name.  If it is `None`, then ``sys.argv[1:]`` is used.
    """

    if argv is None:
        argv = sys.argv[1:]

    # add_help=False, because the '--help' option is declared explicitly below
    parser = argparse.ArgumentParser(prog='latexencode', add_help=False)
    parser.add_argument('files', metavar="FILE", nargs='*',
                        help='Input files (if none specified, read from standard input)')

    # both flags write to the same destination, the one given last wins
    parser.add_argument('--non-ascii-only', action='store_const', const=True,
                        dest='non_ascii_only', default=False)
    parser.add_argument('--no-non-ascii-only', action='store_const', const=False,
                        dest='non_ascii_only',
                        help="The option --non-ascii-only specifies that only non-ascii characters "
                        "are to be encoded into LaTeX sequences, and not characters like '$' "
                        "even though they might have a special LaTeX meaning.")

    parser.add_argument('--replacement-latex-protection',
                        choices=('braces', 'braces-all', 'braces-almost-all', 'braces-after-macro',
                                 'none'),
                        dest='replacement_latex_protection', default='braces',
                        help=r"How to protect replacement latex code from producing invalid latex code "
                        r"when concatenated in a longer string.  One of 'braces', 'braces-all', "
                        r"'braces-almost-all', 'braces-after-macro', 'none'.  Example: using "
                        r"choice 'braces' we avoid the invalid replacement 'a→b' -> 'a\tob' "
                        r"with instead 'a{\to}b'.")

    parser.add_argument('--unknown-char-policy',
                        choices=('keep', 'replace', 'ignore', 'fail'),
                        dest='unknown_char_policy', default='keep',
                        help="How to deal with nonascii characters with no known latex code equivalent.")

    parser.add_argument('-q', '--quiet', dest='logging_level', action='store_const',
                        const=logging.ERROR, default=logging.INFO,
                        help="Suppress warning messages")
    parser.add_argument('--version', action='version',
                        version='pylatexenc {}'.format(version_str),
                        help="Show version information and exit")
    parser.add_argument('--help', action='help',
                        help="Show this help information and exit")

    args = parser.parse_args(argv)

    logging.basicConfig()
    logging.getLogger().setLevel(args.logging_level)

    # Collect the full input in a single join, rather than by repeated string
    # concatenation.  With an empty list of files, fileinput reads from
    # standard input.
    latex = ''.join(fileinput.input(files=args.files))

    result = unicode_to_latex(
        latex,
        non_ascii_only=args.non_ascii_only,
        replacement_latex_protection=args.replacement_latex_protection,
        unknown_char_policy=args.unknown_char_policy
    )

    sys.stdout.write(result)
96 |
97 |
def run_main():
    r"""
    Debugging variant of :py:func:`main()`.

    Runs :py:func:`main()`; if any exception other than :py:exc:`SystemExit`
    propagates out of it, the traceback is printed and a `pdb` post-mortem
    debugging session is started.  A :py:exc:`SystemExit` is re-raised
    unchanged.
    """
    try:
        main()
    except SystemExit:
        # regular program termination (e.g. from argparse), not an error
        raise
    except: # lgtm [py/catch-base-exception]
        import traceback
        traceback.print_exc()

        import pdb
        pdb.post_mortem()
110 |
111 |
if __name__ == '__main__':

    # For debugging, call run_main() instead of main(): it prints the traceback
    # and starts a pdb post-mortem session if an exception is raised.
    # run_main() ## DEBUG
    main()
116 |
--------------------------------------------------------------------------------
/pylatexenc/latexencode/_partial_latex_encoder.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # The MIT License (MIT)
4 | #
5 | # Copyright (c) 2021 Philippe Faist
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
8 | # of this software and associated documentation files (the "Software"), to deal
9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in
15 | # all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | # THE SOFTWARE.
24 | #
25 |
26 | from __future__ import print_function, absolute_import, unicode_literals
27 |
28 | #import sys
29 | import logging
30 |
31 | logger = logging.getLogger(__name__)
32 |
33 |
34 | from ._unicode_to_latex_encoder import (
35 | RULE_CALLABLE,
36 | UnicodeToLatexConversionRule,
37 | UnicodeToLatexEncoder
38 | )
39 |
40 |
41 | from ..latexwalker import _walker
42 |
43 |
class PartialLatexToLatexEncoder(UnicodeToLatexEncoder):
    r"""
    Encode a string while preserving some (fuzzily detected) LaTeX constructs
    that the input string already has (e.g. accent macros or inline math modes).

    Sometimes you need to fully LaTeX-encode a string that already has some
    LaTeX constructs.  For instance, titles of bibliographic entries might
    include some inline math or accents, but they might also include unicode
    characters that need to be encoded.  Using a
    :py:class:`UnicodeToLatexEncoder` on such strings would result in ugly
    doubly-escaped strings such as ``\textbackslash{}'\{e\}``.  Instead,
    constructs such as ``\'{e}`` should be preserved while other characters
    and/or constructs (say '&' or '%') as well as unicode characters should be
    encoded.

    This class offers a simple partial solution: Characters are encoded as per
    the given `conversion_rules` (or the default conversion rules of
    :py:class:`UnicodeToLatexEncoder` objects), except that the characters in
    `keep_latex_chars` are to be interpreted as LaTeX and are not to be further
    encoded.

    Arguments:

    - `keep_latex_chars`: the characters that are to be interpreted as
      introducing a LaTeX token which is kept as is.

    - `conversion_rules`: the rules used to encode all other characters.  Any
      iterable of rules is accepted.  If `None`, then ``['defaults']`` is used.

    Further keyword arguments are passed on to the constructor of
    :py:class:`UnicodeToLatexEncoder`.

    .. versionadded:: 2.10
    """
    def __init__(self,
                 # keyword arguments:
                 keep_latex_chars=r'\${}^_',
                 conversion_rules=None,
                 **kwargs):

        base_conversion_rules = conversion_rules
        if base_conversion_rules is None:
            base_conversion_rules = ['defaults']

        super(PartialLatexToLatexEncoder, self).__init__(
            # Our own special rule, which tries to parse partial latex, comes
            # first; the base rules follow.  The base rules are converted to a
            # list so that a tuple (or another iterable) is accepted, too,
            # instead of causing a TypeError in the concatenation.
            conversion_rules=[UnicodeToLatexConversionRule(
                rule_type=RULE_CALLABLE,
                rule=self._do_partial_latex_encode_step,
                replacement_latex_protection='none'
            )] + list(base_conversion_rules),
            **kwargs
        )

        self.keep_latex_chars = keep_latex_chars


    def _do_partial_latex_encode_step(self, s, pos):
        r"""
        This method is used as a "callable rule" for the
        :py:class:`UnicodeToLatexEncoder` object.

        The strategy is to see if we have something that looks like a LaTeX char
        we want to keep.  If so, keep it as is; if not, return `None` so that
        further rules can be considered by the base unicode encoder.

        When a token is kept, the return value is a tuple `(length, latex)`
        where `length` is counted from `pos` up to the end of the token and
        `latex` is the corresponding text of the token.
        """

        if s[pos] in self.keep_latex_chars:
            # Read a token and if it is a macro, keep the full macro!
            lw = _walker.LatexWalker(s, tolerant_parsing=False)
            ps = lw.make_parsing_state()
            tok = lw.make_token_reader(pos=pos).peek_token(parsing_state=ps)

            tok_as_latex = tok.pre_space + s[tok.pos : tok.pos+tok.len]

            # keep the LaTeX token as-is
            return (tok.pos+tok.len - pos, tok_as_latex)

        return None
113 |
--------------------------------------------------------------------------------
/pylatexenc/latexencode/get_builtin_rules.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # The MIT License (MIT)
4 | #
5 | # Copyright (c) 2023 Philippe Faist
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
8 | # of this software and associated documentation files (the "Software"), to deal
9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in
15 | # all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | # THE SOFTWARE.
24 | #
25 |
26 |
27 |
28 | # Internal module. Internal API may move, disappear or otherwise change at any
29 | # time and without notice.
30 |
31 | from __future__ import print_function, unicode_literals
32 |
33 |
# Fall back to a plain `dict`.  This assignment sits before the skip/noskip
# pragma pair, so it is the one that remains in effect wherever the section
# between the pragmas is left out (presumably in the Transcrypt-generated JS
# build -- TODO confirm).
_MappingProxyType = dict
#__pragma__('skip')
import sys
if sys.version_info.major > 2:
    # Python 3: wrap dictionaries in a read-only view
    from types import MappingProxyType as _MappingProxyType
else:
    # Python 2: keep the plain `dict`
    _MappingProxyType = dict
#__pragma__('noskip')
42 |
43 |
44 | from ._rule import (
45 | RULE_DICT,
46 | RULE_REGEX,
47 | RULE_CALLABLE,
48 | UnicodeToLatexConversionRule,
49 | )
50 |
51 |
52 |
53 |
def get_builtin_uni2latex_dict():
    r"""
    Return the default collection of known LaTeX escape sequences for unicode
    characters, as a dictionary.

    Each key of the dictionary is an integer, the unicode code point of a
    character (i.e., `ord(char)`).  The associated value is the LaTeX
    replacement string for that character.

    The returned dictionary may not be modified.  To alter the behavior of
    :py:func:`unicode_to_latex()`, you should specify custom rules to a new
    instance of :py:class:`UnicodeToLatexEncoder`.

    .. versionadded:: 2.0

       This function was introduced in `pylatexenc 2.0`.
    """

    # the (large) mapping module is only imported when it is needed
    from . import _uni2latexmap
    return _MappingProxyType(_uni2latexmap.uni2latex)
74 |
75 |
def get_builtin_conversion_rules(builtin_name):
    r"""
    Return the built-in set of conversion rules that goes by the name
    `builtin_name`.

    The following two names of built-in sets of conversion rules are known:

    - `'defaults'`: the default conversion rules, a custom-curated list of
      unicode chars to LaTeX escapes.

    - `'unicode-xml'`: the conversion rules derived from the `unicode.xml` file
      maintained at https://www.w3.org/TR/xml-entity-names/#source by David
      Carlisle.

    A list of :py:class:`UnicodeToLatexConversionRule` objects is returned.
    This list can be specified directly as the `conversion_rules=` argument of
    :py:class:`UnicodeToLatexEncoder`, or it can be included in a larger list
    that is provided to that argument.

    A :py:exc:`ValueError` is raised if `builtin_name` is not one of the names
    listed above.

    .. versionadded:: 2.0

       This function was introduced in `pylatexenc 2.0`.
    """
    if builtin_name == 'defaults':
        rule_dict = get_builtin_uni2latex_dict()
    elif builtin_name == 'unicode-xml':
        # only import this mapping if it was actually requested
        from . import _uni2latexmap_xml
        rule_dict = _uni2latexmap_xml.uni2latex
    else:
        raise ValueError("Unknown builtin rule set: {}".format(builtin_name))

    # either way, the result is a single dictionary-based rule
    return [ UnicodeToLatexConversionRule(rule_type=RULE_DICT, rule=rule_dict) ]
109 |
110 |
--------------------------------------------------------------------------------
/pylatexenc/latexnodes/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # The MIT License (MIT)
4 | #
5 | # Copyright (c) 2022 Philippe Faist
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
8 | # of this software and associated documentation files (the "Software"), to deal
9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in
15 | # all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | # THE SOFTWARE.
24 | #
25 |
26 | r"""
27 | .. versionadded:: 3.0
28 |
29 | The `latexnodes` module was introduced in `pylatexenc` 3.
30 |
31 | """
32 |
33 |
34 | from ._exctypes import *
35 |
36 | from ._token import LatexToken
37 |
38 | from ._nodescollector import (
39 | LatexNodesCollector
40 | )
41 |
42 | from ._parsingstate import (
43 | ParsingState
44 | )
45 |
46 | from ._parsingstatedelta import (
47 | ParsingStateDelta,
48 | ParsingStateDeltaReplaceParsingState,
49 | ParsingStateDeltaChained,
50 | ParsingStateDeltaWalkerEvent,
51 | ParsingStateDeltaEnterMathMode,
52 | ParsingStateDeltaLeaveMathMode,
53 | get_updated_parsing_state_from_delta,
54 | )
55 | from ._parsedargs import (
56 | LatexArgumentSpec,
57 | ParsedArguments,
58 | )
59 |
60 | from ._tokenreaderbase import (
61 | LatexTokenReaderBase,
62 | LatexTokenListTokenReader,
63 | )
64 | from ._tokenreader import (
65 | LatexTokenReader,
66 | )
67 |
68 | from ._callablespecbase import (
69 | CallableSpecBase
70 | )
71 |
72 | from ._walkerbase import (
73 | LatexWalkerParsingStateEventHandler,
74 | LatexWalkerBase,
75 | )
76 |
77 | from ._latexcontextdbbase import (
78 | LatexContextDbBase
79 | )
80 |
81 | from ._parsedargsinfo import (
82 | ParsedArgumentsInfo,
83 | SingleParsedArgumentInfo,
84 | )
85 |
86 | from ._latex_recomposer import (
87 | LatexNodesLatexRecomposer
88 | )
89 |
--------------------------------------------------------------------------------
/pylatexenc/latexnodes/_callablespecbase.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # The MIT License (MIT)
4 | #
5 | # Copyright (c) 2022 Philippe Faist
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
8 | # of this software and associated documentation files (the "Software"), to deal
9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in
15 | # all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | # THE SOFTWARE.
24 | #
25 |
26 |
27 | # Internal module. Internal API may move, disappear or otherwise change at any
28 | # time and without notice.
29 |
30 | from __future__ import print_function, unicode_literals
31 |
32 |
33 |
class CallableSpecBase(object):
    r"""
    Base class of the spec classes for macros, environments, and specials (see
    the :py:mod:`pylatexenc.macrospec` module).

    From the point of view of the classes in this :py:mod:`latexnodes` module,
    a spec object is merely an object that is able to provide a parser for the
    construct at hand (a macro, an environment, or specials).

    A spec object should implement :py:meth:`get_node_parser()`, which should
    return a parser instance that can be used to parse the entire construct.

    See :py:class:`macrospec.MacroSpec` for how this is implemented in the
    :py:mod:`pylatexenc.macrospec` module.

    .. versionadded:: 3.0

       The :py:class:`CallableSpecBase` class was added in `pylatexenc 3.0`.
    """

    def get_node_parser(self, token):
        r"""
        To be reimplemented by subclasses.  This base implementation always
        raises a :py:exc:`RuntimeError`.
        """
        message = "Subclasses must reimplement get_node_parser()"
        raise RuntimeError(message)
56 |
--------------------------------------------------------------------------------
/pylatexenc/latexnodes/_latexcontextdbbase.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # The MIT License (MIT)
4 | #
5 | # Copyright (c) 2022 Philippe Faist
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
8 | # of this software and associated documentation files (the "Software"), to deal
9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in
15 | # all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | # THE SOFTWARE.
24 | #
25 |
26 |
27 | # Internal module. Internal API may move, disappear or otherwise change at any
28 | # time and without notice.
29 |
30 | from __future__ import print_function, unicode_literals
31 |
32 |
class LatexContextDbBase(object):
    r"""
    Base class for the LaTeX context database attached to a parsing state.

    The full machinery for specifying macro, environment, and specials
    definitions lives in the :py:mod:`pylatexenc.macrospec` module.  As far as
    this :py:mod:`latexnodes` module is concerned, a latex context database is
    simply an object that provides the :py:meth:`get_***_spec()` family of
    methods together with :py:meth:`test_for_specials()`, each of which
    returns the relevant spec object.

    The spec objects returned by :py:meth:`get_***_spec()` and
    :py:meth:`test_for_specials()` are instances of subclasses of
    :py:class:`CallableSpecBase`.

    Every method of this base class returns `None`, i.e., nothing is
    considered to be defined.

    .. versionadded:: 3.0

       The :py:class:`LatexContextDbBase` class was added in `pylatexenc 3.0`.
    """

    def get_macro_spec(self, macroname):
        r"""
        Look up the spec to use for parsing the macro called `macroname`.
        The `macroname` is given without the escape character (``\``).

        Implementations should return the relevant spec object, an instance
        of a subclass of :py:class:`CallableSpecBase`.

        A latex context database is free to hand out a default spec object
        for a `macroname` that was never formally defined.  From the parsers'
        perspective, whenever `get_macro_spec()` returns a spec object, the
        macro is known and parsing will happily proceed.

        Return `None` to signal that no macro named `macroname` should be
        considered as defined; the parser will then not attempt to parse a
        macro and will raise an error instead (or recover from it in tolerant
        parsing mode).
        """
        return None

    def get_environment_spec(self, environmentname):
        r"""
        Counterpart of :py:meth:`get_macro_spec()` for environments.  The
        `environmentname` is the name given between the curly braces that
        follow the ``\begin`` call.

        Implementations should return the relevant spec object, an instance
        of a subclass of :py:class:`CallableSpecBase`.

        A latex context database is free to hand out a default spec object
        for an environment named `environmentname` that was not formally
        defined.  From the parsers' perspective, whenever
        `get_environment_spec()` returns a spec object, the environment is
        known and parsing will happily proceed.

        Return `None` to signal that no environment named `environmentname`
        should be considered as defined; the parser will then not attempt to
        parse the environment and will raise an error instead (or recover
        from it in tolerant parsing mode).
        """
        return None

    def get_specials_spec(self, specials_chars):
        r"""
        Counterpart of :py:meth:`get_macro_spec()` for specials.  The
        `specials_chars` is the sequence of characters that we would like to
        look up as a possible specials construct.

        Specials are parsed differently from macros and environments because
        no universal syntax sets them apart (macros and environments always
        begin with the escape character ``\``).  The token reader therefore
        calls :py:meth:`test_for_specials()` to find out whether the string
        at a given position matches some specials.

        As a consequence, :py:meth:`get_specials_spec()` is usually not
        called while parsing tokens.  It is only called in certain specific
        situations, for example to obtain the spec object associated with the
        new paragraph token ``\n\n``.

        Implementations should return the relevant spec object, an instance
        of a subclass of :py:class:`CallableSpecBase`, or `None` if these
        characters are not to be considered as specials.
        """
        return None

    def test_for_specials(self, s, pos, parsing_state):
        r"""
        Check whether specials are present in the string `s` at position
        `pos`.

        For example, when the parser tests the string
        ``"Eq.~\eqref{eq:xyz}"`` at position 3, the latex context database
        might want to report the character ``~`` as a specials construct and
        return a specials spec for it.

        When specials characters are recognized, implementations should
        return a corresponding spec object, which should be an instance of a
        :py:class:`CallableSpecBase` subclass.  The returned spec object must
        additionally expose the attribute :py:attr:`specials_chars`, holding
        the sequence of characters that were recognized as special.

        When no specials characters are recognized at exactly the position
        `pos`, implementations should return `None`.
        """
        return None
139 |
--------------------------------------------------------------------------------
/pylatexenc/latexnodes/_walkerbase.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # The MIT License (MIT)
4 | #
5 | # Copyright (c) 2021 Philippe Faist
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
8 | # of this software and associated documentation files (the "Software"), to deal
9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in
15 | # all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | # THE SOFTWARE.
24 | #
25 |
26 |
27 | # Internal module. Internal API may move, disappear or otherwise change at any
28 | # time and without notice.
29 |
30 | from __future__ import print_function, unicode_literals
31 |
32 | from ._parsingstatedelta import ParsingStateDelta
33 |
34 |
35 |
class LatexWalkerParsingStateEventHandler(object):
    r"""
    Handler for parsing state events of a LatexWalker.

    A LatexWalker instance calls methods on this object to find out how the
    parsing state should be updated when certain events occur, such as
    entering or exiting math mode.

    Events:

    - enter math mode

    - exit math mode

    .. versionadded:: 3.0

       The :py:class:`LatexWalkerParsingStateEventHandler` class was added in
       `pylatexenc 3.0`.
    """

    def enter_math_mode(self, math_mode_delimiter=None, trigger_token=None):
        r"""
        Return the :py:class:`ParsingStateDelta` describing the change to
        apply when math mode is entered: `in_math_mode` is set to `True` and
        `math_mode_delimiter` is set to the given delimiter.

        The `trigger_token` argument is not used by this default
        implementation.
        """
        new_attributes = dict(
            in_math_mode=True,
            math_mode_delimiter=math_mode_delimiter,
        )
        return ParsingStateDelta(set_attributes=new_attributes)

    def leave_math_mode(self, trigger_token=None):
        r"""
        Return the :py:class:`ParsingStateDelta` describing the change to
        apply when math mode is left: `in_math_mode` is set to `False` and
        `math_mode_delimiter` is reset to `None`.

        The `trigger_token` argument is not used by this default
        implementation.
        """
        new_attributes = dict(
            in_math_mode=False,
            math_mode_delimiter=None,
        )
        return ParsingStateDelta(set_attributes=new_attributes)


# Shared handler instance; returned by the default implementation of
# LatexWalkerBase.parsing_state_event_handler().
_default_parsing_state_event_handler = LatexWalkerParsingStateEventHandler()
74 |
75 |
class LatexWalkerBase(object):
    r"""
    Base class for a latex-walker.  It captures essentially everything that
    the classes and methods of the :py:mod:`latexnodes` module need to know
    about what a LatexWalker does.

    Most methods of this base class raise :py:exc:`RuntimeError` and must be
    reimplemented by subclasses.

    See also :py:class:`latexwalker.LatexWalker`.

    .. versionadded:: 3.0

       The :py:class:`LatexWalkerBase` class was added in `pylatexenc 3.0`.
    """

    def parsing_state_event_handler(self):
        r"""
        Return the parsing state event handler to use with this latex walker.

        This default implementation returns a shared module-level
        :py:class:`LatexWalkerParsingStateEventHandler` instance.
        """
        return _default_parsing_state_event_handler

    def parse_content(self, parser, token_reader=None, parsing_state=None,
                      open_context=None, **kwargs):
        r"""
        Invoke the given `parser` (presumably reading tokens from
        `token_reader` in the given `parsing_state` --- the exact contract is
        defined by the subclass).

        This base implementation always raises :py:exc:`RuntimeError`;
        subclasses must reimplement it.
        """
        message = "LatexWalkerBase subclasses must reimplement parse_content()"
        raise RuntimeError(message)

    def make_node(self, node_class, **kwargs):
        r"""
        Presumably create a node instance of class `node_class` from the
        given keyword arguments (the exact contract is defined by the
        subclass).

        This base implementation always raises :py:exc:`RuntimeError`;
        subclasses must reimplement it.
        """
        message = "LatexWalkerBase subclasses must reimplement make_node()"
        raise RuntimeError(message)

    def make_nodelist(self, nodelist, **kwargs):
        r"""
        Presumably create a node list object from `nodelist` (the exact
        contract is defined by the subclass).

        This base implementation always raises :py:exc:`RuntimeError`;
        subclasses must reimplement it.
        """
        message = "LatexWalkerBase subclasses must reimplement make_nodelist()"
        raise RuntimeError(message)

    def make_nodes_collector(self,
                             token_reader,
                             parsing_state,
                             **kwargs):
        r"""
        Presumably create a nodes collector object operating on
        `token_reader` with the given `parsing_state` (the exact contract is
        defined by the subclass).

        This base implementation always raises :py:exc:`RuntimeError`;
        subclasses must reimplement it.
        """
        message = (
            "LatexWalkerBase subclasses must reimplement make_nodes_collector()"
        )
        raise RuntimeError(message)

    def make_latex_group_parser(self, delimiters):
        r"""
        Presumably create a parser for a latex group delimited by
        `delimiters` (the exact contract is defined by the subclass).

        This base implementation always raises :py:exc:`RuntimeError`;
        subclasses must reimplement it.
        """
        message = (
            "LatexWalkerBase subclasses must reimplement make_latex_group_parser()"
        )
        raise RuntimeError(message)

    def make_latex_math_parser(self, math_mode_delimiters):
        r"""
        Presumably create a parser for math mode content delimited by
        `math_mode_delimiters` (the exact contract is defined by the
        subclass).

        This base implementation always raises :py:exc:`RuntimeError`;
        subclasses must reimplement it.
        """
        message = (
            "LatexWalkerBase subclasses must reimplement make_latex_math_parser()"
        )
        raise RuntimeError(message)

    def check_tolerant_parsing_ignore_error(self, exc):
        r"""
        Inspect the exception object `exc` and decide whether an attempt
        should be made to recover from it (for walkers that want to be
        tolerant to parsing errors).

        Return the exception object if it should be raised, or `None` if
        recovery should be attempted.  This base implementation never
        attempts recovery: it always returns `exc`.
        """
        return exc

    def format_node_pos(self, node):
        r"""
        Return a human-readable string describing the position of `node`,
        built from the `repr()` of its `pos` attribute.
        """
        position_repr = repr(node.pos)
        return 'character position ' + position_repr
155 |
--------------------------------------------------------------------------------
/pylatexenc/latexnodes/parsers/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # The MIT License (MIT)
4 | #
5 | # Copyright (c) 2022 Philippe Faist
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
8 | # of this software and associated documentation files (the "Software"), to deal
9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in
15 | # all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | # THE SOFTWARE.
24 | #
25 |
26 | r"""
27 | Collection of Parser objects that can parse specific types of LaTeX
28 | constructs.
29 | """
30 |
31 | from ._base import LatexParserBase
32 |
33 | from ._generalnodes import (
34 | LatexGeneralNodesParser,
35 | LatexSingleNodeParser,
36 | )
37 | from ._delimited import (
38 | LatexDelimitedExpressionParserInfo,
39 | LatexDelimitedExpressionParser,
40 | LatexDelimitedGroupParserInfo,
41 | LatexDelimitedGroupParser,
42 | LatexDelimitedMultiDelimGroupParserInfo,
43 | LatexDelimitedMultiDelimGroupParser,
44 | LatexDelimitedExpressionParserOpeningDelimiterNotFound,
45 | )
46 | from ._math import (
47 | LatexMathParser,
48 | )
49 |
50 | from ._expression import (
51 | LatexExpressionParser,
52 | )
53 |
54 | from ._optionals import (
55 | LatexOptionalSquareBracketsParser,
56 | LatexOptionalCharsMarkerParser,
57 | )
58 |
59 | from ._stdarg import (
60 | LatexStandardArgumentParser,
61 | get_standard_argument_parser,
62 | LatexCharsCommaSeparatedListParser,
63 | LatexCharsGroupParser,
64 | LatexTackOnInformationFieldMacrosParser,
65 | )
66 |
67 |
68 | from ._verbatim import (
69 | LatexVerbatimBaseParser,
70 | LatexDelimitedVerbatimParser,
71 | LatexVerbatimEnvironmentContentsParser,
72 | )
73 |
--------------------------------------------------------------------------------
/pylatexenc/latexnodes/parsers/_base.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # The MIT License (MIT)
4 | #
5 | # Copyright (c) 2022 Philippe Faist
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
8 | # of this software and associated documentation files (the "Software"), to deal
9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in
15 | # all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | # THE SOFTWARE.
24 | #
25 |
26 |
27 | # Internal module. Internal API may move, disappear or otherwise change at any
28 | # time and without notice.
29 |
30 | from __future__ import print_function, unicode_literals
31 |
32 |
33 |
34 |
35 |
36 | # ------------------------------------------------------------------------------
37 |
class LatexParserBase(object):
    r"""
    Base class of the parsers in :py:mod:`pylatexenc.latexnodes.parsers`.

    A parser is an object designed to parse one specific type of latex
    construct, such as content enclosed in curly braces, into a node tree.

    When invoked, a parser object returns a tuple
    `(nodes, parsing_state_delta)`.  The first element, `nodes`, holds the
    resulting nodes.  It is usually a
    :py:class:`~pylatexenc.latexnodes.LatexNodeList` instance, but it can also
    be a specific node instance, or another related object like a
    :py:class:`~pylatexenc.latexnodes.ParsedArguments` instance.  The second
    element, `parsing_state_delta`, encodes any changes to the parsing state
    that should result from parsing the given construct.  It should be either
    `None` (no parsing state changes) or a
    :py:class:`~pylatexenc.latexnodes.ParsingStateDelta` instance.  For
    instance, a parser that encountered a ``\newcommand`` can relay the
    corresponding state change through the `parsing_state_delta` object.

    The actual parsing work is implemented in the :py:meth:`parse()` method.

    Parser objects should be invoked through the latex walker instance, using
    `LatexWalker.parse_content()` (see :py:class:`LatexWalkerBase` and
    :py:class:`pylatexenc.latexwalker.LatexWalker`):

    .. code::

        my_latex_walker = LatexWalker(....)
        my_parser = .... # some LatexParserBase subclass

        token_reader = my_latex_walker.make_token_reader()
        parsing_state = my_latex_walker.make_parsing_state()

        # parse that specific construct:
        nodes, parsing_state_delta = my_latex_walker.parse_content(
            my_parser,
            token_reader,
            parsing_state
        )
    """

    def __init__(self):
        super(LatexParserBase, self).__init__()

    def parse(self, latex_walker, token_reader, parsing_state, **kwargs):
        r"""
        Carry out the actual parsing; this is where the main functionality of
        a parser lives.

        Do not call this method directly on a parser object.  Invoke the
        parser through the latex walker instance instead, using
        `LatexWalker.parse_content()` (see the class documentation).

        Subclasses should implement this method so that it builds the
        relevant node tree by reading tokens from the `token_reader` (use
        `token_reader.next_token()` and friends, see
        :py:class:`~pylatexenc.latexnodes.LatexTokenReaderBase`).

        Subclasses should return a tuple pair `(nodes, parsing_state_delta)`:

        - `nodes` is the node list, node, or object that resulted from the
          parsing;

        - `parsing_state_delta` encodes any parsing state changes that
          resulted from parsing this construct, or is `None` if there are no
          parsing state changes.

        This base implementation always raises :py:exc:`RuntimeError`.
        """
        message = "LatexParserBase subclasses must reimplement parse()"
        raise RuntimeError(message)

    def contents_can_be_empty(self):
        r"""
        Tell whether absorbing no tokens at all is a valid outcome for the
        thing this parser is meant to parse.

        Return `True` if it is; this would be the case, for instance, for
        group contents, for optional arguments, etc.  A parser for a
        mandatory argument would return `False` instead.

        This information is used in certain special situations, for instance
        when a closing brace is encountered immediately after a macro that
        expected an argument (say ``\mymacro}`` --- this is an error if
        ``\mymacro`` requires a mandatory argument, but it is fine if it
        accepts an optional argument).  In that case, all of the macro
        arguments' parsers need to be checked to see whether it is okay for
        them to have no contents.

        This base implementation returns `True`.
        """
        return True

    def __repr__(self):
        # Minimal representation: the class name in angle brackets.
        class_name = self.__class__.__name__
        return "<" + class_name + ">"
126 |
--------------------------------------------------------------------------------
/pylatexenc/latexnodes/parsers/_math.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # The MIT License (MIT)
4 | #
5 | # Copyright (c) 2022 Philippe Faist
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
8 | # of this software and associated documentation files (the "Software"), to deal
9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in
15 | # all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | # THE SOFTWARE.
24 | #
25 |
26 |
27 | # Internal module. Internal API may move, disappear or otherwise change at any
28 | # time and without notice.
29 |
30 | from __future__ import print_function, unicode_literals
31 |
32 | import logging
33 | logger = logging.getLogger(__name__)
34 |
35 | from .._exctypes import *
36 | from .. import nodes
37 | from .._parsingstatedelta import (
38 | ParsingStateDeltaEnterMathMode,
39 | get_updated_parsing_state_from_delta,
40 | )
41 |
42 | from ._delimited import (
43 | LatexDelimitedExpressionParserInfo,
44 | LatexDelimitedExpressionParser,
45 | )
46 |
47 |
48 |
# String base type used for isinstance() checks on delimiter arguments in
# this module.  On Python 3 this is simply `str`.
_basestring = str

# On Python 2, check against `basestring` instead.
# NOTE(review): the BEGIN/END marker comments below look like anchors for a
# preprocessing step -- keep them intact (confirm against the build scripts).
### BEGIN_PYTHON2_SUPPORT_CODE
import sys
if sys.version_info.major == 2:
    _basestring = basestring
### END_PYTHON2_SUPPORT_CODE
57 |
58 |
59 |
60 |
61 |
class LatexMathParserInfo(LatexDelimitedExpressionParserInfo):
    r"""
    Specialization of :py:class:`LatexDelimitedExpressionParserInfo` for math
    mode content; used by :py:class:`LatexMathParser`.
    """

    @classmethod
    def is_opening_delimiter(cls, delimiters, first_token, group_parsing_state,
                             delimited_expression_parser, latex_walker, **kwargs):
        r"""
        Return `True` if `first_token` is a math mode token
        (``'mathmode_inline'`` or ``'mathmode_display'``) whose argument is
        accepted by `check_opening_delimiter()` for the given `delimiters`,
        and `False` otherwise.
        """
        is_math_token = first_token.tok in ('mathmode_inline', 'mathmode_display')

        # The delimiter check is only performed for math mode tokens.
        if is_math_token and cls.check_opening_delimiter(
                delimiters=delimiters,
                parsed_opening_delimiter=first_token.arg,
                latex_walker=latex_walker
        ):
            return True

        return False

    @classmethod
    def get_acceptable_open_delimiter_list(cls, delimiters, group_parsing_state,
                                           delimited_expression_parser, latex_walker,
                                           **kwargs):
        r"""
        Return the list of acceptable opening delimiters.

        If `delimiters` is `None`, the opening delimiters of all inline and
        display math delimiter pairs known to `group_parsing_state` are
        returned.  If `delimiters` is a string, it is the only acceptable
        opening delimiter; otherwise the first item of `delimiters` is.
        """
        if delimiters is None:
            all_delimiter_pairs = (
                group_parsing_state.latex_inline_math_delimiters
                + group_parsing_state.latex_display_math_delimiters
            )
            return [
                open_delim
                for (open_delim, _close_delim) in all_delimiter_pairs
            ]

        if isinstance(delimiters, _basestring):
            return [delimiters]

        return [delimiters[0]]

    # ---

    def initialize(self):
        r"""
        Set up the math-related fields of this object from the first token
        and compute the parsing state to use for the math contents.
        """
        opening_token = self.first_token

        self.math_mode_type = opening_token.tok
        self.math_mode_delimiter = opening_token.arg

        # Enter math mode: derive the math parsing state from the current
        # parsing state.
        enter_math_delta = ParsingStateDeltaEnterMathMode(
            math_mode_delimiter=self.math_mode_delimiter,
            trigger_token=self.first_token
        )
        self.math_parsing_state = get_updated_parsing_state_from_delta(
            self.parsing_state,
            enter_math_delta,
            self.latex_walker,
        )

        self.contents_parsing_state = self.math_parsing_state
        # Computed last, after the math parsing state has been set.
        self.parsed_delimiters = self.get_parsed_delimiters()

    def stop_token_condition(self, token):
        r"""
        Return `True` if `token` is the closing math mode delimiter, i.e., a
        token of the same math mode type as the opening one whose argument is
        the parsed closing delimiter; return `False` otherwise.
        """
        if not (token.tok == self.math_mode_type
                and token.arg == self.parsed_delimiters[1]):
            return False
        return True

    def get_matching_delimiter(self, opening_delimiter):
        r"""
        Return the closing delimiter recorded in the math parsing state.  The
        `opening_delimiter` argument is not used.
        """
        close_delim_info = self.math_parsing_state._math_expecting_close_delim_info
        return close_delim_info['close_delim']

    def make_group_node_and_parsing_state_delta(self, latex_walker, token_reader,
                                                nodelist, parsing_state_delta):
        r"""
        Create the :py:class:`~nodes.LatexMathNode` for the parsed math
        content and return it along with the unchanged
        `parsing_state_delta`.

        Note that `nodelist` can be `None` in case of a parse error.
        """
        # As for the delimited group parser, use cur_pos() so that the node
        # includes the closing math mode delimiter.
        pos_end = token_reader.cur_pos()

        # Unknown math mode types map to an empty display type.
        displaytype = {
            'mathmode_inline': 'inline',
            'mathmode_display': 'display',
        }.get(self.math_mode_type, '')

        math_node = latex_walker.make_node(
            nodes.LatexMathNode,
            displaytype=displaytype,
            nodelist=nodelist,
            parsing_state=self.parsing_state,
            delimiters=self.parsed_delimiters,
            pos=self.first_token.pos,
            pos_end=pos_end,
        )

        return math_node, parsing_state_delta
160 |
161 |
162 | # ------------------------------------------------------------------------------
163 |
class LatexMathParser(LatexDelimitedExpressionParser):
    r"""
    Delimited expression parser for math mode content.

    This is a :py:class:`LatexDelimitedExpressionParser` configured to use
    :py:class:`LatexMathParserInfo` as its parser info class, with the given
    `math_mode_delimiters` as delimiters and with
    `discard_parsing_state_delta` set to `False`.  Any additional keyword
    arguments are passed on to the base class constructor.
    """
    def __init__(self,
                 math_mode_delimiters,
                 **kwargs):
        super(LatexMathParser, self).__init__(
            delimiters=math_mode_delimiters,
            discard_parsing_state_delta=False,
            delimited_expression_parser_info_class=LatexMathParserInfo,
            **kwargs
        )
174 |
--------------------------------------------------------------------------------
/pylatexenc/latexwalker/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # The MIT License (MIT)
4 | #
5 | # Copyright (c) 2018 Philippe Faist
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
8 | # of this software and associated documentation files (the "Software"), to deal
9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in
15 | # all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | # THE SOFTWARE.
24 | #
25 |
26 | r'''
27 | The ``latexwalker`` module provides a simple API for parsing LaTeX snippets,
28 | and representing the contents using a data structure based on node classes.
29 |
30 | LatexWalker will understand the syntax of most common macros. However,
31 | ``latexwalker`` is NOT a replacement for a full LaTeX engine. (Originally,
32 | ``latexwalker`` was designed to extract useful text for indexing for text
33 | database searches of LaTeX content.)
34 |
35 | Simple example usage::
36 |
37 | >>> from pylatexenc.latexwalker import LatexWalker, LatexEnvironmentNode
38 | >>> w = LatexWalker(r"""
39 | ... \textbf{Hi there!} Here is \emph{a list}:
40 | ... \begin{enumerate}[label=(i)]
41 | ... \item One
42 | ... \item Two
43 | ... \end{enumerate}
44 | ... and $x$ is a variable.
45 | ... """)
46 | >>> (nodelist, pos, len_) = w.get_latex_nodes(pos=0)
47 | >>> nodelist[0]
48 | LatexCharsNode(pos=0, len=1, chars='\n')
49 | >>> nodelist[1]
50 | LatexMacroNode(pos=1, len=18, macroname='textbf',
51 | nodeargd=ParsedMacroArgs(argnlist=[LatexGroupNode(pos=8, len=11,
52 | nodelist=[LatexCharsNode(pos=9, len=9, chars='Hi there!')],
53 | delimiters=('{', '}'))], argspec='{'), macro_post_space='')
54 | >>> nodelist[5].isNodeType(LatexEnvironmentNode)
55 | True
56 | >>> nodelist[5].environmentname
57 | 'enumerate'
58 | >>> nodelist[5].nodeargd.argspec
59 | '['
60 | >>> nodelist[5].nodeargd.argnlist
61 | [LatexGroupNode(pos=60, len=11, nodelist=[LatexCharsNode(pos=61, len=9,
62 | chars='label=(i)')], delimiters=('[', ']'))]
63 | >>> nodelist[7].latex_verbatim()
64 | '$x$'
65 |
66 | You can also use `latexwalker` directly in command-line, producing JSON or a
67 | human-readable node tree::
68 |
69 | $ echo '\textit{italic} text' | latexwalker --output-format=json
70 | {
71 | "nodelist": [
72 | {
73 | "nodetype": "LatexMacroNode",
74 | "pos": 0,
75 | "len": 15,
76 | "macroname": "textit",
77 | [...]
78 |
79 | $ latexwalker --help
80 | [...]
81 |
82 | The parser can be influenced by specifying a collection of known macros and
83 | environments (the "latex context") that are specified using
84 | :py:class:`pylatexenc.macrospec.MacroSpec` and
85 | :py:class:`pylatexenc.macrospec.EnvironmentSpec` objects in a
86 | :py:class:`pylatexenc.macrospec.LatexContextDb` object. See the doc of the
87 | module :py:mod:`pylatexenc.macrospec` for more information.
88 | '''
89 |
90 | from __future__ import print_function, unicode_literals
91 |
92 |
93 | import logging
94 | logger = logging.getLogger(__name__)
95 |
96 |
97 |
98 | from .. import macrospec
99 |
100 |
101 | # ------------------------------------------------------------------------------
102 |
103 |
104 | ### BEGIN_PYLATEXENC2_LEGACY_SUPPORT_CODE
105 | from ..latexnodes._exctypes import *
106 | from ..latexnodes.nodes import *
107 | from ..latexnodes._token import LatexToken
108 | ### END_PYLATEXENC2_LEGACY_SUPPORT_CODE
109 |
110 |
111 |
112 | from ..latexnodes import ParsingState
113 |
114 | from ._walker import LatexWalker
115 |
116 |
117 | ### BEGIN_PYLATEXENC_GET_DEFAULT_SPECS_FN
118 | from ._get_defaultspecs import get_default_latex_context_db
119 | ### END_PYLATEXENC_GET_DEFAULT_SPECS_FN
120 |
121 |
122 | ### BEGIN_PYLATEXENC1_LEGACY_SUPPORT_CODE
123 | from ._legacy_py1x import (
124 | MacrosDef,
125 | default_macro_dict,
126 | get_token,
127 | get_latex_expression,
128 | get_latex_maybe_optional_arg,
129 | get_latex_braced_group,
130 | get_latex_environment,
131 | get_latex_nodes,
132 | )
133 | ### END_PYLATEXENC1_LEGACY_SUPPORT_CODE
134 |
135 |
136 |
137 | ### BEGIN_LATEXWALKER_HELPERS
138 | from ._helpers import (
139 | nodelist_to_latex,
140 | put_in_braces,
141 | disp_node,
142 | make_json_encoder,
143 | )
144 | ### END_LATEXWALKER_HELPERS
145 |
--------------------------------------------------------------------------------
/pylatexenc/latexwalker/__main__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # The MIT License (MIT)
4 | #
5 | # Copyright (c) 2018 Philippe Faist
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
8 | # of this software and associated documentation files (the "Software"), to deal
9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in
15 | # all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | # THE SOFTWARE.
24 | #
25 |
26 | import sys
27 | import fileinput
28 | import argparse
29 | import json
30 | import logging
31 |
32 |
33 | from ..latexwalker import LatexWalker, disp_node, make_json_encoder
34 | from ..version import version_str
35 |
36 |
37 |
def main(argv=None):
    """
    Command-line entry point of the ``latexwalker`` tool.

    Parses LaTeX code -- given with ``--code``, read from the given FILEs, or
    read from standard input -- with a :py:class:`LatexWalker` and writes the
    resulting node list to standard output, either in a human-readable form
    or as JSON.

    :param argv: list of command-line arguments, without the program name.
        If `None`, ``sys.argv[1:]`` is used.

    Calls ``sys.exit(1)`` if both ``--code`` and FILEs are specified.  The
    argument parser itself exits on invalid arguments and on ``--help`` and
    ``--version``.
    """

    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(prog='latexwalker', add_help=False)

    parser.add_argument('--output-format', metavar="FORMAT", dest="output_format",
                        choices=["human", "json"], default='human',
                        help='Requested output format for the node tree ("human" or "json")')
    parser.add_argument('--json-indent', metavar="NUMSPACES", dest="json_indent",
                        type=int, default=2,
                        help='Indentation in JSON output (specify number of spaces '
                        'per indentation level)')
    parser.add_argument('--json-compact', dest="json_indent", action='store_const', const=None,
                        help='Output compact JSON')

    # Hidden flags.  They are accepted, but `keep_inline_math` is not read
    # anywhere in this function.
    parser.add_argument('--keep-inline-math', action='store_const', const=True,
                        dest='keep_inline_math', default=True,
                        help=argparse.SUPPRESS)
    parser.add_argument('--no-keep-inline-math', action='store_const', const=False,
                        dest='keep_inline_math',
                        help=argparse.SUPPRESS)

    # The description of the default behavior belongs to the positive flag;
    # the negative flag gets its own, matching description.
    parser.add_argument('--tolerant-parsing', action='store_const', const=True,
                        dest='tolerant_parsing', default=True,
                        help="Tolerate syntax errors when parsing, and attempt "
                        "to continue (default yes)")
    parser.add_argument('--no-tolerant-parsing', action='store_const', const=False,
                        dest='tolerant_parsing',
                        help="Do not tolerate syntax errors when parsing")

    # I'm not sure this flag is useful and if it should be exposed at all.
    # Accept it, but make it hidden.
    parser.add_argument('--strict-braces', action='store_const', const=True,
                        dest='strict_braces', default=False,
                        help=argparse.SUPPRESS)
    parser.add_argument('--no-strict-braces', action='store_const', const=False,
                        dest='strict_braces',
                        #help="Report errors for mismatching LaTeX braces (default no)"
                        help=argparse.SUPPRESS)

    parser.add_argument('-q', '--quiet', dest='logging_level', action='store_const',
                        const=logging.ERROR, default=logging.INFO,
                        help="Suppress warning messages")
    parser.add_argument('-v', '--verbose', dest='logging_level', action='store_const',
                        const=logging.DEBUG,
                        help="Verbose output")
    parser.add_argument('--version', action='version',
                        version='pylatexenc {}'.format(version_str),
                        help="Show version information and exit")
    parser.add_argument('--help', action='help',
                        help="Show this help information and exit")

    parser.add_argument('--code', '-c', action='store', default=None, metavar="LATEX_CODE",
                        help="Parse the given LATEX_CODE instead of reading "
                        "from FILE or standard input. You cannot specify FILEs if you use this "
                        "option, and any standard input is ignored.")

    parser.add_argument('files', metavar="FILE", nargs='*',
                        help='Input files (if none specified, read from standard input)')

    args = parser.parse_args(argv)

    logging.basicConfig()
    logging.getLogger().setLevel(args.logging_level)
    logger = logging.getLogger(__name__)

    # Compare against None rather than testing truthiness: `--code ''` is a
    # legitimate (empty) input and must not fall through to reading stdin.
    if args.code is not None:
        if args.files:
            logger.error("Cannot specify both FILEs and --code option. "
                         "Use --help option for more information.")
            sys.exit(1)
        latex = args.code
    else:
        # With an empty FILE list, fileinput reads from standard input.
        latex = ''.join(fileinput.input(files=args.files))

    latexwalker = LatexWalker(latex,
                              tolerant_parsing=args.tolerant_parsing,
                              strict_braces=args.strict_braces)

    # get_latex_nodes() returns a 3-tuple; only the node list is needed here.
    nodelist, _, _ = latexwalker.get_latex_nodes()

    if args.output_format == 'human':
        print('\n--- NODES ---\n')
        for n in nodelist:
            disp_node(n)
        print('\n-------------\n')
        return

    if args.output_format == 'json':
        json.dump({ 'nodelist': nodelist, },
                  sys.stdout,
                  cls=make_json_encoder(latexwalker),
                  indent=args.json_indent)
        sys.stdout.write("\n")
        return

    # Not reachable through the argument parser, which restricts the choices
    # of --output-format; kept as a safeguard.
    raise ValueError("Invalid output format: "+args.output_format)
140 |
141 |
142 |
def run_main():
    """
    Debugging wrapper around :py:func:`main`.

    Runs :py:func:`main`; a `SystemExit` is propagated unchanged.  Any other
    exception has its traceback printed and is then handed to the
    post-mortem debugger.
    """
    try:
        main()
    except SystemExit:
        # regular exit requested (e.g. by argparse or sys.exit()): let it through
        raise
    except BaseException: # lgtm [py/catch-base-exception]
        import pdb
        import traceback

        traceback.print_exc()
        # with no argument, inspects the exception currently being handled
        pdb.post_mortem()
157 |
158 |
159 | if __name__ == '__main__':
160 |
161 | run_main() # debug
162 | #main()
163 |
--------------------------------------------------------------------------------
/pylatexenc/latexwalker/_get_defaultspecs.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # The MIT License (MIT)
4 | #
5 | # Copyright (c) 2019 Philippe Faist
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
8 | # of this software and associated documentation files (the "Software"), to deal
9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in
15 | # all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | # THE SOFTWARE.
24 | #
25 |
26 |
27 | # Internal module. Internal API may move, disappear or otherwise change at any
28 | # time and without notice.
29 |
30 | from __future__ import print_function, unicode_literals
31 |
32 |
33 | # don't define this function in the `_defaultspecs.py` source file because we
34 | # would like to be able to define this function without having to actually load
35 | # the entire default specs module.
36 |
def get_default_latex_context_db():
    r"""
    Create a fresh :py:class:`pylatexenc.macrospec.LatexContextDb` instance
    populated with the default collection of known macros, environments and
    specials, and return it.

    TODO: document categories.

    The default definitions are loaded when this function is called.  Each
    category of the default specs is registered with
    :py:meth:`pylatexenc.macrospec.LatexContextDb.add_context_category()`.
    Specs for unknown macros and unknown environments are also set on the
    returned database.

    To add your own definitions to the returned database, use
    :py:meth:`pylatexenc.macrospec.LatexContextDb.add_context_category()`;
    pass `prepend=True` to that method if your definitions should override
    existing ones.

    If the database contains too many or irrelevant definitions, it can be
    filtered with
    :py:meth:`pylatexenc.macrospec.LatexContextDb.filter_context()`.

    .. versionadded:: 2.0

       The :py:class:`pylatexenc.macrospec.LatexContextDb` class as well as this
       method, were all introduced in `pylatexenc 2.0`.
    """

    # Imported here, not at module level, so that defining this function does
    # not require loading the entire default specs module.
    from .. import macrospec
    from ._defaultspecs import specs

    context_db = macrospec.LatexContextDb()

    for category_name, category_specs in specs:
        category_macros = category_specs['macros']
        category_environments = category_specs['environments']
        category_specials = category_specs['specials']
        context_db.add_context_category(
            category_name,
            macros=category_macros,
            environments=category_environments,
            specials=category_specials
        )

    unknown_macro = macrospec.MacroSpec('')
    context_db.set_unknown_macro_spec(unknown_macro)
    unknown_environment = macrospec.EnvironmentSpec('')
    context_db.set_unknown_environment_spec(unknown_environment)

    return context_db
77 |
78 | #
79 |
--------------------------------------------------------------------------------
/pylatexenc/macrospec/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # The MIT License (MIT)
4 | #
5 | # Copyright (c) 2022 Philippe Faist
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
8 | # of this software and associated documentation files (the "Software"), to deal
9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in
15 | # all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | # THE SOFTWARE.
24 | #
25 |
26 | r"""
27 | Provides classes and helper functions to describe a LaTeX context of known
28 | macros and environments, specifying how they should be parsed by
29 | :py:mod:`pylatexenc.latexwalker`.
30 |
31 | .. versionadded:: 2.0
32 |
33 | The entire module :py:mod:`pylatexenc.macrospec` was introduced in
34 | `pylatexenc 2.0`.
35 | """
36 |
37 |
38 | from ._specclasses import (
39 | CallableSpec,
40 | MacroSpec,
41 | EnvironmentSpec,
42 | SpecialsSpec,
43 | )
44 |
45 | ### BEGIN_PYLATEXENC2_LEGACY_SUPPORT_CODE
46 | from ._spechelpers import std_macro, std_environment, std_specials
47 | ### END_PYLATEXENC2_LEGACY_SUPPORT_CODE
48 |
49 |
50 | from ._latexcontextdb import (
51 | LatexContextDb,
52 | ParsingStateDeltaExtendLatexContextDb,
53 | )
54 |
55 | from ._argumentsparser import (
56 | LatexArgumentsParser,
57 | LatexNoArgumentsParser,
58 | )
59 |
60 | from ._environmentbodyparser import (
61 | LatexEnvironmentBodyContentsParserInfo,
62 | LatexEnvironmentBodyContentsParser
63 | )
64 |
65 | from ._macrocallparser import (
66 | LatexMacroCallParser,
67 | LatexEnvironmentCallParser,
68 | LatexSpecialsCallParser
69 | )
70 |
71 |
72 | ### BEGIN_PYLATEXENC2_LEGACY_SUPPORT_CODE
73 | from ..latexnodes import ParsedArguments as ParsedMacroArgs
74 | from ._pyltxenc2_argparsers import (
75 | MacroStandardArgsParser,
76 | ParsedVerbatimArgs,
77 | VerbatimArgsParser,
78 | ParsedLstListingArgs,
79 | LstListingArgsParser,
80 | )
81 | ### END_PYLATEXENC2_LEGACY_SUPPORT_CODE
82 |
83 |
--------------------------------------------------------------------------------
/pylatexenc/macrospec/_pyltxenc2_argparsers/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # The MIT License (MIT)
4 | #
5 | # Copyright (c) 2021 Philippe Faist
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
8 | # of this software and associated documentation files (the "Software"), to deal
9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in
15 | # all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | # THE SOFTWARE.
24 | #
25 |
26 |
27 | # Internal module. Internal API may move, disappear or otherwise change at any
28 | # time and without notice.
29 |
30 |
31 |
32 | from ._base import (
33 | MacroStandardArgsParser
34 | )
35 |
36 |
37 | # ------------------------------------------------------------------------------
38 |
39 |
40 | from ._verbatimargsparser import (
41 | ParsedVerbatimArgs,
42 | VerbatimArgsParser,
43 | ParsedLstListingArgs,
44 | LstListingArgsParser,
45 | )
46 |
--------------------------------------------------------------------------------
/pylatexenc/version.py:
--------------------------------------------------------------------------------
1 | #
2 | # The MIT License (MIT)
3 | #
4 | # Copyright (c) 2021 Philippe Faist
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 | # THE SOFTWARE.
23 | #
24 |
25 |
26 | #
27 | # Self-note: Checklist --- NOTE THESE ARE OLD INSTRUCTIONS DATING FROM 2.X VERSIONS
28 | #
29 | # 1) First some checks:
30 | #
31 | # - Set below in this file ' version_str = "X.Xb" ' (beta version for next
32 | # release) for the following tests.
33 | #
34 | # - tests pass: https://travis-ci.org/github/phfaist/pylatexenc
35 | #
36 | # - LGTM looks good: https://lgtm.com/projects/g/phfaist/pylatexenc/
37 | #
38 | # - python package creation works: (python setup.py sdist, pip install
39 | # dist/pylatexenc-xxx.tar.gz)
40 | #
41 | # 2) update change log (doc/changes.rst)
42 | #
43 | # 3) bump version number here
44 | #
45 | # 4) git commit any remaining changes
46 | #
47 | # 5) " git tag vX.X -am '' "
48 | #
49 | # 6) " git push && git push --tags "
50 | #
51 | # 7) on github.com, fill in release details with a summary of changes etc.
52 | #
53 | # 8) create the source package for PyPI (" python3 setup.py sdist ")
54 | #
55 | # 9) upload package to PyPI (twine upload dist/pylatexenc-X.X.tar.gz -r realpypi)
56 | #
57 |
58 |
59 | # ALSO BUMP IN pyproject.toml !
60 | version_str = "3.0alpha000032"
61 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "pylatexenc"
3 | version = "3.0alpha000032" # ALSO BUMP IN pylatexenc/version.py
4 | description = "Simple LaTeX parser providing latex-to-unicode and unicode-to-latex conversion"
5 | authors = ["Philippe Faist <philippe.faist@bluewin.ch>"]
6 | license = "MIT"
7 | readme = "README.rst"
8 |
9 | [tool.poetry.scripts]
10 | latexwalker = 'pylatexenc.latexwalker.__main__:main'
11 | latex2text = 'pylatexenc.latex2text.__main__:main'
12 | latexencode = 'pylatexenc.latexencode.__main__:main'
13 |
14 |
15 | [tool.poetry.dependencies]
16 | #python = "^2.7 || ^3.4" # This simply creates a dependency resolution mess that doesn't work.
17 | #python = "^3.7" # hmm seems like it severely constrains other package versions
18 | python = "^3.8"
19 |
20 | [tool.poetry.dev-dependencies]
21 | pytest = ">=7.0"
22 | toml = "^0.10.2"
23 |
24 | [tool.poetry.group.builddoc]
25 | optional = true
26 |
27 | [tool.poetry.group.builddoc.dependencies]
28 | Sphinx = ">=5.0.0"
29 | sphinx-issues = ">=3.0.0"
30 |
31 | [tool.poetry.group.buildjslib]
32 | optional = true
33 |
34 | [tool.poetry.group.buildjslib.dependencies]
35 | Transcrypt = ">=3.9.0"
36 | PyYAML = ">=5.0"
37 |
38 |
39 | [build-system]
40 | requires = ["poetry-core>=1.0.0"]
41 | build-backend = "poetry.core.masonry.api"
42 |
43 |
44 |
45 | [tool.pytest.ini_options]
46 | testpaths = [
47 | "test",
48 | ]
49 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #
2 | # The MIT License (MIT)
3 | #
4 | # Copyright (c) 2019 Philippe Faist
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 | # THE SOFTWARE.
23 | #
24 |
25 | import os
26 | import os.path
27 | #import sys
28 |
29 | from setuptools import setup, find_packages
30 |
31 | from pylatexenc.version import version_str
32 |
def read(*paths):
    """Build a file path from *paths* and return the file's contents as text.

    The file is decoded explicitly as UTF-8 instead of relying on the
    platform's default encoding, so that non-ASCII characters in the file do
    not cause a decoding error on systems whose default encoding is not UTF-8.

    :param paths: path components, joined with :py:func:`os.path.join`.
    :return: the decoded contents of the file.
    """
    # io.open() accepts the `encoding` argument on both Python 2 and Python 3.
    import io

    with io.open(os.path.join(*paths), 'r', encoding='utf-8') as f:
        return f.read()
37 |
38 | setup(
39 | name = "pylatexenc",
40 | version = version_str,
41 |
42 | # metadata for upload to PyPI
43 | author = "Philippe Faist",
44 | author_email = "philippe.faist@bluewin.ch",
45 | description = "Simple LaTeX parser providing latex-to-unicode and unicode-to-latex conversion",
46 | long_description = read("README.rst"),
47 | license = "MIT",
48 | keywords = "latex text unicode encode parse expression",
49 | url = "https://github.com/phfaist/pylatexenc",
50 | classifiers=[
51 | 'Development Status :: 5 - Production/Stable',
52 | 'License :: OSI Approved :: MIT License',
53 | 'Programming Language :: Python',
54 | 'Programming Language :: Python :: 2',
55 | 'Programming Language :: Python :: 2.7',
56 | 'Programming Language :: Python :: 3',
57 | 'Operating System :: MacOS :: MacOS X',
58 | 'Operating System :: Microsoft :: Windows',
59 | 'Operating System :: POSIX :: Linux',
60 | 'Intended Audience :: Developers',
61 | 'Topic :: Scientific/Engineering',
62 | 'Topic :: Text Processing :: General',
63 | 'Topic :: Text Processing :: Markup :: LaTeX',
64 | ],
65 |
66 | # files
67 | packages = find_packages(),
68 | entry_points = {
69 | 'console_scripts': [
70 | 'latexwalker=pylatexenc.latexwalker.__main__:main',
71 | 'latex2text=pylatexenc.latex2text.__main__:main',
72 | 'latexencode=pylatexenc.latexencode.__main__:main',
73 | ],
74 | },
75 | install_requires = [],
76 | package_data = {
77 | },
78 | )
79 |
--------------------------------------------------------------------------------
/test/.gitignore:
--------------------------------------------------------------------------------
1 | _tmp_uni_chars_test.temp.txt
2 |
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phfaist/pylatexenc/6dc2ce7fcd89b7cd1536c79c800f49f09535f5e9/test/__init__.py
--------------------------------------------------------------------------------
/test/dummy/readme.txt:
--------------------------------------------------------------------------------
1 | This directory is just used as base '\input{}' directory, to test the strict_input flag of LatexNodes2Text.
2 |
--------------------------------------------------------------------------------
/test/test_input_1.tex:
--------------------------------------------------------------------------------
1 | \textit{hi there!} This is {\em an equation}:
2 | \begin{equation}
3 | x + y i = 0
4 | \end{equation}
5 |
6 | where $i$ is the imaginary unit.
7 |
--------------------------------------------------------------------------------
/test/test_latexencode_all.py:
--------------------------------------------------------------------------------
1 |
2 | from __future__ import unicode_literals, print_function
3 |
4 | import unittest
5 |
6 | import sys
7 | import codecs
8 | import difflib
9 | import unicodedata
10 | import logging
11 | import os.path
12 |
13 | if sys.version_info.major >= 3:
14 | PY3 = True
15 | else:
16 | PY3 = False
17 |
18 | if PY3:
19 | def unicode(string): return string
20 | basestring = str
21 | unichr = chr
22 | else:
23 | range = xrange
24 |
25 |
26 | from pylatexenc.latexencode import UnicodeToLatexEncoder
27 |
28 |
29 |
30 |
class TestLatexEncodeAll(unittest.TestCase):
    """
    Regression test of the unicode-to-latex encoding over all named unicode
    characters.

    The encoding of one line per character is written to a temporary file next
    to this test file and compared against the reference file
    ``uni_chars_test_previous.txt``.
    """

    def __init__(self, *args, **kwargs):
        super(TestLatexEncodeAll, self).__init__(*args, **kwargs)

    # def test_pythonunicoderange(self):
    #     self.assertGreater(sys.maxunicode, 0xFFFF+1,
    #                        "Your python build only supports unicode characters up to U+FFFF."
    #                        " Tests of unicode coverage will fail.")

    def test_all(self):

        # Silence log output while encoding.  The previous level is restored
        # in a `finally` clause so that an error below (e.g. a missing
        # reference file) does not leave the root logger at CRITICAL for the
        # tests that run afterwards.
        root_logger = logging.getLogger()
        loglevel = root_logger.level
        root_logger.setLevel(logging.CRITICAL)

        try:
            u = UnicodeToLatexEncoder(unknown_char_policy='fail',
                                      replacement_latex_protection='braces-almost-all')

            def fn(x, bdir=os.path.realpath(os.path.abspath(os.path.dirname(__file__)))):
                # path of file `x` in the directory that contains this test file
                return os.path.join(bdir, x)

            with codecs.open(fn('_tmp_uni_chars_test.temp.txt'), 'w',
                             encoding='utf-8') as testf:

                for i in range(0x10FFFF):
                    # iter over all valid unicode characters
                    try:
                        # test if valid, i.e., it has a UNICODE NAME
                        chrname = unicodedata.name(unichr(i))
                    except ValueError:
                        continue

                    line = "0x%04X %-50s |%s|\n"%(i, '['+chrname+']', unichr(i))

                    # try to encode it using our unicode_to_latex routines
                    try:
                        enc = u.unicode_to_latex(line)
                    except ValueError:
                        # character cannot be encoded (policy 'fail'): skip it
                        continue
                    testf.write(enc)

            with codecs.open(fn('uni_chars_test_previous.txt'), 'r',
                             encoding='utf-8') as reff, \
                 codecs.open(fn('_tmp_uni_chars_test.temp.txt'), 'r',
                             encoding='utf-8') as testf:
                a = reff.readlines()
                b = testf.readlines()
        finally:
            root_logger.setLevel(loglevel)

        logger = logging.getLogger(__name__)

        # only check up to the supported unicode range
        if sys.maxunicode < 0x10FFFF:
            logger.warning("Only checking up to unicode U+%X, your python build doesn't support higher",
                           sys.maxunicode)
            # each reference line starts with the code point, e.g. "0x00E9 "
            a = [ aline for aline in a
                  if int(aline[:aline.find(' ')], 0) < sys.maxunicode ]

        s = difflib.unified_diff(a, b,
                                 fromfile='uni_chars_test_previous.txt',
                                 tofile='_tmp_uni_chars_test.temp.txt')
        diffmsg = "".join(s).strip()
        if diffmsg:
            print(diffmsg)
            self.fail("Unicode coverage tests failed. See full diff above.")
93 |
94 |
95 | if __name__ == '__main__':
96 | logging.basicConfig(level=logging.DEBUG)
97 | unittest.main()
98 | #
99 |
100 |
--------------------------------------------------------------------------------
/test/test_latexnodes_parsers_optionals.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import logging
3 |
4 |
5 | from pylatexenc.latexnodes.parsers._optionals import (
6 | LatexOptionalCharsMarkerParser,
7 | LatexOptionalSquareBracketsParser,
8 | )
9 |
10 | from pylatexenc.latexnodes import (
11 | LatexWalkerParseError,
12 | LatexTokenReader,
13 | LatexToken,
14 | ParsingState,
15 | )
16 | from pylatexenc.latexnodes.nodes import *
17 |
18 | from ._helpers_tests import (
19 | DummyWalker,
20 | DummyLatexContextDb,
21 | )
22 |
23 |
24 |
25 |
26 | # --------------------------------------
27 |
class TestLatexOptionalCharsMarkerParser(unittest.TestCase):
    """
    Tests for :py:class:`LatexOptionalCharsMarkerParser` with the marker
    ``'*'``, on input that does or does not start with the marker.
    """

    maxDiff = None

    def _parse_star_marker(self, latextext, **parser_options):
        # Run a LatexOptionalCharsMarkerParser('*', **parser_options) on
        # `latextext`; return the parsed nodes along with the parsing state
        # that was used, which the expected nodes need to refer to.
        token_reader = LatexTokenReader(latextext)
        parsing_state = ParsingState(s=latextext,
                                     latex_context=DummyLatexContextDb())
        walker = DummyWalker()

        marker_parser = LatexOptionalCharsMarkerParser('*', **parser_options)

        nodes, _ = walker.parse_content(marker_parser,
                                        token_reader=token_reader,
                                        parsing_state=parsing_state)
        return nodes, parsing_state

    def test_simple_chars_marker_isthere(self):

        nodes, ps = self._parse_star_marker('*more')

        expected_marker_node = LatexCharsNode(
            parsing_state=ps,
            chars='*',
            pos=0,
            pos_end=1,
        )
        self.assertEqual(
            nodes,
            LatexNodeList([expected_marker_node], parsing_state=ps)
        )

    def test_simple_chars_marker_notthere(self):

        nodes, ps = self._parse_star_marker('more')

        self.assertEqual(nodes, None)

    def test_simple_chars_marker_notthere_reqempty(self):

        nodes, ps = self._parse_star_marker(
            'more',
            return_none_instead_of_empty=False
        )

        self.assertEqual(
            nodes,
            LatexNodeList([], parsing_state=ps, pos=0, pos_end=0),
        )

    def test_simple_chars_marker_isthere_notlist(self):

        nodes, ps = self._parse_star_marker(
            '*more',
            return_full_node_list=False
        )

        self.assertEqual(
            nodes,
            LatexCharsNode(
                parsing_state=ps,
                chars='*',
                pos=0,
                pos_end=1,
            )
        )

    def test_simple_chars_marker_notthere_notlist(self):

        nodes, ps = self._parse_star_marker(
            'more',
            return_full_node_list=False
        )

        self.assertEqual(nodes, None)
134 |
135 |
136 |
137 |
138 | # ---
139 |
140 | if __name__ == '__main__':
141 | logging.basicConfig(level=logging.DEBUG)
142 | unittest.main()
143 | #
144 |
--------------------------------------------------------------------------------
/test/test_latexnodes_tokenreaderbase.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import logging
3 | logger = logging.getLogger(__name__)
4 |
5 |
6 | from pylatexenc.latexnodes._tokenreaderbase import (
7 | LatexTokenReaderBase,
8 | LatexTokenListTokenReader,
9 | )
10 |
11 | from pylatexenc.latexnodes import (
12 | LatexWalkerEndOfStream,
13 | LatexToken,
14 | ParsingState,
15 | )
16 |
17 |
18 |
19 |
class TestTokenReaderBase(unittest.TestCase):
    """
    Tests for the helper methods of :py:class:`LatexTokenReaderBase`.
    """

    def test_make_token(self):
        reader = LatexTokenReaderBase()

        made_token = reader.make_token(tok='char', arg='*', pos=3)

        self.assertEqual(made_token, LatexToken(tok='char', arg='*', pos=3))

    def test_peek_token_or_none(self):

        class MyTokenReader(LatexTokenReaderBase):
            # Minimal reader: peeking either yields one fixed token, or
            # signals the end of the stream.
            def __init__(self, at_end=False):
                super(MyTokenReader, self).__init__()
                self.at_end = at_end

            def peek_token(self, parsing_state):
                if self.at_end:
                    raise LatexWalkerEndOfStream()
                return self.make_token(tok='char', arg='-', pos=5)

        ps = ParsingState()

        reader_with_token = MyTokenReader(False)
        self.assertEqual(
            reader_with_token.peek_token_or_none(ps),
            LatexToken(tok='char', arg='-', pos=5)
        )

        reader_at_end = MyTokenReader(True)
        self.assertIsNone(reader_at_end.peek_token_or_none(ps))
49 |
50 |
51 |
class TestTokenReaderTokenList(unittest.TestCase):
    """
    Tests for :py:class:`LatexTokenListTokenReader`, a token reader that
    serves tokens from a given list.
    """

    def test_reads_tokens(self):

        tok_a = LatexToken(tok='char', arg='a', pos=0, pos_end=1, pre_space='')
        tok_b = LatexToken(tok='char', arg='b', pos=1, pos_end=1+2, pre_space='')
        tok_relax = LatexToken(tok='macro', arg='relax', pos=2,
                               pos_end=2+2+len(r'\relax'),
                               pre_space='', post_space='\t ')

        reader = LatexTokenListTokenReader([tok_a, tok_b, tok_relax])

        ps = ParsingState()

        # peeking repeatedly yields the same token and does not advance
        self.assertEqual(reader.peek_token(ps), tok_a)
        self.assertEqual(reader.peek_token(ps), tok_a)

        self.assertEqual(reader.cur_pos(), tok_a.pos)

        self.assertEqual(reader.next_token(ps), tok_a)

        self.assertEqual(reader.peek_token(ps), tok_b)
        self.assertEqual(reader.peek_token(ps), tok_b)

        self.assertEqual(reader.next_token(ps), tok_b)

        self.assertEqual(reader.next_token(ps), tok_relax)

        # go back to the second token: it is the next one to be read
        reader.move_to_token(tok_b)

        self.assertEqual(reader.next_token(ps), tok_b)

        # position the reader right after the first token
        reader.move_past_token(tok_a)

        self.assertEqual(reader.next_token(ps), tok_b)

        self.assertEqual(reader.cur_pos(), tok_relax.pos)
90 |
91 |
92 |
93 |
94 | if __name__ == '__main__':
95 | logging.basicConfig(level=logging.DEBUG)
96 | unittest.main()
97 | #
98 |
--------------------------------------------------------------------------------
/test/test_macrospec_argumentsparser.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import sys
3 | import logging
4 |
5 |
6 |
7 | # from pylatexenc.macrospec._argumentsparser import (
8 | # LatexArgumentSpec,
9 | # LatexNoArgumentsParser,
10 | # LatexArgumentsParser,
11 | # )
12 |
13 | # from pylatexenc.latexnodes import (
14 | # LatexWalkerTokenParseError,
15 | # LatexToken,
16 | # ParsingState
17 | # )
18 |
19 |
20 |
class TestLatexArgumentsParser(unittest.TestCase):
    """Placeholder test case: no test methods are defined yet."""

    # ............. TODO, need to write good tests .................
27 |
28 |
29 |
30 | # class Test__LegacyPyltxenc2MacroArgsParserWrapper(unittest.TestCase):
31 | # def
32 |
33 |
34 |
35 | # ---
36 |
37 | if __name__ == '__main__':
38 | logging.basicConfig(level=logging.DEBUG)
39 | unittest.main()
40 | #
41 |
--------------------------------------------------------------------------------
/test/test_macrospec_environmentbodyparser.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 |
4 | from pylatexenc.macrospec._environmentbodyparser import (
5 | LatexEnvironmentBodyContentsParser,
6 | )
7 |
8 | from pylatexenc.latexnodes import (
9 | LatexTokenReader,
10 | LatexArgumentSpec,
11 | ParsedArguments,
12 | ParsingState,
13 | )
14 | from pylatexenc.latexnodes.nodes import *
15 | from pylatexenc.macrospec import (
16 | LatexContextDb,
17 | MacroSpec,
18 | ParsingStateDeltaExtendLatexContextDb,
19 | )
20 | from pylatexenc.latexwalker import LatexWalker
21 |
22 | from ._helpers_tests import (
23 | add_not_equal_warning_to_object
24 | )
25 |
26 |
27 | add_not_equal_warning_to_object(LatexNode)
28 | add_not_equal_warning_to_object(ParsingState)
29 | add_not_equal_warning_to_object(ParsedArguments)
30 | add_not_equal_warning_to_object(LatexArgumentSpec)
31 |
32 |
class TestEnvironmentBodyContentsParser(unittest.TestCase):
    r"""Tests for `LatexEnvironmentBodyContentsParser`.

    Each input text starts right after an implied ``\begin{...}``; the
    expected node lists stop before the closing ``\end{...}``.
    """

    maxDiff = None

    def test_simple_1(self):
        # Text following an (implied) \begin{environment}.
        source = r'''a+b=c\end{environment}'''

        token_reader = LatexTokenReader(source)
        walker = LatexWalker(source, latex_context=LatexContextDb())
        state = walker.make_parsing_state()

        print("ps = ", state)

        body_parser = LatexEnvironmentBodyContentsParser('environment')
        nodes, parsing_state_delta = walker.parse_content(
            body_parser,
            token_reader=token_reader,
            parsing_state=state,
        )

        expected_chars = LatexCharsNode(
            parsing_state=state,
            latex_walker=walker,
            chars='a+b=c',
            pos=0,
            pos_end=5,
        )
        self.assertEqual(
            nodes,
            LatexNodeList([expected_chars], pos=0, pos_end=5)
        )

    def test_contents_and_child_parsing_state_delta(self):
        # Text following an (implied) \begin{enumerate}.
        source = r'''\item A \textbf{\localcommand}\end{enumerate}'''

        context_db = LatexContextDb()
        context_db.add_context_category(
            'main-context-category',
            macros=[MacroSpec("textbf", '{')],
        )

        token_reader = LatexTokenReader(source)
        walker = LatexWalker(source, latex_context=context_db,
                             tolerant_parsing=False)
        base_state = walker.make_parsing_state()

        print("ps = ", base_state)

        # \item is only made known through the "contents" delta and
        # \localcommand only through the "child" delta.
        contents_delta = ParsingStateDeltaExtendLatexContextDb(
            extend_latex_context=dict(macros=[MacroSpec("item", '')]),
        )
        child_delta = ParsingStateDeltaExtendLatexContextDb(
            extend_latex_context=dict(macros=[MacroSpec("localcommand", '')]),
        )

        body_parser = LatexEnvironmentBodyContentsParser(
            'enumerate',
            contents_parsing_state_delta=contents_delta,
            child_parsing_state_delta=child_delta,
        )
        nodes, parsing_state_delta = walker.parse_content(
            body_parser,
            token_reader=token_reader,
            parsing_state=base_state,
        )

        # Pick the parsing states off the parsed nodes so the expected tree
        # can refer to the very same state objects.
        content_state = nodes[1].parsing_state
        child_state = nodes[0].parsing_state
        child_state_2 = nodes[2].parsing_state

        print("ps_content =", content_state)
        print("ps_child =", child_state)

        # Build the expected tree piece by piece, innermost nodes last.
        expected_item = LatexMacroNode(
            parsing_state=child_state,
            latex_walker=walker,
            macroname='item',
            spec=content_state.latex_context.get_macro_spec('item'),
            nodeargd=ParsedArguments(argnlist=LatexNodeList([]),),
            pos=0,
            pos_end=6,
            macro_post_space=' ',
        )
        expected_chars = LatexCharsNode(
            parsing_state=content_state,
            latex_walker=walker,
            chars='A ',
            pos=6,
            pos_end=8,
        )
        expected_localcommand = LatexMacroNode(
            parsing_state=child_state_2,
            latex_walker=walker,
            spec=child_state_2.latex_context.get_macro_spec('localcommand'),
            macroname='localcommand',
            nodeargd=ParsedArguments(argnlist=LatexNodeList([]),),
            pos=16,
            pos_end=29,
            macro_post_space='',
        )
        expected_group = LatexGroupNode(
            parsing_state=child_state_2,
            latex_walker=walker,
            delimiters=('{', '}'),
            nodelist=LatexNodeList(
                [expected_localcommand],
                pos=16,
                pos_end=29,
            ),
            pos=15,
            pos_end=30,
        )
        expected_textbf = LatexMacroNode(
            parsing_state=child_state_2,
            latex_walker=walker,
            macroname='textbf',
            spec=base_state.latex_context.get_macro_spec('textbf'),
            nodeargd=ParsedArguments(
                argnlist=[expected_group],
                arguments_spec_list=[
                    LatexArgumentSpec(argname=None, parser='{'),
                ],
            ),
            pos=8,
            pos_end=30,
            macro_post_space='',
        )
        nodes_expected = LatexNodeList(
            [expected_item, expected_chars, expected_textbf],
            pos=0,
            pos_end=30,
        )

        print(nodes)
        print(nodes_expected)

        # The contents state knows \item but not \localcommand ...
        content_db = content_state.latex_context
        self.assertIsNotNone( content_db.get_macro_spec('item') )
        self.assertIsNone( content_db.get_macro_spec('localcommand') )

        # ... while both child states know \localcommand but not \item.
        self.assertIsNone( child_state.latex_context.get_macro_spec('item') )
        self.assertIsNotNone(
            child_state.latex_context.get_macro_spec('localcommand')
        )
        self.assertIsNone( child_state_2.latex_context.get_macro_spec('item') )
        self.assertIsNotNone(
            child_state_2.latex_context.get_macro_spec('localcommand')
        )

        self.assertEqual(nodes, nodes_expected)
203 |
204 | if __name__ == '__main__':
205 | unittest.main()
206 |
--------------------------------------------------------------------------------
/test/test_macrospec_latexcontextdb.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import sys
3 | import logging
4 | logger = logging.getLogger(__name__)
5 |
6 | from pylatexenc.macrospec._latexcontextdb import (
7 | LatexContextDb,
8 | _autogen_category_prefix
9 | )
10 |
11 | from pylatexenc.macrospec import (
12 | MacroSpec,
13 | EnvironmentSpec,
14 | SpecialsSpec,
15 | )
16 |
17 |
18 |
class TestLatexContextDb(unittest.TestCase):
    """Tests for `LatexContextDb`; only `extended_with()` is covered so far."""

    # TODO........ need more tests here

    def test_extended_with(self):
        # Frozen base database with one macro, one environment, one specials.
        base_db = LatexContextDb()
        base_db.add_context_category(
            'base-category',
            macros=[ MacroSpec('base', '{'), ],
            environments=[ EnvironmentSpec('baseenv', '{'), ],
            specials=[ SpecialsSpec('~'), ],
        )
        base_db.freeze()

        logger.debug("context's category list = %r", base_db.category_list)
        logger.debug("context's d = %r", base_db.d)
        logger.debug("context's lookup maps are = %r", base_db.lookup_chain_maps)

        # --- first extension ---
        first_ext = dict(
            macros=[ MacroSpec('more', '{'), ],
            environments=[ EnvironmentSpec('moreenv', '{'), ],
            specials=[ SpecialsSpec('!'), ],
        )
        more_macro = first_ext['macros'][0]
        more_env = first_ext['environments'][0]
        bang_specials = first_ext['specials'][0]

        db2 = base_db.extended_with(**first_ext)

        logger.debug("context2's category list = %r", db2.category_list)
        logger.debug("context2's d = %r", db2.d)
        logger.debug("context2's lookup maps are = %r", db2.lookup_chain_maps)

        self.assertEqual(len(db2.category_list), 2)
        self.assertTrue(db2.category_list[0].startswith(_autogen_category_prefix))

        # Base definitions remain visible through the extended database.
        self.assertEqual(db2.get_macro_spec('base'),
                         base_db.get_macro_spec('base'))
        self.assertEqual(db2.get_environment_spec('baseenv'),
                         base_db.get_environment_spec('baseenv'))
        self.assertEqual(db2.get_specials_spec('~'),
                         base_db.get_specials_spec('~'))
        self.assertEqual(db2.test_for_specials('~~~~~~~', pos=0),
                         base_db.get_specials_spec('~'))
        # The new definitions are found as well.
        self.assertEqual(db2.get_macro_spec('more'), more_macro)
        self.assertEqual(db2.get_environment_spec('moreenv'), more_env)
        self.assertEqual(db2.get_specials_spec('!'), bang_specials)
        self.assertEqual(db2.test_for_specials('!!!!!', pos=0), bang_specials)

        # --- second extension, on top of the first ---
        second_ext = dict(
            macros=[ MacroSpec('evenmore', '{'), ],
            environments=[ EnvironmentSpec('baseenv', '{'), ], # override baseenv
            specials=[ SpecialsSpec('!!'), ],
        )
        evenmore_macro = second_ext['macros'][0]
        baseenv_override = second_ext['environments'][0]
        double_bang_specials = second_ext['specials'][0]

        db3 = db2.extended_with(**second_ext)

        # extended_with() a second time shouldn't add a new category as the
        # first category is already an autogenerated one
        self.assertEqual(len(db3.category_list), 2)
        self.assertEqual(db3.category_list, db2.category_list)
        self.assertTrue(db3.category_list[0].startswith(_autogen_category_prefix))

        logger.debug("context3's category list = %r", db3.category_list)
        logger.debug("context3's d = %r", db3.d)
        logger.debug("context3's lookup maps are = %r", db3.lookup_chain_maps)

        self.assertEqual(db3.get_macro_spec('base'),
                         base_db.get_macro_spec('base'))
        # 'baseenv' is deliberately NOT compared with the base database's
        # spec here: the second extension overrides it (checked below).
        self.assertEqual(db3.get_specials_spec('~'),
                         base_db.get_specials_spec('~'))
        self.assertEqual(db3.test_for_specials('~~~~~~~', pos=0),
                         base_db.get_specials_spec('~'))
        self.assertEqual(db3.get_macro_spec('more'), more_macro)
        self.assertEqual(db3.get_macro_spec('evenmore'), evenmore_macro)
        self.assertEqual(db3.get_environment_spec('moreenv'), more_env)
        self.assertEqual(db3.get_environment_spec('baseenv'), baseenv_override)
        self.assertEqual(db3.get_specials_spec('!'), bang_specials)
        self.assertEqual(db3.get_specials_spec('!!'), double_bang_specials)
        # On a run of '!' the two-character specials is the one reported.
        self.assertEqual(db3.test_for_specials('!!!!!', pos=0),
                         double_bang_specials)
100 |
101 |
102 |
103 |
104 |
105 | # ---
106 |
107 | if __name__ == '__main__':
108 | logging.basicConfig(level=logging.DEBUG)
109 | unittest.main()
110 | #
111 |
--------------------------------------------------------------------------------
/test/test_macrospec_macrocallparser.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import sys
3 | import logging
4 |
5 |
6 |
7 | # from pylatexenc.macrospec._macrocallparser import (
8 | # LatexMacroCallParser,
9 | # LatexEnvironmentCallParser,
10 | # LatexSpecialsCallParser,
11 | # )
12 |
13 | # from pylatexenc.latexnodes import (
14 | # ParsingState
15 | # )
16 | # from pylatexenc.latexnodes.nodes import *
17 |
18 |
19 |
class TestLatexMacroCallParser(unittest.TestCase):
    """Placeholder test case: no test methods are defined yet."""

    # ............. TODO, need to write good tests .................
26 |
27 |
28 |
29 | # ---
30 |
31 | if __name__ == '__main__':
32 | logging.basicConfig(level=logging.DEBUG)
33 | unittest.main()
34 | #
35 |
--------------------------------------------------------------------------------
/test/test_pylatexenc.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | ### BEGIN_TEST_PYLATEXENC_SKIP
4 |
5 | import toml
6 | import os.path
7 |
8 | import pylatexenc
9 |
10 |
11 | # thanks https://github.com/python-poetry/poetry/issues/144#issuecomment-877835259
12 |
class TestHardcodedPackageVersion(unittest.TestCase):
    """Guards against the version string being bumped in only one place."""

    def test_versions_are_in_sync(self):
        """Checks if the pyproject.toml and package.__init__.py __version__ are in sync."""

        # pyproject.toml sits one directory above this test file.
        tests_dir = os.path.dirname(__file__)
        pyproject_path = os.path.join(tests_dir, '..', "pyproject.toml")

        with open(pyproject_path) as fpp:
            pyproject_text = fpp.read()

        poetry_section = toml.loads(pyproject_text)["tool"]["poetry"]

        self.assertEqual(pylatexenc.__version__, poetry_section["version"])
26 |
27 |
28 | if __name__ == '__main__':
29 | unittest.main()
30 |
31 | ### END_TEST_PYLATEXENC_SKIP
32 |
--------------------------------------------------------------------------------
/test/test_util.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | from pylatexenc import _util
4 |
5 |
class TestLineNumbersCalculator(unittest.TestCase):
    """Tests for `_util.LineNumbersCalculator.pos_to_lineno_colno()`."""

    # Five short lines, each terminated by a newline.
    _SAMPLE_TEXT = "\n".join(["one", "two", "three", "four", "five"]) + "\n"

    def test_simple(self):
        calc = _util.LineNumbersCalculator(self._SAMPLE_TEXT)

        # (character position, expected (lineno, colno)); as the table shows,
        # line numbers start at 1 and column numbers at 0.
        expectations = [
            (0, (1,0)),
            (1, (1,1)),
            (2, (1,2)),
            (3, (1,3)),
            (4, (2,0)),
            (5, (2,1)),
            (6, (2,2)),
            (7, (2,3)),
            (8, (3,0)),
            (9, (3,1)),
            (23, (5,4)),
        ]
        for pos, lineno_colno in expectations:
            self.assertEqual( calc.pos_to_lineno_colno(pos), lineno_colno )

    def test_as_dict(self):
        calc = _util.LineNumbersCalculator(self._SAMPLE_TEXT)

        # With as_dict=True the same information comes back keyed by name.
        result = calc.pos_to_lineno_colno(9, as_dict=True)
        self.assertEqual( result, { 'lineno': 3, 'colno': 1 } )
48 |
--------------------------------------------------------------------------------
/tools/gen_l2t_from_lenc.py:
--------------------------------------------------------------------------------
1 | #
2 | # Inspect latexencode rules to see if there are symbols that we can use for
3 | # latex2text, too
4 | #
5 |
6 | # Py3 only script
7 | import sys
8 | assert sys.version_info > (3,0)
9 |
10 | import unicodedata
11 |
12 | import logging
13 | logging.basicConfig(level=logging.INFO)
14 | logger = logging.getLogger(__name__)
15 |
16 |
17 | from pylatexenc import latexwalker, latex2text, latexencode #, macrospec
18 |
19 | l2t_default_context = latex2text.get_default_latex_context_db()
20 |
def extract_symbol_node(nodelist, uni, latex):
    r"""
    Inspect the parsed LaTeX replacement of one character and, when it is a
    single argument-less macro that the default latex2text context does not
    define, print a ``MacroTextSpec(...)`` source line for it.

    A ``\ensuremath`` macro with exactly one argument is unwrapped and its
    content is inspected instead.  Every other situation (not exactly one
    node, a non-macro node, a macro latex2text already knows, a macro with
    arguments) is logged and skipped.

    :param nodelist: the nodes parsed from `latex`
    :param uni: the character's code point, as an integer
    :param latex: the LaTeX source that was parsed (used in log messages only)
    :return: always `None`; the result is written to standard output
    """

    if len(nodelist) != 1:
        # more than one "thing"
        logger.warning("Got nodelist with more than one node, skipping (%s): %s = %r",
                       chr(uni), latex, nodelist)
        return

    thenode = nodelist[0]

    if not thenode.isNodeType(latexwalker.LatexMacroNode):
        logger.warning("Got node that is not a macro, skipping (%s): %s = %r",
                       chr(uni), latex, thenode)
        return

    if thenode.macroname == 'ensuremath':
        # ignore, parse contents instead
        if thenode.nodeargd is None or not thenode.nodeargd.argnlist or \
           len(thenode.nodeargd.argnlist) != 1:
            logger.warning(r"\ensuremath with no arguments or wrong # of arguments (%s): %s = %r",
                           chr(uni), latex, nodelist)
            return

        argnode = thenode.nodeargd.argnlist[0]
        if argnode.isNodeType(latexwalker.LatexGroupNode):
            argnodelist = argnode.nodelist
        else:
            argnodelist = [ argnode ]

        return extract_symbol_node(argnodelist, uni, latex)

    l2t_mspec = l2t_default_context.get_macro_spec(thenode.macroname)
    if l2t_mspec is not None and l2t_mspec.macroname:
        # macro found, already known
        logger.debug("Macro found (%s): %r", chr(uni), thenode)
        return

    if thenode.nodeargd and thenode.nodeargd.argnlist:
        logger.warning(r"Macro %r for ‘%s’ is not known to latex2text but it has arguments",
                       thenode, chr(uni))
        return

    # got a symbol macro, go for it:
    char = chr(uni)
    try:
        char_literal = "u'\\N{%s}'" % (unicodedata.name(char),)
    except ValueError:
        # unicodedata.name() raises ValueError for code points that have no
        # name (e.g. private-use characters); no \N{...} escape can be
        # written for those, so emit a numeric escape rather than aborting
        # the whole run.
        if uni <= 0xFFFF:
            char_literal = "u'\\u%04X'" % (uni,)
        else:
            char_literal = "u'\\U%08X'" % (uni,)

    print(" MacroTextSpec(%r, %s), # ‘%s’" % (
        thenode.macroname, char_literal, char
    ))
67 |
68 |
# Walk every dict-type rule of each built-in latexencode rule set and offer
# each (code point, LaTeX replacement) pair to extract_symbol_node().
for builtin_name in ('defaults', 'unicode-xml'):

    rules = latexencode.get_builtin_conversion_rules(builtin_name)

    logger.info("Reader latexencode defaults %r", builtin_name)
    print(" # Rules from latexencode defaults '%s'"%(builtin_name))

    for rule in rules:

        is_dict_rule = (rule.rule_type == latexencode.RULE_DICT)
        if not is_dict_rule:
            logger.warning("Ignoring non-dict rule type %d", rule.rule_type)
            continue

        # inspect rules for symbols that latex2text might not already be aware of
        for uni, latex in rule.rule.items():
            try:
                # strict parsing: a replacement that does not parse cleanly
                # is reported and skipped
                walker = latexwalker.LatexWalker(latex, tolerant_parsing=False)
                nodelist, _, _ = walker.get_latex_nodes()
            except latexwalker.LatexWalkerError as e:
                logger.warning("Error parsing %r (%s): %s", latex, chr(uni), e)
            else:
                extract_symbol_node(nodelist, uni, latex)
91 |
--------------------------------------------------------------------------------
/tools/gen_xml_dic.py:
--------------------------------------------------------------------------------
1 | #
2 | # mini-script to generate the pylatexenc.latexencode._uni2latexmap_xml dict mapping
3 | #
4 | import re
5 | import sys
6 |
7 | if sys.version_info.major > 2:
8 | # python 3
9 | unichr = chr
10 |
11 | from xml.etree import ElementTree as ET
12 |
# Parse the character table; 'unicode.xml' is looked up relative to the
# current working directory.
e = ET.parse('unicode.xml')

d = {}       # code point (int) -> LaTeX replacement text
dnames = {}  # code point (int) -> text of the <description> element

for chxml in e.find('charlist').iter('character'):
    Uid = chxml.attrib['id']
    if '-' in Uid:
        # composite/multiple characters not supported
        continue
    charord = int(Uid.lstrip('U'), 16)
    latexxml = chxml.find('latex')
    if latexxml is None:
        continue
    latexval = latexxml.text
    if latexval is None:
        # an empty <latex/> element has no text; nothing to map to (and the
        # string tests below would fail on None)
        continue
    if latexval == unichr(charord):
        # "latex" representation is the same char directly
        continue
    if charord == 0x20:
        # skip space char
        continue
    # the r'\El' prefix also covers r'\ElsevierGlyph'
    if latexval.startswith((r'\El', r'\ensuremath{\El')):
        continue
    if re.search(r'\\[a-zA-Z]+\s+$', latexval):
        # ends with named macro+space, remove space because
        # latexencode.UnicodeToLatexEncoder will handle that with
        # replacement_latex_protection
        latexval = latexval.rstrip()
    d[charord] = latexval
    descxml = chxml.find('description')
    # tolerate a <character> entry that has no <description> child
    dnames[charord] = descxml.text if descxml is not None else ''

# dump dictionary into new module file in current working directory
outputfile = '_uni2latexmap_xml.py'

HEADER = """\
# -*- coding: utf-8 -*-
#
# Automatically generated from unicode.xml by gen_xml_dic.py
#

"""

# The generated module declares itself UTF-8, so on Python 3 write it with
# that encoding explicitly instead of the locale default (%r keeps printable
# non-ASCII characters as-is there).  The Python 2 built-in open() has no
# 'encoding' argument, and %r output is pure ASCII on Python 2 anyway.
if sys.version_info.major > 2:
    open_kwargs = {'encoding': 'utf-8'}
else:
    open_kwargs = {}

with open(outputfile, 'w', **open_kwargs) as f:
    f.write(HEADER)

    f.write("uni2latex = {\n")

    # sorted by code point so that regenerating the file gives a stable,
    # diff-friendly result on every Python version
    for k in sorted(d):
        f.write("0x%04X: %r,\n"%(k, d[k]))

    f.write("}\n")

print("Successfully generated file %s"%(outputfile))


# Now see which characters we don't have in our default set of symbols
from pylatexenc.latexencode._uni2latexmap import uni2latex as uni2latex_defaults

missing_keys = set(d.keys()).difference(set(uni2latex_defaults.keys()))
if missing_keys:
    print("#\n# Missing keys added from unicode.xml\n#\n")
    for k in sorted(missing_keys):
        # Prefer a readable raw-string literal, but only where it is valid
        # Python: not if the value contains a quote or ends with a backslash
        # (r'...\' would not terminate the literal).
        if "'" not in d[k] and not d[k].endswith('\\'):
            therepr = "r'"+d[k]+"'"
        else:
            therepr = repr(d[k])
        thedef = "0x%04X: %s,"%(k, therepr)
        print("%-50s# %s [%s]"%(thedef, dnames[k], unichr(k)))
82 |
83 |
--------------------------------------------------------------------------------
/tools/unicode.xml.LICENSE:
--------------------------------------------------------------------------------
1 | [This notice should be placed within redistributed or derivative software code
2 | when appropriate. This particular formulation became active on December 31 2002,
3 | superseding the 1998 version.]
4 |
5 | unicode.xml: https://www.w3.org/2003/entities/2007xml/unicode.xml
6 | https://www.w3.org/TR/xml-entity-names/#source
7 |
8 | Copyright © 2015 World Wide Web Consortium, (Massachusetts Institute of
9 | Technology, European Research Consortium for Informatics and Mathematics,
10 | Keio University, Beihang). All Rights Reserved. This work is distributed
11 | under the W3C® Software License [1] in the hope that it will be useful, but
12 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 | or FITNESS FOR A PARTICULAR PURPOSE.
14 |
15 | [1] http://www.w3.org/Consortium/Legal/copyright-software
16 |
17 |
18 | ----------
19 |
20 | Copyright David Carlisle 1999-2015
21 |
22 | Use and distribution of this code are permitted under the terms of the
23 | W3C Software Notice and License.
24 | http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231.html
25 |
26 | ----------
27 |
28 | LICENSE
29 |
30 | By obtaining, using and/or copying this work, you (the licensee) agree that you
31 | have read, understood, and will comply with the following terms and conditions.
32 |
33 | Permission to copy, modify, and distribute this software and its documentation,
34 | with or without modification, for any purpose and without fee or royalty is
35 | hereby granted, provided that you include the following on ALL copies of the
36 | software and documentation or portions thereof, including modifications:
37 |
38 | - The full text of this NOTICE in a location viewable to users of the
39 | redistributed or derivative work.
40 |
41 | - Any pre-existing intellectual property disclaimers, notices, or terms and
42 | conditions. If none exist, the W3C Software Short Notice should be
43 | included (hypertext is preferred, text is permitted) within the body of
44 | any redistributed or derivative code.
45 |
46 | - Notice of any changes or modifications to the files, including the date
47 | changes were made. (We recommend you provide URIs to the location from
48 | which the code is derived.)
49 |
50 | DISCLAIMERS
51 |
52 | THIS SOFTWARE AND DOCUMENTATION IS PROVIDED "AS IS," AND COPYRIGHT HOLDERS MAKE
53 | NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
54 | TO, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT
55 | THE USE OF THE SOFTWARE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY
56 | PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS.
57 |
58 | COPYRIGHT HOLDERS WILL NOT BE LIABLE FOR ANY DIRECT, INDIRECT, SPECIAL OR
59 | CONSEQUENTIAL DAMAGES ARISING OUT OF ANY USE OF THE SOFTWARE OR DOCUMENTATION.
60 |
61 | The name and trademarks of copyright holders may NOT be used in advertising or
62 | publicity pertaining to the software without specific, written prior
63 | permission. Title to copyright in this software and any associated documentation
64 | will at all times remain with copyright holders.
65 |
66 | NOTES
67 |
68 | This version: http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231
69 |
70 | This formulation of W3C's notice and license became active on December 31
71 | 2002. This version removes the copyright ownership notice such that this license
72 | can be used with materials other than those owned by the W3C, reflects that
73 | ERCIM is now a host of the W3C, includes references to this specific dated
74 | version of the license, and removes the ambiguous grant of "use". Otherwise,
75 | this version is the same as the previous version and is written so as to
76 | preserve the Free Software Foundation's assessment of GPL compatibility and
77 | OSI's certification under the Open Source Definition.
78 |
--------------------------------------------------------------------------------