├── test ├── data │ ├── example_complex_envs_output.txt │ ├── simple.tex │ ├── example_complex_template_output.txt │ ├── example_simple_figure_caption_output.txt │ ├── example_placeholder_output.txt │ ├── example.tex │ ├── example_simple_figure_caption.txt │ ├── example_placeholder.txt │ ├── example_simple_itemize_output.txt │ ├── example_simple_cite_output.txt │ ├── example_simple_itemize.txt │ ├── example_simple_comments_output.txt │ ├── example_simple_cite.txt │ ├── example_simple_comments.txt │ ├── example_tables_strict_output.txt │ ├── example_complex_envs.txt │ └── example_tables_strict.txt ├── __init__.py ├── _base.py ├── test_gui.py ├── test_pipelines.py └── test_parsers.py ├── specs ├── upx.exe ├── PyDetex_Win.spec ├── PyDetex_Win_Single.spec ├── PyDetex_macOS.spec └── __init__.py ├── .replit ├── pydetex ├── res │ ├── cog.ico │ ├── icon.gif │ ├── icon.ico │ ├── icon.png │ ├── icon.icns │ ├── dictionary.ico │ ├── u_subscripts.txt │ ├── u_textfrak.txt │ ├── u_textcal.txt │ ├── u_textbb.txt │ ├── u_textmono.txt │ ├── placeholder_en.tex │ ├── u_superscripts.txt │ ├── placeholder_es.tex │ ├── u_textit.txt │ ├── u_textbf.txt │ └── u_symbols.txt ├── __init__.py ├── version.py ├── _fonts.py ├── pipelines.py ├── utils.py ├── _utils_lang.py ├── _symbols.py ├── _gui_settings.py └── _utils_tex.py ├── docs ├── _static │ ├── example_simple.png │ ├── example_strict.png │ └── pydetex_windows.png ├── _source │ ├── utils.rst │ ├── parsers.rst │ ├── pipelines.rst │ ├── license.rst │ └── contributors.rst ├── Makefile ├── make.bat ├── index.rst └── conf.py ├── codecov.yml ├── gui.py ├── .gitignore ├── requirements.txt ├── .editorconfig ├── MANIFEST.in ├── .readthedocs.yml ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md └── workflows │ └── ci.yml ├── LICENSE ├── setup.py └── README.rst /test/data/example_complex_envs_output.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/data/simple.tex: -------------------------------------------------------------------------------- 1 | this is a simple file -------------------------------------------------------------------------------- /specs/upx.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ppizarror/PyDetex/HEAD/specs/upx.exe -------------------------------------------------------------------------------- /.replit: -------------------------------------------------------------------------------- 1 | language = "bash" 2 | run = "python3 setup.py install; clear; python3 pydetex/gui.py" -------------------------------------------------------------------------------- /pydetex/res/cog.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ppizarror/PyDetex/HEAD/pydetex/res/cog.ico -------------------------------------------------------------------------------- /pydetex/res/icon.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ppizarror/PyDetex/HEAD/pydetex/res/icon.gif -------------------------------------------------------------------------------- /pydetex/res/icon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ppizarror/PyDetex/HEAD/pydetex/res/icon.ico 
-------------------------------------------------------------------------------- /pydetex/res/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ppizarror/PyDetex/HEAD/pydetex/res/icon.png -------------------------------------------------------------------------------- /pydetex/res/icon.icns: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ppizarror/PyDetex/HEAD/pydetex/res/icon.icns -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | TESTS 6 | """ 7 | -------------------------------------------------------------------------------- /pydetex/res/dictionary.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ppizarror/PyDetex/HEAD/pydetex/res/dictionary.ico -------------------------------------------------------------------------------- /docs/_static/example_simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ppizarror/PyDetex/HEAD/docs/_static/example_simple.png -------------------------------------------------------------------------------- /docs/_static/example_strict.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ppizarror/PyDetex/HEAD/docs/_static/example_strict.png -------------------------------------------------------------------------------- /docs/_static/pydetex_windows.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ppizarror/PyDetex/HEAD/docs/_static/pydetex_windows.png -------------------------------------------------------------------------------- /docs/_source/utils.rst: -------------------------------------------------------------------------------- 1 | 2 | ===== 3 | Utils 4 | ===== 5 | 6 | .. automodule:: pydetex.utils 7 | :members: 8 | :exclude-members: Button -------------------------------------------------------------------------------- /docs/_source/parsers.rst: -------------------------------------------------------------------------------- 1 | 2 | ======= 3 | Parsers 4 | ======= 5 | 6 | .. automodule:: pydetex.parsers 7 | :members: 8 | :exclude-members: -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | ignore: 2 | - "build.py" 3 | - "gui.py" 4 | - "pydetex/_gui_utils.py" 5 | - "pydetex/gui.py" 6 | - "setup.py" 7 | - "test/*.py" -------------------------------------------------------------------------------- /docs/_source/pipelines.rst: -------------------------------------------------------------------------------- 1 | 2 | ========= 3 | Pipelines 4 | ========= 5 | 6 | .. 
automodule:: pydetex.pipelines 7 | :members: 8 | :exclude-members: -------------------------------------------------------------------------------- /gui.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | MAIN FILE 6 | """ 7 | 8 | from pydetex.gui import main 9 | 10 | main() 11 | -------------------------------------------------------------------------------- /docs/_source/license.rst: -------------------------------------------------------------------------------- 1 | 2 | ======= 3 | License 4 | ======= 5 | 6 | .. include:: ../../LICENSE 7 | 8 | The official license can be retrieved `here `_. 9 | -------------------------------------------------------------------------------- /test/data/example_complex_template_output.txt: -------------------------------------------------------------------------------- 1 | ℜ 2 | ℑℑ 3 | 4 | = 5 | 6 | = 7 | 8 | = 9 | 10 | = 11 | 12 | =200pt 13 | =200pt 14 | = 15 | = 16 | 17 | =`\- 18 | 19 | 20 | 21 | 22 | 23 | =10000 24 | 25 | `\="#1 "#1 `\=0↤↤ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | ._* 3 | .DS_Store 4 | .idea/ 5 | .vscode/ 6 | 7 | # Build 8 | **.egg-info 9 | build/ 10 | dist/ 11 | 12 | # Settings 13 | pydetex/res/.pydetex.cfg 14 | 15 | # Image projects 16 | docs/_static/**.vectornator -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4 >= 4.11.1 2 | flatlatex >= 0.15 3 | langdetect >= 1.0.9 4 | nltk >= 3.9.1 5 | outdated >= 0.2.2 6 | Pillow >= 9.4.0 7 | PyMultiDictionary >= 1.3.2 8 | pyperclip >= 1.9.0 9 | requests >= 2.32.3 10 | tkmacosx >= 1.0.5 -------------------------------------------------------------------------------- /test/_base.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | TEST BASE 6 | Base definition. 7 | """ 8 | 9 | import unittest 10 | 11 | 12 | class BaseTest(unittest.TestCase): 13 | """ 14 | Base test class. 15 | """ 16 | -------------------------------------------------------------------------------- /test/data/example_simple_figure_caption_output.txt: -------------------------------------------------------------------------------- 1 | Write or paste here your LaTeX code. It simply removes all tex-things and returns a friendly plain text! 2 | The following is a excellent figure: 3 | 4 | CAPTION: A picture of the same gull looking the other way! 5 | 6 | well EQUATION_0 epic α -------------------------------------------------------------------------------- /test/data/example_placeholder_output.txt: -------------------------------------------------------------------------------- 1 | Write or paste here your LaTeX code. It simply removes all tex-things and returns a friendly plain text! 
2 | 3 | PyDetex can process equation, lists, cites, references, and many more: 4 | 5 | EQUATION_0 6 | 7 | Or: 8 | 9 | - As shown in Figure 1 10 | - Proposed by α-Feltes [1] because EQUATION_1 -------------------------------------------------------------------------------- /specs/PyDetex_Win.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | 3 | import sys; sys.path.insert(0, '.') 4 | import specs 5 | if specs.is_osx: exit() 6 | 7 | # Create the analysis 8 | a = specs.get_analysis(Analysis, TOC) 9 | pyz = specs.get_pyz(PYZ, a) 10 | exe = specs.get_exe(EXE, pyz, a, False) 11 | coll = specs.get_collect(COLLECT, a, exe) -------------------------------------------------------------------------------- /test/data/example.tex: -------------------------------------------------------------------------------- 1 | Table \ref{tab:review-rulebased} details the reviewed rule-based methods within floor plan recognition, considering the datasets used (Table \ref{tab:databases}) and the four categories of tasks, such as (1) \textit{Graphics separation}, (2) \textit{Pattern recognition}, (3) \textit{Vectorization}, and (4) \textit{Structural modeling}. -------------------------------------------------------------------------------- /pydetex/res/u_subscripts.txt: -------------------------------------------------------------------------------- 1 | 0 ₀ 2 | 1 ₁ 3 | 2 ₂ 4 | 3 ₃ 5 | 4 ₄ 6 | 5 ₅ 7 | 6 ₆ 8 | 7 ₇ 9 | 8 ₈ 10 | 9 ₉ 11 | + ₊ 12 | - ₋ 13 | = ₌ 14 | ( ₍ 15 | ) ₎ 16 | a ₐ 17 | e ₑ 18 | h ₕ 19 | i ᵢ 20 | j ⱼ 21 | k ₖ 22 | l ₗ 23 | m ₘ 24 | n ₙ 25 | o ₒ 26 | p ₚ 27 | r ᵣ 28 | s ₛ 29 | t ₜ 30 | u ᵤ 31 | v ᵥ 32 | x ₓ 33 | β ᵦ 34 | γ ᵧ 35 | ρ ᵨ 36 | φ ᵩ 37 | χ ᵪ 38 | -------------------------------------------------------------------------------- /specs/PyDetex_Win_Single.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | 3 | import sys; sys.path.insert(0, '.') 4 | import specs 5 | if specs.is_osx: exit() 6 | 7 | # Create the analysis 8 | a = specs.get_analysis(Analysis, TOC) 9 | pyz = specs.get_pyz(PYZ, a) 10 | exe = specs.get_exe(EXE, pyz, a, True) 11 | 12 | # Save to zip 13 | specs.save_zip('PyDetex.exe', 'PyDetex.Win64') -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: https://EditorConfig.org 2 | 3 | # Top-most EditorConfig file 4 | root = true 5 | 6 | # Unix-style newlines 7 | [*] 8 | charset = utf-8 9 | end_of_line = lf 10 | insert_final_newline = false 11 | 12 | # Configure languages 13 | [*.py] 14 | indent_size = 4 15 | indent_style = space 16 | 17 | [{*.json, *.yml}] 18 | indent_size = 2 19 | indent_style = space -------------------------------------------------------------------------------- /docs/_source/contributors.rst: -------------------------------------------------------------------------------- 1 | 2 | ============ 3 | Contributors 4 | ============ 5 | 6 | Core developers: 7 | 8 | - `Pablo Pizarro R. `_ 9 | 10 | Other contributors: 11 | 12 | - `Xiong-Hui Chen `_ 13 | 14 | Ideas and contributions are always welcome. Any found bugs or enhancement 15 | suggestions should be posted on the `GitHub project page `_. 
16 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | exclude *.bat 2 | exclude *.yml 3 | exclude .github/ISSUE_TEMPLATE/* 4 | exclude .gitignore 5 | exclude .replit 6 | exclude docs/* 7 | exclude docs/_source/* 8 | exclude docs/_static/* 9 | exclude specs/*.py 10 | exclude test/*.py 11 | 12 | include requirements.txt 13 | 14 | recursive-include pydetex/res **.gif 15 | recursive-include pydetex/res **.ico 16 | recursive-include pydetex/res **.json 17 | recursive-include pydetex/res **.tex 18 | recursive-include pydetex/res **.txt -------------------------------------------------------------------------------- /pydetex/res/u_textfrak.txt: -------------------------------------------------------------------------------- 1 | A 𝔄 2 | B 𝔅 3 | C ℭ 4 | D 𝔇 5 | E 𝔈 6 | F 𝔉 7 | G 𝔊 8 | H ℌ 9 | I ℑ 10 | J 𝔍 11 | K 𝔎 12 | L 𝔏 13 | M 𝔐 14 | N 𝔑 15 | O 𝔒 16 | P 𝔓 17 | Q 𝔔 18 | R ℜ 19 | S 𝔖 20 | T 𝔗 21 | U 𝔘 22 | V 𝔙 23 | W 𝔚 24 | X 𝔛 25 | Y 𝔜 26 | Z ℨ 27 | a 𝔞 28 | b 𝔟 29 | c 𝔠 30 | d 𝔡 31 | e 𝔢 32 | f 𝔣 33 | g 𝔤 34 | h 𝔥 35 | i 𝔦 36 | j 𝔧 37 | k 𝔨 38 | l 𝔩 39 | m 𝔪 40 | n 𝔫 41 | o 𝔬 42 | p 𝔭 43 | q 𝔮 44 | r 𝔯 45 | s 𝔰 46 | t 𝔱 47 | u 𝔲 48 | v 𝔳 49 | w 𝔴 50 | x 𝔵 51 | y 𝔶 52 | z 𝔷 53 | -------------------------------------------------------------------------------- /pydetex/res/u_textcal.txt: -------------------------------------------------------------------------------- 1 | A 𝓐 2 | B 𝓑 3 | C 𝓒 4 | D 𝓓 5 | E 𝓔 6 | F 𝓕 7 | G 𝓖 8 | H 𝓗 9 | I 𝓘 10 | J 𝓙 11 | K 𝓚 12 | L 𝓛 13 | M 𝓜 14 | N 𝓝 15 | O 𝓞 16 | P 𝓟 17 | Q 𝓠 18 | R 𝓡 19 | S 𝓢 20 | T 𝓣 21 | U 𝓤 22 | V 𝓥 23 | W 𝓦 24 | X 𝓧 25 | Y 𝓨 26 | Z 𝓩 27 | a 𝓪 28 | b 𝓫 29 | c 𝓬 30 | d 𝓭 31 | e 𝓮 32 | f 𝓯 33 | g 𝓰 34 | h 𝓱 35 | i 𝓲 36 | j 𝓳 37 | k 𝓴 38 | l 𝓵 39 | m 𝓶 40 | n 𝓷 41 | o 𝓸 42 | p 𝓹 43 | q 𝓺 44 | r 𝓻 45 | s 𝓼 46 | t 𝓽 47 | u 𝓾 48 | v 𝓿 49 | w 𝔀 50 | x 𝔁 51 | y 𝔂 52 | z 𝔃 53 | -------------------------------------------------------------------------------- /test/data/example_simple_figure_caption.txt: -------------------------------------------------------------------------------- 1 | Write or paste here your \texttt{LaTeX} code. It simply removes all tex-things and returns a friendly plain text! 
2 | The following is a excellent figure: 3 | 4 | \begin{figure} 5 | \centering 6 | \reflectbox{% 7 | \includegraphics[width=0.5\textwidth]{gull}} 8 | \caption{A picture of the same gull 9 | looking the other way!} 10 | \end{figure} 11 | 12 | well $nothing has happened really$ epic $\alpha$ 13 | -------------------------------------------------------------------------------- /specs/PyDetex_macOS.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | 3 | import os 4 | import sys; sys.path.insert(0, '.') 5 | import specs 6 | if specs.is_win: exit() 7 | 8 | try: 9 | os.system(f'rm -rf dist/PyDetex_macOS.app') 10 | os.system(f'rm dist/PyDetex_macOS') 11 | except: 12 | pass 13 | 14 | # Create the analysis 15 | a = specs.get_analysis(Analysis, TOC) 16 | pyz = specs.get_pyz(PYZ, a) 17 | exe = specs.get_exe(EXE, pyz, a, True) 18 | app = specs.get_bundle(BUNDLE, exe) 19 | 20 | # Save to zip 21 | specs.save_zip('PyDetex_macOS.app', 'PyDetex.macOS') -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Configure os 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: '3.8' 13 | 14 | # Build documentation in the docs/ directory with Sphinx 15 | sphinx: 16 | configuration: docs/conf.py 17 | 18 | # Optionally set the version of Python and requirements required to build your docs 19 | python: 20 | install: 21 | - method: pip 22 | path: . 23 | extra_requirements: 24 | - docs -------------------------------------------------------------------------------- /pydetex/res/u_textbb.txt: -------------------------------------------------------------------------------- 1 | A 𝔸 2 | B 𝔹 3 | C ℂ 4 | D 𝔻 5 | E 𝔼 6 | F 𝔽 7 | G 𝔾 8 | H ℍ 9 | I 𝕀 10 | J 𝕁 11 | K 𝕂 12 | L 𝕃 13 | M 𝕄 14 | N ℕ 15 | O 𝕆 16 | P ℙ 17 | Q ℚ 18 | R ℝ 19 | S 𝕊 20 | T 𝕋 21 | U 𝕌 22 | V 𝕍 23 | W 𝕎 24 | X 𝕏 25 | Y 𝕐 26 | Z ℤ 27 | a 𝕒 28 | b 𝕓 29 | c 𝕔 30 | d 𝕕 31 | e 𝕖 32 | f 𝕗 33 | g 𝕘 34 | h 𝕙 35 | i 𝕚 36 | j 𝕛 37 | k 𝕜 38 | l 𝕝 39 | m 𝕞 40 | n 𝕟 41 | o 𝕠 42 | p 𝕡 43 | q 𝕢 44 | r 𝕣 45 | s 𝕤 46 | t 𝕥 47 | u 𝕦 48 | v 𝕧 49 | w 𝕨 50 | x 𝕩 51 | y 𝕪 52 | z 𝕫 53 | 0 𝟘 54 | 1 𝟙 55 | 2 𝟚 56 | 3 𝟛 57 | 4 𝟜 58 | 5 𝟝 59 | 6 𝟞 60 | 7 𝟟 61 | 8 𝟠 62 | 9 𝟡 63 | -------------------------------------------------------------------------------- /pydetex/res/u_textmono.txt: -------------------------------------------------------------------------------- 1 | A 𝙰 2 | B 𝙱 3 | C 𝙲 4 | D 𝙳 5 | E 𝙴 6 | F 𝙵 7 | G 𝙶 8 | H 𝙷 9 | I 𝙸 10 | J 𝙹 11 | K 𝙺 12 | L 𝙻 13 | M 𝙼 14 | N 𝙽 15 | O 𝙾 16 | P 𝙿 17 | Q 𝚀 18 | R 𝚁 19 | S 𝚂 20 | T 𝚃 21 | U 𝚄 22 | V 𝚅 23 | W 𝚆 24 | X 𝚇 25 | Y 𝚈 26 | Z 𝚉 27 | a 𝚊 28 | b 𝚋 29 | c 𝚌 30 | d 𝚍 31 | e 𝚎 32 | f 𝚏 33 | g 𝚐 34 | h 𝚑 35 | i 𝚒 36 | j 𝚓 37 | k 𝚔 38 | l 𝚕 39 | m 𝚖 40 | n 𝚗 41 | o 𝚘 42 | p 𝚙 43 | q 𝚚 44 | r 𝚛 45 | s 𝚜 46 | t 𝚝 47 | u 𝚞 48 | v 𝚟 49 | w 𝚠 50 | x 𝚡 51 | y 𝚢 52 | z 𝚣 53 | 0 𝟶 54 | 1 𝟷 55 | 2 𝟸 56 | 3 𝟹 57 | 4 𝟺 58 | 5 𝟻 59 | 6 𝟼 60 | 7 𝟽 61 | 8 𝟾 62 | 9 𝟿 63 | -------------------------------------------------------------------------------- /pydetex/res/placeholder_en.tex: -------------------------------------------------------------------------------- 1 | Write or paste here your \texttt{LaTeX} code. 
It simply removes all tex-things and returns a friendly plain text! % And removes commands too! 2 | 3 | PyDetex can process equation, lists, cites, references, and many more: 4 | 5 | \begin{equation} 6 | a + \frac{c}{d} \longrightarrow k^n 7 | \end{equation} 8 | 9 | Or \myCustomCommand[\label{cmd1}]{can process \textbf{lists}}: 10 | 11 | \begin{itemize}[font=\bfseries] 12 | \item As shown in Figure \ref{fignumber} 13 | \item Proposed by $\alpha$-Feltes \cite{feltes2008} because $x^n + y^n = z^n \forall n \in 0 \ldots \infty$ 14 | \end{itemize} -------------------------------------------------------------------------------- /pydetex/res/u_superscripts.txt: -------------------------------------------------------------------------------- 1 | 0 ⁰ 2 | 1 ¹ 3 | 2 ² 4 | 3 ³ 5 | 4 ⁴ 6 | 5 ⁵ 7 | 6 ⁶ 8 | 7 ⁷ 9 | 8 ⁸ 10 | 9 ⁹ 11 | + ⁺ 12 | - ⁻ 13 | = ⁼ 14 | ( ⁽ 15 | ) ⁾ 16 | a ᵃ 17 | b ᵇ 18 | c ᶜ 19 | d ᵈ 20 | e ᵉ 21 | f ᶠ 22 | g ᵍ 23 | h ʰ 24 | i ⁱ 25 | j ʲ 26 | k ᵏ 27 | l ˡ 28 | m ᵐ 29 | n ⁿ 30 | o ᵒ 31 | p ᵖ 32 | r ʳ 33 | s ˢ 34 | t ᵗ 35 | u ᵘ 36 | v ᵛ 37 | w ʷ 38 | x ˣ 39 | y ʸ 40 | z ᶻ 41 | A ᴬ 42 | B ᴮ 43 | D ᴰ 44 | E ᴱ 45 | G ᴳ 46 | H ᴴ 47 | I ᴵ 48 | J ᴶ 49 | K ᴷ 50 | L ᴸ 51 | M ᴹ 52 | N ᴺ 53 | O ᴼ 54 | P ᴾ 55 | R ᴿ 56 | T ᵀ 57 | U ᵁ 58 | V ⱽ 59 | W ᵂ 60 | α ᵅ 61 | β ᵝ 62 | γ ᵞ 63 | δ ᵟ 64 | ∊ ᵋ 65 | θ ᶿ 66 | ι ᶥ 67 | Φ ᶲ 68 | φ ᵠ 69 | χ ᵡ 70 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: ppizarror 4 | patreon: # Patreon 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: ppizarror 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 13 | -------------------------------------------------------------------------------- /pydetex/res/placeholder_es.tex: -------------------------------------------------------------------------------- 1 | Escribe o pega aquí tu código \texttt{LaTeX}. El programa removerá lo relacionado a latex y retornará un amigable texto plano. % Y además elimina los comentarios! 2 | 3 | PyDetex puede procesar ecuaciones, listas, citas, referencias, y mucho más: 4 | 5 | \begin{equation} 6 | a + \frac{c}{d} \longrightarrow k^n 7 | \end{equation} 8 | 9 | Ó \myCustomCommand[\label{cmd1}]{puede procesar \textbf{listas}}: 10 | 11 | \begin{itemize}[font=\bfseries] 12 | \item Como es ilustrado en la Figura \ref{fignumber} 13 | \item Propuesto por $\alpha$-Feltes \cite{feltes2008} dado que $x^n + y^n = z^n \forall n \in 0 \ldots \infty$ 14 | \end{itemize} -------------------------------------------------------------------------------- /test/data/example_placeholder.txt: -------------------------------------------------------------------------------- 1 | Write or paste here your \texttt{LaTeX} code. It simply removes all tex-things and returns a friendly plain text! % And removes commands too! 
2 | 3 | PyDetex can process equation, lists, cites, references, and many more: 4 | 5 | \begin{equation} 6 | a + \\frac{c}{d} \longrightarrow k^n 7 | \end{equation} 8 | 9 | Or \myCustomCommand[\label{cmd1}]{can process \textbf{lists}}: 10 | 11 | \begin{itemize}[font=\bfseries] 12 | \item As shown in Figure \ref{fignumber} 13 | \item Proposed by $\alpha$-Feltes \cite{feltes2008} because $x^n + y^n = z^n \forall n \in 0 \ldots \infty$ 14 | \end{itemize} -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /pydetex/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | PYDETEX 6 | An application that transforms LaTeX codes to plain text. 7 | """ 8 | 9 | import pydetex.version 10 | 11 | __author__ = 'Pablo Pizarro R.' 12 | __copyright__ = 'Copyright 2021 Pablo Pizarro R. 
@ppizarror' 13 | __description__ = 'An application that transforms LaTeX code to plain text' 14 | __email__ = 'pablo@ppizarror.com' 15 | __keywords__ = 'latex detex parser gui' 16 | __license__ = 'MIT' 17 | __module_name__ = 'pydetex' 18 | __url__ = 'https://pydetex.readthedocs.io' 19 | __url_bug_tracker__ = 'https://github.com/ppizarror/PyDetex/issues' 20 | __url_documentation__ = 'https://pydetex.readthedocs.io' 21 | __url_source_code__ = 'https://github.com/ppizarror/PyDetex' 22 | __version__ = pydetex.version.ver 23 | -------------------------------------------------------------------------------- /test/data/example_simple_itemize_output.txt: -------------------------------------------------------------------------------- 1 | - The academic databases 2 | Web of Science, Scopus, IEEE/IET Xplore, Science Direct, ACM Digital Library, ASCE Library, ProQuest, and Springer were used for article search and selection. Also, online tools Semantic Scholar and Connected Papers were employed to retrieve similar articles powered by AI and visual graphs. 3 | - Keywords such as "floor plan analysis", "floor plan recognition and interpretation", "floor plan segmentation", "floor plan image", "apartment structure", "wall segmentation", "architectural plan vectorization", "room and wall retrieval", "apartment graph", "object detection in floor plans", and "parsing floor plan images" were used to search the databases. The search date period ranged from 1995 to 4 | fecha. For each article, its cross-references and similar works were also considered for revision. -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Environment information** 11 | Describe your environment information, such as: 12 | 13 | - SO: win/linux 14 | - python version: v3.x 15 | - pydetex version: v0.x.x 16 | 17 | **Describe the bug** 18 | A clear and concise description of what the bug is. 19 | 20 | **To Reproduce** 21 | Please provide a **minimal** reproducible example that developers can run to investigate the problem. 22 | You can find help for creating such an example [here](https://stackoverflow.com/help/minimal-reproducible-example). 23 | 24 | **Expected behavior** 25 | A clear and concise description of what you expected to happen. 26 | 27 | **Additional context** 28 | Add any other context about the problem here. 29 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /pydetex/version.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | VERSION 6 | Defines version. 7 | """ 8 | 9 | __all__ = ['Version', 'vernum', 'ver', 'rev'] 10 | 11 | 12 | class Version(tuple): 13 | """ 14 | Version class. 15 | """ 16 | 17 | __slots__ = () 18 | fields = 'major', 'minor', 'patch' 19 | 20 | def __new__(cls, major, minor, patch) -> tuple: 21 | return tuple.__new__(cls, (major, minor, patch)) 22 | 23 | def __repr__(self) -> str: 24 | fields = (f'{fld}={val}' for fld, val in zip(self.fields, self)) 25 | return f'{self.__class__.__name__}({", ".join(fields)})' 26 | 27 | def __str__(self) -> str: 28 | return '{}.{}.{}'.format(*self) 29 | 30 | major = property(lambda self: self[0]) 31 | minor = property(lambda self: self[1]) 32 | patch = property(lambda self: self[2]) 33 | 34 | 35 | vernum = Version(1, 1, 1) 36 | ver = str(vernum) 37 | rev = '' 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Pablo Pizarro R. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /pydetex/res/u_textit.txt: -------------------------------------------------------------------------------- 1 | A 𝐴 2 | B 𝐵 3 | C 𝐶 4 | D 𝐷 5 | E 𝐸 6 | F 𝐹 7 | G 𝐺 8 | H 𝐻 9 | I 𝐼 10 | J 𝐽 11 | K 𝐾 12 | L 𝐿 13 | M 𝑀 14 | N 𝑁 15 | O 𝑂 16 | P 𝑃 17 | Q 𝑄 18 | R 𝑅 19 | S 𝑆 20 | T 𝑇 21 | U 𝑈 22 | V 𝑉 23 | W 𝑊 24 | X 𝑋 25 | Y 𝑌 26 | Z 𝑍 27 | a 𝑎 28 | b 𝑏 29 | c 𝑐 30 | d 𝑑 31 | e 𝑒 32 | f 𝑓 33 | g 𝑔 34 | h ℎ 35 | i 𝑖 36 | j 𝑗 37 | k 𝑘 38 | l 𝑙 39 | m 𝑚 40 | n 𝑛 41 | o 𝑜 42 | p 𝑝 43 | q 𝑞 44 | r 𝑟 45 | s 𝑠 46 | t 𝑡 47 | u 𝑢 48 | v 𝑣 49 | w 𝑤 50 | x 𝑥 51 | y 𝑦 52 | z 𝑧 53 | Α 𝛢 54 | Β 𝛣 55 | Γ 𝛤 56 | Δ 𝛥 57 | Ε 𝛦 58 | Ζ 𝛧 59 | Η 𝛨 60 | Θ 𝛩 61 | Ι 𝛪 62 | Κ 𝛫 63 | Λ 𝛬 64 | Μ 𝛭 65 | Ν 𝛮 66 | Ξ 𝛯 67 | Ο 𝛰 68 | Π 𝛱 69 | Ρ 𝛲 70 | ϴ 𝛳 71 | Σ 𝛴 72 | Τ 𝛵 73 | Υ 𝛶 74 | Φ 𝛷 75 | Χ 𝛸 76 | Ψ 𝛹 77 | Ω 𝛺 78 | ∇ 𝛻 79 | α 𝛼 80 | β 𝛽 81 | γ 𝛾 82 | δ 𝛿 83 | ε 𝜀 84 | ζ 𝜁 85 | η 𝜂 86 | θ 𝜃 87 | ι 𝜄 88 | κ 𝜅 89 | λ 𝜆 90 | μ 𝜇 91 | ν 𝜈 92 | ξ 𝜉 93 | ο 𝜊 94 | π 𝜋 95 | ρ 𝜌 96 | ς 𝜍 97 | σ 𝜎 98 | τ 𝜏 99 | υ 𝜐 100 | φ 𝜑 101 | χ 𝜒 102 | ψ 𝜓 103 | ω 𝜔 104 | ∂ 𝜕 105 | ϵ 𝜖 106 | ϑ 𝜗 107 | ϰ 𝜘 108 | ϕ 𝜙 109 | ϱ 𝜚 110 | ϖ 𝜛 111 | -------------------------------------------------------------------------------- /pydetex/res/u_textbf.txt: -------------------------------------------------------------------------------- 1 | A 𝐀 2 | B 𝐁 3 | C 𝐂 4 | D 𝐃 5 | E 𝐄 6 | F 𝐅 7 | G 𝐆 8 | H 𝐇 9 | I 𝐈 10 | J 𝐉 11 | K 𝐊 12 | L 𝐋 13 | M 𝐌 14 | N 𝐍 15 | O 𝐎 16 | P 𝐏 17 | Q 𝐐 18 | R 𝐑 19 | S 𝐒 20 | T 𝐓 21 | U 𝐔 22 | V 𝐕 23 | W 𝐖 24 | X 𝐗 25 | Y 𝐘 26 | Z 𝐙 27 | a 𝐚 28 | b 𝐛 29 | c 𝐜 30 | d 𝐝 31 | e 𝐞 32 | f 𝐟 33 | g 𝐠 34 | h 𝐡 35 | i 𝐢 36 | j 𝐣 37 | k 𝐤 38 | l 𝐥 39 | m 𝐦 40 | n 𝐧 41 | o 𝐨 42 | p 𝐩 43 | q 𝐪 44 | r 𝐫 45 | s 𝐬 46 | t 𝐭 47 | u 𝐮 48 | v 𝐯 49 | w 𝐰 50 | x 𝐱 51 | y 𝐲 52 | z 𝐳 53 | Α 𝚨 54 | Β 𝚩 55 | Γ 𝚪 56 | Δ 𝚫 57 | Ε 𝚬 58 | Ζ 𝚭 59 | Η 𝚮 60 | Θ 𝚯 61 | Ι 𝚰 62 | Κ 𝚱 63 | Λ 𝚲 64 | Μ 𝚳 65 | Ν 𝚴 66 | Ξ 𝚵 67 | Ο 𝚶 68 | Π 𝚷 69 | Ρ 𝚸 70 | ϴ 𝚹 71 | Σ 𝚺 72 | Τ 𝚻 73 | Υ 𝚼 74 | Φ 𝚽 75 | Χ 𝚾 76 | Ψ 𝚿 77 | Ω 𝛀 78 | ∇ 𝛁 79 | α 𝛂 80 | β 𝛃 81 | γ 𝛄 82 | δ 𝛅 83 | ε 𝛆 84 | ζ 𝛇 85 | η 𝛈 86 | θ 𝛉 87 | ι 𝛊 88 | κ 𝛋 89 | λ 𝛌 90 | μ 𝛍 91 | ν 𝛎 92 | ξ 𝛏 93 | ο 𝛐 94 | π 𝛑 95 | ρ 𝛒 96 | ς 𝛓 97 | σ 𝛔 98 | τ 𝛕 99 | υ 𝛖 100 | φ 𝛗 101 | χ 𝛘 102 | ψ 𝛙 103 | ω 𝛚 104 | ∂ 𝛛 105 | ϵ 𝛜 106 | ϑ 𝛝 107 | ϰ 𝛞 108 | ϕ 𝛟 109 | ϱ 𝛠 110 | ϖ 𝛡 111 | 0 𝟎 112 | 1 𝟏 113 | 2 𝟐 114 | 3 𝟑 115 | 4 𝟒 116 | 5 𝟓 117 | 6 𝟔 118 | 7 𝟕 119 | 8 𝟖 120 | 9 𝟗 121 | -------------------------------------------------------------------------------- /test/data/example_simple_cite_output.txt: -------------------------------------------------------------------------------- 1 | Yamasaki et al. [1] also presented a fully convolutional end-to-end FCN network to label pixels in apartment floor plans by performing a general semantic segmentation, ignoring spatial relations between elements and room boundary. The classified pixels from 12 classes formed a graph to model the structure and measure the structural similarity for apartment retrieval. 2 | 3 | FIGURE_CAPTION: A U-Net model which segments the walls from a rasterized floor plan image. Layer legend: (yellow) convolutional block, (orange) max-pool, (blue) up-sampling, and (purple) softmax. 4 | 5 | A U-Net approach was introduced by Yang η [2], alongside the pixel deconvolutional layers PixelDCL [3] to avoid checkerboard artifacts while segmenting walls and doors. 6 | 7 | Discriminator architectures [4]. 
8 | 9 | FIGURE_CAPTION: Pix2Pix model, which translates the rasterized floor plan image style into a segmented format. 10 | 11 | Concerning the recognition and generation of floor plans, Huang and Zheng [5] introduced an application of Pix2PixHD [6] to detect rooms from 8 classes, which were colorized to generate a new image. In this example, the conditional GANs lead to translate the raster plan to a segmented style using annotated pairs, classifying each pixel while also preserving the underlying structure of the image. Pix2Pix was also adopted by Kim et al. [7, 8] to transform plans into -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | 2 | :orphan: 3 | 4 | .. This page is orphan because its content concerns the internal working of the 5 | .. library. However it is necessary in order to be able to quote its items in the 6 | .. documentation. 7 | 8 | .. include:: ../README.rst 9 | 10 | 11 | === 12 | API 13 | === 14 | 15 | Although PyDetex is intended to be used through its GUI, the module contains several 16 | practical methods to detect LaTeX commands, environments, equations, among others. 17 | The GUI only uses the pipelines to transform the tex code to plain text. On the 18 | other hand, the pipelines use parsers. 19 | 20 | You can check the parsers, pipelines, and utils methods to create your 21 | own pipelines in the left menu! 22 | 23 | .. toctree:: 24 | :maxdepth: 2 25 | :hidden: 26 | :caption: API 27 | 28 | _source/parsers 29 | _source/pipelines 30 | _source/utils 31 | 32 | 33 | ================= 34 | About PyDetex 35 | ================= 36 | 37 | This project does not have a mailing list and so the issues tab should be the first 38 | point of contact if wishing to discuss the project. If you have questions that you 39 | do not feel are relevant to the issues tab or just want to let me know what you 40 | think about the software, feel free to email me at pablo@ppizarror.com 41 | 42 | .. toctree:: 43 | :maxdepth: 2 44 | :hidden: 45 | :caption: About PyDetex 46 | 47 | _source/license 48 | _source/contributors 49 | 50 | 51 | ================== 52 | Indices and tables 53 | ================== 54 | 55 | * :ref:`genindex` 56 | * :ref:`modindex` 57 | * :ref:`search` 58 | -------------------------------------------------------------------------------- /pydetex/_fonts.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | FONTS 6 | Configures font styles.
7 | """ 8 | 9 | __all__ = [ 10 | 'FONT_TAGS', 11 | 'FONT_PROPERTIES', 12 | 'TAGS_FONT' 13 | ] 14 | 15 | from typing import Dict, Optional, Union 16 | 17 | # Define properties 18 | bg = 'background' 19 | bold = 'bold' 20 | fg = 'foreground' 21 | italic = 'italic' 22 | name = 'name' 23 | overstrike = 'overstrike' 24 | roman = 'roman' 25 | slant = 'slant' 26 | spacing3 = 'spacing3' 27 | underline = 'underline' 28 | weight = 'weight' 29 | size = 'size' 30 | 31 | # Configure fonts 32 | FONT_PROPERTIES: Dict[str, Optional[Dict[str, Union[str, int]]]] = { 33 | 'bold': {weight: bold}, 34 | 'bold_italic': {weight: bold, slant: italic}, 35 | 'bullet': None, 36 | 'equation_char': {weight: bold, fg: '#19b70a'}, 37 | 'equation_inside': {slant: italic, fg: '#ffa450'}, 38 | 'error': {fg: '#ff6b68'}, 39 | 'h1': {size: 2, weight: bold, spacing3: 1}, 40 | 'highlight': {bg: '#fff200'}, 41 | 'italic': {slant: italic}, 42 | 'link': {weight: bold, fg: '#ff02a6'}, 43 | 'normal': {}, 44 | 'repeated_tag': {slant: italic, fg: '#ff002b'}, 45 | 'repeated_word': {weight: bold}, 46 | 'strike': {overstrike: True}, 47 | 'tex_argument': {fg: '#999999'}, 48 | 'tex_command': {fg: '#09accb'}, 49 | 'underlined': {underline: True, spacing3: 1} 50 | } 51 | 52 | # Configure the tags 53 | FONT_TAGS: Dict[str, str] = {} 54 | TAGS_FONT: Dict[str, str] = {} 55 | for k in FONT_PROPERTIES.keys(): 56 | FONT_TAGS[k] = f'⇱PYDETEX_FONT:{k.upper()}⇲' 57 | TAGS_FONT[FONT_TAGS[k]] = k 58 | -------------------------------------------------------------------------------- /test/data/example_simple_itemize.txt: -------------------------------------------------------------------------------- 1 | \begin{itemize} 2 | \item The academic databases 3 | Web of Science, % http://uchile.idm.oclc.org/login?url=http://webofknowledge.com/ 4 | Scopus, % http://uchile.idm.oclc.org/login?url=http://www.scopus.com/ 5 | IEEE/IET Xplore, % http://uchile.idm.oclc.org/login?url=http://ieeexplore.ieee.org/ 6 | Science Direct, % http://uchile.idm.oclc.org/login?url=http://www.sciencedirect.com/ 7 | ACM Digital Library, % http://uchile.idm.oclc.org/login?url=https://dl.acm.org/dl.cfm 8 | ASCE Library, % http://uchile.idm.oclc.org/login?url=http://ascelibrary.org 9 | ProQuest, % https://uchile.idm.oclc.org/login?url=http://search.proquest.com/computing?accountid=14621 10 | and Springer % http://uchile.idm.oclc.org/login?url=https://link.springer.com 11 | were used for article search and selection. Also, online tools Semantic Scholar and Connected Papers were employed to retrieve similar articles powered by AI and visual graphs. 12 | 13 | \item Keywords such as \doublequotes{floor plan analysis}, \doublequotes{floor plan recognition and interpretation}, \doublequotes{floor plan segmentation}, \doublequotes{floor plan image}, \doublequotes{apartment structure}, \doublequotes{wall segmentation}, \doublequotes{architectural plan vectorization}, \doublequotes{room and wall retrieval}, \doublequotes{apartment graph}, \doublequotes{object detection in floor plans}, and \doublequotes{parsing floor plan images} were used to search the databases. The search date period ranged from 1995 to \\fecha. For each article, its cross-references and similar works were also considered for revision. 
14 | \end{itemize} -------------------------------------------------------------------------------- /test/data/example_simple_comments_output.txt: -------------------------------------------------------------------------------- 1 | Park and Kwon [1] recognized the main walls of apartments using the auxiliary dimension line, where windows can be retrieved as a subproduct. Feltes et al.'s [2] work is capable of finding the object's corners in wall-line drawing images by filtering out unnecessary points without changing the overall structure, especially those that appeared through over-segmentation on diagonal lines; also, a wall-gap filling is possible while performing a heuristic criterion. Tang et al. [3] automatically generated vector drawings by applying various filters, such as gradient, length, gap-filling, line-merging, and connectivity under several millimeter sizes, assuming walls are represented by parallel lines in both vertical and horizontal axis. Pan et al. [4] detected walls and windows considering empirical rules regarding their pixel layouts, where the user must adjust its thresholds. The bearing wall corresponded to black areas, non-bearing walls to parallel, unfilled rectangles, and windows are composed of three to four closer parallel lines. De [5] also assumed that only walls are illustrated as thick black lines in a floor plan layout. Thus, thick and thin lines can be distinguished using a morphological transformation; thick lines can be considered walls, whereas arc lines represent doors. On the other hand, in an effort to overcome the lack of a standard notation, de las Heras et al. [6] presented an unsupervised wall segmentation using the assumption of them being a repetitive element, rectangular, placed in orthogonal directions, filled with the same pattern and naturally distributed across the plan. Although assumptions might work over a set, they do not consider semantical relationships or work for multiple plan styles. 
-------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | concurrency: 4 | cancel-in-progress: true 5 | group: ${{ github.repository }}-${{ github.workflow }}-${{ github.ref }} 6 | 7 | on: 8 | push: 9 | branches: 10 | - master 11 | pull_request: 12 | branches: 13 | - master 14 | 15 | jobs: 16 | test: 17 | uses: ppizarror/workflow-actions/.github/workflows/test_python.yml@master 18 | strategy: 19 | matrix: 20 | python: [ '3.10', '3.11', '3.12' ] 21 | with: 22 | env-vars: GITHUB=true 23 | install-extras: test 24 | os: ubuntu-latest 25 | python-version: ${{ matrix.python }} 26 | 27 | build: 28 | needs: test 29 | runs-on: ${{ matrix.os }} 30 | strategy: 31 | matrix: 32 | os: 33 | - macos-latest 34 | - windows-latest 35 | steps: 36 | - name: Checkout 37 | uses: actions/checkout@v4 38 | 39 | - name: Set up Python 40 | uses: actions/setup-python@v5 41 | with: 42 | cache: pip 43 | python-version: '3.12' 44 | 45 | - name: Install dependencies 46 | shell: bash 47 | run: | 48 | python -m pip install -e .[installer] 49 | echo "PYDETEX_VERSION=$(python -c "import pydetex; print(pydetex.__version__)")" >> $GITHUB_ENV 50 | 51 | - name: Build 52 | run: python build.py pyinstaller 53 | 54 | - name: Upload release 55 | uses: actions/upload-artifact@v4 56 | with: 57 | name: PyDetex-${{ matrix.os }}-v${{ env.PYDETEX_VERSION }} 58 | path: dist/out_zip 59 | 60 | codeql: 61 | uses: ppizarror/workflow-actions/.github/workflows/codeql.yml@master 62 | with: 63 | language: python 64 | 65 | delete-artifacts: 66 | needs: build 67 | uses: ppizarror/workflow-actions/.github/workflows/delete_artifacts.yml@master -------------------------------------------------------------------------------- /test/data/example_simple_cite.txt: -------------------------------------------------------------------------------- 1 | Yamasaki et al. \cite{Yamasaki2018} also presented a fully convolutional end-to-end FCN network to label pixels in apartment floor plans by performing a general semantic segmentation, ignoring spatial relations between elements and room boundary. The classified pixels from 12 classes formed a graph to model the structure and measure the structural similarity for apartment retrieval. % \\% 1 2 | 3 | \insertimage[\label{unetmodel}]{unet_compressed}{width=\linewidth}{A U-Net model which segments the walls from a rasterized floor plan image. Layer legend: \textit{(yellow)} convolutional block, \textit{(orange)} max-pool, \textit{(blue)} up-sampling, and \textit{(purple)} softmax.}% The encoder, comprised of several de-convolutions, captures the context and finer grain structures. Conversely, the decoder reconstruct the output segmented image, combining spatial information from the encoder.} 4 | 5 | % U-NET 6 | A U-Net approach was introduced by Yang \eta \etal \cite{Yang2018}, alongside the pixel deconvolutional layers PixelDCL \cite{Gao2017} to avoid checkerboard artifacts while segmenting walls and doors. 7 | 8 | Discriminator architectures \cite{Dong2021}. 9 | 10 | 11 | \insertimage[\label{pix2pix2model}]{pix2pix_compressed}{width=\linewidth}{Pix2Pix model, which translates the rasterized floor plan image style into a segmented format.} 12 | 13 | Concerning the recognition and generation of floor plans, Huang and Zheng \cite{Huang2018} introduced an application of Pix2PixHD \cite{Wang2018} to detect rooms from 8 classes, which were colorized to generate a new image. 
In this example, the conditional GANs lead to translate the raster plan to a segmented style using annotated pairs, classifying each pixel while also preserving the underlying structure of the image. Pix2Pix was also adopted by Kim et al. \cite{Kim2021, Kim2018} to transform plans into -------------------------------------------------------------------------------- /test/data/example_simple_comments.txt: -------------------------------------------------------------------------------- 1 | % Aqui tirar metodos genericos que sean de poco impacto 2 | % element recogniztion based on their line representation has been widely studied concerning rule-based approachesSeveral other studies have also considered a line representation and retrieval to recognize several structural elements from floor plans. 3 | Park and Kwon \cite{Park2003} recognized the main walls of apartments using the auxiliary dimension line, where windows can be retrieved as a subproduct. Feltes et al.'s \cite{Feltes2014} work is capable of finding the object's corners in wall-line drawing images by filtering out unnecessary points without changing the overall structure, especially those that appeared through over-segmentation on diagonal lines; also, a wall-gap filling is possible while performing a heuristic criterion. Tang et al. \cite{Tang2017} automatically generated vector drawings by applying various filters, such as gradient, length, gap-filling, line-merging, and connectivity under several millimeter sizes, assuming walls are represented by parallel lines in both vertical and horizontal axis. Pan et al. \cite{GuanghuiPan2017} detected walls and windows considering empirical rules regarding their pixel layouts, where the user must adjust its thresholds. The bearing wall corresponded to black areas, non-bearing walls to parallel, unfilled rectangles, and windows are composed of three to four closer parallel lines. De \cite{De2019} also assumed that only walls are illustrated as thick black lines in a floor plan layout. Thus, thick and thin lines can be distinguished using a morphological transformation; thick lines can be considered walls, whereas arc lines represent doors. On the other hand, in an effort to overcome the lack of a standard notation, de las Heras et al. \cite{DelasHeras2013a} presented an unsupervised wall segmentation using the assumption of them being a repetitive element, rectangular, placed in orthogonal directions, filled with the same pattern and naturally distributed across the plan. Although assumptions might work over a set, they do not consider semantical relationships or work for multiple plan styles. -------------------------------------------------------------------------------- /test/data/example_tables_strict_output.txt: -------------------------------------------------------------------------------- 1 | Datasets 2 | 3 | Datasets have played an important role within floor plan analysis as there is not a standard notation for its composition; therefore, designed models must incorporate specific rules for each particular style, facing high variability due to: (1) the visual representation of the building, wherein best cases only 70% of the graphical information is compliant with some standard rules [1], (2) the nature of the information contained, and (3) the way of the information is visually represented [2]. Moreover, each floor plan dataset has limitations regarding quantity or complexity. 
Thus, researchers opt to utilize the datasets suitable for their purposes, including specific processing steps that could not be generalized to other formats [3]. 4 | 5 | For such datasets to be useful in floor plan analysis, there must be pixel-wise annotations for objects such as walls, openings, and rooms. However, there are few public datasets because it is difficult for floor plans to be invariably labeled due to ambiguity in notation and the need for high-level expertise for object recognition [2, 4]. Even though several practical tools have been developed to annotate them conveniently [5-7], it is difficult to do so because there is no way to guarantee the same annotations from different experts, especially for complicated plans [3]. 6 | 7 | CAPTION: Datasets used by floor plan analysis research. 8 | 9 | Note: Res – Resolution in pixels (px). 10 | - [a] (all links visited on 10/01/2021) 11 | - [b] LINK: b 12 | - [c] 13 | 14 | CAPTION: Floor plan image examples from datasets. 15 | 16 | The existing datasets were summarized in Table 1, considering its source article, availability, annotation, and quantity, ordered by release year. Figure 2 illustrates a selection of images from the datasets considered within the review. It can be noted that there are distinct drawing styles and semantics among the apartment and house plans; some have colored floors, text, icons, dimension lines, furniture, and walls with several styles, angles, and complex arrangements. These diverse settings were exploited by rule-based methods, described in section 3, which recognize walls, doors, windows, furniture, and rooms by defining algorithms that considered different approaches specific to each style; or by learning-based ones (section 4), that trained models to automatically recognize the objects. -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | SETUP DISTRIBUTION 6 | Create setup for PyPI. 
7 | """ 8 | 9 | from setuptools import setup, find_packages 10 | import pydetex 11 | 12 | # Load readme 13 | with open('README.rst', encoding='utf-8') as f: 14 | long_description: str = f.read() 15 | long_description = long_description.split('Install Instructions')[0].strip() 16 | 17 | # Load requirements 18 | with open('requirements.txt', encoding='utf-8') as f: 19 | requirements = [] 20 | for line in f: 21 | requirements.append(line.strip()) 22 | 23 | # Setup library 24 | setup( 25 | name=pydetex.__module_name__, 26 | version=pydetex.__version__, 27 | author=pydetex.__author__, 28 | author_email=pydetex.__email__, 29 | description=pydetex.__description__, 30 | long_description=long_description, 31 | url=pydetex.__url__, 32 | project_urls={ 33 | 'Bug Tracker': pydetex.__url_bug_tracker__, 34 | 'Documentation': pydetex.__url_documentation__, 35 | 'Source Code': pydetex.__url_source_code__ 36 | }, 37 | license=pydetex.__license__, 38 | platforms=['any'], 39 | keywords=pydetex.__keywords__, 40 | classifiers=[ 41 | 'License :: OSI Approved :: MIT License', 42 | 'Natural Language :: English', 43 | 'Operating System :: OS Independent', 44 | 'Programming Language :: Python :: 3.7', 45 | 'Programming Language :: Python :: 3.8', 46 | 'Programming Language :: Python :: 3.9', 47 | 'Programming Language :: Python :: 3.10', 48 | 'Programming Language :: Python :: 3.11', 49 | 'Programming Language :: Python :: 3.12', 50 | 'Programming Language :: Python', 51 | 'Topic :: Multimedia', 52 | 'Topic :: Text Processing' 53 | ], 54 | include_package_data=True, 55 | packages=find_packages(exclude=['test']), 56 | python_requires='>=3.7, <4', 57 | install_requires=requirements, 58 | entry_points={ 59 | 'console_scripts': [ 60 | 'pydetex = pydetex.gui:main', 61 | ], 62 | 'gui_scripts': [ 63 | 'pydetex = pydetex.gui:main', 64 | ] 65 | }, 66 | extras_require={ 67 | 'docs': ['sphinx<7', 'sphinx-autodoc-typehints>=1.2.0', 'sphinx-rtd-theme'], 68 | 'installer': ['pyinstaller==6.7.0'], 69 | 'test': ['nose2[coverage_plugin]', 'pytest'] 70 | }, 71 | setup_requires=[ 72 | 'setuptools', 73 | ], 74 | options={ 75 | 'bdist_wheel': {'universal': False} 76 | } 77 | ) 78 | -------------------------------------------------------------------------------- /test/data/example_complex_envs.txt: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture} 2 | \draw[gray, thick] (-1,2) -- (2,-4); 3 | \draw[gray, thick] (-1,-1) -- (2,2); 4 | \filldraw[black] (0,0) circle (2pt) node[anchor=west]{Intersection point}; 5 | \end{tikzpicture} 6 | 7 | \begin{references} 8 | \bibitem a+b 9 | \end{references} 10 | 11 | \begin{verbatim*} 12 | Text enclosed inside \texttt{verbatim} environment 13 | is printed directly 14 | and all \LaTeX{} commands are ignored. 
15 | \end{verbatim*} 16 | 17 | \begin{lstlisting} 18 | import numpy as np 19 | 20 | def incmatrix(genl1,genl2): 21 | m = len(genl1) 22 | n = len(genl2) 23 | M = None #to become the incidence matrix 24 | VT = np.zeros((n*m,1), int) #dummy variable 25 | 26 | #compute the bitwise xor matrix 27 | M1 = bitxormatrix(genl1) 28 | M2 = np.triu(bitxormatrix(genl2),1) 29 | 30 | for i in range(m-1): 31 | for j in range(i+1, m): 32 | [r,c] = np.where(M2 == M1[i,j]) 33 | for k in range(len(r)): 34 | VT[(i)*n + r[k]] = 1; 35 | VT[(i)*n + c[k]] = 1; 36 | VT[(j)*n + r[k]] = 1; 37 | VT[(j)*n + c[k]] = 1; 38 | 39 | if M is None: 40 | M = np.copy(VT) 41 | else: 42 | M = np.concatenate((M, VT), 1) 43 | 44 | VT = np.zeros((n*m,1), int) 45 | 46 | return M 47 | \end{lstlisting} 48 | 49 | \begin{minted}{python} 50 | import numpy as np 51 | 52 | def incmatrix(genl1,genl2): 53 | m = len(genl1) 54 | n = len(genl2) 55 | M = None #to become the incidence matrix 56 | VT = np.zeros((n*m,1), int) #dummy variable 57 | 58 | #compute the bitwise xor matrix 59 | M1 = bitxormatrix(genl1) 60 | M2 = np.triu(bitxormatrix(genl2),1) 61 | 62 | for i in range(m-1): 63 | for j in range(i+1, m): 64 | [r,c] = np.where(M2 == M1[i,j]) 65 | for k in range(len(r)): 66 | VT[(i)*n + r[k]] = 1; 67 | VT[(i)*n + c[k]] = 1; 68 | VT[(j)*n + r[k]] = 1; 69 | VT[(j)*n + c[k]] = 1; 70 | 71 | if M is None: 72 | M = np.copy(VT) 73 | else: 74 | M = np.concatenate((M, VT), 1) 75 | 76 | VT = np.zeros((n*m,1), int) 77 | 78 | return M 79 | \end{minted} 80 | 81 | \begin{sourcecode}[\label{algorithm}]{pseudocodecolor}{Wall assign algorithm.} 82 | function aggregation($R$, $N$, $\varepsilon$): 83 | $G \leftarrow \emptyset$ # New collection of wall groups 84 | for $r$ in $R$ do: 85 | for $g$ in $G$: 86 | if distance($g$, $r$) $\ge \varepsilon$: 87 | $g$ $\oplus \ \{r\}$ 88 | break 89 | end if 90 | end for 91 | end for 92 | for $i$=1 to $N$: # Merge groups 93 | $\bar{G} \leftarrow \emptyset$ 94 | for $g$ in $G$ do: 95 | $merged \leftarrow $ false 96 | for $w$ in $\bar{G}$: 97 | if distance($w$, $g$) $\ge \varepsilon$: 98 | $w$ $\oplus \ \{g\}$ 99 | $merged \leftarrow $ true 100 | end if 101 | end for 102 | if not $merged$ and $\lVert g \rVert \neq \emptyset$: 103 | $\bar{G}$ $\oplus \ \{g\}$ 104 | end if 105 | end for 106 | $G \leftarrow \bar{G}$ # Update group 107 | end for 108 | return $G$ 109 | \end{sourcecode} -------------------------------------------------------------------------------- /test/test_gui.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | TEST GUI 6 | Test guis. 7 | """ 8 | 9 | from test._base import BaseTest 10 | 11 | from pydetex.gui import PyDetexGUI 12 | # noinspection PyProtectedMember 13 | from pydetex._gui_settings import Settings, _SETTINGS_FILE, _SETTINGS_TEST 14 | # noinspection PyProtectedMember 15 | from pydetex._gui_utils import SettingsWindow 16 | import pydetex.pipelines as pip 17 | 18 | import os 19 | 20 | # Configure settings to default 21 | _SETTINGS_FILE[0] = _SETTINGS_TEST 22 | 23 | 24 | class GuiTest(BaseTest): 25 | 26 | def test_gui(self) -> None: 27 | """ 28 | Gui test. 
29 | """ 30 | if 'GITHUB' in os.environ: 31 | return 32 | gui = PyDetexGUI() 33 | cfg = gui._cfg 34 | cfg.set(cfg.CFG_CHECK_REPETITION, False) 35 | cfg.set(cfg.CFG_OUTPUT_FONT_FORMAT, False) 36 | gui._clear() 37 | self.assertEqual(gui.pipeline, pip.strict) 38 | self.assertFalse(gui._ready) 39 | 40 | # Process the pipeline 41 | gui._text_in.insert(0.0, 'This is \\textbf{Latex}') 42 | gui._process_inner() 43 | self.assertEqual(gui._get_pipeline_results(), 'This is Latex') 44 | self.assertTrue(gui._ready) 45 | 46 | # Check clear 47 | gui._clear() 48 | self.assertFalse(gui._ready) 49 | 50 | # Check clip 51 | gui._process_clip() 52 | gui._copy_to_clip() 53 | 54 | # Test gui settings 55 | gui_settings = SettingsWindow((360, 320), cfg) 56 | gui_settings.close() 57 | 58 | gui._open_dictionary() 59 | 60 | def test_settings(self) -> None: 61 | """ 62 | Test the app settings. 63 | """ 64 | cfg = Settings(ignore_file=True) 65 | self.assertEqual(cfg.get(cfg.CFG_PIPELINE), pip.strict) 66 | self.assertFalse(cfg.get(cfg.CFG_CHECK_REPETITION)) 67 | cfg.save() 68 | 69 | # Test invalid 70 | self.assertFalse(cfg.check_setting('UNKNOWN', '')) 71 | self.assertFalse(cfg.check_setting(cfg.CFG_REPETITION_MIN_CHAR, 3.5)) 72 | self.assertFalse(cfg.check_setting(cfg.CFG_REPETITION_MIN_CHAR, -1)) 73 | self.assertFalse(cfg.check_setting(cfg.CFG_REPETITION_MIN_CHAR, '-1')) 74 | self.assertTrue(cfg.check_setting(cfg.CFG_REPETITION_MIN_CHAR, '1')) 75 | self.assertFalse(cfg.check_setting(cfg.CFG_REPETITION_MIN_CHAR, '1f')) 76 | self.assertTrue(cfg.check_setting(cfg.CFG_REPETITION_MIN_CHAR, 1)) 77 | 78 | # Test day diff 79 | self.assertEqual(cfg._last_opened_day_diff, 0) 80 | 81 | # Test font size 82 | self.assertFalse(cfg.check_setting(cfg.CFG_FONT_SIZE, 55)) 83 | self.assertTrue(cfg.check_setting(cfg.CFG_FONT_SIZE, 11)) 84 | 85 | self.assertFalse(cfg.check_setting(cfg.CFG_PIPELINE, '')) 86 | 87 | # Get 88 | self.assertEqual(cfg.get(cfg.CFG_REPETITION_MIN_CHAR), 4) 89 | cfg.set(cfg.CFG_REPETITION_MIN_CHAR, 2) 90 | self.assertEqual(cfg.get(cfg.CFG_REPETITION_MIN_CHAR), 2) 91 | cfg.set(cfg.CFG_REPETITION_MIN_CHAR, '3') 92 | self.assertEqual(cfg.get(cfg.CFG_REPETITION_MIN_CHAR), 3) 93 | 94 | # Test without ignore 95 | Settings() 96 | 97 | # Test language entries 98 | cfg.set(cfg.CFG_LANG, 'en') 99 | self.assertEqual(cfg.lang('lang'), 'English') 100 | cfg.set(cfg.CFG_LANG, 'es') 101 | self.assertEqual(cfg.lang('lang'), 'Español') 102 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | CONF 6 | Configuration file for the Sphinx documentation builder. 7 | 8 | This file only contains a selection of the most common options. For a full 9 | list see the documentation: 10 | https://www.sphinx-doc.org/en/master/usage/configuration.html 11 | """ 12 | 13 | # -- Path setup --------------------------------------------------------------- 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. 
If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here 18 | # 19 | import os 20 | import sys 21 | 22 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 23 | 24 | import pydetex 25 | 26 | # -- Project information ------------------------------------------------------ 27 | 28 | project = pydetex.__module_name__ 29 | # noinspection PyShadowingBuiltins 30 | copyright = pydetex.__copyright__ 31 | author = pydetex.__author__ 32 | 33 | # The full version, including alpha/beta/rc tags 34 | release = pydetex.__version__ 35 | 36 | # -- General configuration ---------------------------------------------------- 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones 41 | extensions = ['sphinx.ext.autodoc', 42 | 'sphinx.ext.viewcode', 43 | 'sphinx.ext.intersphinx', 44 | 'sphinx.ext.autosectionlabel', 45 | 'sphinx_autodoc_typehints' 46 | ] 47 | 48 | # autodoc_default_options = { 49 | # 'private-members': False 50 | # } 51 | 52 | # Add any paths that contain templates here, relative to this directory 53 | templates_path = ['_templates'] 54 | 55 | # The document name of the "master" document, that is, the document that 56 | # contains the root toc-tree directive. Default is 'index' 57 | master_doc = 'index' 58 | 59 | # List of patterns, relative to source directory, that match files and 60 | # directories to ignore when looking for source files 61 | # This pattern also affects html_static_path and html_extra_path 62 | exclude_patterns = ['build', 'Thumbs.db', '.DS_Store'] 63 | 64 | # -- Intersphinx configuration ------------------------------------------------ 65 | 66 | intersphinx_mapping = { 67 | 'python': ('https://docs.python.org/3.9', None) 68 | } 69 | 70 | # -- Options for HTML output -------------------------------------------------- 71 | 72 | # The theme to use for HTML and HTML Help pages. See the documentation for 73 | # a list of builtin themes 74 | html_theme = 'sphinx_rtd_theme' 75 | 76 | # Add any paths that contain custom static files (such as style sheets) here, 77 | # relative to this directory. 
They are copied after the builtin static files, 78 | # so a file named "default.css" will overwrite the builtin "default.css" 79 | html_static_path = ['_static'] 80 | 81 | html_title = f'{project} {release} Documentation' 82 | 83 | html_logo = '../pydetex/res/icon.png' 84 | 85 | html_theme_options = { 86 | 'prev_next_buttons_location': None 87 | } 88 | 89 | # -- Options for LaTeX output ------------------------------------------------- 90 | 91 | # noinspection SpellCheckingInspection 92 | latex_elements = { 93 | 'papersize': 'a4paper', 94 | 'pointsize': '10pt', 95 | 'preamble': r'\def\thempfootnote{\arabic{mpfootnote}}' # workaround sphinx issue #2530 96 | } 97 | 98 | latex_documents = [ 99 | ( 100 | 'index', # source start file 101 | f'{project}.tex', # target filename 102 | f'{project} Documentation', # title 103 | author, # author 104 | 'manual', # documentclass 105 | True, # documents ref'd from toc-tree only 106 | ), 107 | ] 108 | 109 | latex_show_pagerefs = True 110 | 111 | # -- Options for autodoc - typehints --- 112 | 113 | set_type_checking_flag = True 114 | -------------------------------------------------------------------------------- /pydetex/pipelines.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | PIPELINES 6 | Defines the pipelines which apply parsers. 7 | """ 8 | 9 | __all__ = [ 10 | 'simple', 11 | 'strict', 12 | 'strict_eqn', 13 | 'PipelineType' 14 | ] 15 | 16 | import pydetex.parsers as par 17 | from pydetex.utils import ProgressBar 18 | from typing import Callable 19 | 20 | PipelineType = Callable 21 | 22 | 23 | def simple( 24 | s: str, 25 | lang: str = 'en', 26 | show_progress: bool = False, 27 | replace_pydetex_tags: bool = True, 28 | remove_common_tags: bool = True, 29 | replace_single_chars_eqn: bool = True, 30 | **kwargs 31 | ) -> str: 32 | """ 33 | The most simple pipeline ever. 34 | 35 | :param s: String latex 36 | :param lang: Language tag of the code 37 | :param show_progress: Show progress bar 38 | :param replace_pydetex_tags: Replace pydetex tags like symbols, cites 39 | :param remove_common_tags: Call ``remove_common_tags`` parser 40 | :param replace_single_chars_eqn: Replaces all single char equations 41 | :return: String with no latex! 
42 | """ 43 | if len(s) == 0: 44 | return s 45 | steps = 17 46 | if not replace_pydetex_tags: 47 | steps -= 1 48 | if not replace_single_chars_eqn: 49 | steps -= 1 50 | pb = kwargs.get('progressbar', ProgressBar(steps)) if show_progress else None 51 | s = '\n'.join(s.splitlines()) # Removes \r\n 52 | s = par.process_inputs(s, pb=pb) 53 | s = par.remove_comments(s, pb=pb) 54 | s = par.process_begin_document(s, pb=pb) 55 | s = par.simple_replace(s, pb=pb) 56 | s = par.process_def(s, pb=pb, replace=kwargs.get('replace_defs', False)) 57 | if remove_common_tags: 58 | s = par.remove_common_tags(s, pb=pb) 59 | s = par.process_cite(s, pb=pb, compress_cite=kwargs.get('compress_cite', True)) 60 | s = par.process_citeauthor(s, lang, pb=pb) 61 | s = par.process_ref(s, pb=pb) 62 | s = par.process_labels(s, pb=pb) 63 | s = par.process_items(s, lang, pb=pb) 64 | if replace_single_chars_eqn: 65 | s = par.process_chars_equations(s, lang, single_only=True, pb=pb) 66 | s = par.unicode_chars_equations(s, pb=pb) 67 | s = par.remove_comments(s, pb=pb) # comments, replace tags, strip 68 | if replace_pydetex_tags: 69 | s = par.replace_pydetex_tags(s, pb=pb, **kwargs) 70 | s = par.strip_punctuation(s, pb=pb) 71 | s = par.simple_replace(s, pb=pb) 72 | if s[-1] == '\\': 73 | s = s[0:len(s) - 1] 74 | return s 75 | 76 | 77 | def strict( 78 | s: str, 79 | lang: str = 'en', 80 | show_progress: bool = False, 81 | eqn_simple: bool = True, 82 | **kwargs 83 | ) -> str: 84 | """ 85 | Apply simple + removes all commands. 86 | 87 | :param s: String latex 88 | :param lang: Language tag of the code 89 | :param show_progress: Show progress bar 90 | :param eqn_simple: If true, replace equations with a label, else, attempt to write it as-is 91 | :return: String with no latex! 92 | """ 93 | pb = ProgressBar(steps=24) if show_progress else None 94 | if 'progressbar' not in kwargs.keys(): 95 | # noinspection PyTypeChecker 96 | kwargs['progressbar'] = pb 97 | s = simple(s, lang, replace_pydetex_tags=False, remove_common_tags=False, 98 | show_progress=show_progress, replace_single_chars_eqn=False, **kwargs) # 15 steps 99 | s = par.process_chars_equations(s, lang, single_only=not eqn_simple, pb=pb) 100 | s = par.remove_equations(s, pb=pb) 101 | s = par.remove_environments(s, pb=pb) 102 | s = par.remove_commands_param(s, lang, pb=pb) 103 | s = par.remove_commands_param_noargv(s, pb=pb) 104 | s = par.remove_comments(s, pb=pb) 105 | s = par.replace_pydetex_tags(s, pb=pb, **kwargs) 106 | s = par.strip_punctuation(s, pb=pb) 107 | s = par.simple_replace(s, pb=pb) 108 | return s 109 | 110 | 111 | def strict_eqn( 112 | s: str, 113 | lang: str = 'en', 114 | show_progress: bool = False, 115 | **kwargs 116 | ) -> str: 117 | """ 118 | Same as strict, but replaces the equations with their string representation. 119 | 120 | :param s: String latex 121 | :param lang: Language tag of the code 122 | :param show_progress: Show progress bar 123 | :return: String with no latex! 124 | """ 125 | return strict(s, lang, show_progress, eqn_simple=False, **kwargs) 126 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | 2 | ======= 3 | PyDetex 4 | ======= 5 | 6 | .. image:: https://img.shields.io/badge/author-Pablo%20Pizarro%20R.-lightgray.svg 7 | :target: https://ppizarror.com 8 | :alt: @ppizarror 9 | 10 | .. 
image:: https://img.shields.io/badge/license-MIT-blue.svg 11 | :target: https://opensource.org/licenses/MIT 12 | :alt: License MIT 13 | 14 | .. image:: https://img.shields.io/badge/python-3.7+-red.svg 15 | :target: https://www.python.org/downloads 16 | :alt: Python 3.7+ 17 | 18 | .. image:: https://badge.fury.io/py/pydetex.svg 19 | :target: https://pypi.org/project/pydetex 20 | :alt: PyPi package 21 | 22 | .. image:: https://img.shields.io/github/actions/workflow/status/ppizarror/PyDetex/ci.yml?branch=master 23 | :target: https://github.com/ppizarror/PyDetex/actions/workflows/ci.yml 24 | :alt: Build status 25 | 26 | .. image:: https://app.fossa.com/api/projects/git%2Bgithub.com%2Fppizarror%2FPyDetex.svg?type=shield 27 | :target: https://app.fossa.com/projects/git%2Bgithub.com%2Fppizarror%2FPyDetex?ref=badge_shield 28 | :alt: FOSSA Status 29 | 30 | .. image:: https://readthedocs.org/projects/pydetex/badge/?version=latest 31 | :target: https://pydetex.readthedocs.io 32 | :alt: Documentation Status 33 | 34 | .. image:: https://codecov.io/gh/ppizarror/PyDetex/branch/master/graph/badge.svg 35 | :target: https://codecov.io/gh/ppizarror/PyDetex 36 | :alt: Codecov 37 | 38 | .. image:: https://img.shields.io/github/issues/ppizarror/PyDetex 39 | :target: https://github.com/ppizarror/PyDetex/issues 40 | :alt: Open issues 41 | 42 | .. image:: https://img.shields.io/pypi/dm/pydetex?color=purple 43 | :target: https://pypi.org/project/pydetex 44 | :alt: PyPi downloads 45 | 46 | .. image:: https://static.pepy.tech/personalized-badge/pydetex?period=total&units=international_system&left_color=grey&right_color=lightgrey&left_text=total%20downloads 47 | :target: https://pepy.tech/project/pydetex 48 | :alt: Total downloads 49 | 50 | .. image:: https://img.shields.io/badge/buy%20me%20a-Ko--fi-02b9fe 51 | :target: https://ko-fi.com/ppizarror 52 | :alt: Buy me a Ko-fi 53 | 54 | Source repo on `GitHub `_, 55 | and run it on `Repl.it `_ 56 | 57 | Introduction 58 | ------------ 59 | 60 | PyDetex is a Python application that transforms LaTeX code into plain text. It has multiple 61 | language support (15+), detects repeated words, offers a dictionary (synonyms, antonyms, 62 | definitions), and many more things to come! 63 | 64 | Comprehensive documentation for the latest version (if you plan to use the API) 65 | is available at https://pydetex.readthedocs.io 66 | 67 | Install Instructions 68 | -------------------- 69 | 70 | PyDetex can be installed via pip for MacOS, Windows & Linux. Simply run: 71 | 72 | .. code-block:: bash 73 | 74 | $> python3 pip install pydetex -U 75 | 76 | Also, compiled binaries for Windows (x64) and macOS are available through GitHub releases. 77 | 78 | Launch the GUI or use the library 79 | --------------------------------- 80 | 81 | You can just run this command anywhere to execute the application. 82 | 83 | .. code-block:: bash 84 | 85 | $> python3 -m pydetex.gui 86 | 87 | .. figure:: https://raw.githubusercontent.com/ppizarror/pydetex/master/docs/_static/example_simple.png 88 | :scale: 40% 89 | :align: center 90 | 91 | **(Simple Pipeline)** Tadada... !!! A simple GUI to process your LaTex and paste it into Google Docs, an email, or Grammarly \(^o^)/ 92 | 93 | .. figure:: https://raw.githubusercontent.com/ppizarror/pydetex/master/docs/_static/example_strict.png 94 | :scale: 40% 95 | :align: center 96 | 97 | **(Strict Pipeline)** The strict pipeline removes all commands or replaces them with some known tags. 98 | 99 | 100 | .. 
figure:: https://raw.githubusercontent.com/ppizarror/pydetex/master/docs/_static/pydetex_windows.png 101 | :scale: 40% 102 | :align: center 103 | 104 | Multiple options to configure: Check repeated words, highlight undetected code, or use different pipelines. 105 | 106 | You can also import the library and use the parsers (methods that take latex code 107 | and perform a single task) or the pipelines (a combination of parsers). For more 108 | information, please visit the `documentation `_. 109 | 110 | .. code-block:: python 111 | 112 | import pydetex.pipelines as pip 113 | text = "This is a \\textbf{LaTex} code..." 114 | out = pip.simple(text) 115 | 116 | TO-DOs 117 | ------ 118 | 119 | Currently, many things must be improved: 120 | 121 | - Add syntax checking for several languages, like `language-check `_. 122 | - Custom support for environments, such as *table*. 123 | 124 | Author 125 | ------ 126 | 127 | `Pablo Pizarro R. `_ | 2021 - 2025 128 | -------------------------------------------------------------------------------- /test/data/example_tables_strict.txt: -------------------------------------------------------------------------------- 1 | % !TeX spellcheck = en_US 2 | 3 | \subsection{Datasets} 4 | 5 | Datasets have played an important role within floor plan analysis as there is not a standard notation for its composition; therefore, designed models must incorporate specific rules for each particular style, facing high variability due to: (1) the visual representation of the building, wherein best cases only 70\% of the graphical information is compliant with some standard rules \cite{Ah-Soon1997}, (2) the nature of the information contained, and (3) the way of the information is visually represented \cite{DelasHeras2014}. Moreover, each floor plan dataset has limitations regarding quantity or complexity. Thus, researchers opt to utilize the datasets suitable for their purposes, including specific processing steps that could not be generalized to other formats \cite{Kim2021}. \\ 6 | 7 | For such datasets to be useful in floor plan analysis, there must be pixel-wise annotations for objects such as walls, openings, and rooms. However, there are few public datasets because it is difficult for floor plans to be invariably labeled due to ambiguity in notation and the need for high-level expertise for object recognition \cite{Mace2010, DelasHeras2014}. Even though several practical tools have been developed to annotate them conveniently \cite{Rendek2004, Russell2008, DelasHeras2015}, it is difficult to do so because there is no way to guarantee the same annotations from different experts, especially for complicated plans \cite{Kim2021}. \\ 8 | 9 | % revisar ---> 10 | % + ROBIN 11 | % + SESYD 12 | \begin{table*} 13 | \begin{threeparttable} 14 | \centering 15 | \caption{Datasets used by floor plan analysis research.} 16 | \itemresize{1}{ 17 | \begin{tabular}[t]{lcL{13.2cm}} 18 | \hline 19 | \textbf{Dataset (year)} & \textbf{Public} & \textbf{Annotation (quantity)} \\ 20 | \hline 21 | % antes usaba \checked \tnote{c} 22 | 23 | FPLAN-POLY \cite{Rusinol2010} (2010) & \cite{Rusinol2010a} & Walls, doors, windows, and furniture from 37 classes in vectorized format (42) \\ 24 | 25 | SESYD \cite{Delalandre2010} (2010) & \cite{Delalandre2010a} & Walls, doors, windows, and six furniture classes; 10 different synthetic apartment configurations, designed to study symbol recognition. 
Res 1837--6775 (1000) \\ % res 26 | 27 | \hline 28 | 29 | \end{tabular} 30 | } 31 | % \vspace{\baselineskip} 32 | \begin{tablenotes} 33 | Note: Res -- Resolution in pixels (px). 34 | \item[a] \url{http://dag.cvc.uab.es/resources/floorplans} (all links visited on 10/01/2021) 35 | \item[b] \href{a}{b} 36 | \item[c] \url{http://mathieu.delalandre.free.fr/projects/sesyd} 37 | % \item[d] \url{https://www.cs.toronto.edu/~fidler/projects/rent3D.html} 38 | \end{tablenotes} 39 | \label{tab:databases} 40 | \end{threeparttable} 41 | \end{table*} 42 | 43 | \def\heightfp {3.5cm} 44 | \begin{figure*}[t] 45 | \centering 46 | \caption{Floor plan image examples from datasets.} 47 | \itemresize{1}{ 48 | \begin{tabular}[t]{ccccc} 49 | \includegraphics[height=\heightfp]{datasets/fplanpoly.png} & 50 | \includegraphics[height=\heightfp]{datasets/sesyd.png} & 51 | \includegraphics[height=\heightfp]{datasets/cvcfp1} & 52 | \includegraphics[height=\heightfp]{datasets/r3d1} & 53 | \includegraphics[height=\heightfp]{datasets/sydneyhouse4} \\ 54 | 55 | \footnotesize {\textbf{FPLAN-POLY} \cite{Rusinol2010}} & 56 | \footnotesize {\textbf{SESYD} \cite{Delalandre2010}} & 57 | \footnotesize {\textbf{CVC-FP} \cite{DelasHeras2015}} & 58 | \footnotesize {\textbf{R3D -- Rent3D} \cite{ChenxiLiu2015}} & 59 | \footnotesize {\textbf{SydneyHouse} \cite{Chu2016}} \\ 60 | 61 | &&&& \\ 62 | 63 | \includegraphics[height=\heightfp]{datasets/rfp2} & 64 | \includegraphics[height=\heightfp]{datasets/robin} & 65 | \includegraphics[height=\heightfp]{datasets/r2v1} & 66 | \includegraphics[width=\heightfp,angle=90]{datasets/cubicasa5k1} & 67 | \includegraphics[width=\heightfp,angle=90]{datasets/rplan4.pdf} \\ 68 | 69 | \footnotesize {\textbf{R-FP -- Rakuten} \cite{Dodge2017}} & 70 | \footnotesize {\textbf{ROBIN} \cite{Sharma2017}} & 71 | \footnotesize {\textbf{R2V} \cite{Liu2017} / \textbf{LIFULL} \cite{NationalInstituteofInformaticsNII2021}} & 72 | \footnotesize {\textbf{CubiCasa5K} \cite{Kalervo2019}} & 73 | \footnotesize {\textbf{RPLAN} \cite{Wu2019}} \\ 74 | 75 | 76 | &&&& \\ 77 | 78 | \includegraphics[height=\heightfp]{datasets/bti.jpg} & 79 | \includegraphics[width=\heightfp,angle=90]{datasets/eais2_1} & 80 | \includegraphics[height=\heightfp]{datasets/zscvfp} & 81 | \includegraphics[height=\heightfp]{datasets/rfp.jpg} & 82 | \includegraphics[width=\heightfp,angle=90]{datasets/ruraldataset2.jpg} \\ 83 | 84 | \footnotesize {\textbf{BTI} \cite{Surikov2020}} & 85 | \footnotesize {\textbf{EAIS} \cite{Jang2020, MinistryofLandandTransport2021}} & 86 | \footnotesize {\textbf{ZSCVFP} \cite{Dong2021}} & 87 | \footnotesize {\textbf{RFP} \cite{Lv2021}} & 88 | \footnotesize {\textbf{RuralHomeData} \cite{Lu2021}} \\ 89 | 90 | \end{tabular} 91 | } 92 | \label{dataset:imgs} 93 | \end{figure*} 94 | 95 | The existing datasets were summarized in Table \ref{tab:databases}, considering its source article, availability, annotation, and quantity, ordered by release year. Figure \ref{dataset:imgs} illustrates a selection of images from the datasets considered within the review. It can be noted that there are distinct drawing styles and semantics among the apartment and house plans; some have colored floors, text, icons, dimension lines, furniture, and walls with several styles, angles, and complex arrangements. 
These diverse settings were exploited by rule-based methods, described in section \ref{rulebased}, which recognize walls, doors, windows, furniture, and rooms by defining algorithms that considered different approaches specific to each style; or by learning-based ones (section \ref{learningbased}), that trained models to automatically recognize the objects. -------------------------------------------------------------------------------- /pydetex/res/u_symbols.txt: -------------------------------------------------------------------------------- 1 | \texttrademark ™ 2 | \trademark ™ 3 | \textregistered ® 4 | \registered ® 5 | \copyright © 6 | \pilcrow ¶ 7 | \pound £ 8 | \euro € 9 | \cents ¢ 10 | \section § 11 | \space ␣ 12 | \degree ° 13 | \zeta ζ 14 | \Xi Ξ 15 | \xi ξ 16 | \wr ≀ 17 | \wp ℘ 18 | \wedge ∧ 19 | \land ∧ 20 | \Vvdash ⊪ 21 | \veebar ⊻ 22 | \vee ∨ 23 | \lor ∨ 24 | \vdots ⋮ 25 | \Vdash ⊩ 26 | \vDash ⊨ 27 | \vdash ⊢ 28 | \vartriangleright ⊳ 29 | \vartriangleleft ⊲ 30 | \vartriangle △ 31 | \vartheta ϑ 32 | \varsigma ς 33 | \varrho ϱ 34 | \varpropto ∝ 35 | \varpi ϖ 36 | \varphi φ 37 | \varnothing ∅ 38 | \varkappa ϰ 39 | \varepsilon ε 40 | \upuparrows ⇈ 41 | \Upsilon Υ 42 | \upsilon υ 43 | \uplus ⊎ 44 | \upharpoonright ↾ 45 | \upharpoonleft ↿ 46 | \Updownarrow ⇕ 47 | \updownarrow ↕ 48 | \Uparrow ⇑ 49 | \uparrow ↑ 50 | \unrhd ⊵ 51 | \unlhd ⊴ 52 | \twoheadrightarrow ↠ 53 | \twoheadleftarrow ↞ 54 | \trianglerighteq ⊵ 55 | \triangleright ▷ 56 | \triangleq ≜ 57 | \trianglelefteq ⊴ 58 | \triangleleft ◁ 59 | \triangledown ▽ 60 | \triangle △ 61 | \top ⊤ 62 | \times × 63 | \thicksim ∼ 64 | \thickapprox ≈ 65 | \Theta Θ 66 | \theta θ 67 | \therefore ∴ 68 | \tau τ 69 | \swarrow ↙ 70 | \surd √ 71 | \supseteq ⊇ 72 | \Supset ⋑ 73 | \supset ⊃ 74 | \sum ∑ 75 | \succsim ≿ 76 | \succeq ⪰ 77 | \succcurlyeq ≽ 78 | \succ ≻ 79 | \subseteq ⊆ 80 | \Subset ⋐ 81 | \subset ⊂ 82 | \star ⋆ 83 | \square □ 84 | \sqsupseteq ⊒ 85 | \sqsupset ⊐ 86 | \sqsubseteq ⊑ 87 | \sqsubset ⊏ 88 | \sqcup ⊔ 89 | \sqcap ⊓ 90 | \sphericalangle ∢ 91 | \spadesuit ♠ 92 | \smile ⌣ 93 | \smallsmile ⌣ 94 | \smallsetminus ∖ 95 | \smallfrown ⌢ 96 | \simeq ≃ 97 | \sim ∼ 98 | \Sigma Σ 99 | \sigma σ 100 | \shortparallel ∥ 101 | \sharp ♯ 102 | \setminus ∖ 103 | \searrow ↘ 104 | \rVert ‖ 105 | \rtimes ⋊ 106 | \Rsh ↱ 107 | \Rrightarrow ⇛ 108 | \risingdotseq ≓ 109 | \rightthreetimes ⋌ 110 | \rightsquigarrow ⇝ 111 | \rightrightarrows ⇉ 112 | \rightleftharpoons ⇌ 113 | \rightleftarrows ⇄ 114 | \rightharpoonup ⇀ 115 | \rightharpoondown ⇁ 116 | \rightarrowtail ↣ 117 | \Rightarrow ⇒ 118 | \rightarrow → 119 | \to → 120 | \rho ρ 121 | \rhd ⊳ 122 | \rfloor ⌋ 123 | \Re ℜ 124 | \rceil ⌉ 125 | \Psi Ψ 126 | \psi ψ 127 | \propto ∝ 128 | \prod ∏ 129 | \prime ′ 130 | \precsim ≾ 131 | \preceq ⪯ 132 | \preccurlyeq ≼ 133 | \prec ≺ 134 | \pm ± 135 | \Pi Π 136 | \pi π 137 | \pitchfork ⋔ 138 | \Phi Φ 139 | \phi ϕ 140 | \perp ⊥ 141 | \partial ∂ 142 | \parallel ∥ 143 | \otimes ⊗ 144 | \oslash ⊘ 145 | \oplus ⊕ 146 | \ominus ⊖ 147 | \Omega Ω 148 | \omega ω 149 | \oint ∮ 150 | \odot ⊙ 151 | \nwarrow ↖ 152 | \nu ν 153 | \notin ∉ 154 | \ni ∋ 155 | \nexists ∄ 156 | \neq ≠ 157 | \neg ¬ 158 | \nearrow ↗ 159 | \natural ♮ 160 | \nabla ∇ 161 | \mu μ 162 | \multimap ⊸ 163 | \mp ∓ 164 | \models ⊨ 165 | \mid ∣ 166 | \mho ℧ 167 | \measuredangle ∡ 168 | \mapsto ↦ 169 | \lVert ‖ 170 | \ltimes ⋉ 171 | \Lsh ↰ 172 | \lozenge ◊ 173 | \looparrowright ↬ 174 | \looparrowleft ↫ 175 | \Longrightarrow ⟹ 176 | \longrightarrow ⟶ 177 | \longmapsto ⟼ 178 | \Longleftrightarrow ⟺ 179 | \longleftrightarrow 
⟷ 180 | \Longleftarrow ⟸ 181 | \longleftarrow ⟵ 182 | \lll ⋘ 183 | \Lleftarrow ⇚ 184 | \ll ≪ 185 | \lhd ⊲ 186 | \lfloor ⌊ 187 | \lesssim ≲ 188 | \lessgtr ≶ 189 | \lesseqgtr ⋚ 190 | \lessdot ⋖ 191 | \leqslant ⩽ 192 | \leqq ≦ 193 | \leq ≤ 194 | \leftthreetimes ⋋ 195 | \leftrightsquigarrow ↭ 196 | \leftrightharpoons ⇋ 197 | \leftrightarrows ⇆ 198 | \Leftrightarrow ⇔ 199 | \leftrightarrow ↔ 200 | \leftleftarrows ⇇ 201 | \leftharpoonup ↼ 202 | \leftharpoondown ↽ 203 | \leftarrowtail ↢ 204 | \Leftarrow ⇐ 205 | \leftarrow ← 206 | \leadsto ↝ 207 | \le ≤ 208 | \lceil ⌈ 209 | \Lambda Λ 210 | \lambda λ 211 | \kappa κ 212 | \Join ⋈ 213 | \iota ι 214 | \intercal ⊺ 215 | \int ∫ 216 | \infty ∞ 217 | \in ∈ 218 | \implies ⇒ 219 | \Im ℑ 220 | \hslash ℏ 221 | \hookrightarrow ↪ 222 | \hookleftarrow ↩ 223 | \heartsuit ♡ 224 | \hbar ℏ 225 | \gtrsim ≳ 226 | \gtrless ≷ 227 | \gtreqless ⋛ 228 | \gtrdot ⋗ 229 | \gimel ℷ 230 | \ggg ⋙ 231 | \gg ≫ 232 | \geqq ≧ 233 | \geq ≥ 234 | \ge ≥ 235 | \Gamma Γ 236 | \gamma γ 237 | \frown ⌢ 238 | \forall ∀ 239 | \flat ♭ 240 | \Finv Ⅎ 241 | \fallingdotseq ≒ 242 | \exists ∃ 243 | \eth ð 244 | \eta η 245 | \equiv ≡ 246 | \eqcirc ≖ 247 | \epsilon ∊ 248 | \emptyset ∅ 249 | \ell ℓ 250 | \downharpoonright ⇂ 251 | \downharpoonleft ⇃ 252 | \downdownarrows ⇊ 253 | \Downarrow ⇓ 254 | \downarrow ↓ 255 | \dots … 256 | \dotplus ∔ 257 | \doteqdot ≑ 258 | \doteq ≐ 259 | \divideontimes ⋇ 260 | \div ÷ 261 | \digamma Ϝ 262 | \diamondsuit ♢ 263 | \Diamond ◇ 264 | \diamond ⋄ 265 | \Delta Δ 266 | \delta δ 267 | \ddots ⋱ 268 | \ddagger ‡ 269 | \dashv ⊣ 270 | \dashrightarrow ⇢ 271 | \dashleftarrow ⇠ 272 | \daleth ℸ 273 | \dagger † 274 | \curvearrowright ↷ 275 | \curvearrowleft ↶ 276 | \curlywedge ⋏ 277 | \curlyvee ⋎ 278 | \curlyeqsucc ⋟ 279 | \curlyeqprec ⋞ 280 | \Cup ⋓ 281 | \cup ∪ 282 | \coprod ∐ 283 | \cong ≅ 284 | \complement ∁ 285 | \clubsuit ♣ 286 | \circledS Ⓢ 287 | \circleddash ⊝ 288 | \circledcirc ⊚ 289 | \circledast ⊛ 290 | \circlearrowright ↻ 291 | \circlearrowleft ↺ 292 | \circeq ≗ 293 | \circ ∘ 294 | \chi χ 295 | \centerdot ⋅ 296 | \cdots ⋯ 297 | \cdot ⋅ 298 | \Cap ⋒ 299 | \cap ∩ 300 | \Bumpeq ≎ 301 | \bumpeq ≏ 302 | \bullet ∙ 303 | \boxtimes ⊠ 304 | \boxplus ⊞ 305 | \boxminus ⊟ 306 | \boxdot ⊡ 307 | \Box □ 308 | \bowtie ⋈ 309 | \bot ⊥ 310 | \blacktriangleright ▶ 311 | \blacktriangleleft ◀ 312 | \blacktriangledown ▼ 313 | \blacktriangle ▲ 314 | \blacksquare ■ 315 | \blacklozenge ◆ 316 | \bigwedge ⋀ 317 | \bigvee ⋁ 318 | \biguplus ⨄ 319 | \bigtriangleup △ 320 | \bigtriangledown ▽ 321 | \bigstar ★ 322 | \bigsqcup ⨆ 323 | \bigotimes ⨂ 324 | \bigoplus ⨁ 325 | \bigodot ⨀ 326 | \bigcup ⋃ 327 | \bigcirc ○ 328 | \bigcap ⋂ 329 | \between ≬ 330 | \beth ℶ 331 | \beta β 332 | \because ∵ 333 | \barwedge ⊼ 334 | \doublebarwedge ⩞ 335 | \backsim ∽ 336 | \backprime ‵ 337 | \backepsilon ∍ 338 | \asymp ≍ 339 | \ast ∗ 340 | \approxeq ≊ 341 | \approx ≈ 342 | \angle ∠ 343 | \alpha α 344 | \aleph ℵ 345 | \rangle ⟩ 346 | \langle ⟨ 347 | \sqrt √ 348 | \frac12 ½ 349 | \frac13 ⅓ 350 | \frac23 ⅔ 351 | \frac14 ¼ 352 | \frac34 ¾ 353 | \frac15 ⅕ 354 | \frac25 ⅖ 355 | \frac35 ⅗ 356 | \frac45 ⅘ 357 | \frac16 ⅙ 358 | \frac56 ⅚ 359 | \frac17 ⅐ 360 | \frac18 ⅛ 361 | \frac38 ⅜ 362 | \frac58 ⅝ 363 | \frac78 ⅞ 364 | \frac19 ⅑ 365 | \frac110 ⅒ 366 | -------------------------------------------------------------------------------- /test/test_pipelines.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | TEST PIPELINES 6 | Test the 
pipelines. 7 | """ 8 | 9 | from test._base import BaseTest 10 | import pydetex.pipelines as pip 11 | import pydetex.parsers as par 12 | import os 13 | 14 | 15 | class ParserTest(BaseTest): 16 | 17 | def test_simple(self) -> None: 18 | """ 19 | Test simple pipeline. 20 | """ 21 | s = 'Table \\ref{tab:review-rulebased} details the reviewed rule-based ' \ 22 | 'methods within floor plan recognition, considering the datasets ' \ 23 | 'used (Table \\ref{tab:databases}) and the four categories of tasks,' \ 24 | ' such as (1) \\textit{Graphics separation}, (2) \\textit{Pattern ' \ 25 | 'recognition}, (3) \\textit{Vectorization}, and (4) \\textit{Structural modeling}.' 26 | self.assertEqual( 27 | pip.simple(s, show_progress=True), 28 | 'Table 1 details the reviewed rule-based methods within floor plan ' 29 | 'recognition, considering the datasets used (Table 2) and the four ' 30 | 'categories of tasks, such as (1) Graphics separation, (2) Pattern ' 31 | 'recognition, (3) Vectorization, and (4) Structural modeling.') 32 | 33 | s = 'aa\\begin{document}x\\end{document}' 34 | self.assertEqual(pip.simple(s, show_progress=True), 'x') 35 | 36 | s = '$a$a\\def\\a{a}\\a' 37 | self.assertEqual(pip.simple(s, show_progress=True, replace_defs=True), 'aaa') 38 | 39 | # New lines 40 | s = 'New space \\ and line \\\\Epic' 41 | self.assertEqual(pip.simple(s), 'New space and line\nEpic') 42 | 43 | # Empty 44 | self.assertEqual(pip.simple(''), '') 45 | 46 | # Test with invalid last char 47 | self.assertEqual(pip.simple('This is epic\\\nThis is epic\\'), 'This is epic\nThis is epic') 48 | 49 | # Test replacers 50 | s = 'This is a \\Thetamagic but\\xspace also \\Theta is not or \\Theta\\Epic or \\Theta\n sad' 51 | t = 'This is a \\Thetamagic but also Θ is not or Θ\Epic or Θ\nsad' 52 | self.assertEqual(pip.simple(s), t) 53 | 54 | # Check files 55 | example_files = [ 56 | ('data/example_simple_itemize.txt', 'data/example_simple_itemize_output.txt'), 57 | ('data/example_simple_comments.txt', 'data/example_simple_comments_output.txt') 58 | ] 59 | for f in example_files: 60 | self.assertEqual(pip.simple(par._load_file_search(f[0])), par._load_file_search(f[1])) 61 | 62 | def test_strict(self) -> None: 63 | """ 64 | Strict pipeline. 65 | """ 66 | s = 'This contains \\insertimageanother{\label{1}}{2}{3}commands, but must be removed!\\' 67 | self.assertEqual(pip.strict(s, show_progress=True), 68 | 'This contains commands, but must be removed!') 69 | 70 | s = 'This \$12bn is very \citeauthor{nice!} nice' 71 | self.assertEqual(pip.strict(s), 'This $12bn is very [author] nice') 72 | 73 | s = 'This \\quoteepic{code removed!}is removed\\totally. Not epic \\cite{nice}' 74 | self.assertEqual(pip.strict(s), 'This is removed. Not epic [1]') 75 | 76 | s = 'This \\quoteepic{code removed!}is removed \\totally nice. Not epic \\cite{nice}' 77 | self.assertEqual(pip.strict(s), 'This is removed nice. 
Not epic [1]') 78 | 79 | # Empty 80 | self.assertEqual(pip.strict('', show_progress=True), '') 81 | 82 | s = '\DeclareUnicodeCharacter{2292}{\ensuremath{\ensuremath{\\to}}}' 83 | self.assertEqual(pip.strict(s), '') 84 | 85 | s = """% !TeX spellcheck = en_US 86 | \\begin{table*}[t] 87 | 88 | \centering 89 | % \\vspace{\\baselineskip} 90 | \\begin{tablenotes} 91 | \item[a] Graphics separation 92 | \item[b] Door/Window/Furniture/Others 93 | \item[c] OCR or Dimensions were recognized 94 | \item[d] Vectorization 95 | \item[e] Modeling (Graph, other) 96 | \end{tablenotes} 97 | \label{tab:review-rulebased} 98 | \end{threeparttable} 99 | \end{table*} 100 | """ 101 | self.assertEqual( 102 | pip.strict(s, show_progress=True), 103 | '- [a] Graphics separation\n- [b] Door/Window/Furniture/Others\n- [c' 104 | '] OCR or Dimensions were recognized\n- [d] Vectorization\n- [e] Mod' 105 | 'eling (Graph, other)') 106 | 107 | # Check files 108 | example_files = [ 109 | ('data/example_tables_strict.txt', 'data/example_tables_strict_output.txt'), 110 | ('data/example_placeholder.txt', 'data/example_placeholder_output.txt'), 111 | ('data/example_simple_figure_caption.txt', 'data/example_simple_figure_caption_output.txt'), 112 | ('data/example_simple_cite.txt', 'data/example_simple_cite_output.txt') 113 | ] 114 | for f in example_files: 115 | self.assertEqual(pip.strict(par._load_file_search(f[0])), 116 | par._load_file_search(f[1])) 117 | 118 | # Test remove environments 119 | self.assertEqual(pip.strict(par._load_file_search('data/example_complex_envs.txt'), 120 | show_progress=True).strip(), 121 | par._load_file_search('data/example_complex_envs_output.txt')) 122 | 123 | # Exclusive tests 124 | example_files = [ 125 | ('data/example_complex_template.txt', 'data/example_complex_template_output.txt') 126 | ] 127 | if not (True and 'GITHUB' not in os.environ): # If not test complex 128 | example_files.clear() 129 | for f in example_files: 130 | self.assertEqual(pip.strict(par._load_file_search(f[0])), par._load_file_search(f[1])) 131 | 132 | def test_strict_eqn(self) -> None: 133 | """ 134 | Test strict eqn pipeline. 135 | """ 136 | self.assertEqual( 137 | pip.strict_eqn('My value is: $0.4375\ \\frac{\\text{tonf}}{{\\text{m}}^2}$. Nice!'), 138 | 'My value is: 0.4375 (tonf)/(m²). Nice!') 139 | -------------------------------------------------------------------------------- /specs/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | SPECS 6 | Define spec constructor. 
7 | """ 8 | 9 | __all__ = [ 10 | 'block_cipher', 11 | 'get_analysis', 12 | 'get_bundle', 13 | 'get_collect', 14 | 'get_exe', 15 | 'get_pyz', 16 | 'is_osx', 17 | 'is_win', 18 | 'save_zip' 19 | ] 20 | 21 | from pydetex import __file__ 22 | from pydetex.version import ver 23 | from zipfile import ZipFile, ZIP_DEFLATED 24 | import os 25 | import platform 26 | 27 | print('Inializing specs') 28 | print(f'Current path: {os.getcwd()}') 29 | print(f'Platform: {platform.system()}') 30 | 31 | sep = os.path.sep 32 | is_osx = platform.system() == 'Darwin' 33 | is_win = platform.system() == 'Windows' 34 | 35 | # Configure 36 | app_name = 'PyDetex' if not is_osx else 'PyDetex_macOS' 37 | app_icon = '../pydetex/res/icon.ico' if not is_osx else '../pydetex/res/icon.icns' 38 | block_cipher = None 39 | 40 | excluded_binaries = [ 41 | 'brotli._brotli', 42 | 'cryptography.hazmat.bindings._rust', 43 | 'libc++.1.dylib', 44 | 'libiconv.2.dylib', 45 | 'libicudata.68.dylib', 46 | 'libicuuc.68.dylib', 47 | 'libncurses.6.dylib', 48 | 'libomp.dylib', 49 | 'libreadline.8.dylib', 50 | 'libtinfo.6.dylib', 51 | 'libtinfow.6.dylib', 52 | 'libxml2.2.dylib', 53 | 'libzmq.5.dylib', 54 | 'yaml._yaml' 55 | ] 56 | excluded_binaries_contains = [ 57 | 'api-ms-win-', 58 | # 'lib-dynload', 59 | 'lxml', 60 | 'markupsafe', 61 | f'miktex{sep}bin', 62 | 'pandas', 63 | 'pygame', 64 | # 'sklearn', 65 | 'Windows Performance Toolkit', 66 | f'zmq{sep}backend{sep}cython' 67 | ] 68 | excluded_modules = [ 69 | 'IPython', 70 | 'matplotlib', 71 | 'notebook', 72 | 'numpy', 73 | 'PIL', 74 | 'PyQt5', 75 | 'scipy' 76 | ] 77 | 78 | 79 | def _append_to_datas(datas: list, file_path: str, target_folder: str, 80 | base_target_folder: str = 'pydetex', relative: bool = True) -> None: 81 | """ 82 | Add a path to datas. 83 | 84 | :param datas: Data list 85 | :param file_path: File path 86 | :param target_folder: Folder to paste the resources 87 | :param base_target_folder: Base folder of the resource 88 | :param relative: If True append pydetex_folder 89 | """ 90 | if relative: 91 | res_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), file_path) 92 | else: 93 | res_path = file_path 94 | if target_folder == '': 95 | target_folder = os.path.basename(os.path.dirname(res_path)) 96 | if os.path.exists(res_path): 97 | datas.append((res_path, os.path.join(base_target_folder, target_folder))) 98 | else: 99 | raise FileNotFoundError(f'{file_path} does not exist') 100 | 101 | 102 | def _file_sz(f: str) -> str: 103 | """ 104 | Computes the file size in KB. 105 | """ 106 | sz = round(os.path.getsize(f) / 1024, 1) 107 | return f'{sz} KB' 108 | 109 | 110 | def _path(p: str, sz: int = 60) -> str: 111 | """ 112 | Returns a parsed path. 113 | """ 114 | p = p.replace(sep, '/') 115 | if len(p) < sz: 116 | return p 117 | else: 118 | return '...' + p[len(p) - sz:len(p)] 119 | 120 | 121 | def get_analysis(analysis, toc): 122 | """ 123 | Return the ANALYSIS object. 
124 | """ 125 | datas = [] 126 | for f in [ 127 | 'res/cog.ico', 128 | 'res/dictionary.ico', 129 | 'res/icon.gif', 130 | 'res/icon.ico', 131 | 'res/placeholder_en.tex', 132 | 'res/placeholder_es.tex', 133 | 'res/stopwords.json', 134 | 'res/u_subscripts.txt', 135 | 'res/u_superscripts.txt', 136 | 'res/u_symbols.txt', 137 | 'res/u_textbb.txt', 138 | 'res/u_textbf.txt', 139 | 'res/u_textcal.txt', 140 | 'res/u_textfrak.txt', 141 | 'res/u_textit.txt', 142 | 'res/u_textmono.txt' 143 | ]: 144 | _append_to_datas(datas, f, target_folder='') 145 | 146 | # Make object 147 | a = analysis( 148 | ['../gui.py'], 149 | binaries=[], 150 | cipher=block_cipher, 151 | datas=datas, 152 | excludes=excluded_modules, 153 | hiddenimports=['pydetex'], 154 | hooksconfig={}, 155 | hookspath=[], 156 | noarchive=False, 157 | pathex=['../'], 158 | runtime_hooks=[], 159 | win_no_prefer_redirects=False, 160 | win_private_assemblies=False 161 | ) 162 | 163 | # Update its properties 164 | print('Updating binaries') 165 | new_binaries = [] 166 | for i in a.binaries: 167 | ex_contains = False 168 | for j in excluded_binaries_contains: 169 | if j in i[1]: 170 | ex_contains = True 171 | break 172 | if 'sklearn' in i[0] and i[0] != 'sklearn.__check_build._check_build': 173 | ex_contains = True 174 | if ex_contains or i[0] in excluded_binaries: 175 | print(f'\tRemoved:\t{_path(i[1])} ({_file_sz(i[1])}) <{i[0]}>') 176 | continue 177 | new_binaries.append(i) 178 | print('Program binaries') 179 | a.binaries = toc(new_binaries) 180 | for j in a.binaries: 181 | print(f'\t{j[0]}\n\t\t{_path(j[1])} ({_file_sz(j[1])})') 182 | 183 | # Scripts 184 | print('Program scripts') 185 | for j in a.scripts: 186 | print(f'\t{j[0]}\t{_path(j[1])}') 187 | 188 | # Return the analysis 189 | return a 190 | 191 | 192 | def get_bundle(bundle, exe): 193 | """ 194 | Return a bundle for OSX. 195 | """ 196 | return bundle( 197 | exe, 198 | name=app_name + '.app', 199 | icon=app_icon, 200 | bundle_identifier='com.ppizarror', 201 | info_plist={ 202 | 'NSPrincipalClass': 'NSApplication', 203 | 'NSAppleScriptEnabled': False 204 | }, 205 | ) 206 | 207 | 208 | def get_collect(collect, a, exe): 209 | """ 210 | Return the COLLECT object. 211 | """ 212 | return collect( 213 | exe, 214 | a.binaries, 215 | a.zipfiles, 216 | a.datas, 217 | strip=False, 218 | name=app_name, 219 | upx_exclude=[], 220 | upx=True 221 | ) 222 | 223 | 224 | def get_exe(exe, pyz, a, single: bool): 225 | """ 226 | Return the EXE object. 227 | """ 228 | if single: 229 | return exe( 230 | pyz, 231 | a.scripts, 232 | a.binaries, 233 | a.zipfiles, 234 | a.datas, 235 | [], 236 | bootloader_ignore_signals=False, 237 | codesign_identity=None, 238 | console=False, 239 | debug=False, 240 | disable_windowed_traceback=False, 241 | entitlements_file=None, 242 | icon=app_icon, 243 | name=app_name, 244 | runtime_tmpdir=None, 245 | strip=False, 246 | target_arch=None, 247 | upx_exclude=[], 248 | upx=True 249 | ) 250 | else: 251 | return exe( 252 | pyz, 253 | a.scripts, 254 | [], 255 | bootloader_ignore_signals=False, 256 | codesign_identity=None, 257 | console=True, 258 | debug=False, 259 | disable_windowed_traceback=False, 260 | entitlements_file=None, 261 | exclude_binaries=True, 262 | icon=app_icon, 263 | name=app_name, 264 | strip=False, 265 | target_arch=None, 266 | upx=True 267 | ) 268 | 269 | 270 | def get_pyz(pyz, a): 271 | """ 272 | Return the PYZ object.
273 | """ 274 | return pyz(a.pure, a.zipped_data, cipher=block_cipher) 275 | 276 | 277 | def save_zip(filename, output, in_folder='dist', out_folder='dist/out_zip'): 278 | """ 279 | Save a zip file. 280 | """ 281 | # Removes the old file 282 | if not os.path.isdir(out_folder): 283 | os.makedirs(out_folder) 284 | for k in os.listdir(out_folder): 285 | if output in k: 286 | print(f'Removing old zip: {out_folder}/{k}') 287 | os.remove(f'{out_folder}/{k}') 288 | 289 | filename_full = f'{in_folder}/{filename}' 290 | output = f'{out_folder}/{output}' 291 | out_file = f'{output}_v{ver}.zip' 292 | print(f'Compressing to: {out_file}') 293 | with ZipFile(out_file, 'w', ZIP_DEFLATED) as zipf: 294 | if os.path.isdir(filename_full): 295 | zipdir(filename_full, zipf) 296 | else: 297 | zipf.write(filename_full, arcname=filename) 298 | 299 | 300 | def zipdir(path, ziph): 301 | """ 302 | Zip a folder. 303 | """ 304 | for root, dirs, files in os.walk(path): 305 | for file in files: 306 | ziph.write(os.path.join(root, file), 307 | os.path.relpath(os.path.join(root, file), 308 | os.path.join(path, '..'))) 309 | -------------------------------------------------------------------------------- /pydetex/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | UTILS 6 | Module that contains all util methods and classes used in parsers and pipelines, 7 | from tex, language, and low-level. 8 | """ 9 | 10 | __all__ = [ 11 | 'apply_tag_between_inside_char_command', 12 | 'apply_tag_tex_commands', 13 | 'apply_tag_tex_commands_no_argv', 14 | 'Button', 15 | 'check_repeated_words', 16 | 'complete_langs_dict', 17 | 'detect_language', 18 | 'find_tex_command_char', 19 | 'find_tex_commands', 20 | 'find_tex_commands_noargv', 21 | 'find_tex_environments', 22 | 'format_number_d', 23 | 'get_diff_startend_word', 24 | 'get_language_name', 25 | 'get_local_path', 26 | 'get_number_of_day', 27 | 'get_tex_commands_args', 28 | 'get_word_from_cursor', 29 | 'IS_OSX', 30 | 'LangTexTextTags', 31 | 'make_stemmer', 32 | 'open_file', 33 | 'ProgressBar', 34 | 'RESOURCES_PATH', 35 | 'split_tags', 36 | 'syntax_highlight', 37 | 'TEX_COMMAND_CHARS', 38 | 'TEX_EQUATION_CHARS', 39 | 'tex_to_unicode', 40 | 'tokenize', 41 | 'validate_float', 42 | 'validate_int' 43 | ] 44 | 45 | import datetime 46 | import os 47 | import platform 48 | import sys 49 | import time 50 | 51 | from pathlib import Path 52 | from typing import List, Tuple, Dict 53 | 54 | from pydetex._fonts import FONT_TAGS as _FONT_TAGS 55 | from pydetex._utils_lang import * 56 | from pydetex._utils_tex import * 57 | 58 | # Resources path 59 | __actualpath = str(os.path.abspath(os.path.dirname(__file__))).replace('\\', '/') + '/' 60 | RESOURCES_PATH = __actualpath + 'res/' 61 | 62 | # Check OS 63 | IS_OSX = platform.system() == 'Darwin' 64 | 65 | # Import Button widget 66 | if IS_OSX: 67 | from tkmacosx import Button 68 | else: 69 | from tkinter import Button 70 | 71 | 72 | def split_tags(s: str, tags: List[str]) -> List[Tuple[str, str]]: 73 | """ 74 | Split a string based on tags; each line is then tagged. 75 | 76 | String format: 77 | [TAG1]new line[TAG2]this is[TAG1]very epic 78 | 79 | Output: 80 | [('TAG1', 'new line'), ('TAG2', 'this is'), ('TAG1', 'very epic') ...
] 81 | 82 | :param s: String 83 | :param tags: Tag list 84 | :return: Split tags 85 | """ 86 | assert len(tags) > 0 87 | tagged_lines: List[Tuple[str, str]] = [] 88 | r = 0 89 | for tag in tags: 90 | if r == 0: # First occurrence 91 | new = s.split(tag) 92 | for j in new: 93 | if j == '': 94 | continue 95 | tagged_lines.append((tag, j)) 96 | else: 97 | new_tagged_lines: List[Tuple[str, str]] = [] 98 | for j in range(len(tagged_lines)): 99 | if tag in tagged_lines[j][1]: # If tag exists 100 | new = tagged_lines[j][1].split(tag) 101 | new_tagged_lines.append((tagged_lines[j][0], new[0])) 102 | for w in range(len(new) - 1): 103 | new_tagged_lines.append((tag, new[w + 1])) 104 | else: 105 | new_tagged_lines.append(tagged_lines[j]) 106 | tagged_lines = new_tagged_lines 107 | 108 | r += 1 109 | 110 | # Merge consecutive tags 111 | merged_tags: List[Tuple[str, str]] = [] 112 | r = 0 113 | for tagged in tagged_lines: 114 | if len(merged_tags) == 0 or tagged[0] != merged_tags[r - 1][0]: 115 | merged_tags.append(tagged) 116 | r += 1 117 | else: 118 | merged_tags[r - 1] = (tagged[0], merged_tags[r - 1][1] + tagged[1]) 119 | 120 | return merged_tags 121 | 122 | 123 | def button_text(s: str) -> str: 124 | """ 125 | Generates the button text. 126 | 127 | :param s: Button's text 128 | :return: Text 129 | """ 130 | return s if IS_OSX else f' {s} ' 131 | 132 | 133 | def validate_int(p: str) -> bool: 134 | """ 135 | Validate an integer. 136 | 137 | :param p: Value 138 | :return: True if integer 139 | """ 140 | if p == '' or p == '-': 141 | return True 142 | try: 143 | p = float(p) 144 | return int(p) == p 145 | except ValueError: 146 | pass 147 | return False 148 | 149 | 150 | def validate_float(p: str) -> bool: 151 | """ 152 | Validate a float. 153 | 154 | :param p: Value 155 | :return: True if float 156 | """ 157 | if p == '' or p == '-': 158 | return True 159 | try: 160 | float(p) 161 | return True 162 | except ValueError: 163 | pass 164 | return False 165 | 166 | 167 | def syntax_highlight(s: str) -> str: 168 | """ 169 | Syntax highlighter. 170 | 171 | :param s: Latex string code 172 | :return: Code with format 173 | """ 174 | # Add initial normal 175 | s = _FONT_TAGS['normal'] + s.strip() 176 | 177 | # Format equations 178 | s = apply_tag_between_inside_char_command( 179 | s=s, 180 | symbols_char=TEX_EQUATION_CHARS, 181 | tags=(_FONT_TAGS['equation_char'], _FONT_TAGS['equation_inside'], 182 | _FONT_TAGS['equation_char'], _FONT_TAGS['normal']) 183 | ) 184 | 185 | # Format commands with {arguments} 186 | s = apply_tag_tex_commands( 187 | s=s, 188 | tags=(_FONT_TAGS['tex_command'], 189 | _FONT_TAGS['normal'], 190 | _FONT_TAGS['tex_argument'], 191 | _FONT_TAGS['normal'], 192 | '') 193 | ) 194 | 195 | # Format commands without arguments 196 | s = apply_tag_tex_commands_no_argv( 197 | s=s, 198 | tags=(_FONT_TAGS['tex_command'], _FONT_TAGS['normal']) 199 | ) 200 | 201 | # Return formatted string 202 | return s 203 | 204 | 205 | def format_number_d(n: int, c: str) -> str: 206 | """ 207 | Formats a number with a thousands separator. 208 | 209 | :param n: Number 210 | :param c: Format char 211 | :return: Formatted number 212 | """ 213 | assert isinstance(n, int) 214 | return format(n, ',').replace(',', c) 215 | 216 | 217 | def get_number_of_day() -> int: 218 | """ 219 | Return the number of the day from the current year. 220 | 221 | :return: Day number 222 | """ 223 | return datetime.datetime.now().timetuple().tm_yday 224 | 225 | 226 | def open_file(f: str) -> str: 227 | """ 228 | Open file and return its string.
229 | 230 | :param f: Filename 231 | :return: File content 232 | """ 233 | o = open(f, encoding='utf-8') 234 | text = ''.join(o.readlines()) 235 | o.close() 236 | return text 237 | 238 | 239 | def make_path_if_not_exists(path: str) -> str: 240 | """ 241 | Create path if not exists. 242 | 243 | :param path: Path 244 | :return: Path 245 | """ 246 | if not os.path.isdir(path): 247 | Path(path).mkdir(parents=True, exist_ok=True) 248 | return path 249 | 250 | 251 | def get_local_path() -> str: 252 | """ 253 | :return: Returns the app local path 254 | """ 255 | appdata = os.getenv('LOCALAPPDATA') 256 | if appdata is None: 257 | appdata = os.path.join(get_user_path(), 'Applications') 258 | 259 | path = os.path.join(appdata, 'PyDetex') 260 | return make_path_if_not_exists(path) 261 | 262 | 263 | def get_user_path() -> str: 264 | """ 265 | :return: Returns the user path 266 | """ 267 | return os.path.expanduser('~') 268 | 269 | 270 | class ProgressBar(object): 271 | """ 272 | Basic progress bar implementation. 273 | """ 274 | 275 | _current: int 276 | _last_step: float 277 | _size: int 278 | _step_times: Dict[str, float] 279 | _steps: int 280 | _t0: float 281 | 282 | def __init__(self, steps: int, size: int = 15) -> None: 283 | """ 284 | Constructor. 285 | 286 | :param steps: How many steps have the procedure 287 | :param size: Bar size 288 | """ 289 | assert isinstance(steps, int) and steps >= 1 290 | assert isinstance(size, int) and size >= 1 291 | self._current = 0 292 | self._last_step = time.time() 293 | self._size = size # Bar size 294 | self._step_times = {} 295 | self._steps = steps - 1 296 | self._t0 = time.time() 297 | 298 | def _print_progress_bar(self, i: int, max_: int, post_text: str) -> None: 299 | """ 300 | Prints a progress bar. 301 | 302 | :param i: Progress bar 303 | :param max_: Max steps 304 | :param post_text: Status 305 | """ 306 | j = i / max_ 307 | sys.stdout.write('\r') 308 | sys.stdout.write(f"[{'=' * int(self._size * j):{self._size}s}] {int(100 * j)}% {post_text}") 309 | sys.stdout.flush() 310 | 311 | def update(self, status: str = '', print_total_time: bool = True) -> None: 312 | """ 313 | Update the current status to a new step. 314 | 315 | :param status: Status text 316 | :param print_total_time: Prints total computing time 317 | """ 318 | if self._current > self._steps: 319 | return 320 | self._print_progress_bar(self._current, self._steps, status) 321 | dt = time.time() - self._last_step 322 | self._last_step = time.time() 323 | self._step_times[status] = dt 324 | self._current += 1 325 | if self._current == self._steps + 1: 326 | print('') 327 | sys.stdout.flush() 328 | if print_total_time: 329 | print(f'Process finished in {time.time() - self._t0:.3f} seconds') 330 | 331 | def detail_times(self) -> None: 332 | """ 333 | Print times. 334 | """ 335 | for k in self._step_times.keys(): 336 | print(f'{self._step_times[k]:.3f}s\t{k}') 337 | 338 | def reset(self) -> None: 339 | """ 340 | Reset the steps. 341 | """ 342 | self._current = 0 343 | self._t0 = time.time() 344 | self._last_step = time.time() 345 | self._step_times.clear() 346 | -------------------------------------------------------------------------------- /pydetex/_utils_lang.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | UTILS LANG 6 | Language utils. 
7 | """ 8 | 9 | __all__ = [ 10 | 'check_repeated_words', 11 | 'complete_langs_dict', 12 | 'detect_language', 13 | 'get_diff_startend_word', 14 | 'get_language_name', 15 | 'get_phrase_from_cursor', 16 | 'get_word_from_cursor', 17 | 'LangTexTextTags', 18 | 'make_stemmer', 19 | 'tokenize' 20 | ] 21 | 22 | # langdetect supports: 23 | # af, ar, bg, bn, ca, cs, cy, da, de, el, en, es, et, fa, fi, fr, gu, he, 24 | # hi, hr, hu, id, it, ja, kn, ko, lt, lv, mk, ml, mr, ne, nl, no, pa, pl, 25 | # pt, ro, ru, sk, sl, so, sq, sv, sw, ta, te, th, tl, tr, uk, ur, vi, zh-cn, zh-tw 26 | import langdetect 27 | 28 | import json 29 | import os 30 | 31 | # noinspection PyProtectedMember 32 | from PyMultiDictionary._utils import tokenize, get_language_name 33 | from nltk.stem import SnowballStemmer 34 | from typing import List, Tuple, Optional, Dict 35 | from warnings import warn 36 | 37 | # Resources path 38 | __actualpath = str(os.path.abspath(os.path.dirname(__file__))).replace('\\', '/') + '/' 39 | 40 | # Load all stopwords 41 | with open(__actualpath + 'res/' + 'stopwords.json', encoding='UTF-8') as json_data: 42 | _STOPWORDS = json.load(json_data) 43 | 44 | _AVAILABLE_STEMMER_LANGS: Dict[str, str] = { 45 | 'ar': 'arabic', 46 | 'da': 'danish', 47 | 'de': 'german', 48 | 'en': 'english', 49 | 'es': 'spanish', 50 | 'fi': 'finnish', 51 | 'fr': 'french', 52 | 'hu': 'hungarian', 53 | 'it': 'italian', 54 | 'nb': 'norwegian', 55 | 'nd': 'norwegian', 56 | 'nl': 'dutch', 57 | 'nn': 'norwegian', 58 | 'no': 'norwegian', 59 | 'pt': 'portuguese', 60 | 'ro': 'romanian', 61 | 'ru': 'russian', 62 | 'sv': 'swedish' 63 | } 64 | 65 | 66 | class LangTexTextTags(object): 67 | """ 68 | Stores the tex tags for several commands. 69 | """ 70 | 71 | _lang: Dict[str, Dict[str, str]] 72 | 73 | def __init__(self) -> None: 74 | """ 75 | Constructor. 76 | """ 77 | self._lang = { 78 | 'en': { 79 | 'caption': 'CAPTION: {0}', 80 | 'citeauthor_multiple': 'authors', 81 | 'citeauthor_single': 'author', 82 | 'figure_caption': 'FIGURE_CAPTION: {0}', 83 | 'link': 'LINK: {0}', 84 | 'multi_char_equ': 'EQUATION_{0}', 85 | 'sub_figure_title': 'SUB_FIGURE TITLE: {0}' 86 | }, 87 | 'es': { 88 | 'caption': 'LEYENDA: {0}', 89 | 'citeauthor_multiple': 'autores', 90 | 'citeauthor_single': 'autor', 91 | 'figure_caption': 'LEYENDA_FIGURA: {0}', 92 | 'link': 'ENLACE: {0}', 93 | 'multi_char_equ': 'ECUACIÓN_{0}', 94 | 'sub_figure_title': 'TÍTULO SUB_FIGURA: {0}' 95 | } 96 | } 97 | complete_langs_dict(self._lang) 98 | 99 | def get(self, lang: str, tag: str) -> str: 100 | """ 101 | Retrieves a language tag value. 102 | 103 | :param lang: Language 104 | :param tag: Tag to retrieve 105 | :return: Value of the language's tag 106 | """ 107 | if lang not in self._lang.keys(): 108 | lang = 'en' 109 | if tag not in self._lang[lang].keys(): 110 | raise ValueError(f'Lang {lang} tag {tag} does not exist') 111 | return self._lang[lang][tag] 112 | 113 | 114 | def complete_langs_dict(lang: Dict[str, Dict[str, str]]) -> None: 115 | """ 116 | Completes a language dict. Assumes ``'en'`` is the main language. 117 | 118 | :param lang: Language dict 119 | """ 120 | for k in lang.keys(): 121 | if k == 'en': 122 | continue 123 | for t in lang['en'].keys(): 124 | if t not in lang[k]: 125 | error = f'Language entry "{t}" on lang "{k}" does not exist' 126 | warn(error) 127 | lang[k][t] = lang['en'][t] 128 | 129 | 130 | def detect_language(s: str) -> str: 131 | """ 132 | Detects languages. 
133 | 134 | :param s: String 135 | :return: Detected language 136 | """ 137 | if s == '': 138 | return '–' 139 | try: 140 | lang = langdetect.detect(s) 141 | if lang == 'zh-cn' or lang == 'zh-tw': 142 | lang = 'zh' 143 | return lang 144 | except langdetect.lang_detect_exception.LangDetectException: # No features in text 145 | return '–' 146 | 147 | 148 | def get_diff_startend_word(original: str, new: str) -> Tuple[str, str]: 149 | """ 150 | Return the difference of the word from start and end, for example: 151 | 152 | .. code-block:: none 153 | 154 | original XXXwordYY 155 | new word 156 | diff = (XXX, YY) 157 | 158 | :param original: Original word 159 | :param new: New word 160 | :return: Diff word 161 | """ 162 | pos: int = original.find(new) 163 | if pos == -1: 164 | return '', '' 165 | return original[0:pos], original[pos + len(new):len(original)] 166 | 167 | 168 | def make_stemmer(lang: str) -> Optional['SnowballStemmer']: 169 | """ 170 | Returns a stemmer. 171 | 172 | :param lang: Lang code 173 | :return: Stemmer or None if not available 174 | """ 175 | if lang in _AVAILABLE_STEMMER_LANGS.keys(): 176 | return SnowballStemmer(_AVAILABLE_STEMMER_LANGS[lang]) 177 | return None 178 | 179 | 180 | def check_repeated_words( 181 | s: str, 182 | lang: str, 183 | min_chars: int, 184 | window: int, 185 | stopwords: bool, 186 | stemming: bool, 187 | ignore: Optional[List[str]] = None, 188 | remove_tokens: Optional[List[str]] = None, 189 | font_tag_format: str = '', 190 | font_param_format: str = '', 191 | font_normal_format: str = '', 192 | tag: str = 'repeated' 193 | ) -> str: 194 | """ 195 | Check repeated words. 196 | 197 | :param s: Text 198 | :param lang: Language code 199 | :param min_chars: Min chars to accept 200 | :param window: Window words span to check 201 | :param stopwords: Use stopwords 202 | :param stemming: Use stemming 203 | :param ignore: Ignore a list of words 204 | :param remove_tokens: Remove keys before verify repeat 205 | :param font_tag_format: Tag's format 206 | :param font_param_format: Param's format 207 | :param font_normal_format: Normal's format 208 | :param tag: Tag's name 209 | :return: Text with repeated words marked 210 | """ 211 | assert isinstance(window, int) and window > 1 212 | assert isinstance(min_chars, int) and min_chars >= 1 213 | 214 | if not ignore: 215 | ignore = [] 216 | if not remove_tokens: 217 | remove_tokens = [] 218 | 219 | # Check languages 220 | if lang in _AVAILABLE_STEMMER_LANGS.keys(): 221 | stop = _STOPWORDS[lang] 222 | stemmer = make_stemmer(lang) 223 | else: 224 | return s 225 | 226 | ignored_words = [] 227 | # Apply filters to ignored words 228 | for w in ignore: 229 | if stemming: 230 | w = stemmer.stem(w) 231 | if stopwords and w in stop: 232 | w = '' 233 | if w == '': 234 | continue 235 | ignored_words.append(w) 236 | 237 | # Add space to newline 238 | newline_format = ' \n' 239 | s = s.replace('\n', newline_format) 240 | 241 | # Separeate words 242 | wordswin = [] # Stores the words 243 | words = s.split(' ') 244 | new_s = [] 245 | 246 | for w in words: 247 | original_w = w 248 | 249 | # Remove tokens 250 | if len(remove_tokens) > 0: 251 | for rt in remove_tokens: 252 | w = w.replace(rt, '') 253 | 254 | # If command in word 255 | if '\\' in w: 256 | w = '' 257 | 258 | # Apply filters 259 | if len(w) <= min_chars: 260 | w = '' 261 | if w != '': 262 | w = tokenize(w) 263 | if stemming: 264 | w = stemmer.stem(w) 265 | if stopwords and w in stop: 266 | w = '' 267 | 268 | # Check if word is ignored 269 | if w in ignored_words: 270 | w 
= '' 271 | 272 | # Check if the word exists on the list 273 | if w in wordswin and w != '': 274 | ww = wordswin[::-1].index(w) + 1 275 | stemmed_word = tokenize(original_w) 276 | diff_word = get_diff_startend_word(original_w, stemmed_word) 277 | if diff_word == ('', ''): 278 | stemmed_word = original_w 279 | original_w = f'{diff_word[0]}{font_tag_format}<{tag}:{ww}>' \ 280 | f'{font_param_format}{stemmed_word}' \ 281 | f'{font_tag_format}{font_normal_format}{diff_word[1]}' 282 | 283 | # Push the new word 284 | wordswin.append(w) 285 | if len(wordswin) > window: 286 | wordswin.pop(0) 287 | 288 | # Append word 289 | new_s.append(original_w) 290 | 291 | # Return string with repeated format 292 | out_s = ' '.join(new_s) 293 | out_s = out_s.replace(newline_format, '\n') 294 | return out_s 295 | 296 | 297 | def get_word_from_cursor(s: str, pos: int) -> Tuple[str, int, int]: 298 | """ 299 | Return the word from a string on a given cursor. 300 | 301 | :param s: String 302 | :param pos: Position to check the string 303 | :return: Word, position start, position end 304 | """ 305 | assert 0 <= pos < len(s) 306 | pos += 1 307 | s = ' ' + s 308 | p = 0 309 | 310 | # Check if pos is an empty character, find the following word 311 | if s[pos].strip() == '': 312 | found = False 313 | for k in range(pos, len(s)): # First 314 | if s[k].strip() != '' and not found: 315 | p = k 316 | found = True 317 | elif s[k].strip() == '' and found: 318 | return s[p:k].strip(), p, k - 1 319 | 320 | else: 321 | for w in range(pos): # Find prev 322 | j = pos - w - 1 323 | if s[j].strip() == '': 324 | p = j 325 | break 326 | elif s[j].strip() == '>': 327 | p = j + 1 328 | break 329 | for j in range(pos + 1, len(s)): # Find next 330 | if s[j].strip() in ('', '<'): 331 | return s[p:j].strip(), p, j - 1 332 | 333 | return '', -1, -1 334 | 335 | 336 | def get_phrase_from_cursor(s: str, pos_init: int, pos_end: int) -> str: 337 | """ 338 | Get a phrase from the cursor. It tries to retrieve the entire words selected. 339 | 340 | :param s: String 341 | :param pos_init: Initial position 342 | :param pos_end: End position 343 | :return: Retrieved word 344 | """ 345 | assert pos_init <= pos_end 346 | 347 | # Get the first word 348 | s0, i, _ = get_word_from_cursor(s, pos_init) 349 | # noinspection PyUnusedLocal 350 | j: int = i 351 | 352 | if s[pos_end].strip() == '': # Is empty, find the previous word 353 | for k in range(1, pos_end): 354 | _k = pos_end - k 355 | if s[_k].strip() != '': 356 | # noinspection PyUnusedLocal 357 | j = _k + 1 358 | break 359 | else: 360 | _, _, j = get_word_from_cursor(s, pos_end) 361 | 362 | if j <= i: 363 | return s0 364 | 365 | return s[i:j] 366 | -------------------------------------------------------------------------------- /pydetex/_symbols.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | SYMBOLS 6 | Contain latex commands converted to symbol. 
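# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the repository): check_repeated_words()
# above keeps a sliding window of the last `window` normalized words and tags
# any word already present in it with its back-distance. The stand-alone
# helper below shows only that core idea; it skips stemming, stopwords and
# the font-format tags, and the '<repeated:n>' marker is merely assumed to
# mirror the tag built by the real function.

def _mark_repeats(text: str, window: int = 5, min_chars: int = 4) -> str:
    recent = []  # sliding window holding the last `window` normalized words
    out = []
    for word in text.split(' '):
        key = word.lower().strip('.,;:')
        if len(key) >= min_chars and key in recent:
            dist = recent[::-1].index(key) + 1  # words back to the previous use
            word = f'<repeated:{dist}>{word}'
        out.append(word)
        recent.append(key)
        if len(recent) > window:
            recent.pop(0)
    return ' '.join(out)

# _mark_repeats('the model fits the model well')
# -> 'the model fits the <repeated:3>model well'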
7 | """ 8 | 9 | __all__ = [ 10 | 'REPLACE_EQUATION_SYMBOLS_LIBRARY', 11 | 'REPLACE_SYMBOLS_LIBRARY', 12 | 'REPLACE_TEX_COMMANDS_LIBRARY' 13 | ] 14 | 15 | from typing import List, Tuple 16 | 17 | REPLACE_SYMBOLS_LIBRARY: List[Tuple[str, str]] = [ 18 | # Common 19 | ('\\ ', ' '), 20 | ('\\\\', '\n'), 21 | 22 | # Letters 23 | ('--', '–'), 24 | ('---', '—'), 25 | (r'\#', '#'), 26 | (r'\&', '&'), 27 | (r'\_', '_'), 28 | ('~', ' '), 29 | ('fi', 'fi') 30 | ] 31 | 32 | REPLACE_EQUATION_SYMBOLS_LIBRARY: List[Tuple[str, str]] = [ 33 | ('^(', '⁽'), 34 | ('^)', '⁾'), 35 | ('^+', '⁺'), 36 | ('^-', '⁻'), 37 | ('^0', '⁰'), 38 | ('^1', '¹'), 39 | ('^2', '²'), 40 | ('^3', '³'), 41 | ('^4', '⁴'), 42 | ('^5', '⁵'), 43 | ('^6', '⁶'), 44 | ('^7', '⁷'), 45 | ('^8', '⁸'), 46 | ('^9', '⁹'), 47 | ('^=', '⁼'), 48 | ('^A', 'ᴬ'), 49 | ('^a', 'ᵃ'), 50 | ('^B', 'ᴮ'), 51 | ('^b', 'ᵇ'), 52 | ('^c', 'ᶜ'), 53 | ('^D', 'ᴰ'), 54 | ('^d', 'ᵈ'), 55 | ('^E', 'ᴱ'), 56 | ('^e', 'ᵉ'), 57 | ('^f', 'ᶠ'), 58 | ('^G', 'ᴳ'), 59 | ('^g', 'ᵍ'), 60 | ('^h', 'ʰ'), 61 | ('^H', 'ᴴ'), 62 | ('^I', 'ᴵ'), 63 | ('^i', 'ⁱ'), 64 | ('^j', 'ʲ'), 65 | ('^J', 'ᴶ'), 66 | ('^K', 'ᴷ'), 67 | ('^k', 'ᵏ'), 68 | ('^l', 'ˡ'), 69 | ('^L', 'ᴸ'), 70 | ('^M', 'ᴹ'), 71 | ('^m', 'ᵐ'), 72 | ('^N', 'ᴺ'), 73 | ('^n', 'ⁿ'), 74 | ('^O', 'ᴼ'), 75 | ('^o', 'ᵒ'), 76 | ('^P', 'ᴾ'), 77 | ('^p', 'ᵖ'), 78 | ('^r', 'ʳ'), 79 | ('^R', 'ᴿ'), 80 | ('^s', 'ˢ'), 81 | ('^T', 'ᵀ'), 82 | ('^t', 'ᵗ'), 83 | ('^U', 'ᵁ'), 84 | ('^u', 'ᵘ'), 85 | ('^v', 'ᵛ'), 86 | ('^w', 'ʷ'), 87 | ('^W', 'ᵂ'), 88 | ('^x', 'ˣ'), 89 | ('^y', 'ʸ'), 90 | ('^z', 'ᶻ'), 91 | ('_(', '₍'), 92 | ('_)', '₎'), 93 | ('_+', '₊'), 94 | ('_-', '₋'), 95 | ('_0', '₀'), 96 | ('_1', '₁'), 97 | ('_2', '₂'), 98 | ('_3', '₃'), 99 | ('_4', '₄'), 100 | ('_5', '₅'), 101 | ('_6', '₆'), 102 | ('_7', '₇'), 103 | ('_8', '₈'), 104 | ('_9', '₉'), 105 | ('_=', '₌'), 106 | ('_a', 'ₐ'), 107 | ('_e', 'ₑ'), 108 | ('_h', 'ₕ'), 109 | ('_i', 'ᵢ'), 110 | ('_k', 'ₖ'), 111 | ('_l', 'ₗ'), 112 | ('_m', 'ₘ'), 113 | ('_n', 'ₙ'), 114 | ('_o', 'ₒ'), 115 | ('_p', 'ₚ'), 116 | ('_r', 'ᵣ'), 117 | ('_s', 'ₛ'), 118 | ('_t', 'ₜ'), 119 | ('_u', 'ᵤ'), 120 | ('_v', 'ᵥ'), 121 | ('_x', 'ₓ') 122 | ] 123 | 124 | REPLACE_TEX_COMMANDS_LIBRARY: List[Tuple[str, str]] = [ 125 | ('\\AC', '∿'), 126 | ('\\aleph', 'ℵ'), 127 | ('\\alpha', 'α'), 128 | ('\\amalg', '⨿'), 129 | ('\\angle', '∠'), 130 | ('\\approx', '≈'), 131 | ('\\approxeq', '≊'), 132 | ('\\asterism', '⁂'), 133 | ('\\asymp', '≍'), 134 | ('\\backepsilon', '϶'), 135 | ('\\backprime', '‵'), 136 | ('\\backsim', '∽'), 137 | ('\\backsimeq', '⋍'), 138 | ('\\barwedge', '⊼'), 139 | ('\\because', '∵'), 140 | ('\\beginsmallmatrix', ''), 141 | ('\\beta', 'β'), 142 | ('\\beth', 'ℶ'), 143 | ('\\between', '≬'), 144 | ('\\bigcap', '⋂'), 145 | ('\\bigcup', '⋃'), 146 | ('\\bigodot', '⨀'), 147 | ('\\bigoplus', '⨁'), 148 | ('\\bigotimes', '⨂'), 149 | ('\\bigsqcap', '⨅'), 150 | ('\\bigsqcup', '⨆'), 151 | ('\\bigvee', '⋁'), 152 | ('\\bigwedge', '⋀'), 153 | ('\\bot', '⊥'), 154 | ('\\bowtie', '⋈'), 155 | ('\\boxdot', '⊡'), 156 | ('\\boxminus', '⊟'), 157 | ('\\boxplus', '⊞'), 158 | ('\\boxtimes', '⊠'), 159 | ('\\bullet', '•'), 160 | ('\\Bumpeq', '≎'), 161 | ('\\bumpeq', '≏'), 162 | ('\\cap', '∩'), 163 | ('\\Cap', '⋒'), 164 | ('\\cdot', '·'), 165 | ('\\cdots', '⋯'), 166 | ('\\checkmark', '✓'), 167 | ('\\chi', 'χ'), 168 | ('\\circ', '∘'), 169 | ('\\circeq', '≗'), 170 | ('\\circlearrowleft', '↺'), 171 | ('\\circlearrowright', '↻'), 172 | ('\\circledast', '⊛'), 173 | ('\\circledcirc', '⊚'), 174 | ('\\circleddash', '⊝'), 175 | ('\\clubsuit', '♣'), 
176 | ('\\coloneqq', '≔'), 177 | ('\\complement', '∁'), 178 | ('\\cong', '≅'), 179 | ('\\coprod', '∐'), 180 | ('\\copyright', '©'), 181 | ('\\cup', '∪'), 182 | ('\\Cup', '⋓'), 183 | ('\\curlyeqprec', '⋞'), 184 | ('\\curlyeqsucc', '⋟'), 185 | ('\\curlyvee', '⋎'), 186 | ('\\curlywedge', '⋏'), 187 | ('\\curvearrowleft', '↶'), 188 | ('\\curvearrowright', '↷'), 189 | ('\\dagger', '†'), 190 | ('\\daleth', 'ℸ'), 191 | ('\\dashleftarrow', '⇠'), 192 | ('\\dashrightarrow', '⇢'), 193 | ('\\dashv', '⊣'), 194 | ('\\dbend', '☡'), 195 | ('\\ddag', '‡'), 196 | ('\\ddots', '⋱'), 197 | ('\\ddot{\\phantom{x}}', '̈'), 198 | ('\\Delta', 'Δ'), 199 | ('\\delta', 'δ'), 200 | ('\\diameter', '⌀'), 201 | ('\\diamond', '⋄'), 202 | ('\\diamondsuit', '♢'), 203 | ('\\Digamma', 'Ϝ'), 204 | ('\\digamma', 'ϝ'), 205 | ('\\div', '÷'), 206 | ('\\divideontimes', '⋇'), 207 | ('\\doteq', '≐'), 208 | ('\\doteqdot', '≑'), 209 | ('\\dotminus', '∸'), 210 | ('\\dotplus', '∔'), 211 | ('\\downarrow', '↓'), 212 | ('\\Downarrow', '⇓'), 213 | ('\\downdownarrows', '⇊'), 214 | ('\\downharpoonleft', '⇃'), 215 | ('\\downharpoonright', '⇂'), 216 | ('\\ell', 'ℓ'), 217 | ('\\epsilon', 'ϵ'), 218 | ('\\eqcirc', '≖'), 219 | ('\\eqcolon', '∹'), 220 | ('\\eqqcolon', '≕'), 221 | ('\\equiv', '≡'), 222 | ('\\eta', 'η'), 223 | ('\\Euler', 'ℇ'), 224 | ('\\euro', '€'), 225 | ('\\exists', '∃'), 226 | ('\\fallingdotseq', '≒'), 227 | ('\\fbox{\\checkmark}', '☑'), 228 | ('\\fbox{\\phantom{{\\checkmark}}}', '☐'), 229 | ('\\Finv', 'Ⅎ'), 230 | ('\\flat', '♭'), 231 | ('\\forall', '∀'), 232 | ('\\frac{1}{2}', '½'), 233 | ('\\frac{1}{3}', '⅓'), 234 | ('\\frac{1}{4}', '¼'), 235 | ('\\frac{1}{5}', '⅕'), 236 | ('\\frac{1}{6}', '⅙'), 237 | ('\\frac{1}{8}', '⅛'), 238 | ('\\frac{2}{3}', '⅔'), 239 | ('\\frac{2}{5}', '⅖'), 240 | ('\\frac{3}{4}', '¾'), 241 | ('\\frac{3}{5}', '⅗'), 242 | ('\\frac{4}{5}', '⅘'), 243 | ('\\frac{5}{6}', '⅚'), 244 | ('\\frac{5}{8}', '⅝'), 245 | ('\\frac{7}{8}', '⅞'), 246 | ('\\frown', '⌢'), 247 | ('\\frownie', '☹'), 248 | ('\\Game', '⅁'), 249 | ('\\Gamma', 'Γ'), 250 | ('\\gamma', 'γ'), 251 | ('\\ge', '≥'), 252 | ('\\geqq', '≧'), 253 | ('\\geqslant', '⩾'), 254 | ('\\gg', '≫'), 255 | ('\\ggg', '⋙'), 256 | ('\\gimel', 'ℷ'), 257 | ('\\gneqq', '≩'), 258 | ('\\gnsim', '⋧'), 259 | ('\\gtrdot', '⋗'), 260 | ('\\gtreqless', '⋛'), 261 | ('\\gtrless', '≷'), 262 | ('\\gtrsim', '≳'), 263 | ('\\guillemotleft', '«'), 264 | ('\\guillemotright', '»'), 265 | ('\\guilsinglleft', '‹'), 266 | ('\\guilsinglright', '›'), 267 | ('\\hat{\\phantom{x}}', '̂'), 268 | ('\\hbar', 'ℏ'), 269 | ('\\heartsuit', '♡'), 270 | ('\\hookleftarrow', '↩'), 271 | ('\\hookrightarrow', '↪'), 272 | ('\\iddots', '⋰'), 273 | ('\\iiiint', '⨌'), 274 | ('\\iiint', '∭'), 275 | ('\\iint', '∬'), 276 | ('\\Im', 'ℑ'), 277 | ('\\IM', 'ℑ'), 278 | ('\\imath', 'ı'), 279 | ('\\in', '∈'), 280 | ('\\infty', '∞'), 281 | ('\\int', '∫'), 282 | ('\\intercal', '⊺'), 283 | ('\\invamp', '⅋'), 284 | ('\\iota', 'ι'), 285 | ('\\jmath', 'ȷ'), 286 | ('\\Join', '⨝'), 287 | ('\\kappa', 'κ'), 288 | ('\\Koppa', 'Ϟ'), 289 | ('\\koppa', 'ϟ'), 290 | ('\\Lambda', 'Λ'), 291 | ('\\lambda', 'λ'), 292 | ('\\langle', '〈'), 293 | ('\\lceil', '⌈'), 294 | ('\\ldots', '…'), 295 | ('\\left', ''), 296 | ('\\leftarrow', '←'), 297 | ('\\Leftarrow', '⇐'), 298 | ('\\LeftArrowBar', '⇤'), 299 | ('\\leftarrowtail', '↢'), 300 | ('\\leftarrowtriangle', '⇽'), 301 | ('\\leftharpoondown', '↽'), 302 | ('\\leftharpoonup', '↼'), 303 | ('\\leftleftarrows', '⇇'), 304 | ('\\leftrightarrow', '↔'), 305 | ('\\Leftrightarrow', '⇔'), 306 | ('\\leftrightarrows', 
'⇆'), 307 | ('\\leftrightarrowtriangle', '⇿'), 308 | ('\\leftrightharpoons', '⇋'), 309 | ('\\leftrightsquigarrow', '↭'), 310 | ('\\leftsquigarrow', '⇜'), 311 | ('\\leftthreetimes', '⋋'), 312 | ('\\leqq', '≦'), 313 | ('\\leqslant', '⩽'), 314 | ('\\lessdot', '⋖'), 315 | ('\\lesseqgtr', '⋚'), 316 | ('\\lessgtr', '≶'), 317 | ('\\lesssim', '≲'), 318 | ('\\lfloor', '⌊'), 319 | ('\\lightning', '↯'), 320 | ('\\ll', '≪'), 321 | ('\\llangle', '⟪'), 322 | ('\\llbracket', '〚'), 323 | ('\\Lleftarrow', '⇚'), 324 | ('\\lll', '⋘'), 325 | ('\\ln', '㏑'), 326 | ('\\lneqq', '≨'), 327 | ('\\lnsim', '⋦'), 328 | ('\\log', '㏒'), 329 | ('\\longleftarrow', '⟵'), 330 | ('\\longrightarrow', '⟶'), 331 | ('\\looparrowleft', '↫'), 332 | ('\\looparrowright', '↬'), 333 | ('\\Lsh', '↰'), 334 | ('\\ltimes', '⋉'), 335 | ('\\mapsfrom', '↤'), 336 | ('\\mapsto', '↦'), 337 | ('\\Mapsto', '⇰'), 338 | ('\\mathbb{0}', '𝟘'), 339 | ('\\mathbb{1}', '𝟙'), 340 | ('\\mathbb{2}', '𝟚'), 341 | ('\\mathbb{3}', '𝟛'), 342 | ('\\mathbb{4}', '𝟜'), 343 | ('\\mathbb{5}', '𝟝'), 344 | ('\\mathbb{6}', '𝟞'), 345 | ('\\mathbb{7}', '𝟟'), 346 | ('\\mathbb{8}', '𝟠'), 347 | ('\\mathbb{9}', '𝟡'), 348 | ('\\mathbb{\\gamma}', 'ℽ'), 349 | ('\\mathbb{\\Gamma}', 'ℿ'), 350 | ('\\mathbb{\\pi}', 'ℼ'), 351 | ('\\mathbb{\\Pi}', 'ℾ'), 352 | ('\\mathbb{\\Sigma}', '⅀'), 353 | ('\\mathbb{A}', '𝔸'), 354 | ('\\mathbb{a}', '𝕒'), 355 | ('\\mathbb{B}', '𝔹'), 356 | ('\\mathbb{b}', '𝕓'), 357 | ('\\mathbb{C}', 'ℂ'), 358 | ('\\mathbb{c}', '𝕔'), 359 | ('\\mathbb{D}', '𝔻'), 360 | ('\\mathbb{d}', '𝕕'), 361 | ('\\mathbb{E}', '𝔼'), 362 | ('\\mathbb{e}', '𝕖'), 363 | ('\\mathbb{F}', '𝔽'), 364 | ('\\mathbb{f}', '𝕗'), 365 | ('\\mathbb{G}', '𝔾'), 366 | ('\\mathbb{g}', '𝕘'), 367 | ('\\mathbb{H}', 'ℍ'), 368 | ('\\mathbb{h}', '𝕙'), 369 | ('\\mathbb{I}', '𝕀'), 370 | ('\\mathbb{i}', '𝕚'), 371 | ('\\mathbb{J}', '𝕁'), 372 | ('\\mathbb{j}', '𝕛'), 373 | ('\\mathbb{K}', '𝕂'), 374 | ('\\mathbb{k}', '𝕜'), 375 | ('\\mathbb{L}', '𝕃'), 376 | ('\\mathbb{l}', '𝕝'), 377 | ('\\mathbb{M}', '𝕄'), 378 | ('\\mathbb{m}', '𝕞'), 379 | ('\\mathbb{N}', 'ℕ'), 380 | ('\\mathbb{n}', '𝕟'), 381 | ('\\mathbb{O}', '𝕆'), 382 | ('\\mathbb{o}', '𝕠'), 383 | ('\\mathbb{P}', 'ℙ'), 384 | ('\\mathbb{p}', '𝕡'), 385 | ('\\mathbb{Q}', 'ℚ'), 386 | ('\\mathbb{q}', '𝕢'), 387 | ('\\mathbb{R}', 'ℝ'), 388 | ('\\mathbb{r}', '𝕣'), 389 | ('\\mathbb{S}', '𝕊'), 390 | ('\\mathbb{s}', '𝕤'), 391 | ('\\mathbb{T}', '𝕋'), 392 | ('\\mathbb{t}', '𝕥'), 393 | ('\\mathbb{U}', '𝕌'), 394 | ('\\mathbb{u}', '𝕦'), 395 | ('\\mathbb{V}', '𝕍'), 396 | ('\\mathbb{v}', '𝕧'), 397 | ('\\mathbb{W}', '𝕎'), 398 | ('\\mathbb{w}', '𝕨'), 399 | ('\\mathbb{X}', '𝕏'), 400 | ('\\mathbb{x}', '𝕩'), 401 | ('\\mathbb{Y}', '𝕐'), 402 | ('\\mathbb{y}', '𝕪'), 403 | ('\\mathbb{Z}', 'ℤ'), 404 | ('\\mathbb{z}', '𝕫'), 405 | ('\\mathbf{A}', '𝐀'), 406 | ('\\mathbf{a}', '𝐚'), 407 | ('\\mathbf{B}', '𝐁'), 408 | ('\\mathbf{b}', '𝐛'), 409 | ('\\mathbf{C}', '𝐂'), 410 | ('\\mathbf{c}', '𝐜'), 411 | ('\\mathbf{D}', '𝐃'), 412 | ('\\mathbf{d}', '𝐝'), 413 | ('\\mathbf{E}', '𝐄'), 414 | ('\\mathbf{e}', '𝐞'), 415 | ('\\mathbf{F}', '𝐅'), 416 | ('\\mathbf{f}', '𝐟'), 417 | ('\\mathbf{G}', '𝐆'), 418 | ('\\mathbf{g}', '𝐠'), 419 | ('\\mathbf{H}', '𝐇'), 420 | ('\\mathbf{h}', '𝐡'), 421 | ('\\mathbf{I}', '𝐈'), 422 | ('\\mathbf{i}', '𝐢'), 423 | ('\\mathbf{J}', '𝐉'), 424 | ('\\mathbf{j}', '𝐣'), 425 | ('\\mathbf{K}', '𝐊'), 426 | ('\\mathbf{k}', '𝐤'), 427 | ('\\mathbf{L}', '𝐋'), 428 | ('\\mathbf{l}', '𝐥'), 429 | ('\\mathbf{M}', '𝐌'), 430 | ('\\mathbf{m}', '𝐦'), 431 | ('\\mathbf{N}', '𝐍'), 432 | ('\\mathbf{n}', '𝐧'), 433 
| ('\\mathbf{O}', '𝐎'), 434 | ('\\mathbf{o}', '𝐨'), 435 | ('\\mathbf{P}', '𝐏'), 436 | ('\\mathbf{p}', '𝐩'), 437 | ('\\mathbf{Q}', '𝐐'), 438 | ('\\mathbf{q}', '𝐪'), 439 | ('\\mathbf{R}', '𝐑'), 440 | ('\\mathbf{r}', '𝐫'), 441 | ('\\mathbf{S}', '𝐒'), 442 | ('\\mathbf{s}', '𝐬'), 443 | ('\\mathbf{T}', '𝐓'), 444 | ('\\mathbf{t}', '𝐭'), 445 | ('\\mathbf{U}', '𝐔'), 446 | ('\\mathbf{u}', '𝐮'), 447 | ('\\mathbf{V}', '𝐕'), 448 | ('\\mathbf{v}', '𝐯'), 449 | ('\\mathbf{W}', '𝐖'), 450 | ('\\mathbf{w}', '𝐰'), 451 | ('\\mathbf{X}', '𝐗'), 452 | ('\\mathbf{x}', '𝐱'), 453 | ('\\mathbf{Y}', '𝐘'), 454 | ('\\mathbf{y}', '𝐲'), 455 | ('\\mathbf{Z}', '𝐙'), 456 | ('\\mathbf{z}', '𝐳'), 457 | ('\\mathcal B', 'ℬ'), 458 | ('\\mathcal e', 'ℯ'), 459 | ('\\mathcal E', 'ℰ'), 460 | ('\\mathcal F', 'ℱ'), 461 | ('\\mathcal g', 'ℊ'), 462 | ('\\mathcal H', 'ℋ'), 463 | ('\\mathcal I', 'ℐ'), 464 | ('\\mathcal L', 'ℒ'), 465 | ('\\mathcal{A}', '𝓐'), 466 | ('\\mathcal{B}', '𝓑'), 467 | ('\\mathcal{C}', '𝓒'), 468 | ('\\mathcal{D}', '𝓓'), 469 | ('\\mathcal{E}', '𝓔'), 470 | ('\\mathcal{F}', '𝓕'), 471 | ('\\mathcal{G}', '𝓖'), 472 | ('\\mathcal{H}', '𝓗'), 473 | ('\\mathcal{I}', '𝓘'), 474 | ('\\mathcal{J}', '𝓙'), 475 | ('\\mathcal{K}', '𝓚'), 476 | ('\\mathcal{L}', '𝓛'), 477 | ('\\mathcal{M}', '𝓜'), 478 | ('\\mathcal{N}', '𝓝'), 479 | ('\\mathcal{O}', '𝓞'), 480 | ('\\mathcal{P}', '𝓟'), 481 | ('\\mathcal{Q}', '𝓠'), 482 | ('\\mathcal{R}', '𝓡'), 483 | ('\\mathcal{S}', '𝓢'), 484 | ('\\mathcal{T}', '𝓣'), 485 | ('\\mathcal{U}', '𝓤'), 486 | ('\\mathcal{V}', '𝓥'), 487 | ('\\mathcal{W}', '𝓦'), 488 | ('\\mathcal{X}', '𝓧'), 489 | ('\\mathcal{Y}', '𝓨'), 490 | ('\\mathcal{Z}', '𝓩'), 491 | ('\\mathfrak C', 'ℭ'), 492 | ('\\mathfrak H', 'ℌ'), 493 | ('\\mathfrak Z', 'ℨ'), 494 | ('\\mathfrak{A}', '𝔄'), 495 | ('\\mathfrak{a}', '𝔞'), 496 | ('\\mathfrak{B}', '𝔅'), 497 | ('\\mathfrak{b}', '𝔟'), 498 | ('\\mathfrak{c}', '𝔠'), 499 | ('\\mathfrak{D}', '𝔇'), 500 | ('\\mathfrak{d}', '𝔡'), 501 | ('\\mathfrak{E}', '𝔈'), 502 | ('\\mathfrak{e}', '𝔢'), 503 | ('\\mathfrak{F}', '𝔉'), 504 | ('\\mathfrak{f}', '𝔣'), 505 | ('\\mathfrak{G}', '𝔊'), 506 | ('\\mathfrak{g}', '𝔤'), 507 | ('\\mathfrak{h}', '𝔥'), 508 | ('\\mathfrak{i}', '𝔦'), 509 | ('\\mathfrak{J}', '𝔍'), 510 | ('\\mathfrak{j}', '𝔧'), 511 | ('\\mathfrak{K}', '𝔎'), 512 | ('\\mathfrak{k}', '𝔨'), 513 | ('\\mathfrak{L}', '𝔏'), 514 | ('\\mathfrak{l}', '𝔩'), 515 | ('\\mathfrak{M}', '𝔐'), 516 | ('\\mathfrak{m}', '𝔪'), 517 | ('\\mathfrak{N}', '𝔑'), 518 | ('\\mathfrak{n}', '𝔫'), 519 | ('\\mathfrak{O}', '𝔒'), 520 | ('\\mathfrak{o}', '𝔬'), 521 | ('\\mathfrak{P}', '𝔓'), 522 | ('\\mathfrak{p}', '𝔭'), 523 | ('\\mathfrak{Q}', '𝔔'), 524 | ('\\mathfrak{q}', '𝔮'), 525 | ('\\mathfrak{r}', '𝔯'), 526 | ('\\mathfrak{S}', '𝔖'), 527 | ('\\mathfrak{s}', '𝔰'), 528 | ('\\mathfrak{T}', '𝔗'), 529 | ('\\mathfrak{t}', '𝔱'), 530 | ('\\mathfrak{U}', '𝔘'), 531 | ('\\mathfrak{u}', '𝔲'), 532 | ('\\mathfrak{V}', '𝔙'), 533 | ('\\mathfrak{v}', '𝔳'), 534 | ('\\mathfrak{W}', '𝔚'), 535 | ('\\mathfrak{w}', '𝔴'), 536 | ('\\mathfrak{X}', '𝔛'), 537 | ('\\mathfrak{x}', '𝔵'), 538 | ('\\mathfrak{Y}', '𝔜'), 539 | ('\\mathfrak{y}', '𝔶'), 540 | ('\\mathfrak{z}', '𝔷'), 541 | ('\\mathit{A}', '𝐴'), 542 | ('\\mathit{a}', '𝑎'), 543 | ('\\mathit{B}', '𝐵'), 544 | ('\\mathit{b}', '𝑏'), 545 | ('\\mathit{C}', '𝐶'), 546 | ('\\mathit{c}', '𝑐'), 547 | ('\\mathit{D}', '𝐷'), 548 | ('\\mathit{d}', '𝑑'), 549 | ('\\mathit{E}', '𝐸'), 550 | ('\\mathit{e}', '𝑒'), 551 | ('\\mathit{F}', '𝐹'), 552 | ('\\mathit{f}', '𝑓'), 553 | ('\\mathit{G}', '𝐺'), 554 | ('\\mathit{g}', '𝑔'), 555 | 
('\\mathit{H}', '𝐻'), 556 | ('\\mathit{h}', '𝘩'), 557 | ('\\mathit{I}', '𝐼'), 558 | ('\\mathit{i}', '𝑖'), 559 | ('\\mathit{J}', '𝐽'), 560 | ('\\mathit{j}', '𝑗'), 561 | ('\\mathit{K}', '𝐾'), 562 | ('\\mathit{k}', '𝑘'), 563 | ('\\mathit{L}', '𝐿'), 564 | ('\\mathit{l}', '𝑙'), 565 | ('\\mathit{M}', '𝑀'), 566 | ('\\mathit{m}', '𝑚'), 567 | ('\\mathit{N}', '𝑁'), 568 | ('\\mathit{n}', '𝑛'), 569 | ('\\mathit{O}', '𝑂'), 570 | ('\\mathit{o}', '𝑜'), 571 | ('\\mathit{P}', '𝑃'), 572 | ('\\mathit{p}', '𝑝'), 573 | ('\\mathit{Q}', '𝑄'), 574 | ('\\mathit{q}', '𝑞'), 575 | ('\\mathit{R}', '𝑅'), 576 | ('\\mathit{r}', '𝑟'), 577 | ('\\mathit{S}', '𝑆'), 578 | ('\\mathit{s}', '𝑠'), 579 | ('\\mathit{T}', '𝑇'), 580 | ('\\mathit{t}', '𝑡'), 581 | ('\\mathit{U}', '𝑈'), 582 | ('\\mathit{u}', '𝑢'), 583 | ('\\mathit{V}', '𝑉'), 584 | ('\\mathit{v}', '𝑣'), 585 | ('\\mathit{W}', '𝑊'), 586 | ('\\mathit{w}', '𝑤'), 587 | ('\\mathit{X}', '𝑋'), 588 | ('\\mathit{x}', '𝑥'), 589 | ('\\mathit{Y}', '𝑌'), 590 | ('\\mathit{y}', '𝑦'), 591 | ('\\mathit{Z}', '𝑍'), 592 | ('\\mathit{z}', '𝑧'), 593 | ('\\mathring{\\mathrm A}', 'Å'), 594 | ('\\mathrm K', 'K'), 595 | ('\\mathrm{d}', 'ⅆ'), 596 | ('\\mathrsfs B', 'ℬ'), 597 | ('\\mathrsfs e', 'ℯ'), 598 | ('\\mathrsfs E', 'ℰ'), 599 | ('\\mathrsfs F', 'ℱ'), 600 | ('\\mathrsfs H', 'ℋ'), 601 | ('\\mathrsfs I', 'ℐ'), 602 | ('\\mathrsfs L', 'ℒ'), 603 | ('\\mathscr{A}', '𝒜'), 604 | ('\\mathscr{C}', '𝒞'), 605 | ('\\mathscr{D}', '𝒟'), 606 | ('\\mathscr{G}', '𝒢'), 607 | ('\\mathscr{J}', '𝒥'), 608 | ('\\mathscr{K}', '𝒦'), 609 | ('\\mathscr{M}', 'ℳ'), 610 | ('\\mathscr{N}', '𝒩'), 611 | ('\\mathscr{O}', '𝒪'), 612 | ('\\mathscr{P}', '𝒫'), 613 | ('\\mathscr{Q}', '𝒬'), 614 | ('\\mathscr{R}', 'ℛ'), 615 | ('\\mathscr{S}', '𝒮'), 616 | ('\\mathscr{T}', '𝒯'), 617 | ('\\mathscr{U}', '𝒰'), 618 | ('\\mathscr{V}', '𝒱'), 619 | ('\\mathscr{W}', '𝒲'), 620 | ('\\mathscr{X}', '𝒳'), 621 | ('\\mathscr{Y}', '𝒴'), 622 | ('\\mathscr{Z}', '𝒵'), 623 | ('\\measuredangle', '∡'), 624 | ('\\mho', '℧'), 625 | ('\\mid', '∣'), 626 | ('\\models', '⊧'), 627 | ('\\mp', '∓'), 628 | ('\\mu', 'μ'), 629 | ('\\multimap', '⊸'), 630 | ('\\nabla', '∇'), 631 | ('\\natural', '♮'), 632 | ('\\ncong', '≇'), 633 | ('\\ne', '≠'), 634 | ('\\nearrow', '↗'), 635 | ('\\neg', '¬'), 636 | ('\\nexist', '∄'), 637 | ('\\ngeq', '≱'), 638 | ('\\ngtr', '≯'), 639 | ('\\ni', '∋'), 640 | ('\\nleftarrow', '↚'), 641 | ('\\nLeftarrow', '⇍'), 642 | ('\\nleftrightarrow', '↮'), 643 | ('\\nLeftrightarrow', '⇎'), 644 | ('\\nleq', '≰'), 645 | ('\\nless', '≮'), 646 | ('\\nmid', '∤'), 647 | ('\\not\\approx', '≉'), 648 | ('\\not\\asymp', '≭'), 649 | ('\\not\\equiv', '≢'), 650 | ('\\not\\exists', '∄'), 651 | ('\\not\\gtrless', '≹'), 652 | ('\\not\\gtrsim', '≵'), 653 | ('\\not\\lessgtr', '≸'), 654 | ('\\not\\lesssim', '≴'), 655 | ('\\not\\preceq', '⋠'), 656 | ('\\not\\simeq', '≄'), 657 | ('\\not\\sqsubseteq', '⋢'), 658 | ('\\not\\sqsupseteq', '⋣'), 659 | ('\\not\\subset', '⊄'), 660 | ('\\not\\succeq', '⋡'), 661 | ('\\not\\supset', '⊅'), 662 | ('\\not\\triangleleft', '⋪'), 663 | ('\\not\\trianglelefteq', '⋬'), 664 | ('\\not\\triangleright', '⋫'), 665 | ('\\not\\trianglerighteq', '⋭'), 666 | ('\\not\\vdash', '⊬'), 667 | ('\\not\\vDash', '⊭'), 668 | ('\\not\\Vdash', '⊮'), 669 | ('\\not\\VDash', '⊯'), 670 | ('\\notin', '∉'), 671 | ('\\notni', '∌'), 672 | ('\\nparallel', '∦'), 673 | ('\\nprec', '⊀'), 674 | ('\\nrightarrow', '↛'), 675 | ('\\nRightarrow', '⇏'), 676 | ('\\nsim', '≁'), 677 | ('\\nsubseteq', '⊈'), 678 | ('\\nsucc', '⊁'), 679 | ('\\nsupseteq', '⊉'), 680 | ('\\nu', 'ν'), 681 | 
('\\nwarrow', '↖'), 682 | ('\\odot', '⊙'), 683 | ('\\oiiint', '∰'), 684 | ('\\oiint', '∯'), 685 | ('\\oiintctrclockwise', '∳'), 686 | ('\\oint', '∮'), 687 | ('\\ointclockwise', '∲'), 688 | ('\\Omega', 'Ω'), 689 | ('\\omega', 'ω'), 690 | ('\\ominus', '⊖'), 691 | ('\\oplus', '⊕'), 692 | ('\\oslash', '⊘'), 693 | ('\\otimes', '⊗'), 694 | ('\\overline{0}', '‾'), 695 | ('\\parallel', '∥'), 696 | ('\\partial', '∂'), 697 | ('\\perp', '⟂'), 698 | ('\\Phi', 'Φ'), 699 | ('\\phi', 'φ'), 700 | ('\\Pi', 'Π'), 701 | ('\\pi', 'π'), 702 | ('\\pitchfork', '⋔'), 703 | ('\\pm', '±'), 704 | ('\\pounds', '£'), 705 | ('\\prec', '≺'), 706 | ('\\preccurlyeq', '≼'), 707 | ('\\preceq', '⪯'), 708 | ('\\precnsim', '⋨'), 709 | ('\\precsim', '≾'), 710 | ('\\prime', '′'), 711 | ('\\prod', '∏'), 712 | ('\\Proportion', '∷'), 713 | ('\\propto', '∝'), 714 | ('\\Psi', 'Ψ'), 715 | ('\\psi', 'ψ'), 716 | ('\\Qoppa', 'Ϙ'), 717 | ('\\qoppa', 'ϙ'), 718 | ('\\quotedblbase', '„'), 719 | ('\\quotesinglbase', '‚'), 720 | ('\\rangle', '〉'), 721 | ('\\rceil', '⌉'), 722 | ('\\Re', 'ℜ'), 723 | ('\\rfloor', '⌋'), 724 | ('\\RHD', '‣'), 725 | ('\\rho', 'ρ'), 726 | ('\\rightarrow', '→'), 727 | ('\\Rightarrow', '⇒'), 728 | ('\\RightArrowBar', '⇥'), 729 | ('\\rightarrowtail', '↣'), 730 | ('\\rightarrowtriangle', '⇾'), 731 | ('\\rightharpoondown', '⇁'), 732 | ('\\rightharpoonup', '⇀'), 733 | ('\\rightleftarrows', '⇄'), 734 | ('\\rightleftharpoons', '⇌'), 735 | ('\\rightrightarrows', '⇉'), 736 | ('\\rightsquigarrow', '⇝'), 737 | ('\\rightthreetimes', '⋌'), 738 | ('\\risingdotseq', '≓'), 739 | ('\\rrangle', '⟫'), 740 | ('\\rrbracket', '〛'), 741 | ('\\Rrightarrow', '⇛'), 742 | ('\\Rsh', '↱'), 743 | ('\\rtimes', '⋊'), 744 | ('\\Sampi', 'Ϡ'), 745 | ('\\sampi', 'ϡ'), 746 | ('\\searrow', '↘'), 747 | ('\\second', '″'), 748 | ('\\setminus', '⧵'), 749 | ('\\sharp', '♯'), 750 | ('\\Sigma', 'Σ'), 751 | ('\\sigma', 'σ'), 752 | ('\\sim', '∼'), 753 | ('\\simeq', '≃'), 754 | ('\\smallsetminus', '∖'), 755 | ('\\smile', '⌣'), 756 | ('\\smiley', '☺'), 757 | ('\\spadesuit', '♠'), 758 | ('\\sphericalangle', '∢'), 759 | ('\\sqbullet', '∍'), 760 | ('\\sqcap', '⊓'), 761 | ('\\sqcup', '⊔'), 762 | ('\\sqrt[3]{}', '∛'), 763 | ('\\sqrt[4]{}', '∜'), 764 | ('\\sqrt{}', '√'), 765 | ('\\sqsubset', '⊏'), 766 | ('\\sqsubseteq', '⊑'), 767 | ('\\sqsubsetneq', '⋤'), 768 | ('\\sqsupset', '⊐'), 769 | ('\\sqsupseteq', '⊒'), 770 | ('\\sqsupsetneq', '⋥'), 771 | ('\\square', '□'), 772 | ('\\stackrel{=}{=}', '≣'), 773 | ('\\stackrel{\\frown}{=}', '≘'), 774 | ('\\stackrel{\\star}{=}', '≛'), 775 | ('\\stackrel{\\text{\\tiny ?}}{=}', '≟'), 776 | ('\\stackrel{\\text{\\tiny def}}{=}', '≝'), 777 | ('\\stackrel{\\vee}{=}', '≚'), 778 | ('\\stackrel{\\wedge}{=}', '≙'), 779 | ('\\star', '∗'), 780 | ('\\Stigma', 'Ϛ'), 781 | ('\\stigma', 'ϛ'), 782 | ('\\subset', '⊂'), 783 | ('\\Subset', '⋐'), 784 | ('\\subseteq', '⊆'), 785 | ('\\subsetneq', '⊊'), 786 | ('\\succ', '≻'), 787 | ('\\succccurlyeq', '≽'), 788 | ('\\succeq', '⪰'), 789 | ('\\succnsim', '⋩'), 790 | ('\\succsim', '≿'), 791 | ('\\sum', '∑'), 792 | ('\\supset', '⊃'), 793 | ('\\Supset', '⋑'), 794 | ('\\supseteq', '⊇'), 795 | ('\\supsetneq', '⊋'), 796 | ('\\swarrow', '↙'), 797 | ('\\tau', 'τ'), 798 | ('\\textasciimacron', '¯'), 799 | ('\\textbardbl', '‖'), 800 | ('\\textbrokenbar', '¦'), 801 | ('\\textcent', '¢'), 802 | ('\\textcurrency', '¤'), 803 | ('\\textdiscount', '⁒'), 804 | ('\\textestimated', '℮'), 805 | ('\\textexclamdown', '¡'), 806 | ('\\textinterrobang', '‽'), 807 | ('\\textinterrobangdown', '⸘'), 808 | ('\\textlquill', '⁅'), 809 | 
('\\textmu', 'µ'), 810 | ('\\textordfeminine', 'ª'), 811 | ('\\textordmasculine', 'º'), 812 | ('\\textpertenthousand', '‱'), 813 | ('\\textperthousand', '‰'), 814 | ('\\textquestiondown', '¿'), 815 | ('\\textquotedblleft', '“'), 816 | ('\\textquotedblright', '”'), 817 | ('\\textquoteleft', '‘'), 818 | ('\\textquoteright', '’'), 819 | ('\\textreferencemark', '※'), 820 | ('\\textsuperscript{\\textregistered}', '®'), 821 | ('\\textrquill', '⁆'), 822 | ('\\textsuperscript{1}', '¹'), 823 | ('\\textsuperscript{2}', '²'), 824 | ('\\textsuperscript{3}', '³'), 825 | ('\\textsuperscript{o}', '°'), 826 | ('\\texttrademark', '™'), 827 | ('\\textyen', '¥'), 828 | ('\\therefore', '∴'), 829 | ('\\Theta', 'Θ'), 830 | ('\\theta', 'θ'), 831 | ('\\third', '‴'), 832 | ('\\times', '×'), 833 | ('\\top', '⊤'), 834 | ('\\triangle', '△'), 835 | ('\\triangleleft', '⊲'), 836 | ('\\triangleq', '≜'), 837 | ('\\triangleright', '⊳'), 838 | ('\\twoheadleftarrow', '↞'), 839 | ('\\twoheadrightarrow', '↠'), 840 | ('\\underline{\\phantom{x}}', '̲'), 841 | ('\\unlhd', '⊴'), 842 | ('\\unrhd', '⊵'), 843 | ('\\uparrow', '↑'), 844 | ('\\Uparrow', '⇑'), 845 | ('\\updownarrow', '↕'), 846 | ('\\Updownarrow', '⇕'), 847 | ('\\updownarrows', '⇅'), 848 | ('\\upharpoonleft', '↿'), 849 | ('\\upharpoonright', '↾'), 850 | ('\\uplus', '⊎'), 851 | ('\\Upsilon', 'Υ'), 852 | ('\\upsilon', 'υ'), 853 | ('\\upuparrows', '⇈'), 854 | ('\\varepsilon', 'ε'), 855 | ('\\varkappa', 'ϰ'), 856 | ('\\varnothing', '∅'), 857 | ('\\varphi', '𝜑'), 858 | ('\\varpi', '𝜛'), 859 | ('\\varrho', '𝜚'), 860 | ('\\varsigma', 'ς'), 861 | ('\\vartheta', '𝜗'), 862 | ('\\vdash', '⊢'), 863 | ('\\Vdash', '⊩'), 864 | ('\\VDash', '⊫'), 865 | ('\\vdots', '⋮'), 866 | ('\\vee', '∨'), 867 | ('\\veebar', '⊻'), 868 | ('\\Vvdash', '⊪'), 869 | ('\\wedge', '∧'), 870 | ('\\wp', '℘'), 871 | ('\\wr', '≀'), 872 | ('\\Xi', 'Ξ'), 873 | ('\\xi', 'ξ'), 874 | ('\\xspace', ' '), 875 | ('\\Yup', '⅄'), 876 | ('\\zeta', 'ζ') 877 | ] 878 | 879 | # Add conflicting tokens 880 | for _ in [ 881 | ('\\textregistered', '®'), 882 | ]: 883 | REPLACE_TEX_COMMANDS_LIBRARY.append(_) 884 | 885 | # Add final token for text 886 | REPLACE_TEX_COMMANDS_LIBRARY.append(('\\text', '')) 887 | -------------------------------------------------------------------------------- /pydetex/_gui_settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | GUI SETTINGS 6 | Provides settings for the gui. 
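# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the repository): the tables defined in
# _symbols.py above are plain (tex, unicode) pair lists, so they can be
# exercised with sequential str.replace() calls. This naive pass is
# order-sensitive and far simpler than the real replacement step performed
# elsewhere in pydetex; it is shown only to document how the pair lists are
# meant to be read. Assumes the package is importable.

from pydetex._symbols import REPLACE_SYMBOLS_LIBRARY, REPLACE_TEX_COMMANDS_LIBRARY

def _replace_symbols(s: str) -> str:
    for library in (REPLACE_TEX_COMMANDS_LIBRARY, REPLACE_SYMBOLS_LIBRARY):
        for tex, symbol in library:
            s = s.replace(tex, symbol)
    return s

# _replace_symbols(r'\alpha \ne \beta')  ->  'α ≠ β'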
7 | """ 8 | 9 | __all__ = ['Settings'] 10 | 11 | import os 12 | 13 | from typing import Callable, Tuple, Dict, Any, List, Type, Union 14 | from warnings import warn 15 | 16 | import datetime 17 | import pydetex.pipelines as pip 18 | import pydetex.utils as ut 19 | import pydetex.version as ver 20 | 21 | from pydetex import __author__ 22 | 23 | _SETTINGS_FILE = [os.path.join(ut.get_local_path(), '.pydetex.cfg')] 24 | _SETTINGS_TEST = os.path.join(ut.RESOURCES_PATH, '.pydetex.cfg') 25 | 26 | # Store the pipelines 27 | _PIPELINES = { 28 | 'pipeline_simple': pip.simple, 29 | 'pipeline_strict': pip.strict, 30 | 'pipeline_strict_eqn': pip.strict_eqn 31 | } 32 | 33 | # Store the window sizes (w, h, height_richtext, margin_between_richtext, button_margin) 34 | _WINDOW_SIZE = { 35 | 'window_size_small': [720, 480, 175, 3, 6], 36 | 'window_size_medium': [960, 540, 200, 5, 10], 37 | 'window_size_large': [1280, 720, 285, 5, 15], 38 | 'window_size_xlarge': [1440, 850, 343, 10, 19] 39 | } 40 | 41 | 42 | class _LangManager(object): 43 | """ 44 | Stores language. 45 | """ 46 | 47 | def __init__(self) -> None: 48 | """ 49 | Constructor. 50 | """ 51 | self._lang = { 52 | 'en': { 53 | 'about': 'About', 54 | 'about_author': 'Author', 55 | 'about_opened': 'Total app openings', 56 | 'about_processed': 'Total processed words', 57 | 'about_ver_dev': 'Development version', 58 | 'about_ver_err_conn': 'Cannot check for new versions (Connection Error)', 59 | 'about_ver_err_unkn': 'Cannot check for new versions (Unknown Error)', 60 | 'about_ver_latest': 'Software version up-to-date', 61 | 'about_ver_upgrade': 'Note: You are using an outdated version, consider upgrading to v{0}', 62 | 'cfg_check': 'Check', 63 | 'cfg_error_auto_copy': 'Invalid auto copy process value', 64 | 'cfg_error_font_size': 'Invalid font size value', 65 | 'cfg_error_lang': 'Invalid lang value', 66 | 'cfg_error_output_format': 'Invalid output font format value', 67 | 'cfg_error_pipeline': 'Invalid pipeline value', 68 | 'cfg_error_pipeline_compress_cite': 'Invalid compress \\cite value', 69 | 'cfg_error_pipeline_replace_defs': 'Invalid replace \\def value', 70 | 'cfg_error_repetition': 'Invalid repetition value', 71 | 'cfg_error_repetition_chars': 'Repetition min chars must be greater than zero', 72 | 'cfg_error_repetition_distance': 'Repetition distance must be greater than 2 and lower than 50', 73 | 'cfg_error_repetition_words': 'Invalid ignore words', 74 | 'cfg_error_show_line_numbers': 'Invalid show line numbers value', 75 | 'cfg_error_stemming': 'Invalid repetition stemming value', 76 | 'cfg_error_stopwords': 'Invalid repetition stopwords value', 77 | 'cfg_error_window_size': 'Invalid window size value', 78 | 'cfg_font_format': 'Output font format', 79 | 'cfg_font_size': 'Font size', 80 | 'cfg_lang': 'Language', 81 | 'cfg_pipeline': 'Pipeline', 82 | 'cfg_pipeline_compress_cite': 'Compress \\cite', 83 | 'cfg_pipeline_replace_defs': 'Replace \\def', 84 | 'cfg_process_auto_copy': 'Auto-copy after process', 85 | 'cfg_save': 'Save', 86 | 'cfg_show_line_numbers': 'Show line numbers', 87 | 'cfg_tab_pipeline': 'Pipeline', 88 | 'cfg_tab_ui': 'UI', 89 | 'cfg_window_size': 'Window size', 90 | 'cfg_words_repetition': 'Words repetition', 91 | 'cfg_words_repetition_distance': 'Repetition distance', 92 | 'cfg_words_repetition_ignorew': 'Ignored words', 93 | 'cfg_words_repetition_minchars': 'Repetition min chars', 94 | 'cfg_words_repetition_stemming': 'Use stemming', 95 | 'cfg_words_repetition_stopwords': 'Use stopwords', 96 | 'clear': 'Clear', 97 | 
'clip_empty': 'Clipboard is empty', 98 | 'copy_from_clip': 'Copying from clipboard', 99 | 'detected_lang': 'Detected language: {0} ({1})', 100 | 'detected_lang_write': 'Write something to recognize the language', 101 | 'dictionary': 'Dictionary', 102 | 'dictionary_antonym': 'Antonym', 103 | 'dictionary_loading': 'Loading ...', 104 | 'dictionary_meaning': 'Meaning', 105 | 'dictionary_no_results': 'No results', 106 | 'dictionary_querying': 'Querying dictionary', 107 | 'dictionary_synonym': 'Synonym', 108 | 'dictionary_timeout': 'Error: Query timeout exceeded limit', 109 | 'dictionary_translation': 'Translation', 110 | 'dictionary_wikipedia': 'Wikipedia', 111 | 'dictionary_word': 'Word', 112 | 'format_d': ',', 113 | 'lang': 'English', 114 | 'menu_copy': 'Copy', 115 | 'menu_cut': 'Cut', 116 | 'menu_paste': 'Paste', 117 | 'open_file': 'Open file', 118 | 'open_file_latex_file': 'LaTeX file', 119 | 'open_file_select': 'Select a LaTeX file', 120 | 'pipeline_simple': 'Simple', 121 | 'pipeline_simple_description': 'Removes common Tex commands, replaces cites and references', 122 | 'pipeline_strict': 'Strict', 123 | 'pipeline_strict_description': 'An extension of the simple pipeline which removes all Tex commands and environments', 124 | 'pipeline_strict_eqn': 'Strict + equation', 125 | 'pipeline_strict_eqn_description': 'An extension of the strict pipeline which converts also the equations', 126 | 'placeholder': ut.open_file(ut.RESOURCES_PATH + 'placeholder_en.tex'), 127 | 'process': 'Process', 128 | 'process_clip': 'Process from clipboard', 129 | 'process_copy': 'Copy to clipboard', 130 | 'process_error': 'An error has occured while processing the text.\nPlease create a new issue in the GitHub ' 131 | 'page ({0}) with full defails and minimal working example.\n\nError traceback:\n{1}\n', 132 | 'reload_message_message': 'To apply these changes, the app must be reloaded', 133 | 'reload_message_title': 'Reload is required', 134 | 'settings': 'Settings', 135 | 'status_copy_to_clip': 'Copying to clip', 136 | 'status_cursor': 'Cursor: {0}:{1}', 137 | 'status_cursor_input_focusout': 'Input not selected', 138 | 'status_cursor_input_focusout_min': 'Not selected', 139 | 'status_cursor_input_focusout_min2': 'No sel', 140 | 'status_cursor_min': 'Cur: {0}:{1}', 141 | 'status_cursor_null': 'Cursor: Empty', 142 | 'status_cursor_selected': 'Selected', 143 | 'status_cursor_selected_all': 'Selected: all', 144 | 'status_cursor_selected_chars': '{0} chars', 145 | 'status_cursor_selected_chars_min': '{0} ch', 146 | 'status_cursor_selected_chars_single': '1 char', 147 | 'status_cursor_selected_min': 'Sel', 148 | 'status_idle': 'Idle', 149 | 'status_processing': 'Processing', 150 | 'status_requesting_file': 'Requesting file', 151 | 'status_words': 'Words: {0}', 152 | 'status_writing': 'Writing', 153 | 'tag_repeated': 'repeated', 154 | 'version_upgrade': 'You are using an outdated PyDetex version, consider upgrading to v{0}.\n\nTo update, ' 155 | 'run "pip install --upgrade pydetex" in your terminal', 156 | 'version_upgrade_title': 'Oudated PyDetex version', 157 | 'window_size_large': 'Large', 158 | 'window_size_medium': 'Medium', 159 | 'window_size_small': 'Small', 160 | 'window_size_xlarge': 'Extra Large' 161 | }, 162 | 'es': { 163 | 'about': 'Acerca de', 164 | 'about_author': 'Autor', 165 | 'about_opened': 'Nº ejecuciones app', 166 | 'about_processed': 'Nº palabras procesadas', 167 | 'about_ver_dev': 'Versión de desarrollo', 168 | 'about_ver_err_conn': 'No se pudo verificar nuevas versiones (Error de Conexión)', 169 | 
'about_ver_err_unkn': 'No se pudo verificar nuevas versiones (Error desconocido)', 170 | 'about_ver_latest': 'Software actualizado a la última versión', 171 | 'about_ver_upgrade': 'Nota: Estás usando una versión desactualizada, considera actualizar a la v{0}', 172 | 'cfg_check': 'Activar', 173 | 'cfg_error_auto_copy': 'Valor auto copiado al procesar incorrecto', 174 | 'cfg_error_font_size': 'Tamaño fuente incorrecta', 175 | 'cfg_error_lang': 'Valor idioma incorrecto', 176 | 'cfg_error_output_format': 'Valor formato output incorrecto', 177 | 'cfg_error_pipeline': 'Valor pipeline incorrecto', 178 | 'cfg_error_pipeline_compress_cite': 'Valor compresión \\cite incorrecto', 179 | 'cfg_error_pipeline_replace_defs': 'Valor reemplazo \\def incorrecto', 180 | 'cfg_error_repetition': 'Valor repetición incorrecto', 181 | 'cfg_error_repetition_chars': 'Caracter mínimo de repetición debe ser mayor a cero', 182 | 'cfg_error_repetition_distance': 'Distancia de repetición debe ser superior o igual a 2, y menor que 50', 183 | 'cfg_error_repetition_words': 'Repetición palabras incorrectas', 184 | 'cfg_error_show_line_numbers': 'Valor mostrar número de líneas incorrecto', 185 | 'cfg_error_stemming': 'Valor stemming incorrecto', 186 | 'cfg_error_stopwords': 'Valor stopwords incorrecto', 187 | 'cfg_error_window_size': 'Tamaño ventana incorrecto', 188 | 'cfg_font_format': 'Formatear fuentes', 189 | 'cfg_font_size': 'Tamaño de la fuente', 190 | 'cfg_lang': 'Idioma', 191 | 'cfg_pipeline': 'Pipeline', 192 | 'cfg_pipeline_compress_cite': 'Comprimir \\cite', 193 | 'cfg_pipeline_replace_defs': 'Reemplazar \\def', 194 | 'cfg_process_auto_copy': 'Auto-copiado al procesar', 195 | 'cfg_save': 'Guardar', 196 | 'cfg_show_line_numbers': 'Mostrar nº líneas', 197 | 'cfg_tab_pipeline': 'Pipeline', 198 | 'cfg_tab_ui': 'UI', 199 | 'cfg_window_size': 'Tamaño de ventana', 200 | 'cfg_words_repetition': 'Rep. 
palabras', 201 | 'cfg_words_repetition_distance': 'Distancia de repetición', 202 | 'cfg_words_repetition_ignorew': 'Palabras ignoradas', 203 | 'cfg_words_repetition_minchars': 'Mínimo de carácteres', 204 | 'cfg_words_repetition_stemming': 'Usar stemming', 205 | 'cfg_words_repetition_stopwords': 'Usar stopwords', 206 | 'clear': 'Limpiar', 207 | 'clip_empty': 'Portapapeles vacío', 208 | 'copy_from_clip': 'Copiando desde portapapeles', 209 | 'detected_lang': 'Idioma detectado: {0} ({1})', 210 | 'detected_lang_write': 'Escribe algo para detectar el idioma', 211 | 'dictionary': 'Diccionario', 212 | 'dictionary_antonym': 'Antónimos', 213 | 'dictionary_loading': 'Cargando ...', 214 | 'dictionary_meaning': 'Definición', 215 | 'dictionary_no_results': 'Sin resultados', 216 | 'dictionary_querying': 'Consultando diccionario', 217 | 'dictionary_synonym': 'Sinónimos', 218 | 'dictionary_timeout': 'Error: El tiempo de consulta excedió el límite', 219 | 'dictionary_translation': 'Traducción', 220 | 'dictionary_wikipedia': 'Wikipedia', 221 | 'dictionary_word': 'Word', 222 | 'format_d': '.', 223 | 'lang': 'Español', 224 | 'menu_copy': 'Copiar', 225 | 'menu_cut': 'Cortar', 226 | 'menu_paste': 'Pegar', 227 | 'open_file': 'Abrir archivo', 228 | 'open_file_latex_file': 'Archivo LaTeX', 229 | 'open_file_select': 'Selecciona un archivo LaTeX', 230 | 'pipeline_simple': 'Simple', 231 | 'pipeline_simple_description': 'Elimina comandos Tex comunes, remplaza citas y referencias', 232 | 'pipeline_strict': 'Estricto', 233 | 'pipeline_strict_description': 'Una extensión del pipeline simple que elimina todos los entornos y comandos', 234 | 'pipeline_strict_eqn': 'Estricto + ecuación', 235 | 'pipeline_strict_eqn_description': 'Pipeline estricto que además reemplaza las ecuaciones', 236 | 'placeholder': ut.open_file(ut.RESOURCES_PATH + 'placeholder_es.tex'), 237 | 'process': 'Procesar', 238 | 'process_clip': 'Procesar desde portapapeles', 239 | 'process_copy': 'Copiar al portapapeles', 240 | 'process_error': 'Un error ha ocurrido mientras se procesaba el texto.\nPor favor crea un nuevo issue en' 241 | ' la página de GitHub ({0}) con los detalles completos y un ejemplo mínimo para probar' 242 | ' las soluciones.\n\nDetalles del error:\n{1}\n', 243 | 'reload_message_message': 'Para aplicar estos cambios, la aplicación se debe reiniciar', 244 | 'reload_message_title': 'Se requiere de un reinicio', 245 | 'settings': 'Configuraciones', 246 | 'status_copy_to_clip': 'Copiando al portapapeles', 247 | 'status_cursor': 'Cursor: {0}:{1}', 248 | 'status_cursor_input_focusout': 'Texto entrada no seleccionado', 249 | 'status_cursor_input_focusout_min': 'No seleccionado', 250 | 'status_cursor_input_focusout_min2': 'No sel', 251 | 'status_cursor_min': 'Cur: {0}:{1}', 252 | 'status_cursor_null': 'Cursor: Vacío', 253 | 'status_cursor_selected': 'Selección', 254 | 'status_cursor_selected_all': 'Selección: todo', 255 | 'status_cursor_selected_chars': '{0} carácteres', 256 | 'status_cursor_selected_chars_min': '{0} crs.', 257 | 'status_cursor_selected_chars_single': '1 caracter', 258 | 'status_cursor_selected_min': 'Sel', 259 | 'status_idle': 'Esperando', 260 | 'status_processing': 'Procesando', 261 | 'status_requesting_file': 'Esperando archivo', 262 | 'status_words': 'Palabras: {0}', 263 | 'status_writing': 'Escribiendo', 264 | 'tag_repeated': 'repetido', 265 | 'version_upgrade': 'Estás usando una versión desactualizada de PyDetex, considera actualizar a v{0}.' 
266 | '\n\nPara esto, ejecuta "pip install --upgrade pydetex" en tu terminal', 267 | 'version_upgrade_title': 'Versión desactualizada de PyDetex', 268 | 'window_size_large': 'Grande', 269 | 'window_size_medium': 'Mediano', 270 | 'window_size_small': 'Pequeño', 271 | 'window_size_xlarge': 'Extra Grande' 272 | } 273 | } 274 | 275 | # Extend languages if not defined 276 | ut.complete_langs_dict(self._lang) 277 | 278 | # Update window sizes 279 | for la in self._lang.keys(): 280 | for tok in self._lang[la].keys(): 281 | if tok in _WINDOW_SIZE.keys(): 282 | self._lang[la][tok] += f' ({_WINDOW_SIZE[tok][0]}x{_WINDOW_SIZE[tok][1]})' 283 | 284 | def get_available(self) -> List[str]: 285 | """ 286 | Return the available langs. 287 | 288 | :return: Lang list 289 | """ 290 | return list(self._lang.keys()) 291 | 292 | def get(self, lang: str, tag: str) -> str: 293 | """ 294 | Returns a lang entry. 295 | 296 | :param lang: Language code 297 | :param tag: Language tag 298 | :return: Language value 299 | """ 300 | return self._lang[lang][tag] 301 | 302 | 303 | class Settings(object): 304 | """ 305 | Settings. 306 | """ 307 | 308 | _available_pipelines: List[str] 309 | _default_settings: Dict[str, Tuple[Any, Type, Union[List[Any], Callable[[Any], bool]]]] 310 | _lang: '_LangManager' 311 | _last_opened_day_diff: int 312 | _settings: Dict[str, Any] 313 | _valid_font_sizes: List[int] 314 | _valid_window_sizes: List[str] 315 | 316 | def __init__(self, ignore_file: bool = False) -> None: 317 | """ 318 | Constructor. 319 | 320 | :param ignore_file: If True, the settings file is ignored 321 | """ 322 | load = [] 323 | 324 | def _load_file() -> List[str]: 325 | """ 326 | Loads the setting file. 327 | """ 328 | _load = [] 329 | try: 330 | _f = open(_SETTINGS_FILE[0]) 331 | _load = _f.readlines() 332 | _f.close() 333 | except FileNotFoundError: 334 | _error = f'Setting file {_SETTINGS_FILE[0]} could not be loaded or not exist. 
Creating new file' 335 | warn(_error) 336 | return _load 337 | 338 | if not ignore_file: 339 | try: 340 | load = _load_file() 341 | except PermissionError: 342 | error = f'Settings file {_SETTINGS_FILE[0]} could not be opened (PermissionError)' 343 | warn(error) 344 | else: 345 | _SETTINGS_FILE[0] = _SETTINGS_TEST 346 | 347 | # Creates the lang manager 348 | self._lang = _LangManager() 349 | 350 | # General settings 351 | self.CFG_CHECK_REPETITION = 'CHECK_REPETITION' 352 | self.CFG_FONT_SIZE = 'FONT_SIZE' 353 | self.CFG_LANG = 'LANG' 354 | self.CFG_LAST_OPENED_DAY = 'LAST_OPENED_DAY' 355 | self.CFG_LAST_OPENED_FOLDER = 'LAST_OPENED_FOLDER' 356 | self.CFG_OUTPUT_FONT_FORMAT = 'OUTPUT_FONT_FORMAT' 357 | self.CFG_PIPELINE = 'PIPELINE' 358 | self.CFG_PIPELINE_COMPRESS_CITE = 'PIPELINE_COMPRESS_CITE' 359 | self.CFG_PIPELINE_REPLACE_DEFS = 'PIPELINE_REPLACE_DEFS' 360 | self.CFG_PROCESS_AUTO_COPY = 'PROCESS_AUTO_COPY' 361 | self.CFG_SHOW_LINE_NUMBERS = 'SHOW_LINE_NUMBERS' 362 | self.CFG_WINDOW_SIZE = 'WINDOW_SIZE' 363 | 364 | # Words repetition 365 | self.CFG_REPETITION_DISTANCE = 'REPETITION_DISTANCE' 366 | self.CFG_REPETITION_IGNORE_WORDS = 'REPETITION_IGNORE_WORDS' 367 | self.CFG_REPETITION_MIN_CHAR = 'REPETITION_MIN_CHAR' 368 | self.CFG_REPETITION_USE_STEMMING = 'REPETITION_USE_STEMMING' 369 | self.CFG_REPETITION_USE_STOPWORDS = 'REPETITION_USE_STOPWORDS' 370 | 371 | # Stats 372 | self.CFG_TOTAL_OPENED_APP = 'TOTAL_OPENED_APP' 373 | self.CFG_TOTAL_PROCESSED_WORDS = 'TOTAL_PROCESSED_WORDS' 374 | 375 | # Stores default settings and the valid values 376 | self._available_pipelines = list(_PIPELINES.keys()) 377 | self._valid_font_sizes = [6, 7, 8, 9, 10, 11, 12, 13, 14, 15] 378 | self._valid_window_sizes = list(_WINDOW_SIZE.keys()) 379 | 380 | self._default_settings = { 381 | self.CFG_CHECK_REPETITION: (False, bool, [True, False]), 382 | self.CFG_FONT_SIZE: (11 if ut.IS_OSX else 10, int, self._valid_font_sizes), 383 | self.CFG_LANG: ('en', str, self._lang.get_available()), 384 | self.CFG_LAST_OPENED_DAY: (ut.get_number_of_day(), int, lambda x: x >= 0), 385 | self.CFG_LAST_OPENED_FOLDER: ('/', str, lambda x: os.path.isdir(x)), 386 | self.CFG_OUTPUT_FONT_FORMAT: (True, bool, [True, False]), 387 | self.CFG_PIPELINE: (self._available_pipelines[1], str, self._available_pipelines), 388 | self.CFG_PIPELINE_COMPRESS_CITE: (True, bool, [True, False]), 389 | self.CFG_PIPELINE_REPLACE_DEFS: (False, bool, [True, False]), 390 | self.CFG_PROCESS_AUTO_COPY: (False, bool, [True, False]), 391 | self.CFG_REPETITION_DISTANCE: (15, int, lambda x: 50 > x > 1), 392 | self.CFG_REPETITION_IGNORE_WORDS: ('ignored_word_1, ignored_word_2', str, None), 393 | self.CFG_REPETITION_MIN_CHAR: (4, int, lambda x: x > 0), 394 | self.CFG_REPETITION_USE_STEMMING: (True, bool, [True, False]), 395 | self.CFG_REPETITION_USE_STOPWORDS: (True, bool, [True, False]), 396 | self.CFG_SHOW_LINE_NUMBERS: (True, bool, [True, False]), 397 | self.CFG_TOTAL_OPENED_APP: (0, int, lambda x: x >= 0), 398 | self.CFG_TOTAL_PROCESSED_WORDS: (0, int, lambda x: x >= 0), 399 | self.CFG_WINDOW_SIZE: (self._valid_window_sizes[1], str, self._valid_window_sizes) 400 | } 401 | 402 | # The valid settings 403 | self._settings = {} 404 | for k in self._default_settings.keys(): 405 | self._settings[k] = self._default_settings[k][0] 406 | 407 | # Load the user settings 408 | for f in load: 409 | if '#' in f: 410 | continue 411 | if '=' in f: # If string has control character 412 | sp = f.split('=') 413 | if len(sp) != 2: 414 | continue 415 | j = sp[0].strip() 416 | 
val = sp[1].strip() 417 | if self.check_setting(j, val): 418 | self._settings[j] = self._parse_str(val) # Update setting value 419 | 420 | # Update the value 421 | today = ut.get_number_of_day() 422 | self._last_opened_day_diff = abs(ut.get_number_of_day() - self.get(self.CFG_LAST_OPENED_DAY)) 423 | self.set(self.CFG_LAST_OPENED_DAY, today) 424 | self.set(self.CFG_TOTAL_OPENED_APP, self.get(self.CFG_TOTAL_OPENED_APP) + 1) 425 | 426 | # Save the settings 427 | self.save() 428 | 429 | @staticmethod 430 | def _parse_str(value: Any) -> Any: 431 | """ 432 | Parse common string values. 433 | 434 | :param value: Value 435 | :return: Parsed value 436 | """ 437 | if isinstance(value, str): 438 | if value == 'True': 439 | value = True 440 | elif value == 'False': 441 | value = False 442 | elif value.replace('.', '').replace('-', '').replace('+', '').isdigit(): 443 | try: 444 | old_val = value 445 | value = float(value) 446 | if '.' not in old_val and int(value) == value: 447 | value = int(value) 448 | except ValueError: 449 | pass 450 | else: 451 | value = value.strip() 452 | return value 453 | 454 | def check_setting(self, key: str, value: Any) -> bool: 455 | """ 456 | Check if a setting is valid. 457 | 458 | :param key: Key setting 459 | :param value: Value 460 | :return: True if valid 461 | """ 462 | # Apply custom values 463 | if isinstance(value, str): 464 | value = self._parse_str(value) 465 | 466 | # Checks 467 | if key in self._default_settings.keys(): 468 | val_type = self._default_settings[key][1] 469 | val_valids = self._default_settings[key][2] 470 | # Check val type 471 | if not isinstance(value, val_type): 472 | error = f'Setting {key} should be type {val_type}, but received {type(value)}' 473 | warn(error) 474 | return False 475 | if isinstance(val_valids, list): 476 | if value in val_valids: # Setting is within valid ones 477 | return True 478 | else: 479 | str_valids = [] 480 | for t in val_valids: 481 | str_valids.append(str(t)) 482 | error = f'Setting {key} value should have these values: {",".join(str_valids)}' 483 | warn(error) 484 | elif val_valids is None: 485 | return True 486 | else: # Is a function 487 | if not val_valids(value): 488 | error = f'Setting {key} do not pass valid test' 489 | warn(error) 490 | else: 491 | return True 492 | else: 493 | error = f'Setting {key} does not exist' 494 | warn(error) 495 | return False 496 | 497 | def get(self, key: str, update: bool = True) -> Any: 498 | """ 499 | Return the settings value. 500 | 501 | :param key: Setting key 502 | :param update: Updates settings value 503 | :return: Value 504 | """ 505 | val = self._settings[key] 506 | 507 | # Update for some values 508 | if update: 509 | if key == self.CFG_PIPELINE: 510 | val = _PIPELINES[val] 511 | elif key == self.CFG_WINDOW_SIZE: 512 | val = _WINDOW_SIZE[val] 513 | 514 | return val 515 | 516 | def set(self, key: str, value: Any) -> None: 517 | """ 518 | Update a setting value. 519 | 520 | :param key: Setting key 521 | :param value: Value 522 | """ 523 | if not self.check_setting(key, value): 524 | raise ValueError(f'Invalid value for {key}') 525 | self._settings[key] = self._parse_str(value) 526 | 527 | def lang(self, tag: str) -> str: 528 | """ 529 | Get a lang tag. 530 | 531 | :param tag: Lang's tag 532 | :return: Lang value 533 | """ 534 | return self._lang.get(self.get(self.CFG_LANG), tag) 535 | 536 | def add_words(self, w: int) -> None: 537 | """ 538 | Add processed words. 
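# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the repository): Settings.save() above
# writes a plain "KEY = value" text file with '#' comment lines, and values
# are re-typed on load through _parse_str(). The stand-alone reader below
# only approximates that behaviour (booleans and numbers) and is meant as
# documentation of the file format, not as a replacement for the class.

def _read_cfg(lines):
    parsed = {}
    for line in lines:
        if '#' in line or '=' not in line:
            continue  # skip comment lines and malformed entries
        key, _, value = line.partition('=')
        value = value.strip()
        if value in ('True', 'False'):  # booleans are stored as literals
            value = (value == 'True')
        elif value.lstrip('+-').replace('.', '', 1).isdigit():
            value = float(value) if '.' in value else int(value)
        parsed[key.strip()] = value
    return parsed

# _read_cfg(['# comment', 'FONT_SIZE = 10', 'CHECK_REPETITION = False'])
# -> {'FONT_SIZE': 10, 'CHECK_REPETITION': False}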
539 | 540 | :param w: Words 541 | """ 542 | self._settings[self.CFG_TOTAL_PROCESSED_WORDS] += w 543 | self.save() 544 | 545 | def save(self) -> None: 546 | """ 547 | Save the settings to the file. 548 | """ 549 | try: 550 | f = open(_SETTINGS_FILE[0], 'w') 551 | keys = list(self._settings.keys()) 552 | keys.sort() 553 | f.write(f'# PyDetex v{ver.vernum} @ {__author__}\n') 554 | f.write(f'# Settings stored on {datetime.datetime.today().ctime()}\n') 555 | for k in keys: 556 | f.write(f'{k} = {str(self._settings[k]).strip()}\n') 557 | f.close() 558 | except PermissionError: 559 | error = f'Settings file {_SETTINGS_FILE[0]} could not saved (PermissionError)' 560 | warn(error) 561 | -------------------------------------------------------------------------------- /pydetex/_utils_tex.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | UTILS TEX 6 | Latex utils. 7 | """ 8 | 9 | __all__ = [ 10 | 'apply_tag_between_inside_char_command', 11 | 'apply_tag_tex_commands', 12 | 'apply_tag_tex_commands_no_argv', 13 | 'find_tex_command_char', 14 | 'find_tex_commands', 15 | 'find_tex_commands_noargv', 16 | 'find_tex_environments', 17 | 'get_tex_commands_args', 18 | 'TEX_COMMAND_CHARS', 19 | 'TEX_EQUATION_CHARS', 20 | 'tex_to_unicode' 21 | ] 22 | 23 | import flatlatex 24 | import os 25 | import re 26 | 27 | from flatlatex.parser import LatexSyntaxError 28 | from typing import Tuple, Union, List, Dict, Optional, Any 29 | 30 | # Flat latex object 31 | _FLATLATEX = flatlatex.converter(ignore_newlines=False, keep_spaces=True) 32 | 33 | # Tex to unicode 34 | _TEX_TO_UNICODE: Dict[str, Union[Dict[Any, str], List[Tuple[str, str]]]] = { 35 | 'latex_symbols': [], 36 | 'subscripts': {}, 37 | 'superscripts': {}, 38 | 'textbb': {}, 39 | 'textbf': {}, 40 | 'textcal': {}, 41 | 'textfrak': {}, 42 | 'textit': {}, 43 | 'textmono': {} 44 | } 45 | 46 | # Valid command chars 47 | TEX_COMMAND_CHARS = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 48 | 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 49 | 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 50 | 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 51 | 'W', 'X', 'Y', 'Z', '*', '@'] 52 | TEX_EQUATION_CHARS = [ 53 | ('$', '$', True), 54 | (r'\(', r'\)', False), 55 | (r'\[', r'\]', False), 56 | ('\\begin{align*}', '\\end{align*}', False), 57 | ('\\begin{align}', '\\end{align}', False), 58 | ('\\begin{displaymath}', '\\end{displaymath}', False), 59 | ('\\begin{equation*}', '\\end{equation*}', False), 60 | ('\\begin{equation}', '\\end{equation}', False), 61 | ('\\begin{gather*}', '\\end{gather*}', False), 62 | ('\\begin{gather}', '\\end{gather}', False), 63 | ('\\begin{math}', '\\end{math}', False) 64 | ] 65 | 66 | 67 | def find_tex_command_char( 68 | s: str, 69 | symbols_char: List[Tuple[str, str, bool]], 70 | ) -> Tuple[Tuple[int, int, int, int], ...]: 71 | """ 72 | Find symbols command positions. 73 | 74 | Example: 75 | 76 | .. code-block:: none 77 | 78 | 00000000001111111111.... 79 | 01234567890123456789.... 80 | Input: This is a $formula$ and this is not. 81 | Output: ((10, 11, 17, 18), ...) 
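# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the repository), mirroring the
# find_tex_command_char() docstring example above. Passing TEX_EQUATION_CHARS
# locates every supported equation delimiter; each returned tuple holds the
# positions of the opening symbol, the first content character, the last
# content character and the closing symbol.

from pydetex._utils_tex import TEX_EQUATION_CHARS, find_tex_command_char

positions = find_tex_command_char('This is a $formula$ and this is not.', TEX_EQUATION_CHARS)
# Expected, per the docstring above: ((10, 11, 17, 18),)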
82 | 83 | :param s: Latex string code 84 | :param symbols_char: Symbols to check ``[(initial, final, ignore escape), ...]`` 85 | :return: Positions 86 | """ 87 | assert isinstance(symbols_char, list) 88 | max_len = 0 89 | for j in symbols_char: 90 | assert len(j) == 3, f'Format is (initial, final, ignore escape); but received {j}' 91 | assert isinstance(j[0], str) and len(j[0]) > 0 and ' ' not in j[0] 92 | assert isinstance(j[1], str) and len(j[1]) > 0 and ' ' not in j[1] 93 | assert isinstance(j[2], bool) 94 | max_len = max(max_len, len(j[0]), len(j[1])) 95 | 96 | def _find(k: int, y: int, p: bool = True) -> bool: 97 | """ 98 | Returns true if from k char (in s) the symbols-char-y element is present. 99 | 100 | :param k: Position to start 101 | :param y: Indes of the symbol within the list 102 | :param p: Reads the first (True) or last element 103 | :return: True if exist 104 | """ 105 | if y < 0: 106 | return False 107 | n, m, ignore_escape = symbols_char[y] 108 | nm = n if p else m 109 | total = 0 110 | for z in range(len(nm)): 111 | if s[k + z] == nm[z] and (z == 0 and (not ignore_escape or ignore_escape and s[k - 1] != '\\') or z > 0): 112 | total += 1 113 | return total == len(nm) 114 | 115 | def _find_initial(k: int) -> int: 116 | """ 117 | Find which symbol is contained. 118 | 119 | :param k: Position to start from 120 | :return: The index of the symbol within the list 121 | """ 122 | for y in range(len(symbols_char)): 123 | if _find(k, y): 124 | return y 125 | return -1 126 | 127 | s = '_' + s + ' ' * max_len 128 | r = False # Inside tag 129 | r_u = -1 130 | a = 0 131 | found = [] 132 | 133 | for i in range(1, len(s) - max_len): 134 | u = _find_initial(i) 135 | v = _find(i, r_u, False) 136 | # Open tag 137 | if not r and u >= 0: 138 | a = i 139 | r = True 140 | r_u = u 141 | # Close 142 | elif r and v: 143 | r = False 144 | f, g = a - 1, i - 1 145 | found.append((f, f + len(symbols_char[r_u][0]), g - 1, g + len(symbols_char[r_u][1]) - 1)) 146 | 147 | return tuple(found) 148 | 149 | 150 | def apply_tag_between_inside_char_command( 151 | s: str, 152 | symbols_char: List[Tuple[str, str, bool]], 153 | tags: Union[Tuple[str, str, str, str], str] 154 | ) -> str: 155 | """ 156 | Apply tag between symbols. 157 | 158 | For example, if symbols are ``($, $)`` and tag is ``[1,2,3,4]``: 159 | 160 | .. code-block:: none 161 | 162 | Input: This is a $formula$ and this is not. 
163 | Output: This is a 1$2formula3$4 and this is not 164 | 165 | :param s: Latex string code 166 | :param symbols_char: ``[(initial, final, ignore escape), ...]`` 167 | :param tags: Tags to replace 168 | :return: String with tags 169 | """ 170 | if isinstance(tags, str): 171 | if tags == '': 172 | return s 173 | tags = (tags, tags, tags, tags) 174 | 175 | assert len(tags) == 4 176 | a, b, c, d = tags 177 | tex_tags = find_tex_command_char(s, symbols_char) 178 | 179 | if len(tex_tags) == 0: 180 | return s 181 | new_s = '' 182 | k = 0 # Moves through tags 183 | for i in range(len(s)): 184 | if k < len(tex_tags): 185 | if i == tex_tags[k][0]: 186 | new_s += a + s[i] 187 | continue 188 | elif tex_tags[k][0] < i < tex_tags[k][1]: 189 | pass 190 | elif i == tex_tags[k][1] and tex_tags[k][1] != tex_tags[k][3]: 191 | new_s += b + s[i] 192 | if tex_tags[k][2] - tex_tags[k][1] == 0: 193 | new_s += c 194 | continue 195 | elif i == tex_tags[k][2] and tex_tags[k][2] != tex_tags[k][0]: 196 | new_s += s[i] + c 197 | continue 198 | elif tex_tags[k][2] < i < tex_tags[k][3]: 199 | pass 200 | elif i == tex_tags[k][3]: 201 | new_s += s[i] + d 202 | k += 1 203 | continue 204 | new_s += s[i] 205 | 206 | return new_s 207 | 208 | 209 | def find_tex_commands(s: str, offset: int = 0) -> Tuple[Tuple[int, int, int, int, bool], ...]: 210 | """ 211 | Find all tex commands within a code. 212 | 213 | .. code-block:: none 214 | 215 | 00000000001111111111222 216 | 01234567890123456789012 217 | a b c d 218 | Example: This is \\aCommand{nice}... 219 | Output: ((8, 16, 18, 21), ...) 220 | 221 | :param s: Latex string code 222 | :param offset: Offset added to the positioning, useful when using recursive calling on substrings 223 | :return: Tuple if found codes ``(a, b, c, d, command continues)`` 224 | """ 225 | found: List = [] 226 | is_cmd = False 227 | is_argv = False 228 | s += '_' 229 | a, b, c0, c1, d = 0, -1, 0, 0, 0 230 | depth_0 = 0 # {} 231 | depth_1 = 0 # [] 232 | cont_chars = ('{', '[', ' ', '\n') 233 | cmd_idx = 0 # index 234 | mode_arg = -1 235 | 236 | for i in range(len(s) - 1): 237 | # Start a command 238 | if not is_cmd and s[i] == '\\' and s[i + 1] in TEX_COMMAND_CHARS: 239 | a, b, is_cmd, is_argv = i, -1, True, False 240 | cmd_idx += 1 241 | mode_arg = -1 242 | depth_0, depth_1 = 0, 0 243 | 244 | # If command before args encounter an invalid chad, disables the command 245 | elif is_cmd and not is_argv and s[i] not in cont_chars and s[i] not in TEX_COMMAND_CHARS: 246 | is_cmd = False 247 | if s[i] == '\\' and s[i + 1] in TEX_COMMAND_CHARS: 248 | a, b, is_cmd, is_argv = i, -1, True, False 249 | cmd_idx += 1 250 | 251 | # If command has a new line, but following chars are not space 252 | elif is_cmd and not is_argv and s[i] == '\n' and s[i + 1] in TEX_COMMAND_CHARS: 253 | is_cmd = False 254 | 255 | # If command, not arg, but an invalid char follows the space, disables the command 256 | elif is_cmd and not is_argv and s[i - 1] == ' ' and s[i] not in cont_chars: 257 | is_cmd = False 258 | 259 | # Inits a new arg 260 | elif is_cmd and s[i] in ('{', '[') and s[i - 1] != '\\': 261 | is_argv = True 262 | if b == -1: 263 | b = i - 1 264 | depth_0, depth_1 = 0, 0 265 | if s[i] == '{': 266 | if depth_0 == 0: 267 | c0 = i + 1 268 | if mode_arg < 0: 269 | mode_arg = 0 270 | depth_0 += 1 271 | else: 272 | if depth_1 == 0: 273 | c1 = i + 1 274 | if mode_arg < 0: 275 | mode_arg = 1 276 | depth_1 += 1 277 | 278 | # Ends the argument, only if depth condition satisfies 279 | elif is_cmd and is_argv and s[i] in ('}', ']') and 
s[i - 1] != '\\': 280 | if s[i] == '}': 281 | depth_0 -= 1 282 | else: # ] 283 | depth_1 -= 1 284 | 285 | if (depth_0 == 0 and mode_arg == 0) or (depth_1 == 0 and mode_arg == 1): # Finished 286 | d = i - 1 287 | found.append([a, b, c0 if s[i] == '}' else c1, d, cmd_idx]) 288 | if s[i + 1] not in cont_chars: 289 | is_cmd = False 290 | is_argv = False 291 | mode_arg = -1 292 | # elif depth_0 < 0 or depth_1 < 0: # Invalid argument (parenthesis imbalance) 293 | # is_cmd = False 294 | # is_argv = False 295 | # mode_arg = -1 296 | 297 | # Add the offsets 298 | for f in found: 299 | f[0] += offset 300 | f[1] += offset 301 | f[2] += offset 302 | f[3] += offset 303 | 304 | # Check if command continues 305 | if len(found) == 0: 306 | return () 307 | elif len(found) == 1: 308 | found[0][4] = False 309 | else: 310 | for k in range(1, len(found)): 311 | if found[k][4] == found[k - 1][4]: 312 | found[k - 1][4] = True 313 | else: 314 | found[k - 1][4] = False 315 | if k == len(found) - 1: 316 | found[k][4] = False 317 | for k in range(len(found)): 318 | # noinspection PyUnresolvedReferences 319 | found[k] = tuple(found[k]) 320 | 321 | return tuple(found) 322 | 323 | 324 | def _find_tex_env_recursive(original_s: str, s: str, offset: int = 0, depth: int = 0) -> List: 325 | """ 326 | Find all environments. 327 | 328 | :param s: Latex string code 329 | :param offset: Offset applied to the search 330 | :return: List of all found environment commands 331 | """ 332 | tags = find_tex_commands(s, offset=offset) 333 | new_tags = [] 334 | for t in tags: 335 | a, b, c, d, _ = t 336 | source_cmd = s[a - offset:b - offset + 1] 337 | if 'begin' not in source_cmd and 'end' not in source_cmd: 338 | # Get the arguments of the command, and check more environments there 339 | cmd_args = s[c - offset:d - offset + 1] 340 | if 'begin' in cmd_args or 'end' in cmd_args: 341 | if 'newenvironment' in source_cmd or 'newcommand' in source_cmd: # Prone to bugs 342 | continue 343 | for tr in _find_tex_env_recursive(original_s, cmd_args, offset=c, depth=depth + 1): 344 | new_tags.append(tr) 345 | else: 346 | new_tags.append(t) 347 | return new_tags 348 | 349 | 350 | def find_tex_environments(s: str) -> Tuple[Tuple[str, int, int, int, int, str, int, int], ...]: 351 | r""" 352 | Find all tex environments within a code. 353 | 354 | Example: 355 | 356 | .. code-block:: none 357 | 358 | 0000000000111111111122222222223333333333 359 | 0123456789012345678901234567890123456789 360 | a b c d 361 | Example: This is \begin{nice}[cmd]my...\end{nice} 362 | Output: (('nice', 8, 20, 29, 39, 'parentenv', 0, -1), ...) 363 | 364 | This method also returns the name of the parent environment, the depth of the 365 | environment, and the depth of the item environment (if itemizable). 366 | 367 | :param s: Latex string code 368 | :return: Tuple of found environments ``(env_name, a, b, c, d, parent_env_name, env_depth, env_item_depth)`` 369 | """ 370 | 371 | def _env_common(e: str) -> str: 372 | """ 373 | Return the common environment for a given name.
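# Usage sketch for find_tex_commands on its own docstring example. Each returned entry is
# (a, b, c, d, continues): a..b spans "\aCommand", c..d spans the argument body, and the last
# flag says whether another argument of the same command follows. Import path assumed as above.
import pydetex.utils as ut

s = 'This is \\aCommand{nice}...'
for a, b, c, d, continues in ut.find_tex_commands(s):
    print(s[a:b + 1], s[c:d + 1], continues)  # -> \aCommand nice False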
374 | 375 | :param e: Environment name 376 | :return: Common environment 377 | """ 378 | if ('itemize' in e) or ('enumerate' in e) or ('tablenotes' in e): 379 | return 'item_' 380 | return '' 381 | 382 | tags = _find_tex_env_recursive(s, s) 383 | envs = [] 384 | env: Dict[str, List[Tuple[int, int, str, int]]] = {} 385 | last_env = '' 386 | env_depth = 0 387 | cmds_cont = [] 388 | env_depths: Dict[str, int] = {} 389 | 390 | for t in tags: 391 | a, b, c, d, _ = t 392 | if 'begin' in s[a:b + 1]: 393 | env_name = s[c:d + 1] 394 | c_env_name = _env_common(env_name) # Common environment name 395 | if c_env_name not in env_depths.keys(): 396 | env_depths[c_env_name] = 0 397 | else: 398 | env_depths[c_env_name] += 1 399 | env_i = (a, d + 2, last_env, env_depth) 400 | if env_name not in env: 401 | env[env_name] = [env_i] 402 | else: 403 | env[env_name].append(env_i) 404 | if a not in cmds_cont: 405 | cmds_cont.append(a) 406 | last_env = env_name 407 | env_depth += 1 408 | elif 'end' in s[a:b + 1]: 409 | env_name = s[c:d + 1] 410 | c_env_name = _env_common(env_name) # Common environment name 411 | 412 | if env_name in env.keys(): 413 | env_i = env[env_name].pop() 414 | 415 | # Update env itemize depth 416 | env_depth_item = -1 417 | if c_env_name != '': 418 | env_depth_item = env_depths[c_env_name] 419 | env_depths[c_env_name] -= 1 420 | 421 | envs.append(( 422 | env_name, # Environment name 423 | env_i[0], # a-position of the env 424 | env_i[1], # b-position 425 | a, # c-position 426 | d, # d-position 427 | env_i[2], # parent environment name 428 | env_i[3], # depth of the environment 429 | env_depth_item # itemize depth 430 | )) 431 | 432 | if len(env[env_name]) == 0: 433 | del env[env_name] 434 | last_env = env_i[2] 435 | env_depth -= 1 436 | 437 | return tuple(envs) 438 | 439 | 440 | def get_tex_commands_args( 441 | s: str, 442 | pos: bool = False 443 | ) -> Tuple[Tuple[Union[str, Tuple[str, bool], Tuple[int, int]], ...], ...]: 444 | r""" 445 | Get all the arguments from a tex command. Each command argument has a boolean 446 | indicating if that is optional or not. 447 | 448 | .. code-block:: none 449 | 450 | Example: This is \aCommand[\label{}]{nice} and... 451 | Output: (('aCommand', ('\label{}', True), ('nice', False)), ...) 452 | 453 | :param s: Latex string code 454 | :param pos: Add the numerical position of the original string at the last position 455 | :return: Arguments 456 | """ 457 | tags = find_tex_commands(s) 458 | commands = [] 459 | command = [] 460 | for t in tags: 461 | a, b, c, d, cont = t 462 | if len(command) == 0: 463 | command.append(s[a + 1:b + 1].strip()) 464 | arg = s[c - 1:d + 2] 465 | command.append((arg[1:-1], len(arg) != 0 and arg[0] == '[')) 466 | if not cont: 467 | if pos: 468 | command.append((a, d + 2)) 469 | commands.append(tuple(command)) 470 | command = [] 471 | return tuple(commands) 472 | 473 | 474 | def find_tex_commands_noargv(s: str) -> Tuple[Tuple[int, int], ...]: 475 | """ 476 | Find all tex commands with no arguments within a code. 477 | 478 | .. code-block:: none 479 | 480 | 00000000001111111111222 481 | 01234567890123456789012 482 | x x 483 | Example: This is \aCommand ... 484 | Output: ((8,16), ...) 
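# Usage sketch for get_tex_commands_args, taken from its docstring example: after the command
# name, every argument comes back as a (text, is_optional) pair. Import path assumed as above.
import pydetex.utils as ut

s = 'This is \\aCommand[\\label{}]{nice} and...'
for cmd in ut.get_tex_commands_args(s):
    name, args = cmd[0], cmd[1:]
    print(name, args)  # expected: aCommand (('\\label{}', True), ('nice', False))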
485 | 486 | :param s: Latex string code 487 | :return: Tuple if found codes 488 | """ 489 | found = [] 490 | is_cmd = False 491 | s += '_' 492 | a = 0 493 | cont_chars = ('{', '[', ' ') 494 | 495 | for i in range(len(s) - 1): 496 | if not is_cmd and s[i] == '\\' and s[i + 1] in TEX_COMMAND_CHARS: 497 | if i > 0 and s[i - 1] == '⇲': 498 | continue 499 | a = i 500 | is_cmd = True 501 | 502 | elif is_cmd and s[i] == '\\': 503 | if i - 1 - a > 0: 504 | found.append([a, i - 1]) 505 | a = i 506 | 507 | elif is_cmd and s[i] in ('{', '['): 508 | is_cmd = False 509 | 510 | # If command, not arg, but an invalid char follows the space, disables the command 511 | elif is_cmd and s[i - 1] == ' ' and s[i] not in cont_chars: 512 | is_cmd = False 513 | found.append([a, i - 1]) 514 | 515 | elif is_cmd and s[i] not in TEX_COMMAND_CHARS and s[i] not in cont_chars: 516 | is_cmd = False 517 | found.append([a, i - 1]) 518 | 519 | if is_cmd and a != len(s) - 2: 520 | found.append([a, len(s) - 2]) 521 | 522 | # Strip chars 523 | for k in range(len(found)): 524 | ch = found[k][1] 525 | for j in range(ch): 526 | if s[found[k][1]] == ' ': 527 | found[k][1] -= 1 528 | else: 529 | break 530 | # noinspection PyUnresolvedReferences 531 | found[k] = tuple(found[k]) 532 | 533 | # noinspection PyTypeChecker 534 | return tuple(found) 535 | 536 | 537 | def apply_tag_tex_commands( 538 | s: str, 539 | tags: Union[Tuple[str, str, str, str, str], str] 540 | ) -> str: 541 | """ 542 | Apply tag to tex command. 543 | 544 | For example, if tag is ``[1,2,3,4,5]``: 545 | 546 | .. code-block:: none 547 | 548 | Input: This is a \\formula{epic} and this is not 549 | Output: This is a 1\\formula2{3epic4}5 and this is not 550 | 551 | :param s: Latex string code 552 | :param tags: Tags (length 5) 553 | :return: Code with tags 554 | """ 555 | if isinstance(tags, str): 556 | if tags == '': 557 | return s 558 | tags = (tags, tags, tags, tags, tags) 559 | assert len(tags) == 5 560 | a, b, c, d, e = tags # Unpack 561 | 562 | tex_tags = find_tex_commands(s) 563 | if len(tex_tags) == 0: 564 | return s 565 | new_s = '' 566 | k = 0 # Moves through tags 567 | i = -1 568 | for _ in range(len(s)): 569 | i += 1 570 | if i == len(s): 571 | break 572 | if k < len(tex_tags) and i in tex_tags[k][0:4]: 573 | if i == tex_tags[k][0]: 574 | new_s += a + s[i] 575 | elif i == tex_tags[k][1]: 576 | new_s += s[i] + b 577 | elif i == tex_tags[k][2] and i != tex_tags[k][3]: 578 | new_s += c + s[i] 579 | elif i == tex_tags[k][3]: 580 | if i == tex_tags[k][2]: 581 | new_s += c 582 | new_s += s[i] + d + s[i + 1] + e 583 | i += 1 584 | # if continues 585 | if tex_tags[k][4]: 586 | new_s += b 587 | k += 1 588 | else: 589 | new_s += s[i] 590 | 591 | return new_s[0:len(new_s)] 592 | 593 | 594 | def apply_tag_tex_commands_no_argv( 595 | s: str, 596 | tags: Union[Tuple[str, str], str] 597 | ) -> str: 598 | """ 599 | Apply tag to tex command. 600 | 601 | For example, if tag is ``[1,2]``: 602 | 603 | .. code-block:: none 604 | 605 | Input: This is a \\formula and this is not. 
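# Usage sketch for apply_tag_tex_commands, mirroring its docstring example: five tags wrap the
# command name and its argument. Import path assumed as above.
import pydetex.utils as ut

s = 'This is a \\formula{epic} and this is not'
print(ut.apply_tag_tex_commands(s, ('1', '2', '3', '4', '5')))
# per the docstring: This is a 1\formula2{3epic4}5 and this is not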
606 | Output: This is a 1\\formula2 and this is not 607 | 608 | :param s: Latex string code 609 | :param tags: Tags (length 5) 610 | :return: Code with tags 611 | """ 612 | if isinstance(tags, str): 613 | if tags == '': 614 | return s 615 | tags = (tags, tags) 616 | assert len(tags) == 2 617 | a, b = tags # Unpack 618 | 619 | tex_tags = find_tex_commands_noargv(s) 620 | if len(tex_tags) == 0: 621 | return s 622 | new_s = '' 623 | k = 0 # Moves through tags 624 | i = -1 625 | for _ in range(len(s)): 626 | i += 1 627 | if k < len(tex_tags) and i in tex_tags[k]: 628 | if i == tex_tags[k][0]: 629 | new_s += a + s[i] 630 | elif i == tex_tags[k][1]: 631 | new_s += s[i] + b 632 | k += 1 633 | else: 634 | new_s += s[i] 635 | 636 | return new_s 637 | 638 | 639 | def _convert_single_symbol(s: str) -> Optional[str]: 640 | """ 641 | If ``s`` is just a latex code ``'alpha'`` or ``'beta'`` it converts it to its 642 | unicode representation. 643 | 644 | :param s: Latex string code 645 | :return: Latex with converted single symbols 646 | """ 647 | if '\\' not in s[0]: 648 | s = '\\' + s 649 | for (code, val) in _TEX_TO_UNICODE['latex_symbols']: 650 | if code == s: 651 | return val 652 | return None 653 | 654 | 655 | def _convert_latex_symbols(s: str) -> str: 656 | """ 657 | Replace each ``'\alpha'``, ``'\beta'`` and similar latex symbols with 658 | their unicode representation. 659 | 660 | :param s: Latex string code 661 | :return: Replaced symbols 662 | """ 663 | for (code, val) in _TEX_TO_UNICODE['latex_symbols']: 664 | s = s.replace(code, val) 665 | return s 666 | 667 | 668 | def _process_starting_modifiers(s: str) -> str: 669 | """ 670 | If s start with ``'it '``, ``'cal '``, etc. then make the whole string 671 | italic, calligraphic, etc. 672 | 673 | :param s: Latex string code 674 | :return: Modified text 675 | """ 676 | s = re.sub('^bb ', r'\\bb{', s) 677 | s = re.sub('^bf ', r'\\bf{', s) 678 | s = re.sub('^it ', r'\\it{', s) 679 | s = re.sub('^cal ', r'\\cal{', s) 680 | s = re.sub('^frak ', r'\\frak{', s) 681 | s = re.sub('^mono ', r'\\mono{', s) 682 | return s 683 | 684 | 685 | def _apply_all_modifiers(s: str) -> str: 686 | """ 687 | Applies all modifiers. 688 | 689 | :param s: Latex string code 690 | :return: Text with replaced chars 691 | """ 692 | s = _apply_modifier(s, '^', _TEX_TO_UNICODE['superscripts']) 693 | s = _apply_modifier(s, '_', _TEX_TO_UNICODE['subscripts']) 694 | 695 | s = _apply_modifier(s, '\\bb', _TEX_TO_UNICODE['textbb']) 696 | s = _apply_modifier(s, '\\bf', _TEX_TO_UNICODE['textbf']) 697 | s = _apply_modifier(s, '\\cal', _TEX_TO_UNICODE['textcal']) 698 | s = _apply_modifier(s, '\\emph', _TEX_TO_UNICODE['textit']) 699 | s = _apply_modifier(s, '\\frak', _TEX_TO_UNICODE['textfrak']) 700 | s = _apply_modifier(s, '\\it', _TEX_TO_UNICODE['textit']) 701 | s = _apply_modifier(s, '\\mono', _TEX_TO_UNICODE['textmono']) 702 | 703 | return s 704 | 705 | 706 | def _apply_modifier(s: str, modifier: str, d: Dict[Any, str]) -> str: 707 | """ 708 | This will search for the ^ signs and replace the next 709 | digit or (digits when {} is used) with its/their uppercase representation. 710 | 711 | :param s: Latex string code 712 | :param modifier: Modifier command 713 | :param d: Dict to look upon 714 | :return: New text with replaced text. 
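# Usage sketch for apply_tag_tex_commands_no_argv, mirroring its docstring example. Note that
# the assert in the function body requires exactly two tags, even though the docstring above
# says "length 5". Import path assumed as above.
import pydetex.utils as ut

s = 'This is a \\formula and this is not.'
print(ut.apply_tag_tex_commands_no_argv(s, ('1', '2')))
# per the docstring: This is a 1\formula2 and this is not.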
715 | """ 716 | s = s.replace(modifier, "^") 717 | newtext = "" 718 | mode_normal, mode_modified, mode_long = range(3) 719 | mode = mode_normal 720 | for ch in s: 721 | if mode == mode_normal and ch == '^': 722 | mode = mode_modified 723 | continue 724 | elif mode == mode_modified and ch == '{': 725 | mode = mode_long 726 | continue 727 | elif mode == mode_modified: 728 | newtext += d.get(ch, ch) 729 | mode = mode_normal 730 | continue 731 | elif mode == mode_long and ch == '}': 732 | mode = mode_normal 733 | continue 734 | 735 | if mode == mode_normal: 736 | newtext += ch 737 | else: 738 | newtext += d.get(ch, ch) 739 | return newtext 740 | 741 | 742 | def __load_unicode() -> None: 743 | """ 744 | Loads the unicode data. 745 | """ 746 | respath = str(os.path.abspath(os.path.dirname(__file__))).replace('\\', '/') + '/res/u_' 747 | for j in _TEX_TO_UNICODE.keys(): 748 | if j == 'latex_symbols': 749 | with open(f'{respath}symbols.txt', encoding='utf-8') as f: 750 | line = f.readline() 751 | while line != "": 752 | words = line.split() 753 | code = words[0] 754 | val = words[1] 755 | _TEX_TO_UNICODE['latex_symbols'].append((code, val)) 756 | line = f.readline() 757 | else: 758 | with open(f'{respath}{j}.txt', encoding='utf-8') as f: 759 | line = f.readline() 760 | while line != '': 761 | words = line.split() 762 | code = words[0] 763 | val = words[1] 764 | _TEX_TO_UNICODE[j][code] = val 765 | line = f.readline() 766 | 767 | 768 | def tex_to_unicode(s: str) -> str: 769 | """ 770 | Transforms tex code to unicode. 771 | 772 | :param s: Latex string code 773 | :return: Text in unicode 774 | """ 775 | if s.strip() == '': 776 | return s 777 | ss = _convert_single_symbol(s) 778 | if ss is not None: 779 | return ss 780 | 781 | s = _convert_latex_symbols(s) 782 | s = _process_starting_modifiers(s) 783 | s = _apply_all_modifiers(s) 784 | 785 | # Last filter 786 | s = s.replace('\n\n', '\n').replace(' ', ' ').replace('\t', ' ') 787 | try: 788 | s = _FLATLATEX.convert(s) 789 | except LatexSyntaxError: 790 | pass 791 | 792 | return s 793 | 794 | 795 | # Loads the unicode data 796 | __load_unicode() 797 | -------------------------------------------------------------------------------- /test/test_parsers.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | TEST PARSERS 6 | Test several parsers which perform a single operation. 7 | """ 8 | 9 | from test._base import BaseTest 10 | import pydetex 11 | import pydetex.parsers as par 12 | import pydetex.utils as ut 13 | 14 | 15 | class ParserTest(BaseTest): 16 | 17 | def test_version(self) -> None: 18 | """ 19 | Configure version. 20 | """ 21 | self.assertNotEqual(pydetex.version.vernum, '') 22 | 23 | def test_process_labels(self) -> None: 24 | """ 25 | Removes labels. 26 | """ 27 | self.assertEqual(par.process_labels('\\section{Research method}\\label{researchmethod}'), 28 | '\\section{Research method}') 29 | self.assertEqual(par.process_labels('This is \\label{epic} a very nice latex'), 30 | 'This is a very nice latex') 31 | 32 | def test_find_str(self) -> None: 33 | """ 34 | Test find string. 35 | """ 36 | s = 'This is a latex string, \\textbf{in bold}' 37 | self.assertEqual(par.find_str(s, '\\textit'), -1) 38 | self.assertEqual(par.find_str(s, '\\textbf'), 24) 39 | s2 = """ 40 | This is another example, \\cite{A} thinks that it is good, whereas 41 | \\citep{K} don't. However, \\cite*{A} is more interesting. 
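# Usage sketch for tex_to_unicode. The conversions follow the patterns exercised by the
# unicode tests further below (greek letters, super/subscripts); the exact glyphs depend on
# the bundled res/u_*.txt tables and on flatlatex, so treat the outputs as approximate.
# Import path assumed as above.
import pydetex.utils as ut

print(ut.tex_to_unicode('\\alpha^{2+3}'))   # expected something like: α²⁺³
print(ut.tex_to_unicode('x_0 + \\beta^2'))  # expected something like: x₀ + β²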
42 | """ 43 | self.assertEqual(par.find_str(s2, '\\cite'), 34) 44 | self.assertEqual(par.find_str(s2, '\\cite*'), 109) 45 | 46 | def test_remove_tag(self) -> None: 47 | """ 48 | Test remove tags. 49 | """ 50 | self.assertEqual(par.remove_tag('lorem ipsum \\textbf{hi}', 'textbf'), 'lorem ipsum hi') 51 | self.assertEqual(par.remove_tag('lorem ipsum \\textbf{\\textbf{hi}}', 'textbf'), 'lorem ipsum hi') 52 | 53 | def test_process_cite(self) -> None: 54 | """ 55 | Removes cites from text. 56 | """ 57 | s = 'hello \\cite{number1,number2} epic' 58 | self.assertEqual(par.replace_pydetex_tags(par.process_cite(s)), 59 | 'hello [1, 2] epic') 60 | s = 'this is \\cite{number1} epic \\cite{number2} and \\cite{number1}' 61 | self.assertEqual(par.replace_pydetex_tags(par.process_cite(s)), 62 | 'this is [1] epic [2] and [1]') 63 | s = 'This is another example, \\cite*{Downson} et al. suggests that yes, but \\cite{Epic} not' 64 | self.assertEqual( 65 | par.replace_pydetex_tags(par.process_cite(s)), 66 | 'This is another example, [1] et al. suggests that yes, but [2] not') 67 | # Test equation cite 68 | s = 'Here, we test an equation with \\eqref{mycite}' 69 | self.assertEqual( 70 | par.replace_pydetex_tags(par.process_cite(s)), 71 | 'Here, we test an equation with (1)') 72 | # Test multiple cites 73 | s = 'This is an example \\cite{b} \\cite{a, b, c , d, e}' 74 | self.assertEqual(par.replace_pydetex_tags(par.process_cite(s)), 'This is an example [1] [1-5]') 75 | self.assertEqual(par.replace_pydetex_tags(par.process_cite(s, compress_cite=False)), 76 | 'This is an example [1] [1, 2, 3, 4, 5]') 77 | self.assertEqual(par.replace_pydetex_tags(par.process_cite(s, sort_cites=False)), 78 | 'This is an example [1] [2, 1, 3-5]') 79 | 80 | def test_process_citeauthor(self) -> None: 81 | """ 82 | Removes citeauthor from text. 83 | """ 84 | s = 'hello \\citeauthor{number1,number2} epic' 85 | self.assertEqual(par.replace_pydetex_tags(par.process_citeauthor(par.process_cite(s), 'en')), 86 | 'hello [authors] epic') 87 | s = 'hello \\citeauthor{number1} epic' 88 | self.assertEqual(par.replace_pydetex_tags(par.process_citeauthor(par.process_cite(s), 'en')), 89 | 'hello [author] epic') 90 | 91 | def test_process_ref(self) -> None: 92 | """ 93 | Removes references from text. 94 | """ 95 | self.assertEqual(par.process_ref('this is a \\ref{myref}'), 'this is a 1') 96 | self.assertEqual(par.process_ref('this is a \\ref{myref} and \\ref*{myref}'), 'this is a 1 and 1') 97 | 98 | def test_remove_common_tags(self) -> None: 99 | """ 100 | Remove common tags. 101 | """ 102 | self.assertEqual(par.remove_common_tags('this is \\hl{a}'), 'this is a') 103 | self.assertEqual(par.remove_common_tags('this is \\textsuperscript{\\hl{nice}}'), 'this is nice') 104 | 105 | def test_remove_comments(self) -> None: 106 | """ 107 | Removes comments. 108 | """ 109 | self.assertEqual(par.remove_comments('This is a \% percentage, and % a comment'), 110 | 'This is a ⇱COMMENT_PERCENTAGE_SYMBOL⇲ percentage, and') 111 | s = """ 112 | This is a multi-line file, typical from latex% comment 113 | 114 | % Typical comment lines..... 115 | 116 | Whereas this is another line or paragraph. So boring 117 | """ 118 | self.assertEqual(par.remove_comments(s), 119 | 'This is a multi-line file, typical from latex\n\nWhereas this is another line or paragraph. 
So boring') 120 | 121 | # Comments right to text 122 | s = """ 123 | Web of Science, % https://webofknowledge.com/ 124 | Scopus, % https://www.scopus.com/ 125 | IEEE/IET Xplore, % https://ieeexplore.ieee.org/ 126 | Science Direct, % https://uchile.idm.oclc.org/login?url=https://www.sciencedirect.com/ 127 | """ 128 | self.assertEqual(par.remove_comments(s), 'Web of Science, Scopus, IEEE/IET Xplore, Science Direct,') 129 | 130 | # Comments at start 131 | s = """% !TeX spellcheck = en_US 132 | 133 | \section{Introduction} 134 | 135 | Architectural floor plans are documents that result from an iterative design, planning, and engineering pro""" 136 | self.assertEqual( 137 | par.remove_comments(s), 138 | '\\section{Introduction}\n\nArchitectural floor plans are documents that result from an iterative design, ' 139 | 'planning, and engineering pro') 140 | 141 | # Comment right to newline 142 | s = 'Therefore, the scope was restricted to analyzing vector-based CAD files or retrieving individual elements ' \ 143 | 'from plans with a simple format. \\\\% Therefore, the scope was restricted to analyze vector-based CAD files,' \ 144 | ' or retrieving individual elements from plans with a simple format. \\' 145 | t = 'Therefore, the scope was restricted to analyzing vector-based CAD files or retrieving individual elements ' \ 146 | 'from plans with a simple format. \\\\' 147 | self.assertEqual(par.remove_comments(s), t) 148 | 149 | def test_simple_replace(self) -> None: 150 | """ 151 | Test simple replace format. 152 | """ 153 | self.assertEqual(par.simple_replace('This is an \\itemBad a'), 'This is an \\itemBad a') 154 | self.assertEqual(par.simple_replace('This is a example formula $\\alpha\longrightarrow\\beta+1$'), 155 | 'This is a example formula $α⟶β+1$') 156 | self.assertEqual(par.simple_replace('This is \\alphaNot but \\alpha'), 157 | 'This is \\alphaNot but α') 158 | self.assertEqual(par.simple_replace('This is a $x_0$ and $x^2$'), 'This is a $x₀$ and $x²$') 159 | self.assertEqual(par.simple_replace('The following example $\\alpha_0+\\beta^2=0$'), 160 | 'The following example $α₀+β²=0$') 161 | self.assertEqual(par.simple_replace('This is a $x_0$ and \(x^2\)'), 'This is a $x₀$ and \(x²\)') 162 | self.assertEqual(par.simple_replace('This is $\\alpha$'), 'This is $α$') 163 | self.assertEqual(par.simple_replace('This is \#my\_var'), 'This is #my_var') 164 | 165 | def test_parse_inputs(self) -> None: 166 | """ 167 | Parse inputs. 168 | """ 169 | self.assertEqual(par._NOT_FOUND_FILES, []) 170 | self.assertEqual(par.process_inputs('This loads a \\input{latex} or \\input{} epic'), 171 | 'This loads a \\input{latex} or \\input{} epic') 172 | self.assertEqual(par._NOT_FOUND_FILES, ['latex.tex', '.tex']) 173 | self.assertEqual(par.process_inputs('This loads a \\input{latex} or \\input{} epic'), 174 | 'This loads a \\input{latex} or \\input{} epic') 175 | self.assertEqual(par.process_inputs('This loads a \\input{data/simple} epic', clear_not_found_files=True), 176 | 'This loads a this is a simple file epic') 177 | 178 | def test_remove_commands_char(self) -> None: 179 | """ 180 | Remove commands char. 181 | """ 182 | s = 'This is a $command$!' 183 | self.assertEqual(par.remove_equations(s), 'This is a !') 184 | s = 'This is a $command\$ but this does not delete$!' 185 | self.assertEqual(par.remove_equations(s), 'This is a !') 186 | s = 'This is a $command!' 187 | self.assertEqual(par.remove_commands_char(s, chars=ut.TEX_EQUATION_CHARS), s) 188 | s = 'This is a$$ command!' 
189 | self.assertEqual(par.remove_equations(s), 'This is a command!') 190 | s = 'This is a $comman$ and $this should be removed too$!' 191 | self.assertEqual(par.remove_equations(s), 'This is a and !') 192 | s = 'This is a \(comman\) and \(this should be removed too\)!' 193 | self.assertEqual(par.remove_equations(s), 'This is a and !') 194 | s = 'This is a \(\) and $X$!' 195 | self.assertEqual(par.remove_equations(s), 'This is a and !') 196 | s = '$X$\(y\)$alpha$$$$$$key$' 197 | self.assertEqual(par.remove_equations(s), '') 198 | 199 | def test_remove_commands(self) -> None: 200 | """ 201 | Remove commands. 202 | """ 203 | s = 'This \\f{must be removed} yes!' 204 | self.assertEqual(par.remove_commands_param(s, 'en'), 'This yes!') 205 | self.assertEqual(par.remove_commands_param('', 'en'), '') 206 | s = 'This \\texttt{\insertimage{nice}{1}}no' 207 | self.assertEqual(par.remove_commands_param(s, 'en'), 'This no') 208 | s = '\\insertimage[\label{epic}]{delete this}' 209 | self.assertEqual(par.remove_commands_param(s, 'en'), '') 210 | s = 'Very\\insertimage[\label{epic}]{delete this} Epic' 211 | self.assertEqual(par.remove_commands_param(s, 'en'), 'Very Epic') 212 | s = 'Very\\insertimage[\label{epic}]{delete this} Epic \\not yes' 213 | self.assertEqual(par.remove_commands_param(s, 'en'), 'Very Epic \\not yes') 214 | s = 'Ni\\f {}ce' 215 | self.assertEqual(par.remove_commands_param(s, 'en'), 'Nice') 216 | s = 'Ni\\f \n [][][]{}ce' 217 | self.assertEqual(par.remove_commands_param(s, 'en'), 'Nice') 218 | s = '\caption {thus, the analysis \{cannot\} be based \mycommand{only} using {nice} symbols}' 219 | self.assertEqual(par.replace_pydetex_tags(par.remove_commands_param(s, 'en')).strip(), 220 | 'CAPTION: thus, the analysis \{cannot\} be based using nice symbols') 221 | 222 | def test_remove_commands_noargv(self) -> None: 223 | """ 224 | Remove commands without arguments. 225 | """ 226 | s = 'This\\image remove' 227 | self.assertEqual(par.remove_commands_param_noargv(s), 'This remove') 228 | s = 'This inserts an \\insertimage[width=1\linewidth]' 229 | self.assertEqual(par.remove_commands_param_noargv(s), 'This inserts an \\insertimage[width=1]') 230 | s = 'This \\delete' 231 | self.assertEqual(par.remove_commands_param_noargv(s), 'This ') 232 | s = 'This \\delete ' 233 | self.assertEqual(par.remove_commands_param_noargv(s), 'This ') 234 | s = '\\delete yes' 235 | self.assertEqual(par.remove_commands_param_noargv(s), ' yes') 236 | s = '\\delete' 237 | self.assertEqual(par.remove_commands_param_noargv(s), '') 238 | 239 | def test_process_chars_equations(self) -> None: 240 | """ 241 | Process single char equations. 242 | """ 243 | # Test single only 244 | s = 'This code does not \$contain any equation$!!' 245 | self.assertEqual(par.process_chars_equations(s, 'en', single_only=True), s) 246 | s = 'This code must be $x$ processed!!' 
247 | self.assertEqual(par.process_chars_equations(s, 'en', single_only=True), 'This code must be x processed!!') 248 | s = par.simple_replace('$\\alpha$-shape is really nice') 249 | self.assertEqual(par.process_chars_equations(s, 'en', single_only=True), 'α-shape is really nice') 250 | s = 'Because $x$ no lower needs any other supervision as $y$ or $z$ in \$30 or \$40$$' 251 | self.assertEqual(par.process_chars_equations(s, 'en', single_only=True), 252 | 'Because x no lower needs any other supervision as y or z in \$30 or \$40') 253 | s = 'This code $with several chars$ should not be removed' 254 | self.assertEqual(par.process_chars_equations(s, 'en', single_only=True), 255 | 'This code with several chars should not be removed') 256 | s = 'This code must be $$ processed!!' 257 | self.assertEqual(par.process_chars_equations(s, 'en', single_only=True), 'This code must be processed!!') 258 | s = 'an $x$$y$$z$' 259 | self.assertEqual(par.process_chars_equations(s, 'en', single_only=True), 'an xyz') 260 | 261 | # Test multiple 262 | s = 'This code $with several chars$ should not be removed' 263 | self.assertEqual(par.process_chars_equations(s, 'en', single_only=False), 264 | 'This code EQUATION_0 should not be removed') 265 | s = 'This code \(with several chars\) should not be removed' 266 | self.assertEqual(par.process_chars_equations(s, 'en', single_only=False), 267 | 'This code EQUATION_0 should not be removed') 268 | s = 'This $equation 0$ and \$equation $equation 1$ must by replaced' 269 | self.assertEqual(par.process_chars_equations(s, '-', single_only=False), 270 | 'This EQUATION_0 and \$equation EQUATION_1 must by replaced') 271 | 272 | # Test environments 273 | s = """My new equation: 274 | \\begin{equation} 275 | a+b 276 | \\end{equation}""" 277 | self.assertEqual(par.process_chars_equations(s, '-', single_only=False), 278 | 'My new equation:\n EQUATION_0') 279 | 280 | def test_output_text_for_some_commands(self) -> None: 281 | """ 282 | Test output text for some commands, like caption or subfigure. 283 | """ 284 | 285 | def out(s_: str) -> str: 286 | """ 287 | Call method. 
288 | """ 289 | return par.replace_pydetex_tags(par.output_text_for_some_commands(s_, 'en')).strip() 290 | 291 | s = """ 292 | \\begin{figure} 293 | \centering 294 | \\reflectbox{% 295 | \includegraphics[width=0.5\textwidth]{gull}} 296 | \caption {A picture of the same gull\nlooking the other way!} 297 | \caption[invalid] 298 | \end{figure} 299 | """ 300 | self.assertEqual(out(s), 'CAPTION: A picture of the same gull looking the other way!') 301 | 302 | # Custom template 303 | s = '\\insertimage[]{imagefile}{width=5cm}{e}' 304 | self.assertEqual(out(s), 'FIGURE_CAPTION: e') 305 | s = '\\insertimage{imagefile}{width=5cm}{e}' 306 | self.assertEqual(out(s), 'FIGURE_CAPTION: e') 307 | s = '\\insertimageboxed{imagefile}{width=5cm}{0.5}{legend}' 308 | self.assertEqual(out(s), 'FIGURE_CAPTION: legend') 309 | s = 'Nice\n\insertimage[\label{unetmodel}]{unet_compressed}{width=\linewidth}{A U-Net model.}' 310 | self.assertEqual(out(s), 'FIGURE_CAPTION: A U-Net model.') 311 | 312 | # Test other 313 | s = 'This is a \\href{https://google.com}{A link}' 314 | self.assertEqual(out(s), 'LINK: A link') 315 | s = '\section{a}\section*{a}]' 316 | self.assertEqual(out(s), 'a\n\na') 317 | s = '\\texttt{nice!} and \emph{nice!}' 318 | self.assertEqual(out(s), 'nice!nice!') 319 | s = '\\textit{\href{a}{link}}' 320 | self.assertEqual(out(s), 'LINK: link') 321 | 322 | # Test MakeUppercase 323 | s = '\\MakeUppercase{this is a Test}' 324 | self.assertEqual(out(s), 'THIS IS A TEST') 325 | s = '\\uppercase{this is a Test}' 326 | self.assertEqual(out(s), 'THIS IS A TEST') 327 | s = '\\MakeLowercase{THIS is a Test}' 328 | self.assertEqual(out(s), 'this is a test') 329 | s = '\\lowercase{THIS is a Test}' 330 | self.assertEqual(out(s), 'this is a test') 331 | 332 | # Test quotes 333 | s = '\quotes{a quoted}' 334 | self.assertEqual(out(s), '"a quoted"') 335 | s = '\enquote{a quoted}' 336 | self.assertEqual(out(s), '"a quoted"') 337 | s = '\quotes{\href{a}{link}}' 338 | self.assertEqual(out(s), '"LINK: link"') 339 | s = '\doublequotes{\href{a}{link}}' 340 | self.assertEqual(out(s), '"LINK: link"') 341 | 342 | # Test acronym 343 | for i in ('ac', 'acf', 'acs', 'acl'): 344 | self.assertEqual(out(f'\\{i}{{XYZ}}'), 'XYZ') 345 | 346 | # Test underline/strike 347 | for i in ('underline', 'so', 'st', 'hl'): 348 | self.assertEqual(out(f'\\{i}{{XYZ}}'), 'XYZ') 349 | 350 | def test_unicode_chars_equations(self) -> None: 351 | """ 352 | Test unicode char equations. 353 | """ 354 | s = 'This is my $\\alpha^2 \cdot \\alpha^{2+3} \equiv \\alpha^7$ equation' 355 | self.assertEqual(par.unicode_chars_equations(s), 'This is my $α² ⋅ α²⁺³ ≡ α⁷$ equation') 356 | s = 'This is my $x$ equation' 357 | self.assertEqual(par.unicode_chars_equations(s), 'This is my $x$ equation') 358 | s = 'This is my $\{a+b\}=min\{t\}$ equation' 359 | self.assertEqual(par.replace_pydetex_tags(par.unicode_chars_equations(s)), 360 | 'This is my ${a+b}=min{t}$ equation') 361 | s = 'This is my $$ equation' 362 | self.assertEqual(par.unicode_chars_equations(s), 'This is my $$ equation') 363 | s = 'This is my \\begin{align}\\alpha^2 \cdot \\alpha^{2+3} \equiv \\alpha^7\\end{align} equation' 364 | self.assertEqual(par.unicode_chars_equations(s), 'This is my \\begin{align}α² ⋅ α²⁺³ ≡ α⁷\end{align} equation') 365 | 366 | def test_strip_punctuation(self) -> None: 367 | """ 368 | Test strip punctuation. 
369 | """ 370 | self.assertEqual(par.strip_punctuation('Or , for example : yes !'), 'Or, for example: yes!') 371 | 372 | def test_process_items(self) -> None: 373 | """ 374 | Test process items. 375 | """ 376 | s = '\\begin{itemize}\item a \item b\\begin{itemize}\item a \item b\end{itemize}\end{itemize}' 377 | self.assertEqual(par.replace_pydetex_tags(par.process_items(s, lang='en')), 378 | '\n- a \n- b\n • a\n • b') 379 | 380 | s = """\\begin{itemize}[font=\\bfseries] 381 | \item As shown in Figure \\ref{fignumber} 382 | \item Proposed 383 | \end{itemize}""" 384 | self.assertEqual(par.replace_pydetex_tags(par.process_items(s, lang='en')), 385 | '\n- As shown in Figure \n- Proposed') 386 | 387 | s = """\\begin{enumerate} 388 | \\item a 389 | \\begin{enumerate} 390 | \\item a 391 | \\item b 392 | \\begin{enumerate} 393 | \\item a 394 | \\item b 395 | \\item c 396 | \\begin{enumerate}[font=\\bfseries] 397 | \\item a 398 | \\item b 399 | \\item c 400 | \\begin{enumerate}[[font=\\bfseries]] 401 | \\item a 402 | \\item b 403 | \\item c 404 | \\end{enumerate} 405 | \\end{enumerate} 406 | \\end{enumerate} 407 | \\end{enumerate} 408 | \\item c 409 | \\begin{itemize} 410 | \\item a 411 | \\item b 412 | \\item c 413 | \\end{itemize} 414 | \\item epic 415 | \\end{enumerate} 416 | """ 417 | 418 | t = par.replace_pydetex_tags(par.process_items(s, lang='en')) 419 | self.assertEqual( 420 | t, '\n1. a\n a) a\n b) b\n i. a\n ii. b\n iii. c\n' 421 | ' A) a\n B) b\n C) c\n I. a\n ' 422 | ' II. b\n III. c\n2. c\n • a\n • b\n ' 423 | ' • c\n3. epic\n ') 424 | 425 | self.assertEqual(par._process_item('', ''), '') 426 | 427 | s = """ 428 | \\begin{enumerate} 429 | \item b 430 | \end{enumerate} 431 | 432 | \\begin{itemize} 433 | \item a 434 | \\end{itemize} 435 | 436 | \\begin{tablenotes} 437 | Note: Res - Resolution in pixels (px). 438 | \\end{tablenotes} 439 | 440 | epic 441 | """ 442 | self.assertEqual( 443 | par.replace_pydetex_tags(par.process_items(s, lang='en')), 444 | '\n \n1. b\n \n \n- a\n \n Note:' 445 | ' Res - Resolution in pixels (px).\n \n epic\n ' 446 | ) 447 | 448 | # Multiple non-nested 449 | s = """ 450 | \\begin{enumerate} 451 | \item a 452 | \end{enumerate} 453 | \\begin{enumeratebf} 454 | \item a 455 | \end{enumeratebf} 456 | \\begin{enumerate} 457 | \item b 458 | \end{enumerate} 459 | \\begin{enumerate} 460 | \item a 461 | \end{enumerate} 462 | \\begin{enumerate} 463 | \item b 464 | \\begin{itemize} 465 | \item c 466 | \end{itemize} 467 | \item d 468 | \end{enumerate} 469 | \\begin{nice} 470 | \\end{nice} 471 | """ 472 | self.assertEqual( 473 | ut.find_tex_environments(s), 474 | (('enumerate', 9, 26, 55, 68, '', 0, 0), 475 | ('enumeratebf', 79, 98, 127, 142, '', 0, 0), 476 | ('enumerate', 153, 170, 199, 212, '', 0, 0), 477 | ('enumerate', 223, 240, 269, 282, '', 0, 0), 478 | ('itemize', 343, 358, 395, 406, 'enumerate', 1, 1), 479 | ('enumerate', 293, 310, 437, 450, '', 0, 0), 480 | ('nice', 461, 473, 482, 490, '', 0, -1)) 481 | ) 482 | self.assertEqual( 483 | par.replace_pydetex_tags(par.process_items(s, lang='en')).strip(), 484 | '1. a\n \n1. a\n \n1. b\n \n1. a\n \n1. ' 485 | 'b\n • c\n2. d\n \\begin{nice}\n \\end{nice}') 486 | 487 | def test_remove_environments(self) -> None: 488 | """ 489 | Remove environment test. 
490 | """ 491 | s = 'e\\begin{nice}x\\end{nice}p\\begin{y}z\\end{y}i\\begin{k}z\\end{k}c' 492 | self.assertEqual(par.remove_environments(s), s) 493 | self.assertEqual(par.remove_environments(s, ['y']), 'e\\begin{nice}x\end{nice}pi\\begin{k}z\end{k}c') 494 | self.assertEqual(par.remove_environments(s, ['y', 'nice']), 'epi\\begin{k}z\end{k}c') 495 | self.assertEqual(par.remove_environments(s, ['y', 'nice', 'k']), 'epic') 496 | 497 | s = """The following is a tikz figure, and must be removed: 498 | 499 | \\begin{tikzpicture}[line cap=round, line join=round, >=triangle 45, 500 | x=4.0cm, y=1.0cm, scale=1] 501 | \draw [->,color=black] (-0.1,0) -- (2.5,0); 502 | \\foreach \\x in {1,2} 503 | \draw [shift={(\\x,0)}, color=black] (0pt,2pt) 504 | -- (0pt,-2pt) node [below] {\\footnotesize $\\x$}; 505 | \draw [color=black] (2.5,0) node [below] {$x$}; 506 | \draw [->,color=black] (0,-0.1) -- (0,4.5); 507 | \\foreach \y in {1,2,3,4} 508 | \draw [shift={(0,\y)}, color=black] (2pt,0pt) 509 | -- (-2pt,0pt) node[left] {\\footnotesize $\y$}; 510 | \draw [color=black] (0,4.5) node [right] {$y$}; 511 | \draw [color=black] (0pt,-10pt) node [left] {\\footnotesize $0$}; 512 | \draw [domain=0:2.2, line width=1.0pt] plot (\\x,{(\\x)^2}); 513 | \clip(0,-0.5) rectangle (3,5); 514 | \draw (2,0) -- (2,4); 515 | \\foreach \i in {1,...,\\thehigher} 516 | \draw [fill=black,fill opacity=0.3, smooth,samples=50] ({1+(\i-1)/\\thehigher},{(1+(\i)/\\thehigher)^2}) 517 | --({1+(\i)/\\thehigher},{(1+(\i)/\\thehigher)^2}) 518 | -- ({1+(\i)/\thehigher},0) 519 | -- ({1+(\i-1)/\\thehigher},0) 520 | -- cycle; 521 | \end{tikzpicture}and it was removed!! 522 | 523 | \\begin{epic} 524 | But this should not be removed! 525 | \\end{epic}""" 526 | self.assertEqual( 527 | par.remove_environments(s), 528 | 'The following is a tikz figure, and must be removed:\n \n ' 529 | ' and it was removed!!\n \n \\begin{epic}\n ' 530 | ' But this should not be removed!\n \\end{epic}') 531 | 532 | def test_process_def(self) -> None: 533 | """ 534 | Process defs test. 
535 | """ 536 | par._DEFS.clear() 537 | 538 | s = 'This is my \\def\\code {epic!} but yes \\def\\a{} epic' 539 | self.assertEqual(par.process_def(s), 'This is my but yes epic') 540 | self.assertEqual(len(par._DEFS), 2) 541 | self.assertEqual(par._DEFS['\\code'], 'epic!') 542 | 543 | s = """ 544 | \def\\underline#1{\\relax\ifmmode\@@underline{#1}\else $\@@underline{\hbox{#1}}\m@th$\\relax\\fi} 545 | \def\@greek#1{% 546 | \ifcase#1% 547 | \or $\\alpha$% 548 | \or $\\beta$% 549 | \or $\gamma$% 550 | \or $\delta$% 551 | \or $\epsilon$% 552 | \or $\zeta$% 553 | \or $\eta$% 554 | \or $\\theta$% 555 | \or $\iota$% 556 | \or $\kappa$% 557 | \or $\lambda$% 558 | \or $\mu$% 559 | \or $\\nu$% 560 | \or $\\xi$% 561 | \or $o$% 562 | \or $\pi$% 563 | \or $\\rho$% 564 | \or $\sigma$% 565 | \or $\\tau$% 566 | \or $\\upsilon$% 567 | \or $\phi$% 568 | \or $\chi$% 569 | \or $\psi$% 570 | \or $\omega$% 571 | \\fi% 572 | } 573 | not epic 574 | """ 575 | self.assertEqual(par.process_def(s).strip(), 'not epic') 576 | self.assertEqual(len(par._DEFS), 0) 577 | 578 | s = '\\def\\mycommand{epic}This is really \mycommand yes' 579 | self.assertEqual(par.process_def(s, replace=True), 'This is really epic yes') 580 | s = '\\def\\mycommand{epic}This is really \mycommand' 581 | self.assertEqual(par.process_def(s, replace=True), 'This is really epic') 582 | 583 | s = 'a\\def\e{e}' 584 | self.assertEqual(par.process_def(s), 'a') 585 | s = '\\def\e{e}' 586 | self.assertEqual(par.process_def(s), '') 587 | s = '\\def\e{e}\\def\p{p}\\def\i {i}\\def\c\n{c}\e\p\i\c' 588 | self.assertEqual(par.process_def(s, replace=True), 'epic') 589 | s = '\epic \def\\a{a} \\nice \\item \\a\\a\\a not \\b' 590 | self.assertEqual(par.process_def(s, replace=True), '\epic \\nice \\item aaa not \\b') 591 | s = 'a\\def\e{e} jjajjajaja' 592 | self.assertEqual(par.process_def(s, replace=True), 'a jjajjajaja') 593 | 594 | s = """ 595 | \\begin{itemize}[font=\\bfseries] 596 | \item a 597 | \end{itemize} 598 | 599 | a\def\\a{epic} 600 | jejeje \\a 601 | """ 602 | self.assertEqual( 603 | par.process_def(s, replace=True).strip(), 604 | '\\begin{itemize}[font=\\bfseries]\n \item a\n \\end{itemize}\n \n' 605 | ' a\n jejeje epic' 606 | ) 607 | 608 | # Invalid defs 609 | s = '\def\\a{a} and \def\\b{b} and \\def \nc{c} and \\defee\\d{d}: \\a\\b\\c\\d.' 610 | self.assertEqual(par.process_def(s, replace=True), ' and and and \defee\d{d}: ab\c\d.') 611 | 612 | # Def with commands 613 | s = '\def\\a{\\textsuperscript{a}nice!!} epic \\a' 614 | self.assertEqual(par.process_def(s, replace=True), ' epic anice!!') 615 | 616 | def test_begin_document(self) -> None: 617 | """ 618 | Test begin document parser. 619 | """ 620 | s = '\\begin{document}:end_\\end{document}' 621 | self.assertEqual(par.process_begin_document(s), ':end_') 622 | 623 | s = ':end_\\end{document}' 624 | self.assertEqual(par.process_begin_document(s), ':end_\\end{document}') 625 | 626 | # Others 627 | s = '\\end{document}\\begin{document}:end_\\begin{document}\\end{document}\\begin{document}' 628 | self.assertEqual(par.process_begin_document(s), ':end_\\begin{document}') 629 | 630 | s = """ 631 | % Document 632 | \input{epic} 633 | This line of code should not be included 634 | \\begin{figure} 635 | This figure should not be included 636 | \\end{figure} 637 | \let\\a\\b 638 | \\begin {document} 639 | Test 640 | \\end {document} 641 | Removed as well!! 
642 | """ 643 | self.assertEqual(par.process_begin_document(s).strip(), 'Test') 644 | --------------------------------------------------------------------------------
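# A rough end-to-end sketch showing how the parsers exercised above can be chained by hand.
# The real pydetex pipelines compose these (and more) steps in their own order, so this is an
# illustration of the building blocks, not the actual pipeline implementation.
import pydetex.parsers as par

tex = 'Hello \\cite{a,b}! % a comment\nSee $\\alpha^2$.'
out = par.remove_comments(tex)       # strip "% ..." comments
out = par.process_cite(out)          # \cite{a,b} -> numbered citation tag
out = par.simple_replace(out)        # \alpha^2 -> α² inside the equation
out = par.replace_pydetex_tags(out)  # turn internal pydetex tags into plain text
print(out)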