├── test ├── data │ ├── example_complex_envs_output.txt │ ├── simple.tex │ ├── example_complex_template_output.txt │ ├── example_simple_figure_caption_output.txt │ ├── example_placeholder_output.txt │ ├── example.tex │ ├── example_simple_figure_caption.txt │ ├── example_placeholder.txt │ ├── example_simple_itemize_output.txt │ ├── example_simple_cite_output.txt │ ├── example_simple_itemize.txt │ ├── example_simple_comments_output.txt │ ├── example_simple_cite.txt │ ├── example_simple_comments.txt │ ├── example_tables_strict_output.txt │ ├── example_complex_envs.txt │ └── example_tables_strict.txt ├── __init__.py ├── _base.py ├── test_gui.py ├── test_pipelines.py └── test_parsers.py ├── specs ├── upx.exe ├── PyDetex_Win.spec ├── PyDetex_Win_Single.spec ├── PyDetex_macOS.spec └── __init__.py ├── .replit ├── pydetex ├── res │ ├── cog.ico │ ├── icon.gif │ ├── icon.ico │ ├── icon.png │ ├── icon.icns │ ├── dictionary.ico │ ├── u_subscripts.txt │ ├── u_textfrak.txt │ ├── u_textcal.txt │ ├── u_textbb.txt │ ├── u_textmono.txt │ ├── placeholder_en.tex │ ├── u_superscripts.txt │ ├── placeholder_es.tex │ ├── u_textit.txt │ ├── u_textbf.txt │ └── u_symbols.txt ├── __init__.py ├── version.py ├── _fonts.py ├── pipelines.py ├── utils.py ├── _utils_lang.py ├── _symbols.py ├── _gui_settings.py └── _utils_tex.py ├── docs ├── _static │ ├── example_simple.png │ ├── example_strict.png │ └── pydetex_windows.png ├── _source │ ├── utils.rst │ ├── parsers.rst │ ├── pipelines.rst │ ├── license.rst │ └── contributors.rst ├── Makefile ├── make.bat ├── index.rst └── conf.py ├── codecov.yml ├── gui.py ├── .gitignore ├── requirements.txt ├── .editorconfig ├── MANIFEST.in ├── .readthedocs.yml ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md └── workflows │ └── ci.yml ├── LICENSE ├── setup.py └── README.rst /test/data/example_complex_envs_output.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/data/simple.tex: -------------------------------------------------------------------------------- 1 | this is a simple file -------------------------------------------------------------------------------- /specs/upx.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ppizarror/PyDetex/HEAD/specs/upx.exe -------------------------------------------------------------------------------- /.replit: -------------------------------------------------------------------------------- 1 | language = "bash" 2 | run = "python3 setup.py install; clear; python3 pydetex/gui.py" -------------------------------------------------------------------------------- /pydetex/res/cog.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ppizarror/PyDetex/HEAD/pydetex/res/cog.ico -------------------------------------------------------------------------------- /pydetex/res/icon.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ppizarror/PyDetex/HEAD/pydetex/res/icon.gif -------------------------------------------------------------------------------- /pydetex/res/icon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ppizarror/PyDetex/HEAD/pydetex/res/icon.ico 
-------------------------------------------------------------------------------- /pydetex/res/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ppizarror/PyDetex/HEAD/pydetex/res/icon.png -------------------------------------------------------------------------------- /pydetex/res/icon.icns: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ppizarror/PyDetex/HEAD/pydetex/res/icon.icns -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | TESTS 6 | """ 7 | -------------------------------------------------------------------------------- /pydetex/res/dictionary.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ppizarror/PyDetex/HEAD/pydetex/res/dictionary.ico -------------------------------------------------------------------------------- /docs/_static/example_simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ppizarror/PyDetex/HEAD/docs/_static/example_simple.png -------------------------------------------------------------------------------- /docs/_static/example_strict.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ppizarror/PyDetex/HEAD/docs/_static/example_strict.png -------------------------------------------------------------------------------- /docs/_static/pydetex_windows.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ppizarror/PyDetex/HEAD/docs/_static/pydetex_windows.png -------------------------------------------------------------------------------- /docs/_source/utils.rst: -------------------------------------------------------------------------------- 1 | 2 | ===== 3 | Utils 4 | ===== 5 | 6 | .. automodule:: pydetex.utils 7 | :members: 8 | :exclude-members: Button -------------------------------------------------------------------------------- /docs/_source/parsers.rst: -------------------------------------------------------------------------------- 1 | 2 | ======= 3 | Parsers 4 | ======= 5 | 6 | .. automodule:: pydetex.parsers 7 | :members: 8 | :exclude-members: -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | ignore: 2 | - "build.py" 3 | - "gui.py" 4 | - "pydetex/_gui_utils.py" 5 | - "pydetex/gui.py" 6 | - "setup.py" 7 | - "test/*.py" -------------------------------------------------------------------------------- /docs/_source/pipelines.rst: -------------------------------------------------------------------------------- 1 | 2 | ========= 3 | Pipelines 4 | ========= 5 | 6 | .. 
automodule:: pydetex.pipelines 7 | :members: 8 | :exclude-members: -------------------------------------------------------------------------------- /gui.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | MAIN FILE 6 | """ 7 | 8 | from pydetex.gui import main 9 | 10 | main() 11 | -------------------------------------------------------------------------------- /docs/_source/license.rst: -------------------------------------------------------------------------------- 1 | 2 | ======= 3 | License 4 | ======= 5 | 6 | .. include:: ../../LICENSE 7 | 8 | The official license can be retrieved `here `_. 9 | -------------------------------------------------------------------------------- /test/data/example_complex_template_output.txt: -------------------------------------------------------------------------------- 1 | ℜ 2 | ℑℑ 3 | 4 | = 5 | 6 | = 7 | 8 | = 9 | 10 | = 11 | 12 | =200pt 13 | =200pt 14 | = 15 | = 16 | 17 | =`\- 18 | 19 | 20 | 21 | 22 | 23 | =10000 24 | 25 | `\="#1 "#1 `\=0↤↤ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | ._* 3 | .DS_Store 4 | .idea/ 5 | .vscode/ 6 | 7 | # Build 8 | **.egg-info 9 | build/ 10 | dist/ 11 | 12 | # Settings 13 | pydetex/res/.pydetex.cfg 14 | 15 | # Image projects 16 | docs/_static/**.vectornator -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4 >= 4.11.1 2 | flatlatex >= 0.15 3 | langdetect >= 1.0.9 4 | nltk >= 3.9.1 5 | outdated >= 0.2.2 6 | Pillow >= 9.4.0 7 | PyMultiDictionary >= 1.3.2 8 | pyperclip >= 1.9.0 9 | requests >= 2.32.3 10 | tkmacosx >= 1.0.5 -------------------------------------------------------------------------------- /test/_base.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | TEST BASE 6 | Base definition. 7 | """ 8 | 9 | import unittest 10 | 11 | 12 | class BaseTest(unittest.TestCase): 13 | """ 14 | Base test class. 15 | """ 16 | -------------------------------------------------------------------------------- /test/data/example_simple_figure_caption_output.txt: -------------------------------------------------------------------------------- 1 | Write or paste here your LaTeX code. It simply removes all tex-things and returns a friendly plain text! 2 | The following is a excellent figure: 3 | 4 | CAPTION: A picture of the same gull looking the other way! 5 | 6 | well EQUATION_0 epic α -------------------------------------------------------------------------------- /test/data/example_placeholder_output.txt: -------------------------------------------------------------------------------- 1 | Write or paste here your LaTeX code. It simply removes all tex-things and returns a friendly plain text! 
2 | 3 | PyDetex can process equation, lists, cites, references, and many more: 4 | 5 | EQUATION_0 6 | 7 | Or: 8 | 9 | - As shown in Figure 1 10 | - Proposed by α-Feltes [1] because EQUATION_1 -------------------------------------------------------------------------------- /specs/PyDetex_Win.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | 3 | import sys; sys.path.insert(0, '.') 4 | import specs 5 | if specs.is_osx: exit() 6 | 7 | # Create the analysis 8 | a = specs.get_analysis(Analysis, TOC) 9 | pyz = specs.get_pyz(PYZ, a) 10 | exe = specs.get_exe(EXE, pyz, a, False) 11 | coll = specs.get_collect(COLLECT, a, exe) -------------------------------------------------------------------------------- /test/data/example.tex: -------------------------------------------------------------------------------- 1 | Table \ref{tab:review-rulebased} details the reviewed rule-based methods within floor plan recognition, considering the datasets used (Table \ref{tab:databases}) and the four categories of tasks, such as (1) \textit{Graphics separation}, (2) \textit{Pattern recognition}, (3) \textit{Vectorization}, and (4) \textit{Structural modeling}. -------------------------------------------------------------------------------- /pydetex/res/u_subscripts.txt: -------------------------------------------------------------------------------- 1 | 0 ₀ 2 | 1 ₁ 3 | 2 ₂ 4 | 3 ₃ 5 | 4 ₄ 6 | 5 ₅ 7 | 6 ₆ 8 | 7 ₇ 9 | 8 ₈ 10 | 9 ₉ 11 | + ₊ 12 | - ₋ 13 | = ₌ 14 | ( ₍ 15 | ) ₎ 16 | a ₐ 17 | e ₑ 18 | h ₕ 19 | i ᵢ 20 | j ⱼ 21 | k ₖ 22 | l ₗ 23 | m ₘ 24 | n ₙ 25 | o ₒ 26 | p ₚ 27 | r ᵣ 28 | s ₛ 29 | t ₜ 30 | u ᵤ 31 | v ᵥ 32 | x ₓ 33 | β ᵦ 34 | γ ᵧ 35 | ρ ᵨ 36 | φ ᵩ 37 | χ ᵪ 38 | -------------------------------------------------------------------------------- /specs/PyDetex_Win_Single.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | 3 | import sys; sys.path.insert(0, '.') 4 | import specs 5 | if specs.is_osx: exit() 6 | 7 | # Create the analysis 8 | a = specs.get_analysis(Analysis, TOC) 9 | pyz = specs.get_pyz(PYZ, a) 10 | exe = specs.get_exe(EXE, pyz, a, True) 11 | 12 | # Save to zip 13 | specs.save_zip('PyDetex.exe', 'PyDetex.Win64') -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: https://EditorConfig.org 2 | 3 | # Top-most EditorConfig file 4 | root = true 5 | 6 | # Unix-style newlines 7 | [*] 8 | charset = utf-8 9 | end_of_line = lf 10 | insert_final_newline = false 11 | 12 | # Configure languages 13 | [*.py] 14 | indent_size = 4 15 | indent_style = space 16 | 17 | [{*.json, *.yml}] 18 | indent_size = 2 19 | indent_style = space -------------------------------------------------------------------------------- /docs/_source/contributors.rst: -------------------------------------------------------------------------------- 1 | 2 | ============ 3 | Contributors 4 | ============ 5 | 6 | Core developers: 7 | 8 | - `Pablo Pizarro R. `_ 9 | 10 | Other contributors: 11 | 12 | - `Xiong-Hui Chen `_ 13 | 14 | Ideas and contributions are always welcome. Any found bugs or enhancement 15 | suggestions should be posted on the `GitHub project page `_. 
16 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | exclude *.bat 2 | exclude *.yml 3 | exclude .github/ISSUE_TEMPLATE/* 4 | exclude .gitignore 5 | exclude .replit 6 | exclude docs/* 7 | exclude docs/_source/* 8 | exclude docs/_static/* 9 | exclude specs/*.py 10 | exclude test/*.py 11 | 12 | include requirements.txt 13 | 14 | recursive-include pydetex/res **.gif 15 | recursive-include pydetex/res **.ico 16 | recursive-include pydetex/res **.json 17 | recursive-include pydetex/res **.tex 18 | recursive-include pydetex/res **.txt -------------------------------------------------------------------------------- /pydetex/res/u_textfrak.txt: -------------------------------------------------------------------------------- 1 | A 𝔄 2 | B 𝔅 3 | C ℭ 4 | D 𝔇 5 | E 𝔈 6 | F 𝔉 7 | G 𝔊 8 | H ℌ 9 | I ℑ 10 | J 𝔍 11 | K 𝔎 12 | L 𝔏 13 | M 𝔐 14 | N 𝔑 15 | O 𝔒 16 | P 𝔓 17 | Q 𝔔 18 | R ℜ 19 | S 𝔖 20 | T 𝔗 21 | U 𝔘 22 | V 𝔙 23 | W 𝔚 24 | X 𝔛 25 | Y 𝔜 26 | Z ℨ 27 | a 𝔞 28 | b 𝔟 29 | c 𝔠 30 | d 𝔡 31 | e 𝔢 32 | f 𝔣 33 | g 𝔤 34 | h 𝔥 35 | i 𝔦 36 | j 𝔧 37 | k 𝔨 38 | l 𝔩 39 | m 𝔪 40 | n 𝔫 41 | o 𝔬 42 | p 𝔭 43 | q 𝔮 44 | r 𝔯 45 | s 𝔰 46 | t 𝔱 47 | u 𝔲 48 | v 𝔳 49 | w 𝔴 50 | x 𝔵 51 | y 𝔶 52 | z 𝔷 53 | -------------------------------------------------------------------------------- /pydetex/res/u_textcal.txt: -------------------------------------------------------------------------------- 1 | A 𝓐 2 | B 𝓑 3 | C 𝓒 4 | D 𝓓 5 | E 𝓔 6 | F 𝓕 7 | G 𝓖 8 | H 𝓗 9 | I 𝓘 10 | J 𝓙 11 | K 𝓚 12 | L 𝓛 13 | M 𝓜 14 | N 𝓝 15 | O 𝓞 16 | P 𝓟 17 | Q 𝓠 18 | R 𝓡 19 | S 𝓢 20 | T 𝓣 21 | U 𝓤 22 | V 𝓥 23 | W 𝓦 24 | X 𝓧 25 | Y 𝓨 26 | Z 𝓩 27 | a 𝓪 28 | b 𝓫 29 | c 𝓬 30 | d 𝓭 31 | e 𝓮 32 | f 𝓯 33 | g 𝓰 34 | h 𝓱 35 | i 𝓲 36 | j 𝓳 37 | k 𝓴 38 | l 𝓵 39 | m 𝓶 40 | n 𝓷 41 | o 𝓸 42 | p 𝓹 43 | q 𝓺 44 | r 𝓻 45 | s 𝓼 46 | t 𝓽 47 | u 𝓾 48 | v 𝓿 49 | w 𝔀 50 | x 𝔁 51 | y 𝔂 52 | z 𝔃 53 | -------------------------------------------------------------------------------- /test/data/example_simple_figure_caption.txt: -------------------------------------------------------------------------------- 1 | Write or paste here your \texttt{LaTeX} code. It simply removes all tex-things and returns a friendly plain text! 
2 | The following is a excellent figure: 3 | 4 | \begin{figure} 5 | \centering 6 | \reflectbox{% 7 | \includegraphics[width=0.5\textwidth]{gull}} 8 | \caption{A picture of the same gull 9 | looking the other way!} 10 | \end{figure} 11 | 12 | well $nothing has happened really$ epic $\alpha$ 13 | -------------------------------------------------------------------------------- /specs/PyDetex_macOS.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | 3 | import os 4 | import sys; sys.path.insert(0, '.') 5 | import specs 6 | if specs.is_win: exit() 7 | 8 | try: 9 | os.system(f'rm -rf dist/PyDetex_macOS.app') 10 | os.system(f'rm dist/PyDetex_macOS') 11 | except: 12 | pass 13 | 14 | # Create the analysis 15 | a = specs.get_analysis(Analysis, TOC) 16 | pyz = specs.get_pyz(PYZ, a) 17 | exe = specs.get_exe(EXE, pyz, a, True) 18 | app = specs.get_bundle(BUNDLE, exe) 19 | 20 | # Save to zip 21 | specs.save_zip('PyDetex_macOS.app', 'PyDetex.macOS') -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Configure os 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: '3.8' 13 | 14 | # Build documentation in the docs/ directory with Sphinx 15 | sphinx: 16 | configuration: docs/conf.py 17 | 18 | # Optionally set the version of Python and requirements required to build your docs 19 | python: 20 | install: 21 | - method: pip 22 | path: . 23 | extra_requirements: 24 | - docs -------------------------------------------------------------------------------- /pydetex/res/u_textbb.txt: -------------------------------------------------------------------------------- 1 | A 𝔸 2 | B 𝔹 3 | C ℂ 4 | D 𝔻 5 | E 𝔼 6 | F 𝔽 7 | G 𝔾 8 | H ℍ 9 | I 𝕀 10 | J 𝕁 11 | K 𝕂 12 | L 𝕃 13 | M 𝕄 14 | N ℕ 15 | O 𝕆 16 | P ℙ 17 | Q ℚ 18 | R ℝ 19 | S 𝕊 20 | T 𝕋 21 | U 𝕌 22 | V 𝕍 23 | W 𝕎 24 | X 𝕏 25 | Y 𝕐 26 | Z ℤ 27 | a 𝕒 28 | b 𝕓 29 | c 𝕔 30 | d 𝕕 31 | e 𝕖 32 | f 𝕗 33 | g 𝕘 34 | h 𝕙 35 | i 𝕚 36 | j 𝕛 37 | k 𝕜 38 | l 𝕝 39 | m 𝕞 40 | n 𝕟 41 | o 𝕠 42 | p 𝕡 43 | q 𝕢 44 | r 𝕣 45 | s 𝕤 46 | t 𝕥 47 | u 𝕦 48 | v 𝕧 49 | w 𝕨 50 | x 𝕩 51 | y 𝕪 52 | z 𝕫 53 | 0 𝟘 54 | 1 𝟙 55 | 2 𝟚 56 | 3 𝟛 57 | 4 𝟜 58 | 5 𝟝 59 | 6 𝟞 60 | 7 𝟟 61 | 8 𝟠 62 | 9 𝟡 63 | -------------------------------------------------------------------------------- /pydetex/res/u_textmono.txt: -------------------------------------------------------------------------------- 1 | A 𝙰 2 | B 𝙱 3 | C 𝙲 4 | D 𝙳 5 | E 𝙴 6 | F 𝙵 7 | G 𝙶 8 | H 𝙷 9 | I 𝙸 10 | J 𝙹 11 | K 𝙺 12 | L 𝙻 13 | M 𝙼 14 | N 𝙽 15 | O 𝙾 16 | P 𝙿 17 | Q 𝚀 18 | R 𝚁 19 | S 𝚂 20 | T 𝚃 21 | U 𝚄 22 | V 𝚅 23 | W 𝚆 24 | X 𝚇 25 | Y 𝚈 26 | Z 𝚉 27 | a 𝚊 28 | b 𝚋 29 | c 𝚌 30 | d 𝚍 31 | e 𝚎 32 | f 𝚏 33 | g 𝚐 34 | h 𝚑 35 | i 𝚒 36 | j 𝚓 37 | k 𝚔 38 | l 𝚕 39 | m 𝚖 40 | n 𝚗 41 | o 𝚘 42 | p 𝚙 43 | q 𝚚 44 | r 𝚛 45 | s 𝚜 46 | t 𝚝 47 | u 𝚞 48 | v 𝚟 49 | w 𝚠 50 | x 𝚡 51 | y 𝚢 52 | z 𝚣 53 | 0 𝟶 54 | 1 𝟷 55 | 2 𝟸 56 | 3 𝟹 57 | 4 𝟺 58 | 5 𝟻 59 | 6 𝟼 60 | 7 𝟽 61 | 8 𝟾 62 | 9 𝟿 63 | -------------------------------------------------------------------------------- /pydetex/res/placeholder_en.tex: -------------------------------------------------------------------------------- 1 | Write or paste here your \texttt{LaTeX} code. 
It simply removes all tex-things and returns a friendly plain text! % And removes commands too! 2 | 3 | PyDetex can process equation, lists, cites, references, and many more: 4 | 5 | \begin{equation} 6 | a + \frac{c}{d} \longrightarrow k^n 7 | \end{equation} 8 | 9 | Or \myCustomCommand[\label{cmd1}]{can process \textbf{lists}}: 10 | 11 | \begin{itemize}[font=\bfseries] 12 | \item As shown in Figure \ref{fignumber} 13 | \item Proposed by $\alpha$-Feltes \cite{feltes2008} because $x^n + y^n = z^n \forall n \in 0 \ldots \infty$ 14 | \end{itemize} -------------------------------------------------------------------------------- /pydetex/res/u_superscripts.txt: -------------------------------------------------------------------------------- 1 | 0 ⁰ 2 | 1 ¹ 3 | 2 ² 4 | 3 ³ 5 | 4 ⁴ 6 | 5 ⁵ 7 | 6 ⁶ 8 | 7 ⁷ 9 | 8 ⁸ 10 | 9 ⁹ 11 | + ⁺ 12 | - ⁻ 13 | = ⁼ 14 | ( ⁽ 15 | ) ⁾ 16 | a ᵃ 17 | b ᵇ 18 | c ᶜ 19 | d ᵈ 20 | e ᵉ 21 | f ᶠ 22 | g ᵍ 23 | h ʰ 24 | i ⁱ 25 | j ʲ 26 | k ᵏ 27 | l ˡ 28 | m ᵐ 29 | n ⁿ 30 | o ᵒ 31 | p ᵖ 32 | r ʳ 33 | s ˢ 34 | t ᵗ 35 | u ᵘ 36 | v ᵛ 37 | w ʷ 38 | x ˣ 39 | y ʸ 40 | z ᶻ 41 | A ᴬ 42 | B ᴮ 43 | D ᴰ 44 | E ᴱ 45 | G ᴳ 46 | H ᴴ 47 | I ᴵ 48 | J ᴶ 49 | K ᴷ 50 | L ᴸ 51 | M ᴹ 52 | N ᴺ 53 | O ᴼ 54 | P ᴾ 55 | R ᴿ 56 | T ᵀ 57 | U ᵁ 58 | V ⱽ 59 | W ᵂ 60 | α ᵅ 61 | β ᵝ 62 | γ ᵞ 63 | δ ᵟ 64 | ∊ ᵋ 65 | θ ᶿ 66 | ι ᶥ 67 | Φ ᶲ 68 | φ ᵠ 69 | χ ᵡ 70 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: ppizarror 4 | patreon: # Patreon 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: ppizarror 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 13 | -------------------------------------------------------------------------------- /pydetex/res/placeholder_es.tex: -------------------------------------------------------------------------------- 1 | Escribe o pega aquí tu código \texttt{LaTeX}. El programa removerá lo relacionado a latex y retornará un amigable texto plano. % Y además elimina los comentarios! 2 | 3 | PyDetex puede procesar ecuaciones, listas, citas, referencias, y mucho más: 4 | 5 | \begin{equation} 6 | a + \frac{c}{d} \longrightarrow k^n 7 | \end{equation} 8 | 9 | Ó \myCustomCommand[\label{cmd1}]{puede procesar \textbf{listas}}: 10 | 11 | \begin{itemize}[font=\bfseries] 12 | \item Como es ilustrado en la Figura \ref{fignumber} 13 | \item Propuesto por $\alpha$-Feltes \cite{feltes2008} dado que $x^n + y^n = z^n \forall n \in 0 \ldots \infty$ 14 | \end{itemize} -------------------------------------------------------------------------------- /test/data/example_placeholder.txt: -------------------------------------------------------------------------------- 1 | Write or paste here your \texttt{LaTeX} code. It simply removes all tex-things and returns a friendly plain text! % And removes commands too! 
2 | 3 | PyDetex can process equation, lists, cites, references, and many more: 4 | 5 | \begin{equation} 6 | a + \\frac{c}{d} \longrightarrow k^n 7 | \end{equation} 8 | 9 | Or \myCustomCommand[\label{cmd1}]{can process \textbf{lists}}: 10 | 11 | \begin{itemize}[font=\bfseries] 12 | \item As shown in Figure \ref{fignumber} 13 | \item Proposed by $\alpha$-Feltes \cite{feltes2008} because $x^n + y^n = z^n \forall n \in 0 \ldots \infty$ 14 | \end{itemize} -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /pydetex/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | PYDETEX 6 | An application that transforms LaTeX codes to plain text. 7 | """ 8 | 9 | import pydetex.version 10 | 11 | __author__ = 'Pablo Pizarro R.' 12 | __copyright__ = 'Copyright 2021 Pablo Pizarro R. 
@ppizarror' 13 | __description__ = 'An application that transforms LaTeX code to plain text' 14 | __email__ = 'pablo@ppizarror.com' 15 | __keywords__ = 'latex detex parser gui' 16 | __license__ = 'MIT' 17 | __module_name__ = 'pydetex' 18 | __url__ = 'https://pydetex.readthedocs.io' 19 | __url_bug_tracker__ = 'https://github.com/ppizarror/PyDetex/issues' 20 | __url_documentation__ = 'https://pydetex.readthedocs.io' 21 | __url_source_code__ = 'https://github.com/ppizarror/PyDetex' 22 | __version__ = pydetex.version.ver 23 | -------------------------------------------------------------------------------- /test/data/example_simple_itemize_output.txt: -------------------------------------------------------------------------------- 1 | - The academic databases 2 | Web of Science, Scopus, IEEE/IET Xplore, Science Direct, ACM Digital Library, ASCE Library, ProQuest, and Springer were used for article search and selection. Also, online tools Semantic Scholar and Connected Papers were employed to retrieve similar articles powered by AI and visual graphs. 3 | - Keywords such as "floor plan analysis", "floor plan recognition and interpretation", "floor plan segmentation", "floor plan image", "apartment structure", "wall segmentation", "architectural plan vectorization", "room and wall retrieval", "apartment graph", "object detection in floor plans", and "parsing floor plan images" were used to search the databases. The search date period ranged from 1995 to 4 | fecha. For each article, its cross-references and similar works were also considered for revision. -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Environment information** 11 | Describe your environment information, such as: 12 | 13 | - SO: win/linux 14 | - python version: v3.x 15 | - pydetex version: v0.x.x 16 | 17 | **Describe the bug** 18 | A clear and concise description of what the bug is. 19 | 20 | **To Reproduce** 21 | Please provide a **minimal** reproducible example that developers can run to investigate the problem. 22 | You can find help for creating such an example [here](https://stackoverflow.com/help/minimal-reproducible-example). 23 | 24 | **Expected behavior** 25 | A clear and concise description of what you expected to happen. 26 | 27 | **Additional context** 28 | Add any other context about the problem here. 29 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /pydetex/version.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | VERSION 6 | Defines version. 7 | """ 8 | 9 | __all__ = ['Version', 'vernum', 'ver', 'rev'] 10 | 11 | 12 | class Version(tuple): 13 | """ 14 | Version class. 15 | """ 16 | 17 | __slots__ = () 18 | fields = 'major', 'minor', 'patch' 19 | 20 | def __new__(cls, major, minor, patch) -> tuple: 21 | return tuple.__new__(cls, (major, minor, patch)) 22 | 23 | def __repr__(self) -> str: 24 | fields = (f'{fld}={val}' for fld, val in zip(self.fields, self)) 25 | return f'{self.__class__.__name__}({", ".join(fields)})' 26 | 27 | def __str__(self) -> str: 28 | return '{}.{}.{}'.format(*self) 29 | 30 | major = property(lambda self: self[0]) 31 | minor = property(lambda self: self[1]) 32 | patch = property(lambda self: self[2]) 33 | 34 | 35 | vernum = Version(1, 1, 1) 36 | ver = str(vernum) 37 | rev = '' 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Pablo Pizarro R. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /pydetex/res/u_textit.txt: -------------------------------------------------------------------------------- 1 | A 𝐴 2 | B 𝐵 3 | C 𝐶 4 | D 𝐷 5 | E 𝐸 6 | F 𝐹 7 | G 𝐺 8 | H 𝐻 9 | I 𝐼 10 | J 𝐽 11 | K 𝐾 12 | L 𝐿 13 | M 𝑀 14 | N 𝑁 15 | O 𝑂 16 | P 𝑃 17 | Q 𝑄 18 | R 𝑅 19 | S 𝑆 20 | T 𝑇 21 | U 𝑈 22 | V 𝑉 23 | W 𝑊 24 | X 𝑋 25 | Y 𝑌 26 | Z 𝑍 27 | a 𝑎 28 | b 𝑏 29 | c 𝑐 30 | d 𝑑 31 | e 𝑒 32 | f 𝑓 33 | g 𝑔 34 | h ℎ 35 | i 𝑖 36 | j 𝑗 37 | k 𝑘 38 | l 𝑙 39 | m 𝑚 40 | n 𝑛 41 | o 𝑜 42 | p 𝑝 43 | q 𝑞 44 | r 𝑟 45 | s 𝑠 46 | t 𝑡 47 | u 𝑢 48 | v 𝑣 49 | w 𝑤 50 | x 𝑥 51 | y 𝑦 52 | z 𝑧 53 | Α 𝛢 54 | Β 𝛣 55 | Γ 𝛤 56 | Δ 𝛥 57 | Ε 𝛦 58 | Ζ 𝛧 59 | Η 𝛨 60 | Θ 𝛩 61 | Ι 𝛪 62 | Κ 𝛫 63 | Λ 𝛬 64 | Μ 𝛭 65 | Ν 𝛮 66 | Ξ 𝛯 67 | Ο 𝛰 68 | Π 𝛱 69 | Ρ 𝛲 70 | ϴ 𝛳 71 | Σ 𝛴 72 | Τ 𝛵 73 | Υ 𝛶 74 | Φ 𝛷 75 | Χ 𝛸 76 | Ψ 𝛹 77 | Ω 𝛺 78 | ∇ 𝛻 79 | α 𝛼 80 | β 𝛽 81 | γ 𝛾 82 | δ 𝛿 83 | ε 𝜀 84 | ζ 𝜁 85 | η 𝜂 86 | θ 𝜃 87 | ι 𝜄 88 | κ 𝜅 89 | λ 𝜆 90 | μ 𝜇 91 | ν 𝜈 92 | ξ 𝜉 93 | ο 𝜊 94 | π 𝜋 95 | ρ 𝜌 96 | ς 𝜍 97 | σ 𝜎 98 | τ 𝜏 99 | υ 𝜐 100 | φ 𝜑 101 | χ 𝜒 102 | ψ 𝜓 103 | ω 𝜔 104 | ∂ 𝜕 105 | ϵ 𝜖 106 | ϑ 𝜗 107 | ϰ 𝜘 108 | ϕ 𝜙 109 | ϱ 𝜚 110 | ϖ 𝜛 111 | -------------------------------------------------------------------------------- /pydetex/res/u_textbf.txt: -------------------------------------------------------------------------------- 1 | A 𝐀 2 | B 𝐁 3 | C 𝐂 4 | D 𝐃 5 | E 𝐄 6 | F 𝐅 7 | G 𝐆 8 | H 𝐇 9 | I 𝐈 10 | J 𝐉 11 | K 𝐊 12 | L 𝐋 13 | M 𝐌 14 | N 𝐍 15 | O 𝐎 16 | P 𝐏 17 | Q 𝐐 18 | R 𝐑 19 | S 𝐒 20 | T 𝐓 21 | U 𝐔 22 | V 𝐕 23 | W 𝐖 24 | X 𝐗 25 | Y 𝐘 26 | Z 𝐙 27 | a 𝐚 28 | b 𝐛 29 | c 𝐜 30 | d 𝐝 31 | e 𝐞 32 | f 𝐟 33 | g 𝐠 34 | h 𝐡 35 | i 𝐢 36 | j 𝐣 37 | k 𝐤 38 | l 𝐥 39 | m 𝐦 40 | n 𝐧 41 | o 𝐨 42 | p 𝐩 43 | q 𝐪 44 | r 𝐫 45 | s 𝐬 46 | t 𝐭 47 | u 𝐮 48 | v 𝐯 49 | w 𝐰 50 | x 𝐱 51 | y 𝐲 52 | z 𝐳 53 | Α 𝚨 54 | Β 𝚩 55 | Γ 𝚪 56 | Δ 𝚫 57 | Ε 𝚬 58 | Ζ 𝚭 59 | Η 𝚮 60 | Θ 𝚯 61 | Ι 𝚰 62 | Κ 𝚱 63 | Λ 𝚲 64 | Μ 𝚳 65 | Ν 𝚴 66 | Ξ 𝚵 67 | Ο 𝚶 68 | Π 𝚷 69 | Ρ 𝚸 70 | ϴ 𝚹 71 | Σ 𝚺 72 | Τ 𝚻 73 | Υ 𝚼 74 | Φ 𝚽 75 | Χ 𝚾 76 | Ψ 𝚿 77 | Ω 𝛀 78 | ∇ 𝛁 79 | α 𝛂 80 | β 𝛃 81 | γ 𝛄 82 | δ 𝛅 83 | ε 𝛆 84 | ζ 𝛇 85 | η 𝛈 86 | θ 𝛉 87 | ι 𝛊 88 | κ 𝛋 89 | λ 𝛌 90 | μ 𝛍 91 | ν 𝛎 92 | ξ 𝛏 93 | ο 𝛐 94 | π 𝛑 95 | ρ 𝛒 96 | ς 𝛓 97 | σ 𝛔 98 | τ 𝛕 99 | υ 𝛖 100 | φ 𝛗 101 | χ 𝛘 102 | ψ 𝛙 103 | ω 𝛚 104 | ∂ 𝛛 105 | ϵ 𝛜 106 | ϑ 𝛝 107 | ϰ 𝛞 108 | ϕ 𝛟 109 | ϱ 𝛠 110 | ϖ 𝛡 111 | 0 𝟎 112 | 1 𝟏 113 | 2 𝟐 114 | 3 𝟑 115 | 4 𝟒 116 | 5 𝟓 117 | 6 𝟔 118 | 7 𝟕 119 | 8 𝟖 120 | 9 𝟗 121 | -------------------------------------------------------------------------------- /test/data/example_simple_cite_output.txt: -------------------------------------------------------------------------------- 1 | Yamasaki et al. [1] also presented a fully convolutional end-to-end FCN network to label pixels in apartment floor plans by performing a general semantic segmentation, ignoring spatial relations between elements and room boundary. The classified pixels from 12 classes formed a graph to model the structure and measure the structural similarity for apartment retrieval. 2 | 3 | FIGURE_CAPTION: A U-Net model which segments the walls from a rasterized floor plan image. Layer legend: (yellow) convolutional block, (orange) max-pool, (blue) up-sampling, and (purple) softmax. 4 | 5 | A U-Net approach was introduced by Yang η [2], alongside the pixel deconvolutional layers PixelDCL [3] to avoid checkerboard artifacts while segmenting walls and doors. 6 | 7 | Discriminator architectures [4]. 
8 | 9 | FIGURE_CAPTION: Pix2Pix model, which translates the rasterized floor plan image style into a segmented format. 10 | 11 | Concerning the recognition and generation of floor plans, Huang and Zheng [5] introduced an application of Pix2PixHD [6] to detect rooms from 8 classes, which were colorized to generate a new image. In this example, the conditional GANs lead to translate the raster plan to a segmented style using annotated pairs, classifying each pixel while also preserving the underlying structure of the image. Pix2Pix was also adopted by Kim et al. [7, 8] to transform plans into -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | 2 | :orphan: 3 | 4 | .. This page is orphan because its content concerns the internal working of the 5 | .. library. However it is necessary in order to be able to quote its items in the 6 | .. documentation. 7 | 8 | .. include:: ../README.rst 9 | 10 | 11 | === 12 | API 13 | === 14 | 15 | Although PyDetex is intended to be used through its GUI, the module contains several 16 | practical methods to detect LaTeX commands, environments, equations, among others. 17 | The GUI only uses the pipelines to transform the tex code to plain text. On the 18 | other hand, the pipelines use parsers. 19 | 20 | You can check the parsers, pipelines, and utils methods to create your 21 | own pipelines in the left menu! 22 | 23 | .. toctree:: 24 | :maxdepth: 2 25 | :hidden: 26 | :caption: API 27 | 28 | _source/parsers 29 | _source/pipelines 30 | _source/utils 31 | 32 | 33 | ================= 34 | About PyDetex 35 | ================= 36 | 37 | This project does not have a mailing list and so the issues tab should be the first 38 | point of contact if wishing to discuss the project. If you have questions that you 39 | do not feel are relevant to the issues tab or just want to let me know what you 40 | think about the software, feel free to email me at pablo@ppizarror.com 41 | 42 | .. toctree:: 43 | :maxdepth: 2 44 | :hidden: 45 | :caption: About PyDetex 46 | 47 | _source/license 48 | _source/contributors 49 | 50 | 51 | ================== 52 | Indices and tables 53 | ================== 54 | 55 | * :ref:`genindex` 56 | * :ref:`modindex` 57 | * :ref:`search` 58 | -------------------------------------------------------------------------------- /pydetex/_fonts.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | FONTS 6 | Configures font styles.
7 | """ 8 | 9 | __all__ = [ 10 | 'FONT_TAGS', 11 | 'FONT_PROPERTIES', 12 | 'TAGS_FONT' 13 | ] 14 | 15 | from typing import Dict, Optional, Union 16 | 17 | # Define properties 18 | bg = 'background' 19 | bold = 'bold' 20 | fg = 'foreground' 21 | italic = 'italic' 22 | name = 'name' 23 | overstrike = 'overstrike' 24 | roman = 'roman' 25 | slant = 'slant' 26 | spacing3 = 'spacing3' 27 | underline = 'underline' 28 | weight = 'weight' 29 | size = 'size' 30 | 31 | # Configure fonts 32 | FONT_PROPERTIES: Dict[str, Optional[Dict[str, Union[str, int]]]] = { 33 | 'bold': {weight: bold}, 34 | 'bold_italic': {weight: bold, slant: italic}, 35 | 'bullet': None, 36 | 'equation_char': {weight: bold, fg: '#19b70a'}, 37 | 'equation_inside': {slant: italic, fg: '#ffa450'}, 38 | 'error': {fg: '#ff6b68'}, 39 | 'h1': {size: 2, weight: bold, spacing3: 1}, 40 | 'highlight': {bg: '#fff200'}, 41 | 'italic': {slant: italic}, 42 | 'link': {weight: bold, fg: '#ff02a6'}, 43 | 'normal': {}, 44 | 'repeated_tag': {slant: italic, fg: '#ff002b'}, 45 | 'repeated_word': {weight: bold}, 46 | 'strike': {overstrike: True}, 47 | 'tex_argument': {fg: '#999999'}, 48 | 'tex_command': {fg: '#09accb'}, 49 | 'underlined': {underline: True, spacing3: 1} 50 | } 51 | 52 | # Configure the tags 53 | FONT_TAGS: Dict[str, str] = {} 54 | TAGS_FONT: Dict[str, str] = {} 55 | for k in FONT_PROPERTIES.keys(): 56 | FONT_TAGS[k] = f'⇱PYDETEX_FONT:{k.upper()}⇲' 57 | TAGS_FONT[FONT_TAGS[k]] = k 58 | -------------------------------------------------------------------------------- /test/data/example_simple_itemize.txt: -------------------------------------------------------------------------------- 1 | \begin{itemize} 2 | \item The academic databases 3 | Web of Science, % http://uchile.idm.oclc.org/login?url=http://webofknowledge.com/ 4 | Scopus, % http://uchile.idm.oclc.org/login?url=http://www.scopus.com/ 5 | IEEE/IET Xplore, % http://uchile.idm.oclc.org/login?url=http://ieeexplore.ieee.org/ 6 | Science Direct, % http://uchile.idm.oclc.org/login?url=http://www.sciencedirect.com/ 7 | ACM Digital Library, % http://uchile.idm.oclc.org/login?url=https://dl.acm.org/dl.cfm 8 | ASCE Library, % http://uchile.idm.oclc.org/login?url=http://ascelibrary.org 9 | ProQuest, % https://uchile.idm.oclc.org/login?url=http://search.proquest.com/computing?accountid=14621 10 | and Springer % http://uchile.idm.oclc.org/login?url=https://link.springer.com 11 | were used for article search and selection. Also, online tools Semantic Scholar and Connected Papers were employed to retrieve similar articles powered by AI and visual graphs. 12 | 13 | \item Keywords such as \doublequotes{floor plan analysis}, \doublequotes{floor plan recognition and interpretation}, \doublequotes{floor plan segmentation}, \doublequotes{floor plan image}, \doublequotes{apartment structure}, \doublequotes{wall segmentation}, \doublequotes{architectural plan vectorization}, \doublequotes{room and wall retrieval}, \doublequotes{apartment graph}, \doublequotes{object detection in floor plans}, and \doublequotes{parsing floor plan images} were used to search the databases. The search date period ranged from 1995 to \\fecha. For each article, its cross-references and similar works were also considered for revision. 
14 | \end{itemize} -------------------------------------------------------------------------------- /test/data/example_simple_comments_output.txt: -------------------------------------------------------------------------------- 1 | Park and Kwon [1] recognized the main walls of apartments using the auxiliary dimension line, where windows can be retrieved as a subproduct. Feltes et al.'s [2] work is capable of finding the object's corners in wall-line drawing images by filtering out unnecessary points without changing the overall structure, especially those that appeared through over-segmentation on diagonal lines; also, a wall-gap filling is possible while performing a heuristic criterion. Tang et al. [3] automatically generated vector drawings by applying various filters, such as gradient, length, gap-filling, line-merging, and connectivity under several millimeter sizes, assuming walls are represented by parallel lines in both vertical and horizontal axis. Pan et al. [4] detected walls and windows considering empirical rules regarding their pixel layouts, where the user must adjust its thresholds. The bearing wall corresponded to black areas, non-bearing walls to parallel, unfilled rectangles, and windows are composed of three to four closer parallel lines. De [5] also assumed that only walls are illustrated as thick black lines in a floor plan layout. Thus, thick and thin lines can be distinguished using a morphological transformation; thick lines can be considered walls, whereas arc lines represent doors. On the other hand, in an effort to overcome the lack of a standard notation, de las Heras et al. [6] presented an unsupervised wall segmentation using the assumption of them being a repetitive element, rectangular, placed in orthogonal directions, filled with the same pattern and naturally distributed across the plan. Although assumptions might work over a set, they do not consider semantical relationships or work for multiple plan styles. 
-------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | concurrency: 4 | cancel-in-progress: true 5 | group: ${{ github.repository }}-${{ github.workflow }}-${{ github.ref }} 6 | 7 | on: 8 | push: 9 | branches: 10 | - master 11 | pull_request: 12 | branches: 13 | - master 14 | 15 | jobs: 16 | test: 17 | uses: ppizarror/workflow-actions/.github/workflows/test_python.yml@master 18 | strategy: 19 | matrix: 20 | python: [ '3.10', '3.11', '3.12' ] 21 | with: 22 | env-vars: GITHUB=true 23 | install-extras: test 24 | os: ubuntu-latest 25 | python-version: ${{ matrix.python }} 26 | 27 | build: 28 | needs: test 29 | runs-on: ${{ matrix.os }} 30 | strategy: 31 | matrix: 32 | os: 33 | - macos-latest 34 | - windows-latest 35 | steps: 36 | - name: Checkout 37 | uses: actions/checkout@v4 38 | 39 | - name: Set up Python 40 | uses: actions/setup-python@v5 41 | with: 42 | cache: pip 43 | python-version: '3.12' 44 | 45 | - name: Install dependencies 46 | shell: bash 47 | run: | 48 | python -m pip install -e .[installer] 49 | echo "PYDETEX_VERSION=$(python -c "import pydetex; print(pydetex.__version__)")" >> $GITHUB_ENV 50 | 51 | - name: Build 52 | run: python build.py pyinstaller 53 | 54 | - name: Upload release 55 | uses: actions/upload-artifact@v4 56 | with: 57 | name: PyDetex-${{ matrix.os }}-v${{ env.PYDETEX_VERSION }} 58 | path: dist/out_zip 59 | 60 | codeql: 61 | uses: ppizarror/workflow-actions/.github/workflows/codeql.yml@master 62 | with: 63 | language: python 64 | 65 | delete-artifacts: 66 | needs: build 67 | uses: ppizarror/workflow-actions/.github/workflows/delete_artifacts.yml@master -------------------------------------------------------------------------------- /test/data/example_simple_cite.txt: -------------------------------------------------------------------------------- 1 | Yamasaki et al. \cite{Yamasaki2018} also presented a fully convolutional end-to-end FCN network to label pixels in apartment floor plans by performing a general semantic segmentation, ignoring spatial relations between elements and room boundary. The classified pixels from 12 classes formed a graph to model the structure and measure the structural similarity for apartment retrieval. % \\% 1 2 | 3 | \insertimage[\label{unetmodel}]{unet_compressed}{width=\linewidth}{A U-Net model which segments the walls from a rasterized floor plan image. Layer legend: \textit{(yellow)} convolutional block, \textit{(orange)} max-pool, \textit{(blue)} up-sampling, and \textit{(purple)} softmax.}% The encoder, comprised of several de-convolutions, captures the context and finer grain structures. Conversely, the decoder reconstruct the output segmented image, combining spatial information from the encoder.} 4 | 5 | % U-NET 6 | A U-Net approach was introduced by Yang \eta \etal \cite{Yang2018}, alongside the pixel deconvolutional layers PixelDCL \cite{Gao2017} to avoid checkerboard artifacts while segmenting walls and doors. 7 | 8 | Discriminator architectures \cite{Dong2021}. 9 | 10 | 11 | \insertimage[\label{pix2pix2model}]{pix2pix_compressed}{width=\linewidth}{Pix2Pix model, which translates the rasterized floor plan image style into a segmented format.} 12 | 13 | Concerning the recognition and generation of floor plans, Huang and Zheng \cite{Huang2018} introduced an application of Pix2PixHD \cite{Wang2018} to detect rooms from 8 classes, which were colorized to generate a new image. 
In this example, the conditional GANs lead to translate the raster plan to a segmented style using annotated pairs, classifying each pixel while also preserving the underlying structure of the image. Pix2Pix was also adopted by Kim et al. \cite{Kim2021, Kim2018} to transform plans into -------------------------------------------------------------------------------- /test/data/example_simple_comments.txt: -------------------------------------------------------------------------------- 1 | % Aqui tirar metodos genericos que sean de poco impacto 2 | % element recogniztion based on their line representation has been widely studied concerning rule-based approachesSeveral other studies have also considered a line representation and retrieval to recognize several structural elements from floor plans. 3 | Park and Kwon \cite{Park2003} recognized the main walls of apartments using the auxiliary dimension line, where windows can be retrieved as a subproduct. Feltes et al.'s \cite{Feltes2014} work is capable of finding the object's corners in wall-line drawing images by filtering out unnecessary points without changing the overall structure, especially those that appeared through over-segmentation on diagonal lines; also, a wall-gap filling is possible while performing a heuristic criterion. Tang et al. \cite{Tang2017} automatically generated vector drawings by applying various filters, such as gradient, length, gap-filling, line-merging, and connectivity under several millimeter sizes, assuming walls are represented by parallel lines in both vertical and horizontal axis. Pan et al. \cite{GuanghuiPan2017} detected walls and windows considering empirical rules regarding their pixel layouts, where the user must adjust its thresholds. The bearing wall corresponded to black areas, non-bearing walls to parallel, unfilled rectangles, and windows are composed of three to four closer parallel lines. De \cite{De2019} also assumed that only walls are illustrated as thick black lines in a floor plan layout. Thus, thick and thin lines can be distinguished using a morphological transformation; thick lines can be considered walls, whereas arc lines represent doors. On the other hand, in an effort to overcome the lack of a standard notation, de las Heras et al. \cite{DelasHeras2013a} presented an unsupervised wall segmentation using the assumption of them being a repetitive element, rectangular, placed in orthogonal directions, filled with the same pattern and naturally distributed across the plan. Although assumptions might work over a set, they do not consider semantical relationships or work for multiple plan styles. -------------------------------------------------------------------------------- /test/data/example_tables_strict_output.txt: -------------------------------------------------------------------------------- 1 | Datasets 2 | 3 | Datasets have played an important role within floor plan analysis as there is not a standard notation for its composition; therefore, designed models must incorporate specific rules for each particular style, facing high variability due to: (1) the visual representation of the building, wherein best cases only 70% of the graphical information is compliant with some standard rules [1], (2) the nature of the information contained, and (3) the way of the information is visually represented [2]. Moreover, each floor plan dataset has limitations regarding quantity or complexity. 
Thus, researchers opt to utilize the datasets suitable for their purposes, including specific processing steps that could not be generalized to other formats [3]. 4 | 5 | For such datasets to be useful in floor plan analysis, there must be pixel-wise annotations for objects such as walls, openings, and rooms. However, there are few public datasets because it is difficult for floor plans to be invariably labeled due to ambiguity in notation and the need for high-level expertise for object recognition [2, 4]. Even though several practical tools have been developed to annotate them conveniently [5-7], it is difficult to do so because there is no way to guarantee the same annotations from different experts, especially for complicated plans [3]. 6 | 7 | CAPTION: Datasets used by floor plan analysis research. 8 | 9 | Note: Res – Resolution in pixels (px). 10 | - [a] (all links visited on 10/01/2021) 11 | - [b] LINK: b 12 | - [c] 13 | 14 | CAPTION: Floor plan image examples from datasets. 15 | 16 | The existing datasets were summarized in Table 1, considering its source article, availability, annotation, and quantity, ordered by release year. Figure 2 illustrates a selection of images from the datasets considered within the review. It can be noted that there are distinct drawing styles and semantics among the apartment and house plans; some have colored floors, text, icons, dimension lines, furniture, and walls with several styles, angles, and complex arrangements. These diverse settings were exploited by rule-based methods, described in section 3, which recognize walls, doors, windows, furniture, and rooms by defining algorithms that considered different approaches specific to each style; or by learning-based ones (section 4), that trained models to automatically recognize the objects. -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | SETUP DISTRIBUTION 6 | Create setup for PyPI. 
7 | """ 8 | 9 | from setuptools import setup, find_packages 10 | import pydetex 11 | 12 | # Load readme 13 | with open('README.rst', encoding='utf-8') as f: 14 | long_description: str = f.read() 15 | long_description = long_description.split('Install Instructions')[0].strip() 16 | 17 | # Load requirements 18 | with open('requirements.txt', encoding='utf-8') as f: 19 | requirements = [] 20 | for line in f: 21 | requirements.append(line.strip()) 22 | 23 | # Setup library 24 | setup( 25 | name=pydetex.__module_name__, 26 | version=pydetex.__version__, 27 | author=pydetex.__author__, 28 | author_email=pydetex.__email__, 29 | description=pydetex.__description__, 30 | long_description=long_description, 31 | url=pydetex.__url__, 32 | project_urls={ 33 | 'Bug Tracker': pydetex.__url_bug_tracker__, 34 | 'Documentation': pydetex.__url_documentation__, 35 | 'Source Code': pydetex.__url_source_code__ 36 | }, 37 | license=pydetex.__license__, 38 | platforms=['any'], 39 | keywords=pydetex.__keywords__, 40 | classifiers=[ 41 | 'License :: OSI Approved :: MIT License', 42 | 'Natural Language :: English', 43 | 'Operating System :: OS Independent', 44 | 'Programming Language :: Python :: 3.7', 45 | 'Programming Language :: Python :: 3.8', 46 | 'Programming Language :: Python :: 3.9', 47 | 'Programming Language :: Python :: 3.10', 48 | 'Programming Language :: Python :: 3.11', 49 | 'Programming Language :: Python :: 3.12', 50 | 'Programming Language :: Python', 51 | 'Topic :: Multimedia', 52 | 'Topic :: Text Processing' 53 | ], 54 | include_package_data=True, 55 | packages=find_packages(exclude=['test']), 56 | python_requires='>=3.7, <4', 57 | install_requires=requirements, 58 | entry_points={ 59 | 'console_scripts': [ 60 | 'pydetex = pydetex.gui:main', 61 | ], 62 | 'gui_scripts': [ 63 | 'pydetex = pydetex.gui:main', 64 | ] 65 | }, 66 | extras_require={ 67 | 'docs': ['sphinx<7', 'sphinx-autodoc-typehints>=1.2.0', 'sphinx-rtd-theme'], 68 | 'installer': ['pyinstaller==6.7.0'], 69 | 'test': ['nose2[coverage_plugin]', 'pytest'] 70 | }, 71 | setup_requires=[ 72 | 'setuptools', 73 | ], 74 | options={ 75 | 'bdist_wheel': {'universal': False} 76 | } 77 | ) 78 | -------------------------------------------------------------------------------- /test/data/example_complex_envs.txt: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture} 2 | \draw[gray, thick] (-1,2) -- (2,-4); 3 | \draw[gray, thick] (-1,-1) -- (2,2); 4 | \filldraw[black] (0,0) circle (2pt) node[anchor=west]{Intersection point}; 5 | \end{tikzpicture} 6 | 7 | \begin{references} 8 | \bibitem a+b 9 | \end{references} 10 | 11 | \begin{verbatim*} 12 | Text enclosed inside \texttt{verbatim} environment 13 | is printed directly 14 | and all \LaTeX{} commands are ignored. 
15 | \end{verbatim*} 16 | 17 | \begin{lstlisting} 18 | import numpy as np 19 | 20 | def incmatrix(genl1,genl2): 21 | m = len(genl1) 22 | n = len(genl2) 23 | M = None #to become the incidence matrix 24 | VT = np.zeros((n*m,1), int) #dummy variable 25 | 26 | #compute the bitwise xor matrix 27 | M1 = bitxormatrix(genl1) 28 | M2 = np.triu(bitxormatrix(genl2),1) 29 | 30 | for i in range(m-1): 31 | for j in range(i+1, m): 32 | [r,c] = np.where(M2 == M1[i,j]) 33 | for k in range(len(r)): 34 | VT[(i)*n + r[k]] = 1; 35 | VT[(i)*n + c[k]] = 1; 36 | VT[(j)*n + r[k]] = 1; 37 | VT[(j)*n + c[k]] = 1; 38 | 39 | if M is None: 40 | M = np.copy(VT) 41 | else: 42 | M = np.concatenate((M, VT), 1) 43 | 44 | VT = np.zeros((n*m,1), int) 45 | 46 | return M 47 | \end{lstlisting} 48 | 49 | \begin{minted}{python} 50 | import numpy as np 51 | 52 | def incmatrix(genl1,genl2): 53 | m = len(genl1) 54 | n = len(genl2) 55 | M = None #to become the incidence matrix 56 | VT = np.zeros((n*m,1), int) #dummy variable 57 | 58 | #compute the bitwise xor matrix 59 | M1 = bitxormatrix(genl1) 60 | M2 = np.triu(bitxormatrix(genl2),1) 61 | 62 | for i in range(m-1): 63 | for j in range(i+1, m): 64 | [r,c] = np.where(M2 == M1[i,j]) 65 | for k in range(len(r)): 66 | VT[(i)*n + r[k]] = 1; 67 | VT[(i)*n + c[k]] = 1; 68 | VT[(j)*n + r[k]] = 1; 69 | VT[(j)*n + c[k]] = 1; 70 | 71 | if M is None: 72 | M = np.copy(VT) 73 | else: 74 | M = np.concatenate((M, VT), 1) 75 | 76 | VT = np.zeros((n*m,1), int) 77 | 78 | return M 79 | \end{minted} 80 | 81 | \begin{sourcecode}[\label{algorithm}]{pseudocodecolor}{Wall assign algorithm.} 82 | function aggregation($R$, $N$, $\varepsilon$): 83 | $G \leftarrow \emptyset$ # New collection of wall groups 84 | for $r$ in $R$ do: 85 | for $g$ in $G$: 86 | if distance($g$, $r$) $\ge \varepsilon$: 87 | $g$ $\oplus \ \{r\}$ 88 | break 89 | end if 90 | end for 91 | end for 92 | for $i$=1 to $N$: # Merge groups 93 | $\bar{G} \leftarrow \emptyset$ 94 | for $g$ in $G$ do: 95 | $merged \leftarrow $ false 96 | for $w$ in $\bar{G}$: 97 | if distance($w$, $g$) $\ge \varepsilon$: 98 | $w$ $\oplus \ \{g\}$ 99 | $merged \leftarrow $ true 100 | end if 101 | end for 102 | if not $merged$ and $\lVert g \rVert \neq \emptyset$: 103 | $\bar{G}$ $\oplus \ \{g\}$ 104 | end if 105 | end for 106 | $G \leftarrow \bar{G}$ # Update group 107 | end for 108 | return $G$ 109 | \end{sourcecode} -------------------------------------------------------------------------------- /test/test_gui.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | TEST GUI 6 | Test guis. 7 | """ 8 | 9 | from test._base import BaseTest 10 | 11 | from pydetex.gui import PyDetexGUI 12 | # noinspection PyProtectedMember 13 | from pydetex._gui_settings import Settings, _SETTINGS_FILE, _SETTINGS_TEST 14 | # noinspection PyProtectedMember 15 | from pydetex._gui_utils import SettingsWindow 16 | import pydetex.pipelines as pip 17 | 18 | import os 19 | 20 | # Configure settings to default 21 | _SETTINGS_FILE[0] = _SETTINGS_TEST 22 | 23 | 24 | class GuiTest(BaseTest): 25 | 26 | def test_gui(self) -> None: 27 | """ 28 | Gui test. 
29 | """ 30 | if 'GITHUB' in os.environ: 31 | return 32 | gui = PyDetexGUI() 33 | cfg = gui._cfg 34 | cfg.set(cfg.CFG_CHECK_REPETITION, False) 35 | cfg.set(cfg.CFG_OUTPUT_FONT_FORMAT, False) 36 | gui._clear() 37 | self.assertEqual(gui.pipeline, pip.strict) 38 | self.assertFalse(gui._ready) 39 | 40 | # Process the pipeline 41 | gui._text_in.insert(0.0, 'This is \\textbf{Latex}') 42 | gui._process_inner() 43 | self.assertEqual(gui._get_pipeline_results(), 'This is Latex') 44 | self.assertTrue(gui._ready) 45 | 46 | # Check clear 47 | gui._clear() 48 | self.assertFalse(gui._ready) 49 | 50 | # Check clip 51 | gui._process_clip() 52 | gui._copy_to_clip() 53 | 54 | # Test gui settings 55 | gui_settings = SettingsWindow((360, 320), cfg) 56 | gui_settings.close() 57 | 58 | gui._open_dictionary() 59 | 60 | def test_settings(self) -> None: 61 | """ 62 | Test the app settings. 63 | """ 64 | cfg = Settings(ignore_file=True) 65 | self.assertEqual(cfg.get(cfg.CFG_PIPELINE), pip.strict) 66 | self.assertFalse(cfg.get(cfg.CFG_CHECK_REPETITION)) 67 | cfg.save() 68 | 69 | # Test invalid 70 | self.assertFalse(cfg.check_setting('UNKNOWN', '')) 71 | self.assertFalse(cfg.check_setting(cfg.CFG_REPETITION_MIN_CHAR, 3.5)) 72 | self.assertFalse(cfg.check_setting(cfg.CFG_REPETITION_MIN_CHAR, -1)) 73 | self.assertFalse(cfg.check_setting(cfg.CFG_REPETITION_MIN_CHAR, '-1')) 74 | self.assertTrue(cfg.check_setting(cfg.CFG_REPETITION_MIN_CHAR, '1')) 75 | self.assertFalse(cfg.check_setting(cfg.CFG_REPETITION_MIN_CHAR, '1f')) 76 | self.assertTrue(cfg.check_setting(cfg.CFG_REPETITION_MIN_CHAR, 1)) 77 | 78 | # Test day diff 79 | self.assertEqual(cfg._last_opened_day_diff, 0) 80 | 81 | # Test font size 82 | self.assertFalse(cfg.check_setting(cfg.CFG_FONT_SIZE, 55)) 83 | self.assertTrue(cfg.check_setting(cfg.CFG_FONT_SIZE, 11)) 84 | 85 | self.assertFalse(cfg.check_setting(cfg.CFG_PIPELINE, '')) 86 | 87 | # Get 88 | self.assertEqual(cfg.get(cfg.CFG_REPETITION_MIN_CHAR), 4) 89 | cfg.set(cfg.CFG_REPETITION_MIN_CHAR, 2) 90 | self.assertEqual(cfg.get(cfg.CFG_REPETITION_MIN_CHAR), 2) 91 | cfg.set(cfg.CFG_REPETITION_MIN_CHAR, '3') 92 | self.assertEqual(cfg.get(cfg.CFG_REPETITION_MIN_CHAR), 3) 93 | 94 | # Test without ignore 95 | Settings() 96 | 97 | # Test language entries 98 | cfg.set(cfg.CFG_LANG, 'en') 99 | self.assertEqual(cfg.lang('lang'), 'English') 100 | cfg.set(cfg.CFG_LANG, 'es') 101 | self.assertEqual(cfg.lang('lang'), 'Español') 102 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | CONF 6 | Configuration file for the Sphinx documentation builder. 7 | 8 | This file only contains a selection of the most common options. For a full 9 | list see the documentation: 10 | https://www.sphinx-doc.org/en/master/usage/configuration.html 11 | """ 12 | 13 | # -- Path setup --------------------------------------------------------------- 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. 
If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here 18 | # 19 | import os 20 | import sys 21 | 22 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 23 | 24 | import pydetex 25 | 26 | # -- Project information ------------------------------------------------------ 27 | 28 | project = pydetex.__module_name__ 29 | # noinspection PyShadowingBuiltins 30 | copyright = pydetex.__copyright__ 31 | author = pydetex.__author__ 32 | 33 | # The full version, including alpha/beta/rc tags 34 | release = pydetex.__version__ 35 | 36 | # -- General configuration ---------------------------------------------------- 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones 41 | extensions = ['sphinx.ext.autodoc', 42 | 'sphinx.ext.viewcode', 43 | 'sphinx.ext.intersphinx', 44 | 'sphinx.ext.autosectionlabel', 45 | 'sphinx_autodoc_typehints' 46 | ] 47 | 48 | # autodoc_default_options = { 49 | # 'private-members': False 50 | # } 51 | 52 | # Add any paths that contain templates here, relative to this directory 53 | templates_path = ['_templates'] 54 | 55 | # The document name of the "master" document, that is, the document that 56 | # contains the root toc-tree directive. Default is 'index' 57 | master_doc = 'index' 58 | 59 | # List of patterns, relative to source directory, that match files and 60 | # directories to ignore when looking for source files 61 | # This pattern also affects html_static_path and html_extra_path 62 | exclude_patterns = ['build', 'Thumbs.db', '.DS_Store'] 63 | 64 | # -- Intersphinx configuration ------------------------------------------------ 65 | 66 | intersphinx_mapping = { 67 | 'python': ('https://docs.python.org/3.9', None) 68 | } 69 | 70 | # -- Options for HTML output -------------------------------------------------- 71 | 72 | # The theme to use for HTML and HTML Help pages. See the documentation for 73 | # a list of builtin themes 74 | html_theme = 'sphinx_rtd_theme' 75 | 76 | # Add any paths that contain custom static files (such as style sheets) here, 77 | # relative to this directory. 
They are copied after the builtin static files, 78 | # so a file named "default.css" will overwrite the builtin "default.css" 79 | html_static_path = ['_static'] 80 | 81 | html_title = f'{project} {release} Documentation' 82 | 83 | html_logo = '../pydetex/res/icon.png' 84 | 85 | html_theme_options = { 86 | 'prev_next_buttons_location': None 87 | } 88 | 89 | # -- Options for LaTeX output ------------------------------------------------- 90 | 91 | # noinspection SpellCheckingInspection 92 | latex_elements = { 93 | 'papersize': 'a4paper', 94 | 'pointsize': '10pt', 95 | 'preamble': r'\def\thempfootnote{\arabic{mpfootnote}}' # workaround sphinx issue #2530 96 | } 97 | 98 | latex_documents = [ 99 | ( 100 | 'index', # source start file 101 | f'{project}.tex', # target filename 102 | f'{project} Documentation', # title 103 | author, # author 104 | 'manual', # documentclass 105 | True, # documents ref'd from toc-tree only 106 | ), 107 | ] 108 | 109 | latex_show_pagerefs = True 110 | 111 | # -- Options for autodoc - typehints --- 112 | 113 | set_type_checking_flag = True 114 | -------------------------------------------------------------------------------- /pydetex/pipelines.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | PIPELINES 6 | Defines the pipelines which apply parsers. 7 | """ 8 | 9 | __all__ = [ 10 | 'simple', 11 | 'strict', 12 | 'strict_eqn', 13 | 'PipelineType' 14 | ] 15 | 16 | import pydetex.parsers as par 17 | from pydetex.utils import ProgressBar 18 | from typing import Callable 19 | 20 | PipelineType = Callable 21 | 22 | 23 | def simple( 24 | s: str, 25 | lang: str = 'en', 26 | show_progress: bool = False, 27 | replace_pydetex_tags: bool = True, 28 | remove_common_tags: bool = True, 29 | replace_single_chars_eqn: bool = True, 30 | **kwargs 31 | ) -> str: 32 | """ 33 | The most simple pipeline ever. 34 | 35 | :param s: String latex 36 | :param lang: Language tag of the code 37 | :param show_progress: Show progress bar 38 | :param replace_pydetex_tags: Replace pydetex tags like symbols, cites 39 | :param remove_common_tags: Call ``remove_common_tags`` parser 40 | :param replace_single_chars_eqn: Replaces all single char equations 41 | :return: String with no latex! 
42 | """ 43 | if len(s) == 0: 44 | return s 45 | steps = 17 46 | if not replace_pydetex_tags: 47 | steps -= 1 48 | if not replace_single_chars_eqn: 49 | steps -= 1 50 | pb = kwargs.get('progressbar', ProgressBar(steps)) if show_progress else None 51 | s = '\n'.join(s.splitlines()) # Removes \r\n 52 | s = par.process_inputs(s, pb=pb) 53 | s = par.remove_comments(s, pb=pb) 54 | s = par.process_begin_document(s, pb=pb) 55 | s = par.simple_replace(s, pb=pb) 56 | s = par.process_def(s, pb=pb, replace=kwargs.get('replace_defs', False)) 57 | if remove_common_tags: 58 | s = par.remove_common_tags(s, pb=pb) 59 | s = par.process_cite(s, pb=pb, compress_cite=kwargs.get('compress_cite', True)) 60 | s = par.process_citeauthor(s, lang, pb=pb) 61 | s = par.process_ref(s, pb=pb) 62 | s = par.process_labels(s, pb=pb) 63 | s = par.process_items(s, lang, pb=pb) 64 | if replace_single_chars_eqn: 65 | s = par.process_chars_equations(s, lang, single_only=True, pb=pb) 66 | s = par.unicode_chars_equations(s, pb=pb) 67 | s = par.remove_comments(s, pb=pb) # comments, replace tags, strip 68 | if replace_pydetex_tags: 69 | s = par.replace_pydetex_tags(s, pb=pb, **kwargs) 70 | s = par.strip_punctuation(s, pb=pb) 71 | s = par.simple_replace(s, pb=pb) 72 | if s[-1] == '\\': 73 | s = s[0:len(s) - 1] 74 | return s 75 | 76 | 77 | def strict( 78 | s: str, 79 | lang: str = 'en', 80 | show_progress: bool = False, 81 | eqn_simple: bool = True, 82 | **kwargs 83 | ) -> str: 84 | """ 85 | Apply simple + removes all commands. 86 | 87 | :param s: String latex 88 | :param lang: Language tag of the code 89 | :param show_progress: Show progress bar 90 | :param eqn_simple: If true, replace equations with a label, else, attempt to write it as-is 91 | :return: String with no latex! 92 | """ 93 | pb = ProgressBar(steps=24) if show_progress else None 94 | if 'progressbar' not in kwargs.keys(): 95 | # noinspection PyTypeChecker 96 | kwargs['progressbar'] = pb 97 | s = simple(s, lang, replace_pydetex_tags=False, remove_common_tags=False, 98 | show_progress=show_progress, replace_single_chars_eqn=False, **kwargs) # 15 steps 99 | s = par.process_chars_equations(s, lang, single_only=not eqn_simple, pb=pb) 100 | s = par.remove_equations(s, pb=pb) 101 | s = par.remove_environments(s, pb=pb) 102 | s = par.remove_commands_param(s, lang, pb=pb) 103 | s = par.remove_commands_param_noargv(s, pb=pb) 104 | s = par.remove_comments(s, pb=pb) 105 | s = par.replace_pydetex_tags(s, pb=pb, **kwargs) 106 | s = par.strip_punctuation(s, pb=pb) 107 | s = par.simple_replace(s, pb=pb) 108 | return s 109 | 110 | 111 | def strict_eqn( 112 | s: str, 113 | lang: str = 'en', 114 | show_progress: bool = False, 115 | **kwargs 116 | ) -> str: 117 | """ 118 | Same as strict, but replaces the equations with their string representation. 119 | 120 | :param s: String latex 121 | :param lang: Language tag of the code 122 | :param show_progress: Show progress bar 123 | :return: String with no latex! 124 | """ 125 | return strict(s, lang, show_progress, eqn_simple=False, **kwargs) 126 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | 2 | ======= 3 | PyDetex 4 | ======= 5 | 6 | .. image:: https://img.shields.io/badge/author-Pablo%20Pizarro%20R.-lightgray.svg 7 | :target: https://ppizarror.com 8 | :alt: @ppizarror 9 | 10 | .. 
image:: https://img.shields.io/badge/license-MIT-blue.svg 11 | :target: https://opensource.org/licenses/MIT 12 | :alt: License MIT 13 | 14 | .. image:: https://img.shields.io/badge/python-3.7+-red.svg 15 | :target: https://www.python.org/downloads 16 | :alt: Python 3.7+ 17 | 18 | .. image:: https://badge.fury.io/py/pydetex.svg 19 | :target: https://pypi.org/project/pydetex 20 | :alt: PyPi package 21 | 22 | .. image:: https://img.shields.io/github/actions/workflow/status/ppizarror/PyDetex/ci.yml?branch=master 23 | :target: https://github.com/ppizarror/PyDetex/actions/workflows/ci.yml 24 | :alt: Build status 25 | 26 | .. image:: https://app.fossa.com/api/projects/git%2Bgithub.com%2Fppizarror%2FPyDetex.svg?type=shield 27 | :target: https://app.fossa.com/projects/git%2Bgithub.com%2Fppizarror%2FPyDetex?ref=badge_shield 28 | :alt: FOSSA Status 29 | 30 | .. image:: https://readthedocs.org/projects/pydetex/badge/?version=latest 31 | :target: https://pydetex.readthedocs.io 32 | :alt: Documentation Status 33 | 34 | .. image:: https://codecov.io/gh/ppizarror/PyDetex/branch/master/graph/badge.svg 35 | :target: https://codecov.io/gh/ppizarror/PyDetex 36 | :alt: Codecov 37 | 38 | .. image:: https://img.shields.io/github/issues/ppizarror/PyDetex 39 | :target: https://github.com/ppizarror/PyDetex/issues 40 | :alt: Open issues 41 | 42 | .. image:: https://img.shields.io/pypi/dm/pydetex?color=purple 43 | :target: https://pypi.org/project/pydetex 44 | :alt: PyPi downloads 45 | 46 | .. image:: https://static.pepy.tech/personalized-badge/pydetex?period=total&units=international_system&left_color=grey&right_color=lightgrey&left_text=total%20downloads 47 | :target: https://pepy.tech/project/pydetex 48 | :alt: Total downloads 49 | 50 | .. image:: https://img.shields.io/badge/buy%20me%20a-Ko--fi-02b9fe 51 | :target: https://ko-fi.com/ppizarror 52 | :alt: Buy me a Ko-fi 53 | 54 | Source repo on `GitHub `_, 55 | and run it on `Repl.it `_ 56 | 57 | Introduction 58 | ------------ 59 | 60 | PyDetex is a Python application that transforms LaTeX code into plain text. It has multiple 61 | language support (15+), detects repeated words, offers a dictionary (synonyms, antonyms, 62 | definitions), and many more things to come! 63 | 64 | Comprehensive documentation for the latest version (if you plan to use the API) 65 | is available at https://pydetex.readthedocs.io 66 | 67 | Install Instructions 68 | -------------------- 69 | 70 | PyDetex can be installed via pip for MacOS, Windows & Linux. Simply run: 71 | 72 | .. code-block:: bash 73 | 74 | $> python3 pip install pydetex -U 75 | 76 | Also, compiled binaries for Windows (x64) and macOS are available through GitHub releases. 77 | 78 | Launch the GUI or use the library 79 | --------------------------------- 80 | 81 | You can just run this command anywhere to execute the application. 82 | 83 | .. code-block:: bash 84 | 85 | $> python3 -m pydetex.gui 86 | 87 | .. figure:: https://raw.githubusercontent.com/ppizarror/pydetex/master/docs/_static/example_simple.png 88 | :scale: 40% 89 | :align: center 90 | 91 | **(Simple Pipeline)** Tadada... !!! A simple GUI to process your LaTex and paste it into Google Docs, an email, or Grammarly \(^o^)/ 92 | 93 | .. figure:: https://raw.githubusercontent.com/ppizarror/pydetex/master/docs/_static/example_strict.png 94 | :scale: 40% 95 | :align: center 96 | 97 | **(Strict Pipeline)** The strict pipeline removes all commands or replaces them with some known tags. 98 | 99 | 100 | .. 
figure:: https://raw.githubusercontent.com/ppizarror/pydetex/master/docs/_static/pydetex_windows.png 101 | :scale: 40% 102 | :align: center 103 | 104 | Multiple options to configure: Check repeated words, highlight undetected code, or use different pipelines. 105 | 106 | You can also import the library and use the parsers (methods that take latex code 107 | and perform a single task) or the pipelines (a combination of parsers). For more 108 | information, please visit the `documentation `_. 109 | 110 | .. code-block:: python 111 | 112 | import pydetex.pipelines as pip 113 | text = "This is a \\textbf{LaTex} code..." 114 | out = pip.simple(text) 115 | 116 | TO-DOs 117 | ------ 118 | 119 | Currently, many things must be improved: 120 | 121 | - Add syntax checking for several languages, like `language-check `_. 122 | - Custom support for environments, such as *table*. 123 | 124 | Author 125 | ------ 126 | 127 | `Pablo Pizarro R. `_ | 2021 - 2025 128 | -------------------------------------------------------------------------------- /test/data/example_tables_strict.txt: -------------------------------------------------------------------------------- 1 | % !TeX spellcheck = en_US 2 | 3 | \subsection{Datasets} 4 | 5 | Datasets have played an important role within floor plan analysis as there is not a standard notation for its composition; therefore, designed models must incorporate specific rules for each particular style, facing high variability due to: (1) the visual representation of the building, wherein best cases only 70\% of the graphical information is compliant with some standard rules \cite{Ah-Soon1997}, (2) the nature of the information contained, and (3) the way of the information is visually represented \cite{DelasHeras2014}. Moreover, each floor plan dataset has limitations regarding quantity or complexity. Thus, researchers opt to utilize the datasets suitable for their purposes, including specific processing steps that could not be generalized to other formats \cite{Kim2021}. \\ 6 | 7 | For such datasets to be useful in floor plan analysis, there must be pixel-wise annotations for objects such as walls, openings, and rooms. However, there are few public datasets because it is difficult for floor plans to be invariably labeled due to ambiguity in notation and the need for high-level expertise for object recognition \cite{Mace2010, DelasHeras2014}. Even though several practical tools have been developed to annotate them conveniently \cite{Rendek2004, Russell2008, DelasHeras2015}, it is difficult to do so because there is no way to guarantee the same annotations from different experts, especially for complicated plans \cite{Kim2021}. \\ 8 | 9 | % revisar ---> 10 | % + ROBIN 11 | % + SESYD 12 | \begin{table*} 13 | \begin{threeparttable} 14 | \centering 15 | \caption{Datasets used by floor plan analysis research.} 16 | \itemresize{1}{ 17 | \begin{tabular}[t]{lcL{13.2cm}} 18 | \hline 19 | \textbf{Dataset (year)} & \textbf{Public} & \textbf{Annotation (quantity)} \\ 20 | \hline 21 | % antes usaba \checked \tnote{c} 22 | 23 | FPLAN-POLY \cite{Rusinol2010} (2010) & \cite{Rusinol2010a} & Walls, doors, windows, and furniture from 37 classes in vectorized format (42) \\ 24 | 25 | SESYD \cite{Delalandre2010} (2010) & \cite{Delalandre2010a} & Walls, doors, windows, and six furniture classes; 10 different synthetic apartment configurations, designed to study symbol recognition. 
Res 1837--6775 (1000) \\ % res 26 | 27 | \hline 28 | 29 | \end{tabular} 30 | } 31 | % \vspace{\baselineskip} 32 | \begin{tablenotes} 33 | Note: Res -- Resolution in pixels (px). 34 | \item[a] \url{http://dag.cvc.uab.es/resources/floorplans} (all links visited on 10/01/2021) 35 | \item[b] \href{a}{b} 36 | \item[c] \url{http://mathieu.delalandre.free.fr/projects/sesyd} 37 | % \item[d] \url{https://www.cs.toronto.edu/~fidler/projects/rent3D.html} 38 | \end{tablenotes} 39 | \label{tab:databases} 40 | \end{threeparttable} 41 | \end{table*} 42 | 43 | \def\heightfp {3.5cm} 44 | \begin{figure*}[t] 45 | \centering 46 | \caption{Floor plan image examples from datasets.} 47 | \itemresize{1}{ 48 | \begin{tabular}[t]{ccccc} 49 | \includegraphics[height=\heightfp]{datasets/fplanpoly.png} & 50 | \includegraphics[height=\heightfp]{datasets/sesyd.png} & 51 | \includegraphics[height=\heightfp]{datasets/cvcfp1} & 52 | \includegraphics[height=\heightfp]{datasets/r3d1} & 53 | \includegraphics[height=\heightfp]{datasets/sydneyhouse4} \\ 54 | 55 | \footnotesize {\textbf{FPLAN-POLY} \cite{Rusinol2010}} & 56 | \footnotesize {\textbf{SESYD} \cite{Delalandre2010}} & 57 | \footnotesize {\textbf{CVC-FP} \cite{DelasHeras2015}} & 58 | \footnotesize {\textbf{R3D -- Rent3D} \cite{ChenxiLiu2015}} & 59 | \footnotesize {\textbf{SydneyHouse} \cite{Chu2016}} \\ 60 | 61 | &&&& \\ 62 | 63 | \includegraphics[height=\heightfp]{datasets/rfp2} & 64 | \includegraphics[height=\heightfp]{datasets/robin} & 65 | \includegraphics[height=\heightfp]{datasets/r2v1} & 66 | \includegraphics[width=\heightfp,angle=90]{datasets/cubicasa5k1} & 67 | \includegraphics[width=\heightfp,angle=90]{datasets/rplan4.pdf} \\ 68 | 69 | \footnotesize {\textbf{R-FP -- Rakuten} \cite{Dodge2017}} & 70 | \footnotesize {\textbf{ROBIN} \cite{Sharma2017}} & 71 | \footnotesize {\textbf{R2V} \cite{Liu2017} / \textbf{LIFULL} \cite{NationalInstituteofInformaticsNII2021}} & 72 | \footnotesize {\textbf{CubiCasa5K} \cite{Kalervo2019}} & 73 | \footnotesize {\textbf{RPLAN} \cite{Wu2019}} \\ 74 | 75 | 76 | &&&& \\ 77 | 78 | \includegraphics[height=\heightfp]{datasets/bti.jpg} & 79 | \includegraphics[width=\heightfp,angle=90]{datasets/eais2_1} & 80 | \includegraphics[height=\heightfp]{datasets/zscvfp} & 81 | \includegraphics[height=\heightfp]{datasets/rfp.jpg} & 82 | \includegraphics[width=\heightfp,angle=90]{datasets/ruraldataset2.jpg} \\ 83 | 84 | \footnotesize {\textbf{BTI} \cite{Surikov2020}} & 85 | \footnotesize {\textbf{EAIS} \cite{Jang2020, MinistryofLandandTransport2021}} & 86 | \footnotesize {\textbf{ZSCVFP} \cite{Dong2021}} & 87 | \footnotesize {\textbf{RFP} \cite{Lv2021}} & 88 | \footnotesize {\textbf{RuralHomeData} \cite{Lu2021}} \\ 89 | 90 | \end{tabular} 91 | } 92 | \label{dataset:imgs} 93 | \end{figure*} 94 | 95 | The existing datasets were summarized in Table \ref{tab:databases}, considering its source article, availability, annotation, and quantity, ordered by release year. Figure \ref{dataset:imgs} illustrates a selection of images from the datasets considered within the review. It can be noted that there are distinct drawing styles and semantics among the apartment and house plans; some have colored floors, text, icons, dimension lines, furniture, and walls with several styles, angles, and complex arrangements. 
These diverse settings were exploited by rule-based methods, described in section \ref{rulebased}, which recognize walls, doors, windows, furniture, and rooms by defining algorithms that considered different approaches specific to each style; or by learning-based ones (section \ref{learningbased}), that trained models to automatically recognize the objects. -------------------------------------------------------------------------------- /pydetex/res/u_symbols.txt: -------------------------------------------------------------------------------- 1 | \texttrademark ™ 2 | \trademark ™ 3 | \textregistered ® 4 | \registered ® 5 | \copyright © 6 | \pilcrow ¶ 7 | \pound £ 8 | \euro € 9 | \cents ¢ 10 | \section § 11 | \space ␣ 12 | \degree ° 13 | \zeta ζ 14 | \Xi Ξ 15 | \xi ξ 16 | \wr ≀ 17 | \wp ℘ 18 | \wedge ∧ 19 | \land ∧ 20 | \Vvdash ⊪ 21 | \veebar ⊻ 22 | \vee ∨ 23 | \lor ∨ 24 | \vdots ⋮ 25 | \Vdash ⊩ 26 | \vDash ⊨ 27 | \vdash ⊢ 28 | \vartriangleright ⊳ 29 | \vartriangleleft ⊲ 30 | \vartriangle △ 31 | \vartheta ϑ 32 | \varsigma ς 33 | \varrho ϱ 34 | \varpropto ∝ 35 | \varpi ϖ 36 | \varphi φ 37 | \varnothing ∅ 38 | \varkappa ϰ 39 | \varepsilon ε 40 | \upuparrows ⇈ 41 | \Upsilon Υ 42 | \upsilon υ 43 | \uplus ⊎ 44 | \upharpoonright ↾ 45 | \upharpoonleft ↿ 46 | \Updownarrow ⇕ 47 | \updownarrow ↕ 48 | \Uparrow ⇑ 49 | \uparrow ↑ 50 | \unrhd ⊵ 51 | \unlhd ⊴ 52 | \twoheadrightarrow ↠ 53 | \twoheadleftarrow ↞ 54 | \trianglerighteq ⊵ 55 | \triangleright ▷ 56 | \triangleq ≜ 57 | \trianglelefteq ⊴ 58 | \triangleleft ◁ 59 | \triangledown ▽ 60 | \triangle △ 61 | \top ⊤ 62 | \times × 63 | \thicksim ∼ 64 | \thickapprox ≈ 65 | \Theta Θ 66 | \theta θ 67 | \therefore ∴ 68 | \tau τ 69 | \swarrow ↙ 70 | \surd √ 71 | \supseteq ⊇ 72 | \Supset ⋑ 73 | \supset ⊃ 74 | \sum ∑ 75 | \succsim ≿ 76 | \succeq ⪰ 77 | \succcurlyeq ≽ 78 | \succ ≻ 79 | \subseteq ⊆ 80 | \Subset ⋐ 81 | \subset ⊂ 82 | \star ⋆ 83 | \square □ 84 | \sqsupseteq ⊒ 85 | \sqsupset ⊐ 86 | \sqsubseteq ⊑ 87 | \sqsubset ⊏ 88 | \sqcup ⊔ 89 | \sqcap ⊓ 90 | \sphericalangle ∢ 91 | \spadesuit ♠ 92 | \smile ⌣ 93 | \smallsmile ⌣ 94 | \smallsetminus ∖ 95 | \smallfrown ⌢ 96 | \simeq ≃ 97 | \sim ∼ 98 | \Sigma Σ 99 | \sigma σ 100 | \shortparallel ∥ 101 | \sharp ♯ 102 | \setminus ∖ 103 | \searrow ↘ 104 | \rVert ‖ 105 | \rtimes ⋊ 106 | \Rsh ↱ 107 | \Rrightarrow ⇛ 108 | \risingdotseq ≓ 109 | \rightthreetimes ⋌ 110 | \rightsquigarrow ⇝ 111 | \rightrightarrows ⇉ 112 | \rightleftharpoons ⇌ 113 | \rightleftarrows ⇄ 114 | \rightharpoonup ⇀ 115 | \rightharpoondown ⇁ 116 | \rightarrowtail ↣ 117 | \Rightarrow ⇒ 118 | \rightarrow → 119 | \to → 120 | \rho ρ 121 | \rhd ⊳ 122 | \rfloor ⌋ 123 | \Re ℜ 124 | \rceil ⌉ 125 | \Psi Ψ 126 | \psi ψ 127 | \propto ∝ 128 | \prod ∏ 129 | \prime ′ 130 | \precsim ≾ 131 | \preceq ⪯ 132 | \preccurlyeq ≼ 133 | \prec ≺ 134 | \pm ± 135 | \Pi Π 136 | \pi π 137 | \pitchfork ⋔ 138 | \Phi Φ 139 | \phi ϕ 140 | \perp ⊥ 141 | \partial ∂ 142 | \parallel ∥ 143 | \otimes ⊗ 144 | \oslash ⊘ 145 | \oplus ⊕ 146 | \ominus ⊖ 147 | \Omega Ω 148 | \omega ω 149 | \oint ∮ 150 | \odot ⊙ 151 | \nwarrow ↖ 152 | \nu ν 153 | \notin ∉ 154 | \ni ∋ 155 | \nexists ∄ 156 | \neq ≠ 157 | \neg ¬ 158 | \nearrow ↗ 159 | \natural ♮ 160 | \nabla ∇ 161 | \mu μ 162 | \multimap ⊸ 163 | \mp ∓ 164 | \models ⊨ 165 | \mid ∣ 166 | \mho ℧ 167 | \measuredangle ∡ 168 | \mapsto ↦ 169 | \lVert ‖ 170 | \ltimes ⋉ 171 | \Lsh ↰ 172 | \lozenge ◊ 173 | \looparrowright ↬ 174 | \looparrowleft ↫ 175 | \Longrightarrow ⟹ 176 | \longrightarrow ⟶ 177 | \longmapsto ⟼ 178 | \Longleftrightarrow ⟺ 179 | \longleftrightarrow 
⟷ 180 | \Longleftarrow ⟸ 181 | \longleftarrow ⟵ 182 | \lll ⋘ 183 | \Lleftarrow ⇚ 184 | \ll ≪ 185 | \lhd ⊲ 186 | \lfloor ⌊ 187 | \lesssim ≲ 188 | \lessgtr ≶ 189 | \lesseqgtr ⋚ 190 | \lessdot ⋖ 191 | \leqslant ⩽ 192 | \leqq ≦ 193 | \leq ≤ 194 | \leftthreetimes ⋋ 195 | \leftrightsquigarrow ↭ 196 | \leftrightharpoons ⇋ 197 | \leftrightarrows ⇆ 198 | \Leftrightarrow ⇔ 199 | \leftrightarrow ↔ 200 | \leftleftarrows ⇇ 201 | \leftharpoonup ↼ 202 | \leftharpoondown ↽ 203 | \leftarrowtail ↢ 204 | \Leftarrow ⇐ 205 | \leftarrow ← 206 | \leadsto ↝ 207 | \le ≤ 208 | \lceil ⌈ 209 | \Lambda Λ 210 | \lambda λ 211 | \kappa κ 212 | \Join ⋈ 213 | \iota ι 214 | \intercal ⊺ 215 | \int ∫ 216 | \infty ∞ 217 | \in ∈ 218 | \implies ⇒ 219 | \Im ℑ 220 | \hslash ℏ 221 | \hookrightarrow ↪ 222 | \hookleftarrow ↩ 223 | \heartsuit ♡ 224 | \hbar ℏ 225 | \gtrsim ≳ 226 | \gtrless ≷ 227 | \gtreqless ⋛ 228 | \gtrdot ⋗ 229 | \gimel ℷ 230 | \ggg ⋙ 231 | \gg ≫ 232 | \geqq ≧ 233 | \geq ≥ 234 | \ge ≥ 235 | \Gamma Γ 236 | \gamma γ 237 | \frown ⌢ 238 | \forall ∀ 239 | \flat ♭ 240 | \Finv Ⅎ 241 | \fallingdotseq ≒ 242 | \exists ∃ 243 | \eth ð 244 | \eta η 245 | \equiv ≡ 246 | \eqcirc ≖ 247 | \epsilon ∊ 248 | \emptyset ∅ 249 | \ell ℓ 250 | \downharpoonright ⇂ 251 | \downharpoonleft ⇃ 252 | \downdownarrows ⇊ 253 | \Downarrow ⇓ 254 | \downarrow ↓ 255 | \dots … 256 | \dotplus ∔ 257 | \doteqdot ≑ 258 | \doteq ≐ 259 | \divideontimes ⋇ 260 | \div ÷ 261 | \digamma Ϝ 262 | \diamondsuit ♢ 263 | \Diamond ◇ 264 | \diamond ⋄ 265 | \Delta Δ 266 | \delta δ 267 | \ddots ⋱ 268 | \ddagger ‡ 269 | \dashv ⊣ 270 | \dashrightarrow ⇢ 271 | \dashleftarrow ⇠ 272 | \daleth ℸ 273 | \dagger † 274 | \curvearrowright ↷ 275 | \curvearrowleft ↶ 276 | \curlywedge ⋏ 277 | \curlyvee ⋎ 278 | \curlyeqsucc ⋟ 279 | \curlyeqprec ⋞ 280 | \Cup ⋓ 281 | \cup ∪ 282 | \coprod ∐ 283 | \cong ≅ 284 | \complement ∁ 285 | \clubsuit ♣ 286 | \circledS Ⓢ 287 | \circleddash ⊝ 288 | \circledcirc ⊚ 289 | \circledast ⊛ 290 | \circlearrowright ↻ 291 | \circlearrowleft ↺ 292 | \circeq ≗ 293 | \circ ∘ 294 | \chi χ 295 | \centerdot ⋅ 296 | \cdots ⋯ 297 | \cdot ⋅ 298 | \Cap ⋒ 299 | \cap ∩ 300 | \Bumpeq ≎ 301 | \bumpeq ≏ 302 | \bullet ∙ 303 | \boxtimes ⊠ 304 | \boxplus ⊞ 305 | \boxminus ⊟ 306 | \boxdot ⊡ 307 | \Box □ 308 | \bowtie ⋈ 309 | \bot ⊥ 310 | \blacktriangleright ▶ 311 | \blacktriangleleft ◀ 312 | \blacktriangledown ▼ 313 | \blacktriangle ▲ 314 | \blacksquare ■ 315 | \blacklozenge ◆ 316 | \bigwedge ⋀ 317 | \bigvee ⋁ 318 | \biguplus ⨄ 319 | \bigtriangleup △ 320 | \bigtriangledown ▽ 321 | \bigstar ★ 322 | \bigsqcup ⨆ 323 | \bigotimes ⨂ 324 | \bigoplus ⨁ 325 | \bigodot ⨀ 326 | \bigcup ⋃ 327 | \bigcirc ○ 328 | \bigcap ⋂ 329 | \between ≬ 330 | \beth ℶ 331 | \beta β 332 | \because ∵ 333 | \barwedge ⊼ 334 | \doublebarwedge ⩞ 335 | \backsim ∽ 336 | \backprime ‵ 337 | \backepsilon ∍ 338 | \asymp ≍ 339 | \ast ∗ 340 | \approxeq ≊ 341 | \approx ≈ 342 | \angle ∠ 343 | \alpha α 344 | \aleph ℵ 345 | \rangle ⟩ 346 | \langle ⟨ 347 | \sqrt √ 348 | \frac12 ½ 349 | \frac13 ⅓ 350 | \frac23 ⅔ 351 | \frac14 ¼ 352 | \frac34 ¾ 353 | \frac15 ⅕ 354 | \frac25 ⅖ 355 | \frac35 ⅗ 356 | \frac45 ⅘ 357 | \frac16 ⅙ 358 | \frac56 ⅚ 359 | \frac17 ⅐ 360 | \frac18 ⅛ 361 | \frac38 ⅜ 362 | \frac58 ⅝ 363 | \frac78 ⅞ 364 | \frac19 ⅑ 365 | \frac110 ⅒ 366 | -------------------------------------------------------------------------------- /test/test_pipelines.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | TEST PIPELINES 6 | Test the 
pipelines. 7 | """ 8 | 9 | from test._base import BaseTest 10 | import pydetex.pipelines as pip 11 | import pydetex.parsers as par 12 | import os 13 | 14 | 15 | class ParserTest(BaseTest): 16 | 17 | def test_simple(self) -> None: 18 | """ 19 | Test simple pipeline. 20 | """ 21 | s = 'Table \\ref{tab:review-rulebased} details the reviewed rule-based ' \ 22 | 'methods within floor plan recognition, considering the datasets ' \ 23 | 'used (Table \\ref{tab:databases}) and the four categories of tasks,' \ 24 | ' such as (1) \\textit{Graphics separation}, (2) \\textit{Pattern ' \ 25 | 'recognition}, (3) \\textit{Vectorization}, and (4) \\textit{Structural modeling}.' 26 | self.assertEqual( 27 | pip.simple(s, show_progress=True), 28 | 'Table 1 details the reviewed rule-based methods within floor plan ' 29 | 'recognition, considering the datasets used (Table 2) and the four ' 30 | 'categories of tasks, such as (1) Graphics separation, (2) Pattern ' 31 | 'recognition, (3) Vectorization, and (4) Structural modeling.') 32 | 33 | s = 'aa\\begin{document}x\\end{document}' 34 | self.assertEqual(pip.simple(s, show_progress=True), 'x') 35 | 36 | s = '$a$a\\def\\a{a}\\a' 37 | self.assertEqual(pip.simple(s, show_progress=True, replace_defs=True), 'aaa') 38 | 39 | # New lines 40 | s = 'New space \\ and line \\\\Epic' 41 | self.assertEqual(pip.simple(s), 'New space and line\nEpic') 42 | 43 | # Empty 44 | self.assertEqual(pip.simple(''), '') 45 | 46 | # Test with invalid last char 47 | self.assertEqual(pip.simple('This is epic\\\nThis is epic\\'), 'This is epic\nThis is epic') 48 | 49 | # Test replacers 50 | s = 'This is a \\Thetamagic but\\xspace also \\Theta is not or \\Theta\\Epic or \\Theta\n sad' 51 | t = 'This is a \\Thetamagic but also Θ is not or Θ\Epic or Θ\nsad' 52 | self.assertEqual(pip.simple(s), t) 53 | 54 | # Check files 55 | example_files = [ 56 | ('data/example_simple_itemize.txt', 'data/example_simple_itemize_output.txt'), 57 | ('data/example_simple_comments.txt', 'data/example_simple_comments_output.txt') 58 | ] 59 | for f in example_files: 60 | self.assertEqual(pip.simple(par._load_file_search(f[0])), par._load_file_search(f[1])) 61 | 62 | def test_strict(self) -> None: 63 | """ 64 | Strict pipeline. 65 | """ 66 | s = 'This contains \\insertimageanother{\label{1}}{2}{3}commands, but must be removed!\\' 67 | self.assertEqual(pip.strict(s, show_progress=True), 68 | 'This contains commands, but must be removed!') 69 | 70 | s = 'This \$12bn is very \citeauthor{nice!} nice' 71 | self.assertEqual(pip.strict(s), 'This $12bn is very [author] nice') 72 | 73 | s = 'This \\quoteepic{code removed!}is removed\\totally. Not epic \\cite{nice}' 74 | self.assertEqual(pip.strict(s), 'This is removed. Not epic [1]') 75 | 76 | s = 'This \\quoteepic{code removed!}is removed \\totally nice. Not epic \\cite{nice}' 77 | self.assertEqual(pip.strict(s), 'This is removed nice. 
Not epic [1]') 78 | 79 | # Empty 80 | self.assertEqual(pip.strict('', show_progress=True), '') 81 | 82 | s = '\DeclareUnicodeCharacter{2292}{\ensuremath{\ensuremath{\\to}}}' 83 | self.assertEqual(pip.strict(s), '') 84 | 85 | s = """% !TeX spellcheck = en_US 86 | \\begin{table*}[t] 87 | 88 | \centering 89 | % \\vspace{\\baselineskip} 90 | \\begin{tablenotes} 91 | \item[a] Graphics separation 92 | \item[b] Door/Window/Furniture/Others 93 | \item[c] OCR or Dimensions were recognized 94 | \item[d] Vectorization 95 | \item[e] Modeling (Graph, other) 96 | \end{tablenotes} 97 | \label{tab:review-rulebased} 98 | \end{threeparttable} 99 | \end{table*} 100 | """ 101 | self.assertEqual( 102 | pip.strict(s, show_progress=True), 103 | '- [a] Graphics separation\n- [b] Door/Window/Furniture/Others\n- [c' 104 | '] OCR or Dimensions were recognized\n- [d] Vectorization\n- [e] Mod' 105 | 'eling (Graph, other)') 106 | 107 | # Check files 108 | example_files = [ 109 | ('data/example_tables_strict.txt', 'data/example_tables_strict_output.txt'), 110 | ('data/example_placeholder.txt', 'data/example_placeholder_output.txt'), 111 | ('data/example_simple_figure_caption.txt', 'data/example_simple_figure_caption_output.txt'), 112 | ('data/example_simple_cite.txt', 'data/example_simple_cite_output.txt') 113 | ] 114 | for f in example_files: 115 | self.assertEqual(pip.strict(par._load_file_search(f[0])), 116 | par._load_file_search(f[1])) 117 | 118 | # Test remove environments 119 | self.assertEqual(pip.strict(par._load_file_search('data/example_complex_envs.txt'), 120 | show_progress=True).strip(), 121 | par._load_file_search('data/example_complex_envs_output.txt')) 122 | 123 | # Exclusive tests 124 | example_files = [ 125 | ('data/example_complex_template.txt', 'data/example_complex_template_output.txt') 126 | ] 127 | if not (True and 'GITHUB' not in os.environ): # If not test complex 128 | example_files.clear() 129 | for f in example_files: 130 | self.assertEqual(pip.strict(par._load_file_search(f[0])), par._load_file_search(f[1])) 131 | 132 | def test_strict_eqn(self) -> None: 133 | """ 134 | Test strict eqn pipeline. 135 | """ 136 | self.assertEqual( 137 | pip.strict_eqn('My value is: $0.4375\ \\frac{\\text{tonf}}{{\\text{m}}^2}$. Nice!'), 138 | 'My value is: 0.4375 (tonf)/(m²). Nice!') 139 | -------------------------------------------------------------------------------- /specs/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | SPECS 6 | Define spec constructor. 
7 | """ 8 | 9 | __all__ = [ 10 | 'block_cipher', 11 | 'get_analysis', 12 | 'get_bundle', 13 | 'get_collect', 14 | 'get_exe', 15 | 'get_pyz', 16 | 'is_osx', 17 | 'is_win', 18 | 'save_zip' 19 | ] 20 | 21 | from pydetex import __file__ 22 | from pydetex.version import ver 23 | from zipfile import ZipFile, ZIP_DEFLATED 24 | import os 25 | import platform 26 | 27 | print('Inializing specs') 28 | print(f'Current path: {os.getcwd()}') 29 | print(f'Platform: {platform.system()}') 30 | 31 | sep = os.path.sep 32 | is_osx = platform.system() == 'Darwin' 33 | is_win = platform.system() == 'Windows' 34 | 35 | # Configure 36 | app_name = 'PyDetex' if not is_osx else 'PyDetex_macOS' 37 | app_icon = '../pydetex/res/icon.ico' if not is_osx else '../pydetex/res/icon.icns' 38 | block_cipher = None 39 | 40 | excluded_binaries = [ 41 | 'brotli._brotli', 42 | 'cryptography.hazmat.bindings._rust', 43 | 'libc++.1.dylib', 44 | 'libiconv.2.dylib', 45 | 'libicudata.68.dylib', 46 | 'libicuuc.68.dylib', 47 | 'libncurses.6.dylib', 48 | 'libomp.dylib', 49 | 'libreadline.8.dylib', 50 | 'libtinfo.6.dylib', 51 | 'libtinfow.6.dylib', 52 | 'libxml2.2.dylib', 53 | 'libzmq.5.dylib', 54 | 'yaml._yaml' 55 | ] 56 | excluded_binaries_contains = [ 57 | 'api-ms-win-', 58 | # 'lib-dynload', 59 | 'lxml', 60 | 'markupsafe', 61 | f'miktex{sep}bin', 62 | 'pandas', 63 | 'pygame', 64 | # 'sklearn', 65 | 'Windows Performance Toolkit', 66 | f'zmq{sep}backend{sep}cython' 67 | ] 68 | excluded_modules = [ 69 | 'IPython', 70 | 'matplotlib', 71 | 'notebook', 72 | 'numpy', 73 | 'PIL', 74 | 'PyQt5', 75 | 'scipy' 76 | ] 77 | 78 | 79 | def _append_to_datas(datas: list, file_path: str, target_folder: str, 80 | base_target_folder: str = 'pydetex', relative: bool = True) -> None: 81 | """ 82 | Add a path to datas. 83 | 84 | :param datas: Data list 85 | :param file_path: File path 86 | :param target_folder: Folder to paste the resources 87 | :param base_target_folder: Base folder of the resource 88 | :param relative: If True append pydetex_folder 89 | """ 90 | if relative: 91 | res_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), file_path) 92 | else: 93 | res_path = file_path 94 | if target_folder == '': 95 | target_folder = os.path.basename(os.path.dirname(res_path)) 96 | if os.path.exists(res_path): 97 | datas.append((res_path, os.path.join(base_target_folder, target_folder))) 98 | else: 99 | raise FileNotFoundError(f'{file_path} does not exist') 100 | 101 | 102 | def _file_sz(f: str) -> str: 103 | """ 104 | Computes the file size in KB. 105 | """ 106 | sz = round(os.path.getsize(f) / 1024, 1) 107 | return f'{sz} KB' 108 | 109 | 110 | def _path(p: str, sz: int = 60) -> str: 111 | """ 112 | Returns a parsed path. 113 | """ 114 | p = p.replace(sep, '/') 115 | if len(p) < sz: 116 | return p 117 | else: 118 | return '...' + p[len(p) - sz:len(p)] 119 | 120 | 121 | def get_analysis(analysis, toc): 122 | """ 123 | Return the ANALYSIS object. 
124 | """ 125 | datas = [] 126 | for f in [ 127 | 'res/cog.ico', 128 | 'res/dictionary.ico', 129 | 'res/icon.gif', 130 | 'res/icon.ico', 131 | 'res/placeholder_en.tex', 132 | 'res/placeholder_es.tex', 133 | 'res/stopwords.json', 134 | 'res/u_subscripts.txt', 135 | 'res/u_superscripts.txt', 136 | 'res/u_symbols.txt', 137 | 'res/u_textbb.txt', 138 | 'res/u_textbf.txt', 139 | 'res/u_textcal.txt', 140 | 'res/u_textfrak.txt', 141 | 'res/u_textit.txt', 142 | 'res/u_textmono.txt' 143 | ]: 144 | _append_to_datas(datas, f, target_folder='') 145 | 146 | # Make object 147 | a = analysis( 148 | ['../gui.py'], 149 | binaries=[], 150 | cipher=block_cipher, 151 | datas=datas, 152 | excludes=excluded_modules, 153 | hiddenimports=['pydetex'], 154 | hooksconfig={}, 155 | hookspath=[], 156 | noarchive=False, 157 | pathex=['../'], 158 | runtime_hooks=[], 159 | win_no_prefer_redirects=False, 160 | win_private_assemblies=False 161 | ) 162 | 163 | # Update its properties 164 | print('Updating binaries') 165 | new_binaries = [] 166 | for i in a.binaries: 167 | ex_contains = False 168 | for j in excluded_binaries_contains: 169 | if j in i[1]: 170 | ex_contains = True 171 | break 172 | if 'sklearn' in i[0] and i[0] != 'sklearn.__check_build._check_build': 173 | ex_contains = True 174 | if ex_contains or i[0] in excluded_binaries: 175 | print(f'\tRemoved:\t{_path(i[1])} ({_file_sz(i[1])}) <{i[0]}>') 176 | continue 177 | new_binaries.append(i) 178 | print('Program binaries') 179 | a.binaries = toc(new_binaries) 180 | for j in a.binaries: 181 | print(f'\t{j[0]}\n\t\t{_path(j[1])} ({_file_sz(j[1])})') 182 | 183 | # Scripts 184 | print('Program scripts') 185 | for j in a.scripts: 186 | print(f'\t{j[0]}\t{_path(j[1])}') 187 | 188 | # Return the analysis 189 | return a 190 | 191 | 192 | def get_bundle(bundle, exe): 193 | """ 194 | Return a bundle for OSX. 195 | """ 196 | return bundle( 197 | exe, 198 | name=app_name + '.app', 199 | icon=app_icon, 200 | bundle_identifier='com.ppizarror', 201 | info_plist={ 202 | 'NSPrincipalClass': 'NSApplication', 203 | 'NSAppleScriptEnabled': False 204 | }, 205 | ) 206 | 207 | 208 | def get_collect(collect, a, exe): 209 | """ 210 | Return the COLLECT object. 211 | """ 212 | return collect( 213 | exe, 214 | a.binaries, 215 | a.zipfiles, 216 | a.datas, 217 | strip=False, 218 | name=app_name, 219 | upx_exclude=[], 220 | upx=True 221 | ) 222 | 223 | 224 | def get_exe(exe, pyz, a, single: bool): 225 | """ 226 | Return the EXE object. 227 | """ 228 | if single: 229 | return exe( 230 | pyz, 231 | a.scripts, 232 | a.binaries, 233 | a.zipfiles, 234 | a.datas, 235 | [], 236 | bootloader_ignore_signals=False, 237 | codesign_identity=None, 238 | console=False, 239 | debug=False, 240 | disable_windowed_traceback=False, 241 | entitlements_file=None, 242 | icon=app_icon, 243 | name=app_name, 244 | runtime_tmpdir=None, 245 | strip=False, 246 | target_arch=None, 247 | upx_exclude=[], 248 | upx=True 249 | ) 250 | else: 251 | return exe( 252 | pyz, 253 | a.scripts, 254 | [], 255 | bootloader_ignore_signals=False, 256 | codesign_identity=None, 257 | console=True, 258 | debug=False, 259 | disable_windowed_traceback=False, 260 | entitlements_file=None, 261 | exclude_binaries=True, 262 | icon=app_icon, 263 | name=app_name, 264 | strip=False, 265 | target_arch=None, 266 | upx=True 267 | ) 268 | 269 | 270 | def get_pyz(pyz, a): 271 | """ 272 | Return the PYZ object.
273 | """ 274 | return pyz(a.pure, a.zipped_data, cipher=block_cipher) 275 | 276 | 277 | def save_zip(filename, output, in_folder='dist', out_folder='dist/out_zip'): 278 | """ 279 | Save a zip file. 280 | """ 281 | # Removes the old file 282 | if not os.path.isdir(out_folder): 283 | os.makedirs(out_folder) 284 | for k in os.listdir(out_folder): 285 | if output in k: 286 | print(f'Removing old zip: {out_folder}/{k}') 287 | os.remove(f'{out_folder}/{k}') 288 | 289 | filename_full = f'{in_folder}/{filename}' 290 | output = f'{out_folder}/{output}' 291 | out_file = f'{output}_v{ver}.zip' 292 | print(f'Compressing to: {out_file}') 293 | with ZipFile(out_file, 'w', ZIP_DEFLATED) as zipf: 294 | if os.path.isdir(filename_full): 295 | zipdir(filename_full, zipf) 296 | else: 297 | zipf.write(filename_full, arcname=filename) 298 | 299 | 300 | def zipdir(path, ziph): 301 | """ 302 | Zip a folder. 303 | """ 304 | for root, dirs, files in os.walk(path): 305 | for file in files: 306 | ziph.write(os.path.join(root, file), 307 | os.path.relpath(os.path.join(root, file), 308 | os.path.join(path, '..'))) 309 | -------------------------------------------------------------------------------- /pydetex/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | UTILS 6 | Module that contains all util methods and classes used in parsers and pipelines, 7 | from tex, language, and low-level. 8 | """ 9 | 10 | __all__ = [ 11 | 'apply_tag_between_inside_char_command', 12 | 'apply_tag_tex_commands', 13 | 'apply_tag_tex_commands_no_argv', 14 | 'Button', 15 | 'check_repeated_words', 16 | 'complete_langs_dict', 17 | 'detect_language', 18 | 'find_tex_command_char', 19 | 'find_tex_commands', 20 | 'find_tex_commands_noargv', 21 | 'find_tex_environments', 22 | 'format_number_d', 23 | 'get_diff_startend_word', 24 | 'get_language_name', 25 | 'get_local_path', 26 | 'get_number_of_day', 27 | 'get_tex_commands_args', 28 | 'get_word_from_cursor', 29 | 'IS_OSX', 30 | 'LangTexTextTags', 31 | 'make_stemmer', 32 | 'open_file', 33 | 'ProgressBar', 34 | 'RESOURCES_PATH', 35 | 'split_tags', 36 | 'syntax_highlight', 37 | 'TEX_COMMAND_CHARS', 38 | 'TEX_EQUATION_CHARS', 39 | 'tex_to_unicode', 40 | 'tokenize', 41 | 'validate_float', 42 | 'validate_int' 43 | ] 44 | 45 | import datetime 46 | import os 47 | import platform 48 | import sys 49 | import time 50 | 51 | from pathlib import Path 52 | from typing import List, Tuple, Dict 53 | 54 | from pydetex._fonts import FONT_TAGS as _FONT_TAGS 55 | from pydetex._utils_lang import * 56 | from pydetex._utils_tex import * 57 | 58 | # Resources path 59 | __actualpath = str(os.path.abspath(os.path.dirname(__file__))).replace('\\', '/') + '/' 60 | RESOURCES_PATH = __actualpath + 'res/' 61 | 62 | # Check OS 63 | IS_OSX = platform.system() == 'Darwin' 64 | 65 | # Import Button widget 66 | if IS_OSX: 67 | from tkmacosx import Button 68 | else: 69 | from tkinter import Button 70 | 71 | 72 | def split_tags(s: str, tags: List[str]) -> List[Tuple[str, str]]: 73 | """ 74 | Split a string based on tags; each line is then tagged. 75 | 76 | String format: 77 | [TAG1]new line[TAG2]this is[TAG1]very epic 78 | 79 | Output: 80 | [('TAG1', 'new line'), ('TAG2', 'this is'), ('TAG1', 'very epic') ...
] 81 | 82 | :param s: String 83 | :param tags: Tag list 84 | :return: Split tags 85 | """ 86 | assert len(tags) > 0 87 | tagged_lines: List[Tuple[str, str]] = [] 88 | r = 0 89 | for tag in tags: 90 | if r == 0: # First occurrence 91 | new = s.split(tag) 92 | for j in new: 93 | if j == '': 94 | continue 95 | tagged_lines.append((tag, j)) 96 | else: 97 | new_tagged_lines: List[Tuple[str, str]] = [] 98 | for j in range(len(tagged_lines)): 99 | if tag in tagged_lines[j][1]: # If tag exists 100 | new = tagged_lines[j][1].split(tag) 101 | new_tagged_lines.append((tagged_lines[j][0], new[0])) 102 | for w in range(len(new) - 1): 103 | new_tagged_lines.append((tag, new[w + 1])) 104 | else: 105 | new_tagged_lines.append(tagged_lines[j]) 106 | tagged_lines = new_tagged_lines 107 | 108 | r += 1 109 | 110 | # Merge consecutive tags 111 | merged_tags: List[Tuple[str, str]] = [] 112 | r = 0 113 | for tagged in tagged_lines: 114 | if len(merged_tags) == 0 or tagged[0] != merged_tags[r - 1][0]: 115 | merged_tags.append(tagged) 116 | r += 1 117 | else: 118 | merged_tags[r - 1] = (tagged[0], merged_tags[r - 1][1] + tagged[1]) 119 | 120 | return merged_tags 121 | 122 | 123 | def button_text(s: str) -> str: 124 | """ 125 | Generates the button text. 126 | 127 | :param s: Button's text 128 | :return: Text 129 | """ 130 | return s if IS_OSX else f' {s} ' 131 | 132 | 133 | def validate_int(p: str) -> bool: 134 | """ 135 | Validate an integer. 136 | 137 | :param p: Value 138 | :return: True if integer 139 | """ 140 | if p == '' or p == '-': 141 | return True 142 | try: 143 | p = float(p) 144 | return int(p) == p 145 | except ValueError: 146 | pass 147 | return False 148 | 149 | 150 | def validate_float(p: str) -> bool: 151 | """ 152 | Validate a float. 153 | 154 | :param p: Value 155 | :return: True if float 156 | """ 157 | if p == '' or p == '-': 158 | return True 159 | try: 160 | float(p) 161 | return True 162 | except ValueError: 163 | pass 164 | return False 165 | 166 | 167 | def syntax_highlight(s: str) -> str: 168 | """ 169 | Syntax highlighter. 170 | 171 | :param s: Latex string code 172 | :return: Code with format 173 | """ 174 | # Add initial normal 175 | s = _FONT_TAGS['normal'] + s.strip() 176 | 177 | # Format equations 178 | s = apply_tag_between_inside_char_command( 179 | s=s, 180 | symbols_char=TEX_EQUATION_CHARS, 181 | tags=(_FONT_TAGS['equation_char'], _FONT_TAGS['equation_inside'], 182 | _FONT_TAGS['equation_char'], _FONT_TAGS['normal']) 183 | ) 184 | 185 | # Format commands with {arguments} 186 | s = apply_tag_tex_commands( 187 | s=s, 188 | tags=(_FONT_TAGS['tex_command'], 189 | _FONT_TAGS['normal'], 190 | _FONT_TAGS['tex_argument'], 191 | _FONT_TAGS['normal'], 192 | '') 193 | ) 194 | 195 | # Format commands without arguments 196 | s = apply_tag_tex_commands_no_argv( 197 | s=s, 198 | tags=(_FONT_TAGS['tex_command'], _FONT_TAGS['normal']) 199 | ) 200 | 201 | # Return formatted string 202 | return s 203 | 204 | 205 | def format_number_d(n: int, c: str) -> str: 206 | """ 207 | Formats a number with a thousands separator. 208 | 209 | :param n: Number 210 | :param c: Format char 211 | :return: Formatted number 212 | """ 213 | assert isinstance(n, int) 214 | return format(n, ',').replace(',', c) 215 | 216 | 217 | def get_number_of_day() -> int: 218 | """ 219 | Return the number of the day from the current year. 220 | 221 | :return: Day number 222 | """ 223 | return datetime.datetime.now().timetuple().tm_yday 224 | 225 | 226 | def open_file(f: str) -> str: 227 | """ 228 | Open file and return its string.
229 | 230 | :param f: Filename 231 | :return: File content 232 | """ 233 | o = open(f, encoding='utf-8') 234 | text = ''.join(o.readlines()) 235 | o.close() 236 | return text 237 | 238 | 239 | def make_path_if_not_exists(path: str) -> str: 240 | """ 241 | Create path if not exists. 242 | 243 | :param path: Path 244 | :return: Path 245 | """ 246 | if not os.path.isdir(path): 247 | Path(path).mkdir(parents=True, exist_ok=True) 248 | return path 249 | 250 | 251 | def get_local_path() -> str: 252 | """ 253 | :return: Returns the app local path 254 | """ 255 | appdata = os.getenv('LOCALAPPDATA') 256 | if appdata is None: 257 | appdata = os.path.join(get_user_path(), 'Applications') 258 | 259 | path = os.path.join(appdata, 'PyDetex') 260 | return make_path_if_not_exists(path) 261 | 262 | 263 | def get_user_path() -> str: 264 | """ 265 | :return: Returns the user path 266 | """ 267 | return os.path.expanduser('~') 268 | 269 | 270 | class ProgressBar(object): 271 | """ 272 | Basic progress bar implementation. 273 | """ 274 | 275 | _current: int 276 | _last_step: float 277 | _size: int 278 | _step_times: Dict[str, float] 279 | _steps: int 280 | _t0: float 281 | 282 | def __init__(self, steps: int, size: int = 15) -> None: 283 | """ 284 | Constructor. 285 | 286 | :param steps: How many steps have the procedure 287 | :param size: Bar size 288 | """ 289 | assert isinstance(steps, int) and steps >= 1 290 | assert isinstance(size, int) and size >= 1 291 | self._current = 0 292 | self._last_step = time.time() 293 | self._size = size # Bar size 294 | self._step_times = {} 295 | self._steps = steps - 1 296 | self._t0 = time.time() 297 | 298 | def _print_progress_bar(self, i: int, max_: int, post_text: str) -> None: 299 | """ 300 | Prints a progress bar. 301 | 302 | :param i: Progress bar 303 | :param max_: Max steps 304 | :param post_text: Status 305 | """ 306 | j = i / max_ 307 | sys.stdout.write('\r') 308 | sys.stdout.write(f"[{'=' * int(self._size * j):{self._size}s}] {int(100 * j)}% {post_text}") 309 | sys.stdout.flush() 310 | 311 | def update(self, status: str = '', print_total_time: bool = True) -> None: 312 | """ 313 | Update the current status to a new step. 314 | 315 | :param status: Status text 316 | :param print_total_time: Prints total computing time 317 | """ 318 | if self._current > self._steps: 319 | return 320 | self._print_progress_bar(self._current, self._steps, status) 321 | dt = time.time() - self._last_step 322 | self._last_step = time.time() 323 | self._step_times[status] = dt 324 | self._current += 1 325 | if self._current == self._steps + 1: 326 | print('') 327 | sys.stdout.flush() 328 | if print_total_time: 329 | print(f'Process finished in {time.time() - self._t0:.3f} seconds') 330 | 331 | def detail_times(self) -> None: 332 | """ 333 | Print times. 334 | """ 335 | for k in self._step_times.keys(): 336 | print(f'{self._step_times[k]:.3f}s\t{k}') 337 | 338 | def reset(self) -> None: 339 | """ 340 | Reset the steps. 341 | """ 342 | self._current = 0 343 | self._t0 = time.time() 344 | self._last_step = time.time() 345 | self._step_times.clear() 346 | -------------------------------------------------------------------------------- /pydetex/_utils_lang.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | UTILS LANG 6 | Language utils. 
7 | """ 8 | 9 | __all__ = [ 10 | 'check_repeated_words', 11 | 'complete_langs_dict', 12 | 'detect_language', 13 | 'get_diff_startend_word', 14 | 'get_language_name', 15 | 'get_phrase_from_cursor', 16 | 'get_word_from_cursor', 17 | 'LangTexTextTags', 18 | 'make_stemmer', 19 | 'tokenize' 20 | ] 21 | 22 | # langdetect supports: 23 | # af, ar, bg, bn, ca, cs, cy, da, de, el, en, es, et, fa, fi, fr, gu, he, 24 | # hi, hr, hu, id, it, ja, kn, ko, lt, lv, mk, ml, mr, ne, nl, no, pa, pl, 25 | # pt, ro, ru, sk, sl, so, sq, sv, sw, ta, te, th, tl, tr, uk, ur, vi, zh-cn, zh-tw 26 | import langdetect 27 | 28 | import json 29 | import os 30 | 31 | # noinspection PyProtectedMember 32 | from PyMultiDictionary._utils import tokenize, get_language_name 33 | from nltk.stem import SnowballStemmer 34 | from typing import List, Tuple, Optional, Dict 35 | from warnings import warn 36 | 37 | # Resources path 38 | __actualpath = str(os.path.abspath(os.path.dirname(__file__))).replace('\\', '/') + '/' 39 | 40 | # Load all stopwords 41 | with open(__actualpath + 'res/' + 'stopwords.json', encoding='UTF-8') as json_data: 42 | _STOPWORDS = json.load(json_data) 43 | 44 | _AVAILABLE_STEMMER_LANGS: Dict[str, str] = { 45 | 'ar': 'arabic', 46 | 'da': 'danish', 47 | 'de': 'german', 48 | 'en': 'english', 49 | 'es': 'spanish', 50 | 'fi': 'finnish', 51 | 'fr': 'french', 52 | 'hu': 'hungarian', 53 | 'it': 'italian', 54 | 'nb': 'norwegian', 55 | 'nd': 'norwegian', 56 | 'nl': 'dutch', 57 | 'nn': 'norwegian', 58 | 'no': 'norwegian', 59 | 'pt': 'portuguese', 60 | 'ro': 'romanian', 61 | 'ru': 'russian', 62 | 'sv': 'swedish' 63 | } 64 | 65 | 66 | class LangTexTextTags(object): 67 | """ 68 | Stores the tex tags for several commands. 69 | """ 70 | 71 | _lang: Dict[str, Dict[str, str]] 72 | 73 | def __init__(self) -> None: 74 | """ 75 | Constructor. 76 | """ 77 | self._lang = { 78 | 'en': { 79 | 'caption': 'CAPTION: {0}', 80 | 'citeauthor_multiple': 'authors', 81 | 'citeauthor_single': 'author', 82 | 'figure_caption': 'FIGURE_CAPTION: {0}', 83 | 'link': 'LINK: {0}', 84 | 'multi_char_equ': 'EQUATION_{0}', 85 | 'sub_figure_title': 'SUB_FIGURE TITLE: {0}' 86 | }, 87 | 'es': { 88 | 'caption': 'LEYENDA: {0}', 89 | 'citeauthor_multiple': 'autores', 90 | 'citeauthor_single': 'autor', 91 | 'figure_caption': 'LEYENDA_FIGURA: {0}', 92 | 'link': 'ENLACE: {0}', 93 | 'multi_char_equ': 'ECUACIÓN_{0}', 94 | 'sub_figure_title': 'TÍTULO SUB_FIGURA: {0}' 95 | } 96 | } 97 | complete_langs_dict(self._lang) 98 | 99 | def get(self, lang: str, tag: str) -> str: 100 | """ 101 | Retrieves a language tag value. 102 | 103 | :param lang: Language 104 | :param tag: Tag to retrieve 105 | :return: Value of the language's tag 106 | """ 107 | if lang not in self._lang.keys(): 108 | lang = 'en' 109 | if tag not in self._lang[lang].keys(): 110 | raise ValueError(f'Lang {lang} tag {tag} does not exist') 111 | return self._lang[lang][tag] 112 | 113 | 114 | def complete_langs_dict(lang: Dict[str, Dict[str, str]]) -> None: 115 | """ 116 | Completes a language dict. Assumes ``'en'`` is the main language. 117 | 118 | :param lang: Language dict 119 | """ 120 | for k in lang.keys(): 121 | if k == 'en': 122 | continue 123 | for t in lang['en'].keys(): 124 | if t not in lang[k]: 125 | error = f'Language entry "{t}" on lang "{k}" does not exist' 126 | warn(error) 127 | lang[k][t] = lang['en'][t] 128 | 129 | 130 | def detect_language(s: str) -> str: 131 | """ 132 | Detects languages. 
133 | 134 | :param s: String 135 | :return: Detected language 136 | """ 137 | if s == '': 138 | return '–' 139 | try: 140 | lang = langdetect.detect(s) 141 | if lang == 'zh-cn' or lang == 'zh-tw': 142 | lang = 'zh' 143 | return lang 144 | except langdetect.lang_detect_exception.LangDetectException: # No features in text 145 | return '–' 146 | 147 | 148 | def get_diff_startend_word(original: str, new: str) -> Tuple[str, str]: 149 | """ 150 | Return the difference of the word from start and end, for example: 151 | 152 | .. code-block:: none 153 | 154 | original XXXwordYY 155 | new word 156 | diff = (XXX, YY) 157 | 158 | :param original: Original word 159 | :param new: New word 160 | :return: Diff word 161 | """ 162 | pos: int = original.find(new) 163 | if pos == -1: 164 | return '', '' 165 | return original[0:pos], original[pos + len(new):len(original)] 166 | 167 | 168 | def make_stemmer(lang: str) -> Optional['SnowballStemmer']: 169 | """ 170 | Returns a stemmer. 171 | 172 | :param lang: Lang code 173 | :return: Stemmer or None if not available 174 | """ 175 | if lang in _AVAILABLE_STEMMER_LANGS.keys(): 176 | return SnowballStemmer(_AVAILABLE_STEMMER_LANGS[lang]) 177 | return None 178 | 179 | 180 | def check_repeated_words( 181 | s: str, 182 | lang: str, 183 | min_chars: int, 184 | window: int, 185 | stopwords: bool, 186 | stemming: bool, 187 | ignore: Optional[List[str]] = None, 188 | remove_tokens: Optional[List[str]] = None, 189 | font_tag_format: str = '', 190 | font_param_format: str = '', 191 | font_normal_format: str = '', 192 | tag: str = 'repeated' 193 | ) -> str: 194 | """ 195 | Check repeated words. 196 | 197 | :param s: Text 198 | :param lang: Language code 199 | :param min_chars: Min chars to accept 200 | :param window: Window words span to check 201 | :param stopwords: Use stopwords 202 | :param stemming: Use stemming 203 | :param ignore: Ignore a list of words 204 | :param remove_tokens: Remove keys before verify repeat 205 | :param font_tag_format: Tag's format 206 | :param font_param_format: Param's format 207 | :param font_normal_format: Normal's format 208 | :param tag: Tag's name 209 | :return: Text with repeated words marked 210 | """ 211 | assert isinstance(window, int) and window > 1 212 | assert isinstance(min_chars, int) and min_chars >= 1 213 | 214 | if not ignore: 215 | ignore = [] 216 | if not remove_tokens: 217 | remove_tokens = [] 218 | 219 | # Check languages 220 | if lang in _AVAILABLE_STEMMER_LANGS.keys(): 221 | stop = _STOPWORDS[lang] 222 | stemmer = make_stemmer(lang) 223 | else: 224 | return s 225 | 226 | ignored_words = [] 227 | # Apply filters to ignored words 228 | for w in ignore: 229 | if stemming: 230 | w = stemmer.stem(w) 231 | if stopwords and w in stop: 232 | w = '' 233 | if w == '': 234 | continue 235 | ignored_words.append(w) 236 | 237 | # Add space to newline 238 | newline_format = ' \n' 239 | s = s.replace('\n', newline_format) 240 | 241 | # Separeate words 242 | wordswin = [] # Stores the words 243 | words = s.split(' ') 244 | new_s = [] 245 | 246 | for w in words: 247 | original_w = w 248 | 249 | # Remove tokens 250 | if len(remove_tokens) > 0: 251 | for rt in remove_tokens: 252 | w = w.replace(rt, '') 253 | 254 | # If command in word 255 | if '\\' in w: 256 | w = '' 257 | 258 | # Apply filters 259 | if len(w) <= min_chars: 260 | w = '' 261 | if w != '': 262 | w = tokenize(w) 263 | if stemming: 264 | w = stemmer.stem(w) 265 | if stopwords and w in stop: 266 | w = '' 267 | 268 | # Check if word is ignored 269 | if w in ignored_words: 270 | w 
= '' 271 | 272 | # Check if the word exists on the list 273 | if w in wordswin and w != '': 274 | ww = wordswin[::-1].index(w) + 1 275 | stemmed_word = tokenize(original_w) 276 | diff_word = get_diff_startend_word(original_w, stemmed_word) 277 | if diff_word == ('', ''): 278 | stemmed_word = original_w 279 | original_w = f'{diff_word[0]}{font_tag_format}<{tag}:{ww}>' \ 280 | f'{font_param_format}{stemmed_word}' \ 281 | f'{font_tag_format}{font_normal_format}{diff_word[1]}' 282 | 283 | # Push the new word 284 | wordswin.append(w) 285 | if len(wordswin) > window: 286 | wordswin.pop(0) 287 | 288 | # Append word 289 | new_s.append(original_w) 290 | 291 | # Return string with repeated format 292 | out_s = ' '.join(new_s) 293 | out_s = out_s.replace(newline_format, '\n') 294 | return out_s 295 | 296 | 297 | def get_word_from_cursor(s: str, pos: int) -> Tuple[str, int, int]: 298 | """ 299 | Return the word from a string on a given cursor. 300 | 301 | :param s: String 302 | :param pos: Position to check the string 303 | :return: Word, position start, position end 304 | """ 305 | assert 0 <= pos < len(s) 306 | pos += 1 307 | s = ' ' + s 308 | p = 0 309 | 310 | # Check if pos is an empty character, find the following word 311 | if s[pos].strip() == '': 312 | found = False 313 | for k in range(pos, len(s)): # First 314 | if s[k].strip() != '' and not found: 315 | p = k 316 | found = True 317 | elif s[k].strip() == '' and found: 318 | return s[p:k].strip(), p, k - 1 319 | 320 | else: 321 | for w in range(pos): # Find prev 322 | j = pos - w - 1 323 | if s[j].strip() == '': 324 | p = j 325 | break 326 | elif s[j].strip() == '>': 327 | p = j + 1 328 | break 329 | for j in range(pos + 1, len(s)): # Find next 330 | if s[j].strip() in ('', '<'): 331 | return s[p:j].strip(), p, j - 1 332 | 333 | return '', -1, -1 334 | 335 | 336 | def get_phrase_from_cursor(s: str, pos_init: int, pos_end: int) -> str: 337 | """ 338 | Get a phrase from the cursor. It tries to retrieve the entire words selected. 339 | 340 | :param s: String 341 | :param pos_init: Initial position 342 | :param pos_end: End position 343 | :return: Retrieved word 344 | """ 345 | assert pos_init <= pos_end 346 | 347 | # Get the first word 348 | s0, i, _ = get_word_from_cursor(s, pos_init) 349 | # noinspection PyUnusedLocal 350 | j: int = i 351 | 352 | if s[pos_end].strip() == '': # Is empty, find the previous word 353 | for k in range(1, pos_end): 354 | _k = pos_end - k 355 | if s[_k].strip() != '': 356 | # noinspection PyUnusedLocal 357 | j = _k + 1 358 | break 359 | else: 360 | _, _, j = get_word_from_cursor(s, pos_end) 361 | 362 | if j <= i: 363 | return s0 364 | 365 | return s[i:j] 366 | -------------------------------------------------------------------------------- /pydetex/_symbols.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | SYMBOLS 6 | Contain latex commands converted to symbol. 
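# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the repository): check_repeated_words()
# above keeps a sliding window of the last `window` normalized words and tags
# any word already present in it with its back-distance. The stand-alone
# helper below shows only that core idea; it skips stemming, stopwords and
# the font-format tags, and the '<repeated:n>' marker is merely assumed to
# mirror the tag built by the real function.

def _mark_repeats(text: str, window: int = 5, min_chars: int = 4) -> str:
    recent = []  # sliding window holding the last `window` normalized words
    out = []
    for word in text.split(' '):
        key = word.lower().strip('.,;:')
        if len(key) >= min_chars and key in recent:
            dist = recent[::-1].index(key) + 1  # words back to the previous use
            word = f'<repeated:{dist}>{word}'
        out.append(word)
        recent.append(key)
        if len(recent) > window:
            recent.pop(0)
    return ' '.join(out)

# _mark_repeats('the model fits the model well')
# -> 'the model fits the <repeated:3>model well'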
7 | """ 8 | 9 | __all__ = [ 10 | 'REPLACE_EQUATION_SYMBOLS_LIBRARY', 11 | 'REPLACE_SYMBOLS_LIBRARY', 12 | 'REPLACE_TEX_COMMANDS_LIBRARY' 13 | ] 14 | 15 | from typing import List, Tuple 16 | 17 | REPLACE_SYMBOLS_LIBRARY: List[Tuple[str, str]] = [ 18 | # Common 19 | ('\\ ', ' '), 20 | ('\\\\', '\n'), 21 | 22 | # Letters 23 | ('--', '–'), 24 | ('---', '—'), 25 | (r'\#', '#'), 26 | (r'\&', '&'), 27 | (r'\_', '_'), 28 | ('~', ' '), 29 | ('fi', 'fi') 30 | ] 31 | 32 | REPLACE_EQUATION_SYMBOLS_LIBRARY: List[Tuple[str, str]] = [ 33 | ('^(', '⁽'), 34 | ('^)', '⁾'), 35 | ('^+', '⁺'), 36 | ('^-', '⁻'), 37 | ('^0', '⁰'), 38 | ('^1', '¹'), 39 | ('^2', '²'), 40 | ('^3', '³'), 41 | ('^4', '⁴'), 42 | ('^5', '⁵'), 43 | ('^6', '⁶'), 44 | ('^7', '⁷'), 45 | ('^8', '⁸'), 46 | ('^9', '⁹'), 47 | ('^=', '⁼'), 48 | ('^A', 'ᴬ'), 49 | ('^a', 'ᵃ'), 50 | ('^B', 'ᴮ'), 51 | ('^b', 'ᵇ'), 52 | ('^c', 'ᶜ'), 53 | ('^D', 'ᴰ'), 54 | ('^d', 'ᵈ'), 55 | ('^E', 'ᴱ'), 56 | ('^e', 'ᵉ'), 57 | ('^f', 'ᶠ'), 58 | ('^G', 'ᴳ'), 59 | ('^g', 'ᵍ'), 60 | ('^h', 'ʰ'), 61 | ('^H', 'ᴴ'), 62 | ('^I', 'ᴵ'), 63 | ('^i', 'ⁱ'), 64 | ('^j', 'ʲ'), 65 | ('^J', 'ᴶ'), 66 | ('^K', 'ᴷ'), 67 | ('^k', 'ᵏ'), 68 | ('^l', 'ˡ'), 69 | ('^L', 'ᴸ'), 70 | ('^M', 'ᴹ'), 71 | ('^m', 'ᵐ'), 72 | ('^N', 'ᴺ'), 73 | ('^n', 'ⁿ'), 74 | ('^O', 'ᴼ'), 75 | ('^o', 'ᵒ'), 76 | ('^P', 'ᴾ'), 77 | ('^p', 'ᵖ'), 78 | ('^r', 'ʳ'), 79 | ('^R', 'ᴿ'), 80 | ('^s', 'ˢ'), 81 | ('^T', 'ᵀ'), 82 | ('^t', 'ᵗ'), 83 | ('^U', 'ᵁ'), 84 | ('^u', 'ᵘ'), 85 | ('^v', 'ᵛ'), 86 | ('^w', 'ʷ'), 87 | ('^W', 'ᵂ'), 88 | ('^x', 'ˣ'), 89 | ('^y', 'ʸ'), 90 | ('^z', 'ᶻ'), 91 | ('_(', '₍'), 92 | ('_)', '₎'), 93 | ('_+', '₊'), 94 | ('_-', '₋'), 95 | ('_0', '₀'), 96 | ('_1', '₁'), 97 | ('_2', '₂'), 98 | ('_3', '₃'), 99 | ('_4', '₄'), 100 | ('_5', '₅'), 101 | ('_6', '₆'), 102 | ('_7', '₇'), 103 | ('_8', '₈'), 104 | ('_9', '₉'), 105 | ('_=', '₌'), 106 | ('_a', 'ₐ'), 107 | ('_e', 'ₑ'), 108 | ('_h', 'ₕ'), 109 | ('_i', 'ᵢ'), 110 | ('_k', 'ₖ'), 111 | ('_l', 'ₗ'), 112 | ('_m', 'ₘ'), 113 | ('_n', 'ₙ'), 114 | ('_o', 'ₒ'), 115 | ('_p', 'ₚ'), 116 | ('_r', 'ᵣ'), 117 | ('_s', 'ₛ'), 118 | ('_t', 'ₜ'), 119 | ('_u', 'ᵤ'), 120 | ('_v', 'ᵥ'), 121 | ('_x', 'ₓ') 122 | ] 123 | 124 | REPLACE_TEX_COMMANDS_LIBRARY: List[Tuple[str, str]] = [ 125 | ('\\AC', '∿'), 126 | ('\\aleph', 'ℵ'), 127 | ('\\alpha', 'α'), 128 | ('\\amalg', '⨿'), 129 | ('\\angle', '∠'), 130 | ('\\approx', '≈'), 131 | ('\\approxeq', '≊'), 132 | ('\\asterism', '⁂'), 133 | ('\\asymp', '≍'), 134 | ('\\backepsilon', '϶'), 135 | ('\\backprime', '‵'), 136 | ('\\backsim', '∽'), 137 | ('\\backsimeq', '⋍'), 138 | ('\\barwedge', '⊼'), 139 | ('\\because', '∵'), 140 | ('\\beginsmallmatrix', ''), 141 | ('\\beta', 'β'), 142 | ('\\beth', 'ℶ'), 143 | ('\\between', '≬'), 144 | ('\\bigcap', '⋂'), 145 | ('\\bigcup', '⋃'), 146 | ('\\bigodot', '⨀'), 147 | ('\\bigoplus', '⨁'), 148 | ('\\bigotimes', '⨂'), 149 | ('\\bigsqcap', '⨅'), 150 | ('\\bigsqcup', '⨆'), 151 | ('\\bigvee', '⋁'), 152 | ('\\bigwedge', '⋀'), 153 | ('\\bot', '⊥'), 154 | ('\\bowtie', '⋈'), 155 | ('\\boxdot', '⊡'), 156 | ('\\boxminus', '⊟'), 157 | ('\\boxplus', '⊞'), 158 | ('\\boxtimes', '⊠'), 159 | ('\\bullet', '•'), 160 | ('\\Bumpeq', '≎'), 161 | ('\\bumpeq', '≏'), 162 | ('\\cap', '∩'), 163 | ('\\Cap', '⋒'), 164 | ('\\cdot', '·'), 165 | ('\\cdots', '⋯'), 166 | ('\\checkmark', '✓'), 167 | ('\\chi', 'χ'), 168 | ('\\circ', '∘'), 169 | ('\\circeq', '≗'), 170 | ('\\circlearrowleft', '↺'), 171 | ('\\circlearrowright', '↻'), 172 | ('\\circledast', '⊛'), 173 | ('\\circledcirc', '⊚'), 174 | ('\\circleddash', '⊝'), 175 | ('\\clubsuit', '♣'), 
176 | ('\\coloneqq', '≔'), 177 | ('\\complement', '∁'), 178 | ('\\cong', '≅'), 179 | ('\\coprod', '∐'), 180 | ('\\copyright', '©'), 181 | ('\\cup', '∪'), 182 | ('\\Cup', '⋓'), 183 | ('\\curlyeqprec', '⋞'), 184 | ('\\curlyeqsucc', '⋟'), 185 | ('\\curlyvee', '⋎'), 186 | ('\\curlywedge', '⋏'), 187 | ('\\curvearrowleft', '↶'), 188 | ('\\curvearrowright', '↷'), 189 | ('\\dagger', '†'), 190 | ('\\daleth', 'ℸ'), 191 | ('\\dashleftarrow', '⇠'), 192 | ('\\dashrightarrow', '⇢'), 193 | ('\\dashv', '⊣'), 194 | ('\\dbend', '☡'), 195 | ('\\ddag', '‡'), 196 | ('\\ddots', '⋱'), 197 | ('\\ddot{\\phantom{x}}', '̈'), 198 | ('\\Delta', 'Δ'), 199 | ('\\delta', 'δ'), 200 | ('\\diameter', '⌀'), 201 | ('\\diamond', '⋄'), 202 | ('\\diamondsuit', '♢'), 203 | ('\\Digamma', 'Ϝ'), 204 | ('\\digamma', 'ϝ'), 205 | ('\\div', '÷'), 206 | ('\\divideontimes', '⋇'), 207 | ('\\doteq', '≐'), 208 | ('\\doteqdot', '≑'), 209 | ('\\dotminus', '∸'), 210 | ('\\dotplus', '∔'), 211 | ('\\downarrow', '↓'), 212 | ('\\Downarrow', '⇓'), 213 | ('\\downdownarrows', '⇊'), 214 | ('\\downharpoonleft', '⇃'), 215 | ('\\downharpoonright', '⇂'), 216 | ('\\ell', 'ℓ'), 217 | ('\\epsilon', 'ϵ'), 218 | ('\\eqcirc', '≖'), 219 | ('\\eqcolon', '∹'), 220 | ('\\eqqcolon', '≕'), 221 | ('\\equiv', '≡'), 222 | ('\\eta', 'η'), 223 | ('\\Euler', 'ℇ'), 224 | ('\\euro', '€'), 225 | ('\\exists', '∃'), 226 | ('\\fallingdotseq', '≒'), 227 | ('\\fbox{\\checkmark}', '☑'), 228 | ('\\fbox{\\phantom{{\\checkmark}}}', '☐'), 229 | ('\\Finv', 'Ⅎ'), 230 | ('\\flat', '♭'), 231 | ('\\forall', '∀'), 232 | ('\\frac{1}{2}', '½'), 233 | ('\\frac{1}{3}', '⅓'), 234 | ('\\frac{1}{4}', '¼'), 235 | ('\\frac{1}{5}', '⅕'), 236 | ('\\frac{1}{6}', '⅙'), 237 | ('\\frac{1}{8}', '⅛'), 238 | ('\\frac{2}{3}', '⅔'), 239 | ('\\frac{2}{5}', '⅖'), 240 | ('\\frac{3}{4}', '¾'), 241 | ('\\frac{3}{5}', '⅗'), 242 | ('\\frac{4}{5}', '⅘'), 243 | ('\\frac{5}{6}', '⅚'), 244 | ('\\frac{5}{8}', '⅝'), 245 | ('\\frac{7}{8}', '⅞'), 246 | ('\\frown', '⌢'), 247 | ('\\frownie', '☹'), 248 | ('\\Game', '⅁'), 249 | ('\\Gamma', 'Γ'), 250 | ('\\gamma', 'γ'), 251 | ('\\ge', '≥'), 252 | ('\\geqq', '≧'), 253 | ('\\geqslant', '⩾'), 254 | ('\\gg', '≫'), 255 | ('\\ggg', '⋙'), 256 | ('\\gimel', 'ℷ'), 257 | ('\\gneqq', '≩'), 258 | ('\\gnsim', '⋧'), 259 | ('\\gtrdot', '⋗'), 260 | ('\\gtreqless', '⋛'), 261 | ('\\gtrless', '≷'), 262 | ('\\gtrsim', '≳'), 263 | ('\\guillemotleft', '«'), 264 | ('\\guillemotright', '»'), 265 | ('\\guilsinglleft', '‹'), 266 | ('\\guilsinglright', '›'), 267 | ('\\hat{\\phantom{x}}', '̂'), 268 | ('\\hbar', 'ℏ'), 269 | ('\\heartsuit', '♡'), 270 | ('\\hookleftarrow', '↩'), 271 | ('\\hookrightarrow', '↪'), 272 | ('\\iddots', '⋰'), 273 | ('\\iiiint', '⨌'), 274 | ('\\iiint', '∭'), 275 | ('\\iint', '∬'), 276 | ('\\Im', 'ℑ'), 277 | ('\\IM', 'ℑ'), 278 | ('\\imath', 'ı'), 279 | ('\\in', '∈'), 280 | ('\\infty', '∞'), 281 | ('\\int', '∫'), 282 | ('\\intercal', '⊺'), 283 | ('\\invamp', '⅋'), 284 | ('\\iota', 'ι'), 285 | ('\\jmath', 'ȷ'), 286 | ('\\Join', '⨝'), 287 | ('\\kappa', 'κ'), 288 | ('\\Koppa', 'Ϟ'), 289 | ('\\koppa', 'ϟ'), 290 | ('\\Lambda', 'Λ'), 291 | ('\\lambda', 'λ'), 292 | ('\\langle', '〈'), 293 | ('\\lceil', '⌈'), 294 | ('\\ldots', '…'), 295 | ('\\left', ''), 296 | ('\\leftarrow', '←'), 297 | ('\\Leftarrow', '⇐'), 298 | ('\\LeftArrowBar', '⇤'), 299 | ('\\leftarrowtail', '↢'), 300 | ('\\leftarrowtriangle', '⇽'), 301 | ('\\leftharpoondown', '↽'), 302 | ('\\leftharpoonup', '↼'), 303 | ('\\leftleftarrows', '⇇'), 304 | ('\\leftrightarrow', '↔'), 305 | ('\\Leftrightarrow', '⇔'), 306 | ('\\leftrightarrows', 
'⇆'), 307 | ('\\leftrightarrowtriangle', '⇿'), 308 | ('\\leftrightharpoons', '⇋'), 309 | ('\\leftrightsquigarrow', '↭'), 310 | ('\\leftsquigarrow', '⇜'), 311 | ('\\leftthreetimes', '⋋'), 312 | ('\\leqq', '≦'), 313 | ('\\leqslant', '⩽'), 314 | ('\\lessdot', '⋖'), 315 | ('\\lesseqgtr', '⋚'), 316 | ('\\lessgtr', '≶'), 317 | ('\\lesssim', '≲'), 318 | ('\\lfloor', '⌊'), 319 | ('\\lightning', '↯'), 320 | ('\\ll', '≪'), 321 | ('\\llangle', '⟪'), 322 | ('\\llbracket', '〚'), 323 | ('\\Lleftarrow', '⇚'), 324 | ('\\lll', '⋘'), 325 | ('\\ln', '㏑'), 326 | ('\\lneqq', '≨'), 327 | ('\\lnsim', '⋦'), 328 | ('\\log', '㏒'), 329 | ('\\longleftarrow', '⟵'), 330 | ('\\longrightarrow', '⟶'), 331 | ('\\looparrowleft', '↫'), 332 | ('\\looparrowright', '↬'), 333 | ('\\Lsh', '↰'), 334 | ('\\ltimes', '⋉'), 335 | ('\\mapsfrom', '↤'), 336 | ('\\mapsto', '↦'), 337 | ('\\Mapsto', '⇰'), 338 | ('\\mathbb{0}', '𝟘'), 339 | ('\\mathbb{1}', '𝟙'), 340 | ('\\mathbb{2}', '𝟚'), 341 | ('\\mathbb{3}', '𝟛'), 342 | ('\\mathbb{4}', '𝟜'), 343 | ('\\mathbb{5}', '𝟝'), 344 | ('\\mathbb{6}', '𝟞'), 345 | ('\\mathbb{7}', '𝟟'), 346 | ('\\mathbb{8}', '𝟠'), 347 | ('\\mathbb{9}', '𝟡'), 348 | ('\\mathbb{\\gamma}', 'ℽ'), 349 | ('\\mathbb{\\Gamma}', 'ℿ'), 350 | ('\\mathbb{\\pi}', 'ℼ'), 351 | ('\\mathbb{\\Pi}', 'ℾ'), 352 | ('\\mathbb{\\Sigma}', '⅀'), 353 | ('\\mathbb{A}', '𝔸'), 354 | ('\\mathbb{a}', '𝕒'), 355 | ('\\mathbb{B}', '𝔹'), 356 | ('\\mathbb{b}', '𝕓'), 357 | ('\\mathbb{C}', 'ℂ'), 358 | ('\\mathbb{c}', '𝕔'), 359 | ('\\mathbb{D}', '𝔻'), 360 | ('\\mathbb{d}', '𝕕'), 361 | ('\\mathbb{E}', '𝔼'), 362 | ('\\mathbb{e}', '𝕖'), 363 | ('\\mathbb{F}', '𝔽'), 364 | ('\\mathbb{f}', '𝕗'), 365 | ('\\mathbb{G}', '𝔾'), 366 | ('\\mathbb{g}', '𝕘'), 367 | ('\\mathbb{H}', 'ℍ'), 368 | ('\\mathbb{h}', '𝕙'), 369 | ('\\mathbb{I}', '𝕀'), 370 | ('\\mathbb{i}', '𝕚'), 371 | ('\\mathbb{J}', '𝕁'), 372 | ('\\mathbb{j}', '𝕛'), 373 | ('\\mathbb{K}', '𝕂'), 374 | ('\\mathbb{k}', '𝕜'), 375 | ('\\mathbb{L}', '𝕃'), 376 | ('\\mathbb{l}', '𝕝'), 377 | ('\\mathbb{M}', '𝕄'), 378 | ('\\mathbb{m}', '𝕞'), 379 | ('\\mathbb{N}', 'ℕ'), 380 | ('\\mathbb{n}', '𝕟'), 381 | ('\\mathbb{O}', '𝕆'), 382 | ('\\mathbb{o}', '𝕠'), 383 | ('\\mathbb{P}', 'ℙ'), 384 | ('\\mathbb{p}', '𝕡'), 385 | ('\\mathbb{Q}', 'ℚ'), 386 | ('\\mathbb{q}', '𝕢'), 387 | ('\\mathbb{R}', 'ℝ'), 388 | ('\\mathbb{r}', '𝕣'), 389 | ('\\mathbb{S}', '𝕊'), 390 | ('\\mathbb{s}', '𝕤'), 391 | ('\\mathbb{T}', '𝕋'), 392 | ('\\mathbb{t}', '𝕥'), 393 | ('\\mathbb{U}', '𝕌'), 394 | ('\\mathbb{u}', '𝕦'), 395 | ('\\mathbb{V}', '𝕍'), 396 | ('\\mathbb{v}', '𝕧'), 397 | ('\\mathbb{W}', '𝕎'), 398 | ('\\mathbb{w}', '𝕨'), 399 | ('\\mathbb{X}', '𝕏'), 400 | ('\\mathbb{x}', '𝕩'), 401 | ('\\mathbb{Y}', '𝕐'), 402 | ('\\mathbb{y}', '𝕪'), 403 | ('\\mathbb{Z}', 'ℤ'), 404 | ('\\mathbb{z}', '𝕫'), 405 | ('\\mathbf{A}', '𝐀'), 406 | ('\\mathbf{a}', '𝐚'), 407 | ('\\mathbf{B}', '𝐁'), 408 | ('\\mathbf{b}', '𝐛'), 409 | ('\\mathbf{C}', '𝐂'), 410 | ('\\mathbf{c}', '𝐜'), 411 | ('\\mathbf{D}', '𝐃'), 412 | ('\\mathbf{d}', '𝐝'), 413 | ('\\mathbf{E}', '𝐄'), 414 | ('\\mathbf{e}', '𝐞'), 415 | ('\\mathbf{F}', '𝐅'), 416 | ('\\mathbf{f}', '𝐟'), 417 | ('\\mathbf{G}', '𝐆'), 418 | ('\\mathbf{g}', '𝐠'), 419 | ('\\mathbf{H}', '𝐇'), 420 | ('\\mathbf{h}', '𝐡'), 421 | ('\\mathbf{I}', '𝐈'), 422 | ('\\mathbf{i}', '𝐢'), 423 | ('\\mathbf{J}', '𝐉'), 424 | ('\\mathbf{j}', '𝐣'), 425 | ('\\mathbf{K}', '𝐊'), 426 | ('\\mathbf{k}', '𝐤'), 427 | ('\\mathbf{L}', '𝐋'), 428 | ('\\mathbf{l}', '𝐥'), 429 | ('\\mathbf{M}', '𝐌'), 430 | ('\\mathbf{m}', '𝐦'), 431 | ('\\mathbf{N}', '𝐍'), 432 | ('\\mathbf{n}', '𝐧'), 433 
| ('\\mathbf{O}', '𝐎'), 434 | ('\\mathbf{o}', '𝐨'), 435 | ('\\mathbf{P}', '𝐏'), 436 | ('\\mathbf{p}', '𝐩'), 437 | ('\\mathbf{Q}', '𝐐'), 438 | ('\\mathbf{q}', '𝐪'), 439 | ('\\mathbf{R}', '𝐑'), 440 | ('\\mathbf{r}', '𝐫'), 441 | ('\\mathbf{S}', '𝐒'), 442 | ('\\mathbf{s}', '𝐬'), 443 | ('\\mathbf{T}', '𝐓'), 444 | ('\\mathbf{t}', '𝐭'), 445 | ('\\mathbf{U}', '𝐔'), 446 | ('\\mathbf{u}', '𝐮'), 447 | ('\\mathbf{V}', '𝐕'), 448 | ('\\mathbf{v}', '𝐯'), 449 | ('\\mathbf{W}', '𝐖'), 450 | ('\\mathbf{w}', '𝐰'), 451 | ('\\mathbf{X}', '𝐗'), 452 | ('\\mathbf{x}', '𝐱'), 453 | ('\\mathbf{Y}', '𝐘'), 454 | ('\\mathbf{y}', '𝐲'), 455 | ('\\mathbf{Z}', '𝐙'), 456 | ('\\mathbf{z}', '𝐳'), 457 | ('\\mathcal B', 'ℬ'), 458 | ('\\mathcal e', 'ℯ'), 459 | ('\\mathcal E', 'ℰ'), 460 | ('\\mathcal F', 'ℱ'), 461 | ('\\mathcal g', 'ℊ'), 462 | ('\\mathcal H', 'ℋ'), 463 | ('\\mathcal I', 'ℐ'), 464 | ('\\mathcal L', 'ℒ'), 465 | ('\\mathcal{A}', '𝓐'), 466 | ('\\mathcal{B}', '𝓑'), 467 | ('\\mathcal{C}', '𝓒'), 468 | ('\\mathcal{D}', '𝓓'), 469 | ('\\mathcal{E}', '𝓔'), 470 | ('\\mathcal{F}', '𝓕'), 471 | ('\\mathcal{G}', '𝓖'), 472 | ('\\mathcal{H}', '𝓗'), 473 | ('\\mathcal{I}', '𝓘'), 474 | ('\\mathcal{J}', '𝓙'), 475 | ('\\mathcal{K}', '𝓚'), 476 | ('\\mathcal{L}', '𝓛'), 477 | ('\\mathcal{M}', '𝓜'), 478 | ('\\mathcal{N}', '𝓝'), 479 | ('\\mathcal{O}', '𝓞'), 480 | ('\\mathcal{P}', '𝓟'), 481 | ('\\mathcal{Q}', '𝓠'), 482 | ('\\mathcal{R}', '𝓡'), 483 | ('\\mathcal{S}', '𝓢'), 484 | ('\\mathcal{T}', '𝓣'), 485 | ('\\mathcal{U}', '𝓤'), 486 | ('\\mathcal{V}', '𝓥'), 487 | ('\\mathcal{W}', '𝓦'), 488 | ('\\mathcal{X}', '𝓧'), 489 | ('\\mathcal{Y}', '𝓨'), 490 | ('\\mathcal{Z}', '𝓩'), 491 | ('\\mathfrak C', 'ℭ'), 492 | ('\\mathfrak H', 'ℌ'), 493 | ('\\mathfrak Z', 'ℨ'), 494 | ('\\mathfrak{A}', '𝔄'), 495 | ('\\mathfrak{a}', '𝔞'), 496 | ('\\mathfrak{B}', '𝔅'), 497 | ('\\mathfrak{b}', '𝔟'), 498 | ('\\mathfrak{c}', '𝔠'), 499 | ('\\mathfrak{D}', '𝔇'), 500 | ('\\mathfrak{d}', '𝔡'), 501 | ('\\mathfrak{E}', '𝔈'), 502 | ('\\mathfrak{e}', '𝔢'), 503 | ('\\mathfrak{F}', '𝔉'), 504 | ('\\mathfrak{f}', '𝔣'), 505 | ('\\mathfrak{G}', '𝔊'), 506 | ('\\mathfrak{g}', '𝔤'), 507 | ('\\mathfrak{h}', '𝔥'), 508 | ('\\mathfrak{i}', '𝔦'), 509 | ('\\mathfrak{J}', '𝔍'), 510 | ('\\mathfrak{j}', '𝔧'), 511 | ('\\mathfrak{K}', '𝔎'), 512 | ('\\mathfrak{k}', '𝔨'), 513 | ('\\mathfrak{L}', '𝔏'), 514 | ('\\mathfrak{l}', '𝔩'), 515 | ('\\mathfrak{M}', '𝔐'), 516 | ('\\mathfrak{m}', '𝔪'), 517 | ('\\mathfrak{N}', '𝔑'), 518 | ('\\mathfrak{n}', '𝔫'), 519 | ('\\mathfrak{O}', '𝔒'), 520 | ('\\mathfrak{o}', '𝔬'), 521 | ('\\mathfrak{P}', '𝔓'), 522 | ('\\mathfrak{p}', '𝔭'), 523 | ('\\mathfrak{Q}', '𝔔'), 524 | ('\\mathfrak{q}', '𝔮'), 525 | ('\\mathfrak{r}', '𝔯'), 526 | ('\\mathfrak{S}', '𝔖'), 527 | ('\\mathfrak{s}', '𝔰'), 528 | ('\\mathfrak{T}', '𝔗'), 529 | ('\\mathfrak{t}', '𝔱'), 530 | ('\\mathfrak{U}', '𝔘'), 531 | ('\\mathfrak{u}', '𝔲'), 532 | ('\\mathfrak{V}', '𝔙'), 533 | ('\\mathfrak{v}', '𝔳'), 534 | ('\\mathfrak{W}', '𝔚'), 535 | ('\\mathfrak{w}', '𝔴'), 536 | ('\\mathfrak{X}', '𝔛'), 537 | ('\\mathfrak{x}', '𝔵'), 538 | ('\\mathfrak{Y}', '𝔜'), 539 | ('\\mathfrak{y}', '𝔶'), 540 | ('\\mathfrak{z}', '𝔷'), 541 | ('\\mathit{A}', '𝐴'), 542 | ('\\mathit{a}', '𝑎'), 543 | ('\\mathit{B}', '𝐵'), 544 | ('\\mathit{b}', '𝑏'), 545 | ('\\mathit{C}', '𝐶'), 546 | ('\\mathit{c}', '𝑐'), 547 | ('\\mathit{D}', '𝐷'), 548 | ('\\mathit{d}', '𝑑'), 549 | ('\\mathit{E}', '𝐸'), 550 | ('\\mathit{e}', '𝑒'), 551 | ('\\mathit{F}', '𝐹'), 552 | ('\\mathit{f}', '𝑓'), 553 | ('\\mathit{G}', '𝐺'), 554 | ('\\mathit{g}', '𝑔'), 555 | 
('\\mathit{H}', '𝐻'), 556 | ('\\mathit{h}', '𝘩'), 557 | ('\\mathit{I}', '𝐼'), 558 | ('\\mathit{i}', '𝑖'), 559 | ('\\mathit{J}', '𝐽'), 560 | ('\\mathit{j}', '𝑗'), 561 | ('\\mathit{K}', '𝐾'), 562 | ('\\mathit{k}', '𝑘'), 563 | ('\\mathit{L}', '𝐿'), 564 | ('\\mathit{l}', '𝑙'), 565 | ('\\mathit{M}', '𝑀'), 566 | ('\\mathit{m}', '𝑚'), 567 | ('\\mathit{N}', '𝑁'), 568 | ('\\mathit{n}', '𝑛'), 569 | ('\\mathit{O}', '𝑂'), 570 | ('\\mathit{o}', '𝑜'), 571 | ('\\mathit{P}', '𝑃'), 572 | ('\\mathit{p}', '𝑝'), 573 | ('\\mathit{Q}', '𝑄'), 574 | ('\\mathit{q}', '𝑞'), 575 | ('\\mathit{R}', '𝑅'), 576 | ('\\mathit{r}', '𝑟'), 577 | ('\\mathit{S}', '𝑆'), 578 | ('\\mathit{s}', '𝑠'), 579 | ('\\mathit{T}', '𝑇'), 580 | ('\\mathit{t}', '𝑡'), 581 | ('\\mathit{U}', '𝑈'), 582 | ('\\mathit{u}', '𝑢'), 583 | ('\\mathit{V}', '𝑉'), 584 | ('\\mathit{v}', '𝑣'), 585 | ('\\mathit{W}', '𝑊'), 586 | ('\\mathit{w}', '𝑤'), 587 | ('\\mathit{X}', '𝑋'), 588 | ('\\mathit{x}', '𝑥'), 589 | ('\\mathit{Y}', '𝑌'), 590 | ('\\mathit{y}', '𝑦'), 591 | ('\\mathit{Z}', '𝑍'), 592 | ('\\mathit{z}', '𝑧'), 593 | ('\\mathring{\\mathrm A}', 'Å'), 594 | ('\\mathrm K', 'K'), 595 | ('\\mathrm{d}', 'ⅆ'), 596 | ('\\mathrsfs B', 'ℬ'), 597 | ('\\mathrsfs e', 'ℯ'), 598 | ('\\mathrsfs E', 'ℰ'), 599 | ('\\mathrsfs F', 'ℱ'), 600 | ('\\mathrsfs H', 'ℋ'), 601 | ('\\mathrsfs I', 'ℐ'), 602 | ('\\mathrsfs L', 'ℒ'), 603 | ('\\mathscr{A}', '𝒜'), 604 | ('\\mathscr{C}', '𝒞'), 605 | ('\\mathscr{D}', '𝒟'), 606 | ('\\mathscr{G}', '𝒢'), 607 | ('\\mathscr{J}', '𝒥'), 608 | ('\\mathscr{K}', '𝒦'), 609 | ('\\mathscr{M}', 'ℳ'), 610 | ('\\mathscr{N}', '𝒩'), 611 | ('\\mathscr{O}', '𝒪'), 612 | ('\\mathscr{P}', '𝒫'), 613 | ('\\mathscr{Q}', '𝒬'), 614 | ('\\mathscr{R}', 'ℛ'), 615 | ('\\mathscr{S}', '𝒮'), 616 | ('\\mathscr{T}', '𝒯'), 617 | ('\\mathscr{U}', '𝒰'), 618 | ('\\mathscr{V}', '𝒱'), 619 | ('\\mathscr{W}', '𝒲'), 620 | ('\\mathscr{X}', '𝒳'), 621 | ('\\mathscr{Y}', '𝒴'), 622 | ('\\mathscr{Z}', '𝒵'), 623 | ('\\measuredangle', '∡'), 624 | ('\\mho', '℧'), 625 | ('\\mid', '∣'), 626 | ('\\models', '⊧'), 627 | ('\\mp', '∓'), 628 | ('\\mu', 'μ'), 629 | ('\\multimap', '⊸'), 630 | ('\\nabla', '∇'), 631 | ('\\natural', '♮'), 632 | ('\\ncong', '≇'), 633 | ('\\ne', '≠'), 634 | ('\\nearrow', '↗'), 635 | ('\\neg', '¬'), 636 | ('\\nexist', '∄'), 637 | ('\\ngeq', '≱'), 638 | ('\\ngtr', '≯'), 639 | ('\\ni', '∋'), 640 | ('\\nleftarrow', '↚'), 641 | ('\\nLeftarrow', '⇍'), 642 | ('\\nleftrightarrow', '↮'), 643 | ('\\nLeftrightarrow', '⇎'), 644 | ('\\nleq', '≰'), 645 | ('\\nless', '≮'), 646 | ('\\nmid', '∤'), 647 | ('\\not\\approx', '≉'), 648 | ('\\not\\asymp', '≭'), 649 | ('\\not\\equiv', '≢'), 650 | ('\\not\\exists', '∄'), 651 | ('\\not\\gtrless', '≹'), 652 | ('\\not\\gtrsim', '≵'), 653 | ('\\not\\lessgtr', '≸'), 654 | ('\\not\\lesssim', '≴'), 655 | ('\\not\\preceq', '⋠'), 656 | ('\\not\\simeq', '≄'), 657 | ('\\not\\sqsubseteq', '⋢'), 658 | ('\\not\\sqsupseteq', '⋣'), 659 | ('\\not\\subset', '⊄'), 660 | ('\\not\\succeq', '⋡'), 661 | ('\\not\\supset', '⊅'), 662 | ('\\not\\triangleleft', '⋪'), 663 | ('\\not\\trianglelefteq', '⋬'), 664 | ('\\not\\triangleright', '⋫'), 665 | ('\\not\\trianglerighteq', '⋭'), 666 | ('\\not\\vdash', '⊬'), 667 | ('\\not\\vDash', '⊭'), 668 | ('\\not\\Vdash', '⊮'), 669 | ('\\not\\VDash', '⊯'), 670 | ('\\notin', '∉'), 671 | ('\\notni', '∌'), 672 | ('\\nparallel', '∦'), 673 | ('\\nprec', '⊀'), 674 | ('\\nrightarrow', '↛'), 675 | ('\\nRightarrow', '⇏'), 676 | ('\\nsim', '≁'), 677 | ('\\nsubseteq', '⊈'), 678 | ('\\nsucc', '⊁'), 679 | ('\\nsupseteq', '⊉'), 680 | ('\\nu', 'ν'), 681 | 
('\\nwarrow', '↖'), 682 | ('\\odot', '⊙'), 683 | ('\\oiiint', '∰'), 684 | ('\\oiint', '∯'), 685 | ('\\oiintctrclockwise', '∳'), 686 | ('\\oint', '∮'), 687 | ('\\ointclockwise', '∲'), 688 | ('\\Omega', 'Ω'), 689 | ('\\omega', 'ω'), 690 | ('\\ominus', '⊖'), 691 | ('\\oplus', '⊕'), 692 | ('\\oslash', '⊘'), 693 | ('\\otimes', '⊗'), 694 | ('\\overline{0}', '‾'), 695 | ('\\parallel', '∥'), 696 | ('\\partial', '∂'), 697 | ('\\perp', '⟂'), 698 | ('\\Phi', 'Φ'), 699 | ('\\phi', 'φ'), 700 | ('\\Pi', 'Π'), 701 | ('\\pi', 'π'), 702 | ('\\pitchfork', '⋔'), 703 | ('\\pm', '±'), 704 | ('\\pounds', '£'), 705 | ('\\prec', '≺'), 706 | ('\\preccurlyeq', '≼'), 707 | ('\\preceq', '⪯'), 708 | ('\\precnsim', '⋨'), 709 | ('\\precsim', '≾'), 710 | ('\\prime', '′'), 711 | ('\\prod', '∏'), 712 | ('\\Proportion', '∷'), 713 | ('\\propto', '∝'), 714 | ('\\Psi', 'Ψ'), 715 | ('\\psi', 'ψ'), 716 | ('\\Qoppa', 'Ϙ'), 717 | ('\\qoppa', 'ϙ'), 718 | ('\\quotedblbase', '„'), 719 | ('\\quotesinglbase', '‚'), 720 | ('\\rangle', '〉'), 721 | ('\\rceil', '⌉'), 722 | ('\\Re', 'ℜ'), 723 | ('\\rfloor', '⌋'), 724 | ('\\RHD', '‣'), 725 | ('\\rho', 'ρ'), 726 | ('\\rightarrow', '→'), 727 | ('\\Rightarrow', '⇒'), 728 | ('\\RightArrowBar', '⇥'), 729 | ('\\rightarrowtail', '↣'), 730 | ('\\rightarrowtriangle', '⇾'), 731 | ('\\rightharpoondown', '⇁'), 732 | ('\\rightharpoonup', '⇀'), 733 | ('\\rightleftarrows', '⇄'), 734 | ('\\rightleftharpoons', '⇌'), 735 | ('\\rightrightarrows', '⇉'), 736 | ('\\rightsquigarrow', '⇝'), 737 | ('\\rightthreetimes', '⋌'), 738 | ('\\risingdotseq', '≓'), 739 | ('\\rrangle', '⟫'), 740 | ('\\rrbracket', '〛'), 741 | ('\\Rrightarrow', '⇛'), 742 | ('\\Rsh', '↱'), 743 | ('\\rtimes', '⋊'), 744 | ('\\Sampi', 'Ϡ'), 745 | ('\\sampi', 'ϡ'), 746 | ('\\searrow', '↘'), 747 | ('\\second', '″'), 748 | ('\\setminus', '⧵'), 749 | ('\\sharp', '♯'), 750 | ('\\Sigma', 'Σ'), 751 | ('\\sigma', 'σ'), 752 | ('\\sim', '∼'), 753 | ('\\simeq', '≃'), 754 | ('\\smallsetminus', '∖'), 755 | ('\\smile', '⌣'), 756 | ('\\smiley', '☺'), 757 | ('\\spadesuit', '♠'), 758 | ('\\sphericalangle', '∢'), 759 | ('\\sqbullet', '∍'), 760 | ('\\sqcap', '⊓'), 761 | ('\\sqcup', '⊔'), 762 | ('\\sqrt[3]{}', '∛'), 763 | ('\\sqrt[4]{}', '∜'), 764 | ('\\sqrt{}', '√'), 765 | ('\\sqsubset', '⊏'), 766 | ('\\sqsubseteq', '⊑'), 767 | ('\\sqsubsetneq', '⋤'), 768 | ('\\sqsupset', '⊐'), 769 | ('\\sqsupseteq', '⊒'), 770 | ('\\sqsupsetneq', '⋥'), 771 | ('\\square', '□'), 772 | ('\\stackrel{=}{=}', '≣'), 773 | ('\\stackrel{\\frown}{=}', '≘'), 774 | ('\\stackrel{\\star}{=}', '≛'), 775 | ('\\stackrel{\\text{\\tiny ?}}{=}', '≟'), 776 | ('\\stackrel{\\text{\\tiny def}}{=}', '≝'), 777 | ('\\stackrel{\\vee}{=}', '≚'), 778 | ('\\stackrel{\\wedge}{=}', '≙'), 779 | ('\\star', '∗'), 780 | ('\\Stigma', 'Ϛ'), 781 | ('\\stigma', 'ϛ'), 782 | ('\\subset', '⊂'), 783 | ('\\Subset', '⋐'), 784 | ('\\subseteq', '⊆'), 785 | ('\\subsetneq', '⊊'), 786 | ('\\succ', '≻'), 787 | ('\\succccurlyeq', '≽'), 788 | ('\\succeq', '⪰'), 789 | ('\\succnsim', '⋩'), 790 | ('\\succsim', '≿'), 791 | ('\\sum', '∑'), 792 | ('\\supset', '⊃'), 793 | ('\\Supset', '⋑'), 794 | ('\\supseteq', '⊇'), 795 | ('\\supsetneq', '⊋'), 796 | ('\\swarrow', '↙'), 797 | ('\\tau', 'τ'), 798 | ('\\textasciimacron', '¯'), 799 | ('\\textbardbl', '‖'), 800 | ('\\textbrokenbar', '¦'), 801 | ('\\textcent', '¢'), 802 | ('\\textcurrency', '¤'), 803 | ('\\textdiscount', '⁒'), 804 | ('\\textestimated', '℮'), 805 | ('\\textexclamdown', '¡'), 806 | ('\\textinterrobang', '‽'), 807 | ('\\textinterrobangdown', '⸘'), 808 | ('\\textlquill', '⁅'), 809 | 
('\\textmu', 'µ'), 810 | ('\\textordfeminine', 'ª'), 811 | ('\\textordmasculine', 'º'), 812 | ('\\textpertenthousand', '‱'), 813 | ('\\textperthousand', '‰'), 814 | ('\\textquestiondown', '¿'), 815 | ('\\textquotedblleft', '“'), 816 | ('\\textquotedblright', '”'), 817 | ('\\textquoteleft', '‘'), 818 | ('\\textquoteright', '’'), 819 | ('\\textreferencemark', '※'), 820 | ('\\textsuperscript{\\textregistered}', '®'), 821 | ('\\textrquill', '⁆'), 822 | ('\\textsuperscript{1}', '¹'), 823 | ('\\textsuperscript{2}', '²'), 824 | ('\\textsuperscript{3}', '³'), 825 | ('\\textsuperscript{o}', '°'), 826 | ('\\texttrademark', '™'), 827 | ('\\textyen', '¥'), 828 | ('\\therefore', '∴'), 829 | ('\\Theta', 'Θ'), 830 | ('\\theta', 'θ'), 831 | ('\\third', '‴'), 832 | ('\\times', '×'), 833 | ('\\top', '⊤'), 834 | ('\\triangle', '△'), 835 | ('\\triangleleft', '⊲'), 836 | ('\\triangleq', '≜'), 837 | ('\\triangleright', '⊳'), 838 | ('\\twoheadleftarrow', '↞'), 839 | ('\\twoheadrightarrow', '↠'), 840 | ('\\underline{\\phantom{x}}', '̲'), 841 | ('\\unlhd', '⊴'), 842 | ('\\unrhd', '⊵'), 843 | ('\\uparrow', '↑'), 844 | ('\\Uparrow', '⇑'), 845 | ('\\updownarrow', '↕'), 846 | ('\\Updownarrow', '⇕'), 847 | ('\\updownarrows', '⇅'), 848 | ('\\upharpoonleft', '↿'), 849 | ('\\upharpoonright', '↾'), 850 | ('\\uplus', '⊎'), 851 | ('\\Upsilon', 'Υ'), 852 | ('\\upsilon', 'υ'), 853 | ('\\upuparrows', '⇈'), 854 | ('\\varepsilon', 'ε'), 855 | ('\\varkappa', 'ϰ'), 856 | ('\\varnothing', '∅'), 857 | ('\\varphi', '𝜑'), 858 | ('\\varpi', '𝜛'), 859 | ('\\varrho', '𝜚'), 860 | ('\\varsigma', 'ς'), 861 | ('\\vartheta', '𝜗'), 862 | ('\\vdash', '⊢'), 863 | ('\\Vdash', '⊩'), 864 | ('\\VDash', '⊫'), 865 | ('\\vdots', '⋮'), 866 | ('\\vee', '∨'), 867 | ('\\veebar', '⊻'), 868 | ('\\Vvdash', '⊪'), 869 | ('\\wedge', '∧'), 870 | ('\\wp', '℘'), 871 | ('\\wr', '≀'), 872 | ('\\Xi', 'Ξ'), 873 | ('\\xi', 'ξ'), 874 | ('\\xspace', ' '), 875 | ('\\Yup', '⅄'), 876 | ('\\zeta', 'ζ') 877 | ] 878 | 879 | # Add conflicting tokens 880 | for _ in [ 881 | ('\\textregistered', '®'), 882 | ]: 883 | REPLACE_TEX_COMMANDS_LIBRARY.append(_) 884 | 885 | # Add final token for text 886 | REPLACE_TEX_COMMANDS_LIBRARY.append(('\\text', '')) 887 | -------------------------------------------------------------------------------- /pydetex/_gui_settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | GUI SETTINGS 6 | Provides settings for the gui. 
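# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the repository): the tables defined in
# _symbols.py above are plain (tex, unicode) pair lists, so they can be
# exercised with sequential str.replace() calls. This naive pass is
# order-sensitive and far simpler than the real replacement step performed
# elsewhere in pydetex; it is shown only to document how the pair lists are
# meant to be read. Assumes the package is importable.

from pydetex._symbols import REPLACE_SYMBOLS_LIBRARY, REPLACE_TEX_COMMANDS_LIBRARY

def _replace_symbols(s: str) -> str:
    for library in (REPLACE_TEX_COMMANDS_LIBRARY, REPLACE_SYMBOLS_LIBRARY):
        for tex, symbol in library:
            s = s.replace(tex, symbol)
    return s

# _replace_symbols(r'\alpha \ne \beta')  ->  'α ≠ β'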
7 | """ 8 | 9 | __all__ = ['Settings'] 10 | 11 | import os 12 | 13 | from typing import Callable, Tuple, Dict, Any, List, Type, Union 14 | from warnings import warn 15 | 16 | import datetime 17 | import pydetex.pipelines as pip 18 | import pydetex.utils as ut 19 | import pydetex.version as ver 20 | 21 | from pydetex import __author__ 22 | 23 | _SETTINGS_FILE = [os.path.join(ut.get_local_path(), '.pydetex.cfg')] 24 | _SETTINGS_TEST = os.path.join(ut.RESOURCES_PATH, '.pydetex.cfg') 25 | 26 | # Store the pipelines 27 | _PIPELINES = { 28 | 'pipeline_simple': pip.simple, 29 | 'pipeline_strict': pip.strict, 30 | 'pipeline_strict_eqn': pip.strict_eqn 31 | } 32 | 33 | # Store the window sizes (w, h, height_richtext, margin_between_richtext, button_margin) 34 | _WINDOW_SIZE = { 35 | 'window_size_small': [720, 480, 175, 3, 6], 36 | 'window_size_medium': [960, 540, 200, 5, 10], 37 | 'window_size_large': [1280, 720, 285, 5, 15], 38 | 'window_size_xlarge': [1440, 850, 343, 10, 19] 39 | } 40 | 41 | 42 | class _LangManager(object): 43 | """ 44 | Stores language. 45 | """ 46 | 47 | def __init__(self) -> None: 48 | """ 49 | Constructor. 50 | """ 51 | self._lang = { 52 | 'en': { 53 | 'about': 'About', 54 | 'about_author': 'Author', 55 | 'about_opened': 'Total app openings', 56 | 'about_processed': 'Total processed words', 57 | 'about_ver_dev': 'Development version', 58 | 'about_ver_err_conn': 'Cannot check for new versions (Connection Error)', 59 | 'about_ver_err_unkn': 'Cannot check for new versions (Unknown Error)', 60 | 'about_ver_latest': 'Software version up-to-date', 61 | 'about_ver_upgrade': 'Note: You are using an outdated version, consider upgrading to v{0}', 62 | 'cfg_check': 'Check', 63 | 'cfg_error_auto_copy': 'Invalid auto copy process value', 64 | 'cfg_error_font_size': 'Invalid font size value', 65 | 'cfg_error_lang': 'Invalid lang value', 66 | 'cfg_error_output_format': 'Invalid output font format value', 67 | 'cfg_error_pipeline': 'Invalid pipeline value', 68 | 'cfg_error_pipeline_compress_cite': 'Invalid compress \\cite value', 69 | 'cfg_error_pipeline_replace_defs': 'Invalid replace \\def value', 70 | 'cfg_error_repetition': 'Invalid repetition value', 71 | 'cfg_error_repetition_chars': 'Repetition min chars must be greater than zero', 72 | 'cfg_error_repetition_distance': 'Repetition distance must be greater than 2 and lower than 50', 73 | 'cfg_error_repetition_words': 'Invalid ignore words', 74 | 'cfg_error_show_line_numbers': 'Invalid show line numbers value', 75 | 'cfg_error_stemming': 'Invalid repetition stemming value', 76 | 'cfg_error_stopwords': 'Invalid repetition stopwords value', 77 | 'cfg_error_window_size': 'Invalid window size value', 78 | 'cfg_font_format': 'Output font format', 79 | 'cfg_font_size': 'Font size', 80 | 'cfg_lang': 'Language', 81 | 'cfg_pipeline': 'Pipeline', 82 | 'cfg_pipeline_compress_cite': 'Compress \\cite', 83 | 'cfg_pipeline_replace_defs': 'Replace \\def', 84 | 'cfg_process_auto_copy': 'Auto-copy after process', 85 | 'cfg_save': 'Save', 86 | 'cfg_show_line_numbers': 'Show line numbers', 87 | 'cfg_tab_pipeline': 'Pipeline', 88 | 'cfg_tab_ui': 'UI', 89 | 'cfg_window_size': 'Window size', 90 | 'cfg_words_repetition': 'Words repetition', 91 | 'cfg_words_repetition_distance': 'Repetition distance', 92 | 'cfg_words_repetition_ignorew': 'Ignored words', 93 | 'cfg_words_repetition_minchars': 'Repetition min chars', 94 | 'cfg_words_repetition_stemming': 'Use stemming', 95 | 'cfg_words_repetition_stopwords': 'Use stopwords', 96 | 'clear': 'Clear', 97 | 
'clip_empty': 'Clipboard is empty', 98 | 'copy_from_clip': 'Copying from clipboard', 99 | 'detected_lang': 'Detected language: {0} ({1})', 100 | 'detected_lang_write': 'Write something to recognize the language', 101 | 'dictionary': 'Dictionary', 102 | 'dictionary_antonym': 'Antonym', 103 | 'dictionary_loading': 'Loading ...', 104 | 'dictionary_meaning': 'Meaning', 105 | 'dictionary_no_results': 'No results', 106 | 'dictionary_querying': 'Querying dictionary', 107 | 'dictionary_synonym': 'Synonym', 108 | 'dictionary_timeout': 'Error: Query timeout exceeded limit', 109 | 'dictionary_translation': 'Translation', 110 | 'dictionary_wikipedia': 'Wikipedia', 111 | 'dictionary_word': 'Word', 112 | 'format_d': ',', 113 | 'lang': 'English', 114 | 'menu_copy': 'Copy', 115 | 'menu_cut': 'Cut', 116 | 'menu_paste': 'Paste', 117 | 'open_file': 'Open file', 118 | 'open_file_latex_file': 'LaTeX file', 119 | 'open_file_select': 'Select a LaTeX file', 120 | 'pipeline_simple': 'Simple', 121 | 'pipeline_simple_description': 'Removes common Tex commands, replaces cites and references', 122 | 'pipeline_strict': 'Strict', 123 | 'pipeline_strict_description': 'An extension of the simple pipeline which removes all Tex commands and environments', 124 | 'pipeline_strict_eqn': 'Strict + equation', 125 | 'pipeline_strict_eqn_description': 'An extension of the strict pipeline which converts also the equations', 126 | 'placeholder': ut.open_file(ut.RESOURCES_PATH + 'placeholder_en.tex'), 127 | 'process': 'Process', 128 | 'process_clip': 'Process from clipboard', 129 | 'process_copy': 'Copy to clipboard', 130 | 'process_error': 'An error has occured while processing the text.\nPlease create a new issue in the GitHub ' 131 | 'page ({0}) with full defails and minimal working example.\n\nError traceback:\n{1}\n', 132 | 'reload_message_message': 'To apply these changes, the app must be reloaded', 133 | 'reload_message_title': 'Reload is required', 134 | 'settings': 'Settings', 135 | 'status_copy_to_clip': 'Copying to clip', 136 | 'status_cursor': 'Cursor: {0}:{1}', 137 | 'status_cursor_input_focusout': 'Input not selected', 138 | 'status_cursor_input_focusout_min': 'Not selected', 139 | 'status_cursor_input_focusout_min2': 'No sel', 140 | 'status_cursor_min': 'Cur: {0}:{1}', 141 | 'status_cursor_null': 'Cursor: Empty', 142 | 'status_cursor_selected': 'Selected', 143 | 'status_cursor_selected_all': 'Selected: all', 144 | 'status_cursor_selected_chars': '{0} chars', 145 | 'status_cursor_selected_chars_min': '{0} ch', 146 | 'status_cursor_selected_chars_single': '1 char', 147 | 'status_cursor_selected_min': 'Sel', 148 | 'status_idle': 'Idle', 149 | 'status_processing': 'Processing', 150 | 'status_requesting_file': 'Requesting file', 151 | 'status_words': 'Words: {0}', 152 | 'status_writing': 'Writing', 153 | 'tag_repeated': 'repeated', 154 | 'version_upgrade': 'You are using an outdated PyDetex version, consider upgrading to v{0}.\n\nTo update, ' 155 | 'run "pip install --upgrade pydetex" in your terminal', 156 | 'version_upgrade_title': 'Oudated PyDetex version', 157 | 'window_size_large': 'Large', 158 | 'window_size_medium': 'Medium', 159 | 'window_size_small': 'Small', 160 | 'window_size_xlarge': 'Extra Large' 161 | }, 162 | 'es': { 163 | 'about': 'Acerca de', 164 | 'about_author': 'Autor', 165 | 'about_opened': 'Nº ejecuciones app', 166 | 'about_processed': 'Nº palabras procesadas', 167 | 'about_ver_dev': 'Versión de desarrollo', 168 | 'about_ver_err_conn': 'No se pudo verificar nuevas versiones (Error de Conexión)', 169 | 
'about_ver_err_unkn': 'No se pudo verificar nuevas versiones (Error desconocido)', 170 | 'about_ver_latest': 'Software actualizado a la última versión', 171 | 'about_ver_upgrade': 'Nota: Estás usando una versión desactualizada, considera actualizar a la v{0}', 172 | 'cfg_check': 'Activar', 173 | 'cfg_error_auto_copy': 'Valor auto copiado al procesar incorrecto', 174 | 'cfg_error_font_size': 'Tamaño fuente incorrecta', 175 | 'cfg_error_lang': 'Valor idioma incorrecto', 176 | 'cfg_error_output_format': 'Valor formato output incorrecto', 177 | 'cfg_error_pipeline': 'Valor pipeline incorrecto', 178 | 'cfg_error_pipeline_compress_cite': 'Valor compresión \\cite incorrecto', 179 | 'cfg_error_pipeline_replace_defs': 'Valor reemplazo \\def incorrecto', 180 | 'cfg_error_repetition': 'Valor repetición incorrecto', 181 | 'cfg_error_repetition_chars': 'Caracter mínimo de repetición debe ser mayor a cero', 182 | 'cfg_error_repetition_distance': 'Distancia de repetición debe ser superior o igual a 2, y menor que 50', 183 | 'cfg_error_repetition_words': 'Repetición palabras incorrectas', 184 | 'cfg_error_show_line_numbers': 'Valor mostrar número de líneas incorrecto', 185 | 'cfg_error_stemming': 'Valor stemming incorrecto', 186 | 'cfg_error_stopwords': 'Valor stopwords incorrecto', 187 | 'cfg_error_window_size': 'Tamaño ventana incorrecto', 188 | 'cfg_font_format': 'Formatear fuentes', 189 | 'cfg_font_size': 'Tamaño de la fuente', 190 | 'cfg_lang': 'Idioma', 191 | 'cfg_pipeline': 'Pipeline', 192 | 'cfg_pipeline_compress_cite': 'Comprimir \\cite', 193 | 'cfg_pipeline_replace_defs': 'Reemplazar \\def', 194 | 'cfg_process_auto_copy': 'Auto-copiado al procesar', 195 | 'cfg_save': 'Guardar', 196 | 'cfg_show_line_numbers': 'Mostrar nº líneas', 197 | 'cfg_tab_pipeline': 'Pipeline', 198 | 'cfg_tab_ui': 'UI', 199 | 'cfg_window_size': 'Tamaño de ventana', 200 | 'cfg_words_repetition': 'Rep. 
palabras', 201 | 'cfg_words_repetition_distance': 'Distancia de repetición', 202 | 'cfg_words_repetition_ignorew': 'Palabras ignoradas', 203 | 'cfg_words_repetition_minchars': 'Mínimo de carácteres', 204 | 'cfg_words_repetition_stemming': 'Usar stemming', 205 | 'cfg_words_repetition_stopwords': 'Usar stopwords', 206 | 'clear': 'Limpiar', 207 | 'clip_empty': 'Portapapeles vacío', 208 | 'copy_from_clip': 'Copiando desde portapapeles', 209 | 'detected_lang': 'Idioma detectado: {0} ({1})', 210 | 'detected_lang_write': 'Escribe algo para detectar el idioma', 211 | 'dictionary': 'Diccionario', 212 | 'dictionary_antonym': 'Antónimos', 213 | 'dictionary_loading': 'Cargando ...', 214 | 'dictionary_meaning': 'Definición', 215 | 'dictionary_no_results': 'Sin resultados', 216 | 'dictionary_querying': 'Consultando diccionario', 217 | 'dictionary_synonym': 'Sinónimos', 218 | 'dictionary_timeout': 'Error: El tiempo de consulta excedió el límite', 219 | 'dictionary_translation': 'Traducción', 220 | 'dictionary_wikipedia': 'Wikipedia', 221 | 'dictionary_word': 'Word', 222 | 'format_d': '.', 223 | 'lang': 'Español', 224 | 'menu_copy': 'Copiar', 225 | 'menu_cut': 'Cortar', 226 | 'menu_paste': 'Pegar', 227 | 'open_file': 'Abrir archivo', 228 | 'open_file_latex_file': 'Archivo LaTeX', 229 | 'open_file_select': 'Selecciona un archivo LaTeX', 230 | 'pipeline_simple': 'Simple', 231 | 'pipeline_simple_description': 'Elimina comandos Tex comunes, remplaza citas y referencias', 232 | 'pipeline_strict': 'Estricto', 233 | 'pipeline_strict_description': 'Una extensión del pipeline simple que elimina todos los entornos y comandos', 234 | 'pipeline_strict_eqn': 'Estricto + ecuación', 235 | 'pipeline_strict_eqn_description': 'Pipeline estricto que además reemplaza las ecuaciones', 236 | 'placeholder': ut.open_file(ut.RESOURCES_PATH + 'placeholder_es.tex'), 237 | 'process': 'Procesar', 238 | 'process_clip': 'Procesar desde portapapeles', 239 | 'process_copy': 'Copiar al portapapeles', 240 | 'process_error': 'Un error ha ocurrido mientras se procesaba el texto.\nPor favor crea un nuevo issue en' 241 | ' la página de GitHub ({0}) con los detalles completos y un ejemplo mínimo para probar' 242 | ' las soluciones.\n\nDetalles del error:\n{1}\n', 243 | 'reload_message_message': 'Para aplicar estos cambios, la aplicación se debe reiniciar', 244 | 'reload_message_title': 'Se requiere de un reinicio', 245 | 'settings': 'Configuraciones', 246 | 'status_copy_to_clip': 'Copiando al portapapeles', 247 | 'status_cursor': 'Cursor: {0}:{1}', 248 | 'status_cursor_input_focusout': 'Texto entrada no seleccionado', 249 | 'status_cursor_input_focusout_min': 'No seleccionado', 250 | 'status_cursor_input_focusout_min2': 'No sel', 251 | 'status_cursor_min': 'Cur: {0}:{1}', 252 | 'status_cursor_null': 'Cursor: Vacío', 253 | 'status_cursor_selected': 'Selección', 254 | 'status_cursor_selected_all': 'Selección: todo', 255 | 'status_cursor_selected_chars': '{0} carácteres', 256 | 'status_cursor_selected_chars_min': '{0} crs.', 257 | 'status_cursor_selected_chars_single': '1 caracter', 258 | 'status_cursor_selected_min': 'Sel', 259 | 'status_idle': 'Esperando', 260 | 'status_processing': 'Procesando', 261 | 'status_requesting_file': 'Esperando archivo', 262 | 'status_words': 'Palabras: {0}', 263 | 'status_writing': 'Escribiendo', 264 | 'tag_repeated': 'repetido', 265 | 'version_upgrade': 'Estás usando una versión desactualizada de PyDetex, considera actualizar a v{0}.' 
266 | '\n\nPara esto, ejecuta "pip install --upgrade pydetex" en tu terminal', 267 | 'version_upgrade_title': 'Versión desactualizada de PyDetex', 268 | 'window_size_large': 'Grande', 269 | 'window_size_medium': 'Mediano', 270 | 'window_size_small': 'Pequeño', 271 | 'window_size_xlarge': 'Extra Grande' 272 | } 273 | } 274 | 275 | # Extend languages if not defined 276 | ut.complete_langs_dict(self._lang) 277 | 278 | # Update window sizes 279 | for la in self._lang.keys(): 280 | for tok in self._lang[la].keys(): 281 | if tok in _WINDOW_SIZE.keys(): 282 | self._lang[la][tok] += f' ({_WINDOW_SIZE[tok][0]}x{_WINDOW_SIZE[tok][1]})' 283 | 284 | def get_available(self) -> List[str]: 285 | """ 286 | Return the available langs. 287 | 288 | :return: Lang list 289 | """ 290 | return list(self._lang.keys()) 291 | 292 | def get(self, lang: str, tag: str) -> str: 293 | """ 294 | Returns a lang entry. 295 | 296 | :param lang: Language code 297 | :param tag: Language tag 298 | :return: Language value 299 | """ 300 | return self._lang[lang][tag] 301 | 302 | 303 | class Settings(object): 304 | """ 305 | Settings. 306 | """ 307 | 308 | _available_pipelines: List[str] 309 | _default_settings: Dict[str, Tuple[Any, Type, Union[List[Any], Callable[[Any], bool]]]] 310 | _lang: '_LangManager' 311 | _last_opened_day_diff: int 312 | _settings: Dict[str, Any] 313 | _valid_font_sizes: List[int] 314 | _valid_window_sizes: List[str] 315 | 316 | def __init__(self, ignore_file: bool = False) -> None: 317 | """ 318 | Constructor. 319 | 320 | :param ignore_file: If True, the settings file is ignored 321 | """ 322 | load = [] 323 | 324 | def _load_file() -> List[str]: 325 | """ 326 | Loads the setting file. 327 | """ 328 | _load = [] 329 | try: 330 | _f = open(_SETTINGS_FILE[0]) 331 | _load = _f.readlines() 332 | _f.close() 333 | except FileNotFoundError: 334 | _error = f'Setting file {_SETTINGS_FILE[0]} could not be loaded or not exist. 
Creating new file' 335 | warn(_error) 336 | return _load 337 | 338 | if not ignore_file: 339 | try: 340 | load = _load_file() 341 | except PermissionError: 342 | error = f'Settings file {_SETTINGS_FILE[0]} could not be opened (PermissionError)' 343 | warn(error) 344 | else: 345 | _SETTINGS_FILE[0] = _SETTINGS_TEST 346 | 347 | # Creates the lang manager 348 | self._lang = _LangManager() 349 | 350 | # General settings 351 | self.CFG_CHECK_REPETITION = 'CHECK_REPETITION' 352 | self.CFG_FONT_SIZE = 'FONT_SIZE' 353 | self.CFG_LANG = 'LANG' 354 | self.CFG_LAST_OPENED_DAY = 'LAST_OPENED_DAY' 355 | self.CFG_LAST_OPENED_FOLDER = 'LAST_OPENED_FOLDER' 356 | self.CFG_OUTPUT_FONT_FORMAT = 'OUTPUT_FONT_FORMAT' 357 | self.CFG_PIPELINE = 'PIPELINE' 358 | self.CFG_PIPELINE_COMPRESS_CITE = 'PIPELINE_COMPRESS_CITE' 359 | self.CFG_PIPELINE_REPLACE_DEFS = 'PIPELINE_REPLACE_DEFS' 360 | self.CFG_PROCESS_AUTO_COPY = 'PROCESS_AUTO_COPY' 361 | self.CFG_SHOW_LINE_NUMBERS = 'SHOW_LINE_NUMBERS' 362 | self.CFG_WINDOW_SIZE = 'WINDOW_SIZE' 363 | 364 | # Words repetition 365 | self.CFG_REPETITION_DISTANCE = 'REPETITION_DISTANCE' 366 | self.CFG_REPETITION_IGNORE_WORDS = 'REPETITION_IGNORE_WORDS' 367 | self.CFG_REPETITION_MIN_CHAR = 'REPETITION_MIN_CHAR' 368 | self.CFG_REPETITION_USE_STEMMING = 'REPETITION_USE_STEMMING' 369 | self.CFG_REPETITION_USE_STOPWORDS = 'REPETITION_USE_STOPWORDS' 370 | 371 | # Stats 372 | self.CFG_TOTAL_OPENED_APP = 'TOTAL_OPENED_APP' 373 | self.CFG_TOTAL_PROCESSED_WORDS = 'TOTAL_PROCESSED_WORDS' 374 | 375 | # Stores default settings and the valid values 376 | self._available_pipelines = list(_PIPELINES.keys()) 377 | self._valid_font_sizes = [6, 7, 8, 9, 10, 11, 12, 13, 14, 15] 378 | self._valid_window_sizes = list(_WINDOW_SIZE.keys()) 379 | 380 | self._default_settings = { 381 | self.CFG_CHECK_REPETITION: (False, bool, [True, False]), 382 | self.CFG_FONT_SIZE: (11 if ut.IS_OSX else 10, int, self._valid_font_sizes), 383 | self.CFG_LANG: ('en', str, self._lang.get_available()), 384 | self.CFG_LAST_OPENED_DAY: (ut.get_number_of_day(), int, lambda x: x >= 0), 385 | self.CFG_LAST_OPENED_FOLDER: ('/', str, lambda x: os.path.isdir(x)), 386 | self.CFG_OUTPUT_FONT_FORMAT: (True, bool, [True, False]), 387 | self.CFG_PIPELINE: (self._available_pipelines[1], str, self._available_pipelines), 388 | self.CFG_PIPELINE_COMPRESS_CITE: (True, bool, [True, False]), 389 | self.CFG_PIPELINE_REPLACE_DEFS: (False, bool, [True, False]), 390 | self.CFG_PROCESS_AUTO_COPY: (False, bool, [True, False]), 391 | self.CFG_REPETITION_DISTANCE: (15, int, lambda x: 50 > x > 1), 392 | self.CFG_REPETITION_IGNORE_WORDS: ('ignored_word_1, ignored_word_2', str, None), 393 | self.CFG_REPETITION_MIN_CHAR: (4, int, lambda x: x > 0), 394 | self.CFG_REPETITION_USE_STEMMING: (True, bool, [True, False]), 395 | self.CFG_REPETITION_USE_STOPWORDS: (True, bool, [True, False]), 396 | self.CFG_SHOW_LINE_NUMBERS: (True, bool, [True, False]), 397 | self.CFG_TOTAL_OPENED_APP: (0, int, lambda x: x >= 0), 398 | self.CFG_TOTAL_PROCESSED_WORDS: (0, int, lambda x: x >= 0), 399 | self.CFG_WINDOW_SIZE: (self._valid_window_sizes[1], str, self._valid_window_sizes) 400 | } 401 | 402 | # The valid settings 403 | self._settings = {} 404 | for k in self._default_settings.keys(): 405 | self._settings[k] = self._default_settings[k][0] 406 | 407 | # Load the user settings 408 | for f in load: 409 | if '#' in f: 410 | continue 411 | if '=' in f: # If string has control character 412 | sp = f.split('=') 413 | if len(sp) != 2: 414 | continue 415 | j = sp[0].strip() 416 | 
val = sp[1].strip() 417 | if self.check_setting(j, val): 418 | self._settings[j] = self._parse_str(val) # Update setting value 419 | 420 | # Update the value 421 | today = ut.get_number_of_day() 422 | self._last_opened_day_diff = abs(ut.get_number_of_day() - self.get(self.CFG_LAST_OPENED_DAY)) 423 | self.set(self.CFG_LAST_OPENED_DAY, today) 424 | self.set(self.CFG_TOTAL_OPENED_APP, self.get(self.CFG_TOTAL_OPENED_APP) + 1) 425 | 426 | # Save the settings 427 | self.save() 428 | 429 | @staticmethod 430 | def _parse_str(value: Any) -> Any: 431 | """ 432 | Parse common string values. 433 | 434 | :param value: Value 435 | :return: Parsed value 436 | """ 437 | if isinstance(value, str): 438 | if value == 'True': 439 | value = True 440 | elif value == 'False': 441 | value = False 442 | elif value.replace('.', '').replace('-', '').replace('+', '').isdigit(): 443 | try: 444 | old_val = value 445 | value = float(value) 446 | if '.' not in old_val and int(value) == value: 447 | value = int(value) 448 | except ValueError: 449 | pass 450 | else: 451 | value = value.strip() 452 | return value 453 | 454 | def check_setting(self, key: str, value: Any) -> bool: 455 | """ 456 | Check if a setting is valid. 457 | 458 | :param key: Key setting 459 | :param value: Value 460 | :return: True if valid 461 | """ 462 | # Apply custom values 463 | if isinstance(value, str): 464 | value = self._parse_str(value) 465 | 466 | # Checks 467 | if key in self._default_settings.keys(): 468 | val_type = self._default_settings[key][1] 469 | val_valids = self._default_settings[key][2] 470 | # Check val type 471 | if not isinstance(value, val_type): 472 | error = f'Setting {key} should be type {val_type}, but received {type(value)}' 473 | warn(error) 474 | return False 475 | if isinstance(val_valids, list): 476 | if value in val_valids: # Setting is within valid ones 477 | return True 478 | else: 479 | str_valids = [] 480 | for t in val_valids: 481 | str_valids.append(str(t)) 482 | error = f'Setting {key} value should have these values: {",".join(str_valids)}' 483 | warn(error) 484 | elif val_valids is None: 485 | return True 486 | else: # Is a function 487 | if not val_valids(value): 488 | error = f'Setting {key} do not pass valid test' 489 | warn(error) 490 | else: 491 | return True 492 | else: 493 | error = f'Setting {key} does not exist' 494 | warn(error) 495 | return False 496 | 497 | def get(self, key: str, update: bool = True) -> Any: 498 | """ 499 | Return the settings value. 500 | 501 | :param key: Setting key 502 | :param update: Updates settings value 503 | :return: Value 504 | """ 505 | val = self._settings[key] 506 | 507 | # Update for some values 508 | if update: 509 | if key == self.CFG_PIPELINE: 510 | val = _PIPELINES[val] 511 | elif key == self.CFG_WINDOW_SIZE: 512 | val = _WINDOW_SIZE[val] 513 | 514 | return val 515 | 516 | def set(self, key: str, value: Any) -> None: 517 | """ 518 | Update a setting value. 519 | 520 | :param key: Setting key 521 | :param value: Value 522 | """ 523 | if not self.check_setting(key, value): 524 | raise ValueError(f'Invalid value for {key}') 525 | self._settings[key] = self._parse_str(value) 526 | 527 | def lang(self, tag: str) -> str: 528 | """ 529 | Get a lang tag. 530 | 531 | :param tag: Lang's tag 532 | :return: Lang value 533 | """ 534 | return self._lang.get(self.get(self.CFG_LANG), tag) 535 | 536 | def add_words(self, w: int) -> None: 537 | """ 538 | Add processed words. 
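# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the repository): Settings.save() above
# writes a plain "KEY = value" text file with '#' comment lines, and values
# are re-typed on load through _parse_str(). The stand-alone reader below
# only approximates that behaviour (booleans and numbers) and is meant as
# documentation of the file format, not as a replacement for the class.

def _read_cfg(lines):
    parsed = {}
    for line in lines:
        if '#' in line or '=' not in line:
            continue  # skip comment lines and malformed entries
        key, _, value = line.partition('=')
        value = value.strip()
        if value in ('True', 'False'):  # booleans are stored as literals
            value = (value == 'True')
        elif value.lstrip('+-').replace('.', '', 1).isdigit():
            value = float(value) if '.' in value else int(value)
        parsed[key.strip()] = value
    return parsed

# _read_cfg(['# comment', 'FONT_SIZE = 10', 'CHECK_REPETITION = False'])
# -> {'FONT_SIZE': 10, 'CHECK_REPETITION': False}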
539 | 540 | :param w: Words 541 | """ 542 | self._settings[self.CFG_TOTAL_PROCESSED_WORDS] += w 543 | self.save() 544 | 545 | def save(self) -> None: 546 | """ 547 | Save the settings to the file. 548 | """ 549 | try: 550 | f = open(_SETTINGS_FILE[0], 'w') 551 | keys = list(self._settings.keys()) 552 | keys.sort() 553 | f.write(f'# PyDetex v{ver.vernum} @ {__author__}\n') 554 | f.write(f'# Settings stored on {datetime.datetime.today().ctime()}\n') 555 | for k in keys: 556 | f.write(f'{k} = {str(self._settings[k]).strip()}\n') 557 | f.close() 558 | except PermissionError: 559 | error = f'Settings file {_SETTINGS_FILE[0]} could not saved (PermissionError)' 560 | warn(error) 561 | -------------------------------------------------------------------------------- /pydetex/_utils_tex.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | UTILS TEX 6 | Latex utils. 7 | """ 8 | 9 | __all__ = [ 10 | 'apply_tag_between_inside_char_command', 11 | 'apply_tag_tex_commands', 12 | 'apply_tag_tex_commands_no_argv', 13 | 'find_tex_command_char', 14 | 'find_tex_commands', 15 | 'find_tex_commands_noargv', 16 | 'find_tex_environments', 17 | 'get_tex_commands_args', 18 | 'TEX_COMMAND_CHARS', 19 | 'TEX_EQUATION_CHARS', 20 | 'tex_to_unicode' 21 | ] 22 | 23 | import flatlatex 24 | import os 25 | import re 26 | 27 | from flatlatex.parser import LatexSyntaxError 28 | from typing import Tuple, Union, List, Dict, Optional, Any 29 | 30 | # Flat latex object 31 | _FLATLATEX = flatlatex.converter(ignore_newlines=False, keep_spaces=True) 32 | 33 | # Tex to unicode 34 | _TEX_TO_UNICODE: Dict[str, Union[Dict[Any, str], List[Tuple[str, str]]]] = { 35 | 'latex_symbols': [], 36 | 'subscripts': {}, 37 | 'superscripts': {}, 38 | 'textbb': {}, 39 | 'textbf': {}, 40 | 'textcal': {}, 41 | 'textfrak': {}, 42 | 'textit': {}, 43 | 'textmono': {} 44 | } 45 | 46 | # Valid command chars 47 | TEX_COMMAND_CHARS = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 48 | 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 49 | 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 50 | 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 51 | 'W', 'X', 'Y', 'Z', '*', '@'] 52 | TEX_EQUATION_CHARS = [ 53 | ('$', '$', True), 54 | (r'\(', r'\)', False), 55 | (r'\[', r'\]', False), 56 | ('\\begin{align*}', '\\end{align*}', False), 57 | ('\\begin{align}', '\\end{align}', False), 58 | ('\\begin{displaymath}', '\\end{displaymath}', False), 59 | ('\\begin{equation*}', '\\end{equation*}', False), 60 | ('\\begin{equation}', '\\end{equation}', False), 61 | ('\\begin{gather*}', '\\end{gather*}', False), 62 | ('\\begin{gather}', '\\end{gather}', False), 63 | ('\\begin{math}', '\\end{math}', False) 64 | ] 65 | 66 | 67 | def find_tex_command_char( 68 | s: str, 69 | symbols_char: List[Tuple[str, str, bool]], 70 | ) -> Tuple[Tuple[int, int, int, int], ...]: 71 | """ 72 | Find symbols command positions. 73 | 74 | Example: 75 | 76 | .. code-block:: none 77 | 78 | 00000000001111111111.... 79 | 01234567890123456789.... 80 | Input: This is a $formula$ and this is not. 81 | Output: ((10, 11, 17, 18), ...) 
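# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the repository), mirroring the
# find_tex_command_char() docstring example above. Passing TEX_EQUATION_CHARS
# locates every supported equation delimiter; each returned tuple holds the
# positions of the opening symbol, the first content character, the last
# content character and the closing symbol.

from pydetex._utils_tex import TEX_EQUATION_CHARS, find_tex_command_char

positions = find_tex_command_char('This is a $formula$ and this is not.', TEX_EQUATION_CHARS)
# Expected, per the docstring above: ((10, 11, 17, 18),)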
82 | 83 | :param s: Latex string code 84 | :param symbols_char: Symbols to check ``[(initial, final, ignore escape), ...]`` 85 | :return: Positions 86 | """ 87 | assert isinstance(symbols_char, list) 88 | max_len = 0 89 | for j in symbols_char: 90 | assert len(j) == 3, f'Format is (initial, final, ignore escape); but received {j}' 91 | assert isinstance(j[0], str) and len(j[0]) > 0 and ' ' not in j[0] 92 | assert isinstance(j[1], str) and len(j[1]) > 0 and ' ' not in j[1] 93 | assert isinstance(j[2], bool) 94 | max_len = max(max_len, len(j[0]), len(j[1])) 95 | 96 | def _find(k: int, y: int, p: bool = True) -> bool: 97 | """ 98 | Returns true if from k char (in s) the symbols-char-y element is present. 99 | 100 | :param k: Position to start 101 | :param y: Indes of the symbol within the list 102 | :param p: Reads the first (True) or last element 103 | :return: True if exist 104 | """ 105 | if y < 0: 106 | return False 107 | n, m, ignore_escape = symbols_char[y] 108 | nm = n if p else m 109 | total = 0 110 | for z in range(len(nm)): 111 | if s[k + z] == nm[z] and (z == 0 and (not ignore_escape or ignore_escape and s[k - 1] != '\\') or z > 0): 112 | total += 1 113 | return total == len(nm) 114 | 115 | def _find_initial(k: int) -> int: 116 | """ 117 | Find which symbol is contained. 118 | 119 | :param k: Position to start from 120 | :return: The index of the symbol within the list 121 | """ 122 | for y in range(len(symbols_char)): 123 | if _find(k, y): 124 | return y 125 | return -1 126 | 127 | s = '_' + s + ' ' * max_len 128 | r = False # Inside tag 129 | r_u = -1 130 | a = 0 131 | found = [] 132 | 133 | for i in range(1, len(s) - max_len): 134 | u = _find_initial(i) 135 | v = _find(i, r_u, False) 136 | # Open tag 137 | if not r and u >= 0: 138 | a = i 139 | r = True 140 | r_u = u 141 | # Close 142 | elif r and v: 143 | r = False 144 | f, g = a - 1, i - 1 145 | found.append((f, f + len(symbols_char[r_u][0]), g - 1, g + len(symbols_char[r_u][1]) - 1)) 146 | 147 | return tuple(found) 148 | 149 | 150 | def apply_tag_between_inside_char_command( 151 | s: str, 152 | symbols_char: List[Tuple[str, str, bool]], 153 | tags: Union[Tuple[str, str, str, str], str] 154 | ) -> str: 155 | """ 156 | Apply tag between symbols. 157 | 158 | For example, if symbols are ``($, $)`` and tag is ``[1,2,3,4]``: 159 | 160 | .. code-block:: none 161 | 162 | Input: This is a $formula$ and this is not. 
163 | Output: This is a 1$2formula3$4 and this is not 164 | 165 | :param s: Latex string code 166 | :param symbols_char: ``[(initial, final, ignore escape), ...]`` 167 | :param tags: Tags to replace 168 | :return: String with tags 169 | """ 170 | if isinstance(tags, str): 171 | if tags == '': 172 | return s 173 | tags = (tags, tags, tags, tags) 174 | 175 | assert len(tags) == 4 176 | a, b, c, d = tags 177 | tex_tags = find_tex_command_char(s, symbols_char) 178 | 179 | if len(tex_tags) == 0: 180 | return s 181 | new_s = '' 182 | k = 0 # Moves through tags 183 | for i in range(len(s)): 184 | if k < len(tex_tags): 185 | if i == tex_tags[k][0]: 186 | new_s += a + s[i] 187 | continue 188 | elif tex_tags[k][0] < i < tex_tags[k][1]: 189 | pass 190 | elif i == tex_tags[k][1] and tex_tags[k][1] != tex_tags[k][3]: 191 | new_s += b + s[i] 192 | if tex_tags[k][2] - tex_tags[k][1] == 0: 193 | new_s += c 194 | continue 195 | elif i == tex_tags[k][2] and tex_tags[k][2] != tex_tags[k][0]: 196 | new_s += s[i] + c 197 | continue 198 | elif tex_tags[k][2] < i < tex_tags[k][3]: 199 | pass 200 | elif i == tex_tags[k][3]: 201 | new_s += s[i] + d 202 | k += 1 203 | continue 204 | new_s += s[i] 205 | 206 | return new_s 207 | 208 | 209 | def find_tex_commands(s: str, offset: int = 0) -> Tuple[Tuple[int, int, int, int, bool], ...]: 210 | """ 211 | Find all tex commands within a code. 212 | 213 | .. code-block:: none 214 | 215 | 00000000001111111111222 216 | 01234567890123456789012 217 | a b c d 218 | Example: This is \\aCommand{nice}... 219 | Output: ((8, 16, 18, 21), ...) 220 | 221 | :param s: Latex string code 222 | :param offset: Offset added to the positioning, useful when using recursive calling on substrings 223 | :return: Tuple if found codes ``(a, b, c, d, command continues)`` 224 | """ 225 | found: List = [] 226 | is_cmd = False 227 | is_argv = False 228 | s += '_' 229 | a, b, c0, c1, d = 0, -1, 0, 0, 0 230 | depth_0 = 0 # {} 231 | depth_1 = 0 # [] 232 | cont_chars = ('{', '[', ' ', '\n') 233 | cmd_idx = 0 # index 234 | mode_arg = -1 235 | 236 | for i in range(len(s) - 1): 237 | # Start a command 238 | if not is_cmd and s[i] == '\\' and s[i + 1] in TEX_COMMAND_CHARS: 239 | a, b, is_cmd, is_argv = i, -1, True, False 240 | cmd_idx += 1 241 | mode_arg = -1 242 | depth_0, depth_1 = 0, 0 243 | 244 | # If command before args encounter an invalid chad, disables the command 245 | elif is_cmd and not is_argv and s[i] not in cont_chars and s[i] not in TEX_COMMAND_CHARS: 246 | is_cmd = False 247 | if s[i] == '\\' and s[i + 1] in TEX_COMMAND_CHARS: 248 | a, b, is_cmd, is_argv = i, -1, True, False 249 | cmd_idx += 1 250 | 251 | # If command has a new line, but following chars are not space 252 | elif is_cmd and not is_argv and s[i] == '\n' and s[i + 1] in TEX_COMMAND_CHARS: 253 | is_cmd = False 254 | 255 | # If command, not arg, but an invalid char follows the space, disables the command 256 | elif is_cmd and not is_argv and s[i - 1] == ' ' and s[i] not in cont_chars: 257 | is_cmd = False 258 | 259 | # Inits a new arg 260 | elif is_cmd and s[i] in ('{', '[') and s[i - 1] != '\\': 261 | is_argv = True 262 | if b == -1: 263 | b = i - 1 264 | depth_0, depth_1 = 0, 0 265 | if s[i] == '{': 266 | if depth_0 == 0: 267 | c0 = i + 1 268 | if mode_arg < 0: 269 | mode_arg = 0 270 | depth_0 += 1 271 | else: 272 | if depth_1 == 0: 273 | c1 = i + 1 274 | if mode_arg < 0: 275 | mode_arg = 1 276 | depth_1 += 1 277 | 278 | # Ends the argument, only if depth condition satisfies 279 | elif is_cmd and is_argv and s[i] in ('}', ']') and 
s[i - 1] != '\\': 280 | if s[i] == '}': 281 | depth_0 -= 1 282 | else: # ] 283 | depth_1 -= 1 284 | 285 | if (depth_0 == 0 and mode_arg == 0) or (depth_1 == 0 and mode_arg == 1): # Finished 286 | d = i - 1 287 | found.append([a, b, c0 if s[i] == '}' else c1, d, cmd_idx]) 288 | if s[i + 1] not in cont_chars: 289 | is_cmd = False 290 | is_argv = False 291 | mode_arg = -1 292 | # elif depth_0 < 0 or depth_1 < 0: # Invalid argument (parenthesis imbalance) 293 | # is_cmd = False 294 | # is_argv = False 295 | # mode_arg = -1 296 | 297 | # Add the offsets 298 | for f in found: 299 | f[0] += offset 300 | f[1] += offset 301 | f[2] += offset 302 | f[3] += offset 303 | 304 | # Check if command continues 305 | if len(found) == 0: 306 | return () 307 | elif len(found) == 1: 308 | found[0][4] = False 309 | else: 310 | for k in range(1, len(found)): 311 | if found[k][4] == found[k - 1][4]: 312 | found[k - 1][4] = True 313 | else: 314 | found[k - 1][4] = False 315 | if k == len(found) - 1: 316 | found[k][4] = False 317 | for k in range(len(found)): 318 | # noinspection PyUnresolvedReferences 319 | found[k] = tuple(found[k]) 320 | 321 | return tuple(found) 322 | 323 | 324 | def _find_tex_env_recursive(original_s: str, s: str, offset: int = 0, depth: int = 0) -> List: 325 | """ 326 | Find all environments. 327 | 328 | :param s: Latex string code 329 | :param offset: Offset applied to the search 330 | :return: List of all found environment commands 331 | """ 332 | tags = find_tex_commands(s, offset=offset) 333 | new_tags = [] 334 | for t in tags: 335 | a, b, c, d, _ = t 336 | source_cmd = s[a - offset:b - offset + 1] 337 | if 'begin' not in source_cmd and 'end' not in source_cmd: 338 | # Get the arguments of the command, and check more environments there 339 | cmd_args = s[c - offset:d - offset + 1] 340 | if 'begin' in cmd_args or 'end' in cmd_args: 341 | if 'newenvironment' in source_cmd or 'newcommand' in source_cmd: # Prone to bugs 342 | continue 343 | for tr in _find_tex_env_recursive(original_s, cmd_args, offset=c, depth=depth + 1): 344 | new_tags.append(tr) 345 | else: 346 | new_tags.append(t) 347 | return new_tags 348 | 349 | 350 | def find_tex_environments(s: str) -> Tuple[Tuple[str, int, int, int, int, str, int, int], ...]: 351 | r""" 352 | Find all tex environments within a code. 353 | 354 | Example: 355 | 356 | .. code-block:: none 357 | 358 | 0000000000111111111122222222223333333333 359 | 0123456789012345678901234567890123456789 360 | a b c d 361 | Example: This is \begin{nice}[cmd]my...\end{nice} 362 | Output: (('nice', 8, 20, 29, 39, 'parentenv', 0, -1), ...) 363 | 364 | This method also returns the name of the parent environment, the depth of the 365 | environment, and the depth of the item environment (if itemizable). 366 | 367 | :param s: Latex string code 368 | :return: Tuple of found environments ``(env_name, a, b, c, d, parent_env_name, env_depth, env_item_depth)`` 369 | """ 370 | 371 | def _env_common(e: str) -> str: 372 | """ 373 | Return the common environment for a given name.
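# Usage sketch for find_tex_commands on its own docstring example. Each returned entry is
# (a, b, c, d, continues): a..b spans "\aCommand", c..d spans the argument body, and the last
# flag says whether another argument of the same command follows. Import path assumed as above.
import pydetex.utils as ut

s = 'This is \\aCommand{nice}...'
for a, b, c, d, continues in ut.find_tex_commands(s):
    print(s[a:b + 1], s[c:d + 1], continues)  # -> \aCommand nice False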
374 | 375 | :param e: Environment name 376 | :return: Common environment 377 | """ 378 | if ('itemize' in e) or ('enumerate' in e) or ('tablenotes' in e): 379 | return 'item_' 380 | return '' 381 | 382 | tags = _find_tex_env_recursive(s, s) 383 | envs = [] 384 | env: Dict[str, List[Tuple[int, int, str, int]]] = {} 385 | last_env = '' 386 | env_depth = 0 387 | cmds_cont = [] 388 | env_depths: Dict[str, int] = {} 389 | 390 | for t in tags: 391 | a, b, c, d, _ = t 392 | if 'begin' in s[a:b + 1]: 393 | env_name = s[c:d + 1] 394 | c_env_name = _env_common(env_name) # Common environment name 395 | if c_env_name not in env_depths.keys(): 396 | env_depths[c_env_name] = 0 397 | else: 398 | env_depths[c_env_name] += 1 399 | env_i = (a, d + 2, last_env, env_depth) 400 | if env_name not in env: 401 | env[env_name] = [env_i] 402 | else: 403 | env[env_name].append(env_i) 404 | if a not in cmds_cont: 405 | cmds_cont.append(a) 406 | last_env = env_name 407 | env_depth += 1 408 | elif 'end' in s[a:b + 1]: 409 | env_name = s[c:d + 1] 410 | c_env_name = _env_common(env_name) # Common environment name 411 | 412 | if env_name in env.keys(): 413 | env_i = env[env_name].pop() 414 | 415 | # Update env itemize depth 416 | env_depth_item = -1 417 | if c_env_name != '': 418 | env_depth_item = env_depths[c_env_name] 419 | env_depths[c_env_name] -= 1 420 | 421 | envs.append(( 422 | env_name, # Environment name 423 | env_i[0], # a-position of the env 424 | env_i[1], # b-position 425 | a, # c-position 426 | d, # d-position 427 | env_i[2], # parent environment name 428 | env_i[3], # depth of the environment 429 | env_depth_item # itemize depth 430 | )) 431 | 432 | if len(env[env_name]) == 0: 433 | del env[env_name] 434 | last_env = env_i[2] 435 | env_depth -= 1 436 | 437 | return tuple(envs) 438 | 439 | 440 | def get_tex_commands_args( 441 | s: str, 442 | pos: bool = False 443 | ) -> Tuple[Tuple[Union[str, Tuple[str, bool], Tuple[int, int]], ...], ...]: 444 | r""" 445 | Get all the arguments from a tex command. Each command argument has a boolean 446 | indicating if that is optional or not. 447 | 448 | .. code-block:: none 449 | 450 | Example: This is \aCommand[\label{}]{nice} and... 451 | Output: (('aCommand', ('\label{}', True), ('nice', False)), ...) 452 | 453 | :param s: Latex string code 454 | :param pos: Add the numerical position of the original string at the last position 455 | :return: Arguments 456 | """ 457 | tags = find_tex_commands(s) 458 | commands = [] 459 | command = [] 460 | for t in tags: 461 | a, b, c, d, cont = t 462 | if len(command) == 0: 463 | command.append(s[a + 1:b + 1].strip()) 464 | arg = s[c - 1:d + 2] 465 | command.append((arg[1:-1], len(arg) != 0 and arg[0] == '[')) 466 | if not cont: 467 | if pos: 468 | command.append((a, d + 2)) 469 | commands.append(tuple(command)) 470 | command = [] 471 | return tuple(commands) 472 | 473 | 474 | def find_tex_commands_noargv(s: str) -> Tuple[Tuple[int, int], ...]: 475 | """ 476 | Find all tex commands with no arguments within a code. 477 | 478 | .. code-block:: none 479 | 480 | 00000000001111111111222 481 | 01234567890123456789012 482 | x x 483 | Example: This is \aCommand ... 484 | Output: ((8,16), ...) 
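# Usage sketch for get_tex_commands_args, taken from its docstring example: after the command
# name, every argument comes back as a (text, is_optional) pair. Import path assumed as above.
import pydetex.utils as ut

s = 'This is \\aCommand[\\label{}]{nice} and...'
for cmd in ut.get_tex_commands_args(s):
    name, args = cmd[0], cmd[1:]
    print(name, args)  # expected: aCommand (('\\label{}', True), ('nice', False))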
485 | 486 | :param s: Latex string code 487 | :return: Tuple if found codes 488 | """ 489 | found = [] 490 | is_cmd = False 491 | s += '_' 492 | a = 0 493 | cont_chars = ('{', '[', ' ') 494 | 495 | for i in range(len(s) - 1): 496 | if not is_cmd and s[i] == '\\' and s[i + 1] in TEX_COMMAND_CHARS: 497 | if i > 0 and s[i - 1] == '⇲': 498 | continue 499 | a = i 500 | is_cmd = True 501 | 502 | elif is_cmd and s[i] == '\\': 503 | if i - 1 - a > 0: 504 | found.append([a, i - 1]) 505 | a = i 506 | 507 | elif is_cmd and s[i] in ('{', '['): 508 | is_cmd = False 509 | 510 | # If command, not arg, but an invalid char follows the space, disables the command 511 | elif is_cmd and s[i - 1] == ' ' and s[i] not in cont_chars: 512 | is_cmd = False 513 | found.append([a, i - 1]) 514 | 515 | elif is_cmd and s[i] not in TEX_COMMAND_CHARS and s[i] not in cont_chars: 516 | is_cmd = False 517 | found.append([a, i - 1]) 518 | 519 | if is_cmd and a != len(s) - 2: 520 | found.append([a, len(s) - 2]) 521 | 522 | # Strip chars 523 | for k in range(len(found)): 524 | ch = found[k][1] 525 | for j in range(ch): 526 | if s[found[k][1]] == ' ': 527 | found[k][1] -= 1 528 | else: 529 | break 530 | # noinspection PyUnresolvedReferences 531 | found[k] = tuple(found[k]) 532 | 533 | # noinspection PyTypeChecker 534 | return tuple(found) 535 | 536 | 537 | def apply_tag_tex_commands( 538 | s: str, 539 | tags: Union[Tuple[str, str, str, str, str], str] 540 | ) -> str: 541 | """ 542 | Apply tag to tex command. 543 | 544 | For example, if tag is ``[1,2,3,4,5]``: 545 | 546 | .. code-block:: none 547 | 548 | Input: This is a \\formula{epic} and this is not 549 | Output: This is a 1\\formula2{3epic4}5 and this is not 550 | 551 | :param s: Latex string code 552 | :param tags: Tags (length 5) 553 | :return: Code with tags 554 | """ 555 | if isinstance(tags, str): 556 | if tags == '': 557 | return s 558 | tags = (tags, tags, tags, tags, tags) 559 | assert len(tags) == 5 560 | a, b, c, d, e = tags # Unpack 561 | 562 | tex_tags = find_tex_commands(s) 563 | if len(tex_tags) == 0: 564 | return s 565 | new_s = '' 566 | k = 0 # Moves through tags 567 | i = -1 568 | for _ in range(len(s)): 569 | i += 1 570 | if i == len(s): 571 | break 572 | if k < len(tex_tags) and i in tex_tags[k][0:4]: 573 | if i == tex_tags[k][0]: 574 | new_s += a + s[i] 575 | elif i == tex_tags[k][1]: 576 | new_s += s[i] + b 577 | elif i == tex_tags[k][2] and i != tex_tags[k][3]: 578 | new_s += c + s[i] 579 | elif i == tex_tags[k][3]: 580 | if i == tex_tags[k][2]: 581 | new_s += c 582 | new_s += s[i] + d + s[i + 1] + e 583 | i += 1 584 | # if continues 585 | if tex_tags[k][4]: 586 | new_s += b 587 | k += 1 588 | else: 589 | new_s += s[i] 590 | 591 | return new_s[0:len(new_s)] 592 | 593 | 594 | def apply_tag_tex_commands_no_argv( 595 | s: str, 596 | tags: Union[Tuple[str, str], str] 597 | ) -> str: 598 | """ 599 | Apply tag to tex command. 600 | 601 | For example, if tag is ``[1,2]``: 602 | 603 | .. code-block:: none 604 | 605 | Input: This is a \\formula and this is not. 
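# Usage sketch for apply_tag_tex_commands, mirroring its docstring example: five tags wrap the
# command name and its argument. Import path assumed as above.
import pydetex.utils as ut

s = 'This is a \\formula{epic} and this is not'
print(ut.apply_tag_tex_commands(s, ('1', '2', '3', '4', '5')))
# per the docstring: This is a 1\formula2{3epic4}5 and this is not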
606 | Output: This is a 1\\formula2 and this is not 607 | 608 | :param s: Latex string code 609 | :param tags: Tags (length 5) 610 | :return: Code with tags 611 | """ 612 | if isinstance(tags, str): 613 | if tags == '': 614 | return s 615 | tags = (tags, tags) 616 | assert len(tags) == 2 617 | a, b = tags # Unpack 618 | 619 | tex_tags = find_tex_commands_noargv(s) 620 | if len(tex_tags) == 0: 621 | return s 622 | new_s = '' 623 | k = 0 # Moves through tags 624 | i = -1 625 | for _ in range(len(s)): 626 | i += 1 627 | if k < len(tex_tags) and i in tex_tags[k]: 628 | if i == tex_tags[k][0]: 629 | new_s += a + s[i] 630 | elif i == tex_tags[k][1]: 631 | new_s += s[i] + b 632 | k += 1 633 | else: 634 | new_s += s[i] 635 | 636 | return new_s 637 | 638 | 639 | def _convert_single_symbol(s: str) -> Optional[str]: 640 | """ 641 | If ``s`` is just a latex code ``'alpha'`` or ``'beta'`` it converts it to its 642 | unicode representation. 643 | 644 | :param s: Latex string code 645 | :return: Latex with converted single symbols 646 | """ 647 | if '\\' not in s[0]: 648 | s = '\\' + s 649 | for (code, val) in _TEX_TO_UNICODE['latex_symbols']: 650 | if code == s: 651 | return val 652 | return None 653 | 654 | 655 | def _convert_latex_symbols(s: str) -> str: 656 | """ 657 | Replace each ``'\alpha'``, ``'\beta'`` and similar latex symbols with 658 | their unicode representation. 659 | 660 | :param s: Latex string code 661 | :return: Replaced symbols 662 | """ 663 | for (code, val) in _TEX_TO_UNICODE['latex_symbols']: 664 | s = s.replace(code, val) 665 | return s 666 | 667 | 668 | def _process_starting_modifiers(s: str) -> str: 669 | """ 670 | If s start with ``'it '``, ``'cal '``, etc. then make the whole string 671 | italic, calligraphic, etc. 672 | 673 | :param s: Latex string code 674 | :return: Modified text 675 | """ 676 | s = re.sub('^bb ', r'\\bb{', s) 677 | s = re.sub('^bf ', r'\\bf{', s) 678 | s = re.sub('^it ', r'\\it{', s) 679 | s = re.sub('^cal ', r'\\cal{', s) 680 | s = re.sub('^frak ', r'\\frak{', s) 681 | s = re.sub('^mono ', r'\\mono{', s) 682 | return s 683 | 684 | 685 | def _apply_all_modifiers(s: str) -> str: 686 | """ 687 | Applies all modifiers. 688 | 689 | :param s: Latex string code 690 | :return: Text with replaced chars 691 | """ 692 | s = _apply_modifier(s, '^', _TEX_TO_UNICODE['superscripts']) 693 | s = _apply_modifier(s, '_', _TEX_TO_UNICODE['subscripts']) 694 | 695 | s = _apply_modifier(s, '\\bb', _TEX_TO_UNICODE['textbb']) 696 | s = _apply_modifier(s, '\\bf', _TEX_TO_UNICODE['textbf']) 697 | s = _apply_modifier(s, '\\cal', _TEX_TO_UNICODE['textcal']) 698 | s = _apply_modifier(s, '\\emph', _TEX_TO_UNICODE['textit']) 699 | s = _apply_modifier(s, '\\frak', _TEX_TO_UNICODE['textfrak']) 700 | s = _apply_modifier(s, '\\it', _TEX_TO_UNICODE['textit']) 701 | s = _apply_modifier(s, '\\mono', _TEX_TO_UNICODE['textmono']) 702 | 703 | return s 704 | 705 | 706 | def _apply_modifier(s: str, modifier: str, d: Dict[Any, str]) -> str: 707 | """ 708 | This will search for the ^ signs and replace the next 709 | digit or (digits when {} is used) with its/their uppercase representation. 710 | 711 | :param s: Latex string code 712 | :param modifier: Modifier command 713 | :param d: Dict to look upon 714 | :return: New text with replaced text. 
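# Usage sketch for apply_tag_tex_commands_no_argv, mirroring its docstring example. Note that
# the assert in the function body requires exactly two tags, even though the docstring above
# says "length 5". Import path assumed as above.
import pydetex.utils as ut

s = 'This is a \\formula and this is not.'
print(ut.apply_tag_tex_commands_no_argv(s, ('1', '2')))
# per the docstring: This is a 1\formula2 and this is not.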
715 | """ 716 | s = s.replace(modifier, "^") 717 | newtext = "" 718 | mode_normal, mode_modified, mode_long = range(3) 719 | mode = mode_normal 720 | for ch in s: 721 | if mode == mode_normal and ch == '^': 722 | mode = mode_modified 723 | continue 724 | elif mode == mode_modified and ch == '{': 725 | mode = mode_long 726 | continue 727 | elif mode == mode_modified: 728 | newtext += d.get(ch, ch) 729 | mode = mode_normal 730 | continue 731 | elif mode == mode_long and ch == '}': 732 | mode = mode_normal 733 | continue 734 | 735 | if mode == mode_normal: 736 | newtext += ch 737 | else: 738 | newtext += d.get(ch, ch) 739 | return newtext 740 | 741 | 742 | def __load_unicode() -> None: 743 | """ 744 | Loads the unicode data. 745 | """ 746 | respath = str(os.path.abspath(os.path.dirname(__file__))).replace('\\', '/') + '/res/u_' 747 | for j in _TEX_TO_UNICODE.keys(): 748 | if j == 'latex_symbols': 749 | with open(f'{respath}symbols.txt', encoding='utf-8') as f: 750 | line = f.readline() 751 | while line != "": 752 | words = line.split() 753 | code = words[0] 754 | val = words[1] 755 | _TEX_TO_UNICODE['latex_symbols'].append((code, val)) 756 | line = f.readline() 757 | else: 758 | with open(f'{respath}{j}.txt', encoding='utf-8') as f: 759 | line = f.readline() 760 | while line != '': 761 | words = line.split() 762 | code = words[0] 763 | val = words[1] 764 | _TEX_TO_UNICODE[j][code] = val 765 | line = f.readline() 766 | 767 | 768 | def tex_to_unicode(s: str) -> str: 769 | """ 770 | Transforms tex code to unicode. 771 | 772 | :param s: Latex string code 773 | :return: Text in unicode 774 | """ 775 | if s.strip() == '': 776 | return s 777 | ss = _convert_single_symbol(s) 778 | if ss is not None: 779 | return ss 780 | 781 | s = _convert_latex_symbols(s) 782 | s = _process_starting_modifiers(s) 783 | s = _apply_all_modifiers(s) 784 | 785 | # Last filter 786 | s = s.replace('\n\n', '\n').replace(' ', ' ').replace('\t', ' ') 787 | try: 788 | s = _FLATLATEX.convert(s) 789 | except LatexSyntaxError: 790 | pass 791 | 792 | return s 793 | 794 | 795 | # Loads the unicode data 796 | __load_unicode() 797 | -------------------------------------------------------------------------------- /test/test_parsers.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyDetex 3 | https://github.com/ppizarror/PyDetex 4 | 5 | TEST PARSERS 6 | Test several parsers which perform a single operation. 7 | """ 8 | 9 | from test._base import BaseTest 10 | import pydetex 11 | import pydetex.parsers as par 12 | import pydetex.utils as ut 13 | 14 | 15 | class ParserTest(BaseTest): 16 | 17 | def test_version(self) -> None: 18 | """ 19 | Configure version. 20 | """ 21 | self.assertNotEqual(pydetex.version.vernum, '') 22 | 23 | def test_process_labels(self) -> None: 24 | """ 25 | Removes labels. 26 | """ 27 | self.assertEqual(par.process_labels('\\section{Research method}\\label{researchmethod}'), 28 | '\\section{Research method}') 29 | self.assertEqual(par.process_labels('This is \\label{epic} a very nice latex'), 30 | 'This is a very nice latex') 31 | 32 | def test_find_str(self) -> None: 33 | """ 34 | Test find string. 35 | """ 36 | s = 'This is a latex string, \\textbf{in bold}' 37 | self.assertEqual(par.find_str(s, '\\textit'), -1) 38 | self.assertEqual(par.find_str(s, '\\textbf'), 24) 39 | s2 = """ 40 | This is another example, \\cite{A} thinks that it is good, whereas 41 | \\citep{K} don't. However, \\cite*{A} is more interesting. 
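# Usage sketch for tex_to_unicode. The conversions follow the patterns exercised by the
# unicode tests further below (greek letters, super/subscripts); the exact glyphs depend on
# the bundled res/u_*.txt tables and on flatlatex, so treat the outputs as approximate.
# Import path assumed as above.
import pydetex.utils as ut

print(ut.tex_to_unicode('\\alpha^{2+3}'))   # expected something like: α²⁺³
print(ut.tex_to_unicode('x_0 + \\beta^2'))  # expected something like: x₀ + β²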
42 | """ 43 | self.assertEqual(par.find_str(s2, '\\cite'), 34) 44 | self.assertEqual(par.find_str(s2, '\\cite*'), 109) 45 | 46 | def test_remove_tag(self) -> None: 47 | """ 48 | Test remove tags. 49 | """ 50 | self.assertEqual(par.remove_tag('lorem ipsum \\textbf{hi}', 'textbf'), 'lorem ipsum hi') 51 | self.assertEqual(par.remove_tag('lorem ipsum \\textbf{\\textbf{hi}}', 'textbf'), 'lorem ipsum hi') 52 | 53 | def test_process_cite(self) -> None: 54 | """ 55 | Removes cites from text. 56 | """ 57 | s = 'hello \\cite{number1,number2} epic' 58 | self.assertEqual(par.replace_pydetex_tags(par.process_cite(s)), 59 | 'hello [1, 2] epic') 60 | s = 'this is \\cite{number1} epic \\cite{number2} and \\cite{number1}' 61 | self.assertEqual(par.replace_pydetex_tags(par.process_cite(s)), 62 | 'this is [1] epic [2] and [1]') 63 | s = 'This is another example, \\cite*{Downson} et al. suggests that yes, but \\cite{Epic} not' 64 | self.assertEqual( 65 | par.replace_pydetex_tags(par.process_cite(s)), 66 | 'This is another example, [1] et al. suggests that yes, but [2] not') 67 | # Test equation cite 68 | s = 'Here, we test an equation with \\eqref{mycite}' 69 | self.assertEqual( 70 | par.replace_pydetex_tags(par.process_cite(s)), 71 | 'Here, we test an equation with (1)') 72 | # Test multiple cites 73 | s = 'This is an example \\cite{b} \\cite{a, b, c , d, e}' 74 | self.assertEqual(par.replace_pydetex_tags(par.process_cite(s)), 'This is an example [1] [1-5]') 75 | self.assertEqual(par.replace_pydetex_tags(par.process_cite(s, compress_cite=False)), 76 | 'This is an example [1] [1, 2, 3, 4, 5]') 77 | self.assertEqual(par.replace_pydetex_tags(par.process_cite(s, sort_cites=False)), 78 | 'This is an example [1] [2, 1, 3-5]') 79 | 80 | def test_process_citeauthor(self) -> None: 81 | """ 82 | Removes citeauthor from text. 83 | """ 84 | s = 'hello \\citeauthor{number1,number2} epic' 85 | self.assertEqual(par.replace_pydetex_tags(par.process_citeauthor(par.process_cite(s), 'en')), 86 | 'hello [authors] epic') 87 | s = 'hello \\citeauthor{number1} epic' 88 | self.assertEqual(par.replace_pydetex_tags(par.process_citeauthor(par.process_cite(s), 'en')), 89 | 'hello [author] epic') 90 | 91 | def test_process_ref(self) -> None: 92 | """ 93 | Removes references from text. 94 | """ 95 | self.assertEqual(par.process_ref('this is a \\ref{myref}'), 'this is a 1') 96 | self.assertEqual(par.process_ref('this is a \\ref{myref} and \\ref*{myref}'), 'this is a 1 and 1') 97 | 98 | def test_remove_common_tags(self) -> None: 99 | """ 100 | Remove common tags. 101 | """ 102 | self.assertEqual(par.remove_common_tags('this is \\hl{a}'), 'this is a') 103 | self.assertEqual(par.remove_common_tags('this is \\textsuperscript{\\hl{nice}}'), 'this is nice') 104 | 105 | def test_remove_comments(self) -> None: 106 | """ 107 | Removes comments. 108 | """ 109 | self.assertEqual(par.remove_comments('This is a \% percentage, and % a comment'), 110 | 'This is a ⇱COMMENT_PERCENTAGE_SYMBOL⇲ percentage, and') 111 | s = """ 112 | This is a multi-line file, typical from latex% comment 113 | 114 | % Typical comment lines..... 115 | 116 | Whereas this is another line or paragraph. So boring 117 | """ 118 | self.assertEqual(par.remove_comments(s), 119 | 'This is a multi-line file, typical from latex\n\nWhereas this is another line or paragraph. 
So boring') 120 | 121 | # Comments right to text 122 | s = """ 123 | Web of Science, % https://webofknowledge.com/ 124 | Scopus, % https://www.scopus.com/ 125 | IEEE/IET Xplore, % https://ieeexplore.ieee.org/ 126 | Science Direct, % https://uchile.idm.oclc.org/login?url=https://www.sciencedirect.com/ 127 | """ 128 | self.assertEqual(par.remove_comments(s), 'Web of Science, Scopus, IEEE/IET Xplore, Science Direct,') 129 | 130 | # Comments at start 131 | s = """% !TeX spellcheck = en_US 132 | 133 | \section{Introduction} 134 | 135 | Architectural floor plans are documents that result from an iterative design, planning, and engineering pro""" 136 | self.assertEqual( 137 | par.remove_comments(s), 138 | '\\section{Introduction}\n\nArchitectural floor plans are documents that result from an iterative design, ' 139 | 'planning, and engineering pro') 140 | 141 | # Comment right to newline 142 | s = 'Therefore, the scope was restricted to analyzing vector-based CAD files or retrieving individual elements ' \ 143 | 'from plans with a simple format. \\\\% Therefore, the scope was restricted to analyze vector-based CAD files,' \ 144 | ' or retrieving individual elements from plans with a simple format. \\' 145 | t = 'Therefore, the scope was restricted to analyzing vector-based CAD files or retrieving individual elements ' \ 146 | 'from plans with a simple format. \\\\' 147 | self.assertEqual(par.remove_comments(s), t) 148 | 149 | def test_simple_replace(self) -> None: 150 | """ 151 | Test simple replace format. 152 | """ 153 | self.assertEqual(par.simple_replace('This is an \\itemBad a'), 'This is an \\itemBad a') 154 | self.assertEqual(par.simple_replace('This is a example formula $\\alpha\longrightarrow\\beta+1$'), 155 | 'This is a example formula $α⟶β+1$') 156 | self.assertEqual(par.simple_replace('This is \\alphaNot but \\alpha'), 157 | 'This is \\alphaNot but α') 158 | self.assertEqual(par.simple_replace('This is a $x_0$ and $x^2$'), 'This is a $x₀$ and $x²$') 159 | self.assertEqual(par.simple_replace('The following example $\\alpha_0+\\beta^2=0$'), 160 | 'The following example $α₀+β²=0$') 161 | self.assertEqual(par.simple_replace('This is a $x_0$ and \(x^2\)'), 'This is a $x₀$ and \(x²\)') 162 | self.assertEqual(par.simple_replace('This is $\\alpha$'), 'This is $α$') 163 | self.assertEqual(par.simple_replace('This is \#my\_var'), 'This is #my_var') 164 | 165 | def test_parse_inputs(self) -> None: 166 | """ 167 | Parse inputs. 168 | """ 169 | self.assertEqual(par._NOT_FOUND_FILES, []) 170 | self.assertEqual(par.process_inputs('This loads a \\input{latex} or \\input{} epic'), 171 | 'This loads a \\input{latex} or \\input{} epic') 172 | self.assertEqual(par._NOT_FOUND_FILES, ['latex.tex', '.tex']) 173 | self.assertEqual(par.process_inputs('This loads a \\input{latex} or \\input{} epic'), 174 | 'This loads a \\input{latex} or \\input{} epic') 175 | self.assertEqual(par.process_inputs('This loads a \\input{data/simple} epic', clear_not_found_files=True), 176 | 'This loads a this is a simple file epic') 177 | 178 | def test_remove_commands_char(self) -> None: 179 | """ 180 | Remove commands char. 181 | """ 182 | s = 'This is a $command$!' 183 | self.assertEqual(par.remove_equations(s), 'This is a !') 184 | s = 'This is a $command\$ but this does not delete$!' 185 | self.assertEqual(par.remove_equations(s), 'This is a !') 186 | s = 'This is a $command!' 187 | self.assertEqual(par.remove_commands_char(s, chars=ut.TEX_EQUATION_CHARS), s) 188 | s = 'This is a$$ command!' 
189 | self.assertEqual(par.remove_equations(s), 'This is a command!') 190 | s = 'This is a $comman$ and $this should be removed too$!' 191 | self.assertEqual(par.remove_equations(s), 'This is a and !') 192 | s = 'This is a \(comman\) and \(this should be removed too\)!' 193 | self.assertEqual(par.remove_equations(s), 'This is a and !') 194 | s = 'This is a \(\) and $X$!' 195 | self.assertEqual(par.remove_equations(s), 'This is a and !') 196 | s = '$X$\(y\)$alpha$$$$$$key$' 197 | self.assertEqual(par.remove_equations(s), '') 198 | 199 | def test_remove_commands(self) -> None: 200 | """ 201 | Remove commands. 202 | """ 203 | s = 'This \\f{must be removed} yes!' 204 | self.assertEqual(par.remove_commands_param(s, 'en'), 'This yes!') 205 | self.assertEqual(par.remove_commands_param('', 'en'), '') 206 | s = 'This \\texttt{\insertimage{nice}{1}}no' 207 | self.assertEqual(par.remove_commands_param(s, 'en'), 'This no') 208 | s = '\\insertimage[\label{epic}]{delete this}' 209 | self.assertEqual(par.remove_commands_param(s, 'en'), '') 210 | s = 'Very\\insertimage[\label{epic}]{delete this} Epic' 211 | self.assertEqual(par.remove_commands_param(s, 'en'), 'Very Epic') 212 | s = 'Very\\insertimage[\label{epic}]{delete this} Epic \\not yes' 213 | self.assertEqual(par.remove_commands_param(s, 'en'), 'Very Epic \\not yes') 214 | s = 'Ni\\f {}ce' 215 | self.assertEqual(par.remove_commands_param(s, 'en'), 'Nice') 216 | s = 'Ni\\f \n [][][]{}ce' 217 | self.assertEqual(par.remove_commands_param(s, 'en'), 'Nice') 218 | s = '\caption {thus, the analysis \{cannot\} be based \mycommand{only} using {nice} symbols}' 219 | self.assertEqual(par.replace_pydetex_tags(par.remove_commands_param(s, 'en')).strip(), 220 | 'CAPTION: thus, the analysis \{cannot\} be based using nice symbols') 221 | 222 | def test_remove_commands_noargv(self) -> None: 223 | """ 224 | Remove commands without arguments. 225 | """ 226 | s = 'This\\image remove' 227 | self.assertEqual(par.remove_commands_param_noargv(s), 'This remove') 228 | s = 'This inserts an \\insertimage[width=1\linewidth]' 229 | self.assertEqual(par.remove_commands_param_noargv(s), 'This inserts an \\insertimage[width=1]') 230 | s = 'This \\delete' 231 | self.assertEqual(par.remove_commands_param_noargv(s), 'This ') 232 | s = 'This \\delete ' 233 | self.assertEqual(par.remove_commands_param_noargv(s), 'This ') 234 | s = '\\delete yes' 235 | self.assertEqual(par.remove_commands_param_noargv(s), ' yes') 236 | s = '\\delete' 237 | self.assertEqual(par.remove_commands_param_noargv(s), '') 238 | 239 | def test_process_chars_equations(self) -> None: 240 | """ 241 | Process single char equations. 242 | """ 243 | # Test single only 244 | s = 'This code does not \$contain any equation$!!' 245 | self.assertEqual(par.process_chars_equations(s, 'en', single_only=True), s) 246 | s = 'This code must be $x$ processed!!' 
247 | self.assertEqual(par.process_chars_equations(s, 'en', single_only=True), 'This code must be x processed!!') 248 | s = par.simple_replace('$\\alpha$-shape is really nice') 249 | self.assertEqual(par.process_chars_equations(s, 'en', single_only=True), 'α-shape is really nice') 250 | s = 'Because $x$ no lower needs any other supervision as $y$ or $z$ in \$30 or \$40$$' 251 | self.assertEqual(par.process_chars_equations(s, 'en', single_only=True), 252 | 'Because x no lower needs any other supervision as y or z in \$30 or \$40') 253 | s = 'This code $with several chars$ should not be removed' 254 | self.assertEqual(par.process_chars_equations(s, 'en', single_only=True), 255 | 'This code with several chars should not be removed') 256 | s = 'This code must be $$ processed!!' 257 | self.assertEqual(par.process_chars_equations(s, 'en', single_only=True), 'This code must be processed!!') 258 | s = 'an $x$$y$$z$' 259 | self.assertEqual(par.process_chars_equations(s, 'en', single_only=True), 'an xyz') 260 | 261 | # Test multiple 262 | s = 'This code $with several chars$ should not be removed' 263 | self.assertEqual(par.process_chars_equations(s, 'en', single_only=False), 264 | 'This code EQUATION_0 should not be removed') 265 | s = 'This code \(with several chars\) should not be removed' 266 | self.assertEqual(par.process_chars_equations(s, 'en', single_only=False), 267 | 'This code EQUATION_0 should not be removed') 268 | s = 'This $equation 0$ and \$equation $equation 1$ must by replaced' 269 | self.assertEqual(par.process_chars_equations(s, '-', single_only=False), 270 | 'This EQUATION_0 and \$equation EQUATION_1 must by replaced') 271 | 272 | # Test environments 273 | s = """My new equation: 274 | \\begin{equation} 275 | a+b 276 | \\end{equation}""" 277 | self.assertEqual(par.process_chars_equations(s, '-', single_only=False), 278 | 'My new equation:\n EQUATION_0') 279 | 280 | def test_output_text_for_some_commands(self) -> None: 281 | """ 282 | Test output text for some commands, like caption or subfigure. 283 | """ 284 | 285 | def out(s_: str) -> str: 286 | """ 287 | Call method. 
288 | """ 289 | return par.replace_pydetex_tags(par.output_text_for_some_commands(s_, 'en')).strip() 290 | 291 | s = """ 292 | \\begin{figure} 293 | \centering 294 | \\reflectbox{% 295 | \includegraphics[width=0.5\textwidth]{gull}} 296 | \caption {A picture of the same gull\nlooking the other way!} 297 | \caption[invalid] 298 | \end{figure} 299 | """ 300 | self.assertEqual(out(s), 'CAPTION: A picture of the same gull looking the other way!') 301 | 302 | # Custom template 303 | s = '\\insertimage[]{imagefile}{width=5cm}{e}' 304 | self.assertEqual(out(s), 'FIGURE_CAPTION: e') 305 | s = '\\insertimage{imagefile}{width=5cm}{e}' 306 | self.assertEqual(out(s), 'FIGURE_CAPTION: e') 307 | s = '\\insertimageboxed{imagefile}{width=5cm}{0.5}{legend}' 308 | self.assertEqual(out(s), 'FIGURE_CAPTION: legend') 309 | s = 'Nice\n\insertimage[\label{unetmodel}]{unet_compressed}{width=\linewidth}{A U-Net model.}' 310 | self.assertEqual(out(s), 'FIGURE_CAPTION: A U-Net model.') 311 | 312 | # Test other 313 | s = 'This is a \\href{https://google.com}{A link}' 314 | self.assertEqual(out(s), 'LINK: A link') 315 | s = '\section{a}\section*{a}]' 316 | self.assertEqual(out(s), 'a\n\na') 317 | s = '\\texttt{nice!} and \emph{nice!}' 318 | self.assertEqual(out(s), 'nice!nice!') 319 | s = '\\textit{\href{a}{link}}' 320 | self.assertEqual(out(s), 'LINK: link') 321 | 322 | # Test MakeUppercase 323 | s = '\\MakeUppercase{this is a Test}' 324 | self.assertEqual(out(s), 'THIS IS A TEST') 325 | s = '\\uppercase{this is a Test}' 326 | self.assertEqual(out(s), 'THIS IS A TEST') 327 | s = '\\MakeLowercase{THIS is a Test}' 328 | self.assertEqual(out(s), 'this is a test') 329 | s = '\\lowercase{THIS is a Test}' 330 | self.assertEqual(out(s), 'this is a test') 331 | 332 | # Test quotes 333 | s = '\quotes{a quoted}' 334 | self.assertEqual(out(s), '"a quoted"') 335 | s = '\enquote{a quoted}' 336 | self.assertEqual(out(s), '"a quoted"') 337 | s = '\quotes{\href{a}{link}}' 338 | self.assertEqual(out(s), '"LINK: link"') 339 | s = '\doublequotes{\href{a}{link}}' 340 | self.assertEqual(out(s), '"LINK: link"') 341 | 342 | # Test acronym 343 | for i in ('ac', 'acf', 'acs', 'acl'): 344 | self.assertEqual(out(f'\\{i}{{XYZ}}'), 'XYZ') 345 | 346 | # Test underline/strike 347 | for i in ('underline', 'so', 'st', 'hl'): 348 | self.assertEqual(out(f'\\{i}{{XYZ}}'), 'XYZ') 349 | 350 | def test_unicode_chars_equations(self) -> None: 351 | """ 352 | Test unicode char equations. 353 | """ 354 | s = 'This is my $\\alpha^2 \cdot \\alpha^{2+3} \equiv \\alpha^7$ equation' 355 | self.assertEqual(par.unicode_chars_equations(s), 'This is my $α² ⋅ α²⁺³ ≡ α⁷$ equation') 356 | s = 'This is my $x$ equation' 357 | self.assertEqual(par.unicode_chars_equations(s), 'This is my $x$ equation') 358 | s = 'This is my $\{a+b\}=min\{t\}$ equation' 359 | self.assertEqual(par.replace_pydetex_tags(par.unicode_chars_equations(s)), 360 | 'This is my ${a+b}=min{t}$ equation') 361 | s = 'This is my $$ equation' 362 | self.assertEqual(par.unicode_chars_equations(s), 'This is my $$ equation') 363 | s = 'This is my \\begin{align}\\alpha^2 \cdot \\alpha^{2+3} \equiv \\alpha^7\\end{align} equation' 364 | self.assertEqual(par.unicode_chars_equations(s), 'This is my \\begin{align}α² ⋅ α²⁺³ ≡ α⁷\end{align} equation') 365 | 366 | def test_strip_punctuation(self) -> None: 367 | """ 368 | Test strip punctuation. 
369 | """ 370 | self.assertEqual(par.strip_punctuation('Or , for example : yes !'), 'Or, for example: yes!') 371 | 372 | def test_process_items(self) -> None: 373 | """ 374 | Test process items. 375 | """ 376 | s = '\\begin{itemize}\item a \item b\\begin{itemize}\item a \item b\end{itemize}\end{itemize}' 377 | self.assertEqual(par.replace_pydetex_tags(par.process_items(s, lang='en')), 378 | '\n- a \n- b\n • a\n • b') 379 | 380 | s = """\\begin{itemize}[font=\\bfseries] 381 | \item As shown in Figure \\ref{fignumber} 382 | \item Proposed 383 | \end{itemize}""" 384 | self.assertEqual(par.replace_pydetex_tags(par.process_items(s, lang='en')), 385 | '\n- As shown in Figure \n- Proposed') 386 | 387 | s = """\\begin{enumerate} 388 | \\item a 389 | \\begin{enumerate} 390 | \\item a 391 | \\item b 392 | \\begin{enumerate} 393 | \\item a 394 | \\item b 395 | \\item c 396 | \\begin{enumerate}[font=\\bfseries] 397 | \\item a 398 | \\item b 399 | \\item c 400 | \\begin{enumerate}[[font=\\bfseries]] 401 | \\item a 402 | \\item b 403 | \\item c 404 | \\end{enumerate} 405 | \\end{enumerate} 406 | \\end{enumerate} 407 | \\end{enumerate} 408 | \\item c 409 | \\begin{itemize} 410 | \\item a 411 | \\item b 412 | \\item c 413 | \\end{itemize} 414 | \\item epic 415 | \\end{enumerate} 416 | """ 417 | 418 | t = par.replace_pydetex_tags(par.process_items(s, lang='en')) 419 | self.assertEqual( 420 | t, '\n1. a\n a) a\n b) b\n i. a\n ii. b\n iii. c\n' 421 | ' A) a\n B) b\n C) c\n I. a\n ' 422 | ' II. b\n III. c\n2. c\n • a\n • b\n ' 423 | ' • c\n3. epic\n ') 424 | 425 | self.assertEqual(par._process_item('', ''), '') 426 | 427 | s = """ 428 | \\begin{enumerate} 429 | \item b 430 | \end{enumerate} 431 | 432 | \\begin{itemize} 433 | \item a 434 | \\end{itemize} 435 | 436 | \\begin{tablenotes} 437 | Note: Res - Resolution in pixels (px). 438 | \\end{tablenotes} 439 | 440 | epic 441 | """ 442 | self.assertEqual( 443 | par.replace_pydetex_tags(par.process_items(s, lang='en')), 444 | '\n \n1. b\n \n \n- a\n \n Note:' 445 | ' Res - Resolution in pixels (px).\n \n epic\n ' 446 | ) 447 | 448 | # Multiple non-nested 449 | s = """ 450 | \\begin{enumerate} 451 | \item a 452 | \end{enumerate} 453 | \\begin{enumeratebf} 454 | \item a 455 | \end{enumeratebf} 456 | \\begin{enumerate} 457 | \item b 458 | \end{enumerate} 459 | \\begin{enumerate} 460 | \item a 461 | \end{enumerate} 462 | \\begin{enumerate} 463 | \item b 464 | \\begin{itemize} 465 | \item c 466 | \end{itemize} 467 | \item d 468 | \end{enumerate} 469 | \\begin{nice} 470 | \\end{nice} 471 | """ 472 | self.assertEqual( 473 | ut.find_tex_environments(s), 474 | (('enumerate', 9, 26, 55, 68, '', 0, 0), 475 | ('enumeratebf', 79, 98, 127, 142, '', 0, 0), 476 | ('enumerate', 153, 170, 199, 212, '', 0, 0), 477 | ('enumerate', 223, 240, 269, 282, '', 0, 0), 478 | ('itemize', 343, 358, 395, 406, 'enumerate', 1, 1), 479 | ('enumerate', 293, 310, 437, 450, '', 0, 0), 480 | ('nice', 461, 473, 482, 490, '', 0, -1)) 481 | ) 482 | self.assertEqual( 483 | par.replace_pydetex_tags(par.process_items(s, lang='en')).strip(), 484 | '1. a\n \n1. a\n \n1. b\n \n1. a\n \n1. ' 485 | 'b\n • c\n2. d\n \\begin{nice}\n \\end{nice}') 486 | 487 | def test_remove_environments(self) -> None: 488 | """ 489 | Remove environment test. 
490 | """ 491 | s = 'e\\begin{nice}x\\end{nice}p\\begin{y}z\\end{y}i\\begin{k}z\\end{k}c' 492 | self.assertEqual(par.remove_environments(s), s) 493 | self.assertEqual(par.remove_environments(s, ['y']), 'e\\begin{nice}x\end{nice}pi\\begin{k}z\end{k}c') 494 | self.assertEqual(par.remove_environments(s, ['y', 'nice']), 'epi\\begin{k}z\end{k}c') 495 | self.assertEqual(par.remove_environments(s, ['y', 'nice', 'k']), 'epic') 496 | 497 | s = """The following is a tikz figure, and must be removed: 498 | 499 | \\begin{tikzpicture}[line cap=round, line join=round, >=triangle 45, 500 | x=4.0cm, y=1.0cm, scale=1] 501 | \draw [->,color=black] (-0.1,0) -- (2.5,0); 502 | \\foreach \\x in {1,2} 503 | \draw [shift={(\\x,0)}, color=black] (0pt,2pt) 504 | -- (0pt,-2pt) node [below] {\\footnotesize $\\x$}; 505 | \draw [color=black] (2.5,0) node [below] {$x$}; 506 | \draw [->,color=black] (0,-0.1) -- (0,4.5); 507 | \\foreach \y in {1,2,3,4} 508 | \draw [shift={(0,\y)}, color=black] (2pt,0pt) 509 | -- (-2pt,0pt) node[left] {\\footnotesize $\y$}; 510 | \draw [color=black] (0,4.5) node [right] {$y$}; 511 | \draw [color=black] (0pt,-10pt) node [left] {\\footnotesize $0$}; 512 | \draw [domain=0:2.2, line width=1.0pt] plot (\\x,{(\\x)^2}); 513 | \clip(0,-0.5) rectangle (3,5); 514 | \draw (2,0) -- (2,4); 515 | \\foreach \i in {1,...,\\thehigher} 516 | \draw [fill=black,fill opacity=0.3, smooth,samples=50] ({1+(\i-1)/\\thehigher},{(1+(\i)/\\thehigher)^2}) 517 | --({1+(\i)/\\thehigher},{(1+(\i)/\\thehigher)^2}) 518 | -- ({1+(\i)/\thehigher},0) 519 | -- ({1+(\i-1)/\\thehigher},0) 520 | -- cycle; 521 | \end{tikzpicture}and it was removed!! 522 | 523 | \\begin{epic} 524 | But this should not be removed! 525 | \\end{epic}""" 526 | self.assertEqual( 527 | par.remove_environments(s), 528 | 'The following is a tikz figure, and must be removed:\n \n ' 529 | ' and it was removed!!\n \n \\begin{epic}\n ' 530 | ' But this should not be removed!\n \\end{epic}') 531 | 532 | def test_process_def(self) -> None: 533 | """ 534 | Process defs test. 
535 | """ 536 | par._DEFS.clear() 537 | 538 | s = 'This is my \\def\\code {epic!} but yes \\def\\a{} epic' 539 | self.assertEqual(par.process_def(s), 'This is my but yes epic') 540 | self.assertEqual(len(par._DEFS), 2) 541 | self.assertEqual(par._DEFS['\\code'], 'epic!') 542 | 543 | s = """ 544 | \def\\underline#1{\\relax\ifmmode\@@underline{#1}\else $\@@underline{\hbox{#1}}\m@th$\\relax\\fi} 545 | \def\@greek#1{% 546 | \ifcase#1% 547 | \or $\\alpha$% 548 | \or $\\beta$% 549 | \or $\gamma$% 550 | \or $\delta$% 551 | \or $\epsilon$% 552 | \or $\zeta$% 553 | \or $\eta$% 554 | \or $\\theta$% 555 | \or $\iota$% 556 | \or $\kappa$% 557 | \or $\lambda$% 558 | \or $\mu$% 559 | \or $\\nu$% 560 | \or $\\xi$% 561 | \or $o$% 562 | \or $\pi$% 563 | \or $\\rho$% 564 | \or $\sigma$% 565 | \or $\\tau$% 566 | \or $\\upsilon$% 567 | \or $\phi$% 568 | \or $\chi$% 569 | \or $\psi$% 570 | \or $\omega$% 571 | \\fi% 572 | } 573 | not epic 574 | """ 575 | self.assertEqual(par.process_def(s).strip(), 'not epic') 576 | self.assertEqual(len(par._DEFS), 0) 577 | 578 | s = '\\def\\mycommand{epic}This is really \mycommand yes' 579 | self.assertEqual(par.process_def(s, replace=True), 'This is really epic yes') 580 | s = '\\def\\mycommand{epic}This is really \mycommand' 581 | self.assertEqual(par.process_def(s, replace=True), 'This is really epic') 582 | 583 | s = 'a\\def\e{e}' 584 | self.assertEqual(par.process_def(s), 'a') 585 | s = '\\def\e{e}' 586 | self.assertEqual(par.process_def(s), '') 587 | s = '\\def\e{e}\\def\p{p}\\def\i {i}\\def\c\n{c}\e\p\i\c' 588 | self.assertEqual(par.process_def(s, replace=True), 'epic') 589 | s = '\epic \def\\a{a} \\nice \\item \\a\\a\\a not \\b' 590 | self.assertEqual(par.process_def(s, replace=True), '\epic \\nice \\item aaa not \\b') 591 | s = 'a\\def\e{e} jjajjajaja' 592 | self.assertEqual(par.process_def(s, replace=True), 'a jjajjajaja') 593 | 594 | s = """ 595 | \\begin{itemize}[font=\\bfseries] 596 | \item a 597 | \end{itemize} 598 | 599 | a\def\\a{epic} 600 | jejeje \\a 601 | """ 602 | self.assertEqual( 603 | par.process_def(s, replace=True).strip(), 604 | '\\begin{itemize}[font=\\bfseries]\n \item a\n \\end{itemize}\n \n' 605 | ' a\n jejeje epic' 606 | ) 607 | 608 | # Invalid defs 609 | s = '\def\\a{a} and \def\\b{b} and \\def \nc{c} and \\defee\\d{d}: \\a\\b\\c\\d.' 610 | self.assertEqual(par.process_def(s, replace=True), ' and and and \defee\d{d}: ab\c\d.') 611 | 612 | # Def with commands 613 | s = '\def\\a{\\textsuperscript{a}nice!!} epic \\a' 614 | self.assertEqual(par.process_def(s, replace=True), ' epic anice!!') 615 | 616 | def test_begin_document(self) -> None: 617 | """ 618 | Test begin document parser. 619 | """ 620 | s = '\\begin{document}:end_\\end{document}' 621 | self.assertEqual(par.process_begin_document(s), ':end_') 622 | 623 | s = ':end_\\end{document}' 624 | self.assertEqual(par.process_begin_document(s), ':end_\\end{document}') 625 | 626 | # Others 627 | s = '\\end{document}\\begin{document}:end_\\begin{document}\\end{document}\\begin{document}' 628 | self.assertEqual(par.process_begin_document(s), ':end_\\begin{document}') 629 | 630 | s = """ 631 | % Document 632 | \input{epic} 633 | This line of code should not be included 634 | \\begin{figure} 635 | This figure should not be included 636 | \\end{figure} 637 | \let\\a\\b 638 | \\begin {document} 639 | Test 640 | \\end {document} 641 | Removed as well!! 
642 | """ 643 | self.assertEqual(par.process_begin_document(s).strip(), 'Test') 644 | --------------------------------------------------------------------------------
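# A rough end-to-end sketch showing how the parsers exercised above can be chained by hand.
# The real pydetex pipelines compose these (and more) steps in their own order, so this is an
# illustration of the building blocks, not the actual pipeline implementation.
import pydetex.parsers as par

tex = 'Hello \\cite{a,b}! % a comment\nSee $\\alpha^2$.'
out = par.remove_comments(tex)       # strip "% ..." comments
out = par.process_cite(out)          # \cite{a,b} -> numbered citation tag
out = par.simple_replace(out)        # \alpha^2 -> α² inside the equation
out = par.replace_pydetex_tags(out)  # turn internal pydetex tags into plain text
print(out)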