├── LICENSE
├── README.md
├── composer.json
├── data
    ├── amssymb
    │   ├── both-amssymb.tex
    │   ├── math-amssymb-alphabets.tex
    │   ├── math-amssymb-binops.tex
    │   ├── math-amssymb-greek.tex
    │   ├── math-amssymb-loglike.tex
    │   ├── math-amssymb-misc.tex
    │   ├── math-amssymb-symbols.tex
    │   └── math-amssymb-varsized-delimiters.tex
    ├── amstext
    │   └── both-amstext-alphabets.tex
    ├── base.php
    ├── both-alphabets.tex
    ├── both-refs.tex
    ├── both-spaces.tex
    ├── both.tex
    ├── compile.php
    ├── fixltx2e
    │   └── both-fixltx2e.tex
    ├── hyperref
    │   └── text-hyperref.tex
    ├── math-accents.tex
    ├── math-alphabets.tex
    ├── math-arrows.tex
    ├── math-binops.tex
    ├── math-delimiters.tex
    ├── math-greek.tex
    ├── math-large-delimeters.tex
    ├── math-loglike.tex
    ├── math-misc.tex
    ├── math-other.tex
    ├── math-punctuation.tex
    ├── math-relations.tex
    ├── math-spaces.tex
    ├── math-varsymbols.tex
    ├── math.tex
    ├── text-accents.tex
    ├── text-fontsize.tex
    ├── text-primitives.tex
    ├── text-spaces.tex
    └── text.tex
└── library
    └── PhpLatex
        ├── Filter
            └── Html2Latex.php
        ├── Lexer.php
        ├── Node.php
        ├── Parser.php
        ├── PdfLatex.php
        ├── Renderer
            ├── Abstract.php
            ├── Html.php
            ├── NodeRenderer.php
            └── Typestyle.php
        ├── Utils.php
        ├── Utils
            ├── PeekableArrayIterator.php
            ├── PeekableIterator.php
            └── TreeDebug.php
        ├── commands.php
        ├── environs.php
        └── latex_utf8.php


/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2014 Xemlock
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # php-latex
  2 | 
  3 | [![Build status](https://github.com/xemlock/php-latex/workflows/build/badge.svg)](https://github.com/xemlock/php-latex/actions?query=workflow/build)
  4 | [![License](https://img.shields.io/github/license/xemlock/php-latex.svg)](https://packagist.org/packages/xemlock/php-latex)
  5 | 
  6 | 
  7 | The main purpose of this library is to produce valid LaTeX output from user input that is not always valid. You can also render LaTeX code to HTML, with one limitation: only text mode is rendered to HTML, while math mode has to be handled in the browser by a JavaScript
  8 | library. For this I recommend using [MathJax](https://www.mathjax.org/).
  9 | 
 10 | Bear in mind that not every LaTeX command is recognized or implemented. If you happen to need a command that's
 11 | not supported, you can either define it manually (see the Customization section below) or file a [feature request](https://github.com/xemlock/php-latex/issues/new/choose).
 12 | 
 13 | ## Installation
 14 | 
 15 | To use php-latex, install it like any other PHP package, with [Composer](https://getcomposer.org/).
 16 | 
 17 | ```
 18 | composer require xemlock/php-latex:dev-master
 19 | ```
 20 | 
 21 | ## Usage
 22 | 
 23 | Basic usage is as follows:
 24 | 
 25 | ### Parsing LaTeX source code
 26 | 
 27 | ```php
 28 | $parser = new PhpLatex_Parser();
 29 | $parsedTree = $parser->parse($input);
 30 | // $parsedTree contains object representation of the LaTeX document
 31 | ```
 32 | 
 33 | ### Render parsed LaTeX source
 34 | 
 35 | Once you have parsed the source code, you can render it to HTML (or back to LaTeX). Please note that math-mode code is rendered as-is.
 36 | 
 37 | ```php
 38 | // render parsed LaTeX code to HTML
 39 | $htmlRenderer = new PhpLatex_Renderer_Html();
 40 | $html = $htmlRenderer->render($parsedTree);
 41 | 
 42 | // render parsed LaTeX code to sanitized LaTeX code
 43 | $latex = PhpLatex_Renderer_Abstract::toLatex($parsedTree);
 44 | ```
 45 | 
 46 | ### Customization
 47 | 
 48 | You can add custom (or not yet implemented) commands to the parser:
 49 | 
 50 | ```php
 51 | $parser = new PhpLatex_Parser();
 52 | $parser->addCommand(
 53 |     '\placeholder',
 54 |     array(
 55 |         // number of arguments
 56 |         'numArgs' => 1,
 57 |         // number of optional arguments, default 0
 58 |         'numOptArgs' => 1,
 59 |         // mode this command is valid in, can be: 'both', 'math', 'text'
 60 |         'mode' => 'both',
 61 |         // whether command arguments should be parsed, or handled as-is
 62 |         'parseArgs' => false,
 63 |         // whether command allows a starred variant
 64 |         'starred' => false,
 65 |     )
 66 | );
 67 | ```
 68 | 
 69 | ### pdflatex
 70 | 
 71 | Additionally, this library provides a wrapper for pdflatex to make rendering and compiling `.tex` files
 72 | from PHP scripts easier.
 73 | 
 74 | ```php
 75 | $pdflatex = new PhpLatex_PdfLatex();
 76 | 
 77 | // to generate a PDF from .tex file
 78 | $pathToGeneratedPdf = $pdflatex->compile('/path/to/document.tex', 
 79 |     array(/* optional paths to files included by .tex file (images) */)
 80 | );
 81 | ```
 82 | 
 83 | You can access the build log of the last `compile` call via:
 84 | 
 85 | ```php
 86 | echo $pdflatex->getLog();
 87 | ```
 88 | 
 89 | You can even compile a LaTeX string on the fly:
 90 | 
 91 | ```php
 92 | $pathToGeneratedPdf = $pdflatex->compileString('
 93 | \documentclass{article}
 94 | \begin{document}
 95 | Hello from \LaTeX!
 96 | \end{document}
 97 | ');
 98 | ```
 99 | 
100 | By default, the system temp directory is used when generating a PDF from a string. You can, however, change it:
101 | 
102 | ```php
103 | $pdflatex->setBuildDir('/path/to/temp'); 
104 | ```
105 | 
106 | ## License
107 | 
108 | The MIT License (MIT). See the LICENSE file.
109 | 


--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "xemlock/php-latex",
 3 |     "description": "LaTeX parser and renderer",
 4 |     "type": "library",
 5 |     "license": "MIT",
 6 |     "authors": [
 7 |         {
 8 |             "name": "xemlock",
 9 |             "email": "xemlock@gmail.com"
10 |         }
11 |     ],
12 |     "require": {
13 |         "php": ">=5.3.0",
14 |         "ext-dom": "*",
15 |         "ext-mbstring": "*"
16 |     },
17 |     "require-dev": {
18 |         "phpunit/phpunit": ">=5.7 <10.0"
19 |     },
20 |     "autoload": {
21 |         "psr-0": { "PhpLatex_": "library" }
22 |     },
23 |     "scripts": {
24 |         "post-install-cmd": [
25 |             "@php .scripts/patch-phpunit.php"
26 |         ],
27 |         "post-update-cmd": [
28 |             "@php .scripts/patch-phpunit.php"
29 |         ],
30 |         "test": "phpunit"
31 |     }
32 | }
33 | 


--------------------------------------------------------------------------------
/data/amssymb/both-amssymb.tex:
--------------------------------------------------------------------------------
1 | \checkmark
2 | \circledR
3 | \maltese
4 | \yen
5 | 


--------------------------------------------------------------------------------
/data/amssymb/math-amssymb-alphabets.tex:
--------------------------------------------------------------------------------
1 | \mathfrak{Abc}
2 | \mathbb{NRC}
3 | 


--------------------------------------------------------------------------------
/data/amssymb/math-amssymb-binops.tex:
--------------------------------------------------------------------------------
 1 | \barwedge
 2 | \boxdot
 3 | \boxminus
 4 | \boxplus
 5 | \boxtimes
 6 | \Cap
 7 | \centerdot
 8 | \circledast
 9 | \circledcirc
10 | \circleddash
11 | \Cup
12 | \curlyvee
13 | \curlywedge
14 | \divideontimes
15 | \dotplus
16 | \doublebarwedge
17 | \intercal
18 | \leftthreetimes
19 | \ltimes
20 | \rightthreetimes
21 | \rtimes
22 | \smallsetminus
23 | \veebar
24 | 


--------------------------------------------------------------------------------
/data/amssymb/math-amssymb-greek.tex:
--------------------------------------------------------------------------------
1 | \digamma
2 | \varkappa
3 | 


--------------------------------------------------------------------------------
/data/amssymb/math-amssymb-loglike.tex:
--------------------------------------------------------------------------------
1 | \injlim
2 | \projlim
3 | \varinjlim
4 | \varliminf
5 | \varlimsup
6 | \varprojlim
7 | 


--------------------------------------------------------------------------------
/data/amssymb/math-amssymb-misc.tex:
--------------------------------------------------------------------------------
 1 | \angle
 2 | \Box
 3 | \dashleftarrow
 4 | \dashrightarrow
 5 | \Diamond
 6 | \hbar
 7 | \llcorner
 8 | \lrcorner
 9 | \mho
10 | \rightleftharpoons
11 | \sqsubset
12 | \sqsupset
13 | \ulcorner
14 | \urcorner
15 | 


--------------------------------------------------------------------------------
/data/amssymb/math-amssymb-symbols.tex:
--------------------------------------------------------------------------------
  1 | \angle
  2 | \approxeq
  3 | \backepsilon
  4 | \backprime
  5 | \backsim
  6 | \backsimeq
  7 | \barwedge
  8 | \Bbbk
  9 | \because
 10 | \beth
 11 | \between
 12 | \bigstar
 13 | \blacklozenge
 14 | \blacksquare
 15 | \blacktriangle
 16 | \blacktriangledown
 17 | \blacktriangleleft
 18 | \blacktriangleright
 19 | \boxdot
 20 | \boxminus
 21 | \boxplus
 22 | \boxtimes
 23 | \Bumpeq
 24 | \bumpeq
 25 | \Cap
 26 | \centerdot
 27 | \circeq
 28 | \circlearrowleft
 29 | \circlearrowright
 30 | \circledast
 31 | \circledcirc
 32 | \circleddash
 33 | \circledS
 34 | \complement
 35 | \Cup
 36 | \curlyeqprec
 37 | \curlyeqsucc
 38 | \curlyvee
 39 | \curlywedge
 40 | \curvearrowleft
 41 | \curvearrowright
 42 | \daleth
 43 | \diagdown
 44 | \diagup
 45 | \digamma
 46 | \divideontimes
 47 | \Doteq
 48 | \doteqdot
 49 | \dotplus
 50 | \doublebarwedge
 51 | \doublecap
 52 | \doublecup
 53 | \downdownarrows
 54 | \downharpoonleft
 55 | \downharpoonright
 56 | \eqcirc
 57 | \eqsim
 58 | \eqslantgtr
 59 | \eqslantless
 60 | \eth
 61 | \fallingdotseq
 62 | \Finv
 63 | \Game
 64 | \geqq
 65 | \geqslant
 66 | \ggg
 67 | \gggtr
 68 | \gimel
 69 | \gnapprox
 70 | \gneq
 71 | \gneqq
 72 | \gnsim
 73 | \gtrapprox
 74 | \gtrdot
 75 | \gtreqless
 76 | \gtreqqless
 77 | \gtrless
 78 | \gtrsim
 79 | \gvertneqq
 80 | \hbar
 81 | \hslash
 82 | \intercal
 83 | \leftarrowtail
 84 | \leftleftarrows
 85 | \leftrightarrows
 86 | \leftrightharpoons
 87 | \leftrightsquigarrow
 88 | \leftthreetimes
 89 | \leqq
 90 | \leqslant
 91 | \lessapprox
 92 | \lessdot
 93 | \lesseqgtr
 94 | \lesseqqgtr
 95 | \lessgtr
 96 | \lesssim
 97 | \Lleftarrow
 98 | \lll
 99 | \llless
100 | \lnapprox
101 | \lneq
102 | \lneqq
103 | \lnsim
104 | \looparrowleft
105 | \looparrowright
106 | \lozenge
107 | \Lsh
108 | \ltimes
109 | \lvertneqq
110 | \measuredangle
111 | \mho
112 | \multimap
113 | \ncong
114 | \nexists
115 | \ngeq
116 | \ngeqq
117 | \ngeqslant
118 | \ngtr
119 | \nleftarrow
120 | \nLeftarrow
121 | \nLeftrightarrow
122 | \nleftrightarrow
123 | \nleq
124 | \nleqq
125 | \nleqslant
126 | \nless
127 | \nmid
128 | \nparallel
129 | \nprec
130 | \npreceq
131 | \nrightarrow
132 | \nRightarrow
133 | \nshortmid
134 | \nshortparallel
135 | \nsim
136 | \nsubseteq
137 | \nsubseteqq
138 | \nsucc
139 | \nsucceq
140 | \nsupseteq
141 | \nsupseteqq
142 | \ntriangleleft
143 | \ntrianglelefteq
144 | \ntriangleright
145 | \ntrianglerighteq
146 | \nvdash
147 | \nVdash
148 | \nVDash
149 | \nvDash
150 | \pitchfork
151 | \precapprox
152 | \preccurlyeq
153 | \precnapprox
154 | \precneqq
155 | \precnsim
156 | \precsim
157 | \restriction
158 | \rightarrowtail
159 | \rightleftarrows
160 | \rightleftharpoons
161 | \rightrightarrows
162 | \rightsquigarrow
163 | \rightthreetimes
164 | \risingdotseq
165 | \Rrightarrow
166 | \Rsh
167 | \rtimes
168 | \shortmid
169 | \shortparallel
170 | \smallfrown
171 | \smallsetminus
172 | \smallsmile
173 | \sphericalangle
174 | \sqsubset
175 | \sqsupset
176 | \square
177 | \Subset
178 | \subseteqq
179 | \subsetneq
180 | \subsetneqq
181 | \succapprox
182 | \succcurlyeq
183 | \succnapprox
184 | \succneqq
185 | \succnsim
186 | \succsim
187 | \Supset
188 | \supseteqq
189 | \supsetneq
190 | \supsetneqq
191 | \therefore
192 | \thickapprox
193 | \thicksim
194 | \triangledown
195 | \trianglelefteq
196 | \triangleq
197 | \trianglerighteq
198 | \twoheadleftarrow
199 | \twoheadrightarrow
200 | \upharpoonleft
201 | \upharpoonright
202 | \upuparrows
203 | \varkappa
204 | \varnothing
205 | \varpropto
206 | \varsubsetneq
207 | \varsubsetneqq
208 | \varsupsetneq
209 | \varsupsetneqq
210 | \vartriangle
211 | \vartriangleleft
212 | \vartriangleright
213 | \vDash
214 | \Vdash
215 | \veebar
216 | \Vvdash
217 | 


--------------------------------------------------------------------------------
/data/amssymb/math-amssymb-varsized-delimiters.tex:
--------------------------------------------------------------------------------
1 | \lvert
2 | \lVert
3 | \rvert
4 | \rVert
5 | 


--------------------------------------------------------------------------------
/data/amstext/both-amstext-alphabets.tex:
--------------------------------------------------------------------------------
1 | \text{Abc}
2 | 


--------------------------------------------------------------------------------
/data/base.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | // base command definitions, loaded first by compile.php; 'environs' lists required environments
 3 | return array(
 4 |     '\\string' => array( // TeX primitive
 5 |         'numArgs'   => 1,
 6 |         'parseArgs' => false, // argument is handled as-is, not parsed
 7 |         'mode'      => 'both',
 8 |     ),
 9 |     '\\ ' => array( // backslash followed by a space
10 |         'mode'       => 'both',
11 |         'numArgs'    => 0,
12 |         'numOptArgs' => 0,
13 |     ),
14 |     '\\chapter' => array(
15 |         'numArgs'   => 1,
16 |         'mode'      => 'text',
17 |         'starred'   => true, // a starred variant of the command is allowed
18 |         'counter'   => 'chapter', // NOTE(review): presumably the counter tied to this command - confirm in the renderer
19 |         'counterReset' => array( // NOTE(review): presumably counters reset by this command - confirm in the renderer
20 |             'section', 'subsection', 'subsubsection', 'paragraph', 'subparagraph',
21 |         ),
22 |     ),
23 |     '\\section' => array(
24 |         'numArgs'      => 1,
25 |         'mode'         => 'text',
26 |         'starred'      => true,
27 |         'counter'      => 'section',
28 |         'counterReset' => array(
29 |             'subsection', 'subsubsection', 'paragraph', 'subparagraph',
30 |         ),
31 |     ),
32 |     '\\subsection' => array(
33 |         'numArgs'     => 1,
34 |         'mode'         => 'text',
35 |         'starred'      => true,
36 |         'counter'      => 'subsection',
37 |         'counterReset' => array(
38 |             'subsubsection', 'paragraph', 'subparagraph',
39 |         ),
40 |     ),
41 |     '\\subsubsection' => array(
42 |         'numArgs'      => 1,
43 |         'mode'         => 'text',
44 |         'starred'      => true,
45 |         'counter'      => 'subsubsection',
46 |         'counterReset' => array(
47 |             'paragraph', 'subparagraph',
48 |         ),
49 |     ),
50 |     '\\paragraph' => array(
51 |         'numArgs'      => 1,
52 |         'mode'         => 'text',
53 |         'starred'      => true,
54 |         'counter'      => 'paragraph',
55 |         'counterReset' => array(
56 |             'subparagraph',
57 |         ),
58 |     ),
59 |     '\\subparagraph' => array(
60 |         'numArgs'      => 1,
61 |         'mode'         => 'text',
62 |         'starred'      => true,
63 |         'counter'      => 'subparagraph',
64 |     ),
65 |     '\\item' => array(
66 |         'mode'         => 'text',
67 |         'environs'     => array('itemize', 'enumerate'), // environments required by this command
68 |     ),
69 |     '\\hline' => array(
70 |         'mode'         => 'text',
71 |         'environs'     => array('tabular'),
72 |     ),
73 | );
74 | 


--------------------------------------------------------------------------------
/data/both-alphabets.tex:
--------------------------------------------------------------------------------
1 | \emph{Abc}
2 | \textbf{Abc}
3 | \textit{Abc}
4 | \textrm{Abc}
5 | \textsf{Abc}
6 | \texttt{Abc}
7 | \textup{Abc}
8 | 


--------------------------------------------------------------------------------
/data/both-refs.tex:
--------------------------------------------------------------------------------
1 | \label{marker}
2 | \ref{marker}
3 | \pageref{marker}
4 | 


--------------------------------------------------------------------------------
/data/both-spaces.tex:
--------------------------------------------------------------------------------
1 | \,
2 | \enspace
3 | \quad
4 | 


--------------------------------------------------------------------------------
/data/both.tex:
--------------------------------------------------------------------------------
 1 | \#
 2 | \$
 3 | \\
 4 | \_
 5 | \copyright
 6 | \dag
 7 | \ddag
 8 | \dots
 9 | \newline
10 | \P
11 | \pounds
12 | \S
13 | \textsuperscript{a}
14 | \{
15 | \}
16 | \*
17 | 


--------------------------------------------------------------------------------
/data/compile.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | // compile commands to a single file
 4 | //
 5 | // Definitions from base.php are merged with commands found in the .tex files
 6 | // of this directory (searched recursively). Each non-empty line of a .tex file
 7 | // is a sample invocation: every "{" / "[" after the command name counts as
 8 | // one mandatory / optional argument. The result is written to commands.php.
 9 | 
10 | $dataDir = dirname(__FILE__);
11 | $commands = require $dataDir . '/base.php';
12 | 
13 | // scan the directory of this script rather than the current working directory,
14 | // so that the result does not depend on where the script is invoked from
15 | $it = new RecursiveIteratorIterator(new RecursiveDirectoryIterator($dataDir));
16 | 
17 | foreach ($it as $file) {
18 |     if (substr($file, -4) !== '.tex') {
19 |         continue;
20 |     }
21 | 
22 |     // command mode is stored in file name
23 |     if (!preg_match('#^(?P<mode>both|math|text)#', basename($file), $match)) {
24 |         continue;
25 |     }
26 |     $mode = $match['mode'];
27 | 
28 |     $lines = file($file);
29 |     if ($lines === false) {
30 |         throw new Exception(sprintf('Unable to read file %s', $file));
31 |     }
32 | 
33 |     // strip comments and surrounding whitespace, drop lines that end up empty
34 |     $fileCommands = array_filter(array_map(function ($str) {
35 |         return trim(preg_replace('/%.*/', '', $str));
36 |     }, $lines), 'strlen');
37 | 
38 |     foreach ($fileCommands as $command) {
39 |         // extract command name and number of args
40 |         if (!preg_match('#^(?P<command>\\\\([a-zA-Z]+|[^a-zA-Z]| ))#', $command, $match)) {
41 |             throw new Exception(sprintf("File %s contains invalid command name '%s'", $file, $command));
42 |         }
43 | 
44 |         $name = $match['command'];
45 | 
46 |         // in case [ or { is part of command name, search for substrings in part
47 |         // of command after its name
48 |         $rest = substr($command, strlen($name));
49 |         $numArgs = substr_count($rest, '{');
50 |         $numOptArgs = substr_count($rest, '[');
51 | 
52 |         if (!isset($commands[$name])) {
53 |             $commands[$name] = array(
54 |                 'mode'       => $mode,
55 |                 'numArgs'    => $numArgs,
56 |                 'numOptArgs' => $numOptArgs,
57 |             );
58 |             continue;
59 |         }
60 | 
61 |         // entries coming from base.php may omit argument counts - a missing
62 |         // count is compared as 0 instead of reading an undefined index
63 |         $known = $commands[$name];
64 |         $knownNumArgs = isset($known['numArgs']) ? $known['numArgs'] : 0;
65 |         $knownNumOptArgs = isset($known['numOptArgs']) ? $known['numOptArgs'] : 0;
66 | 
67 |         if ($knownNumArgs !== $numArgs || $knownNumOptArgs !== $numOptArgs) {
68 |             throw new Exception(sprintf('File %s contains conflicting definition of command %s', $file, $name));
69 |         }
70 | 
71 |         // a command listed under more than one mode is valid in both
72 |         if ($known['mode'] !== 'both' && $known['mode'] !== $mode) {
73 |             $commands[$name]['mode'] = 'both';
74 |         }
75 |     }
76 | }
77 | 
78 | uksort($commands, function ($a, $b) {
79 |     // strip leading backslash
80 |     $a = substr($a, 1);
81 |     $b = substr($b, 1);
82 | 
83 |     // names equal when ignoring case are ordered by a case-sensitive comparison
84 |     $casecmp = strcasecmp($a, $b);
85 |     if (!$casecmp) {
86 |         return strcmp($a, $b);
87 |     }
88 | 
89 |     return $casecmp;
90 | });
91 | 
92 | $php = str_replace('  ', '    ', var_export($commands, true));
93 | $php = preg_replace('#\s+=>\s+array \(#', " => array(", $php);
94 | 
95 | $target = $dataDir . '/../library/PhpLatex/commands.php';
96 | if (file_put_contents($target, '<?php return ' . $php . ";\n") === false) {
97 |     throw new Exception(sprintf('Unable to write file %s', $target));
98 | }
99 | 


--------------------------------------------------------------------------------
/data/fixltx2e/both-fixltx2e.tex:
--------------------------------------------------------------------------------
1 | \textsubscript{a}
2 | 


--------------------------------------------------------------------------------
/data/hyperref/text-hyperref.tex:
--------------------------------------------------------------------------------
1 | \href{abc}{xyz}
2 | \url{abc}
3 | 


--------------------------------------------------------------------------------
/data/math-accents.tex:
--------------------------------------------------------------------------------
 1 | \acute{a}
 2 | \bar{a}
 3 | \breve{a}
 4 | \check{a}
 5 | \ddot{a}
 6 | \dot{a}
 7 | \grave{a}
 8 | \hat{a}
 9 | \tilde{a}
10 | \vec{a}
11 | 


--------------------------------------------------------------------------------
/data/math-alphabets.tex:
--------------------------------------------------------------------------------
1 | \mathcal{ABC}
2 | \mathit{ABCdef123}
3 | \mathnormal{ABCdef123}
4 | \mathrm{ABCdef123}
5 | 


--------------------------------------------------------------------------------
/data/math-arrows.tex:
--------------------------------------------------------------------------------
 1 | \Downarrow
 2 | \downarrow
 3 | \hookleftarrow
 4 | \hookrightarrow
 5 | \Leftarrow
 6 | \leftarrow
 7 | \leftharpoondown
 8 | \leftharpoonup
 9 | \leftrightarrow
10 | \Leftrightarrow
11 | \Longleftarrow
12 | \longleftarrow
13 | \longleftrightarrow
14 | \Longleftrightarrow
15 | \longmapsto
16 | \longrightarrow
17 | \Longrightarrow
18 | \mapsto
19 | \nearrow
20 | \nwarrow
21 | \Rightarrow
22 | \rightarrow
23 | \rightharpoondown
24 | \rightharpoonup
25 | \rightleftharpoons
26 | \searrow
27 | \swarrow
28 | \uparrow
29 | \Uparrow
30 | \Updownarrow
31 | \updownarrow
32 | 


--------------------------------------------------------------------------------
/data/math-binops.tex:
--------------------------------------------------------------------------------
 1 | \amalg
 2 | \ast
 3 | \bigcirc
 4 | \bigtriangledown
 5 | \bigtriangleup
 6 | \bullet
 7 | \cap
 8 | \cdot
 9 | \circ
10 | \cup
11 | \dagger
12 | \ddagger
13 | \diamond
14 | \div
15 | \mp
16 | \odot
17 | \ominus
18 | \oplus
19 | \oslash
20 | \otimes
21 | \pm
22 | \setminus
23 | \sqcap
24 | \sqcup
25 | \star
26 | \times
27 | \triangleleft
28 | \triangleright
29 | \uplus
30 | \vee
31 | \wedge
32 | \wr
33 | 


--------------------------------------------------------------------------------
/data/math-delimiters.tex:
--------------------------------------------------------------------------------
 1 | \backslash
 2 | \langle
 3 | \lceil
 4 | \lfloor
 5 | \rangle
 6 | \rceil
 7 | \rfloor
 8 | \{
 9 | \|
10 | \}
11 | 


--------------------------------------------------------------------------------
/data/math-greek.tex:
--------------------------------------------------------------------------------
 1 | \alpha
 2 | \beta
 3 | \chi
 4 | \Delta
 5 | \delta
 6 | \epsilon
 7 | \eta
 8 | \Gamma
 9 | \gamma
10 | \kappa
11 | \lambda
12 | \Lambda
13 | \mu
14 | \nu
15 | \omega
16 | \Omega
17 | \phi
18 | \Phi
19 | \pi
20 | \Pi
21 | \psi
22 | \Psi
23 | \rho
24 | \Sigma
25 | \sigma
26 | \tau
27 | \Theta
28 | \theta
29 | \Upsilon
30 | \upsilon
31 | \varepsilon
32 | \varphi
33 | \varpi
34 | \varrho
35 | \varsigma
36 | \vartheta
37 | \Xi
38 | \xi
39 | \zeta
40 | 


--------------------------------------------------------------------------------
/data/math-large-delimeters.tex:
--------------------------------------------------------------------------------
1 | \arrowvert
2 | \Arrowvert
3 | \bracevert
4 | \lgroup
5 | \lmoustache
6 | \rgroup
7 | \rmoustache
8 | 


--------------------------------------------------------------------------------
/data/math-loglike.tex:
--------------------------------------------------------------------------------
 1 | \arccos
 2 | \arcsin
 3 | \arctan
 4 | \arg
 5 | \cos
 6 | \cosh
 7 | \cot
 8 | \coth
 9 | \csc
10 | \deg
11 | \det
12 | \dim
13 | \exp
14 | \gcd
15 | \hom
16 | \inf
17 | \ker
18 | \lg
19 | \lim
20 | \liminf
21 | \limsup
22 | \ln
23 | \log
24 | \max
25 | \min
26 | \Pr
27 | \sec
28 | \sin
29 | \sinh
30 | \sup
31 | \tan
32 | \tanh
33 | 


--------------------------------------------------------------------------------
/data/math-misc.tex:
--------------------------------------------------------------------------------
 1 | \aleph
 2 | \angle
 3 | \bot
 4 | \cdots
 5 | \clubsuit
 6 | \ddots
 7 | \diamondsuit
 8 | \ell
 9 | \emptyset
10 | \exists
11 | \flat
12 | \forall
13 | \hbar
14 | \heartsuit
15 | \Im
16 | \imath
17 | \infty
18 | \jmath
19 | \ldots
20 | \nabla
21 | \natural
22 | \neg
23 | \partial
24 | \prime
25 | \Re
26 | \sharp
27 | \spadesuit
28 | \surd
29 | \top
30 | \triangle
31 | \vdots
32 | \wp
33 | 


--------------------------------------------------------------------------------
/data/math-other.tex:
--------------------------------------------------------------------------------
 1 | \frac{abc}{xyz}
 2 | \overbrace{abc}
 3 | \overleftarrow{abc}
 4 | \overline{abc}
 5 | \overrightarrow{abc}
 6 | \sqrt[n]{abc}
 7 | \underbrace{abc}
 8 | \underline{abc}
 9 | \widehat{abc}
10 | \widetilde{abc}
11 | 


--------------------------------------------------------------------------------
/data/math-punctuation.tex:
--------------------------------------------------------------------------------
1 | \colon
2 | \ldotp
3 | \cdotp
4 | 


--------------------------------------------------------------------------------
/data/math-relations.tex:
--------------------------------------------------------------------------------
 1 | \approx
 2 | \asymp
 3 | \bowtie
 4 | \cong
 5 | \dashv
 6 | \doteq
 7 | \equiv
 8 | \frown
 9 | \geq
10 | \gg
11 | \in
12 | \leq
13 | \ll
14 | \mid
15 | \models
16 | \neq
17 | \ni
18 | \parallel
19 | \perp
20 | \prec
21 | \preceq
22 | \propto
23 | \sim
24 | \simeq
25 | \smile
26 | \sqsubseteq
27 | \sqsupseteq
28 | \subset
29 | \subseteq
30 | \succ
31 | \succeq
32 | \supset
33 | \supseteq
34 | \vdash
35 | 


--------------------------------------------------------------------------------
/data/math-spaces.tex:
--------------------------------------------------------------------------------
1 | \:
2 | \;
3 | \!
4 | 


--------------------------------------------------------------------------------
/data/math-varsymbols.tex:
--------------------------------------------------------------------------------
 1 | \bigcap
 2 | \bigcup
 3 | \bigodot
 4 | \bigoplus
 5 | \bigotimes
 6 | \bigsqcup
 7 | \biguplus
 8 | \bigvee
 9 | \bigwedge
10 | \coprod
11 | \int
12 | \oint
13 | \prod
14 | \sum
15 | 


--------------------------------------------------------------------------------
/data/math.tex:
--------------------------------------------------------------------------------
1 | \mathdollar
2 | \mathellipsis
3 | \mathparagraph
4 | \mathsection
5 | \mathsterling
6 | \mathunderscore
7 | 


--------------------------------------------------------------------------------
/data/text-accents.tex:
--------------------------------------------------------------------------------
 1 | \`{a} % (grave accent): à
 2 | \'{a} % (acute accent): á
 3 | \^{a} % (circumflex or “hat”): â
 4 | \"{a} % (umlaut or dieresis): ä
 5 | \~{a} % (tilde or “squiggle”): ã
 6 | \={a} % (macron or “bar”): ā
 7 | \.{a} % (dot accent): ȧ
 8 | \u{a} % (breve accent): ă
 9 | \v{a} % (háček or “check”): ǎ
10 | \H{a} % (long Hungarian umlaut): ő
11 | \t{a} % (tie-after accent): a͡
12 | \c{a} % (cedilla): ş
13 | \d{a} % (dot-under accent): ạ
14 | \b{a} % (bar-under accent): ο̩
15 | \k{a} % (ogonek): ą
16 | 


--------------------------------------------------------------------------------
/data/text-fontsize.tex:
--------------------------------------------------------------------------------
 1 | \tiny
 2 | \scriptsize
 3 | \footnotesize
 4 | \small
 5 | \normalsize
 6 | \large
 7 | \Large
 8 | \LARGE
 9 | \huge
10 | \Huge
11 | 


--------------------------------------------------------------------------------
/data/text-primitives.tex:
--------------------------------------------------------------------------------
1 | \par
2 | 


--------------------------------------------------------------------------------
/data/text-spaces.tex:
--------------------------------------------------------------------------------
1 | \@
2 | 


--------------------------------------------------------------------------------
/data/text.tex:
--------------------------------------------------------------------------------
 1 | \LaTeX
 2 | \TeX
 3 | \textasciicircum
 4 | \textasciitilde
 5 | \textasteriskcentered
 6 | \textbackslash
 7 | \textbar
 8 | \textbardbl
 9 | \textbigcircle
10 | \textbraceleft
11 | \textbraceright
12 | \textbullet
13 | \textcopyright
14 | \textdagger
15 | \textdaggerdbl
16 | \textdollar
17 | \textellipsis
18 | \textemdash
19 | \textendash
20 | \textexclamdown
21 | \textgreater
22 | \textless
23 | \textordfeminine
24 | \textordmasculine
25 | \textparagraph
26 | \textperiodcentered
27 | \textpertenthousand
28 | \textperthousand
29 | \textquestiondown
30 | \textquotedblleft
31 | \textquotedblright
32 | \textquoteleft
33 | \textquoteright
34 | \textregistered
35 | \textsection
36 | \textsterling
37 | \texttrademark
38 | \textunderscore
39 | \textvisiblespace
40 | 


--------------------------------------------------------------------------------
/library/PhpLatex/Filter/Html2Latex.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | // Paragraph container. Only non-empty paragraphs are stored,
  4 | // a paragraph cannot contain LF characters,
  5 | // line break or paragraph start must be explicitly given
  6 | class PhpLatex_Filter_ParagraphList implements Countable, IteratorAggregate
  7 | {
  8 |     /**
  9 |      * @var array
 10 |      */
 11 |     protected $_paragraphs = array(); // non-empty paragraphs
 12 | 
 13 |     /**
 14 |      * @var int
 15 |      */
 16 |     protected $_pos = 0;
 17 | 
 18 |     /**
 19 |      * @var bool
 20 |      */
 21 |     protected $_nl = false;
 22 | 
 23 |     public function addText($text)
 24 |     {
 25 |         $text = preg_replace('/\s+/', ' ', $text);
 26 | 
 27 |         if (strlen($text)) {
 28 |             // echo '[' . @$this->_paragraphs[$this->_pos] . '](', $text, ') -> ';
 29 | 
 30 |             if (isset($this->_paragraphs[$this->_pos])) {
 31 |                 if ($this->_nl) {
 32 |                     if ($text !== ' ') {
 33 |                         $this->_nl = false;
 34 |                         $par = $this->_paragraphs[$this->_pos] . "\\\\\n" . $text;
 35 |                     } else {
 36 |                         // do nothing - do not append space-only string or line break
 37 |                         // wait for more text to come
 38 |                         $par = $text;
 39 |                     }
 40 |                 } else {
 41 |                     // append new text to existing paragraph, merge spaces on the
 42 |                     // strings boundary into a single space
 43 |                     $par = $this->_paragraphs[$this->_pos] . $text;
 44 |                     $par = str_replace('  ', ' ', $par);
 45 |                 }
 46 |             } else {
 47 |                 // new paragraph must start with a non-space character,
 48 |                 // no line break at the beginning of the paragraph, trailing
 49 |                 // spaces are allowed (there will be no more than 2)
 50 |                 $par = $text;
 51 |             }
 52 | 
 53 |             if (strlen($par)) {
 54 |                 $this->_paragraphs[$this->_pos] = $par;
 55 |             }
 56 | 
 57 |             // echo '[' . @$this->_paragraphs[$this->_pos] . ']', "\n\n";
 58 |         }
 59 | 
 60 | 
 61 |         return $this;
 62 |     }
 63 | 
 64 |     public function breakLine()
 65 |     {
 66 |         if ($this->_nl) {
 67 |             $this->newParagraph();
 68 |         } elseif (isset($this->_paragraphs[$this->_pos]) && !ctype_space($this->_paragraphs[$this->_pos])) {
 69 |             // line break can only be placed in a non-empty paragraph
 70 |             $this->_nl = true;
 71 |         }
 72 |         return $this;
 73 |     }
 74 | 
 75 |     public function newParagraph()
 76 |     {
 77 |         $this->_nl = false;
 78 |         if (isset($this->_paragraphs[$this->_pos])) {
 79 |             ++$this->_pos;
 80 |         }
 81 |         return $this;
 82 |     }
 83 | 
 84 |     public function clear()
 85 |     {
 86 |         $this->_paragraphs = array();
 87 |         $this->_pos = 0;
 88 |         $this->_nl = false;
 89 |         return $this;
 90 |     }
 91 | 
 92 |     public function count()
 93 |     {
 94 |         return count($this->_paragraphs);
 95 |     }
 96 | 
 97 |     public function getIterator()
 98 |     {
 99 |         return new ArrayIterator($this->_paragraphs);
100 |     }
101 | 
102 |     public function __toString()
103 |     {
104 |         if (count($this->_paragraphs)) {
105 |             return preg_replace('/[ ]+/', ' ', implode("\n\n", $this->_paragraphs)) . "\n\n";
106 |         }
107 |         return '';
108 |     }
109 | 
110 |     public function toArray()
111 |     {
112 |         return $this->_paragraphs;
113 |     }
114 | }
115 | 
/**
 * Converts an HTML fragment into LaTeX markup.
 *
 * All methods are static. State shared between calls (the output encoding
 * and the map of internal link targets collected by filter()) is kept in
 * static properties.
 */
class PhpLatex_Filter_Html2Latex
{
    // Bit flags accepted by the $flags parameters
    const BOLD          = 0x0001; // already inside bold text
    const ITALIC        = 0x0002; // already inside italic text
    const TELETYPE      = 0x0004; // already inside teletype text
    const UNDERLINE     = 0x0008; // already inside underlined text
    const NO_PARAGRAPH  = 0x0010; // passed around, not evaluated by this class
    const TRIM          = 0x0020; // strip leading/trailing newlines of a block
    const LINK          = 0x0040; // already inside a link (no nested links)
    const NO_HEADINGS   = 0x0080; // render headings as plain text

    /**
     * @var string
     */
    protected static $_outputEncoding = 'ANSI';

    /**
     * Internal link targets collected by filter():
     * fragment id (href without the leading '#') => LaTeX label name.
     *
     * @var array|null
     */
    protected static $_refs;

    /**
     * Set output encoding
     * @param string $encoding stored upper-cased; 'UTF-8' / 'UTF8' keep
     *                         UTF-8 characters in the output
     */
    public static function setOutputEncoding($encoding)
    {
        self::$_outputEncoding = strtoupper($encoding);
    }

    /**
     * Convert an HTML fragment to LaTeX.
     *
     * @param  string $html
     * @param  array $options OPTIONAL currently unused
     * @return string LaTeX markup, empty string if the document has no body
     */
    public static function filter($html, array $options = null)
    {
        $errors = libxml_use_internal_errors(true);

        $doc = new DOMDocument();
        // the processing instruction forces libxml to read the input as UTF-8
        $doc->loadHTML('<?xml encoding="UTF-8">' . $html);

        libxml_clear_errors();
        libxml_use_internal_errors($errors);

        // childNodes is a live list - iterate over a snapshot, otherwise
        // removing a node while iterating may skip its sibling
        foreach (iterator_to_array($doc->childNodes) as $item) {
            if ($item->nodeType == XML_PI_NODE) {
                $doc->removeChild($item);
            }
        }

        $doc->encoding = 'UTF-8';

        $body = $doc->getElementsByTagName('body')->item(0);
        if (!$body) {
            return '';
        }

        // ids referenced by internal links are needed for label creation
        self::$_refs = self::_collectRefs($body);

        // TODO create IDs map
        return self::processBlock($body, self::TRIM);
    }

    /**
     * Collect ids referenced by internal links (<a href="#id">) found in
     * the given subtree.
     *
     * @param  DOMNode $root
     * @return array fragment id => LaTeX label name
     */
    protected static function _collectRefs(DOMNode $root)
    {
        $refs = array();
        $slug = null;
        $queue = array($root);

        while ($elem = array_shift($queue)) {
            foreach ($elem->childNodes as $child) {
                if ($child->nodeType === XML_ELEMENT_NODE) {
                    $queue[] = $child;
                }
            }
            if ($elem->nodeType !== XML_ELEMENT_NODE || strtoupper($elem->tagName) !== 'A') {
                continue;
            }
            $href = trim($elem->getAttribute('href'));
            if (strlen($href) && $href[0] === '#') {
                if ($slug === null) {
                    // created lazily, so that documents without internal
                    // links do not need this class at all
                    $slug = new Zefram_Filter_Slug(); // FIXME dependency!
                }
                $id = (string) substr($href, 1);
                $refs[$id] = 'ref:' . $slug->filter(str_ireplace('ref:', '', $id));
            }
        }

        return $refs;
    }

    /**
     * Convert block-level content (children of the given node) to LaTeX.
     *
     * Headings, lists and tables are rendered as separate blocks, any other
     * content is collected into paragraphs.
     *
     * @param  DOMNode $body
     * @param  int $flags OPTIONAL self::TRIM strips leading and trailing
     *                    newlines of the result, self::NO_HEADINGS renders
     *                    headings as plain text
     * @return string
     */
    public static function processBlock(DOMNode $body, $flags = 0)
    {
        $headings = array(
            'H1' => 'section',
            'H2' => 'section',
            'H3' => 'subsection',
            'H4' => 'subsubsection',
            'H5' => 'paragraph',
            'H6' => 'subparagraph',
        );

        $latex = '';
        $par = new PhpLatex_Filter_ParagraphList();

        foreach ($body->childNodes as $item) {
            if ($item->nodeType === XML_TEXT_NODE || $item->nodeType === XML_ENTITY_NODE) {
                self::_addToParagraph($par, $item);
                continue;
            }
            if ($item->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }

            $tag = strtoupper($item->tagName);

            if (isset($headings[$tag])) {
                $value = trim(self::getText($item));
                if (!($flags & self::NO_HEADINGS)) {
                    // TODO handle math mode \texorpdfstring
                    $value = '\\' . $headings[$tag] . '*{' . $value . '}' . "\n"
                           . self::_headingLabel($item);
                }
                // flush paragraphs collected so far before the heading
                $latex .= $par . $value;
                $par->clear();
            } elseif ($tag === 'UL' || $tag === 'OL' || $tag === 'DL') {
                $latex .= $par . self::processList($item);
                $par->clear();
            } elseif ($tag === 'TABLE') {
                $latex .= $par . self::processTable($item);
                $par->clear();
            } else {
                self::_addToParagraph($par, $item);
            }
        }

        if (count($par)) {
            $latex .= $par;
            $par->clear();
        }

        if ($flags & self::TRIM) {
            // Trim only new lines, and only at both ends. Newlines inside
            // the block separate paragraphs and must survive (removing all
            // of them would glue consecutive paragraphs together).
            $latex = trim($latex, "\n");
        }

        return $latex;
    }

    /**
     * Build the \label line for a heading from the first non-empty id
     * attribute found on the heading element or its descendants.
     *
     * If the id is a target of an internal link collected by filter(), the
     * label name computed for that link is reused, so that the link and the
     * label always agree.
     *
     * @param  DOMElement $heading
     * @return string label line terminated with LF, or empty string
     */
    protected static function _headingLabel(DOMElement $heading)
    {
        $queue = array($heading);
        while ($elem = array_shift($queue)) {
            if ($elem->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }
            $rawId = $elem->getAttribute('id');
            $id = str_ireplace('ref:', '', $rawId);
            if (strlen($id)) {
                $label = isset(self::$_refs[$rawId]) ? self::$_refs[$rawId] : 'ref:' . $id;
                return '\\label{' . $label . '}' . "\n";
            }
            foreach ($elem->childNodes as $child) {
                $queue[] = $child;
            }
        }
        return '';
    }

    // TODO table

    /**
     * Render a table as a tabularx environment (requires tabularx package).
     *
     * Only TD cells of TR rows placed directly in the table or in its TBODY
     * children are rendered.
     *
     * @param  DOMElement $table
     * @param  int $flags OPTIONAL currently unused
     * @return string empty string if the table has no non-empty rows
     */
    public static function processTable(DOMElement $table, $flags = 0)
    {
        $containers = array_merge(array($table), self::getChildren($table, 'TBODY'));

        $ncols = 0;
        $content = '';
        foreach ($containers as $container) {
            foreach (self::getChildren($container, 'TR') as $tr) {
                $cells = array();
                foreach (self::getChildren($tr, 'TD') as $td) {
                    $cells[] = self::getText($td);
                }
                $ncols = max($ncols, count($cells));
                $row = implode(' & ', $cells);
                if (strlen($row)) {
                    $content .= $row . '\\\\' . "\n";
                }
            }
        }

        if (!$content) {
            return '';
        }

        if ($ncols === 2) {
            $colspec = 'Xr';
        } elseif ($ncols === 3) {
            $colspec = 'lXr';
        } else {
            $colspec = str_repeat('X', $ncols);
        }

        // TODO handle colspec -> borders, alignment
        return '\\vspace{5ex}' . "\n"
             . '\\begin{tabularx}{\\textwidth}{' . $colspec . '}' . "\n"
             . $content
             . '\\end{tabularx}' . "\n"
             . '\\vspace{5ex}' . "\n\n";
    }

    /**
     * Get child elements with the given tag name.
     *
     * @param  DOMNode $node
     * @param  string $tagName upper-case tag name
     * @return DOMElement[] empty if $node is not an element
     */
    public static function getChildren(DOMNode $node, $tagName)
    {
        $children = array();
        if ($node->nodeType === XML_ELEMENT_NODE) {
            foreach ($node->childNodes as $child) {
                if ($child->nodeType === XML_ELEMENT_NODE && strtoupper($child->tagName) === $tagName) {
                    $children[] = $child;
                }
            }
        }
        return $children;
    }

    /**
     * Render OL, UL or DL element as a LaTeX list environment.
     *
     * Child elements other than LI, DT and DD are ignored.
     *
     * @param  DOMElement $element
     * @param  int $flags OPTIONAL currently unused
     * @param  int $level OPTIONAL nesting level; from level 4 the list is
     *                    rendered as plain text. NOTE(review): processBlock()
     *                    does not pass the level on, so nested lists reached
     *                    through it start again at level 0.
     * @return string
     * @throws InvalidArgumentException if the element is not a list
     */
    public static function processList(DOMElement $element, $flags = 0, $level = 0)
    {
        // TODO handle indented lists

        $environments = array(
            'OL' => 'enumerate',
            'UL' => 'itemize',
            'DL' => 'description',
        );

        $tagName = strtoupper($element->tagName);
        if (!isset($environments[$tagName])) {
            throw new InvalidArgumentException('Not a list: ' . $tagName);
        }

        // Lists in LaTeX can be 4 levels deep
        if ($level >= 4) {
            return self::getText($element, self::NO_PARAGRAPH | self::TRIM);
        }

        $env = $environments[$tagName];
        $latex = '';

        $prevTag = null;
        foreach ($element->childNodes as $item) {
            if ($item->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }

            $t = strtoupper($item->tagName);

            if ($t === 'LI' || $t === 'DD') {
                $text = self::processBlock($item, self::TRIM | self::NO_HEADINGS);
                // there can be more than one paragraph in list item
                $text = trim(preg_replace('/\n[ \t]*\n+/', "\n\n", trim($text)));

                // DD starts an item of its own only when it is the first
                // item of the list, otherwise it continues the current item.
                // NOTE(review): the original condition also compared the
                // previous tag with DT, which had no effect - confirm that
                // a DD following another DD is meant to share its item.
                if ($t === 'LI' || $prevTag === null) {
                    $latex .= '    \\item ' . $text . "\n";
                } else {
                    $latex .= '    ' . $text . "\n";
                }
            } elseif ($t === 'DT') {
                $text = self::getText($item, self::TRIM | self::NO_PARAGRAPH);
                $latex .= '    \\item';
                if (strlen($text)) {
                    $latex .= '[{' . $text . '}]' . "\n";
                }
            } else {
                // Not a list item. Rendering it would place text before
                // the first \item, which LaTeX rejects, so skip it.
                continue;
            }

            $prevTag = $t;
        }

        return sprintf("\\begin{%s}\n%s\\end{%s}\n\n", $env, $latex, $env);
    }

    /**
     * Render A element as \href (external) or \hyperref (internal) link.
     *
     * @param  DOMElement $element
     * @param  int $flags OPTIONAL
     * @return string|null null if nothing is to be rendered: link nested
     *                     in another link, no text, no href, or an internal
     *                     link to an id unknown to filter()
     */
    public static function processLink(DOMElement $element, $flags = 0)
    {
        if ($flags & self::LINK) {
            // no nested links
            return null;
        }

        $text = self::getText($element, self::NO_PARAGRAPH | self::LINK);
        if (!strlen($text)) {
            return null;
        }

        $href = trim($element->getAttribute('href'));
        if (!strlen($href)) {
            return null;
        }

        // NOTE(review): $text has already been through getTextValue(),
        // which escapes it - confirm escaping it again here is intended
        $label = PhpLatex_Utils::escape($text);

        if ($href[0] === '#') {
            $id = (string) substr($href, 1);
            if (isset(self::$_refs[$id])) {
                return '\\hyperref[{' . self::$_refs[$id] . '}]{' . $label . '}';
            }
            return null;
        }

        return '\\href{' . PhpLatex_Utils::escape($href) . '}{' . $label . '}';
    }

    /**
     * Render inline node and add the result to the paragraph list.
     *
     * @param  PhpLatex_Filter_ParagraphList $par
     * @param  DOMNode $item
     * @param  int $flags OPTIONAL styles already applied by ancestors
     * @return PhpLatex_Filter_ParagraphList the list given as $par
     */
    protected static function _addToParagraph(PhpLatex_Filter_ParagraphList $par, DOMNode $item, $flags = 0)
    {
        if ($item->nodeType === XML_TEXT_NODE || $item->nodeType === XML_ENTITY_NODE) {
            $par->addText(self::getTextValue($item));
            return $par;
        }

        if ($item->nodeType !== XML_ELEMENT_NODE) {
            return $par;
        }

        // tag => (style flag, LaTeX command)
        $styles = array(
            'STRONG' => array(self::BOLD,      'textbf'),
            'B'      => array(self::BOLD,      'textbf'),
            'EM'     => array(self::ITALIC,    'textit'),
            'I'      => array(self::ITALIC,    'textit'),
            'CODE'   => array(self::TELETYPE,  'texttt'),
            'U'      => array(self::UNDERLINE, 'underline'),
        );

        $tag = strtoupper($item->tagName);

        if (isset($styles[$tag])) {
            list($flag, $command) = $styles[$tag];
            $value = self::getText($item, $flags | $flag);
            // do not wrap again if an ancestor has already applied the style
            if (!($flags & $flag) && strlen($value)) {
                $value = '\\' . $command . '{' . $value . '}';
            }
            $par->addText($value);
            return $par;
        }

        switch ($tag) {
            case 'BR':
                $par->breakLine();
                break;

            // TODO handle indented paragraphs
            case 'P':
                $par->newParagraph();
                foreach ($item->childNodes as $child) {
                    self::_addToParagraph($par, $child);
                }
                break;

            case 'A':
                // processLink() gives null when there is nothing to render
                $par->addText((string) self::processLink($item, $flags));
                break;

            case 'SUB':
                // requires \usepackage{fixltx2e} for releases prior to 2015/01/01
                $par->addText('\\textsubscript{' . self::getText($item, $flags | self::NO_PARAGRAPH) . '}');
                break;

            case 'SUP':
                $par->addText('\\textsuperscript{' . self::getText($item, $flags | self::NO_PARAGRAPH) . '}');
                break;

            default:
                $par->addText(self::getText($item, $flags));
                break;
        }

        return $par;
    }

    /**
     * Render children of the given node as inline LaTeX. Paragraphs found
     * in the content are joined with a single space.
     *
     * @param  DOMNode $element
     * @param  int $flags OPTIONAL
     * @return string
     */
    public static function getText(DOMNode $element, $flags = 0)
    {
        $par = new PhpLatex_Filter_ParagraphList();

        foreach ($element->childNodes as $item) {
            if ($item->nodeType === XML_TEXT_NODE || $item->nodeType === XML_ENTITY_NODE) {
                $par->addText(self::getTextValue($item));
            } elseif ($item->nodeType === XML_ELEMENT_NODE) {
                self::_addToParagraph($par, $item, $flags);
            }
        }

        return implode(' ', $par->toArray());
    }

    /**
     * Get escaped text of a text node.
     *
     * @param  DOMNode $node text node; for any other node (this class also
     *                       calls it for entity nodes) the text content is used
     * @return string
     */
    public static function getTextValue(DOMNode $node)
    {
        $text = $node instanceof DOMText ? $node->wholeText : $node->textContent;

        $value = str_replace(array("\r\n", "\r"), "\n", $text);
        $value = PhpLatex_Utils::escape($value);

        // replace UTF-8 characters with their counterparts if encoding is not UTF-8,
        // otherwise remove invalid UTF-8 characters
        if (in_array(self::$_outputEncoding, array('UTF-8', 'UTF8'), true)) {
            // regex taken from http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
            $regex = '
            /
              (
                (?: [\x00-\x7F]                 # single-byte sequences   0xxxxxxx
                |   [\xC0-\xDF][\x80-\xBF]      # double-byte sequences   110xxxxx 10xxxxxx
                |   [\xE0-\xEF][\x80-\xBF]{2}   # triple-byte sequences   1110xxxx 10xxxxxx * 2
                |   [\xF0-\xF7][\x80-\xBF]{3}   # quadruple-byte sequence 11110xxx 10xxxxxx * 3
                ){1,100}                        # ...one or more times
              )
            | .                                 # anything else
            /x';
            $value = preg_replace($regex, '$1', $value);
        } else {
            $value = PhpLatex_Utils::escapeUtf8($value);
        }
        return $value;
    }
}
566 | 


--------------------------------------------------------------------------------
/library/PhpLatex/Lexer.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | class PhpLatex_Lexer
  4 | {
  5 |     const EOF = "EOF";
  6 | 
  7 |     const STATE_DEFAULT = 0;
  8 |     const STATE_BSLASH  = 1;
  9 |     const STATE_CONTROL = 2;
 10 |     const STATE_SPACE   = 3;
 11 | 
 12 |     const TYPE_TEXT     = 'text';
 13 |     const TYPE_SPACE    = 'space';
 14 |     const TYPE_CWORD    = 'cword';
 15 |     const TYPE_CSYMBOL  = 'csymbol';
 16 |     const TYPE_SPECIAL  = 'special';
 17 | 
 18 |     /** @deprecated  */
 19 |     const TYPE_COMMENT  = 'comment';
 20 | 
 21 |     protected $_str;
 22 |     protected $_pos;
 23 | 
 24 |     protected $_line;
 25 |     protected $_column;
 26 | 
 27 |     protected $_pline;
 28 |     protected $_pcolumn;
 29 | 
 30 |     /**
 31 |      * @var array|null
 32 |      */
 33 |     protected $_token;
 34 | 
 35 |     protected $_state;
 36 | 
 37 |     /**
 38 |      * @var array|null
 39 |      */
 40 |     protected $_tokenPosition;
 41 | 
    /**
     * Create a lexer for the given input.
     *
     * @param string $str input, handed over to setString()
     */
    public function __construct($str)
    {
        $this->setString($str);
    }
 46 | 
 47 |     public function setString($str)
 48 |     {
 49 |         // skip leading and trailing whitespaces
 50 |         $str = trim($str);
 51 | 
 52 |         // perform initial transformations to mimic how TeX handles whitespaces.
 53 |         // Because of these transformations verbatim environments must be handled
 54 |         // elsewhere (i.e., replaced with placeholders before passing the input
 55 |         // to this lexer).
 56 | 
 57 |         // Unify newline character across platforms, replace tab with space
 58 |         $str = str_replace(
 59 |             array("\r\n", "\r", "\t"),
 60 |             array("\n", "\n", " "),
 61 |             (string) $str
 62 |         );
 63 | 
 64 |         // Replace ASCII control characters with spaces, so that token positions
 65 |         // remain unchanged
 66 |         $str = preg_replace(
 67 |             '/[\x{0000}-\x{0009}\x{000B}-\x{001F}\x{007F}]/u',
 68 |             ' ',
 69 |             $str
 70 |         );
 71 | 
 72 |         $this->_str = $str;
 73 |         $this->_pos = 0;
 74 | 
 75 |         $this->_line = 1;
 76 |         $this->_column = 0;
 77 | 
 78 |         $this->_pline = null;
 79 |         $this->_pcolumn = null;
 80 | 
 81 |         $this->_token = null;
 82 |         $this->_tokenPosition = null;
 83 | 
 84 |         $this->_state = self::STATE_DEFAULT;
 85 |     }
 86 | 
    /**
     * Get the value currently stored as the lexer's token.
     *
     * @return array|null null after setString(), which resets the token
     */
    public function current()
    {
        return $this->_token;
    }
 91 | 
 92 |     /**
 93 |      * @return array|false
 94 |      */
 95 |     public function next()
 96 |     {
 97 |         $buf = '';
 98 | 
 99 |         do {
100 |             $c = $this->_getChar();
101 | 
102 |             switch ($c) {
103 |                 case self::EOF:
104 |                     switch ($this->_state) {
105 |                         case self::STATE_DEFAULT:
106 |                             if (strlen($buf)) {
107 |                                 return $this->_setToken(self::TYPE_TEXT, $buf);
108 |                             }
109 |                             break;
110 | 
111 |                         case self::STATE_BSLASH:
112 |                             break;
113 | 
114 |                         case self::STATE_CONTROL:
115 |                             return $this->_setToken(self::TYPE_CWORD, $buf);
116 | 
117 |                         case self::STATE_SPACE:
118 |                             // ignore trailing spaces
119 |                             break;
120 |                     }
121 |                     break;
122 | 
123 |                 case "\\":
124 |                     switch ($this->_state) {
125 |                         case self::STATE_DEFAULT:
126 |                             // if there is something in the buffer return it
127 |                             // before switching state
128 |                             if (strlen($buf)) {
129 |                                 $this->_ungetChar();
130 |                                 return $this->_setToken(self::TYPE_TEXT, $buf);
131 |                             }
132 |                             $this->_state = self::STATE_BSLASH;
133 |                             $buf = "\\";
134 |                             $this->storeTokenPosition();
135 |                             break;
136 | 
137 |                         case self::STATE_BSLASH:
138 |                             return $this->_setToken(self::TYPE_CSYMBOL, '\\\\');
139 | 
140 |                         case self::STATE_CONTROL:
141 |                             // end of command, unget char, return buffer
142 |                             $this->_ungetChar();
143 |                             return $this->_setToken(self::TYPE_CWORD, $buf);
144 | 
145 |                         case self::STATE_SPACE:
146 |                             $this->_ungetChar();
147 |                             return $this->_setSpaceToken($buf);
148 |                     }
149 |                     break;
150 | 
151 |                 case ' ':
152 |                 case "\n":
153 |                     switch ($this->_state) {
154 |                         case self::STATE_DEFAULT:
155 |                             if (strlen($buf)) {
156 |                                 $this->_ungetChar();
157 |                                 return $this->_setToken(self::TYPE_TEXT, $buf);
158 |                             }
159 |                             $this->_state = self::STATE_SPACE;
160 |                             $buf = $c;
161 |                             $this->storeTokenPosition();
162 |                             if ($c === "\n") {
163 |                                 $this->_line++;
164 |                                 $this->_column = 0;
165 |                             }
166 |                             break;
167 | 
168 |                         case self::STATE_BSLASH:
169 |                             $this->storeTokenPosition();
170 |                             // if space then return control symbol, otherwise
171 |                             // switch to default state and unget this char to
172 |                             // be handler later (ignore this backslash)
173 |                             if ($c === ' ') {
174 |                                 return $this->_setToken(self::TYPE_CSYMBOL, '\\ ');
175 |                             }
176 |                             $this->_state = self::STATE_DEFAULT;
177 |                             $this->_ungetChar();
178 |                             break;
179 | 
180 |                         case self::STATE_CONTROL:
181 |                             // end of control word
182 |                             $this->_ungetChar();
183 |                             return $this->_setToken(self::TYPE_CWORD, $buf);
184 | 
185 |                         case self::STATE_SPACE:
186 |                             $buf .= $c;
187 |                             if ($c === "\n") {
188 |                                 $this->_line++;
189 |                                 $this->_column = 0;
190 |                             }
191 |                             break;
192 |                     }
193 |                     break;
194 | 
195 |                 case '%':
196 |                     switch ($this->_state) {
197 |                         case self::STATE_DEFAULT:
198 |                             // there may be something in buffer, if so, return
199 |                             // it before returning this token
200 |                             if (strlen($buf)) {
201 |                                 $this->_ungetChar();
202 |                                 return $this->_setToken(self::TYPE_TEXT, $buf);
203 |                             }
204 | 
205 |                             // http://en.wikibooks.org/wiki/LaTeX/Basics#Comments:
206 |                             // "When LaTeX encounters a % character while processing an input file, it
207 |                             // ignores the rest of the current line, the line break, and all whitespace
208 |                             // [newline excluded!] at the beginning of the next line."
209 |                             // This behavior can be illustrated by the following example:
210 |                             //   A% comment
211 |                             //       B
212 |                             // will be rendered as:
213 |                             //   AB
214 |                             // whereas:
215 |                             //   A% comment
216 |                             //
217 |                             //       B
218 |                             // as:
219 |                             //   A
220 |                             //   B
221 |                             // Comment-terminating newline and newline occurring after it
222 |                             // (intermediate spaces are ignored) are interpreted as \par command.
223 | 
224 |                             $this->storeTokenPosition();
225 | 
226 |                             return $this->_setToken(self::TYPE_SPECIAL, '%');
227 | 
228 |                         case self::STATE_BSLASH:
229 |                             return $this->_setToken(self::TYPE_CSYMBOL, '\\%');
230 | 
231 |                         case self::STATE_CONTROL:
232 |                             // end of command name, unget char
233 |                             $this->_ungetChar();
234 |                             return $this->_setToken(self::TYPE_CWORD, $buf);
235 | 
236 |                         case self::STATE_SPACE:
237 |                             $this->_ungetChar();
238 |                             return $this->_setSpaceToken($buf);
239 |                     }
240 |                     break;
241 | 
242 |                 // The following characters play a special role in LaTeX and are called special printing
243 |                 // characters, or simply special characters.
244 |                 // # $ % & ~ _ ^ \ { }
245 |                 // http://www.personal.ceu.hu/tex/specchar.htm
246 |                 case '}':
247 |                 case '{':
248 |                 case '~':
249 |                 case '^':
250 |                 case '_':
251 |                 case '&':
252 |                 case '#':
253 |                 case '$':
254 |                 case '[': // square brackets are considered special symbols, as
255 |                 case ']': // they delimit optional arguments
256 |                     switch ($this->_state) {
257 |                         case self::STATE_DEFAULT:
258 |                             // there may be something in buffer, if so, return
259 |                             // it before returning this token
260 |                             if (strlen($buf)) {
261 |                                 $this->_ungetChar();
262 |                                 return $this->_setToken(self::TYPE_TEXT, $buf);
263 |                             }
264 |                             // unescaped special character
265 |                             $this->storeTokenPosition();
266 |                             return $this->_setToken(self::TYPE_SPECIAL, $c);
267 | 
268 |                         case self::STATE_BSLASH:
269 |                             // escaped special character
270 |                             return $this->_setToken(self::TYPE_CSYMBOL, '\\' . $c);
271 | 
272 |                         case self::STATE_CONTROL:
273 |                             // end of command name, unget char
274 |                             $this->_ungetChar();
275 |                             return $this->_setToken(self::TYPE_CWORD, $buf);
276 | 
277 |                         case self::STATE_SPACE:
278 |                             $this->_ungetChar();
279 |                             return $this->_setSpaceToken($buf);
280 |                     }
281 |                     break;
282 | 
283 |                 default:
284 |                     switch ($this->_state) {
285 |                         case self::STATE_DEFAULT:
286 |                             if ($buf === '') {
287 |                                 $this->storeTokenPosition();
288 |                             }
289 |                             $buf .= $c;
290 |                             break;
291 | 
292 |                         case self::STATE_BSLASH:
293 |                             if ($this->_isAlpha($c)) {
294 |                                 $this->_state = self::STATE_CONTROL;
295 |                                 $buf .= $c;
296 |                             } else {
297 |                                 // single non-letter -> control symbol, i.e., \^
298 |                                 return $this->_setToken(self::TYPE_CSYMBOL, "\\" . $c);
299 |                             }
300 |                             break;
301 | 
302 |                         case self::STATE_CONTROL:
303 |                             if ($this->_isAlpha($c)) {
304 |                                 $buf .= $c;
305 |                             } else {
306 |                                 // not a letter, unget last char, return buffer
307 |                                 $this->_ungetChar();
308 |                                 return $this->_setToken(self::TYPE_CWORD, $buf);
309 |                             }
310 |                             break;
311 | 
312 |                         case self::STATE_SPACE:
313 |                             $this->_ungetChar();
314 |                             return $this->_setSpaceToken($buf);
315 |                     }
316 |                     break;
317 |             }
318 |         } while ($c !== self::EOF);
319 | 
320 |         return false;
321 |     }
322 | 
323 |     /**
324 |      * @return string
325 |      */
326 |     protected function _getChar()
327 |     {
328 |         if ($this->_pos >= strlen($this->_str)) {
329 |             return self::EOF; // artificial symbol denoting end of input
330 |         }
331 | 
332 |         $c = substr($this->_str, $this->_pos, 1);
333 |         $this->_pos++;
334 | 
335 |         $this->_pcolumn = $this->_column;
336 |         $this->_pline = $this->_line;
337 | 
338 |         $this->_column++;
339 | 
340 |         return $c;
341 |     }
342 | 
343 |     protected function _ungetChar()
344 |     {
345 |         if ($this->_pline === null) {
346 |             throw new RuntimeException('Too many unget calls');
347 |         }
348 | 
349 |         --$this->_pos;
350 |         $this->_line = $this->_pline;
351 |         $this->_column = $this->_pcolumn;
352 | 
353 |         $this->_pline = null;
354 |         $this->_pcolumn = null;
355 |     }
356 | 
357 |     protected function storeTokenPosition()
358 |     {
359 |         $this->_tokenPosition = array('line' => $this->_line, 'column' => $this->_column);
360 |     }
361 | 
362 |     protected function _setToken($type, $value, $raw = null)
363 |     {
364 |         // printf("setToken(type = %s, value = %s, pos = %d)\n", $type, $value, $this->_pos);
365 |         $position = $this->_tokenPosition;
366 | 
367 |         $token = array(
368 |             'type' => $type,
369 |             'value' => $value,
370 |             'line' => $position ? $position['line'] : null,
371 |             'column' => $position ? $position['column'] : null,
372 |         );
373 |         if (isset($raw)) {
374 |             $token['raw'] = $raw; // raw whitespace value
375 |         }
376 |         $this->_state = self::STATE_DEFAULT;
377 |         return $this->_token = $token;
378 |     }
379 | 
380 |     /**
381 |      * Return token based on the contents of given whitespace string.
382 |      *
383 |      * Consume all whitespaces, if among them more than one LF is found,
384 |      * return \par, otherwise append a single space to the buffer.
385 |      * This is equivalent to the following text transformations:
386 |      * 1. merge spaces into adjacent newlines
387 |      * 2. merge multiple newlines into \par
388 |      * 3. replace single newline with a space
389 |      *
390 |      * \par is equivalent to: #[ \t]*\n[ \t]*\n[ \t\n]*#
391 |      *
392 |      * @param  string $value
393 |      * @return array
394 |      */
395 |     protected function _setSpaceToken($value)
396 |     {
397 |         if (!ctype_space($value)) {
398 |             throw new InvalidArgumentException('Whitespace value expected');
399 |         }
400 | 
401 |         if (substr_count($value, "\n") > 1) {
402 |             return $this->_setToken(self::TYPE_CWORD, '\\par', $value);
403 |         }
404 | 
405 |         return $this->_setToken(self::TYPE_SPACE, ' ', $value);
406 |     }
407 | 
408 |     /**
409 |      * Locale independent check if string is non-empty and consists of
410 |      * ASCII letters A-Za-z only.
411 |      *
412 |      * @param  string $str
413 |      * @return bool
414 |      */
415 |     protected function _isAlpha($str)
416 |     {
417 |         // ctype_alpha() is locale dependent so can't be used here
418 |         if (0 < ($len = strlen($str))) {
419 |             for ($i = 0; $i < $len; ++$i) {
420 |                 $c = substr($str, $i, 1);
421 |                 if (($c < 'a' || 'z' < $c) && ($c < 'A' || 'Z' < $c)) {
422 |                     return false;
423 |                 }
424 |             }
425 |             return true;
426 |         }
427 |         return false;
428 |     }
429 | }
430 | 


--------------------------------------------------------------------------------
/library/PhpLatex/Node.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | /**
  4 |  * Class representing AST node of a parsed document.
  5 |  */
  6 | class PhpLatex_Node
  7 | {
  8 |     protected $_type;
  9 |     protected $_props;
 10 |     protected $_children = array();
 11 | 
 12 |     /**
 13 |      * @param mixed $type
 14 |      * @param array $props
 15 |      */
 16 |     public function __construct($type, array $props = null)
 17 |     {
 18 |         $this->_type = $type;
 19 | 
 20 |         // _props and _children properties are lazily-initialized
 21 |         // on first write
 22 | 
 23 |         if (null !== $props) {
 24 |             $this->setProps($props);
 25 |         }
 26 |     }
 27 | 
 28 |     /**
 29 |      * @return mixed
 30 |      */
 31 |     public function getType()
 32 |     {
 33 |         return $this->_type;
 34 |     }
 35 | 
 36 |     /**
 37 |      * @return PhpLatex_Node
 38 |      */
 39 |     public function addChild(PhpLatex_Node $node)
 40 |     {
 41 |         return $this->appendChild($node);
 42 |     }
 43 | 
 44 |     public function appendChild(PhpLatex_Node $child)
 45 |     {
 46 |         $this->_children[] = $child;
 47 |         return $this;
 48 |     }
 49 | 
 50 |     public function appendTo(PhpLatex_Node $parent)
 51 |     {
 52 |         $parent->appendChild($this);
 53 |         return $this;
 54 |     }
 55 | 
 56 |     /**
 57 |      * Retrieves the child node corresponding to the specified index.
 58 |      *
 59 |      * @param  int $index   The zero-based index of the child
 60 |      * @return PhpLatex_Node
 61 |      */
 62 |     public function getChild($index)
 63 |     {
 64 |         return isset($this->_children[$index]) ? $this->_children[$index] : null;
 65 |     }
 66 | 
 67 |     /**
 68 |      * @return array
 69 |      */
 70 |     public function getChildren()
 71 |     {
 72 |         return $this->_children;
 73 |     }
 74 | 
 75 |     /**
 76 |      * @return bool
 77 |      */
 78 |     public function hasChildren()
 79 |     {
 80 |         return (bool) count($this->_children);
 81 |     }
 82 | 
 83 |     /**
 84 |      * @return PhpLatex_Node
 85 |      */
 86 |     public function setProps(array $props)
 87 |     {
 88 |         foreach ($props as $key => $value) {
 89 |             $this->setProp($key, $value);
 90 |         }
 91 |         return $this;
 92 |     }
 93 | 
 94 |     /**
 95 |      * @return array
 96 |      */
 97 |     public function getProps()
 98 |     {
 99 |         return (array) $this->_props;
100 |     }
101 | 
102 |     /**
103 |      * @param  string $key
104 |      * @param  mixed $value
105 |      * @return PhpLatex_Node
106 |      */
107 |     public function setProp($key, $value)
108 |     {
109 |         if (null === $value) {
110 |             // unsetting an unexistant element from an array does not trigger
111 |             // "Undefined variable" notice, see:
112 |             // http://us.php.net/manual/en/function.unset.php#77310
113 |             unset($this->_props[$key]);
114 |         } else {
115 |             $this->_props[$key] = $value;
116 |         }
117 |         return $this;
118 |     }
119 | 
120 |     /**
121 |      * @param  string $key
122 |      * @return mixed
123 |      */
124 |     public function getProp($key)
125 |     {
126 |         return isset($this->_props[$key]) ? $this->_props[$key] : null;
127 |     }
128 | 
129 |     public function __set($key, $value)
130 |     {
131 |         $this->setProp($key, $value);
132 |     }
133 | 
134 |     public function __get($key)
135 |     {
136 |         return $this->getProp($key);
137 |     }
138 | 
139 |     public function __isset($key)
140 |     {
141 |         return $this->getProp($key) !== null;
142 |     }
143 | }
144 | 


--------------------------------------------------------------------------------
/library/PhpLatex/Parser.php:
--------------------------------------------------------------------------------
   1 | <?php
   2 | 
   3 | /**
 * This parser attempts to create a LaTeX document tree which, when
 * stringified, yields a semantically correct and valid LaTeX document.
   6 |  */
   7 | class PhpLatex_Parser
   8 | {
   9 |     // Supported grammar for top-down parser
  10 |     //
  11 |     // <doc> ::= <exprList>
  12 |     // <exprList> ::= <expr> <exprList> | <empty>
  13 |     // <expr> ::= <command> | '{' <exprList> '}' | <word>
  14 |     //
  15 |     // <command>        \\[a-zA-Z]+ | \\[^a-zA-Z]
  16 |     // <word>           not a command
  17 | 
  18 |     const MODE_MATH = 2;
  19 |     const MODE_TEXT = 1;
  20 |     const MODE_BOTH = 3;
  21 | 
  22 |     const STATE_TEXT    = 1;
  23 |     const STATE_MATH    = 2;
  24 |     const STATE_ARG     = 4;
  25 |     const STATE_OPT_ARG = 8;
  26 | 
  27 |     const TYPE_DOCUMENT = 'document';
  28 |     const TYPE_TEXT     = 'text';
  29 |     const TYPE_MATH     = 'math';
  30 |     const TYPE_GROUP    = 'group';
  31 |     const TYPE_SPECIAL  = 'special';
  32 |     const TYPE_COMMAND  = 'command';
  33 |     const TYPE_ENVIRON  = 'environ';
  34 |     const TYPE_VERBATIM = 'verbatim';
  35 | 
  36 |     protected $_lexer;
  37 |     protected $_verbatims;
  38 | 
  39 |     /**
  40 |      * Environments specification.
  41 |      *
  42 |      * Supported keys:
  43 |      *      int mode            null    - mode in which this environ may be
  44 |      *                                    present, one of MODE_ flag constants
  45 |      *      bool verbatim       false   - is this environ verbatim
  46 |      *      bool math           false   - does this environ start math mode?
  47 |      *      string[] environs   array() - list of environments this environ may
  48 |      *                                    occur inside, if not given or empty
  49 |      *                                    this environ cannot be nested inside
  50 |      *                                    other environments
  51 |      *      int args            0       - number of arguments this environment
  52 |      *                                    requires
  53 |      *
  54 |      * @var array
  55 |      */
  56 |     protected $_environs = array();
  57 |     protected $_commands = array();
  58 | 
  59 |     protected $_skipUndefinedCommands = true;
  60 |     protected $_skipUndefinedEnvirons = true;
  61 | 
  62 |     protected $refs = array();
  63 | 
  64 |     public function __construct()
  65 |     {
  66 |         $this->addCommands(require dirname(__FILE__) . '/commands.php');
  67 |         $this->_environs = require dirname(__FILE__) . '/environs.php';
  68 |     }
  69 | 
  70 |     /**
  71 |      * @param string $name
  72 |      * @param array $options
  73 |      * @return $this
  74 |      */
  75 |     public function addCommand($name, array $options) // {{{
  76 |     {
  77 |         if (!preg_match('/^\\\\([a-zA-Z]+|[^a-zA-Z])$/', $name)) {
  78 |             throw new InvalidArgumentException(sprintf('Invalid command name: "%s"', $name));
  79 |         }
  80 | 
  81 |         if (isset($options['mode'])) {
  82 |             $mode = $options['mode'];
  83 |             switch ($mode) {
  84 |                 case 'both':
  85 |                     $mode = self::MODE_BOTH;
  86 |                     break;
  87 | 
  88 |                 case 'math':
  89 |                     $mode = self::MODE_MATH;
  90 |                     break;
  91 | 
  92 |                 case 'text':
  93 |                     $mode = self::MODE_TEXT;
  94 |                     break;
  95 | 
  96 |                 default:
  97 |                     $mode = intval($mode);
  98 |                     break;
  99 |             }
 100 |         } else {
 101 |             $mode = self::MODE_BOTH;
 102 |         }
 103 | 
 104 |         $this->_commands[$name] = array(
 105 |             'mode' => $mode,
 106 |             'numArgs' => isset($options['numArgs']) ? intval($options['numArgs']) : 0,
 107 |             'numOptArgs' => isset($options['numOptArgs']) ? intval($options['numOptArgs']) : 0,
 108 |             'parseArgs' => !isset($options['parseArgs']) || $options['parseArgs'], // parse by default
 109 |             'starred' => isset($options['starred']) ? $options['starred'] : false,
 110 |         );
 111 |         return $this;
 112 |     } // }}}
 113 | 
 114 |     public function addCommands(array $commands) // {{{
 115 |     {
 116 |         foreach ($commands as $name => $spec) {
 117 |             $this->addCommand($name, (array) $spec);
 118 |         }
 119 |         return $this;
 120 |     } // }}}
 121 | 
 122 |     protected function _getRandomString($length)
 123 |     {
 124 |         $chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
 125 |         $max = strlen($chars) - 1;
 126 |         $string = '';
 127 |         while (strlen($string) < $length) {
 128 |             $string .= substr($chars, mt_rand(0, $max), 1);
 129 |         }
 130 |         return $string;
 131 |     }
 132 | 
 133 |     public function _readVerbatim($match)
 134 |     {
 135 |         do {
 136 |             $id = $this->_getRandomString(8);
 137 |         } while (isset($this->_verbatims[$id]));
 138 |         $this->_verbatims[$id] = array(
 139 |             'name' => $match['name'],
 140 |             'content' => $match['content'],
 141 |         );
 142 |         return "\\verbatim" . $id . " ";
 143 |     }
 144 | 
 145 |     protected $_token;
 146 |     protected $_tokenQueue = array();
 147 | 
 148 |     /**
 149 |      * Read next token from lexer.
 150 |      */
 151 |     protected function _next() // {{{
 152 |     {
 153 |         if (empty($this->_tokenQueue)) {
 154 |             // token queue is empty, get next token from lexer and return it
 155 |             // without modyfying the queue
 156 |             $token = $this->_lexer->next();
 157 |         } else {
 158 |             $token = array_shift($this->_tokenQueue);
 159 |         }
 160 |         $this->_token = $token;
 161 |         return $this->_token;
 162 |     } // }}}
 163 | 
 164 |     /**
 165 |      * Peek at the next token.
 166 |      */
 167 |     protected function _peek() // {{{
 168 |     {
 169 |         if (empty($this->_tokenQueue)) {
 170 |             $next = $this->_lexer->next();
 171 |             if ($next) {
 172 |                 $this->_tokenQueue[] = $next;
 173 |             } else {
 174 |                 // no more tokens
 175 |                 return false;
 176 |             }
 177 |         }
 178 |         return $this->_tokenQueue[0];
 179 |     } // }}}
 180 | 
 181 |     /**
 182 |      * Get current token.
 183 |      */
 184 |     protected function _current() // {{{
 185 |     {
 186 |         return $this->_token;
 187 |     } // }}}
 188 | 
 189 |     /**
 190 |      * Put token so that it will be loaded in the next call of _next()
 191 |      */
 192 |     protected function _unget(array $token) // {{{
 193 |     {
 194 |         array_unshift($this->_tokenQueue, $token);
 195 |         return $this;
 196 |     } // }}}
 197 | 
 198 |     public function parse($str)
 199 |     {
 200 |         $this->_verbatims = array();
 201 | 
 202 |         // smart comments: when a digit precedes a percent sign it is not
 203 |         // considered as start of comment
 204 |         $str = preg_replace('/([0-9])%/', '\1\\%', $str);
 205 | 
 206 |         // echo mb_strlen(preg_replace('/[^\d\p{L}]/u', '', $str)) / 5;
 207 | 
 208 |         // transform string for better tokenization
 209 |         // extract verbatims to ensure their whitespaces remain unchanged
 210 |         // * (greedy), *? (lazy)
 211 | 
 212 |         foreach ($this->_environs as $name => $spec) {
 213 |             if (!isset($spec['verbatim']) || !$spec['verbatim']) {
 214 |                 continue;
 215 |             }
 216 | 
 217 |             // prepare name for regex
 218 |             $name = preg_quote($name, '/');
 219 | 
 220 |             // if environment has starred version, add match for optional star
 221 |             if (isset($spec['starred']) && $spec['starred']) {
 222 |                 $name .= '\\*?';
 223 |             }
 224 | 
 225 |             // negative lookbehind to make sure \begin is not escaped
 226 |             $rx = '/(?<!\\\\)\\\\begin\s*\{(?P<name>' . $name . ')\}(?P<content>(.|\s)*?)\\\\end\s*\{\1\}/';
 227 | 
 228 |             $str = preg_replace_callback($rx, array($this, '_readVerbatim'), $str);
 229 |         }
 230 | 
 231 |         $this->_lexer = new PhpLatex_Lexer($str);
 232 |         $root = new PhpLatex_Node(self::TYPE_DOCUMENT);
 233 |         $this->_parseExprList($root, null, self::MODE_TEXT);
 234 | 
 235 |         // scan parsed tree in infix mode, assign numberings and refs and labels
 236 | 
 237 |         return $root;
 238 |     }
 239 | 
 240 |     /**
 241 |      * @param  string $stopAtToken
 242 |      * @param  string $state
 243 |      * @return array
 244 |      */
 245 |     protected function _parseExprList(PhpLatex_Node $parent, $stopAtToken, $state, $environ = null) // {{{
 246 |     {
 247 |         $tree = array();
 248 |         while (false !== ($token = $this->_peek())) {
 249 |             if ($token['value'] === $stopAtToken) {
 250 |                 // consume terminating token
 251 |                 $this->_next();
 252 |                 break;
 253 |             }
 254 |             $node = $this->_parseExpr($state, $environ);
 255 |             if ($node) {
 256 |                 $parent->appendChild($node);
 257 |             }
 258 |         }
 259 |         return $tree;
 260 |     } // }}}
 261 | 
 262 |     protected function _parseExpr($state, $environ = null) // {{{
 263 |     {
 264 |         $token = $this->_next();
 265 |         if ($token) {
 266 |             switch ($token['type']) {
 267 |                 case PhpLatex_Lexer::TYPE_CSYMBOL:
 268 |                 case PhpLatex_Lexer::TYPE_CWORD:
 269 |                     return $this->_parseControl($token, $state, $environ);
 270 | 
 271 |                 case PhpLatex_Lexer::TYPE_SPECIAL:
 272 |                     return $this->_parseSpecial($token, $state, $environ);
 273 | 
 274 |                 case PhpLatex_Lexer::TYPE_SPACE:
 275 |                 case PhpLatex_Lexer::TYPE_TEXT:
 276 |                     return $this->_parseText($token, $state);
 277 | 
 278 |                 case PhpLatex_Lexer::TYPE_COMMENT:
 279 |                     $this->_skipSpacesAndComments();
 280 |                     break;
 281 | 
 282 |                 default:
 283 |                     break;
 284 |             }
 285 |         }
 286 | 
 287 |         return false;
 288 |     } // }}}
 289 | 
 290 |     /**
 291 |      * @param  string $type
 292 |      * @param  int $mode
 293 |      * @param  string $environ
 294 |      * @return PhpLatex_Node
 295 |      */
 296 |     protected function _createNode($type, $mode, $environ = null) // {{{
 297 |     {
 298 |         return new PhpLatex_Node($type, array(
 299 |             'mode' => intval($mode),
 300 |             'environ' => null === $environ ? null : strval($environ),
 301 |         ));
 302 |     } // }}}
 303 | 
 304 |     /**
 305 |      * @param string $name
 306 |      *     name of a tested environment
 307 |      * @param int $mode
 308 |      *     mode the tested environment is encountered in
 309 |      * @param string $environ
 310 |      *     OPTIONAL name of a parent environment
 311 |      * @return PhpLatex_Node
 312 |      * @throws Exception
 313 |      *     when environment is encountered in invalid mode or
 314 |      *     when environment can't be nested within the parent environment
 315 |      */
 316 |     protected function _createEnviron($name, $mode, $environ = null) // {{{
 317 |     {
 318 |         assert(($mode & ($mode - 1)) === 0); // mode must be a power of 2
 319 | 
 320 |         $math = false;
 321 |         $args = array();
 322 | 
 323 |         if (isset($this->_environs[$name])) {
 324 |             $spec = $this->_environs[$name];
 325 | 
 326 |             // if mode specification is present, check if it matches
 327 |             // given mode flag
 328 |             if (isset($spec['mode']) && !($spec['mode'] & $mode)) {
 329 |                 throw new Exception('Environment in invalid mode');
 330 |             }
 331 | 
 332 |             // if parent environ and environs spec for environ of given name
 333 |             // are given, check if the parent environ is a valid container
 334 |             if (null !== $environ &&
 335 |                 (empty($spec['environs']) ||
 336 |                     !in_array($environ, (array) $spec['environs'], true))
 337 |             ) {
 338 |                 throw new Exception('Environment ' . $name . ' cannot be nested in ' . $environ . ' environment');
 339 |             }
 340 | 
 341 |             // check if this environ is an alias for a math mode (i.e. math
 342 |             // or displaymath), if so, prepare math node instead of environ node
 343 |             $math = isset($spec['math']) && $spec['math'];
 344 | 
 345 |             // parse args, will be placed as environs first children, with
 346 |             // no spaces between them, btw: \begin{tabular}c is a perfectly
 347 |             // correct specification for a single-column table.
 348 |             $nargs = isset($spec['numArgs']) ? intval($spec['numArgs']) : 0;
 349 |             while (count($args) < $nargs) {
 350 |                 if (false === ($arg = $this->_parseArg($mode, $environ))) {
 351 |                     $arg = $this->_createNode(self::TYPE_GROUP, $mode);
 352 |                 }
 353 |                 $arg->setProp('arg', true);
 354 |                 $args[] = $arg;
 355 |             }
 356 |         } elseif ($this->_skipUndefinedEnvirons) {
 357 |             throw new Exception(sprintf('Environment %s undefined', $name));
 358 |         }
 359 | 
 360 |         $node = $this->_createNode(self::TYPE_ENVIRON, $mode, $environ);
 361 |         $node->value = $name;
 362 | 
 363 |         if ($math) {
 364 |             $node->math = $math;
 365 |         }
 366 | 
 367 |         foreach ($args as $arg) {
 368 |             $node->appendChild($arg);
 369 |         }
 370 | 
 371 |         return $node;
 372 |     } // }}}
 373 | 
 374 |     /**
 375 |      * @throw Exception if mode is different than MODE_TEXT and math delimiters
 376 |      *                  are found
 377 |      */
 378 |     protected function _tryParseMathControl($token, $mode, $environ = null) // {{{
 379 |     {
 380 |         // if in text mode try first to parse math
 381 |         // predefined delimiters: left, right, inline
 382 |         $mathControls = array(
 383 |             array('\\(', '\\)', true),
 384 |             array('\\[', '\\]', false),
 385 |         );
 386 |         foreach ($mathControls as $pair) {
 387 |             if ($token['value'] === $pair[0]) {
 388 |                 if ($mode === self::MODE_TEXT) {
 389 |                     $node = $this->_createNode(self::TYPE_MATH, $mode, $environ);
 390 |                     $node->inline = $pair[2];
 391 | 
 392 |                     $this->_parseExprList($node, $pair[1], self::MODE_MATH, $environ);
 393 | 
 394 |                     return $node;
 395 |                 } else {
 396 |                     // math delimiter detected in invalid mode, stop processing
 397 |                     // ! LaTeX Error: Bad math environment delimiter.
 398 |                     throw new Exception('Math delimiter in invalid mode');
 399 |                 }
 400 |             }
 401 |         }
 402 | 
 403 |         // no math found
 404 |         return false;
 405 |     } // }}}
 406 | 
 407 |     /**
 408 |      * Parse verbatim placeholder.
 409 |      *
 410 |      * @param  array $token
 411 |      * @param  int $mode
 412 |      * @param  string $environ OPTIONAL
 413 |      * @return PhpLatex_Node
 414 |      * @throws Exception
 415 |      */
 416 |     protected function _tryParseVerbatimControl($token, $mode, $environ = null) // {{{
 417 |     {
 418 |         $value = $token['value'];
 419 | 
 420 |         if (!strncmp($value, '\\verbatim', 9)) {
 421 |             // \verbatim prefix matched, check if this is indeed a placeholder
 422 |             $id = substr($value, 9);
 423 |             if (isset($this->_verbatims[$id])) {
 424 |                 $name = $this->_verbatims[$id]['name'];
 425 |                 $node = $this->_createEnviron($name, $mode, $environ);
 426 | 
 427 |                 $verb = $this->_createNode(self::TYPE_VERBATIM, $mode, $name);
 428 |                 $verb->value = $this->_verbatims[$id]['content'];
 429 | 
 430 |                 $node->addChild($verb);
 431 | 
 432 |                 return $node;
 433 |             }
 434 |         }
 435 | 
 436 |         return false;
 437 |     } // }}}
 438 | 
 439 |     /**
 440 |      * Parse control sequence
 441 |      * @return false|PhpLatex_Node
 442 |      */
 443 |     protected function _parseControl($token, $mode, $environ = null) // {{{
 444 |     {
 445 |         $value = $token['value'];
 446 | 
 447 |         try {
 448 |             $node = $this->_tryParseMathControl($token, $mode, $environ);
 449 |             if ($node) {
 450 |                 return $node;
 451 |             }
 452 |         } catch (Exception $e) {
 453 |             return false;
 454 |         }
 455 | 
 456 |         try {
 457 |             $node = $this->_tryParseVerbatimControl($token, $mode, $environ);
 458 |             if ($node) {
 459 |                 return $node;
 460 |             }
 461 |         } catch (Exception $e) {
 462 |             return false;
 463 |         }
 464 | 
 465 |         switch ($value) {
 466 |             case '\\begin':
 467 |                 if (false !== ($name = $this->_parseEnvName())) {
 468 |                     try {
 469 |                         $node = $this->_createEnviron($name, $mode, $environ);
 470 | 
 471 |                         if ($node->math) {
 472 |                             $this->_parseExprList($node, '\\end', self::MODE_MATH, $environ);
 473 |                         } else {
 474 |                             $this->_parseExprList($node, '\\end', $mode, $name);
 475 |                         }
 476 | 
 477 |                         // consume environment name, don't care if this succeeds
 478 |                         // or not
 479 |                         $this->_parseEnvName();
 480 | 
 481 |                         return $node;
 482 | 
 483 |                     } catch (Exception $e) {
 484 |                         // environ in invalid mode or invalid environ nesting
 485 |                     }
 486 |                 }
 487 |                 return false;
 488 | 
 489 |             case '\\end':
 490 |                 // \end with no \begin, skip environ name
 491 |                 $this->_parseEnvName();
 492 |                 return false;
 493 | 
 494 |             case '\\]':
 495 |             case '\\)':
 496 |                 // unmatched math delimiter, skip
 497 |                 return false;
 498 | 
 499 |             case '\\left':
 500 |             case '\\right':
 501 |                 return $this->_parseLeftRight($token, $mode, $environ);
 502 |         }
 503 | 
 504 |         // skip space after control word (before parsing arguments)
 505 |         //
 506 |         // "When a space comes after a control word (an all-letter control
 507 |         // sequence), it is ignored by TeX; i.e., it is not considered to be
 508 |         // a "real" space belonging to the manuscript that is being typeset.
 509 |         // But when a space comes after a control symbol, it's truly a space."
 510 |         //
 511 |         // Donald E. Knuth, "TeXbook", Chapter 3
 512 |         //
 513 |         // Skip all spaces and comments occurring after this token, if this
 514 |         // token is a control word.
 515 |         if ($token['type'] === PhpLatex_Lexer::TYPE_CWORD) {
 516 |             $this->_skipSpacesAndComments();
 517 |         }
 518 | 
 519 |         $mathWrapper = null;
 520 | 
 521 |         $nodeMode = $mode;
 522 |         $nodeArgs = array();
 523 |         $nodeOptArgs = array();
 524 |         $nodeStarred = false;
 525 | 
 526 |         // validate control sequence and parse arguments
 527 |         if (isset($this->_commands[$value])) {
 528 |             $spec = $this->_commands[$value];
 529 | 
 530 |             // check if this command requires an environment, if so, check
 531 |             // if current environment is among listed ones
 532 |             if (isset($spec['environs']) &&
 533 |                 !in_array($environ, (array) $spec['environs'], true)
 534 |             ) {
 535 |                 return false;
 536 |             }
 537 | 
 538 |             // check if command is used in proper mode
 539 |             if (isset($spec['mode']) && !($spec['mode'] & $mode)) {
 540 |                 // when math mode command is encountered in text mode, wrap it
 541 |                 // in inline math mode (never the other way around).
 542 |                 if ($spec['mode'] & self::MODE_MATH) {
 543 |                     // We're outside math mode here.
 544 |                     $nodeMode = self::MODE_MATH;
 545 |                     $mathWrapper = $this->_createNode(self::TYPE_MATH, $mode);
 546 |                     $mathWrapper->inline = true;
 547 |                 } else {
 548 |                     return false;
 549 |                 }
 550 |             }
 551 | 
 552 |             // check if this command can appear in a starred version, if so,
 553 |             // parse any the following asterisk token
 554 |             if ((isset($spec['starred']) && $spec['starred']) &&
 555 |                 ($next = $this->_peek()) &&
 556 |                 ($next['type'] === PhpLatex_Lexer::TYPE_TEXT) &&
 557 |                 (0 === strncmp($next['value'], '*', 1))
 558 |             ) {
 559 |                 $this->_next();
 560 |                 $nodeStarred = true;
 561 |                 // remove asterisk from the beginning of token value, no need
 562 |                 // to use mbstring functions
 563 |                 $next['value'] = substr($next['value'], 1);
 564 |                 if (strlen($next['value'])) {
 565 |                     $this->_unget($next);
 566 |                 }
 567 |             }
 568 | 
 569 |             // parse optional arguments
 570 |             $numOptArgs = isset($spec['numOptArgs']) ? intval($spec['numOptArgs']) : 0;
 571 |             $parseArgs = isset($spec['parseArgs']) ? $spec['parseArgs'] : true;
 572 | 
 573 |             while (count($nodeOptArgs) < $numOptArgs) {
 574 |                 if (false !== ($arg = $this->_parseOptArg($nodeMode, $environ, $parseArgs))) {
 575 |                     $nodeOptArgs[] = $arg;
 576 |                 } else {
 577 |                     break;
 578 |                 }
 579 |             }
 580 | 
 581 |             // parse arguments
 582 |             $numArgs = isset($spec['numArgs']) ? intval($spec['numArgs']) : 0;
 583 | 
 584 |             while (count($nodeArgs) < $numArgs) {
 585 |                 if (false === ($arg = $this->_parseArg($nodeMode, $environ, $parseArgs))) {
 586 |                     // no argument found, create an artificial one
 587 |                     $arg = $this->_createNode(self::TYPE_GROUP, $nodeMode);
 588 |                 }
 589 |                 $nodeArgs[] = $arg;
 590 |             }
 591 |         } elseif ($this->_skipUndefinedCommands) {
 592 |             return false;
 593 |         }
 594 | 
 595 |         $node = $this->_createNode(self::TYPE_COMMAND, $nodeMode, $environ);
 596 |         $node->value = $value;
 597 | 
 598 |         if ($token['type'] === PhpLatex_Lexer::TYPE_CSYMBOL) {
 599 |             $node->symbol = true; // control symbol
 600 |         }
 601 | 
 602 |         if ($nodeStarred) {
 603 |             $node->starred = $nodeStarred;
 604 |         }
 605 | 
 606 |         foreach ($nodeOptArgs as $arg) {
 607 |             $node->appendChild($arg);
 608 |         }
 609 | 
 610 |         foreach ($nodeArgs as $arg) {
 611 |             $node->appendChild($arg);
 612 |         }
 613 | 
 614 |         if ($mathWrapper) {
 615 |             $mathWrapper->appendChild($node);
 616 |             return $mathWrapper;
 617 |         }
 618 | 
 619 |         return $node;
 620 |     } // }}}
 621 | 
 622 |     /**
 623 |      * Advance the lexer past any run of whitespace and comments.
 624 |      *
 625 |      * A comment opens with a '%' special and lasts until a token whose raw
 626 |      * text contains a newline; that token is then re-examined as ordinary
 627 |      * input. Pass true for $inComment when the opening '%' is already consumed.
 628 |      */
 629 |     protected function _skipSpacesAndComments($inComment = false)
 630 |     {
 631 |         while ($token = $this->_peek()) {
 632 |             if ($inComment) {
 633 |                 if (isset($token['raw']) && false !== strpos($token['raw'], "\n")) {
 634 |                     // keep the token, it is classified again on the next pass
 635 |                     $inComment = false;
 636 |                 } else {
 637 |                     $this->_next();
 638 |                 }
 639 |                 continue;
 640 |             }
 641 |             $opensComment = $token['type'] === PhpLatex_Lexer::TYPE_SPECIAL && $token['value'] === '%';
 642 |             if (!$opensComment && $token['type'] !== PhpLatex_Lexer::TYPE_SPACE) {
 643 |                 return;
 644 |             }
 645 |             $inComment = $opensComment;
 646 |             $this->_next();
 647 |         }
 648 |     }
 649 | 
 650 |     /**
 651 |      * Parse one mandatory argument of a control sequence. An argument is a
 652 |      * brace-delimited group, a lone square bracket, the first character of
 653 |      * a text token, or a control sequence other than \par.
 654 |      *
 655 |      * @param bool $parseArgs if false, a braced argument is read verbatim
 656 |      * @return PhpLatex_Node|false group node, false if no argument was found
 657 |      */
 658 |     protected function _parseArg($mode, $environ, $parseArgs = true) // {{{
 659 |     {
 660 |         $this->_skipSpacesAndComments();
 661 | 
 662 |         $token = $this->_peek();
 663 |         if (!$token) {
 664 |             return false;
 665 |         }
 666 |         $type = $token['type'];
 667 | 
 668 |         if ($type === PhpLatex_Lexer::TYPE_TEXT) {
 669 |             // only the first character forms the argument, whatever is
 670 |             // left of the token goes back to the lexer
 671 |             $this->_next();
 672 | 
 673 |             $group = $this->_createNode(self::TYPE_GROUP, $mode);
 674 |             $char = $this->_createNode(self::TYPE_TEXT, $mode);
 675 |             $char->value = mb_substr($token['value'], 0, 1);
 676 |             $char->appendTo($group);
 677 | 
 678 |             $token['value'] = mb_substr($token['value'], 1);
 679 |             if (mb_strlen($token['value'])) {
 680 |                 $this->_unget($token);
 681 |             }
 682 | 
 683 |             return $group;
 684 |         }
 685 | 
 686 |         if ($type === PhpLatex_Lexer::TYPE_CWORD || $type === PhpLatex_Lexer::TYPE_CSYMBOL) {
 687 |             if ($token['value'] === '\\par') {
 688 |                 // Runaway argument?
 689 |                 // ! Paragraph ended before command was complete.
 690 |                 return false;
 691 |             }
 692 | 
 693 |             $this->_next();
 694 | 
 695 |             $group = $this->_createNode(self::TYPE_GROUP, $mode);
 696 |             $control = $this->_parseControl($token, $mode, $environ);
 697 |             if ($control) {
 698 |                 $control->appendTo($group);
 699 |             }
 700 | 
 701 |             return $group;
 702 |         }
 703 | 
 704 |         if ($type !== PhpLatex_Lexer::TYPE_SPECIAL) {
 705 |             return false;
 706 |         }
 707 | 
 708 |         $special = $token['value'];
 709 | 
 710 |         if ($special === '[' || $special === ']') {
 711 |             // the lexer reports square brackets as specials to simplify
 712 |             // parsing of optional arguments; here a bracket not enclosed
 713 |             // in curly brackets is plain text forming a group of its own
 714 |             $this->_next();
 715 | 
 716 |             $group = $this->_createNode(self::TYPE_GROUP, $mode);
 717 |             $bracket = $this->_createNode(self::TYPE_TEXT, $mode);
 718 |             $bracket->value = $special;
 719 |             $bracket->appendTo($group);
 720 | 
 721 |             return $group;
 722 |         }
 723 | 
 724 |         if ($special !== '{') {
 725 |             // comment start and the remaining specials (~ ^ _ & # $)
 726 |             // are silently ignored
 727 |             return false;
 728 |         }
 729 | 
 730 |         if ($parseArgs) {
 731 |             $this->_next();
 732 | 
 733 |             $group = $this->_createNode(self::TYPE_GROUP, $mode);
 734 |             // TODO stop at first encountered \par control
 735 |             $this->_parseExprList($group, '}', $mode, $environ);
 736 | 
 737 |             return $group;
 738 |         }
 739 | 
 740 |         // arguments are not to be parsed: gather raw token values up to
 741 |         // the first right curly bracket, which is consumed as well
 742 |         $group = $this->_createNode(self::TYPE_GROUP, $mode);
 743 |         $this->_next();
 744 | 
 745 |         $verbatim = '';
 746 |         while ($inner = $this->_peek()) {
 747 |             $this->_next();
 748 |             if ($inner['type'] === PhpLatex_Lexer::TYPE_SPECIAL && $inner['value'] === '}') {
 749 |                 break;
 750 |             }
 751 |             $verbatim .= $inner['value'];
 752 |         }
 753 | 
 754 |         $node = $this->_createNode(self::TYPE_VERBATIM, $mode);
 755 |         $node->value = $verbatim;
 756 |         $node->appendTo($group);
 757 | 
 758 |         return $group;
 759 |     } // }}}
 760 | 
 761 |     /**
 762 |      * Parse an optional argument, if one starts at the current position.
 763 |      *
 764 |      * Only a group opened with a left square bracket qualifies, anything
 765 |      * else is left untouched (apart from skipped spaces and comments).
 766 |      *
 767 |      * @return PhpLatex_Node|false
 768 |      */
 769 |     protected function _parseOptArg($state, $environ) // {{{
 770 |     {
 771 |         $this->_skipSpacesAndComments();
 772 | 
 773 |         $token = $this->_peek();
 774 |         if (!$token || $token['type'] !== PhpLatex_Lexer::TYPE_SPECIAL || $token['value'] !== '[') {
 775 |             return false;
 776 |         }
 777 |         $this->_next();
 778 | 
 779 |         $optArg = $this->_createNode(self::TYPE_GROUP, $state);
 780 |         $optArg->optional = true;
 781 | 
 782 |         // TODO stop at first encountered \par control
 783 |         $this->_parseExprList($optArg, ']', $state | self::STATE_OPT_ARG, $environ);
 784 |         return $optArg;
 785 |     } // }}}
 786 | 
 787 |     /**
 788 |      * Read an environment name enclosed in curly brackets.
 789 |      *
 790 |      * Spaces and comments are skipped first. Groups holding an empty name
 791 |      * are consumed and ignored. The first token that does not open a group
 792 |      * is left in the lexer and ends the search.
 793 |      *
 794 |      * @return string|false environment name, false if none was found
 795 |      */
 796 |     protected function _parseEnvName() // {{{
 797 |     {
 798 |         $this->_skipSpacesAndComments();
 799 | 
 800 |         for (;;) {
 801 |             $token = $this->_peek();
 802 |             if (false === $token || $token['value'] !== '{') {
 803 |                 // A group start was expected here, so either the input has
 804 |                 // ended or this token breaks the environment name. Leave
 805 |                 // it in the lexer and report failure.
 806 |                 return false;
 807 |             }
 808 | 
 809 |             // consume the bracket that opens the group holding the name
 810 |             $this->_next();
 811 | 
 812 |             // Names of environments in LaTeX may contain any characters,
 813 |             // provided that curly brackets inside them are matched.
 814 |             $depth = 1; // number of unmatched left curly brackets
 815 |             $name = '';
 816 | 
 817 |             while (false !== ($token = $this->_next())) {
 818 |                 $value = $token['value'];
 819 | 
 820 |                 if ($value === '}' && --$depth === 0) {
 821 |                     // the bracket closing the group, not a part of the name
 822 |                     break;
 823 |                 }
 824 |                 if ($value === '{') {
 825 |                     ++$depth;
 826 |                 }
 827 | 
 828 |                 $name .= $value;
 829 |             }
 830 | 
 831 |             // an empty name is not accepted, examine the following token
 832 |             if (strlen($name)) {
 833 |                 return $name;
 834 |             }
 835 |         }
 836 |     } // }}}
 837 | 
 838 |     /**
 839 |      * Create a text node from the given token joined with every directly
 840 |      * following token that counts as text, i.e. text, space and square
 841 |      * bracket tokens.
 842 |      *
 843 |      * @param array $token
 844 |      * @param int $mode
 845 |      * @return PhpLatex_Node
 846 |      */
 847 |     protected function _parseText($token, $mode)
 848 |     {
 849 |         $text = $token['value'];
 850 | 
 851 |         // gather values of the following tokens, the first token that is
 852 |         // not recognized as text stays in the lexer
 853 |         $next = $this->_peek();
 854 |         while ($next && $this->_isText($next, $mode)) {
 855 |             $text .= $next['value'];
 856 |             $this->_next();
 857 |             $next = $this->_peek();
 858 |         }
 859 | 
 860 |         $node = $this->_createNode(self::TYPE_TEXT, $mode);
 861 |         $node->value = $text;
 862 | 
 863 |         return $node;
 864 |     }
 865 | 
 866 |     /**
 867 |      * Parse a token holding one of the special characters.
 868 |      *
 869 |      * @param array $token
 870 |      * @param int $state
 871 |      * @param string $environ
 872 |      * @return PhpLatex_Node|false false if the token does not yield a node
 873 |      */
 874 |     protected function _parseSpecial($token, $state, $environ) // {{{
 875 |     {
 876 |         $char = $token['value'];
 877 | 
 878 |         switch ($char) {
 879 |             case '{':
 880 |                 $group = $this->_createNode(self::TYPE_GROUP, $state);
 881 |                 $this->_parseExprList($group, '}', $state);
 882 |                 return $group;
 883 | 
 884 |             case '}': // unmatched right curly bracket, skip
 885 |             case '#': // currently not supported
 886 |                 return false;
 887 | 
 888 |             case '%':
 889 |                 // the rest of the line is a comment
 890 |                 $this->_skipSpacesAndComments(true);
 891 |                 return false;
 892 | 
 893 |             case '$':
 894 |                 // math can only be entered from text, and only if there
 895 |                 // is anything left to parse
 896 |                 $next = ($state & self::STATE_TEXT) ? $this->_peek() : false;
 897 |                 if (!$next) {
 898 |                     return false;
 899 |                 }
 900 |                 $inline = $next['value'] !== '$';
 901 |                 $math = new PhpLatex_Node(self::TYPE_MATH);
 902 |                 $math->mode = $state;
 903 |                 $math->inline = $inline;
 904 | 
 905 |                 if ($inline) {
 906 |                     $this->_parseExprList($math, '$', self::MODE_MATH);
 907 |                     return $math;
 908 |                 }
 909 | 
 910 |                 // displaymath, consume the second dollar
 911 |                 $this->_next();
 912 | 
 913 |                 // parse expressions until the dollar that ended a list is
 914 |                 // directly followed by another one, or until input ends
 915 |                 do {
 916 |                     $this->_parseExprList($math, '$', self::MODE_MATH);
 917 |                     $next = $this->_peek();
 918 |                 } while ($next && $next['value'] !== '$');
 919 |                 if ($next) {
 920 |                     // second terminating dollar found, consume it
 921 |                     $this->_next();
 922 |                 }
 923 | 
 924 |                 return $math;
 925 | 
 926 |             case '[':
 927 |             case ']':
 928 |                 // square brackets that are not part of optional arguments
 929 |                 // (those are handled when parsing control sequences)
 930 |                 $next = $this->_peek();
 931 |                 while ($next && $this->_isText($next, $state)) {
 932 |                     $char .= $next['value'];
 933 |                     $this->_next();
 934 |                     $next = $this->_peek();
 935 |                 }
 936 |                 $text = $this->_createNode(self::TYPE_TEXT, $state);
 937 |                 $text->value = $char;
 938 |                 return $text;
 939 | 
 940 |             case '^':
 941 |             case '_':
 942 |                 // subscript and superscript, require math mode
 943 |                 $arg = (self::STATE_MATH & $state) ? $this->_parseArg($state, $environ) : false;
 944 |                 if (!$arg) {
 945 |                     return false;
 946 |                 }
 947 |                 $script = $this->_createNode(self::TYPE_SPECIAL, $state);
 948 |                 $script->value = $char;
 949 |                 $script->appendChild($arg);
 950 |                 return $script;
 951 | 
 952 |             /** @noinspection PhpMissingBreakStatementInspection */
 953 |             case '&': // TODO may occur only in table
 954 |                 if (empty($environ)) {
 955 |                     // not in environment, escape it
 956 |                     $escaped = $this->_createNode(self::TYPE_COMMAND, $state);
 957 |                     $escaped->symbol = true; // control symbol \&
 958 |                     $escaped->value = '\\&';
 959 |                     return $escaped;
 960 |                 }
 961 |                 // otherwise fall through to get special
 962 | 
 963 |             case '~':
 964 |                 $special = $this->_createNode(self::TYPE_SPECIAL, $state);
 965 |                 $special->value = $char;
 966 |                 return $special;
 967 |         }
 968 | 
 969 |         return false;
 970 |     } // }}}
 971 | 
 972 |     /**
 973 |      * Parse the delimiter following a \left or \right control word.
 974 |      *
 975 |      * If no valid delimiter is found, the null delimiter '.' is used instead
 976 |      * and a space token is pushed back to the lexer.
 977 |      *
 978 |      * @param array $token \left or \right token
 979 |      * @param int $mode
 980 |      * @param string|array $environs not used
 981 |      * @return PhpLatex_Node|false false outside math mode or at end of input
 982 |      */
 983 |     protected function _parseLeftRight($token, $mode, $environs)
 984 |     {
 985 |         if ($mode !== self::MODE_MATH) {
 986 |             // wrap in math
 987 |             return false;
 988 |         }
 989 | 
 990 |         $this->_skipSpacesAndComments();
 991 |         $next = $this->_peek();
 992 |         if (!$next) {
 993 |             return false;
 994 |         }
 995 | 
 996 |         $delimiter = '.';
 997 |         $validDelimiter = false;
 998 | 
 999 |         if ($next['type'] === PhpLatex_Lexer::TYPE_TEXT) {
1000 |             $validChars = array('.', '|', '/', '<', '>', '(', ')', '[', ']');
1001 |             $firstChar = mb_substr($next['value'], 0, 1);
1002 |             if (in_array($firstChar, $validChars, true)) {
1003 |                 $this->_next();
1004 |                 $validDelimiter = true;
1005 | 
1006 |                 $delimiter = $firstChar;
1007 |                 if (mb_strlen($next['value']) > 1) {
1008 |                     $next['value'] = mb_substr($next['value'], 1);
1009 |                     $this->_unget($next);
1010 |                 }
1011 |             }
1012 |         } elseif ($next['type'] === PhpLatex_Lexer::TYPE_SPECIAL) {
1013 |             // Square brackets are valid delimiters, but they are never seen
1014 |             // as text tokens, since they are handled as specials
1015 |             if ($next['value'] === '[' || $next['value'] === ']') {
1016 |                 $delimiter = $next['value'];
1017 |                 $validDelimiter = true;
1018 |                 $this->_next();
1019 |             }
1020 |         } elseif ($next['type'] === PhpLatex_Lexer::TYPE_CSYMBOL || $next['type'] === PhpLatex_Lexer::TYPE_CWORD) {
1021 |             // All controls from math-delimiters.tex
1022 |             $validSymbols = array(
1023 |                 '\backslash', '\langle', '\rangle', '\lceil', '\rceil',
1024 |                 '\lfloor', '\rfloor', '\{', '\}', '\|',
1025 |             );
1026 |             if (in_array($next['value'], $validSymbols, true)) {
1027 |                 $delimiter = $next['value'];
1028 |                 $validDelimiter = true;
1029 |                 $this->_next();
1030 |             }
1031 |         }
1032 | 
1033 |         if (!$validDelimiter) {
1034 |             // LaTeX would report an error here:
1035 |             // I was expecting to see something like `(' or `\{' or `\}' here.
1036 |             // Recover by keeping the null delimiter and inserting a space
1037 |             // before the offending token.
1038 |             $this->_unget(array(
1039 |                 'type' => 'text',
1040 |                 'value' => ' ',
1041 |             ));
1042 |         }
1043 | 
1044 |         $node = $this->_createNode(self::TYPE_COMMAND, self::MODE_MATH);
1045 |         $node->value = $token['value'] . $delimiter;
1046 |         $node->noSpaceAfter = true;
1047 |         return $node;
1048 |     }
1049 | 
1050 |     /**
1051 |      * Tell whether a token may be merged into a run of plain text.
1052 |      *
1053 |      * @param array $token
1054 |      * @param int $state
1055 |      * @return bool
1056 |      */
1057 |     protected function _isText($token, $state) // {{{
1058 |     {
1059 |         $type = $token['type'];
1060 |         if ($type !== PhpLatex_Lexer::TYPE_SPECIAL) {
1061 |             return $type === PhpLatex_Lexer::TYPE_TEXT || $type === PhpLatex_Lexer::TYPE_SPACE;
1062 |         }
1063 |         // square brackets are text, except for the right one while an
1064 |         // optional argument is being parsed (it terminates the argument)
1065 |         $closesOptArg = $token['value'] === ']' && ($state & self::STATE_OPT_ARG);
1066 |         return !$closesOptArg && in_array($token['value'], array('[', ']'), true);
1067 |     } // }}}
1068 | }
1069 | 


--------------------------------------------------------------------------------
/library/PhpLatex/PdfLatex.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | // TODO add options, such as changing PDF version in xelatex -output-driver="xdvipdfmx -V4"
  4 | class PhpLatex_PdfLatex
  5 | {
  6 |     const TEXMFHOME = 'TEXMFHOME';
  7 | 
  8 |     /**
  9 |      * @var string
 10 |      */
 11 |     protected $_texmfhome;
 12 | 
 13 |     /**
 14 |      * @var string
 15 |      */
 16 |     protected $_buildDir;
 17 | 
 18 |     protected $_log;
 19 | 
 20 |     /**
 21 |      * @var array
 22 |      */
 23 |     protected $_compiler;
 24 | 
 25 |     /**
 26 |      * @return string
 27 |      */
 28 |     public function getPdflatexBinary()
 29 |     {
 30 |         // lstat(./pdflatex) failed ...
 31 |         // ./pdflatex: No such file or directory
 32 |         // pdflatex: ../../../texk/kpathsea/progname.c:316: remove_dots: Assertion `ret' failed.
 33 |         // Aborted
 34 | 
 35 |         // Solution: Use the full path to the pdflatex binary
 36 |         if ($this->_compiler === null) {
 37 |             $this->setPdflatexBinary($this->findPdflatexBinary());
 38 |         }
 39 |         return $this->_compiler['path'];
 40 |     }
 41 | 
 42 |     public function setPdflatexBinary($path)
 43 |     {
 44 |         // Can't use file_exists() / is_executable(), because if open_basedir ini setting is in
 45 |         // effect, file won't be reported as existing/executable, but the binary itself can still
 46 |         // exist outside the open_basedir, and be executable.
 47 |         exec(escapeshellarg($path) . ' -version 2>&1', $output, $error);
 48 | 
 49 |         if ($error) {
 50 |             throw new InvalidArgumentException('Unable to execute pdflatex binary: ' . $path);
 51 |         }
 52 | 
 53 |         $compiler = $this->_parseCompilerInfo($output[0]);
 54 |         if (!$compiler) {
 55 |             throw new InvalidArgumentException('Unrecognized pdflatex -version output');
 56 |         }
 57 | 
 58 |         $this->_compiler = array(
 59 |             'path' => $path, // open_basedir may be in effect, don't use realpath()
 60 |             'engine' => $compiler['engine'],
 61 |             'version' => $compiler['version'],
 62 |         );
 63 | 
 64 |         return $this;
 65 |     }
 66 | 
 67 |     /**
 68 |      * @param string $version
 69 |      * @internal This function is not part of the public api.
 70 |      */
 71 |     public function _parseCompilerInfo($version)
 72 |     {
 73 |         if (preg_match("/(?P<engine>\S*?TeX) (?P<version>\d[^\n]+)/i", $version, $match)) {
 74 |             return array('engine' => $match['engine'], 'version' => $match['version']);
 75 |         }
 76 |         if (preg_match("/(?P<engine>\S*?TeX), Version (?P<version>[^\n]+)/i", $version, $match)) {
 77 |             return array('engine' => $match['engine'], 'version' => $match['version']);
 78 |         }
 79 |         return false;
 80 |     }
 81 | 
 82 |     public function findPdflatexBinary()
 83 |     {
 84 |         $files = array('pdflatex');
 85 | 
 86 |         $path = getenv('PATH');
 87 |         $dirs = explode(PATH_SEPARATOR, $path);
 88 |         array_unshift($dirs, getcwd());
 89 | 
 90 |         // WIN32 WINNT Windows CYGWIN_NT-5.1
 91 |         $isWindows = stripos(PHP_OS, 'WIN') === 0 || stripos(PHP_OS, 'CYGWIN') === 0;
 92 | 
 93 |         foreach ($files as $file) {
 94 |             if ($isWindows) {
 95 |                 $file .= '.exe';
 96 |             }
 97 | 
 98 |             foreach ($dirs as $dir) {
 99 |                 $path = $dir . DIRECTORY_SEPARATOR . $file;
100 |                 if (file_exists($path) && is_executable($path)) {
101 |                     return $path;
102 |                 }
103 |             }
104 |         }
105 | 
106 |         throw new Exception('Unable to locate pdflatex binary');
107 |     }
108 | 
109 |     public function setBuildDir($path)
110 |     {
111 |         if (!is_dir($path)) {
112 |             throw new InvalidArgumentException('Path is not a directory: ' . $path);
113 |         }
114 |         if (!is_writable($path)) {
115 |             throw new InvalidArgumentException('Path is not writable: ' . $path);
116 |         }
117 |         $this->_buildDir = rtrim(realpath($path), '/') . '/';
118 |         return $this;
119 |     }
120 | 
121 |     public function getBuildDir()
122 |     {
123 |         if (empty($this->_buildDir)) {
124 |             $this->setBuildDir(sys_get_temp_dir());
125 |         }
126 |         return $this->_buildDir;
127 |     }
128 | 
129 |     public function compile($file, array $files = null)
130 |     {
131 |         $this->_log = null;
132 | 
133 |         $cwd = getcwd();
134 |         $dir = dirname($file);
135 | 
136 |         foreach ((array) $files as $path) {
137 |             // TODO handle Windows
138 |             if (!is_file($dir . '/' . basename($path))) {
139 |                 if (!@symlink($path, $dir . '/' . basename($path))) {
140 |                     copy($path, $dir . '/' . basename($path));
141 |                 }
142 |             }
143 |         }
144 | 
145 |         $pdflatex = $this->getPdflatexBinary();
146 | 
147 |         $texmfhome = getenv(self::TEXMFHOME);
148 |         $this->_setEnv(self::TEXMFHOME, $this->_texmfhome);
149 | 
150 |         chdir($dir);
151 |         $cmd = "$pdflatex -interaction nonstopmode -halt-on-error -file-line-error $file";
152 |         $log = `$cmd`;
153 |         `$cmd 2>&1`;
154 |         chdir($cwd);
155 | 
156 |         $this->_setEnv(self::TEXMFHOME, $texmfhome);
157 | 
158 |         // process log so that paths are not given away
159 |         $log = str_replace(array("\r\n", "\r"), "\n", $log);
160 |         $log = str_replace(array(
161 |             $dir . '/',
162 |             wordwrap('(' . $dir . '/', 79, "\n", true),
163 |             wordwrap($dir . '/', 79, "\n", true),
164 |         ), array('', '('), $log);
165 | 
166 |         $this->_log = __CLASS__ . ' ' . $file . "\n\n" . $log;
167 | 
168 |         $output = sprintf('%s/%s.pdf', $dir, basename($file, '.tex'));
169 | 
170 |         // if document body is empty a 0-length file is generated
171 |         if (is_file($output) && filesize($output)) {
172 |             return $output;
173 |         }
174 | 
175 |         throw new Exception(sprintf('Unable to compile file \'%s\'', $file));
176 |     }
177 | 
178 |     /**
179 |      * Compile string to a PDF document
180 |      *
181 |      * @param $script String containing LaTeX document source
182 |      * @param array $files
183 |      * @return string Path to compiled PDF document
184 |      * @throws Exception
185 |      */
186 |     public function compileString($script, array $files = null)
187 |     {
188 |         $buildDir = $this->getBuildDir() . 'pdflatex/' . md5($script);
189 |         $output = $buildDir . '/output.pdf';
190 | 
191 |         if (is_file($output)) {
192 |             return $output;
193 |         }
194 | 
195 |         if (!is_dir($buildDir)) {
196 |             if (!@mkdir($buildDir, 0777, true)) {
197 |                 throw new Exception(sprintf(
198 |                     'Unable to create script build directory: %s',
199 |                     $buildDir
200 |                 ));
201 |             }
202 |         }
203 | 
204 |         if (!is_writable($buildDir)) {
205 |             throw new Exception(sprintf(
206 |                 'Script build directory is not writable: %s',
207 |                 $buildDir
208 |             ));
209 |         }
210 | 
211 |         $scriptFile = $buildDir . '/output.tex';
212 |         file_put_contents($scriptFile, $script);
213 | 
214 |         return $this->compile($scriptFile, $files);
215 |     }
216 | 
217 |     public function getLog()
218 |     {
219 |         return (string) $this->_log;
220 |     }
221 | 
222 |     public function setTexmfhome($texmfhome)
223 |     {
224 |         $this->_texmfhome = (string) $texmfhome;
225 |         return $this;
226 |     }
227 | 
228 |     protected function _setEnv($key, $value)
229 |     {
230 |         // putenv/getenv and $_ENV are completely distinct environment stores
231 |         $_ENV[$key] = $value;
232 |         putenv("$key=$value");
233 |     }
234 | }
235 | 


--------------------------------------------------------------------------------
/library/PhpLatex/Renderer/Abstract.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | abstract class PhpLatex_Renderer_Abstract
  4 | {
  5 |     /**
  6 |      * Creates LaTeX representation of the given document node.
  7 |      *
  8 |      * This method is useful when parts of the rendered document should be
  9 |      * presented as the LaTeX source for processing (validating and rendering)
 10 |      * by external tools, i.e. MathJaX, mathTeX or mimeTeX.
 11 |      *
 12 |      * @param PhpLatex_Node|PhpLatex_Node[] $node
 13 |      * @return string
 14 |      */
 15 |     public static function toLatex($node) // {{{
 16 |     {
 17 |         if ($node instanceof PhpLatex_Node) {
 18 |             switch ($node->getType()) {
 19 |                 case PhpLatex_Parser::TYPE_SPECIAL:
 20 |                     if ($node->value === '_' || $node->value === '^') {
 21 |                         return $node->value . self::toLatex($node->getChildren());
 22 |                     }
 23 |                     return $node->value;
 24 | 
 25 |                 case PhpLatex_Parser::TYPE_TEXT:
 26 |                     // make sure text is properly escaped
 27 |                     $source = PhpLatex_Utils::escape($node->value);
 28 |                     return $source;
 29 | 
 30 |                 case PhpLatex_Parser::TYPE_GROUP:
 31 |                     $source = $node->optional ? '[{' : '{';
 32 |                     $source .= self::toLatex($node->getChildren());
 33 |                     $source .= $node->optional ? '}]' : '}';
 34 |                     return $source;
 35 | 
 36 |                 case PhpLatex_Parser::TYPE_VERBATIM:
 37 |                     return $node->value;
 38 | 
 39 |                 case PhpLatex_Parser::TYPE_MATH:
 40 |                     $source = self::toLatex($node->getChildren());
 41 |                     if ($node->inline) {
 42 |                         return '\\(' . $source . '\\)';
 43 |                     } else {
 44 |                         return '\\[' . $source . '\\]';
 45 |                     }
 46 | 
 47 |                 case PhpLatex_Parser::TYPE_COMMAND:
 48 |                     $value = $node->value;
 49 |                     if ($node->starred) {
 50 |                         $value .= '*';
 51 |                     }
 52 |                     if ($node->value === '\\string') {
 53 |                         foreach ($node->getChildren() as $child) {
 54 |                             $value .= self::toLatex($child);
 55 |                         }
 56 |                         return $value;
 57 |                     }
 58 |                     if ($node->symbol || $node->hasChildren()) {
 59 |                         return $value . self::toLatex($node->getChildren());
 60 |                     }
 61 | 
 62 |                     // some control words, e.g. \left[, doesn't need space after
 63 |                     if ($node->noSpaceAfter) {
 64 |                         return $value;
 65 |                     }
 66 |                     // control word, add space that was removed after
 67 |                     return $value . ' ';
 68 | 
 69 |                 case PhpLatex_Parser::TYPE_ENVIRON:
 70 |                     $children = $node->getChildren();
 71 |                     $argsEnd = 0;
 72 | 
 73 |                     foreach ($children as $child) {
 74 |                         if ($child->arg) {
 75 |                             ++$argsEnd;
 76 |                         } else {
 77 |                             break;
 78 |                         }
 79 |                     }
 80 | 
 81 |                     $args = array_slice($children, 0, $argsEnd);
 82 |                     $children = array_slice($children, $argsEnd);
 83 | 
 84 |                     return "\\begin{" . $node->value . "}" . self::toLatex($args) . "\n"
 85 |                          . self::toLatex($children) . "\n"
 86 |                          . "\\end{" . $node->value . "}";
 87 | 
 88 |                 case PhpLatex_Parser::TYPE_DOCUMENT:
 89 |                     return self::toLatex($node->getChildren());
 90 |             }
 91 |         } elseif (is_array($node)) {
 92 |             // render node list and concatenate results
 93 |             $latex = '';
 94 |             foreach ($node as $child) {
 95 |                 $latex .= self::toLatex($child);
 96 |             }
 97 |             return $latex;
 98 |         }
 99 |     } // }}}
100 | 
101 |     /**
102 |      * @param PhpLatex_Node|string $node
103 |      * @return string
104 |      */
105 |     abstract public function render($node);
106 | 
107 |     protected $_commandRenderers = array();
108 | 
109 |     public function addCommandRenderer($command, $renderer)
110 |     {
111 |         if (!is_callable($renderer) && !$renderer instanceof PhpLatex_Renderer_NodeRenderer) {
112 |             throw new InvalidArgumentException(sprintf(
113 |                 'Renderer must be an instance of PhpLatex_Renderer_NodeRenderer or a callable, %s given',
114 |                 is_object($renderer) ? get_class($renderer) : gettype($renderer)
115 |             ));
116 |         }
117 |         $this->_commandRenderers[$command] = $renderer;
118 |         return $this;
119 |     }
120 | 
121 |     public function hasCommandRenderer($command)
122 |     {
123 |         return isset($this->_commandRenderers[$command]);
124 |     }
125 | 
126 |     public function executeCommandRenderer($command, PhpLatex_Node $node)
127 |     {
128 |         if (!$this->hasCommandRenderer($command)) {
129 |             throw new InvalidArgumentException('Renderer for command ' . $command . ' not available');
130 |         }
131 |         $renderer = $this->_commandRenderers[$command];
132 |         if ($renderer instanceof PhpLatex_Renderer_NodeRenderer) {
133 |             return $renderer->render($node);
134 |         }
135 |         return call_user_func($renderer, $node);
136 |     }
137 | }
138 | 


--------------------------------------------------------------------------------
/library/PhpLatex/Renderer/Html.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | class PhpLatex_Renderer_Html extends PhpLatex_Renderer_Abstract
  4 | {
  5 |     const FLAG_IGNORE_PAR = 1;
  6 |     const FLAG_PAR2BR     = 4;
  7 |     const FLAG_ARG        = 2;
  8 |     const FLAG_ITEM       = self::FLAG_PAR2BR;
  9 | 
 10 |     protected $_commands = array(
 11 | 
 12 | 
 13 | 
 14 |     );
 15 | 
 16 |     protected $_par = array();
 17 | 
 18 |     /**
 19 |      * @var PhpLatex_Renderer_Typestyle
 20 |      */
 21 |     protected $_typestyle;
 22 | 
 23 |     /**
 24 |      * @var PhpLatex_Parser
 25 |      */
 26 |     protected $_parser;
 27 | 
 28 |     /**
 29 |      * @param PhpLatex_Parser $parser
 30 |      * @return PhpLatex_Renderer_Html
 31 |      */
 32 |     public function setParser(PhpLatex_Parser $parser)
 33 |     {
 34 |         $this->_parser = $parser;
 35 |         return $this;
 36 |     }
 37 | 
 38 |     /**
 39 |      * @return PhpLatex_Parser
 40 |      */
 41 |     public function getParser()
 42 |     {
 43 |         if ($this->_parser === null) {
 44 |             $this->_parser = new PhpLatex_Parser();
 45 |         }
 46 |         return $this->_parser;
 47 |     }
 48 | 
 49 |     protected function _renderItem($node, PhpLatex_Utils_PeekableIterator $it) // {{{
 50 |     {
 51 |         $html = '';
 52 | 
 53 |         if ($node->value === '\\item') {
 54 |             $it->next(); // skip \item control
 55 |         } else {
 56 |             return; // skip because no \item control was found
 57 |         }
 58 | 
 59 |         // stop rendering at first \item control word
 60 |         while (($n = $it->current()) && ($n->getType() !== PhpLatex_Parser::TYPE_COMMAND || $n->value !== '\\item')) {
 61 |             // consecutive \par macros inside \item are interpreted
 62 |             // as a single \newline
 63 |             $html .= $this->_renderNode($n, self::FLAG_PAR2BR);
 64 |             $next = $it->peek();
 65 |             if ($next && ($next->getType() !== PhpLatex_Parser::TYPE_COMMAND || $next->value !== '\\item')) {
 66 |                 $it->next();
 67 |             } else {
 68 |                 break;
 69 |             }
 70 |         }
 71 | 
 72 |         // in \item all par are converted to newline
 73 | 
 74 |         // \newline (and \\) right after \item causes "There's no line here to
 75 |         // end" error, newlines after item content are ignored
 76 |         $html = preg_replace('/^(\s|<(br|par)\/>)+|(\s|<(br|par)\/>)+$/', '', $html);
 77 |         $html = '<li>' . $html . '</li>' . "\n";
 78 | 
 79 |         return $html;
 80 |     } // }}}
 81 | 
 82 |     protected function __renderText($text)
 83 |     {
 84 |         return str_replace(
 85 |             array(
 86 |                 "\n",
 87 |                 '---', '--',
 88 |                 ',,', '``',
 89 |                 '\'\'', '"',
 90 |                 '`', '\'',
 91 |                 '<<', '>>',
 92 |                 '<', '>',
 93 |             ),
 94 |             array(
 95 |                 ' ',
 96 |                 '&mdash;', '&ndash;',
 97 |                 '&bdquo;', '&ldquo;',
 98 |                 '&rdquo;', '&rdquo;',
 99 |                 '&lsquo;', '&rsquo;',
100 |                 '&laquo;', '&raquo;',
101 |                 '&lt;' ,'&gt;',
102 |             ),
103 |             $text
104 |         );
105 |     }
106 | 
107 |     protected function _renderText($node, $flags = 0)
108 |     {
109 |         return $this->__renderText($node->value);
110 |     }
111 | 
112 |     protected function _renderGroup($node, $flags = 0)
113 |     {
114 |         if (!is_object($node)) {
115 |             throw new Exception;
116 |         }
117 |         // TODO context
118 |         if ($node->mode & PhpLatex_Parser::MODE_MATH) {
119 |             if ($node->optional) {
120 |                 // optional argument, for proper nesting must be wrapped in
121 |                 // curly braces
122 |                 $html = '[{';
123 |             } else {
124 |                 $html = '{';
125 |             }
126 |         } else {
127 |             $html = '';
128 |         }
129 | 
130 |         $tit = new PhpLatex_Utils_PeekableArrayIterator($node->getChildren());
131 |         while ($tit->valid()) {
132 |             $subnode = $tit->current();
133 |             $html .= $this->_renderNode($subnode, $flags);
134 |             $tit->next();
135 |         }
136 | 
137 |         if ($node->mode & PhpLatex_Parser::MODE_MATH) {
138 |             if ($node->optional) {
139 |                 $html .= '}]';
140 |             } else {
141 |                 $html .= '}';
142 |             }
143 |         }
144 |         return $html;
145 |     }
146 | 
147 |     protected function _renderMath($node, $flags = 0)
148 |     {
149 |         if ($node->inline) {
150 |             $delims = array('\\(', '\\)');
151 |         } else {
152 |             $delims = array('\\[', '\\]');
153 |         }
154 | 
155 |         $html = $this->_renderGroup($node, $flags);
156 | 
157 |                     // check for forbidden control words
158 |                     /*if (in_array($token['value'], array(
159 |                         '\\def',
160 |                         '\\newcommand', '\\renewcommand',
161 |                         '\\newenvironment', '\\renewenvironment',
162 |                         '\\newfont', '\\newtheorem', '\\usepackage',
163 |                         // MathTex extensions
164 |                         '\\eval', '\\environment', '\\gif',
165 |                     ), true)) {
166 |                         break;
167 |                     }*/
168 | 
169 |         // filter out certain commands
170 |         // escape unescaped \(, \), \[ and \] in subtree
171 |         // render contents
172 |         // trim
173 |         return $delims[0] . $html . $delims[1];
174 |     }
175 | 
176 |     // TODO need to know whether special is in math or text mode
177 |     protected function _renderSpecial($node, $flags = 0)
178 |     {
179 |         if ($node->mode & PhpLatex_Parser::MODE_MATH) {
180 |             if ($node->value === '_' || $node->value === '^') {
181 |                 $children = $node->getChildren();
182 |                 if (count($children)) {
183 |                     return $node->value . $this->_renderNode($children[0], self::FLAG_ARG);
184 |                 }
185 |             }
186 |             return $node->value;
187 |         }
188 |         switch ($node->value) {
189 |                 case '~':
190 |                     return '&nbsp;';
191 | 
192 |                 case '_':
193 |                 case '^':
194 |                     $children = $node->getChildren();
195 |                     if (count($children)) {
196 |                         $tag = $node->value === '_' ? 'sub' : 'sup';
197 |                         $text = $this->_renderNode($children[0], self::FLAG_ARG);
198 |                         return '<' . $tag . '>' . $text . '</' . $tag . '>';
199 |                     }
200 |                     break;
201 |             }
202 |     }
203 | 
204 |     protected function _renderEnvironList($node)
205 |     {
206 |         $html = '';
207 |         $tag = $node->value === 'itemize' ? 'ul' : 'ol';
208 |         if ($node->getChildren()) {
209 |             // list environments do not inherit flags
210 |             $html .= '<' . $tag . '>';
211 |             $iit = new PhpLatex_Utils_PeekableArrayIterator($node->getChildren());
212 |             while ($iit->valid()) {
213 |                 $subnode = $iit->current();
214 |                 $html .= $this->_renderItem($subnode, $iit, 0);
215 |                 $iit->next();
216 |             }
217 |             $html .= '</' . $tag . '>';
218 |         }
219 |         return $html;
220 |     }
221 | 
222 |     protected function _renderEnvironTabular($node)
223 |     {
224 |         $children = $node->getChildren();
225 |         $alignment = $this->_renderNodeChildren($children[0]);
226 |         $alignment = preg_replace('/[^crl]/', '', strtolower($alignment));
227 |         // alignment is treated merly as a hint
228 | 
229 |         $nrows = 0;
230 |         $ncols = 0;
231 |         $row = 0;
232 |         $col = 0;
233 |         $table = array();
234 | 
235 |         // ltrim spaces
236 |         for ($i = 1; $i < count($children); ++$i) {
237 |             $child = $children[$i];
238 |             if ($child->getType() === PhpLatex_Parser::TYPE_COMMAND &&
239 |                 $child->value === '\\\\'
240 |             ) {
241 |                 // start new row
242 |                 ++$row;
243 |                 $col = 0;
244 |                 continue;
245 |             }
246 |             if ($child->getType() === PhpLatex_Parser::TYPE_SPECIAL &&
247 |                 $child->value === '&'
248 |             ) {
249 |                 // start new column
250 |                 ++$col;
251 |                 continue;
252 |             }
253 | 
254 |             $cell = $this->_renderNode($child);
255 | 
256 |             // if last row consists only of an empty string ignore it
257 |             if ($i === count($children) - 1 && $cell === '') {
258 |                 break;
259 |             }
260 | 
261 |             $nrows = max($nrows, $row + 1);
262 |             $ncols = max($ncols, $col + 1);
263 | 
264 |             if (!isset($table[$row][$col])) {
265 |                 $table[$row][$col] = '';
266 |             }
267 | 
268 |             $table[$row][$col] .= $cell;
269 |         }
270 | 
271 |         $html = '<table class="table">';
272 |         for ($row = 0; $row < $nrows; ++$row) {
273 |             $html .= '<tr>';
274 |             for ($col = 0; $col < $ncols; ++$col) {
275 |                 $align = substr($alignment, $col, 1);
276 |                 $style = '';
277 |                 if ($align === 'c') {
278 |                     $style = ' style="text-align:center"';
279 |                 } elseif ($align === 'l') {
280 |                     $style = ' style="text-align:left"';
281 |                 } elseif ($align === 'r') {
282 |                     $style = ' style="text-align:right"';
283 |                 }
284 | 
285 |                 $cell = isset($table[$row][$col]) ? trim($table[$row][$col]) : '';
286 |                 $html .= '<td' . $style . '>' . $cell . '</td>';
287 |             }
288 |             $html .= '</tr>';
289 |         }
290 |         $html .= '</table>';
291 |         return $html;
292 |     }
293 | 
294 |     protected function _renderEnvironEquation($node)
295 |     {
296 |         $name = 'equation' . ($node->starred ? '*' : '');
297 |         return "\\[\n"
298 |             . "\\begin{{$name}} "
299 |             . $this->_renderNodeChildren($node)
300 |             . " \\end{{$name}}\n"
301 |             . "\\]\n";
302 |     }
303 | 
304 |     protected function _renderEnvironEqnarray($node)
305 |     {
306 |         $name = 'eqnarray' . ($node->starred ? '*' : '');
307 |         return "\\[ \\begin{{$name}}\n"
308 |             . $this->_renderNodeChildren($node)
309 |             . " \end{{$name}} \\]\n";
310 |     }
311 | 
312 |     protected function _renderEnvironMath($node)
313 |     {
314 |         return "\\( " . $this->_renderNodeChildren($node) . " \\) ";
315 |     }
316 | 
317 |     protected function _renderEnvironDisplaymath($node)
318 |     {
319 |         return "\\[\n" . $this->_renderNodeChildren($node) . " \\]\n";
320 |     }
321 | 
322 |     protected function _renderEnvironVerbatim($node)
323 |     {
324 |         $child = $node->getChild(0);
325 |         return '<pre class="latex-verbatim">' . htmlspecialchars($child->value)  . '</pre>';
326 |     }
327 | 
328 |     protected function _renderNodeChildren($node)
329 |     {
330 |         $html = '';
331 |         foreach ($node->getChildren() as $child) {
332 |             $html .= $this->_renderNode($child, 0);
333 |         }
334 |         return $html;
335 |     }
336 | 
337 |     // assumption $it->current() === $node
338 |     // increment iterator only if next node is required for rendering of
339 |     // this node
340 |     protected function _renderNode($node, $flags = 0)
341 |     {
342 |         if ($node->getType() === PhpLatex_Parser::TYPE_ENVIRON) {
343 |             $html = '';
344 |             switch ($node->value) {
345 |                 case 'itemize':
346 |                 case 'enumerate':
347 |                     return $this->_renderEnvironList($node);
348 | 
349 |                 default:
350 |                     $method = '_renderEnviron' . $node->value;
351 |                     if (method_exists($this, $method)) {
352 |                         return $this->$method($node);
353 |                     }
354 |                     // invalid environment, render its contents
355 |                     $html = $this->_renderNodeChildren($node);
356 |                     break;
357 |             }
358 |             return $html;
359 |         }
360 | 
361 |         if ($node->getType() === PhpLatex_Parser::TYPE_VERBATIM) {
362 |             return $this->__renderText($node->value);
363 |         }
364 | 
365 |         if ($node->getType() === PhpLatex_Parser::TYPE_COMMAND) {
366 |             // TODO filter out forbidden control sequences
367 |             if ($node->mode & PhpLatex_Parser::MODE_MATH) {
368 |                 $html = $this->_renderNodeChildren($node);
369 |                 // don't append space if control symbol
370 | 
371 |                 return $node->value . ($html ? $html : ($node->symbol ? '' : ' '));
372 |             }
373 |             if ($this->hasCommandRenderer($node->value)) {
374 |                 return $this->executeCommandRenderer($node->value, $node);
375 |             }
376 |             switch ($node->value) {
377 |                 case '\\S':
378 |                     return '&sect;';
379 | 
380 |                 case '\\P':
381 |                     return '&para;';
382 | 
383 |                 case '\\ldots':
384 |                 case '\\dots':
385 |                     return '&hellip;';
386 | 
387 |                 case '\\textbackslash':
388 |                     return '\\';
389 | 
390 |                 case '\\textasciitilde':
391 |                     return '~';
392 | 
393 |                 case '\\textasciicircum':
394 |                     return '^';
395 | 
396 |                 case '\\-':
397 |                     return ''; // word hyphenation
398 | 
399 |                 case '\\^':
400 |                 case '\\~':
401 |                     if ($arg = $node->getChild(0)) {
402 |                         $arg = trim($this->_renderNodeChildren($arg));
403 |                     }
404 |                     if (0 === strlen($arg)) {
405 |                         return substr($node->value, 1);
406 |                     }
407 |                     return $arg; // TODO support for circumflex/tilde accent
408 | 
409 |                 case '\\#':
410 |                 case '\\%':
411 |                 case '\\_':
412 |                 case '\\{':
413 |                 case '\\}':
414 |                 case '\\$':
415 |                     return substr($node->value, 1);
416 | 
417 |                 // spaces, based on https://en.wikipedia.org/wiki/Whitespace_character#Unicode
418 |                 case '\\ ':
419 |                     return '&nbsp;';
420 |                 case '\\,':
421 |                     return '&thinsp;';
422 |                 case '\\enspace':
423 |                     return '&ensp;';
424 |                 case '\\quad':
425 |                     return '&emsp;';
426 | 
427 |                 case '\\ref':
428 |                     // TODO if ref target resides in math mode render \\ref, so that
429 |                     // it can be handled by JS.
430 |                     return "\\ref{" . trim($this->_renderNodeChildren($node), "{}") . '} ';
431 | 
432 |                 case '\\&':
433 |                     return '&amp;';
434 | 
435 |                 case '\\\\':
436 |                 case '\\newline';
437 |                     return '<br/>';
438 | 
439 |                 case '\\par':
440 |                     // replace \par in argument with space
441 |                     if ($flags & self::FLAG_ARG) {
442 |                         return ' '; // ok
443 |                     }
444 | 
445 |                     // par placeholder for further processing (par will be
446 |                     // inserted or removed if certain conditions are met)
447 |                     return '<par/>';
448 | 
449 |                 case '\\url':
450 |                 case '\\href':
451 |                     $args = $node->getChildren();
452 |                     if (count($args) > 0) {
453 |                         // term arg (not text) causes the following error:
454 |                         // ! TeX capacity exceeded, sorry [input stack size=5000].
455 | 
456 |                         // TODO validate url, only (ht|f)tp(s)?:// urls
457 |                         $url = $this->_renderNode($args[0]);
458 |                         $urlAttr = str_replace(array('<', '>', '"'), array('&lt;', '&gt;', '&quot;'), $url);
459 | 
460 |                         $text = count($args) > 1 ? $this->_renderNode($args[1]) : $url;
461 | 
462 |                         return "<a href=\"" . $urlAttr . "\">" . $text . "</a>";
463 |                     }
464 |                     break;
465 | 
466 |                 case '\\TeX':
467 |                     return '<span style="font-size:1em;text-transform:uppercase;font-family:serif">T<sub style="line-height:1;font-size:1em;vertical-align:-0.5ex;margin-left:-0.1667em;margin-right:-0.125em;top:0;bottom:0">e</sub>X</span>';
468 | 
469 |                 case '\\LaTeX':
470 |                     return '<span style="font-size:1em;text-transform:uppercase;font-family:serif">L<sup style="line-height:1;font-size:0.85em;vertical-align:0.15em;margin-left:-0.36em;margin-right:-0.15em;top:0;bottom:0">A</sup>T<sub style="line-height:1;font-size:1em;vertical-align:-0.5ex;margin-left:-0.1667em;margin-right:-0.125em;top:0;bottom:0">e</sub>X</span>';
471 | 
472 |                 case '\\chapter':
473 |                 case '\\section':
474 |                 case '\\subsection':
475 |                 case '\\subsubsection':
476 |                 case '\\paragraph':
477 |                 case '\\subparagraph':
478 |                 case '\\textsubscript': // \usepackage{fixltx2e}
479 |                 case '\\textsuperscript':
480 |                     foreach ($node->getChildren() as $arg) {
481 |                         switch ($node->value) {
482 |                             case '\\chapter':
483 |                                 $tag = 'h1';
484 |                                 break;
485 | 
486 |                             case '\\section':
487 |                                 $tag = 'h2';
488 |                                 break;
489 | 
490 |                             case '\\subsection':
491 |                                 $tag = 'h3';
492 |                                 break;
493 | 
494 |                             case '\\subsubsection':
495 |                                 $tag = 'h4';
496 |                                 break;
497 | 
498 |                             case '\\paragraph':
499 |                                 $tag = 'h5';
500 |                                 break;
501 | 
502 |                             case '\\subparagraph':
503 |                                 $tag = 'h6';
504 |                                 break;
505 | 
506 |                             case '\\textsubscript':
507 |                                 $tag = 'sub';
508 |                                 break;
509 | 
510 |                             case '\\textsuperscript':
511 |                                 $tag = 'sup';
512 |                                 break;
513 |                         }
514 |                         $text = $this->_renderNode($arg, self::FLAG_ARG);
515 |                         $html = '<' . $tag . '>' . $text . '</' . $tag . '>';
516 |                         return $html;
517 |                     }
518 |                     break;
519 | 
520 |                 default:
521 |                     return $this->_renderStyled($node);
522 |                     break;
523 |             }
524 |         }
525 | 
526 |         $method = '_render' . $node->getType();
527 |         if (method_exists($this, $method)) {
528 |             return $this->$method($node, $flags);
529 |         }
530 |     }
531 | 
532 |     protected $_initialTypestyle;
533 | 
534 |     protected function _pushTypestyle()
535 |     {
536 |         if (!$this->_initialTypestyle) {
537 |             $this->_initialTypestyle = new PhpLatex_Renderer_Typestyle();
538 |         }
539 |         if (!$this->_typestyle) {
540 |             $this->_typestyle = $this->_initialTypestyle->push();
541 |         } else {
542 |             $this->_typestyle = $this->_typestyle->push();
543 |         }
544 |         return $this->_typestyle;
545 |     }
546 | 
547 |     protected function _renderStyled(PhpLatex_Node $node)
548 |     {
549 |         $typestyle = null;
550 | 
551 |         if ($node->getType() === PhpLatex_Parser::TYPE_COMMAND) {
552 |             switch ($node->value) {
553 |                 case '\\textbf':
554 |                     $typestyle = $this->_pushTypestyle();
555 |                     $typestyle->bold = true;
556 |                     break;
557 | 
558 |                 case '\\textup':
559 |                     $typestyle = $this->_pushTypestyle();
560 |                     $typestyle->style = PhpLatex_Renderer_Typestyle::STYLE_NORMAL;
561 |                     break;
562 | 
563 |                 case '\\textit':
564 |                     $typestyle = $this->_pushTypestyle();
565 |                     $typestyle->style = PhpLatex_Renderer_Typestyle::STYLE_ITALIC;
566 |                     break;
567 | 
568 |                 case '\\textsl': // slanted (oblique)
569 |                     $typestyle = $this->_pushTypestyle();
570 |                     $typestyle->style = PhpLatex_Renderer_Typestyle::STYLE_SLANTED;
571 |                     break;
572 | 
573 |                 case '\\emph':
574 |                     $typestyle = $this->_pushTypestyle();
575 |                     $typestyle->emphasis = true;
576 |                     break;
577 | 
578 |                 case '\\textrm':
579 |                     $typestyle = $this->_pushTypestyle();
580 |                     $typestyle->family = PhpLatex_Renderer_Typestyle::FAMILY_SERIF;
581 |                     break;
582 | 
583 |                 case '\\texttt':
584 |                     $typestyle = $this->_pushTypestyle();
585 |                     $typestyle->family = PhpLatex_Renderer_Typestyle::FAMILY_MONO;
586 |                     break;
587 | 
588 |                 case '\\textsf':
589 |                     $typestyle = $this->_pushTypestyle();
590 |                     $typestyle->family = PhpLatex_Renderer_Typestyle::FAMILY_SANS;
591 |                     break;
592 | 
593 |                 case '\\underline':
594 |                     $typestyle = $this->_pushTypestyle();
595 |                     $typestyle->underline = true;
596 |                     break;
597 | 
598 |                 case '\\textsc': // small caps
599 |                     $typestyle = $this->_pushTypestyle();
600 |                     $typestyle->smallcaps = true;
601 |                     break;
602 |             }
603 |         }
604 | 
605 |         $render = null;
606 | 
607 |         foreach ($node->getChildren() as $arg) {
608 |             $render .= $this->_renderNode($arg, self::FLAG_ARG);
609 |         }
610 | 
611 |         // wrap in style difference wrt to parent typestyle
612 |         if ($typestyle) {
613 |             if (strlen($render)) {
614 |                 $diff = $typestyle->diff();
615 |                 if ($diff) {
616 |                     $render = $this->_wrapStyle($render, $diff);
617 |                 }
618 |             }
619 |             $this->_typestyle = $typestyle->pop();
620 |         }
621 | 
622 |         return (string) $render;
623 |     }
624 | 
625 |     protected function _wrapStyle($render, array $diff = null)
626 |     {
627 |         $tags = array();
628 |         $style = array();
629 | 
630 |         if (isset($diff['family'])) {
631 |             switch ($diff['family']) {
632 |                 case PhpLatex_Renderer_Typestyle::FAMILY_SANS:
633 |                     $style['font-family'] = 'sans-serif';
634 |                     break;
635 | 
636 |                 case PhpLatex_Renderer_Typestyle::FAMILY_MONO:
637 |                     $style['font-family'] = 'monospace';
638 |                     break;
639 | 
640 |                 case PhpLatex_Renderer_Typestyle::FAMILY_SERIF:
641 |                     $style['font-family'] = 'serif';
642 |                     break;
643 |             }
644 |         }
645 | 
646 |         if (isset($diff['style'])) {
647 |             switch ($diff['style']) {
648 |                 case PhpLatex_Renderer_Typestyle::STYLE_NORMAL:
649 |                     $style['font-style'] = 'normal';
650 |                     break;
651 | 
652 |                 case PhpLatex_Renderer_Typestyle::STYLE_SLANTED:
653 |                     $style['font-style'] = 'oblique';
654 |                     break;
655 | 
656 |                 case PhpLatex_Renderer_Typestyle::STYLE_ITALIC:
657 |                     $tags[] = 'i';
658 |                     break;
659 |             }
660 |         }
661 | 
662 |         if (isset($diff['bold'])) {
663 |             if ($diff['bold']) {
664 |                 $tags[] = 'b';
665 |             } else {
666 |                 $style['font-weight'] = 'normal';
667 |             }
668 |         }
669 | 
670 |         if (isset($diff['emphasis'])) {
671 |             if ($diff['emphasis']) {
672 |                 $tags[] = 'em';
673 |             }
674 |         }
675 | 
676 |         if (isset($diff['underline'])) {
677 |             if ($diff['underline']) {
678 |                 $tags[] = 'u';
679 |             } else {
680 |                 $style['text-decoration'] = 'none';
681 |             }
682 |         }
683 | 
684 |         if (isset($diff['smallcaps'])) {
685 |             if ($diff['smallcaps']) {
686 |                 $style['font-variant'] = 'small-caps';
687 |             } else {
688 |                 $style['font-variant'] = 'normal';
689 |             }
690 |         }
691 | 
692 |         if (!$tags && !$style) {
693 |             return $render;
694 |         }
695 | 
696 |         if ($tags) {
697 |             $open = $close = '';
698 |             foreach ($tags as $tag) {
699 |                 $open .= '<' . $tag . '>';
700 |                 $close = $close . '</' . $tag . '>';
701 |             }
702 |             return $open . $render . $close;
703 |         }
704 | 
705 |         $css = array();
706 |         foreach ($style as $key => $value) {
707 |             $css[] = $key . ':' . $value;
708 |         }
709 |         return sprintf('<span style="%s">%s</span>', implode(';', $css), $render);
710 |     }
711 | 
712 |     /**
713 |      * @param PhpLatex_Node|string $document
714 |      * @return mixed|string
715 |      */
716 |     public function render($document)
717 |     {
718 |         if (!$document instanceof PhpLatex_Node) {
719 |             $document = $this->getParser()->parse($document);
720 |         }
721 | 
722 |         $this->_par = array();
723 |         $result = '';
724 | 
725 |         foreach ($document->getChildren() as $node) {
726 |             $result .= $this->_renderNode($node);
727 |         }
728 | 
729 |         // fix paragraphs before and after block-level elements
730 | 
731 |         // skip all \newlines and \\ that came after \par (this may be required
732 |         // when rendering LaTeX output, to avoid 'There's no line here to end'
733 |         // error) and merge multiple \par into one
734 |         $result = preg_replace(
735 |             '/<par\/>(<(br|par)\/>)+/',
736 |             '<par/>',
737 |             $result
738 |         );
739 | 
740 |         $result = preg_replace('/<par\/><(h1|h2|h3|h4|h5|h6|pre|ul|ol)/i', '<\1', $result);
741 |         $result = preg_replace('/<\/(h1|h2|h3|h4|h5|h6|pre|ul|ol)>(<par\/>)/i', '</\1>', $result);
742 | 
743 |         // replace par placeholders with their HTML counterparts
744 |         // TODO Maybe P instead of BR?
745 |         $result = str_replace('<par/>', '<br/><br/>', $result);
746 | 
747 |         return $result;
748 |     }
749 | }
750 | 


--------------------------------------------------------------------------------
/library/PhpLatex/Renderer/NodeRenderer.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
/**
 * Contract for objects that turn a single document tree node into
 * a string.
 */
interface PhpLatex_Renderer_NodeRenderer
{
    /**
     * Renders the given node.
     *
     * @param PhpLatex_Node $node node to be rendered
     * @return string rendered representation of the node
     */
    public function render(PhpLatex_Node $node);
}
11 | 


--------------------------------------------------------------------------------
/library/PhpLatex/Renderer/Typestyle.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | class PhpLatex_Renderer_Typestyle
 4 | {
 5 |     const FAMILY_UNKNOWN = 0;
 6 | 
 7 |     const FAMILY_SERIF = 1;
 8 | 
 9 |     const FAMILY_SANS = 2;
10 | 
11 |     const FAMILY_MONO = 3;
12 | 
13 |     const STYLE_NORMAL = 0;
14 | 
15 |     const STYLE_ITALIC = 1;
16 | 
17 |     const STYLE_SLANTED = 2;
18 | 
19 |     protected $_parent;
20 | 
21 |     public $style = self::STYLE_NORMAL;
22 | 
23 |     public $bold = false;
24 | 
25 |     public $underline = false;
26 | 
27 |     public $emphasis = false;
28 | 
29 |     public $smallcaps = false;
30 | 
31 |     public $family = self::FAMILY_UNKNOWN;
32 | 
33 |     public function push()
34 |     {
35 |         $child = clone $this;
36 |         $child->_parent = $this;
37 |         return $child;
38 |     }
39 | 
40 |     public function pop()
41 |     {
42 |         $parent = $this->_parent;
43 |         $this->_parent = null;
44 |         return $parent;
45 |     }
46 | 
47 |     public function diff()
48 |     {
49 |         $props = array(
50 |             'style'     => 'int',
51 |             'bold'      => 'bool',
52 |             'underline' => 'bool',
53 |             'emphasis'  => 'bool',
54 |             'smallcaps' => 'bool',
55 |             'family'    => 'int',
56 |         );
57 |         $diff = array();
58 | 
59 |         if ($this->_parent === null) {
60 |             foreach ($props as $name => $type) {
61 |                 $value = $this->$name;
62 |                 settype($value, $type);
63 |                 $diff[$name] = $value;
64 |             }
65 |         } else {
66 |             foreach ($props as $name => $type) {
67 |                 $value = $this->$name;
68 |                 settype($value, $type);
69 |                 $value2 = $this->_parent->$name;
70 |                 settype($value2, $type);
71 |                 if ($value !== $value2) {
72 |                     $diff[$name] = $value;
73 |                 }
74 |             }
75 |         }
76 |         return $diff;
77 |     }
78 | }
79 | 


--------------------------------------------------------------------------------
/library/PhpLatex/Utils.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | class PhpLatex_Utils
 4 | {
 5 |     /**
 6 |      * @param  string $string
 7 |      * @return string
 8 |      */
 9 |     public static function escape($string)
10 |     {
11 |         $replace = array(
12 |             '&' => '\\&',
13 |             '{' => '\\{',
14 |             '}' => '\\}',
15 |             '$' => '\\$',
16 |             '%' => '\\%',
17 |             '#' => '\\#',
18 |             '_' => '\\_',
19 |             '^' => '\\^', // textmode
20 |             '~' => '\\textasciitilde{}', // textmode
21 |             '\\' => '\\textbackslash{}', // textmode
22 |             // escape square brackets so that \\[length] construct does not appear
23 |             '[' => '{[}',
24 |             ']' => '{]}',
25 |         );
26 |         $string = (string) $string;
27 |         return strtr($string, $replace);
28 |     }
29 | 
30 |     /**
31 |      * Converts UTF-8 characters to their LaTeX text mode equivalents.
32 |      * Unrecognized characters are removed from output.
33 |      *
34 |      * @param string $string
35 |      * @return string
36 |      */
37 |     public static function escapeUtf8($string)
38 |     {
39 |         static $map;
40 |         if (null === $map) {
41 |             $map = require dirname(__FILE__) . '/latex_utf8.php';
42 |         }
43 |         $string = (string) $string;
44 |         $string = strtr($string, $map);
45 |         $string = preg_replace('/[^\t\n\r\x20-\x7E]/', '', $string);
46 |         return $string;
47 |     }
48 | }
49 | 


--------------------------------------------------------------------------------
/library/PhpLatex/Utils/PeekableArrayIterator.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | class PhpLatex_Utils_PeekableArrayIterator
  4 |     implements Iterator, Countable, ArrayAccess, PhpLatex_Utils_PeekableIterator
  5 | {
  6 |     /**
  7 |      * @var array
  8 |      */
  9 |     protected $_array;
 10 | 
 11 |     public function __construct(array $array = array())
 12 |     {
 13 |         $this->_array = $array;
 14 | 
 15 |         // reset internal array pointer, otherwise current position will
 16 |         // be copied from the original array!
 17 |         reset($this->_array);
 18 |     }
 19 | 
 20 |     #[\ReturnTypeWillChange]
 21 |     public function current()
 22 |     {
 23 |         return current($this->_array);
 24 |     }
 25 | 
 26 |     #[\ReturnTypeWillChange]
 27 |     public function key()
 28 |     {
 29 |         return key($this->_array);
 30 |     }
 31 | 
 32 |     #[\ReturnTypeWillChange]
 33 |     public function next()
 34 |     {
 35 |         next($this->_array);
 36 |     }
 37 | 
 38 |     #[\ReturnTypeWillChange]
 39 |     public function rewind()
 40 |     {
 41 |         reset($this->_array);
 42 |     }
 43 | 
 44 |     #[\ReturnTypeWillChange]
 45 |     public function valid()
 46 |     {
 47 |         return key($this->_array) !== null;
 48 |     }
 49 | 
 50 |     #[\ReturnTypeWillChange]
 51 |     public function count()
 52 |     {
 53 |         return count($this->_array);
 54 |     }
 55 | 
 56 |     #[\ReturnTypeWillChange]
 57 |     public function offsetExists($offset)
 58 |     {
 59 |         return isset($this->_array[$offset]);
 60 |     }
 61 | 
 62 |     #[\ReturnTypeWillChange]
 63 |     public function offsetGet($offset)
 64 |     {
 65 |         return isset($this->_array[$offset]) ? $this->_array[$offset] : null;
 66 |     }
 67 | 
 68 |     #[\ReturnTypeWillChange]
 69 |     public function offsetSet($offset, $value) {
 70 |         if (is_null($offset)) {
 71 |             $this->_array[] = $value;
 72 |         } else {
 73 |             $this->_array[$offset] = $value;
 74 |         }
 75 |     }
 76 | 
 77 |     #[\ReturnTypeWillChange]
 78 |     public function offsetUnset($offset)
 79 |     {
 80 |         unset($this->_array[$offset]);
 81 |     }
 82 | 
 83 |     public function __isset($offset)
 84 |     {
 85 |         return $this->offsetExists($offset);
 86 |     }
 87 | 
 88 |     public function __unset($offset)
 89 |     {
 90 |         $this->offsetUnset($offset);
 91 |     }
 92 | 
 93 |     public function peek()
 94 |     {
 95 |         if ($this->valid()) {
 96 |             $value = next($this->_array);
 97 |             prev($this->_array);
 98 |             return $value;
 99 |         }
100 |         return false;
101 |     }
102 | 
103 |     public function hasNext()
104 |     {
105 |         if ($this->valid()) {
106 |             next($this->_array);
107 |             $result = $this->valid();
108 |             prev($this->_array);
109 |             return $result;
110 |         }
111 |         return false;
112 |     }
113 | }
114 | 


--------------------------------------------------------------------------------
/library/PhpLatex/Utils/PeekableIterator.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
/**
 * Iterator that additionally allows inspecting what comes next without
 * advancing the iteration.
 */
interface PhpLatex_Utils_PeekableIterator extends Iterator
{
    /**
     * Returns the next element in the iteration, without advancing
     * the iteration.
     *
     * @return mixed
     */
    public function peek();

    /**
     * Returns true if the iteration has more elements.
     *
     * @return bool
     */
    public function hasNext();
}
20 | 


--------------------------------------------------------------------------------
/library/PhpLatex/Utils/TreeDebug.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | /**
 4 |  * @internal
 5 |  */
 6 | class PhpLatex_Utils_TreeDebug
 7 | {
 8 |     public static function debug(PhpLatex_Node $node, $echo = true)
 9 |     {
10 |         $result = self::_debug($node);
11 |         if ($echo) {
12 |             echo $result;
13 |             return true;
14 |         }
15 |         return $result;
16 |     }
17 | 
18 |     protected static function _debug(PhpLatex_Node $node, $indent = '') {
19 |         $str = "type: {$node->getType()}\n";
20 | 
21 |         if (count($node->getProps())) {
22 |             $str .= $indent . "props:\n";
23 |             foreach ($node->getProps() as $key => $value) {
24 |                 if ($key === 'mode') {
25 |                     switch ($value) {
26 |                         case PhpLatex_Parser::MODE_MATH:
27 |                             $value = "$value (math)";
28 |                             break;
29 |                         case PhpLatex_Parser::MODE_TEXT:
30 |                             $value = "$value (text)";
31 |                             break;
32 |                         case PhpLatex_Parser::MODE_BOTH:
33 |                             $value = "$value (both)";
34 |                             break;
35 |                     }
36 |                 }
37 |                 if ($key === 'value') {
38 |                     $value = '"' . strtr($value, array(
39 |                             "\n" => '\n',
40 |                             "\t" => '\t',
41 |                             "\r" => '\r',
42 |                         )) . '"';
43 |                 }
44 |                 if (is_bool($value)) {
45 |                     $value = var_export($value, true);
46 |                 }
47 |                 $str .= $indent . "  {$key}: $value\n";
48 |             }
49 |         } else {
50 |             $str .= $indent . "props: (empty)\n";
51 |         }
52 | 
53 |         if (count($node->getChildren())) {
54 |             $str .= $indent . "children:\n";
55 |             foreach ($node->getChildren() as $child) {
56 |                 $str .= $indent . '  - ' . self::_debug($child, $indent . '    ');
57 |             }
58 |         } else {
59 |             $str .= $indent . "children: (empty)\n";
60 |         }
61 | 
62 |         return $str;
63 |     }
64 | }
65 | 


--------------------------------------------------------------------------------
/library/PhpLatex/environs.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
// Definitions of supported environments, keyed by environment name.
//
// Options used by the entries below: 'mode', 'environs', 'verbatim', 'math',
// 'starred' and 'numArgs'. 'mode' is always one of the
// PhpLatex_Parser::MODE_* constants.
//
// NOTE(review): the exact meaning of each option is decided by the code that
// loads this file, which is not visible here -- confirm against it. Judging
// by the comments on individual entries, 'environs' presumably lists the
// environments a given environment may be nested in.
return array(
    'verbatim'    => array(
        'verbatim'  => true,
        'mode'      => PhpLatex_Parser::MODE_TEXT,
        'environs'  => array('itemize', 'enumerate'),
        'starred'   => true,
        // verbatim in tabular causes
        // ! LaTeX Error: Something's wrong--perhaps a missing \item.
    ),
    'Verbatim'    => array(
        'verbatim'  => true,
        'mode'      => PhpLatex_Parser::MODE_TEXT,
        'environs'  => array('itemize', 'enumerate'),
    ),
    'lstlisting'  => array(
        'verbatim'  => true,
        'mode'      => PhpLatex_Parser::MODE_TEXT,
        'environs'  => array('itemize', 'enumerate'),
    ),
    'enumerate'   => array(
        'mode'      => PhpLatex_Parser::MODE_TEXT,
        'environs'  => array('itemize', 'enumerate'),
    ),
    'itemize'     => array(
        'mode'      => PhpLatex_Parser::MODE_TEXT,
        'environs'  => array('itemize', 'enumerate'),
        // itemize in tabular causes
        // ! LaTeX Error: Something's wrong--perhaps a missing \item.
    ),
    'displaymath' => array(
        'math'      => true,
        'mode'      => PhpLatex_Parser::MODE_TEXT,
        'environs'  => array('itemize', 'enumerate'),
        // displaymath in tabular causes
        // ! LaTeX Error: Bad math environment delimiter.
    ),
    'math'        => array(
        'math'      => true,
        'mode'      => PhpLatex_Parser::MODE_TEXT,
        'environs'  => array('itemize', 'enumerate', 'tabular'),
    ),
    'equation' => array(
        'mode'      => PhpLatex_Parser::MODE_TEXT,
        'math'      => true,
        'starred'   => true,
    ),
    'eqnarray' => array(
        'mode'      => PhpLatex_Parser::MODE_TEXT,
        'math'      => true,
        'starred'   => true,
    ),
    'tabular' => array(
        'numArgs'   => 1,
        'mode'      => PhpLatex_Parser::MODE_TEXT,
        'environs'  => array('itemize', 'enumerate', 'tabular'),
    ),
    'array' => array(
        'numArgs'   => 1,
        'mode'      => PhpLatex_Parser::MODE_MATH,
    ),
);
64 | 


--------------------------------------------------------------------------------
/library/PhpLatex/latex_utf8.php:
--------------------------------------------------------------------------------
   1 | <?php
   2 | 
   3 | return array (
   4 |     'Á' => '\\\'{A}', // A with acute
   5 |     'Á̧' => 'A', // A with acute and cedilla
   6 |     'Ạ́' => 'A', // A with acute and dot below
   7 |     'Ą̌' => 'A', // A with acute and ogonek
   8 |     'Ą́' => 'A', // A with acute and ogonek
   9 |     'Ă' => '\\u{A}', // A with breve
  10 |     'Ắ' => 'A', // A with breve and acute
  11 |     'Ặ' => 'A', // A with breve and dot below
  12 |     'Ằ' => 'A', // A with breve and grave
  13 |     'Ẳ' => 'A', // A with breve and hook above
  14 |     'Ẵ' => 'A', // A with breve and tilde
  15 |     'Ǎ' => '\\v{A}', // A with caron
  16 |     'Ǎ̧' => 'A', // A with caron and cedilla
  17 |     'A̧' => '\\c{A}', // A with cedilla
  18 |     'A̐' => 'A', // A with chandrabindu
  19 |     'Â' => '\\^{A}', // A with circumflex
  20 |     'Ấ' => 'A', // A with circumflex and acute
  21 |     'Â̧' => 'A', // A with circumflex and cedilla
  22 |     'Ậ' => 'A', // A with circumflex and dot below
  23 |     'Ầ' => 'A', // A with circumflex and grave
  24 |     'Ẩ' => 'A', // A with circumflex and hook above
  25 |     'Ą̂' => 'A', // A with circumflex and ogonek
  26 |     'Ẫ' => 'A', // A with circumflex and tilde
  27 |     'A̭' => 'A', // A with circumflex below
  28 |     'A̓' => 'A', // A with comma above
  29 |     'Ä' => '\\"{A}', // A with diaeresis
  30 |     'Ä́' => 'A', // A with diaeresis and acute
  31 |     'Ä̌' => 'A', // A with diaeresis and caron
  32 |     'Ä̂' => 'A', // A with diaeresis and circumflex
  33 |     'Ạ̈' => 'A', // A with diaeresis and dot below
  34 |     'Ä̀' => 'A', // A with diaeresis and grave
  35 |     'Ǟ' => 'A', // A with diaeresis and macron
  36 |     'Ą̈' => 'A', // A with diaeresis and ogonek
  37 |     'Ą̈̌' => 'A', // A with diaeresis, caron and ogonek
  38 |     'Ą̈̂' => 'A', // A with diaeresis, circumflex and ogonek
  39 |     'Ą̈̀' => 'A', // A with diaeresis, grave and ogonek
  40 |     'Ȧ' => '\\.{A}', // A with dot above
  41 |     'Ȧ́' => 'A', // A with dot above and acute
  42 |     'Ǡ' => 'A', // A with dot above and macron
  43 |     'Ạ' => '\\d{A}', // A with dot below
  44 |     'A̋' => '\\H{A}', // A with double acute
  45 |     'Ȁ' => 'A', // A with double grave
  46 |     'À' => '\\`{A}', // A with grave
  47 |     'À̧' => 'A', // A with grave and cedilla
  48 |     'Ạ̀' => 'A', // A with grave and dot below
  49 |     'Ą̀' => 'A', // A with grave and ogonek
  50 |     'Ả' => 'A', // A with hook above
  51 |     'Ȃ' => 'A', // A with inverted breve
  52 |     'Ā' => '\\={A}', // A with macron
  53 |     'Ā́' => 'A', // A with macron and acute
  54 |     'Ā̆' => 'A', // A with macron and breve
  55 |     'Ā̌' => 'A', // A with macron and caron
  56 |     'Ā̂' => 'A', // A with macron and circumflex
  57 |     'Ạ̄' => 'A', // A with macron and dot below
  58 |     'Ā̀' => 'A', // A with macron and grave
  59 |     'Ą̄' => 'A', // A with macron and ogonek
  60 |     'Ā̊' => 'A', // A with macron and ring above
  61 |     'A̱' => 'A', // A with macron below
  62 |     'Á̱' => 'A', // A with macron below and acute
  63 |     'Â̱' => 'A', // A with macron below and circumflex
  64 |     'Ä̱' => 'A', // A with macron below and diaeresis
  65 |     'À̱' => 'A', // A with macron below and grave
  66 |     'Ā̱' => 'A', // A with macron below and macron
  67 |     'Å̱' => 'A', // A with macron below and ring above
  68 |     'Ą̄́' => 'A', // A with macron, acute and ogonek
  69 |     'Ą̄̌' => 'A', // A with macron, caron and ogonek
  70 |     'Ą̄̂' => 'A', // A with macron, circumflex and ogonek
  71 |     'Ą̄̀' => 'A', // A with macron, grave and ogonek
  72 |     'Ą' => '\\k{A}', // A with ogonek
  73 |     'A᷎' => 'A', // A with ogonek above
  74 |     'ᶏ' => 'A', // A with retroflex hook
  75 |     'Å' => '\\r{A}', // A with ring above
  76 |     'Ǻ' => 'A', // A with ring above and acute
  77 |     'Å̂' => 'A', // A with ring above and circumflex
  78 |     'Ą̊' => 'A', // A with ring above and ogonek
  79 |     'Ḁ' => 'A', // A with ring below
  80 |     'Ⱥ' => 'A', // A with stroke
  81 |     'Ã' => '\\~{A}', // A with tilde
  82 |     'Ã́' => 'A', // A with tilde and acute
  83 |     'Ạ̃' => 'A', // A with tilde and dot below
  84 |     'Ã̀' => 'A', // A with tilde and grave
  85 |     'Ą̃' => 'A', // A with tilde and ogonek
  86 |     'A̰' => 'A', // A with tilde below
  87 |     'A̍' => 'A', // A with vertical line
  88 |     // 'Ɖ', African D, D with tail
  89 |     // 'Ɑ́', Alpha with acute
  90 |     // 'Ɑ̌', Alpha with caron
  91 |     // 'Ɑ̂', Alpha with circumflex
  92 |     // 'Ɑ̀', Alpha with grave
  93 |     // 'ᶐ', Alpha with retroflex hook
  94 |     'B́' => '\\\'{B}', // B with acute
  95 |     'B̓' => 'B', // B with comma above
  96 |     'B̤' => 'B', // B with diaeresis below
  97 |     'Ḃ' => '\\.{B}', // B with dot above
  98 |     'Ḅ' => '\\d{B}', // B with dot below
  99 |     'Ɓ' => 'B', // B with hook
 100 |     'Ḇ' => 'B', // B with line below
 101 |     'ᵬ' => 'B', // B with middle tilde
 102 |     'ᶀ' => 'B', // B with palatal hook
 103 |     'Ƀ' => 'B', // B with stroke
 104 |     'B̃' => '\\~{B}', // B with tilde
 105 |     'Ƃ' => 'B', // B with topbar
 106 |     'Ć' => '\\\'{C}', // C with acute
 107 |     'Ꞓ' => 'C', // C with bar
 108 |     'C̆' => '\\u{C}', // C with breve
 109 |     'Č' => '\\v{C}', // C with caron
 110 |     'Č̓' => 'C', // C with caron and comma above
 111 |     'Ç' => '\\c{C}', // C with cedilla
 112 |     'Ḉ' => 'C', // C with cedilla and acute
 113 |     'Ç̆' => 'C', // C with cedilla and breve
 114 |     'Ç̌' => 'C', // C with cedilla and caron
 115 |     'Ç̇' => 'C', // C with cedilla and dot above
 116 |     'Ĉ' => '\\^{C}', // C with circumflex
 117 |     'C̓' => 'C', // C with comma above
 118 |     'ɕ' => 'C', // C with curl
 119 |     'C̈' => '\\"{C}', // C with diaeresis
 120 |     'Ċ' => '\\.{C}', // C with dot above
 121 |     'C̣' => '\\d{C}', // C with dot below
 122 |     'C̀' => '\\`{C}', // C with grave
 123 |     'Ƈ' => 'C', // C with hook
 124 |     'C̄' => '\\={C}', // C with macron
 125 |     'Ȼ' => 'C', // C with stroke
 126 |     'C̃' => '\\~{C}', // C with tilde
 127 |     // 'Ꜯ', Cuatrillo with comma
 128 |     'D́' => '\\\'{D}', // D with acute
 129 |     'Ď' => '\\v{D}', // D with caron
 130 |     'Ḑ' => '\\c{D}', // D with cedilla
 131 |     'D̂' => '\\^{D}', // D with circumflex
 132 |     'Ḓ' => 'D', // D with circumflex below
 133 |     'Ḓ' => 'D', // D with circumflex below
 134 |     'D̓' => 'D', // D with comma above
 135 |     'D̦' => 'D', // D with comma below
 136 |     'ȡ' => 'D', // D with curl
 137 |     'D̤' => 'D', // D with diaeresis below
 138 |     'Ḋ' => '\\.{D}', // D with dot above
 139 |     'Ḍ' => '\\d{D}', // D with dot below
 140 |     'Ɗ' => 'D', // D with hook
 141 |     'ᶑ' => 'D', // D with hook and tail
 142 |     'Ḏ' => 'D', // D with line below
 143 |     'ᵭ' => 'D', // D with middle tilde
 144 |     'ᶁ' => 'D', // D with palatal hook
 145 |     'Đ' => 'D', // D with stroke
 146 |     'Ƌ' => 'D', // D with topbar
 147 |     // 'ȷ', Dotless J
 148 |     // 'ɟ', Dotless J with stroke
 149 |     // 'ʄ', Dotless J with stroke and hook
 150 |     'É' => '\\\'{E}', // E with acute
 151 |     'Ȩ́' => 'E', // E with acute and cedilla
 152 |     'Ẹ́' => 'E', // E with acute and dot below
 153 |     'É̱' => 'E', // E with acute and macron below
 154 |     'Ę́' => 'E', // E with acute and ogonek
 155 |     'Ĕ' => '\\u{E}', // E with breve
 156 |     'Ḝ' => 'E', // E with breve and cedilla
 157 |     'Ḝ' => 'E', // E with breve and cedilla
 158 |     'Ě' => '\\v{E}', // E with caron
 159 |     'Ȩ̌' => 'E', // E with caron and cedilla
 160 |     'Ę̌' => 'E', // E with caron and ogonek
 161 |     'Ȩ' => '\\c{E}', // E with cedilla
 162 |     'Ȩ' => '\\c{E}', // E with cedilla
 163 |     'Ê' => '\\^{E}', // E with circumflex
 164 |     'Ế' => 'E', // E with circumflex and acute
 165 |     'Ȩ̂' => 'E', // E with circumflex and cedilla
 166 |     'Ệ' => 'E', // E with circumflex and dot below
 167 |     'Ề' => 'E', // E with circumflex and grave
 168 |     'Ể' => 'E', // E with circumflex and hook above
 169 |     'Ê̱' => 'E', // E with circumflex and macron below
 170 |     'Ę̂' => 'E', // E with circumflex and ogonek
 171 |     'Ễ' => 'E', // E with circumflex and tilde
 172 |     'Ḙ' => 'E', // E with circumflex below
 173 |     'Ë' => '\\"{E}', // E with diaeresis
 174 |     'Ë́' => 'E', // E with diaeresis and acute
 175 |     'Ë̌' => 'E', // E with diaeresis and caron
 176 |     'Ë̂' => 'E', // E with diaeresis and circumflex
 177 |     'Ë̀' => 'E', // E with diaeresis and grave
 178 |     'Ë̱' => 'E', // E with diaeresis and macron below
 179 |     'Ę̈' => 'E', // E with diaeresis and ogonek
 180 |     'Ę̈̌' => 'E', // E with diaeresis, caron and ogonek
 181 |     'Ę̈̂' => 'E', // E with diaeresis, circumflex and ogonek
 182 |     'Ę̈̀' => 'E', // E with diaeresis, grave and ogonek
 183 |     'Ė' => '\\.{E}', // E with dot above
 184 |     'Ė́' => 'E', // E with dot above and acute
 185 |     'Ę̇' => 'E', // E with dot above and ogonek
 186 |     'Ę̇́' => 'E', // E with dot above, acute and ogonek
 187 |     'Ė̃' => 'E', // E with dot and macron
 188 |     'Ẹ' => '\\d{E}', // E with dot below
 189 |     'E̋' => '\\H{E}', // E with double acute
 190 |     'Ę̋' => 'E', // E with double acute and ogonek
 191 |     'Ȅ' => 'E', // E with double grave
 192 |     'Ȅ' => 'E', // E with double grave
 193 |     'È' => '\\`{E}', // E with grave
 194 |     'Ȩ̀' => 'E', // E with grave and cedilla
 195 |     'Ẹ̀' => 'E', // E with grave and dot below
 196 |     'È̱' => 'E', // E with grave and macron below
 197 |     'Ę̀' => 'E', // E with grave and ogonek
 198 |     'Ẻ' => 'E', // E with hook above
 199 |     'Ȇ' => 'E', // E with inverted breve
 200 |     'Ē' => '\\={E}', // E with macron
 201 |     'Ḗ' => 'E', // E with macron and acute
 202 |     'Ē̆' => 'E', // E with macron and breve
 203 |     'Ē̌' => 'E', // E with macron and caron
 204 |     'Ē̂' => 'E', // E with macron and circumflex
 205 |     'Ẹ̄' => 'E', // E with macron and dot below
 206 |     'Ḕ' => 'E', // E with macron and grave
 207 |     'Ē̱' => 'E', // E with macron and macron below
 208 |     'Ę̄' => 'E', // E with macron and ogonek
 209 |     'E̱' => 'E', // E with macron below
 210 |     'Ę̄́' => 'E', // E with macron, acute and ogonek
 211 |     'Ę̄̌' => 'E', // E with macron, caron and ogonek
 212 |     'Ę̄̂' => 'E', // E with macron, circumflex and ogonek
 213 |     'Ę̄̀' => 'E', // E with macron, grave and ogonek
 214 |     'ⱸ' => 'E', // E with notch
 215 |     'Ę' => '\\k{E}', // E with ogonek
 216 |     'E᷎' => 'E', // E with ogonek above
 217 |     'Ę᷎' => 'E', // E with ogonek above and ogonek
 218 |     'Ę̣' => 'E', // E with ogonek and dot below
 219 |     'ᶒ' => 'E', // E with retroflex hook
 220 |     'E̊' => 'E', // E with ring
 221 |     'Ɇ' => 'E', // E with stroke
 222 |     'Ẽ' => '\\~{E}', // E with tilde
 223 |     'Ẽ́' => 'E', // E with tilde and acute
 224 |     'Ẽ̌' => 'E', // E with tilde and caron
 225 |     'Ẽ̂' => 'E', // E with tilde and circumflex
 226 |     'Ẽ̀' => 'E', // E with tilde and grave
 227 |     'Ę̃' => 'E', // E with tilde and ogonek
 228 |     'Ẽ̍' => 'E', // E with tilde and vertical line
 229 |     'Ḛ' => 'E', // E with tilde below
 230 |     'E̍' => 'E', // E with vertical line
 231 |     // 'ʆ', Esh with curl
 232 |     // 'ᶋ', Esh with palatal hook
 233 |     // 'ᶘ', Esh with retroflex hook
 234 |     // 'Ǯ', Ezh with caron
 235 |     // 'ʓ', Ezh with curl
 236 |     // 'ᶚ', Ezh with retroflex hook
 237 |     // 'ƺ', Ezh with tail
 238 |     'F́' => '\\\'{F}', // F with acute
 239 |     'F̧' => '\\c{F}', // F with cedilla
 240 |     'Ḟ' => '\\.{F}', // F with dot above
 241 |     'F̣' => '\\d{F}', // F with dot below
 242 |     'F̀' => '\\`{F}', // F with grave
 243 |     'Ƒ' => 'F', // F with hook (Script F)
 244 |     'F̄' => '\\={F}', // F with macron
 245 |     'ᵮ' => 'F', // F with middle tilde
 246 |     'ᶂ' => 'F', // F with palatal hook
 247 |     'Ǵ' => '\\\'{G}', // G with acute
 248 |     'Ğ' => '\\u{G}', // G with breve
 249 |     'Ǧ' => '\\v{G}', // G with caron
 250 |     'Ģ' => '\\c{G}', // G with cedilla
 251 |     'Ĝ' => '\\^{G}', // G with circumflex
 252 |     'G̈' => '\\"{G}', // G with diaeresis
 253 |     'G̤' => 'G', // G with diaeresis below
 254 |     'Ġ' => '\\.{G}', // G with dot above
 255 |     'G̣' => '\\d{G}', // G with dot below
 256 |     'G̀' => '\\`{G}', // G with grave
 257 |     'Ɠ' => 'G', // G with hook
 258 |     'Ḡ' => '\\={G}', // G with macron
 259 |     'Ꞡ' => 'G', // G with oblique stroke
 260 |     'ᶃ' => 'G', // G with palatal hook
 261 |     'Ǥ' => 'G', // G with stroke
 262 |     'G̃' => '\\~{G}', // G with tilde
 263 |     // 'Ɣ̓', Gamma with comma above
 264 |     // 'ʡ', Glottal stop with stroke
 265 |     'H́' => '\\\'{H}', // H with acute
 266 |     'Ḫ' => 'H', // H with breve below
 267 |     'Ȟ' => '\\v{H}', // H with caron
 268 |     'Ḩ' => '\\c{H}', // H with cedilla
 269 |     'H̐' => 'H', // H with chandrabindu
 270 |     'Ĥ' => '\\^{H}', // H with circumflex
 271 |     'H̓' => 'H', // H with comma above
 272 |     'Ⱨ' => 'H', // H with descender
 273 |     'Ḧ' => '\\"{H}', // H with diaeresis
 274 |     'H̤' => 'H', // H with diaeresis below
 275 |     'Ḣ' => '\\.{H}', // H with dot above
 276 |     'Ḣ' => '\\.{H}', // H with dot above
 277 |     'Ḥ' => '\\d{H}', // H with dot below
 278 |     'Ɦ' => 'H', // H with hook
 279 |     'H̱' => 'H', // H with line below
 280 |     'H̄' => '\\={H}', // H with macron
 281 |     'Ħ' => 'H', // H with stroke
 282 |     // 'Ꜧ', Heng
 283 |     // 'ɧ', Heng with hook
 284 |     'I' => 'I', // I (lowercase, i.e. ı) without dot above
 285 |     'İ' => 'I', // I (uppercase) with dot above
 286 |     'Í' => '\\\'{I}', // I with acute
 287 |     'Ĭ' => '\\u{I}', // I with breve
 288 |     'Ǐ' => '\\v{I}', // I with caron
 289 |     'I̧' => '\\c{I}', // I with cedilla
 290 |     'Í̧' => 'I', // I with cedilla and acute
 291 |     'Î̧' => 'I', // I with cedilla and circumflex
 292 |     'Ì̧' => 'I', // I with cedilla and grave
 293 |     'I̐' => 'I', // I with chandrabindu
 294 |     'Î' => '\\^{I}', // I with circumflex
 295 |     'Î́' => 'I', // I with circumflex and acute
 296 |     'I̓' => 'I', // I with comma above
 297 |     'Ï' => '\\"{I}', // I with diaeresis
 298 |     'Ḯ' => 'I', // I with diaeresis and acute
 299 |     'Ị' => '\\d{I}', // I with dot below
 300 |     'Ị́' => 'I', // I with dot below and acute
 301 |     'Ị̂' => 'I', // I with dot below and circumflex
 302 |     'Ị̃' => 'I', // I with dot below and tilde
 303 |     'I̋' => '\\H{I}', // I with double acute
 304 |     'Ȉ' => 'I', // I with double grave
 305 |     'Ȉ' => 'I', // I with double grave
 306 |     'Ì' => '\\`{I}', // I with grave
 307 |     'Ỉ' => 'I', // I with hook above
 308 |     'Ȋ' => 'I', // I with inverted breve
 309 |     'I̱' => 'I', // I with line below
 310 |     'Í̱' => 'I', // I with line below and acute
 311 |     'Î̱' => 'I', // I with line below and circumflex
 312 |     'Ì̱' => 'I', // I with line below and grave
 313 |     'Ī̱' => 'I', // I with line below and macron
 314 |     'Ī' => '\\={I}', // I with macron
 315 |     'Ī́' => 'I', // I with macron and acute
 316 |     'Ī̌' => 'I', // I with macron and caron
 317 |     'Ī̂' => 'I', // I with macron and circumflex
 318 |     'Ī̀' => 'I', // I with macron and grave
 319 |     'Į' => '\\k{I}', // I with ogonek
 320 |     'Į́' => 'I', // I with ogonek and acute
 321 |     'Į̌' => 'I', // I with ogonek and caron
 322 |     'Į̂' => 'I', // I with ogonek and circumflex
 323 |     'Į̀' => 'I', // I with ogonek and grave
 324 |     'Į̃' => 'I', // I with ogonek and tilde
 325 |     'ᶖ' => 'I', // I with retroflex hook
 326 |     'Ɨ' => 'I', // I with stroke
 327 |     'Ɨ́' => 'I', // I with stroke and acute
 328 |     'Ɨ̌' => 'I', // I with stroke and caron
 329 |     'Ɨ̧' => 'I', // I with stroke and cedilla
 330 |     'Ɨ̂' => 'I', // I with stroke and circumflex
 331 |     'Ɨ̀' => 'I', // I with stroke and grave
 332 |     'Ɨ̄' => 'I', // I with stroke and macron
 333 |     'Ɨ̃' => 'I', // I with stroke and tilde
 334 |     'Ɨ̧̌' => 'I', // I with stroke, cedilla and caron
 335 |     'Ɨ̧̀' => 'I', // I with stroke, cedilla and grave
 336 |     'Ɨ̧̂' => 'I', // I with stroke, cedilla, and circumflex
 337 |     'Ĩ' => '\\~{I}', // I with tilde
 338 |     'Ĩ́' => 'I', // I with tilde and acute
 339 |     'Ĩ̌' => 'I', // I with tilde and caron
 340 |     'Ĩ̂' => 'I', // I with tilde and circumflex
 341 |     'Ĩ̀' => 'I', // I with tilde and grave
 342 |     'Ĩ̍' => 'I', // I with tilde and vertical line
 343 |     'Ḭ' => 'I', // I with tilde below
 344 |     'I̍' => 'I', // I with vertical line
 345 |     // 'Ꝼ́', Insular F with acute
 346 |     // 'Ꝼ̇', Insular F with dot above
 347 |     // 'Ꝼ̣', Insular F with dot below
 348 |     // 'ƾ', Inverted glottal stop with stroke
 349 |     // 'Ɩ́', Iota with acute
 350 |     // 'Ɩ̀', Iota with grave
 351 |     // 'ᵼ', Iota with stroke
 352 |     // 'Ɩ̃', Iota with tilde
 353 |     'J́' => '\\\'{J}', // J with acute
 354 |     'J̌' => '\\v{J}', // J with caron
 355 |     'Ĵ' => '\\^{J}', // J with circumflex
 356 |     'ʝ' => 'J', // J with crossed-tail
 357 |     'J̣' => '\\d{J}', // J with dot below
 358 |     'J̄' => '\\={J}', // J with macron
 359 |     'Ɉ' => 'J', // J with stroke
 360 |     'J̃' => '\\~{J}', // J with tilde
 361 |     'Ḱ' => '\\\'{K}', // K with acute
 362 |     'Ǩ' => '\\v{K}', // K with caron
 363 |     'Ķ' => '\\c{K}', // K with cedilla
 364 |     'Ⱪ' => 'K', // K with descender
 365 |     'Ꝃ' => 'K', // K with diagonal stroke
 366 |     'K̇' => '\\.{K}', // K with dot above
 367 |     'Ḳ' => '\\d{K}', // K with dot below
 368 |     'K̀' => '\\`{K}', // K with grave
 369 |     'Ƙ' => 'K', // K with hook
 370 |     'Ḵ' => 'K', // K with line below
 371 |     'K̄' => '\\={K}', // K with macron
 372 |     'Ꞣ' => 'K', // K with oblique stroke
 373 |     'ᶄ' => 'K', // K with palatal hook
 374 |     'Ꝁ' => 'K', // K with stroke
 375 |     'Ꝅ' => 'K', // K with stroke and diagonal stroke
 376 |     'Ĺ' => '\\\'{L}', // L with acute
 377 |     'Ḷ́' => 'L', // L with acute and dot below
 378 |     'Ƚ' => 'L', // L with bar
 379 |     'ɬ' => 'L', // L with belt
 380 |     'Ľ' => '\\v{L}', // L with caron
 381 |     'Ļ' => '\\c{L}', // L with cedilla
 382 |     'L̐' => 'L', // L with chandrabindu
 383 |     'L̂' => '\\^{L}', // L with circumflex
 384 |     'Ḽ' => 'L', // L with circumflex below
 385 |     'L̓' => 'L', // L with comma above
 386 |     'Ḷ̓' => 'L', // L with comma above and dot below
 387 |     'L̦' => 'L', // L with comma below
 388 |     'ȴ' => 'L', // L with curl
 389 |     'Ḷ' => '\\d{L}', // L with dot below
 390 |     'Ḹ' => 'L', // L with dot below and macron
 391 |     'Ⱡ' => 'L', // L with double bar
 392 |     'Ꝉ' => 'L', // L with high stroke
 393 |     'Ḻ' => 'L', // L with line below
 394 |     'Ɫ' => 'L', // L with middle tilde
 395 |     'ᶅ' => 'L', // L with palatal hook
 396 |     'ɭ' => 'L', // L with retroflex hook
 397 |     'ꞎ' => 'L', // L with retroflex hook and belt
 398 |     'Ł' => 'L', // L with stroke
 399 |     'L̃' => '\\~{L}', // L with tilde
 400 |     // 'ƛ', Lambda with stroke
 401 |     // 'ƛ̓', Lambda with stroke and comma above
 402 |     // 'ẜ', Long S with diagonal stroke
 403 |     // 'ẝ', Long S with high stroke
 404 |     // 'ẛ', Long s with dot above
 405 |     'Ḿ' => '\\\'{M}', // M with acute
 406 |     'Ṃ́' => 'M', // M with acute and dot below
 407 |     'M̧' => '\\c{M}', // M with cedilla
 408 |     'M̐' => 'M', // M with chandrabindu
 409 |     'M̓' => 'M', // M with comma above
 410 |     'Ṃ̓' => 'M', // M with comma above and dot below
 411 |     'M̦' => 'M', // M with comma below
 412 |     'M̈' => '\\"{M}', // M with diaeresis
 413 |     'Ṁ' => '\\.{M}', // M with dot above
 414 |     'Ṃ' => '\\d{M}', // M with dot below
 415 |     'Ṃ' => '\\d{M}', // M with dot below
 416 |     'M̀' => '\\`{M}', // M with grave
 417 |     'Ɱ' => 'M', // M with hook
 418 |     'M̄' => '\\={M}', // M with macron
 419 |     'ᵯ' => 'M', // M with middle tilde
 420 |     'M̨' => '\\k{M}', // M with ogonek
 421 |     'ᶆ' => 'M', // M with palatal hook
 422 |     'M̃' => '\\~{M}', // M with tilde
 423 |     'M̍' => 'M', // M with vertical line
 424 |     'Ń' => '\\\'{N}', // N with acute
 425 |     'Ṇ́' => 'N', // N with acute and dot below
 426 |     'Ň' => '\\v{N}', // N with caron
 427 |     'Ņ' => '\\c{N}', // N with cedilla
 428 |     'N̐' => 'N', // N with chandrabindu
 429 |     'N̂' => '\\^{N}', // N with circumflex
 430 |     'Ṋ' => 'N', // N with circumflex below
 431 |     'Ṇ̓' => 'N', // N with comma above and dot below
 432 |     'N̦' => 'N', // N with comma below
 433 |     'ȵ' => 'N', // N with curl
 434 |     'Ꞑ' => 'N', // N with descender
 435 |     'N̈' => 'N', // N with diaeresis
 436 |     'Ṅ' => '\\.{N}', // N with dot above
 437 |     'Ṇ' => '\\d{N}', // N with dot below
 438 |     'Ǹ' => '\\`{N}', // N with grave
 439 |     'Ɲ' => 'N', // N with left hook
 440 |     'Ṉ' => 'N', // N with line below
 441 |     'Ƞ' => 'N', // N with long right leg
 442 |     'N̄' => '\\={N}', // N with macron
 443 |     'ᵰ' => 'N', // N with middle tilde
 444 |     'Ꞥ' => 'N', // N with oblique stroke
 445 |     'ᶇ' => 'N', // N with palatal hook
 446 |     'ɳ' => 'N', // N with retroflex hook
 447 |     'Ñ̈' => 'N', // N with tilde and diaeresis
 448 |     'Ñ' => '\\~{N}', // N with tilde
 449 |     'N̰' => 'N', // N with tilde below
 450 |     'N̲' => '\\b{N}', // N with underline
 451 |     'N̍' => 'N', // N with vertical line
 452 |     'Ó' => '\\\'{O}', // O with acute
 453 |     'Ó̧' => 'O', // O with acute and cedilla
 454 |     'Ọ́' => 'O', // O with acute and dot below
 455 |     'Ó̱' => 'O', // O with acute and line below
 456 |     'Ǫ́' => 'O', // O with acute and ogonek
 457 |     'Ɵ' => 'O', // O with bar
 458 |     'Ŏ' => '\\u{O}', // O with breve
 459 |     'Ǒ' => '\\v{O}', // O with caron
 460 |     'Ǒ̧' => 'O', // O with caron and cedilla
 461 |     'Ǫ̌' => 'O', // O with caron and ogonek
 462 |     'O̧' => '\\c{O}', // O with cedilla
 463 |     'O̐' => 'O', // O with chandrabindu
 464 |     'Ô' => '\\^{O}', // O with circumflex
 465 |     'Ố' => 'O', // O with circumflex and acute
 466 |     'Ô̧' => 'O', // O with circumflex and cedilla
 467 |     'Ộ' => 'O', // O with circumflex and dot below
 468 |     'Ộ' => 'O', // O with circumflex and dot below
 469 |     'Ồ' => 'O', // O with circumflex and grave
 470 |     'Ổ' => 'O', // O with circumflex and hook above
 471 |     'Ô̱' => 'O', // O with circumflex and line below
 472 |     'Ǫ̂' => 'O', // O with circumflex and ogonek
 473 |     'Ỗ' => 'O', // O with circumflex and tilde
 474 |     'O̭' => 'O', // O with circumflex below
 475 |     'Ö' => '\\"{O}', // O with diaeresis
 476 |     'Ö́' => 'O', // O with diaeresis and acute
 477 |     'Ö̀' => 'O', // O with diaeresis and grave
 478 |     'Ö̱' => 'O', // O with diaeresis and line below
 479 |     'Ȫ' => 'O', // O with diaeresis and macron
 480 |     'Ȯ' => '\\.{O}', // O with dot above
 481 |     'Ȱ' => 'O', // O with dot above and macron
 482 |     'O̍͘' => 'O', // O with dot above and vertical line
 483 |     'O͘' => 'O', // O with dot above right
 484 |     'Ó͘' => 'O', // O with dot above right and acute
 485 |     'Ò͘' => 'O', // O with dot above right and grave
 486 |     'Ō͘' => 'O', // O with dot above right and macron
 487 |     'Ọ' => '\\d{O}', // O with dot below
 488 |     'Ő' => '\\H{O}', // O with double acute
 489 |     'Ő' => '\\H{O}', // O with double acute
 490 |     'Ȍ' => 'O', // O with double grave
 491 |     'Ò' => '\\`{O}', // O with grave
 492 |     'Ò̧' => 'O', // O with grave and cedilla
 493 |     'Ọ̀' => 'O', // O with grave and dot below
 494 |     'Ò̱' => 'O', // O with grave and line below
 495 |     'Ǫ̀' => 'O', // O with grave and ogonek
 496 |     'Ỏ' => 'O', // O with hook above
 497 |     'Ơ' => 'O', // O with horn
 498 |     'Ớ' => 'O', // O with horn and acute
 499 |     'Ợ' => 'O', // O with horn and dot below
 500 |     'Ờ' => 'O', // O with horn and grave
 501 |     'Ở' => 'O', // O with horn and hook above
 502 |     'Ỡ' => 'O', // O with horn and tilde
 503 |     'Ȏ' => 'O', // O with inverted breve
 504 |     'O̱' => 'O', // O with line below
 505 |     'Ꝋ' => 'O', // O with long stroke overlay
 506 |     'Ꝋ' => 'O', // O with long stroke overlay
 507 |     'Ꝍ' => 'O', // O with loop
 508 |     'ⱺ' => 'O', // O with low ring inside
 509 |     'Ō' => '\\={O}', // O with macron
 510 |     'Ṓ' => 'O', // O with macron and acute
 511 |     'Ō̌' => 'O', // O with macron and caron
 512 |     'Ō̂' => 'O', // O with macron and circumflex
 513 |     'Ọ̄' => 'O', // O with macron and dot below
 514 |     'Ṑ' => 'O', // O with macron and grave
 515 |     'Ō̱' => 'O', // O with macron and line below
 516 |     'Ǭ' => 'O', // O with macron and ogonek
 517 |     'Ǭ̀' => 'O', // O with macron, grave and ogonek
 518 |     'Ǫ' => '\\k{O}', // O with ogonek
 519 |     'Ø' => 'O', // O with stroke
 520 |     'Ǿ' => 'O', // O with stroke and acute
 521 |     'Ø̌' => 'O', // O with stroke and caron
 522 |     'Ø̂' => 'O', // O with stroke and circumflex
 523 |     'Ø̀' => 'O', // O with stroke and grave
 524 |     'Ø̄' => 'O', // O with stroke and macron
 525 |     'Õ' => '\\~{O}', // O with tilde
 526 |     'Ṍ' => 'O', // O with tilde and acute
 527 |     'Ṏ' => 'O', // O with tilde and diaeresis
 528 |     'Ȭ' => 'O', // O with tilde and macron
 529 |     'O̲' => '\\b{O}', // O with underline
 530 |     'O̍' => 'O', // O with vertical line
 531 |     // 'ᶓ', Open E with retroflex hook
 532 |     // 'Ɔ́', Open O with acute
 533 |     // 'Ɔ̧́', Open O with acute and cedilla
 534 |     // 'Ɔ̌', Open O with caron
 535 |     // 'Ɔ̧̌', Open O with caron and cedilla
 536 |     // 'Ɔ̧', Open O with cedilla
 537 |     // 'Ɔ̂', Open O with circumflex
 538 |     // 'Ɔ̧̂', Open O with circumflex and cedilla
 539 |     // 'Ɔ̈', Open O with diaeresis
 540 |     // 'Ɔ̀', Open O with grave
 541 |     // 'Ɔ̧̀', Open O with grave and cedilla
 542 |     // 'Ɔ̄', Open O with macron
 543 |     // 'ᶗ', Open O with retroflex hook
 544 |     // 'Ɔ̃', Open O with tilde
 545 |     // 'Ɔ̃́', Open O with tilde and acute
 546 |     // 'Ɔ̃̌', Open O with tilde and caron
 547 |     // 'Ɔ̃̂', Open O with tilde and circumflex
 548 |     // 'Ɔ̃̀', Open O with tilde and grave
 549 |     // 'Ɔ̃̍', Open O with tilde and vertical line
 550 |     // 'Ɔ̱', Open O with tilde below
 551 |     // 'Ɔ̍', Open O with vertical line
 552 |     'Ṕ' => '\\\'{P}', // P with acute
 553 |     'P̓' => 'P', // P with comma above
 554 |     'P̈' => '\\"{P}', // P with diaeresis
 555 |     'P̤' => 'P', // P with diaeresis below
 556 |     'Ṗ' => '\\.{P}', // P with dot above
 557 |     'P̣' => 'P', // P with dot below
 558 |     'Ꝓ' => 'P', // P with flourish
 559 |     'P̀' => '\\`{P}', // P with grave
 560 |     'Ƥ' => 'P', // P with hook
 561 |     'P̄' => '\\={P}', // P with macron
 562 |     'ᵱ' => 'P', // P with middle tilde
 563 |     'ᶈ' => 'P', // P with palatal hook
 564 |     'Ꝕ' => 'P', // P with squirrel tail
 565 |     'Ᵽ' => 'P', // P with stroke
 566 |     'Ꝑ' => 'P', // P with stroke through descender
 567 |     'P̃' => '\\~{P}', // P with tilde (P + U+0303 combining tilde; key must differ from the "P with macron" entry)
 568 |     'Q̓' => 'Q', // Q with comma above
 569 |     'Ꝙ' => 'Q', // Q with diagonal stroke
 570 |     'Q̇' => '\\.{Q}', // Q with dot above
 571 |     'ʠ' => 'Q', // Q with hook
 572 |     'Ɋ' => 'Q', // Q with hook tail
 573 |     'Ꝗ' => 'Q', // Q with stroke through descender
 574 |     'Ŕ' => '\\\'{R}', // R with acute
 575 |     'Ř' => '\\v{R}', // R with caron
 576 |     'Ŗ' => '\\c{R}', // R with cedilla
 577 |     'R̂' => '\\^{R}', // R with circumflex
 578 |     'R̓' => 'R', // R with comma above
 579 |     'R̦' => 'R', // R with comma below
 580 |     'R̰' => 'R', // R with diaeresis below
 581 |     'Ṙ' => '\\.{R}', // R with dot above
 582 |     'Ṛ' => '\\d{R}', // R with dot below
 583 |     'Ṝ' => 'R', // R with dot below and macron
 584 |     'Ȑ' => 'R', // R with double grave
 585 |     'ɾ' => 'R', // R with fishhook
 586 |     'ᵳ' => 'R', // R with fishhook and middle tilde
 587 |     'Ȓ' => 'R', // R with inverted breve
 588 |     'Ṟ' => 'R', // R with line below
 589 |     'ɼ' => 'R', // R with long leg
 590 |     'R̄' => '\\={R}', // R with macron
 591 |     'ᵲ' => 'R', // R with middle tilde
 592 |     'Ꞧ' => 'R', // R with oblique stroke
 593 |     'ᶉ' => 'R', // R with palatal hook
 594 |     'R̥' => 'R', // R with ring below
 595 |     'R̥̄' => 'R', // R with ring below and macron
 596 |     'Ɍ' => 'R', // R with stroke
 597 |     'Ɽ' => 'R', // R with tail
 598 |     'R̃' => '\\~{R}', // R with tilde
 599 |     // 'Ꜿ', Reversed C with dot
 600 |     // 'ɿ', Reversed R with fishhook
 601 |     // 'ʢ', Reversed glottal stop with stroke
 602 |     // 'ɝ', Reversed open E with hook (Reversed Epsilon hook)
 603 |     // 'ᶔ', Reversed open E with retroflex hook
 604 |     'Ś' => '\\\'{S}', // S with acute
 605 |     'Ṥ' => 'S', // S with acute and dot above
 606 |     'Š' => '\\v{S}', // S with caron
 607 |     'Ṧ' => 'S', // S with caron and dot above
 608 |     'Ş' => '\\c{S}', // S with cedilla
 609 |     'Ŝ' => '\\^{S}', // S with circumflex
 610 |     'Ș' => 'S', // S with comma below
 611 |     'Ṡ' => '\\.{S}', // S with dot above
 612 |     'Ṣ' => '\\d{S}', // S with dot below
 613 |     'Ṩ' => 'S', // S with dot below and dot above
 614 |     'ʂ' => 'S', // S with hook
 615 |     'ẞ' => 'S', // S with middle tilde
 616 |     'Ꞩ' => 'S', // S with oblique stroke
 617 |     'ᶊ' => 'S', // S with palatal hook
 618 |     'Ȿ' => 'S', // S with swash tail
 619 |     // 'ɚ', Schwa with hook
 620 |     // 'ᶕ', Schwa with retroflex hook
 621 |     // 'ᴓ', Sideways O with stroke
 622 |     // 'ʛ', Small capital G with hook
 623 |     // 'ᵾ', Small capital U with stroke
 624 |     'Ť' => '\\v{T}', // T with caron
 625 |     'Ţ' => '\\c{T}', // T with cedilla
 626 |     'Ṱ' => 'T', // T with circumflex below
 627 |     'Ț' => 'T', // T with comma below
 628 |     'ȶ' => 'T', // T with curl
 629 |     'T̈' => '\\"{T}', // T with diaeresis
 630 |     'Ⱦ' => 'T', // T with diagonal stroke
 631 |     'Ṫ' => '\\.{T}', // T with dot above
 632 |     'Ṭ' => '\\d{T}', // T with dot below
 633 |     'Ƭ' => 'T', // T with hook
 634 |     'Ṯ' => 'T', // T with line below
 635 |     'ᵵ' => 'T', // T with middle tilde
 636 |     'ƫ' => 'T', // T with palatal hook
 637 |     'Ʈ' => 'T', // T with retroflex hook
 638 |     'Ŧ' => 'T', // T with stroke
 639 |     // 'Ꝥ', Thorn with stroke
 640 |     // 'Ꝧ', Thorn with stroke through descender
 641 |     // 'ʮ', Turned H with fishhook
 642 |     // 'ʯ', Turned H with fishhook and tail
 643 |     // 'ɰ', Turned M with long leg
 644 |     // 'ɻ', Turned R with hook
 645 |     // 'ɺ', Turned R with long leg
 646 |     // 'ⱹ', Turned R with tail
 647 |     // 'ƻ', Two with stroke
 648 |     'Ʉ' => 'U', // U bar
 649 |     'Ú' => '\\\'{U}', // U with acute
 650 |     'Ŭ' => '\\u{U}', // U with breve
 651 |     'Ǔ' => '\\v{U}', // U with caron
 652 |     'Û' => '\\^{U}', // U with circumflex
 653 |     'Ṷ' => 'U', // U with circumflex below
 654 |     'Ü' => '\\"{U}', // U with diaeresis
 655 |     'Ǘ' => 'U', // U with diaeresis and acute
 656 |     'Ǚ' => 'U', // U with diaeresis and caron
 657 |     'Ǜ' => 'U', // U with diaeresis and grave
 658 |     'Ǖ' => 'U', // U with diaeresis and macron
 659 |     'Ṳ' => 'U', // U with diaeresis below
 660 |     'Ụ' => '\\d{U}', // U with dot below
 661 |     'Ű' => '\\H{U}', // U with double acute
 662 |     'Ȕ' => 'U', // U with double grave
 663 |     'Ù' => '\\`{U}', // U with grave
 664 |     'Ủ' => 'U', // U with hook above
 665 |     'Ư' => 'U', // U with horn
 666 |     'Ứ' => 'U', // U with horn and acute
 667 |     'Ự' => 'U', // U with horn and dot below
 668 |     'Ừ' => 'U', // U with horn and grave
 669 |     'Ử' => 'U', // U with horn and hook above
 670 |     'Ữ' => 'U', // U with horn and tilde
 671 |     'Ȗ' => 'U', // U with inverted breve
 672 |     'Ū' => '\\={U}', // U with macron
 673 |     'Ṻ' => 'U', // U with macron and diaeresis
 674 |     'Ų' => '\\k{U}', // U with ogonek
 675 |     'ᶙ' => 'U', // U with retroflex hook
 676 |     'Ů' => '\\r{U}', // U with ring above
 677 |     'Ũ' => '\\~{U}', // U with tilde
 678 |     'Ṹ' => 'U', // U with tilde and acute
 679 |     'Ṵ' => 'U', // U with tilde below
 680 |     // 'ᵿ', Upsilon with stroke
 681 |     'ⱴ' => 'V', // V with curl
 682 |     'Ꝟ' => 'V', // V with diagonal stroke
 683 |     'Ṿ' => '\\d{V}', // V with dot below
 684 |     'Ʋ' => 'V', // V with hook (Script V)
 685 |     'ᶌ' => 'V', // V with palatal hook
 686 |     'ⱱ' => 'V', // V with right hook
 687 |     'Ṽ' => '\\~{V}', // V with tilde
 688 |     'Ẃ' => '\\\'{W}', // W with acute
 689 |     'Ŵ' => '\\^{W}', // W with circumflex
 690 |     'Ẅ' => '\\"{W}', // W with diaeresis
 691 |     'Ẇ' => '\\.{W}', // W with dot above
 692 |     'Ẉ' => '\\d{W}', // W with dot below
 693 |     'Ẁ' => '\\`{W}', // W with grave
 694 |     'Ⱳ' => 'W', // W with hook
 695 |     'W̊' => '\\r{W}', // W with ring above
 696 |     'Ẍ' => '\\"{X}', // X with diaeresis
 697 |     'Ẋ' => '\\.{X}', // X with dot above
 698 |     'ᶍ' => 'X', // X with palatal hook
 699 |     'Ý' => '\\\'{Y}', // Y with acute
 700 |     'Ŷ' => '\\^{Y}', // Y with circumflex
 701 |     'Ÿ' => '\\"{Y}', // Y with diaeresis
 702 |     'Ẏ' => '\\.{Y}', // Y with dot above
 703 |     'Ỵ' => '\\d{Y}', // Y with dot below
 704 |     'Ỳ' => '\\`{Y}', // Y with grave
 705 |     'Ƴ' => 'Y', // Y with hook
 706 |     'Ỷ' => 'Y', // Y with hook above
 707 |     'Ỿ' => 'Y', // Y with loop
 708 |     'Ȳ' => '\\={Y}', // Y with macron
 709 |     'Y̊' => '\\r{Y}', // Y with ring above
 710 |     'Ɏ' => 'Y', // Y with stroke
 711 |     'Ỹ' => '\\~{Y}', // Y with tilde
 712 |     'Ź' => '\\\'{Z}', // Z with acute
 713 |     'Ž' => '\\v{Z}', // Z with caron
 714 |     'Ẑ' => '\\^{Z}', // Z with circumflex
 715 |     'ʑ' => 'Z', // Z with curl
 716 |     'Ⱬ' => 'Z', // Z with descender
 717 |     'Ż' => '\\.{Z}', // Z with dot above
 718 |     'Ẓ' => '\\d{Z}', // Z with dot below
 719 |     'Ȥ' => 'Z', // Z with hook
 720 |     'Ẕ' => 'Z', // Z with line below
 721 |     'ᵶ' => 'Z', // Z with middle tilde
 722 |     'ᶎ' => 'Z', // Z with palatal hook
 723 |     'ʐ' => 'Z', // Z with retroflex hook
 724 |     'Ƶ' => 'Z', // Z with stroke
 725 |     'Ɀ' => 'Z', // Z with swash tail
 726 |     'á' => '\\\'{a}', // a with acute
 727 |     'á̧' => 'a', // a with acute and cedilla
 728 |     'ạ́' => 'a', // a with acute and dot below
 729 |     'ą́' => 'a', // a with acute and ogonek
 730 |     'ą̌' => 'a', // a with caron and ogonek
 731 |     'ă' => '\\u{a}', // a with breve
 732 |     'ắ' => 'a', // a with breve and acute
 733 |     'ặ' => 'a', // a with breve and dot below
 734 |     'ằ' => 'a', // a with breve and grave
 735 |     'ẳ' => 'a', // a with breve and hook above
 736 |     'ẵ' => 'a', // a with breve and tilde
 737 |     'ǎ' => '\\v{a}', // a with caron
 738 |     'ǎ̧' => 'a', // a with caron and cedilla
 739 |     'a̧' => '\\c{a}', // a with cedilla
 740 |     'a̐' => 'a', // a with chandrabindu
 741 |     'â' => '\\^{a}', // a with circumflex
 742 |     'ấ' => 'a', // a with circumflex and acute
 743 |     'â̧' => 'a', // a with circumflex and cedilla
 744 |     'ậ' => 'a', // a with circumflex and dot below
 745 |     'ầ' => 'a', // a with circumflex and grave
 746 |     'ẩ' => 'a', // a with circumflex and hook above
 747 |     'ą̂' => 'a', // a with circumflex and ogonek
 748 |     'ẫ' => 'a', // a with circumflex and tilde
 749 |     'a̭' => 'a', // a with circumflex below
 750 |     'a̓' => 'a', // a with comma above
 751 |     'ä' => '\\"{a}', // a with diaeresis
 752 |     'ä́' => 'a', // a with diaeresis and acute
 753 |     'ä̌' => 'a', // a with diaeresis and caron
 754 |     'ä̂' => 'a', // a with diaeresis and circumflex
 755 |     'ạ̈' => 'a', // a with diaeresis and dot below
 756 |     'ä̀' => 'a', // a with diaeresis and grave
 757 |     'ǟ' => 'a', // a with diaeresis and macron
 758 |     'ą̈' => 'a', // a with diaeresis and ogonek
 759 |     'ą̈̌' => 'a', // a with diaeresis, caron and ogonek
 760 |     'ą̈̂' => 'a', // a with diaeresis, circumflex and ogonek
 761 |     'ą̈̀' => 'a', // a with diaeresis, grave and ogonek
 762 |     'ȧ' => '\\.{a}', // a with dot above
 763 |     'ȧ́' => 'a', // a with dot above and acute
 764 |     'ǡ' => 'a', // a with dot above and macron
 765 |     'ạ' => '\\d{a}', // a with dot below
 766 |     'a̋' => '\\H{a}', // a with double acute
 767 |     'ȁ' => 'a', // a with double grave
 768 |     'à' => '\\`{a}', // a with grave
 769 |     'à̧' => 'a', // a with grave and cedilla
 770 |     'ạ̀' => 'a', // a with grave and dot below
 771 |     'ą̀' => 'a', // a with grave and ogonek
 772 |     'ả' => 'a', // a with hook above
 773 |     'ȃ' => 'a', // a with inverted breve
 774 |     'ā' => '\\={a}', // a with macron
 775 |     'ā́' => 'a', // a with macron and acute
 776 |     'ā̆' => 'a', // a with macron and breve
 777 |     'ā̌' => 'a', // a with macron and caron
 778 |     'ā̂' => 'a', // a with macron and circumflex
 779 |     'ạ̄' => 'a', // a with macron and dot below
 780 |     'ā̀' => 'a', // a with macron and grave
 781 |     'ą̄' => 'a', // a with macron and ogonek
 782 |     'ā̊' => 'a', // a with macron and ring above
 783 |     'a̱' => 'a', // a with macron below
 784 |     'á̱' => 'a', // a with macron below and acute
 785 |     'â̱' => 'a', // a with macron below and circumflex
 786 |     'ä̱' => 'a', // a with macron below and diaeresis
 787 |     'à̱' => 'a', // a with macron below and grave
 788 |     'ā̱' => 'a', // a with macron below and macron
 789 |     'å̱' => 'a', // a with macron below and ring above
 790 |     'ą̄́' => 'a', // a with macron, acute and ogonek
 791 |     'ą̄̌' => 'a', // a with macron, caron and ogonek
 792 |     'ą̄̂' => 'a', // a with macron, circumflex and ogonek
 793 |     'ą̄̀' => 'a', // a with macron, grave and ogonek
 794 |     'ą' => '\\k{a}', // a with ogonek
 795 |     'a᷎' => 'a', // a with ogonek above
 796 |     'å' => '\\r{a}', // a with ring above
 797 |     'ǻ' => 'a', // a with ring above and acute
 798 |     'å̂' => 'a', // a with ring above and circumflex
 799 |     'ą̊' => 'a', // a with ring above and ogonek
 800 |     'ḁ' => 'a', // a with ring below
 801 |     'ⱥ' => 'a', // a with stroke
 802 |     'ã' => '\\~{a}', // a with tilde
 803 |     'ã́' => 'a', // a with tilde and acute
 804 |     'ạ̃' => 'a', // a with tilde and dot below
 805 |     'ã̀' => 'a', // a with tilde and grave
 806 |     'ą̃' => 'a', // a with tilde and ogonek
 807 |     'a̰' => 'a', // a with tilde below
 808 |     'a̍' => 'a', // a with vertical line
 809 |     // 'ɖ', african D, D with tail
 810 |     // 'ɑ́', alpha with acute
 811 |     // 'ɑ̌', alpha with caron
 812 |     // 'ɑ̂', alpha with circumflex
 813 |     // 'ɑ̀', alpha with grave
 814 |     'b́' => '\\\'{b}', // b with acute
 815 |     'b̓' => 'b', // b with comma above
 816 |     'b̤' => 'b', // b with diaeresis below
 817 |     'ḃ' => '\\.{b}', // b with dot above
 818 |     'ḅ' => '\\d{b}', // b with dot below
 819 |     'ɓ' => 'b', // b with hook
 820 |     'ḇ' => 'b', // b with line below
 821 |     'ƀ' => 'b', // b with stroke
 822 |     'b̃' => '\\~{b}', // b with tilde
 823 |     'ƃ' => 'b', // b with topbar
 824 |     'ć' => '\\\'{c}', // c with acute
 825 |     'ꞓ' => 'c', // c with bar
 826 |     'c̆' => '\\u{c}', // c with breve
 827 |     'č' => '\\v{c}', // c with caron
 828 |     'č̓' => 'c', // c with caron and comma above
 829 |     'ç' => '\\c{c}', // c with cedilla
 830 |     'ḉ' => 'c', // c with cedilla and acute
 831 |     'ç̆' => 'c', // c with cedilla and breve
 832 |     'ç̌' => 'c', // c with cedilla and caron
 833 |     'ç̇' => 'c', // c with cedilla and dot above
 834 |     'ĉ' => '\\^{c}', // c with circumflex
 835 |     'c̓' => 'c', // c with comma above
 836 |     'c̈' => '\\"{c}', // c with diaeresis
 837 |     'ċ' => '\\.{c}', // c with dot above
 838 |     'c̣' => '\\d{c}', // c with dot below
 839 |     'c̀' => '\\`{c}', // c with grave
 840 |     'ƈ' => 'c', // c with hook
 841 |     'c̄' => '\\={c}', // c with macron
 842 |     'ȼ' => 'c', // c with stroke
 843 |     'c̃' => '\\~{c}', // c with tilde
 844 |     // 'ꜯ', cuatrillo with comma
 845 |     'd́' => '\\\'{d}', // d with acute
 846 |     'ď' => '\\v{d}', // d with caron
 847 |     'ḑ' => '\\c{d}', // d with cedilla
 848 |     'd̂' => '\\^{d}', // d with circumflex
 849 |     'ḓ' => 'd', // d with circumflex below
 850 |     'ḓ' => 'd', // d with circumflex below
 851 |     'd̓' => 'd', // d with comma above
 852 |     'd̦' => 'd', // d with comma below
 853 |     'd̤' => 'd', // d with diaeresis below
 854 |     'ḋ' => '\\.{d}', // d with dot above
 855 |     'ḍ' => '\\d{d}', // d with dot below
 856 |     'ɗ' => 'd', // d with hook
 857 |     'ḏ' => 'd', // d with line below
 858 |     'đ' => 'd', // d with stroke
 859 |     'ƌ' => 'd', // d with topbar
 860 |     'é' => '\\\'{e}', // e with acute
 861 |     'ȩ́' => 'e', // e with acute and cedilla
 862 |     'ẹ́' => 'e', // e with acute and dot below
 863 |     'é̱' => 'e', // e with acute and macron below
 864 |     'ę́' => 'e', // e with acute and ogonek
 865 |     'ĕ' => '\\u{e}', // e with breve
 866 |     'ḝ' => 'e', // e with breve and cedilla
 867 |     'ḝ' => 'e', // e with breve and cedilla
 868 |     'ě' => '\\v{e}', // e with caron
 869 |     'ȩ̌' => 'e', // e with caron and cedilla
 870 |     'ę̌' => 'e', // e with caron and ogonek
 871 |     'ȩ' => '\\c{e}', // e with cedilla
 872 |     'ȩ' => '\\c{e}', // e with cedilla
 873 |     'ê' => '\\^{e}', // e with circumflex
 874 |     'ế' => 'e', // e with circumflex and acute
 875 |     'ȩ̂' => 'e', // e with circumflex and cedilla
 876 |     'ệ' => 'e', // e with circumflex and dot below
 877 |     'ề' => 'e', // e with circumflex and grave
 878 |     'ể' => 'e', // e with circumflex and hook above
 879 |     'ê̱' => 'e', // e with circumflex and macron below
 880 |     'ę̂' => 'e', // e with circumflex and ogonek
 881 |     'ễ' => 'e', // e with circumflex and tilde
 882 |     'ḙ' => 'e', // e with circumflex below
 883 |     'ë' => '\\"{e}', // e with diaeresis
 884 |     'ë́' => 'e', // e with diaeresis and acute
 885 |     'ë̌' => 'e', // e with diaeresis and caron
 886 |     'ë̂' => 'e', // e with diaeresis and circumflex
 887 |     'ë̀' => 'e', // e with diaeresis and grave
 888 |     'ë̱' => 'e', // e with diaeresis and macron below
 889 |     'ę̈' => 'e', // e with diaeresis and ogonek
 890 |     'ę̈̌' => 'e', // e with diaeresis, caron and ogonek
 891 |     'ę̈̂' => 'e', // e with diaeresis, circumflex and ogonek
 892 |     'ę̈̀' => 'e', // e with diaeresis, grave and ogonek
 893 |     'ė' => '\\.{e}', // e with dot above
 894 |     'ė́' => 'e', // e with dot above and acute
 895 |     'ę̇' => 'e', // e with dot above and ogonek
 896 |     'ę̇́' => 'e', // e with dot above, acute and ogonek
 897 |     'ė̃' => 'e', // e with dot and macron
 898 |     'ẹ' => '\\d{e}', // e with dot below
 899 |     'e̋' => '\\H{e}', // e with double acute
 900 |     'ę̋' => 'e', // e with double acute and ogonek
 901 |     'ȅ' => 'e', // e with double grave
 902 |     'ȅ' => 'e', // e with double grave
 903 |     'è' => '\\`{e}', // e with grave
 904 |     'ȩ̀' => 'e', // e with grave and cedilla
 905 |     'ẹ̀' => 'e', // e with grave and dot below
 906 |     'è̱' => 'e', // e with grave and macron below
 907 |     'ę̀' => 'e', // e with grave and ogonek
 908 |     'ẻ' => 'e', // e with hook above
 909 |     'ȇ' => 'e', // e with inverted breve
 910 |     'ē' => '\\={e}', // e with macron
 911 |     'ḗ' => 'e', // e with macron and acute
 912 |     'ē̆' => 'e', // e with macron and breve
 913 |     'ē̌' => 'e', // e with macron and caron
 914 |     'ē̂' => 'e', // e with macron and circumflex
 915 |     'ẹ̄' => 'e', // e with macron and dot below
 916 |     'ḕ' => 'e', // e with macron and grave
 917 |     'ē̱' => 'e', // e with macron and macron below
 918 |     'ę̄' => 'e', // e with macron and ogonek
 919 |     'e̱' => 'e', // e with macron below
 920 |     'ę̄́' => 'e', // e with macron, acute and ogonek
 921 |     'ę̄̌' => 'e', // e with macron, caron and ogonek
 922 |     'ę̄̂' => 'e', // e with macron, circumflex and ogonek
 923 |     'ę̄̀' => 'e', // e with macron, grave and ogonek
 924 |     'ę' => '\\k{e}', // e with ogonek
 925 |     'e᷎' => 'e', // e with ogonek above
 926 |     'ę᷎' => 'e', // e with ogonek above and ogonek
 927 |     'ę̣' => 'e', // e with ogonek and dot below
 928 |     'e̊' => 'e', // e with ring
 929 |     'ɇ' => 'e', // e with stroke
 930 |     'ẽ' => '\\~{e}', // e with tilde
 931 |     'ẽ́' => 'e', // e with tilde and acute
 932 |     'ẽ̌' => 'e', // e with tilde and caron
 933 |     'ẽ̂' => 'e', // e with tilde and circumflex
 934 |     'ẽ̀' => 'e', // e with tilde and grave
 935 |     'ę̃' => 'e', // e with tilde and ogonek
 936 |     'ẽ̍' => 'e', // e with tilde and vertical line
 937 |     'ḛ' => 'e', // e with tilde below
 938 |     'e̍' => 'e', // e with vertical line
 939 |     // 'ǯ', ezh with caron
 940 |     'f́' => '\\\'{f}', // f with acute
 941 |     'f̧' => '\\c{f}', // f with cedilla
 942 |     'ḟ' => '\\.{f}', // f with dot above
 943 |     'f̣' => '\\d{f}', // f with dot below
 944 |     'f̀' => '\\`{f}', // f with grave
 945 |     'ƒ' => 'f', // f with hook (Script F)
 946 |     'f̄' => '\\={f}', // f with macron
 947 |     'ǵ' => '\\\'{g}', // g with acute
 948 |     'ğ' => '\\u{g}', // g with breve
 949 |     'ǧ' => '\\v{g}', // g with caron
 950 |     'ģ' => '\\c{g}', // g with cedilla
 951 |     'ĝ' => '\\^{g}', // g with circumflex
 952 |     'g̈' => '\\"{g}', // g with diaeresis
 953 |     'g̤' => 'g', // g with diaeresis below
 954 |     'ġ' => '\\.{g}', // g with dot above
 955 |     'g̣' => '\\d{g}', // g with dot below
 956 |     'g̀' => '\\`{g}', // g with grave
 957 |     'ɠ' => 'g', // g with hook
 958 |     'ḡ' => '\\={g}', // g with macron
 959 |     'ꞡ' => 'g', // g with oblique stroke
 960 |     'ǥ' => 'g', // g with stroke
 961 |     'g̃' => '\\~{g}', // g with tilde
 962 |     // 'ɣ̓', gamma with comma above
 963 |     'h́' => '\\\'{h}', // h with acute
 964 |     'ḫ' => 'h', // h with breve below
 965 |     'ȟ' => '\\v{h}', // h with caron
 966 |     'ḩ' => '\\c{h}', // h with cedilla
 967 |     'h̐' => 'h', // h with chandrabindu
 968 |     'ĥ' => '\\^{h}', // h with circumflex
 969 |     'h̓' => 'h', // h with comma above
 970 |     'ⱨ' => 'h', // h with descender
 971 |     'ḧ' => '\\"{h}', // h with diaeresis
 972 |     'h̤' => 'h', // h with diaeresis below
 973 |     'ḣ' => '\\.{h}', // h with dot above
 974 |     'ḣ' => '\\.{h}', // h with dot above
 975 |     'ḥ' => '\\d{h}', // h with dot below
 976 |     'ɦ' => 'h', // h with hook
 977 |     'ẖ' => 'h', // h with line below
 978 |     'h̄' => '\\={h}', // h with macron
 979 |     'ħ' => 'h', // h with stroke
 980 |     // 'ꜧ', heng
 981 |     'ı' => 'i', // i (lowercase, i.e. ı) without dot above
 982 |     'i' => 'i', // i (uppercase) with dot above
 983 |     'í' => '\\\'{i}', // i with acute
 984 |     'ĭ' => '\\u{i}', // i with breve
 985 |     'ǐ' => '\\v{i}', // i with caron
 986 |     'i̧' => '\\c{i}', // i with cedilla
 987 |     'í̧' => 'i', // i with cedilla and acute
 988 |     'î̧' => 'i', // i with cedilla and circumflex
 989 |     'ì̧' => 'i', // i with cedilla and grave
 990 |     'i̐' => 'i', // i with chandrabindu
 991 |     'î' => '\\^{i}', // i with circumflex
 992 |     'î́' => 'i', // i with circumflex and acute
 993 |     'i̓' => 'i', // i with comma above
 994 |     'ï' => '\\"{i}', // i with diaeresis
 995 |     'ḯ' => 'i', // i with diaeresis and acute
 996 |     'ị' => '\\d{i}', // i with dot below
 997 |     'ị́' => 'i', // i with dot below and acute
 998 |     'ị̂' => 'i', // i with dot below and circumflex
 999 |     'ị̃' => 'i', // i with dot below and tilde
1000 |     'i̋' => '\\H{i}', // i with double acute
1001 |     'ȉ' => 'i', // i with double grave
1002 |     'ȉ' => 'i', // i with double grave
1003 |     'ì' => '\\`{i}', // i with grave
1004 |     'ỉ' => 'i', // i with hook above
1005 |     'ȋ' => 'i', // i with inverted breve
1006 |     'i̱' => 'i', // i with line below
1007 |     'í̱' => 'i', // i with line below and acute
1008 |     'î̱' => 'i', // i with line below and circumflex
1009 |     'ì̱' => 'i', // i with line below and grave
1010 |     'ī̱' => 'i', // i with line below and macron
1011 |     'ī' => '\\={i}', // i with macron
1012 |     'ī́' => 'i', // i with macron and acute
1013 |     'ī̌' => 'i', // i with macron and caron
1014 |     'ī̂' => 'i', // i with macron and circumflex
1015 |     'ī̀' => 'i', // i with macron and grave
1016 |     'į' => '\\k{i}', // i with ogonek
1017 |     'į́' => 'i', // i with ogonek and acute
1018 |     'į̌' => 'i', // i with ogonek and caron
1019 |     'į̂' => 'i', // i with ogonek and circumflex
1020 |     'į̀' => 'i', // i with ogonek and grave
1021 |     'į̃' => 'i', // i with ogonek and tilde
1022 |     'ɨ' => 'i', // i with stroke
1023 |     'ɨ́' => 'i', // i with stroke and acute
1024 |     'ɨ̌' => 'i', // i with stroke and caron
1025 |     'ɨ̧' => 'i', // i with stroke and cedilla
1026 |     'ɨ̂' => 'i', // i with stroke and circumflex
1027 |     'ɨ̀' => 'i', // i with stroke and grave
1028 |     'ɨ̄' => 'i', // i with stroke and macron
1029 |     'ɨ̃' => 'i', // i with stroke and tilde
1030 |     'ɨ̧̌' => 'i', // i with stroke, cedilla and caron
1031 |     'ɨ̧̀' => 'i', // i with stroke, cedilla and grave
1032 |     'ɨ̧̂' => 'i', // i with stroke, cedilla, and circumflex
1033 |     'ĩ' => '\\~{i}', // i with tilde
1034 |     'ĩ́' => 'i', // i with tilde and acute
1035 |     'ĩ̌' => 'i', // i with tilde and caron
1036 |     'ĩ̂' => 'i', // i with tilde and circumflex
1037 |     'ĩ̀' => 'i', // i with tilde and grave
1038 |     'ĩ̍' => 'i', // i with tilde and vertical line
1039 |     'ḭ' => 'i', // i with tilde below
1040 |     'i̍' => 'i', // i with vertical line
1041 |     // 'ꝼ́', insular F with acute
1042 |     // 'ꝼ̇', insular F with dot above
1043 |     // 'ꝼ̣', insular F with dot below
1044 |     // 'ɩ́', iota with acute
1045 |     // 'ɩ̀', iota with grave
1046 |     // 'ɩ̃', iota with tilde
1047 |     'j́' => '\\\'{j}', // j with acute
1048 |     'ǰ' => '\\v{j}', // j with caron
1049 |     'ĵ' => '\\^{j}', // j with circumflex
1050 |     'j̣' => '\\d{j}', // j with dot below
1051 |     'j̄' => '\\={j}', // j with macron
1052 |     'ɉ' => 'j', // j with stroke
1053 |     'j̃' => '\\~{j}', // j with tilde
1054 |     'ḱ' => '\\\'{k}', // k with acute
1055 |     'ǩ' => '\\v{k}', // k with caron
1056 |     'ķ' => '\\c{k}', // k with cedilla
1057 |     'ⱪ' => 'k', // k with descender
1058 |     'ꝃ' => 'k', // k with diagonal stroke
1059 |     'k̇' => '\\.{k}', // k with dot above
1060 |     'ḳ' => '\\d{k}', // k with dot below
1061 |     'k̀' => '\\`{k}', // k with grave
1062 |     'ƙ' => 'k', // k with hook
1063 |     'ḵ' => 'k', // k with line below
1064 |     'k̄' => '\\={k}', // k with macron
1065 |     'ꞣ' => 'k', // k with oblique stroke
1066 |     'ꝁ' => 'k', // k with stroke
1067 |     'ꝅ' => 'k', // k with stroke and diagonal stroke
1068 |     'ĺ' => '\\\'{l}', // l with acute
1069 |     'ḷ́' => 'l', // l with acute and dot below
1070 |     'ƚ' => 'l', // l with bar
1071 |     'ľ' => '\\v{l}', // l with caron
1072 |     'ļ' => '\\c{l}', // l with cedilla
1073 |     'l̐' => 'l', // l with chandrabindu
1074 |     'l̂' => '\\^{l}', // l with circumflex
1075 |     'ḽ' => 'l', // l with circumflex below
1076 |     'l̓' => 'l', // l with comma above
1077 |     'ḷ̓' => 'l', // l with comma above and dot below
1078 |     'l̦' => 'l', // l with comma below
1079 |     'ḷ' => '\\d{l}', // l with dot below
1080 |     'ḹ' => 'l', // l with dot below and macron
1081 |     'ⱡ' => 'l', // l with double bar
1082 |     'ꝉ' => 'l', // l with high stroke
1083 |     'ḻ' => 'l', // l with line below
1084 |     'ɫ' => 'l', // l with middle tilde
1085 |     'ł' => 'l', // l with stroke
1086 |     'l̃' => '\\~{l}', // l with tilde
1087 |     'ḿ' => '\\\'{m}', // m with acute
1088 |     'ṃ́' => 'm', // m with acute and dot below
1089 |     'm̧' => '\\c{m}', // m with cedilla
1090 |     'm̐' => 'm', // m with chandrabindu
1091 |     'm̓' => 'm', // m with comma above
1092 |     'ṃ̓' => 'm', // m with comma above and dot below
1093 |     'm̦' => 'm', // m with comma below
1094 |     'm̈' => '\\"{m}', // m with diaeresis
1095 |     'ṁ' => '\\.{m}', // m with dot above
1096 |     'ṃ' => '\\d{m}', // m with dot below
1097 |     'ṃ' => '\\d{m}', // m with dot below
1098 |     'm̀' => '\\`{m}', // m with grave
1099 |     'ɱ' => 'm', // m with hook
1100 |     'm̄' => '\\={m}', // m with macron
1101 |     'm̨' => '\\k{m}', // m with ogonek
1102 |     'm̃' => '\\~{m}', // m with tilde
1103 |     'm̍' => 'm', // m with vertical line
1104 |     'ń' => '\\\'{n}', // n with acute
1105 |     'ṇ́' => 'n', // n with acute and dot below
1106 |     'ň' => '\\v{n}', // n with caron
1107 |     'ņ' => '\\c{n}', // n with cedilla
1108 |     'n̐' => 'n', // n with chandrabindu
1109 |     'n̂' => '\\^{n}', // n with circumflex
1110 |     'ṋ' => 'n', // n with circumflex below
1111 |     'ṇ̓' => 'n', // n with comma above and dot below
1112 |     'n̦' => 'n', // n with comma below
1113 |     'ꞑ' => 'n', // n with descender
1114 |     'n̈' => 'n', // n with diaresis
1115 |     'ṅ' => '\\.{n}', // n with dot above
1116 |     'ṇ' => '\\d{n}', // n with dot below
1117 |     'ǹ' => '\\`{n}', // n with grave
1118 |     'ɲ' => 'n', // n with left hook
1119 |     'ṉ' => 'n', // n with line below
1120 |     'ƞ' => 'n', // n with long right leg
1121 |     'n̄' => '\\={n}', // n with macron
1122 |     'ꞥ' => 'n', // n with oblique stroke
1123 |     'ñ̈' => 'n', // n with tidle and diaeresis
1124 |     'ñ' => '\\~{n}', // n with tilde
1125 |     'n̰' => 'n', // n with tilde below
1126 |     'n̲' => '\\b{n}', // n with underline
1127 |     'n̍' => 'n', // n with vertical line
1128 |     'ó' => '\\\'{o}', // o with acute
1129 |     'ó̧' => 'o', // o with acute and cedilla
1130 |     'ọ́' => 'o', // o with acute and dot below
1131 |     'ó̱' => 'o', // o with acute and line below
1132 |     'ǫ́' => 'o', // o with acute and ogonek
1133 |     'ɵ' => 'o', // o with bar
1134 |     'ŏ' => '\\u{o}', // o with breve
1135 |     'ǒ' => '\\v{o}', // o with caron
1136 |     'ǒ̧' => 'o', // o with caron and cedilla
1137 |     'ǫ̌' => 'o', // o with caron and ogonek
1138 |     'o̧' => '\\c{o}', // o with cedilla
1139 |     'o̐' => 'o', // o with chandrabindu
1140 |     'ô' => '\\^{o}', // o with circumflex
1141 |     'ố' => 'o', // o with circumflex and acute
1142 |     'ô̧' => 'o', // o with circumflex and cedilla
1143 |     'ộ' => 'o', // o with circumflex and dot below
1144 |     'ộ' => 'o', // o with circumflex and dot below
1145 |     'ồ' => 'o', // o with circumflex and grave
1146 |     'ổ' => 'o', // o with circumflex and hook above
1147 |     'ô̱' => 'o', // o with circumflex and line below
1148 |     'ǫ̂' => 'o', // o with circumflex and ogonek
1149 |     'ỗ' => 'o', // o with circumflex and tilde
1150 |     'o̭' => 'o', // o with circumflex below
1151 |     'ö' => '\\"{o}', // o with diaeresis
1152 |     'ö́' => 'o', // o with diaeresis and acute
1153 |     'ö̀' => 'o', // o with diaeresis and grave
1154 |     'ö̱' => 'o', // o with diaeresis and line below
1155 |     'ȫ' => 'o', // o with diaeresis and macron
1156 |     'ȯ' => '\\.{o}', // o with dot above
1157 |     'ȱ' => 'o', // o with dot above and macron
1158 |     'o̍͘' => 'o', // o with dot above and vertical line
1159 |     'o͘' => 'o', // o with dot above right
1160 |     'ó͘' => 'o', // o with dot above right and acute
1161 |     'ò͘' => 'o', // o with dot above right and grave
1162 |     'ō͘' => 'o', // o with dot above right and macron
1163 |     'ọ' => '\\d{o}', // o with dot below
1164 |     'ő' => '\\H{o}', // o with double acute
1165 |     'ő' => '\\H{o}', // o with double acute
1166 |     'ȍ' => 'o', // o with double grave
1167 |     'ò' => '\\`{o}', // o with grave
1168 |     'ò̧' => 'o', // o with grave and cedilla
1169 |     'ọ̀' => 'o', // o with grave and dot below
1170 |     'ò̱' => 'o', // o with grave and line below
1171 |     'ǫ̀' => 'o', // o with grave and ogonek
1172 |     'ỏ' => 'o', // o with hook above
1173 |     'ơ' => 'o', // o with horn
1174 |     'ớ' => 'o', // o with horn and acute
1175 |     'ợ' => 'o', // o with horn and dot below
1176 |     'ờ' => 'o', // o with horn and grave
1177 |     'ở' => 'o', // o with horn and hook above
1178 |     'ỡ' => 'o', // o with horn and tilde
1179 |     'ȏ' => 'o', // o with inverted breve
1180 |     'o̱' => 'o', // o with line below
1181 |     'ꝋ' => 'o', // o with long stroke overlay
1182 |     'ꝋ' => 'o', // o with long stroke overlay
1183 |     'ꝍ' => 'o', // o with loop
1184 |     'ō' => '\\={o}', // o with macron
1185 |     'ṓ' => 'o', // o with macron and acute
1186 |     'ō̌' => 'o', // o with macron and caron
1187 |     'ō̂' => 'o', // o with macron and circumflex
1188 |     'ọ̄' => 'o', // o with macron and dot below
1189 |     'ṑ' => 'o', // o with macron and grave
1190 |     'ō̱' => 'o', // o with macron and line below
1191 |     'ǭ' => 'o', // o with macron and ogonek
1192 |     'ǭ̀' => 'o', // o with macron, grave and ogonek
1193 |     'ǫ' => '\\k{o}', // o with ogonek
1194 |     'ø' => 'o', // o with stroke
1195 |     'ǿ' => 'o', // o with stroke and acute
1196 |     'ø̌' => 'o', // o with stroke and caron
1197 |     'ø̂' => 'o', // o with stroke and circumflex
1198 |     'ø̀' => 'o', // o with stroke and grave
1199 |     'ø̄' => 'o', // o with stroken and macron
1200 |     'õ' => '\\~{o}', // o with tilde
1201 |     'ṍ' => 'o', // o with tilde and acute
1202 |     'ṏ' => 'o', // o with tilde and diaeresis
1203 |     'ȭ' => 'o', // o with tilde and macron
1204 |     'o̲' => '\\b{o}', // o with underline
1205 |     'o̍' => 'o', // o with vertical line
1206 |     // 'ɔ́', open O with acute
1207 |     // 'ɔ̧́', open O with acute and cedilla
1208 |     // 'ɔ̌', open O with caron
1209 |     // 'ɔ̧̌', open O with caron and cedilla
1210 |     // 'ɔ̧', open O with cedilla
1211 |     // 'ɔ̂', open O with circumflex
1212 |     // 'ɔ̧̂', open O with circumflex and cedilla
1213 |     // 'ɔ̈', open O with diaeresis
1214 |     // 'ɔ̀', open O with grave
1215 |     // 'ɔ̧̀', open O with grave and cedilla
1216 |     // 'ɔ̄', open O with macron
1217 |     // 'ɔ̃', open O with tilde
1218 |     // 'ɔ̃́', open O with tilde and acute
1219 |     // 'ɔ̃̌', open O with tilde and caron
1220 |     // 'ɔ̃̂', open O with tilde and circumflex
1221 |     // 'ɔ̃̀', open O with tilde and grave
1222 |     // 'ɔ̃̍', open O with tilde and vertical line
1223 |     // 'ɔ̱', open O with tilde below
1224 |     // 'ɔ̍', open O with vertical line
1225 |     'ṕ' => '\\\'{p}', // p with acute
1226 |     'p̓' => 'p', // p with comma above
1227 |     'p̈' => '\\"{p}', // p with diaeresis
1228 |     'p̤' => 'p', // p with diaeresis below
1229 |     'ṗ' => '\\.{p}', // p with dot above
1230 |     'p̣' => 'p', // p with dot above below
1231 |     'ꝓ' => 'p', // p with flourish
1232 |     'p̀' => '\\`{p}', // p with grave
1233 |     'ƥ' => 'p', // p with hook
1234 |     'p̄' => '\\={p}', // p with macron
1235 |     'ꝕ' => 'p', // p with squirrel tail
1236 |     'ᵽ' => 'p', // p with stroke
1237 |     'ꝑ' => 'p', // p with stroke through descender
1238 |     'p̄' => '\\~{p}', // p with tilde
1239 |     'q̓' => 'q', // q with comma above
1240 |     'ꝙ' => 'q', // q with diagonal stroke
1241 |     'q̇' => '\\.{q}', // q with dot above
1242 |     'ɋ' => 'q', // q with hook tail
1243 |     'ꝗ' => 'q', // q with stroke through descender
1244 |     'ŕ' => '\\\'{r}', // r with acute
1245 |     'ř' => '\\v{r}', // r with caron
1246 |     'ŗ' => '\\c{r}', // r with cedilla
1247 |     'r̂' => '\\^{r}', // r with circumflex
1248 |     'r̓' => 'r', // r with comma above
1249 |     'r̦' => 'r', // r with comma below
1250 |     'r̰' => 'r', // r with diaeresis below
1251 |     'ṙ' => '\\.{r}', // r with dot above
1252 |     'ṛ' => '\\d{r}', // r with dot below
1253 |     'ṝ' => 'r', // r with dot below and macron
1254 |     'ȑ' => 'r', // r with double grave
1255 |     'ȓ' => 'r', // r with inverted breve
1256 |     'ṟ' => 'r', // r with line below
1257 |     'r̄' => '\\={r}', // r with macron
1258 |     'ꞧ' => 'r', // r with oblique stroke
1259 |     'r̥' => 'r', // r with ring below
1260 |     'r̥̄' => 'r', // r with ring below and macron
1261 |     'ɍ' => 'r', // r with stroke
1262 |     'ɽ' => 'r', // r with tail
1263 |     'r̃' => '\\~{r}', // r with tilde
1264 |     // 'ꜿ', reversed C with dot
1265 |     'ś' => '\\\'{s}', // s with acute
1266 |     'ṥ' => 's', // s with acute and dot above
1267 |     'š' => '\\v{s}', // s with caron
1268 |     'ṧ' => 's', // s with caron and dot above
1269 |     'ş' => '\\c{s}', // s with cedilla
1270 |     'ŝ' => '\\^{s}', // s with circumflex
1271 |     'ș' => 's', // s with comma below
1272 |     'ṡ' => '\\.{s}', // s with dot above
1273 |     'ṣ' => '\\d{s}', // s with dot below
1274 |     'ṩ' => 's', // s with dot below and dot above
1275 |     'ᵴ' => 's', // s with middle tilde
1276 |     'ꞩ' => 's', // s with oblique stroke
1277 |     'ȿ' => 's', // s with swash tail
1278 |     'ť' => '\\v{t}', // t with caron
1279 |     'ţ' => '\\c{t}', // t with cedilla
1280 |     'ṱ' => 't', // t with circumflex below
1281 |     'ț' => 't', // t with comma below
1282 |     'ẗ' => '\\"{t}', // t with diaeresis
1283 |     'ⱦ' => 't', // t with diagonal stroke
1284 |     'ṫ' => '\\.{t}', // t with dot above
1285 |     'ṭ' => '\\d{t}', // t with dot below
1286 |     'ƭ' => 't', // t with hook
1287 |     'ṯ' => 't', // t with line below
1288 |     'ʈ' => 't', // t with retroflex hook
1289 |     'ŧ' => 't', // t with stroke
1290 |     // 'ꝥ', thorn with stroke
1291 |     // 'ꝧ', thorn with stroke through descender
1292 |     'ʉ' => 'u', // u bar
1293 |     'ú' => '\\\'{u}', // u with acute
1294 |     'ŭ' => '\\u{u}', // u with breve
1295 |     'ǔ' => '\\v{u}', // u with caron
1296 |     'û' => '\\^{u}', // u with circumflex
1297 |     'ṷ' => 'u', // u with circumflex below
1298 |     'ü' => '\\"{u}', // u with diaeresis
1299 |     'ǘ' => 'u', // u with diaeresis and acute
1300 |     'ǚ' => 'u', // u with diaeresis and caron
1301 |     'ǜ' => 'u', // u with diaeresis and grave
1302 |     'ǖ' => 'u', // u with diaeresis and macron
1303 |     'ṳ' => 'u', // u with diaeresis below
1304 |     'ụ' => '\\d{u}', // u with dot below
1305 |     'ű' => '\\H{u}', // u with double acute
1306 |     'ȕ' => 'u', // u with double grave
1307 |     'ù' => '\\`{u}', // u with grave
1308 |     'ủ' => 'u', // u with hook above
1309 |     'ư' => 'u', // u with horn
1310 |     'ứ' => 'u', // u with horn and acute
1311 |     'ự' => 'u', // u with horn and dot below
1312 |     'ừ' => 'u', // u with horn and grave
1313 |     'ử' => 'u', // u with horn and hook above
1314 |     'ữ' => 'u', // u with horn and tilde
1315 |     'ȗ' => 'u', // u with inverted breve
1316 |     'ū' => '\\={u}', // u with macron
1317 |     'ṻ' => 'u', // u with macron and diaeresis
1318 |     'ų' => '\\k{u}', // u with ogonek
1319 |     'ů' => '\\r{u}', // u with ring above
1320 |     'ũ' => '\\~{u}', // u with tilde
1321 |     'ṹ' => 'u', // u with tilde and acute
1322 |     'ṵ' => 'u', // u with tilde below
1323 |     'ꝟ' => 'v', // v with diagonal stroke
1324 |     'ṿ' => '\\d{v}', // v with dot below
1325 |     'ʋ' => 'v', // v with hook (Script V)
1326 |     'ṽ' => '\\~{v}', // v with tilde
1327 |     'ẃ' => '\\\'{w}', // w with acute
1328 |     'ŵ' => '\\^{w}', // w with circumflex
1329 |     'ẅ' => '\\"{w}', // w with diaeresis
1330 |     'ẇ' => '\\.{w}', // w with dot above
1331 |     'ẉ' => '\\d{w}', // w with dot below
1332 |     'ẁ' => '\\`{w}', // w with grave
1333 |     'ⱳ' => 'w', // w with hook
1334 |     'ẘ' => '\\r{w}', // w with ring above
1335 |     'ẍ' => '\\"{x}', // x with diaeresis
1336 |     'ẋ' => '\\.{x}', // x with dot above
1337 |     'ý' => '\\\'{y}', // y with acute
1338 |     'ŷ' => '\\^{y}', // y with circumflex
1339 |     'ÿ' => '\\"{y}', // y with diaeresis
1340 |     'ẏ' => '\\.{y}', // y with dot above
1341 |     'ỵ' => '\\d{y}', // y with dot below
1342 |     'ỳ' => '\\`{y}', // y with grave
1343 |     'ƴ' => 'y', // y with hook
1344 |     'ỷ' => 'y', // y with hook above
1345 |     'ỿ' => 'y', // y with loop
1346 |     'ȳ' => '\\={y}', // y with macron
1347 |     'ẙ' => '\\r{y}', // y with ring above
1348 |     'ɏ' => 'y', // y with stroke
1349 |     'ỹ' => '\\~{y}', // y with tilde
1350 |     'ź' => '\\\'{z}', // z with acute
1351 |     'ž' => '\\v{z}', // z with caron
1352 |     'ẑ' => '\\^{z}', // z with circumflex
1353 |     'ⱬ' => 'z', // z with descender
1354 |     'ż' => '\\.{z}', // z with dot above
1355 |     'ẓ' => '\\d{z}', // z with dot below
1356 |     'ȥ' => 'z', // z with hook
1357 |     'ẕ' => 'z', // z with line below
1358 |     'ƶ' => 'z', // z with stroke
1359 |     'ɀ' => 'z', // z with swash tail
1360 | 
1361 |     '–' => '--',
1362 |     '—' => '---',
1363 |     '…' => '\\ldots{}',
1364 | 
1365 |     '¶' => '\\P{}',
1366 |     '§' => '\\S{}',
1367 | 
1368 |     'æ' => '\\ae{}',
1369 |     'Æ' => '\\AE{}',
1370 |     'ß' => '\\ss{}',
1371 |     'œ' => '\\oe{}',
1372 |     'Œ' => '\\OE{}',
1373 |     'ø' => '\\o{}',
1374 |     'Ø' => '\\O{}',
1375 |     'Å' => '\\AA{}',
1376 |     'å' => '\\aa{}',
1377 |     'ł' => '\\l{}',
1378 |     'Ł' => '\\L{}',
1379 |     'Ŋ' => '\\NG{}',
1380 |     'ŋ' => '\\ng{}',
1381 | 
1382 |     'α' => '$\\alpha$',
1383 |     'β' => '$\\beta$',
1384 |     'γ' => '$\\gamma$',
1385 |     'δ' => '$\\delta$',
1386 |     'ε' => '$\\varepsilon$',
1387 |     'ζ' => '$\\zeta$',
1388 |     'η' => '$\\eta$',
1389 |     'θ' => '$\\vartheta$',
1390 |     'ι' => '$\\iota$',
1391 |     'κ' => '$\\kappa$',
1392 |     'λ' => '$\\lambda$',
1393 |     'μ' => '$\\mu$',
1394 |     'ν' => '$\\nu$',
1395 |     'ξ' => '$\\xi$',
1396 |     'ο' => '$\\omicron$',
1397 |     'π' => '$\\pi$',
1398 |     'ρ' => '$\\varrho$',
1399 |     'ς' => '$\\varsigma$',
1400 |     'σ' => '$\\sigma$',
1401 |     'τ' => '$\\tau$',
1402 |     'υ' => '$\\upsilon$',
1403 |     'φ' => '$\\varphi$',
1404 |     'χ' => '$\\chi$',
1405 |     'ψ' => '$\\psi$',
1406 |     'ω' => '$\\omega$',
1407 |     'Α' => '$\\Alpha$',
1408 |     'Β' => '$\\Beta$',
1409 |     'Γ' => '$\\Gamma$',
1410 |     'Δ' => '$\\Delta$',
1411 |     'Ε' => '$\\Epsilon$',
1412 |     'Ζ' => '$\\Zeta$',
1413 |     'Η' => '$\\Eta$',
1414 |     'Θ' => '$\\Theta$',
1415 |     'Ι' => '$\\Iota$',
1416 |     'Κ' => '$\\Kappa$',
1417 |     'Λ' => '$\\Lambda$',
1418 |     'Μ' => '$\\Mu$',
1419 |     'Ν' => '$\\Nu$',
1420 |     'Ξ' => '$\\Xi$',
1421 |     'Ο' => '$\\Omicron$',
1422 |     'Π' => '$\\Pi$',
1423 |     'Ρ' => '$\\Rho$',
1424 |     'Σ' => '$\\Sigma$',
1425 |     'Τ' => '$\\Tau$',
1426 |     'Υ' => '$\\Upsilon$',
1427 |     'Φ' => '$\\Phi$',
1428 |     'Χ' => '$\\Chi$',
1429 |     'Ψ' => '$\\Psi$',
1430 |     'Ω' => '$\\Omega$',
1431 | 
1432 |     // ligatures
1433 |     'Æ' => '\\AE{}',
1434 |     'Ǽ' => '\\AE{}',
1435 |     'Ǣ' => '\\AE{}',
1436 |     'æ' => '\\ae{}',
1437 |     'ǽ' => '\\ae{}',
1438 |     'ǣ' => '\\ae{}',
1439 |     'Œ' => '\\OE{}',
1440 |     'ɶ' => '\\OE{}',
1441 |     'œ' => '\\oe{}',
1442 |     'ᵫ' => 'ue',
1443 |     'Ĳ' => 'IJ',
1444 |     'ĳ' => 'ij',
1445 | 
1446 |     'ﬀ' => 'ff',
1447 |     'ﬃ' => 'ffi',
1448 |     'ﬄ' => 'ffl',
1449 |     'ﬁ' => 'fi',
1450 |     'ﬂ' => 'fl',
1451 |     'ﬅ' => 'ft',
1452 |     'ﬆ' => 'st',
1453 |     'ʦ' => 'ts',
1454 | 
1455 |     'Ǉ' => 'LJ',
1456 |     'ǈ' => 'Lj',
1457 |     'ǉ' => 'lj',
1458 |     'ʪ' => 'ls',
1459 |     'ʫ' => 'lz',
1460 |     'Ǌ' => 'NJ',
1461 |     'ǋ' => 'Nj',
1462 |     'ǌ' => 'nj',
1463 |     'Ǳ' => 'DZ',
1464 |     'ǲ' => 'Dz',
1465 |     'ǳ' => 'dz',
1466 |     'ʬ' => 'ww',
1467 | 
1468 |     'Ŋ' => '\\NG{}',
1469 |     'ŋ' => '\\ng{}',
1470 | 
1471 |     'Ǆ' => 'D\'\\v{Z}',
1472 |     'ǅ' => 'D\'\\v{z}',
1473 |     'ǆ' => 'd\'\\v{z}',
1474 | 
1475 |     'Ƕ' => 'Hv',
1476 |     'ƕ' => 'hv',
1477 | 
1478 |     // spaces
1479 |     ' ' => '--', // OGHAM SPACE MARK
1480 |     ' ' => ' ', // EN SPACE
1481 |     ' ' => ' ', // EM SPACE
1482 |     ' ' => ' ', // THREE-PER-EM SPACE
1483 |     ' ' => ' ', // FOUR-PER-EM SPACE
1484 |     ' ' => ' ', // SIX-PER-EM SPACE
1485 |     ' ' => ' ', // FIGURE SPACE
1486 |     ' ' => ' ', // PUNCTUATION SPACE
1487 |     ' ' => ' ', // THIN SPACE
1488 |     '　' => ' ', // IDEOGRAPHIC SPACE
1489 |     ' ' => ' ', // NO-BREAK SPACE
1490 |     ' ' => ' ', // HAIR SPACE
1491 |     "\xE2\x80\x8B" => '', // ZERO WIDTH SPACE
1492 |     ' ' => ' ', // NARROW NO-BREAK SPACE
1493 |     "\xE2\x80\xAF" => ' ', // NARROW NO-BREAK SPACE
1494 |     ' ' => ' ', // MEDIUM MATHEMATICAL SPACE
1495 |     "\xE2\x81\x9F" => ' ', // MEDIUM MATHEMATICAL SPACE
1496 |     "\xE1\xA0\x8E" => '', // MONGOLIAN VOWEL SEPARATOR
1497 |     "\xE2\x80\x80" => ' ', // EN QUAD
1498 |     "\xE2\x80\x81" => ' ', // EM QUAD
1499 |     "\xEF\xBB\xBF" => '', // ZERO WIDTH NO-BREAK SPACE (BOM)
1500 | );
1501 | 


--------------------------------------------------------------------------------