├── LICENSE ├── README.md ├── composer.json ├── data ├── amssymb │ ├── both-amssymb.tex │ ├── math-amssymb-alphabets.tex │ ├── math-amssymb-binops.tex │ ├── math-amssymb-greek.tex │ ├── math-amssymb-loglike.tex │ ├── math-amssymb-misc.tex │ ├── math-amssymb-symbols.tex │ └── math-amssymb-varsized-delimiters.tex ├── amstext │ └── both-amstext-alphabets.tex ├── base.php ├── both-alphabets.tex ├── both-refs.tex ├── both-spaces.tex ├── both.tex ├── compile.php ├── fixltx2e │ └── both-fixltx2e.tex ├── hyperref │ └── text-hyperref.tex ├── math-accents.tex ├── math-alphabets.tex ├── math-arrows.tex ├── math-binops.tex ├── math-delimiters.tex ├── math-greek.tex ├── math-large-delimeters.tex ├── math-loglike.tex ├── math-misc.tex ├── math-other.tex ├── math-punctuation.tex ├── math-relations.tex ├── math-spaces.tex ├── math-varsymbols.tex ├── math.tex ├── text-accents.tex ├── text-fontsize.tex ├── text-primitives.tex ├── text-spaces.tex └── text.tex └── library └── PhpLatex ├── Filter └── Html2Latex.php ├── Lexer.php ├── Node.php ├── Parser.php ├── PdfLatex.php ├── Renderer ├── Abstract.php ├── Html.php ├── NodeRenderer.php └── Typestyle.php ├── Utils.php ├── Utils ├── PeekableArrayIterator.php ├── PeekableIterator.php └── TreeDebug.php ├── commands.php ├── environs.php └── latex_utf8.php /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Xemlock 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 
all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # php-latex 2 | 3 | [![Build status](https://github.com/xemlock/php-latex/workflows/build/badge.svg)](https://github.com/xemlock/php-latex/actions?query=workflow/build) 4 | [![License](https://img.shields.io/github/license/xemlock/php-latex.svg)](https://packagist.org/packages/xemlock/php-latex) 5 | 6 | 7 | The main purpose of this library is to produce valid LaTeX output from user input that is not always valid. You can also render LaTeX code to HTML, with one limitation: only text mode is rendered to HTML; math mode needs to be handled by a JavaScript 8 | library in the browser. For this I recommend using [MathJax](https://www.mathjax.org/). 9 | 10 | Bear in mind that not every LaTeX command is recognized or implemented. If you happen to need a command that's 11 | not supported, you can either define it manually (see the description below), or file a [feature request](https://github.com/xemlock/php-latex/issues/new/choose). 12 | 13 | ## Installation 14 | 15 | To use php-latex, install it just like any other PHP package - with [Composer](https://getcomposer.org/). 
16 | 17 | ``` 18 | composer require xemlock/php-latex:dev-master 19 | ``` 20 | 21 | ## Usage 22 | 23 | Basic usage is as follows: 24 | 25 | ### Parsing LaTeX source code 26 | 27 | ```php 28 | $parser = new PhpLatex_Parser(); 29 | $parsedTree = $parser->parse($input); 30 | // $parsedTree contains an object representation of the LaTeX document 31 | ``` 32 | 33 | ### Render parsed LaTeX source 34 | 35 | Once you have parsed the source code, you can render it to HTML (or back to LaTeX). Please note that math-mode code is rendered as-is. 36 | 37 | ```php 38 | // render parsed LaTeX code to HTML 39 | $htmlRenderer = new PhpLatex_Renderer_Html(); 40 | $html = $htmlRenderer->render($parsedTree); 41 | 42 | // render parsed LaTeX code to sanitized LaTeX code 43 | $latex = PhpLatex_Renderer_Abstract::toLatex($parsedTree); 44 | ``` 45 | 46 | ### Customization 47 | 48 | You can add custom (or not yet implemented) commands to the parser: 49 | 50 | ```php 51 | $parser = new PhpLatex_Parser(); 52 | $parser->addCommand( 53 | '\placeholder', 54 | array( 55 | // number of arguments 56 | 'numArgs' => 1, 57 | // number of optional arguments, default 0 58 | 'numOptArgs' => 1, 59 | // mode this command is valid in, can be: 'both', 'math', 'text' 60 | 'mode' => 'both', 61 | // whether command arguments should be parsed, or handled as-is 62 | 'parseArgs' => false, 63 | // whether command allows a starred variant 64 | 'starred' => false, 65 | ) 66 | ); 67 | ``` 68 | 69 | ### pdflatex 70 | 71 | Additionally, this library provides a wrapper for pdflatex to make rendering and compiling `.tex` files 72 | from PHP scripts easier. 
73 | 74 | ```php 75 | $pdflatex = new PhpLatex_PdfLatex(); 76 | 77 | // to generate a PDF from .tex file 78 | $pathToGeneratedPdf = $pdflatex->compile('/path/to/document.tex', 79 | array(/* optional paths to files included by .tex file (images) */) 80 | ); 81 | ``` 82 | 83 | You can access the build log of the last `compile` call via: 84 | 85 | ```php 86 | echo $pdflatex->getLog(); 87 | ``` 88 | 89 | You can even compile a LaTeX string on the fly: 90 | 91 | ```php 92 | $pathToGeneratedPdf = $pdflatex->compileString(' 93 | \documentclass{article} 94 | \begin{document} 95 | Hello from \LaTeX! 96 | \end{document} 97 | '); 98 | ``` 99 | 100 | By default, the system temp directory is used when generating a PDF from a string. You can, however, customize it: 101 | 102 | ```php 103 | $pdflatex->setBuildDir('/path/to/temp'); 104 | ``` 105 | 106 | ## License 107 | 108 | The MIT License (MIT). See the LICENSE file. 109 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "xemlock/php-latex", 3 | "description": "LaTeX parser and renderer", 4 | "type": "library", 5 | "license": "MIT", 6 | "authors": [ 7 | { 8 | "name": "xemlock", 9 | "email": "xemlock@gmail.com" 10 | } 11 | ], 12 | "require": { 13 | "php": ">=5.3.0", 14 | "ext-dom": "*", 15 | "ext-mbstring": "*" 16 | }, 17 | "require-dev": { 18 | "phpunit/phpunit": ">=5.7 <10.0" 19 | }, 20 | "autoload": { 21 | "psr-0": { "PhpLatex_": "library" } 22 | }, 23 | "scripts": { 24 | "post-install-cmd": [ 25 | "@php .scripts/patch-phpunit.php" 26 | ], 27 | "post-update-cmd": [ 28 | "@php .scripts/patch-phpunit.php" 29 | ], 30 | "test": "phpunit" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /data/amssymb/both-amssymb.tex: -------------------------------------------------------------------------------- 1 | \checkmark 2 | \circledR 3 | \maltese 4 | \yen 5 
| -------------------------------------------------------------------------------- /data/amssymb/math-amssymb-alphabets.tex: -------------------------------------------------------------------------------- 1 | \mathfrak{Abc} 2 | \mathbb{NRC} 3 | -------------------------------------------------------------------------------- /data/amssymb/math-amssymb-binops.tex: -------------------------------------------------------------------------------- 1 | \barwedge 2 | \boxdot 3 | \boxminus 4 | \boxplus 5 | \boxtimes 6 | \Cap 7 | \centerdot 8 | \circledast 9 | \circledcirc 10 | \circleddash 11 | \Cup 12 | \curlyvee 13 | \curlywedge 14 | \divideontimes 15 | \dotplus 16 | \doublebarwedge 17 | \intercal 18 | \leftthreetimes 19 | \ltimes 20 | \rightthreetimes 21 | \rtimes 22 | \smallsetminus 23 | \veebar 24 | -------------------------------------------------------------------------------- /data/amssymb/math-amssymb-greek.tex: -------------------------------------------------------------------------------- 1 | \digamma 2 | \varkappa 3 | -------------------------------------------------------------------------------- /data/amssymb/math-amssymb-loglike.tex: -------------------------------------------------------------------------------- 1 | \injlim 2 | \projlim 3 | \varinjlim 4 | \varliminf 5 | \varlimsup 6 | \varprojlim 7 | -------------------------------------------------------------------------------- /data/amssymb/math-amssymb-misc.tex: -------------------------------------------------------------------------------- 1 | \angle 2 | \Box 3 | \dashleftarrow 4 | \dashrightarrow 5 | \Diamond 6 | \hbar 7 | \llcorner 8 | \lrcorner 9 | \mho 10 | \rightleftharpoons 11 | \sqsubset 12 | \sqsupset 13 | \ulcorner 14 | \urcorner 15 | -------------------------------------------------------------------------------- /data/amssymb/math-amssymb-symbols.tex: -------------------------------------------------------------------------------- 1 | \angle 2 | \approxeq 3 | \backepsilon 4 | \backprime 5 
| \backsim 6 | \backsimeq 7 | \barwedge 8 | \Bbbk 9 | \because 10 | \beth 11 | \between 12 | \bigstar 13 | \blacklozenge 14 | \blacksquare 15 | \blacktriangle 16 | \blacktriangledown 17 | \blacktriangleleft 18 | \blacktriangleright 19 | \boxdot 20 | \boxminus 21 | \boxplus 22 | \boxtimes 23 | \Bumpeq 24 | \bumpeq 25 | \Cap 26 | \centerdot 27 | \circeq 28 | \circlearrowleft 29 | \circlearrowright 30 | \circledast 31 | \circledcirc 32 | \circleddash 33 | \circledS 34 | \complement 35 | \Cup 36 | \curlyeqprec 37 | \curlyeqsucc 38 | \curlyvee 39 | \curlywedge 40 | \curvearrowleft 41 | \curvearrowright 42 | \daleth 43 | \diagdown 44 | \diagup 45 | \digamma 46 | \divideontimes 47 | \Doteq 48 | \doteqdot 49 | \dotplus 50 | \doublebarwedge 51 | \doublecap 52 | \doublecup 53 | \downdownarrows 54 | \downharpoonleft 55 | \downharpoonright 56 | \eqcirc 57 | \eqsim 58 | \eqslantgtr 59 | \eqslantless 60 | \eth 61 | \fallingdotseq 62 | \Finv 63 | \Game 64 | \geqq 65 | \geqslant 66 | \ggg 67 | \gggtr 68 | \gimel 69 | \gnapprox 70 | \gneq 71 | \gneqq 72 | \gnsim 73 | \gtrapprox 74 | \gtrdot 75 | \gtreqless 76 | \gtreqqless 77 | \gtrless 78 | \gtrsim 79 | \gvertneqq 80 | \hbar 81 | \hslash 82 | \intercal 83 | \leftarrowtail 84 | \leftleftarrows 85 | \leftrightarrows 86 | \leftrightharpoons 87 | \leftrightsquigarrow 88 | \leftthreetimes 89 | \leqq 90 | \leqslant 91 | \lessapprox 92 | \lessdot 93 | \lesseqgtr 94 | \lesseqqgtr 95 | \lessgtr 96 | \lesssim 97 | \Lleftarrow 98 | \lll 99 | \llless 100 | \lnapprox 101 | \lneq 102 | \lneqq 103 | \lnsim 104 | \looparrowleft 105 | \looparrowright 106 | \lozenge 107 | \Lsh 108 | \ltimes 109 | \lvertneqq 110 | \measuredangle 111 | \mho 112 | \multimap 113 | \ncong 114 | \nexists 115 | \ngeq 116 | \ngeqq 117 | \ngeqslant 118 | \ngtr 119 | \nleftarrow 120 | \nLeftarrow 121 | \nLeftrightarrow 122 | \nleftrightarrow 123 | \nleq 124 | \nleqq 125 | \nleqslant 126 | \nless 127 | \nmid 128 | \nparallel 129 | \nprec 130 | \npreceq 131 | \nrightarrow 132 
| \nRightarrow 133 | \nshortmid 134 | \nshortparallel 135 | \nsim 136 | \nsubseteq 137 | \nsubseteqq 138 | \nsucc 139 | \nsucceq 140 | \nsupseteq 141 | \nsupseteqq 142 | \ntriangleleft 143 | \ntrianglelefteq 144 | \ntriangleright 145 | \ntrianglerighteq 146 | \nvdash 147 | \nVdash 148 | \nVDash 149 | \nvDash 150 | \pitchfork 151 | \precapprox 152 | \preccurlyeq 153 | \precnapprox 154 | \precneqq 155 | \precnsim 156 | \precsim 157 | \restriction 158 | \rightarrowtail 159 | \rightleftarrows 160 | \rightleftharpoons 161 | \rightrightarrows 162 | \rightsquigarrow 163 | \rightthreetimes 164 | \risingdotseq 165 | \Rrightarrow 166 | \Rsh 167 | \rtimes 168 | \shortmid 169 | \shortparallel 170 | \smallfrown 171 | \smallsetminus 172 | \smallsmile 173 | \sphericalangle 174 | \sqsubset 175 | \sqsupset 176 | \square 177 | \Subset 178 | \subseteqq 179 | \subsetneq 180 | \subsetneqq 181 | \succapprox 182 | \succcurlyeq 183 | \succnapprox 184 | \succneqq 185 | \succnsim 186 | \succsim 187 | \Supset 188 | \supseteqq 189 | \supsetneq 190 | \supsetneqq 191 | \therefore 192 | \thickapprox 193 | \thicksim 194 | \triangledown 195 | \trianglelefteq 196 | \triangleq 197 | \trianglerighteq 198 | \twoheadleftarrow 199 | \twoheadrightarrow 200 | \upharpoonleft 201 | \upharpoonright 202 | \upuparrows 203 | \varkappa 204 | \varnothing 205 | \varpropto 206 | \varsubsetneq 207 | \varsubsetneqq 208 | \varsupsetneq 209 | \varsupsetneqq 210 | \vartriangle 211 | \vartriangleleft 212 | \vartriangleright 213 | \vDash 214 | \Vdash 215 | \veebar 216 | \Vvdash 217 | -------------------------------------------------------------------------------- /data/amssymb/math-amssymb-varsized-delimiters.tex: -------------------------------------------------------------------------------- 1 | \lvert 2 | \lVert 3 | \rvert 4 | \rVert 5 | -------------------------------------------------------------------------------- /data/amstext/both-amstext-alphabets.tex: 
-------------------------------------------------------------------------------- 1 | \text{Abc} 2 | -------------------------------------------------------------------------------- /data/base.php: -------------------------------------------------------------------------------- 1 | array( // TeX primitive 5 | 'numArgs' => 1, 6 | 'parseArgs' => false, 7 | 'mode' => 'both', 8 | ), 9 | '\\ ' => array( 10 | 'mode' => 'both', 11 | 'numArgs' => 0, 12 | 'numOptArgs' => 0, 13 | ), 14 | '\\chapter' => array( 15 | 'numArgs' => 1, 16 | 'mode' => 'text', 17 | 'starred' => true, 18 | 'counter' => 'chapter', 19 | 'counterReset' => array( 20 | 'section', 'subsection', 'subsubsection', 'paragraph', 'subparagraph', 21 | ), 22 | ), 23 | '\\section' => array( 24 | 'numArgs' => 1, 25 | 'mode' => 'text', 26 | 'starred' => true, 27 | 'counter' => 'section', 28 | 'counterReset' => array( 29 | 'subsection', 'subsubsection', 'paragraph', 'subparagraph', 30 | ), 31 | ), 32 | '\\subsection' => array( 33 | 'numArgs' => 1, 34 | 'mode' => 'text', 35 | 'starred' => true, 36 | 'counter' => 'subsection', 37 | 'counterReset' => array( 38 | 'subsubsection', 'paragraph', 'subparagraph', 39 | ), 40 | ), 41 | '\\subsubsection' => array( 42 | 'numArgs' => 1, 43 | 'mode' => 'text', 44 | 'starred' => true, 45 | 'counter' => 'subsubsection', 46 | 'counterReset' => array( 47 | 'paragraph', 'subparagraph', 48 | ), 49 | ), 50 | '\\paragraph' => array( 51 | 'numArgs' => 1, 52 | 'mode' => 'text', 53 | 'starred' => true, 54 | 'counter' => 'paragraph', 55 | 'counterReset' => array( 56 | 'subparagraph', 57 | ), 58 | ), 59 | '\\subparagraph' => array( 60 | 'numArgs' => 1, 61 | 'mode' => 'text', 62 | 'starred' => true, 63 | 'counter' => 'subparagraph', 64 | ), 65 | '\\item' => array( 66 | 'mode' => 'text', 67 | 'environs' => array('itemize', 'enumerate'), 68 | ), 69 | '\\hline' => array( 70 | 'mode' => 'text', 71 | 'environs' => array('tabular'), 72 | ), 73 | ); 74 | 
-------------------------------------------------------------------------------- /data/both-alphabets.tex: -------------------------------------------------------------------------------- 1 | \emph{Abc} 2 | \textbf{Abc} 3 | \textit{Abc} 4 | \textrm{Abc} 5 | \textsf{Abc} 6 | \texttt{Abc} 7 | \textup{Abc} 8 | -------------------------------------------------------------------------------- /data/both-refs.tex: -------------------------------------------------------------------------------- 1 | \label{marker} 2 | \ref{marker} 3 | \pageref{marker} 4 | -------------------------------------------------------------------------------- /data/both-spaces.tex: -------------------------------------------------------------------------------- 1 | \, 2 | \enspace 3 | \quad 4 | -------------------------------------------------------------------------------- /data/both.tex: -------------------------------------------------------------------------------- 1 | \# 2 | \$ 3 | \\ 4 | \_ 5 | \copyright 6 | \dag 7 | \ddag 8 | \dots 9 | \newline 10 | \P 11 | \pounds 12 | \S 13 | \textsuperscript{a} 14 | \{ 15 | \} 16 | \* 17 | -------------------------------------------------------------------------------- /data/compile.php: -------------------------------------------------------------------------------- 1 | both|math|text)#', basename($file), $match)) { 15 | continue; 16 | } 17 | 18 | $fileCommands = array_filter(array_map(function ($str) { 19 | $str = preg_replace('/%.*/', '', $str); 20 | $str = trim($str); 21 | return $str; 22 | }, file($file)), 'strlen'); 23 | $mode = $match['mode']; 24 | 25 | foreach ($fileCommands as $command) { 26 | // extract command name and number of args 27 | if (!preg_match('#^(?P\\\\([a-zA-Z]+|[^a-zA-Z]| ))#', $command, $match)) { 28 | throw new Exception( 29 | sprintf("File %s contains invalid command name '%s'", $file, $command) 30 | ); 31 | } 32 | 33 | $name = $match['command']; 34 | 35 | // in case [ or { is part of command name, search for substrings in 
part 36 | // of command after its name 37 | $numArgs = substr_count(substr($command, strlen($name)), '{'); 38 | $numOptArgs = substr_count(substr($command, strlen($name)), '['); 39 | 40 | if (isset($commands[$name])) { 41 | $c = $commands[$name]; 42 | if ($c['numArgs'] !== $numArgs || $c['numOptArgs'] !== $numOptArgs) { 43 | throw new Exception( 44 | sprintf('File %s contains conflicting definition of command %s', $file, $name) 45 | ); 46 | } 47 | } 48 | 49 | if (isset($commands[$name])) { 50 | if ($commands[$name]['numArgs'] !== $numArgs) { 51 | throw new Exception(sprintf( 52 | 'Duplicate definition of %s, conflicting number of arguments %d vs %d', 53 | $name, $commands[$name]['numArgs'], $numArgs 54 | )); 55 | } 56 | if ($commands[$name]['numOptArgs'] !== $numOptArgs) { 57 | throw new Exception(sprintf( 58 | 'Duplicate definition of %s, conflicting number of optional arguments %d vs %d', 59 | $name, $commands[$name]['numOptArgs'], $numOptArgs 60 | )); 61 | } 62 | if ($commands[$name]['mode'] !== 'both' && $commands[$name]['mode'] !== $mode) { 63 | $commands[$name]['mode'] = 'both'; 64 | } 65 | } else { 66 | $commands[$name]['mode'] = $mode; 67 | $commands[$name]['numArgs'] = $numArgs; 68 | $commands[$name]['numOptArgs'] = $numOptArgs; 69 | } 70 | } 71 | } 72 | 73 | uksort($commands, function ($a, $b) { 74 | // strip leading backslash 75 | $a = substr($a, 1); 76 | $b = substr($b, 1); 77 | 78 | $casecmp = strcasecmp($a, $b); 79 | if (!$casecmp) { 80 | return strcmp($a, $b); 81 | } 82 | 83 | return $casecmp; 84 | }); 85 | 86 | $php = str_replace(' ', ' ', var_export($commands, true)); 87 | $php = preg_replace('#\s+=>\s+array \(#', " => array(", $php); 88 | 89 | file_put_contents(dirname(__FILE__) . '/../library/PhpLatex/commands.php', '_paragraphs[$this->_pos] . '](', $text, ') -> '; 29 | 30 | if (isset($this->_paragraphs[$this->_pos])) { 31 | if ($this->_nl) { 32 | if ($text !== ' ') { 33 | $this->_nl = false; 34 | $par = $this->_paragraphs[$this->_pos] . 
"\\\\\n" . $text; 35 | } else { 36 | // do nothing - do not append space-only string or line break 37 | // wait for more text to come 38 | $par = $text; 39 | } 40 | } else { 41 | // append new text to existing paragraph, merge spaces on the 42 | // strings boundary into a single space 43 | $par = $this->_paragraphs[$this->_pos] . $text; 44 | $par = str_replace(' ', ' ', $par); 45 | } 46 | } else { 47 | // new paragraph must start with a non-space character, 48 | // no line break at the beginning of the paragraph, trailing 49 | // spaces are allowed (there will be no more than 2) 50 | $par = $text; 51 | } 52 | 53 | if (strlen($par)) { 54 | $this->_paragraphs[$this->_pos] = $par; 55 | } 56 | 57 | // echo '[' . @$this->_paragraphs[$this->_pos] . ']', "\n\n"; 58 | } 59 | 60 | 61 | return $this; 62 | } 63 | 64 | public function breakLine() 65 | { 66 | if ($this->_nl) { 67 | $this->newParagraph(); 68 | } elseif (isset($this->_paragraphs[$this->_pos]) && !ctype_space($this->_paragraphs[$this->_pos])) { 69 | // line break can only be placed in a non-empty paragraph 70 | $this->_nl = true; 71 | } 72 | return $this; 73 | } 74 | 75 | public function newParagraph() 76 | { 77 | $this->_nl = false; 78 | if (isset($this->_paragraphs[$this->_pos])) { 79 | ++$this->_pos; 80 | } 81 | return $this; 82 | } 83 | 84 | public function clear() 85 | { 86 | $this->_paragraphs = array(); 87 | $this->_pos = 0; 88 | $this->_nl = false; 89 | return $this; 90 | } 91 | 92 | public function count() 93 | { 94 | return count($this->_paragraphs); 95 | } 96 | 97 | public function getIterator() 98 | { 99 | return new ArrayIterator($this->_paragraphs); 100 | } 101 | 102 | public function __toString() 103 | { 104 | if (count($this->_paragraphs)) { 105 | return preg_replace('/[ ]+/', ' ', implode("\n\n", $this->_paragraphs)) . 
"\n\n"; 106 | } 107 | return ''; 108 | } 109 | 110 | public function toArray() 111 | { 112 | return $this->_paragraphs; 113 | } 114 | } 115 | 116 | class PhpLatex_Filter_Html2Latex 117 | { 118 | protected static $_outputEncoding = 'ANSI'; 119 | 120 | /** 121 | * Set output encoding 122 | * @param $encoding 123 | */ 124 | public static function setOutputEncoding($encoding) 125 | { 126 | self::$_outputEncoding = strtoupper($encoding); 127 | } 128 | 129 | /** 130 | * @param string $html 131 | * @param array $options OPTIONAL 132 | * @return string 133 | */ 134 | public static function filter($html, array $options = null) 135 | { 136 | $errors = libxml_use_internal_errors(true); 137 | 138 | $doc = new DOMDocument(); 139 | $doc->loadHTML('' . $html); 140 | 141 | libxml_clear_errors(); 142 | libxml_use_internal_errors($errors); 143 | 144 | foreach ($doc->childNodes as $item) { 145 | if ($item->nodeType == XML_PI_NODE) { 146 | $doc->removeChild($item); 147 | } 148 | } 149 | 150 | $doc->encoding = 'UTF-8'; 151 | 152 | $body = $doc->getElementsByTagName('body')->item(0); 153 | 154 | $debug = 0; 155 | if($debug){ 156 | header('Content-Type: text/plain; charset=utf-8'); 157 | echo $doc->saveHTML(), "\n\n"; 158 | } 159 | 160 | if ($body) { 161 | $elems = array($body); 162 | $refs = array(); 163 | $filter = new Zefram_Filter_Slug(); // FIXME dependency! 164 | // extract all referenced ids of elements, they will be used for internal links creation 165 | while ($elem = array_shift($elems)) { 166 | foreach ($elem->childNodes as $item) { 167 | if ($item->nodeType === XML_ELEMENT_NODE) { 168 | $elems[] = $item; 169 | } 170 | } 171 | if ($elem->nodeType === XML_ELEMENT_NODE && strtoupper($elem->tagName) === 'A') { 172 | $href = trim($elem->getAttribute('href')); 173 | if (strlen($href) && $href{0} === '#') { 174 | $id = substr($href, 1); 175 | $refs[$id] = 'ref:' . 
$filter->filter(str_ireplace('ref:', '', $id)); 176 | } 177 | } 178 | } 179 | 180 | self::$_refs = $refs; 181 | 182 | // TODO create IDs map 183 | $latex = self::processBlock($body, self::TRIM); 184 | 185 | if($debug){ 186 | header('Content-Type: text/plain; charset=utf-8'); 187 | echo $latex;exit; 188 | } 189 | return $latex; 190 | } 191 | return ''; 192 | } 193 | 194 | protected static $_refs; 195 | 196 | public static function processBlock(DOMNode $body, $flags = 0) 197 | { 198 | $latex = ''; 199 | $par = new PhpLatex_Filter_ParagraphList(); 200 | foreach ($body->childNodes as $item) { 201 | switch ($item->nodeType) { 202 | case XML_TEXT_NODE: 203 | case XML_ENTITY_NODE: 204 | self::_addToParagraph($par, $item); 205 | break; 206 | 207 | case XML_ELEMENT_NODE: 208 | switch (strtoupper($item->tagName)) { 209 | case 'H1': 210 | case 'H2': 211 | case 'H3': 212 | case 'H4': 213 | case 'H5': 214 | case 'H6': 215 | $value = trim(self::getText($item)); 216 | if (!($flags & self::NO_HEADINGS)) { 217 | $map = array( 218 | 'H1' => 'section', 219 | 'H2' => 'section', 220 | 'H3' => 'subsection', 221 | 'H4' => 'subsubsection', 222 | 'H5' => 'paragraph', 223 | 'H6' => 'subparagraph', 224 | ); 225 | 226 | // TODO handle math mode \texorpdfstring 227 | 228 | $value = '\\' . $map[strtoupper($item->tagName)] . '*{' . $value . '}' . "\n"; 229 | 230 | // find first id, if found, create label, 231 | // analyze elements in document order 232 | $elems = array($item); 233 | while ($elem = array_shift($elems)) { 234 | if ($elem->nodeType === XML_ELEMENT_NODE) { 235 | $id = str_ireplace('ref:', '', $elem->getAttribute('id')); 236 | if (strlen($id)) { 237 | $value .= '\\label{ref:' . $id . '}' . "\n"; 238 | break; 239 | } 240 | foreach ($elem->childNodes as $child) { 241 | $elems[] = $child; 242 | } 243 | } 244 | } 245 | } 246 | $latex .= $par . $value; 247 | $par->clear(); 248 | break; 249 | 250 | case 'UL': 251 | case 'OL': 252 | case 'DL': 253 | $latex .= $par . 
self::processList($item); 254 | $par->clear(); 255 | break; 256 | 257 | case 'TABLE': 258 | $latex .= $par . self::processTable($item); 259 | $par->clear(); 260 | break; 261 | 262 | default: 263 | self::_addToParagraph($par, $item); 264 | break; 265 | } 266 | } 267 | } 268 | 269 | if (count($par)) { 270 | $latex .= $par; 271 | $par->clear(); 272 | } 273 | 274 | if ($flags & self::TRIM) { // trim only new lines 275 | $latex = str_replace("\n", '', $latex); 276 | } 277 | 278 | return $latex; 279 | } 280 | 281 | const BOLD = 0x0001; 282 | const ITALIC = 0x0002; 283 | const TELETYPE = 0x0004; 284 | const UNDERLINE = 0x0008; 285 | const NO_PARAGRAPH = 0x0010; 286 | const TRIM = 0x0020; 287 | const LINK = 0x0040; 288 | const NO_HEADINGS = 0x0080; 289 | 290 | // TODO table 291 | 292 | public static function processTable(DOMElement $table, $flags = 0) 293 | { 294 | // requires tabularx package 295 | $tbodies = array($table); 296 | foreach (self::getChildren($table, 'TBODY') as $tbody) { 297 | $tbodies[] = $tbody; 298 | } 299 | $ncols = 0; 300 | $content = ''; 301 | while ($tbody = array_shift($tbodies)) { 302 | foreach (self::getChildren($tbody, 'TR') as $tr) { 303 | $tds = self::getChildren($tr, 'TD'); 304 | $ncols = max($ncols, count($tds)); 305 | $row = array(); 306 | foreach ($tds as $td) { 307 | $row[] = self::getText($td); 308 | } 309 | $row = implode(' & ', $row); 310 | if (strlen($row)) { 311 | $content .= $row . '\\\\' . "\n"; 312 | } 313 | } 314 | } 315 | 316 | $latex = ''; 317 | if ($content) { 318 | if ($ncols === 2) { 319 | $colspec = 'Xr'; 320 | } elseif ($ncols === 3) { 321 | $colspec = 'lXr'; 322 | } else { 323 | $colspec = str_repeat('X', $ncols); 324 | } 325 | // TODO handle colspec -> borders, alignment 326 | $latex .= '\\vspace{5ex}' . "\n"; 327 | $latex .= '\\begin{tabularx}{\textwidth}{' . $colspec . '}' . "\n"; 328 | $latex .= $content; 329 | $latex .= '\\end{tabularx}' . "\n"; 330 | $latex .= '\\vspace{5ex}' . 
"\n\n"; 331 | } 332 | return $latex; 333 | } 334 | 335 | public static function getChildren(DOMNode $node, $tagName) 336 | { 337 | $children = array(); 338 | if ($node->nodeType === XML_ELEMENT_NODE) { 339 | foreach ($node->childNodes as $child) { 340 | if ($child->nodeType === XML_ELEMENT_NODE && strtoupper($child->tagName) === $tagName) { 341 | $children[] = $child; 342 | } 343 | } 344 | } 345 | return $children; 346 | } 347 | 348 | public static function processList(DOMElement $element, $flags = 0, $level = 0) 349 | { 350 | // TODO handle indented lists 351 | 352 | $tagName = strtoupper($element->tagName); 353 | if (!in_array($tagName, array('OL', 'UL', 'DL'))) { 354 | throw new InvalidArgumentException('Not a list: ' . $tagName); 355 | } 356 | 357 | // Lists in LaTeX can be 4 levels deep 358 | if ($level >= 4) { 359 | return self::getText($element, self::NO_PARAGRAPH | self::TRIM); 360 | } 361 | 362 | $env = null; 363 | 364 | if ($tagName === 'OL') { 365 | $env = 'enumerate'; 366 | } elseif ($tagName === 'UL') { 367 | $env = 'itemize'; 368 | } elseif ($tagName === 'DL') { 369 | $env = 'description'; 370 | } 371 | 372 | $latex = ''; 373 | 374 | // paragraphs in list item? 375 | $prevTag = null; 376 | foreach ($element->childNodes as $item) { 377 | if ($item->nodeType !== XML_ELEMENT_NODE) { 378 | continue; 379 | } 380 | $t = strtoupper($item->tagName); 381 | switch ($t) { 382 | case 'LI': 383 | case 'DD': 384 | $text = self::processBlock($item, self::TRIM | self::NO_HEADINGS); 385 | // there can be more than one paragraph in list item 386 | $text = preg_replace('/\n[ \t]*\n+/', "\n\n", trim($text)); 387 | 388 | if ($t == 'LI' || ($t == 'DD' && (!$prevTag && $prevTag !== 'DT'))) { 389 | $latex .= ' \\item ' . trim($text) . "\n"; 390 | } else { 391 | $latex .= ' ' . trim($text) . 
"\n"; 392 | } 393 | break; 394 | 395 | case 'DT': 396 | $text = self::getText($item, self::TRIM | self::NO_PARAGRAPH); 397 | $latex .= ' \\item'; 398 | if (strlen($text)) { 399 | $latex .= '[{' . $text . '}]' . "\n"; 400 | } 401 | break; 402 | 403 | default: 404 | var_dumP($item);exit; 405 | } 406 | $prevTag = $t; 407 | } 408 | 409 | return sprintf("\\begin{%s}\n%s\\end{%s}\n\n", $env, $latex, $env); 410 | } 411 | 412 | public static function processLink(DOMElement $element, $flags = 0) 413 | { 414 | if ($flags & self::LINK) { 415 | // no nested links 416 | return; 417 | } 418 | 419 | $text = self::getText($element, self::NO_PARAGRAPH | self::LINK); 420 | if (strlen($text)) { 421 | $href = trim($element->getAttribute('href')); 422 | if (strlen($href)) { 423 | $label = PhpLatex_Utils::escape($text); 424 | if ($href{0} === '#') { 425 | $id = substr($href, 1); 426 | if (isset(self::$_refs[$id])) { 427 | return '\\hyperref[{' . self::$_refs[$id] . '}]{' . $label . '}'; 428 | } 429 | return; 430 | } 431 | return '\\href{' . PhpLatex_Utils::escape($href) . '}{' . $label . '}'; 432 | } 433 | } 434 | } 435 | 436 | protected static function _addToParagraph(PhpLatex_Filter_ParagraphList $par, DOMNode $item, $flags = 0) 437 | { 438 | $cflags = $flags; 439 | switch ($item->nodeType) { 440 | case XML_TEXT_NODE: 441 | $par->addText(self::getTextValue($item)); 442 | break; 443 | 444 | case XML_ENTITY_NODE: 445 | $par->addText(self::getTextValue($item)); 446 | break; 447 | 448 | case XML_ELEMENT_NODE: 449 | switch (strtoupper($item->tagName)) { 450 | case 'BR': 451 | $par->breakLine(); 452 | break; 453 | 454 | case 'STRONG': 455 | case 'B': 456 | $value = self::getText($item, $cflags | self::BOLD); 457 | if (!($flags & self::BOLD) && strlen($value)) { 458 | $value = '\\textbf{' . $value . 
'}'; 459 | } 460 | $par->addText($value); 461 | break; 462 | 463 | case 'EM': 464 | case 'I': 465 | $value = self::getText($item, $cflags | self::ITALIC); 466 | if (!($flags & self::ITALIC) && strlen($value)) { 467 | $value = '\\textit{' . $value . '}'; 468 | } 469 | $par->addText($value); 470 | break; 471 | 472 | case 'CODE': 473 | $value = self::getText($item, $cflags | self::TELETYPE); 474 | if (!($flags & self::TELETYPE) && strlen($value)) { 475 | $value = '\\texttt{' . $value . '}'; 476 | } 477 | $par->addText($value); 478 | break; 479 | 480 | case 'U': 481 | $value = self::getText($item, $cflags | self::UNDERLINE); 482 | if (!($flags & self::UNDERLINE) && strlen($value)) { 483 | $value = '\\underline{' . $value . '}'; 484 | } 485 | $par->addText($value); 486 | break; 487 | 488 | // TODO handle indented paragraphs 489 | case 'P': 490 | $par->newParagraph(); 491 | foreach ($item->childNodes as $child) { 492 | self::_addToParagraph($par, $child); 493 | } 494 | break; 495 | 496 | case 'A': 497 | $par->addText(self::processLink($item, $flags)); 498 | break; 499 | 500 | case 'SUB': 501 | // requires \usepackage{fixltx2e} for releases prior to 2015/01/01 502 | $par->addText('\\textsubscript{' . self::getText($item, $cflags | self::NO_PARAGRAPH) . '}'); 503 | break; 504 | 505 | case 'SUP': 506 | $par->addText('\\textsuperscript{' . self::getText($item, $cflags | self::NO_PARAGRAPH) . 
/**
 * Render the children of a DOM node as LaTeX text.
 *
 * Text and entity children are escaped with getTextValue(), element
 * children are delegated to _addToParagraph(); other node types are
 * ignored. The collected paragraphs are joined with a single space.
 *
 * @param DOMNode $element node whose children are rendered
 * @param int $flags       bitmask of rendering flags handed to child elements
 * @return string
 */
public static function getText(DOMNode $element, $flags = 0)
{
    $paragraphs = new PhpLatex_Filter_ParagraphList();

    foreach ($element->childNodes as $child) {
        $nodeType = $child->nodeType;

        if ($nodeType === XML_TEXT_NODE || $nodeType === XML_ENTITY_NODE) {
            $paragraphs->addText(self::getTextValue($child));
        } elseif ($nodeType === XML_ELEMENT_NODE) {
            self::_addToParagraph($paragraphs, $child, $flags);
        }
    }

    $parts = $paragraphs->toArray();

    return implode(' ', $parts);
}
/**
 * Load a new input string and reset the lexer to its initial state.
 *
 * The input is normalized to mimic how TeX treats whitespace: it is
 * trimmed, line endings are unified to LF, and tabs as well as other ASCII
 * control characters are replaced with spaces.
 *
 * @param string $str
 * @return void
 */
public function setString($str)
{
    // cast first, so that non-string input (e.g. NULL) is never handed to
    // trim(); then skip leading and trailing whitespaces
    $str = trim((string) $str);

    // perform initial transformations to mimic how TeX handles whitespaces.
    // Because of these transformations verbatim environments must be handled
    // elsewhere (i.e., replaced with placeholders before passing the input
    // to this lexer).

    // Unify newline character across platforms, replace tab with space
    $str = str_replace(
        array("\r\n", "\r", "\t"),
        array("\n", "\n", " "),
        $str
    );

    // Replace ASCII control characters (LF excluded) with spaces, one for
    // one, so that token positions remain unchanged.
    // The pattern is deliberately byte-oriented (no /u modifier): with /u
    // preg_replace() returns NULL for input that is not valid UTF-8, which
    // would silently discard the whole input. Bytes below 0x80 never occur
    // inside a multi-byte UTF-8 sequence, so the result for valid UTF-8
    // input is the same.
    $str = preg_replace('/[\x00-\x09\x0B-\x1F\x7F]/', ' ', $str);

    $this->_str = $str;
    $this->_pos = 0;

    $this->_line = 1;
    $this->_column = 0;

    $this->_pline = null;
    $this->_pcolumn = null;

    $this->_token = null;
    $this->_tokenPosition = null;

    $this->_state = self::STATE_DEFAULT;
}
self::STATE_CONTROL: 115 | return $this->_setToken(self::TYPE_CWORD, $buf); 116 | 117 | case self::STATE_SPACE: 118 | // ignore trailing spaces 119 | break; 120 | } 121 | break; 122 | 123 | case "\\": 124 | switch ($this->_state) { 125 | case self::STATE_DEFAULT: 126 | // if there is something in the buffer return it 127 | // before switching state 128 | if (strlen($buf)) { 129 | $this->_ungetChar(); 130 | return $this->_setToken(self::TYPE_TEXT, $buf); 131 | } 132 | $this->_state = self::STATE_BSLASH; 133 | $buf = "\\"; 134 | $this->storeTokenPosition(); 135 | break; 136 | 137 | case self::STATE_BSLASH: 138 | return $this->_setToken(self::TYPE_CSYMBOL, '\\\\'); 139 | 140 | case self::STATE_CONTROL: 141 | // end of command, unget char, return buffer 142 | $this->_ungetChar(); 143 | return $this->_setToken(self::TYPE_CWORD, $buf); 144 | 145 | case self::STATE_SPACE: 146 | $this->_ungetChar(); 147 | return $this->_setSpaceToken($buf); 148 | } 149 | break; 150 | 151 | case ' ': 152 | case "\n": 153 | switch ($this->_state) { 154 | case self::STATE_DEFAULT: 155 | if (strlen($buf)) { 156 | $this->_ungetChar(); 157 | return $this->_setToken(self::TYPE_TEXT, $buf); 158 | } 159 | $this->_state = self::STATE_SPACE; 160 | $buf = $c; 161 | $this->storeTokenPosition(); 162 | if ($c === "\n") { 163 | $this->_line++; 164 | $this->_column = 0; 165 | } 166 | break; 167 | 168 | case self::STATE_BSLASH: 169 | $this->storeTokenPosition(); 170 | // if space then return control symbol, otherwise 171 | // switch to default state and unget this char to 172 | // be handler later (ignore this backslash) 173 | if ($c === ' ') { 174 | return $this->_setToken(self::TYPE_CSYMBOL, '\\ '); 175 | } 176 | $this->_state = self::STATE_DEFAULT; 177 | $this->_ungetChar(); 178 | break; 179 | 180 | case self::STATE_CONTROL: 181 | // end of control word 182 | $this->_ungetChar(); 183 | return $this->_setToken(self::TYPE_CWORD, $buf); 184 | 185 | case self::STATE_SPACE: 186 | $buf .= $c; 187 | if ($c === 
"\n") { 188 | $this->_line++; 189 | $this->_column = 0; 190 | } 191 | break; 192 | } 193 | break; 194 | 195 | case '%': 196 | switch ($this->_state) { 197 | case self::STATE_DEFAULT: 198 | // there may be something in buffer, if so, return 199 | // it before returning this token 200 | if (strlen($buf)) { 201 | $this->_ungetChar(); 202 | return $this->_setToken(self::TYPE_TEXT, $buf); 203 | } 204 | 205 | // http://en.wikibooks.org/wiki/LaTeX/Basics#Comments: 206 | // "When LaTeX encounters a % character while processing an input file, it 207 | // ignores the rest of the current line, the line break, and all whitespace 208 | // [newline excluded!] at the beginning of the next line." 209 | // This behavior can be illustrated by the following example: 210 | // A% comment 211 | // B 212 | // will be rendered as: 213 | // AB 214 | // whereas: 215 | // A% comment 216 | // 217 | // B 218 | // as: 219 | // A 220 | // B 221 | // Comment-terminating newline and newline occurring after it 222 | // (intermediate spaces are ignored) are interpreted as \par command. 223 | 224 | $this->storeTokenPosition(); 225 | 226 | return $this->_setToken(self::TYPE_SPECIAL, '%'); 227 | 228 | case self::STATE_BSLASH: 229 | return $this->_setToken(self::TYPE_CSYMBOL, '\\%'); 230 | 231 | case self::STATE_CONTROL: 232 | // end of command name, unget char 233 | $this->_ungetChar(); 234 | return $this->_setToken(self::TYPE_CWORD, $buf); 235 | 236 | case self::STATE_SPACE: 237 | $this->_ungetChar(); 238 | return $this->_setSpaceToken($buf); 239 | } 240 | break; 241 | 242 | // The following characters play a special role in LaTeX and are called special printing 243 | // characters, or simply special characters. 
244 | // # $ % & ~ _ ^ \ { } 245 | // http://www.personal.ceu.hu/tex/specchar.htm 246 | case '}': 247 | case '{': 248 | case '~': 249 | case '^': 250 | case '_': 251 | case '&': 252 | case '#': 253 | case '$': 254 | case '[': // square brackets are considered special symbols, as 255 | case ']': // they delimit optional arguments 256 | switch ($this->_state) { 257 | case self::STATE_DEFAULT: 258 | // there may be something in buffer, if so, return 259 | // it before returning this token 260 | if (strlen($buf)) { 261 | $this->_ungetChar(); 262 | return $this->_setToken(self::TYPE_TEXT, $buf); 263 | } 264 | // unescaped special character 265 | $this->storeTokenPosition(); 266 | return $this->_setToken(self::TYPE_SPECIAL, $c); 267 | 268 | case self::STATE_BSLASH: 269 | // escaped special character 270 | return $this->_setToken(self::TYPE_CSYMBOL, '\\' . $c); 271 | 272 | case self::STATE_CONTROL: 273 | // end of command name, unget char 274 | $this->_ungetChar(); 275 | return $this->_setToken(self::TYPE_CWORD, $buf); 276 | 277 | case self::STATE_SPACE: 278 | $this->_ungetChar(); 279 | return $this->_setSpaceToken($buf); 280 | } 281 | break; 282 | 283 | default: 284 | switch ($this->_state) { 285 | case self::STATE_DEFAULT: 286 | if ($buf === '') { 287 | $this->storeTokenPosition(); 288 | } 289 | $buf .= $c; 290 | break; 291 | 292 | case self::STATE_BSLASH: 293 | if ($this->_isAlpha($c)) { 294 | $this->_state = self::STATE_CONTROL; 295 | $buf .= $c; 296 | } else { 297 | // single non-letter -> control symbol, i.e., \^ 298 | return $this->_setToken(self::TYPE_CSYMBOL, "\\" . 
/**
 * Read a single byte from the input and advance the read position.
 *
 * The line/column seen before the read are remembered, so that the read
 * can be undone once by _ungetChar().
 *
 * @return string one-byte string, or self::EOF when the input is exhausted
 */
protected function _getChar()
{
    $offset = $this->_pos;

    if (strlen($this->_str) <= $offset) {
        // artificial symbol denoting end of input
        return self::EOF;
    }

    // snapshot of the position before the read, restored by _ungetChar()
    $this->_pline = $this->_line;
    $this->_pcolumn = $this->_column;

    $this->_pos = $offset + 1;
    ++$this->_column;

    return substr($this->_str, $offset, 1);
}
/**
 * Turn a run of whitespace into either a space token or a \par token.
 *
 * A run containing more than one LF is a paragraph break and yields
 * a \par control word; any other run collapses to a single space. The
 * original whitespace is kept in the token under the 'raw' key.
 *
 * @param string $value whitespace-only string
 * @return array        the token that has been set as current
 * @throws InvalidArgumentException when $value is not whitespace-only
 */
protected function _setSpaceToken($value)
{
    if (!ctype_space($value)) {
        throw new InvalidArgumentException('Whitespace value expected');
    }

    $isParagraphBreak = substr_count($value, "\n") > 1;

    if ($isParagraphBreak) {
        $type = self::TYPE_CWORD;
        $tokenValue = '\\par';
    } else {
        $type = self::TYPE_SPACE;
        $tokenValue = ' ';
    }

    return $this->_setToken($type, $tokenValue, $value);
}
/**
 * Tell whether a string is non-empty and made up solely of the ASCII
 * letters A-Z and a-z. The result does not depend on the current locale.
 *
 * @param string $str
 * @return bool
 */
protected function _isAlpha($str)
{
    // ctype_alpha() is locale dependent so can't be used here; strspn()
    // measures the leading run of bytes taken from an explicit letter list
    $letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
    $length = strlen($str);

    return $length > 0 && strspn($str, $letters) === $length;
}
/**
 * Retrieve all child nodes.
 *
 * The children list is lazily initialized on first write, hence the cast:
 * a node without children yields an empty array rather than NULL.
 *
 * @return array
 */
public function getChildren()
{
    return (array) $this->_children;
}

/**
 * Tell whether at least one child node has been appended.
 *
 * empty() is used instead of count(), because the children list may not
 * be initialized yet and count() rejects non-countable values on PHP 8.
 *
 * @return bool
 */
public function hasChildren()
{
    return !empty($this->_children);
}
$this->_props[$key] : null; 127 | } 128 | 129 | public function __set($key, $value) 130 | { 131 | $this->setProp($key, $value); 132 | } 133 | 134 | public function __get($key) 135 | { 136 | return $this->getProp($key); 137 | } 138 | 139 | public function __isset($key) 140 | { 141 | return $this->getProp($key) !== null; 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /library/PhpLatex/Parser.php: -------------------------------------------------------------------------------- 1 | ::= 12 | // ::= | 13 | // ::= | '{' '}' | 14 | // 15 | // \\[a-zA-Z]+ | \\[^a-zA-Z] 16 | // not a command 17 | 18 | const MODE_MATH = 2; 19 | const MODE_TEXT = 1; 20 | const MODE_BOTH = 3; 21 | 22 | const STATE_TEXT = 1; 23 | const STATE_MATH = 2; 24 | const STATE_ARG = 4; 25 | const STATE_OPT_ARG = 8; 26 | 27 | const TYPE_DOCUMENT = 'document'; 28 | const TYPE_TEXT = 'text'; 29 | const TYPE_MATH = 'math'; 30 | const TYPE_GROUP = 'group'; 31 | const TYPE_SPECIAL = 'special'; 32 | const TYPE_COMMAND = 'command'; 33 | const TYPE_ENVIRON = 'environ'; 34 | const TYPE_VERBATIM = 'verbatim'; 35 | 36 | protected $_lexer; 37 | protected $_verbatims; 38 | 39 | /** 40 | * Environments specification. 41 | * 42 | * Supported keys: 43 | * int mode null - mode in which this environ may be 44 | * present, one of MODE_ flag constants 45 | * bool verbatim false - is this environ verbatim 46 | * bool math false - does this environ start math mode? 
/**
 * Register a command specification.
 *
 * Supported options:
 *   int|string mode    MODE_BOTH - mode the command is valid in, one of
 *                                  MODE_ constants or 'both', 'math', 'text'
 *   int numArgs        0         - number of mandatory arguments
 *   int numOptArgs     0         - number of optional arguments
 *   bool parseArgs     true      - whether arguments should be parsed
 *   bool starred       false     - whether a starred variant exists
 *   string[] environs            - environments the command is restricted
 *                                  to; kept only when non-empty
 *
 * @param string $name   command name including the leading backslash,
 *                       either a control word or a control symbol
 * @param array $options
 * @return $this
 * @throws InvalidArgumentException when the command name is malformed
 */
public function addCommand($name, array $options) // {{{
{
    if (!preg_match('/^\\\\([a-zA-Z]+|[^a-zA-Z])$/', $name)) {
        throw new InvalidArgumentException(sprintf('Invalid command name: "%s"', $name));
    }

    if (isset($options['mode'])) {
        $mode = $options['mode'];
        switch ($mode) {
            case 'both':
                $mode = self::MODE_BOTH;
                break;

            case 'math':
                $mode = self::MODE_MATH;
                break;

            case 'text':
                $mode = self::MODE_TEXT;
                break;

            default:
                $mode = intval($mode);
                break;
        }
    } else {
        $mode = self::MODE_BOTH;
    }

    $spec = array(
        'mode'       => $mode,
        'numArgs'    => isset($options['numArgs']) ? intval($options['numArgs']) : 0,
        'numOptArgs' => isset($options['numOptArgs']) ? intval($options['numOptArgs']) : 0,
        'parseArgs'  => !isset($options['parseArgs']) || $options['parseArgs'], // parse by default
        'starred'    => isset($options['starred']) ? $options['starred'] : false,
    );

    // Keep the environment restriction. It used to be dropped here, so the
    // 'environs' check done when a control sequence is parsed could never
    // apply. The key is left unset when no environments are listed, so that
    // unrestricted commands stay unrestricted.
    if (!empty($options['environs'])) {
        $spec['environs'] = (array) $options['environs'];
    }

    $this->_commands[$name] = $spec;

    return $this;
} // }}}
/**
 * Push a token back to the front of the token queue, so that it is the
 * one returned by the next call of _next().
 *
 * @param array $token
 * @return $this
 */
protected function _unget(array $token) // {{{
{
    $this->_tokenQueue = array_merge(array($token), $this->_tokenQueue);

    return $this;
} // }}}
/**
 * Parse consecutive expressions and append them to a parent node.
 *
 * Parsing stops when the input is exhausted, or when the upcoming token
 * has the value given in $stopAtToken; the terminating token is consumed
 * and is not added to the parent.
 *
 * @param PhpLatex_Node $parent    node receiving the parsed expressions
 * @param string|null $stopAtToken value of the terminating token, NULL to
 *                                 read until the end of input
 * @param int $state               mode the expressions are parsed in
 * @param string $environ          OPTIONAL name of the enclosing environment
 * @return array                   nodes that have been appended to $parent,
 *                                 in order of appearance
 */
protected function _parseExprList(PhpLatex_Node $parent, $stopAtToken, $state, $environ = null) // {{{
{
    $tree = array();

    while (false !== ($token = $this->_peek())) {
        if ($token['value'] === $stopAtToken) {
            // consume terminating token
            $this->_next();
            break;
        }

        $node = $this->_parseExpr($state, $environ);
        if ($node) {
            $parent->appendChild($node);
            // record the node, previously the returned list was never
            // filled and was always empty
            $tree[] = $node;
        }
    }

    return $tree;
} // }}}
/**
 * Create an environment node, validating it against the environments
 * specification and parsing the arguments the specification requires.
 *
 * @param string $name
 *     name of the environment being opened
 * @param int $mode
 *     mode the environment is encountered in
 * @param string $environ
 *     OPTIONAL name of a parent environment
 * @return PhpLatex_Node
 * @throws Exception
 *     when the environment is undefined and undefined environments are
 *     skipped, when it is encountered in invalid mode, or when it can't
 *     be nested within the parent environment
 */
protected function _createEnviron($name, $mode, $environ = null) // {{{
{
    assert(($mode & ($mode - 1)) === 0); // mode must be a power of 2

    $isMath = false;
    $argNodes = array();
    $isDefined = isset($this->_environs[$name]);

    if (!$isDefined && $this->_skipUndefinedEnvirons) {
        throw new Exception(sprintf('Environment %s undefined', $name));
    }

    if ($isDefined) {
        $spec = $this->_environs[$name];

        // a mode given in the specification must include the current mode
        $modeAllowed = !isset($spec['mode']) || ($spec['mode'] & $mode);
        if (!$modeAllowed) {
            throw new Exception('Environment in invalid mode');
        }

        // inside a parent environment, the parent must be explicitly listed
        // as a valid container
        if (null !== $environ) {
            $nestable = !empty($spec['environs'])
                && in_array($environ, (array) $spec['environs'], true);
            if (!$nestable) {
                throw new Exception('Environment ' . $name . ' cannot be nested in ' . $environ . ' environment');
            }
        }

        // environments flagged as math (i.e. math or displaymath) are
        // marked, so that their contents get parsed in math mode
        $isMath = !empty($spec['math']);

        // arguments become the first children of the environment; a missing
        // argument is substituted with an empty group
        $expected = isset($spec['numArgs']) ? intval($spec['numArgs']) : 0;
        for ($i = 0; $i < $expected; ++$i) {
            $argNode = $this->_parseArg($mode, $environ);
            if (false === $argNode) {
                $argNode = $this->_createNode(self::TYPE_GROUP, $mode);
            }
            $argNode->setProp('arg', true);
            $argNodes[] = $argNode;
        }
    }

    $node = $this->_createNode(self::TYPE_ENVIRON, $mode, $environ);
    $node->value = $name;

    if ($isMath) {
        $node->math = true;
    }

    foreach ($argNodes as $argNode) {
        $node->appendChild($argNode);
    }

    return $node;
} // }}}
/**
 * Try to interpret a control sequence as a verbatim placeholder.
 *
 * A placeholder is the \verbatim prefix immediately followed by an id
 * registered in the verbatims list. On match an environment node is
 * created for the stored environment name, with a single verbatim child
 * holding the stored content.
 *
 * @param array $token
 * @param int $mode
 * @param string $environ OPTIONAL
 * @return PhpLatex_Node|false FALSE when the token is not a placeholder
 * @throws Exception when the environment node cannot be created
 */
protected function _tryParseVerbatimControl($token, $mode, $environ = null) // {{{
{
    $value = $token['value'];

    if (0 !== strncmp($value, '\\verbatim', 9)) {
        return false;
    }

    // \verbatim prefix matched, check if this is indeed a placeholder
    $id = substr($value, 9);
    if (!isset($this->_verbatims[$id])) {
        return false;
    }

    $stored = $this->_verbatims[$id];
    $name = $stored['name'];

    $node = $this->_createEnviron($name, $mode, $environ);

    $verb = $this->_createNode(self::TYPE_VERBATIM, $mode, $name);
    $verb->value = $stored['content'];

    $node->addChild($verb);

    return $node;
} // }}}
// environ in invalid mode or invalid environ nesting 485 | } 486 | } 487 | return false; 488 | 489 | case '\\end': 490 | // \end with no \begin, skip environ name 491 | $this->_parseEnvName(); 492 | return false; 493 | 494 | case '\\]': 495 | case '\\)': 496 | // unmatched math delimiter, skip 497 | return false; 498 | 499 | case '\\left': 500 | case '\\right': 501 | return $this->_parseLeftRight($token, $mode, $environ); 502 | } 503 | 504 | // skip space after control word (before parsing arguments) 505 | // 506 | // "When a space comes after a control word (an all-letter control 507 | // sequence), it is ignored by TeX; i.e., it is not considered to be 508 | // a "real" space belonging to the manuscript that is being typeset. 509 | // But when a space comes after a control symbol, it's truly a space." 510 | // 511 | // Donald E. Knuth, "TeXbook", Chapter 3 512 | // 513 | // Skip all spaces and comments occurring after this token, if this 514 | // token is a control word. 515 | if ($token['type'] === PhpLatex_Lexer::TYPE_CWORD) { 516 | $this->_skipSpacesAndComments(); 517 | } 518 | 519 | $mathWrapper = null; 520 | 521 | $nodeMode = $mode; 522 | $nodeArgs = array(); 523 | $nodeOptArgs = array(); 524 | $nodeStarred = false; 525 | 526 | // validate control sequence and parse arguments 527 | if (isset($this->_commands[$value])) { 528 | $spec = $this->_commands[$value]; 529 | 530 | // check if this command requires an environment, if so, check 531 | // if current environment is among listed ones 532 | if (isset($spec['environs']) && 533 | !in_array($environ, (array) $spec['environs'], true) 534 | ) { 535 | return false; 536 | } 537 | 538 | // check if command is used in proper mode 539 | if (isset($spec['mode']) && !($spec['mode'] & $mode)) { 540 | // when math mode command is encountered in text mode, wrap it 541 | // in inline math mode (never the other way around). 542 | if ($spec['mode'] & self::MODE_MATH) { 543 | // We're outside math mode here. 
544 | $nodeMode = self::MODE_MATH; 545 | $mathWrapper = $this->_createNode(self::TYPE_MATH, $mode); 546 | $mathWrapper->inline = true; 547 | } else { 548 | return false; 549 | } 550 | } 551 | 552 | // check if this command can appear in a starred version, if so, 553 | // parse any the following asterisk token 554 | if ((isset($spec['starred']) && $spec['starred']) && 555 | ($next = $this->_peek()) && 556 | ($next['type'] === PhpLatex_Lexer::TYPE_TEXT) && 557 | (0 === strncmp($next['value'], '*', 1)) 558 | ) { 559 | $this->_next(); 560 | $nodeStarred = true; 561 | // remove asterisk from the beginning of token value, no need 562 | // to use mbstring functions 563 | $next['value'] = substr($next['value'], 1); 564 | if (strlen($next['value'])) { 565 | $this->_unget($next); 566 | } 567 | } 568 | 569 | // parse optional arguments 570 | $numOptArgs = isset($spec['numOptArgs']) ? intval($spec['numOptArgs']) : 0; 571 | $parseArgs = isset($spec['parseArgs']) ? $spec['parseArgs'] : true; 572 | 573 | while (count($nodeOptArgs) < $numOptArgs) { 574 | if (false !== ($arg = $this->_parseOptArg($nodeMode, $environ, $parseArgs))) { 575 | $nodeOptArgs[] = $arg; 576 | } else { 577 | break; 578 | } 579 | } 580 | 581 | // parse arguments 582 | $numArgs = isset($spec['numArgs']) ? 
intval($spec['numArgs']) : 0; 583 | 584 | while (count($nodeArgs) < $numArgs) { 585 | if (false === ($arg = $this->_parseArg($nodeMode, $environ, $parseArgs))) { 586 | // no argument found, create an artificial one 587 | $arg = $this->_createNode(self::TYPE_GROUP, $nodeMode); 588 | } 589 | $nodeArgs[] = $arg; 590 | } 591 | } elseif ($this->_skipUndefinedCommands) { 592 | return false; 593 | } 594 | 595 | $node = $this->_createNode(self::TYPE_COMMAND, $nodeMode, $environ); 596 | $node->value = $value; 597 | 598 | if ($token['type'] === PhpLatex_Lexer::TYPE_CSYMBOL) { 599 | $node->symbol = true; // control symbol 600 | } 601 | 602 | if ($nodeStarred) { 603 | $node->starred = $nodeStarred; 604 | } 605 | 606 | foreach ($nodeOptArgs as $arg) { 607 | $node->appendChild($arg); 608 | } 609 | 610 | foreach ($nodeArgs as $arg) { 611 | $node->appendChild($arg); 612 | } 613 | 614 | if ($mathWrapper) { 615 | $mathWrapper->appendChild($node); 616 | return $mathWrapper; 617 | } 618 | 619 | return $node; 620 | } // }}} 621 | 622 | /** 623 | * Skip spaces and comments starting from the current lexer position. 624 | * 625 | * After this function has run current token, if exists, is neither space 626 | * nor comment. 
627 | */
    protected function _skipSpacesAndComments($inComment = false)
    {
        while ($token = $this->_peek()) {
            if ($inComment) {
                // a comment lasts until a token whose raw text has a newline
                $endsLine = isset($token['raw']) && false !== strpos($token['raw'], "\n");
                if ($endsLine) {
                    // leave the token in place, it is examined again below
                    // on the next pass as an ordinary token
                    $inComment = false;
                } else {
                    $this->_next();
                }
                continue;
            }

            $type = $token['type'];
            if ($type === PhpLatex_Lexer::TYPE_SPECIAL && $token['value'] === '%') {
                $inComment = true;
            } elseif ($type !== PhpLatex_Lexer::TYPE_SPACE) {
                // neither a space nor a comment start, nothing more to skip
                break;
            }

            $this->_next();
        }
    }

    /**
     * Parse a single mandatory argument of a control sequence.
     *
     * Leading spaces and comments are skipped. The argument is a braced
     * group, a lone square bracket, the first character of a text token or
     * a single control sequence. The result is always wrapped in a group node.
     *
     * @param int $mode
     * @param string $environ
     * @param bool $parseArgs  when false, contents of a braced group are taken
     *                         verbatim up to the first right curly bracket
     * @return PhpLatex_Node|false false if no argument could be parsed
     */
    protected function _parseArg($mode, $environ, $parseArgs = true) // {{{
    {
        $this->_skipSpacesAndComments();

        $token = $this->_peek();
        if (!$token) {
            return false;
        }

        $type = $token['type'];

        if ($type === PhpLatex_Lexer::TYPE_SPECIAL) {
            $special = $token['value'];

            if ($special === '{') {
                if ($parseArgs) {
                    // found group
                    $this->_next();

                    $group = $this->_createNode(self::TYPE_GROUP, $mode);

                    // TODO stop at first encountered \par control
                    $this->_parseExprList($group, '}', $mode, $environ);

                    return $group;
                }

                // args are not to be parsed, consume all contents up to the
                // first encountered right curly bracket
                $group = $this->_createNode(self::TYPE_GROUP, $mode);
                $this->_next();

                $raw = '';
                while ($inner = $this->_peek()) {
                    $closes = $inner['type'] === PhpLatex_Lexer::TYPE_SPECIAL
                        && $inner['value'] === '}';
                    if ($closes) {
                        $this->_next();
                        break;
                    }

                    $raw .= $inner['value'];
                    $this->_next();
                }

                $verbatim = $this->_createNode(self::TYPE_VERBATIM, $mode);
                $verbatim->value = $raw;
                $verbatim->appendTo($group);

                return $group;
            }

            if ($special === '[' || $special === ']') {
                // square brackets may be treated as text (they are returned as
                // specials to make easier parsing of optional parameters).
                // Encountered bracket, not enveloped in a pair of curly brackets
                // forms a separate group.
                $this->_next();

                $group = $this->_createNode(self::TYPE_GROUP, $mode);

                $bracket = $this->_createNode(self::TYPE_TEXT, $mode);
                $bracket->value = $special;
                $bracket->appendTo($group);

                return $group;
            }

            // comment start and other specials (~ ^ _ & # $) are silently
            // ignored, the token is left in place
            return false;
        }

        if ($type === PhpLatex_Lexer::TYPE_TEXT) {
            // found text token, extract first character, leave the
            // rest of its value for further processing
            $this->_next();

            $group = $this->_createNode(self::TYPE_GROUP, $mode);

            $char = $this->_createNode(self::TYPE_TEXT, $mode);
            $char->value = mb_substr($token['value'], 0, 1);
            $char->appendTo($group);

            $token['value'] = mb_substr($token['value'], 1);
            if (mb_strlen($token['value'])) {
                $this->_unget($token);
            }

            return $group;
        }

        if ($type === PhpLatex_Lexer::TYPE_CWORD || $type === PhpLatex_Lexer::TYPE_CSYMBOL) {
            // found control sequence

            if ($token['value'] === '\\par') {
                // Runaway argument?
                // ! Paragraph ended before command was complete.
                return false;
            }

            $this->_next();

            $group = $this->_createNode(self::TYPE_GROUP, $mode);

            // a skipped control sequence leaves the group empty
            $control = $this->_parseControl($token, $mode, $environ);
            if ($control) {
                $control->appendTo($group);
            }

            return $group;
        }

        return false;
    } // }}}

761 | /** 762 | * Try and parse optional argument. Optional argument must be delimited 763 | * with square brackets, otherwise it is ignored.
764 | */
    protected function _parseOptArg($state, $environ) // {{{
    {
        // NOTE(review): _parseControl passes a third $parseArgs argument here,
        // which this method does not declare and therefore ignores.
        $this->_skipSpacesAndComments();

        if (($next = $this->_peek()) &&
            ($next['type'] === PhpLatex_Lexer::TYPE_SPECIAL) &&
            ($next['value'] === '[')
        ) {
            $this->_next();

            $group = $this->_createNode(self::TYPE_GROUP, $state);
            $group->optional = true;

            // The STATE_OPT_ARG flag makes _isText() stop treating a right
            // square bracket as text, so that it can terminate this group.
            // TODO stop at first encountered \par control
            $this->_parseExprList($group, ']', $state | self::STATE_OPT_ARG, $environ);

            return $group;
        }

        return false;
    } // }}}

    /**
     * Parse environment name given as a braced group.
     *
     * Leading spaces and comments are skipped. If the next token is not a
     * left curly bracket it is left in the lexer and false is returned.
     * Nested curly brackets inside the name must be matched. An empty group
     * is skipped and the following group, if any, is tried.
     *
     * @return string|false
     */
    protected function _parseEnvName() // {{{
    {
        // 1. Skip spaces and comments
        $this->_skipSpacesAndComments();

        // Any falsy value is treated as end of input, as in other loops of
        // this class. Comparing against false only would never terminate the
        // inner loop if the lexer signalled end of input with null.
        while ($next = $this->_peek()) {
            if ($next['value'] !== '{') {
                // 2A. first encountered non-space token is not a curly bracket
                // Since start of group was expected, this token breaks opening
                // of an environment. Leave it in place and report failure.
                break;
            }

            // 2B. first encountered non-space token is a curly bracket that
            // begins a group containing environment name, skip it
            $this->_next();

            // Names of environments in LaTeX may contain any characters,
            // any curly brackets must be matched.

            $par = 1;   // unmatched curly brackets counter
            $name = ''; // environment name

            while ($next = $this->_next()) {
                if ($next['value'] === '{') {
                    ++$par;
                } elseif ($next['value'] === '}') {
                    --$par;
                    if (!$par) {
                        // last required right curly bracket
                        break;
                    }
                }
                $name .= $next['value'];
            }

            if (strlen($name)) {
                return $name;
            }
        }

        // no valid environment name was found
        return false;
    } // }}}

    /**
     * Build text node starting from current token and by appending any
     * following text, space and square bracket tokens.
     *
     * @param array $token
     * @param int $mode
     * @return PhpLatex_Node
     */
    protected function _parseText($token, $mode)
    {
        $value = $token['value'];

        // concatenate output as long as next token is TEXT, SPACE or square
        // brackets
        while ($next = $this->_peek()) {
            if (!$this->_isText($next, $mode)) {
                break;
            }
            $value .= $next['value'];
            $this->_next();
        }

        $node = $this->_createNode(self::TYPE_TEXT, $mode);
        $node->value = $value;
        return $node;
    }

    /**
     * Parse a special character token.
     *
     * @param array $token
     * @param int $state
     * @param string $environ
     * @return PhpLatex_Node|false false when the special is skipped
     */
    protected function _parseSpecial($token, $state, $environ) // {{{
    {
        $value = $token['value'];
        switch ($value) {
            case '{':
                // NOTE(review): $environ is not forwarded here, while
                // _parseArg forwards it for braced groups - confirm intended.
                $node = $this->_createNode(self::TYPE_GROUP, $state);
                $this->_parseExprList($node, '}', $state);
                return $node;

            case '}':
                // unmatched right curly bracket, skip
                break;

            case '$':
                if ($state & self::STATE_TEXT) {
                    if (($next = $this->_peek())) {
                        $node = new PhpLatex_Node(self::TYPE_MATH);
                        $node->mode = $state;
                        if ($next['value'] === '$') { // displaymath
                            $node->inline = false;
                            $this->_next(); // consume second dollar

                            // consume expressions up to first double dollars
                            // encountered
                            do {
                                $this->_parseExprList($node, '$', self::MODE_MATH);
                                $next = $this->_peek();
                                if ($next && $next['value'] === '$') {
                                    // second terminating dollar found, consume
                                    // it and stop looping
                                    $this->_next();
                                    break;
                                }
                            } while ($next);
                        } else {
                            $node->inline = true;
                            $this->_parseExprList($node, '$', self::MODE_MATH);
                        }

                        return $node;
                    }
                    // unterminated document (and math mode)
                }
                break;

            case '[':
            case ']':
                // square brackets that are not part of optional arguments
                // (those are handled when parsing control sequences)
                while ($next = $this->_peek()) {
                    if (!$this->_isText($next, $state)) {
                        break;
                    }
                    $value .= $next['value'];
                    $this->_next();
                }

                $node = $this->_createNode(self::TYPE_TEXT, $state);
                $node->value = $value;
                return $node;

            case '^':
            case '_':
                // subscript and superscript, require math mode
                if ((self::STATE_MATH & $state) && ($arg = $this->_parseArg($state, $environ))) {
                    $node = $this->_createNode(self::TYPE_SPECIAL, $state);
                    $node->value = $value;
                    $node->appendChild($arg);
                    return $node;
                }
                break;

            /** @noinspection PhpMissingBreakStatementInspection */
            case '&': // TODO may occur only in table
                if (empty($environ)) {
                    // not in environment, escape it
                    $node = $this->_createNode(self::TYPE_COMMAND, $state);
                    $node->symbol = true; // control symbol \&
                    $node->value = '\\&';
                    return $node;
                }
                // otherwise fall through to get special

            case '~':
                $node = $this->_createNode(self::TYPE_SPECIAL, $state);
                $node->value = $value;
                return $node;

            case '%':
                // rest of the line is a comment
                $this->_skipSpacesAndComments(true);
                break;

            case '#':
                // currently not supported
                break;
        }

        return false;
    } // }}}

    /**
     * Parse \left or \right together with the delimiter that follows it.
     *
     * The resulting command node has the delimiter appended to its value.
     * When no valid delimiter follows, the null delimiter '.' is used.
     *
     * @param array $token
     * @param int $mode
     * @param string|array $environs
     * @return PhpLatex_Node|false false outside math mode or at end of input
     */
    protected function _parseLeftRight($token, $mode, $environs)
    {
        // Mode is a bit mask that may carry additional flags (optional
        // arguments are parsed with STATE_OPT_ARG set), so test the math bit
        // instead of comparing for equality.
        if (!($mode & self::MODE_MATH)) {
            // wrap in math
            return false;
        }

        $this->_skipSpacesAndComments();
        $next = $this->_peek();
        if (!$next) {
            return false;
        }

        $delimiter = '.';
        $validDelimiter = false;

        if ($next['type'] === PhpLatex_Lexer::TYPE_TEXT) {
            $validChars = array('.', '|', '/', '<', '>', '(', ')', '[', ']');
            $firstChar = mb_substr($next['value'], 0, 1);
            if (in_array($firstChar, $validChars, true)) {
                $this->_next();
                $validDelimiter = true;

                $delimiter = $firstChar;
                if (mb_strlen($next['value']) > 1) {
                    // give the rest of the text token back to the lexer
                    $next['value'] = mb_substr($next['value'], 1);
                    $this->_unget($next);
                }
            }
        } elseif ($next['type'] === PhpLatex_Lexer::TYPE_CSYMBOL || $next['type'] === PhpLatex_Lexer::TYPE_CWORD) {
            // All controls from math-delimiters.tex
            $validSymbols = array(
                '\backslash',
                '\langle',
                '\lceil',
                '\lfloor',
                '\rangle',
                '\rceil',
                '\rfloor',
                '\{',
                '\|',
                '\}',
            );
            if (in_array($next['value'], $validSymbols, true)) {
                $delimiter = $next['value'];
                $validDelimiter = true;
                $this->_next();
            }
        }

        if (!$validDelimiter) {
            // Invalid bracket command
            // LaTeX error:
            // I was expecting to see something like `(' or `\{' or
            // `\}' here. If you typed, e.g., `{' instead of `\{', you
            // should probably delete the `{' by typing `1' now, so that
            // braces don't get unbalanced.

            // Insert space before non-space character
            $this->_unget(array(
                'type' => 'text',
                'value' => ' ',
            ));
        }

        $node = $this->_createNode(self::TYPE_COMMAND, self::MODE_MATH);
        $node->value = $token['value'] . $delimiter;
        $node->noSpaceAfter = true;
        return $node;
    }

    /**
     * Tell whether a token can be merged into a text node.
     *
     * @param array $token
     * @param int $state
     * @return bool
     */
    protected function _isText($token, $state) // {{{
    {
        $type = $token['type'];

        return $type === PhpLatex_Lexer::TYPE_TEXT
            || $type === PhpLatex_Lexer::TYPE_SPACE
            || ($type === PhpLatex_Lexer::TYPE_SPECIAL
                && ($token['value'] === '[' || (
                    // right square bracket is treated as special when
                    // encountered during parsing of optional arguments
                    $token['value'] === ']' && !($state & self::STATE_OPT_ARG)
                ))
            );
    } // }}}
}
1069 | -------------------------------------------------------------------------------- /library/PhpLatex/PdfLatex.php: -------------------------------------------------------------------------------- 1 | _compiler === null) { 37 | $this->setPdflatexBinary($this->findPdflatexBinary()); 38 | } 39 | return $this->_compiler['path']; 40 | } 41 | 42 | public function setPdflatexBinary($path) 43 | { 44 | // Can't use file_exists() / is_executable(), because if open_basedir ini setting is in 45 | // effect, file won't be reported as existing/executable, but the binary itself can still 46 | // exist outside the open_basedir, and be executable. 47 | exec(escapeshellarg($path) . ' -version 2>&1', $output, $error); 48 | 49 | if ($error) { 50 | throw new InvalidArgumentException('Unable to execute pdflatex binary: ' .
$path); 51 | } 52 | 53 | $compiler = $this->_parseCompilerInfo($output[0]); 54 | if (!$compiler) { 55 | throw new InvalidArgumentException('Unrecognized pdflatex -version output'); 56 | } 57 | 58 | $this->_compiler = array( 59 | 'path' => $path, // open_basedir may be in effect, don't use realpath() 60 | 'engine' => $compiler['engine'], 61 | 'version' => $compiler['version'], 62 | ); 63 | 64 | return $this; 65 | }

    /**
     * Extract engine name and version from the first line of
     * `pdflatex -version` output.
     *
     * @param string $version
     * @return array|false array with 'engine' and 'version' keys, false if
     *                     the output was not recognized
     * @internal This function is not part of the public api.
     */
    public function _parseCompilerInfo($version)
    {
        // "<engine> <version>", version starting with a digit
        if (preg_match("/(?P<engine>\S*?TeX) (?P<version>\d[^\n]+)/i", $version, $match)) {
            return array('engine' => $match['engine'], 'version' => $match['version']);
        }
        // "<engine>, Version <version>"
        if (preg_match("/(?P<engine>\S*?TeX), Version (?P<version>[^\n]+)/i", $version, $match)) {
            return array('engine' => $match['engine'], 'version' => $match['version']);
        }
        return false;
    }

    /**
     * Look for an executable pdflatex binary in the current working
     * directory and in directories listed in the PATH environment variable.
     *
     * @return string path to the binary
     * @throws Exception if the binary cannot be found
     */
    public function findPdflatexBinary()
    {
        $files = array('pdflatex');

        // getenv() returns false when PATH is not set
        $path = (string) getenv('PATH');
        $dirs = explode(PATH_SEPARATOR, $path);
        array_unshift($dirs, getcwd());

        // WIN32 WINNT Windows CYGWIN_NT-5.1
        $isWindows = stripos(PHP_OS, 'WIN') === 0 || stripos(PHP_OS, 'CYGWIN') === 0;

        foreach ($files as $file) {
            if ($isWindows) {
                $file .= '.exe';
            }

            foreach ($dirs as $dir) {
                $path = $dir . DIRECTORY_SEPARATOR . $file;
                if (file_exists($path) && is_executable($path)) {
                    return $path;
                }
            }
        }

        throw new Exception('Unable to locate pdflatex binary');
    }

    /**
     * Set directory used for building documents.
     *
     * @param string $path existing, writable directory
     * @return $this
     * @throws InvalidArgumentException
     */
    public function setBuildDir($path)
    {
        if (!is_dir($path)) {
            throw new InvalidArgumentException('Path is not a directory: ' . $path);
        }
        if (!is_writable($path)) {
            throw new InvalidArgumentException('Path is not writable: ' . $path);
        }
        // stored with exactly one trailing slash
        $this->_buildDir = rtrim(realpath($path), '/') . '/';
        return $this;
    }

    /**
     * Get build directory, defaults to the system temporary directory.
     *
     * @return string directory path with a trailing slash
     */
    public function getBuildDir()
    {
        if (empty($this->_buildDir)) {
            $this->setBuildDir(sys_get_temp_dir());
        }
        return $this->_buildDir;
    }

    /**
     * Compile a LaTeX file to PDF.
     *
     * pdflatex is run twice in the directory of the file, with TEXMFHOME
     * overridden for the duration of the run. Output of the first run, with
     * the build directory path removed, is available via getLog().
     *
     * @param string $file  path to the .tex file
     * @param array $files  OPTIONAL additional files to symlink (or copy) next
     *                      to the compiled file
     * @return string path to the generated PDF document
     * @throws Exception if no non-empty PDF file was produced
     */
    public function compile($file, array $files = null)
    {
        $this->_log = null;

        $cwd = getcwd();
        $dir = dirname($file);

        foreach ((array) $files as $path) {
            // TODO handle Windows
            $target = $dir . '/' . basename($path);
            if (!is_file($target)) {
                // fall back to copying when a symlink cannot be created
                if (!@symlink($path, $target)) {
                    copy($path, $target);
                }
            }
        }

        $pdflatex = $this->getPdflatexBinary();

        // getenv() returns false when the variable is not set, _setEnv()
        // uses that value to restore the unset state afterwards
        $texmfhome = getenv(self::TEXMFHOME);
        $this->_setEnv(self::TEXMFHOME, $this->_texmfhome);

        chdir($dir);
        // Both the binary path and the file name are escaped, so that paths
        // containing spaces or shell metacharacters are passed as single,
        // literal arguments.
        $cmd = escapeshellarg($pdflatex)
            . ' -interaction nonstopmode -halt-on-error -file-line-error '
            . escapeshellarg($file);
        $log = (string) shell_exec($cmd);
        // second run, its output is discarded
        shell_exec($cmd . ' 2>&1');
        chdir($cwd);

        $this->_setEnv(self::TEXMFHOME, $texmfhome);

        // process log so that paths are not given away
        $log = str_replace(array("\r\n", "\r"), "\n", $log);
        // the log is wrapped at 79 columns, so wrapped variants of the
        // directory path are removed as well; the last search string has no
        // counterpart in the replacement array and is replaced with an
        // empty string
        $log = str_replace(array(
            $dir . '/',
            wordwrap('(' . $dir . '/', 79, "\n", true),
            wordwrap($dir . '/', 79, "\n", true),
        ), array('', '('), $log);

        $this->_log = __CLASS__ . ' ' . $file . "\n\n" . $log;

        $output = sprintf('%s/%s.pdf', $dir, basename($file, '.tex'));

        // if document body is empty a 0-length file is generated
        if (is_file($output) && filesize($output)) {
            return $output;
        }

        throw new Exception(sprintf('Unable to compile file \'%s\'', $file));
    }

    /**
     * Compile string to a PDF document
     *
     * The document is built in a subdirectory of the build directory named
     * after the MD5 hash of the source. An already existing output file is
     * returned without compiling again.
     *
     * @param $script String containing LaTeX document source
     * @param array $files
     * @return string Path to compiled PDF document
     * @throws Exception
     */
    public function compileString($script, array $files = null)
    {
        $buildDir = $this->getBuildDir() . 'pdflatex/' . md5($script);
        $output = $buildDir . '/output.pdf';

        if (is_file($output)) {
            return $output;
        }

        if (!is_dir($buildDir)) {
            if (!@mkdir($buildDir, 0777, true)) {
                throw new Exception(sprintf(
                    'Unable to create script build directory: %s',
                    $buildDir
                ));
            }
        }

        if (!is_writable($buildDir)) {
            throw new Exception(sprintf(
                'Script build directory is not writable: %s',
                $buildDir
            ));
        }

        $scriptFile = $buildDir . '/output.tex';
        // fail early instead of running pdflatex on a missing or stale file
        if (false === file_put_contents($scriptFile, $script)) {
            throw new Exception(sprintf(
                'Unable to write script file: %s',
                $scriptFile
            ));
        }

        return $this->compile($scriptFile, $files);
    }

    /**
     * Get log of the last compilation.
     *
     * @return string
     */
    public function getLog()
    {
        return (string) $this->_log;
    }

    /**
     * Set value of the TEXMFHOME environment variable used when compiling.
     *
     * @param string $texmfhome
     * @return $this
     */
    public function setTexmfhome($texmfhome)
    {
        $this->_texmfhome = (string) $texmfhome;
        return $this;
    }

    /**
     * Set or unset an environment variable.
     *
     * @param string $key
     * @param string|false|null $value false (as returned by getenv() for
     *                                 a variable that is not set) removes
     *                                 the variable
     */
    protected function _setEnv($key, $value)
    {
        // putenv/getenv and $_ENV are completely distinct environment stores
        if ($value === false) {
            // restore the "not set" state instead of setting the variable
            // to an empty string
            unset($_ENV[$key]);
            putenv($key);
            return;
        }

        $_ENV[$key] = $value;
        putenv("$key=$value");
    }
}
235 | -------------------------------------------------------------------------------- /library/PhpLatex/Renderer/Abstract.php: -------------------------------------------------------------------------------- 1 | getType()) { 19 | case PhpLatex_Parser::TYPE_SPECIAL: 20 | if ($node->value === '_' || $node->value === '^') { 21 | return $node->value . self::toLatex($node->getChildren()); 22 | } 23 | return $node->value; 24 | 25 | case PhpLatex_Parser::TYPE_TEXT: 26 | // make sure text is properly escaped 27 | $source = PhpLatex_Utils::escape($node->value); 28 | return $source; 29 | 30 | case PhpLatex_Parser::TYPE_GROUP: 31 | $source = $node->optional ? '[{' : '{'; 32 | $source .= self::toLatex($node->getChildren()); 33 | $source .= $node->optional ? '}]' : '}'; 34 | return $source; 35 | 36 | case PhpLatex_Parser::TYPE_VERBATIM: 37 | return $node->value; 38 | 39 | case PhpLatex_Parser::TYPE_MATH: 40 | $source = self::toLatex($node->getChildren()); 41 | if ($node->inline) { 42 | return '\\(' . $source . '\\)'; 43 | } else { 44 | return '\\[' . $source . '\\]'; 45 | } 46 | 47 | case PhpLatex_Parser::TYPE_COMMAND: 48 | $value = $node->value; 49 | if ($node->starred) { 50 | $value .= '*'; 51 | } 52 | if ($node->value === '\\string') { 53 | foreach ($node->getChildren() as $child) { 54 | $value .= self::toLatex($child); 55 | } 56 | return $value; 57 | } 58 | if ($node->symbol || $node->hasChildren()) { 59 | return $value .
self::toLatex($node->getChildren()); 60 | } 61 | 62 | // some control words, e.g. \left[, doesn't need space after 63 | if ($node->noSpaceAfter) { 64 | return $value; 65 | } 66 | // control word, add space that was removed after 67 | return $value . ' '; 68 | 69 | case PhpLatex_Parser::TYPE_ENVIRON: 70 | $children = $node->getChildren(); 71 | $argsEnd = 0; 72 | 73 | foreach ($children as $child) { 74 | if ($child->arg) { 75 | ++$argsEnd; 76 | } else { 77 | break; 78 | } 79 | } 80 | 81 | $args = array_slice($children, 0, $argsEnd); 82 | $children = array_slice($children, $argsEnd); 83 | 84 | return "\\begin{" . $node->value . "}" . self::toLatex($args) . "\n" 85 | . self::toLatex($children) . "\n" 86 | . "\\end{" . $node->value . "}"; 87 | 88 | case PhpLatex_Parser::TYPE_DOCUMENT: 89 | return self::toLatex($node->getChildren()); 90 | } 91 | } elseif (is_array($node)) { 92 | // render node list and concatenate results 93 | $latex = ''; 94 | foreach ($node as $child) { 95 | $latex .= self::toLatex($child); 96 | } 97 | return $latex; 98 | } 99 | } // }}} 100 | 101 | /** 102 | * @param PhpLatex_Node|string $node 103 | * @return string 104 | */ 105 | abstract public function render($node); 106 | 107 | protected $_commandRenderers = array(); 108 | 109 | public function addCommandRenderer($command, $renderer) 110 | { 111 | if (!is_callable($renderer) && !$renderer instanceof PhpLatex_Renderer_NodeRenderer) { 112 | throw new InvalidArgumentException(sprintf( 113 | 'Renderer must be an instance of PhpLatex_Renderer_NodeRenderer or a callable, %s given', 114 | is_object($renderer) ? 
get_class($renderer) : gettype($renderer) 115 | )); 116 | } 117 | $this->_commandRenderers[$command] = $renderer; 118 | return $this; 119 | } 120 | 121 | public function hasCommandRenderer($command) 122 | { 123 | return isset($this->_commandRenderers[$command]); 124 | } 125 | 126 | public function executeCommandRenderer($command, PhpLatex_Node $node) 127 | { 128 | if (!$this->hasCommandRenderer($command)) { 129 | throw new InvalidArgumentException('Renderer for command ' . $command . ' not available'); 130 | } 131 | $renderer = $this->_commandRenderers[$command]; 132 | if ($renderer instanceof PhpLatex_Renderer_NodeRenderer) { 133 | return $renderer->render($node); 134 | } 135 | return call_user_func($renderer, $node); 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /library/PhpLatex/Renderer/Html.php: -------------------------------------------------------------------------------- 1 | _parser = $parser; 35 | return $this; 36 | } 37 | 38 | /** 39 | * @return PhpLatex_Parser 40 | */ 41 | public function getParser() 42 | { 43 | if ($this->_parser === null) { 44 | $this->_parser = new PhpLatex_Parser(); 45 | } 46 | return $this->_parser; 47 | } 48 | 49 | protected function _renderItem($node, PhpLatex_Utils_PeekableIterator $it) // {{{ 50 | { 51 | $html = ''; 52 | 53 | if ($node->value === '\\item') { 54 | $it->next(); // skip \item control 55 | } else { 56 | return; // skip because no \item control was found 57 | } 58 | 59 | // stop rendering at first \item control word 60 | while (($n = $it->current()) && ($n->getType() !== PhpLatex_Parser::TYPE_COMMAND || $n->value !== '\\item')) { 61 | // consecutive \par macros inside \item are interpreted 62 | // as a single \newline 63 | $html .= $this->_renderNode($n, self::FLAG_PAR2BR); 64 | $next = $it->peek(); 65 | if ($next && ($next->getType() !== PhpLatex_Parser::TYPE_COMMAND || $next->value !== '\\item')) { 66 | $it->next(); 67 | } else { 68 | break; 69 | } 70 | } 71 
| 72 | // in \item all par are converted to newline 73 | 74 | // \newline (and \\) right after \item causes "There's no line here to 75 | // end" error, newlines after item content are ignored 76 | $html = preg_replace('/^(\s|<(br|par)\/>)+|(\s|<(br|par)\/>)+$/', '', $html); 77 | $html = '
  • ' . $html . '
  • ' . "\n"; 78 | 79 | return $html; 80 | } // }}} 81 | 82 | protected function __renderText($text) 83 | { 84 | return str_replace( 85 | array( 86 | "\n", 87 | '---', '--', 88 | ',,', '``', 89 | '\'\'', '"', 90 | '`', '\'', 91 | '<<', '>>', 92 | '<', '>', 93 | ), 94 | array( 95 | ' ', 96 | '—', '–', 97 | '„', '“', 98 | '”', '”', 99 | '‘', '’', 100 | '«', '»', 101 | '<' ,'>', 102 | ), 103 | $text 104 | ); 105 | } 106 | 107 | protected function _renderText($node, $flags = 0) 108 | { 109 | return $this->__renderText($node->value); 110 | } 111 | 112 | protected function _renderGroup($node, $flags = 0) 113 | { 114 | if (!is_object($node)) { 115 | throw new Exception; 116 | } 117 | // TODO context 118 | if ($node->mode & PhpLatex_Parser::MODE_MATH) { 119 | if ($node->optional) { 120 | // optional argument, for proper nesting must be wrapped in 121 | // curly braces 122 | $html = '[{'; 123 | } else { 124 | $html = '{'; 125 | } 126 | } else { 127 | $html = ''; 128 | } 129 | 130 | $tit = new PhpLatex_Utils_PeekableArrayIterator($node->getChildren()); 131 | while ($tit->valid()) { 132 | $subnode = $tit->current(); 133 | $html .= $this->_renderNode($subnode, $flags); 134 | $tit->next(); 135 | } 136 | 137 | if ($node->mode & PhpLatex_Parser::MODE_MATH) { 138 | if ($node->optional) { 139 | $html .= '}]'; 140 | } else { 141 | $html .= '}'; 142 | } 143 | } 144 | return $html; 145 | } 146 | 147 | protected function _renderMath($node, $flags = 0) 148 | { 149 | if ($node->inline) { 150 | $delims = array('\\(', '\\)'); 151 | } else { 152 | $delims = array('\\[', '\\]'); 153 | } 154 | 155 | $html = $this->_renderGroup($node, $flags); 156 | 157 | // check for forbidden control words 158 | /*if (in_array($token['value'], array( 159 | '\\def', 160 | '\\newcommand', '\\renewcommand', 161 | '\\newenvironment', '\\renewenvironment', 162 | '\\newfont', '\\newtheorem', '\\usepackage', 163 | // MathTex extensions 164 | '\\eval', '\\environment', '\\gif', 165 | ), true)) { 166 | break; 167 | 
}*/ 168 | 169 | // filter out certain commands 170 | // escape unescaped \(, \), \[ and \] in subtree 171 | // render contents 172 | // trim 173 | return $delims[0] . $html . $delims[1]; 174 | } 175 | 176 | // TODO need to know whether special is in math or text mode 177 | protected function _renderSpecial($node, $flags = 0) 178 | { 179 | if ($node->mode & PhpLatex_Parser::MODE_MATH) { 180 | if ($node->value === '_' || $node->value === '^') { 181 | $children = $node->getChildren(); 182 | if (count($children)) { 183 | return $node->value . $this->_renderNode($children[0], self::FLAG_ARG); 184 | } 185 | } 186 | return $node->value; 187 | } 188 | switch ($node->value) { 189 | case '~': 190 | return ' '; 191 | 192 | case '_': 193 | case '^': 194 | $children = $node->getChildren(); 195 | if (count($children)) { 196 | $tag = $node->value === '_' ? 'sub' : 'sup'; 197 | $text = $this->_renderNode($children[0], self::FLAG_ARG); 198 | return '<' . $tag . '>' . $text . ''; 199 | } 200 | break; 201 | } 202 | } 203 | 204 | protected function _renderEnvironList($node) 205 | { 206 | $html = ''; 207 | $tag = $node->value === 'itemize' ? 'ul' : 'ol'; 208 | if ($node->getChildren()) { 209 | // list environments do not inherit flags 210 | $html .= '<' . $tag . 
'>'; 211 | $iit = new PhpLatex_Utils_PeekableArrayIterator($node->getChildren()); 212 | while ($iit->valid()) { 213 | $subnode = $iit->current(); 214 | $html .= $this->_renderItem($subnode, $iit, 0); 215 | $iit->next(); 216 | } 217 | $html .= ''; 218 | } 219 | return $html; 220 | } 221 | 222 | protected function _renderEnvironTabular($node) 223 | { 224 | $children = $node->getChildren(); 225 | $alignment = $this->_renderNodeChildren($children[0]); 226 | $alignment = preg_replace('/[^crl]/', '', strtolower($alignment)); 227 | // alignment is treated merly as a hint 228 | 229 | $nrows = 0; 230 | $ncols = 0; 231 | $row = 0; 232 | $col = 0; 233 | $table = array(); 234 | 235 | // ltrim spaces 236 | for ($i = 1; $i < count($children); ++$i) { 237 | $child = $children[$i]; 238 | if ($child->getType() === PhpLatex_Parser::TYPE_COMMAND && 239 | $child->value === '\\\\' 240 | ) { 241 | // start new row 242 | ++$row; 243 | $col = 0; 244 | continue; 245 | } 246 | if ($child->getType() === PhpLatex_Parser::TYPE_SPECIAL && 247 | $child->value === '&' 248 | ) { 249 | // start new column 250 | ++$col; 251 | continue; 252 | } 253 | 254 | $cell = $this->_renderNode($child); 255 | 256 | // if last row consists only of an empty string ignore it 257 | if ($i === count($children) - 1 && $cell === '') { 258 | break; 259 | } 260 | 261 | $nrows = max($nrows, $row + 1); 262 | $ncols = max($ncols, $col + 1); 263 | 264 | if (!isset($table[$row][$col])) { 265 | $table[$row][$col] = ''; 266 | } 267 | 268 | $table[$row][$col] .= $cell; 269 | } 270 | 271 | $html = ''; 272 | for ($row = 0; $row < $nrows; ++$row) { 273 | $html .= ''; 274 | for ($col = 0; $col < $ncols; ++$col) { 275 | $align = substr($alignment, $col, 1); 276 | $style = ''; 277 | if ($align === 'c') { 278 | $style = ' style="text-align:center"'; 279 | } elseif ($align === 'l') { 280 | $style = ' style="text-align:left"'; 281 | } elseif ($align === 'r') { 282 | $style = ' style="text-align:right"'; 283 | } 284 | 285 | $cell = 
isset($table[$row][$col]) ? trim($table[$row][$col]) : ''; 286 | $html .= '' . $cell . ''; 287 | } 288 | $html .= ''; 289 | } 290 | $html .= '
    '; 291 | return $html; 292 | } 293 | 294 | protected function _renderEnvironEquation($node) 295 | { 296 | $name = 'equation' . ($node->starred ? '*' : ''); 297 | return "\\[\n" 298 | . "\\begin{{$name}} " 299 | . $this->_renderNodeChildren($node) 300 | . " \\end{{$name}}\n" 301 | . "\\]\n"; 302 | } 303 | 304 | protected function _renderEnvironEqnarray($node) 305 | { 306 | $name = 'eqnarray' . ($node->starred ? '*' : ''); 307 | return "\\[ \\begin{{$name}}\n" 308 | . $this->_renderNodeChildren($node) 309 | . " \end{{$name}} \\]\n"; 310 | } 311 | 312 | protected function _renderEnvironMath($node) 313 | { 314 | return "\\( " . $this->_renderNodeChildren($node) . " \\) "; 315 | } 316 | 317 | protected function _renderEnvironDisplaymath($node) 318 | { 319 | return "\\[\n" . $this->_renderNodeChildren($node) . " \\]\n"; 320 | } 321 | 322 | protected function _renderEnvironVerbatim($node) 323 | { 324 | $child = $node->getChild(0); 325 | return '
    ' . htmlspecialchars($child->value)  . '
    '; 326 | } 327 | 328 | protected function _renderNodeChildren($node) 329 | { 330 | $html = ''; 331 | foreach ($node->getChildren() as $child) { 332 | $html .= $this->_renderNode($child, 0); 333 | } 334 | return $html; 335 | } 336 | 337 | // assumption $it->current() === $node 338 | // increment iterator only if next node is required for rendering of 339 | // this node 340 | protected function _renderNode($node, $flags = 0) 341 | { 342 | if ($node->getType() === PhpLatex_Parser::TYPE_ENVIRON) { 343 | $html = ''; 344 | switch ($node->value) { 345 | case 'itemize': 346 | case 'enumerate': 347 | return $this->_renderEnvironList($node); 348 | 349 | default: 350 | $method = '_renderEnviron' . $node->value; 351 | if (method_exists($this, $method)) { 352 | return $this->$method($node); 353 | } 354 | // invalid environment, render its contents 355 | $html = $this->_renderNodeChildren($node); 356 | break; 357 | } 358 | return $html; 359 | } 360 | 361 | if ($node->getType() === PhpLatex_Parser::TYPE_VERBATIM) { 362 | return $this->__renderText($node->value); 363 | } 364 | 365 | if ($node->getType() === PhpLatex_Parser::TYPE_COMMAND) { 366 | // TODO filter out forbidden control sequences 367 | if ($node->mode & PhpLatex_Parser::MODE_MATH) { 368 | $html = $this->_renderNodeChildren($node); 369 | // don't append space if control symbol 370 | 371 | return $node->value . ($html ? $html : ($node->symbol ? 
'' : ' ')); 372 | } 373 | if ($this->hasCommandRenderer($node->value)) { 374 | return $this->executeCommandRenderer($node->value, $node); 375 | } 376 | switch ($node->value) { 377 | case '\\S': 378 | return '§'; 379 | 380 | case '\\P': 381 | return '¶'; 382 | 383 | case '\\ldots': 384 | case '\\dots': 385 | return '…'; 386 | 387 | case '\\textbackslash': 388 | return '\\'; 389 | 390 | case '\\textasciitilde': 391 | return '~'; 392 | 393 | case '\\textasciicircum': 394 | return '^'; 395 | 396 | case '\\-': 397 | return ''; // word hyphenation 398 | 399 | case '\\^': 400 | case '\\~': 401 | if ($arg = $node->getChild(0)) { 402 | $arg = trim($this->_renderNodeChildren($arg)); 403 | } 404 | if (0 === strlen($arg)) { 405 | return substr($node->value, 1); 406 | } 407 | return $arg; // TODO support for circumflex/tilde accent 408 | 409 | case '\\#': 410 | case '\\%': 411 | case '\\_': 412 | case '\\{': 413 | case '\\}': 414 | case '\\$': 415 | return substr($node->value, 1); 416 | 417 | // spaces, based on https://en.wikipedia.org/wiki/Whitespace_character#Unicode 418 | case '\\ ': 419 | return ' '; 420 | case '\\,': 421 | return ' '; 422 | case '\\enspace': 423 | return ' '; 424 | case '\\quad': 425 | return ' '; 426 | 427 | case '\\ref': 428 | // TODO if ref target resides in math mode render \\ref, so that 429 | // it can be handled by JS. 430 | return "\\ref{" . trim($this->_renderNodeChildren($node), "{}") . '} '; 431 | 432 | case '\\&': 433 | return '&'; 434 | 435 | case '\\\\': 436 | case '\\newline'; 437 | return '
    '; 438 | 439 | case '\\par': 440 | // replace \par in argument with space 441 | if ($flags & self::FLAG_ARG) { 442 | return ' '; // ok 443 | } 444 | 445 | // par placeholder for further processing (par will be 446 | // inserted or removed if certain conditions are met) 447 | return ''; 448 | 449 | case '\\url': 450 | case '\\href': 451 | $args = $node->getChildren(); 452 | if (count($args) > 0) { 453 | // term arg (not text) causes the following error: 454 | // ! TeX capacity exceeded, sorry [input stack size=5000]. 455 | 456 | // TODO validate url, only (ht|f)tp(s)?:// urls 457 | $url = $this->_renderNode($args[0]); 458 | $urlAttr = str_replace(array('<', '>', '"'), array('<', '>', '"'), $url); 459 | 460 | $text = count($args) > 1 ? $this->_renderNode($args[1]) : $url; 461 | 462 | return "" . $text . ""; 463 | } 464 | break; 465 | 466 | case '\\TeX': 467 | return 'TeX'; 468 | 469 | case '\\LaTeX': 470 | return 'LATeX'; 471 | 472 | case '\\chapter': 473 | case '\\section': 474 | case '\\subsection': 475 | case '\\subsubsection': 476 | case '\\paragraph': 477 | case '\\subparagraph': 478 | case '\\textsubscript': // \usepackage{fixltx2e} 479 | case '\\textsuperscript': 480 | foreach ($node->getChildren() as $arg) { 481 | switch ($node->value) { 482 | case '\\chapter': 483 | $tag = 'h1'; 484 | break; 485 | 486 | case '\\section': 487 | $tag = 'h2'; 488 | break; 489 | 490 | case '\\subsection': 491 | $tag = 'h3'; 492 | break; 493 | 494 | case '\\subsubsection': 495 | $tag = 'h4'; 496 | break; 497 | 498 | case '\\paragraph': 499 | $tag = 'h5'; 500 | break; 501 | 502 | case '\\subparagraph': 503 | $tag = 'h6'; 504 | break; 505 | 506 | case '\\textsubscript': 507 | $tag = 'sub'; 508 | break; 509 | 510 | case '\\textsuperscript': 511 | $tag = 'sup'; 512 | break; 513 | } 514 | $text = $this->_renderNode($arg, self::FLAG_ARG); 515 | $html = '<' . $tag . '>' . $text . 
''; 516 | return $html; 517 | } 518 | break; 519 | 520 | default: 521 | return $this->_renderStyled($node); 522 | break; 523 | } 524 | } 525 | 526 | $method = '_render' . $node->getType(); 527 | if (method_exists($this, $method)) { 528 | return $this->$method($node, $flags); 529 | } 530 | } 531 | 532 | protected $_initialTypestyle; 533 | 534 | protected function _pushTypestyle() 535 | { 536 | if (!$this->_initialTypestyle) { 537 | $this->_initialTypestyle = new PhpLatex_Renderer_Typestyle(); 538 | } 539 | if (!$this->_typestyle) { 540 | $this->_typestyle = $this->_initialTypestyle->push(); 541 | } else { 542 | $this->_typestyle = $this->_typestyle->push(); 543 | } 544 | return $this->_typestyle; 545 | } 546 | 547 | protected function _renderStyled(PhpLatex_Node $node) 548 | { 549 | $typestyle = null; 550 | 551 | if ($node->getType() === PhpLatex_Parser::TYPE_COMMAND) { 552 | switch ($node->value) { 553 | case '\\textbf': 554 | $typestyle = $this->_pushTypestyle(); 555 | $typestyle->bold = true; 556 | break; 557 | 558 | case '\\textup': 559 | $typestyle = $this->_pushTypestyle(); 560 | $typestyle->style = PhpLatex_Renderer_Typestyle::STYLE_NORMAL; 561 | break; 562 | 563 | case '\\textit': 564 | $typestyle = $this->_pushTypestyle(); 565 | $typestyle->style = PhpLatex_Renderer_Typestyle::STYLE_ITALIC; 566 | break; 567 | 568 | case '\\textsl': // slanted (oblique) 569 | $typestyle = $this->_pushTypestyle(); 570 | $typestyle->style = PhpLatex_Renderer_Typestyle::STYLE_SLANTED; 571 | break; 572 | 573 | case '\\emph': 574 | $typestyle = $this->_pushTypestyle(); 575 | $typestyle->emphasis = true; 576 | break; 577 | 578 | case '\\textrm': 579 | $typestyle = $this->_pushTypestyle(); 580 | $typestyle->family = PhpLatex_Renderer_Typestyle::FAMILY_SERIF; 581 | break; 582 | 583 | case '\\texttt': 584 | $typestyle = $this->_pushTypestyle(); 585 | $typestyle->family = PhpLatex_Renderer_Typestyle::FAMILY_MONO; 586 | break; 587 | 588 | case '\\textsf': 589 | $typestyle = 
$this->_pushTypestyle(); 590 | $typestyle->family = PhpLatex_Renderer_Typestyle::FAMILY_SANS; 591 | break; 592 | 593 | case '\\underline': 594 | $typestyle = $this->_pushTypestyle(); 595 | $typestyle->underline = true; 596 | break; 597 | 598 | case '\\textsc': // small caps 599 | $typestyle = $this->_pushTypestyle(); 600 | $typestyle->smallcaps = true; 601 | break; 602 | } 603 | } 604 | 605 | $render = null; 606 | 607 | foreach ($node->getChildren() as $arg) { 608 | $render .= $this->_renderNode($arg, self::FLAG_ARG); 609 | } 610 | 611 | // wrap in style difference wrt to parent typestyle 612 | if ($typestyle) { 613 | if (strlen($render)) { 614 | $diff = $typestyle->diff(); 615 | if ($diff) { 616 | $render = $this->_wrapStyle($render, $diff); 617 | } 618 | } 619 | $this->_typestyle = $typestyle->pop(); 620 | } 621 | 622 | return (string) $render; 623 | } 624 | 625 | protected function _wrapStyle($render, array $diff = null) 626 | { 627 | $tags = array(); 628 | $style = array(); 629 | 630 | if (isset($diff['family'])) { 631 | switch ($diff['family']) { 632 | case PhpLatex_Renderer_Typestyle::FAMILY_SANS: 633 | $style['font-family'] = 'sans-serif'; 634 | break; 635 | 636 | case PhpLatex_Renderer_Typestyle::FAMILY_MONO: 637 | $style['font-family'] = 'monospace'; 638 | break; 639 | 640 | case PhpLatex_Renderer_Typestyle::FAMILY_SERIF: 641 | $style['font-family'] = 'serif'; 642 | break; 643 | } 644 | } 645 | 646 | if (isset($diff['style'])) { 647 | switch ($diff['style']) { 648 | case PhpLatex_Renderer_Typestyle::STYLE_NORMAL: 649 | $style['font-style'] = 'normal'; 650 | break; 651 | 652 | case PhpLatex_Renderer_Typestyle::STYLE_SLANTED: 653 | $style['font-style'] = 'oblique'; 654 | break; 655 | 656 | case PhpLatex_Renderer_Typestyle::STYLE_ITALIC: 657 | $tags[] = 'i'; 658 | break; 659 | } 660 | } 661 | 662 | if (isset($diff['bold'])) { 663 | if ($diff['bold']) { 664 | $tags[] = 'b'; 665 | } else { 666 | $style['font-weight'] = 'normal'; 667 | } 668 | } 669 | 670 | if 
(isset($diff['emphasis'])) { 671 | if ($diff['emphasis']) { 672 | $tags[] = 'em'; 673 | } 674 | } 675 | 676 | if (isset($diff['underline'])) { 677 | if ($diff['underline']) { 678 | $tags[] = 'u'; 679 | } else { 680 | $style['text-decoration'] = 'none'; 681 | } 682 | } 683 | 684 | if (isset($diff['smallcaps'])) { 685 | if ($diff['smallcaps']) { 686 | $style['font-variant'] = 'small-caps'; 687 | } else { 688 | $style['font-variant'] = 'normal'; 689 | } 690 | } 691 | 692 | if (!$tags && !$style) { 693 | return $render; 694 | } 695 | 696 | if ($tags) { 697 | $open = $close = ''; 698 | foreach ($tags as $tag) { 699 | $open .= '<' . $tag . '>'; 700 | $close = $close . ''; 701 | } 702 | return $open . $render . $close; 703 | } 704 | 705 | $css = array(); 706 | foreach ($style as $key => $value) { 707 | $css[] = $key . ':' . $value; 708 | } 709 | return sprintf('%s', implode(';', $css), $render); 710 | } 711 | 712 | /** 713 | * @param PhpLatex_Node|string $document 714 | * @return mixed|string 715 | */ 716 | public function render($document) 717 | { 718 | if (!$document instanceof PhpLatex_Node) { 719 | $document = $this->getParser()->parse($document); 720 | } 721 | 722 | $this->_par = array(); 723 | $result = ''; 724 | 725 | foreach ($document->getChildren() as $node) { 726 | $result .= $this->_renderNode($node); 727 | } 728 | 729 | // fix paragraphs before and after block-level elements 730 | 731 | // skip all \newlines and \\ that came after \par (this may be required 732 | // when rendering LaTeX output, to avoid 'There's no line here to end' 733 | // error) and merge multiple \par into one 734 | $result = preg_replace( 735 | '/(<(br|par)\/>)+/', 736 | '', 737 | $result 738 | ); 739 | 740 | $result = preg_replace('/<(h1|h2|h3|h4|h5|h6|pre|ul|ol)/i', '<\1', $result); 741 | $result = preg_replace('/<\/(h1|h2|h3|h4|h5|h6|pre|ul|ol)>()/i', '', $result); 742 | 743 | // replace par placeholders with their HTML counterparts 744 | // TODO Maybe P instead of BR? 
745 | $result = str_replace('', '

    ', $result); 746 | 747 | return $result; 748 | } 749 | } 750 | -------------------------------------------------------------------------------- /library/PhpLatex/Renderer/NodeRenderer.php: -------------------------------------------------------------------------------- 1 | _parent = $this; 37 | return $child; 38 | } 39 | 40 | public function pop() 41 | { 42 | $parent = $this->_parent; 43 | $this->_parent = null; 44 | return $parent; 45 | } 46 | 47 | public function diff() 48 | { 49 | $props = array( 50 | 'style' => 'int', 51 | 'bold' => 'bool', 52 | 'underline' => 'bool', 53 | 'emphasis' => 'bool', 54 | 'smallcaps' => 'bool', 55 | 'family' => 'int', 56 | ); 57 | $diff = array(); 58 | 59 | if ($this->_parent === null) { 60 | foreach ($props as $name => $type) { 61 | $value = $this->$name; 62 | settype($value, $type); 63 | $diff[$name] = $value; 64 | } 65 | } else { 66 | foreach ($props as $name => $type) { 67 | $value = $this->$name; 68 | settype($value, $type); 69 | $value2 = $this->_parent->$name; 70 | settype($value2, $type); 71 | if ($value !== $value2) { 72 | $diff[$name] = $value; 73 | } 74 | } 75 | } 76 | return $diff; 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /library/PhpLatex/Utils.php: -------------------------------------------------------------------------------- 1 | '\\&', 13 | '{' => '\\{', 14 | '}' => '\\}', 15 | '$' => '\\$', 16 | '%' => '\\%', 17 | '#' => '\\#', 18 | '_' => '\\_', 19 | '^' => '\\^', // textmode 20 | '~' => '\\textasciitilde{}', // textmode 21 | '\\' => '\\textbackslash{}', // textmode 22 | // escape square brackets so that \\[length] construct does not appear 23 | '[' => '{[}', 24 | ']' => '{]}', 25 | ); 26 | $string = (string) $string; 27 | return strtr($string, $replace); 28 | } 29 | 30 | /** 31 | * Converts UTF-8 characters to their LaTeX text mode equivalents. 32 | * Unrecognized characters are removed from output. 
33 | * 34 | * @param string $string 35 | * @return string 36 | */ 37 | public static function escapeUtf8($string) 38 | { 39 | static $map; 40 | if (null === $map) { 41 | $map = require dirname(__FILE__) . '/latex_utf8.php'; 42 | } 43 | $string = (string) $string; 44 | $string = strtr($string, $map); 45 | $string = preg_replace('/[^\t\n\r\x20-\x7E]/', '', $string); 46 | return $string; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /library/PhpLatex/Utils/PeekableArrayIterator.php: -------------------------------------------------------------------------------- 1 | _array = $array; 14 | 15 | // reset internal array pointer, otherwise current position will 16 | // be copied from the original array! 17 | reset($this->_array); 18 | } 19 | 20 | #[\ReturnTypeWillChange] 21 | public function current() 22 | { 23 | return current($this->_array); 24 | } 25 | 26 | #[\ReturnTypeWillChange] 27 | public function key() 28 | { 29 | return key($this->_array); 30 | } 31 | 32 | #[\ReturnTypeWillChange] 33 | public function next() 34 | { 35 | next($this->_array); 36 | } 37 | 38 | #[\ReturnTypeWillChange] 39 | public function rewind() 40 | { 41 | reset($this->_array); 42 | } 43 | 44 | #[\ReturnTypeWillChange] 45 | public function valid() 46 | { 47 | return key($this->_array) !== null; 48 | } 49 | 50 | #[\ReturnTypeWillChange] 51 | public function count() 52 | { 53 | return count($this->_array); 54 | } 55 | 56 | #[\ReturnTypeWillChange] 57 | public function offsetExists($offset) 58 | { 59 | return isset($this->_array[$offset]); 60 | } 61 | 62 | #[\ReturnTypeWillChange] 63 | public function offsetGet($offset) 64 | { 65 | return isset($this->_array[$offset]) ? 
$this->_array[$offset] : null; 66 | } 67 | 68 | #[\ReturnTypeWillChange] 69 | public function offsetSet($offset, $value) { 70 | if (is_null($offset)) { 71 | $this->_array[] = $value; 72 | } else { 73 | $this->_array[$offset] = $value; 74 | } 75 | } 76 | 77 | #[\ReturnTypeWillChange] 78 | public function offsetUnset($offset) 79 | { 80 | unset($this->_array[$offset]); 81 | } 82 | 83 | public function __isset($offset) 84 | { 85 | return $this->offsetExists($offset); 86 | } 87 | 88 | public function __unset($offset) 89 | { 90 | $this->offsetUnset($offset); 91 | } 92 | 93 | public function peek() 94 | { 95 | if ($this->valid()) { 96 | $value = next($this->_array); 97 | prev($this->_array); 98 | return $value; 99 | } 100 | return false; 101 | } 102 | 103 | public function hasNext() 104 | { 105 | if ($this->valid()) { 106 | next($this->_array); 107 | $result = $this->valid(); 108 | prev($this->_array); 109 | return $result; 110 | } 111 | return false; 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /library/PhpLatex/Utils/PeekableIterator.php: -------------------------------------------------------------------------------- 1 | getType()}\n"; 20 | 21 | if (count($node->getProps())) { 22 | $str .= $indent . "props:\n"; 23 | foreach ($node->getProps() as $key => $value) { 24 | if ($key === 'mode') { 25 | switch ($value) { 26 | case PhpLatex_Parser::MODE_MATH: 27 | $value = "$value (math)"; 28 | break; 29 | case PhpLatex_Parser::MODE_TEXT: 30 | $value = "$value (text)"; 31 | break; 32 | case PhpLatex_Parser::MODE_BOTH: 33 | $value = "$value (both)"; 34 | break; 35 | } 36 | } 37 | if ($key === 'value') { 38 | $value = '"' . strtr($value, array( 39 | "\n" => '\n', 40 | "\t" => '\t', 41 | "\r" => '\r', 42 | )) . '"'; 43 | } 44 | if (is_bool($value)) { 45 | $value = var_export($value, true); 46 | } 47 | $str .= $indent . " {$key}: $value\n"; 48 | } 49 | } else { 50 | $str .= $indent . 
"props: (empty)\n"; 51 | } 52 | 53 | if (count($node->getChildren())) { 54 | $str .= $indent . "children:\n"; 55 | foreach ($node->getChildren() as $child) { 56 | $str .= $indent . ' - ' . self::_debug($child, $indent . ' '); 57 | } 58 | } else { 59 | $str .= $indent . "children: (empty)\n"; 60 | } 61 | 62 | return $str; 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /library/PhpLatex/environs.php: -------------------------------------------------------------------------------- 1 | array( 5 | 'verbatim' => true, 6 | 'mode' => PhpLatex_Parser::MODE_TEXT, 7 | 'environs' => array('itemize', 'enumerate'), 8 | 'starred' => true, 9 | // verbatim in tabular causes 10 | // ! LaTeX Error: Something's wrong--perhaps a missing \item. 11 | ), 12 | 'Verbatim' => array( 13 | 'verbatim' => true, 14 | 'mode' => PhpLatex_Parser::MODE_TEXT, 15 | 'environs' => array('itemize', 'enumerate'), 16 | ), 17 | 'lstlisting' => array( 18 | 'verbatim' => true, 19 | 'mode' => PhpLatex_Parser::MODE_TEXT, 20 | 'environs' => array('itemize', 'enumerate'), 21 | ), 22 | 'enumerate' => array( 23 | 'mode' => PhpLatex_Parser::MODE_TEXT, 24 | 'environs' => array('itemize', 'enumerate'), 25 | ), 26 | 'itemize' => array( 27 | 'mode' => PhpLatex_Parser::MODE_TEXT, 28 | 'environs' => array('itemize', 'enumerate'), 29 | // itemize in tabular causes 30 | // ! LaTeX Error: Something's wrong--perhaps a missing \item. 31 | ), 32 | 'displaymath' => array( 33 | 'math' => true, 34 | 'mode' => PhpLatex_Parser::MODE_TEXT, 35 | 'environs' => array('itemize', 'enumerate'), 36 | // displaymath in tabular causes 37 | // ! LaTeX Error: Bad math environment delimiter. 
38 | ), 39 | 'math' => array( 40 | 'math' => true, 41 | 'mode' => PhpLatex_Parser::MODE_TEXT, 42 | 'environs' => array('itemize', 'enumerate', 'tabular'), 43 | ), 44 | 'equation' => array( 45 | 'mode' => PhpLatex_Parser::MODE_TEXT, 46 | 'math' => true, 47 | 'starred' => true, 48 | ), 49 | 'eqnarray' => array( 50 | 'mode' => PhpLatex_Parser::MODE_TEXT, 51 | 'math' => true, 52 | 'starred' => true, 53 | ), 54 | 'tabular' => array( 55 | 'numArgs' => 1, 56 | 'mode' => PhpLatex_Parser::MODE_TEXT, 57 | 'environs' => array('itemize', 'enumerate', 'tabular'), 58 | ), 59 | 'array' => array( 60 | 'numArgs' => 1, 61 | 'mode' => PhpLatex_Parser::MODE_MATH, 62 | ), 63 | ); 64 | -------------------------------------------------------------------------------- /library/PhpLatex/latex_utf8.php: -------------------------------------------------------------------------------- 1 | '\\\'{A}', // A with acute 5 | 'Á̧' => 'A', // A with acute and cedilla 6 | 'Ạ́' => 'A', // A with acute and dot below 7 | 'Ą̌' => 'A', // A with acute and ogonek 8 | 'Ą́' => 'A', // A with acute and ogonek 9 | 'Ă' => '\\u{A}', // A with breve 10 | 'Ắ' => 'A', // A with breve and acute 11 | 'Ặ' => 'A', // A with breve and dot below 12 | 'Ằ' => 'A', // A with breve and grave 13 | 'Ẳ' => 'A', // A with breve and hook above 14 | 'Ẵ' => 'A', // A with breve and tilde 15 | 'Ǎ' => '\\v{A}', // A with caron 16 | 'Ǎ̧' => 'A', // A with caron and cedilla 17 | 'A̧' => '\\c{A}', // A with cedilla 18 | 'A̐' => 'A', // A with chandrabindu 19 | 'Â' => '\\^{A}', // A with circumflex 20 | 'Ấ' => 'A', // A with circumflex and acute 21 | 'Â̧' => 'A', // A with circumflex and cedilla 22 | 'Ậ' => 'A', // A with circumflex and dot below 23 | 'Ầ' => 'A', // A with circumflex and grave 24 | 'Ẩ' => 'A', // A with circumflex and hook above 25 | 'Ą̂' => 'A', // A with circumflex and ogonek 26 | 'Ẫ' => 'A', // A with circumflex and tilde 27 | 'A̭' => 'A', // A with circumflex below 28 | 'A̓' => 'A', // A with comma above 29 | 'Ä' => 
'\\"{A}', // A with diaeresis 30 | 'Ä́' => 'A', // A with diaeresis and acute 31 | 'Ä̌' => 'A', // A with diaeresis and caron 32 | 'Ä̂' => 'A', // A with diaeresis and circumflex 33 | 'Ạ̈' => 'A', // A with diaeresis and dot below 34 | 'Ä̀' => 'A', // A with diaeresis and grave 35 | 'Ǟ' => 'A', // A with diaeresis and macron 36 | 'Ą̈' => 'A', // A with diaeresis and ogonek 37 | 'Ą̈̌' => 'A', // A with diaeresis, caron and ogonek 38 | 'Ą̈̂' => 'A', // A with diaeresis, circumflex and ogonek 39 | 'Ą̈̀' => 'A', // A with diaeresis, grave and ogonek 40 | 'Ȧ' => '\\.{A}', // A with dot above 41 | 'Ȧ́' => 'A', // A with dot above and acute 42 | 'Ǡ' => 'A', // A with dot above and macron 43 | 'Ạ' => '\\d{A}', // A with dot below 44 | 'A̋' => '\\H{A}', // A with double acute 45 | 'Ȁ' => 'A', // A with double grave 46 | 'À' => '\\`{A}', // A with grave 47 | 'À̧' => 'A', // A with grave and cedilla 48 | 'Ạ̀' => 'A', // A with grave and dot below 49 | 'Ą̀' => 'A', // A with grave and ogonek 50 | 'Ả' => 'A', // A with hook above 51 | 'Ȃ' => 'A', // A with inverted breve 52 | 'Ā' => '\\={A}', // A with macron 53 | 'Ā́' => 'A', // A with macron and acute 54 | 'Ā̆' => 'A', // A with macron and breve 55 | 'Ā̌' => 'A', // A with macron and caron 56 | 'Ā̂' => 'A', // A with macron and circumflex 57 | 'Ạ̄' => 'A', // A with macron and dot below 58 | 'Ā̀' => 'A', // A with macron and grave 59 | 'Ą̄' => 'A', // A with macron and ogonek 60 | 'Ā̊' => 'A', // A with macron and ring above 61 | 'A̱' => 'A', // A with macron below 62 | 'Á̱' => 'A', // A with macron below and acute 63 | 'Â̱' => 'A', // A with macron below and circumflex 64 | 'Ä̱' => 'A', // A with macron below and diaeresis 65 | 'À̱' => 'A', // A with macron below and grave 66 | 'Ā̱' => 'A', // A with macron below and macron 67 | 'Å̱' => 'A', // A with macron below and ring above 68 | 'Ą̄́' => 'A', // A with macron, acute and ogonek 69 | 'Ą̄̌' => 'A', // A with macron, caron and ogonek 70 | 'Ą̄̂' => 'A', // A with macron, 
circumflex and ogonek 71 | 'Ą̄̀' => 'A', // A with macron, grave and ogonek 72 | 'Ą' => '\\k{A}', // A with ogonek 73 | 'A᷎' => 'A', // A with ogonek above 74 | 'ᶏ' => 'A', // A with retroflex hook 75 | 'Å' => '\\r{A}', // A with ring above 76 | 'Ǻ' => 'A', // A with ring above and acute 77 | 'Å̂' => 'A', // A with ring above and circumflex 78 | 'Ą̊' => 'A', // A with ring above and ogonek 79 | 'Ḁ' => 'A', // A with ring below 80 | 'Ⱥ' => 'A', // A with stroke 81 | 'Ã' => '\\~{A}', // A with tilde 82 | 'Ã́' => 'A', // A with tilde and acute 83 | 'Ạ̃' => 'A', // A with tilde and dot below 84 | 'Ã̀' => 'A', // A with tilde and grave 85 | 'Ą̃' => 'A', // A with tilde and ogonek 86 | 'A̰' => 'A', // A with tilde below 87 | 'A̍' => 'A', // A with vertical line 88 | // 'Ɖ', African D, D with tail 89 | // 'Ɑ́', Alpha with acute 90 | // 'Ɑ̌', Alpha with caron 91 | // 'Ɑ̂', Alpha with circumflex 92 | // 'Ɑ̀', Alpha with grave 93 | // 'ᶐ', Alpha with retroflex hook 94 | 'B́' => '\\\'{B}', // B with acute 95 | 'B̓' => 'B', // B with comma above 96 | 'B̤' => 'B', // B with diaeresis below 97 | 'Ḃ' => '\\.{B}', // B with dot above 98 | 'Ḅ' => '\\d{B}', // B with dot below 99 | 'Ɓ' => 'B', // B with hook 100 | 'Ḇ' => 'B', // B with line below 101 | 'ᵬ' => 'B', // B with middle tilde 102 | 'ᶀ' => 'B', // B with palatal hook 103 | 'Ƀ' => 'B', // B with stroke 104 | 'B̃' => '\\~{B}', // B with tilde 105 | 'Ƃ' => 'B', // B with topbar 106 | 'Ć' => '\\\'{C}', // C with acute 107 | 'Ꞓ' => 'C', // C with bar 108 | 'C̆' => '\\u{C}', // C with breve 109 | 'Č' => '\\v{C}', // C with caron 110 | 'Č̓' => 'C', // C with caron and comma above 111 | 'Ç' => '\\c{C}', // C with cedilla 112 | 'Ḉ' => 'C', // C with cedilla and acute 113 | 'Ç̆' => 'C', // C with cedilla and breve 114 | 'Ç̌' => 'C', // C with cedilla and caron 115 | 'Ç̇' => 'C', // C with cedilla and dot above 116 | 'Ĉ' => '\\^{C}', // C with circumflex 117 | 'C̓' => 'C', // C with comma above 118 | 'ɕ' => 'C', // C with curl 119 | 
'C̈' => '\\"{C}', // C with diaeresis 120 | 'Ċ' => '\\.{C}', // C with dot above 121 | 'C̣' => '\\d{C}', // C with dot below 122 | 'C̀' => '\\`{C}', // C with grave 123 | 'Ƈ' => 'C', // C with hook 124 | 'C̄' => '\\={C}', // C with macron 125 | 'Ȼ' => 'C', // C with stroke 126 | 'C̃' => '\\~{C}', // C with tilde 127 | // 'Ꜯ', Cuatrillo with comma 128 | 'D́' => '\\\'{D}', // D with acute 129 | 'Ď' => '\\v{D}', // D with caron 130 | 'Ḑ' => '\\c{D}', // D with cedilla 131 | 'D̂' => '\\^{D}', // D with circumflex 132 | 'Ḓ' => 'D', // D with circumflex below 133 | 'Ḓ' => 'D', // D with circumflex below 134 | 'D̓' => 'D', // D with comma above 135 | 'D̦' => 'D', // D with comma below 136 | 'ȡ' => 'D', // D with curl 137 | 'D̤' => 'D', // D with diaeresis below 138 | 'Ḋ' => '\\.{D}', // D with dot above 139 | 'Ḍ' => '\\d{D}', // D with dot below 140 | 'Ɗ' => 'D', // D with hook 141 | 'ᶑ' => 'D', // D with hook and tail 142 | 'Ḏ' => 'D', // D with line below 143 | 'ᵭ' => 'D', // D with middle tilde 144 | 'ᶁ' => 'D', // D with palatal hook 145 | 'Đ' => 'D', // D with stroke 146 | 'Ƌ' => 'D', // D with topbar 147 | // 'ȷ', Dotless J 148 | // 'ɟ', Dotless J with stroke 149 | // 'ʄ', Dotless J with stroke and hook 150 | 'É' => '\\\'{E}', // E with acute 151 | 'Ȩ́' => 'E', // E with acute and cedilla 152 | 'Ẹ́' => 'E', // E with acute and dot below 153 | 'É̱' => 'E', // E with acute and macron below 154 | 'Ę́' => 'E', // E with acute and ogonek 155 | 'Ĕ' => '\\u{E}', // E with breve 156 | 'Ḝ' => 'E', // E with breve and cedilla 157 | 'Ḝ' => 'E', // E with breve and cedilla 158 | 'Ě' => '\\v{E}', // E with caron 159 | 'Ȩ̌' => 'E', // E with caron and cedilla 160 | 'Ę̌' => 'E', // E with caron and ogonek 161 | 'Ȩ' => '\\c{E}', // E with cedilla 162 | 'Ȩ' => '\\c{E}', // E with cedilla 163 | 'Ê' => '\\^{E}', // E with circumflex 164 | 'Ế' => 'E', // E with circumflex and acute 165 | 'Ȩ̂' => 'E', // E with circumflex and cedilla 166 | 'Ệ' => 'E', // E with circumflex and dot below 
167 | 'Ề' => 'E', // E with circumflex and grave 168 | 'Ể' => 'E', // E with circumflex and hook above 169 | 'Ê̱' => 'E', // E with circumflex and macron below 170 | 'Ę̂' => 'E', // E with circumflex and ogonek 171 | 'Ễ' => 'E', // E with circumflex and tilde 172 | 'Ḙ' => 'E', // E with circumflex below 173 | 'Ë' => '\\"{E}', // E with diaeresis 174 | 'Ë́' => 'E', // E with diaeresis and acute 175 | 'Ë̌' => 'E', // E with diaeresis and caron 176 | 'Ë̂' => 'E', // E with diaeresis and circumflex 177 | 'Ë̀' => 'E', // E with diaeresis and grave 178 | 'Ë̱' => 'E', // E with diaeresis and macron below 179 | 'Ę̈' => 'E', // E with diaeresis and ogonek 180 | 'Ę̈̌' => 'E', // E with diaeresis, caron and ogonek 181 | 'Ę̈̂' => 'E', // E with diaeresis, circumflex and ogonek 182 | 'Ę̈̀' => 'E', // E with diaeresis, grave and ogonek 183 | 'Ė' => '\\.{E}', // E with dot above 184 | 'Ė́' => 'E', // E with dot above and acute 185 | 'Ę̇' => 'E', // E with dot above and ogonek 186 | 'Ę̇́' => 'E', // E with dot above, acute and ogonek 187 | 'Ė̃' => 'E', // E with dot and macron 188 | 'Ẹ' => '\\d{E}', // E with dot below 189 | 'E̋' => '\\H{E}', // E with double acute 190 | 'Ę̋' => 'E', // E with double acute and ogonek 191 | 'Ȅ' => 'E', // E with double grave 192 | 'Ȅ' => 'E', // E with double grave 193 | 'È' => '\\`{E}', // E with grave 194 | 'Ȩ̀' => 'E', // E with grave and cedilla 195 | 'Ẹ̀' => 'E', // E with grave and dot below 196 | 'È̱' => 'E', // E with grave and macron below 197 | 'Ę̀' => 'E', // E with grave and ogonek 198 | 'Ẻ' => 'E', // E with hook above 199 | 'Ȇ' => 'E', // E with inverted breve 200 | 'Ē' => '\\={E}', // E with macron 201 | 'Ḗ' => 'E', // E with macron and acute 202 | 'Ē̆' => 'E', // E with macron and breve 203 | 'Ē̌' => 'E', // E with macron and caron 204 | 'Ē̂' => 'E', // E with macron and circumflex 205 | 'Ẹ̄' => 'E', // E with macron and dot below 206 | 'Ḕ' => 'E', // E with macron and grave 207 | 'Ē̱' => 'E', // E with macron and macron below 208 | 
'Ę̄' => 'E', // E with macron and ogonek 209 | 'E̱' => 'E', // E with macron below 210 | 'Ę̄́' => 'E', // E with macron, acute and ogonek 211 | 'Ę̄̌' => 'E', // E with macron, caron and ogonek 212 | 'Ę̄̂' => 'E', // E with macron, circumflex and ogonek 213 | 'Ę̄̀' => 'E', // E with macron, grave and ogonek 214 | 'ⱸ' => 'E', // E with notch 215 | 'Ę' => '\\k{E}', // E with ogonek 216 | 'E᷎' => 'E', // E with ogonek above 217 | 'Ę᷎' => 'E', // E with ogonek above and ogonek 218 | 'Ę̣' => 'E', // E with ogonek and dot below 219 | 'ᶒ' => 'E', // E with retroflex hook 220 | 'E̊' => 'E', // E with ring 221 | 'Ɇ' => 'E', // E with stroke 222 | 'Ẽ' => '\\~{E}', // E with tilde 223 | 'Ẽ́' => 'E', // E with tilde and acute 224 | 'Ẽ̌' => 'E', // E with tilde and caron 225 | 'Ẽ̂' => 'E', // E with tilde and circumflex 226 | 'Ẽ̀' => 'E', // E with tilde and grave 227 | 'Ę̃' => 'E', // E with tilde and ogonek 228 | 'Ẽ̍' => 'E', // E with tilde and vertical line 229 | 'Ḛ' => 'E', // E with tilde below 230 | 'E̍' => 'E', // E with vertical line 231 | // 'ʆ', Esh with curl 232 | // 'ᶋ', Esh with palatal hook 233 | // 'ᶘ', Esh with retroflex hook 234 | // 'Ǯ', Ezh with caron 235 | // 'ʓ', Ezh with curl 236 | // 'ᶚ', Ezh with retroflex hook 237 | // 'ƺ', Ezh with tail 238 | 'F́' => '\\\'{F}', // F with acute 239 | 'F̧' => '\\c{F}', // F with cedilla 240 | 'Ḟ' => '\\.{F}', // F with dot above 241 | 'F̣' => '\\d{F}', // F with dot below 242 | 'F̀' => '\\`{F}', // F with grave 243 | 'Ƒ' => 'F', // F with hook (Script F) 244 | 'F̄' => '\\={F}', // F with macron 245 | 'ᵮ' => 'F', // F with middle tilde 246 | 'ᶂ' => 'F', // F with palatal hook 247 | 'Ǵ' => '\\\'{G}', // G with acute 248 | 'Ğ' => '\\u{G}', // G with breve 249 | 'Ǧ' => '\\v{G}', // G with caron 250 | 'Ģ' => '\\c{G}', // G with cedilla 251 | 'Ĝ' => '\\^{G}', // G with circumflex 252 | 'G̈' => '\\"{G}', // G with diaeresis 253 | 'G̤' => 'G', // G with diaeresis below 254 | 'Ġ' => '\\.{G}', // G with dot above 255 | 'G̣' => 
'\\d{G}', // G with dot below 256 | 'G̀' => '\\`{G}', // G with grave 257 | 'Ɠ' => 'G', // G with hook 258 | 'Ḡ' => '\\={G}', // G with macron 259 | 'Ꞡ' => 'G', // G with oblique stroke 260 | 'ᶃ' => 'G', // G with palatal hook 261 | 'Ǥ' => 'G', // G with stroke 262 | 'G̃' => '\\~{G}', // G with tilde 263 | // 'Ɣ̓', Gamma with comma above 264 | // 'ʡ', Glottal stop with stroke 265 | 'H́' => '\\\'{H}', // H with acute 266 | 'Ḫ' => 'H', // H with breve below 267 | 'Ȟ' => '\\v{H}', // H with caron 268 | 'Ḩ' => '\\c{H}', // H with cedilla 269 | 'H̐' => 'H', // H with chandrabindu 270 | 'Ĥ' => '\\^{H}', // H with circumflex 271 | 'H̓' => 'H', // H with comma above 272 | 'Ⱨ' => 'H', // H with descender 273 | 'Ḧ' => '\\"{H}', // H with diaeresis 274 | 'H̤' => 'H', // H with diaeresis below 275 | 'Ḣ' => '\\.{H}', // H with dot above 276 | 'Ḣ' => '\\.{H}', // H with dot above 277 | 'Ḥ' => '\\d{H}', // H with dot below 278 | 'Ɦ' => 'H', // H with hook 279 | 'H̱' => 'H', // H with line below 280 | 'H̄' => '\\={H}', // H with macron 281 | 'Ħ' => 'H', // H with stroke 282 | // 'Ꜧ', Heng 283 | // 'ɧ', Heng with hook 284 | 'I' => 'I', // I (lowercase, i.e. 
ı) without dot above 285 | 'İ' => 'I', // I (uppercase) with dot above 286 | 'Í' => '\\\'{I}', // I with acute 287 | 'Ĭ' => '\\u{I}', // I with breve 288 | 'Ǐ' => '\\v{I}', // I with caron 289 | 'I̧' => '\\c{I}', // I with cedilla 290 | 'Í̧' => 'I', // I with cedilla and acute 291 | 'Î̧' => 'I', // I with cedilla and circumflex 292 | 'Ì̧' => 'I', // I with cedilla and grave 293 | 'I̐' => 'I', // I with chandrabindu 294 | 'Î' => '\\^{I}', // I with circumflex 295 | 'Î́' => 'I', // I with circumflex and acute 296 | 'I̓' => 'I', // I with comma above 297 | 'Ï' => '\\"{I}', // I with diaeresis 298 | 'Ḯ' => 'I', // I with diaeresis and acute 299 | 'Ị' => '\\d{I}', // I with dot below 300 | 'Ị́' => 'I', // I with dot below and acute 301 | 'Ị̂' => 'I', // I with dot below and circumflex 302 | 'Ị̃' => 'I', // I with dot below and tilde 303 | 'I̋' => '\\H{I}', // I with double acute 304 | 'Ȉ' => 'I', // I with double grave 305 | 'Ȉ' => 'I', // I with double grave 306 | 'Ì' => '\\`{I}', // I with grave 307 | 'Ỉ' => 'I', // I with hook above 308 | 'Ȋ' => 'I', // I with inverted breve 309 | 'I̱' => 'I', // I with line below 310 | 'Í̱' => 'I', // I with line below and acute 311 | 'Î̱' => 'I', // I with line below and circumflex 312 | 'Ì̱' => 'I', // I with line below and grave 313 | 'Ī̱' => 'I', // I with line below and macron 314 | 'Ī' => '\\={I}', // I with macron 315 | 'Ī́' => 'I', // I with macron and acute 316 | 'Ī̌' => 'I', // I with macron and caron 317 | 'Ī̂' => 'I', // I with macron and circumflex 318 | 'Ī̀' => 'I', // I with macron and grave 319 | 'Į' => '\\k{I}', // I with ogonek 320 | 'Į́' => 'I', // I with ogonek and acute 321 | 'Į̌' => 'I', // I with ogonek and caron 322 | 'Į̂' => 'I', // I with ogonek and circumflex 323 | 'Į̀' => 'I', // I with ogonek and grave 324 | 'Į̃' => 'I', // I with ogonek and tilde 325 | 'ᶖ' => 'I', // I with retroflex hook 326 | 'Ɨ' => 'I', // I with stroke 327 | 'Ɨ́' => 'I', // I with stroke and acute 328 | 'Ɨ̌' => 'I', // I with stroke 
and caron 329 | 'Ɨ̧' => 'I', // I with stroke and cedilla 330 | 'Ɨ̂' => 'I', // I with stroke and circumflex 331 | 'Ɨ̀' => 'I', // I with stroke and grave 332 | 'Ɨ̄' => 'I', // I with stroke and macron 333 | 'Ɨ̃' => 'I', // I with stroke and tilde 334 | 'Ɨ̧̌' => 'I', // I with stroke, cedilla and caron 335 | 'Ɨ̧̀' => 'I', // I with stroke, cedilla and grave 336 | 'Ɨ̧̂' => 'I', // I with stroke, cedilla, and circumflex 337 | 'Ĩ' => '\\~{I}', // I with tilde 338 | 'Ĩ́' => 'I', // I with tilde and acute 339 | 'Ĩ̌' => 'I', // I with tilde and caron 340 | 'Ĩ̂' => 'I', // I with tilde and circumflex 341 | 'Ĩ̀' => 'I', // I with tilde and grave 342 | 'Ĩ̍' => 'I', // I with tilde and vertical line 343 | 'Ḭ' => 'I', // I with tilde below 344 | 'I̍' => 'I', // I with vertical line 345 | // 'Ꝼ́', Insular F with acute 346 | // 'Ꝼ̇', Insular F with dot above 347 | // 'Ꝼ̣', Insular F with dot below 348 | // 'ƾ', Inverted glottal stop with stroke 349 | // 'Ɩ́', Iota with acute 350 | // 'Ɩ̀', Iota with grave 351 | // 'ᵼ', Iota with stroke 352 | // 'Ɩ̃', Iota with tilde 353 | 'J́' => '\\\'{J}', // J with acute 354 | 'J̌' => '\\v{J}', // J with caron 355 | 'Ĵ' => '\\^{J}', // J with circumflex 356 | 'ʝ' => 'J', // J with crossed-tail 357 | 'J̣' => '\\d{J}', // J with dot below 358 | 'J̄' => '\\={J}', // J with macron 359 | 'Ɉ' => 'J', // J with stroke 360 | 'J̃' => '\\~{J}', // J with tilde 361 | 'Ḱ' => '\\\'{K}', // K with acute 362 | 'Ǩ' => '\\v{K}', // K with caron 363 | 'Ķ' => '\\c{K}', // K with cedilla 364 | 'Ⱪ' => 'K', // K with descender 365 | 'Ꝃ' => 'K', // K with diagonal stroke 366 | 'K̇' => '\\.{K}', // K with dot above 367 | 'Ḳ' => '\\d{K}', // K with dot below 368 | 'K̀' => '\\`{K}', // K with grave 369 | 'Ƙ' => 'K', // K with hook 370 | 'Ḵ' => 'K', // K with line below 371 | 'K̄' => '\\={K}', // K with macron 372 | 'Ꞣ' => 'K', // K with oblique stroke 373 | 'ᶄ' => 'K', // K with palatal hook 374 | 'Ꝁ' => 'K', // K with stroke 375 | 'Ꝅ' => 'K', // K with stroke and 
diagonal stroke 376 | 'Ĺ' => '\\\'{L}', // L with acute 377 | 'Ḷ́' => 'L', // L with acute and dot below 378 | 'Ƚ' => 'L', // L with bar 379 | 'ɬ' => 'L', // L with belt 380 | 'Ľ' => '\\v{L}', // L with caron 381 | 'Ļ' => '\\c{L}', // L with cedilla 382 | 'L̐' => 'L', // L with chandrabindu 383 | 'L̂' => '\\^{L}', // L with circumflex 384 | 'Ḽ' => 'L', // L with circumflex below 385 | 'L̓' => 'L', // L with comma above 386 | 'Ḷ̓' => 'L', // L with comma above and dot below 387 | 'L̦' => 'L', // L with comma below 388 | 'ȴ' => 'L', // L with curl 389 | 'Ḷ' => '\\d{L}', // L with dot below 390 | 'Ḹ' => 'L', // L with dot below and macron 391 | 'Ⱡ' => 'L', // L with double bar 392 | 'Ꝉ' => 'L', // L with high stroke 393 | 'Ḻ' => 'L', // L with line below 394 | 'Ɫ' => 'L', // L with middle tilde 395 | 'ᶅ' => 'L', // L with palatal hook 396 | 'ɭ' => 'L', // L with retroflex hook 397 | 'ꞎ' => 'L', // L with retroflex hook and belt 398 | 'Ł' => 'L', // L with stroke 399 | 'L̃' => '\\~{L}', // L with tilde 400 | // 'ƛ', Lambda with stroke 401 | // 'ƛ̓', Lambda with stroke and comma above 402 | // 'ẜ', Long S with diagonal stroke 403 | // 'ẝ', Long S with high stroke 404 | // 'ẛ', Long s with dot above 405 | 'Ḿ' => '\\\'{M}', // M with acute 406 | 'Ṃ́' => 'M', // M with acute and dot below 407 | 'M̧' => '\\c{M}', // M with cedilla 408 | 'M̐' => 'M', // M with chandrabindu 409 | 'M̓' => 'M', // M with comma above 410 | 'Ṃ̓' => 'M', // M with comma above and dot below 411 | 'M̦' => 'M', // M with comma below 412 | 'M̈' => '\\"{M}', // M with diaeresis 413 | 'Ṁ' => '\\.{M}', // M with dot above 414 | 'Ṃ' => '\\d{M}', // M with dot below 415 | 'Ṃ' => '\\d{M}', // M with dot below 416 | 'M̀' => '\\`{M}', // M with grave 417 | 'Ɱ' => 'M', // M with hook 418 | 'M̄' => '\\={M}', // M with macron 419 | 'ᵯ' => 'M', // M with middle tilde 420 | 'M̨' => '\\k{M}', // M with ogonek 421 | 'ᶆ' => 'M', // M with palatal hook 422 | 'M̃' => '\\~{M}', // M with tilde 423 | 'M̍' => 'M', // M 
with vertical line 424 | 'Ń' => '\\\'{N}', // N with acute 425 | 'Ṇ́' => 'N', // N with acute and dot below 426 | 'Ň' => '\\v{N}', // N with caron 427 | 'Ņ' => '\\c{N}', // N with cedilla 428 | 'N̐' => 'N', // N with chandrabindu 429 | 'N̂' => '\\^{N}', // N with circumflex 430 | 'Ṋ' => 'N', // N with circumflex below 431 | 'Ṇ̓' => 'N', // N with comma above and dot below 432 | 'N̦' => 'N', // N with comma below 433 | 'ȵ' => 'N', // N with curl 434 | 'Ꞑ' => 'N', // N with descender 435 | 'N̈' => 'N', // N with diaresis 436 | 'Ṅ' => '\\.{N}', // N with dot above 437 | 'Ṇ' => '\\d{N}', // N with dot below 438 | 'Ǹ' => '\\`{N}', // N with grave 439 | 'Ɲ' => 'N', // N with left hook 440 | 'Ṉ' => 'N', // N with line below 441 | 'Ƞ' => 'N', // N with long right leg 442 | 'N̄' => '\\={N}', // N with macron 443 | 'ᵰ' => 'N', // N with middle tilde 444 | 'Ꞥ' => 'N', // N with oblique stroke 445 | 'ᶇ' => 'N', // N with palatal hook 446 | 'ɳ' => 'N', // N with retroflex hook 447 | 'Ñ̈' => 'N', // N with tidle and diaeresis 448 | 'Ñ' => '\\~{N}', // N with tilde 449 | 'N̰' => 'N', // N with tilde below 450 | 'N̲' => '\\b{N}', // N with underline 451 | 'N̍' => 'N', // N with vertical line 452 | 'Ó' => '\\\'{O}', // O with acute 453 | 'Ó̧' => 'O', // O with acute and cedilla 454 | 'Ọ́' => 'O', // O with acute and dot below 455 | 'Ó̱' => 'O', // O with acute and line below 456 | 'Ǫ́' => 'O', // O with acute and ogonek 457 | 'Ɵ' => 'O', // O with bar 458 | 'Ŏ' => '\\u{O}', // O with breve 459 | 'Ǒ' => '\\v{O}', // O with caron 460 | 'Ǒ̧' => 'O', // O with caron and cedilla 461 | 'Ǫ̌' => 'O', // O with caron and ogonek 462 | 'O̧' => '\\c{O}', // O with cedilla 463 | 'O̐' => 'O', // O with chandrabindu 464 | 'Ô' => '\\^{O}', // O with circumflex 465 | 'Ố' => 'O', // O with circumflex and acute 466 | 'Ô̧' => 'O', // O with circumflex and cedilla 467 | 'Ộ' => 'O', // O with circumflex and dot below 468 | 'Ộ' => 'O', // O with circumflex and dot below 469 | 'Ồ' => 'O', // O with 
circumflex and grave 470 | 'Ổ' => 'O', // O with circumflex and hook above 471 | 'Ô̱' => 'O', // O with circumflex and line below 472 | 'Ǫ̂' => 'O', // O with circumflex and ogonek 473 | 'Ỗ' => 'O', // O with circumflex and tilde 474 | 'O̭' => 'O', // O with circumflex below 475 | 'Ö' => '\\"{O}', // O with diaeresis 476 | 'Ö́' => 'O', // O with diaeresis and acute 477 | 'Ö̀' => 'O', // O with diaeresis and grave 478 | 'Ö̱' => 'O', // O with diaeresis and line below 479 | 'Ȫ' => 'O', // O with diaeresis and macron 480 | 'Ȯ' => '\\.{O}', // O with dot above 481 | 'Ȱ' => 'O', // O with dot above and macron 482 | 'O̍͘' => 'O', // O with dot above and vertical line 483 | 'O͘' => 'O', // O with dot above right 484 | 'Ó͘' => 'O', // O with dot above right and acute 485 | 'Ò͘' => 'O', // O with dot above right and grave 486 | 'Ō͘' => 'O', // O with dot above right and macron 487 | 'Ọ' => '\\d{O}', // O with dot below 488 | 'Ő' => '\\H{O}', // O with double acute 489 | 'Ő' => '\\H{O}', // O with double acute 490 | 'Ȍ' => 'O', // O with double grave 491 | 'Ò' => '\\`{O}', // O with grave 492 | 'Ò̧' => 'O', // O with grave and cedilla 493 | 'Ọ̀' => 'O', // O with grave and dot below 494 | 'Ò̱' => 'O', // O with grave and line below 495 | 'Ǫ̀' => 'O', // O with grave and ogonek 496 | 'Ỏ' => 'O', // O with hook above 497 | 'Ơ' => 'O', // O with horn 498 | 'Ớ' => 'O', // O with horn and acute 499 | 'Ợ' => 'O', // O with horn and dot below 500 | 'Ờ' => 'O', // O with horn and grave 501 | 'Ở' => 'O', // O with horn and hook above 502 | 'Ỡ' => 'O', // O with horn and tilde 503 | 'Ȏ' => 'O', // O with inverted breve 504 | 'O̱' => 'O', // O with line below 505 | 'Ꝋ' => 'O', // O with long stroke overlay 506 | 'Ꝋ' => 'O', // O with long stroke overlay 507 | 'Ꝍ' => 'O', // O with loop 508 | 'ⱺ' => 'O', // O with low ring inside 509 | 'Ō' => '\\={O}', // O with macron 510 | 'Ṓ' => 'O', // O with macron and acute 511 | 'Ō̌' => 'O', // O with macron and caron 512 | 'Ō̂' => 'O', // O with 
macron and circumflex 513 | 'Ọ̄' => 'O', // O with macron and dot below 514 | 'Ṑ' => 'O', // O with macron and grave 515 | 'Ō̱' => 'O', // O with macron and line below 516 | 'Ǭ' => 'O', // O with macron and ogonek 517 | 'Ǭ̀' => 'O', // O with macron, grave and ogonek 518 | 'Ǫ' => '\\k{O}', // O with ogonek 519 | 'Ø' => 'O', // O with stroke 520 | 'Ǿ' => 'O', // O with stroke and acute 521 | 'Ø̌' => 'O', // O with stroke and caron 522 | 'Ø̂' => 'O', // O with stroke and circumflex 523 | 'Ø̀' => 'O', // O with stroke and grave 524 | 'Ø̄' => 'O', // O with stroken and macron 525 | 'Õ' => '\\~{O}', // O with tilde 526 | 'Ṍ' => 'O', // O with tilde and acute 527 | 'Ṏ' => 'O', // O with tilde and diaeresis 528 | 'Ȭ' => 'O', // O with tilde and macron 529 | 'O̲' => '\\b{O}', // O with underline 530 | 'O̍' => 'O', // O with vertical line 531 | // 'ᶓ', Open E with retroflex hook 532 | // 'Ɔ́', Open O with acute 533 | // 'Ɔ̧́', Open O with acute and cedilla 534 | // 'Ɔ̌', Open O with caron 535 | // 'Ɔ̧̌', Open O with caron and cedilla 536 | // 'Ɔ̧', Open O with cedilla 537 | // 'Ɔ̂', Open O with circumflex 538 | // 'Ɔ̧̂', Open O with circumflex and cedilla 539 | // 'Ɔ̈', Open O with diaeresis 540 | // 'Ɔ̀', Open O with grave 541 | // 'Ɔ̧̀', Open O with grave and cedilla 542 | // 'Ɔ̄', Open O with macron 543 | // 'ᶗ', Open O with retroflex hook 544 | // 'Ɔ̃', Open O with tilde 545 | // 'Ɔ̃́', Open O with tilde and acute 546 | // 'Ɔ̃̌', Open O with tilde and caron 547 | // 'Ɔ̃̂', Open O with tilde and circumflex 548 | // 'Ɔ̃̀', Open O with tilde and grave 549 | // 'Ɔ̃̍', Open O with tilde and verticale line 550 | // 'Ɔ̱', Open O with tilde below 551 | // 'Ɔ̍', Open O with vertical line 552 | 'Ṕ' => '\\\'{P}', // P with acute 553 | 'P̓' => 'P', // P with comma above 554 | 'P̈' => '\\"{P}', // P with diaeresis 555 | 'P̤' => 'P', // P with diaeresis below 556 | 'Ṗ' => '\\.{P}', // P with dot above 557 | 'P̣' => 'P', // P with dot above below 558 | 'Ꝓ' => 'P', // P with flourish 
559 | 'P̀' => '\\`{P}', // P with grave 560 | 'Ƥ' => 'P', // P with hook 561 | 'P̄' => '\\={P}', // P with macron 562 | 'ᵱ' => 'P', // P with middle tilde 563 | 'ᶈ' => 'P', // P with palatal hook 564 | 'Ꝕ' => 'P', // P with squirrel tail 565 | 'Ᵽ' => 'P', // P with stroke 566 | 'Ꝑ' => 'P', // P with stroke through descender 567 | 'P̄' => '\\~{P}', // P with tilde 568 | 'Q̓' => 'Q', // Q with comma above 569 | 'Ꝙ' => 'Q', // Q with diagonal stroke 570 | 'Q̇' => '\\.{Q}', // Q with dot above 571 | 'ʠ' => 'Q', // Q with hook 572 | 'Ɋ' => 'Q', // Q with hook tail 573 | 'Ꝗ' => 'Q', // Q with stroke through descender 574 | 'Ŕ' => '\\\'{R}', // R with acute 575 | 'Ř' => '\\v{R}', // R with caron 576 | 'Ŗ' => '\\c{R}', // R with cedilla 577 | 'R̂' => '\\^{R}', // R with circumflex 578 | 'R̓' => 'R', // R with comma above 579 | 'R̦' => 'R', // R with comma below 580 | 'R̰' => 'R', // R with diaeresis below 581 | 'Ṙ' => '\\.{R}', // R with dot above 582 | 'Ṛ' => '\\d{R}', // R with dot below 583 | 'Ṝ' => 'R', // R with dot below and macron 584 | 'Ȑ' => 'R', // R with double grave 585 | 'ɾ' => 'R', // R with fishhook 586 | 'ᵳ' => 'R', // R with fishhook and middle tilde 587 | 'Ȓ' => 'R', // R with inverted breve 588 | 'Ṟ' => 'R', // R with line below 589 | 'ɼ' => 'R', // R with long leg 590 | 'R̄' => '\\={R}', // R with macron 591 | 'ᵲ' => 'R', // R with middle tilde 592 | 'Ꞧ' => 'R', // R with oblique stroke 593 | 'ᶉ' => 'R', // R with palatal hook 594 | 'R̥' => 'R', // R with ring below 595 | 'R̥̄' => 'R', // R with ring below and macron 596 | 'Ɍ' => 'R', // R with stroke 597 | 'Ɽ' => 'R', // R with tail 598 | 'R̃' => '\\~{R}', // R with tilde 599 | // 'Ꜿ', Reversed C with dot 600 | // 'ɿ', Reversed R with fishhook 601 | // 'ʢ', Reversed glottal stop with stroke 602 | // 'ɝ', Reversed open E with hook (Reversed Epsilon hook) 603 | // 'ᶔ', Reversed open E with retroflex hook 604 | 'Ś' => '\\\'{S}', // S with acute 605 | 'Ṥ' => 'S', // S with acute and dot above 606 | 'Š' => 
'\\v{S}', // S with caron 607 | 'Ṧ' => 'S', // S with caron and dot above 608 | 'Ş' => '\\c{S}', // S with cedilla 609 | 'Ŝ' => '\\^{S}', // S with circumflex 610 | 'Ș' => 'S', // S with comma below 611 | 'Ṡ' => '\\.{S}', // S with dot above 612 | 'Ṣ' => '\\d{S}', // S with dot below 613 | 'Ṩ' => 'S', // S with dot below and dot above 614 | 'ʂ' => 'S', // S with hook 615 | 'ẞ' => 'S', // S with middle tilde 616 | 'Ꞩ' => 'S', // S with oblique stroke 617 | 'ᶊ' => 'S', // S with palatal hook 618 | 'Ȿ' => 'S', // S with swash tail 619 | // 'ɚ', Schwa with hook 620 | // 'ᶕ', Schwa with retroflex hook 621 | // 'ᴓ', Sideways O with stroke 622 | // 'ʛ', Small capital G with hook 623 | // 'ᵾ', Small capital U with stroke 624 | 'Ť' => '\\v{T}', // T with caron 625 | 'Ţ' => '\\c{T}', // T with cedilla 626 | 'Ṱ' => 'T', // T with circumflex below 627 | 'Ț' => 'T', // T with comma below 628 | 'ȶ' => 'T', // T with curl 629 | 'T̈' => '\\"{T}', // T with diaeresis 630 | 'Ⱦ' => 'T', // T with diagonal stroke 631 | 'Ṫ' => '\\.{T}', // T with dot above 632 | 'Ṭ' => '\\d{T}', // T with dot below 633 | 'Ƭ' => 'T', // T with hook 634 | 'Ṯ' => 'T', // T with line below 635 | 'ᵵ' => 'T', // T with middle tilde 636 | 'ƫ' => 'T', // T with palatal hook 637 | 'Ʈ' => 'T', // T with retroflex hook 638 | 'Ŧ' => 'T', // T with stroke 639 | // 'Ꝥ', Thorn with stroke 640 | // 'Ꝧ', Thorn with stroke through descender 641 | // 'ʮ', Turned H with fishhook 642 | // 'ʯ', Turned H with fishhook and tail 643 | // 'ɰ', Turned M with long leg 644 | // 'ɻ', Turned R with hook 645 | // 'ɺ', Turned R with long leg 646 | // 'ⱹ', Turned R with tail 647 | // 'ƻ', Two with stroke 648 | 'Ʉ' => 'U', // U bar 649 | 'Ú' => '\\\'{U}', // U with acute 650 | 'Ŭ' => '\\u{U}', // U with breve 651 | 'Ǔ' => '\\v{U}', // U with caron 652 | 'Û' => '\\^{U}', // U with circumflex 653 | 'Ṷ' => 'U', // U with circumflex below 654 | 'Ü' => '\\"{U}', // U with diaeresis 655 | 'Ǘ' => 'U', // U with diaeresis and acute 656 | 'Ǚ' => 
'U', // U with diaeresis and caron 657 | 'Ǜ' => 'U', // U with diaeresis and grave 658 | 'Ǖ' => 'U', // U with diaeresis and macron 659 | 'Ṳ' => 'U', // U with diaeresis below 660 | 'Ụ' => '\\d{U}', // U with dot below 661 | 'Ű' => '\\H{U}', // U with double acute 662 | 'Ȕ' => 'U', // U with double grave 663 | 'Ù' => '\\`{U}', // U with grave 664 | 'Ủ' => 'U', // U with hook above 665 | 'Ư' => 'U', // U with horn 666 | 'Ứ' => 'U', // U with horn and acute 667 | 'Ự' => 'U', // U with horn and dot below 668 | 'Ừ' => 'U', // U with horn and grave 669 | 'Ử' => 'U', // U with horn and hook above 670 | 'Ữ' => 'U', // U with horn and tilde 671 | 'Ȗ' => 'U', // U with inverted breve 672 | 'Ū' => '\\={U}', // U with macron 673 | 'Ṻ' => 'U', // U with macron and diaeresis 674 | 'Ų' => '\\k{U}', // U with ogonek 675 | 'ᶙ' => 'U', // U with retroflex hook 676 | 'Ů' => '\\r{U}', // U with ring above 677 | 'Ũ' => '\\~{U}', // U with tilde 678 | 'Ṹ' => 'U', // U with tilde and acute 679 | 'Ṵ' => 'U', // U with tilde below 680 | // 'ᵿ', Upsilon with stroke 681 | 'ⱴ' => 'V', // V with curl 682 | 'Ꝟ' => 'V', // V with diagonal stroke 683 | 'Ṿ' => '\\d{V}', // V with dot below 684 | 'Ʋ' => 'V', // V with hook (Script V) 685 | 'ᶌ' => 'V', // V with palatal hook 686 | 'ⱱ' => 'V', // V with right hook 687 | 'Ṽ' => '\\~{V}', // V with tilde 688 | 'Ẃ' => '\\\'{W}', // W with acute 689 | 'Ŵ' => '\\^{W}', // W with circumflex 690 | 'Ẅ' => '\\"{W}', // W with diaeresis 691 | 'Ẇ' => '\\.{W}', // W with dot above 692 | 'Ẉ' => '\\d{W}', // W with dot below 693 | 'Ẁ' => '\\`{W}', // W with grave 694 | 'Ⱳ' => 'W', // W with hook 695 | 'W̊' => '\\r{W}', // W with ring above 696 | 'Ẍ' => '\\"{X}', // X with diaeresis 697 | 'Ẋ' => '\\.{X}', // X with dot above 698 | 'ᶍ' => 'X', // X with palatal hook 699 | 'Ý' => '\\\'{Y}', // Y with acute 700 | 'Ŷ' => '\\^{Y}', // Y with circumflex 701 | 'Ÿ' => '\\"{Y}', // Y with diaeresis 702 | 'Ẏ' => '\\.{Y}', // Y with dot above 703 | 'Ỵ' => '\\d{Y}', // Y with 
dot below 704 | 'Ỳ' => '\\`{Y}', // Y with grave 705 | 'Ƴ' => 'Y', // Y with hook 706 | 'Ỷ' => 'Y', // Y with hook above 707 | 'Ỿ' => 'Y', // Y with loop 708 | 'Ȳ' => '\\={Y}', // Y with macron 709 | 'Y̊' => '\\r{Y}', // Y with ring above 710 | 'Ɏ' => 'Y', // Y with stroke 711 | 'Ỹ' => '\\~{Y}', // Y with tilde 712 | 'Ź' => '\\\'{Z}', // Z with acute 713 | 'Ž' => '\\v{Z}', // Z with caron 714 | 'Ẑ' => '\\^{Z}', // Z with circumflex 715 | 'ʑ' => 'Z', // Z with curl 716 | 'Ⱬ' => 'Z', // Z with descender 717 | 'Ż' => '\\.{Z}', // Z with dot above 718 | 'Ẓ' => '\\d{Z}', // Z with dot below 719 | 'Ȥ' => 'Z', // Z with hook 720 | 'Ẕ' => 'Z', // Z with line below 721 | 'ᵶ' => 'Z', // Z with middle tilde 722 | 'ᶎ' => 'Z', // Z with palatal hook 723 | 'ʐ' => 'Z', // Z with retroflex hook 724 | 'Ƶ' => 'Z', // Z with stroke 725 | 'Ɀ' => 'Z', // Z with swash tail 726 | 'á' => '\\\'{a}', // a with acute 727 | 'á̧' => 'a', // a with acute and cedilla 728 | 'ạ́' => 'a', // a with acute and dot below 729 | 'ą́' => 'a', // a with acute and ogonek 730 | 'ą̌' => 'a', // a with acute and ogonek 731 | 'ă' => '\\u{a}', // a with breve 732 | 'ắ' => 'a', // a with breve and acute 733 | 'ặ' => 'a', // a with breve and dot below 734 | 'ằ' => 'a', // a with breve and grave 735 | 'ẳ' => 'a', // a with breve and hook above 736 | 'ẵ' => 'a', // a with breve and tilde 737 | 'ǎ' => '\\v{a}', // a with caron 738 | 'ǎ̧' => 'a', // a with caron and cedilla 739 | 'a̧' => '\\c{a}', // a with cedilla 740 | 'a̐' => 'a', // a with chandrabindu 741 | 'â' => '\\^{a}', // a with circumflex 742 | 'ấ' => 'a', // a with circumflex and acute 743 | 'â̧' => 'a', // a with circumflex and cedilla 744 | 'ậ' => 'a', // a with circumflex and dot below 745 | 'ầ' => 'a', // a with circumflex and grave 746 | 'ẩ' => 'a', // a with circumflex and hook above 747 | 'ą̂' => 'a', // a with circumflex and ogonek 748 | 'ẫ' => 'a', // a with circumflex and tilde 749 | 'a̭' => 'a', // a with circumflex below 750 | 'a̓' => 'a', // 
a with comma above 751 | 'ä' => '\\"{a}', // a with diaeresis 752 | 'ä́' => 'a', // a with diaeresis and acute 753 | 'ä̌' => 'a', // a with diaeresis and caron 754 | 'ä̂' => 'a', // a with diaeresis and circumflex 755 | 'ạ̈' => 'a', // a with diaeresis and dot below 756 | 'ä̀' => 'a', // a with diaeresis and grave 757 | 'ǟ' => 'a', // a with diaeresis and macron 758 | 'ą̈' => 'a', // a with diaeresis and ogonek 759 | 'ą̈̌' => 'a', // a with diaeresis, caron and ogonek 760 | 'ą̈̂' => 'a', // a with diaeresis, circumflex and ogonek 761 | 'ą̈̀' => 'a', // a with diaeresis, grave and ogonek 762 | 'ȧ' => '\\.{a}', // a with dot above 763 | 'ȧ́' => 'a', // a with dot above and acute 764 | 'ǡ' => 'a', // a with dot above and macron 765 | 'ạ' => '\\d{a}', // a with dot below 766 | 'a̋' => '\\H{a}', // a with double acute 767 | 'ȁ' => 'a', // a with double grave 768 | 'à' => '\\`{a}', // a with grave 769 | 'à̧' => 'a', // a with grave and cedilla 770 | 'ạ̀' => 'a', // a with grave and dot below 771 | 'ą̀' => 'a', // a with grave and ogonek 772 | 'ả' => 'a', // a with hook above 773 | 'ȃ' => 'a', // a with inverted breve 774 | 'ā' => '\\={a}', // a with macron 775 | 'ā́' => 'a', // a with macron and acute 776 | 'ā̆' => 'a', // a with macron and breve 777 | 'ā̌' => 'a', // a with macron and caron 778 | 'ā̂' => 'a', // a with macron and circumflex 779 | 'ạ̄' => 'a', // a with macron and dot below 780 | 'ā̀' => 'a', // a with macron and grave 781 | 'ą̄' => 'a', // a with macron and ogonek 782 | 'ā̊' => 'a', // a with macron and ring above 783 | 'a̱' => 'a', // a with macron below 784 | 'á̱' => 'a', // a with macron below and acute 785 | 'â̱' => 'a', // a with macron below and circumflex 786 | 'ä̱' => 'a', // a with macron below and diaeresis 787 | 'à̱' => 'a', // a with macron below and grave 788 | 'ā̱' => 'a', // a with macron below and macron 789 | 'å̱' => 'a', // a with macron below and ring above 790 | 'ą̄́' => 'a', // a with macron, acute and ogonek 791 | 'ą̄̌' => 'a', // 
a with macron, caron and ogonek 792 | 'ą̄̂' => 'a', // a with macron, circumflex and ogonek 793 | 'ą̄̀' => 'a', // a with macron, grave and ogonek 794 | 'ą' => '\\k{a}', // a with ogonek 795 | 'a᷎' => 'a', // a with ogonek above 796 | 'å' => '\\r{a}', // a with ring above 797 | 'ǻ' => 'a', // a with ring above and acute 798 | 'å̂' => 'a', // a with ring above and circumflex 799 | 'ą̊' => 'a', // a with ring above and ogonek 800 | 'ḁ' => 'a', // a with ring below 801 | 'ⱥ' => 'a', // a with stroke 802 | 'ã' => '\\~{a}', // a with tilde 803 | 'ã́' => 'a', // a with tilde and acute 804 | 'ạ̃' => 'a', // a with tilde and dot below 805 | 'ã̀' => 'a', // a with tilde and grave 806 | 'ą̃' => 'a', // a with tilde and ogonek 807 | 'a̰' => 'a', // a with tilde below 808 | 'a̍' => 'a', // a with vertical line 809 | // 'ɖ', african D, D with tail 810 | // 'ɑ́', alpha with acute 811 | // 'ɑ̌', alpha with caron 812 | // 'ɑ̂', alpha with circumflex 813 | // 'ɑ̀', alpha with grave 814 | 'b́' => '\\\'{b}', // b with acute 815 | 'b̓' => 'b', // b with comma above 816 | 'b̤' => 'b', // b with diaeresis below 817 | 'ḃ' => '\\.{b}', // b with dot above 818 | 'ḅ' => '\\d{b}', // b with dot below 819 | 'ɓ' => 'b', // b with hook 820 | 'ḇ' => 'b', // b with line below 821 | 'ƀ' => 'b', // b with stroke 822 | 'b̃' => '\\~{b}', // b with tilde 823 | 'ƃ' => 'b', // b with topbar 824 | 'ć' => '\\\'{c}', // c with acute 825 | 'ꞓ' => 'c', // c with bar 826 | 'c̆' => '\\u{c}', // c with breve 827 | 'č' => '\\v{c}', // c with caron 828 | 'č̓' => 'c', // c with caron and comma above 829 | 'ç' => '\\c{c}', // c with cedilla 830 | 'ḉ' => 'c', // c with cedilla and acute 831 | 'ç̆' => 'c', // c with cedilla and breve 832 | 'ç̌' => 'c', // c with cedilla and caron 833 | 'ç̇' => 'c', // c with cedilla and dot above 834 | 'ĉ' => '\\^{c}', // c with circumflex 835 | 'c̓' => 'c', // c with comma above 836 | 'c̈' => '\\"{c}', // c with diaeresis 837 | 'ċ' => '\\.{c}', // c with dot above 838 | 'c̣' => 
'\\d{c}', // c with dot below 839 | 'c̀' => '\\`{c}', // c with grave 840 | 'ƈ' => 'c', // c with hook 841 | 'c̄' => '\\={c}', // c with macron 842 | 'ȼ' => 'c', // c with stroke 843 | 'c̃' => '\\~{c}', // c with tilde 844 | // 'ꜯ', cuatrillo with comma 845 | 'd́' => '\\\'{d}', // d with acute 846 | 'ď' => '\\v{d}', // d with caron 847 | 'ḑ' => '\\c{d}', // d with cedilla 848 | 'd̂' => '\\^{d}', // d with circumflex 849 | 'ḓ' => 'd', // d with circumflex below 850 | 'ḓ' => 'd', // d with circumflex below 851 | 'd̓' => 'd', // d with comma above 852 | 'd̦' => 'd', // d with comma below 853 | 'd̤' => 'd', // d with diaeresis below 854 | 'ḋ' => '\\.{d}', // d with dot above 855 | 'ḍ' => '\\d{d}', // d with dot below 856 | 'ɗ' => 'd', // d with hook 857 | 'ḏ' => 'd', // d with line below 858 | 'đ' => 'd', // d with stroke 859 | 'ƌ' => 'd', // d with topbar 860 | 'é' => '\\\'{e}', // e with acute 861 | 'ȩ́' => 'e', // e with acute and cedilla 862 | 'ẹ́' => 'e', // e with acute and dot below 863 | 'é̱' => 'e', // e with acute and macron below 864 | 'ę́' => 'e', // e with acute and ogonek 865 | 'ĕ' => '\\u{e}', // e with breve 866 | 'ḝ' => 'e', // e with breve and cedilla 867 | 'ḝ' => 'e', // e with breve and cedilla 868 | 'ě' => '\\v{e}', // e with caron 869 | 'ȩ̌' => 'e', // e with caron and cedilla 870 | 'ę̌' => 'e', // e with caron and ogonek 871 | 'ȩ' => '\\c{e}', // e with cedilla 872 | 'ȩ' => '\\c{e}', // e with cedilla 873 | 'ê' => '\\^{e}', // e with circumflex 874 | 'ế' => 'e', // e with circumflex and acute 875 | 'ȩ̂' => 'e', // e with circumflex and cedilla 876 | 'ệ' => 'e', // e with circumflex and dot below 877 | 'ề' => 'e', // e with circumflex and grave 878 | 'ể' => 'e', // e with circumflex and hook above 879 | 'ê̱' => 'e', // e with circumflex and macron below 880 | 'ę̂' => 'e', // e with circumflex and ogonek 881 | 'ễ' => 'e', // e with circumflex and tilde 882 | 'ḙ' => 'e', // e with circumflex below 883 | 'ë' => '\\"{e}', // e with diaeresis 884 | 
'ë́' => 'e', // e with diaeresis and acute 885 | 'ë̌' => 'e', // e with diaeresis and caron 886 | 'ë̂' => 'e', // e with diaeresis and circumflex 887 | 'ë̀' => 'e', // e with diaeresis and grave 888 | 'ë̱' => 'e', // e with diaeresis and macron below 889 | 'ę̈' => 'e', // e with diaeresis and ogonek 890 | 'ę̈̌' => 'e', // e with diaeresis, caron and ogonek 891 | 'ę̈̂' => 'e', // e with diaeresis, circumflex and ogonek 892 | 'ę̈̀' => 'e', // e with diaeresis, grave and ogonek 893 | 'ė' => '\\.{e}', // e with dot above 894 | 'ė́' => 'e', // e with dot above and acute 895 | 'ę̇' => 'e', // e with dot above and ogonek 896 | 'ę̇́' => 'e', // e with dot above, acute and ogonek 897 | 'ė̃' => 'e', // e with dot and macron 898 | 'ẹ' => '\\d{e}', // e with dot below 899 | 'e̋' => '\\H{e}', // e with double acute 900 | 'ę̋' => 'e', // e with double acute and ogonek 901 | 'ȅ' => 'e', // e with double grave 902 | 'ȅ' => 'e', // e with double grave 903 | 'è' => '\\`{e}', // e with grave 904 | 'ȩ̀' => 'e', // e with grave and cedilla 905 | 'ẹ̀' => 'e', // e with grave and dot below 906 | 'è̱' => 'e', // e with grave and macron below 907 | 'ę̀' => 'e', // e with grave and ogonek 908 | 'ẻ' => 'e', // e with hook above 909 | 'ȇ' => 'e', // e with inverted breve 910 | 'ē' => '\\={e}', // e with macron 911 | 'ḗ' => 'e', // e with macron and acute 912 | 'ē̆' => 'e', // e with macron and breve 913 | 'ē̌' => 'e', // e with macron and caron 914 | 'ē̂' => 'e', // e with macron and circumflex 915 | 'ẹ̄' => 'e', // e with macron and dot below 916 | 'ḕ' => 'e', // e with macron and grave 917 | 'ē̱' => 'e', // e with macron and macron below 918 | 'ę̄' => 'e', // e with macron and ogonek 919 | 'e̱' => 'e', // e with macron below 920 | 'ę̄́' => 'e', // e with macron, acute and ogonek 921 | 'ę̄̌' => 'e', // e with macron, caron and ogonek 922 | 'ę̄̂' => 'e', // e with macron, circumflex and ogonek 923 | 'ę̄̀' => 'e', // e with macron, grave and ogonek 924 | 'ę' => '\\k{e}', // e with ogonek 925 | 
'e᷎' => 'e', // e with ogonek above 926 | 'ę᷎' => 'e', // e with ogonek above and ogonek 927 | 'ę̣' => 'e', // e with ogonek and dot below 928 | 'e̊' => 'e', // e with ring 929 | 'ɇ' => 'e', // e with stroke 930 | 'ẽ' => '\\~{e}', // e with tilde 931 | 'ẽ́' => 'e', // e with tilde and acute 932 | 'ẽ̌' => 'e', // e with tilde and caron 933 | 'ẽ̂' => 'e', // e with tilde and circumflex 934 | 'ẽ̀' => 'e', // e with tilde and grave 935 | 'ę̃' => 'e', // e with tilde and ogonek 936 | 'ẽ̍' => 'e', // e with tilde and vertical line 937 | 'ḛ' => 'e', // e with tilde below 938 | 'e̍' => 'e', // e with vertical line 939 | // 'ǯ', ezh with caron 940 | 'f́' => '\\\'{f}', // f with acute 941 | 'f̧' => '\\c{f}', // f with cedilla 942 | 'ḟ' => '\\.{f}', // f with dot above 943 | 'f̣' => '\\d{f}', // f with dot below 944 | 'f̀' => '\\`{f}', // f with grave 945 | 'ƒ' => 'f', // f with hook (Script F) 946 | 'f̄' => '\\={f}', // f with macron 947 | 'ǵ' => '\\\'{g}', // g with acute 948 | 'ğ' => '\\u{g}', // g with breve 949 | 'ǧ' => '\\v{g}', // g with caron 950 | 'ģ' => '\\c{g}', // g with cedilla 951 | 'ĝ' => '\\^{g}', // g with circumflex 952 | 'g̈' => '\\"{g}', // g with diaeresis 953 | 'g̤' => 'g', // g with diaeresis below 954 | 'ġ' => '\\.{g}', // g with dot above 955 | 'g̣' => '\\d{g}', // g with dot below 956 | 'g̀' => '\\`{g}', // g with grave 957 | 'ɠ' => 'g', // g with hook 958 | 'ḡ' => '\\={g}', // g with macron 959 | 'ꞡ' => 'g', // g with oblique stroke 960 | 'ǥ' => 'g', // g with stroke 961 | 'g̃' => '\\~{g}', // g with tilde 962 | // 'ɣ̓', gamma with comma above 963 | 'h́' => '\\\'{h}', // h with acute 964 | 'ḫ' => 'h', // h with breve below 965 | 'ȟ' => '\\v{h}', // h with caron 966 | 'ḩ' => '\\c{h}', // h with cedilla 967 | 'h̐' => 'h', // h with chandrabindu 968 | 'ĥ' => '\\^{h}', // h with circumflex 969 | 'h̓' => 'h', // h with comma above 970 | 'ⱨ' => 'h', // h with descender 971 | 'ḧ' => '\\"{h}', // h with diaeresis 972 | 'h̤' => 'h', // h with diaeresis below 
973 | 'ḣ' => '\\.{h}', // h with dot above 974 | 'ḣ' => '\\.{h}', // h with dot above 975 | 'ḥ' => '\\d{h}', // h with dot below 976 | 'ɦ' => 'h', // h with hook 977 | 'ẖ' => 'h', // h with line below 978 | 'h̄' => '\\={h}', // h with macron 979 | 'ħ' => 'h', // h with stroke 980 | // 'ꜧ', heng 981 | 'ı' => 'i', // i (lowercase, i.e. ı) without dot above 982 | 'i' => 'i', // i (uppercase) with dot above 983 | 'í' => '\\\'{i}', // i with acute 984 | 'ĭ' => '\\u{i}', // i with breve 985 | 'ǐ' => '\\v{i}', // i with caron 986 | 'i̧' => '\\c{i}', // i with cedilla 987 | 'í̧' => 'i', // i with cedilla and acute 988 | 'î̧' => 'i', // i with cedilla and circumflex 989 | 'ì̧' => 'i', // i with cedilla and grave 990 | 'i̐' => 'i', // i with chandrabindu 991 | 'î' => '\\^{i}', // i with circumflex 992 | 'î́' => 'i', // i with circumflex and acute 993 | 'i̓' => 'i', // i with comma above 994 | 'ï' => '\\"{i}', // i with diaeresis 995 | 'ḯ' => 'i', // i with diaeresis and acute 996 | 'ị' => '\\d{i}', // i with dot below 997 | 'ị́' => 'i', // i with dot below and acute 998 | 'ị̂' => 'i', // i with dot below and circumflex 999 | 'ị̃' => 'i', // i with dot below and tilde 1000 | 'i̋' => '\\H{i}', // i with double acute 1001 | 'ȉ' => 'i', // i with double grave 1002 | 'ȉ' => 'i', // i with double grave 1003 | 'ì' => '\\`{i}', // i with grave 1004 | 'ỉ' => 'i', // i with hook above 1005 | 'ȋ' => 'i', // i with inverted breve 1006 | 'i̱' => 'i', // i with line below 1007 | 'í̱' => 'i', // i with line below and acute 1008 | 'î̱' => 'i', // i with line below and circumflex 1009 | 'ì̱' => 'i', // i with line below and grave 1010 | 'ī̱' => 'i', // i with line below and macron 1011 | 'ī' => '\\={i}', // i with macron 1012 | 'ī́' => 'i', // i with macron and acute 1013 | 'ī̌' => 'i', // i with macron and caron 1014 | 'ī̂' => 'i', // i with macron and circumflex 1015 | 'ī̀' => 'i', // i with macron and grave 1016 | 'į' => '\\k{i}', // i with ogonek 1017 | 'į́' => 'i', // i with ogonek and 
acute 1018 | 'į̌' => 'i', // i with ogonek and caron 1019 | 'į̂' => 'i', // i with ogonek and circumflex 1020 | 'į̀' => 'i', // i with ogonek and grave 1021 | 'į̃' => 'i', // i with ogonek and tilde 1022 | 'ɨ' => 'i', // i with stroke 1023 | 'ɨ́' => 'i', // i with stroke and acute 1024 | 'ɨ̌' => 'i', // i with stroke and caron 1025 | 'ɨ̧' => 'i', // i with stroke and cedilla 1026 | 'ɨ̂' => 'i', // i with stroke and circumflex 1027 | 'ɨ̀' => 'i', // i with stroke and grave 1028 | 'ɨ̄' => 'i', // i with stroke and macron 1029 | 'ɨ̃' => 'i', // i with stroke and tilde 1030 | 'ɨ̧̌' => 'i', // i with stroke, cedilla and caron 1031 | 'ɨ̧̀' => 'i', // i with stroke, cedilla and grave 1032 | 'ɨ̧̂' => 'i', // i with stroke, cedilla, and circumflex 1033 | 'ĩ' => '\\~{i}', // i with tilde 1034 | 'ĩ́' => 'i', // i with tilde and acute 1035 | 'ĩ̌' => 'i', // i with tilde and caron 1036 | 'ĩ̂' => 'i', // i with tilde and circumflex 1037 | 'ĩ̀' => 'i', // i with tilde and grave 1038 | 'ĩ̍' => 'i', // i with tilde and vertical line 1039 | 'ḭ' => 'i', // i with tilde below 1040 | 'i̍' => 'i', // i with vertical line 1041 | // 'ꝼ́', insular F with acute 1042 | // 'ꝼ̇', insular F with dot above 1043 | // 'ꝼ̣', insular F with dot below 1044 | // 'ɩ́', iota with acute 1045 | // 'ɩ̀', iota with grave 1046 | // 'ɩ̃', iota with tilde 1047 | 'j́' => '\\\'{j}', // j with acute 1048 | 'ǰ' => '\\v{j}', // j with caron 1049 | 'ĵ' => '\\^{j}', // j with circumflex 1050 | 'j̣' => '\\d{j}', // j with dot below 1051 | 'j̄' => '\\={j}', // j with macron 1052 | 'ɉ' => 'j', // j with stroke 1053 | 'j̃' => '\\~{j}', // j with tilde 1054 | 'ḱ' => '\\\'{k}', // k with acute 1055 | 'ǩ' => '\\v{k}', // k with caron 1056 | 'ķ' => '\\c{k}', // k with cedilla 1057 | 'ⱪ' => 'k', // k with descender 1058 | 'ꝃ' => 'k', // k with diagonal stroke 1059 | 'k̇' => '\\.{k}', // k with dot above 1060 | 'ḳ' => '\\d{k}', // k with dot below 1061 | 'k̀' => '\\`{k}', // k with grave 1062 | 'ƙ' => 'k', // k with hook 1063 
| 'ḵ' => 'k', // k with line below 1064 | 'k̄' => '\\={k}', // k with macron 1065 | 'ꞣ' => 'k', // k with oblique stroke 1066 | 'ꝁ' => 'k', // k with stroke 1067 | 'ꝅ' => 'k', // k with stroke and diagonal stroke 1068 | 'ĺ' => '\\\'{l}', // l with acute 1069 | 'ḷ́' => 'l', // l with acute and dot below 1070 | 'ƚ' => 'l', // l with bar 1071 | 'ľ' => '\\v{l}', // l with caron 1072 | 'ļ' => '\\c{l}', // l with cedilla 1073 | 'l̐' => 'l', // l with chandrabindu 1074 | 'l̂' => '\\^{l}', // l with circumflex 1075 | 'ḽ' => 'l', // l with circumflex below 1076 | 'l̓' => 'l', // l with comma above 1077 | 'ḷ̓' => 'l', // l with comma above and dot below 1078 | 'l̦' => 'l', // l with comma below 1079 | 'ḷ' => '\\d{l}', // l with dot below 1080 | 'ḹ' => 'l', // l with dot below and macron 1081 | 'ⱡ' => 'l', // l with double bar 1082 | 'ꝉ' => 'l', // l with high stroke 1083 | 'ḻ' => 'l', // l with line below 1084 | 'ɫ' => 'l', // l with middle tilde 1085 | 'ł' => 'l', // l with stroke 1086 | 'l̃' => '\\~{l}', // l with tilde 1087 | 'ḿ' => '\\\'{m}', // m with acute 1088 | 'ṃ́' => 'm', // m with acute and dot below 1089 | 'm̧' => '\\c{m}', // m with cedilla 1090 | 'm̐' => 'm', // m with chandrabindu 1091 | 'm̓' => 'm', // m with comma above 1092 | 'ṃ̓' => 'm', // m with comma above and dot below 1093 | 'm̦' => 'm', // m with comma below 1094 | 'm̈' => '\\"{m}', // m with diaeresis 1095 | 'ṁ' => '\\.{m}', // m with dot above 1096 | 'ṃ' => '\\d{m}', // m with dot below 1097 | 'ṃ' => '\\d{m}', // m with dot below 1098 | 'm̀' => '\\`{m}', // m with grave 1099 | 'ɱ' => 'm', // m with hook 1100 | 'm̄' => '\\={m}', // m with macron 1101 | 'm̨' => '\\k{m}', // m with ogonek 1102 | 'm̃' => '\\~{m}', // m with tilde 1103 | 'm̍' => 'm', // m with vertical line 1104 | 'ń' => '\\\'{n}', // n with acute 1105 | 'ṇ́' => 'n', // n with acute and dot below 1106 | 'ň' => '\\v{n}', // n with caron 1107 | 'ņ' => '\\c{n}', // n with cedilla 1108 | 'n̐' => 'n', // n with chandrabindu 1109 | 'n̂' => 
'\\^{n}', // n with circumflex 1110 | 'ṋ' => 'n', // n with circumflex below 1111 | 'ṇ̓' => 'n', // n with comma above and dot below 1112 | 'n̦' => 'n', // n with comma below 1113 | 'ꞑ' => 'n', // n with descender 1114 | 'n̈' => 'n', // n with diaresis 1115 | 'ṅ' => '\\.{n}', // n with dot above 1116 | 'ṇ' => '\\d{n}', // n with dot below 1117 | 'ǹ' => '\\`{n}', // n with grave 1118 | 'ɲ' => 'n', // n with left hook 1119 | 'ṉ' => 'n', // n with line below 1120 | 'ƞ' => 'n', // n with long right leg 1121 | 'n̄' => '\\={n}', // n with macron 1122 | 'ꞥ' => 'n', // n with oblique stroke 1123 | 'ñ̈' => 'n', // n with tidle and diaeresis 1124 | 'ñ' => '\\~{n}', // n with tilde 1125 | 'n̰' => 'n', // n with tilde below 1126 | 'n̲' => '\\b{n}', // n with underline 1127 | 'n̍' => 'n', // n with vertical line 1128 | 'ó' => '\\\'{o}', // o with acute 1129 | 'ó̧' => 'o', // o with acute and cedilla 1130 | 'ọ́' => 'o', // o with acute and dot below 1131 | 'ó̱' => 'o', // o with acute and line below 1132 | 'ǫ́' => 'o', // o with acute and ogonek 1133 | 'ɵ' => 'o', // o with bar 1134 | 'ŏ' => '\\u{o}', // o with breve 1135 | 'ǒ' => '\\v{o}', // o with caron 1136 | 'ǒ̧' => 'o', // o with caron and cedilla 1137 | 'ǫ̌' => 'o', // o with caron and ogonek 1138 | 'o̧' => '\\c{o}', // o with cedilla 1139 | 'o̐' => 'o', // o with chandrabindu 1140 | 'ô' => '\\^{o}', // o with circumflex 1141 | 'ố' => 'o', // o with circumflex and acute 1142 | 'ô̧' => 'o', // o with circumflex and cedilla 1143 | 'ộ' => 'o', // o with circumflex and dot below 1144 | 'ộ' => 'o', // o with circumflex and dot below 1145 | 'ồ' => 'o', // o with circumflex and grave 1146 | 'ổ' => 'o', // o with circumflex and hook above 1147 | 'ô̱' => 'o', // o with circumflex and line below 1148 | 'ǫ̂' => 'o', // o with circumflex and ogonek 1149 | 'ỗ' => 'o', // o with circumflex and tilde 1150 | 'o̭' => 'o', // o with circumflex below 1151 | 'ö' => '\\"{o}', // o with diaeresis 1152 | 'ö́' => 'o', // o with diaeresis and 
acute 1153 | 'ö̀' => 'o', // o with diaeresis and grave 1154 | 'ö̱' => 'o', // o with diaeresis and line below 1155 | 'ȫ' => 'o', // o with diaeresis and macron 1156 | 'ȯ' => '\\.{o}', // o with dot above 1157 | 'ȱ' => 'o', // o with dot above and macron 1158 | 'o̍͘' => 'o', // o with dot above and vertical line 1159 | 'o͘' => 'o', // o with dot above right 1160 | 'ó͘' => 'o', // o with dot above right and acute 1161 | 'ò͘' => 'o', // o with dot above right and grave 1162 | 'ō͘' => 'o', // o with dot above right and macron 1163 | 'ọ' => '\\d{o}', // o with dot below 1164 | 'ő' => '\\H{o}', // o with double acute 1165 | 'ő' => '\\H{o}', // o with double acute 1166 | 'ȍ' => 'o', // o with double grave 1167 | 'ò' => '\\`{o}', // o with grave 1168 | 'ò̧' => 'o', // o with grave and cedilla 1169 | 'ọ̀' => 'o', // o with grave and dot below 1170 | 'ò̱' => 'o', // o with grave and line below 1171 | 'ǫ̀' => 'o', // o with grave and ogonek 1172 | 'ỏ' => 'o', // o with hook above 1173 | 'ơ' => 'o', // o with horn 1174 | 'ớ' => 'o', // o with horn and acute 1175 | 'ợ' => 'o', // o with horn and dot below 1176 | 'ờ' => 'o', // o with horn and grave 1177 | 'ở' => 'o', // o with horn and hook above 1178 | 'ỡ' => 'o', // o with horn and tilde 1179 | 'ȏ' => 'o', // o with inverted breve 1180 | 'o̱' => 'o', // o with line below 1181 | 'ꝋ' => 'o', // o with long stroke overlay 1182 | 'ꝋ' => 'o', // o with long stroke overlay 1183 | 'ꝍ' => 'o', // o with loop 1184 | 'ō' => '\\={o}', // o with macron 1185 | 'ṓ' => 'o', // o with macron and acute 1186 | 'ō̌' => 'o', // o with macron and caron 1187 | 'ō̂' => 'o', // o with macron and circumflex 1188 | 'ọ̄' => 'o', // o with macron and dot below 1189 | 'ṑ' => 'o', // o with macron and grave 1190 | 'ō̱' => 'o', // o with macron and line below 1191 | 'ǭ' => 'o', // o with macron and ogonek 1192 | 'ǭ̀' => 'o', // o with macron, grave and ogonek 1193 | 'ǫ' => '\\k{o}', // o with ogonek 1194 | 'ø' => 'o', // o with stroke 1195 | 'ǿ' => 'o', 
// o with stroke and acute 1196 | 'ø̌' => 'o', // o with stroke and caron 1197 | 'ø̂' => 'o', // o with stroke and circumflex 1198 | 'ø̀' => 'o', // o with stroke and grave 1199 | 'ø̄' => 'o', // o with stroken and macron 1200 | 'õ' => '\\~{o}', // o with tilde 1201 | 'ṍ' => 'o', // o with tilde and acute 1202 | 'ṏ' => 'o', // o with tilde and diaeresis 1203 | 'ȭ' => 'o', // o with tilde and macron 1204 | 'o̲' => '\\b{o}', // o with underline 1205 | 'o̍' => 'o', // o with vertical line 1206 | // 'ɔ́', open O with acute 1207 | // 'ɔ̧́', open O with acute and cedilla 1208 | // 'ɔ̌', open O with caron 1209 | // 'ɔ̧̌', open O with caron and cedilla 1210 | // 'ɔ̧', open O with cedilla 1211 | // 'ɔ̂', open O with circumflex 1212 | // 'ɔ̧̂', open O with circumflex and cedilla 1213 | // 'ɔ̈', open O with diaeresis 1214 | // 'ɔ̀', open O with grave 1215 | // 'ɔ̧̀', open O with grave and cedilla 1216 | // 'ɔ̄', open O with macron 1217 | // 'ɔ̃', open O with tilde 1218 | // 'ɔ̃́', open O with tilde and acute 1219 | // 'ɔ̃̌', open O with tilde and caron 1220 | // 'ɔ̃̂', open O with tilde and circumflex 1221 | // 'ɔ̃̀', open O with tilde and grave 1222 | // 'ɔ̃̍', open O with tilde and verticale line 1223 | // 'ɔ̱', open O with tilde below 1224 | // 'ɔ̍', open O with vertical line 1225 | 'ṕ' => '\\\'{p}', // p with acute 1226 | 'p̓' => 'p', // p with comma above 1227 | 'p̈' => '\\"{p}', // p with diaeresis 1228 | 'p̤' => 'p', // p with diaeresis below 1229 | 'ṗ' => '\\.{p}', // p with dot above 1230 | 'p̣' => 'p', // p with dot above below 1231 | 'ꝓ' => 'p', // p with flourish 1232 | 'p̀' => '\\`{p}', // p with grave 1233 | 'ƥ' => 'p', // p with hook 1234 | 'p̄' => '\\={p}', // p with macron 1235 | 'ꝕ' => 'p', // p with squirrel tail 1236 | 'ᵽ' => 'p', // p with stroke 1237 | 'ꝑ' => 'p', // p with stroke through descender 1238 | 'p̄' => '\\~{p}', // p with tilde 1239 | 'q̓' => 'q', // q with comma above 1240 | 'ꝙ' => 'q', // q with diagonal stroke 1241 | 'q̇' => '\\.{q}', // q 
with dot above 1242 | 'ɋ' => 'q', // q with hook tail 1243 | 'ꝗ' => 'q', // q with stroke through descender 1244 | 'ŕ' => '\\\'{r}', // r with acute 1245 | 'ř' => '\\v{r}', // r with caron 1246 | 'ŗ' => '\\c{r}', // r with cedilla 1247 | 'r̂' => '\\^{r}', // r with circumflex 1248 | 'r̓' => 'r', // r with comma above 1249 | 'r̦' => 'r', // r with comma below 1250 | 'r̰' => 'r', // r with diaeresis below 1251 | 'ṙ' => '\\.{r}', // r with dot above 1252 | 'ṛ' => '\\d{r}', // r with dot below 1253 | 'ṝ' => 'r', // r with dot below and macron 1254 | 'ȑ' => 'r', // r with double grave 1255 | 'ȓ' => 'r', // r with inverted breve 1256 | 'ṟ' => 'r', // r with line below 1257 | 'r̄' => '\\={r}', // r with macron 1258 | 'ꞧ' => 'r', // r with oblique stroke 1259 | 'r̥' => 'r', // r with ring below 1260 | 'r̥̄' => 'r', // r with ring below and macron 1261 | 'ɍ' => 'r', // r with stroke 1262 | 'ɽ' => 'r', // r with tail 1263 | 'r̃' => '\\~{r}', // r with tilde 1264 | // 'ꜿ', reversed C with dot 1265 | 'ś' => '\\\'{s}', // s with acute 1266 | 'ṥ' => 's', // s with acute and dot above 1267 | 'š' => '\\v{s}', // s with caron 1268 | 'ṧ' => 's', // s with caron and dot above 1269 | 'ş' => '\\c{s}', // s with cedilla 1270 | 'ŝ' => '\\^{s}', // s with circumflex 1271 | 'ș' => 's', // s with comma below 1272 | 'ṡ' => '\\.{s}', // s with dot above 1273 | 'ṣ' => '\\d{s}', // s with dot below 1274 | 'ṩ' => 's', // s with dot below and dot above 1275 | 'ᵴ' => 's', // s with middle tilde 1276 | 'ꞩ' => 's', // s with oblique stroke 1277 | 'ȿ' => 's', // s with swash tail 1278 | 'ť' => '\\v{t}', // t with caron 1279 | 'ţ' => '\\c{t}', // t with cedilla 1280 | 'ṱ' => 't', // t with circumflex below 1281 | 'ț' => 't', // t with comma below 1282 | 'ẗ' => '\\"{t}', // t with diaeresis 1283 | 'ⱦ' => 't', // t with diagonal stroke 1284 | 'ṫ' => '\\.{t}', // t with dot above 1285 | 'ṭ' => '\\d{t}', // t with dot below 1286 | 'ƭ' => 't', // t with hook 1287 | 'ṯ' => 't', // t with line below 1288 | 
'ʈ' => 't', // t with retroflex hook 1289 | 'ŧ' => 't', // t with stroke 1290 | // 'ꝥ', thorn with stroke 1291 | // 'ꝧ', thorn with stroke through descender 1292 | 'ʉ' => 'u', // u bar 1293 | 'ú' => '\\\'{u}', // u with acute 1294 | 'ŭ' => '\\u{u}', // u with breve 1295 | 'ǔ' => '\\v{u}', // u with caron 1296 | 'û' => '\\^{u}', // u with circumflex 1297 | 'ṷ' => 'u', // u with circumflex below 1298 | 'ü' => '\\"{u}', // u with diaeresis 1299 | 'ǘ' => 'u', // u with diaeresis and acute 1300 | 'ǚ' => 'u', // u with diaeresis and caron 1301 | 'ǜ' => 'u', // u with diaeresis and grave 1302 | 'ǖ' => 'u', // u with diaeresis and macron 1303 | 'ṳ' => 'u', // u with diaeresis below 1304 | 'ụ' => '\\d{u}', // u with dot below 1305 | 'ű' => '\\H{u}', // u with double acute 1306 | 'ȕ' => 'u', // u with double grave 1307 | 'ù' => '\\`{u}', // u with grave 1308 | 'ủ' => 'u', // u with hook above 1309 | 'ư' => 'u', // u with horn 1310 | 'ứ' => 'u', // u with horn and acute 1311 | 'ự' => 'u', // u with horn and dot below 1312 | 'ừ' => 'u', // u with horn and grave 1313 | 'ử' => 'u', // u with horn and hook above 1314 | 'ữ' => 'u', // u with horn and tilde 1315 | 'ȗ' => 'u', // u with inverted breve 1316 | 'ū' => '\\={u}', // u with macron 1317 | 'ṻ' => 'u', // u with macron and diaeresis 1318 | 'ų' => '\\k{u}', // u with ogonek 1319 | 'ů' => '\\r{u}', // u with ring above 1320 | 'ũ' => '\\~{u}', // u with tilde 1321 | 'ṹ' => 'u', // u with tilde and acute 1322 | 'ṵ' => 'u', // u with tilde below 1323 | 'ꝟ' => 'v', // v with diagonal stroke 1324 | 'ṿ' => '\\d{v}', // v with dot below 1325 | 'ʋ' => 'v', // v with hook (Script V) 1326 | 'ṽ' => '\\~{v}', // v with tilde 1327 | 'ẃ' => '\\\'{w}', // w with acute 1328 | 'ŵ' => '\\^{w}', // w with circumflex 1329 | 'ẅ' => '\\"{w}', // w with diaeresis 1330 | 'ẇ' => '\\.{w}', // w with dot above 1331 | 'ẉ' => '\\d{w}', // w with dot below 1332 | 'ẁ' => '\\`{w}', // w with grave 1333 | 'ⱳ' => 'w', // w with hook 1334 | 'ẘ' => '\\r{w}', // 
w with ring above 1335 | 'ẍ' => '\\"{x}', // x with diaeresis 1336 | 'ẋ' => '\\.{x}', // x with dot above 1337 | 'ý' => '\\\'{y}', // y with acute 1338 | 'ŷ' => '\\^{y}', // y with circumflex 1339 | 'ÿ' => '\\"{y}', // y with diaeresis 1340 | 'ẏ' => '\\.{y}', // y with dot above 1341 | 'ỵ' => '\\d{y}', // y with dot below 1342 | 'ỳ' => '\\`{y}', // y with grave 1343 | 'ƴ' => 'y', // y with hook 1344 | 'ỷ' => 'y', // y with hook above 1345 | 'ỿ' => 'y', // y with loop 1346 | 'ȳ' => '\\={y}', // y with macron 1347 | 'ẙ' => '\\r{y}', // y with ring above 1348 | 'ɏ' => 'y', // y with stroke 1349 | 'ỹ' => '\\~{y}', // y with tilde 1350 | 'ź' => '\\\'{z}', // z with acute 1351 | 'ž' => '\\v{z}', // z with caron 1352 | 'ẑ' => '\\^{z}', // z with circumflex 1353 | 'ⱬ' => 'z', // z with descender 1354 | 'ż' => '\\.{z}', // z with dot above 1355 | 'ẓ' => '\\d{z}', // z with dot below 1356 | 'ȥ' => 'z', // z with hook 1357 | 'ẕ' => 'z', // z with line below 1358 | 'ƶ' => 'z', // z with stroke 1359 | 'ɀ' => 'z', // z with swash tail 1360 | 1361 | '–' => '--', 1362 | '—' => '---', 1363 | '…' => '\\ldots{}', 1364 | 1365 | '¶' => '\\P{}', 1366 | '§' => '\\S{}', 1367 | 1368 | 'æ' => '\\ae{}', 1369 | 'Æ' => '\\AE{}', 1370 | 'ß' => '\\ss{}', 1371 | 'œ' => '\\oe{}', 1372 | 'Œ' => '\\OE{}', 1373 | 'ø' => '\\o{}', 1374 | 'Ø' => '\\O{}', 1375 | 'Å' => '\\AA{}', 1376 | 'å' => '\\aa{}', 1377 | 'ł' => '\\l{}', 1378 | 'Ł' => '\\L{}', 1379 | 'Ŋ' => '\\NG{}', 1380 | 'ŋ' => '\\ng{}', 1381 | 1382 | 'α' => '$\\alpha$', 1383 | 'β' => '$\\beta$', 1384 | 'γ' => '$\\gamma$', 1385 | 'δ' => '$\\delta$', 1386 | 'ε' => '$\\varepsilon$', 1387 | 'ζ' => '$\\zeta$', 1388 | 'η' => '$\\eta$', 1389 | 'θ' => '$\\vartheta$', 1390 | 'ι' => '$\\iota$', 1391 | 'κ' => '$\\kappa$', 1392 | 'λ' => '$\\lambda$', 1393 | 'μ' => '$\\mu$', 1394 | 'ν' => '$\\nu$', 1395 | 'ξ' => '$\\xi$', 1396 | 'ο' => '$\\omicron$', 1397 | 'π' => '$\\pi$', 1398 | 'ρ' => '$\\varrho$', 1399 | 'ς' => '$\\varsigma$', 1400 | 'σ' => '$\\sigma$', 
1401 | 'τ' => '$\\tau$', 1402 | 'υ' => '$\\upsilon$', 1403 | 'φ' => '$\\varphi$', 1404 | 'χ' => '$\\chi$', 1405 | 'ψ' => '$\\psi$', 1406 | 'ω' => '$\\omega$', 1407 | 'Α' => '$\\Alpha$', 1408 | 'Β' => '$\\Beta$', 1409 | 'Γ' => '$\\Gamma$', 1410 | 'Δ' => '$\\Delta$', 1411 | 'Ε' => '$\\Epsilon$', 1412 | 'Ζ' => '$\\Zeta$', 1413 | 'Η' => '$\\Eta$', 1414 | 'Θ' => '$\\Theta$', 1415 | 'Ι' => '$\\Iota$', 1416 | 'Κ' => '$\\Kappa$', 1417 | 'Λ' => '$\\Lambda$', 1418 | 'Μ' => '$\\Mu$', 1419 | 'Ν' => '$\\Nu$', 1420 | 'Ξ' => '$\\Xi$', 1421 | 'Ο' => '$\\Omicron$', 1422 | 'Π' => '$\\Pi$', 1423 | 'Ρ' => '$\\Rho$', 1424 | 'Σ' => '$\\Sigma$', 1425 | 'Τ' => '$\\Tau$', 1426 | 'Υ' => '$\\Upsilon$', 1427 | 'Φ' => '$\\Phi$', 1428 | 'Χ' => '$\\Chi$', 1429 | 'Ψ' => '$\\Psi$', 1430 | 'Ω' => '$\\Omega$', 1431 | 1432 | // ligatures 1433 | 'Æ' => '\\AE{}', 1434 | 'Ǽ' => '\\AE{}', 1435 | 'Ǣ' => '\\AE{}', 1436 | 'æ' => '\\ae{}', 1437 | 'ǽ' => '\\ae{}', 1438 | 'ǣ' => '\\ae{}', 1439 | 'Œ' => '\\OE{}', 1440 | 'ɶ' => '\\OE{}', 1441 | 'œ' => '\\oe{}', 1442 | 'ᵫ' => 'ue', 1443 | 'IJ' => 'IJ', 1444 | 'ij' => 'ij', 1445 | 1446 | 'ff' => 'ff', 1447 | 'ffi' => 'ffi', 1448 | 'ffl' => 'ffl', 1449 | 'fi' => 'fi', 1450 | 'fl' => 'fl', 1451 | 'ſt' => 'ft', 1452 | 'st' => 'st', 1453 | 'ʦ' => 'ts', 1454 | 1455 | 'LJ' => 'LJ', 1456 | 'Lj' => 'Lj', 1457 | 'lj' => 'lj', 1458 | 'ʪ' => 'ls', 1459 | 'ʫ' => 'lz', 1460 | 'NJ' => 'NJ', 1461 | 'Nj' => 'Nj', 1462 | 'nj' => 'nj', 1463 | 'DZ' => 'DZ', 1464 | 'Dz' => 'Dz', 1465 | 'dz' => 'dz', 1466 | 'ʬ' => 'ww', 1467 | 1468 | 'Ŋ' => '\\NG{}', 1469 | 'ŋ' => '\\ng{}', 1470 | 1471 | 'DŽ' => 'D\'\\v{Z}', 1472 | 'Dž' => 'D\'\\v{z}', 1473 | 'dž' => 'd\'\\v{z}', 1474 | 1475 | 'Ƕ' => 'Hv', 1476 | 'ƕ' => 'hv', 1477 | 1478 | // spaces 1479 | ' ' => '--', // OGHAM SPACE MARK 1480 | ' ' => ' ', // EN SPACE 1481 | ' ' => ' ', // EM SPACE 1482 | ' ' => ' ', // THREE-PER-EM SPACE 1483 | ' ' => ' ', // FOUR-PER-EM SPACE 1484 | ' ' => ' ', // SIX-PER-EM SPACE 1485 | ' ' => ' ', // FIGURE SPACE 
1486 | ' ' => ' ', // PUNCTUATION SPACE 1487 | ' ' => ' ', // THIN SPACE 1488 | ' ' => ' ', // IDEOGRAPHIC SPACE 1489 | ' ' => ' ', // NO-BREAK SPACE 1490 | ' ' => ' ', // HAIR SPACE 1491 | "\xE2\x80\x8B" => '', // ZERO WIDTH SPACE 1492 | ' ' => ' ', // NARROW NO-BREAK SPACE 1493 | "\xE2\x80\xAF" => ' ', // NARROW NO-BREAK SPACE 1494 | ' ' => ' ', // MEDIUM MATHEMATICAL SPACE 1495 | "\xE2\x81\x9F" => ' ', // MEDIUM MATHEMATICAL SPACE 1496 | "\xE1\xA0\x8E" => '', // MONGOLIAN VOWEL SEPARATOR 1497 | "\xE2\x80\x80" => ' ', // EN QUAD 1498 | "\xE2\x80\x81" => ' ', // EM QUAD 1499 | "\xEF\xBB\xBF" => '', // ZERO WIDTH NO-BREAK SPACE (BOM) 1500 | ); 1501 | --------------------------------------------------------------------------------