├── .github └── workflows │ └── ci.yml ├── CHANGES.md ├── LICENSE ├── README.md ├── Setup.hs ├── example.md ├── pandoc-unicode-math.cabal └── src ├── LatexToUnicode.hs ├── MathFilter.hs ├── Symbols.hs └── UnicodeToLatex.hs /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: workflow_dispatch 4 | 5 | jobs: 6 | build: 7 | name: Build on ${{ matrix.os }} with GHC ${{ matrix.ghc }} and pandoc-types ${{ matrix.pandoc-types }} 8 | runs-on: ${{ matrix.os }} 9 | strategy: 10 | matrix: 11 | os: [ubuntu-latest, macOS-latest, windows-latest] 12 | ghc: ["8.10.7"] 13 | cabal: ["3.8.1.0"] 14 | pandoc-types: ["1.20", "1.21", "1.22", "1.23"] 15 | steps: 16 | - uses: actions/checkout@v4 17 | - uses: haskell-actions/setup@v2 18 | with: 19 | ghc-version: ${{ matrix.ghc }} 20 | cabal-version: ${{ matrix.cabal }} 21 | - run: cabal install -O2 --installdir bin --constraint 'pandoc-types ^>= ${{ matrix.pandoc-types }}' 22 | - run: tar -czf pandoc-unicode-math_${{ runner.os }}_pandoc-types-${{ matrix.pandoc-types }}.tar.gz --dereference -C bin . 23 | - uses: actions/upload-artifact@v4 24 | with: 25 | name: pandoc-unicode-math_${{ runner.os }}_pandoc-types-${{ matrix.pandoc-types }} 26 | path: pandoc-unicode-math_${{ runner.os }}_pandoc-types-${{ matrix.pandoc-types }}.tar.gz 27 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | # Revision history for pandoc-unicode-math 2 | 3 | ## Release 3.1.0 -- 2022-03-15 4 | 5 | A new symbol was added. Contributed by Ben. 6 | 7 | Pandoc compatibility: ≥ 2.8 8 | 9 | ## Release 3.0.1 -- 2021-01-09 10 | 11 | This release can be built with a broader range of GHC versions (at least 8.6, 12 | 8.8, 8.10 instead of just 8.8 with release 3.0.0). No functional changes. 
13 | 14 | Pandoc compatibility: 2.10 15 | 16 | ## Release 3.0.0 -- 2021-01-08 17 | 18 | Pandoc compatibility: 2.10 19 | 20 | ## Release 2.0.1 -- 2021-01-09 21 | 22 | This release can be built with a broader range of GHC versions (at least 8.6, 23 | 8.8, 8.10 instead of just 8.8 with release 2.0.0). No functional changes. 24 | 25 | Pandoc compatibility: 2.8-2.9 26 | 27 | ## Release 2.0.0 -- 2021-01-08 28 | 29 | Starting with this release, version number increments will also reflect 30 | compatibility with Pandoc versions i.e., if a new release of this filter is not 31 | compatible with an older Pandoc version anymore this will lead to a major 32 | version increment (like 1.2.0 → 2.0.0). 33 | 34 | Pandoc compatibility: 2.8-2.9 35 | 36 | ## Release 1.2.0 -- 2019-12-17 37 | 38 | The `pandoc-unicode-math` filter now adds sensible whitespace where necessary: 39 | The Unicode sequence `λx` now translates to `\lambda x` (note the added 40 | whitespace) because the previous `\lambdax` would raise a Latex error, but `αβ` 41 | still translates to `\alpha\beta` because this is unambiguous. Idea by Fynn 42 | Leitow. 43 | 44 | Several new symbols were added. Contributed by Fynn Leitow. 45 | 46 | Pandoc compatibility: ca. 1.19-2.7.3 47 | 48 | ## Release 1.1.0 -- 2019-04-09 49 | 50 | A second filter `pandoc-unicode-math-from-latex` was added that replaces 51 | "regular" Latex math commands by equivalent Unicode symbols. This is more or 52 | less the inverse of the existing filter `pandoc-unicode-math`. Idea by Anish 53 | Mittal. 54 | 55 | Several new symbols were added. Partly contributed by Eric Hanson. 56 | 57 | Pandoc compatibility: 1.19-ca. 2.7 58 | 59 | ## Release 1.0.0 -- 2018-02-27 60 | 61 | Initial release. 
62 | 63 | Pandoc compatibility: 1.17 64 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017-2019, Martin Hoppenheit 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of Martin Hoppenheit nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pandoc-unicode-math 2 | 3 | [![CI](https://github.com/marhop/pandoc-unicode-math/actions/workflows/ci.yml/badge.svg)](https://github.com/marhop/pandoc-unicode-math/actions/workflows/ci.yml) 4 | 5 | If you prefer Markdown to Latex because of its readability, then why not 6 | improve the readability of math expressions as well? Just use the power of 7 | Unicode! 8 | 9 | ## What does this filter do? 10 | 11 | This [Pandoc] [filter] replaces Unicode math symbols and greek letters like ∀, 12 | ∈, →, λ, or Ω *in math environments* (i.e., between single or double dollar 13 | signs for inline or display math, respectively) by equivalent Latex commands 14 | like `\forall`, `\in`, `\rightarrow`, `\lambda`, or `\Omega`. That means you can 15 | write expressions like these in Markdown and still get Latex's superior math 16 | typesetting when converting to PDF: 17 | 18 | $$α → β ≡ ¬α ∨ β ≡ ¬(α ∧ ¬β)$$ 19 | 20 | $$∀ x ∈ \{ 2, 4, … \}, ∀ y ∈ ℕ : (x ⋅ y) mod 2 = 0$$ 21 | 22 | $$ℕ ⊂ ℤ ⊂ ℝ$$ 23 | 24 | Now compare them to the same expressions written with Latex commands: 25 | 26 | $$\alpha \rightarrow \beta \equiv \neg \alpha \lor \beta \equiv \neg (\alpha \land \neg \beta)$$ 27 | 28 | $$\forall x \in \{ 2, 4, \dots \}, \forall y \in \mathbb{N} : (x \cdot y) mod 2 = 0$$ 29 | 30 | $$\mathbb{N} \subset \mathbb{Z} \subset \mathbb{R}$$ 31 | 32 | If you prefer the Unicode variant, this filter is for you! 33 | 34 | A complete list of symbols that are replaced by this filter can be found in the 35 | [`src/Symbols.hs`](src/Symbols.hs) file. 36 | 37 | [Pandoc]: https://pandoc.org/ 38 | [filter]: https://pandoc.org/filters.html 39 | 40 | ## Usage 41 | 42 | 1. Get binaries [here][releases] and put them in your [PATH]. 
Be careful to 43 | choose a release that matches your Pandoc version (otherwise you'll get an 44 | error like `Incompatible API versions: encoded with [1,20] but attempted to 45 | decode with [1,21].`). If necessary, you can build from source (see below). 46 | 2. Write a Markdown document containing Unicode characters in a math 47 | environment like the provided [example file](example.md). 48 | 3. Invoke Pandoc to convert the Markdown document to PDF and apply the filter 49 | along the way: 50 | 51 | $ pandoc example.md --filter pandoc-unicode-math -o example.pdf 52 | 53 | [releases]: https://github.com/marhop/pandoc-unicode-math/releases 54 | [PATH]: https://en.wikipedia.org/wiki/PATH_(variable) 55 | 56 | ## But my keyboard has no "α" and "∃" keys! 57 | 58 | The `pandoc-unicode-math` filter will not make *writing* math expressions 59 | easier. It only makes *reading* them easier. How to input Unicode symbols 60 | depends on your editor. In Vim, [digraphs] and the [characterize plugin] are 61 | very useful for working with Unicode. 62 | 63 | However, to help you get started with existing documents a second filter called 64 | `pandoc-unicode-math-from-latex` is provided that replaces "regular" Latex math 65 | commands like `\forall` or `\alpha` by equivalent Unicode symbols like ∀ or α. 66 | Yes, that means it's the inverse of the `pandoc-unicode-math` filter, except 67 | that `pandoc-unicode-math` replaces characters like Α (greek Alpha) by A (latin 68 | A) but `pandoc-unicode-math-from-latex` does not do the reverse since this would 69 | be ambiguous. 
This filter can be used like this: 70 | 71 | $ pandoc latex-math.md --filter pandoc-unicode-math-from-latex -o unicode-math.md 72 | 73 | [digraphs]: http://vimdoc.sourceforge.net/htmldoc/digraph.html 74 | [characterize plugin]: https://github.com/tpope/vim-characterize 75 | 76 | ## Limitations 77 | 78 | For more complex math containing fractions like `\frac{2}{3+5}` or lots of 79 | subscripts and superscripts like `k_{n+1}^2` you still need raw Latex because 80 | these things cannot be expressed by single Unicode characters. However, 81 | Unicode symbols and Latex commands can be used together in the same 82 | expression: 83 | 84 | ∃ x ∈ ℕ : \frac{x}{2} = 21 85 | 86 | ## Alternatives 87 | 88 | Instead of writing Unicode symbols in your source file and applying this filter, 89 | you can write regular Latex math commands and use an editor that displays them 90 | in a more readable way. For example, using Vim and the [Markdown plugin], these 91 | settings have the desired effect: 92 | 93 | let g:vim_markdown_math=1 94 | set conceallevel=2 95 | 96 | [Markdown plugin]: https://github.com/plasticboy/vim-markdown 97 | 98 | ## Building from source 99 | 100 | These filters are written in Haskell, so you need GHC (compiler) and Cabal 101 | (build tool), best installed with [ghcup]. Clone the Git repository, change to 102 | its top level directory and run the following command: 103 | 104 | $ cabal install --constraint 'pandoc-types ^>= 1.23' 105 | 106 | On Linux, this will build and install two filters, `pandoc-unicode-math` and 107 | `pandoc-unicode-math-from-latex` to `~/.cabal/bin/` and on Windows, well, I 108 | don't know but surely somewhere sensible. 109 | 110 | In the above command you have to choose a version of the pandoc-types library 111 | that matches your Pandoc release. (If you run `cabal build` in a separate step 112 | the `--constraint` option should be included there as well.) 
For reference, here 113 | is a compatibility list: 114 | 115 | pandoc-types | pandoc | GHC 116 | -------------|-----------|--------------------- 117 | 1.23 | ≥ 3.0 | 8.8.4, 8.10.7, 9.4.2 118 | 1.22 | 2.11-2.19 | 8.8.4, 8.10.7, 9.4.2 119 | 1.21 | 2.10 | 8.8.4, 8.10.7 120 | 1.20 | 2.8-2.9 | 8.8.4, 8.10.7 121 | 122 | The GHC column records compiler versions I successfully built with. Other 123 | versions may or may not work. 124 | 125 | [ghcup]: https://www.haskell.org/ghcup/ 126 | 127 | ## Contributing 128 | 129 | Pull Requests are welcome. It's easy to add new symbols to the 130 | [`src/Symbols.hs`](src/Symbols.hs) file. 131 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /example.md: -------------------------------------------------------------------------------- 1 | # Notes concerning the $λ$ calculus 2 | 3 | $λ$ terms: 4 | 5 | * Variable: $x$ 6 | * Abstraction: $(λx.M)$ 7 | * Application: $(MN)$ 8 | 9 | Reduction operations: 10 | 11 | * $α$ reduction: $λx.M → λy.[y/x]M$ 12 | * $β$ reduction: $(λx.M)N → [N/x]M$ 13 | 14 | Church numerals: 15 | 16 | * $0 := λfx.x$ 17 | * $1 := λfx.fx$ 18 | * $2 := λfx.f(fx)$ 19 | -------------------------------------------------------------------------------- /pandoc-unicode-math.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 2.2 2 | name: pandoc-unicode-math 3 | version: 3.1.0 4 | synopsis: Replace Unicode symbols by equivalent Latex commands 5 | description: A Pandoc filter that replaces Unicode math symbols and 6 | greek letters like ∀, ∈, →, λ, or Ω in math environments 7 | by equivalent Latex commands like \forall, \in, 8 | \rightarrow, \lambda, or \Omega. 
That leads to readable 9 | math expressions in both Markdown and PDF. And another 10 | filter that does the reverse. 11 | homepage: https://github.com/marhop/pandoc-unicode-math 12 | license: BSD-3-Clause 13 | license-file: LICENSE 14 | author: Martin Hoppenheit 15 | maintainer: martin@hoppenheit.info 16 | copyright: 2017-2024 Martin Hoppenheit 17 | category: Text 18 | build-type: Simple 19 | extra-source-files: README.md 20 | , example.md 21 | 22 | common deps 23 | hs-source-dirs: src 24 | default-language: Haskell2010 25 | ghc-options: -Wall 26 | build-depends: base >= 4.12 && < 5 27 | , containers >= 0.6.0 && < 0.7 28 | , pandoc-types >= 1.20 && < 1.24 29 | , text >= 1.2.3 && < 2.2 30 | other-modules: MathFilter 31 | , Symbols 32 | 33 | executable pandoc-unicode-math 34 | import: deps 35 | main-is: UnicodeToLatex.hs 36 | ghc-options: -main-is UnicodeToLatex 37 | 38 | executable pandoc-unicode-math-from-latex 39 | import: deps 40 | main-is: LatexToUnicode.hs 41 | ghc-options: -main-is LatexToUnicode 42 | build-depends: HaTeX >= 3.22.3 && < 3.23 43 | -------------------------------------------------------------------------------- /src/LatexToUnicode.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module LatexToUnicode where 4 | 5 | import Data.Map.Strict (lookup) 6 | import Data.Text (Text, pack) 7 | import MathFilter 8 | import Symbols 9 | import Text.LaTeX.Base.Parser (parseLaTeX) 10 | import Text.LaTeX.Base.Render (render) 11 | import Text.LaTeX.Base.Syntax (LaTeX (..), TeXArg (FixArg)) 12 | import Text.Pandoc.JSON (toJSONFilter) 13 | import Prelude hiding (lookup) 14 | 15 | main :: IO () 16 | main = toJSONFilter (mathFilter latexToUnicode) 17 | 18 | -- | Replace Latex math commands by equivalent Unicode symbols. 
-- Examples:
--
-- * \alpha → α
-- * \mathbb{N} → ℕ
-- * \frac{\alpha}{2} → \frac{α}{2} (commands inside braced arguments)
-- * but /not/ A → Α (latin A to greek Alpha) because that's ambiguous
--
-- An expression that cannot be parsed as Latex is returned unchanged instead of
-- aborting the whole filter run.
latexToUnicode :: Text -> Text
latexToUnicode input = either (const input) (render . go) (parseLaTeX input)
  where
    -- Translate every command in a parsed expression that has a Unicode
    -- equivalent; everything else is kept as it is.
    go :: LaTeX -> LaTeX
    -- e.g. \alpha
    go orig@(TeXCommS x) =
      maybe orig TeXRaw $ lookup ("\\" <> pack x) latexToUnicodeMap
    -- e.g. \mathbb{N}; a command without a Unicode equivalent, e.g. \sqrt{x},
    -- is kept as it is
    go orig@(TeXComm x [FixArg (TeXRaw y)]) =
      maybe orig TeXRaw $
        lookup ("\\" <> pack x <> "{" <> y <> "}") latexToUnicodeMap
    -- any other command with arguments, e.g. \frac{\alpha}{2}: the command
    -- itself is kept, its arguments are translated
    go (TeXComm x args) = TeXComm x (map goArg args)
    -- nested Latex expressions
    go (TeXEnv x ys z) = TeXEnv x ys (go z)
    go (TeXMath x y) = TeXMath x (go y)
    go (TeXBraces x) = TeXBraces (go x)
    go (TeXSeq x y) = TeXSeq (go x) (go y)
    go x = x
    -- Translate the content of a braced argument. Other kinds of arguments
    -- are left untouched.
    goArg :: TeXArg -> TeXArg
    goArg (FixArg x) = FixArg (go x)
    goArg arg = arg

--------------------------------------------------------------------------------
/src/MathFilter.hs:
--------------------------------------------------------------------------------
module MathFilter
  ( mathFilter,
  )
where

import Data.Text (Text)
import Text.Pandoc.JSON (Inline (Math))

-- | Apply a function to math expressions in a Pandoc AST. Leave non-math
-- content unchanged.
mathFilter :: (Text -> Text) -> Inline -> Inline
mathFilter f (Math t e) = Math t (f e)
mathFilter _ x = x

--------------------------------------------------------------------------------
/src/Symbols.hs:
--------------------------------------------------------------------------------
{-# LANGUAGE OverloadedStrings #-}

module Symbols
  ( unicodeToLatexMap,
    latexToUnicodeMap,
  )
where

import Data.Map.Strict (Map, fromList)
import Data.Text (Text, singleton)

-- | Map from Unicode symbols to Latex commands.
unicodeToLatexMap :: Map Char Text
unicodeToLatexMap = fromList symbols

-- | Map from Latex commands to Unicode symbols.
latexToUnicodeMap :: Map Text Text
latexToUnicodeMap = fromList [(y, singleton x) | (x, y) <- symbols]

-- | Pairs of a Unicode symbol and the Latex code that replaces it. Both
-- exported maps are built from this list.
symbols :: [(Char, Text)]
symbols =
  [ ('¬', "\\neg"),
    ('±', "\\pm"),
    ('×', "\\times"),
    ('÷', "\\div"),
    ('…', "\\dots"),
    ('ℕ', "\\mathbb{N}"),
    ('ℚ', "\\mathbb{Q}"),
    ('ℝ', "\\mathbb{R}"),
    ('ℤ', "\\mathbb{Z}"),
    ('ℂ', "\\mathbb{C}"),
    ('←', "\\leftarrow"),
    ('↑', "\\uparrow"),
    ('→', "\\rightarrow"),
    ('↓', "\\downarrow"),
    ('↔', "\\leftrightarrow"),
    ('⇐', "\\Leftarrow"),
    ('⇒', "\\Rightarrow"),
    ('⇔', "\\Leftrightarrow"),
    ('↦', "\\mapsto"),
    ('∀', "\\forall"),
    ('∃', "\\exists"),
    ('∅', "\\emptyset"),
    ('∈', "\\in"),
    ('∉', "\\notin"),
    ('∋', "\\ni"),
    ('∎', "\\blacksquare"),
    ('∫', "\\int"),
    ('∑', "\\sum"),
    ('√', "\\sqrt"),
    ('∂', "\\partial"),
    ('∓', "\\mp"),
    ('∗', "\\ast"),
    ('∘', "\\circ"),
    ('∙', "\\bullet"),
    ('∝', "\\propto"),
    ('∞', "\\infty"),
    ('∥', "\\parallel"),
    ('∡', "\\measuredangle"),
    ('∧', "\\land"),
    ('∨', "\\lor"),
    ('∩', "\\cap"),
    ('∪', "\\cup"),
    ('⌈', "\\lceil"),
    ('⌉', "\\rceil"),
    ('⌊', "\\lfloor"),
    ('⌋', "\\rfloor"),
    ('⟨', "\\langle"),
    ('⟩', "\\rangle"),
    ('∴', "\\therefore"),
    ('∵', "\\because"),
    ('≈', "\\approx"),
    ('≠', "\\neq"),
    ('≡', "\\equiv"),
    ('≤', "\\leq"),
    ('≥', "\\geq"),
    ('⊂', "\\subset"),
    ('⊃', "\\supset"),
    ('⊆', "\\subseteq"),
    ('⊇', "\\supseteq"),
    ('⊢', "\\vdash"),
    ('⊤', "\\top"),
    ('⊥', "\\bot"),
    ('⊨', "\\vDash"),
    ('⋅', "\\cdot"),
    ('⋮', "\\vdots"),
    ('⋯', "\\cdots"),
    ('ℵ', "\\aleph"),
    ('α', "\\alpha"),
    ('Α', "A"),
    ('β', "\\beta"),
    ('Β', "B"),
    ('γ', "\\gamma"),
    ('Γ', "\\Gamma"),
    ('δ', "\\delta"),
    ('Δ', "\\Delta"),
    ('ε', "\\varepsilon"),
    ('ϵ', "\\epsilon"),
    ('Ε', "E"),
    ('ζ', "\\zeta"),
    ('Ζ', "Z"),
    ('η', "\\eta"),
    ('Η', "H"),
    ('θ', "\\theta"),
    ('ϑ', "\\vartheta"),
    ('Θ', "\\Theta"),
    ('ι', "\\iota"),
    ('Ι', "I"),
    ('κ', "\\kappa"),
    ('ϰ', "\\varkappa"),
    ('Κ', "K"),
    ('λ', "\\lambda"),
    ('Λ', "\\Lambda"),
    ('μ', "\\mu"),
    ('Μ', "M"),
    ('∇', "\\nabla"),
    ('ν', "\\nu"),
    ('Ν', "N"),
    ('ξ', "\\xi"),
    ('Ξ', "\\Xi"),
    ('ο', "o"),
    ('Ο', "O"),
    ('π', "\\pi"),
    ('Π', "\\Pi"),
    ('ρ', "\\rho"),
    ('ϱ', "\\varrho"),
    ('Ρ', "P"),
    ('σ', "\\sigma"),
    ('ς', "\\varsigma"),
    ('Σ', "\\Sigma"),
    ('τ', "\\tau"),
    ('Τ', "T"),
    ('υ', "\\upsilon"),
    ('Υ', "\\Upsilon"),
    ('φ', "\\varphi"),
    ('ϕ', "\\phi"),
    ('Φ', "\\Phi"),
    ('χ', "\\chi"),
    ('Χ', "X"),
    ('ψ', "\\psi"),
    ('Ψ', "\\Psi"),
    ('ω', "\\omega"),
    -- Both the Ohm sign (U+2126) and the greek capital Omega (U+03A9) become
    -- "\\Omega". The two characters render identically, so they are written as
    -- numeric escapes to keep them distinguishable and safe from Unicode
    -- normalisation. Do not switch the order of the two entries! The /last/
    -- one is used in latexToUnicodeMap, which is what we want.
    ('\x2126', "\\Omega"),
    ('\x03A9', "\\Omega")
  ]

--------------------------------------------------------------------------------
/src/UnicodeToLatex.hs:
--------------------------------------------------------------------------------
{-# LANGUAGE OverloadedStrings #-}

module UnicodeToLatex where

import Data.Char (isAlphaNum)
import Data.Map.Strict (findWithDefault, (!?))
import Data.Text (Text, cons, singleton, snoc, uncons)
import qualified Data.Text as T
import MathFilter
import Symbols
import Text.Pandoc.JSON (toJSONFilter)

main :: IO ()
main = toJSONFilter (mathFilter unicodeToLatex)

-- | Replace Unicode math symbols in a string by equivalent Latex commands.
-- Examples:
--
-- * α → \alpha
-- * ℕ → \mathbb{N}
-- * Α → A (greek Alpha to latin A), ugly but that's how Latex handles it
--
-- A space is inserted after a replacement that is directly followed by a
-- letter or digit, so that the command name stays unambiguous:
--
-- * λx → \lambda x
-- * αβ → \alpha\beta
unicodeToLatex :: Text -> Text
unicodeToLatex = T.foldr step T.empty
  where
    -- Prepend the translation of one character to the already translated
    -- remainder of the expression (the fold runs from right to left).
    step :: Char -> Text -> Text
    step c rest =
      case uncons rest of
        -- last character of the expression: nothing follows, no space needed
        Nothing -> findWithDefault (singleton c) c unicodeToLatexMap
        Just (next, more) ->
          case unicodeToLatexMap !? c of
            -- no replacement known: keep the character itself
            Nothing -> c `cons` rest
            Just cmd -> cmd <> separate next <> more
    -- The character following a replacement, preceded by a space if it is a
    -- letter or digit.
    separate :: Char -> Text
    separate next =
      if isAlphaNum next
        then " " `snoc` next
        else singleton next

--------------------------------------------------------------------------------