├── Setup.hs ├── cabal.project ├── .gitmodules ├── examples ├── complex-lists-pandoc.odt ├── asciidoctor-article-template-pandoc.odt ├── monospace.adoc ├── complex-lists.adoc ├── complex-lists-pandoc.md ├── asciidoc-hs.css ├── asciidoctor-article-template-pandoc.md ├── monospace-asciidoctor.html ├── complex-lists-asciidoctor.html ├── monospace-pandoc.html ├── complex-lists-pandoc.html ├── asciidoctor-article-template-asciidoctor.html ├── asciidoctor-article-template.adoc └── asciidoctor-article-template-pandoc.html ├── stack.yaml ├── .gitignore ├── test └── Tests │ ├── Main.hs │ ├── Metadata.hs │ ├── Inlines.hs │ └── Blocks.hs ├── exe └── Main.hs ├── CHANGELOG.adoc ├── stack.yaml.lock ├── src └── Text │ └── AsciiDoc │ ├── UnparsedInline.hs │ ├── Debug │ └── ParseTest.hs │ ├── SourceRange.hs │ ├── Metadata.hs │ ├── LineParsers.hs │ ├── SpecialChars.hs │ ├── ElementAttributes.hs │ ├── Pandoc.hs │ ├── Inlines.hs │ └── Blocks.hs ├── LICENSE ├── .github └── workflows │ └── ci.yml ├── asciidoc-hs.cabal └── README.adoc /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | 3 | main = defaultMain 4 | -------------------------------------------------------------------------------- /cabal.project: -------------------------------------------------------------------------------- 1 | packages: 2 | ./ 3 | 4 | optional-packages: 5 | ./parsec-free 6 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "parsec-free"] 2 | path = parsec-free 3 | url = https://github.com/gmarpons/parsec-free.git 4 | -------------------------------------------------------------------------------- /examples/complex-lists-pandoc.odt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmarpons/asciidoc-hs/HEAD/examples/complex-lists-pandoc.odt -------------------------------------------------------------------------------- /examples/asciidoctor-article-template-pandoc.odt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gmarpons/asciidoc-hs/HEAD/examples/asciidoctor-article-template-pandoc.odt -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | resolver: lts-17.14 2 | 3 | packages: 4 | - . 5 | 6 | extra-deps: 7 | - repr-tree-syb-0.1.1@sha256:b3af0d338ab5273ebfc993ab1a3b10a36febffdfad474fb6ae7c2ab1fdb49702,1100 8 | -------------------------------------------------------------------------------- /examples/monospace.adoc: -------------------------------------------------------------------------------- 1 | = Example with monospace 2 | 3 | == `Monospace` can be used in section headers 4 | 5 | This is a paragraph with `some words in monospace and in *bold monospace*`. 6 | -------------------------------------------------------------------------------- /examples/complex-lists.adoc: -------------------------------------------------------------------------------- 1 | = Example with complex lists 2 | 3 | .Unordered list title 4 | * list item 1 5 | - nested list item 6 | ** nested nested list item 1 7 | + 8 | another paragraph in the same nested nested list item 1 9 | ** nested nested list item 2 10 | + 11 | .an example inside item 2 12 | ==== 13 | an example 14 | ==== 15 | 16 | * list item 2 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | dist-* 3 | cabal-dev 4 | *.o 5 | *.hi 6 | *.hie 7 | *.chi 8 | *.chs.h 9 | *.dyn_o 10 | *.dyn_hi 11 | .hpc 12 | .hsenv 13 | .cabal-sandbox/ 14 | cabal.sandbox.config 15 | *.prof 16 | *.aux 17 | *.hp 18 | *.eventlog 19 | .stack-work/ 20 | cabal.project.local 21 | cabal.project.local~ 22 | .HTF/ 23 | .ghc.environment.* 24 | deps.png 25 | modules.png 26 | cabal.project.local~* 27 | -------------------------------------------------------------------------------- /examples/complex-lists-pandoc.md: -------------------------------------------------------------------------------- 1 | # Example with complex lists 2 | 3 | Unordered list title 4 | 5 | - list item 1 6 | 7 | - nested list item 8 | 9 | - nested nested list item 1 10 | 11 | another paragraph in the same nested nested list item 1 12 | 13 | - nested nested list item 2 14 | 15 | Example 1. an example inside item 2 16 | 17 | an example 18 | 19 | - list item 2 20 | -------------------------------------------------------------------------------- /test/Tests/Main.hs: -------------------------------------------------------------------------------- 1 | module Main 2 | ( main, 3 | ) 4 | where 5 | 6 | import Test.Tasty 7 | import Tests.Blocks 8 | import Tests.Inlines 9 | import Tests.Metadata 10 | 11 | main :: IO () 12 | main = defaultMain tests 13 | 14 | tests :: TestTree 15 | tests = testGroup "tests" [functionalTests] 16 | 17 | functionalTests :: TestTree 18 | functionalTests = 19 | testGroup 20 | "functional tests" 21 | [ blockUnitTests, 22 | inlineUnitTests, 23 | metadataUnitTests 24 | ] 25 | -------------------------------------------------------------------------------- /examples/asciidoc-hs.css: -------------------------------------------------------------------------------- 1 | .monospace { 2 | font-family: "Droid Sans Mono", "DejaVu Sans Mono", monospace; 3 | font-weight: 400; 4 | color: rgba(0, 0, 0, 0.9); 5 | font-size: 1em; 6 | } 7 | 8 | :not(pre):not([class^=L]) > .monospace { 9 | font-size: 0.9375em; 10 | font-style: normal !important; 11 | letter-spacing: 0; 12 | padding: 0.1em 0.5ex; 13 | word-spacing: -0.15em; 14 | background: #f7f7f8; 15 | -webkit-border-radius: 4px; 16 | border-radius: 4px; 17 | line-height: 1.45; 18 | text-rendering: optimizeSpeed; 19 | } 20 | -------------------------------------------------------------------------------- /exe/Main.hs: -------------------------------------------------------------------------------- 1 | module Main 2 | ( main, 3 | ) 4 | where 5 | 6 | import qualified Data.Aeson.Text as Aeson 7 | import qualified Data.Text as T (lines) 8 | import qualified Data.Text.IO as T 9 | import qualified Data.Text.Lazy.IO as LT 10 | import Text.AsciiDoc.Blocks 11 | import Text.AsciiDoc.Pandoc 12 | import qualified Text.Parsec as Parsec (runParser) 13 | 14 | main :: IO () 15 | main = do 16 | result <- 17 | Parsec.runParser documentP blockParserInitialState "" 18 | . T.lines 19 | <$> T.getContents 20 | case result of 21 | Left err -> error $ "Parsing error: " <> show err 22 | Right doc -> 23 | LT.putStrLn $ Aeson.encodeToLazyText $ convertDocument $ parseInlines doc 24 | -------------------------------------------------------------------------------- /CHANGELOG.adoc: -------------------------------------------------------------------------------- 1 | = Revision history for asciidoc-hs 2 | 3 | == 0.0.0.0 -- 2021-06-10 4 | 5 | * First version. 6 | * Basic paragraphs. 7 | * Basic, grammar based, inline parsing. 8 | * Conversion to JSON Pandoc. 9 | * (Nested) bold, italic formatting. 10 | * Monospace formatting for HTML output (through support in CSS). 11 | * Highlight formatting and custom inline style. 12 | * Constrained and unconstrained formatting. 13 | * Element attributes: positional, named, shorthand syntax. 14 | * ID attribute, role attribute, optional attribute. 15 | * Block titles. 16 | * Line and block comments. 17 | * Delimited blocks: sidebars, example blocks. 18 | * Section headers and levels. 19 | * Discrete sections. 20 | * Unordered lists (with simple list continuations). 21 | * Document title. 22 | -------------------------------------------------------------------------------- /examples/asciidoctor-article-template-pandoc.md: -------------------------------------------------------------------------------- 1 | # AsciiDoc Article Title 2 | 3 | ## First level heading 4 | 5 | This is a paragraph with a **bold** word and an *italicized* word. 6 | 7 | ### Second level heading 8 | 9 | Unordered list title 10 | 11 | - list item 1 12 | 13 | - nested list item 14 | 15 | - nested nested list item 1 16 | 17 | - nested nested list item 2 18 | 19 | - list item 2 20 | 21 | This is a paragraph. 22 | 23 | Example 1. Example block title 24 | 25 | Content in an example block is subject to normal substitutions. 26 | 27 | Sidebar title 28 | 29 | Sidebars contain aside text and are subject to normal substitutions. 30 | 31 | #### Third level heading 32 | 33 | ##### Fourth level heading 34 | 35 | ###### Fifth level heading 36 | 37 | ## First level heading 38 | 39 | ## First level heading 40 | -------------------------------------------------------------------------------- /stack.yaml.lock: -------------------------------------------------------------------------------- 1 | # This file was autogenerated by Stack. 2 | # You should not edit this file by hand. 3 | # For more information, please see the documentation at: 4 | # https://docs.haskellstack.org/en/stable/lock_files 5 | 6 | packages: 7 | - completed: 8 | hackage: repr-tree-syb-0.1.1@sha256:b3af0d338ab5273ebfc993ab1a3b10a36febffdfad474fb6ae7c2ab1fdb49702,1100 9 | pantry-tree: 10 | size: 213 11 | sha256: 9a20743b457ff2e3e1ce98881480f8353b7837c66352cf55c1edd2a8dac97fd4 12 | original: 13 | hackage: repr-tree-syb-0.1.1@sha256:b3af0d338ab5273ebfc993ab1a3b10a36febffdfad474fb6ae7c2ab1fdb49702,1100 14 | snapshots: 15 | - completed: 16 | size: 567677 17 | url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/17/14.yaml 18 | sha256: 3740f22286bf5e6e3d82f88125e1c708b6e27847211f956b530aa5d83cf39383 19 | original: lts-17.14 20 | -------------------------------------------------------------------------------- /examples/monospace-asciidoctor.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Example with monospace 9 | 10 | 11 | 12 | 15 |
16 |
17 |

Monospace can be used in section headers

18 |
19 |
20 |

This is a paragraph with some words in monospace and in bold monospace.

21 |
22 |
23 |
24 |
25 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /src/Text/AsciiDoc/UnparsedInline.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DerivingStrategies #-} 2 | 3 | -- | 4 | -- Module : Text.AsciiDoc.UnparsedInline 5 | -- Copyright : © 2020–present Guillem Marpons 6 | -- License : BSD-3-Clause 7 | -- 8 | -- Maintainer : Guillem Marpons 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- Types for working with input lines meant to be parsed together into an 13 | -- 'Text.AsciiDoc.Inline'. 14 | module Text.AsciiDoc.UnparsedInline 15 | ( UnparsedInline, 16 | InputLine (..), 17 | ) 18 | where 19 | 20 | import Data.List.NonEmpty (NonEmpty) 21 | import Data.Text (Text) 22 | 23 | -- TODO. Type-encode the invariant of UnparsedInline. See branch 24 | -- safe-unparsed-inline for a POC. Assess if it's worth the extra dependencies 25 | -- (uses package some), or if the knowledge obtained during block parsing can be 26 | -- really preserved for inline parsing (and guarantee that we don't try to 27 | -- parse invalid text). 28 | 29 | -- | Non-empty sequence of input lines meant to be parsed together as an inline. 30 | -- 31 | -- __Invariant__: The first element is always a 'MarkupLine'. 32 | -- This guarantees that an @UnparsedInline@ can always be converted to an 33 | -- 'Text.AsciiDoc.Inline'. 34 | type UnparsedInline = NonEmpty InputLine 35 | 36 | data InputLine 37 | = MarkupLine Text 38 | | CommentLine Text 39 | deriving stock (Eq, Show) 40 | -------------------------------------------------------------------------------- /examples/complex-lists-asciidoctor.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Example with complex lists 9 | 10 | 11 | 12 | 15 |
16 |
17 |
Unordered list title
18 |
    19 |
  • 20 |

    list item 1

    21 |
    22 |
      23 |
    • 24 |

      nested list item

      25 |
      26 |
        27 |
      • 28 |

        nested nested list item 1

        29 |
        30 |

        another paragraph in the same nested nested list item 1

        31 |
        32 |
      • 33 |
      • 34 |

        nested nested list item 2

        35 |
        36 |
        Example 1. an example inside item 2
        37 |
        38 |
        39 |

        an example

        40 |
        41 |
        42 |
        43 |
      • 44 |
      45 |
      46 |
    • 47 |
    48 |
    49 |
  • 50 |
  • 51 |

    list item 2

    52 |
  • 53 |
54 |
55 |
56 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /examples/monospace-pandoc.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Example with monospace 8 | 17 | 18 | 19 | 22 | 23 | 24 |
25 | 29 |
30 | 31 |

Monospace can be used in section headers

32 |
33 |

This is a paragraph with some words in monospace and in bold monospace.

34 |
35 |
36 |
37 | 38 | 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2020, Guillem Marpons 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /examples/complex-lists-pandoc.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Example with complex lists 8 | 17 | 18 | 19 | 22 | 23 | 24 |
25 | 29 |
30 | 31 |
32 |
33 | Unordered list title 34 |
35 |
    36 |
  • 37 |

    list item 1

    38 |
    39 |
    40 |
      41 |
    • 42 |

      nested list item

      43 |
      44 |
      45 |
        46 |
      • 47 |

        nested nested list item 1

        48 |
        49 |
        50 |

        another paragraph in the same nested nested list item 1

        51 |
      • 52 |
      • 53 |

        nested nested list item 2

        54 |
        55 |
        56 |
        57 | Example 1. an example inside item 2 58 |
        59 |
        60 |
        61 |

        an example

        62 |
        63 |
        64 |
      • 65 |
      66 |
    • 67 |
    68 |
  • 69 |
  • 70 |

    list item 2

    71 |
  • 72 |
73 |
74 |
75 |
76 | 77 | 78 | -------------------------------------------------------------------------------- /examples/asciidoctor-article-template-asciidoctor.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | AsciiDoc Article Title 9 | 10 | 11 | 12 | 15 |
16 |
17 |

First level heading

18 |
19 |
20 |

This is a paragraph with a bold word and an italicized word.

21 |
22 |
23 |

Second level heading

24 |
25 |
Unordered list title
26 |
    27 |
  • 28 |

    list item 1

    29 |
    30 |
      31 |
    • 32 |

      nested list item

      33 |
      34 |
        35 |
      • 36 |

        nested nested list item 1

        37 |
      • 38 |
      • 39 |

        nested nested list item 2

        40 |
      • 41 |
      42 |
      43 |
    • 44 |
    45 |
    46 |
  • 47 |
  • 48 |

    list item 2

    49 |
  • 50 |
51 |
52 |
53 |

This is a paragraph.

54 |
55 |
56 |
Example 1. Example block title
57 |
58 |
59 |

Content in an example block is subject to normal substitutions.

60 |
61 |
62 |
63 |
64 |
65 |
Sidebar title
66 |
67 |

Sidebars contain aside text and are subject to normal substitutions.

68 |
69 |
70 |
71 |
72 |

Third level heading

73 |
74 |
Fourth level heading
75 |
76 |
Fifth level heading
77 | 78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |

First level heading

86 |
87 | 88 |
89 |
90 |
91 |

First level heading

92 |
93 | 94 |
95 |
96 |
97 | 102 | 103 | 104 | -------------------------------------------------------------------------------- /examples/asciidoctor-article-template.adoc: -------------------------------------------------------------------------------- 1 | 2 | 3 | = AsciiDoc Article Title 4 | // Firstname Lastname 5 | // 3.0, July 29, 2022: AsciiDoc article template 6 | // :toc: 7 | // :icons: font 8 | // :url-quickref: https://docs.asciidoctor.org/asciidoc/latest/syntax-quick-reference/ 9 | 10 | // Content entered directly below the header but before the first section heading is called the preamble. 11 | 12 | == First level heading 13 | 14 | This is a paragraph with a *bold* word and an _italicized_ word. 15 | 16 | // .Image caption 17 | // image::image-file-name.png[I am the image alt text.] 18 | 19 | // This is another paragraph.footnote:[I am footnote text and will be displayed at the bottom of the article.] 20 | 21 | === Second level heading 22 | 23 | .Unordered list title 24 | * list item 1 25 | ** nested list item 26 | *** nested nested list item 1 27 | *** nested nested list item 2 28 | * list item 2 29 | 30 | This is a paragraph. 31 | 32 | .Example block title 33 | ==== 34 | Content in an example block is subject to normal substitutions. 35 | ==== 36 | 37 | .Sidebar title 38 | **** 39 | Sidebars contain aside text and are subject to normal substitutions. 40 | **** 41 | 42 | ==== Third level heading 43 | 44 | // [#id-for-listing-block] 45 | // .Listing block title 46 | // ---- 47 | // Content in a listing block is subject to verbatim substitutions. 48 | // Listing block content is commonly used to preserve code input. 49 | // ---- 50 | 51 | ===== Fourth level heading 52 | 53 | // .Table title 54 | // |=== 55 | // |Column heading 1 |Column heading 2 56 | 57 | // |Column 1, row 1 58 | // |Column 2, row 1 59 | 60 | // |Column 1, row 2 61 | // |Column 2, row 2 62 | // |=== 63 | 64 | ====== Fifth level heading 65 | 66 | // [quote, firstname lastname, movie title] 67 | // ____ 68 | // I am a block quote or a prose excerpt. 69 | // I am subject to normal substitutions. 70 | // ____ 71 | 72 | // [verse, firstname lastname, poem title and more] 73 | // ____ 74 | // I am a verse block. 75 | // Indents and endlines are preserved in verse blocks. 76 | // ____ 77 | 78 | == First level heading 79 | 80 | // TIP: There are five admonition labels: Tip, Note, Important, Caution and Warning. 81 | 82 | // I am a comment and won't be rendered. 83 | 84 | // . ordered list item 85 | // .. nested ordered list item 86 | // . ordered list item 87 | 88 | // The text at the end of this sentence is cross referenced to <<_third_level_heading,the third level heading>> 89 | 90 | == First level heading 91 | 92 | // This is a link to the https://docs.asciidoctor.org/home/[Asciidoctor documentation]. 93 | // This is an attribute reference {url-quickref}[that links this text to the AsciiDoc Syntax Quick Reference]. 94 | -------------------------------------------------------------------------------- /examples/asciidoctor-article-template-pandoc.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | AsciiDoc Article Title 8 | 17 | 18 | 21 | 22 | 23 |
24 | 28 |
29 | 30 |

First level heading

31 |
32 |

This is a paragraph with a bold word and an italicized word.

33 |
34 |

Second level heading

35 |
36 |
37 | Unordered list title 38 |
39 |
    40 |
  • 41 |

    list item 1

    42 |
    43 |
    44 |
      45 |
    • 46 |

      nested list item

      47 |
      48 |
      49 |
        50 |
      • 51 |

        nested nested list item 1

        52 |
      • 53 |
      • 54 |

        nested nested list item 2

        55 |
      • 56 |
      57 |
    • 58 |
    59 |
  • 60 |
  • 61 |

    list item 2

    62 |
  • 63 |
64 |
65 |
66 |

This is a paragraph.

67 |
68 |
69 |
70 | Example 1. Example block title 71 |
72 |
73 |
74 |

Content in an example block is subject to normal substitutions.

75 |
76 |
77 |
78 |
79 |
80 |
81 | Sidebar title 82 |
83 |
84 |

Sidebars contain aside text and are subject to normal substitutions.

85 |
86 |
87 |
88 |

Third level heading

89 |
Fourth level heading
90 |
Fifth level heading
91 |

First level heading

92 |

First level heading

93 |
94 |
95 | 96 | 97 | -------------------------------------------------------------------------------- /src/Text/AsciiDoc/Debug/ParseTest.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | {-# OPTIONS_HADDOCK hide #-} 3 | 4 | module Text.AsciiDoc.Debug.ParseTest 5 | ( OutputType (..), 6 | parseInline, 7 | parseFile, 8 | module Text.AsciiDoc.Blocks, 9 | module Text.AsciiDoc.Inlines, 10 | ) 11 | where 12 | 13 | import qualified Control.Exception as E 14 | -- The following import is correct, but haskell-language-server sometimes complains. 15 | import Control.Monad.Reader 16 | import Data.IORef 17 | import Data.Text (Text) 18 | import qualified Data.Text as T 19 | import qualified Data.Text.IO as T 20 | import Text.AsciiDoc.Blocks hiding (Parser, State) 21 | import qualified Text.AsciiDoc.Blocks as B 22 | import Text.AsciiDoc.Inlines hiding (Parser, State) 23 | import qualified Text.AsciiDoc.Inlines as I 24 | import Text.Parsec (ParsecT, Stream) 25 | -- The following import is correct, but haskell-language-server sometimes complains. 26 | import Text.Parsec.Free.Log (LogType, renderLog) 27 | import Text.Parsec.Prim (runPT, runPTLog) 28 | -- The following import is correct, but haskell-language-server sometimes complains. 29 | import qualified Text.Pretty.Simple as Pretty 30 | 31 | data OutputType = Result | Log 32 | 33 | -- | Usage example: 34 | -- 35 | -- > cabal repl asciidoc-hs:library:debug-with-parsec-free <<< 'parseInline inlinesP Log "*foo*"' 36 | parseInline :: 37 | Show a => 38 | ParsecT Text I.State (ReaderT LogType IO) a -> 39 | OutputType -> 40 | Text -> 41 | IO () 42 | parseInline parser outputType input = do 43 | parseTestLogWithState False parser outputType I.inlineParserInitialState input 44 | 45 | -- | Usage example: 46 | -- 47 | -- > cabal repl asciidoc-hs:library:debug-with-parsec-free <<< 'parseFile pDocument Log "input.adoc"' > output.txt 48 | parseFile :: 49 | Show a => 50 | ParsecT [Text] B.State (ReaderT LogType IO) a -> 51 | OutputType -> 52 | FilePath -> 53 | IO () 54 | parseFile parser outputType file = do 55 | tokens <- readTokens file 56 | parseTestLogWithState False parser outputType B.blockParserInitialState tokens 57 | 58 | readTokens :: FilePath -> IO [Text] 59 | readTokens file = do 60 | t <- T.readFile file 61 | pure $ T.lines t 62 | 63 | parseTestLogWithState :: 64 | (Stream s (ReaderT LogType IO) t, Show a, Show t) => 65 | -- | If True, display every parse, not just the interesting ones 66 | Bool -> 67 | ParsecT s u (ReaderT LogType IO) a -> 68 | OutputType -> 69 | u -> 70 | s -> 71 | IO () 72 | parseTestLogWithState b p outputType state input = do 73 | lg <- newIORef [] 74 | eres <- E.try $ runReaderT (parseTestLogWithStateAux p outputType state input) lg 75 | putStrLn $ case eres of 76 | Left err -> "EXCEPTION => " ++ show (err :: E.SomeException) 77 | Right a -> "Result => " ++ show a 78 | theLog <- readIORef lg 79 | putStrLn $ renderLog b theLog 80 | 81 | parseTestLogWithStateAux :: 82 | (MonadIO m, MonadReader LogType m, Stream s m t, Show a, Show t) => 83 | ParsecT s u m a -> 84 | OutputType -> 85 | u -> 86 | s -> 87 | m () 88 | parseTestLogWithStateAux p outputType state input = do 89 | eres <- case outputType of 90 | Result -> runPT p state "" input 91 | Log -> runPTLog p state "" input 92 | liftIO $ case eres of 93 | Left err -> do 94 | putStr "parse error at " 95 | print err 96 | Right x -> Pretty.pPrint x 97 | -------------------------------------------------------------------------------- /src/Text/AsciiDoc/SourceRange.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE LambdaCase #-} 2 | {-# LANGUAGE OverloadedStrings #-} 3 | 4 | -- | 5 | -- Module : Text.AsciiDoc.SourceRange 6 | -- Copyright : © 2020–present Guillem Marpons 7 | -- License : BSD-3-Clause 8 | -- 9 | -- Maintainer : Guillem Marpons 10 | -- Stability : experimental 11 | -- Portability : portable 12 | -- 13 | -- This module contains functions that traverse an AsciiDoc AST and decorate it 14 | -- with source ranges information. 15 | module Text.AsciiDoc.SourceRange 16 | ( -- * Source ranges 17 | SourcePosition, 18 | SourceRange (..), 19 | addSourceRanges, 20 | childInlines, 21 | contentLength, 22 | ) 23 | where 24 | 25 | import Data.List.NonEmpty 26 | import qualified Data.Text as T 27 | import qualified Optics.Core as Optics 28 | import Optics.Traversal (Traversal') 29 | import Text.AsciiDoc.Inlines 30 | 31 | -- | Extract the immediate descendants (sub-inlines) of an `Inline`. 32 | -- 33 | -- Analogous to @Lens.Plated.children@. 34 | childInlines :: Traversal' Inline Inline 35 | childInlines = Optics.traversalVL subInlines' 36 | where 37 | subInlines' f = \case 38 | StyledText style parameters open inlines close -> 39 | (\x -> StyledText style parameters open x close) <$> traverse f inlines 40 | InlineSeq inlines -> InlineSeq <$> traverse f inlines 41 | x -> pure x 42 | 43 | contentLength :: Inline -> Int 44 | contentLength = \case 45 | Space t -> T.length t 46 | AlphaNum t -> T.length t 47 | Symbol t -> T.length t 48 | Newline t -> T.length t 49 | _ -> 0 50 | 51 | type SourcePosition = (Int, Int) 52 | 53 | data SourceRange = SourceRange SourcePosition SourcePosition 54 | 55 | instance Show SourceRange where 56 | show (SourceRange (l1, c1) (l2, c2)) = 57 | show l1 <> ":" <> show c1 <> "-" <> show l2 <> ":" <> show c2 58 | 59 | -- | TODO: maybe this function should be idempotent. 60 | addSourceRanges :: Inline -> Inline 61 | addSourceRanges = fst . addSourceRanges' (1, 1) 62 | where 63 | addSourceRanges' :: SourcePosition -> Inline -> (Inline, SourcePosition) 64 | addSourceRanges' initial@(initialLine, initialColumn) x = 65 | let (x', (finalLine', finalColumn')) = 66 | Optics.mapAccumLOf 67 | childInlines 68 | addSourceRanges' 69 | (initialLine, initialColumn + prefixLength x) 70 | x 71 | final@(finalLine, finalColumn) = case x of 72 | -- `final` is the _previous_ position to the first position of the 73 | -- next inline, so we use 0 as column value (we increment by one the 74 | -- value returned by this function). 75 | Newline _ -> (finalLine' + 1, 0) 76 | _ -> (finalLine', finalColumn' + contentLength x + suffixLength x - 1) 77 | in (wrap (SourceRange initial final) x', (finalLine, finalColumn + 1)) 78 | wrap :: SourceRange -> Inline -> Inline 79 | wrap range = \case 80 | x@(Newline _) -> x 81 | x@(AlphaNum _) -> 82 | StyledText 83 | Custom 84 | (InlineAttributeList (T.pack $ "data-sourcepos: " <> show range)) 85 | "" 86 | (x :| []) 87 | "" 88 | x -> x 89 | prefixLength = \case 90 | StyledText _ (InlineAttributeList t) o _ _ 91 | | T.null t -> T.length o 92 | | otherwise -> 2 + T.length t + T.length o 93 | _ -> 0 94 | suffixLength = \case 95 | StyledText _ _ _ _ c -> T.length c 96 | _ -> 0 97 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize, reopened] 6 | push: 7 | branches: [main] 8 | 9 | jobs: 10 | ormolu: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout repository 14 | uses: actions/checkout@v2 15 | 16 | - name: Ormolu 17 | uses: mrkkrp/ormolu-action@v2 18 | with: 19 | extra-args: --ghc-opt -XTypeApplications 20 | 21 | cabal: 22 | name: ${{ matrix.os }} / ghc ${{ matrix.ghc }} 23 | runs-on: ${{ matrix.os }} 24 | needs: ormolu 25 | strategy: 26 | matrix: 27 | os: [ubuntu-latest, macos-latest, windows-latest] 28 | cabal: [latest] 29 | ghc: 30 | - "8.10.7" 31 | - "9.0.1" 32 | - "9.2.1" 33 | exclude: 34 | - os: macos-latest 35 | ghc: "8.10.7" 36 | - os: macos-latest 37 | ghc: "9.2.1" 38 | - os: windows-latest 39 | ghc: "8.10.7" 40 | - os: windows-latest 41 | ghc: "9.2.1" 42 | 43 | steps: 44 | - name: Checkout repository 45 | uses: actions/checkout@v2 46 | 47 | - name: Setup Haskell 48 | uses: haskell/actions/setup@v1 49 | id: setup-haskell-cabal 50 | with: 51 | ghc-version: ${{ matrix.ghc }} 52 | cabal-version: ${{ matrix.cabal }} 53 | 54 | - name: Freeze 55 | run: | 56 | cabal freeze 57 | 58 | - name: Cache ~/.cabal/store 59 | uses: actions/cache@v2 60 | with: 61 | path: | 62 | ${{ steps.setup-haskell-cabal.outputs.cabal-store }} 63 | dist-newstyle 64 | key: ${{ runner.os }}-${{ matrix.ghc }}-${{ hashFiles('cabal.project.freeze') }} 65 | restore-keys: ${{ runner.os }}-${{ matrix.ghc }}- 66 | 67 | - name: Build 68 | run: | 69 | cabal configure --enable-tests --enable-benchmarks --enable-documentation --test-show-details=direct 70 | cabal update 71 | cabal build lib:asciidoc-hs exe:asciidoc-hs asciidoc-hs-test 72 | 73 | - name: Test 74 | run: | 75 | cabal test 76 | 77 | - name: Documentation 78 | run: | 79 | cabal haddock 80 | 81 | stack: 82 | name: stack / ghc ${{ matrix.ghc }} 83 | runs-on: ubuntu-latest 84 | strategy: 85 | matrix: 86 | ghc: 87 | - "8.10.7" 88 | - "9.0.1" 89 | 90 | steps: 91 | - name: Checkout repository 92 | uses: actions/checkout@v2 93 | 94 | - name: Setup Haskell Stack 95 | uses: haskell/actions/setup@v1 96 | with: 97 | ghc-version: ${{ matrix.ghc }} 98 | stack-version: ${{ matrix.stack }} 99 | 100 | - name: Cache ~/.stack 101 | uses: actions/cache@v2 102 | with: 103 | path: ~/.stack 104 | key: ${{ runner.os }}-${{ matrix.ghc }}-stack 105 | 106 | - name: Build Stack 107 | run: | 108 | stack build --system-ghc --test --bench --no-run-tests --no-run-benchmarks 109 | 110 | - name: Test Stack 111 | run: | 112 | stack test --system-ghc 113 | 114 | hlint: 115 | runs-on: ubuntu-latest 116 | needs: cabal 117 | 118 | steps: 119 | - name: Checkout repository 120 | uses: actions/checkout@v2 121 | 122 | - name: Setup HLint 123 | uses: rwe/actions-hlint-setup@v1 124 | with: 125 | version: '3.3.1' 126 | 127 | - name: Run HLint 128 | uses: rwe/actions-hlint-run@v2 129 | with: 130 | path: '["src/", "exe/", "test/"]' 131 | fail-on: warning 132 | -------------------------------------------------------------------------------- /test/Tests/Metadata.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Tests.Metadata 4 | ( metadataUnitTests, 5 | ) 6 | where 7 | 8 | import qualified Data.IntMap as IntMap 9 | import Data.List.NonEmpty (NonEmpty (..)) 10 | import qualified Data.Map as Map 11 | import Data.Semigroup (Last (..)) 12 | import Data.Text (Text) 13 | import Test.Hspec.Expectations.Pretty (shouldBe) 14 | import Test.Tasty (TestTree, testGroup) 15 | import Test.Tasty.HUnit (assertFailure, testCase) 16 | import Tests.Blocks (parseTest) 17 | import Text.AsciiDoc.Blocks 18 | import Text.AsciiDoc.Metadata 19 | import Text.AsciiDoc.UnparsedInline 20 | 21 | parseBlockPrefix :: [Text] -> IO (NonEmpty (BlockPrefixItem UnparsedInline)) 22 | parseBlockPrefix t = case parseTest blockPrefixP t of 23 | Right prefix -> pure prefix 24 | Left parseError -> assertFailure $ "Parser fails: " <> show parseError 25 | 26 | metadataUnitTests :: TestTree 27 | metadataUnitTests = 28 | testGroup 29 | "metadata unit tests" 30 | [ testCase "block title" $ do 31 | p <- parseBlockPrefix [".Foo"] 32 | toMetadata p 33 | `shouldBe` mempty {metadataTitle = Just (Last (MarkupLine "Foo" :| []))}, 34 | testCase "standalone block id" $ do 35 | p <- parseBlockPrefix ["[[Foo]]"] 36 | toMetadata p 37 | `shouldBe` (mempty @(Metadata UnparsedInline)) {metadataIds = ["Foo"]}, 38 | testCase "two standalone block ids" $ do 39 | p <- parseBlockPrefix ["[[Foo]]", "[[Bar]]"] 40 | toMetadata p 41 | `shouldBe` (mempty @(Metadata UnparsedInline)) 42 | { metadataIds = ["Foo", "Bar"] 43 | }, 44 | testCase "standalone block style" $ do 45 | p <- parseBlockPrefix ["[Foo]"] 46 | toMetadata p 47 | `shouldBe` (mempty @(Metadata UnparsedInline)) 48 | { metadataStyle = Just (Last "Foo") 49 | }, 50 | testCase "standalone block role" $ do 51 | p <- parseBlockPrefix ["[.Foo]"] 52 | -- Compatible with how Asciidoctor cleans style when none is specified 53 | -- in shortand syntax. 54 | toMetadata p 55 | `shouldBe` (mempty @(Metadata UnparsedInline)) 56 | { metadataStyle = Just (Last ""), 57 | metadataRoles = ["Foo"] 58 | }, 59 | testCase "positional attributes" $ do 60 | p <- parseBlockPrefix ["[Foo, Bar, Baz]"] 61 | toMetadata p 62 | `shouldBe` (mempty @(Metadata UnparsedInline)) 63 | { metadataStyle = Just (Last "Foo"), 64 | metadataPositionalAttributes = IntMap.fromList [(2, "Bar"), (3, "Baz")] 65 | }, 66 | testCase "named attribute" $ do 67 | p <- parseBlockPrefix ["[Foo = Bar]"] 68 | toMetadata p 69 | `shouldBe` (mempty @(Metadata UnparsedInline)) 70 | { metadataNamedAttributes = Map.fromList [("Foo", "Bar")] 71 | }, 72 | testCase "standalone option" $ do 73 | p <- parseBlockPrefix ["[%Foo]"] 74 | -- Compatible with how Asciidoctor cleans style when none is specified 75 | -- in shortand syntax. 76 | toMetadata p 77 | `shouldBe` (mempty @(Metadata UnparsedInline)) 78 | { metadataStyle = Just (Last ""), 79 | metadataOptions = ["Foo"] 80 | }, 81 | testCase "complex example" $ do 82 | p <- 83 | parseBlockPrefix 84 | [ "[.Foo]", 85 | "// Comment", 86 | "[Foo#Foo%Foo.Foo.Bar%%Bar, 'Foo', Foo = Bar]", 87 | "[role = 'Baz Foo']", 88 | "[opts = Baz]", 89 | "", 90 | "[[Bar]]", 91 | "[Bar, Foo = Baz, title=Baz, Bar]" 92 | ] 93 | toMetadata p 94 | `shouldBe` mempty 95 | { metadataStyle = Just (Last "Bar"), 96 | metadataIds = ["Foo", "Bar"], 97 | metadataRoles = ["Baz", "Foo"], 98 | metadataOptions = ["Foo", "", "Bar", "Baz"], 99 | metadataTitle = Just (Last (MarkupLine "Baz" :| [])), 100 | metadataPositionalAttributes = IntMap.fromList [(2, "Foo"), (4, "Bar")], 101 | metadataNamedAttributes = Map.fromList [("Foo", "Baz")], 102 | metadataRoleNamedAttribute = Just (Last ["Baz", "Foo"]) 103 | } 104 | ] 105 | -------------------------------------------------------------------------------- /src/Text/AsciiDoc/Metadata.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DerivingStrategies #-} 2 | {-# LANGUAGE OverloadedStrings #-} 3 | 4 | -- | 5 | -- Module : Text.AsciiDoc.Metadata 6 | -- Copyright : © 2020–present Guillem Marpons 7 | -- License : BSD-3-Clause 8 | -- 9 | -- Maintainer : Guillem Marpons 10 | -- Stability : experimental 11 | -- Portability : portable 12 | -- 13 | -- A flexible Metadata type that can be used to store properties of both 14 | -- 'Text.AsciiDoc.Blocks' and 'Text.AsciiDoc.Inlines'. 15 | -- 16 | -- Its 'Semigroup' and 'Monoid' instances codify the non-trivial rules of 17 | -- metadata addition. 18 | module Text.AsciiDoc.Metadata 19 | ( -- * Metadata Type 20 | Metadata (..), 21 | 22 | -- * ToMetadata Class 23 | ToMetadata (..), 24 | ) 25 | where 26 | 27 | import qualified Data.IntMap as IntMap 28 | import Data.List.NonEmpty (NonEmpty ((:|))) 29 | import qualified Data.Map as Map 30 | import Data.Semigroup (Last (..)) 31 | import Data.Text (Text) 32 | import qualified Data.Text as T 33 | import Text.AsciiDoc.ElementAttributes 34 | import Text.AsciiDoc.UnparsedInline 35 | 36 | data Metadata a = Metadata 37 | { metadataStyle :: Maybe (Last Text), 38 | metadataIds :: [Text], 39 | metadataRoles :: [Text], 40 | metadataOptions :: [Text], 41 | metadataTitle :: Maybe (Last a), 42 | metadataPositionalAttributes :: IntMap.IntMap Text, 43 | -- | Named attributes different than @id@, @opts@, @options@, @role@ and 44 | -- @title@. 45 | metadataNamedAttributes :: Map.Map Text Text, 46 | metadataRoleNamedAttribute :: Maybe (Last [Text]) 47 | } 48 | deriving stock (Eq, Show, Functor) 49 | 50 | instance Semigroup (Metadata a) where 51 | x <> y = 52 | let a = metadataStyle x <> metadataStyle y 53 | b = metadataIds x <> metadataIds y 54 | -- If the right operand contains and explicit "role=ROLENAME" attribute, 55 | -- it replaces whatever roles contained in the left operand. The right 56 | -- operand can append new roles with the "[.ROLENAME]" syntax. 57 | c = case metadataRoleNamedAttribute y of 58 | Just yRoles -> getLast yRoles 59 | Nothing -> metadataRoles x <> metadataRoles y 60 | d = metadataOptions x <> metadataOptions y 61 | e = metadataTitle x <> metadataTitle y 62 | -- Semigroup instance from IntMap gives precedence to values from left 63 | -- operand. In this case we prefer the last value (i.e. value from 64 | -- operand @y@). 65 | f = metadataPositionalAttributes y <> metadataPositionalAttributes x 66 | -- Semigroup instance from Map gives precedence to values from left 67 | -- operand. In this case we prefer the last value (i.e. value from 68 | -- operand @y@). 69 | g = metadataNamedAttributes y <> metadataNamedAttributes x 70 | h = metadataRoleNamedAttribute x <> metadataRoleNamedAttribute y 71 | in Metadata 72 | { metadataStyle = a, 73 | metadataIds = b, 74 | metadataRoles = c, 75 | metadataOptions = d, 76 | metadataTitle = e, 77 | metadataPositionalAttributes = f, 78 | metadataNamedAttributes = g, 79 | metadataRoleNamedAttribute = h 80 | } 81 | 82 | instance Monoid (Metadata a) where 83 | mempty = 84 | Metadata 85 | { metadataStyle = mempty, 86 | metadataIds = mempty, 87 | metadataRoles = mempty, 88 | metadataOptions = mempty, 89 | metadataTitle = mempty, 90 | metadataPositionalAttributes = mempty, 91 | metadataNamedAttributes = mempty, 92 | metadataRoleNamedAttribute = mempty 93 | } 94 | 95 | class ToMetadata b a where 96 | toMetadata :: b -> Metadata a 97 | 98 | instance ToMetadata PositionedAttribute UnparsedInline where 99 | toMetadata (PositionedAttribute (index, PositionalAttribute p)) = 100 | mempty {metadataPositionalAttributes = IntMap.singleton index p} 101 | -- Special treatment of attribute names: @id@, @opts@, @options@, @role@ and 102 | -- @title@. 103 | toMetadata (PositionedAttribute (_, NamedAttribute "id" v)) = 104 | mempty {metadataIds = [v]} 105 | toMetadata (PositionedAttribute (_, NamedAttribute "opts" v)) = 106 | mempty {metadataOptions = [v]} 107 | toMetadata (PositionedAttribute (_, NamedAttribute "options" v)) = 108 | mempty {metadataOptions = [v]} 109 | toMetadata (PositionedAttribute (_, NamedAttribute "role" v)) = 110 | mempty 111 | { metadataRoles = T.words v, 112 | metadataRoleNamedAttribute = Just $ Last $ T.words v 113 | } 114 | toMetadata (PositionedAttribute (_, NamedAttribute "title" v)) = 115 | mempty {metadataTitle = Just $ Last $ MarkupLine v :| []} 116 | -- Any other named attribute 117 | toMetadata (PositionedAttribute (_, NamedAttribute k v)) = 118 | mempty {metadataNamedAttributes = Map.singleton k v} 119 | toMetadata (PositionedAttribute (_, ShorthandSyntaxAttribute s i r o)) = 120 | mempty 121 | { metadataStyle = Just (Last s), 122 | metadataIds = i, 123 | metadataRoles = r, 124 | metadataOptions = o 125 | } 126 | 127 | instance 128 | {-# OVERLAPPABLE #-} 129 | (Foldable f, ToMetadata b a) => 130 | ToMetadata (f b) a 131 | where 132 | toMetadata = foldMap toMetadata 133 | -------------------------------------------------------------------------------- /src/Text/AsciiDoc/LineParsers.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | -- | 4 | -- Module : Text.AsciiDoc.LineParsers 5 | -- Copyright : © 2020–present Guillem Marpons 6 | -- License : BSD-3-Clause 7 | -- 8 | -- Maintainer : Guillem Marpons 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- Parsec-style parser combinators to help classifying individual AsciiDoc 13 | -- lines. 14 | -- 15 | -- All parsers in this module return 'Data.Text.Text', which helps to combine 16 | -- them using the @Monoid@ instance of 'Text.Parsec.ParsecT'. 17 | module Text.AsciiDoc.LineParsers 18 | ( -- * Line parser type 19 | LineParser, 20 | 21 | -- * Concrete parsers 22 | blockId, 23 | blockAttributeList, 24 | 25 | -- * Generic parsers accepting 'Marker's 26 | runOfN, 27 | count, 28 | 29 | -- * Generic parsers accepting 'Text' 30 | anyRemainder, 31 | many, 32 | manyText, 33 | some, 34 | string, 35 | char, 36 | satisfy, 37 | ) 38 | where 39 | 40 | import Control.Monad (MonadPlus) 41 | import qualified Control.Monad.Combinators as PC hiding 42 | ( endBy1, 43 | sepBy1, 44 | sepEndBy1, 45 | some, 46 | someTill, 47 | ) 48 | import qualified Control.Monad.Combinators.NonEmpty as PC 49 | import Data.Char (isAlphaNum, isDigit, isLetter, isSpace, ord) 50 | import Data.Functor.Identity (Identity) 51 | import qualified Data.List.NonEmpty as NE 52 | import Data.Text (Text) 53 | import qualified Data.Text as T 54 | import Text.AsciiDoc.SpecialChars 55 | import qualified Text.Parsec as Parsec 56 | 57 | -- | Parser type used to check syntactic conditions on single lines of an 58 | -- AsciiDoc document. 59 | type LineParser = Parsec.ParsecT Text () Identity 60 | 61 | -- | Accepts a block identifier using the double square bracket syntax: an 62 | -- identifier surrounded by "@[[@" and "@]]@". The identifier must start with a 63 | -- letter, or "@_@", or "@:@". It can contain letters, digits, and some other 64 | -- special characters defined in https://www.w3.org/TR/REC-xml/#NT-Name 65 | -- 66 | -- Note that that the aforementioned requirements only apply to double square 67 | -- bracket syntax: other ways of defining identifiers accept the full range of 68 | -- Unicode characters. For more information see 69 | -- https://asciidoctor.org/docs/user-manual/#custom-ids. 70 | -- 71 | -- Accepts null identifiers (the empty string). 72 | -- 73 | -- It does not accept spaces between the square brackets. 74 | blockId :: LineParser Text 75 | blockId = 76 | string "[[" 77 | *> PC.option 78 | T.empty 79 | ( satisfy isBlockIdStartChar <> many (Parsec.satisfy isBlockIdChar) 80 | ) 81 | <* string "]]" 82 | where 83 | -- TODO. Assess if package charset can be used to speed-up the following 84 | -- checks. 85 | isBlockIdStartChar c = c `elem` ['_', ':'] || isLetter c 86 | isBlockIdChar c = 87 | isBlockIdStartChar c 88 | || isDigit c 89 | || c == '-' 90 | || c == '.' 91 | -- #xB7 92 | || c == '·' 93 | -- #x203F 94 | || c == '‿' 95 | -- #x2040 96 | || c == '⁀' 97 | -- #x0300-#x036F (Combining Diacritical Marks) 98 | || (ord c >= 768 && ord c <= 879) 99 | 100 | -- | Accepts an square-bracket-enclosed string with almost no restrictions on 101 | -- the characters in between. Only the very first character needs to be alphanum 102 | -- or one of the following list: @[\',', \'.', \'#', \'%', \'_', \'º', \'ª', 103 | -- \'\'', \'"']@. This list has been discovered empirically (testing). 104 | -- 105 | -- The input can be empty. 106 | -- 107 | -- The input can contain newline characters, even if this function is usually 108 | -- called over one line strings (Asciidoctor doesn't support multi-line block 109 | -- attribute lists). 110 | blockAttributeList :: LineParser Text 111 | blockAttributeList = do 112 | t <- string "[" *> anyRemainder 113 | let (t', remainder) = T.breakOnEnd "]" t 114 | case (T.all isSpace remainder, T.uncons t', T.unsnoc t') of 115 | -- No chars between square brackets: empty attribute list 116 | (True, Nothing, Just (_, ']')) -> pure "" 117 | -- There are some chars between square brackets: accept them if starting 118 | -- char belongs to a restricted group of characters. 119 | (True, Just (s, _), Just (t'', ']')) | isStartChar s -> pure t'' 120 | -- Fail otherwise (no square bracket at the end, or no correct first char). 121 | _ -> PC.empty 122 | where 123 | isStartChar c = 124 | isAlphaNum c || c `elem` [',', '.', '#', '%', '_', 'º', 'ª', '\'', '"'] 125 | 126 | -- | @runOfN n cs@ creates a list of parsers, one for every character @c@ member 127 | -- of @cs@. Each of these parsers accepts any run of @n@ or more consecutive 128 | -- appearances of @c@. 129 | -- 130 | -- Example: @runOfN 4 [\'+', \'=']@ accepts runs of four or more symbols @"+"@, or 131 | -- four or more symbols @"="@. 132 | runOfN :: Int -> [SpecialChar a] -> [LineParser (Marker a)] 133 | runOfN n = fmap $ \c -> 134 | (\t -> c :* (n + T.length t)) 135 | <$ PC.count n (Parsec.char $ fromSpecialChar c) 136 | <*> many (Parsec.char $ fromSpecialChar c) 137 | 138 | -- | Returns (parses successfully) the remaining text of the line, whatever its 139 | -- content. 140 | anyRemainder :: LineParser Text 141 | -- anyRemainder = Parsec.getInput <* Parsec.setInput "" 142 | anyRemainder = many Parsec.anyChar 143 | 144 | many :: MonadPlus m => m Char -> m Text 145 | many p = T.pack <$> PC.many p 146 | 147 | manyText :: MonadPlus m => m Text -> m Text 148 | manyText p = T.concat <$> PC.many p 149 | 150 | some :: MonadPlus m => m Char -> m Text 151 | some p = T.pack . NE.toList <$> PC.some p 152 | 153 | count :: Int -> SpecialChar a -> LineParser (Marker a) 154 | count m c = c :* m <$ PC.count m (Parsec.char (fromSpecialChar c)) 155 | 156 | string :: String -> LineParser Text 157 | string s = T.pack <$> Parsec.string s 158 | 159 | char :: Char -> LineParser Text 160 | char c = T.singleton <$> Parsec.char c 161 | 162 | satisfy :: (Char -> Bool) -> LineParser Text 163 | satisfy f = T.singleton <$> Parsec.satisfy f 164 | -------------------------------------------------------------------------------- /asciidoc-hs.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 2.4 2 | name: asciidoc-hs 3 | version: 0.0.0.0 4 | synopsis: AsciiDoc parser that can be used as a Pandoc front-end 5 | description: See README.adoc for more info. 6 | homepage: https://github.com/gmarpons/asciidoc-hs 7 | bug-reports: https://github.com/gmarpons/asciidoc-hs/issues 8 | license: BSD-3-Clause 9 | license-file: LICENSE 10 | author: Guillem Marpons 11 | maintainer: Guillem Marpons 12 | copyright: 2020-2021 Guillem Marpons 13 | build-type: Simple 14 | category: Text 15 | extra-doc-files: CHANGELOG.adoc 16 | README.adoc 17 | tested-with: GHC == 8.10.7 18 | GHC == 9.0.1 19 | GHC == 9.2.1 20 | 21 | source-repository head 22 | type: git 23 | location: https://github.com/gmarpons/asciidoc-hs.git 24 | 25 | flag debug-with-parsec-free 26 | Description: Enable debugging with parsec-free 27 | Default: False 28 | Manual: True 29 | 30 | common common-options 31 | build-depends: base >= 4.13.0.0 && < 4.17 32 | ghc-options: -Wall 33 | -Wcompat 34 | -Widentities 35 | -Wincomplete-uni-patterns 36 | -Wincomplete-record-updates 37 | if impl(ghc >= 8.0) 38 | ghc-options: -Wredundant-constraints 39 | if impl(ghc >= 8.2) 40 | ghc-options: -fhide-source-paths 41 | if impl(ghc >= 8.4) 42 | ghc-options: -Wmissing-export-lists 43 | -Wpartial-fields 44 | if impl(ghc >= 8.8) 45 | ghc-options: -Wmissing-deriving-strategies 46 | 47 | if impl(ghc >= 9.2) 48 | default-language: GHC2021 49 | else 50 | default-language: Haskell2010 51 | default-extensions: 52 | DeriveDataTypeable 53 | DeriveFunctor 54 | EmptyDataDeriving 55 | FlexibleContexts 56 | FlexibleInstances 57 | GeneralizedNewtypeDeriving 58 | MultiParamTypeClasses 59 | StandaloneDeriving 60 | TypeApplications 61 | other-extensions: 62 | DerivingStrategies 63 | GADTs 64 | LambdaCase 65 | OverloadedStrings 66 | 67 | library 68 | import: common-options 69 | hs-source-dirs: src 70 | if flag(debug-with-parsec-free) 71 | exposed-modules: 72 | Text.AsciiDoc.Debug.ParseTest 73 | Text.AsciiDoc.Blocks 74 | Text.AsciiDoc.ElementAttributes 75 | Text.AsciiDoc.Inlines 76 | Text.AsciiDoc.LineParsers 77 | Text.AsciiDoc.Metadata 78 | Text.AsciiDoc.Pandoc 79 | Text.AsciiDoc.SourceRange 80 | Text.AsciiDoc.SpecialChars 81 | Text.AsciiDoc.UnparsedInline 82 | else 83 | exposed-modules: 84 | Text.AsciiDoc.Blocks 85 | Text.AsciiDoc.ElementAttributes 86 | Text.AsciiDoc.Inlines 87 | Text.AsciiDoc.LineParsers 88 | Text.AsciiDoc.Metadata 89 | Text.AsciiDoc.Pandoc 90 | Text.AsciiDoc.SourceRange 91 | Text.AsciiDoc.SpecialChars 92 | Text.AsciiDoc.UnparsedInline 93 | if flag(debug-with-parsec-free) 94 | build-depends: 95 | containers >= 0.6.0.1, 96 | mtl >= 2.1.2, 97 | optics-core >= 0.3, 98 | pandoc-types >= 1.20, 99 | parsec-free >= 3.1.11.7, 100 | parser-combinators >= 1.2, 101 | pretty-simple >= 4.0, 102 | syb >= 0.7, 103 | text >= 1.2.4 104 | else 105 | build-depends: 106 | containers >=0.6.0.1 && <0.7, 107 | optics-core >=0.3 && <0.5, 108 | pandoc-types >=1.20 && <1.23, 109 | parsec >=3.1.14 && <3.2, 110 | parser-combinators >=1.2 && <1.4, 111 | syb >=0.7 && <0.8, 112 | text >=1.2.4 && <1.3 113 | 114 | executable asciidoc-hs 115 | import: common-options 116 | hs-source-dirs: exe 117 | main-is: Main.hs 118 | build-depends: 119 | asciidoc-hs, 120 | aeson >=1.4.7 && <2.1, 121 | parsec >=3.1.14 && <3.2, 122 | text >=1.2.4 && <1.3 123 | ghc-options: -threaded 124 | -rtsopts 125 | -with-rtsopts=-N 126 | 127 | test-suite asciidoc-hs-test 128 | import: common-options 129 | type: exitcode-stdio-1.0 130 | hs-source-dirs: test 131 | main-is: Tests/Main.hs 132 | other-modules: Tests.Blocks 133 | Tests.Inlines 134 | Tests.Metadata 135 | build-depends: 136 | asciidoc-hs, 137 | containers >=0.6.0.1 && <0.7, 138 | hspec-expectations-pretty-diff >=0.7.2.5, 139 | parsec >=3.1.14 && <3.2, 140 | pretty-show >=1.10, 141 | repr-tree-syb >=0.1.1, 142 | tasty >=1.1.0.3, 143 | tasty-hunit-compat >=0.2, 144 | text >=1.2.4 && <1.3 145 | ghc-options: -threaded 146 | -rtsopts 147 | -with-rtsopts=-N 148 | -------------------------------------------------------------------------------- /src/Text/AsciiDoc/SpecialChars.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DerivingStrategies #-} 2 | {-# LANGUAGE GADTs #-} 3 | {-# LANGUAGE LambdaCase #-} 4 | {-# LANGUAGE OverloadedStrings #-} 5 | 6 | -- | 7 | -- Module : Text.AsciiDoc.SpecialChars 8 | -- Copyright : © 2020–present Guillem Marpons 9 | -- License : BSD-3-Clause 10 | -- 11 | -- Maintainer : Guillem Marpons 12 | -- Stability : experimental 13 | -- Portability : portable 14 | -- 15 | -- This module defines an algebraic type for describing characters that have a 16 | -- special meaning in AsciiDoc, like @*@, @=@, or @/@. 17 | -- 18 | -- Data constructors of the 'SpecialChar' type (like 'AsteriskD' or 'HyphenL') 19 | -- can be used to preserve full information about what has been parsed. 20 | -- This is useful, for example, to apply the "parse, don't validate" principle. 21 | -- Suffixes "D" or "L" in the aforementioned constructors are explained in the 22 | -- documentation of type 'SpecialChar'. 23 | -- 24 | -- The module also defines types that represent combinations of 'SpecialChar's 25 | -- that have a particular meaning in AsciiDoc: 'Mark's and 'Marker's. 26 | module Text.AsciiDoc.SpecialChars 27 | ( -- * Special characters 28 | CommentChar, 29 | DelimiterChar, 30 | FormatChar, 31 | HeaderChar, 32 | ListChar, 33 | SpecialChar (..), 34 | fromSpecialChar, 35 | 36 | -- * Formatting @Mark@s used in inlines 37 | Mark (..), 38 | fromMark, 39 | isConstrained, 40 | isUnconstrained, 41 | extendedMarksOf, 42 | closingMarkOf, 43 | 44 | -- * @Marker@s for initiating and delimiting blocks 45 | Marker (..), 46 | fromMarker, 47 | ) 48 | where 49 | 50 | import Data.String (IsString (..)) 51 | import Data.Text (Text) 52 | import qualified Data.Text as T (justifyRight) 53 | 54 | -- | Used as a type parameter in 'SpecialChar' to indicate that a character is 55 | -- used in a delimiter for a block comment or in a marker for a line comment. 56 | data CommentChar deriving stock (Eq, Show) 57 | 58 | -- | Used as a type parameter in 'SpecialChar' to indicate that a character is 59 | -- used in a block delimiter. 60 | data DelimiterChar deriving stock (Eq, Show) 61 | 62 | -- | Used as a type parameter in 'SpecialChar' to indicate that a character is 63 | -- used in a formatting mark for defining inline text (such as bold or 64 | -- monospace) and punctuation (such as curved quotation marks) styles. 65 | data FormatChar deriving stock (Eq, Show) 66 | 67 | -- | Used as a type parameter in 'SpecialChar' to indicate that a character is 68 | -- used in a section header marker. 69 | data HeaderChar deriving stock (Eq, Show) 70 | 71 | -- | Used as a type parameter in 'SpecialChar' to indicate that a character is 72 | -- used in a list item marker. 73 | data ListChar deriving stock (Eq, Show) 74 | 75 | -- | Algebraic type for describing characters that have a special meaning in 76 | -- AsciiDoc, like @*@, @=@, or @/@. 77 | -- 78 | -- Every constructor defines a tuple @(character, function in AsciiDoc)@. 79 | -- The last letter of every constructor name indicates the function. 80 | -- The same character can be used in more than one function. 81 | -- We use GADTs to represent those tuples, as in this way we get more precise 82 | -- type checking with no code duplication. 83 | -- The list of possible functions is: 84 | -- 85 | -- * 'CommentChar' 86 | -- * 'DelimiterChar' 87 | -- * 'FormatChar' 88 | -- * 'HeaderChar' 89 | -- * 'ListChar' 90 | data SpecialChar a where 91 | SlashC :: SpecialChar CommentChar 92 | AsteriskD :: SpecialChar DelimiterChar 93 | HyphenD :: SpecialChar DelimiterChar 94 | EqualsSignD :: SpecialChar DelimiterChar 95 | NumberF :: SpecialChar FormatChar 96 | AsteriskF :: SpecialChar FormatChar 97 | UnderscoreF :: SpecialChar FormatChar 98 | GraveF :: SpecialChar FormatChar 99 | EqualsSignH :: SpecialChar HeaderChar 100 | AsteriskL :: SpecialChar ListChar 101 | HyphenL :: SpecialChar ListChar 102 | 103 | deriving stock instance (Eq a) => Eq (SpecialChar a) 104 | 105 | instance Show (SpecialChar a) where 106 | show = show . fromSpecialChar 107 | 108 | fromSpecialChar :: SpecialChar a -> Char 109 | fromSpecialChar = \case 110 | SlashC -> '/' 111 | AsteriskD -> '*' 112 | HyphenD -> '-' 113 | NumberF -> '#' 114 | AsteriskF -> '*' 115 | UnderscoreF -> '_' 116 | GraveF -> '`' 117 | EqualsSignD -> '=' 118 | EqualsSignH -> '=' 119 | AsteriskL -> '*' 120 | HyphenL -> '-' 121 | 122 | -- | A data type describing a sequence of special characters used as formatting 123 | -- marks. 124 | -- 125 | -- A @Mark@ contains at least one character. 126 | -- 127 | -- Most @Mark@s consist of a sequence of one or more repeated symbols, like 128 | -- "@**@" or "@_@". 129 | data Mark 130 | = SingleMark (SpecialChar FormatChar) 131 | | DoubleMark (SpecialChar FormatChar) 132 | deriving stock (Eq) 133 | 134 | instance Show Mark where 135 | show = fromMark 136 | 137 | isUnconstrained :: Mark -> Bool 138 | isUnconstrained (SingleMark _) = False 139 | isUnconstrained (DoubleMark _) = True 140 | 141 | isConstrained :: Mark -> Bool 142 | isConstrained = not . isUnconstrained 143 | 144 | extendedMarksOf :: Mark -> [Mark] 145 | extendedMarksOf (SingleMark c) = [DoubleMark c] 146 | extendedMarksOf _ = [] 147 | 148 | closingMarkOf :: Mark -> Mark 149 | closingMarkOf m@(SingleMark _) = m 150 | closingMarkOf m@(DoubleMark _) = m 151 | 152 | instance IsString Mark where 153 | fromString [x] = 154 | case toFormatChar x of 155 | Just c -> SingleMark c 156 | _ -> error $ "Mark.fromString: bad char: " ++ show x 157 | fromString [x1, x2] = 158 | case (toFormatChar x1, toFormatChar x2) of 159 | (Just c1, Just c2) | c1 == c2 -> DoubleMark c1 160 | _ -> error $ "Mark.fromString: bad chars: " ++ show [x1, x2] 161 | fromString _ = error "Mark.fromString: bad length" 162 | 163 | fromMark :: Mark -> String 164 | fromMark (SingleMark c) = [fromSpecialChar c] 165 | fromMark (DoubleMark c) = [fromSpecialChar c, fromSpecialChar c] 166 | 167 | toFormatChar :: Char -> Maybe (SpecialChar FormatChar) 168 | toFormatChar = \case 169 | '#' -> Just NumberF 170 | '*' -> Just AsteriskF 171 | '_' -> Just UnderscoreF 172 | '`' -> Just GraveF 173 | _ -> Nothing 174 | 175 | -- | An algebraic data type for describing a sequence of special characters that 176 | -- can be: 177 | -- 178 | -- * A marker that signals the start of a new element in the document. 179 | -- Examples of markers are: 180 | -- 181 | -- * "@*@" for starting a new list item. 182 | -- * "@===@" for starting a new level 2 section header. 183 | -- 184 | -- * A delimiter. 185 | -- Examples of delimiters are: 186 | -- 187 | -- * "@****@" or "@====@" or other openings and closings of delimited 188 | -- blocks. 189 | data Marker a 190 | = -- | Most markers consist in a single character repeated a number of times. 191 | SpecialChar a :* Int 192 | 193 | deriving stock instance Eq (Marker CommentChar) 194 | 195 | deriving stock instance Eq (Marker DelimiterChar) 196 | 197 | deriving stock instance Eq (Marker FormatChar) 198 | 199 | deriving stock instance Eq (Marker ListChar) 200 | 201 | deriving stock instance Show (Marker CommentChar) 202 | 203 | deriving stock instance Show (Marker DelimiterChar) 204 | 205 | deriving stock instance Show (Marker FormatChar) 206 | 207 | deriving stock instance Show (Marker ListChar) 208 | 209 | fromMarker :: Marker a -> Text 210 | fromMarker (c :* n) = T.justifyRight n (fromSpecialChar c) "" 211 | -------------------------------------------------------------------------------- /README.adoc: -------------------------------------------------------------------------------- 1 | = Asciidoc-hs 2 | :toc: macro 3 | ifndef::env-github[] 4 | :icons: font 5 | :outfilesuffix: .adoc 6 | :caution-caption: :fire: 7 | :important-caption: :exclamation: 8 | :note-caption: :paperclip: 9 | :tip-caption: :bulb: 10 | :warning-caption: :warning: 11 | endif::[] 12 | // Variables: 13 | :release-version: 14 | // URLs: 15 | :url-repo: https://github.com/gmarpons/asciidoc-hs 16 | 17 | WARNING: This project is in its early stages. 18 | Don't expect to be able to use it in anything relevant, yet. 19 | 20 | `asciidoc-hs` is a parser for the lightweight markup language https://docs.asciidoctor.org/asciidoc/latest/[AsciiDoc] that can be used as a https://pandoc.org/[Pandoc] front-end. 21 | It draws on https://github.com/jgm/commonmark-hs[`commonmark-hs`] and is written in pure Haskell. 22 | 23 | image:https://github.com/gmarpons/asciidoc-hs/workflows/CI/badge.svg[Build Status (GitHub Actions),link={url-repo}/actions] 24 | 25 | toc::[] 26 | 27 | == Mission 28 | 29 | The main goal of this project is to become a reasonably complete implementation of the AsciiDoc language and to allow Pandoc to read AsciiDoc documents. 30 | 31 | We want to read AsciiDoc as it's used today (mainly in its https://docs.asciidoctor.org/asciidoctor/latest/[Asciidoctor] variant), and also support the https://www.eclipse.org/org/workinggroups/asciidoc-charter.php[AsciiDoc Standard] when it arrives. 32 | We're following the standardization process closely and we'll try to contribute to it if we see the opportunity. 33 | 34 | There have been former attempts at a Pandoc Reader for AsciiDoc, but they have been abandoned. 35 | If you want to feed Pandoc with an AsciiDoc source, your best option at the moment is probably to first convert to Docbook using Asciidoctor. 36 | 37 | Secondary goals of the project are (but don't expect any roadmap or timeline at the moment): 38 | 39 | * Be used as infrastructure for building advanced editor and IDE features (e.g., linters and live previewers). 40 | This means to be able to track precise source mapping information, among others. 41 | 42 | * Be scriptable in a similar way to Pandoc, but using and AST/DOM that is specific to AsciiDoc and fully supports its semantic richness. 43 | 44 | * Enable source-to-source transformations, including exact-print for those parts not explicitly modified by the scripts (a.k.a. lossless transformation). 45 | 46 | * End up being distributed together with Pandoc, as a regular Pandoc Reader. 47 | 48 | == Features 49 | 50 | The supported features can be found in our https://github.com/gmarpons/asciidoc-hs/wiki/AsciiDoc-Compatibility-Matrix[compatibility matrix], currently based on Asciidoctor's feature list. 51 | 52 | == Install 53 | 54 | You'll need to build from source for the time being. 55 | We expect to upload the package to Hackage soon. 56 | 57 | If you don't have Haskell installed in your system, try install GHC and Cabal with https://www.haskell.org/ghcup/[`ghcup`], and follow instructions for Cabal below. 58 | 59 | `asciidoc-hs` has been tested with GHC 8.8 and 8.10, and Cabal 3.4. 60 | 61 | === Using Cabal 62 | 63 | First, clone the repository: 64 | 65 | [subs=attributes] 66 | $ git clone {url-repo}.git 67 | $ cd asciidoc-hs 68 | 69 | Then, build using `cabal`: 70 | 71 | $ cabal build 72 | 73 | You can optionally copy the resulting executable under a location of your choice (that should be under the `PATH` environment variable): 74 | 75 | $ cp "$(cabal exec --verbose=0 --offline sh -- -c 'command -v asciidoc-hs')" ~/.local/bin/asciidoc-hs 76 | 77 | === Using Stack 78 | 79 | First, clone the repository: 80 | 81 | [subs=attributes] 82 | $ git clone {url-repo}.git 83 | $ cd asciidoc-hs 84 | 85 | Then, build using `stack`: 86 | 87 | $ stack build 88 | 89 | You can optionally copy the resulting executable under a location of your choice (that should be under the `PATH` environment variable): 90 | 91 | $ cp "$(stack path --local-install-root)/bin/asciidoc-hs" ~/.local/bin/asciidoc-hs 92 | 93 | == Use 94 | 95 | In the https://github.com/gmarpons/asciidoc-hs/tree/main/examples[examples] directory of this repository you can find some example AsciiDoc files and its various conversions. 96 | 97 | The commands used to get the converted files are: 98 | 99 | :filename: ⟨FILENAME⟩ 100 | 101 | * Convert to HTML using Asciidoctor: 102 | + 103 | [subs=attributes] 104 | $ asciidoctor -a "sectids!" -a "showtitle" -a "linkcss" -a "stylesheet=https://cdn.jsdelivr.net/gh/asciidoctor/asciidoctor@2.0/data/stylesheets/asciidoctor-default.css" -a "webfonts!" {filename}.adoc -o - > {filename}-asciidoctor.html 105 | 106 | * Convert to HTML using `asciidoc-hs` + Pandoc: 107 | + 108 | [subs=attributes] 109 | $ cat {filename}.adoc | asciidoc-hs | pandoc -f json -t html5 --standalone --css="https://cdn.jsdelivr.net/gh/asciidoctor/asciidoctor@2.0/data/stylesheets/asciidoctor-default.css" --css="./asciidoc-hs.css" > {filename}-pandoc.html 110 | 111 | * Convert to ODT using `asciidoc-hs` + Pandoc: 112 | + 113 | [subs=attributes] 114 | $ cat {filename}.adoc | asciidoc-hs | pandoc -f json -t odt > {filename}-pandoc.odt 115 | 116 | * Convert to Markdown using `asciidoc-hs` + Pandoc: 117 | + 118 | [subs=attributes] 119 | $ cat {filename}.adoc | asciidoc-hs | pandoc -f json -t markdown_strict > {filename}-pandoc.md 120 | 121 | File https://github.com/gmarpons/asciidoc-hs/blob/main/examples/asciidoctor-article-template.adoc[`asciidoctor-article-template.adoc`] presents a variety of AsciiDoc features. 122 | Those not yet supported by `asciidoc-hs` are commented out. 123 | 124 | == Contribute 125 | 126 | Contributions are Welcome! 127 | 128 | For any of: 129 | 130 | * reporting a bug, 131 | * filling a feature request, 132 | * opening a PR, 133 | 134 | we'll try to stick to the workflow and guidelines set in https://github.com/kowainik/.github/blob/main/CONTRIBUTING.md[Kowainik contributing guidelines], with the difference that we use https://hackage.haskell.org/package/ormolu[`ormolu`] instead of http://hackage.haskell.org/package/stylish-haskell[`stylish-haskell`]. 135 | 136 | === Implement a new AsciiDoc feature 137 | 138 | AsciiDoc is an extensive language. 139 | There are plenty of features still to be implemented, with varying degrees of difficulty. 140 | 141 | The recommended workflow is the following: 142 | 143 | . Check the https://github.com/gmarpons/asciidoc-hs/wiki/AsciiDoc-Compatibility-Matrix[compatibility matrix] to look for unsupported features. 144 | . Fill an issue with the proposed feature if you cannot find it in the https://github.com/gmarpons/asciidoc-hs/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement[issue tracker], yet. 145 | . Go inspect files https://github.com/gmarpons/asciidoc-hs/blob/main/src/Text/AsciiDoc/Inlines.hs[Inlines.hs] or https://github.com/gmarpons/asciidoc-hs/blob/main/src/Text/AsciiDoc/Blocks.hs[Blocks.hs] and see: 146 | ** If the current AST data types (mainly `Inline` and `Block`) support the intended feature, or need to be modified. 147 | ** Look for a similar and already implemented feature, and see how the corresponding parser functions are written. 148 | . Discuss a possible implementation in the issue tracker. 149 | . Modify the aforementioned files. 150 | . Add new https://github.com/gmarpons/asciidoc-hs/tree/main/test/Tests[test cases] for inlines or blocks. 151 | . Add the necessary new cases to functions `convertInline` or `convertBlock` in https://github.com/gmarpons/asciidoc-hs/blob/main/src/Text/AsciiDoc/Pandoc.hs[Pandoc.hs]. 152 | 153 | You don't need to wait to complete the steps above before opening a PR. 154 | In fact, it's better if your code can be reviewed from the beginning. 155 | 156 | == Acknowledgements 157 | 158 | This work has been supported by a https://www.tweag.io/blog/2020-02-14-os-fellowship/[Tweag Open Source Fellowship]. 159 | -------------------------------------------------------------------------------- /src/Text/AsciiDoc/ElementAttributes.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DerivingStrategies #-} 2 | {-# LANGUAGE OverloadedStrings #-} 3 | 4 | -- | 5 | -- Module : Text.AsciiDoc.ElementAttributes 6 | -- Copyright : © 2020–present Guillem Marpons 7 | -- License : BSD-3-Clause 8 | -- 9 | -- Maintainer : Guillem Marpons 10 | -- Stability : experimental 11 | -- Portability : portable 12 | -- 13 | -- Implementation of the Element Attribute Language for AsciiDoc, i.e., the 14 | -- language for describing properties (styles, identifiers, roles, named 15 | -- attributes, etc.) of entities like inlines and blocks. 16 | -- 17 | -- This module contains Parsec-style parsers for the aforementioned language. 18 | -- 19 | -- It tries to be compatible with Asciidoctor. 20 | -- See https://docs.asciidoctor.org/asciidoc/latest/attributes/element-attributes/. 21 | module Text.AsciiDoc.ElementAttributes 22 | ( -- * AST types 23 | ElementAttribute (..), 24 | PositionedAttribute (..), 25 | 26 | -- * Parsers 27 | AttributeParser, 28 | attributeListP, 29 | attributeShorthandSyntaxP, 30 | CommaAcceptance (..), 31 | ) 32 | where 33 | 34 | import Control.Applicative.Permutations 35 | import Control.Monad.Combinators hiding 36 | ( endBy1, 37 | sepBy1, 38 | sepEndBy1, 39 | some, 40 | someTill, 41 | ) 42 | import Control.Monad.Combinators.NonEmpty 43 | import Data.Foldable (fold) 44 | import Data.List.NonEmpty (NonEmpty (..)) 45 | import Data.Text (Text) 46 | import qualified Data.Text as T 47 | import qualified Text.AsciiDoc.LineParsers as LP 48 | import qualified Text.Parsec as Parsec 49 | 50 | data ElementAttribute 51 | = PositionalAttribute Text 52 | | NamedAttribute Text Text 53 | | -- | Special Asciidoctor syntax, all fields are optional and can appear in 54 | -- any order: @[style#id.role1.role2%option1%option2]@. 55 | -- 56 | -- An empty style means no style has been specified. 57 | -- 58 | -- Empty ids, roles and options are admissible, even if they normally rise a 59 | -- warning. 60 | -- 61 | -- More than one id is admissible, even if this situation normally rises a 62 | -- warning. 63 | ShorthandSyntaxAttribute Text [Text] [Text] [Text] 64 | deriving stock (Eq, Show) 65 | 66 | newtype PositionedAttribute = PositionedAttribute (Int, ElementAttribute) 67 | 68 | type AttributeParser = LP.LineParser 69 | 70 | data Position 71 | = Start 72 | | Other 73 | 74 | -- | Converts a @Text@ into a list of attributes. 75 | -- 76 | -- Attributes are separated by commas. 77 | -- 78 | -- Attributes can be empty (the empty string). Attributes can contain square 79 | -- brackets. 80 | -- 81 | -- Attributes can be enclosed between single or double quotes. Quote-enclosed 82 | -- attributes can contain commas and quote characters (if escaped with "@\@"). 83 | -- 84 | -- This parser does not fail for any input. 85 | -- It supports both the empty string and a sequence of spaces as input. 86 | -- 87 | -- __Divergence from Asciidoctor__: When a string of the list is only partially 88 | -- enclosed between single or double quotes, Asciidoctor breaks the string into 89 | -- different attributes, and this function considers it a non-quoted single 90 | -- attribute (i.e., quotes are part of the attribute and commas break the 91 | -- string). 92 | attributeListP :: AttributeParser (NonEmpty ElementAttribute) 93 | attributeListP = 94 | (:|) 95 | <$> attributeP Start 96 | <*> option [] (sepP *> sepBy (attributeP Other) sepP) <* Parsec.eof 97 | where 98 | attributeP position = 99 | Parsec.try (quotedP position '"' <* many Parsec.space <* endP) 100 | <|> Parsec.try (quotedP position '\'' <* many Parsec.space <* endP) 101 | <|> unquotedP position <* many Parsec.space <* endP 102 | quotedP :: Position -> Char -> AttributeParser ElementAttribute 103 | quotedP position quote = do 104 | v <- quotedValueP quote 105 | let shorthandOrError = 106 | Parsec.parse (attributeShorthandSyntaxP AcceptCommas <* Parsec.eof) "" v 107 | case (position, shorthandOrError) of 108 | -- First positional attribute, shorthand syntax accepted 109 | (Start, Right shorthand) -> pure shorthand 110 | -- First positional attribute, shorthand syntax failed: treat as a 111 | -- regular positional attribute 112 | (Start, Left _) -> pure $ PositionalAttribute v 113 | -- Position different from first: regular positional attribute 114 | (Other, _) -> pure $ PositionalAttribute v 115 | unquotedP Start = 116 | Parsec.try unquotedShorthandP 117 | <|> Parsec.try namedP 118 | <|> positionalP 119 | unquotedP Other = 120 | Parsec.try namedP 121 | <|> positionalP 122 | unquotedShorthandP = do 123 | shorthand <- 124 | -- Parsec.try not needed because this functions is only ever called 125 | -- inside a Parsec.try. 126 | attributeShorthandSyntaxP RejectCommas <* many Parsec.space <* endP 127 | case shorthand of 128 | -- No '=' found in the first segment of the potential shorthand syntax 129 | ShorthandSyntaxAttribute style _ _ _ 130 | | T.all (/= '=') style -> pure shorthand 131 | _ -> empty 132 | quotedValueP :: Char -> AttributeParser Text 133 | quotedValueP quote = 134 | between (Parsec.char quote) (Parsec.char quote) $ 135 | LP.manyText (LP.satisfy (\c -> c /= quote && c /= '\\')) 136 | <> LP.manyText 137 | ( (Parsec.try (LP.char '\\' *> LP.char quote) <|> LP.char '\\') 138 | <> LP.manyText (LP.satisfy (\c -> c /= quote && c /= '\\')) 139 | ) 140 | unquotedValueP :: AttributeParser Text 141 | unquotedValueP = T.strip . T.pack <$> many (Parsec.satisfy (/= ',')) 142 | namedP :: AttributeParser ElementAttribute 143 | namedP = 144 | NamedAttribute 145 | <$> nameP <* Parsec.char '=' <* many Parsec.space <*> valueP 146 | nameP :: AttributeParser Text 147 | nameP = LP.some (Parsec.noneOf [' ', ',', '=']) <* many Parsec.space 148 | valueP = 149 | Parsec.try (quotedValueP '"') 150 | <|> Parsec.try (quotedValueP '\'') 151 | <|> unquotedValueP 152 | positionalP = PositionalAttribute <$> unquotedValueP 153 | sepP = Parsec.char ',' <* many Parsec.space 154 | endP = Parsec.lookAhead $ eitherP (LP.char ',') Parsec.eof 155 | 156 | data CommaAcceptance 157 | = AcceptCommas 158 | | RejectCommas 159 | deriving stock (Eq, Show) 160 | 161 | attributeShorthandSyntaxP :: CommaAcceptance -> AttributeParser ElementAttribute 162 | attributeShorthandSyntaxP commaAcceptance = 163 | wrap <$> someTill permutationP endP 164 | where 165 | -- Use monoid instances of (,), Text and [] to collect ids, roles and 166 | -- options. 167 | wrap ((s, (i, (r, o))) :| xs) = 168 | ShorthandSyntaxAttribute 169 | s 170 | (i <> (fst . snd . fold) xs) 171 | (r <> (fst . snd . snd . fold) xs) 172 | (o <> (snd . snd . snd . fold) xs) 173 | permutationP :: AttributeParser (Text, ([Text], ([Text], [Text]))) 174 | permutationP = 175 | runPermutation $ 176 | (\s i r o -> (s, (i, (r, o)))) 177 | <$> toPermutationWithDefault "" styleP 178 | <*> toPermutationWithDefault [] identifierP 179 | <*> toPermutationWithDefault [] roleP 180 | <*> toPermutationWithDefault [] optionP 181 | styleP = LP.many anyChar 182 | identifierP = (: []) <$ Parsec.char '#' <*> LP.many anyChar 183 | roleP = (: []) <$ Parsec.char '.' <*> LP.many anyChar 184 | optionP = (: []) <$ Parsec.char '%' <*> LP.many anyChar 185 | anyChar = 186 | Parsec.noneOf $ 187 | [' ', '#', '.', '%'] <> case commaAcceptance of 188 | AcceptCommas -> [] 189 | RejectCommas -> [','] 190 | endP = Parsec.lookAhead $ eitherP Parsec.eof $ Parsec.oneOf [' ', ','] 191 | -------------------------------------------------------------------------------- /src/Text/AsciiDoc/Pandoc.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE GADTs #-} 2 | {-# LANGUAGE LambdaCase #-} 3 | {-# LANGUAGE OverloadedStrings #-} 4 | 5 | -- | 6 | -- Module : Text.AsciiDoc.Pandoc 7 | -- Copyright : © 2020–present Guillem Marpons 8 | -- License : BSD-3-Clause 9 | -- 10 | -- Maintainer : Guillem Marpons 11 | -- Stability : experimental 12 | -- Portability : portable 13 | -- 14 | -- Converters from AsciiDoc AST node types to Pandoc AST node types. 15 | module Text.AsciiDoc.Pandoc 16 | ( -- * Types 17 | Document, 18 | Inline, 19 | 20 | -- * Conversions 21 | convertDocument, 22 | convertBlock, 23 | convertInline, 24 | -- TODO. Relocate parseInlines function 25 | parseInlines, 26 | ) 27 | where 28 | 29 | import Data.List.NonEmpty as NE (NonEmpty (..)) 30 | import qualified Data.List.NonEmpty as NE 31 | import Data.Semigroup (Last (Last)) 32 | import Data.Text (Text) 33 | import qualified Data.Text as T 34 | import Text.AsciiDoc.Blocks hiding (Parser, State) 35 | import Text.AsciiDoc.Inlines hiding (Parser, State) 36 | import Text.AsciiDoc.Metadata 37 | import Text.AsciiDoc.UnparsedInline 38 | import Text.Pandoc.Builder (Attr) 39 | import qualified Text.Pandoc.Builder as Pandoc 40 | import Text.Pandoc.Definition (Pandoc) 41 | import qualified Text.Parsec as Parsec 42 | 43 | -- type Document = [Block Inline] 44 | 45 | parseInlines :: Document UnparsedInline -> Document Inline 46 | parseInlines = fmap parseInline 47 | 48 | parseInline :: UnparsedInline -> Inline 49 | parseInline x = 50 | case Parsec.runParser inlinesP inlineParserInitialState "" (fromUnparsedInline x) of 51 | Right result -> result 52 | Left parseError -> error $ "parseInlines: " <> show parseError 53 | where 54 | fromUnparsedInline :: UnparsedInline -> Text 55 | -- TODO. Check what line finalizers T.unilines considers, and update the 56 | -- following definition to be congruent with Text.AsciiDoc.Inlines.newlineP. 57 | fromUnparsedInline = T.unlines . fmap toText . NE.toList 58 | toText :: InputLine -> Text 59 | toText = \case 60 | MarkupLine t -> t 61 | CommentLine _ -> "" 62 | 63 | convertDocument :: Document Inline -> Pandoc 64 | convertDocument (Document mHeader bs) = 65 | Pandoc.setMeta "pagetitle" pageTitle $ 66 | Pandoc.doc $ 67 | -- Divergence from Asciidoctor: the following div with role "article" is 68 | -- not necessary in Asciidoctor because "article" role (and other metadata 69 | -- in @metadata@) is assigned to the tag. 70 | Pandoc.divWith 71 | (toAttr $ articleMetadata {metadataRoles = "article" : metadataRoles articleMetadata}) 72 | ( Pandoc.divWith 73 | (toAttr $ mempty {metadataIds = ["header"]}) 74 | -- Spurious @plain@ block needed to avoid Pandoc fuse
and 75 | (Pandoc.plain " " <> headerContent) 76 | <> Pandoc.divWith 77 | (toAttr $ mempty {metadataIds = ["content"]}) 78 | -- Spurious @plain@ block needed to avoid Pandoc fuse
and 79 | (Pandoc.plain " " <> contentTitle <> foldMap convertBlock bs) 80 | ) 81 | where 82 | -- TODO. Var sectionMetadata will be used when correct section nesting is 83 | -- computed 84 | (pageTitle, articleMetadata, _sectionMetadata, headerContent, contentTitle) = 85 | case mHeader of 86 | Just (DocumentHeader p level i) -> 87 | let m = toMetadata p :: Metadata UnparsedInline 88 | t = convertInline i 89 | h = Pandoc.header (level + 1) t 90 | in if level == 0 91 | then 92 | ( t, 93 | m, 94 | mempty :: Metadata UnparsedInline, 95 | h, 96 | mempty 97 | ) 98 | else 99 | ( t, 100 | m {metadataIds = [], metadataRoles = []}, 101 | mempty {metadataRoles = metadataRoles m}, 102 | mempty, 103 | Pandoc.divWith (toAttr $ mempty {metadataIds = metadataIds m}) h 104 | ) 105 | Nothing -> ("Untitled", mempty, mempty, mempty, mempty) 106 | 107 | convertBlock :: Block Inline -> Pandoc.Blocks 108 | convertBlock = \case 109 | Paragraph p i -> 110 | -- We cannot use <> to extend m because, to mimic Asciidoctor, metadataRoles 111 | -- appending doesn't preserve the left value if the right one uses 112 | -- "role=..." syntax. See Monoid Metadata instance. 113 | let m = toMetadata p 114 | in Pandoc.divWith (toAttr $ m {metadataRoles = "paragraph" : metadataRoles m}) $ 115 | prependTitleDiv (fmap parseInline m) $ 116 | Pandoc.para $ convertInline i 117 | -- Divergence from Asciidoctor: we add the possible title in the section 118 | -- prefix to the header of the section, and not to the first non-header block 119 | -- found, as Asciidoctor does. 120 | Section p level i bs -> 121 | let m = toMetadata p 122 | mSect = m {metadataRoles = T.pack ("sect" ++ show (level + 1)) : metadataRoles m} 123 | in Pandoc.divWith (toAttr mSect) $ 124 | prependTitleDiv (fmap parseInline m) $ 125 | Pandoc.headerWith mempty (level + 1) (convertInline i) 126 | <> foldMap convertBlock bs 127 | -- TODO. Compute Section's (nesting) before converting. The following case 128 | -- should be redundant. 129 | -- TODO. Add a Metadata value to SectionHeaderBlock and avoid recalculating it 130 | SectionHeader p level i -> 131 | let m = toMetadata p 132 | in Pandoc.divWith (toAttr m) $ 133 | prependTitleDiv (fmap parseInline m) $ 134 | Pandoc.headerWith mempty (level + 1) (convertInline i) 135 | List (Unordered Nothing) p bss -> 136 | let m = toMetadata p 137 | in Pandoc.divWith (toAttr $ m {metadataRoles = "ulist" : metadataRoles m}) $ 138 | prependTitleDiv (fmap parseInline m) $ 139 | Pandoc.bulletList $ NE.toList $ fmap (foldMap convertBlock) bss 140 | -- TODO. Cover other list types 141 | List _ _p _ -> undefined 142 | Nestable Example p bs -> 143 | let m = toMetadata p 144 | -- TODO. This prefix should come from a (localized) variable (L10N) 145 | prefix = parseInline $ MarkupLine "Example 1. " :| [] 146 | title = fmap ((prefix <>) . parseInline) <$> metadataTitle m 147 | in Pandoc.divWith (toAttr $ m {metadataRoles = "exampleblock" : metadataRoles m}) $ 148 | prependTitleDiv (m {metadataTitle = title}) $ 149 | Pandoc.divWith (toAttr $ mempty {metadataRoles = ["content"]}) $ 150 | foldMap convertBlock bs 151 | Nestable Sidebar p bs -> 152 | let m = toMetadata p 153 | in Pandoc.divWith (toAttr $ m {metadataRoles = "sidebarblock" : metadataRoles m}) $ 154 | Pandoc.divWith (toAttr $ mempty {metadataRoles = ["content"]}) $ 155 | -- In contrast with Nestable Example, here the title is part of the 156 | -- content, to mimic Asciidoctor 157 | prependTitleDiv (fmap parseInline m) $ 158 | foldMap convertBlock bs 159 | -- TODO. Cover other nestable types 160 | Nestable _ _p _ -> undefined 161 | DanglingBlockPrefix _ -> mempty 162 | 163 | convertInline :: Inline -> Pandoc.Inlines 164 | convertInline = \case 165 | AlphaNum t -> Pandoc.str t 166 | EndOfInline _ -> mempty 167 | Newline t -> Pandoc.str t 168 | Space _ -> Pandoc.space 169 | InlineSeq inlines -> foldMap convertInline inlines 170 | StyledText Bold as _ inlines _ 171 | | as == defaultAttributeList -> 172 | Pandoc.strong $ foldMap convertInline inlines 173 | -- NOTE. Asciidoctor creates a single element with attributes in this 174 | -- case, but Pandoc AST doesn't support attributes in nodes. The 175 | -- following is semantically equivalent: 176 | StyledText Bold as _ inlines _ -> 177 | Pandoc.spanWith (toAttr $ toMetadata as) $ 178 | Pandoc.strong $ foldMap convertInline inlines 179 | -- NOTE. Pandoc's AST don't support "mark" elements, but the HTML writer 180 | -- produces a element if the class "mark" is added to the span. 181 | -- 182 | -- As Asciidoctor, we only produce a element for "empty" custom spans. 183 | StyledText Custom as _ inlines _ -> 184 | let m = toMetadata as :: Metadata UnparsedInline 185 | -- Returns a functions that *can* enclose its argument into a span, 186 | -- depending on 'as'. 187 | encloseInSpan :: Pandoc.Inlines -> Pandoc.Inlines 188 | encloseInSpan 189 | | as == defaultAttributeList = 190 | Pandoc.spanWith (toAttr $ mempty {metadataRoles = ["mark"]}) 191 | -- If attributes are not significant for translation to Pandoc, in 192 | -- congruence with Asciidoctor we skip the superfluous span. 193 | | toAttr m == mempty = id 194 | | otherwise = Pandoc.spanWith $ toAttr $ toMetadata as 195 | in encloseInSpan $ foldMap convertInline inlines 196 | StyledText Italic as _ inlines _ 197 | | as == defaultAttributeList -> 198 | Pandoc.emph $ foldMap convertInline inlines 199 | -- NOTE. Asciidoctor creates a single element with attributes in this 200 | -- case, but Pandoc AST doesn't support attributes in nodes. The 201 | -- following is semantically equivalent: 202 | StyledText Italic as _ inlines _ -> 203 | Pandoc.spanWith (toAttr $ toMetadata as) $ 204 | Pandoc.emph $ foldMap convertInline inlines 205 | -- NOTE. Pandoc's Code nodes do not support nested markup. It's a pity because 206 | -- HTML element does. Among the three following solutions, we choose 1) 207 | -- because it's simpler than 2) and produces cleaner (probably more robust for 208 | -- backends that are not HTML) Pandoc output, and with 3) a lot of markup is 209 | -- potentially lost. Also, it's congruent with the treatment of above: 210 | -- 211 | -- 1) Convert Monospace into a span, do not call Pandoc.code, and add a 212 | -- "monospace" class. I.e., do not generate element in HTML and 213 | -- delegate monospace markup to CSS or another formatting mechanism. 214 | -- 215 | -- 2) Break inlines into pieces of uniform markup, and apply Pandoc.code as 216 | -- the inner-most construct of each piece. Apply other markup to the pieces 217 | -- that need it. 218 | -- 219 | -- 3) Convert inlines into a Text with no markup applied. 220 | -- 221 | -- TODO. Find a solution that can work for other writers than HTML. 222 | StyledText Monospace as _ inlines _ -> 223 | Pandoc.spanWith (toAttr $ mempty {metadataRoles = ["monospace"]} <> toMetadata as) $ 224 | foldMap convertInline inlines 225 | Symbol t -> Pandoc.str t 226 | 227 | toAttr :: Metadata UnparsedInline -> Attr 228 | toAttr m = (identifier, classes, keyvals) 229 | where 230 | identifier = case metadataIds m of 231 | [] -> "" 232 | (x : _) -> x -- TODO. Support multiple id's per entity. 233 | classes = metadataRoles m 234 | -- We could do 235 | -- Map.toList $ metadataNamedAttributes m 236 | -- but keyvals is used by Pandoc for data-* attributes. 237 | keyvals = [] 238 | 239 | -- | In case the metadata contains a block title, this function prepends a div 240 | -- with the title to the provided 'Pandoc.Blocks' value. 241 | prependTitleDiv :: Metadata Inline -> Pandoc.Blocks -> Pandoc.Blocks 242 | prependTitleDiv m = case metadataTitle m of 243 | Nothing -> id 244 | Just (Last i) -> 245 | mappend $ 246 | Pandoc.divWith (toAttr $ mempty {metadataRoles = ["title"]}) $ 247 | Pandoc.plain $ convertInline i 248 | -------------------------------------------------------------------------------- /src/Text/AsciiDoc/Inlines.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DerivingStrategies #-} 2 | {-# LANGUAGE GADTs #-} 3 | {-# LANGUAGE LambdaCase #-} 4 | {-# LANGUAGE OverloadedStrings #-} 5 | 6 | -- | 7 | -- Module : Text.AsciiDoc.Inlines 8 | -- Copyright : © 2020–present Guillem Marpons 9 | -- License : BSD-3-Clause 10 | -- 11 | -- Maintainer : Guillem Marpons 12 | -- Stability : experimental 13 | -- Portability : portable 14 | -- 15 | -- This module contains Parsec-style parsers for AsciiDoc inline elements. 16 | -- 17 | -- It tries to be compatible with Asciidoctor, but uses a grammar-based parsing 18 | -- approach instead of regexes. 19 | -- 20 | -- There are three kinds of terminals in the grammar: 21 | -- 22 | -- * Alpha-numeric sequences ('AlphaNum'). 23 | -- * Gaps: space-like character sequences and 'Newline's (including the 24 | -- surrounding space). 25 | -- * Other characters: punctuation symbols, mathematical symbols, etc. 26 | -- It includes formatting and punctuation marks. 27 | -- 28 | -- These groups of characters govern how constrained enclosures are parsed. 29 | module Text.AsciiDoc.Inlines 30 | ( -- * AST types 31 | Inline (..), 32 | Style (..), 33 | InlineAttributeList (..), 34 | defaultAttributeList, 35 | 36 | -- * Parsers 37 | inlinesP, 38 | 39 | -- * Parser type 40 | Parser, 41 | State (..), 42 | inlineParserInitialState, 43 | ) 44 | where 45 | 46 | import Control.Monad (when) 47 | import Control.Monad.Combinators (someTill) 48 | import Control.Monad.Combinators hiding 49 | ( endBy1, 50 | sepBy1, 51 | sepEndBy1, 52 | some, 53 | someTill, 54 | ) 55 | import Control.Monad.Combinators.NonEmpty (some) 56 | import Data.Char (isAlphaNum, isSpace) 57 | import Data.Functor (void) 58 | import Data.Generics (Data, Typeable) 59 | import Data.List.NonEmpty (NonEmpty (..), (<|)) 60 | import qualified Data.List.NonEmpty as NE 61 | import Data.Text (Text) 62 | import qualified Data.Text as T 63 | import Text.AsciiDoc.ElementAttributes 64 | import Text.AsciiDoc.Metadata 65 | import Text.AsciiDoc.SpecialChars 66 | import Text.AsciiDoc.UnparsedInline 67 | import Text.Parsec (()) 68 | import qualified Text.Parsec as Parsec 69 | ( ParsecT, 70 | char, 71 | eof, 72 | getState, 73 | label, 74 | lookAhead, 75 | notFollowedBy, 76 | parse, 77 | putState, 78 | try, 79 | ) 80 | import qualified Text.Parsec.Char as Parsec 81 | ( anyChar, 82 | satisfy, 83 | string, 84 | ) 85 | 86 | type Parser m = Parsec.ParsecT Text State m 87 | 88 | -- | Custom parser state for the parser for 'Inline's. 89 | newtype State = State 90 | { -- | A stack (LIFO) of formatting and punctuation 'Mark's. 91 | -- 92 | -- Every mark in the stack is the opening mark for the corresponding 93 | -- enclosure. 94 | -- Top of the stack contains the opening mark for most recently open 95 | -- enclosure. 96 | openEnclosures :: [Mark] 97 | } 98 | deriving newtype (Eq, Show) 99 | 100 | inlineParserInitialState :: State 101 | inlineParserInitialState = 102 | State 103 | { openEnclosures = [] 104 | } 105 | 106 | -- | Formatting styles for inline text. 107 | data Style 108 | = Bold 109 | | Custom 110 | | Italic 111 | | Monospace 112 | deriving stock (Eq, Show, Typeable, Data) 113 | 114 | -- | Subscript 115 | -- | Superscript 116 | 117 | -- | Every formatting 'Mark' maps to a 'Style', and this function computes this 118 | -- map. 119 | toStyle :: Mark -> Style 120 | toStyle = \case 121 | SingleMark NumberF -> Custom 122 | SingleMark AsteriskF -> Bold 123 | SingleMark UnderscoreF -> Italic 124 | SingleMark GraveF -> Monospace 125 | DoubleMark NumberF -> Custom 126 | DoubleMark AsteriskF -> Bold 127 | DoubleMark UnderscoreF -> Italic 128 | DoubleMark GraveF -> Monospace 129 | 130 | -- | A data type for all the different inline types. 131 | -- 132 | -- Some inline types can contain other inlines, and there is a constructor 133 | -- 'InlineSeq' serving as a general inline container. 134 | data Inline 135 | = AlphaNum Text 136 | | EndOfInline Text 137 | | InlineSeq (NonEmpty Inline) 138 | | Newline Text 139 | | Space Text 140 | | StyledText Style InlineAttributeList Text (NonEmpty Inline) Text 141 | | Symbol Text 142 | deriving stock (Eq, Show, Typeable, Data) 143 | 144 | -- | InlineMacro Text 145 | -- | EscapedSymbol Text 146 | -- | DoubleEscapedSymbol Text 147 | 148 | instance Semigroup Inline where 149 | InlineSeq x <> InlineSeq y = InlineSeq (x <> y) 150 | InlineSeq x <> b = InlineSeq $ x <> (b :| []) 151 | a <> InlineSeq y = InlineSeq $ a <| y 152 | a <> b = InlineSeq $ a :| [b] 153 | 154 | -- EBNF grammar non-terminal symbols ------------------------------------------ 155 | 156 | -- The parser can be read as an EBNF grammar, with starting symbol 'inlinesP'. 157 | -- The resulting grammar would be ambiguous, so there are a series of functions 158 | -- used to discard some parsing paths ('muP', 'piP', 'sigmaP', 'phiP', 'psiP', 159 | -- 'omegaP'), that work together with order in alternatives to give a completely 160 | -- deterministic parser. 161 | 162 | -- | This function is the only exported parser function for AsciiDoc inlines. 163 | -- 164 | -- If more than one individual inlines can be parsed, it returns all of them 165 | -- encapsulated into an 'InlineSeq'. 166 | inlinesP :: Monad m => Parser m Inline 167 | inlinesP = 168 | (\(x :| xs) ys -> InlineSeq (x :| xs ++ ys)) 169 | <$> (firstP "F") 170 | <*> ( concat 171 | <$> many 172 | ( NE.toList <$> Parsec.label gapWithOptionalContinuationP "N1" 173 | <|> Parsec.label (sigmaP *> nonGapSequenceP) "N2" 174 | ) 175 | ) 176 | <* Parsec.eof 177 | 178 | unconstrainedP :: Monad m => Parser m Inline 179 | unconstrainedP = Parsec.try $ do 180 | Parsec.label (pure ()) "U" 181 | ps <- option defaultAttributeList inlineAttributeListP 182 | phiP 183 | openMark <- openP ["##", "**", "__", "``"] 184 | is <- inlinesInUnconstrainedP 185 | closeMark <- closeP openMark 186 | pure $ StyledText (toStyle openMark) ps (fromMarkT openMark) is (fromMarkT closeMark) 187 | where 188 | fromMarkT = T.pack . fromMark 189 | inlinesInUnconstrainedP = 190 | (\(x :| xs) ys -> x :| xs ++ ys) 191 | <$> ((gapWithOptionalContinuationP <|> firstP) "Y_p | F") 192 | <*> ( concat 193 | <$> many 194 | ( NE.toList <$> Parsec.label gapWithOptionalContinuationP "N1" 195 | <|> Parsec.label (sigmaP *> nonGapSequenceP) "N2" 196 | ) 197 | ) 198 | {-# ANN unconstrainedP ("HLint: ignore" :: String) #-} 199 | 200 | constrainedP :: Monad m => Parser m Inline 201 | constrainedP = Parsec.try $ do 202 | Parsec.label (pure ()) "C" 203 | ps <- option defaultAttributeList inlineAttributeListP 204 | varphiP 205 | openMark <- openP ["#", "*", "_", "`"] 206 | is <- inlinesInConstrainedP 207 | omegaP 208 | closeMark <- closeP openMark 209 | pure $ StyledText (toStyle openMark) ps (fromMarkT openMark) is (fromMarkT closeMark) 210 | where 211 | fromMarkT = T.pack . fromMark 212 | inlinesInConstrainedP = 213 | (\(x :| xs) ys -> x :| xs ++ ys) 214 | <$> (firstP "F") 215 | <*> ( concat 216 | <$> many 217 | ( Parsec.label gapWithContinuationP "N1" 218 | <|> Parsec.label (muP *> nonGapSequenceP) "N2" 219 | ) 220 | ) 221 | {-# ANN constrainedP ("HLint: ignore" :: String) #-} 222 | 223 | firstP :: Monad m => Parser m (NonEmpty Inline) 224 | firstP = 225 | -- Notice the similarity with alphaNumOrOtherP. 226 | (:| []) 227 | <$> alphaNumP 228 | <|> piP *> otherWithContinuationP 229 | 230 | nonGapSequenceP :: Monad m => Parser m [Inline] 231 | nonGapSequenceP = 232 | (:) 233 | <$> (unconstrainedP <|> otherP) 234 | <*> option [] alphaNumOrOtherP 235 | 236 | openP :: Monad m => [Mark] -> Parser m Mark 237 | openP ms = do 238 | Parsec.label (pure ()) "M_<" 239 | mark <- choice $ Parsec.try . markP <$> ms 240 | -- What follows is not part of the EBNF description of the language, but it's 241 | -- easier to put it here than create a specific function for it. 242 | st <- Parsec.getState 243 | Parsec.putState $ st {openEnclosures = mark : openEnclosures st} 244 | Parsec.label (pure ()) $ "State: " ++ show (mark : openEnclosures st) 245 | pure mark 246 | 247 | closeP :: Monad m => Mark -> Parser m Mark 248 | closeP openMark = do 249 | -- Passing a mark to this function is redundant, but the openP/closeP 250 | -- connection makes the interface more clear for callers. It can also be used 251 | -- to (run-time) check for some programming errors. 252 | Parsec.label (pure ()) $ "M_>: " ++ show openMark 253 | let closeMark = closingMarkOf openMark 254 | _ <- markP closeMark 255 | -- What follows is not part of the EBNF description of the language, but it's 256 | -- easier to put it here than create a specific function for it. 257 | st <- Parsec.getState 258 | case openEnclosures st of 259 | (e : es) -> do 260 | when (closingMarkOf e /= closeMark) $ 261 | error "closeP: trying to close mark different from the innermost enclosure" 262 | Parsec.putState $ st {openEnclosures = es} 263 | [] -> error "closeP: trying to close non-existent enclosure" 264 | pure closeMark 265 | 266 | gapWithContinuationP :: Monad m => Parser m [Inline] 267 | gapWithContinuationP = 268 | flip Parsec.label "Y" $ 269 | -- Parsec.try is necessary because we can accept gapP and fail afterwards. 270 | Parsec.try $ 271 | (++) 272 | <$> (NE.toList <$> gapP) 273 | <*> ((: []) <$> alphaNumP <|> NE.toList <$ sigmaP <*> otherWithContinuationP) 274 | 275 | gapWithOptionalContinuationP :: Monad m => Parser m (NonEmpty Inline) 276 | gapWithOptionalContinuationP = 277 | flip Parsec.label "Y_p" $ 278 | (\(h :| t) c -> h :| t ++ c) 279 | <$> gapP 280 | <*> option [] ((: []) <$> alphaNumP <|> NE.toList <$ sigmaP <*> otherWithContinuationP) 281 | 282 | otherWithContinuationP :: Monad m => Parser m (NonEmpty Inline) 283 | otherWithContinuationP = 284 | flip Parsec.label "X" $ 285 | ((:|) <$> unconstrainedP <*> option [] alphaNumOrOtherP) 286 | <|> ((:|) <$> constrainedP <*> option [] gapOrOtherWithContinuationP) 287 | <|> ((:|) <$> otherP <*> option [] alphaNumOrOtherP) 288 | where 289 | gapOrOtherWithContinuationP = 290 | flip Parsec.label "Y | X" $ 291 | gapWithContinuationP 292 | <|> muP *> (NE.toList <$> otherWithContinuationP) 293 | 294 | alphaNumOrOtherP :: Monad m => Parser m [Inline] 295 | alphaNumOrOtherP = 296 | flip Parsec.label "A | X" $ 297 | (: []) 298 | <$> alphaNumP 299 | <|> muP *> (NE.toList <$> otherWithContinuationP) 300 | 301 | gapP :: Monad m => Parser m (NonEmpty Inline) 302 | gapP = flip Parsec.label "G" $ some newlineOrSpaceP 303 | where 304 | newlineOrSpaceP = 305 | newlineP 306 | -- Inline the definition of spaceP here allows to avoid redundant checks 307 | -- against '\n' or '\r'. 308 | <|> Space . T.pack . NE.toList <$> some (Parsec.satisfy isSpace) 309 | 310 | -- Functions for disambiguating the EBNF grammar ------------------------------ 311 | 312 | -- | Function called after a character of kind alphanum or other, and before a 313 | -- character of kind other. 314 | -- 315 | -- TODO. Check that this function together with 'sigmaP' cover all cases at the 316 | -- beginning of N 317 | -- 318 | -- It fails if an open enclosure can be closed (using 'closableMarks'), or a 319 | -- full unconstrained enclosure can be parsed, at current input. 320 | muP :: Monad m => Parser m () 321 | muP = do 322 | Parsec.label (pure ()) "MU" 323 | st <- Parsec.getState 324 | Parsec.notFollowedBy (choice $ tryToCloseMarkP <$> closableMarks (openEnclosures st)) 325 | <|> Parsec.lookAhead (void unconstrainedP) 326 | 327 | -- | Function called after the opening mark of an enclosure (i.e., after a 328 | -- character of kind other), and before a character of kind other. 329 | -- 330 | -- It's identical to 'muP' with the exception that the mark recently open (top 331 | -- of the stack) is not taken into account. 332 | -- 333 | -- It fails if an open enclosure (except the innermost one) can be closed (using 334 | -- 'closableMarks'), or a full unconstrained enclosure can be parsed, at current 335 | -- input. 336 | piP :: Monad m => Parser m () 337 | piP = do 338 | Parsec.label (pure ()) "PI" 339 | st <- Parsec.getState 340 | case openEnclosures st of 341 | (_ : es) -> 342 | Parsec.notFollowedBy (choice $ tryToCloseMarkP <$> closableMarks es) 343 | <|> Parsec.lookAhead (void unconstrainedP) 344 | [] -> pure () 345 | 346 | -- | Function called after a character of kind gap, and before a character of 347 | -- kind other. 348 | -- 349 | -- It fails if any unconstrained open enclosure can be closed. 350 | sigmaP :: Monad m => Parser m () 351 | sigmaP = do 352 | Parsec.label (pure ()) "SIGMA" 353 | st <- Parsec.getState 354 | Parsec.notFollowedBy $ 355 | choice $ tryToCloseMarkP <$> filter isUnconstrained (openEnclosures st) 356 | 357 | -- | Function called before the opening mark for an unconstrained enclosure 358 | -- (i.e., before a character of kind other) 359 | -- 360 | -- It fails if we're trying to open an already open mark. 361 | -- 362 | -- It takes into account the case that we can open both an arleady open mark and 363 | -- an extension of it (e.g., "@**@" is an extension of "@*@"). This function 364 | -- doesn't fail in this case, and the parser will try to open the extended mark. 365 | phiP :: Monad m => Parser m () 366 | phiP = do 367 | Parsec.label (pure ()) "PHI" 368 | st <- Parsec.getState 369 | Parsec.notFollowedBy (choice $ markP <$> openEnclosures st) 370 | <|> void (Parsec.lookAhead (choice $ markP <$> concatMap extendedMarksOf (openEnclosures st))) 371 | 372 | -- | Function called before the opening mark for a constrained enclosure (i.e., 373 | -- before a character of kind other). 374 | -- 375 | -- It fails if we're trying to open an already open mark. 376 | varphiP :: Monad m => Parser m () 377 | varphiP = do 378 | Parsec.label (pure ()) "VARPHI" 379 | st <- Parsec.getState 380 | Parsec.notFollowedBy $ choice $ markP <$> openEnclosures st 381 | 382 | -- | Function called after a character of kind alphanum or other, and before the 383 | -- closing mark for a constrained enclosure (i.e., before a character of kind 384 | -- other). 385 | -- 386 | -- It fails if we can close an open extended version (i.e., an unconstrained 387 | -- enclosure) of the mark we're trying to close, as closing the unconstrained 388 | -- enclosure takes priority. 389 | omegaP :: Monad m => Parser m () 390 | omegaP = do 391 | Parsec.label (pure ()) "OMEGA" 392 | st <- Parsec.getState 393 | Parsec.notFollowedBy $ 394 | choice $ tryToCloseMarkP <$> filter isUnconstrained (openEnclosures st) 395 | 396 | closableMarks :: [Mark] -> [Mark] 397 | closableMarks ms = x ++ filter isUnconstrained y 398 | where 399 | (x, y) = span isConstrained ms 400 | 401 | tryToCloseMarkP :: Monad m => Mark -> Parser m () 402 | tryToCloseMarkP m = Parsec.try $ do 403 | Parsec.label (pure ()) $ "tryToCloseMarkP: " ++ show m 404 | _ <- markP $ closingMarkOf m 405 | when (isConstrained m) $ do 406 | Parsec.eof <|> void (Parsec.satisfy (not . isAlphaNum)) 407 | 408 | -- EBNF grammar terminal symbols ---------------------------------------------- 409 | 410 | markP :: Monad m => Mark -> Parser m Mark 411 | markP m = m <$ Parsec.string (fromMark m) 412 | 413 | otherP :: Monad m => Parser m Inline 414 | otherP = 415 | Symbol . T.singleton 416 | <$> Parsec.satisfy (\c -> not (isSpace c || isAlphaNum c)) 417 | 418 | -- | It parses as newlines the combinations: 419 | -- 420 | -- * @CR@ 421 | -- * @CR LF@ 422 | -- * @LF@ 423 | -- 424 | -- This is the exact set parsed by @libasciidoc@. At the moment we do not 425 | -- consider the combination @LF CR@ (used in some systems, see 426 | -- https://en.wikipedia.org/wiki/Newline#Representation) as a single newline. 427 | newlineP :: Monad m => Parser m Inline 428 | newlineP = 429 | wrap <$> newlineP' <*> optional Parsec.eof 430 | where 431 | wrap t Nothing = Newline t 432 | wrap t (Just ()) = EndOfInline t 433 | newlineP' :: Monad m => Parser m Text 434 | newlineP' = 435 | (<>) <$> singletonP '\r' <*> option "" (singletonP '\n') 436 | <|> singletonP '\n' 437 | singletonP :: Monad m => Char -> Parser m Text 438 | singletonP c = T.singleton <$> Parsec.char c 439 | 440 | alphaNumP :: Monad m => Parser m Inline 441 | alphaNumP = 442 | AlphaNum . T.pack . NE.toList 443 | <$> Parsec.label (some wordCharP) "A" 444 | where 445 | wordCharP = Parsec.satisfy $ \c -> 446 | isAlphaNum c 447 | 448 | -- Parser for element attribute (aka parameter) lists ------------------------ 449 | 450 | newtype InlineAttributeList = InlineAttributeList Text 451 | deriving newtype (Eq, Show) 452 | deriving stock (Data, Typeable) 453 | 454 | -- | This instance accepts the same kind of attributes than the instance for 455 | -- 'Text.AsciiDoc.Blocks.BlockPrefixItem's, including the shorthand syntax. 456 | -- 457 | -- Attributes @title@ and @opts@/@options@ have currently no meaning for 458 | -- inlines, but they are still parsed and stored in the resulting 'Metadata' 459 | -- value. 460 | -- 461 | -- __Divergence from Asciidoctor__: The aforementioned behavior implies that 462 | -- some inputs produce different results than Asciidoctor. 463 | -- Asciidoctor only honours @role@ and @id@ attributes and messes up the rest. 464 | instance ToMetadata InlineAttributeList UnparsedInline where 465 | toMetadata (InlineAttributeList "") = mempty 466 | toMetadata (InlineAttributeList t) = 467 | case Parsec.parse attributeListP "" t of 468 | Right attributes -> 469 | toMetadata $ PositionedAttribute <$> NE.zip (1 :| [2 ..]) attributes 470 | Left _ -> error "toMetadata @InlineAttributeList: parse should not fail" 471 | 472 | defaultAttributeList :: InlineAttributeList 473 | defaultAttributeList = InlineAttributeList "" 474 | 475 | -- | Accepts an square-bracket-enclosed string with no restrictions on the 476 | -- characters in between, provided that there is at least one such character. 477 | inlineAttributeListP :: Monad m => Parser m InlineAttributeList 478 | inlineAttributeListP = 479 | flip Parsec.label "P" $ 480 | InlineAttributeList . T.pack 481 | <$ Parsec.char '[' <*> someTill Parsec.anyChar (Parsec.char ']') 482 | -------------------------------------------------------------------------------- /src/Text/AsciiDoc/Blocks.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DerivingStrategies #-} 2 | {-# LANGUAGE GADTs #-} 3 | {-# LANGUAGE OverloadedStrings #-} 4 | 5 | -- | 6 | -- Module : Text.AsciiDoc.Blocks 7 | -- Copyright : © 2020–present Guillem Marpons 8 | -- License : BSD-3-Clause 9 | -- 10 | -- Maintainer : Guillem Marpons 11 | -- Stability : experimental 12 | -- Portability : portable 13 | -- 14 | -- This module contains Parsec-style parsers for AsciiDoc block elements. 15 | -- 16 | -- It tries to be compatible with Asciidoctor. 17 | module Text.AsciiDoc.Blocks 18 | ( -- * AST types 19 | HeaderLevel, 20 | ListType (..), 21 | ListCheckStatus (..), 22 | NestableBlockType (..), 23 | AdmonitionType (..), 24 | LiteralBlockType (..), 25 | LiteralIndentation (..), 26 | BlockMacroType (..), 27 | MacroArguments (..), 28 | IncludeOptions (..), 29 | AttributeId, 30 | Comment (..), 31 | MetadataItem (..), 32 | BlockPrefixItem (..), 33 | UnparsedBlockPrefix, 34 | Block (..), 35 | DocumentHeader (..), 36 | Document (..), 37 | 38 | -- * Parsers 39 | documentP, 40 | documentHeaderP, 41 | blocksP, 42 | blockP, 43 | blockPrefixP, 44 | attributeEntryP, 45 | blockIdP, 46 | blockAttributeListP, 47 | blockTitleP, 48 | nestableP, 49 | sectionHeaderP, 50 | paragraphP, 51 | danglingBlockPrefixP, 52 | initialBlankLinesP, 53 | blankLineP, 54 | 55 | -- * Parser type 56 | Parser, 57 | State (..), 58 | blockParserInitialState, 59 | 60 | -- * Helper low-level parsers 61 | lineP, 62 | lineP', 63 | lineOneOfP, 64 | lineNoneOfP, 65 | includeP, 66 | openDelimiterP, 67 | closeDelimiterP, 68 | satisfyToken, 69 | ) 70 | where 71 | 72 | import Control.Arrow (Arrow ((&&&))) 73 | import Control.Monad.Combinators hiding 74 | ( endBy1, 75 | sepBy1, 76 | sepEndBy1, 77 | some, 78 | someTill, 79 | ) 80 | import Control.Monad.Combinators.NonEmpty 81 | import Data.Char (isSpace) 82 | import Data.List.NonEmpty (NonEmpty (..), (<|)) 83 | import qualified Data.List.NonEmpty as NE 84 | import qualified Data.Map as Map 85 | import Data.Maybe (catMaybes) 86 | import Data.Semigroup (Last (..)) 87 | import Data.Text (Text) 88 | import qualified Data.Text as T 89 | import Text.AsciiDoc.ElementAttributes 90 | import qualified Text.AsciiDoc.LineParsers as LP 91 | import Text.AsciiDoc.Metadata 92 | import Text.AsciiDoc.SpecialChars 93 | import Text.AsciiDoc.UnparsedInline 94 | import Text.Parsec (()) 95 | import qualified Text.Parsec as Parsec 96 | import Text.Parsec.Char (alphaNum, char, space) 97 | 98 | -- | Greater or equal than 0. 99 | -- A section header prefixed by one single "@=@" has level 0, and one with two 100 | -- "@=@"'s has level 1. 101 | -- This follows Asciidoctor's behavior. 102 | type HeaderLevel = Int 103 | 104 | -- Text: can contain symbols, does not begin nor end with space. 105 | -- Text': can end with spaces. 106 | data ListType 107 | = Description -- PEG: Space* Text Space* "::" (Space+ Text)? Space* 108 | | Ordered -- PEG: Space* "."+ Space+ Text' 109 | | Unordered (Maybe ListCheckStatus) -- PEG: Space* ("*"+ / "-"+ / ... ) Space+ Text' 110 | | -- | Callouts can be conceptualized as belonging to the block they follow 111 | -- from, but Asciidoctor treats them as an independent entity, very similar to 112 | -- any other list. 113 | Callout -- PEG: "<" (Num / ".") ">" Space+ Text' 114 | deriving stock (Eq, Show) 115 | 116 | data ListCheckStatus 117 | = Checked 118 | | Unchecked 119 | deriving stock (Eq, Show) 120 | 121 | data NestableBlockType 122 | = Admonition AdmonitionType 123 | | Example 124 | | Sidebar 125 | | Quote 126 | | -- | Open block (delimited with "--") with non-standard name. 127 | Other Text 128 | deriving stock (Eq, Show) 129 | 130 | data AdmonitionType 131 | = Note 132 | | Tip 133 | | Important 134 | | Caution 135 | | Warning 136 | deriving stock (Eq, Show) 137 | 138 | -- | Literal block types are subject by default to substitution group 139 | -- "Verbatim", if not stated otherwise. The actual substitutions applied can be 140 | -- modified with the @subs@ block attribute, nonetheless. 141 | data LiteralBlockType 142 | = Fenced 143 | | Listing 144 | | Literal LiteralIndentation 145 | | -- | Default substitution group: None (aka Passthrough). 146 | Passthrough 147 | | Source 148 | | -- | Default substitution group: None (aka Passthrough). 149 | Stem 150 | | Verse 151 | deriving stock (Eq, Show) 152 | 153 | -- | The @Int@ is the indentation of the block. If the @Literal@ block is not 154 | -- signaled by indentation (i.e., @....@ or @[literal]@ is used), then 155 | -- indentation is 0 (all preceding space is copied verbatim as content). 156 | newtype LiteralIndentation = LiteralIndentation Int 157 | deriving newtype (Eq, Show) 158 | 159 | data BlockMacroType 160 | = ImageBlockMacro 161 | | TableOfContentsMacro 162 | | CustomBlockMacro 163 | deriving stock (Eq, Show) 164 | 165 | data MacroArguments = MacroArguments 166 | deriving stock (Eq, Show) 167 | 168 | data IncludeOptions 169 | = IncludeOptions 170 | deriving stock (Eq, Show) 171 | 172 | type AttributeId = Text 173 | 174 | data Comment 175 | = LineCommentSequence (NonEmpty Text) 176 | | BlockComment [Text] 177 | deriving stock (Eq, Show) 178 | 179 | -- | A Block can be preceded by an arbitrary (finite) list of @MetadataItem@s. 180 | -- 181 | -- This is a syntactic element. Every value of this type comes from a source 182 | -- line. 183 | data MetadataItem a 184 | = -- | A block can have more than one ID (aka anchor), and all of them can be 185 | -- used in cross-references. 186 | BlockId Text 187 | | -- | A block can be preceded by any number of @BlockTitle@s (aka labels). 188 | -- Only the last one is semantically relevant. 189 | BlockTitle a 190 | | -- | A block can be preceded by any number of @BlockAttributeList@s. For 191 | -- positional arguments, only the last list is taken into account. 192 | -- 193 | -- Some of the elements of the list can be name-value pairs. 194 | -- 195 | -- TODO. Check if some attributes in the list can contain full inlines, as 196 | -- it's the case with standalone (aka attribute entry) attributes. 197 | BlockAttributeList Text 198 | deriving stock (Eq, Show, Functor) 199 | 200 | instance ToMetadata (MetadataItem UnparsedInline) UnparsedInline where 201 | toMetadata (BlockId i) = mempty {metadataIds = [i]} 202 | toMetadata (BlockTitle t) = mempty {metadataTitle = Just $ Last t} 203 | toMetadata (BlockAttributeList "") = mempty 204 | toMetadata (BlockAttributeList t) = 205 | case Parsec.parse attributeListP "" t of 206 | Right attributes -> 207 | toMetadata $ PositionedAttribute <$> NE.zip (1 :| [2 ..]) attributes 208 | Left _ -> error "toMetadata @(MetadataItem UnparsedInline): parse should not fail" 209 | 210 | data BlockPrefixItem a 211 | = MetadataItem (MetadataItem a) 212 | | -- | A value of @Nothing@ means the attribute has been unset. 213 | AttributeEntry AttributeId (Maybe a) 214 | | Comment Comment 215 | deriving stock (Eq, Show, Functor) 216 | 217 | instance ToMetadata (BlockPrefixItem UnparsedInline) UnparsedInline where 218 | toMetadata (MetadataItem x) = toMetadata x 219 | toMetadata (AttributeEntry _ _) = mempty 220 | toMetadata (Comment _) = mempty 221 | 222 | type UnparsedBlockPrefix = [BlockPrefixItem UnparsedInline] 223 | 224 | -- | A Block consists, syntactically, of one or more contiguous and complete 225 | -- lines of text. 226 | -- Some block types can contain other blocks. 227 | data Block a 228 | = -- | Regular paragraph. 229 | Paragraph UnparsedBlockPrefix a 230 | | -- | This data constructor is not used during parsing, it requires an 231 | -- additional "nesting" pass. 232 | -- 233 | -- There can be a @Section@ inside an, e.g., open block, but it needs to 234 | -- have style @discrete@. 235 | Section UnparsedBlockPrefix HeaderLevel a [Block a] 236 | | -- | A section header contains the same information as a section, except the 237 | -- contained sequence of blocks. 238 | -- 239 | -- After the "nesting" pass, all @SectionHeader@s but @discrete@ ones are 240 | -- converted to proper @Section@s. 241 | SectionHeader UnparsedBlockPrefix HeaderLevel a 242 | | List ListType UnparsedBlockPrefix (NonEmpty (NonEmpty (Block a))) 243 | | {- Table -- TODO. Many things here -} 244 | {- ThematicBreak UnparsedBlockPrefix -} 245 | {- PageBreak UnparsedBlockPrefix -} 246 | 247 | -- | Sequence of blocks of some defined type that allows nested blocks 248 | -- inside (i.e. admonition, sidebar, example, quote, and open block with no 249 | -- other standard type). 250 | Nestable NestableBlockType UnparsedBlockPrefix [Block a] 251 | | {- VerseBlock UnparsedBlockPrefix [a] -} 252 | {- -- | Block type determines substitution group applied: @Verbatim@ or @None@ 253 | -- (aka passthrough). 254 | -- 255 | -- TODO: Check that designed pipeline guarantees that pre-processor 256 | -- directives are expanded (if not escaped) even in literal blocks, as 257 | -- https://asciidoctor.org/docs/user-manual/#include-processing states. 258 | LiteralBlock LiteralBlockType UnparsedBlockPrefix [Text] -} 259 | 260 | {- -- | Some macros accept block metadata, as e.g. @toc::[]@, that accepts 261 | -- defining its title with @.TITLE@ syntax. 262 | BlockMacro BlockMacroType UnparsedBlockPrefix MacroArguments -} 263 | DanglingBlockPrefix UnparsedBlockPrefix 264 | deriving stock (Eq, Show, Functor) 265 | 266 | data DocumentHeader a 267 | = DocumentHeader UnparsedBlockPrefix HeaderLevel a 268 | deriving stock (Eq, Show, Functor) 269 | 270 | data Document a 271 | = Document (Maybe (DocumentHeader a)) [Block a] 272 | deriving stock (Eq, Show, Functor) 273 | 274 | -- Document { 275 | -- docPrefix :: UnparsedBlockPrefix, 276 | -- docTitle :: a, 277 | -- docBlocks :: [Block UnparsedInline] 278 | -- } 279 | 280 | -- | Custom parser state for the parser for 'Block's. 281 | data State = State 282 | { -- | A stack of open 'Nestable' blocks. 283 | -- Innermost element is the top of the stack. 284 | -- 285 | -- For every nestable block we store: 286 | -- 287 | -- * The syntactic 'DelimiterChar' used to open the block. 288 | -- This is what we need to recognize the matching closing delimiter. 289 | -- * A stack of list item markers previously used in the current (possibly 290 | -- nested, aka multi-level, list). 291 | -- If the parser position is not currently on a list, the stack is empty. 292 | -- 293 | -- The list representing the stack of open nestable blocks is non-empty: at 294 | -- the bottom of the stack there is always a value representing the 295 | -- top-level document (defined in 'State's @Monoid@ instance), so a 296 | -- one-element stack indicates no nestable block has been open. 297 | openBlocks :: NonEmpty (Marker DelimiterChar, [Marker ListChar]), 298 | -- | An environment mapping attribute names to their values (i.e. inlines). 299 | env :: Map.Map AttributeId Text 300 | } 301 | deriving stock (Eq, Show) 302 | 303 | blockParserInitialState :: State 304 | blockParserInitialState = 305 | State 306 | { -- We use @'*' :* 0@ as an arbitrary value that is always present as the 307 | -- bottom of the stack. 308 | openBlocks = (AsteriskD :* 0, []) :| [], 309 | env = mempty 310 | } 311 | 312 | type Parser m = Parsec.ParsecT [Text] State m 313 | 314 | documentP :: Monad m => Parser m (Document UnparsedInline) 315 | documentP = 316 | Document 317 | <$ option () includeP 318 | <* initialBlankLinesP 319 | <*> optional (Parsec.try documentHeaderP "document header") 320 | <* many blankLineP 321 | <*> blocksP 322 | <* Parsec.eof 323 | 324 | documentHeaderP :: Monad m => Parser m (DocumentHeader UnparsedInline) 325 | documentHeaderP = do 326 | prefix <- option [] (NE.toList <$> blockPrefixP) 327 | (level, i) <- rawSectionHeaderP 328 | pure $ DocumentHeader prefix level i 329 | 330 | blocksP :: Monad m => Parser m [Block UnparsedInline] 331 | blocksP = many (blockP []) "blocks" 332 | 333 | blockP :: Monad m => [LP.LineParser Text] -> Parser m (Block UnparsedInline) 334 | blockP extraParagraphFinalizers = do 335 | prefix <- option [] (NE.toList <$> blockPrefixP) 336 | blockP' prefix "block" 337 | where 338 | blockP' prefix = 339 | (nestableP prefix "nestable") 340 | <|> (sectionHeaderP prefix "section header") 341 | <|> (listP prefix "list") 342 | <|> (paragraphP prefix extraParagraphFinalizers "paragraph") 343 | <|> (danglingBlockPrefixP prefix "dangling block prefix") 344 | 345 | blockPrefixP :: Monad m => Parser m (NonEmpty (BlockPrefixItem UnparsedInline)) 346 | blockPrefixP = some pBlockPrefixItem "block prefix" 347 | where 348 | pBlockPrefixItem = 349 | Comment <$> blockCommentP 350 | <|> Comment <$> lineCommentSequenceP 351 | <|> attributeEntryP 352 | <|> blockIdP 353 | <|> blockAttributeListP 354 | <|> blockTitleP 355 | 356 | blockCommentP :: Monad m => Parser m Comment 357 | blockCommentP = do 358 | _ :* n <- choice $ fmap lineP' $ LP.runOfN 4 [SlashC] 359 | -- We use here an alternative version of lineP, called lineP', that does not 360 | -- try to handle pre-processor directives, as includes have no effect inside 361 | -- block comments. 362 | ts <- 363 | manyTill (lineP' LP.anyRemainder) $ 364 | eitherP (lineP' (LP.count n SlashC)) Parsec.eof 365 | option () includeP 366 | _ <- many blankLineP 367 | pure $ BlockComment ts 368 | {-# ANN blockCommentP ("HLint: ignore" :: String) #-} 369 | 370 | lineCommentSequenceP :: Monad m => Parser m Comment 371 | lineCommentSequenceP = 372 | LineCommentSequence <$> some lineCommentP <* many blankLineP 373 | 374 | -- | Parses a line starting with *exactly* two '/'s. 375 | lineCommentP :: Monad m => Parser m Text 376 | lineCommentP = 377 | lineP (LP.string "//" *> Parsec.notFollowedBy (char '/') *> LP.anyRemainder) 378 | 379 | -- TODO. Add attribute continuations. 380 | attributeEntryP :: Monad m => Parser m (BlockPrefixItem UnparsedInline) 381 | attributeEntryP = attributeEntryP' <* many blankLineP 382 | where 383 | attributeEntryP' = do 384 | (k, v) <- 385 | lineP 386 | ( (,) <$ LP.char ':' <*> LP.some alphaNum 387 | <* LP.char ':' 388 | <* LP.some space <*> LP.anyRemainder 389 | ) 390 | Parsec.modifyState $ \st -> st {env = Map.insert k v (env st)} 391 | pure $ AttributeEntry k $ Just (MarkupLine v :| []) 392 | 393 | blockIdP :: Monad m => Parser m (BlockPrefixItem a) 394 | blockIdP = blockIdP' <* many blankLineP 395 | where 396 | blockIdP' = MetadataItem . BlockId <$> lineP LP.blockId 397 | 398 | blockAttributeListP :: Monad m => Parser m (BlockPrefixItem a) 399 | blockAttributeListP = blockAttributeListP' <* many blankLineP 400 | where 401 | blockAttributeListP' = 402 | MetadataItem . BlockAttributeList 403 | <$> lineP LP.blockAttributeList 404 | 405 | blockTitleP :: Monad m => Parser m (BlockPrefixItem UnparsedInline) 406 | blockTitleP = blockTitleP' <* many blankLineP 407 | where 408 | blockTitleP' = 409 | MetadataItem . BlockTitle . (:| []) . MarkupLine 410 | <$> lineP (LP.char '.' *> (LP.satisfy (not . isSpace) <> LP.anyRemainder)) 411 | 412 | -- | Parses a nestable delimited block. 413 | nestableP :: 414 | Monad m => 415 | UnparsedBlockPrefix -> 416 | Parser m (Block UnparsedInline) 417 | nestableP prefix = do 418 | c <- openDelimiterP [AsteriskD, EqualsSignD] 419 | bs <- manyTill (blockP []) $ eitherP closeDelimiterP Parsec.eof 420 | _ <- many blankLineP 421 | pure $ case c of 422 | AsteriskD -> Nestable Sidebar prefix bs 423 | HyphenD -> error "nestableP: HyphenD case not implemented yet" 424 | EqualsSignD -> Nestable Example prefix bs 425 | 426 | -- | Parses a section header and computes its level. 427 | -- 428 | -- __POST-CONDITION__: The computed level is greater or equal than 0. 429 | sectionHeaderP :: 430 | Monad m => 431 | UnparsedBlockPrefix -> 432 | Parser m (Block UnparsedInline) 433 | sectionHeaderP prefix = do 434 | -- Post-condition above follows from the fact that 'LP.runOfN 1' can only 435 | -- return texts of length >= 1. 436 | -- TODO. Use type-level Nat in 'Marker', so post-condition can be checked by 437 | -- the compiler. 438 | state <- Parsec.getState 439 | case (NE.tail (openBlocks state), style) of 440 | -- If parser is currently inside a nestable block (tail state.openBlocks is 441 | -- not null), and the section header we're trying to parse has a style 442 | -- different from "discrete", this parser must fail (and the text be 443 | -- considered a regular paragraph). 444 | (_ : _, Nothing) -> empty 445 | (_ : _, Just (Last t)) | t /= "discrete" -> empty 446 | -- In any other case: parse as a section header. 447 | _ -> do 448 | (level, text) <- rawSectionHeaderP 449 | _ <- many blankLineP 450 | pure $ SectionHeader prefix level text 451 | where 452 | style = metadataStyle $ toMetadata @_ @UnparsedInline $ prefix 453 | 454 | rawSectionHeaderP :: Monad m => Parser m (HeaderLevel, UnparsedInline) 455 | rawSectionHeaderP = 456 | (\(_c :* n, x) -> (n - 1, MarkupLine x :| [])) 457 | <$> lineP 458 | ( (,) 459 | <$> choice (LP.runOfN 1 [EqualsSignH]) <* some space 460 | <*> (LP.satisfy (not . isSpace) <> LP.anyRemainder) 461 | ) 462 | 463 | listP :: 464 | (Monad m) => 465 | UnparsedBlockPrefix -> 466 | Parser m (Block UnparsedInline) 467 | listP prefix = 468 | listP' prefix <* many blankLineP 469 | where 470 | allUnorderedMarkers = LP.runOfN 1 [AsteriskL, HyphenL] 471 | listP' prefix' = do 472 | state <- Parsec.getState 473 | let allowedMarkers = allUnorderedMarkers 474 | -- Disallow as markers those markers already in use in the current 475 | -- list tree of the innermost open block 476 | disallowedMarkers = snd currentBlock 477 | (currentBlock, otherBlocks) = NE.head &&& NE.tail $ openBlocks state 478 | -- Accept item with a new marker 479 | (marker@(c :* n), firstLine) <- 480 | itemFirstLineP allowedMarkers disallowedMarkers 481 | -- Add new marker to the state 482 | Parsec.setState $ 483 | state 484 | { openBlocks = 485 | (fst currentBlock, marker : disallowedMarkers) :| otherBlocks 486 | } 487 | -- Complete the first item, using the already parsed first line 488 | firstItem <- 489 | itemP firstLine 490 | "first item " <> T.unpack (fromMarker marker) 491 | -- Accept items with the same marker of the first item 492 | nextItems <- 493 | many 494 | ( itemFirstLineP [LP.count n c] [] 495 | >>= itemP . snd "item " <> T.unpack (fromMarker marker) 496 | ) 497 | -- Recover state present at the beginning of the function. Functions like 498 | -- pItem could have modified it. 499 | Parsec.setState state 500 | pure $ List (Unordered Nothing) prefix' (firstItem :| nextItems) 501 | itemFirstLineP = 502 | \x -> lineP . itemFirstLine x 503 | itemFirstLine :: 504 | [LP.LineParser (Marker ListChar)] -> 505 | [Marker ListChar] -> 506 | LP.LineParser (Marker ListChar, Text) 507 | itemFirstLine allowedMarkers disallowedMarkers = do 508 | _ <- many space 509 | marker <- choice allowedMarkers 510 | if marker `elem` disallowedMarkers 511 | then empty 512 | else do 513 | _ <- some space 514 | remainder <- LP.satisfy (not . isSpace) <> LP.anyRemainder 515 | pure (marker, remainder) 516 | itemP firstLine = do 517 | -- As we are inside a list, any list marker is a finalizer of the current 518 | -- item (no blank line needed) 519 | nextLines <- 520 | many $ 521 | paragraphContinuationP [snd <$> itemFirstLine allUnorderedMarkers []] 522 | nextBlocks <- 523 | option 524 | [] 525 | ( ((: []) <$> sublistP "sublist") 526 | <|> catMaybes <$> many (listContinuationP "list continuation") 527 | "next blocks" 528 | ) 529 | _ <- many blankLineP 530 | pure $ Paragraph [] (MarkupLine firstLine :| nextLines) :| nextBlocks 531 | -- __Divergence DVB001 from Asciidoctor__. Before sublist: 532 | -- 533 | -- * Full prefix (including attributes and block title) is allowed. 534 | -- 535 | -- * Any number of blank lines is allowed. 536 | -- 537 | -- Probably a linter should warn against any block prefix not preceded by 538 | -- blank lines. 539 | sublistP = Parsec.try $ do 540 | _ <- many blankLineP 541 | prefix' <- option [] (NE.toList <$> blockPrefixP) 542 | listP prefix' 543 | -- __Divergence DVB002 from Asciidoctor__: As in classic AsciiDoc, no blank 544 | -- lines are allowed before the @+@ sign. 545 | listContinuationP :: Monad m => Parser m (Maybe (Block UnparsedInline)) 546 | listContinuationP = 547 | lineP (LP.char '+') 548 | *> optional blankLineP 549 | *> optional (blockP [snd <$> itemFirstLine allUnorderedMarkers []]) 550 | 551 | paragraphP :: 552 | Monad m => 553 | UnparsedBlockPrefix -> 554 | [LP.LineParser Text] -> 555 | Parser m (Block UnparsedInline) 556 | paragraphP prefix extraFinalizers = 557 | Paragraph prefix <$> paragraphP' <* many blankLineP 558 | where 559 | paragraphP' = 560 | (:|) <$> firstP <*> many (paragraphContinuationP extraFinalizers "paragraph continuation") 561 | firstP :: Monad m => Parser m InputLine 562 | firstP = 563 | MarkupLine 564 | <$> lineNoneOfP 565 | -- Nestable 566 | ( (fmap fromMarker <$> LP.runOfN 4 [AsteriskD, EqualsSignD]) 567 | <> [ 568 | -- Blank line 569 | pure "" 570 | ] 571 | ) 572 | 573 | -- Line comments (but not block comments!) can be contained in a paragraph. 574 | paragraphContinuationP :: Monad m => [LP.LineParser Text] -> Parser m InputLine 575 | paragraphContinuationP extraFinalizers = 576 | CommentLine <$> lineCommentP 577 | <|> MarkupLine 578 | <$> lineNoneOfP 579 | ( fmap Parsec.try extraFinalizers 580 | -- Nestable 581 | <> (fmap fromMarker <$> LP.runOfN 4 [AsteriskD, EqualsSignD]) 582 | -- BlockComment 583 | <> (fmap fromMarker <$> LP.runOfN 4 [SlashC]) 584 | <> [ 585 | -- BlockId, starts with "[[" 586 | Parsec.try LP.blockId, 587 | -- BlockAttributeList, starts with "[" 588 | "" <$ LP.blockAttributeList, 589 | -- New block introducer, '+' 590 | Parsec.try (LP.char '+'), 591 | -- BlankLine 592 | pure "" 593 | ] 594 | ) 595 | 596 | danglingBlockPrefixP :: 597 | Monad m => 598 | UnparsedBlockPrefix -> 599 | Parser m (Block UnparsedInline) 600 | danglingBlockPrefixP [] = empty 601 | danglingBlockPrefixP prefix = 602 | DanglingBlockPrefix prefix 603 | <$ Parsec.lookAhead (closeDelimiterP <|> Parsec.eof) 604 | 605 | initialBlankLinesP :: Monad m => Parser m [Text] 606 | initialBlankLinesP = many blankLineP 607 | 608 | blankLineP :: Monad m => Parser m Text 609 | blankLineP = lineP $ pure "" 610 | 611 | -- | Argument can be a parser for the beginning of the line. Function checks 612 | -- that the part of the line not parsed is whitespace. 613 | -- 614 | -- If the line is parsed successfully, this combinator checks if an include line 615 | -- follows. If that is the case it inserts the corresponding lines into the 616 | -- input stream of the parser. 617 | lineP :: Monad m => LP.LineParser a -> Parser m a 618 | lineP p = do 619 | result <- lineP' p 620 | option () includeP 621 | pure result 622 | 623 | -- | A version of 'lineP' that does not check if the line is followed by an 624 | -- include. 625 | lineP' :: Monad m => LP.LineParser a -> Parser m a 626 | lineP' p = satisfyToken $ 627 | \t -> f $ Parsec.parse (p <* many space <* Parsec.eof) "" t 628 | where 629 | f (Right l) = Just l 630 | f (Left _) = Nothing 631 | 632 | -- | @lineOneOfP ps@ accepts any line that consists in syntax described by any 633 | -- parser in @ps@ plus optional space characters. 634 | -- 635 | -- This function runs parsers in @ps@ in sequence, with no lookahead. This means 636 | -- that the order in which parsers appear in @ps@ is relevant, and that 637 | -- 'Parsec.try' could be needed in some elements of @ps@ if their recognized 638 | -- languages share some prefix. 639 | -- 640 | -- If blank lines need to be accepted, add @pure ""@ as the last element of 641 | -- @ps@. 642 | lineOneOfP :: Monad m => [LP.LineParser a] -> Parser m a 643 | lineOneOfP parsers = do 644 | result <- lineOneOfP' 645 | option () includeP 646 | pure result 647 | where 648 | lineOneOfP' = satisfyToken $ 649 | \t -> 650 | f $ 651 | Parsec.parse (choice parsers <* many space <* Parsec.eof) "" t 652 | f (Right l) = Just l 653 | f (Left _) = Nothing 654 | 655 | -- | @lineNoneOfP ps@ accepts any line that does not consist in syntax described 656 | -- by any parser in @ps@ plus optional space characters. 657 | -- 658 | -- This function runs parsers in @ps@ in sequence, with no lookahead. This means 659 | -- that the order in which parsers appear in @ps@ is relevant, and that 660 | -- 'Parsec.try' could be needed in some elements of @ps@ if their recognized 661 | -- languages share some prefix. 662 | -- 663 | -- If blank lines need to excluded from acceptance, add @pure ""@ as the last 664 | -- element of @ps@. 665 | lineNoneOfP :: Monad m => [LP.LineParser a] -> Parser m Text 666 | lineNoneOfP parsers = do 667 | result <- lineNoneOfP' 668 | option () includeP 669 | pure result 670 | where 671 | lineNoneOfP' = satisfyToken $ 672 | \t -> 673 | f t $ 674 | Parsec.parse (choice parsers <* many space <* Parsec.eof) "" t 675 | f _ (Right _) = Nothing 676 | f t (Left _) = Just t 677 | 678 | includeP :: Parser m () 679 | includeP = empty 680 | 681 | -- includeP = do 682 | -- (filename, arguments) <- 683 | -- lineP' $ 684 | -- (,) 685 | -- <$ LP.string "include::" 686 | -- <*> LP.many (satisfy (/= '[')) <* char '[' 687 | -- <*> LP.many (satisfy (/= ']')) <* char ']' 688 | -- current <- Parsec.getInput 689 | -- -- TODO. Read actual file content, this is a stub. 690 | -- Parsec.setInput $ ["// (STUB) include::" <> filename <> "[" <> arguments <> "]"] <> current 691 | -- -- Recursive call to handle the case in which the first line of the included 692 | -- -- file is also an include. 693 | -- option () includeP 694 | 695 | openDelimiterP :: 696 | Monad m => 697 | [SpecialChar DelimiterChar] -> 698 | Parser m (SpecialChar DelimiterChar) 699 | openDelimiterP cs = do 700 | -- Parsec.lookAhead needed here because in case we fail later on (because the 701 | -- block is already open) we don't want to consume any input. 702 | (c :* n) <- Parsec.lookAhead $ Parsec.try $ lineOneOfP (LP.runOfN 4 cs) 703 | st <- Parsec.getState 704 | -- If block is already open (the delimiter is in the stack of open blocks), 705 | -- we're not opening it again, but fail. In case we don't fail, we consume the 706 | -- line that was looked ahead above. 707 | if (c :* n) `elem` (fst <$> openBlocks st) 708 | then empty 709 | else 710 | ( do 711 | -- Add found delimiter to the stack of open blocks 712 | Parsec.putState (st {openBlocks = (c :* n, []) <| openBlocks st}) 713 | -- Complete consumption of the token (aka one line of input), and 714 | -- following blanklines 715 | _ <- lineP LP.anyRemainder 716 | _ <- many blankLineP 717 | pure c 718 | ) 719 | 720 | closeDelimiterP :: Monad m => Parser m () 721 | closeDelimiterP = do 722 | st <- Parsec.getState 723 | let (c :* n, _) = NE.head (openBlocks st) 724 | case NE.tail (openBlocks st) of 725 | -- In presence of DanglingBlockPrefix'es, we can try to pop from an 726 | -- openBlocks stack that contains the initial open block only. We do nothing 727 | -- in this case. 728 | [] -> pure () 729 | b : bs -> do 730 | -- If c :* n found in openBlocks stack, pop one element. Only consume line 731 | -- from input (and look for includes) if the found delimiter matches 732 | -- openBlocks' top. 733 | _ <- 734 | lineP (LP.count n c) 735 | <|> Parsec.lookAhead 736 | ( choice $ 737 | fmap (\(c' :* n', _) -> lineP' (LP.count n' c')) (b : bs) 738 | ) 739 | Parsec.putState $ st {openBlocks = b :| bs} 740 | 741 | -- TODO: Add name to source positions (possibly storing current filename when an 742 | -- inline arrives). 743 | -- 744 | -- TODO: Fix line numbering in the presence of includes. 745 | satisfyToken :: Monad m => (Text -> Maybe a) -> Parser m a 746 | satisfyToken matcher = Parsec.tokenPrim show updatePos matcher 747 | where 748 | updatePos :: Parsec.SourcePos -> Text -> [Text] -> Parsec.SourcePos 749 | updatePos pos _ _ = Parsec.incSourceLine pos 1 750 | {-# ANN satisfyToken ("HLint: ignore" :: String) #-} 751 | -------------------------------------------------------------------------------- /test/Tests/Inlines.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Tests.Inlines 4 | ( inlineUnitTests, 5 | ) 6 | where 7 | 8 | import Data.Either (isLeft) 9 | import Data.Functor.Identity (Identity) 10 | import Data.List.NonEmpty (NonEmpty (..)) 11 | import Data.Map.Internal (Identity (runIdentity)) 12 | import Data.Text (Text) 13 | import Test.Hspec.Expectations.Pretty (shouldBe) 14 | import Test.Tasty (TestTree, testGroup) 15 | import Test.Tasty.HUnit (assertBool, assertFailure, testCase) 16 | import Text.AsciiDoc.Inlines 17 | import qualified Text.Parsec as Parsec 18 | 19 | parseInline :: Text -> IO Inline 20 | parseInline t = 21 | case parseTest inlinesP t of 22 | Right result -> pure result 23 | Left parseError -> assertFailure $ "Parser fails: " <> show parseError 24 | 25 | parseTest :: Parser Identity a -> Text -> Either Parsec.ParseError a 26 | parseTest parser t = 27 | runIdentity $ Parsec.runParserT parser inlineParserInitialState "" t 28 | 29 | inlineUnitTests :: TestTree 30 | inlineUnitTests = 31 | testGroup 32 | "inline unit tests" 33 | [ simpleInlineTests, 34 | boldInlineTests, 35 | unconstrainedStylingTests, 36 | mixedFormattingStyleTests, 37 | enclosureInlineAttributeListTests, 38 | punctuationSymbolTests 39 | ] 40 | 41 | simpleInlineTests :: TestTree 42 | simpleInlineTests = 43 | testGroup 44 | "simple inlines" 45 | [ testCase "single-line, no formatting marks" $ do 46 | i <- parseInline "some words with no format" 47 | i `shouldBe` InlineSeq (AlphaNum "some" :| [Space " ", AlphaNum "words", Space " ", AlphaNum "with", Space " ", AlphaNum "no", Space " ", AlphaNum "format"]), 48 | testCase "Space at the beginning" $ 49 | assertBool "Parser doesn't fail" $ 50 | isLeft $ 51 | parseTest inlinesP " some words preceded by space", 52 | testCase "no formatting marks with space at the end" $ do 53 | i <- parseInline "some words with no format " 54 | i `shouldBe` InlineSeq (AlphaNum "some" :| [Space " ", AlphaNum "words", Space " ", AlphaNum "with", Space " ", AlphaNum "no", Space " ", AlphaNum "format", Space " "]) 55 | ] 56 | 57 | boldInlineTests :: TestTree 58 | boldInlineTests = 59 | testGroup 60 | "bold inlines" 61 | [ testCase "single-line, bold string" $ do 62 | i <- parseInline "*a sentence all in strong*" 63 | i `shouldBe` InlineSeq (StyledText Bold defaultAttributeList "*" (AlphaNum "a" :| [Space " ", AlphaNum "sentence", Space " ", AlphaNum "all", Space " ", AlphaNum "in", Space " ", AlphaNum "strong"]) "*" :| []), 64 | testCase "single-line, bold string with space at the end" $ do 65 | i <- parseInline "*a sentence all in strong* " 66 | i `shouldBe` InlineSeq (StyledText Bold defaultAttributeList "*" (AlphaNum "a" :| [Space " ", AlphaNum "sentence", Space " ", AlphaNum "all", Space " ", AlphaNum "in", Space " ", AlphaNum "strong"]) "*" :| [Space " "]), 67 | testCase "a word in bold in the middle" $ do 68 | i <- parseInline "a *few* words" 69 | i `shouldBe` InlineSeq (AlphaNum "a" :| [Space " ", StyledText Bold defaultAttributeList "*" (AlphaNum "few" :| []) "*", Space " ", AlphaNum "words"]), 70 | testCase "two words in bold at the beginning" $ do 71 | i <- parseInline "*a few* words" 72 | i `shouldBe` InlineSeq (StyledText Bold defaultAttributeList "*" (AlphaNum "a" :| [Space " ", AlphaNum "few"]) "*" :| [Space " ", AlphaNum "words"]), 73 | testCase "two words in bold at the end" $ do 74 | i <- parseInline "a *few words*" 75 | i `shouldBe` InlineSeq (AlphaNum "a" :| [Space " ", StyledText Bold defaultAttributeList "*" (AlphaNum "few" :| [Space " ", AlphaNum "words"]) "*"]), 76 | testCase "bad bold ending with closing mark after space" $ do 77 | i <- parseInline "*a few *" 78 | i `shouldBe` InlineSeq (Symbol "*" :| [AlphaNum "a", Space " ", AlphaNum "few", Space " ", Symbol "*"]), 79 | testCase "bad bold ending with closing mark after space and before word" $ do 80 | i <- parseInline "*a *few words" 81 | i `shouldBe` InlineSeq (Symbol "*" :| [AlphaNum "a", Space " ", Symbol "*", AlphaNum "few", Space " ", AlphaNum "words"]), 82 | testCase "asterisk in the middle of bold phrase" $ do 83 | i <- parseInline "*a *few words*" 84 | i `shouldBe` InlineSeq (StyledText Bold defaultAttributeList "*" (AlphaNum "a" :| [Space " ", Symbol "*", AlphaNum "few", Space " ", AlphaNum "words"]) "*" :| []), 85 | testCase "single asterisk in phrase" $ do 86 | i <- parseInline "a *few words" 87 | i `shouldBe` InlineSeq (AlphaNum "a" :| [Space " ", Symbol "*", AlphaNum "few", Space " ", AlphaNum "words"]), 88 | testCase "single asterisk in phrase with space at the end" $ do 89 | i <- parseInline "a *few words " 90 | i `shouldBe` InlineSeq (AlphaNum "a" :| [Space " ", Symbol "*", AlphaNum "few", Space " ", AlphaNum "words", Space " "]), 91 | testCase "single asterisk in the middle of a word" $ do 92 | i <- parseInline "a f*ew words" 93 | i `shouldBe` InlineSeq (AlphaNum "a" :| [Space " ", AlphaNum "f", Symbol "*", AlphaNum "ew", Space " ", AlphaNum "words"]), 94 | testCase "an asterisk in the middle of a word in strong phrase" $ do 95 | i <- parseInline "*a f*ew* words" 96 | i `shouldBe` InlineSeq (StyledText Bold defaultAttributeList "*" (AlphaNum "a" :| [Space " ", AlphaNum "f", Symbol "*", AlphaNum "ew"]) "*" :| [Space " ", AlphaNum "words"]), 97 | testCase "single asterisk followed by word and space" $ do 98 | i <- parseInline "*a " 99 | i `shouldBe` InlineSeq (Symbol "*" :| [AlphaNum "a", Space " "]) 100 | ] 101 | 102 | unconstrainedStylingTests :: TestTree 103 | unconstrainedStylingTests = 104 | testGroup 105 | "unconstrained formatting marks" 106 | [ testCase "nesting unconstrained inside constrained, with no space" $ do 107 | i <- parseInline "#a##b##c#" 108 | i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| [StyledText Custom defaultAttributeList "##" (AlphaNum "b" :| []) "##", AlphaNum "c"]) "#" :| []), 109 | testCase "unpaired opening mark before correctly closed unconstrained pair" $ do 110 | i <- parseInline "#a##b##" 111 | i `shouldBe` InlineSeq (Symbol "#" :| [AlphaNum "a", StyledText Custom defaultAttributeList "##" (AlphaNum "b" :| []) "##"]), 112 | -- Divergence from Asciidoctor. 113 | testCase "(## #a ##b## c# ##)" $ do 114 | i <- parseInline "## #a ##b## c# ##" 115 | -- let alt_result = InlineSeq (StyledText Custom defaultAttributeList "##" (Space " " :| [StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| [Space " ", StyledText Custom defaultAttributeList "##" (AlphaNum "b" :| []) "##", Space " ", AlphaNum "c"]) "#", Space " "]) "##" :| []) 116 | let result = InlineSeq (StyledText Custom defaultAttributeList "##" (Space " " :| [Symbol "#", AlphaNum "a", Space " "]) "##" :| [AlphaNum "b", StyledText Custom defaultAttributeList "##" (Space " " :| [AlphaNum "c", Symbol "#", Space " "]) "##"]) 117 | i `shouldBe` result, 118 | -- libasciidoc fails test 119 | testCase "unpaired opening mark directly inside unconstrained pair" $ do 120 | i <- parseInline "## #a b ##" 121 | let result = InlineSeq (StyledText Custom defaultAttributeList "##" (Space " " :| [Symbol "#", AlphaNum "a", Space " ", AlphaNum "b", Space " "]) "##" :| []) 122 | i `shouldBe` result, 123 | -- Divergence DVI001 from Asciidoctor. 124 | testCase "three opening marks, and three closing marks, with no space" $ do 125 | i <- parseInline "###a###" 126 | -- Asciidoctor: 127 | -- let alt_result = InlineSeq (StyledText Custom defaultAttributeList "##" (StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| []) "#" :| []) "##" :| []) 128 | let result = InlineSeq (StyledText Custom defaultAttributeList "##" (Symbol "#" :| [AlphaNum "a"]) "##" :| [Symbol "#"]) 129 | i `shouldBe` result, 130 | -- Divergence DVI001 from Asciidoctor. 131 | testCase "three opening marks, and three closing marks, two words and no space" $ do 132 | i <- parseInline "###a b###" 133 | -- Asciidoctor: 134 | -- let alt_result = InlineSeq (StyledText Custom defaultAttributeList "##" (StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| [Space " ", AlphaNum "b"]) "#" :| []) "##" :| []) 135 | let result = InlineSeq (StyledText Custom defaultAttributeList "##" (Symbol "#" :| [AlphaNum "a", Space " ", AlphaNum "b"]) "##" :| [Symbol "#"]) 136 | i `shouldBe` result, 137 | -- Divergence from Asciidoctor. 138 | -- TODO. This one is different from DVI001. Asciidoctor's outputs is just 139 | -- wrong. It looks like it works because it doesn't respect the nesting 140 | -- rule. 141 | testCase "three opening marks, and three closing marks, two words and no space" $ do 142 | -- Asciidoctor: 143 | -- let alt_result = InlineSeq (StyledText Custom defaultAttributeList "#" (StyledText Custom defaultAttributeList "##" (AlphaNum "a" :| [Space " ", AlphaNum "b", Space " "]) "##" :| []) "#" :| []) 144 | i <- parseInline "###a b ###" 145 | let result = InlineSeq (StyledText Custom defaultAttributeList "##" (Symbol "#" :| [AlphaNum "a", Space " ", AlphaNum "b", Space " "]) "##" :| [Symbol "#"]) 146 | i `shouldBe` result, 147 | testCase "unconstrained styled word" $ do 148 | i <- parseInline "##a##" 149 | i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "##" (AlphaNum "a" :| []) "##" :| []), 150 | testCase "unconstrained formatting pair with inner space on the left" $ do 151 | i <- parseInline "## a##" 152 | i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "##" (Space " " :| [AlphaNum "a"]) "##" :| []), 153 | testCase "unconstrained formatting pair with inner space on the right" $ do 154 | i <- parseInline "##a ##" 155 | i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "##" (AlphaNum "a" :| [Space " "]) "##" :| []), 156 | testCase "unconstrained formatting pair with inner space on both sides" $ do 157 | i <- parseInline "## a ##" 158 | i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "##" (Space " " :| [AlphaNum "a", Space " "]) "##" :| []), 159 | testCase "unbalanced marks, one missing on the left" $ do 160 | i <- parseInline "##a#" 161 | i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "#" (Symbol "#" :| [AlphaNum "a"]) "#" :| []), 162 | testCase "unbalanced marks, one missing on the right" $ do 163 | i <- parseInline "#a##" 164 | i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| []) "#" :| [Symbol "#"]), 165 | testCase "nesting constrained directly inside unconstrained, with space" $ do 166 | i <- parseInline "## #a# ##" 167 | i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "##" (Space " " :| [StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| []) "#", Space " "]) "##" :| []), 168 | testCase "nesting unconstrained inside constrained, with spaces everywhere" $ do 169 | i <- parseInline "#a ## b ## c#" 170 | let result = InlineSeq (StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| [Space " ", StyledText Custom defaultAttributeList "##" (Space " " :| [AlphaNum "b", Space " "]) "##", Space " ", AlphaNum "c"]) "#" :| []) 171 | i `shouldBe` result, 172 | testCase "bad nesting: constrained directly inside constrained" $ do 173 | i <- parseInline "#a #b# c#" 174 | i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| [Space " ", Symbol "#", AlphaNum "b"]) "#" :| [Space " ", AlphaNum "c", Symbol "#"]), 175 | testCase "two unconstrained pairs, false nesting" $ do 176 | i <- parseInline "##a ##b## c##" 177 | i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "##" (AlphaNum "a" :| [Space " "]) "##" :| [AlphaNum "b", StyledText Custom defaultAttributeList "##" (Space " " :| [AlphaNum "c"]) "##"]), 178 | testCase "unpaired opening mark inside unconstrained pair" $ do 179 | i <- parseInline "##a #b ##" 180 | let result = InlineSeq (StyledText Custom defaultAttributeList "##" (AlphaNum "a" :| [Space " ", Symbol "#", AlphaNum "b", Space " "]) "##" :| []) 181 | i `shouldBe` result, 182 | testCase "unpaired closing mark inside unconstrained pair" $ do 183 | i <- parseInline "##a b# ##" 184 | i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "##" (AlphaNum "a" :| [Space " ", AlphaNum "b", Symbol "#", Space " "]) "##" :| []), 185 | testCase "unpaired mark between space inside unconstrained pair" $ do 186 | i <- parseInline "##a # ##" 187 | i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "##" (AlphaNum "a" :| [Space " ", Symbol "#", Space " "]) "##" :| []), 188 | testCase "double mark ending constrained enclosure" $ do 189 | i <- parseInline "#a ## b#" 190 | i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| [Space " ", Symbol "#"]) "#" :| [Space " ", AlphaNum "b", Symbol "#"]), 191 | testCase "nesting constrained inside unconstrained, with spaces on both sides" $ do 192 | i <- parseInline "##a #b# c##" 193 | i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "##" (AlphaNum "a" :| [Space " ", StyledText Custom defaultAttributeList "#" (AlphaNum "b" :| []) "#", Space " ", AlphaNum "c"]) "##" :| []), 194 | testCase "nesting unconstrained inside constrained, with spaces on both sides" $ do 195 | i <- parseInline "#a ##b## c#" 196 | i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| [Space " ", StyledText Custom defaultAttributeList "##" (AlphaNum "b" :| []) "##", Space " ", AlphaNum "c"]) "#" :| []), 197 | testCase "unpaired opening mark inside constrained pair" $ do 198 | i <- parseInline "#a ##b c#" 199 | i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| [Space " ", Symbol "#", Symbol "#", AlphaNum "b", Space " ", AlphaNum "c"]) "#" :| []), 200 | testCase "unpaired opening mark after correcty closed pair and some noise" $ do 201 | i <- parseInline "##a##b# ##" 202 | i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "##" (AlphaNum "a" :| []) "##" :| [AlphaNum "b", Symbol "#", Space " ", Symbol "#", Symbol "#"]), 203 | testCase "three-mark sequence" $ do 204 | i <- parseInline "###" 205 | i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "#" (Symbol "#" :| []) "#" :| []), 206 | testCase "four-mark sequence" $ do 207 | i <- parseInline "####" 208 | i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "#" (Symbol "#" :| []) "#" :| [Symbol "#"]), 209 | testCase "five-mark sequence" $ do 210 | i <- parseInline "#####" 211 | let result = InlineSeq (StyledText Custom defaultAttributeList "##" (Symbol "#" :| []) "##" :| []) 212 | i `shouldBe` result, 213 | -- Divergence DVI002 from Asciidoctor: Asciidoctor accepts nested 214 | -- enclosures with empty inline inside. 215 | testCase "six-mark sequence" $ do 216 | i <- parseInline "######" 217 | let result = InlineSeq (StyledText Custom defaultAttributeList "##" (Symbol "#" :| []) "##" :| [Symbol "#"]) 218 | i `shouldBe` result, 219 | testCase "unconstrained enclosure with a single space" $ do 220 | i <- parseInline "## ##" 221 | i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "##" (Space " " :| []) "##" :| []), 222 | testCase "two three-mark sequences separated by space" $ do 223 | i <- parseInline "### ###" 224 | -- Asciidoctor seems inconsistent w.r.t "###a###" because in this case 225 | -- it flips U and C. 226 | let result = InlineSeq (StyledText Custom defaultAttributeList "##" (Symbol "#" :| [Space " "]) "##" :| [Symbol "#"]) 227 | i `shouldBe` result, 228 | -- Divergence DVI003 from Asciidoctor: Asciidoctor doesn't parse "...#b 229 | -- c#" as an enclosure, but it does if the mark of both enclosures is 230 | -- different. We consider the behavior here more uniform. 231 | testCase "two constrained enclosures with the same mark an no space in between" $ do 232 | i <- parseInline "#a##b c#" 233 | i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| []) "#" :| [StyledText Custom defaultAttributeList "#" (AlphaNum "b" :| [Space " ", AlphaNum "c"]) "#"]) 234 | ] 235 | 236 | mixedFormattingStyleTests :: TestTree 237 | mixedFormattingStyleTests = 238 | testGroup 239 | "mixed formatting styles" 240 | [ testCase "constrained monospace inside italics, inside bold, no space" $ do 241 | i <- parseInline "*_`a`_*" 242 | i `shouldBe` InlineSeq (StyledText Bold defaultAttributeList "*" (StyledText Italic defaultAttributeList "_" (StyledText Monospace defaultAttributeList "`" (AlphaNum "a" :| []) "`" :| []) "_" :| []) "*" :| []), 243 | testCase "constrained monospace inside unconstrained italics, inside bold, no space" $ do 244 | i <- parseInline "*__`a`__*" 245 | i `shouldBe` InlineSeq (StyledText Bold defaultAttributeList "*" (StyledText Italic defaultAttributeList "__" (StyledText Monospace defaultAttributeList "`" (AlphaNum "a" :| []) "`" :| []) "__" :| []) "*" :| []), 246 | testCase "constrained monospace inside unconstrained italics, inside bold, some space" $ do 247 | i <- parseInline "*a __ `b` __c*" 248 | i `shouldBe` InlineSeq (StyledText Bold defaultAttributeList "*" (AlphaNum "a" :| [Space " ", StyledText Italic defaultAttributeList "__" (Space " " :| [StyledText Monospace defaultAttributeList "`" (AlphaNum "b" :| []) "`", Space " "]) "__", AlphaNum "c"]) "*" :| []), 249 | testCase "constrained italics interrupted inside custom" $ do 250 | i <- parseInline "#a _b#" 251 | i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| [Space " ", Symbol "_", AlphaNum "b"]) "#" :| []), 252 | -- Divergence from Asciidoctor: Asciidoctor doesn't respect nesting rule. 253 | testCase "(## _a ##b## c_ ##)" $ do 254 | i <- parseInline "## _a ##b## c_ ##" 255 | -- let alt_result = InlineSeq (StyledText Custom defaultAttributeList "##" (Space " " :| [StyledText Italic defaultAttributeList "_" (AlphaNum "a" :| [Space " ", StyledText Custom defaultAttributeList "##" (AlphaNum "b" :| []) "##", Space " ", AlphaNum "c"]) "_", Space " "]) "##" :| []) 256 | let result = InlineSeq (StyledText Custom defaultAttributeList "##" (Space " " :| [Symbol "_", AlphaNum "a", Space " "]) "##" :| [AlphaNum "b", StyledText Custom defaultAttributeList "##" (Space " " :| [AlphaNum "c", Symbol "_", Space " "]) "##"]) 257 | i `shouldBe` result, 258 | testCase "(__ #a##b##c# __)" $ do 259 | i <- parseInline "__ #a##b##c# __" 260 | let result = InlineSeq (StyledText Italic (InlineAttributeList "") "__" (Space " " :| [StyledText Custom (InlineAttributeList "") "#" (AlphaNum "a" :| [StyledText Custom (InlineAttributeList "") "##" (AlphaNum "b" :| []) "##", AlphaNum "c"]) "#", Space " "]) "__" :| []) 261 | i `shouldBe` result, 262 | testCase "(#a *##b##*#)" $ do 263 | i <- parseInline "#a *##b##*#" 264 | let result = InlineSeq (StyledText Custom (InlineAttributeList "") "#" (AlphaNum "a" :| [Space " ", StyledText Bold (InlineAttributeList "") "*" (StyledText Custom (InlineAttributeList "") "##" (AlphaNum "b" :| []) "##" :| []) "*"]) "#" :| []) 265 | i `shouldBe` result, 266 | -- Divergence from Asciidoctor: Asciidoctor doesn't respect nesting rule. 267 | testCase "(*a _b* c_)" $ do 268 | i <- parseInline "*a _b* c_" 269 | -- let alt_result = InlineSeq (Symbol "*" :| [AlphaNum "a", Space " ", StyledText Italic defaultAttributeList "_" (AlphaNum "b" :| [Symbol "*", Space " ", AlphaNum "c"]) "_"]) 270 | let result = InlineSeq (StyledText Bold defaultAttributeList "*" (AlphaNum "a" :| [Space " ", Symbol "_", AlphaNum "b"]) "*" :| [Space " ", AlphaNum "c", Symbol "_"]) 271 | i `shouldBe` result, 272 | -- Divergence from Asciidoctor: Asciidoctor doesn't respect nesting rule. 273 | testCase "(**a __b** c__)" $ do 274 | i <- parseInline "**a __b** c__" 275 | -- let alt_result = InlineSeq (Symbol "*" :| [Symbol "*", AlphaNum "a", Space " ", StyledText Italic defaultAttributeList "__" (AlphaNum "b" :| [Symbol "*", Symbol "*", Space " ", AlphaNum "c"]) "__"]) 276 | let result = InlineSeq (StyledText Bold defaultAttributeList "**" (AlphaNum "a" :| [Space " ", Symbol "_", Symbol "_", AlphaNum "b"]) "**" :| [Space " ", AlphaNum "c", Symbol "_", Symbol "_"]) 277 | i `shouldBe` result, 278 | -- Divergence from Asciidoctor: Asciidoctor doesn't respect nesting rule. 279 | testCase "(**a _b** c_)" $ do 280 | i <- parseInline "**a _b** c_" 281 | i `shouldBe` InlineSeq (StyledText Bold defaultAttributeList "**" (AlphaNum "a" :| [Space " ", Symbol "_", AlphaNum "b"]) "**" :| [Space " ", AlphaNum "c", Symbol "_"]), 282 | -- Divergence from Asciidoctor: Asciidoctor doesn't respect nesting rule. 283 | testCase "(*a __b* c__)" $ do 284 | i <- parseInline "*a __b* c__" 285 | i `shouldBe` InlineSeq (Symbol "*" :| [AlphaNum "a", Space " ", StyledText Italic defaultAttributeList "__" (AlphaNum "b" :| [Symbol "*", Space " ", AlphaNum "c"]) "__"]), 286 | testCase "(#*a _b* c_#)" $ do 287 | i <- parseInline "#*a _b* c_#" 288 | let result = InlineSeq (StyledText Custom (InlineAttributeList "") "#" (StyledText Bold (InlineAttributeList "") "*" (AlphaNum "a" :| [Space " ", Symbol "_", AlphaNum "b"]) "*" :| [Space " ", AlphaNum "c", Symbol "_"]) "#" :| []) 289 | i `shouldBe` result, 290 | testCase "(#**a __b** c__#)" $ do 291 | i <- parseInline "#**a __b** c__#" 292 | let result = InlineSeq (StyledText Custom (InlineAttributeList "") "#" (StyledText Bold (InlineAttributeList "") "**" (AlphaNum "a" :| [Space " ", Symbol "_", Symbol "_", AlphaNum "b"]) "**" :| [Space " ", AlphaNum "c", Symbol "_", Symbol "_"]) "#" :| []) 293 | i `shouldBe` result, 294 | testCase "(#**a _b** c_#)" $ do 295 | i <- parseInline "#**a _b** c_#" 296 | let result = InlineSeq (StyledText Custom (InlineAttributeList "") "#" (StyledText Bold (InlineAttributeList "") "**" (AlphaNum "a" :| [Space " ", Symbol "_", AlphaNum "b"]) "**" :| [Space " ", AlphaNum "c", Symbol "_"]) "#" :| []) 297 | i `shouldBe` result, 298 | testCase "(#*a __b* c__#)" $ do 299 | i <- parseInline "#*a __b* c__#" 300 | let result = InlineSeq (StyledText Custom (InlineAttributeList "") "#" (Symbol "*" :| [AlphaNum "a", Space " ", StyledText Italic (InlineAttributeList "") "__" (AlphaNum "b" :| [Symbol "*", Space " ", AlphaNum "c"]) "__"]) "#" :| []) 301 | i `shouldBe` result, 302 | testCase "(##*a _b* c_##)" $ do 303 | i <- parseInline "##*a _b* c_##" 304 | let result = InlineSeq (StyledText Custom (InlineAttributeList "") "##" (StyledText Bold (InlineAttributeList "") "*" (AlphaNum "a" :| [Space " ", Symbol "_", AlphaNum "b"]) "*" :| [Space " ", AlphaNum "c", Symbol "_"]) "##" :| []) 305 | i `shouldBe` result, 306 | testCase "(##**a __b** c__##)" $ do 307 | i <- parseInline "##**a __b** c__##" 308 | let result = InlineSeq (StyledText Custom (InlineAttributeList "") "##" (StyledText Bold (InlineAttributeList "") "**" (AlphaNum "a" :| [Space " ", Symbol "_", Symbol "_", AlphaNum "b"]) "**" :| [Space " ", AlphaNum "c", Symbol "_", Symbol "_"]) "##" :| []) 309 | i `shouldBe` result, 310 | testCase "(##**a _b** c_##)" $ do 311 | i <- parseInline "##**a _b** c_##" 312 | let result = InlineSeq (StyledText Custom (InlineAttributeList "") "##" (StyledText Bold (InlineAttributeList "") "**" (AlphaNum "a" :| [Space " ", Symbol "_", AlphaNum "b"]) "**" :| [Space " ", AlphaNum "c", Symbol "_"]) "##" :| []) 313 | i `shouldBe` result, 314 | testCase "(##*a __b* c__##)" $ do 315 | i <- parseInline "##*a __b* c__##" 316 | let result = InlineSeq (StyledText Custom (InlineAttributeList "") "##" (Symbol "*" :| [AlphaNum "a", Space " ", StyledText Italic (InlineAttributeList "") "__" (AlphaNum "b" :| [Symbol "*", Space " ", AlphaNum "c"]) "__"]) "##" :| []) 317 | i `shouldBe` result, 318 | testCase "(##*a _#b c#* d_##)" $ do 319 | i <- parseInline "##*a _#b c#* d_##" 320 | let result = InlineSeq (StyledText Custom (InlineAttributeList "") "##" (StyledText Bold (InlineAttributeList "") "*" (AlphaNum "a" :| [Space " ", Symbol "_", StyledText Custom (InlineAttributeList "") "#" (AlphaNum "b" :| [Space " ", AlphaNum "c"]) "#"]) "*" :| [Space " ", AlphaNum "d", Symbol "_"]) "##" :| []) 321 | i `shouldBe` result, 322 | testCase "(##**a __#b c#** d__##)" $ do 323 | i <- parseInline "##**a __#b c#** d__##" 324 | let result = InlineSeq (StyledText Custom (InlineAttributeList "") "##" (StyledText Bold (InlineAttributeList "") "**" (AlphaNum "a" :| [Space " ", Symbol "_", Symbol "_", StyledText Custom (InlineAttributeList "") "#" (AlphaNum "b" :| [Space " ", AlphaNum "c"]) "#"]) "**" :| [Space " ", AlphaNum "d", Symbol "_", Symbol "_"]) "##" :| []) 325 | i `shouldBe` result, 326 | testCase "(##**a _#b c#** d_##)" $ do 327 | i <- parseInline "##**a _#b c#** d_##" 328 | let result = InlineSeq (StyledText Custom (InlineAttributeList "") "##" (StyledText Bold (InlineAttributeList "") "**" (AlphaNum "a" :| [Space " ", Symbol "_", StyledText Custom (InlineAttributeList "") "#" (AlphaNum "b" :| [Space " ", AlphaNum "c"]) "#"]) "**" :| [Space " ", AlphaNum "d", Symbol "_"]) "##" :| []) 329 | i `shouldBe` result, 330 | testCase "(##*a __#b c#* d__##)" $ do 331 | i <- parseInline "##*a __#b c#* d__##" 332 | let result = InlineSeq (StyledText Custom (InlineAttributeList "") "##" (Symbol "*" :| [AlphaNum "a", Space " ", StyledText Italic (InlineAttributeList "") "__" (StyledText Custom (InlineAttributeList "") "#" (AlphaNum "b" :| [Space " ", AlphaNum "c"]) "#" :| [Symbol "*", Space " ", AlphaNum "d"]) "__"]) "##" :| []) 333 | i `shouldBe` result 334 | ] 335 | 336 | enclosureInlineAttributeListTests :: TestTree 337 | enclosureInlineAttributeListTests = 338 | testGroup 339 | "parameter lists for enclosures" 340 | [ testCase "simple parameter list for constrained enclosure" $ do 341 | i <- parseInline "[underline]#a#" 342 | i `shouldBe` InlineSeq (StyledText Custom (InlineAttributeList "underline") "#" (AlphaNum "a" :| []) "#" :| []), 343 | testCase "simple parameter list for unconstrained enclosure" $ do 344 | i <- parseInline "[underline]##a##" 345 | i `shouldBe` InlineSeq (StyledText Custom (InlineAttributeList "underline") "##" (AlphaNum "a" :| []) "##" :| []), 346 | testCase "parameter list for unconstrained enclosure preceded by word" $ do 347 | i <- parseInline "a[underline]##b##" 348 | i `shouldBe` InlineSeq (AlphaNum "a" :| [StyledText Custom (InlineAttributeList "underline") "##" (AlphaNum "b" :| []) "##"]), 349 | testCase "parameter list for unconstrained enclosure followed by word" $ do 350 | i <- parseInline "[underline]##a##b" 351 | i `shouldBe` InlineSeq (StyledText Custom (InlineAttributeList "underline") "##" (AlphaNum "a" :| []) "##" :| [AlphaNum "b"]), 352 | testCase "failed parameter list for constrained enclosure preceded by word" $ do 353 | i <- parseInline "a[underline]#b#" 354 | i `shouldBe` InlineSeq (AlphaNum "a" :| [Symbol "[", AlphaNum "underline", Symbol "]", StyledText Custom (InlineAttributeList "") "#" (AlphaNum "b" :| []) "#"]), 355 | testCase "failed parameter list for constrained enclosure followed by word" $ do 356 | i <- parseInline "[underline]#a#b" 357 | i `shouldBe` InlineSeq (Symbol "[" :| [AlphaNum "underline", Symbol "]", Symbol "#", AlphaNum "a", Symbol "#", AlphaNum "b"]), 358 | testCase "unfinished (failed) parameter list" $ do 359 | i <- parseInline "[underline" 360 | i `shouldBe` InlineSeq (Symbol "[" :| [AlphaNum "underline"]), 361 | testCase "isolated (failed) parameter list" $ do 362 | i <- parseInline "[underline] #a#" 363 | i `shouldBe` InlineSeq (Symbol "[" :| [AlphaNum "underline", Symbol "]", Space " ", StyledText Custom (InlineAttributeList "") "#" (AlphaNum "a" :| []) "#"]) 364 | ] 365 | 366 | punctuationSymbolTests :: TestTree 367 | punctuationSymbolTests = 368 | testGroup 369 | "punctuation symbols" 370 | [ testCase "(a.) sentence finalizer" $ do 371 | i <- parseInline "a." 372 | i `shouldBe` InlineSeq (AlphaNum "a" :| [Symbol "."]), 373 | testCase "(#a.#) sentence finalizer at end of constrained enclosure" $ do 374 | i <- parseInline "#a.#" 375 | i `shouldBe` InlineSeq (StyledText Custom (InlineAttributeList "") "#" (AlphaNum "a" :| [Symbol "."]) "#" :| []), 376 | testCase "(##a.##) sentence finalizer at end of unconstrained enclosure" $ do 377 | i <- parseInline "##a.##" 378 | i `shouldBe` InlineSeq (StyledText Custom (InlineAttributeList "") "##" (AlphaNum "a" :| [Symbol "."]) "##" :| []), 379 | testCase "(*#a#, b*) comma after enclosure, nested inside another enclosure" $ do 380 | i <- parseInline "*#a#, b*" 381 | i `shouldBe` InlineSeq (StyledText Bold (InlineAttributeList "") "*" (StyledText Custom (InlineAttributeList "") "#" (AlphaNum "a" :| []) "#" :| [Symbol ",", Space " ", AlphaNum "b"]) "*" :| []), 382 | testCase "(#a#,#b#) punctuation symbol separating two enclosures" $ do 383 | i <- parseInline "#a#,#b#" 384 | i `shouldBe` InlineSeq (StyledText Custom (InlineAttributeList "") "#" (AlphaNum "a" :| []) "#" :| [Symbol ",", StyledText Custom (InlineAttributeList "") "#" (AlphaNum "b" :| []) "#"]), 385 | testCase "(#¿a?#) question marks inside constrained enclosure" $ do 386 | i <- parseInline "#¿a?#" 387 | i `shouldBe` InlineSeq (StyledText Custom (InlineAttributeList "") "#" (Symbol "¿" :| [AlphaNum "a", Symbol "?"]) "#" :| []), 388 | testCase "(##¿a?##) question marks inside unconstrained enclosure" $ do 389 | i <- parseInline "##¿a?##" 390 | i `shouldBe` InlineSeq (StyledText Custom (InlineAttributeList "") "##" (Symbol "¿" :| [AlphaNum "a", Symbol "?"]) "##" :| []), 391 | testCase "(¿#a#?) question marks outside constrained enclosure" $ do 392 | i <- parseInline "¿#a#?" 393 | i `shouldBe` InlineSeq (Symbol "¿" :| [StyledText Custom (InlineAttributeList "") "#" (AlphaNum "a" :| []) "#", Symbol "?"]), 394 | testCase "(¿##a##?) question marks outside unconstrained enclosure" $ do 395 | i <- parseInline "¿##a##?" 396 | i `shouldBe` InlineSeq (Symbol "¿" :| [StyledText Custom (InlineAttributeList "") "##" (AlphaNum "a" :| []) "##", Symbol "?"]) 397 | ] 398 | -------------------------------------------------------------------------------- /test/Tests/Blocks.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Tests.Blocks 4 | ( parseTest, 5 | blockUnitTests, 6 | ) 7 | where 8 | 9 | import Data.Functor.Identity (Identity (runIdentity)) 10 | import Data.List.NonEmpty (NonEmpty ((:|))) 11 | import Data.Semigroup (Last (..)) 12 | import Data.Text (Text) 13 | import Test.Hspec.Expectations.Pretty (shouldBe) 14 | import Test.Tasty (TestTree, testGroup) 15 | import Test.Tasty.HUnit (assertFailure, testCase) 16 | import Text.AsciiDoc.Blocks 17 | import Text.AsciiDoc.Metadata 18 | import Text.AsciiDoc.UnparsedInline 19 | import qualified Text.Parsec as Parsec 20 | 21 | parseDocument :: [Text] -> IO (Document UnparsedInline) 22 | parseDocument t = case parseTest documentP t of 23 | Right x -> pure x 24 | Left parseError -> assertFailure $ "Parser fails: " <> show parseError 25 | 26 | parseBlocks :: [Text] -> IO [Block UnparsedInline] 27 | parseBlocks t = case parseTest blocksP t of 28 | Right x -> pure x 29 | Left parseError -> assertFailure $ "Parser fails: " <> show parseError 30 | 31 | parseTest :: Parser Identity a -> [Text] -> Either Parsec.ParseError a 32 | parseTest parser tokens = 33 | runIdentity $ Parsec.runParserT parser blockParserInitialState "" tokens 34 | 35 | blockUnitTests :: TestTree 36 | blockUnitTests = 37 | testGroup 38 | "block unit tests" 39 | [ documentUnitTests, 40 | blockCornerCaseUnitTests, 41 | paragraphUnitTests, 42 | sectionHeaderUnitTests, 43 | danglingBlockPrefixUnitTests, 44 | nestableUnitTests, 45 | unorderedListUnitTests, 46 | nestedListsUnitTests, 47 | listContinuationUnitTests, 48 | commentUnitTests 49 | ] 50 | 51 | documentUnitTests :: TestTree 52 | documentUnitTests = 53 | testGroup 54 | "document unit tests" 55 | [ testCase "empty document" $ do 56 | p <- 57 | parseDocument 58 | [] 59 | p `shouldBe` Document Nothing [], 60 | testCase "single document title" $ do 61 | p <- 62 | parseDocument 63 | [ "= Foo" 64 | ] 65 | p 66 | `shouldBe` Document 67 | (Just (DocumentHeader [] 0 (MarkupLine "Foo" :| []))) 68 | [], 69 | testCase "single document title with prefix" $ do 70 | p <- 71 | parseDocument 72 | [ "[[Foo]]", 73 | "= Bar" 74 | ] 75 | p 76 | `shouldBe` Document 77 | (Just (DocumentHeader [MetadataItem (BlockId "Foo")] 0 (MarkupLine "Bar" :| []))) 78 | [], 79 | testCase "document title with prefix followed by paragraph" $ do 80 | p <- 81 | parseDocument 82 | [ "[[Foo]]", 83 | "= Bar", 84 | "Baz" 85 | ] 86 | p 87 | `shouldBe` Document 88 | (Just (DocumentHeader [MetadataItem (BlockId "Foo")] 0 (MarkupLine "Bar" :| []))) 89 | [Paragraph [] (MarkupLine "Baz" :| [])], 90 | testCase "document title followed by empty line and paragraph" $ do 91 | p <- 92 | parseDocument 93 | [ "= Foo", 94 | "", 95 | "Bar" 96 | ] 97 | p 98 | `shouldBe` Document 99 | (Just (DocumentHeader [] 0 (MarkupLine "Foo" :| []))) 100 | [Paragraph [] (MarkupLine "Bar" :| [])], 101 | testCase "document title with level 1 followed by paragraph" $ do 102 | p <- 103 | parseDocument 104 | [ "== Foo", 105 | "Bar" 106 | ] 107 | p 108 | `shouldBe` Document 109 | (Just (DocumentHeader [] 1 (MarkupLine "Foo" :| []))) 110 | [Paragraph [] (MarkupLine "Bar" :| [])], 111 | testCase "document with no title and a paragraph" $ do 112 | p <- 113 | parseDocument 114 | [ "Foo" 115 | ] 116 | p 117 | `shouldBe` Document 118 | Nothing 119 | [Paragraph [] (MarkupLine "Foo" :| [])], 120 | testCase "single dangling block prefix" $ do 121 | p <- parseDocument ["[[Foo]]"] 122 | p 123 | `shouldBe` Document Nothing [DanglingBlockPrefix [MetadataItem (BlockId "Foo")]], 124 | testCase "dangling block prefix at eof and after empty line" $ do 125 | p <- 126 | parseDocument 127 | [ "", 128 | "[[Foo]]" 129 | ] 130 | p `shouldBe` Document Nothing [DanglingBlockPrefix [MetadataItem (BlockId "Foo")]] 131 | ] 132 | 133 | blockCornerCaseUnitTests :: TestTree 134 | blockCornerCaseUnitTests = 135 | testGroup 136 | "block corner case unit tests" 137 | [ testCase "empty block list" $ do 138 | p <- 139 | parseBlocks 140 | [] 141 | p `shouldBe` [], 142 | testCase "single empty line" $ do 143 | p <- 144 | parseBlocks 145 | [ "" 146 | ] 147 | p `shouldBe` [] 148 | ] 149 | 150 | paragraphUnitTests :: TestTree 151 | paragraphUnitTests = 152 | testGroup 153 | "paragraph unit tests" 154 | [ testCase "one line paragraph" $ do 155 | p <- 156 | parseBlocks 157 | [ "Foo" 158 | ] 159 | p `shouldBe` [Paragraph [] (MarkupLine "Foo" :| [])], 160 | testCase "two lines paragraph" $ do 161 | p <- 162 | parseBlocks 163 | [ "Foo", 164 | "Bar" 165 | ] 166 | p `shouldBe` [Paragraph [] (MarkupLine "Foo" :| [MarkupLine "Bar"])], 167 | testCase "paragraph followed by empty line" $ do 168 | p <- 169 | parseBlocks 170 | [ "Foo", 171 | "" 172 | ] 173 | p `shouldBe` [Paragraph [] (MarkupLine "Foo" :| [])], 174 | testCase "paragraph with indented following lines" $ do 175 | p <- 176 | parseBlocks 177 | [ "Foo", 178 | " Bar", 179 | " Baz" 180 | ] 181 | p `shouldBe` [Paragraph [] (MarkupLine "Foo" :| [MarkupLine " Bar", MarkupLine " Baz"])], 182 | testCase "two paragraphs" $ do 183 | p <- 184 | parseBlocks 185 | [ "Foo", 186 | "", 187 | "Bar" 188 | ] 189 | p 190 | `shouldBe` [ Paragraph [] (MarkupLine "Foo" :| []), 191 | Paragraph [] (MarkupLine "Bar" :| []) 192 | ], 193 | testCase "paragraph with block prefix" $ do 194 | p <- 195 | parseBlocks 196 | [ ".Foo", 197 | "// Comment", 198 | "[Foo#Bar%Baz]", 199 | "Foo" 200 | ] 201 | p 202 | `shouldBe` [ Paragraph 203 | [ MetadataItem (BlockTitle (MarkupLine "Foo" :| [])), 204 | Comment (LineCommentSequence (" Comment" :| [])), 205 | MetadataItem (BlockAttributeList "Foo#Bar%Baz") 206 | ] 207 | (MarkupLine "Foo" :| []) 208 | ], 209 | testCase "paragraph with block prefix containing empty lines" $ do 210 | p <- 211 | parseBlocks 212 | [ ".Foo", 213 | "", 214 | "[Foo#Bar%Baz]", 215 | "", 216 | "Foo" 217 | ] 218 | p 219 | `shouldBe` [ Paragraph 220 | [ MetadataItem (BlockTitle (MarkupLine "Foo" :| [])), 221 | MetadataItem (BlockAttributeList "Foo#Bar%Baz") 222 | ] 223 | (MarkupLine "Foo" :| []) 224 | ], 225 | testCase "paragraph followed by dangling block prefix" $ do 226 | p <- 227 | parseBlocks 228 | [ "Foo", 229 | "", 230 | ".Foo" 231 | ] 232 | p 233 | `shouldBe` [ Paragraph 234 | [] 235 | (MarkupLine "Foo" :| []), 236 | DanglingBlockPrefix 237 | [ MetadataItem (BlockTitle (MarkupLine "Foo" :| [])) 238 | ] 239 | ], 240 | testCase "paragraph with second line resembling block title" $ do 241 | p <- 242 | parseBlocks 243 | [ "Foo", 244 | ".Bar" 245 | ] 246 | p 247 | `shouldBe` [ Paragraph 248 | [] 249 | (MarkupLine "Foo" :| [MarkupLine ".Bar"]) 250 | ] 251 | ] 252 | 253 | sectionHeaderUnitTests :: TestTree 254 | sectionHeaderUnitTests = 255 | testGroup 256 | "section header unit tests" 257 | [ testCase "level 0 section header" $ do 258 | p <- 259 | parseBlocks 260 | [ "= Foo" 261 | ] 262 | p 263 | `shouldBe` [ SectionHeader 264 | [] 265 | 0 266 | (MarkupLine "Foo" :| []) 267 | ], 268 | testCase "level 1 section header" $ do 269 | p <- 270 | parseBlocks 271 | [ "== Foo" 272 | ] 273 | p 274 | `shouldBe` [ SectionHeader 275 | [] 276 | 1 277 | (MarkupLine "Foo" :| []) 278 | ], 279 | testCase "level 2 section header" $ do 280 | p <- 281 | parseBlocks 282 | [ "=== Foo" 283 | ] 284 | p 285 | `shouldBe` [ SectionHeader 286 | [] 287 | 2 288 | (MarkupLine "Foo" :| []) 289 | ], 290 | testCase "section header with two words" $ do 291 | p <- 292 | parseBlocks 293 | [ "= Foo bar" 294 | ] 295 | p 296 | `shouldBe` [ SectionHeader 297 | [] 298 | 0 299 | (MarkupLine "Foo bar" :| []) 300 | ], 301 | testCase "section header beginning with space" $ do 302 | p <- 303 | parseBlocks 304 | [ "= Foo" 305 | ] 306 | p 307 | `shouldBe` [ SectionHeader 308 | [] 309 | 0 310 | (MarkupLine "Foo" :| []) 311 | ], 312 | testCase "section header followed by paragraph" $ do 313 | p <- 314 | parseBlocks 315 | [ "= Foo", 316 | "Bar" 317 | ] 318 | p 319 | `shouldBe` [ SectionHeader 320 | [] 321 | 0 322 | (MarkupLine "Foo" :| []), 323 | Paragraph [] (MarkupLine "Bar" :| []) 324 | ], 325 | testCase "section header followed by empty line and paragraph" $ do 326 | p <- 327 | parseBlocks 328 | [ "= Foo", 329 | "", 330 | "Bar" 331 | ] 332 | p 333 | `shouldBe` [ SectionHeader 334 | [] 335 | 0 336 | (MarkupLine "Foo" :| []), 337 | Paragraph [] (MarkupLine "Bar" :| []) 338 | ], 339 | testCase "section header with block prefix" $ do 340 | p <- 341 | parseBlocks 342 | [ ".Foo", 343 | "= Foo" 344 | ] 345 | p 346 | `shouldBe` [ SectionHeader 347 | [MetadataItem (BlockTitle (MarkupLine "Foo" :| []))] 348 | 0 349 | (MarkupLine "Foo" :| []) 350 | ], 351 | testCase "section header followed by paragraph with block prefix" $ do 352 | p <- 353 | parseBlocks 354 | [ "= Foo", 355 | ".Bar", 356 | "Bar" 357 | ] 358 | p 359 | `shouldBe` [ SectionHeader 360 | [] 361 | 0 362 | (MarkupLine "Foo" :| []), 363 | Paragraph 364 | [MetadataItem (BlockTitle (MarkupLine "Bar" :| []))] 365 | (MarkupLine "Bar" :| []) 366 | ], 367 | testCase "discrete section header" $ do 368 | p <- 369 | parseBlocks 370 | [ "[discrete]", 371 | "= Foo" 372 | ] 373 | p 374 | `shouldBe` [ SectionHeader 375 | [MetadataItem (BlockAttributeList "discrete")] 376 | 0 377 | (MarkupLine "Foo" :| []) 378 | ] 379 | case p of 380 | (SectionHeader prefix _ _) : _ -> 381 | toMetadata prefix 382 | `shouldBe` (mempty @(Metadata UnparsedInline)) 383 | { metadataStyle = 384 | Just 385 | ( Last 386 | { getLast = "discrete" 387 | } 388 | ) 389 | } 390 | _ -> error "test case: discrete section header", 391 | -- TODO. Must change when indented literal paragraphs are implemented. 392 | testCase "false section header (space before '=')" $ do 393 | p <- 394 | parseBlocks 395 | [ " = Foo" 396 | ] 397 | p `shouldBe` [Paragraph [] (MarkupLine " = Foo" :| [])] 398 | ] 399 | 400 | danglingBlockPrefixUnitTests :: TestTree 401 | danglingBlockPrefixUnitTests = 402 | testGroup 403 | "dangling block prefix unit tests" 404 | [ testCase "dangling block prefix at eof and after paragraph" $ do 405 | p <- 406 | parseBlocks 407 | [ "Foo", 408 | "[[Foo]]" 409 | ] 410 | p 411 | `shouldBe` [ Paragraph 412 | [] 413 | (MarkupLine "Foo" :| []), 414 | DanglingBlockPrefix [MetadataItem (BlockId "Foo")] 415 | ], 416 | testCase "dangling block prefix at end of example block" $ do 417 | p <- 418 | parseBlocks 419 | [ "====", 420 | "Foo", 421 | "", 422 | "[[Bar]]", 423 | "====" 424 | ] 425 | p 426 | `shouldBe` [ Nestable 427 | Example 428 | [] 429 | [ Paragraph [] (MarkupLine "Foo" :| []), 430 | DanglingBlockPrefix [MetadataItem (BlockId "Bar")] 431 | ] 432 | ] 433 | ] 434 | 435 | nestableUnitTests :: TestTree 436 | nestableUnitTests = 437 | testGroup 438 | "nestable block unit tests" 439 | [ testCase "simple example block" $ do 440 | p <- 441 | parseBlocks 442 | [ "====", 443 | "Foo", 444 | "====" 445 | ] 446 | p 447 | `shouldBe` [ Nestable 448 | Example 449 | [] 450 | [Paragraph [] (MarkupLine "Foo" :| [])] 451 | ], 452 | testCase "simple sidebar block" $ do 453 | p <- 454 | parseBlocks 455 | [ "****", 456 | "Foo", 457 | "****" 458 | ] 459 | p 460 | `shouldBe` [ Nestable 461 | Sidebar 462 | [] 463 | [Paragraph [] (MarkupLine "Foo" :| [])] 464 | ], 465 | testCase "example block containing two paragraphs" $ do 466 | p <- 467 | parseBlocks 468 | [ "====", 469 | "Foo", 470 | "", 471 | "Bar", 472 | "====" 473 | ] 474 | p 475 | `shouldBe` [ Nestable 476 | Example 477 | [] 478 | [ Paragraph [] (MarkupLine "Foo" :| []), 479 | Paragraph [] (MarkupLine "Bar" :| []) 480 | ] 481 | ], 482 | testCase "example block with block title" $ do 483 | p <- 484 | parseBlocks 485 | [ ".Foo", 486 | "====", 487 | "Bar", 488 | "====" 489 | ] 490 | p 491 | `shouldBe` [ Nestable 492 | Example 493 | [MetadataItem (BlockTitle (MarkupLine "Foo" :| []))] 494 | [Paragraph [] (MarkupLine "Bar" :| [])] 495 | ], 496 | testCase "sidebar nested into example block" $ do 497 | p <- 498 | parseBlocks 499 | [ "====", 500 | "****", 501 | "Bar", 502 | "****", 503 | "====" 504 | ] 505 | p 506 | `shouldBe` [ Nestable 507 | Example 508 | [] 509 | [ Nestable 510 | Sidebar 511 | [] 512 | [ Paragraph [] (MarkupLine "Bar" :| []) 513 | ] 514 | ] 515 | ], 516 | testCase "sidebar nested into example block and following paragraph" $ do 517 | p <- 518 | parseBlocks 519 | [ "====", 520 | "Foo", 521 | "****", 522 | "Bar", 523 | "****", 524 | "====" 525 | ] 526 | p 527 | `shouldBe` [ Nestable 528 | Example 529 | [] 530 | [ Paragraph [] (MarkupLine "Foo" :| []), 531 | Nestable 532 | Sidebar 533 | [] 534 | [ Paragraph [] (MarkupLine "Bar" :| []) 535 | ] 536 | ] 537 | ], 538 | testCase "sidebar nested into example block and following empty line" $ do 539 | p <- 540 | parseBlocks 541 | [ "====", 542 | "", 543 | "****", 544 | "Bar", 545 | "****", 546 | "====" 547 | ] 548 | p 549 | `shouldBe` [ Nestable 550 | Example 551 | [] 552 | [ Nestable 553 | Sidebar 554 | [] 555 | [ Paragraph [] (MarkupLine "Bar" :| []) 556 | ] 557 | ] 558 | ], 559 | testCase "example block nested into example block and following paragraph" $ do 560 | p <- 561 | parseBlocks 562 | [ "====", 563 | "Foo", 564 | "======", 565 | "Bar", 566 | "======", 567 | "====" 568 | ] 569 | p 570 | `shouldBe` [ Nestable 571 | Example 572 | [] 573 | [ Paragraph [] (MarkupLine "Foo" :| []), 574 | Nestable 575 | Example 576 | [] 577 | [ Paragraph [] (MarkupLine "Bar" :| []) 578 | ] 579 | ] 580 | ], 581 | testCase "non-closed sidebar nested into example block" $ do 582 | p <- 583 | parseBlocks 584 | [ "====", 585 | "Foo", 586 | "****", 587 | "Bar", 588 | "====" 589 | ] 590 | p 591 | `shouldBe` [ Nestable 592 | Example 593 | [] 594 | [ Paragraph [] (MarkupLine "Foo" :| []), 595 | Nestable 596 | Sidebar 597 | [] 598 | [ Paragraph [] (MarkupLine "Bar" :| []) 599 | ] 600 | ] 601 | ], 602 | testCase "non-closed example block nested into example block" $ do 603 | p <- 604 | parseBlocks 605 | [ "====", 606 | "Foo", 607 | "======", 608 | "Bar", 609 | "====" 610 | ] 611 | p 612 | `shouldBe` [ Nestable 613 | Example 614 | [] 615 | [ Paragraph [] (MarkupLine "Foo" :| []), 616 | Nestable 617 | Example 618 | [] 619 | [ Paragraph [] (MarkupLine "Bar" :| []) 620 | ] 621 | ] 622 | ] 623 | ] 624 | 625 | unorderedListUnitTests :: TestTree 626 | unorderedListUnitTests = 627 | testGroup 628 | "unordered list unit tests" 629 | [ testCase "simple unordered list using '-' (hyphen)" $ do 630 | p <- 631 | parseBlocks 632 | [ "- Foo", 633 | "- Bar", 634 | "- Baz" 635 | ] 636 | p 637 | `shouldBe` [ List 638 | (Unordered Nothing) 639 | [] 640 | ( (Paragraph [] (MarkupLine "Foo" :| []) :| []) 641 | :| [ Paragraph [] (MarkupLine "Bar" :| []) :| [], 642 | Paragraph [] (MarkupLine "Baz" :| []) :| [] 643 | ] 644 | ) 645 | ], 646 | testCase "simple unordered list using '*' (asterisk)" $ do 647 | p <- 648 | parseBlocks 649 | [ "* Foo", 650 | "* Bar", 651 | "* Baz" 652 | ] 653 | p 654 | `shouldBe` [ List 655 | (Unordered Nothing) 656 | [] 657 | ( (Paragraph [] (MarkupLine "Foo" :| []) :| []) 658 | :| [ Paragraph [] (MarkupLine "Bar" :| []) :| [], 659 | Paragraph [] (MarkupLine "Baz" :| []) :| [] 660 | ] 661 | ) 662 | ], 663 | testCase "unordered list with irregular indentation" $ do 664 | p <- 665 | parseBlocks 666 | [ "* Foo", 667 | " * Bar", 668 | " * Baz" 669 | ] 670 | p 671 | `shouldBe` [ List 672 | (Unordered Nothing) 673 | [] 674 | ( (Paragraph [] (MarkupLine "Foo" :| []) :| []) 675 | :| [ Paragraph [] (MarkupLine "Bar" :| []) :| [], 676 | Paragraph [] (MarkupLine "Baz" :| []) :| [] 677 | ] 678 | ) 679 | ], 680 | testCase "unordered list with indented first item" $ do 681 | p <- 682 | parseBlocks 683 | [ "Foo", 684 | "", 685 | " * Bar", 686 | "* Baz" 687 | ] 688 | p 689 | `shouldBe` [ Paragraph [] (MarkupLine "Foo" :| []), 690 | List 691 | (Unordered Nothing) 692 | [] 693 | ( (Paragraph [] (MarkupLine "Bar" :| []) :| []) 694 | :| [ Paragraph [] (MarkupLine "Baz" :| []) :| [] 695 | ] 696 | ) 697 | ], 698 | testCase "unordered list with irregular line spacing" $ do 699 | p <- 700 | parseBlocks 701 | [ "* Foo", 702 | "", 703 | "", 704 | "* Bar", 705 | "", 706 | "* Baz" 707 | ] 708 | p 709 | `shouldBe` [ List 710 | (Unordered Nothing) 711 | [] 712 | ( (Paragraph [] (MarkupLine "Foo" :| []) :| []) 713 | :| [ Paragraph [] (MarkupLine "Bar" :| []) :| [], 714 | Paragraph [] (MarkupLine "Baz" :| []) :| [] 715 | ] 716 | ) 717 | ], 718 | testCase "unordered list with multi-line paragraphs" $ do 719 | p <- 720 | parseBlocks 721 | [ "* Foo", 722 | "Bar", 723 | "* Baz", 724 | " Qux" 725 | ] 726 | p 727 | `shouldBe` [ List 728 | (Unordered Nothing) 729 | [] 730 | ( (Paragraph [] (MarkupLine "Foo" :| [MarkupLine "Bar"]) :| []) 731 | :| [Paragraph [] (MarkupLine "Baz" :| [MarkupLine " Qux"]) :| []] 732 | ) 733 | ], 734 | testCase "unordered list item with literal second paragraph" $ do 735 | p <- 736 | parseBlocks 737 | [ "* Foo", 738 | "", 739 | " Bar" 740 | ] 741 | p 742 | `shouldBe` [ List 743 | (Unordered Nothing) 744 | [] 745 | ( ( Paragraph [] (MarkupLine "Foo" :| []) 746 | -- TODO. Add when indented literal paragraphs are 747 | -- implemented: 748 | -- 749 | -- :| [QUOTED [] (MarkupLine "Bar" :| [])] 750 | :| [] 751 | ) 752 | :| [] 753 | ), 754 | -- TODO. Remove when indented literal paragraphs are 755 | -- implemented: 756 | Paragraph [] (MarkupLine " Bar" :| []) 757 | ], 758 | testCase "two unordered lists separated by a paragraph" $ do 759 | p <- 760 | parseBlocks 761 | [ "* Foo", 762 | "", 763 | "Bar", 764 | "", 765 | "* Baz", 766 | "* Qux" 767 | ] 768 | p 769 | `shouldBe` [ List 770 | (Unordered Nothing) 771 | [] 772 | ((Paragraph [] (MarkupLine "Foo" :| []) :| []) :| []), 773 | Paragraph [] (MarkupLine "Bar" :| []), 774 | List 775 | (Unordered Nothing) 776 | [] 777 | ( (Paragraph [] (MarkupLine "Baz" :| []) :| []) 778 | :| [Paragraph [] (MarkupLine "Qux" :| []) :| []] 779 | ) 780 | ], 781 | testCase "unordered list followed up by consecutive example block" $ do 782 | p <- 783 | parseBlocks 784 | [ "* Foo", 785 | "====", 786 | "Bar", 787 | "====" 788 | ] 789 | p 790 | `shouldBe` [ List 791 | (Unordered Nothing) 792 | [] 793 | ((Paragraph [] (MarkupLine "Foo" :| []) :| []) :| []), 794 | Nestable 795 | Example 796 | [] 797 | [Paragraph [] (MarkupLine "Bar" :| [])] 798 | ] 799 | ] 800 | 801 | nestedListsUnitTests :: TestTree 802 | nestedListsUnitTests = 803 | testGroup 804 | "nested lists unit tests" 805 | [ testCase "unordered list using '-' (hyphen) nested into list using '*' (asterisk)" $ do 806 | p <- 807 | parseBlocks 808 | [ "* Foo", 809 | "- Bar", 810 | "- Baz", 811 | "* Qux" 812 | ] 813 | p 814 | `shouldBe` [ List 815 | (Unordered Nothing) 816 | [] 817 | ( ( Paragraph [] (MarkupLine "Foo" :| []) 818 | :| [ List 819 | (Unordered Nothing) 820 | [] 821 | ( (Paragraph [] (MarkupLine "Bar" :| []) :| []) 822 | :| [Paragraph [] (MarkupLine "Baz" :| []) :| []] 823 | ) 824 | ] 825 | ) 826 | :| [Paragraph [] (MarkupLine "Qux" :| []) :| []] 827 | ) 828 | ], 829 | testCase "nested unordered lists using increasing number of '*' (asterisk)" $ do 830 | p <- 831 | parseBlocks 832 | [ "* Foo", 833 | "** Bar", 834 | "*** Baz", 835 | "* Qux" 836 | ] 837 | p 838 | `shouldBe` [ List 839 | (Unordered Nothing) 840 | [] 841 | ( ( Paragraph [] (MarkupLine "Foo" :| []) 842 | :| [ List 843 | (Unordered Nothing) 844 | [] 845 | ( ( Paragraph [] (MarkupLine "Bar" :| []) 846 | :| [ List 847 | (Unordered Nothing) 848 | [] 849 | ((Paragraph [] (MarkupLine "Baz" :| []) :| []) :| []) 850 | ] 851 | ) 852 | :| [] 853 | ) 854 | ] 855 | ) 856 | :| [Paragraph [] (MarkupLine "Qux" :| []) :| []] 857 | ) 858 | ], 859 | testCase "nested unordered lists with empty lines interspersed" $ do 860 | p <- 861 | parseBlocks 862 | [ "* Foo", 863 | "", 864 | "** Bar", 865 | "", 866 | "*** Baz", 867 | "", 868 | "* Qux" 869 | ] 870 | p 871 | `shouldBe` [ List 872 | (Unordered Nothing) 873 | [] 874 | ( ( Paragraph [] (MarkupLine "Foo" :| []) 875 | :| [ List 876 | (Unordered Nothing) 877 | [] 878 | ( ( Paragraph [] (MarkupLine "Bar" :| []) 879 | :| [ List 880 | (Unordered Nothing) 881 | [] 882 | ((Paragraph [] (MarkupLine "Baz" :| []) :| []) :| []) 883 | ] 884 | ) 885 | :| [] 886 | ) 887 | ] 888 | ) 889 | :| [Paragraph [] (MarkupLine "Qux" :| []) :| []] 890 | ) 891 | ], 892 | testCase "nested unordered lists using unordered number of '*' (asterisk)" $ do 893 | p <- 894 | parseBlocks 895 | [ "** Foo", 896 | "* Bar", 897 | "*** Baz", 898 | " ** Qux" 899 | ] 900 | p 901 | `shouldBe` [ List 902 | (Unordered Nothing) 903 | [] 904 | ( ( Paragraph [] (MarkupLine "Foo" :| []) 905 | :| [ List 906 | (Unordered Nothing) 907 | [] 908 | ( ( Paragraph [] (MarkupLine "Bar" :| []) 909 | :| [ List 910 | (Unordered Nothing) 911 | [] 912 | ((Paragraph [] (MarkupLine "Baz" :| []) :| []) :| []) 913 | ] 914 | ) 915 | :| [] 916 | ) 917 | ] 918 | ) 919 | :| [Paragraph [] (MarkupLine "Qux" :| []) :| []] 920 | ) 921 | ], 922 | testCase "nested unordered list with multi-line paragraph" $ do 923 | p <- 924 | parseBlocks 925 | [ "* Foo", 926 | "- Bar", 927 | "Baz", 928 | "* Qux" 929 | ] 930 | p 931 | `shouldBe` [ List 932 | (Unordered Nothing) 933 | [] 934 | ( ( Paragraph [] (MarkupLine "Foo" :| []) 935 | :| [ List 936 | (Unordered Nothing) 937 | [] 938 | ( (Paragraph [] (MarkupLine "Bar" :| [MarkupLine "Baz"]) :| []) 939 | :| [] 940 | ) 941 | ] 942 | ) 943 | :| [Paragraph [] (MarkupLine "Qux" :| []) :| []] 944 | ) 945 | ], 946 | testCase "(DVB001) nested unordered list with block prefixes" $ do 947 | p <- 948 | parseBlocks 949 | [ "[.red]", 950 | ".FooFoo", 951 | "* Foo", 952 | "[.blue]", 953 | ".BarBar", 954 | "- Bar", 955 | "[.green]", 956 | ".BazBaz", 957 | "- Baz" 958 | ] 959 | p 960 | `shouldBe` [ List 961 | (Unordered Nothing) 962 | [ MetadataItem (BlockAttributeList ".red"), 963 | MetadataItem (BlockTitle (MarkupLine "FooFoo" :| [])) 964 | ] 965 | ( ( Paragraph [] (MarkupLine "Foo" :| []) 966 | :| [ List 967 | (Unordered Nothing) 968 | [ MetadataItem (BlockAttributeList ".blue"), 969 | MetadataItem (BlockTitle (MarkupLine "BarBar" :| [])) 970 | ] 971 | ( (Paragraph [] (MarkupLine "Bar" :| []) :| []) 972 | :| [] 973 | ) 974 | ] 975 | ) 976 | :| [] 977 | ), 978 | List 979 | (Unordered Nothing) 980 | [ MetadataItem (BlockAttributeList ".green"), 981 | MetadataItem (BlockTitle (MarkupLine "BazBaz" :| [])) 982 | ] 983 | ((Paragraph [] (MarkupLine "Baz" :| []) :| []) :| []) 984 | ], 985 | -- Identical result to the previous test case. 986 | testCase "(DVB001) nested unordered list with block prefixes and some empty lines" $ do 987 | p <- 988 | parseBlocks 989 | [ "[.red]", 990 | ".FooFoo", 991 | "", 992 | "* Foo", 993 | "[.blue]", 994 | "", 995 | ".BarBar", 996 | "- Bar", 997 | "", 998 | "[.green]", 999 | ".BazBaz", 1000 | "- Baz" 1001 | ] 1002 | p 1003 | `shouldBe` [ List 1004 | (Unordered Nothing) 1005 | [ MetadataItem (BlockAttributeList ".red"), 1006 | MetadataItem (BlockTitle (MarkupLine "FooFoo" :| [])) 1007 | ] 1008 | ( ( Paragraph [] (MarkupLine "Foo" :| []) 1009 | :| [ List 1010 | (Unordered Nothing) 1011 | [ MetadataItem (BlockAttributeList ".blue"), 1012 | MetadataItem (BlockTitle (MarkupLine "BarBar" :| [])) 1013 | ] 1014 | ( (Paragraph [] (MarkupLine "Bar" :| []) :| []) 1015 | :| [] 1016 | ) 1017 | ] 1018 | ) 1019 | :| [] 1020 | ), 1021 | List 1022 | (Unordered Nothing) 1023 | [ MetadataItem (BlockAttributeList ".green"), 1024 | MetadataItem (BlockTitle (MarkupLine "BazBaz" :| [])) 1025 | ] 1026 | ((Paragraph [] (MarkupLine "Baz" :| []) :| []) :| []) 1027 | ] 1028 | ] 1029 | 1030 | listContinuationUnitTests :: TestTree 1031 | listContinuationUnitTests = 1032 | testGroup 1033 | "list continuation unit tests" 1034 | [ testCase "list continuation (paragraph), followed by another list item" $ do 1035 | p <- 1036 | parseBlocks 1037 | [ "* Foo", 1038 | "+", 1039 | "Bar", 1040 | "* Baz" 1041 | ] 1042 | p 1043 | `shouldBe` [ List 1044 | (Unordered Nothing) 1045 | [] 1046 | ( ( Paragraph [] (MarkupLine "Foo" :| []) 1047 | :| [Paragraph [] (MarkupLine "Bar" :| [])] 1048 | ) 1049 | :| [Paragraph [] (MarkupLine "Baz" :| []) :| []] 1050 | ) 1051 | ], 1052 | testCase "two list continuations (paragraph), followed by another list item" $ do 1053 | p <- 1054 | parseBlocks 1055 | [ "* Foo", 1056 | "+", 1057 | "Bar", 1058 | "+", 1059 | "Baz", 1060 | "* Qux" 1061 | ] 1062 | p 1063 | `shouldBe` [ List 1064 | (Unordered Nothing) 1065 | [] 1066 | ( ( Paragraph [] (MarkupLine "Foo" :| []) 1067 | :| [ Paragraph [] (MarkupLine "Bar" :| []), 1068 | Paragraph [] (MarkupLine "Baz" :| []) 1069 | ] 1070 | ) 1071 | :| [Paragraph [] (MarkupLine "Qux" :| []) :| []] 1072 | ) 1073 | ], 1074 | testCase "list continuation (paragraph) with a block prefix" $ do 1075 | p <- 1076 | parseBlocks 1077 | [ "* Foo", 1078 | "+", 1079 | "[.red]", 1080 | "Bar" 1081 | ] 1082 | p 1083 | `shouldBe` [ List 1084 | (Unordered Nothing) 1085 | [] 1086 | ( ( Paragraph [] (MarkupLine "Foo" :| []) 1087 | :| [ Paragraph 1088 | [MetadataItem (BlockAttributeList ".red")] 1089 | (MarkupLine "Bar" :| []) 1090 | ] 1091 | ) 1092 | :| [] 1093 | ) 1094 | ], 1095 | testCase "list continuation (example block), followed by another list item" $ do 1096 | p <- 1097 | parseBlocks 1098 | [ "* Foo", 1099 | "+", 1100 | "====", 1101 | "Bar", 1102 | "====", 1103 | "* Baz" 1104 | ] 1105 | p 1106 | `shouldBe` [ List 1107 | (Unordered Nothing) 1108 | [] 1109 | ( ( Paragraph [] (MarkupLine "Foo" :| []) 1110 | :| [Nestable Example [] [Paragraph [] (MarkupLine "Bar" :| [])]] 1111 | ) 1112 | :| [Paragraph [] (MarkupLine "Baz" :| []) :| []] 1113 | ) 1114 | ], 1115 | testCase "list continuation into a nested unordered list" $ do 1116 | p <- 1117 | parseBlocks 1118 | [ "* Foo", 1119 | "** Bar", 1120 | "+", 1121 | "Baz", 1122 | "* Qux" 1123 | ] 1124 | p 1125 | `shouldBe` [ List 1126 | (Unordered Nothing) 1127 | [] 1128 | ( ( Paragraph [] (MarkupLine "Foo" :| []) 1129 | :| [ List 1130 | (Unordered Nothing) 1131 | [] 1132 | ( ( Paragraph [] (MarkupLine "Bar" :| []) 1133 | :| [Paragraph [] (MarkupLine "Baz" :| [])] 1134 | ) 1135 | :| [] 1136 | ) 1137 | ] 1138 | ) 1139 | :| [Paragraph [] (MarkupLine "Qux" :| []) :| []] 1140 | ) 1141 | ], 1142 | testCase "dangling list continuation marker in outermost list" $ do 1143 | p <- 1144 | parseBlocks 1145 | [ "* Foo", 1146 | "+", 1147 | "", -- Asciidoctor allows this optional empty line here 1148 | "", 1149 | "Bar" 1150 | ] 1151 | p 1152 | `shouldBe` [ List 1153 | (Unordered Nothing) 1154 | [] 1155 | ((Paragraph [] (MarkupLine "Foo" :| []) :| []) :| []), 1156 | Paragraph [] (MarkupLine "Bar" :| []) 1157 | ], 1158 | testCase "dangling list continuation marker into a nested unordered list" $ do 1159 | p <- 1160 | parseBlocks 1161 | [ "* Foo", 1162 | "** Bar", 1163 | "+", 1164 | "", -- Asciidoctor allows this optional blank line here 1165 | "", 1166 | "Baz" 1167 | ] 1168 | p 1169 | `shouldBe` [ List 1170 | (Unordered Nothing) 1171 | [] 1172 | ( ( Paragraph [] (MarkupLine "Foo" :| []) 1173 | :| [ List 1174 | (Unordered Nothing) 1175 | [] 1176 | ((Paragraph [] (MarkupLine "Bar" :| []) :| []) :| []) 1177 | ] 1178 | ) 1179 | :| [] 1180 | ), 1181 | Paragraph [] (MarkupLine "Baz" :| []) 1182 | ], 1183 | testCase "(DVB002) broken list continuation attempt in outermost list" $ do 1184 | p <- 1185 | parseBlocks 1186 | [ "* Foo", 1187 | "", 1188 | "+", 1189 | "Bar" 1190 | ] 1191 | p 1192 | `shouldBe` [ List 1193 | (Unordered Nothing) 1194 | [] 1195 | ((Paragraph [] (MarkupLine "Foo" :| []) :| []) :| []), 1196 | Paragraph [] (MarkupLine "+" :| [MarkupLine "Bar"]) 1197 | ], 1198 | testCase "(DVB002) broken list continuation attempt in nested list" $ do 1199 | p <- 1200 | parseBlocks 1201 | [ "* Foo", 1202 | "** Bar", 1203 | "", 1204 | "+", 1205 | "Baz" 1206 | ] 1207 | p 1208 | `shouldBe` [ List 1209 | (Unordered Nothing) 1210 | [] 1211 | ( ( Paragraph [] (MarkupLine "Foo" :| []) 1212 | :| [ List 1213 | (Unordered Nothing) 1214 | [] 1215 | ((Paragraph [] (MarkupLine "Bar" :| []) :| []) :| []) 1216 | ] 1217 | ) 1218 | :| [] 1219 | ), 1220 | Paragraph [] (MarkupLine "+" :| [MarkupLine "Baz"]) 1221 | ], 1222 | testCase "line break that resembles list continuation" $ do 1223 | p <- 1224 | parseBlocks 1225 | [ "* Foo", 1226 | " +", 1227 | "Bar" 1228 | ] 1229 | p 1230 | `shouldBe` [ List 1231 | (Unordered Nothing) 1232 | [] 1233 | -- TODO. Must be changed when line breaks are 1234 | -- implemented. 1235 | ( ( Paragraph 1236 | [] 1237 | ( MarkupLine "Foo" 1238 | :| [ MarkupLine " +", 1239 | MarkupLine "Bar" 1240 | ] 1241 | ) 1242 | :| [] 1243 | ) 1244 | :| [] 1245 | ) 1246 | ] 1247 | ] 1248 | 1249 | commentUnitTests :: TestTree 1250 | commentUnitTests = 1251 | testGroup 1252 | "comment unit tests" 1253 | [ testCase "dangling block comment" $ do 1254 | p <- 1255 | parseBlocks 1256 | [ "////", 1257 | "Foo", 1258 | "////" 1259 | ] 1260 | p 1261 | `shouldBe` [DanglingBlockPrefix [Comment (BlockComment ["Foo"])]], 1262 | testCase "dangling line comment sequence" $ do 1263 | p <- 1264 | parseBlocks 1265 | [ "//Foo", 1266 | "// Bar" 1267 | ] 1268 | p 1269 | `shouldBe` [ DanglingBlockPrefix 1270 | [Comment (LineCommentSequence ("Foo" :| [" Bar"]))] 1271 | ], 1272 | testCase "block comment before paragraph" $ do 1273 | p <- 1274 | parseBlocks 1275 | [ "////", 1276 | "Foo", 1277 | "////", 1278 | "Bar" 1279 | ] 1280 | p 1281 | `shouldBe` [ Paragraph 1282 | [Comment (BlockComment ["Foo"])] 1283 | (MarkupLine "Bar" :| []) 1284 | ], 1285 | testCase "block comment before paragraph, with redundant space" $ do 1286 | p <- 1287 | parseBlocks 1288 | [ "//// ", 1289 | "Foo", 1290 | "//// ", 1291 | "Bar" 1292 | ] 1293 | p 1294 | `shouldBe` [ Paragraph 1295 | [Comment (BlockComment ["Foo"])] 1296 | (MarkupLine "Bar" :| []) 1297 | ], 1298 | testCase "block comment before paragraph, separated by blank line" $ do 1299 | p <- 1300 | parseBlocks 1301 | [ "////", 1302 | "Foo", 1303 | "////", 1304 | "", 1305 | "Bar" 1306 | ] 1307 | p 1308 | `shouldBe` [ Paragraph 1309 | [Comment (BlockComment ["Foo"])] 1310 | (MarkupLine "Bar" :| []) 1311 | ], 1312 | testCase "empty block comment before paragraph" $ do 1313 | p <- 1314 | parseBlocks 1315 | [ "////", 1316 | "////", 1317 | "Foo" 1318 | ] 1319 | p 1320 | `shouldBe` [ Paragraph 1321 | [Comment (BlockComment [])] 1322 | (MarkupLine "Foo" :| []) 1323 | ], 1324 | testCase "empty line comment before paragraph" $ do 1325 | p <- 1326 | parseBlocks 1327 | [ "//", 1328 | "Foo" 1329 | ] 1330 | p 1331 | `shouldBe` [ Paragraph 1332 | [Comment (LineCommentSequence ("" :| []))] 1333 | (MarkupLine "Foo" :| []) 1334 | ], 1335 | testCase "block comment with multiple pseudo-paragraphs" $ do 1336 | p <- 1337 | parseBlocks 1338 | [ "////", 1339 | "Foo", 1340 | "", 1341 | "Bar", 1342 | "////", 1343 | "Baz" 1344 | ] 1345 | p 1346 | `shouldBe` [ Paragraph 1347 | [Comment (BlockComment ["Foo", "", "Bar"])] 1348 | (MarkupLine "Baz" :| []) 1349 | ], 1350 | testCase "line comment sequence before paragraph" $ do 1351 | p <- 1352 | parseBlocks 1353 | [ "//Foo", 1354 | "// Bar", 1355 | "Baz" 1356 | ] 1357 | p 1358 | `shouldBe` [ Paragraph 1359 | [Comment (LineCommentSequence ("Foo" :| [" Bar"]))] 1360 | (MarkupLine "Baz" :| []) 1361 | ], 1362 | testCase "line comment inside paragraph" $ do 1363 | p <- 1364 | parseBlocks 1365 | [ "Foo", 1366 | "// Bar", 1367 | "Baz" 1368 | ] 1369 | p 1370 | `shouldBe` [ Paragraph 1371 | [] 1372 | (MarkupLine "Foo" :| [CommentLine " Bar", MarkupLine "Baz"]) 1373 | ], 1374 | testCase "line comment inside paragraph and after paragraph" $ do 1375 | p <- 1376 | parseBlocks 1377 | [ "Foo", 1378 | "// Bar", 1379 | "Baz", 1380 | "//Qux" 1381 | ] 1382 | p 1383 | `shouldBe` [ Paragraph 1384 | [] 1385 | ( MarkupLine "Foo" 1386 | :| [ CommentLine " Bar", 1387 | MarkupLine "Baz", 1388 | CommentLine "Qux" 1389 | ] 1390 | ) 1391 | ], 1392 | testCase "line comment sequence before section header" $ do 1393 | p <- 1394 | parseBlocks 1395 | [ "//Foo", 1396 | "// Bar", 1397 | "== Baz" 1398 | ] 1399 | p 1400 | `shouldBe` [ SectionHeader 1401 | [Comment (LineCommentSequence ("Foo" :| [" Bar"]))] 1402 | 1 1403 | (MarkupLine "Baz" :| []) 1404 | ], 1405 | testCase "block comment followed by line comment sequence" $ do 1406 | p <- 1407 | parseBlocks 1408 | [ "////", 1409 | "Foo", 1410 | "", 1411 | "////", 1412 | "//Bar", 1413 | "Baz" 1414 | ] 1415 | p 1416 | `shouldBe` [ Paragraph 1417 | [ Comment (BlockComment ["Foo", ""]), 1418 | Comment (LineCommentSequence ("Bar" :| [])) 1419 | ] 1420 | (MarkupLine "Baz" :| []) 1421 | ], 1422 | testCase "line comment sequence followed by block comment" $ do 1423 | p <- 1424 | parseBlocks 1425 | [ "// Foo", 1426 | "//Bar", 1427 | "////", 1428 | "Baz", 1429 | "////", 1430 | "Qux" 1431 | ] 1432 | p 1433 | `shouldBe` [ Paragraph 1434 | [ Comment (LineCommentSequence (" Foo" :| ["Bar"])), 1435 | Comment (BlockComment ["Baz"]) 1436 | ] 1437 | (MarkupLine "Qux" :| []) 1438 | ], 1439 | testCase "block comment with more than four '/' (slash)" $ do 1440 | p <- 1441 | parseBlocks 1442 | [ "/////", 1443 | "Foo", 1444 | "////", 1445 | "Bar", 1446 | "////", 1447 | "/////", 1448 | "Baz" 1449 | ] 1450 | p 1451 | `shouldBe` [ Paragraph 1452 | [ Comment (BlockComment ["Foo", "////", "Bar", "////"]) 1453 | ] 1454 | (MarkupLine "Baz" :| []) 1455 | ], 1456 | testCase "dangling non-closed block comment" $ do 1457 | p <- 1458 | parseBlocks 1459 | [ "////", 1460 | "Foo", 1461 | "", 1462 | "Bar" 1463 | ] 1464 | p 1465 | `shouldBe` [ DanglingBlockPrefix 1466 | [Comment (BlockComment ["Foo", "", "Bar"])] 1467 | ], 1468 | testCase "bad block comment opening, with three '/' (slashes)" $ do 1469 | p <- 1470 | parseBlocks 1471 | [ "///", 1472 | "Foo", 1473 | "////" 1474 | ] 1475 | p 1476 | `shouldBe` [ Paragraph [] (MarkupLine "///" :| [MarkupLine "Foo"]), 1477 | DanglingBlockPrefix [Comment (BlockComment [])] 1478 | ], 1479 | testCase "bad line comment, with three '/' (slashes)" $ do 1480 | p <- 1481 | parseBlocks 1482 | [ "///Foo", 1483 | "Bar" 1484 | ] 1485 | p 1486 | `shouldBe` [Paragraph [] (MarkupLine "///Foo" :| [MarkupLine "Bar"])], 1487 | testCase "bad block comment opening, preceded by space" $ do 1488 | p <- 1489 | parseBlocks 1490 | [ " ////", 1491 | "Foo", 1492 | "////" 1493 | ] 1494 | p 1495 | `shouldBe` [ Paragraph [] (MarkupLine " ////" :| [MarkupLine "Foo"]), 1496 | DanglingBlockPrefix [Comment (BlockComment [])] 1497 | ], 1498 | testCase "bad line comment opening, preceded by space" $ do 1499 | p <- 1500 | parseBlocks 1501 | [ " //Foo", 1502 | "Bar" 1503 | ] 1504 | p 1505 | `shouldBe` [Paragraph [] (MarkupLine " //Foo" :| [MarkupLine "Bar"])] 1506 | ] 1507 | --------------------------------------------------------------------------------