├── Setup.hs
├── cabal.project
├── .gitmodules
├── examples
    ├── complex-lists-pandoc.odt
    ├── asciidoctor-article-template-pandoc.odt
    ├── monospace.adoc
    ├── complex-lists.adoc
    ├── complex-lists-pandoc.md
    ├── asciidoc-hs.css
    ├── asciidoctor-article-template-pandoc.md
    ├── monospace-asciidoctor.html
    ├── complex-lists-asciidoctor.html
    ├── monospace-pandoc.html
    ├── complex-lists-pandoc.html
    ├── asciidoctor-article-template-asciidoctor.html
    ├── asciidoctor-article-template.adoc
    └── asciidoctor-article-template-pandoc.html
├── stack.yaml
├── .gitignore
├── test
    └── Tests
    │   ├── Main.hs
    │   ├── Metadata.hs
    │   ├── Inlines.hs
    │   └── Blocks.hs
├── exe
    └── Main.hs
├── CHANGELOG.adoc
├── stack.yaml.lock
├── src
    └── Text
    │   └── AsciiDoc
    │       ├── UnparsedInline.hs
    │       ├── Debug
    │           └── ParseTest.hs
    │       ├── SourceRange.hs
    │       ├── Metadata.hs
    │       ├── LineParsers.hs
    │       ├── SpecialChars.hs
    │       ├── ElementAttributes.hs
    │       ├── Pandoc.hs
    │       ├── Inlines.hs
    │       └── Blocks.hs
├── LICENSE
├── .github
    └── workflows
    │   └── ci.yml
├── asciidoc-hs.cabal
└── README.adoc


/Setup.hs:
--------------------------------------------------------------------------------
1 | import Distribution.Simple
2 | 
3 | main = defaultMain
4 | 


--------------------------------------------------------------------------------
/cabal.project:
--------------------------------------------------------------------------------
1 | packages:
2 |   ./
3 | 
4 | optional-packages:
5 |   ./parsec-free
6 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "parsec-free"]
2 | 	path = parsec-free
3 | 	url = https://github.com/gmarpons/parsec-free.git
4 | 


--------------------------------------------------------------------------------
/examples/complex-lists-pandoc.odt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gmarpons/asciidoc-hs/HEAD/examples/complex-lists-pandoc.odt


--------------------------------------------------------------------------------
/examples/asciidoctor-article-template-pandoc.odt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gmarpons/asciidoc-hs/HEAD/examples/asciidoctor-article-template-pandoc.odt


--------------------------------------------------------------------------------
/stack.yaml:
--------------------------------------------------------------------------------
1 | resolver: lts-17.14
2 | 
3 | packages:
4 | - .
5 | 
6 | extra-deps:
7 | - repr-tree-syb-0.1.1@sha256:b3af0d338ab5273ebfc993ab1a3b10a36febffdfad474fb6ae7c2ab1fdb49702,1100
8 | 


--------------------------------------------------------------------------------
/examples/monospace.adoc:
--------------------------------------------------------------------------------
1 | = Example with monospace
2 | 
3 | == `Monospace` can be used in section headers
4 | 
5 | This is a paragraph with `some words in monospace and in *bold monospace*`.
6 | 


--------------------------------------------------------------------------------
/examples/complex-lists.adoc:
--------------------------------------------------------------------------------
 1 | = Example with complex lists
 2 | 
 3 | .Unordered list title
 4 | * list item 1
 5 | - nested list item
 6 | ** nested nested list item 1
 7 | +
 8 | another paragraph in the same nested nested list item 1
 9 | ** nested nested list item 2
10 | +
11 | .an example inside item 2
12 | ====
13 | an example
14 | ====
15 | 
16 | * list item 2
17 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | dist
 2 | dist-*
 3 | cabal-dev
 4 | *.o
 5 | *.hi
 6 | *.hie
 7 | *.chi
 8 | *.chs.h
 9 | *.dyn_o
10 | *.dyn_hi
11 | .hpc
12 | .hsenv
13 | .cabal-sandbox/
14 | cabal.sandbox.config
15 | *.prof
16 | *.aux
17 | *.hp
18 | *.eventlog
19 | .stack-work/
20 | cabal.project.local
21 | cabal.project.local~
22 | .HTF/
23 | .ghc.environment.*
24 | deps.png
25 | modules.png
26 | cabal.project.local~*
27 | 


--------------------------------------------------------------------------------
/examples/complex-lists-pandoc.md:
--------------------------------------------------------------------------------
 1 | # Example with complex lists
 2 | 
 3 | Unordered list title
 4 | 
 5 | -   list item 1
 6 | 
 7 |     -   nested list item
 8 | 
 9 |         -   nested nested list item 1
10 | 
11 |             another paragraph in the same nested nested list item 1
12 | 
13 |         -   nested nested list item 2
14 | 
15 |             Example 1. an example inside item 2
16 | 
17 |             an example
18 | 
19 | -   list item 2
20 | 


--------------------------------------------------------------------------------
/test/Tests/Main.hs:
--------------------------------------------------------------------------------
 1 | module Main
 2 |   ( main,
 3 |   )
 4 | where
 5 | 
 6 | import Test.Tasty
 7 | import Tests.Blocks
 8 | import Tests.Inlines
 9 | import Tests.Metadata
10 | 
11 | main :: IO ()
12 | main = defaultMain tests
13 | 
14 | tests :: TestTree
15 | tests = testGroup "tests" [functionalTests]
16 | 
17 | functionalTests :: TestTree
18 | functionalTests =
19 |   testGroup
20 |     "functional tests"
21 |     [ blockUnitTests,
22 |       inlineUnitTests,
23 |       metadataUnitTests
24 |     ]
25 | 


--------------------------------------------------------------------------------
/examples/asciidoc-hs.css:
--------------------------------------------------------------------------------
 1 | .monospace {
 2 |   font-family: "Droid Sans Mono", "DejaVu Sans Mono", monospace;
 3 |   font-weight: 400;
 4 |   color: rgba(0, 0, 0, 0.9);
 5 |   font-size: 1em;
 6 | }
 7 | 
 8 | :not(pre):not([class^=L]) > .monospace {
 9 |   font-size: 0.9375em;
10 |   font-style: normal !important;
11 |   letter-spacing: 0;
12 |   padding: 0.1em 0.5ex;
13 |   word-spacing: -0.15em;
14 |   background: #f7f7f8;
15 |   -webkit-border-radius: 4px;
16 |   border-radius: 4px;
17 |   line-height: 1.45;
18 |   text-rendering: optimizeSpeed;
19 | }
20 | 


--------------------------------------------------------------------------------
/exe/Main.hs:
--------------------------------------------------------------------------------
 1 | module Main
 2 |   ( main,
 3 |   )
 4 | where
 5 | 
 6 | import qualified Data.Aeson.Text as Aeson
 7 | import qualified Data.Text as T (lines)
 8 | import qualified Data.Text.IO as T
 9 | import qualified Data.Text.Lazy.IO as LT
10 | import Text.AsciiDoc.Blocks
11 | import Text.AsciiDoc.Pandoc
12 | import qualified Text.Parsec as Parsec (runParser)
13 | 
14 | main :: IO ()
15 | main = do
16 |   result <-
17 |     Parsec.runParser documentP blockParserInitialState ""
18 |       . T.lines
19 |       <$> T.getContents
20 |   case result of
21 |     Left err -> error $ "Parsing error: " <> show err
22 |     Right doc ->
23 |       LT.putStrLn $ Aeson.encodeToLazyText $ convertDocument $ parseInlines doc
24 | 


--------------------------------------------------------------------------------
/CHANGELOG.adoc:
--------------------------------------------------------------------------------
 1 | = Revision history for asciidoc-hs
 2 | 
 3 | == 0.0.0.0 -- 2021-06-10
 4 | 
 5 | * First version.
 6 | * Basic paragraphs.
 7 | * Basic, grammar based, inline parsing.
 8 | * Conversion to JSON Pandoc.
 9 | * (Nested) bold, italic formatting.
10 | * Monospace formatting for HTML output (through support in CSS).
11 | * Highlight formatting and custom inline style.
12 | * Constrained and unconstrained formatting.
13 | * Element attributes: positional, named, shorthand syntax.
14 | * ID attribute, role attribute, optional attribute.
15 | * Block titles.
16 | * Line and block comments.
17 | * Delimited blocks: sidebars, example blocks.
18 | * Section headers and levels.
19 | * Discrete sections.
20 | * Unordered lists (with simple list continuations).
21 | * Document title.
22 | 


--------------------------------------------------------------------------------
/examples/asciidoctor-article-template-pandoc.md:
--------------------------------------------------------------------------------
 1 | # AsciiDoc Article Title
 2 | 
 3 | ## First level heading
 4 | 
 5 | This is a paragraph with a **bold** word and an *italicized* word.
 6 | 
 7 | ### Second level heading
 8 | 
 9 | Unordered list title
10 | 
11 | -   list item 1
12 | 
13 |     -   nested list item
14 | 
15 |         -   nested nested list item 1
16 | 
17 |         -   nested nested list item 2
18 | 
19 | -   list item 2
20 | 
21 | This is a paragraph.
22 | 
23 | Example 1. Example block title
24 | 
25 | Content in an example block is subject to normal substitutions.
26 | 
27 | Sidebar title
28 | 
29 | Sidebars contain aside text and are subject to normal substitutions.
30 | 
31 | #### Third level heading
32 | 
33 | ##### Fourth level heading
34 | 
35 | ###### Fifth level heading
36 | 
37 | ## First level heading
38 | 
39 | ## First level heading
40 | 


--------------------------------------------------------------------------------
/stack.yaml.lock:
--------------------------------------------------------------------------------
 1 | # This file was autogenerated by Stack.
 2 | # You should not edit this file by hand.
 3 | # For more information, please see the documentation at:
 4 | #   https://docs.haskellstack.org/en/stable/lock_files
 5 | 
 6 | packages:
 7 | - completed:
 8 |     hackage: repr-tree-syb-0.1.1@sha256:b3af0d338ab5273ebfc993ab1a3b10a36febffdfad474fb6ae7c2ab1fdb49702,1100
 9 |     pantry-tree:
10 |       size: 213
11 |       sha256: 9a20743b457ff2e3e1ce98881480f8353b7837c66352cf55c1edd2a8dac97fd4
12 |   original:
13 |     hackage: repr-tree-syb-0.1.1@sha256:b3af0d338ab5273ebfc993ab1a3b10a36febffdfad474fb6ae7c2ab1fdb49702,1100
14 | snapshots:
15 | - completed:
16 |     size: 567677
17 |     url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/17/14.yaml
18 |     sha256: 3740f22286bf5e6e3d82f88125e1c708b6e27847211f956b530aa5d83cf39383
19 |   original: lts-17.14
20 | 


--------------------------------------------------------------------------------
/examples/monospace-asciidoctor.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 | <meta charset="UTF-8">
 5 | <meta http-equiv="X-UA-Compatible" content="IE=edge">
 6 | <meta name="viewport" content="width=device-width, initial-scale=1.0">
 7 | <meta name="generator" content="Asciidoctor 2.0.10">
 8 | <title>Example with monospace</title>
 9 | <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/asciidoctor/asciidoctor@2.0/data/stylesheets/asciidoctor-default.css">
10 | </head>
11 | <body class="article">
12 | <div id="header">
13 | <h1>Example with monospace</h1>
14 | </div>
15 | <div id="content">
16 | <div class="sect1">
17 | <h2><code>Monospace</code> can be used in section headers</h2>
18 | <div class="sectionbody">
19 | <div class="paragraph">
20 | <p>This is a paragraph with <code>some words in monospace and in <strong>bold monospace</strong></code>.</p>
21 | </div>
22 | </div>
23 | </div>
24 | </div>
25 | <div id="footer">
26 | <div id="footer-text">
27 | Last updated 2021-06-09 22:50:29 +0200
28 | </div>
29 | </div>
30 | </body>
31 | </html>
32 | 


--------------------------------------------------------------------------------
/src/Text/AsciiDoc/UnparsedInline.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE DerivingStrategies #-}
 2 | 
 3 | -- |
 4 | -- Module      :  Text.AsciiDoc.UnparsedInline
 5 | -- Copyright   :  © 2020–present Guillem Marpons
 6 | -- License     :  BSD-3-Clause
 7 | --
 8 | -- Maintainer  :  Guillem Marpons <gmarpons@mailbox.org>
 9 | -- Stability   :  experimental
10 | -- Portability :  portable
11 | --
12 | -- Types for working with input lines meant to be parsed together into an
13 | -- 'Text.AsciiDoc.Inline'.
14 | module Text.AsciiDoc.UnparsedInline
15 |   ( UnparsedInline,
16 |     InputLine (..),
17 |   )
18 | where
19 | 
20 | import Data.List.NonEmpty (NonEmpty)
21 | import Data.Text (Text)
22 | 
23 | -- TODO. Type-encode the invariant of UnparsedInline. See branch
24 | -- safe-unparsed-inline for a POC. Assess if it's worth the extra dependencies
25 | -- (uses package some), or if the knowledge obtained during block parsing can be
26 | -- really preserved for inline parsing (and guarantee that we don't try to
27 | -- parse invalid text).
28 | 
29 | -- | Non-empty sequence of input lines meant to be parsed together as an inline.
30 | --
31 | -- __Invariant__: The first element is always a 'MarkupLine'.
32 | -- This guarantees that an @UnparsedInline@ can always be converted to an
33 | -- 'Text.AsciiDoc.Inline'.
34 | type UnparsedInline = NonEmpty InputLine
35 | 
36 | data InputLine
37 |   = MarkupLine Text
38 |   | CommentLine Text
39 |   deriving stock (Eq, Show)
40 | 


--------------------------------------------------------------------------------
/examples/complex-lists-asciidoctor.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 | <meta charset="UTF-8">
 5 | <meta http-equiv="X-UA-Compatible" content="IE=edge">
 6 | <meta name="viewport" content="width=device-width, initial-scale=1.0">
 7 | <meta name="generator" content="Asciidoctor 2.0.10">
 8 | <title>Example with complex lists</title>
 9 | <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/asciidoctor/asciidoctor@2.0/data/stylesheets/asciidoctor-default.css">
10 | </head>
11 | <body class="article">
12 | <div id="header">
13 | <h1>Example with complex lists</h1>
14 | </div>
15 | <div id="content">
16 | <div class="ulist">
17 | <div class="title">Unordered list title</div>
18 | <ul>
19 | <li>
20 | <p>list item 1</p>
21 | <div class="ulist">
22 | <ul>
23 | <li>
24 | <p>nested list item</p>
25 | <div class="ulist">
26 | <ul>
27 | <li>
28 | <p>nested nested list item 1</p>
29 | <div class="paragraph">
30 | <p>another paragraph in the same nested nested list item 1</p>
31 | </div>
32 | </li>
33 | <li>
34 | <p>nested nested list item 2</p>
35 | <div class="exampleblock">
36 | <div class="title">Example 1. an example inside item 2</div>
37 | <div class="content">
38 | <div class="paragraph">
39 | <p>an example</p>
40 | </div>
41 | </div>
42 | </div>
43 | </li>
44 | </ul>
45 | </div>
46 | </li>
47 | </ul>
48 | </div>
49 | </li>
50 | <li>
51 | <p>list item 2</p>
52 | </li>
53 | </ul>
54 | </div>
55 | </div>
56 | <div id="footer">
57 | <div id="footer-text">
58 | Last updated 2021-06-09 23:15:06 +0200
59 | </div>
60 | </div>
61 | </body>
62 | </html>
63 | 


--------------------------------------------------------------------------------
/examples/monospace-pandoc.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">
 3 | <head>
 4 |   <meta charset="utf-8" />
 5 |   <meta name="generator" content="pandoc" />
 6 |   <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
 7 |   <title>Example with monospace</title>
 8 |   <style>
 9 |     code{white-space: pre-wrap;}
10 |     span.smallcaps{font-variant: small-caps;}
11 |     span.underline{text-decoration: underline;}
12 |     div.column{display: inline-block; vertical-align: top; width: 50%;}
13 |     div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
14 |     ul.task-list{list-style: none;}
15 |     .display.math{display: block; text-align: center; margin: 0.5rem auto;}
16 |   </style>
17 |   <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/asciidoctor/asciidoctor@2.0/data/stylesheets/asciidoctor-default.css" />
18 |   <link rel="stylesheet" href="./asciidoc-hs.css" />
19 |   <!--[if lt IE 9]>
20 |     <script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script>
21 |   <![endif]-->
22 | </head>
23 | <body>
24 | <div class="article">
25 | <div id="header">
26 |  
27 | <h1>Example with monospace</h1>
28 | </div>
29 | <div id="content">
30 |  
31 | <h2><span class="monospace">Monospace</span> can be used in section headers</h2>
32 | <div class="paragraph">
33 | <p>This is a paragraph with <span class="monospace">some words in monospace and in <strong>bold monospace</strong></span>.</p>
34 | </div>
35 | </div>
36 | </div>
37 | </body>
38 | </html>
39 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2020, Guillem Marpons
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | 1. Redistributions of source code must retain the above copyright notice, this
10 |    list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 |    this list of conditions and the following disclaimer in the documentation
14 |    and/or other materials provided with the distribution.
15 | 
16 | 3. Neither the name of the copyright holder nor the names of its
17 |    contributors may be used to endorse or promote products derived from
18 |    this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/examples/complex-lists-pandoc.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">
 3 | <head>
 4 |   <meta charset="utf-8" />
 5 |   <meta name="generator" content="pandoc" />
 6 |   <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
 7 |   <title>Example with complex lists</title>
 8 |   <style>
 9 |     code{white-space: pre-wrap;}
10 |     span.smallcaps{font-variant: small-caps;}
11 |     span.underline{text-decoration: underline;}
12 |     div.column{display: inline-block; vertical-align: top; width: 50%;}
13 |     div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
14 |     ul.task-list{list-style: none;}
15 |     .display.math{display: block; text-align: center; margin: 0.5rem auto;}
16 |   </style>
17 |   <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/asciidoctor/asciidoctor@2.0/data/stylesheets/asciidoctor-default.css" />
18 |   <link rel="stylesheet" href="./asciidoc-hs.css" />
19 |   <!--[if lt IE 9]>
20 |     <script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script>
21 |   <![endif]-->
22 | </head>
23 | <body>
24 | <div class="article">
25 | <div id="header">
26 |  
27 | <h1>Example with complex lists</h1>
28 | </div>
29 | <div id="content">
30 |  
31 | <div class="ulist">
32 | <div class="title">
33 | Unordered list title
34 | </div>
35 | <ul>
36 | <li><div class="paragraph">
37 | <p>list item 1</p>
38 | </div>
39 | <div class="ulist">
40 | <ul>
41 | <li><div class="paragraph">
42 | <p>nested list item</p>
43 | </div>
44 | <div class="ulist">
45 | <ul>
46 | <li><div class="paragraph">
47 | <p>nested nested list item 1</p>
48 | </div>
49 | <div class="paragraph">
50 | <p>another paragraph in the same nested nested list item 1</p>
51 | </div></li>
52 | <li><div class="paragraph">
53 | <p>nested nested list item 2</p>
54 | </div>
55 | <div class="exampleblock">
56 | <div class="title">
57 | Example 1. an example inside item 2
58 | </div>
59 | <div class="content">
60 | <div class="paragraph">
61 | <p>an example</p>
62 | </div>
63 | </div>
64 | </div></li>
65 | </ul>
66 | </div></li>
67 | </ul>
68 | </div></li>
69 | <li><div class="paragraph">
70 | <p>list item 2</p>
71 | </div></li>
72 | </ul>
73 | </div>
74 | </div>
75 | </div>
76 | </body>
77 | </html>
78 | 


--------------------------------------------------------------------------------
/examples/asciidoctor-article-template-asciidoctor.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 | <head>
  4 | <meta charset="UTF-8">
  5 | <meta http-equiv="X-UA-Compatible" content="IE=edge">
  6 | <meta name="viewport" content="width=device-width, initial-scale=1.0">
  7 | <meta name="generator" content="Asciidoctor 2.0.10">
  8 | <title>AsciiDoc Article Title</title>
  9 | <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/asciidoctor/asciidoctor@2.0/data/stylesheets/asciidoctor-default.css">
 10 | </head>
 11 | <body class="article">
 12 | <div id="header">
 13 | <h1>AsciiDoc Article Title</h1>
 14 | </div>
 15 | <div id="content">
 16 | <div class="sect1">
 17 | <h2>First level heading</h2>
 18 | <div class="sectionbody">
 19 | <div class="paragraph">
 20 | <p>This is a paragraph with a <strong>bold</strong> word and an <em>italicized</em> word.</p>
 21 | </div>
 22 | <div class="sect2">
 23 | <h3>Second level heading</h3>
 24 | <div class="ulist">
 25 | <div class="title">Unordered list title</div>
 26 | <ul>
 27 | <li>
 28 | <p>list item 1</p>
 29 | <div class="ulist">
 30 | <ul>
 31 | <li>
 32 | <p>nested list item</p>
 33 | <div class="ulist">
 34 | <ul>
 35 | <li>
 36 | <p>nested nested list item 1</p>
 37 | </li>
 38 | <li>
 39 | <p>nested nested list item 2</p>
 40 | </li>
 41 | </ul>
 42 | </div>
 43 | </li>
 44 | </ul>
 45 | </div>
 46 | </li>
 47 | <li>
 48 | <p>list item 2</p>
 49 | </li>
 50 | </ul>
 51 | </div>
 52 | <div class="paragraph">
 53 | <p>This is a paragraph.</p>
 54 | </div>
 55 | <div class="exampleblock">
 56 | <div class="title">Example 1. Example block title</div>
 57 | <div class="content">
 58 | <div class="paragraph">
 59 | <p>Content in an example block is subject to normal substitutions.</p>
 60 | </div>
 61 | </div>
 62 | </div>
 63 | <div class="sidebarblock">
 64 | <div class="content">
 65 | <div class="title">Sidebar title</div>
 66 | <div class="paragraph">
 67 | <p>Sidebars contain aside text and are subject to normal substitutions.</p>
 68 | </div>
 69 | </div>
 70 | </div>
 71 | <div class="sect3">
 72 | <h4>Third level heading</h4>
 73 | <div class="sect4">
 74 | <h5>Fourth level heading</h5>
 75 | <div class="sect5">
 76 | <h6>Fifth level heading</h6>
 77 | 
 78 | </div>
 79 | </div>
 80 | </div>
 81 | </div>
 82 | </div>
 83 | </div>
 84 | <div class="sect1">
 85 | <h2>First level heading</h2>
 86 | <div class="sectionbody">
 87 | 
 88 | </div>
 89 | </div>
 90 | <div class="sect1">
 91 | <h2>First level heading</h2>
 92 | <div class="sectionbody">
 93 | 
 94 | </div>
 95 | </div>
 96 | </div>
 97 | <div id="footer">
 98 | <div id="footer-text">
 99 | Last updated 2021-06-09 21:47:15 +0200
100 | </div>
101 | </div>
102 | </body>
103 | </html>
104 | 


--------------------------------------------------------------------------------
/examples/asciidoctor-article-template.adoc:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | = AsciiDoc Article Title
 4 | // Firstname Lastname <author@asciidoctor.org>
 5 | // 3.0, July 29, 2022: AsciiDoc article template
 6 | // :toc:
 7 | // :icons: font
 8 | // :url-quickref: https://docs.asciidoctor.org/asciidoc/latest/syntax-quick-reference/
 9 | 
10 | // Content entered directly below the header but before the first section heading is called the preamble.
11 | 
12 | == First level heading
13 | 
14 | This is a paragraph with a *bold* word and an _italicized_ word.
15 | 
16 | // .Image caption
17 | // image::image-file-name.png[I am the image alt text.]
18 | 
19 | // This is another paragraph.footnote:[I am footnote text and will be displayed at the bottom of the article.]
20 | 
21 | === Second level heading
22 | 
23 | .Unordered list title
24 | * list item 1
25 | ** nested list item
26 | *** nested nested list item 1
27 | *** nested nested list item 2
28 | * list item 2
29 | 
30 | This is a paragraph.
31 | 
32 | .Example block title
33 | ====
34 | Content in an example block is subject to normal substitutions.
35 | ====
36 | 
37 | .Sidebar title
38 | ****
39 | Sidebars contain aside text and are subject to normal substitutions.
40 | ****
41 | 
42 | ==== Third level heading
43 | 
44 | // [#id-for-listing-block]
45 | // .Listing block title
46 | // ----
47 | // Content in a listing block is subject to verbatim substitutions.
48 | // Listing block content is commonly used to preserve code input.
49 | // ----
50 | 
51 | ===== Fourth level heading
52 | 
53 | // .Table title
54 | // |===
55 | // |Column heading 1 |Column heading 2
56 | 
57 | // |Column 1, row 1
58 | // |Column 2, row 1
59 | 
60 | // |Column 1, row 2
61 | // |Column 2, row 2
62 | // |===
63 | 
64 | ====== Fifth level heading
65 | 
66 | // [quote, firstname lastname, movie title]
67 | // ____
68 | // I am a block quote or a prose excerpt.
69 | // I am subject to normal substitutions.
70 | // ____
71 | 
72 | // [verse, firstname lastname, poem title and more]
73 | // ____
74 | // I am a verse block.
75 | //   Indents and endlines are preserved in verse blocks.
76 | // ____
77 | 
78 | == First level heading
79 | 
80 | // TIP: There are five admonition labels: Tip, Note, Important, Caution and Warning.
81 | 
82 | // I am a comment and won't be rendered.
83 | 
84 | // . ordered list item
85 | // .. nested ordered list item
86 | // . ordered list item
87 | 
88 | // The text at the end of this sentence is cross referenced to <<_third_level_heading,the third level heading>>
89 | 
90 | == First level heading
91 | 
92 | // This is a link to the https://docs.asciidoctor.org/home/[Asciidoctor documentation].
93 | // This is an attribute reference {url-quickref}[that links this text to the AsciiDoc Syntax Quick Reference].
94 | 


--------------------------------------------------------------------------------
/examples/asciidoctor-article-template-pandoc.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">
 3 | <head>
 4 |   <meta charset="utf-8" />
 5 |   <meta name="generator" content="pandoc" />
 6 |   <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
 7 |   <title>AsciiDoc Article Title</title>
 8 |   <style>
 9 |     code{white-space: pre-wrap;}
10 |     span.smallcaps{font-variant: small-caps;}
11 |     span.underline{text-decoration: underline;}
12 |     div.column{display: inline-block; vertical-align: top; width: 50%;}
13 |     div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
14 |     ul.task-list{list-style: none;}
15 |     .display.math{display: block; text-align: center; margin: 0.5rem auto;}
16 |   </style>
17 |   <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/asciidoctor/asciidoctor@2.0/data/stylesheets/asciidoctor-default.css" />
18 |   <!--[if lt IE 9]>
19 |     <script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script>
20 |   <![endif]-->
21 | </head>
22 | <body>
23 | <div class="article">
24 | <div id="header">
25 |  
26 | <h1>AsciiDoc Article Title</h1>
27 | </div>
28 | <div id="content">
29 |  
30 | <h2>First level heading</h2>
31 | <div class="paragraph">
32 | <p>This is a paragraph with a <strong>bold</strong> word and an <em>italicized</em> word.</p>
33 | </div>
34 | <h3>Second level heading</h3>
35 | <div class="ulist">
36 | <div class="title">
37 | Unordered list title
38 | </div>
39 | <ul>
40 | <li><div class="paragraph">
41 | <p>list item 1</p>
42 | </div>
43 | <div class="ulist">
44 | <ul>
45 | <li><div class="paragraph">
46 | <p>nested list item</p>
47 | </div>
48 | <div class="ulist">
49 | <ul>
50 | <li><div class="paragraph">
51 | <p>nested nested list item 1</p>
52 | </div></li>
53 | <li><div class="paragraph">
54 | <p>nested nested list item 2</p>
55 | </div></li>
56 | </ul>
57 | </div></li>
58 | </ul>
59 | </div></li>
60 | <li><div class="paragraph">
61 | <p>list item 2</p>
62 | </div></li>
63 | </ul>
64 | </div>
65 | <div class="paragraph">
66 | <p>This is a paragraph.</p>
67 | </div>
68 | <div class="exampleblock">
69 | <div class="title">
70 | Example 1. Example block title
71 | </div>
72 | <div class="content">
73 | <div class="paragraph">
74 | <p>Content in an example block is subject to normal substitutions.</p>
75 | </div>
76 | </div>
77 | </div>
78 | <div class="sidebarblock">
79 | <div class="content">
80 | <div class="title">
81 | Sidebar title
82 | </div>
83 | <div class="paragraph">
84 | <p>Sidebars contain aside text and are subject to normal substitutions.</p>
85 | </div>
86 | </div>
87 | </div>
88 | <h4>Third level heading</h4>
89 | <h5>Fourth level heading</h5>
90 | <h6>Fifth level heading</h6>
91 | <h2>First level heading</h2>
92 | <h2>First level heading</h2>
93 | </div>
94 | </div>
95 | </body>
96 | </html>
97 | 


--------------------------------------------------------------------------------
/src/Text/AsciiDoc/Debug/ParseTest.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE FlexibleContexts #-}
 2 | {-# OPTIONS_HADDOCK hide #-}
 3 | 
 4 | module Text.AsciiDoc.Debug.ParseTest
 5 |   ( OutputType (..),
 6 |     parseInline,
 7 |     parseFile,
 8 |     module Text.AsciiDoc.Blocks,
 9 |     module Text.AsciiDoc.Inlines,
10 |   )
11 | where
12 | 
13 | import qualified Control.Exception as E
14 | -- The following import is correct, but haskell-language-server sometimes complains.
15 | import Control.Monad.Reader
16 | import Data.IORef
17 | import Data.Text (Text)
18 | import qualified Data.Text as T
19 | import qualified Data.Text.IO as T
20 | import Text.AsciiDoc.Blocks hiding (Parser, State)
21 | import qualified Text.AsciiDoc.Blocks as B
22 | import Text.AsciiDoc.Inlines hiding (Parser, State)
23 | import qualified Text.AsciiDoc.Inlines as I
24 | import Text.Parsec (ParsecT, Stream)
25 | -- The following import is correct, but haskell-language-server sometimes complains.
26 | import Text.Parsec.Free.Log (LogType, renderLog)
27 | import Text.Parsec.Prim (runPT, runPTLog)
28 | -- The following import is correct, but haskell-language-server sometimes complains.
29 | import qualified Text.Pretty.Simple as Pretty
30 | 
31 | data OutputType = Result | Log
32 | 
33 | -- | Usage example:
34 | --
35 | -- > cabal repl asciidoc-hs:library:debug-with-parsec-free <<< 'parseInline inlinesP Log "*foo*"'
36 | parseInline ::
37 |   Show a =>
38 |   ParsecT Text I.State (ReaderT LogType IO) a ->
39 |   OutputType ->
40 |   Text ->
41 |   IO ()
42 | parseInline parser outputType input = do
43 |   parseTestLogWithState False parser outputType I.inlineParserInitialState input
44 | 
45 | -- | Usage example:
46 | --
47 | -- > cabal repl asciidoc-hs:library:debug-with-parsec-free <<< 'parseFile pDocument Log "input.adoc"' > output.txt
48 | parseFile ::
49 |   Show a =>
50 |   ParsecT [Text] B.State (ReaderT LogType IO) a ->
51 |   OutputType ->
52 |   FilePath ->
53 |   IO ()
54 | parseFile parser outputType file = do
55 |   tokens <- readTokens file
56 |   parseTestLogWithState False parser outputType B.blockParserInitialState tokens
57 | 
58 | readTokens :: FilePath -> IO [Text]
59 | readTokens file = do
60 |   t <- T.readFile file
61 |   pure $ T.lines t
62 | 
63 | parseTestLogWithState ::
64 |   (Stream s (ReaderT LogType IO) t, Show a, Show t) =>
65 |   -- | If True, display every parse, not just the interesting ones
66 |   Bool ->
67 |   ParsecT s u (ReaderT LogType IO) a ->
68 |   OutputType ->
69 |   u ->
70 |   s ->
71 |   IO ()
72 | parseTestLogWithState b p outputType state input = do
73 |   lg <- newIORef []
74 |   eres <- E.try $ runReaderT (parseTestLogWithStateAux p outputType state input) lg
75 |   putStrLn $ case eres of
76 |     Left err -> "EXCEPTION => " ++ show (err :: E.SomeException)
77 |     Right a -> "Result => " ++ show a
78 |   theLog <- readIORef lg
79 |   putStrLn $ renderLog b theLog
80 | 
81 | parseTestLogWithStateAux ::
82 |   (MonadIO m, MonadReader LogType m, Stream s m t, Show a, Show t) =>
83 |   ParsecT s u m a ->
84 |   OutputType ->
85 |   u ->
86 |   s ->
87 |   m ()
88 | parseTestLogWithStateAux p outputType state input = do
89 |   eres <- case outputType of
90 |     Result -> runPT p state "" input
91 |     Log -> runPTLog p state "" input
92 |   liftIO $ case eres of
93 |     Left err -> do
94 |       putStr "parse error at "
95 |       print err
96 |     Right x -> Pretty.pPrint x
97 | 


--------------------------------------------------------------------------------
/src/Text/AsciiDoc/SourceRange.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE LambdaCase #-}
 2 | {-# LANGUAGE OverloadedStrings #-}
 3 | 
 4 | -- |
 5 | -- Module      :  Text.AsciiDoc.SourceRange
 6 | -- Copyright   :  © 2020–present Guillem Marpons
 7 | -- License     :  BSD-3-Clause
 8 | --
 9 | -- Maintainer  :  Guillem Marpons <gmarpons@mailbox.org>
10 | -- Stability   :  experimental
11 | -- Portability :  portable
12 | --
13 | -- This module contains functions that traverse an AsciiDoc AST and decorate it
14 | -- with source ranges information.
15 | module Text.AsciiDoc.SourceRange
16 |   ( -- * Source ranges
17 |     SourcePosition,
18 |     SourceRange (..),
19 |     addSourceRanges,
20 |     childInlines,
21 |     contentLength,
22 |   )
23 | where
24 | 
25 | import Data.List.NonEmpty
26 | import qualified Data.Text as T
27 | import qualified Optics.Core as Optics
28 | import Optics.Traversal (Traversal')
29 | import Text.AsciiDoc.Inlines
30 | 
31 | -- | Extract the immediate descendants (sub-inlines) of an `Inline`.
32 | --
33 | -- Analogous to @Lens.Plated.children@.
34 | childInlines :: Traversal' Inline Inline
35 | childInlines = Optics.traversalVL subInlines'
36 |   where
37 |     subInlines' f = \case
38 |       StyledText style parameters open inlines close ->
39 |         (\x -> StyledText style parameters open x close) <$> traverse f inlines
40 |       InlineSeq inlines -> InlineSeq <$> traverse f inlines
41 |       x -> pure x
42 | 
43 | contentLength :: Inline -> Int
44 | contentLength = \case
45 |   Space t -> T.length t
46 |   AlphaNum t -> T.length t
47 |   Symbol t -> T.length t
48 |   Newline t -> T.length t
49 |   _ -> 0
50 | 
51 | type SourcePosition = (Int, Int)
52 | 
53 | data SourceRange = SourceRange SourcePosition SourcePosition
54 | 
55 | instance Show SourceRange where
56 |   show (SourceRange (l1, c1) (l2, c2)) =
57 |     show l1 <> ":" <> show c1 <> "-" <> show l2 <> ":" <> show c2
58 | 
59 | -- | TODO: maybe this function should be idempotent.
60 | addSourceRanges :: Inline -> Inline
61 | addSourceRanges = fst . addSourceRanges' (1, 1)
62 |   where
63 |     addSourceRanges' :: SourcePosition -> Inline -> (Inline, SourcePosition)
64 |     addSourceRanges' initial@(initialLine, initialColumn) x =
65 |       let (x', (finalLine', finalColumn')) =
66 |             Optics.mapAccumLOf
67 |               childInlines
68 |               addSourceRanges'
69 |               (initialLine, initialColumn + prefixLength x)
70 |               x
71 |           final@(finalLine, finalColumn) = case x of
72 |             -- `final` is the _previous_ position to the first position of the
73 |             -- next inline, so we use 0 as column value (we increment by one the
74 |             -- value returned by this function).
75 |             Newline _ -> (finalLine' + 1, 0)
76 |             _ -> (finalLine', finalColumn' + contentLength x + suffixLength x - 1)
77 |        in (wrap (SourceRange initial final) x', (finalLine, finalColumn + 1))
78 |     wrap :: SourceRange -> Inline -> Inline
79 |     wrap range = \case
80 |       x@(Newline _) -> x
81 |       x@(AlphaNum _) ->
82 |         StyledText
83 |           Custom
84 |           (InlineAttributeList (T.pack $ "data-sourcepos: " <> show range))
85 |           ""
86 |           (x :| [])
87 |           ""
88 |       x -> x
89 |     prefixLength = \case
90 |       StyledText _ (InlineAttributeList t) o _ _
91 |         | T.null t -> T.length o
92 |         | otherwise -> 2 + T.length t + T.length o
93 |       _ -> 0
94 |     suffixLength = \case
95 |       StyledText _ _ _ _ c -> T.length c
96 |       _ -> 0
97 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
  1 | name: CI
  2 | 
  3 | on:
  4 |   pull_request:
  5 |     types: [opened, synchronize, reopened]
  6 |   push:
  7 |     branches: [main]
  8 | 
  9 | jobs:
 10 |   ormolu:
 11 |     runs-on: ubuntu-latest
 12 |     steps:
 13 |     - name: Checkout repository
 14 |       uses: actions/checkout@v2
 15 | 
 16 |     - name: Ormolu
 17 |       uses: mrkkrp/ormolu-action@v2
 18 |       with:
 19 |         extra-args: --ghc-opt -XTypeApplications
 20 | 
 21 |   cabal:
 22 |     name: ${{ matrix.os }} / ghc ${{ matrix.ghc }}
 23 |     runs-on: ${{ matrix.os }}
 24 |     needs: ormolu
 25 |     strategy:
 26 |       matrix:
 27 |         os: [ubuntu-latest, macos-latest, windows-latest]
 28 |         cabal: [latest]
 29 |         ghc:
 30 |           - "8.10.7"
 31 |           - "9.0.1"
 32 |           - "9.2.1"
 33 |         exclude:
 34 |           - os: macos-latest
 35 |             ghc: "8.10.7"
 36 |           - os: macos-latest
 37 |             ghc: "9.2.1"
 38 |           - os: windows-latest
 39 |             ghc: "8.10.7"
 40 |           - os: windows-latest
 41 |             ghc: "9.2.1"
 42 | 
 43 |     steps:
 44 |     - name: Checkout repository
 45 |       uses: actions/checkout@v2
 46 | 
 47 |     - name: Setup Haskell
 48 |       uses: haskell/actions/setup@v1
 49 |       id: setup-haskell-cabal
 50 |       with:
 51 |         ghc-version: ${{ matrix.ghc }}
 52 |         cabal-version: ${{ matrix.cabal }}
 53 | 
 54 |     - name: Freeze
 55 |       run: |
 56 |         cabal freeze
 57 | 
 58 |     - name: Cache ~/.cabal/store
 59 |       uses: actions/cache@v2
 60 |       with:
 61 |         path: |
 62 |           ${{ steps.setup-haskell-cabal.outputs.cabal-store }}
 63 |           dist-newstyle
 64 |         key: ${{ runner.os }}-${{ matrix.ghc }}-${{ hashFiles('cabal.project.freeze') }}
 65 |         restore-keys: ${{ runner.os }}-${{ matrix.ghc }}-
 66 | 
 67 |     - name: Build
 68 |       run: |
 69 |         cabal configure --enable-tests --enable-benchmarks --enable-documentation --test-show-details=direct
 70 |         cabal update
 71 |         cabal build lib:asciidoc-hs exe:asciidoc-hs asciidoc-hs-test
 72 | 
 73 |     - name: Test
 74 |       run: |
 75 |         cabal test
 76 | 
 77 |     - name: Documentation
 78 |       run: |
 79 |         cabal haddock
 80 | 
 81 |   stack:
 82 |     name: stack / ghc ${{ matrix.ghc }}
 83 |     runs-on: ubuntu-latest
 84 |     strategy:
 85 |       matrix:
 86 |         ghc:
 87 |           - "8.10.7"
 88 |           - "9.0.1"
 89 | 
 90 |     steps:
 91 |     - name: Checkout repository
 92 |       uses: actions/checkout@v2
 93 | 
 94 |     - name: Setup Haskell Stack
 95 |       uses: haskell/actions/setup@v1
 96 |       with:
 97 |         ghc-version: ${{ matrix.ghc }}
 98 |         stack-version: ${{ matrix.stack }}
 99 | 
100 |     - name: Cache ~/.stack
101 |       uses: actions/cache@v2
102 |       with:
103 |         path: ~/.stack
104 |         key: ${{ runner.os }}-${{ matrix.ghc }}-stack
105 | 
106 |     - name: Build Stack
107 |       run: |
108 |         stack build --system-ghc --test --bench --no-run-tests --no-run-benchmarks
109 | 
110 |     - name: Test Stack
111 |       run: |
112 |         stack test --system-ghc
113 | 
114 |   hlint:
115 |     runs-on: ubuntu-latest
116 |     needs: cabal
117 | 
118 |     steps:
119 |     - name: Checkout repository
120 |       uses: actions/checkout@v2
121 | 
122 |     - name: Setup HLint
123 |       uses: rwe/actions-hlint-setup@v1
124 |       with:
125 |         version: '3.3.1'
126 | 
127 |     - name: Run HLint
128 |       uses: rwe/actions-hlint-run@v2
129 |       with:
130 |         path: '["src/", "exe/", "test/"]'
131 |         fail-on: warning
132 | 


--------------------------------------------------------------------------------
/test/Tests/Metadata.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings #-}
  2 | 
  3 | module Tests.Metadata
  4 |   ( metadataUnitTests,
  5 |   )
  6 | where
  7 | 
  8 | import qualified Data.IntMap as IntMap
  9 | import Data.List.NonEmpty (NonEmpty (..))
 10 | import qualified Data.Map as Map
 11 | import Data.Semigroup (Last (..))
 12 | import Data.Text (Text)
 13 | import Test.Hspec.Expectations.Pretty (shouldBe)
 14 | import Test.Tasty (TestTree, testGroup)
 15 | import Test.Tasty.HUnit (assertFailure, testCase)
 16 | import Tests.Blocks (parseTest)
 17 | import Text.AsciiDoc.Blocks
 18 | import Text.AsciiDoc.Metadata
 19 | import Text.AsciiDoc.UnparsedInline
 20 | 
 21 | parseBlockPrefix :: [Text] -> IO (NonEmpty (BlockPrefixItem UnparsedInline))
 22 | parseBlockPrefix t = case parseTest blockPrefixP t of
 23 |   Right prefix -> pure prefix
 24 |   Left parseError -> assertFailure $ "Parser fails: " <> show parseError
 25 | 
 26 | metadataUnitTests :: TestTree
 27 | metadataUnitTests =
 28 |   testGroup
 29 |     "metadata unit tests"
 30 |     [ testCase "block title" $ do
 31 |         p <- parseBlockPrefix [".Foo"]
 32 |         toMetadata p
 33 |           `shouldBe` mempty {metadataTitle = Just (Last (MarkupLine "Foo" :| []))},
 34 |       testCase "standalone block id" $ do
 35 |         p <- parseBlockPrefix ["[[Foo]]"]
 36 |         toMetadata p
 37 |           `shouldBe` (mempty @(Metadata UnparsedInline)) {metadataIds = ["Foo"]},
 38 |       testCase "two standalone block ids" $ do
 39 |         p <- parseBlockPrefix ["[[Foo]]", "[[Bar]]"]
 40 |         toMetadata p
 41 |           `shouldBe` (mempty @(Metadata UnparsedInline))
 42 |             { metadataIds = ["Foo", "Bar"]
 43 |             },
 44 |       testCase "standalone block style" $ do
 45 |         p <- parseBlockPrefix ["[Foo]"]
 46 |         toMetadata p
 47 |           `shouldBe` (mempty @(Metadata UnparsedInline))
 48 |             { metadataStyle = Just (Last "Foo")
 49 |             },
 50 |       testCase "standalone block role" $ do
 51 |         p <- parseBlockPrefix ["[.Foo]"]
 52 |         -- Compatible with how Asciidoctor cleans style when none is specified
 53 |         -- in shortand syntax.
 54 |         toMetadata p
 55 |           `shouldBe` (mempty @(Metadata UnparsedInline))
 56 |             { metadataStyle = Just (Last ""),
 57 |               metadataRoles = ["Foo"]
 58 |             },
 59 |       testCase "positional attributes" $ do
 60 |         p <- parseBlockPrefix ["[Foo, Bar, Baz]"]
 61 |         toMetadata p
 62 |           `shouldBe` (mempty @(Metadata UnparsedInline))
 63 |             { metadataStyle = Just (Last "Foo"),
 64 |               metadataPositionalAttributes = IntMap.fromList [(2, "Bar"), (3, "Baz")]
 65 |             },
 66 |       testCase "named attribute" $ do
 67 |         p <- parseBlockPrefix ["[Foo = Bar]"]
 68 |         toMetadata p
 69 |           `shouldBe` (mempty @(Metadata UnparsedInline))
 70 |             { metadataNamedAttributes = Map.fromList [("Foo", "Bar")]
 71 |             },
 72 |       testCase "standalone option" $ do
 73 |         p <- parseBlockPrefix ["[%Foo]"]
 74 |         -- Compatible with how Asciidoctor cleans style when none is specified
 75 |         -- in shortand syntax.
 76 |         toMetadata p
 77 |           `shouldBe` (mempty @(Metadata UnparsedInline))
 78 |             { metadataStyle = Just (Last ""),
 79 |               metadataOptions = ["Foo"]
 80 |             },
 81 |       testCase "complex example" $ do
 82 |         p <-
 83 |           parseBlockPrefix
 84 |             [ "[.Foo]",
 85 |               "// Comment",
 86 |               "[Foo#Foo%Foo.Foo.Bar%%Bar, 'Foo', Foo = Bar]",
 87 |               "[role = 'Baz Foo']",
 88 |               "[opts = Baz]",
 89 |               "",
 90 |               "[[Bar]]",
 91 |               "[Bar, Foo = Baz, title=Baz, Bar]"
 92 |             ]
 93 |         toMetadata p
 94 |           `shouldBe` mempty
 95 |             { metadataStyle = Just (Last "Bar"),
 96 |               metadataIds = ["Foo", "Bar"],
 97 |               metadataRoles = ["Baz", "Foo"],
 98 |               metadataOptions = ["Foo", "", "Bar", "Baz"],
 99 |               metadataTitle = Just (Last (MarkupLine "Baz" :| [])),
100 |               metadataPositionalAttributes = IntMap.fromList [(2, "Foo"), (4, "Bar")],
101 |               metadataNamedAttributes = Map.fromList [("Foo", "Baz")],
102 |               metadataRoleNamedAttribute = Just (Last ["Baz", "Foo"])
103 |             }
104 |     ]
105 | 


--------------------------------------------------------------------------------
/src/Text/AsciiDoc/Metadata.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE DerivingStrategies #-}
  2 | {-# LANGUAGE OverloadedStrings #-}
  3 | 
  4 | -- |
  5 | -- Module      :  Text.AsciiDoc.Metadata
  6 | -- Copyright   :  © 2020–present Guillem Marpons
  7 | -- License     :  BSD-3-Clause
  8 | --
  9 | -- Maintainer  :  Guillem Marpons <gmarpons@mailbox.org>
 10 | -- Stability   :  experimental
 11 | -- Portability :  portable
 12 | --
 13 | -- A flexible Metadata type that can be used to store properties of both
 14 | -- 'Text.AsciiDoc.Blocks' and 'Text.AsciiDoc.Inlines'.
 15 | --
 16 | -- Its 'Semigroup' and 'Monoid' instances codify the non-trivial rules of
 17 | -- metadata addition.
 18 | module Text.AsciiDoc.Metadata
 19 |   ( -- * Metadata Type
 20 |     Metadata (..),
 21 | 
 22 |     -- * ToMetadata Class
 23 |     ToMetadata (..),
 24 |   )
 25 | where
 26 | 
 27 | import qualified Data.IntMap as IntMap
 28 | import Data.List.NonEmpty (NonEmpty ((:|)))
 29 | import qualified Data.Map as Map
 30 | import Data.Semigroup (Last (..))
 31 | import Data.Text (Text)
 32 | import qualified Data.Text as T
 33 | import Text.AsciiDoc.ElementAttributes
 34 | import Text.AsciiDoc.UnparsedInline
 35 | 
 36 | data Metadata a = Metadata
 37 |   { metadataStyle :: Maybe (Last Text),
 38 |     metadataIds :: [Text],
 39 |     metadataRoles :: [Text],
 40 |     metadataOptions :: [Text],
 41 |     metadataTitle :: Maybe (Last a),
 42 |     metadataPositionalAttributes :: IntMap.IntMap Text,
 43 |     -- | Named attributes different than @id@, @opts@, @options@, @role@ and
 44 |     -- @title@.
 45 |     metadataNamedAttributes :: Map.Map Text Text,
 46 |     metadataRoleNamedAttribute :: Maybe (Last [Text])
 47 |   }
 48 |   deriving stock (Eq, Show, Functor)
 49 | 
 50 | instance Semigroup (Metadata a) where
 51 |   x <> y =
 52 |     let a = metadataStyle x <> metadataStyle y
 53 |         b = metadataIds x <> metadataIds y
 54 |         -- If the right operand contains and explicit "role=ROLENAME" attribute,
 55 |         -- it replaces whatever roles contained in the left operand. The right
 56 |         -- operand can append new roles with the "[.ROLENAME]" syntax.
 57 |         c = case metadataRoleNamedAttribute y of
 58 |           Just yRoles -> getLast yRoles
 59 |           Nothing -> metadataRoles x <> metadataRoles y
 60 |         d = metadataOptions x <> metadataOptions y
 61 |         e = metadataTitle x <> metadataTitle y
 62 |         -- Semigroup instance from IntMap gives precedence to values from left
 63 |         -- operand. In this case we prefer the last value (i.e. value from
 64 |         -- operand @y@).
 65 |         f = metadataPositionalAttributes y <> metadataPositionalAttributes x
 66 |         -- Semigroup instance from Map gives precedence to values from left
 67 |         -- operand. In this case we prefer the last value (i.e. value from
 68 |         -- operand @y@).
 69 |         g = metadataNamedAttributes y <> metadataNamedAttributes x
 70 |         h = metadataRoleNamedAttribute x <> metadataRoleNamedAttribute y
 71 |      in Metadata
 72 |           { metadataStyle = a,
 73 |             metadataIds = b,
 74 |             metadataRoles = c,
 75 |             metadataOptions = d,
 76 |             metadataTitle = e,
 77 |             metadataPositionalAttributes = f,
 78 |             metadataNamedAttributes = g,
 79 |             metadataRoleNamedAttribute = h
 80 |           }
 81 | 
 82 | instance Monoid (Metadata a) where
 83 |   mempty =
 84 |     Metadata
 85 |       { metadataStyle = mempty,
 86 |         metadataIds = mempty,
 87 |         metadataRoles = mempty,
 88 |         metadataOptions = mempty,
 89 |         metadataTitle = mempty,
 90 |         metadataPositionalAttributes = mempty,
 91 |         metadataNamedAttributes = mempty,
 92 |         metadataRoleNamedAttribute = mempty
 93 |       }
 94 | 
 95 | class ToMetadata b a where
 96 |   toMetadata :: b -> Metadata a
 97 | 
 98 | instance ToMetadata PositionedAttribute UnparsedInline where
 99 |   toMetadata (PositionedAttribute (index, PositionalAttribute p)) =
100 |     mempty {metadataPositionalAttributes = IntMap.singleton index p}
101 |   -- Special treatment of attribute names: @id@, @opts@, @options@, @role@ and
102 |   -- @title@.
103 |   toMetadata (PositionedAttribute (_, NamedAttribute "id" v)) =
104 |     mempty {metadataIds = [v]}
105 |   toMetadata (PositionedAttribute (_, NamedAttribute "opts" v)) =
106 |     mempty {metadataOptions = [v]}
107 |   toMetadata (PositionedAttribute (_, NamedAttribute "options" v)) =
108 |     mempty {metadataOptions = [v]}
109 |   toMetadata (PositionedAttribute (_, NamedAttribute "role" v)) =
110 |     mempty
111 |       { metadataRoles = T.words v,
112 |         metadataRoleNamedAttribute = Just $ Last $ T.words v
113 |       }
114 |   toMetadata (PositionedAttribute (_, NamedAttribute "title" v)) =
115 |     mempty {metadataTitle = Just $ Last $ MarkupLine v :| []}
116 |   -- Any other named attribute
117 |   toMetadata (PositionedAttribute (_, NamedAttribute k v)) =
118 |     mempty {metadataNamedAttributes = Map.singleton k v}
119 |   toMetadata (PositionedAttribute (_, ShorthandSyntaxAttribute s i r o)) =
120 |     mempty
121 |       { metadataStyle = Just (Last s),
122 |         metadataIds = i,
123 |         metadataRoles = r,
124 |         metadataOptions = o
125 |       }
126 | 
127 | instance
128 |   {-# OVERLAPPABLE #-}
129 |   (Foldable f, ToMetadata b a) =>
130 |   ToMetadata (f b) a
131 |   where
132 |   toMetadata = foldMap toMetadata
133 | 


--------------------------------------------------------------------------------
/src/Text/AsciiDoc/LineParsers.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings #-}
  2 | 
  3 | -- |
  4 | -- Module      :  Text.AsciiDoc.LineParsers
  5 | -- Copyright   :  © 2020–present Guillem Marpons
  6 | -- License     :  BSD-3-Clause
  7 | --
  8 | -- Maintainer  :  Guillem Marpons <gmarpons@mailbox.org>
  9 | -- Stability   :  experimental
 10 | -- Portability :  portable
 11 | --
 12 | -- Parsec-style parser combinators to help classifying individual AsciiDoc
 13 | -- lines.
 14 | --
 15 | -- All parsers in this module return 'Data.Text.Text', which helps to combine
 16 | -- them using the @Monoid@ instance of 'Text.Parsec.ParsecT'.
 17 | module Text.AsciiDoc.LineParsers
 18 |   ( -- * Line parser type
 19 |     LineParser,
 20 | 
 21 |     -- * Concrete parsers
 22 |     blockId,
 23 |     blockAttributeList,
 24 | 
 25 |     -- * Generic parsers accepting 'Marker's
 26 |     runOfN,
 27 |     count,
 28 | 
 29 |     -- * Generic parsers accepting 'Text'
 30 |     anyRemainder,
 31 |     many,
 32 |     manyText,
 33 |     some,
 34 |     string,
 35 |     char,
 36 |     satisfy,
 37 |   )
 38 | where
 39 | 
 40 | import Control.Monad (MonadPlus)
 41 | import qualified Control.Monad.Combinators as PC hiding
 42 |   ( endBy1,
 43 |     sepBy1,
 44 |     sepEndBy1,
 45 |     some,
 46 |     someTill,
 47 |   )
 48 | import qualified Control.Monad.Combinators.NonEmpty as PC
 49 | import Data.Char (isAlphaNum, isDigit, isLetter, isSpace, ord)
 50 | import Data.Functor.Identity (Identity)
 51 | import qualified Data.List.NonEmpty as NE
 52 | import Data.Text (Text)
 53 | import qualified Data.Text as T
 54 | import Text.AsciiDoc.SpecialChars
 55 | import qualified Text.Parsec as Parsec
 56 | 
 57 | -- | Parser type used to check syntactic conditions on single lines of an
 58 | -- AsciiDoc document.
 59 | type LineParser = Parsec.ParsecT Text () Identity
 60 | 
 61 | -- | Accepts a block identifier using the double square bracket syntax: an
 62 | -- identifier surrounded by "@[[@" and "@]]@". The identifier must start with a
 63 | -- letter, or "@_@", or "@:@". It can contain letters, digits, and some other
 64 | -- special characters defined in https://www.w3.org/TR/REC-xml/#NT-Name
 65 | --
 66 | -- Note that that the aforementioned requirements only apply to double square
 67 | -- bracket syntax: other ways of defining identifiers accept the full range of
 68 | -- Unicode characters. For more information see
 69 | -- https://asciidoctor.org/docs/user-manual/#custom-ids.
 70 | --
 71 | -- Accepts null identifiers (the empty string).
 72 | --
 73 | -- It does not accept spaces between the square brackets.
 74 | blockId :: LineParser Text
 75 | blockId =
 76 |   string "[["
 77 |     *> PC.option
 78 |       T.empty
 79 |       ( satisfy isBlockIdStartChar <> many (Parsec.satisfy isBlockIdChar)
 80 |       )
 81 |       <* string "]]"
 82 |   where
 83 |     -- TODO. Assess if package charset can be used to speed-up the following
 84 |     -- checks.
 85 |     isBlockIdStartChar c = c `elem` ['_', ':'] || isLetter c
 86 |     isBlockIdChar c =
 87 |       isBlockIdStartChar c
 88 |         || isDigit c
 89 |         || c == '-'
 90 |         || c == '.'
 91 |         -- #xB7
 92 |         || c == '·'
 93 |         -- #x203F
 94 |         || c == '‿'
 95 |         -- #x2040
 96 |         || c == '⁀'
 97 |         -- #x0300-#x036F (Combining Diacritical Marks)
 98 |         || (ord c >= 768 && ord c <= 879)
 99 | 
100 | -- | Accepts an square-bracket-enclosed string with almost no restrictions on
101 | -- the characters in between. Only the very first character needs to be alphanum
102 | -- or one of the following list: @[\',', \'.', \'#', \'%', \'_', \'º', \'ª',
103 | -- \'\'', \'"']@. This list has been discovered empirically (testing).
104 | --
105 | -- The input can be empty.
106 | --
107 | -- The input can contain newline characters, even if this function is usually
108 | -- called over one line strings (Asciidoctor doesn't support multi-line block
109 | -- attribute lists).
110 | blockAttributeList :: LineParser Text
111 | blockAttributeList = do
112 |   t <- string "[" *> anyRemainder
113 |   let (t', remainder) = T.breakOnEnd "]" t
114 |   case (T.all isSpace remainder, T.uncons t', T.unsnoc t') of
115 |     -- No chars between square brackets: empty attribute list
116 |     (True, Nothing, Just (_, ']')) -> pure ""
117 |     -- There are some chars between square brackets: accept them if starting
118 |     -- char belongs to a restricted group of characters.
119 |     (True, Just (s, _), Just (t'', ']')) | isStartChar s -> pure t''
120 |     -- Fail otherwise (no square bracket at the end, or no correct first char).
121 |     _ -> PC.empty
122 |   where
123 |     isStartChar c =
124 |       isAlphaNum c || c `elem` [',', '.', '#', '%', '_', 'º', 'ª', '\'', '"']
125 | 
126 | -- | @runOfN n cs@ creates a list of parsers, one for every character @c@ member
127 | -- of @cs@. Each of these parsers accepts any run of @n@ or more consecutive
128 | -- appearances of @c@.
129 | --
130 | -- Example: @runOfN 4 [\'+', \'=']@ accepts runs of four or more symbols @"+"@, or
131 | -- four or more symbols @"="@.
132 | runOfN :: Int -> [SpecialChar a] -> [LineParser (Marker a)]
133 | runOfN n = fmap $ \c ->
134 |   (\t -> c :* (n + T.length t))
135 |     <$ PC.count n (Parsec.char $ fromSpecialChar c)
136 |       <*> many (Parsec.char $ fromSpecialChar c)
137 | 
138 | -- | Returns (parses successfully) the remaining text of the line, whatever its
139 | -- content.
140 | anyRemainder :: LineParser Text
141 | -- anyRemainder = Parsec.getInput <* Parsec.setInput ""
142 | anyRemainder = many Parsec.anyChar
143 | 
144 | many :: MonadPlus m => m Char -> m Text
145 | many p = T.pack <$> PC.many p
146 | 
147 | manyText :: MonadPlus m => m Text -> m Text
148 | manyText p = T.concat <$> PC.many p
149 | 
150 | some :: MonadPlus m => m Char -> m Text
151 | some p = T.pack . NE.toList <$> PC.some p
152 | 
153 | count :: Int -> SpecialChar a -> LineParser (Marker a)
154 | count m c = c :* m <$ PC.count m (Parsec.char (fromSpecialChar c))
155 | 
156 | string :: String -> LineParser Text
157 | string s = T.pack <$> Parsec.string s
158 | 
159 | char :: Char -> LineParser Text
160 | char c = T.singleton <$> Parsec.char c
161 | 
162 | satisfy :: (Char -> Bool) -> LineParser Text
163 | satisfy f = T.singleton <$> Parsec.satisfy f
164 | 


--------------------------------------------------------------------------------
/asciidoc-hs.cabal:
--------------------------------------------------------------------------------
  1 | cabal-version:          2.4
  2 | name:                   asciidoc-hs
  3 | version:                0.0.0.0
  4 | synopsis:               AsciiDoc parser that can be used as a Pandoc front-end
  5 | description:            See README.adoc for more info.
  6 | homepage:               https://github.com/gmarpons/asciidoc-hs
  7 | bug-reports:            https://github.com/gmarpons/asciidoc-hs/issues
  8 | license:                BSD-3-Clause
  9 | license-file:           LICENSE
 10 | author:                 Guillem Marpons
 11 | maintainer:             Guillem Marpons <gmarpons@mailbox.org>
 12 | copyright:              2020-2021 Guillem Marpons
 13 | build-type:             Simple
 14 | category:               Text
 15 | extra-doc-files:        CHANGELOG.adoc
 16 |                         README.adoc
 17 | tested-with:            GHC == 8.10.7
 18 |                         GHC == 9.0.1
 19 |                         GHC == 9.2.1
 20 | 
 21 | source-repository head
 22 |   type:                 git
 23 |   location:             https://github.com/gmarpons/asciidoc-hs.git
 24 | 
 25 | flag debug-with-parsec-free
 26 |   Description: Enable debugging with parsec-free
 27 |   Default:     False
 28 |   Manual:      True
 29 | 
 30 | common common-options
 31 |   build-depends:        base >= 4.13.0.0 && < 4.17
 32 |   ghc-options:          -Wall
 33 |                         -Wcompat
 34 |                         -Widentities
 35 |                         -Wincomplete-uni-patterns
 36 |                         -Wincomplete-record-updates
 37 |   if impl(ghc >= 8.0)
 38 |     ghc-options:        -Wredundant-constraints
 39 |   if impl(ghc >= 8.2)
 40 |     ghc-options:        -fhide-source-paths
 41 |   if impl(ghc >= 8.4)
 42 |     ghc-options:        -Wmissing-export-lists
 43 |                         -Wpartial-fields
 44 |   if impl(ghc >= 8.8)
 45 |     ghc-options:        -Wmissing-deriving-strategies
 46 | 
 47 |   if impl(ghc >= 9.2)
 48 |     default-language:   GHC2021
 49 |   else
 50 |     default-language:   Haskell2010
 51 |     default-extensions:
 52 |                         DeriveDataTypeable
 53 |                         DeriveFunctor
 54 |                         EmptyDataDeriving
 55 |                         FlexibleContexts
 56 |                         FlexibleInstances
 57 |                         GeneralizedNewtypeDeriving
 58 |                         MultiParamTypeClasses
 59 |                         StandaloneDeriving
 60 |                         TypeApplications
 61 |   other-extensions:
 62 |                         DerivingStrategies
 63 |                         GADTs
 64 |                         LambdaCase
 65 |                         OverloadedStrings
 66 | 
 67 | library
 68 |   import:               common-options
 69 |   hs-source-dirs:       src
 70 |   if flag(debug-with-parsec-free)
 71 |     exposed-modules:
 72 |                         Text.AsciiDoc.Debug.ParseTest
 73 |                         Text.AsciiDoc.Blocks
 74 |                         Text.AsciiDoc.ElementAttributes
 75 |                         Text.AsciiDoc.Inlines
 76 |                         Text.AsciiDoc.LineParsers
 77 |                         Text.AsciiDoc.Metadata
 78 |                         Text.AsciiDoc.Pandoc
 79 |                         Text.AsciiDoc.SourceRange
 80 |                         Text.AsciiDoc.SpecialChars
 81 |                         Text.AsciiDoc.UnparsedInline
 82 |   else
 83 |     exposed-modules:
 84 |                         Text.AsciiDoc.Blocks
 85 |                         Text.AsciiDoc.ElementAttributes
 86 |                         Text.AsciiDoc.Inlines
 87 |                         Text.AsciiDoc.LineParsers
 88 |                         Text.AsciiDoc.Metadata
 89 |                         Text.AsciiDoc.Pandoc
 90 |                         Text.AsciiDoc.SourceRange
 91 |                         Text.AsciiDoc.SpecialChars
 92 |                         Text.AsciiDoc.UnparsedInline
 93 |   if flag(debug-with-parsec-free)
 94 |     build-depends:
 95 |                         containers >= 0.6.0.1,
 96 |                         mtl >= 2.1.2,
 97 |                         optics-core >= 0.3,
 98 |                         pandoc-types >= 1.20,
 99 |                         parsec-free >= 3.1.11.7,
100 |                         parser-combinators >= 1.2,
101 |                         pretty-simple >= 4.0,
102 |                         syb >= 0.7,
103 |                         text >= 1.2.4
104 |   else
105 |     build-depends:
106 |                         containers >=0.6.0.1 && <0.7,
107 |                         optics-core >=0.3 && <0.5,
108 |                         pandoc-types >=1.20 && <1.23,
109 |                         parsec >=3.1.14 && <3.2,
110 |                         parser-combinators >=1.2 && <1.4,
111 |                         syb >=0.7 && <0.8,
112 |                         text >=1.2.4 && <1.3
113 | 
114 | executable asciidoc-hs
115 |   import:               common-options
116 |   hs-source-dirs:       exe
117 |   main-is:              Main.hs
118 |   build-depends:
119 |                         asciidoc-hs,
120 |                         aeson >=1.4.7 && <2.1,
121 |                         parsec >=3.1.14 && <3.2,
122 |                         text >=1.2.4 && <1.3
123 |   ghc-options:          -threaded
124 |                         -rtsopts
125 |                         -with-rtsopts=-N
126 | 
127 | test-suite asciidoc-hs-test
128 |   import:               common-options
129 |   type:                 exitcode-stdio-1.0
130 |   hs-source-dirs:       test
131 |   main-is:              Tests/Main.hs
132 |   other-modules:        Tests.Blocks
133 |                         Tests.Inlines
134 |                         Tests.Metadata
135 |   build-depends:
136 |                         asciidoc-hs,
137 |                         containers >=0.6.0.1 && <0.7,
138 |                         hspec-expectations-pretty-diff >=0.7.2.5,
139 |                         parsec >=3.1.14 && <3.2,
140 |                         pretty-show >=1.10,
141 |                         repr-tree-syb >=0.1.1,
142 |                         tasty >=1.1.0.3,
143 |                         tasty-hunit-compat >=0.2,
144 |                         text >=1.2.4 && <1.3
145 |   ghc-options:          -threaded
146 |                         -rtsopts
147 |                         -with-rtsopts=-N
148 | 


--------------------------------------------------------------------------------
/src/Text/AsciiDoc/SpecialChars.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE DerivingStrategies #-}
  2 | {-# LANGUAGE GADTs #-}
  3 | {-# LANGUAGE LambdaCase #-}
  4 | {-# LANGUAGE OverloadedStrings #-}
  5 | 
  6 | -- |
  7 | -- Module      :  Text.AsciiDoc.SpecialChars
  8 | -- Copyright   :  © 2020–present Guillem Marpons
  9 | -- License     :  BSD-3-Clause
 10 | --
 11 | -- Maintainer  :  Guillem Marpons <gmarpons@mailbox.org>
 12 | -- Stability   :  experimental
 13 | -- Portability :  portable
 14 | --
 15 | -- This module defines an algebraic type for describing characters that have a
 16 | -- special meaning in AsciiDoc, like @*@, @=@, or @/@.
 17 | --
 18 | -- Data constructors of the 'SpecialChar' type (like 'AsteriskD' or 'HyphenL')
 19 | -- can be used to preserve full information about what has been parsed.
 20 | -- This is useful, for example, to apply the "parse, don't validate" principle.
 21 | -- Suffixes "D" or "L" in the aforementioned constructors are explained in the
 22 | -- documentation of type 'SpecialChar'.
 23 | --
 24 | -- The module also defines types that represent combinations of 'SpecialChar's
 25 | -- that have a particular meaning in AsciiDoc: 'Mark's and 'Marker's.
 26 | module Text.AsciiDoc.SpecialChars
 27 |   ( -- * Special characters
 28 |     CommentChar,
 29 |     DelimiterChar,
 30 |     FormatChar,
 31 |     HeaderChar,
 32 |     ListChar,
 33 |     SpecialChar (..),
 34 |     fromSpecialChar,
 35 | 
 36 |     -- * Formatting @Mark@s used in inlines
 37 |     Mark (..),
 38 |     fromMark,
 39 |     isConstrained,
 40 |     isUnconstrained,
 41 |     extendedMarksOf,
 42 |     closingMarkOf,
 43 | 
 44 |     -- * @Marker@s for initiating and delimiting blocks
 45 |     Marker (..),
 46 |     fromMarker,
 47 |   )
 48 | where
 49 | 
 50 | import Data.String (IsString (..))
 51 | import Data.Text (Text)
 52 | import qualified Data.Text as T (justifyRight)
 53 | 
 54 | -- | Used as a type parameter in 'SpecialChar' to indicate that a character is
 55 | -- used in a delimiter for a block comment or in a marker for a line comment.
 56 | data CommentChar deriving stock (Eq, Show)
 57 | 
 58 | -- | Used as a type parameter in 'SpecialChar' to indicate that a character is
 59 | -- used in a block delimiter.
 60 | data DelimiterChar deriving stock (Eq, Show)
 61 | 
 62 | -- | Used as a type parameter in 'SpecialChar' to indicate that a character is
 63 | -- used in a formatting mark for defining inline text (such as bold or
 64 | -- monospace) and punctuation (such as curved quotation marks) styles.
 65 | data FormatChar deriving stock (Eq, Show)
 66 | 
 67 | -- | Used as a type parameter in 'SpecialChar' to indicate that a character is
 68 | -- used in a section header marker.
 69 | data HeaderChar deriving stock (Eq, Show)
 70 | 
 71 | -- | Used as a type parameter in 'SpecialChar' to indicate that a character is
 72 | -- used in a list item marker.
 73 | data ListChar deriving stock (Eq, Show)
 74 | 
 75 | -- | Algebraic type for describing characters that have a special meaning in
 76 | -- AsciiDoc, like @*@, @=@, or @/@.
 77 | --
 78 | -- Every constructor defines a tuple @(character, function in AsciiDoc)@.
 79 | -- The last letter of every constructor name indicates the function.
 80 | -- The same character can be used in more than one function.
 81 | -- We use GADTs to represent those tuples, as in this way we get more precise
 82 | -- type checking with no code duplication.
 83 | -- The list of possible functions is:
 84 | --
 85 | -- * 'CommentChar'
 86 | -- * 'DelimiterChar'
 87 | -- * 'FormatChar'
 88 | -- * 'HeaderChar'
 89 | -- * 'ListChar'
 90 | data SpecialChar a where
 91 |   SlashC :: SpecialChar CommentChar
 92 |   AsteriskD :: SpecialChar DelimiterChar
 93 |   HyphenD :: SpecialChar DelimiterChar
 94 |   EqualsSignD :: SpecialChar DelimiterChar
 95 |   NumberF :: SpecialChar FormatChar
 96 |   AsteriskF :: SpecialChar FormatChar
 97 |   UnderscoreF :: SpecialChar FormatChar
 98 |   GraveF :: SpecialChar FormatChar
 99 |   EqualsSignH :: SpecialChar HeaderChar
100 |   AsteriskL :: SpecialChar ListChar
101 |   HyphenL :: SpecialChar ListChar
102 | 
103 | deriving stock instance (Eq a) => Eq (SpecialChar a)
104 | 
105 | instance Show (SpecialChar a) where
106 |   show = show . fromSpecialChar
107 | 
108 | fromSpecialChar :: SpecialChar a -> Char
109 | fromSpecialChar = \case
110 |   SlashC -> '/'
111 |   AsteriskD -> '*'
112 |   HyphenD -> '-'
113 |   NumberF -> '#'
114 |   AsteriskF -> '*'
115 |   UnderscoreF -> '_'
116 |   GraveF -> '`'
117 |   EqualsSignD -> '='
118 |   EqualsSignH -> '='
119 |   AsteriskL -> '*'
120 |   HyphenL -> '-'
121 | 
122 | -- | A data type describing a sequence of special characters used as formatting
123 | -- marks.
124 | --
125 | -- A @Mark@ contains at least one character.
126 | --
127 | -- Most @Mark@s consist of a sequence of one or more repeated symbols, like
128 | -- "@**@" or "@_@".
129 | data Mark
130 |   = SingleMark (SpecialChar FormatChar)
131 |   | DoubleMark (SpecialChar FormatChar)
132 |   deriving stock (Eq)
133 | 
134 | instance Show Mark where
135 |   show = fromMark
136 | 
137 | isUnconstrained :: Mark -> Bool
138 | isUnconstrained (SingleMark _) = False
139 | isUnconstrained (DoubleMark _) = True
140 | 
141 | isConstrained :: Mark -> Bool
142 | isConstrained = not . isUnconstrained
143 | 
144 | extendedMarksOf :: Mark -> [Mark]
145 | extendedMarksOf (SingleMark c) = [DoubleMark c]
146 | extendedMarksOf _ = []
147 | 
148 | closingMarkOf :: Mark -> Mark
149 | closingMarkOf m@(SingleMark _) = m
150 | closingMarkOf m@(DoubleMark _) = m
151 | 
152 | instance IsString Mark where
153 |   fromString [x] =
154 |     case toFormatChar x of
155 |       Just c -> SingleMark c
156 |       _ -> error $ "Mark.fromString: bad char: " ++ show x
157 |   fromString [x1, x2] =
158 |     case (toFormatChar x1, toFormatChar x2) of
159 |       (Just c1, Just c2) | c1 == c2 -> DoubleMark c1
160 |       _ -> error $ "Mark.fromString: bad chars: " ++ show [x1, x2]
161 |   fromString _ = error "Mark.fromString: bad length"
162 | 
163 | fromMark :: Mark -> String
164 | fromMark (SingleMark c) = [fromSpecialChar c]
165 | fromMark (DoubleMark c) = [fromSpecialChar c, fromSpecialChar c]
166 | 
167 | toFormatChar :: Char -> Maybe (SpecialChar FormatChar)
168 | toFormatChar = \case
169 |   '#' -> Just NumberF
170 |   '*' -> Just AsteriskF
171 |   '_' -> Just UnderscoreF
172 |   '`' -> Just GraveF
173 |   _ -> Nothing
174 | 
175 | -- | An algebraic data type for describing a sequence of special characters that
176 | -- can be:
177 | --
178 | -- * A marker that signals the start of a new element in the document.
179 | --   Examples of markers are:
180 | --
181 | --     * "@*@" for starting a new list item.
182 | --     * "@===@" for starting a new level 2 section header.
183 | --
184 | -- * A delimiter.
185 | --   Examples of delimiters are:
186 | --
187 | --     * "@****@" or "@====@" or other openings and closings of delimited
188 | --       blocks.
189 | data Marker a
190 |   = -- | Most markers consist in a single character repeated a number of times.
191 |     SpecialChar a :* Int
192 | 
193 | deriving stock instance Eq (Marker CommentChar)
194 | 
195 | deriving stock instance Eq (Marker DelimiterChar)
196 | 
197 | deriving stock instance Eq (Marker FormatChar)
198 | 
199 | deriving stock instance Eq (Marker ListChar)
200 | 
201 | deriving stock instance Show (Marker CommentChar)
202 | 
203 | deriving stock instance Show (Marker DelimiterChar)
204 | 
205 | deriving stock instance Show (Marker FormatChar)
206 | 
207 | deriving stock instance Show (Marker ListChar)
208 | 
209 | fromMarker :: Marker a -> Text
210 | fromMarker (c :* n) = T.justifyRight n (fromSpecialChar c) ""
211 | 


--------------------------------------------------------------------------------
/README.adoc:
--------------------------------------------------------------------------------
  1 | = Asciidoc-hs
  2 | :toc: macro
  3 | ifndef::env-github[]
  4 | :icons: font
  5 | :outfilesuffix: .adoc
  6 | :caution-caption: :fire:
  7 | :important-caption: :exclamation:
  8 | :note-caption: :paperclip:
  9 | :tip-caption: :bulb:
 10 | :warning-caption: :warning:
 11 | endif::[]
 12 | // Variables:
 13 | :release-version:
 14 | // URLs:
 15 | :url-repo: https://github.com/gmarpons/asciidoc-hs
 16 | 
 17 | WARNING: This project is in its early stages.
 18 | Don't expect to be able to use it in anything relevant, yet.
 19 | 
 20 | `asciidoc-hs` is a parser for the lightweight markup language https://docs.asciidoctor.org/asciidoc/latest/[AsciiDoc] that can be used as a https://pandoc.org/[Pandoc] front-end.
 21 | It draws on https://github.com/jgm/commonmark-hs[`commonmark-hs`] and is written in pure Haskell.
 22 | 
 23 | image:https://github.com/gmarpons/asciidoc-hs/workflows/CI/badge.svg[Build Status (GitHub Actions),link={url-repo}/actions]
 24 | 
 25 | toc::[]
 26 | 
 27 | == Mission
 28 | 
 29 | The main goal of this project is to become a reasonably complete implementation of the AsciiDoc language and to allow Pandoc to read AsciiDoc documents.
 30 | 
 31 | We want to read AsciiDoc as it's used today (mainly in its https://docs.asciidoctor.org/asciidoctor/latest/[Asciidoctor] variant), and also support the https://www.eclipse.org/org/workinggroups/asciidoc-charter.php[AsciiDoc Standard] when it arrives.
 32 | We're following the standardization process closely and we'll try to contribute to it if we see the opportunity.
 33 | 
 34 | There have been former attempts at a Pandoc Reader for AsciiDoc, but they have been abandoned.
 35 | If you want to feed Pandoc with an AsciiDoc source, your best option at the moment is probably to first convert to Docbook using Asciidoctor.
 36 | 
 37 | Secondary goals of the project are (but don't expect any roadmap or timeline at the moment):
 38 | 
 39 | * Be used as infrastructure for building advanced editor and IDE features (e.g., linters and live previewers).
 40 | This means to be able to track precise source mapping information, among others.
 41 | 
 42 | * Be scriptable in a similar way to Pandoc, but using and AST/DOM that is specific to AsciiDoc and fully supports its semantic richness.
 43 | 
 44 | * Enable source-to-source transformations, including exact-print for those parts not explicitly modified by the scripts (a.k.a. lossless transformation).
 45 | 
 46 | * End up being distributed together with Pandoc, as a regular Pandoc Reader.
 47 | 
 48 | == Features
 49 | 
 50 | The supported features can be found in our https://github.com/gmarpons/asciidoc-hs/wiki/AsciiDoc-Compatibility-Matrix[compatibility matrix], currently based on Asciidoctor's feature list.
 51 | 
 52 | == Install
 53 | 
 54 | You'll need to build from source for the time being.
 55 | We expect to upload the package to Hackage soon.
 56 | 
 57 | If you don't have Haskell installed in your system, try install GHC and Cabal with https://www.haskell.org/ghcup/[`ghcup`], and follow instructions for Cabal below.
 58 | 
 59 | `asciidoc-hs` has been tested with GHC 8.8 and 8.10, and Cabal 3.4.
 60 | 
 61 | === Using Cabal
 62 | 
 63 | First, clone the repository:
 64 | 
 65 | [subs=attributes]
 66 |   $ git clone {url-repo}.git
 67 |   $ cd asciidoc-hs
 68 | 
 69 | Then, build using `cabal`:
 70 | 
 71 |   $ cabal build
 72 | 
 73 | You can optionally copy the resulting executable under a location of your choice (that should be under the `PATH` environment variable):
 74 | 
 75 |   $ cp "$(cabal exec --verbose=0 --offline sh -- -c 'command -v asciidoc-hs')" ~/.local/bin/asciidoc-hs
 76 | 
 77 | === Using Stack
 78 | 
 79 | First, clone the repository:
 80 | 
 81 | [subs=attributes]
 82 |   $ git clone {url-repo}.git
 83 |   $ cd asciidoc-hs
 84 | 
 85 | Then, build using `stack`:
 86 | 
 87 |   $ stack build
 88 | 
 89 | You can optionally copy the resulting executable under a location of your choice (that should be under the `PATH` environment variable):
 90 | 
 91 |   $ cp "$(stack path --local-install-root)/bin/asciidoc-hs" ~/.local/bin/asciidoc-hs
 92 | 
 93 | == Use
 94 | 
 95 | In the https://github.com/gmarpons/asciidoc-hs/tree/main/examples[examples] directory of this repository you can find some example AsciiDoc files and its various conversions.
 96 | 
 97 | The commands used to get the converted files are:
 98 | 
 99 | :filename: ⟨FILENAME⟩
100 | 
101 | * Convert to HTML using Asciidoctor:
102 | +
103 | [subs=attributes]
104 |   $ asciidoctor -a "sectids!" -a "showtitle" -a "linkcss" -a "stylesheet=https://cdn.jsdelivr.net/gh/asciidoctor/asciidoctor@2.0/data/stylesheets/asciidoctor-default.css" -a "webfonts!" {filename}.adoc -o - > {filename}-asciidoctor.html
105 | 
106 | * Convert to HTML using `asciidoc-hs` + Pandoc:
107 | +
108 | [subs=attributes]
109 |   $ cat {filename}.adoc | asciidoc-hs | pandoc -f json -t html5 --standalone --css="https://cdn.jsdelivr.net/gh/asciidoctor/asciidoctor@2.0/data/stylesheets/asciidoctor-default.css" --css="./asciidoc-hs.css" > {filename}-pandoc.html
110 | 
111 | * Convert to ODT using `asciidoc-hs` + Pandoc:
112 | +
113 | [subs=attributes]
114 |   $ cat {filename}.adoc | asciidoc-hs | pandoc -f json -t odt > {filename}-pandoc.odt
115 | 
116 | * Convert to Markdown using `asciidoc-hs` + Pandoc:
117 | +
118 | [subs=attributes]
119 |   $ cat {filename}.adoc | asciidoc-hs | pandoc -f json -t markdown_strict > {filename}-pandoc.md
120 | 
121 | File https://github.com/gmarpons/asciidoc-hs/blob/main/examples/asciidoctor-article-template.adoc[`asciidoctor-article-template.adoc`] presents a variety of AsciiDoc features.
122 | Those not yet supported by `asciidoc-hs` are commented out.
123 | 
124 | == Contribute
125 | 
126 | Contributions are Welcome!
127 | 
128 | For any of:
129 | 
130 | * reporting a bug,
131 | * filling a feature request,
132 | * opening a PR,
133 | 
134 | we'll try to stick to the workflow and guidelines set in https://github.com/kowainik/.github/blob/main/CONTRIBUTING.md[Kowainik contributing guidelines], with the difference that we use https://hackage.haskell.org/package/ormolu[`ormolu`] instead of http://hackage.haskell.org/package/stylish-haskell[`stylish-haskell`].
135 | 
136 | === Implement a new AsciiDoc feature
137 | 
138 | AsciiDoc is an extensive language.
139 | There are plenty of features still to be implemented, with varying degrees of difficulty.
140 | 
141 | The recommended workflow is the following:
142 | 
143 | . Check the https://github.com/gmarpons/asciidoc-hs/wiki/AsciiDoc-Compatibility-Matrix[compatibility matrix] to look for unsupported features.
144 | . Fill an issue with the proposed feature if you cannot find it in the https://github.com/gmarpons/asciidoc-hs/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement[issue tracker], yet.
145 | . Go inspect files https://github.com/gmarpons/asciidoc-hs/blob/main/src/Text/AsciiDoc/Inlines.hs[Inlines.hs] or https://github.com/gmarpons/asciidoc-hs/blob/main/src/Text/AsciiDoc/Blocks.hs[Blocks.hs] and see:
146 | ** If the current AST data types (mainly `Inline` and `Block`) support the intended feature, or need to be modified.
147 | ** Look for a similar and already implemented feature, and see how the corresponding parser functions are written.
148 | . Discuss a possible implementation in the issue tracker.
149 | . Modify the aforementioned files.
150 | . Add new https://github.com/gmarpons/asciidoc-hs/tree/main/test/Tests[test cases] for inlines or blocks.
151 | . Add the necessary new cases to functions `convertInline` or `convertBlock` in https://github.com/gmarpons/asciidoc-hs/blob/main/src/Text/AsciiDoc/Pandoc.hs[Pandoc.hs].
152 | 
153 | You don't need to wait to complete the steps above before opening a PR.
154 | In fact, it's better if your code can be reviewed from the beginning.
155 | 
156 | == Acknowledgements
157 | 
158 | This work has been supported by a https://www.tweag.io/blog/2020-02-14-os-fellowship/[Tweag Open Source Fellowship].
159 | 


--------------------------------------------------------------------------------
/src/Text/AsciiDoc/ElementAttributes.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE DerivingStrategies #-}
  2 | {-# LANGUAGE OverloadedStrings #-}
  3 | 
  4 | -- |
  5 | -- Module      :  Text.AsciiDoc.ElementAttributes
  6 | -- Copyright   :  © 2020–present Guillem Marpons
  7 | -- License     :  BSD-3-Clause
  8 | --
  9 | -- Maintainer  :  Guillem Marpons <gmarpons@mailbox.org>
 10 | -- Stability   :  experimental
 11 | -- Portability :  portable
 12 | --
 13 | -- Implementation of the Element Attribute Language for AsciiDoc, i.e., the
 14 | -- language for describing properties (styles, identifiers, roles, named
 15 | -- attributes, etc.) of entities like inlines and blocks.
 16 | --
 17 | -- This module contains Parsec-style parsers for the aforementioned language.
 18 | --
 19 | -- It tries to be compatible with Asciidoctor.
 20 | -- See https://docs.asciidoctor.org/asciidoc/latest/attributes/element-attributes/.
 21 | module Text.AsciiDoc.ElementAttributes
 22 |   ( -- * AST types
 23 |     ElementAttribute (..),
 24 |     PositionedAttribute (..),
 25 | 
 26 |     -- * Parsers
 27 |     AttributeParser,
 28 |     attributeListP,
 29 |     attributeShorthandSyntaxP,
 30 |     CommaAcceptance (..),
 31 |   )
 32 | where
 33 | 
 34 | import Control.Applicative.Permutations
 35 | import Control.Monad.Combinators hiding
 36 |   ( endBy1,
 37 |     sepBy1,
 38 |     sepEndBy1,
 39 |     some,
 40 |     someTill,
 41 |   )
 42 | import Control.Monad.Combinators.NonEmpty
 43 | import Data.Foldable (fold)
 44 | import Data.List.NonEmpty (NonEmpty (..))
 45 | import Data.Text (Text)
 46 | import qualified Data.Text as T
 47 | import qualified Text.AsciiDoc.LineParsers as LP
 48 | import qualified Text.Parsec as Parsec
 49 | 
 50 | data ElementAttribute
 51 |   = PositionalAttribute Text
 52 |   | NamedAttribute Text Text
 53 |   | -- | Special Asciidoctor syntax, all fields are optional and can appear in
 54 |     -- any order: @[style#id.role1.role2%option1%option2]@.
 55 |     --
 56 |     -- An empty style means no style has been specified.
 57 |     --
 58 |     -- Empty ids, roles and options are admissible, even if they normally rise a
 59 |     -- warning.
 60 |     --
 61 |     -- More than one id is admissible, even if this situation normally rises a
 62 |     -- warning.
 63 |     ShorthandSyntaxAttribute Text [Text] [Text] [Text]
 64 |   deriving stock (Eq, Show)
 65 | 
 66 | newtype PositionedAttribute = PositionedAttribute (Int, ElementAttribute)
 67 | 
 68 | type AttributeParser = LP.LineParser
 69 | 
 70 | data Position
 71 |   = Start
 72 |   | Other
 73 | 
 74 | -- | Converts a @Text@ into a list of attributes.
 75 | --
 76 | -- Attributes are separated by commas.
 77 | --
 78 | -- Attributes can be empty (the empty string). Attributes can contain square
 79 | -- brackets.
 80 | --
 81 | -- Attributes can be enclosed between single or double quotes. Quote-enclosed
 82 | -- attributes can contain commas and quote characters (if escaped with "@\@").
 83 | --
 84 | -- This parser does not fail for any input.
 85 | -- It supports both the empty string and a sequence of spaces as input.
 86 | --
 87 | -- __Divergence from Asciidoctor__: When a string of the list is only partially
 88 | -- enclosed between single or double quotes, Asciidoctor breaks the string into
 89 | -- different attributes, and this function considers it a non-quoted single
 90 | -- attribute (i.e., quotes are part of the attribute and commas break the
 91 | -- string).
 92 | attributeListP :: AttributeParser (NonEmpty ElementAttribute)
 93 | attributeListP =
 94 |   (:|)
 95 |     <$> attributeP Start
 96 |     <*> option [] (sepP *> sepBy (attributeP Other) sepP) <* Parsec.eof
 97 |   where
 98 |     attributeP position =
 99 |       Parsec.try (quotedP position '"' <* many Parsec.space <* endP)
100 |         <|> Parsec.try (quotedP position '\'' <* many Parsec.space <* endP)
101 |         <|> unquotedP position <* many Parsec.space <* endP
102 |     quotedP :: Position -> Char -> AttributeParser ElementAttribute
103 |     quotedP position quote = do
104 |       v <- quotedValueP quote
105 |       let shorthandOrError =
106 |             Parsec.parse (attributeShorthandSyntaxP AcceptCommas <* Parsec.eof) "" v
107 |       case (position, shorthandOrError) of
108 |         -- First positional attribute, shorthand syntax accepted
109 |         (Start, Right shorthand) -> pure shorthand
110 |         -- First positional attribute, shorthand syntax failed: treat as a
111 |         -- regular positional attribute
112 |         (Start, Left _) -> pure $ PositionalAttribute v
113 |         -- Position different from first: regular positional attribute
114 |         (Other, _) -> pure $ PositionalAttribute v
115 |     unquotedP Start =
116 |       Parsec.try unquotedShorthandP
117 |         <|> Parsec.try namedP
118 |         <|> positionalP
119 |     unquotedP Other =
120 |       Parsec.try namedP
121 |         <|> positionalP
122 |     unquotedShorthandP = do
123 |       shorthand <-
124 |         -- Parsec.try not needed because this functions is only ever called
125 |         -- inside a Parsec.try.
126 |         attributeShorthandSyntaxP RejectCommas <* many Parsec.space <* endP
127 |       case shorthand of
128 |         -- No '=' found in the first segment of the potential shorthand syntax
129 |         ShorthandSyntaxAttribute style _ _ _
130 |           | T.all (/= '=') style -> pure shorthand
131 |         _ -> empty
132 |     quotedValueP :: Char -> AttributeParser Text
133 |     quotedValueP quote =
134 |       between (Parsec.char quote) (Parsec.char quote) $
135 |         LP.manyText (LP.satisfy (\c -> c /= quote && c /= '\\'))
136 |           <> LP.manyText
137 |             ( (Parsec.try (LP.char '\\' *> LP.char quote) <|> LP.char '\\')
138 |                 <> LP.manyText (LP.satisfy (\c -> c /= quote && c /= '\\'))
139 |             )
140 |     unquotedValueP :: AttributeParser Text
141 |     unquotedValueP = T.strip . T.pack <$> many (Parsec.satisfy (/= ','))
142 |     namedP :: AttributeParser ElementAttribute
143 |     namedP =
144 |       NamedAttribute
145 |         <$> nameP <* Parsec.char '=' <* many Parsec.space <*> valueP
146 |     nameP :: AttributeParser Text
147 |     nameP = LP.some (Parsec.noneOf [' ', ',', '=']) <* many Parsec.space
148 |     valueP =
149 |       Parsec.try (quotedValueP '"')
150 |         <|> Parsec.try (quotedValueP '\'')
151 |         <|> unquotedValueP
152 |     positionalP = PositionalAttribute <$> unquotedValueP
153 |     sepP = Parsec.char ',' <* many Parsec.space
154 |     endP = Parsec.lookAhead $ eitherP (LP.char ',') Parsec.eof
155 | 
156 | data CommaAcceptance
157 |   = AcceptCommas
158 |   | RejectCommas
159 |   deriving stock (Eq, Show)
160 | 
161 | attributeShorthandSyntaxP :: CommaAcceptance -> AttributeParser ElementAttribute
162 | attributeShorthandSyntaxP commaAcceptance =
163 |   wrap <$> someTill permutationP endP
164 |   where
165 |     -- Use monoid instances of (,), Text and [] to collect ids, roles and
166 |     -- options.
167 |     wrap ((s, (i, (r, o))) :| xs) =
168 |       ShorthandSyntaxAttribute
169 |         s
170 |         (i <> (fst . snd . fold) xs)
171 |         (r <> (fst . snd . snd . fold) xs)
172 |         (o <> (snd . snd . snd . fold) xs)
173 |     permutationP :: AttributeParser (Text, ([Text], ([Text], [Text])))
174 |     permutationP =
175 |       runPermutation $
176 |         (\s i r o -> (s, (i, (r, o))))
177 |           <$> toPermutationWithDefault "" styleP
178 |           <*> toPermutationWithDefault [] identifierP
179 |           <*> toPermutationWithDefault [] roleP
180 |           <*> toPermutationWithDefault [] optionP
181 |     styleP = LP.many anyChar
182 |     identifierP = (: []) <$ Parsec.char '#' <*> LP.many anyChar
183 |     roleP = (: []) <$ Parsec.char '.' <*> LP.many anyChar
184 |     optionP = (: []) <$ Parsec.char '%' <*> LP.many anyChar
185 |     anyChar =
186 |       Parsec.noneOf $
187 |         [' ', '#', '.', '%'] <> case commaAcceptance of
188 |           AcceptCommas -> []
189 |           RejectCommas -> [',']
190 |     endP = Parsec.lookAhead $ eitherP Parsec.eof $ Parsec.oneOf [' ', ',']
191 | 


--------------------------------------------------------------------------------
/src/Text/AsciiDoc/Pandoc.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE GADTs #-}
  2 | {-# LANGUAGE LambdaCase #-}
  3 | {-# LANGUAGE OverloadedStrings #-}
  4 | 
  5 | -- |
  6 | -- Module      :  Text.AsciiDoc.Pandoc
  7 | -- Copyright   :  © 2020–present Guillem Marpons
  8 | -- License     :  BSD-3-Clause
  9 | --
 10 | -- Maintainer  :  Guillem Marpons <gmarpons@mailbox.org>
 11 | -- Stability   :  experimental
 12 | -- Portability :  portable
 13 | --
 14 | -- Converters from AsciiDoc AST node types to Pandoc AST node types.
 15 | module Text.AsciiDoc.Pandoc
 16 |   ( -- * Types
 17 |     Document,
 18 |     Inline,
 19 | 
 20 |     -- * Conversions
 21 |     convertDocument,
 22 |     convertBlock,
 23 |     convertInline,
 24 |     -- TODO. Relocate parseInlines function
 25 |     parseInlines,
 26 |   )
 27 | where
 28 | 
 29 | import Data.List.NonEmpty as NE (NonEmpty (..))
 30 | import qualified Data.List.NonEmpty as NE
 31 | import Data.Semigroup (Last (Last))
 32 | import Data.Text (Text)
 33 | import qualified Data.Text as T
 34 | import Text.AsciiDoc.Blocks hiding (Parser, State)
 35 | import Text.AsciiDoc.Inlines hiding (Parser, State)
 36 | import Text.AsciiDoc.Metadata
 37 | import Text.AsciiDoc.UnparsedInline
 38 | import Text.Pandoc.Builder (Attr)
 39 | import qualified Text.Pandoc.Builder as Pandoc
 40 | import Text.Pandoc.Definition (Pandoc)
 41 | import qualified Text.Parsec as Parsec
 42 | 
 43 | -- type Document = [Block Inline]
 44 | 
 45 | parseInlines :: Document UnparsedInline -> Document Inline
 46 | parseInlines = fmap parseInline
 47 | 
 48 | parseInline :: UnparsedInline -> Inline
 49 | parseInline x =
 50 |   case Parsec.runParser inlinesP inlineParserInitialState "" (fromUnparsedInline x) of
 51 |     Right result -> result
 52 |     Left parseError -> error $ "parseInlines: " <> show parseError
 53 |   where
 54 |     fromUnparsedInline :: UnparsedInline -> Text
 55 |     -- TODO. Check what line finalizers T.unilines considers, and update the
 56 |     -- following definition to be congruent with Text.AsciiDoc.Inlines.newlineP.
 57 |     fromUnparsedInline = T.unlines . fmap toText . NE.toList
 58 |     toText :: InputLine -> Text
 59 |     toText = \case
 60 |       MarkupLine t -> t
 61 |       CommentLine _ -> ""
 62 | 
 63 | convertDocument :: Document Inline -> Pandoc
 64 | convertDocument (Document mHeader bs) =
 65 |   Pandoc.setMeta "pagetitle" pageTitle $
 66 |     Pandoc.doc $
 67 |       -- Divergence from Asciidoctor: the following div with role "article" is
 68 |       -- not necessary in Asciidoctor because "article" role (and other metadata
 69 |       -- in @metadata@) is assigned to the <body> tag.
 70 |       Pandoc.divWith
 71 |         (toAttr $ articleMetadata {metadataRoles = "article" : metadataRoles articleMetadata})
 72 |         ( Pandoc.divWith
 73 |             (toAttr $ mempty {metadataIds = ["header"]})
 74 |             -- Spurious @plain@ block needed to avoid Pandoc fuse <div> and <h*>
 75 |             (Pandoc.plain " " <> headerContent)
 76 |             <> Pandoc.divWith
 77 |               (toAttr $ mempty {metadataIds = ["content"]})
 78 |               -- Spurious @plain@ block needed to avoid Pandoc fuse <div> and <h*>
 79 |               (Pandoc.plain " " <> contentTitle <> foldMap convertBlock bs)
 80 |         )
 81 |   where
 82 |     -- TODO. Var sectionMetadata will be used when correct section nesting is
 83 |     -- computed
 84 |     (pageTitle, articleMetadata, _sectionMetadata, headerContent, contentTitle) =
 85 |       case mHeader of
 86 |         Just (DocumentHeader p level i) ->
 87 |           let m = toMetadata p :: Metadata UnparsedInline
 88 |               t = convertInline i
 89 |               h = Pandoc.header (level + 1) t
 90 |            in if level == 0
 91 |                 then
 92 |                   ( t,
 93 |                     m,
 94 |                     mempty :: Metadata UnparsedInline,
 95 |                     h,
 96 |                     mempty
 97 |                   )
 98 |                 else
 99 |                   ( t,
100 |                     m {metadataIds = [], metadataRoles = []},
101 |                     mempty {metadataRoles = metadataRoles m},
102 |                     mempty,
103 |                     Pandoc.divWith (toAttr $ mempty {metadataIds = metadataIds m}) h
104 |                   )
105 |         Nothing -> ("Untitled", mempty, mempty, mempty, mempty)
106 | 
107 | convertBlock :: Block Inline -> Pandoc.Blocks
108 | convertBlock = \case
109 |   Paragraph p i ->
110 |     -- We cannot use <> to extend m because, to mimic Asciidoctor, metadataRoles
111 |     -- appending doesn't preserve the left value if the right one uses
112 |     -- "role=..." syntax. See Monoid Metadata instance.
113 |     let m = toMetadata p
114 |      in Pandoc.divWith (toAttr $ m {metadataRoles = "paragraph" : metadataRoles m}) $
115 |           prependTitleDiv (fmap parseInline m) $
116 |             Pandoc.para $ convertInline i
117 |   -- Divergence from Asciidoctor: we add the possible title in the section
118 |   -- prefix to the header of the section, and not to the first non-header block
119 |   -- found, as Asciidoctor does.
120 |   Section p level i bs ->
121 |     let m = toMetadata p
122 |         mSect = m {metadataRoles = T.pack ("sect" ++ show (level + 1)) : metadataRoles m}
123 |      in Pandoc.divWith (toAttr mSect) $
124 |           prependTitleDiv (fmap parseInline m) $
125 |             Pandoc.headerWith mempty (level + 1) (convertInline i)
126 |               <> foldMap convertBlock bs
127 |   -- TODO. Compute Section's (nesting) before converting. The following case
128 |   -- should be redundant.
129 |   -- TODO. Add a Metadata value to SectionHeaderBlock and avoid recalculating it
130 |   SectionHeader p level i ->
131 |     let m = toMetadata p
132 |      in Pandoc.divWith (toAttr m) $
133 |           prependTitleDiv (fmap parseInline m) $
134 |             Pandoc.headerWith mempty (level + 1) (convertInline i)
135 |   List (Unordered Nothing) p bss ->
136 |     let m = toMetadata p
137 |      in Pandoc.divWith (toAttr $ m {metadataRoles = "ulist" : metadataRoles m}) $
138 |           prependTitleDiv (fmap parseInline m) $
139 |             Pandoc.bulletList $ NE.toList $ fmap (foldMap convertBlock) bss
140 |   -- TODO. Cover other list types
141 |   List _ _p _ -> undefined
142 |   Nestable Example p bs ->
143 |     let m = toMetadata p
144 |         -- TODO. This prefix should come from a (localized) variable (L10N)
145 |         prefix = parseInline $ MarkupLine "Example 1. " :| []
146 |         title = fmap ((prefix <>) . parseInline) <$> metadataTitle m
147 |      in Pandoc.divWith (toAttr $ m {metadataRoles = "exampleblock" : metadataRoles m}) $
148 |           prependTitleDiv (m {metadataTitle = title}) $
149 |             Pandoc.divWith (toAttr $ mempty {metadataRoles = ["content"]}) $
150 |               foldMap convertBlock bs
151 |   Nestable Sidebar p bs ->
152 |     let m = toMetadata p
153 |      in Pandoc.divWith (toAttr $ m {metadataRoles = "sidebarblock" : metadataRoles m}) $
154 |           Pandoc.divWith (toAttr $ mempty {metadataRoles = ["content"]}) $
155 |             -- In contrast with Nestable Example, here the title is part of the
156 |             -- content, to mimic Asciidoctor
157 |             prependTitleDiv (fmap parseInline m) $
158 |               foldMap convertBlock bs
159 |   -- TODO. Cover other nestable types
160 |   Nestable _ _p _ -> undefined
161 |   DanglingBlockPrefix _ -> mempty
162 | 
163 | convertInline :: Inline -> Pandoc.Inlines
164 | convertInline = \case
165 |   AlphaNum t -> Pandoc.str t
166 |   EndOfInline _ -> mempty
167 |   Newline t -> Pandoc.str t
168 |   Space _ -> Pandoc.space
169 |   InlineSeq inlines -> foldMap convertInline inlines
170 |   StyledText Bold as _ inlines _
171 |     | as == defaultAttributeList ->
172 |       Pandoc.strong $ foldMap convertInline inlines
173 |   -- NOTE. Asciidoctor creates a single <strong> element with attributes in this
174 |   -- case, but Pandoc AST doesn't support attributes in <strong> nodes. The
175 |   -- following is semantically equivalent:
176 |   StyledText Bold as _ inlines _ ->
177 |     Pandoc.spanWith (toAttr $ toMetadata as) $
178 |       Pandoc.strong $ foldMap convertInline inlines
179 |   -- NOTE. Pandoc's AST don't support "mark" elements, but the HTML writer
180 |   -- produces a <mark> element if the class "mark" is added to the span.
181 |   --
182 |   -- As Asciidoctor, we only produce a <mark> element for "empty" custom spans.
183 |   StyledText Custom as _ inlines _ ->
184 |     let m = toMetadata as :: Metadata UnparsedInline
185 |         -- Returns a functions that *can* enclose its argument into a span,
186 |         -- depending on 'as'.
187 |         encloseInSpan :: Pandoc.Inlines -> Pandoc.Inlines
188 |         encloseInSpan
189 |           | as == defaultAttributeList =
190 |             Pandoc.spanWith (toAttr $ mempty {metadataRoles = ["mark"]})
191 |           -- If attributes are not significant for translation to Pandoc, in
192 |           -- congruence with Asciidoctor we skip the superfluous span.
193 |           | toAttr m == mempty = id
194 |           | otherwise = Pandoc.spanWith $ toAttr $ toMetadata as
195 |      in encloseInSpan $ foldMap convertInline inlines
196 |   StyledText Italic as _ inlines _
197 |     | as == defaultAttributeList ->
198 |       Pandoc.emph $ foldMap convertInline inlines
199 |   -- NOTE. Asciidoctor creates a single <emph> element with attributes in this
200 |   -- case, but Pandoc AST doesn't support attributes in <emph> nodes. The
201 |   -- following is semantically equivalent:
202 |   StyledText Italic as _ inlines _ ->
203 |     Pandoc.spanWith (toAttr $ toMetadata as) $
204 |       Pandoc.emph $ foldMap convertInline inlines
205 |   -- NOTE. Pandoc's Code nodes do not support nested markup. It's a pity because
206 |   -- HTML <code> element does. Among the three following solutions, we choose 1)
207 |   -- because it's simpler than 2) and produces cleaner (probably more robust for
208 |   -- backends that are not HTML) Pandoc output, and with 3) a lot of markup is
209 |   -- potentially lost. Also, it's congruent with the treatment of <mark> above:
210 |   --
211 |   -- 1) Convert Monospace into a span, do not call Pandoc.code, and add a
212 |   --    "monospace" class. I.e., do not generate <code> element in HTML and
213 |   --    delegate monospace markup to CSS or another formatting mechanism.
214 |   --
215 |   -- 2) Break inlines into pieces of uniform markup, and apply Pandoc.code as
216 |   --    the inner-most construct of each piece. Apply other markup to the pieces
217 |   --    that need it.
218 |   --
219 |   -- 3) Convert inlines into a Text with no markup applied.
220 |   --
221 |   -- TODO. Find a solution that can work for other writers than HTML.
222 |   StyledText Monospace as _ inlines _ ->
223 |     Pandoc.spanWith (toAttr $ mempty {metadataRoles = ["monospace"]} <> toMetadata as) $
224 |       foldMap convertInline inlines
225 |   Symbol t -> Pandoc.str t
226 | 
227 | toAttr :: Metadata UnparsedInline -> Attr
228 | toAttr m = (identifier, classes, keyvals)
229 |   where
230 |     identifier = case metadataIds m of
231 |       [] -> ""
232 |       (x : _) -> x -- TODO. Support multiple id's per entity.
233 |     classes = metadataRoles m
234 |     -- We could do
235 |     --   Map.toList $ metadataNamedAttributes m
236 |     -- but keyvals is used by Pandoc for data-* attributes.
237 |     keyvals = []
238 | 
239 | -- | In case the metadata contains a block title, this function prepends a div
240 | -- with the title to the provided 'Pandoc.Blocks' value.
241 | prependTitleDiv :: Metadata Inline -> Pandoc.Blocks -> Pandoc.Blocks
242 | prependTitleDiv m = case metadataTitle m of
243 |   Nothing -> id
244 |   Just (Last i) ->
245 |     mappend $
246 |       Pandoc.divWith (toAttr $ mempty {metadataRoles = ["title"]}) $
247 |         Pandoc.plain $ convertInline i
248 | 


--------------------------------------------------------------------------------
/src/Text/AsciiDoc/Inlines.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE DerivingStrategies #-}
  2 | {-# LANGUAGE GADTs #-}
  3 | {-# LANGUAGE LambdaCase #-}
  4 | {-# LANGUAGE OverloadedStrings #-}
  5 | 
  6 | -- |
  7 | -- Module      :  Text.AsciiDoc.Inlines
  8 | -- Copyright   :  © 2020–present Guillem Marpons
  9 | -- License     :  BSD-3-Clause
 10 | --
 11 | -- Maintainer  :  Guillem Marpons <gmarpons@mailbox.org>
 12 | -- Stability   :  experimental
 13 | -- Portability :  portable
 14 | --
 15 | -- This module contains Parsec-style parsers for AsciiDoc inline elements.
 16 | --
 17 | -- It tries to be compatible with Asciidoctor, but uses a grammar-based parsing
 18 | -- approach instead of regexes.
 19 | --
 20 | -- There are three kinds of terminals in the grammar:
 21 | --
 22 | -- * Alpha-numeric sequences ('AlphaNum').
 23 | -- * Gaps: space-like character sequences and 'Newline's (including the
 24 | --   surrounding space).
 25 | -- * Other characters: punctuation symbols, mathematical symbols, etc.
 26 | --   It includes formatting and punctuation marks.
 27 | --
 28 | -- These groups of characters govern how constrained enclosures are parsed.
 29 | module Text.AsciiDoc.Inlines
 30 |   ( -- * AST types
 31 |     Inline (..),
 32 |     Style (..),
 33 |     InlineAttributeList (..),
 34 |     defaultAttributeList,
 35 | 
 36 |     -- * Parsers
 37 |     inlinesP,
 38 | 
 39 |     -- * Parser type
 40 |     Parser,
 41 |     State (..),
 42 |     inlineParserInitialState,
 43 |   )
 44 | where
 45 | 
 46 | import Control.Monad (when)
 47 | import Control.Monad.Combinators (someTill)
 48 | import Control.Monad.Combinators hiding
 49 |   ( endBy1,
 50 |     sepBy1,
 51 |     sepEndBy1,
 52 |     some,
 53 |     someTill,
 54 |   )
 55 | import Control.Monad.Combinators.NonEmpty (some)
 56 | import Data.Char (isAlphaNum, isSpace)
 57 | import Data.Functor (void)
 58 | import Data.Generics (Data, Typeable)
 59 | import Data.List.NonEmpty (NonEmpty (..), (<|))
 60 | import qualified Data.List.NonEmpty as NE
 61 | import Data.Text (Text)
 62 | import qualified Data.Text as T
 63 | import Text.AsciiDoc.ElementAttributes
 64 | import Text.AsciiDoc.Metadata
 65 | import Text.AsciiDoc.SpecialChars
 66 | import Text.AsciiDoc.UnparsedInline
 67 | import Text.Parsec ((<?>))
 68 | import qualified Text.Parsec as Parsec
 69 |   ( ParsecT,
 70 |     char,
 71 |     eof,
 72 |     getState,
 73 |     label,
 74 |     lookAhead,
 75 |     notFollowedBy,
 76 |     parse,
 77 |     putState,
 78 |     try,
 79 |   )
 80 | import qualified Text.Parsec.Char as Parsec
 81 |   ( anyChar,
 82 |     satisfy,
 83 |     string,
 84 |   )
 85 | 
 86 | type Parser m = Parsec.ParsecT Text State m
 87 | 
 88 | -- | Custom parser state for the parser for 'Inline's.
 89 | newtype State = State
 90 |   { -- | A stack (LIFO) of formatting and punctuation 'Mark's.
 91 |     --
 92 |     -- Every mark in the stack is the opening mark for the corresponding
 93 |     -- enclosure.
 94 |     -- Top of the stack contains the opening mark for most recently open
 95 |     -- enclosure.
 96 |     openEnclosures :: [Mark]
 97 |   }
 98 |   deriving newtype (Eq, Show)
 99 | 
100 | inlineParserInitialState :: State
101 | inlineParserInitialState =
102 |   State
103 |     { openEnclosures = []
104 |     }
105 | 
106 | -- | Formatting styles for inline text.
107 | data Style
108 |   = Bold
109 |   | Custom
110 |   | Italic
111 |   | Monospace
112 |   deriving stock (Eq, Show, Typeable, Data)
113 | 
114 | --  | Subscript
115 | --  | Superscript
116 | 
117 | -- | Every formatting 'Mark' maps to a 'Style', and this function computes this
118 | -- map.
119 | toStyle :: Mark -> Style
120 | toStyle = \case
121 |   SingleMark NumberF -> Custom
122 |   SingleMark AsteriskF -> Bold
123 |   SingleMark UnderscoreF -> Italic
124 |   SingleMark GraveF -> Monospace
125 |   DoubleMark NumberF -> Custom
126 |   DoubleMark AsteriskF -> Bold
127 |   DoubleMark UnderscoreF -> Italic
128 |   DoubleMark GraveF -> Monospace
129 | 
130 | -- | A data type for all the different inline types.
131 | --
132 | -- Some inline types can contain other inlines, and there is a constructor
133 | -- 'InlineSeq' serving as a general inline container.
134 | data Inline
135 |   = AlphaNum Text
136 |   | EndOfInline Text
137 |   | InlineSeq (NonEmpty Inline)
138 |   | Newline Text
139 |   | Space Text
140 |   | StyledText Style InlineAttributeList Text (NonEmpty Inline) Text
141 |   | Symbol Text
142 |   deriving stock (Eq, Show, Typeable, Data)
143 | 
144 | --  | InlineMacro Text
145 | --  | EscapedSymbol Text
146 | --  | DoubleEscapedSymbol Text
147 | 
148 | instance Semigroup Inline where
149 |   InlineSeq x <> InlineSeq y = InlineSeq (x <> y)
150 |   InlineSeq x <> b = InlineSeq $ x <> (b :| [])
151 |   a <> InlineSeq y = InlineSeq $ a <| y
152 |   a <> b = InlineSeq $ a :| [b]
153 | 
154 | -- EBNF grammar non-terminal symbols  ------------------------------------------
155 | 
156 | -- The parser can be read as an EBNF grammar, with starting symbol 'inlinesP'.
157 | -- The resulting grammar would be ambiguous, so there are a series of functions
158 | -- used to discard some parsing paths ('muP', 'piP', 'sigmaP', 'phiP', 'psiP',
159 | -- 'omegaP'), that work together with order in alternatives to give a completely
160 | -- deterministic parser.
161 | 
162 | -- | This function is the only exported parser function for AsciiDoc inlines.
163 | --
164 | -- If more than one individual inlines can be parsed, it returns all of them
165 | -- encapsulated into an 'InlineSeq'.
166 | inlinesP :: Monad m => Parser m Inline
167 | inlinesP =
168 |   (\(x :| xs) ys -> InlineSeq (x :| xs ++ ys))
169 |     <$> (firstP <?> "F")
170 |       <*> ( concat
171 |               <$> many
172 |                 ( NE.toList <$> Parsec.label gapWithOptionalContinuationP "N1"
173 |                     <|> Parsec.label (sigmaP *> nonGapSequenceP) "N2"
174 |                 )
175 |           )
176 |       <* Parsec.eof
177 | 
178 | unconstrainedP :: Monad m => Parser m Inline
179 | unconstrainedP = Parsec.try $ do
180 |   Parsec.label (pure ()) "U"
181 |   ps <- option defaultAttributeList inlineAttributeListP
182 |   phiP
183 |   openMark <- openP ["##", "**", "__", "``"]
184 |   is <- inlinesInUnconstrainedP
185 |   closeMark <- closeP openMark
186 |   pure $ StyledText (toStyle openMark) ps (fromMarkT openMark) is (fromMarkT closeMark)
187 |   where
188 |     fromMarkT = T.pack . fromMark
189 |     inlinesInUnconstrainedP =
190 |       (\(x :| xs) ys -> x :| xs ++ ys)
191 |         <$> ((gapWithOptionalContinuationP <|> firstP) <?> "Y_p | F")
192 |         <*> ( concat
193 |                 <$> many
194 |                   ( NE.toList <$> Parsec.label gapWithOptionalContinuationP "N1"
195 |                       <|> Parsec.label (sigmaP *> nonGapSequenceP) "N2"
196 |                   )
197 |             )
198 | {-# ANN unconstrainedP ("HLint: ignore" :: String) #-}
199 | 
200 | constrainedP :: Monad m => Parser m Inline
201 | constrainedP = Parsec.try $ do
202 |   Parsec.label (pure ()) "C"
203 |   ps <- option defaultAttributeList inlineAttributeListP
204 |   varphiP
205 |   openMark <- openP ["#", "*", "_", "`"]
206 |   is <- inlinesInConstrainedP
207 |   omegaP
208 |   closeMark <- closeP openMark
209 |   pure $ StyledText (toStyle openMark) ps (fromMarkT openMark) is (fromMarkT closeMark)
210 |   where
211 |     fromMarkT = T.pack . fromMark
212 |     inlinesInConstrainedP =
213 |       (\(x :| xs) ys -> x :| xs ++ ys)
214 |         <$> (firstP <?> "F")
215 |         <*> ( concat
216 |                 <$> many
217 |                   ( Parsec.label gapWithContinuationP "N1"
218 |                       <|> Parsec.label (muP *> nonGapSequenceP) "N2"
219 |                   )
220 |             )
221 | {-# ANN constrainedP ("HLint: ignore" :: String) #-}
222 | 
223 | firstP :: Monad m => Parser m (NonEmpty Inline)
224 | firstP =
225 |   -- Notice the similarity with alphaNumOrOtherP.
226 |   (:| [])
227 |     <$> alphaNumP
228 |     <|> piP *> otherWithContinuationP
229 | 
230 | nonGapSequenceP :: Monad m => Parser m [Inline]
231 | nonGapSequenceP =
232 |   (:)
233 |     <$> (unconstrainedP <|> otherP)
234 |     <*> option [] alphaNumOrOtherP
235 | 
236 | openP :: Monad m => [Mark] -> Parser m Mark
237 | openP ms = do
238 |   Parsec.label (pure ()) "M_<"
239 |   mark <- choice $ Parsec.try . markP <$> ms
240 |   -- What follows is not part of the EBNF description of the language, but it's
241 |   -- easier to put it here than create a specific function for it.
242 |   st <- Parsec.getState
243 |   Parsec.putState $ st {openEnclosures = mark : openEnclosures st}
244 |   Parsec.label (pure ()) $ "State: " ++ show (mark : openEnclosures st)
245 |   pure mark
246 | 
247 | closeP :: Monad m => Mark -> Parser m Mark
248 | closeP openMark = do
249 |   -- Passing a mark to this function is redundant, but the openP/closeP
250 |   -- connection makes the interface more clear for callers. It can also be used
251 |   -- to (run-time) check for some programming errors.
252 |   Parsec.label (pure ()) $ "M_>: " ++ show openMark
253 |   let closeMark = closingMarkOf openMark
254 |   _ <- markP closeMark
255 |   -- What follows is not part of the EBNF description of the language, but it's
256 |   -- easier to put it here than create a specific function for it.
257 |   st <- Parsec.getState
258 |   case openEnclosures st of
259 |     (e : es) -> do
260 |       when (closingMarkOf e /= closeMark) $
261 |         error "closeP: trying to close mark different from the innermost enclosure"
262 |       Parsec.putState $ st {openEnclosures = es}
263 |     [] -> error "closeP: trying to close non-existent enclosure"
264 |   pure closeMark
265 | 
266 | gapWithContinuationP :: Monad m => Parser m [Inline]
267 | gapWithContinuationP =
268 |   flip Parsec.label "Y" $
269 |     -- Parsec.try is necessary because we can accept gapP and fail afterwards.
270 |     Parsec.try $
271 |       (++)
272 |         <$> (NE.toList <$> gapP)
273 |         <*> ((: []) <$> alphaNumP <|> NE.toList <$ sigmaP <*> otherWithContinuationP)
274 | 
275 | gapWithOptionalContinuationP :: Monad m => Parser m (NonEmpty Inline)
276 | gapWithOptionalContinuationP =
277 |   flip Parsec.label "Y_p" $
278 |     (\(h :| t) c -> h :| t ++ c)
279 |       <$> gapP
280 |       <*> option [] ((: []) <$> alphaNumP <|> NE.toList <$ sigmaP <*> otherWithContinuationP)
281 | 
282 | otherWithContinuationP :: Monad m => Parser m (NonEmpty Inline)
283 | otherWithContinuationP =
284 |   flip Parsec.label "X" $
285 |     ((:|) <$> unconstrainedP <*> option [] alphaNumOrOtherP)
286 |       <|> ((:|) <$> constrainedP <*> option [] gapOrOtherWithContinuationP)
287 |       <|> ((:|) <$> otherP <*> option [] alphaNumOrOtherP)
288 |   where
289 |     gapOrOtherWithContinuationP =
290 |       flip Parsec.label "Y | X" $
291 |         gapWithContinuationP
292 |           <|> muP *> (NE.toList <$> otherWithContinuationP)
293 | 
294 | alphaNumOrOtherP :: Monad m => Parser m [Inline]
295 | alphaNumOrOtherP =
296 |   flip Parsec.label "A | X" $
297 |     (: [])
298 |       <$> alphaNumP
299 |       <|> muP *> (NE.toList <$> otherWithContinuationP)
300 | 
301 | gapP :: Monad m => Parser m (NonEmpty Inline)
302 | gapP = flip Parsec.label "G" $ some newlineOrSpaceP
303 |   where
304 |     newlineOrSpaceP =
305 |       newlineP
306 |         -- Inline the definition of spaceP here allows to avoid redundant checks
307 |         -- against '\n' or '\r'.
308 |         <|> Space . T.pack . NE.toList <$> some (Parsec.satisfy isSpace)
309 | 
310 | -- Functions for disambiguating the EBNF grammar  ------------------------------
311 | 
312 | -- | Function called after a character of kind alphanum or other, and before a
313 | -- character of kind other.
314 | --
315 | -- TODO. Check that this function together with 'sigmaP' cover all cases at the
316 | -- beginning of N
317 | --
318 | -- It fails if an open enclosure can be closed (using 'closableMarks'), or a
319 | -- full unconstrained enclosure can be parsed, at current input.
320 | muP :: Monad m => Parser m ()
321 | muP = do
322 |   Parsec.label (pure ()) "MU"
323 |   st <- Parsec.getState
324 |   Parsec.notFollowedBy (choice $ tryToCloseMarkP <$> closableMarks (openEnclosures st))
325 |     <|> Parsec.lookAhead (void unconstrainedP)
326 | 
327 | -- | Function called after the opening mark of an enclosure (i.e., after a
328 | -- character of kind other), and before a character of kind other.
329 | --
330 | -- It's identical to 'muP' with the exception that the mark recently open (top
331 | -- of the stack) is not taken into account.
332 | --
333 | -- It fails if an open enclosure (except the innermost one) can be closed (using
334 | -- 'closableMarks'), or a full unconstrained enclosure can be parsed, at current
335 | -- input.
336 | piP :: Monad m => Parser m ()
337 | piP = do
338 |   Parsec.label (pure ()) "PI"
339 |   st <- Parsec.getState
340 |   case openEnclosures st of
341 |     (_ : es) ->
342 |       Parsec.notFollowedBy (choice $ tryToCloseMarkP <$> closableMarks es)
343 |         <|> Parsec.lookAhead (void unconstrainedP)
344 |     [] -> pure ()
345 | 
346 | -- | Function called after a character of kind gap, and before a character of
347 | -- kind other.
348 | --
349 | -- It fails if any unconstrained open enclosure can be closed.
350 | sigmaP :: Monad m => Parser m ()
351 | sigmaP = do
352 |   Parsec.label (pure ()) "SIGMA"
353 |   st <- Parsec.getState
354 |   Parsec.notFollowedBy $
355 |     choice $ tryToCloseMarkP <$> filter isUnconstrained (openEnclosures st)
356 | 
357 | -- | Function called before the opening mark for an unconstrained enclosure
358 | -- (i.e., before a character of kind other)
359 | --
360 | -- It fails if we're trying to open an already open mark.
361 | --
362 | -- It takes into account the case that we can open both an arleady open mark and
363 | -- an extension of it (e.g., "@**@" is an extension of "@*@"). This function
364 | -- doesn't fail in this case, and the parser will try to open the extended mark.
365 | phiP :: Monad m => Parser m ()
366 | phiP = do
367 |   Parsec.label (pure ()) "PHI"
368 |   st <- Parsec.getState
369 |   Parsec.notFollowedBy (choice $ markP <$> openEnclosures st)
370 |     <|> void (Parsec.lookAhead (choice $ markP <$> concatMap extendedMarksOf (openEnclosures st)))
371 | 
372 | -- | Function called before the opening mark for a constrained enclosure (i.e.,
373 | -- before a character of kind other).
374 | --
375 | -- It fails if we're trying to open an already open mark.
376 | varphiP :: Monad m => Parser m ()
377 | varphiP = do
378 |   Parsec.label (pure ()) "VARPHI"
379 |   st <- Parsec.getState
380 |   Parsec.notFollowedBy $ choice $ markP <$> openEnclosures st
381 | 
382 | -- | Function called after a character of kind alphanum or other, and before the
383 | -- closing mark for a constrained enclosure (i.e., before a character of kind
384 | -- other).
385 | --
386 | -- It fails if we can close an open extended version (i.e., an unconstrained
387 | -- enclosure) of the mark we're trying to close, as closing the unconstrained
388 | -- enclosure takes priority.
389 | omegaP :: Monad m => Parser m ()
390 | omegaP = do
391 |   Parsec.label (pure ()) "OMEGA"
392 |   st <- Parsec.getState
393 |   Parsec.notFollowedBy $
394 |     choice $ tryToCloseMarkP <$> filter isUnconstrained (openEnclosures st)
395 | 
396 | closableMarks :: [Mark] -> [Mark]
397 | closableMarks ms = x ++ filter isUnconstrained y
398 |   where
399 |     (x, y) = span isConstrained ms
400 | 
401 | tryToCloseMarkP :: Monad m => Mark -> Parser m ()
402 | tryToCloseMarkP m = Parsec.try $ do
403 |   Parsec.label (pure ()) $ "tryToCloseMarkP: " ++ show m
404 |   _ <- markP $ closingMarkOf m
405 |   when (isConstrained m) $ do
406 |     Parsec.eof <|> void (Parsec.satisfy (not . isAlphaNum))
407 | 
408 | -- EBNF grammar terminal symbols  ----------------------------------------------
409 | 
410 | markP :: Monad m => Mark -> Parser m Mark
411 | markP m = m <$ Parsec.string (fromMark m)
412 | 
413 | otherP :: Monad m => Parser m Inline
414 | otherP =
415 |   Symbol . T.singleton
416 |     <$> Parsec.satisfy (\c -> not (isSpace c || isAlphaNum c))
417 | 
418 | -- | It parses as newlines the combinations:
419 | --
420 | --     * @CR@
421 | --     * @CR LF@
422 | --     * @LF@
423 | --
424 | -- This is the exact set parsed by @libasciidoc@. At the moment we do not
425 | -- consider the combination @LF CR@ (used in some systems, see
426 | -- https://en.wikipedia.org/wiki/Newline#Representation) as a single newline.
427 | newlineP :: Monad m => Parser m Inline
428 | newlineP =
429 |   wrap <$> newlineP' <*> optional Parsec.eof
430 |   where
431 |     wrap t Nothing = Newline t
432 |     wrap t (Just ()) = EndOfInline t
433 |     newlineP' :: Monad m => Parser m Text
434 |     newlineP' =
435 |       (<>) <$> singletonP '\r' <*> option "" (singletonP '\n')
436 |         <|> singletonP '\n'
437 |     singletonP :: Monad m => Char -> Parser m Text
438 |     singletonP c = T.singleton <$> Parsec.char c
439 | 
440 | alphaNumP :: Monad m => Parser m Inline
441 | alphaNumP =
442 |   AlphaNum . T.pack . NE.toList
443 |     <$> Parsec.label (some wordCharP) "A"
444 |   where
445 |     wordCharP = Parsec.satisfy $ \c ->
446 |       isAlphaNum c
447 | 
448 | -- Parser for element attribute (aka parameter) lists  ------------------------
449 | 
450 | newtype InlineAttributeList = InlineAttributeList Text
451 |   deriving newtype (Eq, Show)
452 |   deriving stock (Data, Typeable)
453 | 
454 | -- | This instance accepts the same kind of attributes than the instance for
455 | -- 'Text.AsciiDoc.Blocks.BlockPrefixItem's, including the shorthand syntax.
456 | --
457 | -- Attributes @title@ and @opts@/@options@ have currently no meaning for
458 | -- inlines, but they are still parsed and stored in the resulting 'Metadata'
459 | -- value.
460 | --
461 | -- __Divergence from Asciidoctor__: The aforementioned behavior implies that
462 | -- some inputs produce different results than Asciidoctor.
463 | -- Asciidoctor only honours @role@ and @id@ attributes and messes up the rest.
464 | instance ToMetadata InlineAttributeList UnparsedInline where
465 |   toMetadata (InlineAttributeList "") = mempty
466 |   toMetadata (InlineAttributeList t) =
467 |     case Parsec.parse attributeListP "" t of
468 |       Right attributes ->
469 |         toMetadata $ PositionedAttribute <$> NE.zip (1 :| [2 ..]) attributes
470 |       Left _ -> error "toMetadata @InlineAttributeList: parse should not fail"
471 | 
472 | defaultAttributeList :: InlineAttributeList
473 | defaultAttributeList = InlineAttributeList ""
474 | 
475 | -- | Accepts an square-bracket-enclosed string with no restrictions on the
476 | -- characters in between, provided that there is at least one such character.
477 | inlineAttributeListP :: Monad m => Parser m InlineAttributeList
478 | inlineAttributeListP =
479 |   flip Parsec.label "P" $
480 |     InlineAttributeList . T.pack
481 |       <$ Parsec.char '[' <*> someTill Parsec.anyChar (Parsec.char ']')
482 | 


--------------------------------------------------------------------------------
/src/Text/AsciiDoc/Blocks.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE DerivingStrategies #-}
  2 | {-# LANGUAGE GADTs #-}
  3 | {-# LANGUAGE OverloadedStrings #-}
  4 | 
  5 | -- |
  6 | -- Module      :  Text.AsciiDoc.Blocks
  7 | -- Copyright   :  © 2020–present Guillem Marpons
  8 | -- License     :  BSD-3-Clause
  9 | --
 10 | -- Maintainer  :  Guillem Marpons <gmarpons@mailbox.org>
 11 | -- Stability   :  experimental
 12 | -- Portability :  portable
 13 | --
 14 | -- This module contains Parsec-style parsers for AsciiDoc block elements.
 15 | --
 16 | -- It tries to be compatible with Asciidoctor.
 17 | module Text.AsciiDoc.Blocks
 18 |   ( -- * AST types
 19 |     HeaderLevel,
 20 |     ListType (..),
 21 |     ListCheckStatus (..),
 22 |     NestableBlockType (..),
 23 |     AdmonitionType (..),
 24 |     LiteralBlockType (..),
 25 |     LiteralIndentation (..),
 26 |     BlockMacroType (..),
 27 |     MacroArguments (..),
 28 |     IncludeOptions (..),
 29 |     AttributeId,
 30 |     Comment (..),
 31 |     MetadataItem (..),
 32 |     BlockPrefixItem (..),
 33 |     UnparsedBlockPrefix,
 34 |     Block (..),
 35 |     DocumentHeader (..),
 36 |     Document (..),
 37 | 
 38 |     -- * Parsers
 39 |     documentP,
 40 |     documentHeaderP,
 41 |     blocksP,
 42 |     blockP,
 43 |     blockPrefixP,
 44 |     attributeEntryP,
 45 |     blockIdP,
 46 |     blockAttributeListP,
 47 |     blockTitleP,
 48 |     nestableP,
 49 |     sectionHeaderP,
 50 |     paragraphP,
 51 |     danglingBlockPrefixP,
 52 |     initialBlankLinesP,
 53 |     blankLineP,
 54 | 
 55 |     -- * Parser type
 56 |     Parser,
 57 |     State (..),
 58 |     blockParserInitialState,
 59 | 
 60 |     -- * Helper low-level parsers
 61 |     lineP,
 62 |     lineP',
 63 |     lineOneOfP,
 64 |     lineNoneOfP,
 65 |     includeP,
 66 |     openDelimiterP,
 67 |     closeDelimiterP,
 68 |     satisfyToken,
 69 |   )
 70 | where
 71 | 
 72 | import Control.Arrow (Arrow ((&&&)))
 73 | import Control.Monad.Combinators hiding
 74 |   ( endBy1,
 75 |     sepBy1,
 76 |     sepEndBy1,
 77 |     some,
 78 |     someTill,
 79 |   )
 80 | import Control.Monad.Combinators.NonEmpty
 81 | import Data.Char (isSpace)
 82 | import Data.List.NonEmpty (NonEmpty (..), (<|))
 83 | import qualified Data.List.NonEmpty as NE
 84 | import qualified Data.Map as Map
 85 | import Data.Maybe (catMaybes)
 86 | import Data.Semigroup (Last (..))
 87 | import Data.Text (Text)
 88 | import qualified Data.Text as T
 89 | import Text.AsciiDoc.ElementAttributes
 90 | import qualified Text.AsciiDoc.LineParsers as LP
 91 | import Text.AsciiDoc.Metadata
 92 | import Text.AsciiDoc.SpecialChars
 93 | import Text.AsciiDoc.UnparsedInline
 94 | import Text.Parsec ((<?>))
 95 | import qualified Text.Parsec as Parsec
 96 | import Text.Parsec.Char (alphaNum, char, space)
 97 | 
 98 | -- | Greater or equal than 0.
 99 | -- A section header prefixed by one single "@=@" has level 0, and one with two
100 | -- "@=@"'s has level 1.
101 | -- This follows Asciidoctor's behavior.
102 | type HeaderLevel = Int
103 | 
104 | -- Text: can contain symbols, does not begin nor end with space.
105 | -- Text': can end with spaces.
106 | data ListType
107 |   = Description -- PEG: Space* Text Space* "::" (Space+ Text)? Space*
108 |   | Ordered -- PEG: Space* "."+ Space+ Text'
109 |   | Unordered (Maybe ListCheckStatus) -- PEG: Space* ("*"+ / "-"+ / ... ) Space+ Text'
110 |   | -- | Callouts can be conceptualized as belonging to the block they follow
111 |     -- from, but Asciidoctor treats them as an independent entity, very similar to
112 |     -- any other list.
113 |     Callout -- PEG: "<" (Num / ".") ">" Space+ Text'
114 |   deriving stock (Eq, Show)
115 | 
116 | data ListCheckStatus
117 |   = Checked
118 |   | Unchecked
119 |   deriving stock (Eq, Show)
120 | 
121 | data NestableBlockType
122 |   = Admonition AdmonitionType
123 |   | Example
124 |   | Sidebar
125 |   | Quote
126 |   | -- | Open block (delimited with "--") with non-standard name.
127 |     Other Text
128 |   deriving stock (Eq, Show)
129 | 
130 | data AdmonitionType
131 |   = Note
132 |   | Tip
133 |   | Important
134 |   | Caution
135 |   | Warning
136 |   deriving stock (Eq, Show)
137 | 
138 | -- | Literal block types are subject by default to substitution group
139 | -- "Verbatim", if not stated otherwise. The actual substitutions applied can be
140 | -- modified with the @subs@ block attribute, nonetheless.
141 | data LiteralBlockType
142 |   = Fenced
143 |   | Listing
144 |   | Literal LiteralIndentation
145 |   | -- | Default substitution group: None (aka Passthrough).
146 |     Passthrough
147 |   | Source
148 |   | -- | Default substitution group: None (aka Passthrough).
149 |     Stem
150 |   | Verse
151 |   deriving stock (Eq, Show)
152 | 
153 | -- | The @Int@ is the indentation of the block. If the @Literal@ block is not
154 | -- signaled by indentation (i.e., @....@ or @[literal]@ is used), then
155 | -- indentation is 0 (all preceding space is copied verbatim as content).
156 | newtype LiteralIndentation = LiteralIndentation Int
157 |   deriving newtype (Eq, Show)
158 | 
159 | data BlockMacroType
160 |   = ImageBlockMacro
161 |   | TableOfContentsMacro
162 |   | CustomBlockMacro
163 |   deriving stock (Eq, Show)
164 | 
165 | data MacroArguments = MacroArguments
166 |   deriving stock (Eq, Show)
167 | 
168 | data IncludeOptions
169 |   = IncludeOptions
170 |   deriving stock (Eq, Show)
171 | 
172 | type AttributeId = Text
173 | 
174 | data Comment
175 |   = LineCommentSequence (NonEmpty Text)
176 |   | BlockComment [Text]
177 |   deriving stock (Eq, Show)
178 | 
179 | -- | A Block can be preceded by an arbitrary (finite) list of @MetadataItem@s.
180 | --
181 | -- This is a syntactic element. Every value of this type comes from a source
182 | -- line.
183 | data MetadataItem a
184 |   = -- | A block can have more than one ID (aka anchor), and all of them can be
185 |     -- used in cross-references.
186 |     BlockId Text
187 |   | -- | A block can be preceded by any number of @BlockTitle@s (aka labels).
188 |     -- Only the last one is semantically relevant.
189 |     BlockTitle a
190 |   | -- | A block can be preceded by any number of @BlockAttributeList@s. For
191 |     -- positional arguments, only the last list is taken into account.
192 |     --
193 |     -- Some of the elements of the list can be name-value pairs.
194 |     --
195 |     -- TODO. Check if some attributes in the list can contain full inlines, as
196 |     -- it's the case with standalone (aka attribute entry) attributes.
197 |     BlockAttributeList Text
198 |   deriving stock (Eq, Show, Functor)
199 | 
200 | instance ToMetadata (MetadataItem UnparsedInline) UnparsedInline where
201 |   toMetadata (BlockId i) = mempty {metadataIds = [i]}
202 |   toMetadata (BlockTitle t) = mempty {metadataTitle = Just $ Last t}
203 |   toMetadata (BlockAttributeList "") = mempty
204 |   toMetadata (BlockAttributeList t) =
205 |     case Parsec.parse attributeListP "" t of
206 |       Right attributes ->
207 |         toMetadata $ PositionedAttribute <$> NE.zip (1 :| [2 ..]) attributes
208 |       Left _ -> error "toMetadata @(MetadataItem UnparsedInline): parse should not fail"
209 | 
210 | data BlockPrefixItem a
211 |   = MetadataItem (MetadataItem a)
212 |   | -- | A value of @Nothing@ means the attribute has been unset.
213 |     AttributeEntry AttributeId (Maybe a)
214 |   | Comment Comment
215 |   deriving stock (Eq, Show, Functor)
216 | 
217 | instance ToMetadata (BlockPrefixItem UnparsedInline) UnparsedInline where
218 |   toMetadata (MetadataItem x) = toMetadata x
219 |   toMetadata (AttributeEntry _ _) = mempty
220 |   toMetadata (Comment _) = mempty
221 | 
222 | type UnparsedBlockPrefix = [BlockPrefixItem UnparsedInline]
223 | 
224 | -- | A Block consists, syntactically, of one or more contiguous and complete
225 | -- lines of text.
226 | -- Some block types can contain other blocks.
227 | data Block a
228 |   = -- | Regular paragraph.
229 |     Paragraph UnparsedBlockPrefix a
230 |   | -- | This data constructor is not used during parsing, it requires an
231 |     -- additional "nesting" pass.
232 |     --
233 |     -- There can be a @Section@ inside an, e.g., open block, but it needs to
234 |     -- have style @discrete@.
235 |     Section UnparsedBlockPrefix HeaderLevel a [Block a]
236 |   | -- | A section header contains the same information as a section, except the
237 |     -- contained sequence of blocks.
238 |     --
239 |     -- After the "nesting" pass, all @SectionHeader@s but @discrete@ ones are
240 |     -- converted to proper @Section@s.
241 |     SectionHeader UnparsedBlockPrefix HeaderLevel a
242 |   | List ListType UnparsedBlockPrefix (NonEmpty (NonEmpty (Block a)))
243 |   | {- Table -- TODO. Many things here -}
244 |     {- ThematicBreak UnparsedBlockPrefix -}
245 |     {- PageBreak UnparsedBlockPrefix -}
246 | 
247 |     -- | Sequence of blocks of some defined type that allows nested blocks
248 |     -- inside (i.e. admonition, sidebar, example, quote, and open block with no
249 |     -- other standard type).
250 |     Nestable NestableBlockType UnparsedBlockPrefix [Block a]
251 |   | {- VerseBlock UnparsedBlockPrefix [a] -}
252 |     {- -- | Block type determines substitution group applied: @Verbatim@ or @None@
253 |     -- (aka passthrough).
254 |     --
255 |     -- TODO: Check that designed pipeline guarantees that pre-processor
256 |     -- directives are expanded (if not escaped) even in literal blocks, as
257 |     -- https://asciidoctor.org/docs/user-manual/#include-processing states.
258 |     LiteralBlock LiteralBlockType UnparsedBlockPrefix [Text] -}
259 | 
260 |     {- -- | Some macros accept block metadata, as e.g. @toc::[]@, that accepts
261 |     -- defining its title with @.TITLE@ syntax.
262 |     BlockMacro BlockMacroType UnparsedBlockPrefix MacroArguments -}
263 |     DanglingBlockPrefix UnparsedBlockPrefix
264 |   deriving stock (Eq, Show, Functor)
265 | 
266 | data DocumentHeader a
267 |   = DocumentHeader UnparsedBlockPrefix HeaderLevel a
268 |   deriving stock (Eq, Show, Functor)
269 | 
270 | data Document a
271 |   = Document (Maybe (DocumentHeader a)) [Block a]
272 |   deriving stock (Eq, Show, Functor)
273 | 
274 | -- Document {
275 | --   docPrefix :: UnparsedBlockPrefix,
276 | --   docTitle :: a,
277 | --   docBlocks :: [Block UnparsedInline]
278 | -- }
279 | 
280 | -- | Custom parser state for the parser for 'Block's.
281 | data State = State
282 |   { -- | A stack of open 'Nestable' blocks.
283 |     -- Innermost element is the top of the stack.
284 |     --
285 |     -- For every nestable block we store:
286 |     --
287 |     -- * The syntactic 'DelimiterChar' used to open the block.
288 |     --   This is what we need to recognize the matching closing delimiter.
289 |     -- * A stack of list item markers previously used in the current (possibly
290 |     --   nested, aka multi-level, list).
291 |     --   If the parser position is not currently on a list, the stack is empty.
292 |     --
293 |     -- The list representing the stack of open nestable blocks is non-empty: at
294 |     -- the bottom of the stack there is always a value representing the
295 |     -- top-level document (defined in 'State's @Monoid@ instance), so a
296 |     -- one-element stack indicates no nestable block has been open.
297 |     openBlocks :: NonEmpty (Marker DelimiterChar, [Marker ListChar]),
298 |     -- | An environment mapping attribute names to their values (i.e. inlines).
299 |     env :: Map.Map AttributeId Text
300 |   }
301 |   deriving stock (Eq, Show)
302 | 
303 | blockParserInitialState :: State
304 | blockParserInitialState =
305 |   State
306 |     { -- We use @'*' :* 0@ as an arbitrary value that is always present as the
307 |       -- bottom of the stack.
308 |       openBlocks = (AsteriskD :* 0, []) :| [],
309 |       env = mempty
310 |     }
311 | 
312 | type Parser m = Parsec.ParsecT [Text] State m
313 | 
314 | documentP :: Monad m => Parser m (Document UnparsedInline)
315 | documentP =
316 |   Document
317 |     <$ option () includeP
318 |     <* initialBlankLinesP
319 |     <*> optional (Parsec.try documentHeaderP <?> "document header")
320 |     <* many blankLineP
321 |     <*> blocksP
322 |     <* Parsec.eof
323 | 
324 | documentHeaderP :: Monad m => Parser m (DocumentHeader UnparsedInline)
325 | documentHeaderP = do
326 |   prefix <- option [] (NE.toList <$> blockPrefixP)
327 |   (level, i) <- rawSectionHeaderP
328 |   pure $ DocumentHeader prefix level i
329 | 
330 | blocksP :: Monad m => Parser m [Block UnparsedInline]
331 | blocksP = many (blockP []) <?> "blocks"
332 | 
333 | blockP :: Monad m => [LP.LineParser Text] -> Parser m (Block UnparsedInline)
334 | blockP extraParagraphFinalizers = do
335 |   prefix <- option [] (NE.toList <$> blockPrefixP)
336 |   blockP' prefix <?> "block"
337 |   where
338 |     blockP' prefix =
339 |       (nestableP prefix <?> "nestable")
340 |         <|> (sectionHeaderP prefix <?> "section header")
341 |         <|> (listP prefix <?> "list")
342 |         <|> (paragraphP prefix extraParagraphFinalizers <?> "paragraph")
343 |         <|> (danglingBlockPrefixP prefix <?> "dangling block prefix")
344 | 
345 | blockPrefixP :: Monad m => Parser m (NonEmpty (BlockPrefixItem UnparsedInline))
346 | blockPrefixP = some pBlockPrefixItem <?> "block prefix"
347 |   where
348 |     pBlockPrefixItem =
349 |       Comment <$> blockCommentP
350 |         <|> Comment <$> lineCommentSequenceP
351 |         <|> attributeEntryP
352 |         <|> blockIdP
353 |         <|> blockAttributeListP
354 |         <|> blockTitleP
355 | 
356 | blockCommentP :: Monad m => Parser m Comment
357 | blockCommentP = do
358 |   _ :* n <- choice $ fmap lineP' $ LP.runOfN 4 [SlashC]
359 |   -- We use here an alternative version of lineP, called lineP', that does not
360 |   -- try to handle pre-processor directives, as includes have no effect inside
361 |   -- block comments.
362 |   ts <-
363 |     manyTill (lineP' LP.anyRemainder) $
364 |       eitherP (lineP' (LP.count n SlashC)) Parsec.eof
365 |   option () includeP
366 |   _ <- many blankLineP
367 |   pure $ BlockComment ts
368 | {-# ANN blockCommentP ("HLint: ignore" :: String) #-}
369 | 
370 | lineCommentSequenceP :: Monad m => Parser m Comment
371 | lineCommentSequenceP =
372 |   LineCommentSequence <$> some lineCommentP <* many blankLineP
373 | 
374 | -- | Parses a line starting with *exactly* two '/'s.
375 | lineCommentP :: Monad m => Parser m Text
376 | lineCommentP =
377 |   lineP (LP.string "//" *> Parsec.notFollowedBy (char '/') *> LP.anyRemainder)
378 | 
379 | -- TODO. Add attribute continuations.
380 | attributeEntryP :: Monad m => Parser m (BlockPrefixItem UnparsedInline)
381 | attributeEntryP = attributeEntryP' <* many blankLineP
382 |   where
383 |     attributeEntryP' = do
384 |       (k, v) <-
385 |         lineP
386 |           ( (,) <$ LP.char ':' <*> LP.some alphaNum
387 |               <* LP.char ':'
388 |               <* LP.some space <*> LP.anyRemainder
389 |           )
390 |       Parsec.modifyState $ \st -> st {env = Map.insert k v (env st)}
391 |       pure $ AttributeEntry k $ Just (MarkupLine v :| [])
392 | 
393 | blockIdP :: Monad m => Parser m (BlockPrefixItem a)
394 | blockIdP = blockIdP' <* many blankLineP
395 |   where
396 |     blockIdP' = MetadataItem . BlockId <$> lineP LP.blockId
397 | 
398 | blockAttributeListP :: Monad m => Parser m (BlockPrefixItem a)
399 | blockAttributeListP = blockAttributeListP' <* many blankLineP
400 |   where
401 |     blockAttributeListP' =
402 |       MetadataItem . BlockAttributeList
403 |         <$> lineP LP.blockAttributeList
404 | 
405 | blockTitleP :: Monad m => Parser m (BlockPrefixItem UnparsedInline)
406 | blockTitleP = blockTitleP' <* many blankLineP
407 |   where
408 |     blockTitleP' =
409 |       MetadataItem . BlockTitle . (:| []) . MarkupLine
410 |         <$> lineP (LP.char '.' *> (LP.satisfy (not . isSpace) <> LP.anyRemainder))
411 | 
412 | -- | Parses a nestable delimited block.
413 | nestableP ::
414 |   Monad m =>
415 |   UnparsedBlockPrefix ->
416 |   Parser m (Block UnparsedInline)
417 | nestableP prefix = do
418 |   c <- openDelimiterP [AsteriskD, EqualsSignD]
419 |   bs <- manyTill (blockP []) $ eitherP closeDelimiterP Parsec.eof
420 |   _ <- many blankLineP
421 |   pure $ case c of
422 |     AsteriskD -> Nestable Sidebar prefix bs
423 |     HyphenD -> error "nestableP: HyphenD case not implemented yet"
424 |     EqualsSignD -> Nestable Example prefix bs
425 | 
426 | -- | Parses a section header and computes its level.
427 | --
428 | -- __POST-CONDITION__: The computed level is greater or equal than 0.
429 | sectionHeaderP ::
430 |   Monad m =>
431 |   UnparsedBlockPrefix ->
432 |   Parser m (Block UnparsedInline)
433 | sectionHeaderP prefix = do
434 |   -- Post-condition above follows from the fact that 'LP.runOfN 1' can only
435 |   -- return texts of length >= 1.
436 |   -- TODO. Use type-level Nat in 'Marker', so post-condition can be checked by
437 |   -- the compiler.
438 |   state <- Parsec.getState
439 |   case (NE.tail (openBlocks state), style) of
440 |     -- If parser is currently inside a nestable block (tail state.openBlocks is
441 |     -- not null), and the section header we're trying to parse has a style
442 |     -- different from "discrete", this parser must fail (and the text be
443 |     -- considered a regular paragraph).
444 |     (_ : _, Nothing) -> empty
445 |     (_ : _, Just (Last t)) | t /= "discrete" -> empty
446 |     -- In any other case: parse as a section header.
447 |     _ -> do
448 |       (level, text) <- rawSectionHeaderP
449 |       _ <- many blankLineP
450 |       pure $ SectionHeader prefix level text
451 |   where
452 |     style = metadataStyle $ toMetadata @_ @UnparsedInline $ prefix
453 | 
454 | rawSectionHeaderP :: Monad m => Parser m (HeaderLevel, UnparsedInline)
455 | rawSectionHeaderP =
456 |   (\(_c :* n, x) -> (n - 1, MarkupLine x :| []))
457 |     <$> lineP
458 |       ( (,)
459 |           <$> choice (LP.runOfN 1 [EqualsSignH]) <* some space
460 |             <*> (LP.satisfy (not . isSpace) <> LP.anyRemainder)
461 |       )
462 | 
463 | listP ::
464 |   (Monad m) =>
465 |   UnparsedBlockPrefix ->
466 |   Parser m (Block UnparsedInline)
467 | listP prefix =
468 |   listP' prefix <* many blankLineP
469 |   where
470 |     allUnorderedMarkers = LP.runOfN 1 [AsteriskL, HyphenL]
471 |     listP' prefix' = do
472 |       state <- Parsec.getState
473 |       let allowedMarkers = allUnorderedMarkers
474 |           -- Disallow as markers those markers already in use in the current
475 |           -- list tree of the innermost open block
476 |           disallowedMarkers = snd currentBlock
477 |           (currentBlock, otherBlocks) = NE.head &&& NE.tail $ openBlocks state
478 |       -- Accept item with a new marker
479 |       (marker@(c :* n), firstLine) <-
480 |         itemFirstLineP allowedMarkers disallowedMarkers
481 |       -- Add new marker to the state
482 |       Parsec.setState $
483 |         state
484 |           { openBlocks =
485 |               (fst currentBlock, marker : disallowedMarkers) :| otherBlocks
486 |           }
487 |       -- Complete the first item, using the already parsed first line
488 |       firstItem <-
489 |         itemP firstLine
490 |           <?> "first item " <> T.unpack (fromMarker marker)
491 |       -- Accept items with the same marker of the first item
492 |       nextItems <-
493 |         many
494 |           ( itemFirstLineP [LP.count n c] []
495 |               >>= itemP . snd <?> "item " <> T.unpack (fromMarker marker)
496 |           )
497 |       -- Recover state present at the beginning of the function. Functions like
498 |       -- pItem could have modified it.
499 |       Parsec.setState state
500 |       pure $ List (Unordered Nothing) prefix' (firstItem :| nextItems)
501 |     itemFirstLineP =
502 |       \x -> lineP . itemFirstLine x
503 |     itemFirstLine ::
504 |       [LP.LineParser (Marker ListChar)] ->
505 |       [Marker ListChar] ->
506 |       LP.LineParser (Marker ListChar, Text)
507 |     itemFirstLine allowedMarkers disallowedMarkers = do
508 |       _ <- many space
509 |       marker <- choice allowedMarkers
510 |       if marker `elem` disallowedMarkers
511 |         then empty
512 |         else do
513 |           _ <- some space
514 |           remainder <- LP.satisfy (not . isSpace) <> LP.anyRemainder
515 |           pure (marker, remainder)
516 |     itemP firstLine = do
517 |       -- As we are inside a list, any list marker is a finalizer of the current
518 |       -- item (no blank line needed)
519 |       nextLines <-
520 |         many $
521 |           paragraphContinuationP [snd <$> itemFirstLine allUnorderedMarkers []]
522 |       nextBlocks <-
523 |         option
524 |           []
525 |           ( ((: []) <$> sublistP <?> "sublist")
526 |               <|> catMaybes <$> many (listContinuationP <?> "list continuation")
527 |               <?> "next blocks"
528 |           )
529 |       _ <- many blankLineP
530 |       pure $ Paragraph [] (MarkupLine firstLine :| nextLines) :| nextBlocks
531 |     -- __Divergence DVB001 from Asciidoctor__. Before sublist:
532 |     --
533 |     --     * Full prefix (including attributes and block title) is allowed.
534 |     --
535 |     --     * Any number of blank lines is allowed.
536 |     --
537 |     -- Probably a linter should warn against any block prefix not preceded by
538 |     -- blank lines.
539 |     sublistP = Parsec.try $ do
540 |       _ <- many blankLineP
541 |       prefix' <- option [] (NE.toList <$> blockPrefixP)
542 |       listP prefix'
543 |     -- __Divergence DVB002 from Asciidoctor__: As in classic AsciiDoc, no blank
544 |     -- lines are allowed before the @+@ sign.
545 |     listContinuationP :: Monad m => Parser m (Maybe (Block UnparsedInline))
546 |     listContinuationP =
547 |       lineP (LP.char '+')
548 |         *> optional blankLineP
549 |         *> optional (blockP [snd <$> itemFirstLine allUnorderedMarkers []])
550 | 
551 | paragraphP ::
552 |   Monad m =>
553 |   UnparsedBlockPrefix ->
554 |   [LP.LineParser Text] ->
555 |   Parser m (Block UnparsedInline)
556 | paragraphP prefix extraFinalizers =
557 |   Paragraph prefix <$> paragraphP' <* many blankLineP
558 |   where
559 |     paragraphP' =
560 |       (:|) <$> firstP <*> many (paragraphContinuationP extraFinalizers <?> "paragraph continuation")
561 |     firstP :: Monad m => Parser m InputLine
562 |     firstP =
563 |       MarkupLine
564 |         <$> lineNoneOfP
565 |           -- Nestable
566 |           ( (fmap fromMarker <$> LP.runOfN 4 [AsteriskD, EqualsSignD])
567 |               <> [
568 |                    -- Blank line
569 |                    pure ""
570 |                  ]
571 |           )
572 | 
573 | -- Line comments (but not block comments!) can be contained in a paragraph.
574 | paragraphContinuationP :: Monad m => [LP.LineParser Text] -> Parser m InputLine
575 | paragraphContinuationP extraFinalizers =
576 |   CommentLine <$> lineCommentP
577 |     <|> MarkupLine
578 |       <$> lineNoneOfP
579 |         ( fmap Parsec.try extraFinalizers
580 |             -- Nestable
581 |             <> (fmap fromMarker <$> LP.runOfN 4 [AsteriskD, EqualsSignD])
582 |             -- BlockComment
583 |             <> (fmap fromMarker <$> LP.runOfN 4 [SlashC])
584 |             <> [
585 |                  -- BlockId, starts with "[["
586 |                  Parsec.try LP.blockId,
587 |                  -- BlockAttributeList, starts with "["
588 |                  "" <$ LP.blockAttributeList,
589 |                  -- New block introducer, '+'
590 |                  Parsec.try (LP.char '+'),
591 |                  -- BlankLine
592 |                  pure ""
593 |                ]
594 |         )
595 | 
596 | danglingBlockPrefixP ::
597 |   Monad m =>
598 |   UnparsedBlockPrefix ->
599 |   Parser m (Block UnparsedInline)
600 | danglingBlockPrefixP [] = empty
601 | danglingBlockPrefixP prefix =
602 |   DanglingBlockPrefix prefix
603 |     <$ Parsec.lookAhead (closeDelimiterP <|> Parsec.eof)
604 | 
605 | initialBlankLinesP :: Monad m => Parser m [Text]
606 | initialBlankLinesP = many blankLineP
607 | 
608 | blankLineP :: Monad m => Parser m Text
609 | blankLineP = lineP $ pure ""
610 | 
611 | -- | Argument can be a parser for the beginning of the line. Function checks
612 | -- that the part of the line not parsed is whitespace.
613 | --
614 | -- If the line is parsed successfully, this combinator checks if an include line
615 | -- follows. If that is the case it inserts the corresponding lines into the
616 | -- input stream of the parser.
617 | lineP :: Monad m => LP.LineParser a -> Parser m a
618 | lineP p = do
619 |   result <- lineP' p
620 |   option () includeP
621 |   pure result
622 | 
623 | -- | A version of 'lineP' that does not check if the line is followed by an
624 | -- include.
625 | lineP' :: Monad m => LP.LineParser a -> Parser m a
626 | lineP' p = satisfyToken $
627 |   \t -> f $ Parsec.parse (p <* many space <* Parsec.eof) "" t
628 |   where
629 |     f (Right l) = Just l
630 |     f (Left _) = Nothing
631 | 
632 | -- | @lineOneOfP ps@ accepts any line that consists in syntax described by any
633 | -- parser in @ps@ plus optional space characters.
634 | --
635 | -- This function runs parsers in @ps@ in sequence, with no lookahead. This means
636 | -- that the order in which parsers appear in @ps@ is relevant, and that
637 | -- 'Parsec.try' could be needed in some elements of @ps@ if their recognized
638 | -- languages share some prefix.
639 | --
640 | -- If blank lines need to be accepted, add @pure ""@ as the last element of
641 | -- @ps@.
642 | lineOneOfP :: Monad m => [LP.LineParser a] -> Parser m a
643 | lineOneOfP parsers = do
644 |   result <- lineOneOfP'
645 |   option () includeP
646 |   pure result
647 |   where
648 |     lineOneOfP' = satisfyToken $
649 |       \t ->
650 |         f $
651 |           Parsec.parse (choice parsers <* many space <* Parsec.eof) "" t
652 |     f (Right l) = Just l
653 |     f (Left _) = Nothing
654 | 
655 | -- | @lineNoneOfP ps@ accepts any line that does not consist in syntax described
656 | -- by any parser in @ps@ plus optional space characters.
657 | --
658 | -- This function runs parsers in @ps@ in sequence, with no lookahead. This means
659 | -- that the order in which parsers appear in @ps@ is relevant, and that
660 | -- 'Parsec.try' could be needed in some elements of @ps@ if their recognized
661 | -- languages share some prefix.
662 | --
663 | -- If blank lines need to excluded from acceptance, add @pure ""@ as the last
664 | -- element of @ps@.
665 | lineNoneOfP :: Monad m => [LP.LineParser a] -> Parser m Text
666 | lineNoneOfP parsers = do
667 |   result <- lineNoneOfP'
668 |   option () includeP
669 |   pure result
670 |   where
671 |     lineNoneOfP' = satisfyToken $
672 |       \t ->
673 |         f t $
674 |           Parsec.parse (choice parsers <* many space <* Parsec.eof) "" t
675 |     f _ (Right _) = Nothing
676 |     f t (Left _) = Just t
677 | 
678 | includeP :: Parser m ()
679 | includeP = empty
680 | 
681 | -- includeP = do
682 | --   (filename, arguments) <-
683 | --     lineP' $
684 | --       (,)
685 | --         <$ LP.string "include::"
686 | --         <*> LP.many (satisfy (/= '[')) <* char '['
687 | --         <*> LP.many (satisfy (/= ']')) <* char ']'
688 | --   current <- Parsec.getInput
689 | --   -- TODO. Read actual file content, this is a stub.
690 | --   Parsec.setInput $ ["// (STUB) include::" <> filename <> "[" <> arguments <> "]"] <> current
691 | --   -- Recursive call to handle the case in which the first line of the included
692 | --   -- file is also an include.
693 | --   option () includeP
694 | 
695 | openDelimiterP ::
696 |   Monad m =>
697 |   [SpecialChar DelimiterChar] ->
698 |   Parser m (SpecialChar DelimiterChar)
699 | openDelimiterP cs = do
700 |   -- Parsec.lookAhead needed here because in case we fail later on (because the
701 |   -- block is already open) we don't want to consume any input.
702 |   (c :* n) <- Parsec.lookAhead $ Parsec.try $ lineOneOfP (LP.runOfN 4 cs)
703 |   st <- Parsec.getState
704 |   -- If block is already open (the delimiter is in the stack of open blocks),
705 |   -- we're not opening it again, but fail. In case we don't fail, we consume the
706 |   -- line that was looked ahead above.
707 |   if (c :* n) `elem` (fst <$> openBlocks st)
708 |     then empty
709 |     else
710 |       ( do
711 |           -- Add found delimiter to the stack of open blocks
712 |           Parsec.putState (st {openBlocks = (c :* n, []) <| openBlocks st})
713 |           -- Complete consumption of the token (aka one line of input), and
714 |           -- following blanklines
715 |           _ <- lineP LP.anyRemainder
716 |           _ <- many blankLineP
717 |           pure c
718 |       )
719 | 
720 | closeDelimiterP :: Monad m => Parser m ()
721 | closeDelimiterP = do
722 |   st <- Parsec.getState
723 |   let (c :* n, _) = NE.head (openBlocks st)
724 |   case NE.tail (openBlocks st) of
725 |     -- In presence of DanglingBlockPrefix'es, we can try to pop from an
726 |     -- openBlocks stack that contains the initial open block only. We do nothing
727 |     -- in this case.
728 |     [] -> pure ()
729 |     b : bs -> do
730 |       -- If c :* n found in openBlocks stack, pop one element. Only consume line
731 |       -- from input (and look for includes) if the found delimiter matches
732 |       -- openBlocks' top.
733 |       _ <-
734 |         lineP (LP.count n c)
735 |           <|> Parsec.lookAhead
736 |             ( choice $
737 |                 fmap (\(c' :* n', _) -> lineP' (LP.count n' c')) (b : bs)
738 |             )
739 |       Parsec.putState $ st {openBlocks = b :| bs}
740 | 
741 | -- TODO: Add name to source positions (possibly storing current filename when an
742 | -- inline arrives).
743 | --
744 | -- TODO: Fix line numbering in the presence of includes.
745 | satisfyToken :: Monad m => (Text -> Maybe a) -> Parser m a
746 | satisfyToken matcher = Parsec.tokenPrim show updatePos matcher
747 |   where
748 |     updatePos :: Parsec.SourcePos -> Text -> [Text] -> Parsec.SourcePos
749 |     updatePos pos _ _ = Parsec.incSourceLine pos 1
750 | {-# ANN satisfyToken ("HLint: ignore" :: String) #-}
751 | 


--------------------------------------------------------------------------------
/test/Tests/Inlines.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings #-}
  2 | 
  3 | module Tests.Inlines
  4 |   ( inlineUnitTests,
  5 |   )
  6 | where
  7 | 
  8 | import Data.Either (isLeft)
  9 | import Data.Functor.Identity (Identity)
 10 | import Data.List.NonEmpty (NonEmpty (..))
 11 | import Data.Map.Internal (Identity (runIdentity))
 12 | import Data.Text (Text)
 13 | import Test.Hspec.Expectations.Pretty (shouldBe)
 14 | import Test.Tasty (TestTree, testGroup)
 15 | import Test.Tasty.HUnit (assertBool, assertFailure, testCase)
 16 | import Text.AsciiDoc.Inlines
 17 | import qualified Text.Parsec as Parsec
 18 | 
 19 | parseInline :: Text -> IO Inline
 20 | parseInline t =
 21 |   case parseTest inlinesP t of
 22 |     Right result -> pure result
 23 |     Left parseError -> assertFailure $ "Parser fails: " <> show parseError
 24 | 
 25 | parseTest :: Parser Identity a -> Text -> Either Parsec.ParseError a
 26 | parseTest parser t =
 27 |   runIdentity $ Parsec.runParserT parser inlineParserInitialState "" t
 28 | 
 29 | inlineUnitTests :: TestTree
 30 | inlineUnitTests =
 31 |   testGroup
 32 |     "inline unit tests"
 33 |     [ simpleInlineTests,
 34 |       boldInlineTests,
 35 |       unconstrainedStylingTests,
 36 |       mixedFormattingStyleTests,
 37 |       enclosureInlineAttributeListTests,
 38 |       punctuationSymbolTests
 39 |     ]
 40 | 
 41 | simpleInlineTests :: TestTree
 42 | simpleInlineTests =
 43 |   testGroup
 44 |     "simple inlines"
 45 |     [ testCase "single-line, no formatting marks" $ do
 46 |         i <- parseInline "some words with no format"
 47 |         i `shouldBe` InlineSeq (AlphaNum "some" :| [Space " ", AlphaNum "words", Space " ", AlphaNum "with", Space " ", AlphaNum "no", Space " ", AlphaNum "format"]),
 48 |       testCase "Space at the beginning" $
 49 |         assertBool "Parser doesn't fail" $
 50 |           isLeft $
 51 |             parseTest inlinesP " some words preceded by space",
 52 |       testCase "no formatting marks with space at the end" $ do
 53 |         i <- parseInline "some words with no format "
 54 |         i `shouldBe` InlineSeq (AlphaNum "some" :| [Space " ", AlphaNum "words", Space " ", AlphaNum "with", Space " ", AlphaNum "no", Space " ", AlphaNum "format", Space " "])
 55 |     ]
 56 | 
 57 | boldInlineTests :: TestTree
 58 | boldInlineTests =
 59 |   testGroup
 60 |     "bold inlines"
 61 |     [ testCase "single-line, bold string" $ do
 62 |         i <- parseInline "*a sentence all in strong*"
 63 |         i `shouldBe` InlineSeq (StyledText Bold defaultAttributeList "*" (AlphaNum "a" :| [Space " ", AlphaNum "sentence", Space " ", AlphaNum "all", Space " ", AlphaNum "in", Space " ", AlphaNum "strong"]) "*" :| []),
 64 |       testCase "single-line, bold string with space at the end" $ do
 65 |         i <- parseInline "*a sentence all in strong* "
 66 |         i `shouldBe` InlineSeq (StyledText Bold defaultAttributeList "*" (AlphaNum "a" :| [Space " ", AlphaNum "sentence", Space " ", AlphaNum "all", Space " ", AlphaNum "in", Space " ", AlphaNum "strong"]) "*" :| [Space " "]),
 67 |       testCase "a word in bold in the middle" $ do
 68 |         i <- parseInline "a *few* words"
 69 |         i `shouldBe` InlineSeq (AlphaNum "a" :| [Space " ", StyledText Bold defaultAttributeList "*" (AlphaNum "few" :| []) "*", Space " ", AlphaNum "words"]),
 70 |       testCase "two words in bold at the beginning" $ do
 71 |         i <- parseInline "*a few* words"
 72 |         i `shouldBe` InlineSeq (StyledText Bold defaultAttributeList "*" (AlphaNum "a" :| [Space " ", AlphaNum "few"]) "*" :| [Space " ", AlphaNum "words"]),
 73 |       testCase "two words in bold at the end" $ do
 74 |         i <- parseInline "a *few words*"
 75 |         i `shouldBe` InlineSeq (AlphaNum "a" :| [Space " ", StyledText Bold defaultAttributeList "*" (AlphaNum "few" :| [Space " ", AlphaNum "words"]) "*"]),
 76 |       testCase "bad bold ending with closing mark after space" $ do
 77 |         i <- parseInline "*a few *"
 78 |         i `shouldBe` InlineSeq (Symbol "*" :| [AlphaNum "a", Space " ", AlphaNum "few", Space " ", Symbol "*"]),
 79 |       testCase "bad bold ending with closing mark after space and before word" $ do
 80 |         i <- parseInline "*a *few words"
 81 |         i `shouldBe` InlineSeq (Symbol "*" :| [AlphaNum "a", Space " ", Symbol "*", AlphaNum "few", Space " ", AlphaNum "words"]),
 82 |       testCase "asterisk in the middle of bold phrase" $ do
 83 |         i <- parseInline "*a *few words*"
 84 |         i `shouldBe` InlineSeq (StyledText Bold defaultAttributeList "*" (AlphaNum "a" :| [Space " ", Symbol "*", AlphaNum "few", Space " ", AlphaNum "words"]) "*" :| []),
 85 |       testCase "single asterisk in phrase" $ do
 86 |         i <- parseInline "a *few words"
 87 |         i `shouldBe` InlineSeq (AlphaNum "a" :| [Space " ", Symbol "*", AlphaNum "few", Space " ", AlphaNum "words"]),
 88 |       testCase "single asterisk in phrase with space at the end" $ do
 89 |         i <- parseInline "a *few words "
 90 |         i `shouldBe` InlineSeq (AlphaNum "a" :| [Space " ", Symbol "*", AlphaNum "few", Space " ", AlphaNum "words", Space " "]),
 91 |       testCase "single asterisk in the middle of a word" $ do
 92 |         i <- parseInline "a f*ew words"
 93 |         i `shouldBe` InlineSeq (AlphaNum "a" :| [Space " ", AlphaNum "f", Symbol "*", AlphaNum "ew", Space " ", AlphaNum "words"]),
 94 |       testCase "an asterisk in the middle of a word in strong phrase" $ do
 95 |         i <- parseInline "*a f*ew* words"
 96 |         i `shouldBe` InlineSeq (StyledText Bold defaultAttributeList "*" (AlphaNum "a" :| [Space " ", AlphaNum "f", Symbol "*", AlphaNum "ew"]) "*" :| [Space " ", AlphaNum "words"]),
 97 |       testCase "single asterisk followed by word and space" $ do
 98 |         i <- parseInline "*a "
 99 |         i `shouldBe` InlineSeq (Symbol "*" :| [AlphaNum "a", Space " "])
100 |     ]
101 | 
102 | unconstrainedStylingTests :: TestTree
103 | unconstrainedStylingTests =
104 |   testGroup
105 |     "unconstrained formatting marks"
106 |     [ testCase "nesting unconstrained inside constrained, with no space" $ do
107 |         i <- parseInline "#a##b##c#"
108 |         i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| [StyledText Custom defaultAttributeList "##" (AlphaNum "b" :| []) "##", AlphaNum "c"]) "#" :| []),
109 |       testCase "unpaired opening mark before correctly closed unconstrained pair" $ do
110 |         i <- parseInline "#a##b##"
111 |         i `shouldBe` InlineSeq (Symbol "#" :| [AlphaNum "a", StyledText Custom defaultAttributeList "##" (AlphaNum "b" :| []) "##"]),
112 |       --  Divergence from Asciidoctor.
113 |       testCase "(## #a ##b## c# ##)" $ do
114 |         i <- parseInline "## #a ##b## c# ##"
115 |         -- let alt_result = InlineSeq (StyledText Custom defaultAttributeList "##" (Space " " :| [StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| [Space " ", StyledText Custom defaultAttributeList "##" (AlphaNum "b" :| []) "##", Space " ", AlphaNum "c"]) "#", Space " "]) "##" :| [])
116 |         let result = InlineSeq (StyledText Custom defaultAttributeList "##" (Space " " :| [Symbol "#", AlphaNum "a", Space " "]) "##" :| [AlphaNum "b", StyledText Custom defaultAttributeList "##" (Space " " :| [AlphaNum "c", Symbol "#", Space " "]) "##"])
117 |         i `shouldBe` result,
118 |       -- libasciidoc fails test
119 |       testCase "unpaired opening mark directly inside unconstrained pair" $ do
120 |         i <- parseInline "## #a b ##"
121 |         let result = InlineSeq (StyledText Custom defaultAttributeList "##" (Space " " :| [Symbol "#", AlphaNum "a", Space " ", AlphaNum "b", Space " "]) "##" :| [])
122 |         i `shouldBe` result,
123 |       --  Divergence DVI001 from Asciidoctor.
124 |       testCase "three opening marks, and three closing marks, with no space" $ do
125 |         i <- parseInline "###a###"
126 |         -- Asciidoctor:
127 |         -- let alt_result = InlineSeq (StyledText Custom defaultAttributeList "##" (StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| []) "#" :| []) "##" :| [])
128 |         let result = InlineSeq (StyledText Custom defaultAttributeList "##" (Symbol "#" :| [AlphaNum "a"]) "##" :| [Symbol "#"])
129 |         i `shouldBe` result,
130 |       -- Divergence DVI001 from Asciidoctor.
131 |       testCase "three opening marks, and three closing marks, two words and no space" $ do
132 |         i <- parseInline "###a b###"
133 |         -- Asciidoctor:
134 |         -- let alt_result = InlineSeq (StyledText Custom defaultAttributeList "##" (StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| [Space " ", AlphaNum "b"]) "#" :| []) "##" :| [])
135 |         let result = InlineSeq (StyledText Custom defaultAttributeList "##" (Symbol "#" :| [AlphaNum "a", Space " ", AlphaNum "b"]) "##" :| [Symbol "#"])
136 |         i `shouldBe` result,
137 |       -- Divergence from Asciidoctor.
138 |       -- TODO. This one is different from DVI001. Asciidoctor's outputs is just
139 |       -- wrong. It looks like it works because it doesn't respect the nesting
140 |       -- rule.
141 |       testCase "three opening marks, and three closing marks, two words and no space" $ do
142 |         -- Asciidoctor:
143 |         -- let alt_result = InlineSeq (StyledText Custom defaultAttributeList "#" (StyledText Custom defaultAttributeList "##" (AlphaNum "a" :| [Space " ", AlphaNum "b", Space " "]) "##" :| []) "#" :| [])
144 |         i <- parseInline "###a b ###"
145 |         let result = InlineSeq (StyledText Custom defaultAttributeList "##" (Symbol "#" :| [AlphaNum "a", Space " ", AlphaNum "b", Space " "]) "##" :| [Symbol "#"])
146 |         i `shouldBe` result,
147 |       testCase "unconstrained styled word" $ do
148 |         i <- parseInline "##a##"
149 |         i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "##" (AlphaNum "a" :| []) "##" :| []),
150 |       testCase "unconstrained formatting pair with inner space on the left" $ do
151 |         i <- parseInline "## a##"
152 |         i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "##" (Space " " :| [AlphaNum "a"]) "##" :| []),
153 |       testCase "unconstrained formatting pair with inner space on the right" $ do
154 |         i <- parseInline "##a ##"
155 |         i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "##" (AlphaNum "a" :| [Space " "]) "##" :| []),
156 |       testCase "unconstrained formatting pair with inner space on both sides" $ do
157 |         i <- parseInline "## a ##"
158 |         i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "##" (Space " " :| [AlphaNum "a", Space " "]) "##" :| []),
159 |       testCase "unbalanced marks, one missing on the left" $ do
160 |         i <- parseInline "##a#"
161 |         i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "#" (Symbol "#" :| [AlphaNum "a"]) "#" :| []),
162 |       testCase "unbalanced marks, one missing on the right" $ do
163 |         i <- parseInline "#a##"
164 |         i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| []) "#" :| [Symbol "#"]),
165 |       testCase "nesting constrained directly inside unconstrained, with space" $ do
166 |         i <- parseInline "## #a# ##"
167 |         i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "##" (Space " " :| [StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| []) "#", Space " "]) "##" :| []),
168 |       testCase "nesting unconstrained inside constrained, with spaces everywhere" $ do
169 |         i <- parseInline "#a ## b ## c#"
170 |         let result = InlineSeq (StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| [Space " ", StyledText Custom defaultAttributeList "##" (Space " " :| [AlphaNum "b", Space " "]) "##", Space " ", AlphaNum "c"]) "#" :| [])
171 |         i `shouldBe` result,
172 |       testCase "bad nesting: constrained directly inside constrained" $ do
173 |         i <- parseInline "#a #b# c#"
174 |         i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| [Space " ", Symbol "#", AlphaNum "b"]) "#" :| [Space " ", AlphaNum "c", Symbol "#"]),
175 |       testCase "two unconstrained pairs, false nesting" $ do
176 |         i <- parseInline "##a ##b## c##"
177 |         i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "##" (AlphaNum "a" :| [Space " "]) "##" :| [AlphaNum "b", StyledText Custom defaultAttributeList "##" (Space " " :| [AlphaNum "c"]) "##"]),
178 |       testCase "unpaired opening mark inside unconstrained pair" $ do
179 |         i <- parseInline "##a #b ##"
180 |         let result = InlineSeq (StyledText Custom defaultAttributeList "##" (AlphaNum "a" :| [Space " ", Symbol "#", AlphaNum "b", Space " "]) "##" :| [])
181 |         i `shouldBe` result,
182 |       testCase "unpaired closing mark inside unconstrained pair" $ do
183 |         i <- parseInline "##a b# ##"
184 |         i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "##" (AlphaNum "a" :| [Space " ", AlphaNum "b", Symbol "#", Space " "]) "##" :| []),
185 |       testCase "unpaired mark between space inside unconstrained pair" $ do
186 |         i <- parseInline "##a # ##"
187 |         i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "##" (AlphaNum "a" :| [Space " ", Symbol "#", Space " "]) "##" :| []),
188 |       testCase "double mark ending constrained enclosure" $ do
189 |         i <- parseInline "#a ## b#"
190 |         i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| [Space " ", Symbol "#"]) "#" :| [Space " ", AlphaNum "b", Symbol "#"]),
191 |       testCase "nesting constrained inside unconstrained, with spaces on both sides" $ do
192 |         i <- parseInline "##a #b# c##"
193 |         i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "##" (AlphaNum "a" :| [Space " ", StyledText Custom defaultAttributeList "#" (AlphaNum "b" :| []) "#", Space " ", AlphaNum "c"]) "##" :| []),
194 |       testCase "nesting unconstrained inside constrained, with spaces on both sides" $ do
195 |         i <- parseInline "#a ##b## c#"
196 |         i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| [Space " ", StyledText Custom defaultAttributeList "##" (AlphaNum "b" :| []) "##", Space " ", AlphaNum "c"]) "#" :| []),
197 |       testCase "unpaired opening mark inside constrained pair" $ do
198 |         i <- parseInline "#a ##b c#"
199 |         i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| [Space " ", Symbol "#", Symbol "#", AlphaNum "b", Space " ", AlphaNum "c"]) "#" :| []),
200 |       testCase "unpaired opening mark after correcty closed pair and some noise" $ do
201 |         i <- parseInline "##a##b# ##"
202 |         i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "##" (AlphaNum "a" :| []) "##" :| [AlphaNum "b", Symbol "#", Space " ", Symbol "#", Symbol "#"]),
203 |       testCase "three-mark sequence" $ do
204 |         i <- parseInline "###"
205 |         i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "#" (Symbol "#" :| []) "#" :| []),
206 |       testCase "four-mark sequence" $ do
207 |         i <- parseInline "####"
208 |         i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "#" (Symbol "#" :| []) "#" :| [Symbol "#"]),
209 |       testCase "five-mark sequence" $ do
210 |         i <- parseInline "#####"
211 |         let result = InlineSeq (StyledText Custom defaultAttributeList "##" (Symbol "#" :| []) "##" :| [])
212 |         i `shouldBe` result,
213 |       -- Divergence DVI002 from Asciidoctor: Asciidoctor accepts nested
214 |       -- enclosures with empty inline inside.
215 |       testCase "six-mark sequence" $ do
216 |         i <- parseInline "######"
217 |         let result = InlineSeq (StyledText Custom defaultAttributeList "##" (Symbol "#" :| []) "##" :| [Symbol "#"])
218 |         i `shouldBe` result,
219 |       testCase "unconstrained enclosure with a single space" $ do
220 |         i <- parseInline "## ##"
221 |         i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "##" (Space " " :| []) "##" :| []),
222 |       testCase "two three-mark sequences separated by space" $ do
223 |         i <- parseInline "### ###"
224 |         -- Asciidoctor seems inconsistent w.r.t "###a###" because in this case
225 |         -- it flips U and C.
226 |         let result = InlineSeq (StyledText Custom defaultAttributeList "##" (Symbol "#" :| [Space " "]) "##" :| [Symbol "#"])
227 |         i `shouldBe` result,
228 |       -- Divergence DVI003 from Asciidoctor: Asciidoctor doesn't parse "...#b
229 |       -- c#" as an enclosure, but it does if the mark of both enclosures is
230 |       -- different. We consider the behavior here more uniform.
231 |       testCase "two constrained enclosures with the same mark an no space in between" $ do
232 |         i <- parseInline "#a##b c#"
233 |         i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| []) "#" :| [StyledText Custom defaultAttributeList "#" (AlphaNum "b" :| [Space " ", AlphaNum "c"]) "#"])
234 |     ]
235 | 
236 | mixedFormattingStyleTests :: TestTree
237 | mixedFormattingStyleTests =
238 |   testGroup
239 |     "mixed formatting styles"
240 |     [ testCase "constrained monospace inside italics, inside bold, no space" $ do
241 |         i <- parseInline "*_`a`_*"
242 |         i `shouldBe` InlineSeq (StyledText Bold defaultAttributeList "*" (StyledText Italic defaultAttributeList "_" (StyledText Monospace defaultAttributeList "`" (AlphaNum "a" :| []) "`" :| []) "_" :| []) "*" :| []),
243 |       testCase "constrained monospace inside unconstrained italics, inside bold, no space" $ do
244 |         i <- parseInline "*__`a`__*"
245 |         i `shouldBe` InlineSeq (StyledText Bold defaultAttributeList "*" (StyledText Italic defaultAttributeList "__" (StyledText Monospace defaultAttributeList "`" (AlphaNum "a" :| []) "`" :| []) "__" :| []) "*" :| []),
246 |       testCase "constrained monospace inside unconstrained italics, inside bold, some space" $ do
247 |         i <- parseInline "*a __ `b` __c*"
248 |         i `shouldBe` InlineSeq (StyledText Bold defaultAttributeList "*" (AlphaNum "a" :| [Space " ", StyledText Italic defaultAttributeList "__" (Space " " :| [StyledText Monospace defaultAttributeList "`" (AlphaNum "b" :| []) "`", Space " "]) "__", AlphaNum "c"]) "*" :| []),
249 |       testCase "constrained italics interrupted inside custom" $ do
250 |         i <- parseInline "#a _b#"
251 |         i `shouldBe` InlineSeq (StyledText Custom defaultAttributeList "#" (AlphaNum "a" :| [Space " ", Symbol "_", AlphaNum "b"]) "#" :| []),
252 |       -- Divergence from Asciidoctor: Asciidoctor doesn't respect nesting rule.
253 |       testCase "(## _a ##b## c_ ##)" $ do
254 |         i <- parseInline "## _a ##b## c_ ##"
255 |         -- let alt_result = InlineSeq (StyledText Custom defaultAttributeList "##" (Space " " :| [StyledText Italic defaultAttributeList "_" (AlphaNum "a" :| [Space " ", StyledText Custom defaultAttributeList "##" (AlphaNum "b" :| []) "##", Space " ", AlphaNum "c"]) "_", Space " "]) "##" :| [])
256 |         let result = InlineSeq (StyledText Custom defaultAttributeList "##" (Space " " :| [Symbol "_", AlphaNum "a", Space " "]) "##" :| [AlphaNum "b", StyledText Custom defaultAttributeList "##" (Space " " :| [AlphaNum "c", Symbol "_", Space " "]) "##"])
257 |         i `shouldBe` result,
258 |       testCase "(__ #a##b##c# __)" $ do
259 |         i <- parseInline "__ #a##b##c# __"
260 |         let result = InlineSeq (StyledText Italic (InlineAttributeList "") "__" (Space " " :| [StyledText Custom (InlineAttributeList "") "#" (AlphaNum "a" :| [StyledText Custom (InlineAttributeList "") "##" (AlphaNum "b" :| []) "##", AlphaNum "c"]) "#", Space " "]) "__" :| [])
261 |         i `shouldBe` result,
262 |       testCase "(#a *##b##*#)" $ do
263 |         i <- parseInline "#a *##b##*#"
264 |         let result = InlineSeq (StyledText Custom (InlineAttributeList "") "#" (AlphaNum "a" :| [Space " ", StyledText Bold (InlineAttributeList "") "*" (StyledText Custom (InlineAttributeList "") "##" (AlphaNum "b" :| []) "##" :| []) "*"]) "#" :| [])
265 |         i `shouldBe` result,
266 |       -- Divergence from Asciidoctor: Asciidoctor doesn't respect nesting rule.
267 |       testCase "(*a _b* c_)" $ do
268 |         i <- parseInline "*a _b* c_"
269 |         -- let alt_result = InlineSeq (Symbol "*" :| [AlphaNum "a", Space " ", StyledText Italic defaultAttributeList "_" (AlphaNum "b" :| [Symbol "*", Space " ", AlphaNum "c"]) "_"])
270 |         let result = InlineSeq (StyledText Bold defaultAttributeList "*" (AlphaNum "a" :| [Space " ", Symbol "_", AlphaNum "b"]) "*" :| [Space " ", AlphaNum "c", Symbol "_"])
271 |         i `shouldBe` result,
272 |       -- Divergence from Asciidoctor: Asciidoctor doesn't respect nesting rule.
273 |       testCase "(**a __b** c__)" $ do
274 |         i <- parseInline "**a __b** c__"
275 |         -- let alt_result = InlineSeq (Symbol "*" :| [Symbol "*", AlphaNum "a", Space " ", StyledText Italic defaultAttributeList "__" (AlphaNum "b" :| [Symbol "*", Symbol "*", Space " ", AlphaNum "c"]) "__"])
276 |         let result = InlineSeq (StyledText Bold defaultAttributeList "**" (AlphaNum "a" :| [Space " ", Symbol "_", Symbol "_", AlphaNum "b"]) "**" :| [Space " ", AlphaNum "c", Symbol "_", Symbol "_"])
277 |         i `shouldBe` result,
278 |       -- Divergence from Asciidoctor: Asciidoctor doesn't respect nesting rule.
279 |       testCase "(**a _b** c_)" $ do
280 |         i <- parseInline "**a _b** c_"
281 |         i `shouldBe` InlineSeq (StyledText Bold defaultAttributeList "**" (AlphaNum "a" :| [Space " ", Symbol "_", AlphaNum "b"]) "**" :| [Space " ", AlphaNum "c", Symbol "_"]),
282 |       -- Divergence from Asciidoctor: Asciidoctor doesn't respect nesting rule.
283 |       testCase "(*a __b* c__)" $ do
284 |         i <- parseInline "*a __b* c__"
285 |         i `shouldBe` InlineSeq (Symbol "*" :| [AlphaNum "a", Space " ", StyledText Italic defaultAttributeList "__" (AlphaNum "b" :| [Symbol "*", Space " ", AlphaNum "c"]) "__"]),
286 |       testCase "(#*a _b* c_#)" $ do
287 |         i <- parseInline "#*a _b* c_#"
288 |         let result = InlineSeq (StyledText Custom (InlineAttributeList "") "#" (StyledText Bold (InlineAttributeList "") "*" (AlphaNum "a" :| [Space " ", Symbol "_", AlphaNum "b"]) "*" :| [Space " ", AlphaNum "c", Symbol "_"]) "#" :| [])
289 |         i `shouldBe` result,
290 |       testCase "(#**a __b** c__#)" $ do
291 |         i <- parseInline "#**a __b** c__#"
292 |         let result = InlineSeq (StyledText Custom (InlineAttributeList "") "#" (StyledText Bold (InlineAttributeList "") "**" (AlphaNum "a" :| [Space " ", Symbol "_", Symbol "_", AlphaNum "b"]) "**" :| [Space " ", AlphaNum "c", Symbol "_", Symbol "_"]) "#" :| [])
293 |         i `shouldBe` result,
294 |       testCase "(#**a _b** c_#)" $ do
295 |         i <- parseInline "#**a _b** c_#"
296 |         let result = InlineSeq (StyledText Custom (InlineAttributeList "") "#" (StyledText Bold (InlineAttributeList "") "**" (AlphaNum "a" :| [Space " ", Symbol "_", AlphaNum "b"]) "**" :| [Space " ", AlphaNum "c", Symbol "_"]) "#" :| [])
297 |         i `shouldBe` result,
298 |       testCase "(#*a __b* c__#)" $ do
299 |         i <- parseInline "#*a __b* c__#"
300 |         let result = InlineSeq (StyledText Custom (InlineAttributeList "") "#" (Symbol "*" :| [AlphaNum "a", Space " ", StyledText Italic (InlineAttributeList "") "__" (AlphaNum "b" :| [Symbol "*", Space " ", AlphaNum "c"]) "__"]) "#" :| [])
301 |         i `shouldBe` result,
302 |       testCase "(##*a _b* c_##)" $ do
303 |         i <- parseInline "##*a _b* c_##"
304 |         let result = InlineSeq (StyledText Custom (InlineAttributeList "") "##" (StyledText Bold (InlineAttributeList "") "*" (AlphaNum "a" :| [Space " ", Symbol "_", AlphaNum "b"]) "*" :| [Space " ", AlphaNum "c", Symbol "_"]) "##" :| [])
305 |         i `shouldBe` result,
306 |       testCase "(##**a __b** c__##)" $ do
307 |         i <- parseInline "##**a __b** c__##"
308 |         let result = InlineSeq (StyledText Custom (InlineAttributeList "") "##" (StyledText Bold (InlineAttributeList "") "**" (AlphaNum "a" :| [Space " ", Symbol "_", Symbol "_", AlphaNum "b"]) "**" :| [Space " ", AlphaNum "c", Symbol "_", Symbol "_"]) "##" :| [])
309 |         i `shouldBe` result,
310 |       testCase "(##**a _b** c_##)" $ do
311 |         i <- parseInline "##**a _b** c_##"
312 |         let result = InlineSeq (StyledText Custom (InlineAttributeList "") "##" (StyledText Bold (InlineAttributeList "") "**" (AlphaNum "a" :| [Space " ", Symbol "_", AlphaNum "b"]) "**" :| [Space " ", AlphaNum "c", Symbol "_"]) "##" :| [])
313 |         i `shouldBe` result,
314 |       testCase "(##*a __b* c__##)" $ do
315 |         i <- parseInline "##*a __b* c__##"
316 |         let result = InlineSeq (StyledText Custom (InlineAttributeList "") "##" (Symbol "*" :| [AlphaNum "a", Space " ", StyledText Italic (InlineAttributeList "") "__" (AlphaNum "b" :| [Symbol "*", Space " ", AlphaNum "c"]) "__"]) "##" :| [])
317 |         i `shouldBe` result,
318 |       testCase "(##*a _#b c#* d_##)" $ do
319 |         i <- parseInline "##*a _#b c#* d_##"
320 |         let result = InlineSeq (StyledText Custom (InlineAttributeList "") "##" (StyledText Bold (InlineAttributeList "") "*" (AlphaNum "a" :| [Space " ", Symbol "_", StyledText Custom (InlineAttributeList "") "#" (AlphaNum "b" :| [Space " ", AlphaNum "c"]) "#"]) "*" :| [Space " ", AlphaNum "d", Symbol "_"]) "##" :| [])
321 |         i `shouldBe` result,
322 |       testCase "(##**a __#b c#** d__##)" $ do
323 |         i <- parseInline "##**a __#b c#** d__##"
324 |         let result = InlineSeq (StyledText Custom (InlineAttributeList "") "##" (StyledText Bold (InlineAttributeList "") "**" (AlphaNum "a" :| [Space " ", Symbol "_", Symbol "_", StyledText Custom (InlineAttributeList "") "#" (AlphaNum "b" :| [Space " ", AlphaNum "c"]) "#"]) "**" :| [Space " ", AlphaNum "d", Symbol "_", Symbol "_"]) "##" :| [])
325 |         i `shouldBe` result,
326 |       testCase "(##**a _#b c#** d_##)" $ do
327 |         i <- parseInline "##**a _#b c#** d_##"
328 |         let result = InlineSeq (StyledText Custom (InlineAttributeList "") "##" (StyledText Bold (InlineAttributeList "") "**" (AlphaNum "a" :| [Space " ", Symbol "_", StyledText Custom (InlineAttributeList "") "#" (AlphaNum "b" :| [Space " ", AlphaNum "c"]) "#"]) "**" :| [Space " ", AlphaNum "d", Symbol "_"]) "##" :| [])
329 |         i `shouldBe` result,
330 |       testCase "(##*a __#b c#* d__##)" $ do
331 |         i <- parseInline "##*a __#b c#* d__##"
332 |         let result = InlineSeq (StyledText Custom (InlineAttributeList "") "##" (Symbol "*" :| [AlphaNum "a", Space " ", StyledText Italic (InlineAttributeList "") "__" (StyledText Custom (InlineAttributeList "") "#" (AlphaNum "b" :| [Space " ", AlphaNum "c"]) "#" :| [Symbol "*", Space " ", AlphaNum "d"]) "__"]) "##" :| [])
333 |         i `shouldBe` result
334 |     ]
335 | 
336 | enclosureInlineAttributeListTests :: TestTree
337 | enclosureInlineAttributeListTests =
338 |   testGroup
339 |     "parameter lists for enclosures"
340 |     [ testCase "simple parameter list for constrained enclosure" $ do
341 |         i <- parseInline "[underline]#a#"
342 |         i `shouldBe` InlineSeq (StyledText Custom (InlineAttributeList "underline") "#" (AlphaNum "a" :| []) "#" :| []),
343 |       testCase "simple parameter list for unconstrained enclosure" $ do
344 |         i <- parseInline "[underline]##a##"
345 |         i `shouldBe` InlineSeq (StyledText Custom (InlineAttributeList "underline") "##" (AlphaNum "a" :| []) "##" :| []),
346 |       testCase "parameter list for unconstrained enclosure preceded by word" $ do
347 |         i <- parseInline "a[underline]##b##"
348 |         i `shouldBe` InlineSeq (AlphaNum "a" :| [StyledText Custom (InlineAttributeList "underline") "##" (AlphaNum "b" :| []) "##"]),
349 |       testCase "parameter list for unconstrained enclosure followed by word" $ do
350 |         i <- parseInline "[underline]##a##b"
351 |         i `shouldBe` InlineSeq (StyledText Custom (InlineAttributeList "underline") "##" (AlphaNum "a" :| []) "##" :| [AlphaNum "b"]),
352 |       testCase "failed parameter list for constrained enclosure preceded by word" $ do
353 |         i <- parseInline "a[underline]#b#"
354 |         i `shouldBe` InlineSeq (AlphaNum "a" :| [Symbol "[", AlphaNum "underline", Symbol "]", StyledText Custom (InlineAttributeList "") "#" (AlphaNum "b" :| []) "#"]),
355 |       testCase "failed parameter list for constrained enclosure followed by word" $ do
356 |         i <- parseInline "[underline]#a#b"
357 |         i `shouldBe` InlineSeq (Symbol "[" :| [AlphaNum "underline", Symbol "]", Symbol "#", AlphaNum "a", Symbol "#", AlphaNum "b"]),
358 |       testCase "unfinished (failed) parameter list" $ do
359 |         i <- parseInline "[underline"
360 |         i `shouldBe` InlineSeq (Symbol "[" :| [AlphaNum "underline"]),
361 |       testCase "isolated (failed) parameter list" $ do
362 |         i <- parseInline "[underline] #a#"
363 |         i `shouldBe` InlineSeq (Symbol "[" :| [AlphaNum "underline", Symbol "]", Space " ", StyledText Custom (InlineAttributeList "") "#" (AlphaNum "a" :| []) "#"])
364 |     ]
365 | 
366 | punctuationSymbolTests :: TestTree
367 | punctuationSymbolTests =
368 |   testGroup
369 |     "punctuation symbols"
370 |     [ testCase "(a.) sentence finalizer" $ do
371 |         i <- parseInline "a."
372 |         i `shouldBe` InlineSeq (AlphaNum "a" :| [Symbol "."]),
373 |       testCase "(#a.#) sentence finalizer at end of constrained enclosure" $ do
374 |         i <- parseInline "#a.#"
375 |         i `shouldBe` InlineSeq (StyledText Custom (InlineAttributeList "") "#" (AlphaNum "a" :| [Symbol "."]) "#" :| []),
376 |       testCase "(##a.##) sentence finalizer at end of unconstrained enclosure" $ do
377 |         i <- parseInline "##a.##"
378 |         i `shouldBe` InlineSeq (StyledText Custom (InlineAttributeList "") "##" (AlphaNum "a" :| [Symbol "."]) "##" :| []),
379 |       testCase "(*#a#, b*) comma after enclosure, nested inside another enclosure" $ do
380 |         i <- parseInline "*#a#, b*"
381 |         i `shouldBe` InlineSeq (StyledText Bold (InlineAttributeList "") "*" (StyledText Custom (InlineAttributeList "") "#" (AlphaNum "a" :| []) "#" :| [Symbol ",", Space " ", AlphaNum "b"]) "*" :| []),
382 |       testCase "(#a#,#b#) punctuation symbol separating two enclosures" $ do
383 |         i <- parseInline "#a#,#b#"
384 |         i `shouldBe` InlineSeq (StyledText Custom (InlineAttributeList "") "#" (AlphaNum "a" :| []) "#" :| [Symbol ",", StyledText Custom (InlineAttributeList "") "#" (AlphaNum "b" :| []) "#"]),
385 |       testCase "(#¿a?#) question marks inside constrained enclosure" $ do
386 |         i <- parseInline "#¿a?#"
387 |         i `shouldBe` InlineSeq (StyledText Custom (InlineAttributeList "") "#" (Symbol "¿" :| [AlphaNum "a", Symbol "?"]) "#" :| []),
388 |       testCase "(##¿a?##) question marks inside unconstrained enclosure" $ do
389 |         i <- parseInline "##¿a?##"
390 |         i `shouldBe` InlineSeq (StyledText Custom (InlineAttributeList "") "##" (Symbol "¿" :| [AlphaNum "a", Symbol "?"]) "##" :| []),
391 |       testCase "(¿#a#?) question marks outside constrained enclosure" $ do
392 |         i <- parseInline "¿#a#?"
393 |         i `shouldBe` InlineSeq (Symbol "¿" :| [StyledText Custom (InlineAttributeList "") "#" (AlphaNum "a" :| []) "#", Symbol "?"]),
394 |       testCase "(¿##a##?) question marks outside unconstrained enclosure" $ do
395 |         i <- parseInline "¿##a##?"
396 |         i `shouldBe` InlineSeq (Symbol "¿" :| [StyledText Custom (InlineAttributeList "") "##" (AlphaNum "a" :| []) "##", Symbol "?"])
397 |     ]
398 | 


--------------------------------------------------------------------------------
/test/Tests/Blocks.hs:
--------------------------------------------------------------------------------
   1 | {-# LANGUAGE OverloadedStrings #-}
   2 | 
   3 | module Tests.Blocks
   4 |   ( parseTest,
   5 |     blockUnitTests,
   6 |   )
   7 | where
   8 | 
   9 | import Data.Functor.Identity (Identity (runIdentity))
  10 | import Data.List.NonEmpty (NonEmpty ((:|)))
  11 | import Data.Semigroup (Last (..))
  12 | import Data.Text (Text)
  13 | import Test.Hspec.Expectations.Pretty (shouldBe)
  14 | import Test.Tasty (TestTree, testGroup)
  15 | import Test.Tasty.HUnit (assertFailure, testCase)
  16 | import Text.AsciiDoc.Blocks
  17 | import Text.AsciiDoc.Metadata
  18 | import Text.AsciiDoc.UnparsedInline
  19 | import qualified Text.Parsec as Parsec
  20 | 
  21 | parseDocument :: [Text] -> IO (Document UnparsedInline)
  22 | parseDocument t = case parseTest documentP t of
  23 |   Right x -> pure x
  24 |   Left parseError -> assertFailure $ "Parser fails: " <> show parseError
  25 | 
  26 | parseBlocks :: [Text] -> IO [Block UnparsedInline]
  27 | parseBlocks t = case parseTest blocksP t of
  28 |   Right x -> pure x
  29 |   Left parseError -> assertFailure $ "Parser fails: " <> show parseError
  30 | 
  31 | parseTest :: Parser Identity a -> [Text] -> Either Parsec.ParseError a
  32 | parseTest parser tokens =
  33 |   runIdentity $ Parsec.runParserT parser blockParserInitialState "" tokens
  34 | 
  35 | blockUnitTests :: TestTree
  36 | blockUnitTests =
  37 |   testGroup
  38 |     "block unit tests"
  39 |     [ documentUnitTests,
  40 |       blockCornerCaseUnitTests,
  41 |       paragraphUnitTests,
  42 |       sectionHeaderUnitTests,
  43 |       danglingBlockPrefixUnitTests,
  44 |       nestableUnitTests,
  45 |       unorderedListUnitTests,
  46 |       nestedListsUnitTests,
  47 |       listContinuationUnitTests,
  48 |       commentUnitTests
  49 |     ]
  50 | 
  51 | documentUnitTests :: TestTree
  52 | documentUnitTests =
  53 |   testGroup
  54 |     "document unit tests"
  55 |     [ testCase "empty document" $ do
  56 |         p <-
  57 |           parseDocument
  58 |             []
  59 |         p `shouldBe` Document Nothing [],
  60 |       testCase "single document title" $ do
  61 |         p <-
  62 |           parseDocument
  63 |             [ "= Foo"
  64 |             ]
  65 |         p
  66 |           `shouldBe` Document
  67 |             (Just (DocumentHeader [] 0 (MarkupLine "Foo" :| [])))
  68 |             [],
  69 |       testCase "single document title with prefix" $ do
  70 |         p <-
  71 |           parseDocument
  72 |             [ "[[Foo]]",
  73 |               "= Bar"
  74 |             ]
  75 |         p
  76 |           `shouldBe` Document
  77 |             (Just (DocumentHeader [MetadataItem (BlockId "Foo")] 0 (MarkupLine "Bar" :| [])))
  78 |             [],
  79 |       testCase "document title with prefix followed by paragraph" $ do
  80 |         p <-
  81 |           parseDocument
  82 |             [ "[[Foo]]",
  83 |               "= Bar",
  84 |               "Baz"
  85 |             ]
  86 |         p
  87 |           `shouldBe` Document
  88 |             (Just (DocumentHeader [MetadataItem (BlockId "Foo")] 0 (MarkupLine "Bar" :| [])))
  89 |             [Paragraph [] (MarkupLine "Baz" :| [])],
  90 |       testCase "document title followed by empty line and paragraph" $ do
  91 |         p <-
  92 |           parseDocument
  93 |             [ "= Foo",
  94 |               "",
  95 |               "Bar"
  96 |             ]
  97 |         p
  98 |           `shouldBe` Document
  99 |             (Just (DocumentHeader [] 0 (MarkupLine "Foo" :| [])))
 100 |             [Paragraph [] (MarkupLine "Bar" :| [])],
 101 |       testCase "document title with level 1 followed by paragraph" $ do
 102 |         p <-
 103 |           parseDocument
 104 |             [ "== Foo",
 105 |               "Bar"
 106 |             ]
 107 |         p
 108 |           `shouldBe` Document
 109 |             (Just (DocumentHeader [] 1 (MarkupLine "Foo" :| [])))
 110 |             [Paragraph [] (MarkupLine "Bar" :| [])],
 111 |       testCase "document with no title and a paragraph" $ do
 112 |         p <-
 113 |           parseDocument
 114 |             [ "Foo"
 115 |             ]
 116 |         p
 117 |           `shouldBe` Document
 118 |             Nothing
 119 |             [Paragraph [] (MarkupLine "Foo" :| [])],
 120 |       testCase "single dangling block prefix" $ do
 121 |         p <- parseDocument ["[[Foo]]"]
 122 |         p
 123 |           `shouldBe` Document Nothing [DanglingBlockPrefix [MetadataItem (BlockId "Foo")]],
 124 |       testCase "dangling block prefix at eof and after empty line" $ do
 125 |         p <-
 126 |           parseDocument
 127 |             [ "",
 128 |               "[[Foo]]"
 129 |             ]
 130 |         p `shouldBe` Document Nothing [DanglingBlockPrefix [MetadataItem (BlockId "Foo")]]
 131 |     ]
 132 | 
 133 | blockCornerCaseUnitTests :: TestTree
 134 | blockCornerCaseUnitTests =
 135 |   testGroup
 136 |     "block corner case unit tests"
 137 |     [ testCase "empty block list" $ do
 138 |         p <-
 139 |           parseBlocks
 140 |             []
 141 |         p `shouldBe` [],
 142 |       testCase "single empty line" $ do
 143 |         p <-
 144 |           parseBlocks
 145 |             [ ""
 146 |             ]
 147 |         p `shouldBe` []
 148 |     ]
 149 | 
 150 | paragraphUnitTests :: TestTree
 151 | paragraphUnitTests =
 152 |   testGroup
 153 |     "paragraph unit tests"
 154 |     [ testCase "one line paragraph" $ do
 155 |         p <-
 156 |           parseBlocks
 157 |             [ "Foo"
 158 |             ]
 159 |         p `shouldBe` [Paragraph [] (MarkupLine "Foo" :| [])],
 160 |       testCase "two lines paragraph" $ do
 161 |         p <-
 162 |           parseBlocks
 163 |             [ "Foo",
 164 |               "Bar"
 165 |             ]
 166 |         p `shouldBe` [Paragraph [] (MarkupLine "Foo" :| [MarkupLine "Bar"])],
 167 |       testCase "paragraph followed by empty line" $ do
 168 |         p <-
 169 |           parseBlocks
 170 |             [ "Foo",
 171 |               ""
 172 |             ]
 173 |         p `shouldBe` [Paragraph [] (MarkupLine "Foo" :| [])],
 174 |       testCase "paragraph with indented following lines" $ do
 175 |         p <-
 176 |           parseBlocks
 177 |             [ "Foo",
 178 |               "  Bar",
 179 |               " Baz"
 180 |             ]
 181 |         p `shouldBe` [Paragraph [] (MarkupLine "Foo" :| [MarkupLine "  Bar", MarkupLine " Baz"])],
 182 |       testCase "two paragraphs" $ do
 183 |         p <-
 184 |           parseBlocks
 185 |             [ "Foo",
 186 |               "",
 187 |               "Bar"
 188 |             ]
 189 |         p
 190 |           `shouldBe` [ Paragraph [] (MarkupLine "Foo" :| []),
 191 |                        Paragraph [] (MarkupLine "Bar" :| [])
 192 |                      ],
 193 |       testCase "paragraph with block prefix" $ do
 194 |         p <-
 195 |           parseBlocks
 196 |             [ ".Foo",
 197 |               "// Comment",
 198 |               "[Foo#Bar%Baz]",
 199 |               "Foo"
 200 |             ]
 201 |         p
 202 |           `shouldBe` [ Paragraph
 203 |                          [ MetadataItem (BlockTitle (MarkupLine "Foo" :| [])),
 204 |                            Comment (LineCommentSequence (" Comment" :| [])),
 205 |                            MetadataItem (BlockAttributeList "Foo#Bar%Baz")
 206 |                          ]
 207 |                          (MarkupLine "Foo" :| [])
 208 |                      ],
 209 |       testCase "paragraph with block prefix containing empty lines" $ do
 210 |         p <-
 211 |           parseBlocks
 212 |             [ ".Foo",
 213 |               "",
 214 |               "[Foo#Bar%Baz]",
 215 |               "",
 216 |               "Foo"
 217 |             ]
 218 |         p
 219 |           `shouldBe` [ Paragraph
 220 |                          [ MetadataItem (BlockTitle (MarkupLine "Foo" :| [])),
 221 |                            MetadataItem (BlockAttributeList "Foo#Bar%Baz")
 222 |                          ]
 223 |                          (MarkupLine "Foo" :| [])
 224 |                      ],
 225 |       testCase "paragraph followed by dangling block prefix" $ do
 226 |         p <-
 227 |           parseBlocks
 228 |             [ "Foo",
 229 |               "",
 230 |               ".Foo"
 231 |             ]
 232 |         p
 233 |           `shouldBe` [ Paragraph
 234 |                          []
 235 |                          (MarkupLine "Foo" :| []),
 236 |                        DanglingBlockPrefix
 237 |                          [ MetadataItem (BlockTitle (MarkupLine "Foo" :| []))
 238 |                          ]
 239 |                      ],
 240 |       testCase "paragraph with second line resembling block title" $ do
 241 |         p <-
 242 |           parseBlocks
 243 |             [ "Foo",
 244 |               ".Bar"
 245 |             ]
 246 |         p
 247 |           `shouldBe` [ Paragraph
 248 |                          []
 249 |                          (MarkupLine "Foo" :| [MarkupLine ".Bar"])
 250 |                      ]
 251 |     ]
 252 | 
 253 | sectionHeaderUnitTests :: TestTree
 254 | sectionHeaderUnitTests =
 255 |   testGroup
 256 |     "section header unit tests"
 257 |     [ testCase "level 0 section header" $ do
 258 |         p <-
 259 |           parseBlocks
 260 |             [ "= Foo"
 261 |             ]
 262 |         p
 263 |           `shouldBe` [ SectionHeader
 264 |                          []
 265 |                          0
 266 |                          (MarkupLine "Foo" :| [])
 267 |                      ],
 268 |       testCase "level 1 section header" $ do
 269 |         p <-
 270 |           parseBlocks
 271 |             [ "== Foo"
 272 |             ]
 273 |         p
 274 |           `shouldBe` [ SectionHeader
 275 |                          []
 276 |                          1
 277 |                          (MarkupLine "Foo" :| [])
 278 |                      ],
 279 |       testCase "level 2 section header" $ do
 280 |         p <-
 281 |           parseBlocks
 282 |             [ "=== Foo"
 283 |             ]
 284 |         p
 285 |           `shouldBe` [ SectionHeader
 286 |                          []
 287 |                          2
 288 |                          (MarkupLine "Foo" :| [])
 289 |                      ],
 290 |       testCase "section header with two words" $ do
 291 |         p <-
 292 |           parseBlocks
 293 |             [ "= Foo bar"
 294 |             ]
 295 |         p
 296 |           `shouldBe` [ SectionHeader
 297 |                          []
 298 |                          0
 299 |                          (MarkupLine "Foo bar" :| [])
 300 |                      ],
 301 |       testCase "section header beginning with space" $ do
 302 |         p <-
 303 |           parseBlocks
 304 |             [ "=  Foo"
 305 |             ]
 306 |         p
 307 |           `shouldBe` [ SectionHeader
 308 |                          []
 309 |                          0
 310 |                          (MarkupLine "Foo" :| [])
 311 |                      ],
 312 |       testCase "section header followed by paragraph" $ do
 313 |         p <-
 314 |           parseBlocks
 315 |             [ "= Foo",
 316 |               "Bar"
 317 |             ]
 318 |         p
 319 |           `shouldBe` [ SectionHeader
 320 |                          []
 321 |                          0
 322 |                          (MarkupLine "Foo" :| []),
 323 |                        Paragraph [] (MarkupLine "Bar" :| [])
 324 |                      ],
 325 |       testCase "section header followed by empty line and paragraph" $ do
 326 |         p <-
 327 |           parseBlocks
 328 |             [ "= Foo",
 329 |               "",
 330 |               "Bar"
 331 |             ]
 332 |         p
 333 |           `shouldBe` [ SectionHeader
 334 |                          []
 335 |                          0
 336 |                          (MarkupLine "Foo" :| []),
 337 |                        Paragraph [] (MarkupLine "Bar" :| [])
 338 |                      ],
 339 |       testCase "section header with block prefix" $ do
 340 |         p <-
 341 |           parseBlocks
 342 |             [ ".Foo",
 343 |               "= Foo"
 344 |             ]
 345 |         p
 346 |           `shouldBe` [ SectionHeader
 347 |                          [MetadataItem (BlockTitle (MarkupLine "Foo" :| []))]
 348 |                          0
 349 |                          (MarkupLine "Foo" :| [])
 350 |                      ],
 351 |       testCase "section header followed by paragraph with block prefix" $ do
 352 |         p <-
 353 |           parseBlocks
 354 |             [ "= Foo",
 355 |               ".Bar",
 356 |               "Bar"
 357 |             ]
 358 |         p
 359 |           `shouldBe` [ SectionHeader
 360 |                          []
 361 |                          0
 362 |                          (MarkupLine "Foo" :| []),
 363 |                        Paragraph
 364 |                          [MetadataItem (BlockTitle (MarkupLine "Bar" :| []))]
 365 |                          (MarkupLine "Bar" :| [])
 366 |                      ],
 367 |       testCase "discrete section header" $ do
 368 |         p <-
 369 |           parseBlocks
 370 |             [ "[discrete]",
 371 |               "= Foo"
 372 |             ]
 373 |         p
 374 |           `shouldBe` [ SectionHeader
 375 |                          [MetadataItem (BlockAttributeList "discrete")]
 376 |                          0
 377 |                          (MarkupLine "Foo" :| [])
 378 |                      ]
 379 |         case p of
 380 |           (SectionHeader prefix _ _) : _ ->
 381 |             toMetadata prefix
 382 |               `shouldBe` (mempty @(Metadata UnparsedInline))
 383 |                 { metadataStyle =
 384 |                     Just
 385 |                       ( Last
 386 |                           { getLast = "discrete"
 387 |                           }
 388 |                       )
 389 |                 }
 390 |           _ -> error "test case: discrete section header",
 391 |       -- TODO. Must change when indented literal paragraphs are implemented.
 392 |       testCase "false section header (space before '=')" $ do
 393 |         p <-
 394 |           parseBlocks
 395 |             [ " = Foo"
 396 |             ]
 397 |         p `shouldBe` [Paragraph [] (MarkupLine " = Foo" :| [])]
 398 |     ]
 399 | 
 400 | danglingBlockPrefixUnitTests :: TestTree
 401 | danglingBlockPrefixUnitTests =
 402 |   testGroup
 403 |     "dangling block prefix unit tests"
 404 |     [ testCase "dangling block prefix at eof and after paragraph" $ do
 405 |         p <-
 406 |           parseBlocks
 407 |             [ "Foo",
 408 |               "[[Foo]]"
 409 |             ]
 410 |         p
 411 |           `shouldBe` [ Paragraph
 412 |                          []
 413 |                          (MarkupLine "Foo" :| []),
 414 |                        DanglingBlockPrefix [MetadataItem (BlockId "Foo")]
 415 |                      ],
 416 |       testCase "dangling block prefix at end of example block" $ do
 417 |         p <-
 418 |           parseBlocks
 419 |             [ "====",
 420 |               "Foo",
 421 |               "",
 422 |               "[[Bar]]",
 423 |               "===="
 424 |             ]
 425 |         p
 426 |           `shouldBe` [ Nestable
 427 |                          Example
 428 |                          []
 429 |                          [ Paragraph [] (MarkupLine "Foo" :| []),
 430 |                            DanglingBlockPrefix [MetadataItem (BlockId "Bar")]
 431 |                          ]
 432 |                      ]
 433 |     ]
 434 | 
 435 | nestableUnitTests :: TestTree
 436 | nestableUnitTests =
 437 |   testGroup
 438 |     "nestable block unit tests"
 439 |     [ testCase "simple example block" $ do
 440 |         p <-
 441 |           parseBlocks
 442 |             [ "====",
 443 |               "Foo",
 444 |               "===="
 445 |             ]
 446 |         p
 447 |           `shouldBe` [ Nestable
 448 |                          Example
 449 |                          []
 450 |                          [Paragraph [] (MarkupLine "Foo" :| [])]
 451 |                      ],
 452 |       testCase "simple sidebar block" $ do
 453 |         p <-
 454 |           parseBlocks
 455 |             [ "****",
 456 |               "Foo",
 457 |               "****"
 458 |             ]
 459 |         p
 460 |           `shouldBe` [ Nestable
 461 |                          Sidebar
 462 |                          []
 463 |                          [Paragraph [] (MarkupLine "Foo" :| [])]
 464 |                      ],
 465 |       testCase "example block containing two paragraphs" $ do
 466 |         p <-
 467 |           parseBlocks
 468 |             [ "====",
 469 |               "Foo",
 470 |               "",
 471 |               "Bar",
 472 |               "===="
 473 |             ]
 474 |         p
 475 |           `shouldBe` [ Nestable
 476 |                          Example
 477 |                          []
 478 |                          [ Paragraph [] (MarkupLine "Foo" :| []),
 479 |                            Paragraph [] (MarkupLine "Bar" :| [])
 480 |                          ]
 481 |                      ],
 482 |       testCase "example block with block title" $ do
 483 |         p <-
 484 |           parseBlocks
 485 |             [ ".Foo",
 486 |               "====",
 487 |               "Bar",
 488 |               "===="
 489 |             ]
 490 |         p
 491 |           `shouldBe` [ Nestable
 492 |                          Example
 493 |                          [MetadataItem (BlockTitle (MarkupLine "Foo" :| []))]
 494 |                          [Paragraph [] (MarkupLine "Bar" :| [])]
 495 |                      ],
 496 |       testCase "sidebar nested into example block" $ do
 497 |         p <-
 498 |           parseBlocks
 499 |             [ "====",
 500 |               "****",
 501 |               "Bar",
 502 |               "****",
 503 |               "===="
 504 |             ]
 505 |         p
 506 |           `shouldBe` [ Nestable
 507 |                          Example
 508 |                          []
 509 |                          [ Nestable
 510 |                              Sidebar
 511 |                              []
 512 |                              [ Paragraph [] (MarkupLine "Bar" :| [])
 513 |                              ]
 514 |                          ]
 515 |                      ],
 516 |       testCase "sidebar nested into example block and following paragraph" $ do
 517 |         p <-
 518 |           parseBlocks
 519 |             [ "====",
 520 |               "Foo",
 521 |               "****",
 522 |               "Bar",
 523 |               "****",
 524 |               "===="
 525 |             ]
 526 |         p
 527 |           `shouldBe` [ Nestable
 528 |                          Example
 529 |                          []
 530 |                          [ Paragraph [] (MarkupLine "Foo" :| []),
 531 |                            Nestable
 532 |                              Sidebar
 533 |                              []
 534 |                              [ Paragraph [] (MarkupLine "Bar" :| [])
 535 |                              ]
 536 |                          ]
 537 |                      ],
 538 |       testCase "sidebar nested into example block and following empty line" $ do
 539 |         p <-
 540 |           parseBlocks
 541 |             [ "====",
 542 |               "",
 543 |               "****",
 544 |               "Bar",
 545 |               "****",
 546 |               "===="
 547 |             ]
 548 |         p
 549 |           `shouldBe` [ Nestable
 550 |                          Example
 551 |                          []
 552 |                          [ Nestable
 553 |                              Sidebar
 554 |                              []
 555 |                              [ Paragraph [] (MarkupLine "Bar" :| [])
 556 |                              ]
 557 |                          ]
 558 |                      ],
 559 |       testCase "example block nested into example block and following paragraph" $ do
 560 |         p <-
 561 |           parseBlocks
 562 |             [ "====",
 563 |               "Foo",
 564 |               "======",
 565 |               "Bar",
 566 |               "======",
 567 |               "===="
 568 |             ]
 569 |         p
 570 |           `shouldBe` [ Nestable
 571 |                          Example
 572 |                          []
 573 |                          [ Paragraph [] (MarkupLine "Foo" :| []),
 574 |                            Nestable
 575 |                              Example
 576 |                              []
 577 |                              [ Paragraph [] (MarkupLine "Bar" :| [])
 578 |                              ]
 579 |                          ]
 580 |                      ],
 581 |       testCase "non-closed sidebar nested into example block" $ do
 582 |         p <-
 583 |           parseBlocks
 584 |             [ "====",
 585 |               "Foo",
 586 |               "****",
 587 |               "Bar",
 588 |               "===="
 589 |             ]
 590 |         p
 591 |           `shouldBe` [ Nestable
 592 |                          Example
 593 |                          []
 594 |                          [ Paragraph [] (MarkupLine "Foo" :| []),
 595 |                            Nestable
 596 |                              Sidebar
 597 |                              []
 598 |                              [ Paragraph [] (MarkupLine "Bar" :| [])
 599 |                              ]
 600 |                          ]
 601 |                      ],
 602 |       testCase "non-closed example block nested into example block" $ do
 603 |         p <-
 604 |           parseBlocks
 605 |             [ "====",
 606 |               "Foo",
 607 |               "======",
 608 |               "Bar",
 609 |               "===="
 610 |             ]
 611 |         p
 612 |           `shouldBe` [ Nestable
 613 |                          Example
 614 |                          []
 615 |                          [ Paragraph [] (MarkupLine "Foo" :| []),
 616 |                            Nestable
 617 |                              Example
 618 |                              []
 619 |                              [ Paragraph [] (MarkupLine "Bar" :| [])
 620 |                              ]
 621 |                          ]
 622 |                      ]
 623 |     ]
 624 | 
 625 | unorderedListUnitTests :: TestTree
 626 | unorderedListUnitTests =
 627 |   testGroup
 628 |     "unordered list unit tests"
 629 |     [ testCase "simple unordered list using '-' (hyphen)" $ do
 630 |         p <-
 631 |           parseBlocks
 632 |             [ "- Foo",
 633 |               "- Bar",
 634 |               "- Baz"
 635 |             ]
 636 |         p
 637 |           `shouldBe` [ List
 638 |                          (Unordered Nothing)
 639 |                          []
 640 |                          ( (Paragraph [] (MarkupLine "Foo" :| []) :| [])
 641 |                              :| [ Paragraph [] (MarkupLine "Bar" :| []) :| [],
 642 |                                   Paragraph [] (MarkupLine "Baz" :| []) :| []
 643 |                                 ]
 644 |                          )
 645 |                      ],
 646 |       testCase "simple unordered list using '*' (asterisk)" $ do
 647 |         p <-
 648 |           parseBlocks
 649 |             [ "* Foo",
 650 |               "* Bar",
 651 |               "* Baz"
 652 |             ]
 653 |         p
 654 |           `shouldBe` [ List
 655 |                          (Unordered Nothing)
 656 |                          []
 657 |                          ( (Paragraph [] (MarkupLine "Foo" :| []) :| [])
 658 |                              :| [ Paragraph [] (MarkupLine "Bar" :| []) :| [],
 659 |                                   Paragraph [] (MarkupLine "Baz" :| []) :| []
 660 |                                 ]
 661 |                          )
 662 |                      ],
 663 |       testCase "unordered list with irregular indentation" $ do
 664 |         p <-
 665 |           parseBlocks
 666 |             [ "* Foo",
 667 |               "  * Bar",
 668 |               " * Baz"
 669 |             ]
 670 |         p
 671 |           `shouldBe` [ List
 672 |                          (Unordered Nothing)
 673 |                          []
 674 |                          ( (Paragraph [] (MarkupLine "Foo" :| []) :| [])
 675 |                              :| [ Paragraph [] (MarkupLine "Bar" :| []) :| [],
 676 |                                   Paragraph [] (MarkupLine "Baz" :| []) :| []
 677 |                                 ]
 678 |                          )
 679 |                      ],
 680 |       testCase "unordered list with indented first item" $ do
 681 |         p <-
 682 |           parseBlocks
 683 |             [ "Foo",
 684 |               "",
 685 |               "  * Bar",
 686 |               "* Baz"
 687 |             ]
 688 |         p
 689 |           `shouldBe` [ Paragraph [] (MarkupLine "Foo" :| []),
 690 |                        List
 691 |                          (Unordered Nothing)
 692 |                          []
 693 |                          ( (Paragraph [] (MarkupLine "Bar" :| []) :| [])
 694 |                              :| [ Paragraph [] (MarkupLine "Baz" :| []) :| []
 695 |                                 ]
 696 |                          )
 697 |                      ],
 698 |       testCase "unordered list with irregular line spacing" $ do
 699 |         p <-
 700 |           parseBlocks
 701 |             [ "* Foo",
 702 |               "",
 703 |               "",
 704 |               "* Bar",
 705 |               "",
 706 |               "* Baz"
 707 |             ]
 708 |         p
 709 |           `shouldBe` [ List
 710 |                          (Unordered Nothing)
 711 |                          []
 712 |                          ( (Paragraph [] (MarkupLine "Foo" :| []) :| [])
 713 |                              :| [ Paragraph [] (MarkupLine "Bar" :| []) :| [],
 714 |                                   Paragraph [] (MarkupLine "Baz" :| []) :| []
 715 |                                 ]
 716 |                          )
 717 |                      ],
 718 |       testCase "unordered list with multi-line paragraphs" $ do
 719 |         p <-
 720 |           parseBlocks
 721 |             [ "* Foo",
 722 |               "Bar",
 723 |               "* Baz",
 724 |               "  Qux"
 725 |             ]
 726 |         p
 727 |           `shouldBe` [ List
 728 |                          (Unordered Nothing)
 729 |                          []
 730 |                          ( (Paragraph [] (MarkupLine "Foo" :| [MarkupLine "Bar"]) :| [])
 731 |                              :| [Paragraph [] (MarkupLine "Baz" :| [MarkupLine "  Qux"]) :| []]
 732 |                          )
 733 |                      ],
 734 |       testCase "unordered list item with literal second paragraph" $ do
 735 |         p <-
 736 |           parseBlocks
 737 |             [ "* Foo",
 738 |               "",
 739 |               " Bar"
 740 |             ]
 741 |         p
 742 |           `shouldBe` [ List
 743 |                          (Unordered Nothing)
 744 |                          []
 745 |                          ( ( Paragraph [] (MarkupLine "Foo" :| [])
 746 |                                -- TODO. Add when indented literal paragraphs are
 747 |                                -- implemented:
 748 |                                --
 749 |                                --  :| [QUOTED [] (MarkupLine "Bar" :| [])]
 750 |                                :| []
 751 |                            )
 752 |                              :| []
 753 |                          ),
 754 |                        -- TODO. Remove when indented literal paragraphs are
 755 |                        -- implemented:
 756 |                        Paragraph [] (MarkupLine " Bar" :| [])
 757 |                      ],
 758 |       testCase "two unordered lists separated by a paragraph" $ do
 759 |         p <-
 760 |           parseBlocks
 761 |             [ "* Foo",
 762 |               "",
 763 |               "Bar",
 764 |               "",
 765 |               "* Baz",
 766 |               "* Qux"
 767 |             ]
 768 |         p
 769 |           `shouldBe` [ List
 770 |                          (Unordered Nothing)
 771 |                          []
 772 |                          ((Paragraph [] (MarkupLine "Foo" :| []) :| []) :| []),
 773 |                        Paragraph [] (MarkupLine "Bar" :| []),
 774 |                        List
 775 |                          (Unordered Nothing)
 776 |                          []
 777 |                          ( (Paragraph [] (MarkupLine "Baz" :| []) :| [])
 778 |                              :| [Paragraph [] (MarkupLine "Qux" :| []) :| []]
 779 |                          )
 780 |                      ],
 781 |       testCase "unordered list followed up by consecutive example block" $ do
 782 |         p <-
 783 |           parseBlocks
 784 |             [ "* Foo",
 785 |               "====",
 786 |               "Bar",
 787 |               "===="
 788 |             ]
 789 |         p
 790 |           `shouldBe` [ List
 791 |                          (Unordered Nothing)
 792 |                          []
 793 |                          ((Paragraph [] (MarkupLine "Foo" :| []) :| []) :| []),
 794 |                        Nestable
 795 |                          Example
 796 |                          []
 797 |                          [Paragraph [] (MarkupLine "Bar" :| [])]
 798 |                      ]
 799 |     ]
 800 | 
 801 | nestedListsUnitTests :: TestTree
 802 | nestedListsUnitTests =
 803 |   testGroup
 804 |     "nested lists unit tests"
 805 |     [ testCase "unordered list using '-' (hyphen) nested into list using '*' (asterisk)" $ do
 806 |         p <-
 807 |           parseBlocks
 808 |             [ "* Foo",
 809 |               "- Bar",
 810 |               "- Baz",
 811 |               "* Qux"
 812 |             ]
 813 |         p
 814 |           `shouldBe` [ List
 815 |                          (Unordered Nothing)
 816 |                          []
 817 |                          ( ( Paragraph [] (MarkupLine "Foo" :| [])
 818 |                                :| [ List
 819 |                                       (Unordered Nothing)
 820 |                                       []
 821 |                                       ( (Paragraph [] (MarkupLine "Bar" :| []) :| [])
 822 |                                           :| [Paragraph [] (MarkupLine "Baz" :| []) :| []]
 823 |                                       )
 824 |                                   ]
 825 |                            )
 826 |                              :| [Paragraph [] (MarkupLine "Qux" :| []) :| []]
 827 |                          )
 828 |                      ],
 829 |       testCase "nested unordered lists using increasing number of '*' (asterisk)" $ do
 830 |         p <-
 831 |           parseBlocks
 832 |             [ "* Foo",
 833 |               "** Bar",
 834 |               "*** Baz",
 835 |               "* Qux"
 836 |             ]
 837 |         p
 838 |           `shouldBe` [ List
 839 |                          (Unordered Nothing)
 840 |                          []
 841 |                          ( ( Paragraph [] (MarkupLine "Foo" :| [])
 842 |                                :| [ List
 843 |                                       (Unordered Nothing)
 844 |                                       []
 845 |                                       ( ( Paragraph [] (MarkupLine "Bar" :| [])
 846 |                                             :| [ List
 847 |                                                    (Unordered Nothing)
 848 |                                                    []
 849 |                                                    ((Paragraph [] (MarkupLine "Baz" :| []) :| []) :| [])
 850 |                                                ]
 851 |                                         )
 852 |                                           :| []
 853 |                                       )
 854 |                                   ]
 855 |                            )
 856 |                              :| [Paragraph [] (MarkupLine "Qux" :| []) :| []]
 857 |                          )
 858 |                      ],
 859 |       testCase "nested unordered lists with empty lines interspersed" $ do
 860 |         p <-
 861 |           parseBlocks
 862 |             [ "* Foo",
 863 |               "",
 864 |               "** Bar",
 865 |               "",
 866 |               "*** Baz",
 867 |               "",
 868 |               "* Qux"
 869 |             ]
 870 |         p
 871 |           `shouldBe` [ List
 872 |                          (Unordered Nothing)
 873 |                          []
 874 |                          ( ( Paragraph [] (MarkupLine "Foo" :| [])
 875 |                                :| [ List
 876 |                                       (Unordered Nothing)
 877 |                                       []
 878 |                                       ( ( Paragraph [] (MarkupLine "Bar" :| [])
 879 |                                             :| [ List
 880 |                                                    (Unordered Nothing)
 881 |                                                    []
 882 |                                                    ((Paragraph [] (MarkupLine "Baz" :| []) :| []) :| [])
 883 |                                                ]
 884 |                                         )
 885 |                                           :| []
 886 |                                       )
 887 |                                   ]
 888 |                            )
 889 |                              :| [Paragraph [] (MarkupLine "Qux" :| []) :| []]
 890 |                          )
 891 |                      ],
 892 |       testCase "nested unordered lists using unordered number of '*' (asterisk)" $ do
 893 |         p <-
 894 |           parseBlocks
 895 |             [ "** Foo",
 896 |               "* Bar",
 897 |               "*** Baz",
 898 |               " ** Qux"
 899 |             ]
 900 |         p
 901 |           `shouldBe` [ List
 902 |                          (Unordered Nothing)
 903 |                          []
 904 |                          ( ( Paragraph [] (MarkupLine "Foo" :| [])
 905 |                                :| [ List
 906 |                                       (Unordered Nothing)
 907 |                                       []
 908 |                                       ( ( Paragraph [] (MarkupLine "Bar" :| [])
 909 |                                             :| [ List
 910 |                                                    (Unordered Nothing)
 911 |                                                    []
 912 |                                                    ((Paragraph [] (MarkupLine "Baz" :| []) :| []) :| [])
 913 |                                                ]
 914 |                                         )
 915 |                                           :| []
 916 |                                       )
 917 |                                   ]
 918 |                            )
 919 |                              :| [Paragraph [] (MarkupLine "Qux" :| []) :| []]
 920 |                          )
 921 |                      ],
 922 |       testCase "nested unordered list with multi-line paragraph" $ do
 923 |         p <-
 924 |           parseBlocks
 925 |             [ "* Foo",
 926 |               "- Bar",
 927 |               "Baz",
 928 |               "* Qux"
 929 |             ]
 930 |         p
 931 |           `shouldBe` [ List
 932 |                          (Unordered Nothing)
 933 |                          []
 934 |                          ( ( Paragraph [] (MarkupLine "Foo" :| [])
 935 |                                :| [ List
 936 |                                       (Unordered Nothing)
 937 |                                       []
 938 |                                       ( (Paragraph [] (MarkupLine "Bar" :| [MarkupLine "Baz"]) :| [])
 939 |                                           :| []
 940 |                                       )
 941 |                                   ]
 942 |                            )
 943 |                              :| [Paragraph [] (MarkupLine "Qux" :| []) :| []]
 944 |                          )
 945 |                      ],
 946 |       testCase "(DVB001) nested unordered list with block prefixes" $ do
 947 |         p <-
 948 |           parseBlocks
 949 |             [ "[.red]",
 950 |               ".FooFoo",
 951 |               "* Foo",
 952 |               "[.blue]",
 953 |               ".BarBar",
 954 |               "- Bar",
 955 |               "[.green]",
 956 |               ".BazBaz",
 957 |               "- Baz"
 958 |             ]
 959 |         p
 960 |           `shouldBe` [ List
 961 |                          (Unordered Nothing)
 962 |                          [ MetadataItem (BlockAttributeList ".red"),
 963 |                            MetadataItem (BlockTitle (MarkupLine "FooFoo" :| []))
 964 |                          ]
 965 |                          ( ( Paragraph [] (MarkupLine "Foo" :| [])
 966 |                                :| [ List
 967 |                                       (Unordered Nothing)
 968 |                                       [ MetadataItem (BlockAttributeList ".blue"),
 969 |                                         MetadataItem (BlockTitle (MarkupLine "BarBar" :| []))
 970 |                                       ]
 971 |                                       ( (Paragraph [] (MarkupLine "Bar" :| []) :| [])
 972 |                                           :| []
 973 |                                       )
 974 |                                   ]
 975 |                            )
 976 |                              :| []
 977 |                          ),
 978 |                        List
 979 |                          (Unordered Nothing)
 980 |                          [ MetadataItem (BlockAttributeList ".green"),
 981 |                            MetadataItem (BlockTitle (MarkupLine "BazBaz" :| []))
 982 |                          ]
 983 |                          ((Paragraph [] (MarkupLine "Baz" :| []) :| []) :| [])
 984 |                      ],
 985 |       -- Identical result to the previous test case.
 986 |       testCase "(DVB001) nested unordered list with block prefixes and some empty lines" $ do
 987 |         p <-
 988 |           parseBlocks
 989 |             [ "[.red]",
 990 |               ".FooFoo",
 991 |               "",
 992 |               "* Foo",
 993 |               "[.blue]",
 994 |               "",
 995 |               ".BarBar",
 996 |               "- Bar",
 997 |               "",
 998 |               "[.green]",
 999 |               ".BazBaz",
1000 |               "- Baz"
1001 |             ]
1002 |         p
1003 |           `shouldBe` [ List
1004 |                          (Unordered Nothing)
1005 |                          [ MetadataItem (BlockAttributeList ".red"),
1006 |                            MetadataItem (BlockTitle (MarkupLine "FooFoo" :| []))
1007 |                          ]
1008 |                          ( ( Paragraph [] (MarkupLine "Foo" :| [])
1009 |                                :| [ List
1010 |                                       (Unordered Nothing)
1011 |                                       [ MetadataItem (BlockAttributeList ".blue"),
1012 |                                         MetadataItem (BlockTitle (MarkupLine "BarBar" :| []))
1013 |                                       ]
1014 |                                       ( (Paragraph [] (MarkupLine "Bar" :| []) :| [])
1015 |                                           :| []
1016 |                                       )
1017 |                                   ]
1018 |                            )
1019 |                              :| []
1020 |                          ),
1021 |                        List
1022 |                          (Unordered Nothing)
1023 |                          [ MetadataItem (BlockAttributeList ".green"),
1024 |                            MetadataItem (BlockTitle (MarkupLine "BazBaz" :| []))
1025 |                          ]
1026 |                          ((Paragraph [] (MarkupLine "Baz" :| []) :| []) :| [])
1027 |                      ]
1028 |     ]
1029 | 
1030 | listContinuationUnitTests :: TestTree
1031 | listContinuationUnitTests =
1032 |   testGroup
1033 |     "list continuation unit tests"
1034 |     [ testCase "list continuation (paragraph), followed by another list item" $ do
1035 |         p <-
1036 |           parseBlocks
1037 |             [ "* Foo",
1038 |               "+",
1039 |               "Bar",
1040 |               "* Baz"
1041 |             ]
1042 |         p
1043 |           `shouldBe` [ List
1044 |                          (Unordered Nothing)
1045 |                          []
1046 |                          ( ( Paragraph [] (MarkupLine "Foo" :| [])
1047 |                                :| [Paragraph [] (MarkupLine "Bar" :| [])]
1048 |                            )
1049 |                              :| [Paragraph [] (MarkupLine "Baz" :| []) :| []]
1050 |                          )
1051 |                      ],
1052 |       testCase "two list continuations (paragraph), followed by another list item" $ do
1053 |         p <-
1054 |           parseBlocks
1055 |             [ "* Foo",
1056 |               "+",
1057 |               "Bar",
1058 |               "+",
1059 |               "Baz",
1060 |               "* Qux"
1061 |             ]
1062 |         p
1063 |           `shouldBe` [ List
1064 |                          (Unordered Nothing)
1065 |                          []
1066 |                          ( ( Paragraph [] (MarkupLine "Foo" :| [])
1067 |                                :| [ Paragraph [] (MarkupLine "Bar" :| []),
1068 |                                     Paragraph [] (MarkupLine "Baz" :| [])
1069 |                                   ]
1070 |                            )
1071 |                              :| [Paragraph [] (MarkupLine "Qux" :| []) :| []]
1072 |                          )
1073 |                      ],
1074 |       testCase "list continuation (paragraph) with a block prefix" $ do
1075 |         p <-
1076 |           parseBlocks
1077 |             [ "* Foo",
1078 |               "+",
1079 |               "[.red]",
1080 |               "Bar"
1081 |             ]
1082 |         p
1083 |           `shouldBe` [ List
1084 |                          (Unordered Nothing)
1085 |                          []
1086 |                          ( ( Paragraph [] (MarkupLine "Foo" :| [])
1087 |                                :| [ Paragraph
1088 |                                       [MetadataItem (BlockAttributeList ".red")]
1089 |                                       (MarkupLine "Bar" :| [])
1090 |                                   ]
1091 |                            )
1092 |                              :| []
1093 |                          )
1094 |                      ],
1095 |       testCase "list continuation (example block), followed by another list item" $ do
1096 |         p <-
1097 |           parseBlocks
1098 |             [ "* Foo",
1099 |               "+",
1100 |               "====",
1101 |               "Bar",
1102 |               "====",
1103 |               "* Baz"
1104 |             ]
1105 |         p
1106 |           `shouldBe` [ List
1107 |                          (Unordered Nothing)
1108 |                          []
1109 |                          ( ( Paragraph [] (MarkupLine "Foo" :| [])
1110 |                                :| [Nestable Example [] [Paragraph [] (MarkupLine "Bar" :| [])]]
1111 |                            )
1112 |                              :| [Paragraph [] (MarkupLine "Baz" :| []) :| []]
1113 |                          )
1114 |                      ],
1115 |       testCase "list continuation into a nested unordered list" $ do
1116 |         p <-
1117 |           parseBlocks
1118 |             [ "* Foo",
1119 |               "** Bar",
1120 |               "+",
1121 |               "Baz",
1122 |               "* Qux"
1123 |             ]
1124 |         p
1125 |           `shouldBe` [ List
1126 |                          (Unordered Nothing)
1127 |                          []
1128 |                          ( ( Paragraph [] (MarkupLine "Foo" :| [])
1129 |                                :| [ List
1130 |                                       (Unordered Nothing)
1131 |                                       []
1132 |                                       ( ( Paragraph [] (MarkupLine "Bar" :| [])
1133 |                                             :| [Paragraph [] (MarkupLine "Baz" :| [])]
1134 |                                         )
1135 |                                           :| []
1136 |                                       )
1137 |                                   ]
1138 |                            )
1139 |                              :| [Paragraph [] (MarkupLine "Qux" :| []) :| []]
1140 |                          )
1141 |                      ],
1142 |       testCase "dangling list continuation marker in outermost list" $ do
1143 |         p <-
1144 |           parseBlocks
1145 |             [ "* Foo",
1146 |               "+",
1147 |               "", -- Asciidoctor allows this optional empty line here
1148 |               "",
1149 |               "Bar"
1150 |             ]
1151 |         p
1152 |           `shouldBe` [ List
1153 |                          (Unordered Nothing)
1154 |                          []
1155 |                          ((Paragraph [] (MarkupLine "Foo" :| []) :| []) :| []),
1156 |                        Paragraph [] (MarkupLine "Bar" :| [])
1157 |                      ],
1158 |       testCase "dangling list continuation marker into a nested unordered list" $ do
1159 |         p <-
1160 |           parseBlocks
1161 |             [ "* Foo",
1162 |               "** Bar",
1163 |               "+",
1164 |               "", -- Asciidoctor allows this optional blank line here
1165 |               "",
1166 |               "Baz"
1167 |             ]
1168 |         p
1169 |           `shouldBe` [ List
1170 |                          (Unordered Nothing)
1171 |                          []
1172 |                          ( ( Paragraph [] (MarkupLine "Foo" :| [])
1173 |                                :| [ List
1174 |                                       (Unordered Nothing)
1175 |                                       []
1176 |                                       ((Paragraph [] (MarkupLine "Bar" :| []) :| []) :| [])
1177 |                                   ]
1178 |                            )
1179 |                              :| []
1180 |                          ),
1181 |                        Paragraph [] (MarkupLine "Baz" :| [])
1182 |                      ],
1183 |       testCase "(DVB002) broken list continuation attempt in outermost list" $ do
1184 |         p <-
1185 |           parseBlocks
1186 |             [ "* Foo",
1187 |               "",
1188 |               "+",
1189 |               "Bar"
1190 |             ]
1191 |         p
1192 |           `shouldBe` [ List
1193 |                          (Unordered Nothing)
1194 |                          []
1195 |                          ((Paragraph [] (MarkupLine "Foo" :| []) :| []) :| []),
1196 |                        Paragraph [] (MarkupLine "+" :| [MarkupLine "Bar"])
1197 |                      ],
1198 |       testCase "(DVB002) broken list continuation attempt in nested list" $ do
1199 |         p <-
1200 |           parseBlocks
1201 |             [ "* Foo",
1202 |               "** Bar",
1203 |               "",
1204 |               "+",
1205 |               "Baz"
1206 |             ]
1207 |         p
1208 |           `shouldBe` [ List
1209 |                          (Unordered Nothing)
1210 |                          []
1211 |                          ( ( Paragraph [] (MarkupLine "Foo" :| [])
1212 |                                :| [ List
1213 |                                       (Unordered Nothing)
1214 |                                       []
1215 |                                       ((Paragraph [] (MarkupLine "Bar" :| []) :| []) :| [])
1216 |                                   ]
1217 |                            )
1218 |                              :| []
1219 |                          ),
1220 |                        Paragraph [] (MarkupLine "+" :| [MarkupLine "Baz"])
1221 |                      ],
1222 |       testCase "line break that resembles list continuation" $ do
1223 |         p <-
1224 |           parseBlocks
1225 |             [ "* Foo",
1226 |               " +",
1227 |               "Bar"
1228 |             ]
1229 |         p
1230 |           `shouldBe` [ List
1231 |                          (Unordered Nothing)
1232 |                          []
1233 |                          -- TODO. Must be changed when line breaks are
1234 |                          -- implemented.
1235 |                          ( ( Paragraph
1236 |                                []
1237 |                                ( MarkupLine "Foo"
1238 |                                    :| [ MarkupLine " +",
1239 |                                         MarkupLine "Bar"
1240 |                                       ]
1241 |                                )
1242 |                                :| []
1243 |                            )
1244 |                              :| []
1245 |                          )
1246 |                      ]
1247 |     ]
1248 | 
1249 | commentUnitTests :: TestTree
1250 | commentUnitTests =
1251 |   testGroup
1252 |     "comment unit tests"
1253 |     [ testCase "dangling block comment" $ do
1254 |         p <-
1255 |           parseBlocks
1256 |             [ "////",
1257 |               "Foo",
1258 |               "////"
1259 |             ]
1260 |         p
1261 |           `shouldBe` [DanglingBlockPrefix [Comment (BlockComment ["Foo"])]],
1262 |       testCase "dangling line comment sequence" $ do
1263 |         p <-
1264 |           parseBlocks
1265 |             [ "//Foo",
1266 |               "// Bar"
1267 |             ]
1268 |         p
1269 |           `shouldBe` [ DanglingBlockPrefix
1270 |                          [Comment (LineCommentSequence ("Foo" :| [" Bar"]))]
1271 |                      ],
1272 |       testCase "block comment before paragraph" $ do
1273 |         p <-
1274 |           parseBlocks
1275 |             [ "////",
1276 |               "Foo",
1277 |               "////",
1278 |               "Bar"
1279 |             ]
1280 |         p
1281 |           `shouldBe` [ Paragraph
1282 |                          [Comment (BlockComment ["Foo"])]
1283 |                          (MarkupLine "Bar" :| [])
1284 |                      ],
1285 |       testCase "block comment before paragraph, with redundant space" $ do
1286 |         p <-
1287 |           parseBlocks
1288 |             [ "//// ",
1289 |               "Foo",
1290 |               "//// ",
1291 |               "Bar"
1292 |             ]
1293 |         p
1294 |           `shouldBe` [ Paragraph
1295 |                          [Comment (BlockComment ["Foo"])]
1296 |                          (MarkupLine "Bar" :| [])
1297 |                      ],
1298 |       testCase "block comment before paragraph, separated by blank line" $ do
1299 |         p <-
1300 |           parseBlocks
1301 |             [ "////",
1302 |               "Foo",
1303 |               "////",
1304 |               "",
1305 |               "Bar"
1306 |             ]
1307 |         p
1308 |           `shouldBe` [ Paragraph
1309 |                          [Comment (BlockComment ["Foo"])]
1310 |                          (MarkupLine "Bar" :| [])
1311 |                      ],
1312 |       testCase "empty block comment before paragraph" $ do
1313 |         p <-
1314 |           parseBlocks
1315 |             [ "////",
1316 |               "////",
1317 |               "Foo"
1318 |             ]
1319 |         p
1320 |           `shouldBe` [ Paragraph
1321 |                          [Comment (BlockComment [])]
1322 |                          (MarkupLine "Foo" :| [])
1323 |                      ],
1324 |       testCase "empty line comment before paragraph" $ do
1325 |         p <-
1326 |           parseBlocks
1327 |             [ "//",
1328 |               "Foo"
1329 |             ]
1330 |         p
1331 |           `shouldBe` [ Paragraph
1332 |                          [Comment (LineCommentSequence ("" :| []))]
1333 |                          (MarkupLine "Foo" :| [])
1334 |                      ],
1335 |       testCase "block comment with multiple pseudo-paragraphs" $ do
1336 |         p <-
1337 |           parseBlocks
1338 |             [ "////",
1339 |               "Foo",
1340 |               "",
1341 |               "Bar",
1342 |               "////",
1343 |               "Baz"
1344 |             ]
1345 |         p
1346 |           `shouldBe` [ Paragraph
1347 |                          [Comment (BlockComment ["Foo", "", "Bar"])]
1348 |                          (MarkupLine "Baz" :| [])
1349 |                      ],
1350 |       testCase "line comment sequence before paragraph" $ do
1351 |         p <-
1352 |           parseBlocks
1353 |             [ "//Foo",
1354 |               "// Bar",
1355 |               "Baz"
1356 |             ]
1357 |         p
1358 |           `shouldBe` [ Paragraph
1359 |                          [Comment (LineCommentSequence ("Foo" :| [" Bar"]))]
1360 |                          (MarkupLine "Baz" :| [])
1361 |                      ],
1362 |       testCase "line comment inside paragraph" $ do
1363 |         p <-
1364 |           parseBlocks
1365 |             [ "Foo",
1366 |               "// Bar",
1367 |               "Baz"
1368 |             ]
1369 |         p
1370 |           `shouldBe` [ Paragraph
1371 |                          []
1372 |                          (MarkupLine "Foo" :| [CommentLine " Bar", MarkupLine "Baz"])
1373 |                      ],
1374 |       testCase "line comment inside paragraph and after paragraph" $ do
1375 |         p <-
1376 |           parseBlocks
1377 |             [ "Foo",
1378 |               "// Bar",
1379 |               "Baz",
1380 |               "//Qux"
1381 |             ]
1382 |         p
1383 |           `shouldBe` [ Paragraph
1384 |                          []
1385 |                          ( MarkupLine "Foo"
1386 |                              :| [ CommentLine " Bar",
1387 |                                   MarkupLine "Baz",
1388 |                                   CommentLine "Qux"
1389 |                                 ]
1390 |                          )
1391 |                      ],
1392 |       testCase "line comment sequence before section header" $ do
1393 |         p <-
1394 |           parseBlocks
1395 |             [ "//Foo",
1396 |               "// Bar",
1397 |               "== Baz"
1398 |             ]
1399 |         p
1400 |           `shouldBe` [ SectionHeader
1401 |                          [Comment (LineCommentSequence ("Foo" :| [" Bar"]))]
1402 |                          1
1403 |                          (MarkupLine "Baz" :| [])
1404 |                      ],
1405 |       testCase "block comment followed by line comment sequence" $ do
1406 |         p <-
1407 |           parseBlocks
1408 |             [ "////",
1409 |               "Foo",
1410 |               "",
1411 |               "////",
1412 |               "//Bar",
1413 |               "Baz"
1414 |             ]
1415 |         p
1416 |           `shouldBe` [ Paragraph
1417 |                          [ Comment (BlockComment ["Foo", ""]),
1418 |                            Comment (LineCommentSequence ("Bar" :| []))
1419 |                          ]
1420 |                          (MarkupLine "Baz" :| [])
1421 |                      ],
1422 |       testCase "line comment sequence followed by block comment" $ do
1423 |         p <-
1424 |           parseBlocks
1425 |             [ "// Foo",
1426 |               "//Bar",
1427 |               "////",
1428 |               "Baz",
1429 |               "////",
1430 |               "Qux"
1431 |             ]
1432 |         p
1433 |           `shouldBe` [ Paragraph
1434 |                          [ Comment (LineCommentSequence (" Foo" :| ["Bar"])),
1435 |                            Comment (BlockComment ["Baz"])
1436 |                          ]
1437 |                          (MarkupLine "Qux" :| [])
1438 |                      ],
1439 |       testCase "block comment with more than four '/' (slash)" $ do
1440 |         p <-
1441 |           parseBlocks
1442 |             [ "/////",
1443 |               "Foo",
1444 |               "////",
1445 |               "Bar",
1446 |               "////",
1447 |               "/////",
1448 |               "Baz"
1449 |             ]
1450 |         p
1451 |           `shouldBe` [ Paragraph
1452 |                          [ Comment (BlockComment ["Foo", "////", "Bar", "////"])
1453 |                          ]
1454 |                          (MarkupLine "Baz" :| [])
1455 |                      ],
1456 |       testCase "dangling non-closed block comment" $ do
1457 |         p <-
1458 |           parseBlocks
1459 |             [ "////",
1460 |               "Foo",
1461 |               "",
1462 |               "Bar"
1463 |             ]
1464 |         p
1465 |           `shouldBe` [ DanglingBlockPrefix
1466 |                          [Comment (BlockComment ["Foo", "", "Bar"])]
1467 |                      ],
1468 |       testCase "bad block comment opening, with three '/' (slashes)" $ do
1469 |         p <-
1470 |           parseBlocks
1471 |             [ "///",
1472 |               "Foo",
1473 |               "////"
1474 |             ]
1475 |         p
1476 |           `shouldBe` [ Paragraph [] (MarkupLine "///" :| [MarkupLine "Foo"]),
1477 |                        DanglingBlockPrefix [Comment (BlockComment [])]
1478 |                      ],
1479 |       testCase "bad line comment, with three '/' (slashes)" $ do
1480 |         p <-
1481 |           parseBlocks
1482 |             [ "///Foo",
1483 |               "Bar"
1484 |             ]
1485 |         p
1486 |           `shouldBe` [Paragraph [] (MarkupLine "///Foo" :| [MarkupLine "Bar"])],
1487 |       testCase "bad block comment opening, preceded by space" $ do
1488 |         p <-
1489 |           parseBlocks
1490 |             [ " ////",
1491 |               "Foo",
1492 |               "////"
1493 |             ]
1494 |         p
1495 |           `shouldBe` [ Paragraph [] (MarkupLine " ////" :| [MarkupLine "Foo"]),
1496 |                        DanglingBlockPrefix [Comment (BlockComment [])]
1497 |                      ],
1498 |       testCase "bad line comment opening, preceded by space" $ do
1499 |         p <-
1500 |           parseBlocks
1501 |             [ " //Foo",
1502 |               "Bar"
1503 |             ]
1504 |         p
1505 |           `shouldBe` [Paragraph [] (MarkupLine " //Foo" :| [MarkupLine "Bar"])]
1506 |     ]
1507 | 


--------------------------------------------------------------------------------