├── src
    ├── markd
    │   ├── version.cr
    │   ├── parser.cr
    │   ├── rules
    │   │   ├── document.cr
    │   │   ├── paragraph.cr
    │   │   ├── thematic_break.cr
    │   │   ├── item.cr
    │   │   ├── block_quote.cr
    │   │   ├── html_block.cr
    │   │   ├── heading.cr
    │   │   ├── code_block.cr
    │   │   ├── table.cr
    │   │   └── list.cr
    │   ├── utils.cr
    │   ├── mappings
    │   │   ├── decode.cr
    │   │   └── legacy.cr
    │   ├── html_entities.cr
    │   ├── options.cr
    │   ├── node.cr
    │   ├── renderer.cr
    │   ├── rule.cr
    │   ├── parsers
    │   │   ├── block.cr
    │   │   └── inline.cr
    │   └── renderers
    │   │   └── html_renderer.cr
    └── markd.cr
├── .github
    ├── trafico.yml
    └── workflows
    │   ├── release.yml
    │   └── ci.yml
├── shard.yml
├── .gitignore
├── .vscode
    └── launch.json
├── .ameba.yml
├── spec
    ├── fixtures
    │   ├── emoji.txt
    │   ├── alert.txt
    │   ├── regression.txt
    │   ├── smart_punct.txt
    │   ├── gfm-regression.txt
    │   └── gfm-extensions.txt
    ├── markd_spec.cr
    ├── api_spec.cr
    └── spec_helper.cr
├── LICENSE
├── CHANGELOG.md
└── README.md


/src/markd/version.cr:
--------------------------------------------------------------------------------
1 | module Markd
2 |   VERSION = "0.5.0" # Library version string; shard.yml declares the same version (0.5.0)
3 | end
4 | 


--------------------------------------------------------------------------------
/.github/trafico.yml:
--------------------------------------------------------------------------------
1 | addWipLabel: true
2 | reviewers:
3 |   icyleaf:
4 |     name: "icyleaf"
5 |     color: "#000000"
6 | 


--------------------------------------------------------------------------------
/shard.yml:
--------------------------------------------------------------------------------
 1 | name: markd
 2 | version: 0.5.0
 3 | 
 4 | authors:
 5 |   - icyleaf <icyleaf.cn@gmail.com>
 6 | 
 7 | crystal: ">= 0.36.1, < 2.0.0"
 8 | 
 9 | license: MIT
10 | 


--------------------------------------------------------------------------------
/src/markd/parser.cr:
--------------------------------------------------------------------------------
 1 | module Markd
 2 |   module Parser
 3 |     def self.parse(source : String, options = Options.new) # Parses `source` by delegating to `Block.parse` and returns its result
 4 |       Block.parse(source, options)
 5 |     end
 6 |   end
 7 | end
 8 | 
 9 | require "./parsers/*"
10 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /doc/
 2 | /lib/
 3 | /bin/
 4 | /.shards/
 5 | /src/main.cr
 6 | 
 7 | # Libraries don't need dependency lock
 8 | # Dependencies will be locked in application that uses them
 9 | /shard.lock
10 | 
11 | # vscode
12 | /.history/
13 | /.vscode/settings.json
14 | 


--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "version": "0.2.0",
 3 |   "configurations": [
 4 |     {
 5 |       "type": "lldb",
 6 |       "request": "launch",
 7 |       "name": "Launch",
 8 |       "program": "${workspaceRoot}/bin/main",
 9 |       "args": [],
10 |       "cwd": "${workspaceRoot}"
11 |     }
12 |   ]
13 | }
14 | 


--------------------------------------------------------------------------------
/.ameba.yml:
--------------------------------------------------------------------------------
 1 | Metrics/CyclomaticComplexity:
 2 |   Excluded:
 3 |     - spec/**/*
 4 |     - src/markd/utils.cr
 5 |     - src/markd/rules/heading.cr
 6 |     - src/markd/rules/list.cr
 7 |     - src/markd/parsers/inline.cr
 8 |     - src/markd/parsers/block.cr
 9 |     - src/markd/renderer.cr
10 | 
11 | Naming/BlockParameterName:
12 |   Enabled: false
13 | 
14 | Style/ParenthesesAroundCondition:
15 |   Enabled: true
16 |   AllowSafeAssignment: true
17 | 
18 | Lint/NotNil:
19 |   Excluded:
20 |     - src/markd/parsers/inline.cr
21 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
 1 | name: Deploy new release
 2 | 
 3 | on:
 4 |   push:
 5 |     tags:
 6 |       - "v*"
 7 | 
 8 | jobs:
 9 |   deploy:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |     - name: Checkout
13 |       uses: actions/checkout@master
14 | 
15 |     - name: Create Release
16 |       id: create_release
17 |       uses: actions/create-release@v1
18 |       env:
19 |         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
20 |       with:
21 |         tag_name: ${{ github.ref }}
22 |         release_name: Release ${{ github.ref }}
23 |         draft: false
24 |         prerelease: false
25 | 
26 | 


--------------------------------------------------------------------------------
/src/markd/rules/document.cr:
--------------------------------------------------------------------------------
 1 | module Markd::Rule
 2 |   # Rule for the root document node.
 3 |   #
 4 |   # It never reports a match for a line, always continues, and does not take
 5 |   # raw text lines itself.
 6 |   struct Document
 7 |     include Rule
 8 | 
 9 |     # Text lines are not appended to the document directly.
10 |     def accepts_lines? : Bool
11 |       false
12 |     end
13 | 
14 |     # Every node type except `Item` may be a direct child.
15 |     def can_contain?(type : Node::Type) : Bool
16 |       return false if type.item?
17 | 
18 |       true
19 |     end
20 | 
21 |     # The document is never started from a line, so nothing ever matches.
22 |     def match(parser : Parser, container : Node) : MatchValue
23 |       MatchValue::None
24 |     end
25 | 
26 |     # The document stays open for every line.
27 |     def continue(parser : Parser, container : Node) : ContinueStatus
28 |       ContinueStatus::Continue
29 |     end
30 | 
31 |     # No work is performed for the document node.
32 |     def token(parser : Parser, container : Node) : Nil
33 |       # intentionally empty
34 |     end
35 |   end
36 | end
37 | 


--------------------------------------------------------------------------------
/src/markd/utils.cr:
--------------------------------------------------------------------------------
 1 | require "json"
 2 | 
 3 | module Markd
 4 |   module Utils
 5 |     # Runs the given block and returns its value.
 6 |     #
 7 |     # When *measure_time* is true, "<label>: <elapsed>ms" is also printed to
 8 |     # standard output once the block has finished. Elapsed time is the
 9 |     # difference between two `Time.utc` readings taken around the block.
10 |     def self.timer(label : String, measure_time : Bool, &)
11 |       return yield unless measure_time
12 | 
13 |       start_time = Time.utc
14 |       result = yield
15 | 
16 |       puts "#{label}: #{(Time.utc - start_time).total_milliseconds}ms"
17 | 
18 |       # Return the block's value here as well, so both paths behave the same
19 |       # (this path used to return the nil produced by `puts`).
20 |       result
21 |     end
22 | 
23 |     # A literal backslash followed by whatever `Rule::ESCAPABLE_STRING`
24 |     # matches (defined elsewhere; presumably one escapable character).
25 |     DECODE_ENTITIES_REGEX = Regex.new("\\\\" + Rule::ESCAPABLE_STRING, Regex::Options::IGNORE_CASE)
26 | 
27 |     # Decodes HTML entities in *text*, then replaces every
28 |     # `DECODE_ENTITIES_REGEX` match with its second character, which removes
29 |     # the escaping backslash.
30 |     def self.decode_entities_string(text : String) : String
31 |       HTML.decode_entities(text).gsub(DECODE_ENTITIES_REGEX, &.[1].to_s)
32 |     end
33 |   end
34 | end
35 | 


--------------------------------------------------------------------------------
/src/markd/mappings/decode.cr:
--------------------------------------------------------------------------------
 1 | module Markd::HTMLEntities
 2 |   DECODE_MAPPINGS = { # numeric code point => replacement code point; read by Decoder.decode_codepoint (values look like the Windows-1252 remapping of 128-159 - TODO confirm)
 3 |       0 => 65533,
 4 |     128 => 8364,
 5 |     130 => 8218,
 6 |     131 => 402,
 7 |     132 => 8222,
 8 |     133 => 8230,
 9 |     134 => 8224,
10 |     135 => 8225,
11 |     136 => 710,
12 |     137 => 8240,
13 |     138 => 352,
14 |     139 => 8249,
15 |     140 => 338,
16 |     142 => 381,
17 |     145 => 8216,
18 |     146 => 8217,
19 |     147 => 8220,
20 |     148 => 8221,
21 |     149 => 8226,
22 |     150 => 8211,
23 |     151 => 8212,
24 |     152 => 732,
25 |     153 => 8482,
26 |     154 => 353,
27 |     155 => 8250,
28 |     156 => 339,
29 |     158 => 382,
30 |     159 => 376,
31 |   }
32 | end
33 | 


--------------------------------------------------------------------------------
/spec/fixtures/emoji.txt:
--------------------------------------------------------------------------------
 1 | ## Emoji
 2 | 
 3 | ```````````````````````````````` example emoji
 4 | :100:
 5 | .
 6 | <p>💯</p>
 7 | ````````````````````````````````
 8 | 
 9 | ```````````````````````````````` example emoji
10 | :gb:
11 | .
12 | <p>🇬🇧</p>
13 | ````````````````````````````````
14 | 
15 | ```````````````````````````````` example emoji
16 | :people_holding_hands:
17 | .
18 | <p>🧑‍🤝‍🧑</p>
19 | ````````````````````````````````
20 | 
21 | ```````````````````````````````` example emoji
22 | :scotland:
23 | .
24 | <p>🏴󠁧󠁢󠁳󠁣󠁴󠁿</p>
25 | ````````````````````````````````
26 | 
27 | ```````````````````````````````` example emoji
28 | :emoji_doesnt_exist:
29 | .
30 | <p>:emoji_doesnt_exist:</p>
31 | ````````````````````````````````
32 | 
33 | ```````````````````````````````` example emoji
34 | :family_man_boy_boy:
35 | .
36 | <p>👨‍👦‍👦</p>
37 | ````````````````````````````````
38 | 


--------------------------------------------------------------------------------
/src/markd/rules/paragraph.cr:
--------------------------------------------------------------------------------
 1 | module Markd::Rule
 2 |   # Rule for paragraph nodes: they take raw text lines, stop at a blank line,
 3 |   # and have leading text consumed by `inline_lexer.reference` removed in
 4 |   # `token`.
 5 |   struct Paragraph
 6 |     include Rule
 7 | 
 8 |     # This rule never opens a paragraph itself.
 9 |     def match(parser : Parser, container : Node) : MatchValue
10 |       MatchValue::None
11 |     end
12 | 
13 |     # A blank line stops the paragraph; any other line continues it.
14 |     def continue(parser : Parser, container : Node) : ContinueStatus
15 |       return ContinueStatus::Stop if parser.blank
16 | 
17 |       ContinueStatus::Continue
18 |     end
19 | 
20 |     # While the text starts with '[', strips whatever
21 |     # `inline_lexer.reference` reports as consumed. If anything was stripped
22 |     # and only ASCII whitespace is left, the paragraph node is unlinked.
23 |     def token(parser : Parser, container : Node) : Nil
24 |       stripped_reference = false
25 | 
26 |       while container.text[0]? == '[' &&
27 |             (consumed = parser.inline_lexer.reference(container.text, parser.refmap)) && consumed > 0
28 |         container.text = container.text.byte_slice(consumed)
29 |         stripped_reference = true
30 |       end
31 | 
32 |       return unless stripped_reference
33 | 
34 |       only_whitespace_left = container.text.each_char.all?(&.ascii_whitespace?)
35 |       container.unlink if only_whitespace_left
36 |     end
37 | 
38 |     # Paragraphs cannot contain other blocks.
39 |     def can_contain?(type)
40 |       false
41 |     end
42 | 
43 |     # Raw text lines are appended to paragraphs.
44 |     def accepts_lines? : Bool
45 |       true
46 |     end
47 |   end
48 | end
49 | 


--------------------------------------------------------------------------------
/spec/markd_spec.cr:
--------------------------------------------------------------------------------
 1 | require "./spec_helper"
 2 | 
 3 | # Commonmark spec examples
 4 | describe_spec("fixtures/spec.txt")
 5 | 
 6 | # Smart punctuation examples
 7 | describe_spec("fixtures/smart_punct.txt", smart: true)
 8 | 
 9 | # Regression examples
10 | describe_spec("fixtures/regression.txt")
11 | 
12 | describe_spec("fixtures/emoji.txt") # Emoji shortcode examples
13 | 
14 | describe_spec("fixtures/gfm-spec.txt", gfm: true) # GFM spec examples, run with gfm enabled
15 | 
16 | describe_spec("fixtures/gfm-extensions.txt", gfm: true) # GFM extension examples
17 | 
18 | describe_spec("fixtures/gfm-regression.txt", gfm: true) # GFM regression examples
19 | 
20 | # Alert spec examples
21 | describe_spec("fixtures/alert.txt", gfm: true)
22 | 
23 | describe Markd do
24 |   # Thanks Ryan Westlund <rlwestlund@gmail.com> feedback via email.
25 |   it "should escape unsafe html" do
26 |     raw = %Q(```"><script>window.location="https://footbar.com"</script>\n```)
27 |     html = %Q(<pre><code class="language-&quot;&gt;&lt;script&gt;window.location=&quot;https://footbar.com&quot;&lt;/script&gt;"></code></pre>\n)
28 | 
29 |     Markd.to_html(raw).should eq(html) # the fence info string must end up HTML-escaped inside the class attribute
30 |   end
31 | end
32 | 


--------------------------------------------------------------------------------
/spec/api_spec.cr:
--------------------------------------------------------------------------------
 1 | require "spec"
 2 | require "../src/markd"
 3 | 
 4 | describe Markd::Options do
 5 |   describe "#base_url" do
 6 |     # Without a base URL, link and image destinations are emitted unchanged.
 7 |     it "is disabled by default" do
 8 |       options = Markd::Options.new
 9 |       Markd.to_html("[foo](bar)", options).should eq %(<p><a href="bar">foo</a></p>\n)
10 |       Markd.to_html("![](bar)", options).should eq %(<p><img src="bar" alt="" /></p>\n)
11 |     end
12 | 
13 |     # With a base URL, relative destinations are resolved against it while
14 |     # destinations that are already absolute are left alone.
15 |     it "absolutizes relative urls" do
16 |       options = Markd::Options.new
17 |       options.base_url = URI.parse("http://example.com")
18 |       Markd.to_html("[foo](bar)", options).should eq %(<p><a href="http://example.com/bar">foo</a></p>\n)
19 |       Markd.to_html("[foo](https://example.com/baz)", options).should eq %(<p><a href="https://example.com/baz">foo</a></p>\n)
20 |       Markd.to_html("![](bar)", options).should eq %(<p><img src="http://example.com/bar" alt="" /></p>\n)
21 |       Markd.to_html("![](https://example.com/baz)", options).should eq %(<p><img src="https://example.com/baz" alt="" /></p>\n)
22 |     end
23 |   end
24 | end
25 | 


--------------------------------------------------------------------------------
/src/markd/rules/thematic_break.cr:
--------------------------------------------------------------------------------
 1 | module Markd::Rule
 2 |   # Rule for thematic breaks: a line of three or more `*`, `_` or `-`
 3 |   # characters, optionally separated by spaces or tabs.
 4 |   struct ThematicBreak
 5 |     include Rule
 6 | 
 7 |     THEMATIC_BREAK = /^(?:(?:\*[ \t]*){3,}|(?:_[ \t]*){3,}|(?:-[ \t]*){3,})[ \t]*$/
 8 | 
 9 |     # Adds a `ThematicBreak` leaf and consumes the rest of the line when the
10 |     # line is not indented and, from its first non-space character on,
11 |     # matches `THEMATIC_BREAK`.
12 |     def match(parser : Parser, container : Node) : MatchValue
13 |       return MatchValue::None if parser.indented
14 | 
15 |       remainder = parser.line[parser.next_nonspace..-1]
16 |       return MatchValue::None unless remainder.match(THEMATIC_BREAK)
17 | 
18 |       parser.close_unmatched_blocks
19 |       parser.add_child(Node::Type::ThematicBreak, parser.next_nonspace)
20 | 
21 |       unread = parser.line.size - parser.offset
22 |       parser.advance_offset(unread, false)
23 | 
24 |       MatchValue::Leaf
25 |     end
26 | 
27 |     # A thematic break can never contain more than one line, so it never
28 |     # continues.
29 |     def continue(parser : Parser, container : Node) : ContinueStatus
30 |       ContinueStatus::Stop
31 |     end
32 | 
33 |     # No work is performed for a thematic break.
34 |     def token(parser : Parser, container : Node) : Nil
35 |       # intentionally empty
36 |     end
37 | 
38 |     # Thematic breaks cannot contain other blocks.
39 |     def can_contain?(type)
40 |       false
41 |     end
42 | 
43 |     # Thematic breaks do not take raw text lines.
44 |     def accepts_lines? : Bool
45 |       false
46 |     end
47 |   end
48 | end
49 | 


--------------------------------------------------------------------------------
/src/markd/rules/item.cr:
--------------------------------------------------------------------------------
 1 | module Markd::Rule
 2 |   # Rule for list item nodes. Items are matched and parsed by `Rule::List`;
 3 |   # this rule only decides whether an open item continues on the current line.
 4 |   struct Item
 5 |     include Rule
 6 | 
 7 |     # Always reports no match: matching and parsing happen in `Rule::List`.
 8 |     def match(parser : Parser, container : Node) : MatchValue
 9 |       MatchValue::None
10 |     end
11 | 
12 |     # Continues the item when the line is blank and the item already has a
13 |     # child, or when `parser.indent` is at least `marker_offset + padding`
14 |     # (both read from the item's data). Otherwise the item stops.
15 |     def continue(parser : Parser, container : Node) : ContinueStatus
16 |       required_indent = container.data["marker_offset"].as(Int32) + container.data["padding"].as(Int32)
17 | 
18 |       if parser.blank
19 |         # Blank line after empty list item
20 |         return ContinueStatus::Stop unless container.first_child?
21 | 
22 |         parser.advance_next_nonspace
23 |       elsif parser.indent < required_indent
24 |         return ContinueStatus::Stop
25 |       else
26 |         parser.advance_offset(required_indent, true)
27 |       end
28 | 
29 |       ContinueStatus::Continue
30 |     end
31 | 
32 |     # No work is performed for a list item.
33 |     def token(parser : Parser, container : Node) : Nil
34 |       # intentionally empty
35 |     end
36 | 
37 |     # Every node type except `Item` may be a direct child.
38 |     def can_contain?(type : Node::Type)
39 |       return false if type.item?
40 | 
41 |       true
42 |     end
43 | 
44 |     # List items do not take raw text lines themselves.
45 |     def accepts_lines? : Bool
46 |       false
47 |     end
48 |   end
49 | end
50 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2017-present icyleaf
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/src/markd.cr:
--------------------------------------------------------------------------------
 1 | require "./markd/html_entities"
 2 | require "./markd/utils"
 3 | require "./markd/node"
 4 | require "./markd/rule"
 5 | require "./markd/options"
 6 | require "./markd/renderer"
 7 | require "./markd/parser"
 8 | require "./markd/version"
 9 | 
10 | module Markd
11 |   {% if @top_level.has_constant?("Tartrazine") %}
12 |     # Variant compiled when a top-level `Tartrazine` constant exists.
13 |     #
14 |     # Parses *source* and renders it to HTML; an empty *source* yields "".
15 |     # *formatter* is keyword-only and is either a `Tartrazine::Formatter` or
16 |     # a theme name. A theme name is turned into a `Tartrazine::Html`
17 |     # formatter with line numbers and standalone output enabled.
18 |     def self.to_html(
19 |       source : String,
20 |       options = Options.new,
21 |       *,
22 |       formatter : Tartrazine::Formatter | String = "catppuccin-macchiato",
23 |     )
24 |       return "" if source.empty?
25 | 
26 |       if formatter.is_a?(String)
27 |         theme = Tartrazine.theme(formatter)
28 |         formatter = Tartrazine::Html.new(
29 |           theme: theme,
30 |           line_numbers: true,
31 |           standalone: true,
32 |         )
33 |       end
34 | 
35 |       ast = Parser.parse(source, options)
36 |       HTMLRenderer.new(options).render(ast, formatter)
37 |     end
38 |   {% else %}
39 |     # Variant compiled when no top-level `Tartrazine` constant exists.
40 |     #
41 |     # Parses *source* and renders it to HTML; an empty *source* yields "".
42 |     # *formatter* is passed to the renderer unchanged.
43 |     def self.to_html(
44 |       source : String,
45 |       options = Options.new,
46 |       formatter = nil,
47 |     )
48 |       return "" if source.empty?
49 | 
50 |       ast = Parser.parse(source, options)
51 |       HTMLRenderer.new(options).render(ast, formatter)
52 |     end
53 |   {% end %}
54 | end
55 | 


--------------------------------------------------------------------------------
/src/markd/rules/block_quote.cr:
--------------------------------------------------------------------------------
 1 | module Markd::Rule
 2 |   # Rule for block quotes and, when GFM is enabled, for alerts written with
 3 |   # the block quote marker.
 4 |   struct BlockQuote
 5 |     include Rule
 6 | 
 7 |     # Opens a container when the unindented line starts with '>'.
 8 |     #
 9 |     # With GFM enabled and the line matching `Rule::ADMONITION_START`, an
10 |     # `Alert` node is added, its "alert" and "title" data are filled from the
11 |     # match, and the offset is advanced by the line size. Otherwise a plain
12 |     # `BlockQuote` node is added.
13 |     def match(parser : Parser, container : Node) : MatchValue
14 |       return MatchValue::None unless match?(parser)
15 | 
16 |       seek(parser)
17 |       parser.close_unmatched_blocks
18 | 
19 |       admonition = parser.gfm? ? parser.line.match(Rule::ADMONITION_START) : nil
20 | 
21 |       if admonition
22 |         # This is an alert
23 |         alert = parser.add_child(Node::Type::Alert, parser.next_nonspace)
24 |         alert.data["alert"] = admonition[1]
25 | 
26 |         # Use the second capture as title unless it is missing or blank.
27 |         custom_title = admonition[2]?.try(&.strip)
28 |         alert.data["title"] = (custom_title && !custom_title.empty?) ? custom_title : admonition[1]
29 | 
30 |         parser.advance_offset(parser.line.size, false)
31 |       else
32 |         parser.add_child(Node::Type::BlockQuote, parser.next_nonspace)
33 |       end
34 | 
35 |       MatchValue::Container
36 |     end
37 | 
38 |     # The quote continues while lines keep starting with '>'; the marker is
39 |     # consumed on each continued line.
40 |     def continue(parser : Parser, container : Node) : ContinueStatus
41 |       return ContinueStatus::Stop unless match?(parser)
42 | 
43 |       seek(parser)
44 |       ContinueStatus::Continue
45 |     end
46 | 
47 |     # No work is performed for a block quote.
48 |     def token(parser : Parser, container : Node) : Nil
49 |       # intentionally empty
50 |     end
51 | 
52 |     # Every node type except `Item` may be a direct child.
53 |     def can_contain?(type : Node::Type) : Bool
54 |       return false if type.item?
55 | 
56 |       true
57 |     end
58 | 
59 |     # Block quotes do not take raw text lines themselves.
60 |     def accepts_lines? : Bool
61 |       false
62 |     end
63 | 
64 |     # True when the line is not indented and its first non-space character
65 |     # is '>'.
66 |     private def match?(parser)
67 |       !parser.indented && parser.line[parser.next_nonspace]? == '>'
68 |     end
69 | 
70 |     # Moves past the '>' marker and, if present, one following space or tab.
71 |     private def seek(parser : Parser)
72 |       parser.advance_next_nonspace
73 |       parser.advance_offset(1, false)
74 | 
75 |       parser.advance_offset(1, true) if space_or_tab?(parser.line[parser.offset]?)
76 |     end
77 |   end
78 | end
79 | 


--------------------------------------------------------------------------------
/src/markd/rules/html_block.cr:
--------------------------------------------------------------------------------
 1 | module Markd::Rule
 2 |   # Rule for raw HTML blocks.
 3 |   struct HTMLBlock
 4 |     include Rule
 5 | 
 6 |     # Opens an `HTMLBlock` leaf when the unindented line's first non-space
 7 |     # character is '<' and the rest of the line matches one of the
 8 |     # `Rule::HTML_BLOCK_OPEN` patterns. The last pattern in that list is
 9 |     # ignored while `container` is a paragraph. The index of the matching
10 |     # pattern is stored in the node's "html_block_type" data.
11 |     def match(parser : Parser, container : Node) : MatchValue
12 |       if !parser.indented && parser.line[parser.next_nonspace]? == '<'
13 |         text = parser.line[parser.next_nonspace..-1]
14 |         block_type_size = Rule::HTML_BLOCK_OPEN.size - 1
15 | 
16 |         Rule::HTML_BLOCK_OPEN.each_with_index do |regex, index|
17 |           if text.match(regex) &&
18 |              (index < block_type_size || !container.type.paragraph?)
19 |             parser.close_unmatched_blocks
20 |             # We don't adjust parser.offset;
21 |             # spaces are part of the HTML block:
22 |             node = parser.add_child(Node::Type::HTMLBlock, parser.offset)
23 |             node.data["html_block_type"] = index
24 | 
25 |             return MatchValue::Leaf
26 |           end
27 |         end
28 |       end
29 | 
30 |       MatchValue::None
31 |     end
32 | 
33 |     # Blocks opened by the patterns at index 5 and 6 stop at a blank line;
34 |     # every other case continues.
35 |     def continue(parser : Parser, container : Node) : ContinueStatus
36 |       (parser.blank && {5, 6}.includes?(container.data["html_block_type"])) ? ContinueStatus::Stop : ContinueStatus::Continue
37 |     end
38 | 
39 |     # Removes trailing newline/space runs from the block text and, when the
40 |     # parser's tagfilter is on, escapes disallowed tags.
41 |     def token(parser : Parser, container : Node) : Nil
42 |       text = container.text.gsub(/(\n *)+$/, "")
43 | 
44 |       if parser.tagfilter?
45 |         text = self.class.escape_disallowed_html(text)
46 |       end
47 | 
48 |       container.text = text
49 |     end
50 | 
51 |     # HTML blocks cannot contain other blocks.
52 |     def can_contain?(type)
53 |       false
54 |     end
55 | 
56 |     # Raw text lines are appended to HTML blocks.
57 |     def accepts_lines? : Bool
58 |       true
59 |     end
60 | 
61 |     # Returns *text* with the leading '<' of every opening or closing tag
62 |     # named in `GFM_DISALLOWED_HTML_TAGS` replaced by "&lt;".
63 |     #
64 |     # The replacement happens exactly where the regex matched. Searching for
65 |     # the matched string again from the previous position could hit an
66 |     # earlier look-alike that the regex itself rejected (for example
67 |     # "<scripts" before "<script"), leaving the real tag unescaped.
68 |     def self.escape_disallowed_html(text : String) : String
69 |       text.gsub(/<\/?\s*(#{GFM_DISALLOWED_HTML_TAGS.join('|')})\b/i) do |tag|
70 |         "&lt;#{tag[1..]}"
71 |       end
72 |     end
73 |   end
74 | end
75 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - master
 7 |   pull_request:
 8 |     branches:
 9 |       - master
10 | 
11 | jobs:
12 |   ameba_linter:
13 |     runs-on: ubuntu-latest
14 |     strategy:
15 |       matrix:
16 |         ameba-version: [v1.6.4]
17 |     name: Ameba ${{ matrix.ameba-version }} linter check
18 |     steps:
19 |       - name: Install latest Crystal
20 |         uses: crystal-lang/install-crystal@v1
21 |       - name: Check out repository code
22 |         uses: actions/checkout@master
23 |       - name: Install dependencies
24 |         run: shards install --without-development
25 |       - name: Cache Ameba binary
26 |         id: cache-ameba
27 |         uses: actions/cache@v3
28 |         with:
29 |           path: bin/ameba
30 |           key: ${{ runner.os }}-ameba-${{ matrix.ameba-version }}
31 | 
32 |       - name: Build Ameba
33 |         if: steps.cache-ameba.outputs.cache-hit != 'true'
34 |         run: |
35 |           git clone --branch ${{ matrix.ameba-version }} --single-branch https://github.com/crystal-ameba/ameba.git
36 |           cd ameba
37 |           make bin/ameba CRFLAGS='-Dpreview_mt --no-debug'
38 |           mkdir -p ../bin
39 |           mv bin/ameba ../bin/ameba
40 |           cd ..
41 |           rm -rf ameba
42 | 
43 |       - name: Run Ameba Linter
44 |         run: bin/ameba -c .ameba.yml
45 |   specs:
46 |     strategy:
47 |       fail-fast: false
48 |       matrix:
49 |         include:
50 |           - { os: ubuntu-latest, crystal: latest }
51 |           - { os: ubuntu-latest, crystal: nightly }
52 |           - { os: macos-latest }
53 |           - { os: windows-latest }
54 |     runs-on: ${{matrix.os}}
55 | 
56 |     name: Crystal ${{ matrix.crystal }} specs on ${{ matrix.os }}
57 |     steps:
58 |       - name: Checkout
59 |         uses: actions/checkout@master
60 | 
61 |       - name: Install Crystal
62 |         uses: crystal-lang/install-crystal@v1
63 |         with:
64 |           crystal: ${{ matrix.crystal }}
65 | 
66 |       - name: Install dependencies
67 |         run: shards install --without-development
68 | 
69 |       - name: Run specs
70 |         run: crystal spec --error-on-warnings --error-trace
71 | 


--------------------------------------------------------------------------------
/spec/fixtures/alert.txt:
--------------------------------------------------------------------------------
 1 | ## Alert
 2 | 
 3 | Alerts are a Markdown extension based on the blockquote syntax that
 4 | you can use to emphasize critical information. On GitHub, they are
 5 | displayed with distinctive colors and icons to indicate the significance
 6 | of the content.
 7 | 
 8 | Use alerts only when they are crucial for user success and limit them
 9 | to one or two per article to prevent overloading the reader. Additionally,
10 | you should avoid placing alerts consecutively. Alerts cannot be nested
11 | within other elements.
12 | 
13 | To add an alert, use a special blockquote line specifying the alert type
14 | and an optional title, followed by the alert information in a standard
15 | blockquote.
16 | 
17 | There are five types of alert: 
18 | 
19 | * NOTE
20 | * TIP
21 | * IMPORTANT
22 | * WARNING
23 | * CAUTION
24 | 
25 | ```````````````````````````````` example alert
26 | > [!NOTE]
27 | > Useful information that users should know, even when skimming content.
28 | .
29 | <div class="alert alert-note"><p class="alert-title">NOTE</p>
30 | <p>Useful information that users should know, even when skimming content.</p>
31 | </div>
32 | ````````````````````````````````
33 | 
34 | An optional title can be added after the closing bracket.
35 | 
36 | ```````````````````````````````` example alert
37 | > [!NOTE] What is a note?
38 | > Useful information that users should know, even when skimming content.
39 | .
40 | <div class="alert alert-note"><p class="alert-title">What is a note?</p>
41 | <p>Useful information that users should know, even when skimming content.</p>
42 | </div>
43 | ````````````````````````````````
44 | 
45 | Empty spaces after the brackets are ignored.
46 | 
47 | ```````````````````````````````` example alert
48 | > [!NOTE]  
49 | > Useful information that users should know, even when skimming content.
50 | .
51 | <div class="alert alert-note"><p class="alert-title">NOTE</p>
52 | <p>Useful information that users should know, even when skimming content.</p>
53 | </div>
54 | ````````````````````````````````
55 | 
56 | Alert-like block quotes which don't use one of the five listed
57 | alert types are just block quotes.
58 | 
59 | ```````````````````````````````` example alert
60 | > [!FOO]  
61 | > Not a real alert.
62 | .
63 | <blockquote>
64 | <p>[!FOO]<br />
65 | Not a real alert.</p>
66 | </blockquote>
67 | ````````````````````````````````
68 | 


--------------------------------------------------------------------------------
/src/markd/rules/heading.cr:
--------------------------------------------------------------------------------
 1 | module Markd::Rule
 2 |   # Rule for ATX ("# Title") and setext (underlined) headings.
 3 |   struct Heading
 4 |     include Rule
 5 | 
 6 |     ATX_HEADING_MARKER    = /^\#{1,6}(?:[ \t]+|$)/
 7 |     SETEXT_HEADING_MARKER = /^(?:=+|-+)[ \t]*$/
 8 | 
 9 |     # Tries an ATX heading first, then a setext heading.
10 |     #
11 |     # ATX: consumes the marker, adds a `Heading` node whose level is the
12 |     # number of '#' characters in the marker, and stores the rest of the
13 |     # line with any closing '#' sequence removed.
14 |     #
15 |     # Setext: only when `container` is a paragraph whose parent is not a
16 |     # block quote. Text consumed by `inline_lexer.reference` is stripped from
17 |     # the front of the paragraph; if nothing is left there is no match.
18 |     # Otherwise the paragraph is replaced by a level 1 ('=') or level 2 ('-')
19 |     # heading carrying the remaining text.
20 |     def match(parser : Parser, container : Node) : MatchValue
21 |       if (atx = match?(parser, ATX_HEADING_MARKER))
22 |         parser.advance_next_nonspace
23 |         parser.advance_offset(atx[0].size, false)
24 |         parser.close_unmatched_blocks
25 | 
26 |         atx_node = parser.add_child(Node::Type::Heading, parser.next_nonspace)
27 |         atx_node.data["level"] = atx[0].strip.size
28 | 
29 |         # Blank out a line made only of '#', then drop a closing sequence.
30 |         title = parser.line[parser.offset..-1]
31 |         title = title.sub(/^[ \t]*#+[ \t]*$/, "")
32 |         title = title.sub(/[ \t]+#+[ \t]*$/, "")
33 |         atx_node.text = title
34 | 
35 |         parser.advance_offset(parser.line.size - parser.offset)
36 | 
37 |         MatchValue::Leaf
38 |       elsif (setext = match?(parser, SETEXT_HEADING_MARKER)) &&
39 |             container.type.paragraph? && (parent = container.parent?) &&
40 |             !parent.type.block_quote?
41 |         parser.close_unmatched_blocks
42 | 
43 |         while container.text[0]? == '[' &&
44 |               (consumed = parser.inline_lexer.reference(container.text, parser.refmap)) && consumed > 0
45 |           container.text = container.text.byte_slice(consumed)
46 |         end
47 |         return MatchValue::None if container.text.empty?
48 | 
49 |         setext_node = Node.new(Node::Type::Heading)
50 |         setext_node.source_pos = container.source_pos
51 |         setext_node.data["level"] = setext[0][0] == '=' ? 1 : 2
52 |         setext_node.text = container.text
53 | 
54 |         # Swap the paragraph for the new heading and make it the parser tip.
55 |         container.insert_after(setext_node)
56 |         container.unlink
57 | 
58 |         parser.tip = setext_node
59 |         parser.advance_offset(parser.line.size - parser.offset, false)
60 | 
61 |         MatchValue::Leaf
62 |       else
63 |         MatchValue::None
64 |       end
65 |     end
66 | 
67 |     # No work is performed for a heading.
68 |     def token(parser : Parser, container : Node) : Nil
69 |       # intentionally empty
70 |     end
71 | 
72 |     # A heading can never contain more than one line, so it never continues.
73 |     def continue(parser : Parser, container : Node) : ContinueStatus
74 |       ContinueStatus::Stop
75 |     end
76 | 
77 |     # Headings cannot contain other blocks.
78 |     def can_contain?(type)
79 |       false
80 |     end
81 | 
82 |     # Headings do not take raw text lines.
83 |     def accepts_lines? : Bool
84 |       false
85 |     end
86 | 
87 |     # Matches *regex* against the line from its first non-space character
88 |     # on; the result is discarded (nil) when the line is indented.
89 |     private def match?(parser : Parser, regex : Regex) : Regex::MatchData?
90 |       found = parser.line[parser.next_nonspace..-1].match(regex)
91 |       return nil if parser.indented
92 | 
93 |       found
94 |     end
95 |   end
96 | end
97 | 


--------------------------------------------------------------------------------
/src/markd/html_entities.cr:
--------------------------------------------------------------------------------
 1 | require "./mappings/*"
 2 | 
 3 | module Markd::HTMLEntities # HTML entity decoding and encoding helpers
 4 |   module ExtendToHTML # mixin of thin wrappers around `Decoder` and `Encoder`
 5 |     def decode_entities(source : String) # decodes every entity reference found in *source*
 6 |       Decoder.decode(source)
 7 |     end
 8 |     # Decodes one entity body (the text between `&` and `;`), e.g. `"amp"` or `"#x41"`.
 9 |     def decode_entity(source : String)
10 |       Decoder.decode_entity(source)
11 |     end
12 |     # Escapes *source* through `Encoder.encode`.
13 |     def encode_entities(source)
14 |       Encoder.encode(source)
15 |     end
16 |   end
17 |   # Resolves named (`&amp;`), decimal (`&#38;`) and hexadecimal (`&#x26;`) references.
18 |   module Decoder
19 |     REGEX = /&(?:([a-zA-Z0-9]{2,32};)|(#[xX][\da-fA-F]+;?|#\d+;?))/
20 |     # Returns *source* with every `REGEX` match replaced by its decoded form.
21 |     def self.decode(source)
22 |       source.gsub(REGEX) do |chars|
23 |         decode_entity(chars.lchop('&').rchop(';')) # `;` is optional on numeric references; never drop a digit
24 |       end
25 |     end
26 |     # Decodes one entity body; unknown names come back unchanged as `&<chars>;`.
27 |     def self.decode_entity(chars)
28 |       if chars[0] == '#'
29 |         if chars.size > 1
30 |           if chars[1].downcase == 'x'
31 |             if chars.size > 2
32 |               return decode_codepoint(chars[2..-1].to_i?(16) || 0x110000) # nil (overflow/garbage) => U+FFFD
33 |             end
34 |           else
35 |             return decode_codepoint(chars[1..-1].to_i?(10) || 0x110000) # nil (overflow/garbage) => U+FFFD
36 |           end
37 |         end
38 |       else
39 |         entities_key = chars[0..-1]
40 |         if (resolved_entity = Markd::HTMLEntities::ENTITIES_MAPPINGS[entities_key]?)
41 |           return resolved_entity
42 |         end
43 |       end
44 |       # Fallback: a bare "#", "#x" or an unknown name is returned as literal text.
45 |       "&#{chars};"
46 |     end
47 |     # Maps a numeric code point to its character; values that are not valid scalars become U+FFFD.
48 |     def self.decode_codepoint(codepoint)
49 |       return "\uFFFD" if codepoint < 0 || codepoint > 0x10FFFF || (codepoint >= 0xD800 && codepoint <= 0xDFFF)
50 | 
51 |       if (decoded = Markd::HTMLEntities::DECODE_MAPPINGS[codepoint]?) # remap when the table has an entry
52 |         codepoint = decoded
53 |       end
54 | 
55 |       codepoint.unsafe_chr
56 |     end
57 |   end
58 |   # Escapes text that has a named entity and every character outside 0x20..0x7E.
59 |   module Encoder
60 |     ENTITIES_REGEX = Regex.union(HTMLEntities::ENTITIES_MAPPINGS.values)
61 |     ASTRAL_REGEX   = Regex.new("[\xED\xA0\x80-\xED\xAF\xBF][\xED\xB0\x80-\xED\xBF\xBF]")
62 |     ENCODE_REGEX   = /[^\x{20}-\x{7E}]/
63 |     # Runs three passes in order: named entities, surrogate pairs, then any remaining char outside 0x20..0x7E.
64 |     def self.encode(source : String)
65 |       source.gsub(ENTITIES_REGEX) { |chars| encode_entities(chars) }
66 |         .gsub(ASTRAL_REGEX) { |chars| encode_astral(chars) }
67 |         .gsub(ENCODE_REGEX) { |chars| encode_extend(chars) }
68 |     end
69 |     # Looks up the entity name whose mapped value equals *chars* (raises KeyError when there is none).
70 |     private def self.encode_entities(chars : String)
71 |       entity = HTMLEntities::ENTITIES_MAPPINGS.key_for(chars)
72 |       "&#{entity};"
73 |     end
74 |     # Combines a high/low surrogate pair into one code point and emits it as `&#xHEX;`.
75 |     private def self.encode_astral(chars : String)
76 |       high = chars.char_at(0).ord
77 |       low = chars.char_at(1).ord
78 |       codepoint = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000
79 | 
80 |       "&#x#{codepoint.to_s(16).upcase};"
81 |     end
82 |     # Emits the first character of *char* as an upper-case hexadecimal reference.
83 |     private def self.encode_extend(char : String)
84 |       "&#x#{char[0].ord.to_s(16).upcase};"
85 |     end
86 |   end
87 | end
88 | 
89 | module HTML # gains `decode_entities`, `decode_entity` and `encode_entities` as module-level methods
90 |   extend Markd::HTMLEntities::ExtendToHTML
91 | end
92 | 


--------------------------------------------------------------------------------
/src/markd/rules/code_block.cr:
--------------------------------------------------------------------------------
 1 | module Markd::Rule
 2 |   struct CodeBlock # block rule for fenced and indented code blocks
 3 |     include Rule
 4 |     # Opening fence: 3+ backticks with no backtick later on the line, or 3+ tildes. Closing fence: a fence run followed only by spaces.
 5 |     CODE_FENCE         = /^`{3,}(?!.*`)|^~{3,}/
 6 |     CLOSING_CODE_FENCE = /^(?:`{3,}|~{3,})(?= *$)/
 7 |     # Tries to start a code block on the current line; returns `Leaf` when a node was added, `None` otherwise.
 8 |     def match(parser : Parser, container : Node) : MatchValue
 9 |       if !parser.indented &&
10 |          (match = parser.line[parser.next_nonspace..-1].match(CODE_FENCE))
11 |         # fenced: remember the fence run's length, character and indent on the new node
12 |         fence_length = match[0].size
13 | 
14 |         parser.close_unmatched_blocks
15 |         node = parser.add_child(Node::Type::CodeBlock, parser.next_nonspace)
16 |         node.fenced = true
17 |         node.fence_length = fence_length
18 |         node.fence_char = match[0][0].to_s
19 |         node.fence_offset = parser.indent # `continue` skips up to this many leading spaces on content lines
20 | 
21 |         parser.advance_next_nonspace
22 |         parser.advance_offset(fence_length, false)
23 | 
24 |         MatchValue::Leaf
25 |       elsif parser.indented && !parser.blank && (tip = parser.tip) &&
26 |             !tip.type.paragraph?
27 |         # indented: only when the line is not blank and the current tip is not a paragraph
28 |         parser.advance_offset(Rule::CODE_INDENT, true)
29 |         parser.close_unmatched_blocks
30 |         parser.add_child(Node::Type::CodeBlock, parser.offset)
31 | 
32 |         MatchValue::Leaf
33 |       else
34 |         MatchValue::None
35 |       end
36 |     end
37 |     # Decides whether the current line still belongs to *container*; may advance the parser offset.
38 |     def continue(parser : Parser, container : Node) : ContinueStatus
39 |       line = parser.line
40 |       indent = parser.indent
41 |       if container.fenced?
42 |         # fenced: a closing fence needs indent <= 3 and must start with the opening fence's character
43 |         match = indent <= 3 &&
44 |                 line[parser.next_nonspace]? == container.fence_char[0] &&
45 |                 line[parser.next_nonspace..-1].match(CLOSING_CODE_FENCE)
46 | 
47 |         if match && match.as(Regex::MatchData)[0].size >= container.fence_length # truthy only for MatchData
48 |           # closing fence - we're at end of line, so we can return
49 |           parser.token(container, parser.current_line)
50 |           return ContinueStatus::Return
51 |         else
52 |           # content line: skip at most `fence_offset` leading spaces/tabs
53 |           index = container.fence_offset
54 |           while index > 0 && space_or_tab?(parser.line[parser.offset]?)
55 |             parser.advance_offset(1, true)
56 |             index -= 1
57 |           end
58 |         end
59 |       else
60 |         # indented: keep going while indented enough or blank; anything else ends the block
61 |         if indent >= Rule::CODE_INDENT
62 |           parser.advance_offset(Rule::CODE_INDENT, true)
63 |         elsif parser.blank
64 |           parser.advance_next_nonspace
65 |         else
66 |           return ContinueStatus::Stop
67 |         end
68 |       end
69 | 
70 |       ContinueStatus::Continue
71 |     end
72 |     # Post-processes the collected text of *container* (fence language for fenced, trailing blanks for indented).
73 |     def token(parser : Parser, container : Node) : Nil
74 |       if container.fenced?
75 |         # fenced: the first line of the text becomes the language, the rest is the code
76 |         first_line, _, text = container.text.partition('\n')
77 | 
78 |         container.fence_language = Utils.decode_entities_string(first_line.strip)
79 |         container.text = text
80 |       else
81 |         # indented: replace a trailing run of newline-plus-spaces with a single newline
82 |         container.text = container.text.gsub(/(\n *)+$/, "\n")
83 |       end
84 |     end
85 |     # Always false: a code block never holds child blocks.
86 |     def can_contain?(type)
87 |       false
88 |     end
89 |     # Always true.
90 |     def accepts_lines? : Bool
91 |       true
92 |     end
93 |   end
94 | end
95 | 


--------------------------------------------------------------------------------
/src/markd/mappings/legacy.cr:
--------------------------------------------------------------------------------
  1 | module Markd::HTMLEntities
  2 |   LEGACY_MAPPINGS = { # entity name (without `&` / `;`) => replacement; NOTE(review): quot/QUOT are Strings, all others Chars - confirm intended
  3 |     "Aacute" => '\u00C1',
  4 |     "aacute" => '\u00E1',
  5 |     "Acirc"  => '\u00C2',
  6 |     "acirc"  => '\u00E2',
  7 |     "acute"  => '\u00B4',
  8 |     "AElig"  => '\u00C6',
  9 |     "aelig"  => '\u00E6',
 10 |     "Agrave" => '\u00C0',
 11 |     "agrave" => '\u00E0',
 12 |     "amp"    => '&',
 13 |     "AMP"    => '&',
 14 |     "Aring"  => '\u00C5',
 15 |     "aring"  => '\u00E5',
 16 |     "Atilde" => '\u00C3',
 17 |     "atilde" => '\u00E3',
 18 |     "Auml"   => '\u00C4',
 19 |     "auml"   => '\u00E4',
 20 |     "brvbar" => '\u00A6',
 21 |     "Ccedil" => '\u00C7',
 22 |     "ccedil" => '\u00E7',
 23 |     "cedil"  => '\u00B8',
 24 |     "cent"   => '\u00A2',
 25 |     "copy"   => '\u00A9',
 26 |     "COPY"   => '\u00A9',
 27 |     "curren" => '\u00A4',
 28 |     "deg"    => '\u00B0',
 29 |     "divide" => '\u00F7',
 30 |     "Eacute" => '\u00C9',
 31 |     "eacute" => '\u00E9',
 32 |     "Ecirc"  => '\u00CA',
 33 |     "ecirc"  => '\u00EA',
 34 |     "Egrave" => '\u00C8',
 35 |     "egrave" => '\u00E8',
 36 |     "ETH"    => '\u00D0',
 37 |     "eth"    => '\u00F0',
 38 |     "Euml"   => '\u00CB',
 39 |     "euml"   => '\u00EB',
 40 |     "frac12" => '\u00BD',
 41 |     "frac14" => '\u00BC',
 42 |     "frac34" => '\u00BE',
 43 |     "gt"     => '>',
 44 |     "GT"     => '>',
 45 |     "Iacute" => '\u00CD',
 46 |     "iacute" => '\u00ED',
 47 |     "Icirc"  => '\u00CE',
 48 |     "icirc"  => '\u00EE',
 49 |     "iexcl"  => '\u00A1',
 50 |     "Igrave" => '\u00CC',
 51 |     "igrave" => '\u00EC',
 52 |     "iquest" => '\u00BF',
 53 |     "Iuml"   => '\u00CF',
 54 |     "iuml"   => '\u00EF',
 55 |     "laquo"  => '\u00AB',
 56 |     "lt"     => '<',
 57 |     "LT"     => '<',
 58 |     "macr"   => '\u00AF',
 59 |     "micro"  => '\u00B5',
 60 |     "middot" => '\u00B7',
 61 |     "nbsp"   => '\u00A0',
 62 |     "not"    => '\u00AC',
 63 |     "Ntilde" => '\u00D1',
 64 |     "ntilde" => '\u00F1',
 65 |     "Oacute" => '\u00D3',
 66 |     "oacute" => '\u00F3',
 67 |     "Ocirc"  => '\u00D4',
 68 |     "ocirc"  => '\u00F4',
 69 |     "Ograve" => '\u00D2',
 70 |     "ograve" => '\u00F2',
 71 |     "ordf"   => '\u00AA',
 72 |     "ordm"   => '\u00BA',
 73 |     "Oslash" => '\u00D8',
 74 |     "oslash" => '\u00F8',
 75 |     "Otilde" => '\u00D5',
 76 |     "otilde" => '\u00F5',
 77 |     "Ouml"   => '\u00D6',
 78 |     "ouml"   => '\u00F6',
 79 |     "para"   => '\u00B6',
 80 |     "plusmn" => '\u00B1',
 81 |     "pound"  => '\u00A3',
 82 |     "quot"   => "\"",
 83 |     "QUOT"   => "\"",
 84 |     "raquo"  => '\u00BB',
 85 |     "reg"    => '\u00AE',
 86 |     "REG"    => '\u00AE',
 87 |     "sect"   => '\u00A7',
 88 |     "shy"    => '\u00AD',
 89 |     "sup1"   => '\u00B9',
 90 |     "sup2"   => '\u00B2',
 91 |     "sup3"   => '\u00B3',
 92 |     "szlig"  => '\u00DF',
 93 |     "THORN"  => '\u00DE',
 94 |     "thorn"  => '\u00FE',
 95 |     "times"  => '\u00D7',
 96 |     "Uacute" => '\u00DA',
 97 |     "uacute" => '\u00FA',
 98 |     "Ucirc"  => '\u00DB',
 99 |     "ucirc"  => '\u00FB',
100 |     "Ugrave" => '\u00D9',
101 |     "ugrave" => '\u00F9',
102 |     "uml"    => '\u00A8',
103 |     "Uuml"   => '\u00DC',
104 |     "uuml"   => '\u00FC',
105 |     "Yacute" => '\u00DD',
106 |     "yacute" => '\u00FD',
107 |     "yen"    => '\u00A5',
108 |     "yuml"   => '\u00FF',
109 |   }
110 | end
111 | 


--------------------------------------------------------------------------------
/src/markd/options.cr:
--------------------------------------------------------------------------------
  1 | require "uri"
  2 | 
  3 | module Markd
  4 |   # Markdown rendering options.
  5 |   class Options
  6 |     # Render the time spent reading the source, parsing blocks, and parsing inline content.
  7 |     property? time : Bool
  8 | 
  9 |     # Enables GitHub Flavored Markdown support.
 10 |     #
 11 |     # https://github.github.com/gfm/
 12 |     property? gfm : Bool
 13 | 
 14 |     # Not supported for now.
 15 |     property? toc : Bool
 16 | 
 17 |     # If `true`:
 18 |     # - straight quotes will be made curly
 19 |     # - `--` will be changed to an en dash
 20 |     # - `---` will be changed to an em dash
 21 |     # - `...` will be changed to ellipses
 22 |     property? smart : Bool
 23 | 
 24 |     # If `true`, source position information for block-level elements
 25 |     # will be rendered in the `data-sourcepos` attribute (for HTML).
 26 |     property? source_pos : Bool
 27 | 
 28 |     # If `true`, raw HTML will not be passed through to HTML output
 29 |     # (it will be replaced by comments).
 30 |     property? safe : Bool
 31 | 
 32 |     # If `true`, code tags generated by code blocks will have a
 33 |     # prettyprint class added to them, to be used by
 34 |     # [Google code-prettify](https://github.com/google/code-prettify).
 35 |     property? prettyprint : Bool
 36 | 
 37 |     # If `base_url` is not `nil`, it is used to resolve URLs of relative
 38 |     # links. It acts like HTML's `<base href="base_url">` in the context
 39 |     # of a Markdown document.
 40 |     property base_url : URI?
 41 | 
 42 |     # Enables GFM emoji support.
 43 |     #
 44 |     # For example:
 45 |     #
 46 |     # ```
 47 |     # @octocat :+1: This PR looks great - it's ready to merge! :ship:
 48 |     # ```
 49 |     #
 50 |     # Becomes:
 51 |     #
 52 |     # ```
 53 |     # @octocat 👍 This PR looks great - it's ready to merge! 🚢
 54 |     # ```
 55 |     # https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#using-emojis
 56 |     property? emoji : Bool
 57 | 
 58 |     # If `true`, the following HTML tags will be filtered when rendering HTML output:
 59 |     #
 60 |     # * `<title>`
 61 |     # * `<textarea>`
 62 |     # * `<style>`
 63 |     # * `<xmp>`
 64 |     # * `<iframe>`
 65 |     # * `<noembed>`
 66 |     # * `<noframes>`
 67 |     # * `<script>`
 68 |     # * `<plaintext>`
 69 |     #
 70 |     # All other HTML tags are left untouched.
 71 |     property? tagfilter : Bool
 72 | 
 73 |     # If `true`, more autolinks will be detected.
 74 |     # Setting to `false` does not disable autolink support as a whole.
 75 |     property? autolink : Bool
 76 |     # Creates an options object; every flag defaults to `false` and `base_url` to `nil`.
 77 |     def initialize(
 78 |       @time = false,
 79 |       @gfm = false,
 80 |       @toc = false,
 81 |       @smart = false,
 82 |       @source_pos = false,
 83 |       @safe = false,
 84 |       @prettyprint = false,
 85 |       @emoji = false,
 86 |       @tagfilter = false,
 87 |       @autolink = false,
 88 |       @base_url = nil,
 89 |     )
 90 |     end
 91 | 
 92 |     # Deprecated getters without the `?` suffix; each one names its `?` replacement.
 93 | 
 94 |     @[Deprecated("Use `#time?` instead.")]
 95 |     getter time
 96 | 
 97 |     @[Deprecated("Use `#gfm?` instead.")]
 98 |     getter gfm
 99 | 
100 |     @[Deprecated("Use `#smart?` instead.")]
101 |     getter smart
102 | 
103 |     @[Deprecated("Use `#source_pos?` instead.")]
104 |     getter source_pos
105 | 
106 |     @[Deprecated("Use `#safe?` instead.")]
107 |     getter safe
108 | 
109 |     @[Deprecated("Use `#prettyprint?` instead.")]
110 |     getter prettyprint
111 |   end
112 | end
113 | 


--------------------------------------------------------------------------------
/spec/fixtures/regression.txt:
--------------------------------------------------------------------------------
  1 | # Regression tests
  2 | 
  3 | Eating a character after a partially consumed tab.
  4 | 
  5 | ```````````````````````````````` example
  6 | * foo
  7 | →bar
  8 | .
  9 | <ul>
 10 | <li>foo
 11 | bar</li>
 12 | </ul>
 13 | ````````````````````````````````
 14 | 
 15 | Type 7 HTML block followed by whitespace (#98).
 16 | 
 17 | ```````````````````````````````` example
 18 | <a>  
 19 | x
 20 | .
 21 | <a>  
 22 | x
 23 | ````````````````````````````````
 24 | 
 25 | h2..h6 raw HTML blocks (jgm/CommonMark#430).
 26 | 
 27 | ```````````````````````````````` example
 28 | <h1>lorem</h1>
 29 | 
 30 | <h2>lorem</h2>
 31 | 
 32 | <h3>lorem</h3>
 33 | 
 34 | <h4>lorem</h4>
 35 | 
 36 | <h5>lorem</h5>
 37 | 
 38 | <h6>lorem</h6>
 39 | .
 40 | <h1>lorem</h1>
 41 | <h2>lorem</h2>
 42 | <h3>lorem</h3>
 43 | <h4>lorem</h4>
 44 | <h5>lorem</h5>
 45 | <h6>lorem</h6>
 46 | ````````````````````````````````
 47 | 
 48 | Issue #109 - tabs after setext header line
 49 | 
 50 | 
 51 | ```````````````````````````````` example
 52 | hi
 53 | --→
 54 | .
 55 | <h2>hi</h2>
 56 | ````````````````````````````````
 57 | 
 58 | Issue #108 - Chinese punctuation not recognized
 59 | 
 60 | ```````````````````````````````` example
 61 | **。**话
 62 | .
 63 | <p>**。**话</p>
 64 | ````````````````````````````````
 65 | 
 66 | Issue jgm/cmark#177 - incorrect emphasis parsing
 67 | 
 68 | ```````````````````````````````` example
 69 | a***b* c*
 70 | .
 71 | <p>a*<em><em>b</em> c</em></p>
 72 | ````````````````````````````````
 73 | 
 74 | Issue jgm/CommonMark#468 - backslash at end of link definition
 75 | 
 76 | 
 77 | ```````````````````````````````` example
 78 | [\]: test
 79 | .
 80 | <p>[]: test</p>
 81 | ````````````````````````````````
 82 | 
 83 | Issue jgm/commonmark.js#121 - punctuation set different
 84 | 
 85 | ```````````````````````````````` example
 86 | ^_test_
 87 | .
 88 | <p>^<em>test</em></p>
 89 | ````````````````````````````````
 90 | 
 91 | Issue #116 - tabs before and after ATX closing heading
 92 | ```````````````````````````````` example
 93 | # foo→#→
 94 | .
 95 | <h1>foo</h1>
 96 | ````````````````````````````````
 97 | 
 98 | commonmark/CommonMark#493 - escaped space not allowed in link destination.
 99 | 
100 | ```````````````````````````````` example
101 | [link](a\ b)
102 | .
103 | <p>[link](a\ b)</p>
104 | ````````````````````````````````
105 | 
106 | Issue #527 - meta tags in inline contexts
107 | 
108 | ```````````````````````````````` example
109 | City:
110 | <span itemprop="contentLocation" itemscope itemtype="https://schema.org/City">
111 |   <meta itemprop="name" content="Springfield">
112 | </span>
113 | .
114 | <p>City:
115 | <span itemprop="contentLocation" itemscope itemtype="https://schema.org/City">
116 | <meta itemprop="name" content="Springfield">
117 | </span></p>
118 | ````````````````````````````````
119 | 
120 | Double-encoding.
121 | 
122 | ```````````````````````````````` example
123 | [XSS](javascript&amp;colon;alert%28&#039;XSS&#039;%29)
124 | .
125 | <p><a href="javascript&amp;colon;alert('XSS')">XSS</a></p>
126 | ````````````````````````````````
127 | 
128 | Issue commonmark#517 - script, pre, style close tag without
129 | opener.
130 | 
131 | ```````````````````````````````` example
132 | </script>
133 | 
134 | </pre>
135 | 
136 | </style>
137 | .
138 | </script>
139 | </pre>
140 | </style>
141 | ````````````````````````````````
142 | 
143 | Issue #289.
144 | 
145 | ```````````````````````````````` example
146 | [a](<b) c>
147 | .
148 | <p>[a](&lt;b) c&gt;</p>
149 | ````````````````````````````````
150 | 
151 | icyleaf/markd issue #80
152 | 
153 | ```````````````````````````````` example
154 | 1212121212121
155 | .
156 | <p>1212121212121</p>
157 | ````````````````````````````````
158 | 


--------------------------------------------------------------------------------
/spec/spec_helper.cr:
--------------------------------------------------------------------------------
  1 | require "spec"
  2 | require "../src/markd"
  3 | 
  4 | def describe_spec(file, smart = false, render = false, gfm = false)
  5 |   file = File.join(__DIR__, file)
  6 |   # Sections parsed from the fixture, keyed by heading text.
  7 |   sections = extract_spec_tests(file)
  8 |   # 1-based section numbers to leave out; nothing is skipped by default.
  9 |   skipped = [] of Int32
 10 | 
 11 |   if render
 12 |     puts "Run [#{file}] examples"
 13 |     total_sections = 0
 14 |     total_examples = 0
 15 |     sections.each_with_index(1) do |(title, cases), number|
 16 |       label = "#{number.to_s.rjust(2)}. #{title} (#{cases.size})"
 17 |       if skipped.includes?(number)
 18 |         puts label + " [SKIP]"
 19 |       else
 20 |         total_sections += 1
 21 |         total_examples += cases.size
 22 |         puts label
 23 |       end
 24 |     end
 25 |     puts "Total #{total_sections} describes and #{total_examples} examples"
 26 |   end
 27 |   # Register one spec group per section that is not skipped.
 28 |   sections.each_with_index(1) do |(title, cases), number|
 29 |     unless skipped.includes?(number)
 30 |       assert_section(file, title, cases, smart, gfm)
 31 |     end
 32 |   end
 33 | end
 34 | 
 35 | def assert_section(file, section, examples, smart, gfm = false) # one `describe` group per fixture section
 36 |   describe section do
 37 |     examples.each do |index, example| # index: example number within the section; example: its field hash
 38 |       assert_example(file, section, index, example, smart, gfm)
 39 |     end
 40 |   end
 41 | end
 42 | 
 43 | def assert_example(file, section, index, example, smart, gfm = false)
 44 |   # Fixtures write tabs as "→"; turn them back into real tabs.
 45 |   source = example["markdown"].gsub("→", "\t").chomp
 46 |   expected = example["html"].gsub("→", "\t")
 47 |   lineno = example["line"].to_i
 48 |   raw_tags = example["test_tags"]
 49 |   tags = raw_tags.split(" ")
 50 |   title = "- #{index}\n#{show_space(source)}"
 51 | 
 52 |   options = Markd::Options.new(
 53 |     gfm: gfm || tags.includes?("gfm"), emoji: tags.includes?("emoji"),
 54 |     tagfilter: tags.includes?("tagfilter"), autolink: tags.includes?("autolink")
 55 |   )
 56 |   options.smart = true if smart
 57 | 
 58 |   if raw_tags.ends_with?("pending")
 59 |     pending title, file, lineno do
 60 |       Markd.to_html(source, options).should eq(expected), file: file, line: lineno
 61 |     end
 62 |   else
 63 |     it title, file, lineno do
 64 |       rendered = Markd.to_html(source, options)
 65 |       # "<IGNORE>" expectations only require that rendering completes.
 66 |       next if expected == "<IGNORE>\n"
 67 |       rendered.should eq(expected), file: file, line: lineno
 68 |     end
 69 |   end
 70 | end
 71 | 
 72 | def extract_spec_tests(file) # parses a fixture into {section => {example number => {"line", "markdown", "html", "test_tags"}}}
 73 |   examples = {} of String => Hash(Int32, Hash(String, String))
 74 | 
 75 |   current_section = 0
 76 |   example_count = 0
 77 |   test_start = false
 78 |   result_start = false
 79 | 
 80 |   begin
 81 |     File.open(file) do |input|
 82 |       line_number = 0
 83 |       test_tags = ""
 84 | 
 85 |       while (line = input.read_line) # the loop ends via the IO::EOFError rescued below
 86 |         line_number += 1
 87 |         line = line.gsub(/\r\n?/, "\n")
 88 |         break if line.includes?("<!-- END TESTS -->")
 89 | 
 90 |         if !test_start && !result_start && (match = line.match(/^\#{1,6}\s+(.*)$/))
 91 |           current_section = match[1]
 92 |           examples[current_section] = {} of Int32 => Hash(String, String)
 93 |           example_count = 0
 94 |         else
 95 |           if !test_start && !result_start && line =~ /^`{32} example([a-z ])*$/
 96 |             test_start = true
 97 |             test_tags = line[line.rindex!(' ') + 1..-1] # NOTE(review): keeps only the last word, so only the final tag of a multi-tag fence survives
 98 |           elsif test_start && !result_start && line =~ /^\.$/
 99 |             test_start = false
100 |             result_start = true
101 |           elsif !test_start && result_start && line =~ /^`{32}/
102 |             result_start = false
103 |             example_count += 1
104 |           elsif test_start && !result_start
105 |             examples[current_section][example_count] ||= {
106 |               "line"      => line_number.to_s,
107 |               "markdown"  => "",
108 |               "html"      => "",
109 |               "test_tags" => (test_tags == "example" ? "" : test_tags), # an untagged fence yields "example" as its last word
110 |             } of String => String
111 | 
112 |             examples[current_section][example_count]["markdown"] += line + "\n"
113 |           elsif !test_start && result_start
114 |             examples[current_section][example_count]["html"] += line + "\n"
115 |           end
116 |         end
117 |       end
118 |     end
119 |   rescue IO::EOFError
120 |     # end of file reached: this is the normal way out of the read loop
121 |   end
122 | 
123 |   # Remove empty examples
124 |   examples.keys.each { |k| examples.delete(k) if examples[k].empty? }
125 |   examples
126 | end
127 | 
128 | def show_space(text)
129 |   text.tr("\t ", "→␣") # tab => "→", space => "␣", so whitespace is visible in spec titles
130 | end
131 | 


--------------------------------------------------------------------------------
/src/markd/node.cr:
--------------------------------------------------------------------------------
  1 | module Markd
  2 |   class Node # tree node linked to its parent, first/last child and previous/next sibling
  3 |     # Kind of syntax element a node represents.
  4 |     enum Type
  5 |       Document
  6 |       Paragraph
  7 |       Text
  8 |       Strong
  9 |       Emphasis
 10 |       Strikethrough
 11 |       Link
 12 |       Image
 13 |       Heading
 14 |       List
 15 |       Item
 16 |       BlockQuote
 17 |       Alert
 18 |       ThematicBreak
 19 |       Code
 20 |       CodeBlock
 21 |       HTMLBlock
 22 |       HTMLInline
 23 |       LineBreak
 24 |       SoftBreak
 25 | 
 26 |       CustomInLine
 27 |       CustomBlock
 28 |       Table
 29 |       TableCell
 30 |       TableRow
 31 |       # True when this type is listed in `CONTAINER_TYPES`.
 32 |       def container?
 33 |         CONTAINER_TYPES.includes?(self)
 34 |       end
 35 |     end
 36 |     # Types whose nodes `Walker` descends into and reports twice (entering and leaving).
 37 |     CONTAINER_TYPES = {
 38 |       Type::Document,
 39 |       Type::Paragraph,
 40 |       Type::Strong,
 41 |       Type::Emphasis,
 42 |       Type::Strikethrough,
 43 |       Type::Link,
 44 |       Type::Image,
 45 |       Type::Heading,
 46 |       Type::List,
 47 |       Type::Item,
 48 |       Type::BlockQuote,
 49 |       Type::Alert,
 50 |       Type::CustomInLine,
 51 |       Type::CustomBlock,
 52 |       Type::Table,
 53 |       Type::TableRow,
 54 |       Type::TableCell,
 55 |     }
 56 | 
 57 |     alias DataValue = String | Int32 | Bool
 58 |     alias DataType = Hash(String, DataValue)
 59 | 
 60 |     property type : Type
 61 |     # `data` is created lazily as an empty hash on first access.
 62 |     property(data) { {} of String => DataValue }
 63 |     property source_pos = { {1, 1}, {0, 0} }
 64 |     property text = ""
 65 |     property? open = true
 66 |     property? fenced = false
 67 |     property fence_language = ""
 68 |     property fence_char = ""
 69 |     property fence_length = 0
 70 |     property fence_offset = 0
 71 |     property? last_line_blank = false
 72 |     property? last_line_checked = false
 73 |     # Tree links: the plain getters raise when the link is nil, the `?` variants return nil instead.
 74 |     property! parent : Node?
 75 |     property! first_child : Node?
 76 |     property! last_child : Node?
 77 |     property! prev : Node?
 78 |     property! next : Node?
 79 | 
 80 |     def initialize(@type)
 81 |     end
 82 |     # Detaches *child* from wherever it is and appends it as the last child of this node.
 83 |     def append_child(child : Node)
 84 |       child.unlink
 85 |       child.parent = self
 86 | 
 87 |       if (last = last_child?)
 88 |         last.next = child
 89 |         child.prev = last
 90 |         @last_child = child
 91 |       else
 92 |         @first_child = child
 93 |         @last_child = child
 94 |       end
 95 |     end
 96 |     # Detaches *sibling* and inserts it immediately after this node, under the same parent.
 97 |     def insert_after(sibling : Node)
 98 |       sibling.unlink
 99 | 
100 |       if (nxt = next?)
101 |         nxt.prev = sibling
102 |       elsif (parent = parent?) # no next sibling: *sibling* becomes the parent's last child
103 |         parent.last_child = sibling
104 |       end
105 |       sibling.next = nxt
106 | 
107 |       sibling.prev = self
108 |       @next = sibling
109 |       sibling.parent = parent?
110 |     end
111 |     # Removes this node from its parent and siblings, repairing their links; its own children stay attached.
112 |     def unlink
113 |       if (prev = prev?)
114 |         prev.next = next?
115 |       elsif (parent = parent?) # this node was the first child
116 |         parent.first_child = next?
117 |       end
118 | 
119 |       if (nxt = next?)
120 |         nxt.prev = prev?
121 |       elsif (parent = parent?) # this node was the last child
122 |         parent.last_child = prev?
123 |       end
124 | 
125 |       @parent = nil
126 |       @next = nil
127 |       @prev = nil
128 |     end
129 |     # Returns a new `Walker` rooted at this node.
130 |     def walker
131 |       Walker.new(self)
132 |     end
133 |     # Writes a debug representation: type, plus parent, next sibling and data when present.
134 |     def to_s(io : IO)
135 |       io << "#<" << {{@type.name.id.stringify}} << ":0x" # macro: the class name as a string literal
136 |       object_id.to_s(16, io)
137 |       io << " @type=" << @type
138 |       io << " @parent=" << @parent if @parent
139 |       io << " @next=" << @next if @next
140 | 
141 |       data = @data
142 |       io << " @data=" << data if data && !data.empty?
143 | 
144 |       io << ">"
145 |       nil
146 |     end
147 |     # Depth-first traversal starting at a root node; container nodes are reported on entry and on exit.
148 |     private class Walker
149 |       def initialize(@root : Node)
150 |         @current = @root
151 |         @entering = true
152 |       end
153 |       # Returns `{node, entering}` for the current step and advances; returns nil once the traversal is over.
154 |       def next
155 |         current = @current
156 |         return unless current
157 | 
158 |         entering = @entering
159 | 
160 |         if entering && current.type.container?
161 |           if (first_child = current.first_child?)
162 |             @current = first_child
163 |             @entering = true
164 |           else
165 |             @entering = false # childless container: report it again as "leaving" on the next call
166 |           end
167 |         elsif current == @root
168 |           @current = nil
169 |         elsif current.next?
170 |           @current = current.next?
171 |           @entering = true
172 |         else
173 |           @current = current.parent? # last sibling: go back up and leave the parent
174 |           @entering = false
175 |         end
176 | 
177 |         return current, entering
178 |       end
179 |       # Repositions the walker so the following `next` call reports *node* with the given *entering* flag.
180 |       def resume_at(node : Node, entering : Bool)
181 |         @current = node
182 |         @entering = entering
183 |       end
184 |     end
185 |   end
186 | end
187 | 


--------------------------------------------------------------------------------
/spec/fixtures/smart_punct.txt:
--------------------------------------------------------------------------------
  1 | ## Smart punctuation
  2 | 
  3 | Open quotes are matched with closed quotes.
  4 | The same method is used for matching openers and closers
  5 | as is used in emphasis parsing:
  6 | 
  7 | ```````````````````````````````` example
  8 | "Hello," said the spider.
  9 | "'Shelob' is my name."
 10 | .
 11 | <p>“Hello,” said the spider.
 12 | “‘Shelob’ is my name.”</p>
 13 | ````````````````````````````````
 14 | 
 15 | ```````````````````````````````` example
 16 | 'A', 'B', and 'C' are letters.
 17 | .
 18 | <p>‘A’, ‘B’, and ‘C’ are letters.</p>
 19 | ````````````````````````````````
 20 | 
 21 | ```````````````````````````````` example
 22 | 'Oak,' 'elm,' and 'beech' are names of trees.
 23 | So is 'pine.'
 24 | .
 25 | <p>‘Oak,’ ‘elm,’ and ‘beech’ are names of trees.
 26 | So is ‘pine.’</p>
 27 | ````````````````````````````````
 28 | 
 29 | ```````````````````````````````` example
 30 | 'He said, "I want to go."'
 31 | .
 32 | <p>‘He said, “I want to go.”’</p>
 33 | ````````````````````````````````
 34 | 
 35 | A single quote that isn't an open quote matched
 36 | with a close quote will be treated as an
 37 | apostrophe:
 38 | 
 39 | ```````````````````````````````` example
 40 | Were you alive in the 70's?
 41 | .
 42 | <p>Were you alive in the 70’s?</p>
 43 | ````````````````````````````````
 44 | 
 45 | ```````````````````````````````` example
 46 | Here is some quoted '`code`' and a "[quoted link](url)".
 47 | .
 48 | <p>Here is some quoted ‘<code>code</code>’ and a “<a href="url">quoted link</a>”.</p>
 49 | ````````````````````````````````
 50 | 
 51 | Here the first `'` is treated as an apostrophe, not
 52 | an open quote, because the final single quote is matched
 53 | by the single quote before `jolly`:
 54 | 
 55 | ```````````````````````````````` example
 56 | 'tis the season to be 'jolly'
 57 | .
 58 | <p>’tis the season to be ‘jolly’</p>
 59 | ````````````````````````````````
 60 | 
 61 | Multiple apostrophes should not be marked as open/closing quotes.
 62 | 
 63 | ```````````````````````````````` example
 64 | 'We'll use Jane's boat and John's truck,' Jenna said.
 65 | .
 66 | <p>‘We’ll use Jane’s boat and John’s truck,’ Jenna said.</p>
 67 | ````````````````````````````````
 68 | 
 69 | An unmatched double quote will be interpreted as a
 70 | left double quote, to facilitate this style:
 71 | 
 72 | ```````````````````````````````` example
 73 | "A paragraph with no closing quote.
 74 | 
 75 | "Second paragraph by same speaker, in fiction."
 76 | .
 77 | <p>“A paragraph with no closing quote.</p>
 78 | <p>“Second paragraph by same speaker, in fiction.”</p>
 79 | ````````````````````````````````
 80 | 
 81 | Quotes that are escaped come out as literal straight
 82 | quotes:
 83 | 
 84 | ```````````````````````````````` example
 85 | \"This is not smart.\"
 86 | This isn\'t either.
 87 | 5\'8\"
 88 | .
 89 | <p>&quot;This is not smart.&quot;
 90 | This isn't either.
 91 | 5'8&quot;</p>
 92 | ````````````````````````````````
 93 | 
 94 | Two hyphens form an en-dash, three an em-dash.
 95 | 
 96 | ```````````````````````````````` example
 97 | Some dashes:  em---em
 98 | en--en
 99 | em --- em
100 | en -- en
101 | 2--3
102 | .
103 | <p>Some dashes:  em—em
104 | en–en
105 | em — em
106 | en – en
107 | 2–3</p>
108 | ````````````````````````````````
109 | 
110 | A sequence of more than three hyphens is
111 | parsed as a sequence of em and/or en dashes,
112 | with no hyphens. If possible, a homogeneous
113 | sequence of dashes is used (so, 10 hyphens
114 | = 5 en dashes, and 9 hyphens = 3 em dashes).
115 | When a heterogeneous sequence must be used,
116 | the em dashes come first, followed by the en
117 | dashes, and as few en dashes as possible are
118 | used (so, 7 hyphens = 2 em dashes and 1 en
119 | dash).
120 | 
121 | ```````````````````````````````` example
122 | one-
123 | two--
124 | three---
125 | four----
126 | five-----
127 | six------
128 | seven-------
129 | eight--------
130 | nine---------
131 | thirteen-------------.
132 | .
133 | <p>one-
134 | two–
135 | three—
136 | four––
137 | five—–
138 | six——
139 | seven—––
140 | eight––––
141 | nine———
142 | thirteen———––.</p>
143 | ````````````````````````````````
144 | 
145 | Hyphens can be escaped:
146 | 
147 | ```````````````````````````````` example
148 | Escaped hyphens: \-- \-\-\-.
149 | .
150 | <p>Escaped hyphens: -- ---.</p>
151 | ````````````````````````````````
152 | 
153 | Three periods form an ellipsis:
154 | 
155 | ```````````````````````````````` example
156 | Ellipses...and...and....
157 | .
158 | <p>Ellipses…and…and….</p>
159 | ````````````````````````````````
160 | 
161 | Periods can be escaped if ellipsis-formation
162 | is not wanted:
163 | 
164 | ```````````````````````````````` example
165 | No ellipses\.\.\.
166 | .
167 | <p>No ellipses...</p>
168 | ````````````````````````````````
169 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | # Change Log
  2 | 
  3 | All notable changes to this project will be documented in this file.
  4 | 
  5 | The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
  6 | and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
  7 | 
  8 | ## [Unreleased]
  9 | 
 10 | ### Added
 11 | 
 12 | - Tables #[72](https://github.com/icyleaf/markd/pull/72) thanks @[ralsina](https://github.com/ralsina)
 13 | - Alerts #[94](https://github.com/icyleaf/markd/pull/94) thanks @[ralsina](https://github.com/ralsina)
 14 | - Extended Autolinks #[86](https://github.com/icyleaf/markd/pull/86) thanks @[ralsina](https://github.com/ralsina)
 15 | - Tartrazine code block syntax highlighting #[67](https://github.com/icyleaf/markd/pull/67) thanks @[zw963](https://github.com/zw963).
 16 | - Tagfilter option for GFM #[64](https://github.com/icyleaf/markd/pull/64) thanks @[nobodywasishere](https://github.com/nobodywasishere).
 17 | - Task list / checkbox support for GFM #[63](https://github.com/icyleaf/markd/pull/63) thanks @[nobodywasishere](https://github.com/nobodywasishere).
 18 | - Strikethrough support for GFM #[62](https://github.com/icyleaf/markd/pull/62) thanks @[nobodywasishere](https://github.com/nobodywasishere).
 19 | - Emoji support for GFM #[61](https://github.com/icyleaf/markd/pull/61) thanks @[nobodywasishere](https://github.com/nobodywasishere).
 20 | 
 21 | ### Fixed
 22 | 
 23 | - Large numbers failing to be parsed as starts of lists #[81](https://github.com/icyleaf/markd/pull/81)
 24 | 
 25 | ### TODO
 26 | 
 27 | - Full GFM support
 28 | 
 29 | ## [0.5.0] (2022-06-14)
 30 | 
 31 | - Support CommonMark 0.29 #[50](https://github.com/icyleaf/markd/pull/50) thanks @[HertzDevil](https://github.com/HertzDevil).
 32 | - Fix typos #[47](https://github.com/icyleaf/markd/pull/47) #[49](https://github.com/icyleaf/markd/pull/49) thanks @[kianmeng](https://github.com/kianmeng), @[jsoref](https://github.com/jsoref).
 33 | 
 34 | ## [0.4.2] (2021-10-19)
 35 | 
 36 | ### Added
 37 | 
 38 | - Enable Table of Content (TOC) #[41](https://github.com/icyleaf/markd/pull/41) thanks @[Nephos](https://github.com/Nephos).
 39 | 
 40 | ### Fixed
 41 | 
 42 | - Fix byte slice negative #[43](https://github.com/icyleaf/markd/pull/43).
 43 | - Compatibility with Crystal 1.2.
 44 | 
 45 | ## [0.4.1] (2021-09-27)
 46 | 
 47 | ### Added
 48 | 
 49 | - Refactor Options and change to a class #[36](https://github.com/icyleaf/markd/pull/36) thanks @[straight-shoota](https://github.com/straight-shoota).
 50 | - Add `lang` parameter to `HTMLRenderer#code_block_body` #[38](https://github.com/icyleaf/markd/pull/38) thanks @[straight-shoota](https://github.com/straight-shoota).
 51 | 
 52 | ## [0.4.0] (2021-03-23)
 53 | 
 54 | - Compatibility with Crystal 1.0. #[34](https://github.com/icyleaf/markd/pull/34) thanks @[bcardiff](https://github.com/bcardiff).
 55 | 
 56 | ## [0.3.0] (2021-03-02)
 57 | 
 58 | No changelog.
 59 | 
 60 | ## [0.2.1] (2020-08-24)
 61 | 
 62 | ### Added
 63 | 
 64 | - Add Options#base_url to allow resolving relative links. #[26](https://github.com/icyleaf/markd/pull/26), #[28](https://github.com/icyleaf/markd/pull/28) thanks @[straight-shoota](https://github.com/straight-shoota).
 65 | 
 66 | ### Fixed
 67 | 
 68 | - [high severity] escape unsafe html entry inline of code block. #[32](https://github.com/icyleaf/markd/pull/32).
 69 | - Fixed some typos in README. #[29](https://github.com/icyleaf/markd/pull/29) thanks @[Calamari](https://github.com/Calamari).
 70 | 
 71 | ## [0.2.0] (2019-10-08)
 72 | 
 73 | ### Changed
 74 | 
 75 | - Speed optimizations. Many thanks @[asterite](https://github.com/asterite). #[19](https://github.com/icyleaf/markd/pull/19)
 76 | 
 77 | ### Fixed
 78 | 
 79 | - Compatibility with Crystal 0.31. #[22](https://github.com/icyleaf/markd/pull/22).
 80 | 
 81 | ## [0.1.2] (2019-08-26)
 82 | 
 83 | - Use Crystal v0.31.0 as default compiler.
 84 | 
 85 | ## [0.1.1] (2017-12-26)
 86 | 
 87 | - Minor refactoring and improving speed. thanks @[straight-shoota](https://github.com/straight-shoota).
 88 | - Use Crystal v0.24.1 as default compiler.
 89 | 
 90 | ## 0.1.0 (2017-09-22)
 91 | 
 92 | - [initial implementation](https://github.com/icyleaf/markd/milestone/1?closed=1)
 93 | 
 94 | [Unreleased]: https://github.com/icyleaf/markd/compare/v0.5.0...HEAD
 95 | [0.5.0]: https://github.com/icyleaf/markd/compare/v0.4.2...v0.5.0
 96 | [0.4.2]: https://github.com/icyleaf/markd/compare/v0.4.1...v0.4.2
 97 | [0.4.1]: https://github.com/icyleaf/markd/compare/v0.4.0...v0.4.1
 98 | [0.4.0]: https://github.com/icyleaf/markd/compare/v0.3.0...v0.4.0
 99 | [0.3.0]: https://github.com/icyleaf/markd/compare/v0.2.1...v0.3.0
100 | [0.2.1]: https://github.com/icyleaf/markd/compare/v0.2.0...v0.2.1
101 | [0.2.0]: https://github.com/icyleaf/markd/compare/v0.1.2...v0.2.0
102 | [0.1.2]: https://github.com/icyleaf/markd/compare/v0.1.1...v0.1.2
103 | [0.1.1]: https://github.com/icyleaf/markd/compare/v0.1.0...v0.1.1
104 | 


--------------------------------------------------------------------------------
/src/markd/renderer.cr:
--------------------------------------------------------------------------------
  1 | module Markd
  2 |   # Drives rendering: walks a document tree and calls one hook per node.
  3 |   #
  4 |   # Subclasses implement the abstract hooks below; this class supplies the
  5 |   # output buffer, the escaping helpers and the dispatch loop in `render`.
  6 |   abstract class Renderer
  7 |     # Keeps *options* (`render` reads its `time?` flag) and sets up an
  8 |     # empty output buffer.
  9 |     def initialize(@options = Options.new)
 10 |       # Seeded with "\n" so `newline` writes nothing before any output.
 11 |       @last_output = "\n"
 12 |       @output_io = String::Builder.new
 13 |     end
 14 | 
 15 |     # Writes *string* to the buffer after passing it through `escape`.
 16 |     def output(string : String)
 17 |       escaped = escape(string)
 18 |       literal(escaped)
 19 |     end
 20 | 
 21 |     # Writes *string* to the buffer untouched and records it as the most
 22 |     # recent chunk, which is what `newline` inspects.
 23 |     def literal(string : String)
 24 |       @output_io << string
 25 |       @last_output = string
 26 |     end
 27 | 
 28 |     # Writes a line feed unless the most recent chunk was exactly "\n".
 29 |     # A chunk that merely ends in "\n" does not suppress it.
 30 |     def newline
 31 |       return if @last_output == "\n"
 32 |       literal("\n")
 33 |     end
 34 | 
 35 |     # Characters rewritten by `escape`, with their replacements.
 36 |     private ESCAPES = {
 37 |       '&' => "&amp;",
 38 |       '"' => "&quot;",
 39 |       '<' => "&lt;",
 40 |       '>' => "&gt;",
 41 |     }
 42 | 
 43 |     # Returns *text* with every `ESCAPES` key replaced by its entity.
 44 |     #
 45 |     # Text without any of those characters is returned as is, which skips
 46 |     # the allocation and processing `String#gsub` would otherwise do.
 47 |     def escape(text)
 48 |       return text unless has_escape_char?(text)
 49 |       text.gsub(ESCAPES)
 50 |     end
 51 | 
 52 |     # Tells whether any byte of *text* is one of `&`, `"`, `<` or `>`.
 53 |     # Comparing bytes is enough because all four are single-byte ASCII.
 54 |     private def has_escape_char?(text)
 55 |       text.each_byte do |byte|
 56 |         special = '&' === byte || '"' === byte ||
 57 |                   '<' === byte || '>' === byte
 58 |         return true if special
 59 |       end
 60 |       false
 61 |     end
 62 | 
 63 |     # Hooks invoked by `render`, one per node type. Each one is called with
 64 |     # the node and the `entering` flag taken from the walker event.
 65 |     abstract def heading(node : Node, entering : Bool) : Nil
 66 |     abstract def list(node : Node, entering : Bool) : Nil
 67 |     abstract def item(node : Node, entering : Bool) : Nil
 68 |     abstract def block_quote(node : Node, entering : Bool) : Nil
 69 |     abstract def alert(node : Node, entering : Bool) : Nil
 70 |     abstract def thematic_break(node : Node, entering : Bool) : Nil
 71 |     abstract def code_block(node : Node, entering : Bool, formatter : T?) : Nil forall T
 72 |     abstract def code(node : Node, entering : Bool) : Nil
 73 |     abstract def html_block(node : Node, entering : Bool) : Nil
 74 |     abstract def html_inline(node : Node, entering : Bool) : Nil
 75 |     abstract def paragraph(node : Node, entering : Bool) : Nil
 76 |     abstract def emphasis(node : Node, entering : Bool) : Nil
 77 |     abstract def soft_break(node : Node, entering : Bool) : Nil
 78 |     abstract def line_break(node : Node, entering : Bool) : Nil
 79 |     abstract def strong(node : Node, entering : Bool) : Nil
 80 |     abstract def strikethrough(node : Node, entering : Bool) : Nil
 81 |     abstract def link(node : Node, entering : Bool) : Nil
 82 |     abstract def image(node : Node, entering : Bool) : Nil
 83 |     abstract def text(node : Node, entering : Bool) : Nil
 84 |     abstract def table(node : Node, entering : Bool) : Nil
 85 |     abstract def table_row(node : Node, entering : Bool) : Nil
 86 |     abstract def table_cell(node : Node, entering : Bool) : Nil
 87 | 
 88 |     # Walks *document* and calls the hook matching each node's type; node
 89 |     # types without a branch of their own are sent to `text`. *formatter*
 90 |     # is forwarded to `code_block` only. The whole walk runs inside
 91 |     # `Utils.timer`, which is given the `time?` option.
 92 |     #
 93 |     # Returns the buffered output with its first "\n" removed.
 94 |     # NOTE(review): `sub` drops the first newline wherever it occurs —
 95 |     # confirm it is only ever a leading one.
 96 |     def render(document : Node, formatter : T?) forall T
 97 |       Utils.timer("rendering", @options.time?) do
 98 |         walker = document.walker
 99 |         while (step = walker.next)
100 |           node, entering = step
101 | 
102 |           case node.type
103 |           when Node::Type::Heading       then heading(node, entering)
104 |           when Node::Type::List          then list(node, entering)
105 |           when Node::Type::Item          then item(node, entering)
106 |           when Node::Type::BlockQuote    then block_quote(node, entering)
107 |           when Node::Type::Alert         then alert(node, entering)
108 |           when Node::Type::ThematicBreak then thematic_break(node, entering)
109 |           when Node::Type::CodeBlock     then code_block(node, entering, formatter)
110 |           when Node::Type::Code          then code(node, entering)
111 |           when Node::Type::HTMLBlock     then html_block(node, entering)
112 |           when Node::Type::HTMLInline    then html_inline(node, entering)
113 |           when Node::Type::Paragraph     then paragraph(node, entering)
114 |           when Node::Type::Emphasis      then emphasis(node, entering)
115 |           when Node::Type::SoftBreak     then soft_break(node, entering)
116 |           when Node::Type::LineBreak     then line_break(node, entering)
117 |           when Node::Type::Strong        then strong(node, entering)
118 |           when Node::Type::Strikethrough then strikethrough(node, entering)
119 |           when Node::Type::Link          then link(node, entering)
120 |           when Node::Type::Image         then image(node, entering)
121 |           when Node::Type::Table         then table(node, entering)
122 |           when Node::Type::TableRow      then table_row(node, entering)
123 |           when Node::Type::TableCell     then table_cell(node, entering)
124 |           else
125 |             text(node, entering)
126 |           end
127 |         end
128 |       end
129 | 
130 |       @output_io.to_s.sub("\n", "")
131 |     end
132 |   end
133 | end
134 | 
135 | require "./renderers/*"
136 | 


--------------------------------------------------------------------------------
/src/markd/rules/table.cr:
--------------------------------------------------------------------------------
  1 | module Markd::Rule
  2 |   struct Table
  3 |     include Rule
  4 | 
  5 |     # Opens a `Table` node when GFM is enabled and the current line looks
  6 |     # like the first row of a table.
  7 |     def match(parser : Parser, container : Node) : MatchValue
  8 |       # Looks like the 1st line of a table and we have gfm enabled
  9 |       if parser.gfm? && match?(parser)
 10 |         parser.close_unmatched_blocks
 11 |         parser.add_child(Node::Type::Table, 0)
 12 | 
 13 |         MatchValue::Leaf
 14 |       else
 15 |         MatchValue::None
 16 |       end
 17 |     end
 18 | 
 19 |     # Decides if the table continues or if it ended before the current line.
 20 |     # Delimiter rows, table rows and plain continuation text keep it open.
 21 |     def continue(parser : Parser, container : Node) : ContinueStatus
 22 |       # Only continue if line looks like a divider or a table row
 23 |       if match_continuation?(parser)
 24 |         ContinueStatus::Continue
 25 |       else
 26 |         ContinueStatus::Stop
 27 |       end
 28 |     end
 29 | 
 30 |     # Because of `match` and `continue` the `container` has
 31 |     # all the text of the table. We parse it here and
 32 |     # insert all `TableRow` and `TableCell` nodes from parsing.
 33 |     #
 34 |     # First, it will perform a sanity check, and if the
 35 |     # table is broken it will be converted into a `Paragraph`
 36 |     # and no row or cell nodes are created.
 37 |     def token(parser : Parser, container : Node) : Nil
 38 |       lines = container.text.strip.split("\n")
 39 | 
 40 |       row_sizes = lines[...2].map do |line|
 41 |         strip_pipe(line.strip).split(TABLE_CELL_SEPARATOR).size
 42 |       end.uniq!
 43 | 
 44 |       # Do we have a real table?
 45 |       # * At least two lines
 46 |       # * Second line is a divider
 47 |       # * First two lines have the same number of cells
 48 | 
 49 |       if lines.size < 2 || !lines[1].match(TABLE_HEADING_SEPARATOR) ||
 50 |          row_sizes.size != 1
 51 |         # Not enough table or a broken table.
 52 |         # We need to convert it into a paragraph
 53 |         # I am fairly sure this is not supposed to work
 54 |         container.type = Node::Type::Paragraph
 55 |         return
 56 |       end
 57 | 
 58 |       max_row_size = row_sizes[0]
 59 |       has_body = lines.size > 2
 60 |       container.data["has_body"] = has_body
 61 | 
 62 |       alignments = strip_pipe(lines[1].strip).split(TABLE_CELL_SEPARATOR).map do |cell|
 63 |         cell = cell.strip
 64 |         if cell.starts_with?(":") && cell.ends_with?(":")
 65 |           "center"
 66 |         elsif cell.starts_with?(":")
 67 |           "left"
 68 |         elsif cell.ends_with?(":")
 69 |           "right"
 70 |         else
 71 |           ""
 72 |         end
 73 |       end
 74 | 
 75 |       # Each line maps to a table row
 76 |       lines.each_with_index do |line, i|
 77 |         next if i == 1
 78 |         row = Node.new(Node::Type::TableRow)
 79 |         row.data["heading"] = i == 0
 80 |         row.data["has_body"] = has_body
 81 |         container.append_child(row)
 82 |         # This splits on | but not on \| (escaped |)
 83 |         cells = strip_pipe(line.strip).split(TABLE_CELL_SEPARATOR)[...max_row_size]
 84 | 
 85 |         # Each row should have exactly the same size as the header.
 86 |         while cells.size < max_row_size
 87 |           cells << ""
 88 |         end
 89 | 
 90 |         # Create cells with text and metadata
 91 |         cells.each_with_index do |text, j|
 92 |           cell = Node.new(Node::Type::TableCell)
 93 |           # Cell text should be stripped and escaped pipes unescaped
 94 |           cell.text = text.strip.gsub("\\|", "|")
 95 |           cell.data["align"] = alignments[j]
 96 |           cell.data["heading"] = i == 0
 97 |           row.append_child(cell)
 98 |         end
 99 |       end
100 |     end
101 | 
102 |     # Not really used because of how parsing is done
103 |     def can_contain?(type : Node::Type) : Bool
104 |       !type.container?
105 |     end
106 | 
107 |     # Tables are multi-line
108 |     def accepts_lines? : Bool
109 |       true
110 |     end
111 | 
112 |     # Match only lines that look like the first line of a table:
113 |     # * Start with a | or look like multiple cells separated by |
114 |     # * Are at least 3 characters long (smallest table starts are "|a|" or "a|b")
115 |     # * Are not indented
116 |     private def match?(parser)
117 |       !parser.indented && \
118 |          (parser.line[0]? == '|' || parser.line.match(TABLE_CELL_SEPARATOR)) &&
119 |           parser.line.size > 2
120 |     end
121 | 
122 |     # Match only lines that look like a table separator
123 |     # or start with a | or look like multiple cells separated by |
124 |     private def match_continuation?(parser : Parser)
125 |       !parser.indented && (parser.line[0]? == '|' ||
126 |         parser.line.match(TABLE_HEADING_SEPARATOR) ||
127 |         parser.line.match(TABLE_CELL_SEPARATOR)) ||
128 |         # Non-empty lines that do not start another block-level structure are ALSO
129 |         # continuations (see gfm-spec.txt:3397). `-` is last in `[*+-]` so it is a literal, not a range.
130 |         !(parser.line.strip.empty? || parser.line.matches?(/^(?:>|\#{1,6}|`{3}|\t{1}|\s{4}|(?:[*+-]\s)+|[0-9]+\.)+/))
131 |     end
132 | 
133 |     private def strip_pipe(text : String) : String
134 |       if text.ends_with?("\\|")
135 |         text.lstrip("|")
136 |       else
137 |         text.strip("|")
138 |       end
139 |     end
140 |   end
141 | end
142 | 


--------------------------------------------------------------------------------
/src/markd/rules/list.cr:
--------------------------------------------------------------------------------
  1 | module Markd::Rule
  2 |   struct List
  3 |     include Rule
  4 | 
  5 |     BULLET_LIST_MARKERS  = {'*', '+', '-'}
  6 |     ORDERED_LIST_MARKERS = {'.', ')'}
  7 | 
  8 |     # Starts a list item when the line opens with a list marker, adding a
  9 |     # new `List` node first unless the tip is a list matching that marker.
 10 |     def match(parser : Parser, container : Node) : MatchValue
 11 |       return MatchValue::None if parser.indented && !container.type.list?
 12 | 
 13 |       data = parse_list_marker(parser, container)
 14 |       return MatchValue::None if !data || data.empty?
 15 | 
 16 |       parser.close_unmatched_blocks
 17 |       if !parser.tip.type.list? || !list_match?(container.data, data)
 18 |         list_node = parser.add_child(Node::Type::List, parser.next_nonspace)
 19 |         list_node.data = data
 20 |       end
 21 | 
 22 |       item_node = parser.add_child(Node::Type::Item, parser.next_nonspace)
 23 |       item_node.data = data
 24 | 
 25 |       MatchValue::Container
 26 |     end
 27 | 
 28 |     # A list never rejects a line by itself.
 29 |     def continue(parser : Parser, container : Node) : ContinueStatus
 30 |       ContinueStatus::Continue
 31 |     end
 32 | 
 33 |     # Marks the list as loose (`tight` = false) when an item, or a child of
 34 |     # an item, ends with a blank line and something follows it.
 35 |     def token(parser : Parser, container : Node) : Nil
 36 |       item = container.first_child?
 37 |       while item
 38 |         if ends_with_blankline?(item) && item.next?
 39 |           container.data["tight"] = false
 40 |           break
 41 |         end
 42 | 
 43 |         subitem = item.first_child?
 44 |         while subitem
 45 |           if ends_with_blankline?(subitem) && (item.next? || subitem.next?)
 46 |             container.data["tight"] = false
 47 |             break
 48 |           end
 49 | 
 50 |           subitem = subitem.next?
 51 |         end
 52 | 
 53 |         item = item.next?
 54 |       end
 55 |     end
 56 | 
 57 |     def can_contain?(type)
 58 |       type.item?
 59 |     end
 60 | 
 61 |     def accepts_lines? : Bool
 62 |       false
 63 |     end
 64 | 
 65 |     private def list_match?(list_data, item_data)
 66 |       list_data["type"] == item_data["type"] &&
 67 |         list_data["delimiter"] == item_data["delimiter"] &&
 68 |         list_data["bullet_char"] == item_data["bullet_char"]
 69 |     end
 70 | 
 71 |     # Reads a list marker at the first non-space character. On success the
 72 |     # parser is advanced past the marker and its padding and the list data
 73 |     # is returned; otherwise an empty hash is returned and the parser is
 74 |     # left untouched.
 75 |     private def parse_list_marker(parser : Parser, container : Node) : Node::DataType
 76 |       empty_data = {} of String => Node::DataValue
 77 |       return empty_data if parser.indent >= 4
 78 | 
 79 |       data = {
 80 |         "delimiter"     => 0,
 81 |         "marker_offset" => parser.indent,
 82 |         "bullet_char"   => "",
 83 |         "tight"         => true, # lists are tight by default
 84 |         "start"         => 1,
 85 |       } of String => Node::DataValue
 86 | 
 87 |       line = parser.line[parser.next_nonspace..-1]
 88 | 
 89 |       # `[0]?` keeps an empty remainder from raising; it simply fails to match.
 90 |       if BULLET_LIST_MARKERS.includes?(line[0]?)
 91 |         data["bullet_char"] = line[0].to_s
 92 |         after_marker = parser.gfm? ? line[1..].strip : ""
 93 |         # GFM task list markers are `[ ]` (unchecked) and `[x]`/`[X]`
 94 |         # (checked); anything else is an ordinary bullet.
 95 |         if after_marker.starts_with?("[ ]")
 96 |           data["type"] = "checkbox"
 97 |           data["checked"] = false
 98 |           padding_checkbox = line.index!(']')
 99 |         elsif after_marker.starts_with?("[x]") || after_marker.starts_with?("[X]")
100 |           data["type"] = "checkbox"
101 |           data["checked"] = true
102 |           padding_checkbox = line.index!(']')
103 |         else
104 |           data["type"] = "bullet"
105 |         end
106 |         first_match_size = 1
107 |       else
108 |         pos = 0
109 |         while line[pos]?.try &.ascii_number?
110 |           pos += 1
111 |         end
112 | 
113 |         number = pos >= 1 ? line[0..pos - 1].to_i? : -1
114 |         return empty_data if number.nil?
115 | 
116 |         if pos >= 1 && pos <= 9 && ORDERED_LIST_MARKERS.includes?(line[pos]?) &&
117 |            (!container.type.paragraph? || number == 1)
118 |           data["type"] = "ordered"
119 |           data["start"] = number
120 |           data["delimiter"] = line[pos].to_s
121 |           first_match_size = pos + 1
122 |         else
123 |           return empty_data
124 |         end
125 |       end
126 | 
127 |       next_char = parser.line[parser.next_nonspace + first_match_size]?
128 |       return empty_data unless next_char.nil? || space_or_tab?(next_char)
129 | 
130 |       if container.type.paragraph? &&
131 |          parser.line[(parser.next_nonspace + first_match_size)..-1].each_char.all? &.ascii_whitespace?
132 |         return empty_data
133 |       end
134 | 
135 |       parser.advance_next_nonspace
136 |       parser.advance_offset(first_match_size, true)
137 | 
138 |       # Skip past the checkbox brackets ([])
139 |       if parser.gfm? && padding_checkbox
140 |         parser.advance_offset(padding_checkbox, true)
141 |       end
142 | 
143 |       spaces_start_column = parser.column
144 |       spaces_start_offset = parser.offset
145 | 
146 |       loop do
147 |         parser.advance_offset(1, true)
148 |         next_char = parser.line[parser.offset]?
149 |         break unless parser.column - spaces_start_column < 5 && space_or_tab?(next_char)
150 |       end
151 | 
152 |       blank_item = parser.line[parser.offset]?.nil?
153 |       spaces_after_marker = parser.column - spaces_start_column
154 |       if spaces_after_marker >= 5 || spaces_after_marker < 1 || blank_item
155 |         data["padding"] = first_match_size + 1
156 |         parser.column = spaces_start_column
157 |         parser.offset = spaces_start_offset
158 |         parser.advance_offset(1, true) if space_or_tab?(parser.line[parser.offset]?)
159 |       else
160 |         data["padding"] = first_match_size + spaces_after_marker
161 |       end
162 | 
163 |       data
164 |     end
165 | 
166 |     private def ends_with_blankline?(container : Node) : Bool
167 |       while container
168 |         return true if container.last_line_blank?
169 | 
170 |         break if container.last_line_checked? || !container.type.in?(Node::Type::List, Node::Type::Item)
171 |         container.last_line_checked = true
172 |         container = container.last_child?
173 |       end
174 | 
175 |       false
176 |     end
177 |   end
178 | end
179 | 


--------------------------------------------------------------------------------
/src/markd/rule.cr:
--------------------------------------------------------------------------------
  1 | module Markd
  2 |   module Rule
  3 |     ESCAPABLE_STRING    = %Q([!"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]) # regex source: one escapable punctuation character
  4 |     ESCAPED_CHAR_STRING = %Q(\\\\) + ESCAPABLE_STRING # regex source: a backslash followed by one escapable character
  5 | 
  6 |     NUMERIC_HTML_ENTITY = /^&#(?:[Xx][0-9a-fA-F]{1,6}|[0-9]{1,7});/
  7 |     HTML_ENTITY         = /^&[a-zA-Z0-9]+;/
  8 | 
  9 |     TAG_NAME_STRING             = %Q([A-Za-z][A-Za-z0-9-]*)
 10 |     ATTRIBUTE_NAME_STRING       = %Q([a-zA-Z_:][a-zA-Z0-9:._-]*)
 11 |     UNQUOTED_VALUE_STRING       = %Q([^"'=<>`\\x00-\\x20]+)
 12 |     SINGLE_QUOTED_VALUE_STRING  = %Q('[^']*')
 13 |     DOUBLE_QUOTED_VALUE_STRING  = %Q("[^"]*")
 14 |     ATTRIBUTE_VALUE_STRING      = "(?:" + UNQUOTED_VALUE_STRING + "|" + SINGLE_QUOTED_VALUE_STRING + "|" + DOUBLE_QUOTED_VALUE_STRING + ")"
 15 |     ATTRIBUTE_VALUE_SPEC_STRING = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTE_VALUE_STRING + ")"
 16 |     ATTRIBUTE                   = "(?:" + "\\s+" + ATTRIBUTE_NAME_STRING + ATTRIBUTE_VALUE_SPEC_STRING + "?)"
 17 | 
 18 |     MAYBE_SPECIAL  = {'#', '`', '~', '*', '+', '_', '=', '<', '>', '-', '|'}
 19 |     THEMATIC_BREAK = /^(?:(?:\*[ \t]*){3,}|(?:_[ \t]*){3,}|(?:-[ \t]*){3,})[ \t]*$/
 20 | 
 21 |     ESCAPABLE = /^#{ESCAPABLE_STRING}/
 22 | 
 23 |     TICKS = /`+/
 24 | 
 25 |     ELLIPSIS = "..."
 26 |     DASH     = /--+/
 27 | 
 28 |     OPEN_TAG  = "<" + TAG_NAME_STRING + ATTRIBUTE + "*" + "\\s*/?>" # NOTE(review): builds the same string as OPEN_TAG_STRING below
 29 |     CLOSE_TAG = "</" + TAG_NAME_STRING + "\\s*[>]" # NOTE(review): builds the same string as CLOSE_TAG_STRING below
 30 | 
 31 |     OPEN_TAG_STRING               = "<#{TAG_NAME_STRING}#{ATTRIBUTE}*" + "\\s*/?>"
 32 |     CLOSE_TAG_STRING              = "</#{TAG_NAME_STRING}\\s*[>]"
 33 |     COMMENT_STRING                = "<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->"
 34 |     PROCESSING_INSTRUCTION_STRING = "[<][?].*?[?][>]"
 35 |     DECLARATION_STRING            = "<![A-Z]+" + "\\s+[^>]*>"
 36 |     CDATA_STRING                  = "<!\\[CDATA\\[[\\s\\S]*?\\]\\]>"
 37 |     HTML_TAG_STRING               = "(?:#{OPEN_TAG_STRING}|#{CLOSE_TAG_STRING}|#{COMMENT_STRING}|#{PROCESSING_INSTRUCTION_STRING}|#{DECLARATION_STRING}|#{CDATA_STRING})"
 38 |     HTML_TAG                      = /^#{HTML_TAG_STRING}/i
 39 | 
 40 |     HTML_BLOCK_OPEN = [
 41 |       /^<(?:script|pre|style)(?:\s|>|$)/i,
 42 |       /^<!--/,
 43 |       /^<[?]/,
 44 |       /^<![A-Z]/,
 45 |       /^<!\[CDATA\[/,
 46 |       /^<[\/]?(?:address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)(?:\s|[\/]?[>]|$)/i,
 47 |       Regex.new("^(?:" + OPEN_TAG + "|" + CLOSE_TAG + ")\\s*$", Regex::Options::IGNORE_CASE),
 48 |     ]
 49 | 
 50 |     HTML_BLOCK_CLOSE = [ # NOTE(review): five entries for the seven start patterns above — presumably paired by index; verify in rules/html_block.cr
 51 |       /<\/(?:script|pre|style)>/i,
 52 |       /-->/,
 53 |       /\?>/,
 54 |       />/,
 55 |       /\]\]>/,
 56 |     ]
 57 | 
 58 |     LINK_TITLE = Regex.new("^(?:\"(#{ESCAPED_CHAR_STRING}|[^\"\\x00])*\"" +
 59 |                            "|'(#{ESCAPED_CHAR_STRING}|[^'\\x00])*'" +
 60 |                            "|\\((#{ESCAPED_CHAR_STRING}|[^)\\x00])*\\))")
 61 | 
 62 |     LINK_LABEL = Regex.new("^\\[(?:[^\\\\\\[\\]]|" + ESCAPED_CHAR_STRING + "|\\\\){0,}\\]")
 63 | 
 64 |     LINK_DESTINATION_BRACES = Regex.new("^(?:[<](?:[^<>\\t\\n\\\\\\x00]|" + ESCAPED_CHAR_STRING + ")*[>])")
 65 | 
 66 |     # A valid domain name is:
 67 |     #
 68 |     # segments of alphanumeric characters, underscores (_) and hyphens (-)
 69 |     # separated by periods (.). There must be at least one period, and no
 70 |     # underscores may be present in the last two segments of the domain.
 71 |     #
 72 |     # Alphanumeric characters in this context include emojis.
 73 |     LAST_DOMAIN_SEGMENT   = /(?:[a-zA-Z0-9\-\p{Emoji_Presentation}\-]+)/ # underscore-free; "\-" is listed twice, which is redundant but harmless
 74 |     OTHER_DOMAIN_SEGMENTS = /(?:[a-zA-Z0-9\p{Emoji_Presentation}\-_]+)/
 75 |     # The spec wants to capture greedily, even invalid domain names and then
 76 |     # reject the invalid ones later.
 77 |     # For example: www.xxx._yyy.zzz is never linked because of the
 78 |     # _ in the last segment.
 79 |     DOMAIN_NAME       = /(?:#{OTHER_DOMAIN_SEGMENTS}\.)*#{OTHER_DOMAIN_SEGMENTS}/
 80 |     VALID_DOMAIN_NAME = /^(?:#{OTHER_DOMAIN_SEGMENTS}\.)*(?:#{LAST_DOMAIN_SEGMENT}\.)+#{LAST_DOMAIN_SEGMENT}$/
 81 |     VALID_URL_PATH    = /(?:\/[^\s<]*)?/
 82 | 
 83 |     AUTOLINK_PROTOCOLS = /^(?:http|https|ftp):\/\// # carries the ^ anchor that PROTOCOL_AUTO_LINK relies on
 84 | 
 85 |     EMAIL_AUTO_LINK          = /^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/
 86 |     EXTENDED_EMAIL_AUTO_LINK = /^([a-zA-Z0-9][a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)[-_]*/
 87 |     AUTO_LINK                = /^<[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*>/i
 88 |     WWW_AUTO_LINK            = /^www\.#{DOMAIN_NAME}#{VALID_URL_PATH}/
 89 |     XMPP_AUTO_LINK           = /^xmpp:[A-Za-z0-9]+@#{DOMAIN_NAME}#{VALID_URL_PATH}/
 90 |     MAILTO_AUTO_LINK         = /^mailto:[A-Za-z0-9]+@#{DOMAIN_NAME}/
 91 |     PROTOCOL_AUTO_LINK       = /#{AUTOLINK_PROTOCOLS}#{DOMAIN_NAME}#{VALID_URL_PATH}[^\s?!.,:*_~]/ # the last character may not be whitespace or one of ?!.,:*_~
 92 | 
 93 |     WHITESPACE_CHAR = /^[ \t\n\x0b\x0c\x0d]/
 94 |     WHITESPACE      = /[ \t\n\x0b\x0c\x0d]+/
 95 |     LINE_ENDING     = /\x0d\n|\n|\x0d/ # CRLF is tried first so it counts as one ending, not a CR followed by an LF
 96 |     PUNCTUATION     = /[$+<=>^`|~\p{P}]/ # Regex.new("[!"#$%&'()*+,\-./:;<=>?@\[\]^_`{|}~\xA1\xA7\xAB\xB6\xB7\xBB\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E42\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]|\uD800[\uDD00-\uDD02\uDF9F\uDFD0]|\uD801\uDD6F|\uD802[\uDC57\uDD1F\uDD3F\uDE50-\uDE58\uDE7F\uDEF0-\uDEF6\uDF39-\uDF3F\uDF99-\uDF9C]|\uD804[\uDC47-\uDC4D\uDCBB\uDCBC\uDCBE-\uDCC1\uDD40-\uDD43\uDD74\uDD75\uDDC5-\uDDC9\uDDCD\uDDDB\uDDDD-\uDDDF\uDE38-\uDE3D\uDEA9]|\uD805[\uDCC6\uDDC1-\uDDD7\uDE41-\uDE43\uDF3C-\uDF3E]|\uD809[\uDC70-\uDC74]|\uD81A[\uDE6E\uDE6F\uDEF5\uDF37-\uDF3B\uDF44]|\uD82F\uDC9F|\uD836[\uDE87-\uDE8B]")
 97 | 
 98 |     UNSAFE_PROTOCOL      = /^javascript:|vbscript:|file:|data:/i # NOTE(review): ^ applies to "javascript:" only; the other three alternatives match anywhere in the string
 99 |     UNSAFE_DATA_PROTOCOL = /^data:image\/(?:png|gif|jpeg|webp)/i # NOTE(review): matches image data: URLs — presumably the permitted exception despite the name; verify at the call site
100 | 
101 |     CODE_INDENT = 4
102 | 
103 |     GFM_DISALLOWED_HTML_TAGS = %w[title textarea style xmp iframe noembed noframes script plaintext]
104 | 
105 |     TABLE_HEADING_SEPARATOR = /^(\|?\s*:{0,1}-:{0,1}+\s*)+(\||\s*)$/ # NOTE(review): the "+" after ":{0,1}" makes it possessive, so each group consumes a single "-" — confirm "-+" was not intended
106 |     TABLE_CELL_SEPARATOR    = /(?<!\\)\|/ # a pipe that is not preceded by a backslash
107 | 
108 |     ADMONITION_START = /^> \[!((?:NOTE|TIP|IMPORTANT|CAUTION|WARNING)+)](\s*.*)?$/ # NOTE(review): the "+" also accepts repeated keywords such as NOTENOTE
109 | 
110 |     # Match Value
111 |     #
112 |     # - None: no match
113 |     # - Container: match container, keep going
114 |     # - Leaf: match leaf, no more block starts
115 |     enum MatchValue
116 |       None
117 |       Container
118 |       Leaf
119 |     end
120 | 
121 |     # match and parse
122 |     abstract def match(parser : Parser, container : Node) : MatchValue
123 | 
124 |     # token finalize
125 |     abstract def token(parser : Parser, container : Node) : Nil
126 | 
127 |     # continue
128 |     abstract def continue(parser : Parser, container : Node) : ContinueStatus
129 | 
130 |     enum ContinueStatus
131 |       Continue # the open block matched this line; keep descending
132 |       Stop     # the block failed to match; the parser backs up to its parent
133 |       Return   # NOTE(review): handling is not visible here — see Parser::Block#process_line
134 |     end
135 | 
136 |     # accepts_line
137 |     abstract def accepts_lines? : Bool
138 | 
139 |     private def space_or_tab?(char : Char?) : Bool
140 |       {' ', '\t'}.includes?(char) # nil is in neither slot, so it yields false
141 |     end
142 |   end
143 | end
144 | 
145 | require "./rules/*"
146 | 


--------------------------------------------------------------------------------
/src/markd/parsers/block.cr:
--------------------------------------------------------------------------------
  1 | module Markd::Parser
  2 |   class Block
  3 |     include Parser
  4 | 
  5 |     def self.parse(source : String, options = Options.new) # one-shot entry point
  6 |       new(options).parse(source) # a fresh parser is built for every call
  7 |     end
  8 | 
  9 |     RULES = { # rule object consulted for each block node type (looked up by `container.type` in `process_line`)
 10 |       Node::Type::Document      => Rule::Document.new,
 11 |       Node::Type::BlockQuote    => Rule::BlockQuote.new,
 12 |       Node::Type::Alert         => Rule::BlockQuote.new, # Alerts and BlockQuotes are the same
 13 |       Node::Type::Heading       => Rule::Heading.new,
 14 |       Node::Type::CodeBlock     => Rule::CodeBlock.new,
 15 |       Node::Type::HTMLBlock     => Rule::HTMLBlock.new,
 16 |       Node::Type::ThematicBreak => Rule::ThematicBreak.new,
 17 |       Node::Type::List          => Rule::List.new,
 18 |       Node::Type::Item          => Rule::Item.new,
 19 |       Node::Type::Paragraph     => Rule::Paragraph.new,
 20 |       Node::Type::Table         => Rule::Table.new,
 21 |     }
 22 | 
 23 |     property! tip : Node? # nilable; `parse_blocks` loops until `tip?` returns nil
 24 |     property offset, column # writable because rules reposition the parser (see Rule::List)
 25 | 
 26 |     getter line, current_line, blank, inline_lexer,
 27 |       indent, indented, next_nonspace, refmap
 28 | 
 29 |     delegate gfm?, tagfilter?, to: @options # lets rules call `parser.gfm?` directly
 30 | 
 31 |     def initialize(@options : Options) # sets up an empty document and resets all per-line state
 32 |       @inline_lexer = Inline.new(@options)
 33 | 
 34 |       @document = Node.new(Node::Type::Document)
 35 |       @tip = @document # the document root is the first tip
 36 |       @oldtip = @tip
 37 |       @last_matched_container = @tip
 38 | 
 39 |       @line = ""
 40 | 
 41 |       @current_line = 0 # incremented once per line in `process_line`
 42 |       @offset = 0
 43 |       @column = 0
 44 |       @last_line_length = 0
 45 | 
 46 |       @next_nonspace = 0
 47 |       @next_nonspace_column = 0
 48 | 
 49 |       @indent = 0
 50 |       @indented = false
 51 |       @blank = false
 52 |       @partially_consumed_tab = false
 53 |       @all_closed = true
 54 |       @refmap = {} of String => Hash(String, String) | String
 55 |     end
 56 | 
 57 |     def parse(source : String)
 58 |       Utils.timer("block parsing", @options.time?) do
 59 |         parse_blocks(source)
 60 |       end
 61 | 
 62 |       Utils.timer("inline parsing", @options.time?) do
 63 |         process_inlines
 64 |       end
 65 | 
 66 |       @document
 67 |     end
 68 | 
 69 |     private def parse_blocks(source) # runs process_line on every line of *source*, then closes every block still open
 70 |       lines_size = 0 # count of lines handed to process_line
 71 |       source.each_line do |line|
 72 |         process_line(line)
 73 |         lines_size += 1
 74 |       end
 75 | 
 76 |       # ignore last blank line created by final newline (NOTE(review): confirm each_line really yields that trailing blank line; if not, this under-counts by one)
 77 |       lines_size -= 1 if source.ends_with?('\n')
 78 | 
 79 |       while (tip = tip?) # token() sets @tip to the closed node's parent, so this loop ends once the root has been closed
 80 |         token(tip, lines_size)
 81 |       end
 82 |     end
 83 | 
 84 |     private def process_line(line : String)
 85 |       container = @document
 86 |       @oldtip = tip
 87 |       @offset = 0
 88 |       @column = 0
 89 |       @blank = false
 90 |       @partially_consumed_tab = false
 91 |       @current_line += 1
 92 | 
 93 |       line = line.gsub(Char::ZERO, '\u{FFFD}')
 94 |       @line = line
 95 | 
 96 |       while (last_child = container.last_child?) && last_child.open?
 97 |         container = last_child
 98 | 
 99 |         find_next_nonspace
100 | 
101 |         case RULES[container.type].continue(self, container)
102 |         when Rule::ContinueStatus::Continue
103 |           # we've matched, keep going
104 |         when Rule::ContinueStatus::Stop
105 |           # we've failed to match a block
106 |           # back up to last matching block
107 |           container = container.parent
108 |           break
109 |         when Rule::ContinueStatus::Return
110 |           # we've hit end of line for fenced code close and can return
111 |           @last_line_length = line.size
112 |           return
113 |         end
114 |       end
115 | 
116 |       @all_closed = (container == @oldtip)
117 |       @last_matched_container = container
118 | 
119 |       matched_leaf = !container.type.paragraph? && RULES[container.type].accepts_lines?
120 | 
121 |       while !matched_leaf
122 |         find_next_nonspace
123 | 
124 |         # this is a little performance optimization
125 |         unless @indented
126 |           first_char = @line[@next_nonspace]?
127 |           unless first_char && (Rule::MAYBE_SPECIAL.includes?(first_char) || first_char.ascii_number? || @line.match(Rule::TABLE_CELL_SEPARATOR))
128 |             advance_next_nonspace
129 |             break
130 |           end
131 |         end
132 | 
133 |         matched = RULES.each_value do |rule|
134 |           case rule.match(self, container)
135 |           when Rule::MatchValue::Container
136 |             container = tip
137 |             break true
138 |           when Rule::MatchValue::Leaf
139 |             container = tip
140 |             matched_leaf = true
141 |             break true
142 |           else
143 |             false
144 |           end
145 |         end
146 | 
147 |         # nothing matched
148 |         unless matched
149 |           advance_next_nonspace
150 |           break
151 |         end
152 |       end
153 | 
154 |       if !@all_closed && !@blank && tip.type.paragraph?
155 |         # lazy paragraph continuation
156 |         add_line
157 |       else
158 |         # not a lazy continuation
159 |         close_unmatched_blocks
160 |         if @blank && (last_child = container.last_child?)
161 |           last_child.last_line_blank = true
162 |         end
163 | 
164 |         container_type = container.type
165 |         last_line_blank = @blank &&
166 |                           !(container_type.block_quote? ||
167 |                             (container_type.code_block? && container.fenced?) ||
168 |                             (container_type.item? && !container.first_child? && container.source_pos[0][0] == @current_line))
169 | 
170 |         cont = container
171 |         while cont
172 |           cont.last_line_blank = last_line_blank
173 |           cont = cont.parent?
174 |         end
175 | 
176 |         if RULES[container_type].accepts_lines?
177 |           add_line
178 | 
179 |           # if HtmlBlock, check for end condition
180 |           if container_type.html_block? && match_html_block?(container)
181 |             token(container, @current_line)
182 |           end
183 |         elsif @offset < line.size && !@blank
184 |           # create paragraph container for line
185 |           add_child(Node::Type::Paragraph, @offset)
186 |           advance_next_nonspace
187 |           add_line
188 |         end
189 | 
190 |         @last_line_length = line.size
191 |       end
192 | 
193 |       nil
194 |     end
195 | 
196 |     # Hands every paragraph, heading and table cell to the inline lexer when the walker leaves the node.
197 |     private def process_inlines
198 |       @inline_lexer.refmap = @refmap
199 |       tree = @document.walker
200 |       while (step = tree.next)
201 |         current, entering = step
202 |         next if entering
203 | 
204 |         kind = current.type
205 |         @inline_lexer.parse(current) if kind.paragraph? || kind.heading? || kind.table_cell?
206 |       end
207 |       nil
208 |     end
208 | 
209 |     def token(container : Node, line_number : Int32)
210 |       container_parent = container.parent?
211 | 
212 |       container.open = false
213 |       container.source_pos = {
214 |         container.source_pos[0],
215 |         {line_number, @last_line_length},
216 |       }
217 |       RULES[container.type].token(self, container)
218 | 
219 |       @tip = container_parent
220 | 
221 |       nil
222 |     end
223 | 
224 |     # Appends the rest of the current line (from @offset) plus a newline to the tip node's text.
225 |     private def add_line
226 |       target = tip
227 |       if @partially_consumed_tab
228 |         # step past the tab and stand in spaces for the part of it not yet consumed
229 |         @offset += 1
230 |         target.text += " " * (Rule::CODE_INDENT - (@column % 4))
231 |       end
232 |       target.text += "#{@line[@offset..-1]}\n"
233 | 
234 |       nil
235 |     end
236 | 
237 |     def add_child(type : Node::Type, offset : Int32) : Node
238 |       while !RULES[tip.type].can_contain?(type)
239 |         token(tip, @current_line - 1)
240 |       end
241 | 
242 |       column_number = offset + 1 # offset 0 = column 1
243 | 
244 |       node = Node.new(type)
245 |       node.source_pos = { {@current_line, column_number}, {0, 0} }
246 |       node.text = ""
247 |       tip.append_child(node)
248 |       @tip = node
249 | 
250 |       node
251 |     end
252 | 
253 |     def close_unmatched_blocks
254 |       unless @all_closed
255 |         while (oldtip = @oldtip) && oldtip != @last_matched_container
256 |           parent = oldtip.parent?
257 |           token(oldtip, @current_line - 1)
258 |           @oldtip = parent
259 |         end
260 |         @all_closed = true
261 |       end
262 |       nil
263 |     end
264 | 
265 |     private def find_next_nonspace
266 |       offset = @offset
267 |       column = @column
268 | 
269 |       if @line.empty?
270 |         @blank = true
271 |       else
272 |         while (char = @line[offset]?)
273 |           case char
274 |           when ' '
275 |             offset += 1
276 |             column += 1
277 |           when '\t'
278 |             offset += 1
279 |             column += (4 - (column % 4))
280 |           else
281 |             break
282 |           end
283 |         end
284 | 
285 |         @blank = {nil, '\n', '\r'}.includes?(char)
286 |       end
287 | 
288 |       @next_nonspace = offset
289 |       @next_nonspace_column = column
290 |       @indent = @next_nonspace_column - @column
291 |       @indented = @indent >= Rule::CODE_INDENT
292 | 
293 |       nil
294 |     end
295 | 
296 |     def advance_offset(count : Int32, columns = false)
297 |       line = @line
298 |       while count > 0 && (char = line[@offset]?)
299 |         if char == '\t'
300 |           chars_to_tab = Rule::CODE_INDENT - (@column % 4)
301 |           if columns
302 |             @partially_consumed_tab = chars_to_tab > count
303 |             chars_to_advance = chars_to_tab > count ? count : chars_to_tab
304 |             @column += chars_to_advance
305 |             @offset += @partially_consumed_tab ? 0 : 1
306 |             count -= chars_to_advance
307 |           else
308 |             @partially_consumed_tab = false
309 |             @column += chars_to_tab
310 |             @offset += 1
311 |             count -= 1
312 |           end
313 |         else
314 |           @partially_consumed_tab = false
315 |           @column += 1 # assume ascii; block starts are ascii
316 |           @offset += 1
317 |           count -= 1
318 |         end
319 |       end
320 | 
321 |       nil
322 |     end
323 | 
324 |     def advance_next_nonspace # moves the cursor to the position last computed by find_next_nonspace
325 |       @offset = @next_nonspace
326 |       @column = @next_nonspace_column # was a discarded subtraction; the column must follow the offset
327 |       @partially_consumed_tab = false
328 | 
329 |       nil
330 |     end
331 | 
332 |     # Tests the rest of the current line against the close pattern of the container's HTML block type.
333 |     private def match_html_block?(container : Node)
334 |       kind = container.data["html_block_type"]
335 |       return false unless kind
336 | 
337 |       kind = kind.as(Int32)
338 |       (0..4).includes?(kind) && Rule::HTML_BLOCK_CLOSE[kind].match(@line[@offset..-1])
339 |     end
340 |   end
341 | end
342 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # markd
  2 | 
  3 | [![Language](https://img.shields.io/badge/language-crystal-776791.svg)](https://github.com/crystal-lang/crystal)
  4 | [![Tag](https://img.shields.io/github/tag/icyleaf/markd.svg)](https://github.com/icyleaf/markd/blob/master/CHANGELOG.md)
  5 | [![Build Status](https://img.shields.io/circleci/project/github/icyleaf/markd/master.svg?style=flat)](https://circleci.com/gh/icyleaf/markd)
  6 | 
  7 | 
  8 | **THIS PROJECT IS LOOKING FOR MAINTAINER**
  9 | 
 10 | Unfortunately, the maintainer no longer has the time and/or resources to work on markd further. This means that bugs will not be fixed and features will not be added unless someone else does so. 
 11 | 
 12 | If you're interested in fixing up markd, please [file an issue](https://github.com/icyleaf/markd/issues/new) to let me know.
 13 | 
 14 | <hr />
 15 | 
 16 | Yet another markdown parser built for speed, written in [Crystal](https://crystal-lang.org) and compliant with the [CommonMark](http://spec.commonmark.org) specification (`v0.29`). Ported from [commonmark.js](https://github.com/jgm/commonmark.js).
 17 | 
 18 | ## Installation
 19 | 
 20 | Add this to your application's `shard.yml`:
 21 | 
 22 | ```yaml
 23 | dependencies:
 24 |   markd:
 25 |     github: icyleaf/markd
 26 | ```
 27 | 
 28 | ## Quick start
 29 | 
 30 | ```crystal
 31 | require "markd"
 32 | 
 33 | markdown = <<-MD
 34 | # Hello Markd
 35 | 
 36 | > Yet another markdown parser built for speed, written in Crystal, Compliant to CommonMark specification.
 37 | MD
 38 | 
 39 | html = Markd.to_html(markdown)
 40 | ```
 41 | 
 42 | There are also options to configure parsing and rendering.
 43 | 
 44 | ```crystal
 45 | options = Markd::Options.new(smart: true, safe: true)
 46 | Markd.to_html(markdown, options)
 47 | ```
 48 | 
 49 | ## Options
 50 | 
 51 | | Name        | Type   | Default value | Description                                                                                                                                                                     |
 52 | | ----------- | ------ | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 53 | | time        | `Bool` | false         | render parse cost time during read source, parse blocks, parse inline.                                                                                                          |
 54 | | smart       | `Bool` | false         | if **true**, straight quotes will be made curly,<br />`--` will be changed to an en dash,<br />`---` will be changed to an em dash, and<br />`...` will be changed to ellipses. |
 55 | | source_pos  | `Bool` | false         | if **true**, source position information for block-level elements<br />will be rendered in the data-sourcepos attribute (for HTML)                                              |
 56 | | safe        | `Bool` | false         | if **true**, raw HTML will not be passed through to HTML output (it will be replaced by comments)                                                                               |
 57 | | prettyprint | `Bool` | false         | if **true**, code tags generated by code blocks will have a `prettyprint` class added to them, to be used by [Google code-prettify](https://github.com/google/code-prettify).   |
 58 | | gfm         | `Bool` | false         | **Partial support**                                                                                                                                                         |
 59 | | autolink    | `Bool` | false         | if **true**, more autolinks are detected, like bare email addresses or http links                                             |
 60 | | toc         | `Bool` | false         | **Not supported for now**                                                                                                                                                         |
 61 | | base_url    | `URI?` | nil           | if not **nil**, relative URLs of links are resolved against this `URI`. It acts like HTML's `<base href="base_url">` in the context of a Markdown document.                     |
 62 | 
 63 | ## Advanced
 64 | 
 65 | If you want to use a custom renderer, you can!
 66 | 
 67 | ```crystal
 68 | 
 69 | class CustomRenderer < Markd::Renderer
 70 | 
 71 |   def strong(node, entering)
 72 |   end
 73 | 
 74 |   # more methods following in render.
 75 | end
 76 | 
 77 | options = Markd::Options.new(time: true)
 78 | document = Markd::Parser.parse(markdown, options)
 79 | renderer = CustomRenderer.new(options)
 80 | 
 81 | html = renderer.render(document)
 82 | ```
 83 | 
 84 | ## Use tartrazine shards to render code block.
 85 | 
 86 | Add [tartrazine](https://github.com/ralsina/tartrazine) to your project and require it before markd; markd will then use it to render code blocks.
 87 | 
 88 | By default, it uses a formatter like the following:
 89 | 
 90 | ```crystal
 91 | formatter = Tartrazine::Html.new(
 92 |   theme: Tartrazine.theme("catppuccin-macchiato"),
 93 |   line_numbers: true,
 94 |   standalone: true,
 95 | )
 96 | ```
 97 | 
 98 | You can pass your own formatter instead.
 99 | 
100 | e.g.
101 | 
102 | ```crystal
103 | require "tartrazine" # require it before markd
104 | require "markd"
105 | 
106 | formatter = Tartrazine::Html.new(
107 |   theme: Tartrazine.theme("emacs"),
108 |   
109 |   # Disable print line number
110 |   line_numbers: false,
111 |   
112 |   # Set standalone to false for better performance.
113 |   #
114 |   # You need to generate a CSS file with `bin/tartrazine -f html -t "emacs" --css`,
115 |   # then link it in your site.
116 |   standalone: false,
117 | )
118 | 
119 | html = Markd.to_html(markdown,formatter: formatter)
120 | ```
121 | 
122 | If you don't care about the formatter config, you can just pass a string instead.
123 | 
124 | ```crystal
125 | require "tartrazine" # require it before markd
126 | require "markd"
127 | 
128 | html = Markd.to_html(markdown, formatter: "emacs")
129 | ```
130 | 
131 | 
132 | Currently Tartrazine supports 247 languages and [331 themes](https://github.com/ralsina/tartrazine/tree/main/styles). You can retrieve the supported languages using `Tartrazine::LEXERS_BY_NAME.values.uniq.sort`; for now the result is:
133 | 
134 | ```crystal
135 | [
136 |   "LiquidLexer", "VelocityLexer", 
137 |   
138 |   "abap", "abnf", "actionscript", "actionscript_3", "ada", "agda", "al", "alloy", "angular2", 
139 |   "antlr",   "apacheconf", "apl", "applescript", "arangodb_aql", "arduino", "armasm", 
140 |   "autohotkey", "autoit", "awk", 
141 |   
142 |   "ballerina", "bash", "bash_session", "batchfile", "bbcode", "bibtex", "bicep", "blitzbasic", 
143 |   "bnf", "bqn", "brainfuck", 
144 |   
145 |   "c", "c#", "c++", "cap_n_proto", "cassandra_cql", "ceylon", "cfengine3", "cfstatement", 
146 |   "chaiscript", "chapel",   "cheetah", "clojure", "cmake", "cobol", "coffeescript", 
147 |   "common_lisp", "coq", "crystal", "css", "cue", "cython", 
148 |   
149 |   "d", "dart", "dax", "desktop_entry", "diff", "django_jinja", "dns", "docker", "dtd", "dylan", 
150 |   
151 |   "ebnf", "elixir", "elm", "emacslisp", "erlang", 
152 |   
153 |   "factor", "fennel", "fish", "forth", "fortran", "fortranfixed", "fsharp", 
154 |   
155 |   "gas", "gdscript", "gdscript3", "gherkin", "gleam", "glsl", "gnuplot", "go_template", 
156 |   "graphql", "groff", "groovy", 
157 |   
158 |   "handlebars", "hare", "haskell", "hcl", "hexdump", "hlb", "hlsl", "holyc", "html", "hy", 
159 |   
160 |   "idris", "igor", "ini", "io", "iscdhcpd", 
161 |   
162 |   "j", "java", "javascript", "json", "jsonata", "julia", "jungle", 
163 |   
164 |   "kotlin", 
165 |   
166 |   "lighttpd_configuration_file", "llvm", "lua", 
167 |   
168 |   "makefile", "mako", "markdown", "mason", "materialize_sql_dialect", "mathematica", "matlab", 
169 |   "mcfunction", "meson", "metal", "minizinc", "mlir", "modula-2", "moinwiki", "monkeyc", 
170 |   "morrowindscript", "myghty", "mysql", 
171 |   
172 |   "nasm", "natural", "ndisasm", "newspeak", "nginx_configuration_file", "nim", "nix", 
173 |   
174 |   "objective-c", "objectpascal", "ocaml", "octave", "odin", "onesenterprise", "openedge_abl", 
175 |   "openscad", "org_mode", 
176 |   
177 |   "pacmanconf", "perl", "php", "pig", "pkgconfig", "pl_pgsql", "plaintext", "plutus_core", 
178 |   "pony", "postgresql_sql_dialect", "postscript", "povray", "powerquery", "powershell", 
179 |   "prolog", "promela", "promql", "properties", "protocol_buffer",   "prql", "psl", "puppet", 
180 |   "python", "python_2", 
181 |   
182 |   "qbasic", "qml", 
183 |   
184 |   "r", "racket", "ragel", "react", "reasonml", "reg", "rego", "rexx", "rpm_spec", "rst", 
185 |   "ruby", "rust", 
186 |   
187 |   "sas", "sass", "scala", "scheme", "scilab", "scss", "sed", "sieve", "smali", "smalltalk", 
188 |   "smarty", "snobol", "solidity", "sourcepawn", "sparql", "sql", "squidconf", "standard_ml", 
189 |   "stas", "stylus", "swift", "systemd", "systemverilog", 
190 |   
191 |   "tablegen", "tal", "tasm", "tcl", "tcsh", "termcap", "terminfo", "terraform", "tex", 
192 |   "thrift",  "toml", "tradingview", "transact-sql", "turing", "turtle", "twig", "typescript", 
193 |   "typoscript", "typoscriptcssdata", "typoscripthtmldata", 
194 |   
195 |   "ucode", 
196 |   
197 |   "v", "v_shell", "vala", "vb_net", "verilog", "vhdl", "vhs", "viml", "vue", "wdte", 
198 |   
199 |   "webgpu_shading_language", "whiley", 
200 |   
201 |   "xml", "xorg", 
202 |   
203 |   "yaml", "yang", "z80_assembly", 
204 |   
205 |   "zed", "zig"
206 | ]
207 | ```
208 | 
209 | For detailed usage, check the [tartrazine](https://github.com/ralsina/tartrazine) documentation.
210 | 
211 | ## Performance
212 | 
213 | Here is the result of parsing [a sample markdown file](benchmarks/source.md) on a MacBook Pro Retina 2015 (2.2 GHz):
214 | 
215 | ```
216 | Crystal Markdown (no longer present)   3.28k (305.29µs) (± 0.92%)       fastest
217 |            Markd                       305.36 (  3.27ms) (± 5.52%) 10.73× slower
218 | ```
219 | 
220 | I am currently working on comparing markd with other popular CommonMark parsers; the code is stored in [benchmarks](/benchmarks).
221 | 
222 | ## How to Contribute
223 | 
224 | Your contributions are always welcome! Please submit a pull request or create an issue to add a new question, bug or feature to the list.
225 | 
226 | All [Contributors](https://github.com/icyleaf/markd/graphs/contributors) are on the wall.
227 | 
228 | ## You may also like
229 | 
230 | - [halite](https://github.com/icyleaf/halite) - HTTP Requests Client with a chainable REST API, built-in sessions and middlewares.
231 | - [totem](https://github.com/icyleaf/totem) - Load and parse a configuration file or string in JSON, YAML, dotenv formats.
232 | - [poncho](https://github.com/icyleaf/poncho) - A .env parser/loader improved for performance.
233 | - [popcorn](https://github.com/icyleaf/popcorn) - Easy and Safe casting from one type to another.
234 | - [fast-crystal](https://github.com/icyleaf/fast-crystal) - 💨 Writing Fast Crystal 😍 -- Collect Common Crystal idioms.
235 | 
236 | ## License
237 | 
238 | [MIT License](https://github.com/icyleaf/markd/blob/master/LICENSE) © icyleaf
239 | 


--------------------------------------------------------------------------------
/src/markd/renderers/html_renderer.cr:
--------------------------------------------------------------------------------
  1 | require "uri"
  2 | 
  3 | module Markd
  4 |   class HTMLRenderer < Renderer
  5 |     @disable_tag = 0
  6 |     @last_output = "\n"
  7 | 
  8 |     @strong_stack = 0
  9 | 
 10 |     HEADINGS = %w[h1 h2 h3 h4 h5 h6]
 11 | 
 12 |     def heading(node : Node, entering : Bool) : Nil
 13 |       tag_name = HEADINGS[node.data["level"].as(Int32) - 1]
 14 |       if entering
 15 |         newline
 16 |         tag(tag_name, attrs(node))
 17 |         toc(node) if @options.toc?
 18 |       else
 19 |         tag(tag_name, end_tag: true)
 20 |         newline
 21 |       end
 22 |     end
 23 | 
 24 |     def code(node : Node, entering : Bool) : Nil
 25 |       tag("code") do
 26 |         code_body(node)
 27 |       end
 28 |     end
 29 | 
 30 |     def code_body(node : Node)
 31 |       output(node.text)
 32 |     end
 33 | 
 34 |     def code_block(node : Node, entering : Bool, formatter : T?) : Nil forall T
 35 |       {% if @top_level.has_constant?("Tartrazine") %}
 36 |         render_code_block_use_tartrazine(node, formatter)
 37 |       {% else %}
 38 |         render_code_block_use_code_tag(node)
 39 |       {% end %}
 40 |     end
 41 | 
 42 |     def code_block_language(languages)
 43 |       languages.try(&.first?).try(&.strip.presence)
 44 |     end
 45 | 
 46 |     def code_block_body(node : Node, lang : String?) : Nil
 47 |       output(node.text)
 48 |     end
 49 | 
 50 |     def thematic_break(node : Node, entering : Bool) : Nil
 51 |       newline
 52 |       tag("hr", attrs(node), self_closing: true)
 53 |       newline
 54 |     end
 55 | 
 56 |     def block_quote(node : Node, entering : Bool) : Nil
 57 |       newline
 58 |       if entering
 59 |         tag("blockquote", attrs(node))
 60 |       else
 61 |         tag("blockquote", end_tag: true)
 62 |       end
 63 |       newline
 64 |     end
 65 | 
 66 |     def alert(node : Node, entering : Bool) : Nil
 67 |       newline
 68 |       if entering
 69 |         tag("div", {"class" => "alert alert-#{node.data["alert"].to_s.downcase}"})
 70 |         tag("p", {"class" => "alert-title"}) do
 71 |           output(node.data["title"].as(String))
 72 |         end
 73 |       else
 74 |         tag("div", end_tag: true)
 75 |       end
 76 |       newline
 77 |     end
 78 | 
 79 |     def table(node : Node, entering : Bool) : Nil
 80 |       has_body = node.data["has_body"]
 81 |       newline
 82 |       if entering
 83 |         tag("table", attrs(node))
 84 |       else
 85 |         if has_body
 86 |           tag("tbody", end_tag: true)
 87 |           newline
 88 |         end
 89 |         tag("table", end_tag: true)
 90 |       end
 91 |       newline
 92 |     end
 93 | 
 94 |     def table_row(node : Node, entering : Bool) : Nil
 95 |       newline
 96 |       is_heading = node.data["heading"]
 97 |       has_body = node.data["has_body"]
 98 |       if entering
 99 |         if is_heading
100 |           tag("thead")
101 |           newline
102 |         end
103 |         tag("tr", attrs(node))
104 |       else
105 |         tag("tr", end_tag: true)
106 |         newline
107 |         if is_heading
108 |           tag("thead", end_tag: true)
109 |           newline
110 |           if has_body
111 |             tag("tbody")
112 |             newline
113 |           end
114 |         end
115 |       end
116 |     end
117 | 
118 |     def table_cell(node : Node, entering : Bool) : Nil
119 |       tag_name = node.data["heading"] ? "th" : "td"
120 |       if !node.data["align"].to_s.empty?
121 |         attrs = {"align" => node.data["align"]}
122 |       else
123 |         attrs = {} of String => String
124 |       end
125 |       if entering
126 |         newline
127 |         tag(tag_name, attrs)
128 |       else
129 |         tag(tag_name, end_tag: true)
130 |         newline
131 |       end
132 |     end
133 | 
134 |     def list(node : Node, entering : Bool) : Nil
135 |       tag_name = node.data["type"] == "ordered" ? "ol" : "ul"
136 | 
137 |       newline
138 |       if entering
139 |         attrs = attrs(node)
140 | 
141 |         if (start = node.data["start"].as(Int32)) && start != 1
142 |           attrs ||= {} of String => String
143 |           attrs["start"] = start.to_s
144 |         end
145 | 
146 |         tag(tag_name, attrs)
147 |       else
148 |         tag(tag_name, end_tag: true)
149 |       end
150 |       newline
151 |     end
152 | 
153 |     def item(node : Node, entering : Bool) : Nil
154 |       if entering
155 |         tag("li", attrs(node))
156 | 
157 |         if node.data["type"] == "checkbox"
158 |           if node.data["checked"]?
159 |             attributes = {
160 |               "checked"  => "",
161 |               "disabled" => "",
162 |               "type"     => "checkbox",
163 |             }
164 |           else
165 |             attributes = {
166 |               "disabled" => "",
167 |               "type"     => "checkbox",
168 |             }
169 |           end
170 | 
171 |           tag("input", attributes)
172 |           literal(" ")
173 |         end
174 |       else
175 |         tag("li", end_tag: true)
176 |         newline
177 |       end
178 |     end
179 | 
180 |     def link(node : Node, entering : Bool) : Nil
181 |       if entering
182 |         attrs = attrs(node)
183 |         destination = node.data["destination"].as(String)
184 | 
185 |         unless @options.safe? && potentially_unsafe(destination)
186 |           attrs ||= {} of String => String
187 |           destination = resolve_uri(destination, node)
188 |           attrs["href"] = escape(destination)
189 |         end
190 | 
191 |         if (title = node.data["title"].as(String)) && !title.empty?
192 |           attrs ||= {} of String => String
193 |           attrs["title"] = escape(title)
194 |         end
195 | 
196 |         tag("a", attrs)
197 |       else
198 |         tag("a", end_tag: true)
199 |       end
200 |     end
201 | 
202 |     private def resolve_uri(destination, node)
203 |       base_url = @options.base_url
204 |       return destination unless base_url
205 | 
206 |       uri = URI.parse(destination)
207 |       return destination if uri.absolute?
208 | 
209 |       base_url.resolve(uri).to_s
210 |     end
211 | 
212 |     def image(node : Node, entering : Bool) : Nil # writes <img> on entry and closes it on exit; tag() is muted while @disable_tag > 0
213 |       if entering
214 |         if @disable_tag == 0
215 |           destination = node.data["destination"].as(String)
216 |           if @options.safe? && potentially_unsafe(destination)
217 |             literal(%(<img src="" alt=")) # leave alt open like the branch below; the exit path writes the closing quote
218 |           else
219 |             destination = resolve_uri(destination, node)
220 |             literal(%(<img src="#{escape(destination)}" alt="))
221 |           end
222 |         end
223 |         @disable_tag += 1 # nested images only bump the counter; the outermost one owns the markup
224 |       else
225 |         @disable_tag -= 1
226 |         if @disable_tag == 0
227 |           if (title = node.data["title"].as(String)) && !title.empty?
228 |             literal(%(" title="#{escape(title)})) # closes alt, opens title
229 |           end
230 |           literal(%(" />)) # closes whichever attribute is still open, then the tag
231 |         end
232 |       end
233 |     end
234 | 
235 |     def html_block(node : Node, entering : Bool) : Nil
236 |       newline
237 |       content = @options.safe? ? "<!-- raw HTML omitted -->" : node.text
238 |       literal(content)
239 |       newline
240 |     end
241 | 
242 |     def html_inline(node : Node, entering : Bool) : Nil
243 |       content = @options.safe? ? "<!-- raw HTML omitted -->" : node.text
244 |       literal(content)
245 |     end
246 | 
247 |     def paragraph(node : Node, entering : Bool) : Nil
248 |       if (grand_parent = node.parent?.try &.parent?) && grand_parent.type.list?
249 |         return if grand_parent.data["tight"]
250 |       end
251 | 
252 |       if entering
253 |         newline
254 |         tag("p", attrs(node))
255 |       else
256 |         tag("p", end_tag: true)
257 |         newline
258 |       end
259 |     end
260 | 
261 |     def emphasis(node : Node, entering : Bool) : Nil
262 |       if entering
263 |         node.data["strong_stack"] = @strong_stack
264 |         @strong_stack = 0
265 |       end
266 | 
267 |       tag("em", end_tag: !entering)
268 | 
269 |       if !entering
270 |         @strong_stack = node.data["strong_stack"].as(Int32)
271 |       end
272 |     end
273 | 
274 |     def soft_break(node : Node, entering : Bool) : Nil
275 |       literal("\n")
276 |     end
277 | 
278 |     def line_break(node : Node, entering : Bool) : Nil
279 |       tag("br", self_closing: true)
280 |       newline
281 |     end
282 | 
283 |     def strong(node : Node, entering : Bool) : Nil
284 |       @strong_stack -= 1 if @options.gfm? && !entering
285 | 
286 |       tag("strong", end_tag: !entering) if @strong_stack == 0
287 | 
288 |       @strong_stack += 1 if @options.gfm? && entering
289 |     end
290 | 
291 |     def strikethrough(node : Node, entering : Bool) : Nil
292 |       tag("del", end_tag: !entering)
293 |     end
294 | 
295 |     def text(node : Node, entering : Bool) : Nil
296 |       output(node.text)
297 |     end
298 | 
299 |     private def tag(name : String, attrs = nil, self_closing = false, end_tag = false)
300 |       return if @disable_tag > 0
301 | 
302 |       @output_io << "<"
303 |       @output_io << "/" if end_tag
304 |       @output_io << name
305 |       attrs.try &.each do |key, value|
306 |         @output_io << ' ' << key << '=' << '"' << value << '"'
307 |       end
308 | 
309 |       @output_io << " /" if self_closing
310 |       @output_io << ">"
311 |       @last_output = ">"
312 |     end
313 | 
314 |     private def tag(name : String, attrs = nil, &)
315 |       tag(name, attrs)
316 |       yield
317 |       tag(name, end_tag: true)
318 |     end
319 | 
320 |     private def potentially_unsafe(url : String)
321 |       url.match(Rule::UNSAFE_PROTOCOL) && !url.match(Rule::UNSAFE_DATA_PROTOCOL)
322 |     end
323 | 
324 |     private def toc(node : Node)
325 |       return unless node.type.heading?
326 | 
327 |       {% if compare_versions(Crystal::VERSION, "1.2.0") < 0 %}
328 |         title = URI.encode(node.first_child.text)
329 |         @output_io << %(<a id="anchor-) << title << %(" class="anchor" href="#anchor-) << title << %("></a>)
330 |       {% else %}
331 |         title = URI.encode_path(node.first_child.text)
332 |         @output_io << %(<a id="anchor-) << title << %(" class="anchor" href="#anchor-) << title << %("></a>)
333 |       {% end %}
334 |       @last_output = ">"
335 |     end
336 | 
337 |     private def attrs(node : Node)
338 |       return unless @options.source_pos? && (pos = node.source_pos) # nil when disabled or no position
339 |       from, to = pos[0], pos[1] # the two endpoints stored in `source_pos`
340 |       {"data-source-pos" => "#{from[0]}:#{from[1]}-#{to[0]}:#{to[1]}"}
341 |     end
342 | 
343 |     private def render_code_block_use_tartrazine(node : Node, formatter : Tartrazine::Formatter?)
344 |       fence = node.fence_language
345 |       lang = code_block_language(fence ? fence.split : nil)
346 | 
347 |       newline
348 | 
349 |       if lang
350 |         # A language was resolved: pass the chomped node text and the
351 |         # Tartrazine lexer for that language to the formatter.
352 |         literal(formatter.format(node.text.chomp, Tartrazine.lexer(lang)))
353 |       else
354 |         # No language resolved: emit a plain <pre><code> element instead.
355 |         code_tag_attrs = attrs(node)
356 |         pre_tag_attrs = @options.prettyprint? ? {"class" => "prettyprint"} : nil
357 | 
358 |         tag("pre", pre_tag_attrs) do
359 |           tag("code", code_tag_attrs) do
360 |             code_block_body(node, lang)
361 |           end
362 |         end
363 |       end
364 | 
365 |       # `newline` is called after the block as well as before it.
366 |       newline
367 |     end
368 | 
369 |     private def render_code_block_use_code_tag(node : Node)
370 |       fence = node.fence_language
371 |       code_tag_attrs = attrs(node)
372 |       pre_tag_attrs = @options.prettyprint? ? {"class" => "prettyprint"} : nil
373 | 
374 |       lang = code_block_language(fence ? fence.split : nil)
375 |       # A resolved language becomes a `language-…` class on <code>; the
376 |       # attribute hash is created here when `attrs` returned nil.
377 |       if lang
378 |         code_tag_attrs ||= {} of String => String
379 |         code_tag_attrs["class"] = "language-#{escape(lang)}"
380 |       end
381 | 
382 |       newline
383 |       tag("pre", pre_tag_attrs) do
384 |         tag("code", code_tag_attrs) do
385 |           code_block_body(node, lang)
386 |         end
387 |       end
388 |       newline
389 |     end
390 |   end
391 | end
392 | 


--------------------------------------------------------------------------------
/spec/fixtures/gfm-regression.txt:
--------------------------------------------------------------------------------
  1 | ### Regression tests
  2 | 
  3 | Issue #113: EOL character weirdness on Windows
  4 | (Important: first line ends with CR + CR + LF)
  5 | 
  6 | ```````````````````````````````` example
  7 | line1
  8 | 
  9 | line2
 10 | .
 11 | <p>line1</p>
 12 | <p>line2</p>
 13 | ````````````````````````````````
 14 | 
 15 | Issue #114: cmark skipping first character in line
 16 | (Important: the blank lines around "Repeatedly" contain a tab.)
 17 | 
 18 | ```````````````````````````````` example
 19 | By taking it apart
 20 | 
 21 | - alternative solutions
 22 | →
 23 | Repeatedly solving
 24 | →
 25 | - how techniques
 26 | .
 27 | <p>By taking it apart</p>
 28 | <ul>
 29 | <li>alternative solutions</li>
 30 | </ul>
 31 | <p>Repeatedly solving</p>
 32 | <ul>
 33 | <li>how techniques</li>
 34 | </ul>
 35 | ````````````````````````````````
 36 | 
 37 | Issue jgm/CommonMark#430:  h2..h6 not recognized as block tags.
 38 | 
 39 | ```````````````````````````````` example
 40 | <h1>lorem</h1>
 41 | 
 42 | <h2>lorem</h2>
 43 | 
 44 | <h3>lorem</h3>
 45 | 
 46 | <h4>lorem</h4>
 47 | 
 48 | <h5>lorem</h5>
 49 | 
 50 | <h6>lorem</h6>
 51 | .
 52 | <h1>lorem</h1>
 53 | <h2>lorem</h2>
 54 | <h3>lorem</h3>
 55 | <h4>lorem</h4>
 56 | <h5>lorem</h5>
 57 | <h6>lorem</h6>
 58 | ````````````````````````````````
 59 | 
 60 | Issue jgm/commonmark.js#109 - tabs after setext header line
 61 | 
 62 | 
 63 | ```````````````````````````````` example
 64 | hi
 65 | --→
 66 | .
 67 | <h2>hi</h2>
 68 | ````````````````````````````````
 69 | 
 70 | Issue #177 - incorrect emphasis parsing
 71 | 
 72 | ```````````````````````````````` example
 73 | a***b* c*
 74 | .
 75 | <p>a*<em><em>b</em> c</em></p>
 76 | ````````````````````````````````
 77 | 
 78 | Issue #193 - unescaped left angle brackets in link destination
 79 | 
 80 | ```````````````````````````````` example
 81 | [a]
 82 | 
 83 | [a]: <te<st>
 84 | .
 85 | <p>[a]</p>
 86 | <p>[a]: &lt;te<st></p>
 87 | ````````````````````````````````
 88 | 
 89 | Issue #192 - escaped spaces in link destination
 90 | 
 91 | 
 92 | ```````````````````````````````` example
 93 | [a](te\ st)
 94 | .
 95 | <p>[a](te\ st)</p>
 96 | ````````````````````````````````
 97 | 
 98 | Issue github/github#76615:  multiple delimiter combinations gets sketchy
 99 | 
100 | 
101 | ```````````````````````````````` example strikethrough
102 | ~~**_`this`_**~~  
103 | ~~***`this`***~~  
104 | ~~___`this`___~~
105 | 
106 | **_`this`_**  
107 | ***`this`***  
108 | ___`this`___
109 | 
110 | ~~**_this_**~~  
111 | ~~***this***~~  
112 | ~~___this___~~
113 | 
114 | **_this_**  
115 | ***this***  
116 | ___this___
117 | .
118 | <p><del><strong><em><code>this</code></em></strong></del><br />
119 | <del><em><strong><code>this</code></strong></em></del><br />
120 | <del><em><strong><code>this</code></strong></em></del></p>
121 | <p><strong><em><code>this</code></em></strong><br />
122 | <em><strong><code>this</code></strong></em><br />
123 | <em><strong><code>this</code></strong></em></p>
124 | <p><del><strong><em>this</em></strong></del><br />
125 | <del><em><strong>this</strong></em></del><br />
126 | <del><em><strong>this</strong></em></del></p>
127 | <p><strong><em>this</em></strong><br />
128 | <em><strong>this</strong></em><br />
129 | <em><strong>this</strong></em></p>
130 | ````````````````````````````````
131 | 
132 | Issue #527 - meta tags in inline contexts
133 | 
134 | ```````````````````````````````` example
135 | City:
136 | <span itemprop="contentLocation" itemscope itemtype="https://schema.org/City">
137 |   <meta itemprop="name" content="Springfield">
138 | </span>
139 | .
140 | <p>City:
141 | <span itemprop="contentLocation" itemscope itemtype="https://schema.org/City">
142 | <meta itemprop="name" content="Springfield">
143 | </span></p>
144 | ````````````````````````````````
145 | 
146 | cmark-gfm strikethrough rules
147 | 
148 | ```````````````````````````````` example strikethrough
149 | ~Hi~ Hello, world!
150 | .
151 | <p><del>Hi</del> Hello, world!</p>
152 | ````````````````````````````````
153 | 
154 | ```````````````````````````````` example strikethrough
155 | This ~text~ ~~is~~ ~~~curious~~~.
156 | .
157 | <p>This <del>text</del> <del>is</del> ~~~curious~~~.</p>
158 | ````````````````````````````````
159 | 
160 | `~` should not be escaped in href — https://github.com/github/markup/issues/311
161 | 
162 | ```````````````````````````````` example
163 | [x](http://members.aon.at/~nkehrer/ibm_5110/emu5110.html)
164 | .
165 | <p><a href="http://members.aon.at/~nkehrer/ibm_5110/emu5110.html">x</a></p>
166 | ````````````````````````````````
167 | 
168 | Footnotes in tables
169 | 
170 | ```````````````````````````````` example table footnotes pending
171 | A footnote in a paragraph[^1]
172 | 
173 | | Column1   | Column2 |
174 | | --------- | ------- |
175 | | foot [^1] | note    |
176 | 
177 | [^1]: a footnote
178 | .
179 | <p>A footnote in a paragraph<sup class="footnote-ref"><a href="#fn-1" id="fnref-1" data-footnote-ref>1</a></sup></p>
180 | <table>
181 | <thead>
182 | <tr>
183 | <th>Column1</th>
184 | <th>Column2</th>
185 | </tr>
186 | </thead>
187 | <tbody>
188 | <tr>
189 | <td>foot <sup class="footnote-ref"><a href="#fn-1" id="fnref-1-2" data-footnote-ref>1</a></sup></td>
190 | <td>note</td>
191 | </tr>
192 | </tbody>
193 | </table>
194 | <section class="footnotes" data-footnotes>
195 | <ol>
196 | <li id="fn-1">
197 | <p>a footnote <a href="#fnref-1" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="1" aria-label="Back to reference 1">↩</a> <a href="#fnref-1-2" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="1-2" aria-label="Back to reference 1-2">↩<sup class="footnote-ref">2</sup></a></p>
198 | </li>
199 | </ol>
200 | </section>
201 | ````````````````````````````````
202 | 
203 | Issue #527 - meta tags in inline contexts
204 | 
205 | ```````````````````````````````` example
206 | City:
207 | <span itemprop="contentLocation" itemscope itemtype="https://schema.org/City">
208 |   <meta itemprop="name" content="Springfield">
209 | </span>
210 | .
211 | <p>City:
212 | <span itemprop="contentLocation" itemscope itemtype="https://schema.org/City">
213 | <meta itemprop="name" content="Springfield">
214 | </span></p>
215 | ````````````````````````````````
216 | 
217 | Issue #530 - link parsing corner cases
218 | 
219 | ```````````````````````````````` example
220 | [a](\ b)
221 | 
222 | [a](<<b)
223 | 
224 | [a](<b
225 | )
226 | .
227 | <p>[a](\ b)</p>
228 | <p>[a](&lt;&lt;b)</p>
229 | <p>[a](&lt;b
230 | )</p>
231 | ````````````````````````````````
232 | 
233 | Issue commonmark#526 - unescaped ( in link title
234 | 
235 | ```````````````````````````````` example pending
236 | [link](url ((title))
237 | .
238 | <p>[link](url ((title))</p>
239 | ````````````````````````````````
240 | 
241 | Issue commonmark#517 - script, pre, style close tag without
242 | opener.
243 | 
244 | ```````````````````````````````` example
245 | </script>
246 | 
247 | </pre>
248 | 
249 | </style>
250 | .
251 | </script>
252 | </pre>
253 | </style>
254 | ````````````````````````````````
255 | 
256 | Issue #289.
257 | 
258 | ```````````````````````````````` example
259 | [a](<b) c>
260 | .
261 | <p>[a](&lt;b) c&gt;</p>
262 | ````````````````````````````````
263 | 
264 | Pull request #128 - Buffer overread in tables extension
265 | 
266 | ```````````````````````````````` example table
267 | |
268 | -|
269 | .
270 | <p>|
271 | -|</p>
272 | ````````````````````````````````
273 | 
274 | Footnotes may be nested inside other footnotes.
275 | 
276 | ```````````````````````````````` example footnotes pending
277 | This is some text. It has a citation.[^citation]
278 | 
279 | [^another-citation]: My second citation.
280 | 
281 | [^citation]: This is a long winded parapgraph that also has another citation.[^another-citation]
282 | .
283 | <p>This is some text. It has a citation.<sup class="footnote-ref"><a href="#fn-citation" id="fnref-citation" data-footnote-ref>1</a></sup></p>
284 | <section class="footnotes" data-footnotes>
285 | <ol>
286 | <li id="fn-citation">
287 | <p>This is a long winded parapgraph that also has another citation.<sup class="footnote-ref"><a href="#fn-another-citation" id="fnref-another-citation" data-footnote-ref>2</a></sup> <a href="#fnref-citation" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="1" aria-label="Back to reference 1">↩</a></p>
288 | </li>
289 | <li id="fn-another-citation">
290 | <p>My second citation. <a href="#fnref-another-citation" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="2" aria-label="Back to reference 2">↩</a></p>
291 | </li>
292 | </ol>
293 | </section>
294 | ````````````````````````````````
295 | 
296 | Footnotes are similar to, but should not be confused with, link references
297 | 
298 | ```````````````````````````````` example footnotes pending
299 | This is some text. It has two footnotes references, side-by-side without any spaces,[^footnote1][^footnote2] which are definitely not link references.
300 | 
301 | [^footnote1]: Hello.
302 | 
303 | [^footnote2]: Goodbye.
304 | .
305 | <p>This is some text. It has two footnotes references, side-by-side without any spaces,<sup class="footnote-ref"><a href="#fn-footnote1" id="fnref-footnote1" data-footnote-ref>1</a></sup><sup class="footnote-ref"><a href="#fn-footnote2" id="fnref-footnote2" data-footnote-ref>2</a></sup> which are definitely not link references.</p>
306 | <section class="footnotes" data-footnotes>
307 | <ol>
308 | <li id="fn-footnote1">
309 | <p>Hello. <a href="#fnref-footnote1" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="1" aria-label="Back to reference 1">↩</a></p>
310 | </li>
311 | <li id="fn-footnote2">
312 | <p>Goodbye. <a href="#fnref-footnote2" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="2" aria-label="Back to reference 2">↩</a></p>
313 | </li>
314 | </ol>
315 | </section>
316 | ````````````````````````````````
317 | 
318 | Footnotes may begin with or have a 'w' or a '_' in their reference label.
319 | 
320 | ```````````````````````````````` example footnotes autolink pending
321 | This is some text. Sometimes the autolinker splits up text into multiple nodes, hoping it will find a hyperlink, so this text has a footnote whose reference label begins with a `w`.[^widely-cited]
322 | 
323 | It has another footnote that contains many different characters (the autolinker was also breaking on `_`).[^sphinx-of-black-quartz_judge-my-vow-0123456789]
324 | 
325 | [^sphinx-of-black-quartz_judge-my-vow-0123456789]: so does this.
326 | 
327 | [^widely-cited]: this renders properly.
328 | .
329 | <p>This is some text. Sometimes the autolinker splits up text into multiple nodes, hoping it will find a hyperlink, so this text has a footnote whose reference label begins with a <code>w</code>.<sup class="footnote-ref"><a href="#fn-widely-cited" id="fnref-widely-cited" data-footnote-ref>1</a></sup></p>
330 | <p>It has another footnote that contains many different characters (the autolinker was also breaking on <code>_</code>).<sup class="footnote-ref"><a href="#fn-sphinx-of-black-quartz_judge-my-vow-0123456789" id="fnref-sphinx-of-black-quartz_judge-my-vow-0123456789" data-footnote-ref>2</a></sup></p>
331 | <section class="footnotes" data-footnotes>
332 | <ol>
333 | <li id="fn-widely-cited">
334 | <p>this renders properly. <a href="#fnref-widely-cited" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="1" aria-label="Back to reference 1">↩</a></p>
335 | </li>
336 | <li id="fn-sphinx-of-black-quartz_judge-my-vow-0123456789">
337 | <p>so does this. <a href="#fnref-sphinx-of-black-quartz_judge-my-vow-0123456789" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="2" aria-label="Back to reference 2">↩</a></p>
338 | </li>
339 | </ol>
340 | </section>
341 | ````````````````````````````````
342 | 
343 | Footnotes interacting with strikethrough should not lead to a use-after-free
344 | 
345 | ```````````````````````````````` example footnotes autolink strikethrough table pending
346 | |Tot.....[^_a_]|
347 | .
348 | <p>|Tot.....[^_a_]|</p>
349 | ````````````````````````````````
350 | 
351 | Footnotes interacting with strikethrough should not lead to a use-after-free pt2
352 | 
353 | ```````````````````````````````` example footnotes autolink strikethrough table pending
354 | [^~~is~~1]
355 | .
356 | <p>[^~~is~~1]</p>
357 | ````````````````````````````````
358 | 
359 | Adjacent unused footnotes definitions should not lead to a use after free
360 | 
361 | ```````````````````````````````` example footnotes autolink strikethrough table
362 | Hello world
363 | 
364 | 
365 | [^a]:[^b]:
366 | .
367 | <p>Hello world</p>
368 | ````````````````````````````````
369 | 
370 | Issue #424 - emphasis before links
371 | 
372 | ```````````````````````````````` example
373 | *text* [link](#section)
374 | .
375 | <p><em>text</em> <a href="#section">link</a></p>
376 | ````````````````````````````````
377 | 


--------------------------------------------------------------------------------
/spec/fixtures/gfm-extensions.txt:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: Extensions test
  3 | author: Yuki Izumi
  4 | version: 0.1
  5 | date: '2016-08-31'
  6 | license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)'
  7 | ...
  8 | 
  9 | ## Tables
 10 | 
 11 | Here's a well-formed table, doing everything it should.
 12 | 
 13 | ```````````````````````````````` example
 14 | | abc | def |
 15 | | --- | --- |
 16 | | ghi | jkl |
 17 | | mno | pqr |
 18 | .
 19 | <table>
 20 | <thead>
 21 | <tr>
 22 | <th>abc</th>
 23 | <th>def</th>
 24 | </tr>
 25 | </thead>
 26 | <tbody>
 27 | <tr>
 28 | <td>ghi</td>
 29 | <td>jkl</td>
 30 | </tr>
 31 | <tr>
 32 | <td>mno</td>
 33 | <td>pqr</td>
 34 | </tr>
 35 | </tbody>
 36 | </table>
 37 | ````````````````````````````````
 38 | 
 39 | We're going to mix up the table now; we'll demonstrate that inline formatting
 40 | works fine, but block elements don't.  You can also have empty cells, and the
 41 | textual alignment of the columns is shown to be irrelevant.
 42 | 
 43 | ```````````````````````````````` example
 44 | Hello!
 45 | 
 46 | | _abc_ | セン |
 47 | | ----- | ---- |
 48 | | 1. Block elements inside cells don't work. | |
 49 | | But _**inline elements do**_. | x |
 50 | 
 51 | Hi!
 52 | .
 53 | <p>Hello!</p>
 54 | <table>
 55 | <thead>
 56 | <tr>
 57 | <th><em>abc</em></th>
 58 | <th>セン</th>
 59 | </tr>
 60 | </thead>
 61 | <tbody>
 62 | <tr>
 63 | <td>1. Block elements inside cells don't work.</td>
 64 | <td></td>
 65 | </tr>
 66 | <tr>
 67 | <td>But <em><strong>inline elements do</strong></em>.</td>
 68 | <td>x</td>
 69 | </tr>
 70 | </tbody>
 71 | </table>
 72 | <p>Hi!</p>
 73 | ````````````````````````````````
 74 | 
 75 | Here we demonstrate some edge cases about what is and isn't a table.
 76 | 
 77 | ```````````````````````````````` example
 78 | | Not enough table | to be considered table |
 79 | 
 80 | | Not enough table | to be considered table |
 81 | | Not enough table | to be considered table |
 82 | 
 83 | | Just enough table | to be considered table |
 84 | | ----------------- | ---------------------- |
 85 | 
 86 | | ---- | --- |
 87 | 
 88 | |x|
 89 | |-|
 90 | 
 91 | | xyz |
 92 | | --- |
 93 | .
 94 | <p>| Not enough table | to be considered table |</p>
 95 | <p>| Not enough table | to be considered table |
 96 | | Not enough table | to be considered table |</p>
 97 | <table>
 98 | <thead>
 99 | <tr>
100 | <th>Just enough table</th>
101 | <th>to be considered table</th>
102 | </tr>
103 | </thead>
104 | </table>
105 | <p>| ---- | --- |</p>
106 | <table>
107 | <thead>
108 | <tr>
109 | <th>x</th>
110 | </tr>
111 | </thead>
112 | </table>
113 | <table>
114 | <thead>
115 | <tr>
116 | <th>xyz</th>
117 | </tr>
118 | </thead>
119 | </table>
120 | ````````````````````````````````
121 | 
122 | A "simpler" table, GFM style:
123 | 
124 | ```````````````````````````````` example
125 | abc | def
126 | --- | ---
127 | xyz | ghi
128 | .
129 | <table>
130 | <thead>
131 | <tr>
132 | <th>abc</th>
133 | <th>def</th>
134 | </tr>
135 | </thead>
136 | <tbody>
137 | <tr>
138 | <td>xyz</td>
139 | <td>ghi</td>
140 | </tr>
141 | </tbody>
142 | </table>
143 | ````````````````````````````````
144 | 
145 | We are making the parser slightly more lax here. Here is a table with spaces at
146 | the end:
147 | 
148 | ```````````````````````````````` example
149 | Hello!
150 | 
151 | | _abc_ | セン |
152 | | ----- | ---- |
153 | | this row has a space at the end | | 
154 | | But _**inline elements do**_. | x |
155 | 
156 | Hi!
157 | .
158 | <p>Hello!</p>
159 | <table>
160 | <thead>
161 | <tr>
162 | <th><em>abc</em></th>
163 | <th>セン</th>
164 | </tr>
165 | </thead>
166 | <tbody>
167 | <tr>
168 | <td>this row has a space at the end</td>
169 | <td></td>
170 | </tr>
171 | <tr>
172 | <td>But <em><strong>inline elements do</strong></em>.</td>
173 | <td>x</td>
174 | </tr>
175 | </tbody>
176 | </table>
177 | <p>Hi!</p>
178 | ````````````````````````````````
179 | 
180 | Table alignment:
181 | 
182 | ```````````````````````````````` example
183 | aaa | bbb | ccc | ddd | eee
184 | :-- | --- | :-: | --- | --:
185 | fff | ggg | hhh | iii | jjj
186 | .
187 | <table>
188 | <thead>
189 | <tr>
190 | <th align="left">aaa</th>
191 | <th>bbb</th>
192 | <th align="center">ccc</th>
193 | <th>ddd</th>
194 | <th align="right">eee</th>
195 | </tr>
196 | </thead>
197 | <tbody>
198 | <tr>
199 | <td align="left">fff</td>
200 | <td>ggg</td>
201 | <td align="center">hhh</td>
202 | <td>iii</td>
203 | <td align="right">jjj</td>
204 | </tr>
205 | </tbody>
206 | </table>
207 | ````````````````````````````````
208 | 
209 | ### Table cell count mismatches
210 | 
211 | The header and delimiter row must match.
212 | 
213 | ```````````````````````````````` example
214 | | a | b | c |
215 | | --- | --- |
216 | | this | isn't | okay |
217 | .
218 | <p>| a | b | c |
219 | | --- | --- |
220 | | this | isn't | okay |</p>
221 | ````````````````````````````````
222 | 
223 | But any of the body rows can be shorter. Rows longer
224 | than the header are truncated.
225 | 
226 | ```````````````````````````````` example
227 | | a | b | c |
228 | | --- | --- | ---
229 | | x
230 | | a | b
231 | | 1 | 2 | 3 | 4 | 5 |
232 | .
233 | <table>
234 | <thead>
235 | <tr>
236 | <th>a</th>
237 | <th>b</th>
238 | <th>c</th>
239 | </tr>
240 | </thead>
241 | <tbody>
242 | <tr>
243 | <td>x</td>
244 | <td></td>
245 | <td></td>
246 | </tr>
247 | <tr>
248 | <td>a</td>
249 | <td>b</td>
250 | <td></td>
251 | </tr>
252 | <tr>
253 | <td>1</td>
254 | <td>2</td>
255 | <td>3</td>
256 | </tr>
257 | </tbody>
258 | </table>
259 | ````````````````````````````````
260 | 
261 | ### Embedded pipes
262 | 
263 | Tables with embedded pipes could be tricky.
264 | 
265 | ```````````````````````````````` example
266 | | a | b |
267 | | --- | --- |
268 | | Escaped pipes are \|okay\|. | Like \| this. |
269 | | Within `\|code\| is okay` too. |
270 | | _**`c\|`**_ \| complex
271 | | don't **\_reparse\_**
272 | .
273 | <table>
274 | <thead>
275 | <tr>
276 | <th>a</th>
277 | <th>b</th>
278 | </tr>
279 | </thead>
280 | <tbody>
281 | <tr>
282 | <td>Escaped pipes are |okay|.</td>
283 | <td>Like | this.</td>
284 | </tr>
285 | <tr>
286 | <td>Within <code>|code| is okay</code> too.</td>
287 | <td></td>
288 | </tr>
289 | <tr>
290 | <td><em><strong><code>c|</code></strong></em> | complex</td>
291 | <td></td>
292 | </tr>
293 | <tr>
294 | <td>don't <strong>_reparse_</strong></td>
295 | <td></td>
296 | </tr>
297 | </tbody>
298 | </table>
299 | ````````````````````````````````
300 | 
301 | ### Oddly-formatted markers
302 | 
303 | This shouldn't assert.
304 | 
305 | ```````````````````````````````` example
306 | | a |
307 | --- |
308 | .
309 | <table>
310 | <thead>
311 | <tr>
312 | <th>a</th>
313 | </tr>
314 | </thead>
315 | </table>
316 | ````````````````````````````````
317 | 
318 | ### Escaping
319 | 
320 | ```````````````````````````````` example
321 | | a | b |
322 | | --- | --- |
323 | | \\ | `\\` |
324 | | \\\\ | `\\\\` |
325 | | \_ | `\_` |
326 | | \| | `\|` |
327 | | \a | `\a` |
328 | 
329 | \\ `\\`
330 | 
331 | \\\\ `\\\\`
332 | 
333 | \_ `\_`
334 | 
335 | \| `\|`
336 | 
337 | \a `\a`
338 | .
339 | <table>
340 | <thead>
341 | <tr>
342 | <th>a</th>
343 | <th>b</th>
344 | </tr>
345 | </thead>
346 | <tbody>
347 | <tr>
348 | <td>\</td>
349 | <td><code>\\</code></td>
350 | </tr>
351 | <tr>
352 | <td>\\</td>
353 | <td><code>\\\\</code></td>
354 | </tr>
355 | <tr>
356 | <td>_</td>
357 | <td><code>\_</code></td>
358 | </tr>
359 | <tr>
360 | <td>|</td>
361 | <td><code>|</code></td>
362 | </tr>
363 | <tr>
364 | <td>\a</td>
365 | <td><code>\a</code></td>
366 | </tr>
367 | </tbody>
368 | </table>
369 | <p>\ <code>\\</code></p>
370 | <p>\\ <code>\\\\</code></p>
371 | <p>_ <code>\_</code></p>
372 | <p>| <code>\|</code></p>
373 | <p>\a <code>\a</code></p>
374 | ````````````````````````````````
375 | 
376 | ### Embedded HTML
377 | 
378 | ```````````````````````````````` example
379 | | a |
380 | | --- |
381 | | <strong>hello</strong> |
382 | | ok <br> sure |
383 | .
384 | <table>
385 | <thead>
386 | <tr>
387 | <th>a</th>
388 | </tr>
389 | </thead>
390 | <tbody>
391 | <tr>
392 | <td><strong>hello</strong></td>
393 | </tr>
394 | <tr>
395 | <td>ok <br> sure</td>
396 | </tr>
397 | </tbody>
398 | </table>
399 | ````````````````````````````````
400 | 
401 | ### Reference-style links
402 | 
403 | ```````````````````````````````` example
404 | Here's a link to [Freedom Planet 2][].
405 | 
406 | | Here's a link to [Freedom Planet 2][] in a table header. |
407 | | --- |
408 | | Here's a link to [Freedom Planet 2][] in a table row. |
409 | 
410 | [Freedom Planet 2]: http://www.freedomplanet2.com/
411 | .
412 | <p>Here's a link to <a href="http://www.freedomplanet2.com/">Freedom Planet 2</a>.</p>
413 | <table>
414 | <thead>
415 | <tr>
416 | <th>Here's a link to <a href="http://www.freedomplanet2.com/">Freedom Planet 2</a> in a table header.</th>
417 | </tr>
418 | </thead>
419 | <tbody>
420 | <tr>
421 | <td>Here's a link to <a href="http://www.freedomplanet2.com/">Freedom Planet 2</a> in a table row.</td>
422 | </tr>
423 | </tbody>
424 | </table>
425 | ````````````````````````````````
426 | 
427 | ### Sequential cells
428 | 
429 | ```````````````````````````````` example
430 | | a | b | c |
431 | | --- | --- | --- |
432 | | d || e |
433 | .
434 | <table>
435 | <thead>
436 | <tr>
437 | <th>a</th>
438 | <th>b</th>
439 | <th>c</th>
440 | </tr>
441 | </thead>
442 | <tbody>
443 | <tr>
444 | <td>d</td>
445 | <td></td>
446 | <td>e</td>
447 | </tr>
448 | </tbody>
449 | </table>
450 | ````````````````````````````````
451 | 
452 | ### Interaction with emphasis
453 | 
454 | ```````````````````````````````` example
455 | | a | b |
456 | | --- | --- |
457 | |***(a)***|
458 | .
459 | <table>
460 | <thead>
461 | <tr>
462 | <th>a</th>
463 | <th>b</th>
464 | </tr>
465 | </thead>
466 | <tbody>
467 | <tr>
468 | <td><em><strong>(a)</strong></em></td>
469 | <td></td>
470 | </tr>
471 | </tbody>
472 | </table>
473 | ````````````````````````````````
474 | 
475 | ### a table can be recognised when separated from a paragraph of text without an empty line
476 | 
477 | ```````````````````````````````` example
478 | 123
479 | 456
480 | | a | b |
481 | | ---| --- |
482 | d | e
483 | .
484 | <p>123
485 | 456</p>
486 | <table>
487 | <thead>
488 | <tr>
489 | <th>a</th>
490 | <th>b</th>
491 | </tr>
492 | </thead>
493 | <tbody>
494 | <tr>
495 | <td>d</td>
496 | <td>e</td>
497 | </tr>
498 | </tbody>
499 | </table>
500 | ````````````````````````````````
501 | 
502 | ## Strikethroughs
503 | 
504 | A well-formed strikethrough.
505 | 
506 | ```````````````````````````````` example
507 | A proper ~strikethrough~.
508 | .
509 | <p>A proper <del>strikethrough</del>.</p>
510 | ````````````````````````````````
511 | 
512 | Some strikethrough edge cases.
513 | 
514 | ```````````````````````````````` example
515 | These are ~not strikethroughs.
516 | 
517 | No, they are not~
518 | 
519 | This ~is ~ legit~ isn't ~ legit.
520 | 
521 | This is not ~~~~~one~~~~~ huge strikethrough.
522 | 
523 | ~one~ ~~two~~ ~~~three~~~
524 | 
525 | No ~mismatch~~
526 | .
527 | <p>These are ~not strikethroughs.</p>
528 | <p>No, they are not~</p>
529 | <p>This <del>is ~ legit</del> isn't ~ legit.</p>
530 | <p>This is not ~~~~~one~~~~~ huge strikethrough.</p>
531 | <p><del>one</del> <del>two</del> ~~~three~~~</p>
532 | <p>No ~mismatch~~</p>
533 | ````````````````````````````````
534 | 
535 | Using 200 tilde since it overflows the internal buffer
536 | size (100) for parsing delimiters in inlines.c
537 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~striked~
538 | 
539 | ## Autolinks
540 | 
541 | ```````````````````````````````` example autolink
542 | : http://google.com https://google.com
543 | 
544 | <http://google.com/å> http://google.com/å
545 | 
546 | scyther@pokemon.com
547 | 
548 | scy.the_rbe-edr+ill@pokemon.com
549 | 
550 | scyther@pokemon.com.
551 | 
552 | scyther@pokemon.com/
553 | 
554 | scyther@pokemon.com/beedrill@pokemon.com
555 | 
556 | mailto:scyther@pokemon.com
557 | 
558 | This is a mailto:scyther@pokemon.com
559 | 
560 | mailto:scyther@pokemon.com.
561 | 
562 | mailto:scyther@pokemon.com/
563 | 
564 | mailto:scyther@pokemon.com/message
565 | 
566 | mailto:scyther@pokemon.com/mailto:beedrill@pokemon.com
567 | 
568 | xmpp:scyther@pokemon.com
569 | 
570 | xmpp:scyther@pokemon.com.
571 | 
572 | xmpp:scyther@pokemon.com/message
573 | 
574 | xmpp:scyther@pokemon.com/message.
575 | 
576 | Email me at:scyther@pokemon.com
577 | 
578 | www.github.com www.github.com/á
579 | 
580 | www.google.com/a_b
581 | 
582 | Underscores not allowed in host name www.xxx.yyy._zzz
583 | 
584 | Underscores not allowed in host name www.xxx._yyy.zzz
585 | 
586 | Underscores allowed in domain name www._xxx.yyy.zzz
587 | 
588 | **Autolink and http://inlines.com**
589 | 
590 | ![http://inline.com/image](http://inline.com/image)
591 | 
592 | a.w@b.c
593 | 
594 | Full stop outside parens shouldn't be included http://google.com/ok.
595 | 
596 | (Full stop inside parens shouldn't be included http://google.com/ok.)
597 | 
598 | "http://google.com"
599 | 
600 | 'http://google.com'
601 | 
602 | http://🍄.ga/ http://x🍄.ga/
603 | .
604 | <p>: <a href="http://google.com">http://google.com</a> <a href="https://google.com">https://google.com</a></p>
605 | <p><a href="http://google.com/%C3%A5">http://google.com/å</a> <a href="http://google.com/%C3%A5">http://google.com/å</a></p>
606 | <p><a href="mailto:scyther@pokemon.com">scyther@pokemon.com</a></p>
607 | <p><a href="mailto:scy.the_rbe-edr+ill@pokemon.com">scy.the_rbe-edr+ill@pokemon.com</a></p>
608 | <p><a href="mailto:scyther@pokemon.com">scyther@pokemon.com</a>.</p>
609 | <p><a href="mailto:scyther@pokemon.com">scyther@pokemon.com</a>/</p>
610 | <p><a href="mailto:scyther@pokemon.com">scyther@pokemon.com</a>/<a href="mailto:beedrill@pokemon.com">beedrill@pokemon.com</a></p>
611 | <p><a href="mailto:scyther@pokemon.com">mailto:scyther@pokemon.com</a></p>
612 | <p>This is a <a href="mailto:scyther@pokemon.com">mailto:scyther@pokemon.com</a></p>
613 | <p><a href="mailto:scyther@pokemon.com">mailto:scyther@pokemon.com</a>.</p>
614 | <p><a href="mailto:scyther@pokemon.com">mailto:scyther@pokemon.com</a>/</p>
615 | <p><a href="mailto:scyther@pokemon.com">mailto:scyther@pokemon.com</a>/message</p>
616 | <p><a href="mailto:scyther@pokemon.com">mailto:scyther@pokemon.com</a>/<a href="mailto:beedrill@pokemon.com">mailto:beedrill@pokemon.com</a></p>
617 | <p><a href="xmpp:scyther@pokemon.com">xmpp:scyther@pokemon.com</a></p>
618 | <p><a href="xmpp:scyther@pokemon.com">xmpp:scyther@pokemon.com</a>.</p>
619 | <p><a href="xmpp:scyther@pokemon.com/message">xmpp:scyther@pokemon.com/message</a></p>
620 | <p><a href="xmpp:scyther@pokemon.com/message">xmpp:scyther@pokemon.com/message</a>.</p>
621 | <p>Email me at:<a href="mailto:scyther@pokemon.com">scyther@pokemon.com</a></p>
622 | <p><a href="http://www.github.com">www.github.com</a> <a href="http://www.github.com/%C3%A1">www.github.com/á</a></p>
623 | <p><a href="http://www.google.com/a_b">www.google.com/a_b</a></p>
624 | <p>Underscores not allowed in host name www.xxx.yyy._zzz</p>
625 | <p>Underscores not allowed in host name www.xxx._yyy.zzz</p>
626 | <p>Underscores allowed in domain name <a href="http://www._xxx.yyy.zzz">www._xxx.yyy.zzz</a></p>
627 | <p><strong>Autolink and <a href="http://inlines.com">http://inlines.com</a></strong></p>
628 | <p><img src="http://inline.com/image" alt="http://inline.com/image" /></p>
629 | <p><a href="mailto:a.w@b.c">a.w@b.c</a></p>
630 | <p>Full stop outside parens shouldn't be included <a href="http://google.com/ok">http://google.com/ok</a>.</p>
631 | <p>(Full stop inside parens shouldn't be included <a href="http://google.com/ok">http://google.com/ok</a>.)</p>
632 | <p>&quot;<a href="http://google.com">http://google.com</a>&quot;</p>
633 | <p>'<a href="http://google.com">http://google.com</a>'</p>
634 | <p><a href="http://%F0%9F%8D%84.ga/">http://🍄.ga/</a> <a href="http://x%F0%9F%8D%84.ga/">http://x🍄.ga/</a></p>
635 | ````````````````````````````````
636 | 
637 | ```````````````````````````````` example pending
638 | mmmmailto:scyther@pokemon.com
639 | .
640 | <p>mmmmailto:<a href="mailto:scyther@pokemon.com">scyther@pokemon.com</a></p>
641 | ````````````````````````````````
642 | 
643 | ```````````````````````````````` example
644 | This shouldn't crash everything: (_A_@_.A
645 | .
646 | <IGNORE>
647 | ````````````````````````````````
648 | 
649 | ```````````````````````````````` example
650 | These should not link:
651 | 
652 | * @a.b.c@. x
653 | * n@.  b
654 | .
655 | <p>These should not link:</p>
656 | <ul>
657 | <li>@a.b.c@. x</li>
658 | <li>n@.  b</li>
659 | </ul>
660 | ````````````````````````````````
661 | 
662 | ## HTML tag filter
663 | 
664 | 
665 | ```````````````````````````````` example tagfilter
666 | This is <xmp> not okay, but **this** <strong>is</strong>.
667 | 
668 | <p>This is <xmp> not okay, but **this** <strong>is</strong>.</p>
669 | 
670 | Nope, I won't have <textarea>.
671 | 
672 | <p>No <textarea> here either.</p>
673 | 
674 | <p>This <random /> <thing> is okay</thing> though.</p>
675 | 
676 | Yep, <totally>okay</totally>.
677 | 
678 | <!-- HTML comments are okay, though. -->
679 | <!- But we're strict. ->
680 | <! No nonsense. >
681 | <!-- Leave multiline comments the heck alone, though, okay?
682 | Even with {"x":"y"} or 1 > 2 or whatever. Even **markdown**.
683 | -->
684 | <!--- Support everything CommonMark's parser does. -->
685 | <!---->
686 | <!--thistoo-->
687 | .
688 | <p>This is &lt;xmp> not okay, but <strong>this</strong> <strong>is</strong>.</p>
689 | <p>This is &lt;xmp> not okay, but **this** <strong>is</strong>.</p>
690 | <p>Nope, I won't have &lt;textarea>.</p>
691 | <p>No &lt;textarea> here either.</p>
692 | <p>This <random /> <thing> is okay</thing> though.</p>
693 | <p>Yep, <totally>okay</totally>.</p>
694 | <!-- HTML comments are okay, though. -->
695 | <p>&lt;!- But we're strict. -&gt;
696 | &lt;! No nonsense. &gt;</p>
697 | <!-- Leave multiline comments the heck alone, though, okay?
698 | Even with {"x":"y"} or 1 > 2 or whatever. Even **markdown**.
699 | -->
700 | <!--- Support everything CommonMark's parser does. -->
701 | <!---->
702 | <!--thistoo-->
703 | ````````````````````````````````
704 | 
705 | ## Footnotes
706 | 
707 | ```````````````````````````````` example pending
708 | This is some text![^1]. Other text.[^footnote].
709 | 
710 | Here's a thing[^other-note].
711 | 
712 | And another thing[^codeblock-note].
713 | 
714 | This doesn't have a referent[^nope].
715 | 
716 | 
717 | [^other-note]:       no code block here (spaces are stripped away)
718 | 
719 | [^codeblock-note]:
720 |         this is now a code block (8 spaces indentation)
721 | 
722 | [^1]: Some *bolded* footnote definition.
723 | 
724 | Hi!
725 | 
726 | [^footnote]:
727 |     > Blockquotes can be in a footnote.
728 | 
729 |         as well as code blocks
730 | 
731 |     or, naturally, simple paragraphs.
732 | 
733 | [^unused]: This is unused.
734 | .
735 | <p>This is some text!<sup class="footnote-ref"><a href="#fn-1" id="fnref-1" data-footnote-ref>1</a></sup>. Other text.<sup class="footnote-ref"><a href="#fn-footnote" id="fnref-footnote" data-footnote-ref>2</a></sup>.</p>
736 | <p>Here's a thing<sup class="footnote-ref"><a href="#fn-other-note" id="fnref-other-note" data-footnote-ref>3</a></sup>.</p>
737 | <p>And another thing<sup class="footnote-ref"><a href="#fn-codeblock-note" id="fnref-codeblock-note" data-footnote-ref>4</a></sup>.</p>
738 | <p>This doesn't have a referent[^nope].</p>
739 | <p>Hi!</p>
740 | <section class="footnotes" data-footnotes>
741 | <ol>
742 | <li id="fn-1">
743 | <p>Some <em>bolded</em> footnote definition. <a href="#fnref-1" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="1" aria-label="Back to reference 1">↩</a></p>
744 | </li>
745 | <li id="fn-footnote">
746 | <blockquote>
747 | <p>Blockquotes can be in a footnote.</p>
748 | </blockquote>
749 | <pre><code>as well as code blocks
750 | </code></pre>
751 | <p>or, naturally, simple paragraphs. <a href="#fnref-footnote" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="2" aria-label="Back to reference 2">↩</a></p>
752 | </li>
753 | <li id="fn-other-note">
754 | <p>no code block here (spaces are stripped away) <a href="#fnref-other-note" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="3" aria-label="Back to reference 3">↩</a></p>
755 | </li>
756 | <li id="fn-codeblock-note">
757 | <pre><code>this is now a code block (8 spaces indentation)
758 | </code></pre>
759 | <a href="#fnref-codeblock-note" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="4" aria-label="Back to reference 4">↩</a>
760 | </li>
761 | </ol>
762 | </section>
763 | ````````````````````````````````
764 | 
765 | ## When a footnote is used multiple times, we insert multiple backrefs.
766 | 
767 | ```````````````````````````````` example pending
768 | This is some text. It has a footnote[^a-footnote].
769 | 
770 | This footnote is referenced[^a-footnote] multiple times, in lots of different places.[^a-footnote]
771 | 
772 | [^a-footnote]: This footnote definition should have three backrefs.
773 | .
774 | <p>This is some text. It has a footnote<sup class="footnote-ref"><a href="#fn-a-footnote" id="fnref-a-footnote" data-footnote-ref>1</a></sup>.</p>
775 | <p>This footnote is referenced<sup class="footnote-ref"><a href="#fn-a-footnote" id="fnref-a-footnote-2" data-footnote-ref>1</a></sup> multiple times, in lots of different places.<sup class="footnote-ref"><a href="#fn-a-footnote" id="fnref-a-footnote-3" data-footnote-ref>1</a></sup></p>
776 | <section class="footnotes" data-footnotes>
777 | <ol>
778 | <li id="fn-a-footnote">
779 | <p>This footnote definition should have three backrefs. <a href="#fnref-a-footnote" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="1" aria-label="Back to reference 1">↩</a> <a href="#fnref-a-footnote-2" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="1-2" aria-label="Back to reference 1-2">↩<sup class="footnote-ref">2</sup></a> <a href="#fnref-a-footnote-3" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="1-3" aria-label="Back to reference 1-3">↩<sup class="footnote-ref">3</sup></a></p>
780 | </li>
781 | </ol>
782 | </section>
783 | ````````````````````````````````
784 | 
785 | ## Footnote reference labels are href escaped
786 | 
787 | ```````````````````````````````` example pending
788 | Hello[^"><script>alert(1)</script>]
789 | 
790 | [^"><script>alert(1)</script>]: pwned
791 | .
792 | <p>Hello<sup class="footnote-ref"><a href="#fn-%22%3E%3Cscript%3Ealert(1)%3C/script%3E" id="fnref-%22%3E%3Cscript%3Ealert(1)%3C/script%3E" data-footnote-ref>1</a></sup></p>
793 | <section class="footnotes" data-footnotes>
794 | <ol>
795 | <li id="fn-%22%3E%3Cscript%3Ealert(1)%3C/script%3E">
796 | <p>pwned <a href="#fnref-%22%3E%3Cscript%3Ealert(1)%3C/script%3E" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="1" aria-label="Back to reference 1">↩</a></p>
797 | </li>
798 | </ol>
799 | </section>
800 | ````````````````````````````````
801 | 
802 | ## Interop
803 | 
804 | Autolink and strikethrough.
805 | 
806 | ```````````````````````````````` example autolink
807 | ~~www.google.com~~
808 | 
809 | ~~http://google.com~~
810 | .
811 | <p><del><a href="http://www.google.com">www.google.com</a></del></p>
812 | <p><del><a href="http://google.com">http://google.com</a></del></p>
813 | ````````````````````````````````
814 | 
815 | Autolink and tables.
816 | 
817 | ```````````````````````````````` example autolink
818 | | a | b |
819 | | --- | --- |
820 | | https://github.com www.github.com | http://pokemon.com |
821 | .
822 | <table>
823 | <thead>
824 | <tr>
825 | <th>a</th>
826 | <th>b</th>
827 | </tr>
828 | </thead>
829 | <tbody>
830 | <tr>
831 | <td><a href="https://github.com">https://github.com</a> <a href="http://www.github.com">www.github.com</a></td>
832 | <td><a href="http://pokemon.com">http://pokemon.com</a></td>
833 | </tr>
834 | </tbody>
835 | </table>
836 | ````````````````````````````````
837 | 
838 | ## Task lists
839 | 
840 | ```````````````````````````````` example
841 | - [ ] foo
842 | - [x] bar
843 | .
844 | <ul>
845 | <li><input disabled="" type="checkbox"> foo</li>
846 | <li><input checked="" disabled="" type="checkbox"> bar</li>
847 | </ul>
848 | ````````````````````````````````
849 | 
850 | Show that a task list and a regular list get processed the same in
851 | the way that sublists are created. If something works in a list
852 | item, then it should work the same way with a task.  The only
853 | difference should be the tasklist marker. So, if we use something
854 | other than a space or x, it won't be recognized as a task item, and
855 | so will be treated as a regular item.
856 | 
857 | ```````````````````````````````` example
858 | - [x] foo
859 |   - [ ] bar
860 |   - [x] baz
861 | - [ ] bim
862 | 
863 | Show a regular (non task) list to show that it has the same structure
864 | - [@] foo
865 |   - [@] bar
866 |   - [@] baz
867 | - [@] bim
868 | .
869 | <ul>
870 | <li><input checked="" disabled="" type="checkbox"> foo
871 | <ul>
872 | <li><input disabled="" type="checkbox"> bar</li>
873 | <li><input checked="" disabled="" type="checkbox"> baz</li>
874 | </ul>
875 | </li>
876 | <li><input disabled="" type="checkbox"> bim</li>
877 | </ul>
878 | <p>Show a regular (non task) list to show that it has the same structure</p>
879 | <ul>
880 | <li>[@] foo
881 | <ul>
882 | <li>[@] bar</li>
883 | <li>[@] baz</li>
884 | </ul>
885 | </li>
886 | <li>[@] bim</li>
887 | </ul>
888 | ````````````````````````````````
889 | Use a larger indent -- a task list and a regular list should produce
890 | the same structure.
891 | 
892 | ```````````````````````````````` example
893 | - [x] foo
894 |     - [ ] bar
895 |     - [x] baz
896 | - [ ] bim
897 | 
898 | Show a regular (non task) list to show that it has the same structure
899 | - [@] foo
900 |     - [@] bar
901 |     - [@] baz
902 | - [@] bim
903 | .
904 | <ul>
905 | <li><input checked="" disabled="" type="checkbox"> foo
906 | <ul>
907 | <li><input disabled="" type="checkbox"> bar</li>
908 | <li><input checked="" disabled="" type="checkbox"> baz</li>
909 | </ul>
910 | </li>
911 | <li><input disabled="" type="checkbox"> bim</li>
912 | </ul>
913 | <p>Show a regular (non task) list to show that it has the same structure</p>
914 | <ul>
915 | <li>[@] foo
916 | <ul>
917 | <li>[@] bar</li>
918 | <li>[@] baz</li>
919 | </ul>
920 | </li>
921 | <li>[@] bim</li>
922 | </ul>
923 | ````````````````````````````````
924 | 


--------------------------------------------------------------------------------
/src/markd/parsers/inline.cr:
--------------------------------------------------------------------------------
   1 | require "html"
   2 | require "uri"
   3 | 
   4 | module Markd::Parser
   5 |   class Inline
   6 |     include Parser
   7 | 
   8 |     property refmap
   9 |     private getter! brackets
  10 | 
  11 |     @delimiters : Delimiter?
  12 | 
  13 |     def initialize(@options : Options)
  14 |       # Link reference definitions; `close_bracket` looks them up by normalized label.
  15 |       @refmap = {} of String => Hash(String, String) | String
  16 |       # Scanner state: byte offset into the text currently being parsed.
  17 |       @pos = 0
  18 |       @text = ""
  19 |     end
  18 | 
  19 |     # Parses the raw inline text stored in `node.text` and appends the resulting
  20 |     # inline nodes to `node`. The raw text is cleared once it has been consumed.
  21 |     def parse(node : Node)
  22 |       @text = node.text.strip
  23 |       @pos = 0
  24 |       @delimiters = nil
  25 |       # Drop unmatched `[` / `![` openers left behind by a previously parsed node:
  26 |       # their indexes and nodes refer to that node's text, not to this one, so a
  27 |       # `]` in this node must never be paired with them.
  28 |       @brackets = nil
  29 | 
  30 |       loop do
  31 |         break unless process_line(node)
  32 |       end
  33 | 
  34 |       node.text = ""
  35 |       # Resolve whatever delimiter runs are still pending (nil = whole stack).
  36 |       process_delimiters(nil)
  37 |     end
  31 | 
  32 |     # Consumes the inline construct that starts at `@pos`, picking a handler from
  33 |     # its first character. Returns false once the input is exhausted, true otherwise.
  34 |     private def process_line(node : Node)
  35 |       char = char_at?(@pos)
  36 |       return false unless char && char != Char::ZERO
  37 | 
  38 |       handled = case char
  39 |                 when '\n' then newline(node)
  40 |                 when '\\' then backslash(node)
  41 |                 when '`'  then backtick(node)
  42 |                 when '['  then open_bracket(node)
  43 |                 when ']'  then close_bracket(node)
  44 |                 when '!'  then bang(node)
  45 |                 when '&'  then entity(node)
  46 |                 when ':'  then emoji(node)
  47 |                 when '<'  then auto_link(node) || html_tag(node)
  48 |                 when '*', '_'
  49 |                   handle_delim(char, node)
  50 |                 when '~'
  51 |                   # `~` is only treated as a delimiter when GFM is enabled.
  52 |                   @options.gfm? ? handle_delim(char, node) : string(node)
  53 |                 when '\'', '"'
  54 |                   # Quotes are only delimiters in smart-punctuation mode.
  55 |                   @options.smart? && handle_delim(char, node)
  56 |                 when 'w'
  57 |                   # GFM `www.` autolink, unless this is the `www` of `http://www`.
  58 |                   @options.autolink? && (@pos == 0 || char_at?(@pos - 1) != '/') && auto_link(node)
  59 |                 when 'h'
  60 |                   # GFM `http://` / `https://` autolink, not attempted right after `[`.
  61 |                   @options.autolink? && (@pos == 0 || char_at?(@pos - 1) != '[') && auto_link(node)
  62 |                 when 'f', 'x', 'm'
  63 |                   # GFM `ftp://`, `xmpp:` and `mailto:` autolinks, not attempted right
  64 |                   # after `<` (that form belongs to the `<` branch above).
  65 |                   @options.autolink? && (@pos == 0 || char_at?(@pos - 1) != '<') && auto_link(node)
  66 |                 else
  67 |                   # With autolinks on and an `@` somewhere in the node text, try a
  68 |                   # bare e-mail autolink at this position; otherwise read plain text.
  69 |                   if @options.autolink? && node.text.includes?('@')
  70 |                     auto_link(node)
  71 |                   else
  72 |                     string(node)
  73 |                   end
  74 |                 end
  75 | 
  76 |       return true if handled
  77 | 
  78 |       # No handler consumed anything: emit the character itself as literal text.
  79 |       @pos += 1
  80 |       node.append_child(text(char))
  81 |       true
  82 |     end
 119 | 
 120 |     # Handles a line ending: appends a hard or soft break to `node` and skips
 121 |     # the leading spaces of the following line. Always returns true.
 122 |     private def newline(node : Node)
 123 |       @pos += 1 # step over the "\n" we are assumed to be on
 124 | 
 125 |       break_type = Node::Type::SoftBreak
 126 |       if (previous = node.last_child?) && previous.type.text? && previous.text.ends_with?(' ')
 127 |         trailing = previous.text
 128 |         # Two or more trailing spaces before the line ending make a hard break.
 129 |         break_type = Node::Type::LineBreak if trailing.size > 1 && trailing[-2]? == ' '
 130 |         previous.text = trailing.rstrip(' ')
 131 |       end
 132 |       node.append_child(Node.new(break_type))
 133 | 
 134 |       # Swallow the indentation of the next line.
 135 |       until char_at?(@pos) != ' '
 136 |         @pos += 1
 137 |       end
 138 | 
 139 |       true
 140 |     end
 144 | 
 145 |     # Handles a backslash: a hard line break before a newline, an escaped
 146 |     # character when the next character is escapable, or a literal `\` otherwise.
 147 |     # Always returns true.
 148 |     private def backslash(node : Node)
 149 |       @pos += 1
 150 | 
 151 |       following = @pos < @text.bytesize ? char_at(@pos).to_s : nil
 152 | 
 153 |       if char_at?(@pos) == '\n'
 154 |         @pos += 1
 155 |         node.append_child(Node.new(Node::Type::LineBreak))
 156 |       elsif following && following.match(Rule::ESCAPABLE)
 157 |         literal = text(following)
 158 |         @pos += 1
 159 |         node.append_child(literal)
 160 |       else
 161 |         # Not an escape sequence: keep the backslash and leave the next character alone.
 162 |         node.append_child(text("\\"))
 163 |       end
 164 | 
 165 |       true
 166 |     end
 164 | 
 165 |     # Handles a run of backticks: emits a code span when a closing run of the same
 166 |     # length exists, otherwise emits the opening run as literal text.
 167 |     # Returns false only when there is no backtick at `@pos`.
 168 |     private def backtick(node : Node)
 169 |       opening_start = @pos
 170 |       while char_at?(@pos) == '`'
 171 |         @pos += 1
 172 |       end
 173 |       return false if @pos == opening_start
 174 | 
 175 |       tick_count = @pos - opening_start
 176 |       content_start = @pos
 177 | 
 178 |       # Look at each later backtick run until one has the same length as the opener.
 179 |       while (run = match(Rule::TICKS))
 180 |         next unless run.bytesize == tick_count
 181 | 
 182 |         content = @text.byte_slice(content_start, (@pos - tick_count) - content_start).gsub(Rule::LINE_ENDING, " ")
 183 |         # Strip one space from each side when both sides have one and the content
 184 |         # is not made of spaces only.
 185 |         padded = content.bytesize >= 2 && content[0] == ' ' && content[-1] == ' ' && content.matches?(/[^ ]/)
 186 |         content = content.byte_slice(1, content.bytesize - 2) if padded
 187 | 
 188 |         code = Node.new(Node::Type::Code)
 189 |         code.text = content
 190 |         node.append_child(code)
 191 |         return true
 192 |       end
 193 | 
 194 |       # No matching closer: rewind to just after the opener and emit it literally.
 195 |       @pos = content_start
 196 |       node.append_child(text("`" * tick_count))
 197 | 
 198 |       true
 199 |     end
 193 | 
 194 |     # Handles `!`: registers an image opener when it is followed by `[`,
 195 |     # otherwise appends a literal `!`. Always returns true.
 196 |     private def bang(node : Node)
 197 |       bang_pos = @pos
 198 |       @pos += 1
 199 | 
 200 |       unless char_at?(@pos) == '['
 201 |         node.append_child(text("!"))
 202 |         return true
 203 |       end
 204 | 
 205 |       @pos += 1
 206 |       opener = text("![")
 207 |       node.append_child(opener)
 208 |       # The recorded index is the position of the `[`, not of the `!`.
 209 |       add_bracket(opener, bang_pos + 1, true)
 210 | 
 211 |       true
 212 |     end
 209 | 
 210 |     # Pushes a new opener for `node` onto the bracket stack. The opener it is
 211 |     # stacked on (if any) is flagged as having a bracket after it, and the new
 212 |     # entry receives the current bracket and delimiter stack tops.
 213 |     private def add_bracket(node : Node, index : Int32, image = false)
 214 |       enclosing = @brackets
 215 |       enclosing.bracket_after = true if enclosing
 216 |       @brackets = Bracket.new(node, enclosing, @delimiters, index, image, true)
 217 |     end
 214 | 
 215 |     # Pops the top opener off the bracket stack (raises if the stack is empty).
 216 |     private def remove_bracket
 217 |       top = brackets
 218 |       @brackets = top.previous?
 219 |     end
 218 | 
 219 |     # Handles `[`: appends it as text and registers it as a potential link opener.
 220 |     # Always returns true.
 221 |     private def open_bracket(node : Node)
 222 |       bracket_pos = @pos
 223 |       @pos = bracket_pos + 1
 224 | 
 225 |       opener = text("[")
 226 |       node.append_child(opener)
 227 |       add_bracket(opener, bracket_pos, false)
 228 | 
 229 |       true
 230 |     end
 230 | 
 231 |     # Handles a `]`: tries to turn the innermost `[` / `![` opener into a link or
 232 |     # image, either inline (`[text](dest "title")`) or through a reference looked
 233 |     # up in `@refmap`. If that fails, a literal `]` is appended instead.
 234 |     # Always returns true, since the `]` is consumed either way.
 235 |     private def close_bracket(node : Node)
 236 |       title = ""
 237 |       dest = ""
 238 |       matched = false
 239 |       @pos += 1
 240 |       start_pos = @pos # position right after the `]`
 241 | 
 242 |       # get last [ or ![
 243 |       opener = @brackets
 244 |       unless opener
 245 |         # no matched opener, just return a literal
 246 |         node.append_child(text("]"))
 247 |         return true
 248 |       end
 249 | 
 250 |       unless opener.active?
 251 |         # the opener was deactivated, so it cannot start a link: return a literal
 252 |         node.append_child(text("]"))
 253 |         # take opener off brackets stack
 254 |         remove_bracket
 255 |         return true
 256 |       end
 257 | 
 258 |       # If we got here, open is a potential opener
 259 |       is_image = opener.image?
 260 | 
 261 |       # Check to see if we have a link/image
 262 |       save_pos = @pos
 263 | 
 264 |       # Inline link?
 265 |       if char_at?(@pos) == '('
 266 |         @pos += 1
 267 |         # A title is only attempted when whitespace precedes it; the `|| true`
 268 |         # keeps the chain going when there is no title.
 269 |         if spnl && (dest = link_destination) &&
 270 |            spnl && (char_at?(@pos - 1).try(&.whitespace?) &&
 271 |            (title = link_title) || true) && spnl &&
 272 |            char_at?(@pos) == ')'
 273 |           @pos += 1
 274 |           matched = true
 275 |         else
 276 |           @pos = save_pos
 277 |         end
 278 |       end
 279 | 
 280 |       ref_label = nil
 281 |       unless matched
 282 |         # Next, see if there's a link label
 283 |         before_label = @pos
 284 |         label_size = link_label
 285 |         # `link_label` returns the byte size of the matched `[label]` minus one
 286 |         # (0 when nothing matched), so 1 is an empty `[]` and anything above 1 is
 287 |         # a non-empty label, including single-character ones such as `[a]`.
 288 |         if label_size > 1
 289 |           ref_label = normalize_reference(@text.byte_slice(before_label, label_size + 1))
 290 |         elsif !opener.bracket_after?
 291 |           # Empty or missing second label means to use the first label as the reference.
 292 |           # The reference must not contain a bracket. If we know there's a bracket, we don't even bother checking it.
 293 |           byte_count = start_pos - opener.index
 294 |           ref_label = byte_count > 0 ? normalize_reference(@text.byte_slice(opener.index, byte_count)) : nil
 295 |         end
 296 | 
 297 |         if label_size == 0
 298 |           # If shortcut reference link, rewind before spaces we skipped.
 299 |           @pos = save_pos
 300 |         end
 301 | 
 302 |         if ref_label && @refmap[ref_label]?
 303 |           # lookup rawlabel in refmap
 304 |           link = @refmap[ref_label].as(Hash)
 305 |           dest = link["destination"] if link["destination"]
 306 |           title = link["title"] if link["title"]
 307 |           matched = true
 308 |         end
 309 |       end
 310 | 
 311 |       if matched
 312 |         child = Node.new(is_image ? Node::Type::Image : Node::Type::Link)
 313 |         child.data["destination"] = dest.not_nil!
 314 |         child.data["title"] = title || ""
 315 | 
 316 |         # Everything appended after the opener becomes the link/image content.
 317 |         tmp = opener.node.next?
 318 |         while tmp
 319 |           next_node = tmp.next?
 320 |           tmp.unlink
 321 |           child.append_child(tmp)
 322 |           tmp = next_node
 323 |         end
 324 | 
 325 |         node.append_child(child)
 326 |         # Resolve the delimiter runs pushed since the opener, i.e. inside the link text.
 327 |         process_delimiters(opener.previous_delimiter)
 328 |         remove_bracket
 329 |         opener.node.unlink
 330 | 
 331 |         # A link was just formed: deactivate every remaining `[` opener (image
 332 |         # openers stay active) so earlier brackets cannot wrap it in another link.
 333 |         unless is_image
 334 |           opener = @brackets
 335 |           while opener
 336 |             opener.active = false unless opener.image?
 337 |             opener = opener.previous?
 338 |           end
 339 |         end
 340 |       else
 341 |         # No link: drop the opener, rewind to just after the `]` and emit it literally.
 342 |         remove_bracket
 343 |         @pos = start_pos
 344 |         node.append_child(text("]"))
 345 |       end
 346 | 
 347 |       true
 348 |     end
 335 | 
 336 |     private def process_delimiters(delimiter : Delimiter?) # `delimiter` is the stack bottom; nil means the whole stack
 337 |       # Walk down from the top of the delimiter stack to the lowest entry above the stack bottom:
 338 |       closer = @delimiters
 339 |       while closer
 340 |         previous = closer.previous?
 341 |         break if previous == delimiter
 342 |         closer = previous
 343 |       end
 344 |       # `closer` is nil when the walk ran off the stack without meeting `delimiter` (or the stack is empty).
 345 |       if closer
 346 |         openers_bottom = {
 347 |           '_'  => delimiter,
 348 |           '*'  => delimiter,
 349 |           '\'' => delimiter,
 350 |           '"'  => delimiter,
 351 |         } of Char => Delimiter?
 352 |         # openers_bottom[c] bounds the backwards opener search for closers of character c.
 353 |         openers_bottom['~'] = delimiter if @options.gfm?
 354 | 
 355 |         # move forward, looking for closers, and handling each
 356 |         while closer
 357 |           closer_char = closer.char
 358 | 
 359 |           unless closer.can_close?
 360 |             closer = closer.next?
 361 |             next
 362 |           end
 363 | 
 364 |           # found emphasis closer. now look back for first matching opener:
 365 |           opener = closer.previous?
 366 |           opener_found = false
 367 |           while opener && opener != delimiter && opener != openers_bottom[closer_char]
 368 |             odd_match = (closer.can_open? || opener.can_close?) &&
 369 |                         closer.orig_delims % 3 != 0 &&
 370 |                         (opener.orig_delims + closer.orig_delims) % 3 == 0 # rejects pairs whose original lengths sum to a multiple of 3
 371 |             if opener.char == closer.char && opener.can_open? && !odd_match
 372 |               opener_found = true
 373 |               break
 374 |             end
 375 |             opener = opener.previous?
 376 |           end
 377 |           opener = nil unless opener_found
 378 |           # Remember this closer: the emphasis branch below may advance or remove `closer`.
 379 |           old_closer = closer
 380 | 
 381 |           case closer_char
 382 |           when '*', '_', '~'
 383 |             if closer_char != '~' || (closer_char == '~' && @options.gfm?) # `~` only counts in GFM mode
 384 |               if opener
 385 |                 # calculate actual number of delimiters used from closer
 386 |                 use_delims = (closer.num_delims >= 2 && opener.num_delims >= 2) ? 2 : 1
 387 | 
 388 |                 if closer_char == '~' && (
 389 |                      closer.num_delims > 2 ||
 390 |                      opener.num_delims > 2 ||
 391 |                      closer.num_delims != opener.num_delims
 392 |                    )
 393 |                   closer = closer.next?
 394 |                   next # strikethrough needs equal-length runs of at most two tildes on both sides
 395 |                 end
 396 | 
 397 |                 opener_inl = opener.node
 398 |                 closer_inl = closer.node
 399 | 
 400 |                 # remove used delimiters from stack elts and inlines
 401 |                 opener.num_delims -= use_delims
 402 |                 closer.num_delims -= use_delims
 403 | 
 404 |                 opener_inl.text = opener_inl.text[0..(-use_delims - 1)] # drop the last `use_delims` characters
 405 |                 closer_inl.text = closer_inl.text[0..(-use_delims - 1)]
 406 | 
 407 |                 if closer_char == '~'
 408 |                   emph = Node.new(Node::Type::Strikethrough)
 409 |                 else
 410 |                   # build contents for new emph element
 411 |                   emph = Node.new((use_delims == 1) ? Node::Type::Emphasis : Node::Type::Strong)
 412 |                 end
 413 |                 # Move every node between the opener and closer text nodes into the new element.
 414 |                 tmp = opener_inl.next?
 415 |                 while tmp && tmp != closer_inl
 416 |                   next_node = tmp.next?
 417 |                   tmp.unlink
 418 |                   emph.append_child(tmp)
 419 |                   tmp = next_node
 420 |                 end
 421 | 
 422 |                 opener_inl.insert_after(emph)
 423 | 
 424 |                 # remove elts between opener and closer in delimiters stack
 425 |                 remove_delimiter_between(opener, closer)
 426 | 
 427 |                 # if opener has 0 delims, remove it and the inline
 428 |                 if opener.num_delims == 0
 429 |                   opener_inl.unlink
 430 |                   remove_delimiter(opener)
 431 |                 end
 432 |                 # Likewise for an exhausted closer; otherwise the same closer is tried again on the next pass.
 433 |                 if closer.num_delims == 0
 434 |                   closer_inl.unlink
 435 |                   tmp_stack = closer.next?
 436 |                   remove_delimiter(closer)
 437 |                   closer = tmp_stack
 438 |                 end
 439 |               else
 440 |                 closer = closer.next?
 441 |               end
 442 |             end
 443 |           when '\''
 444 |             closer.node.text = "\u{2019}" # right single quotation mark
 445 |             if opener
 446 |               opener.node.text = "\u{2018}" # left single quotation mark
 447 |             end
 448 |             closer = closer.next?
 449 |           when '"'
 450 |             closer.node.text = "\u{201D}" # right double quotation mark
 451 |             if opener
 452 |               opener.node.text = "\u{201C}" # left double quotation mark
 453 |             end
 454 |             closer = closer.next?
 455 |           end
 456 |           # No opener matched: raise the search floor for this character and drop a closer that cannot also open.
 457 |           if !opener && !odd_match # odd_match keeps the value from the most recent inner-loop iteration (nil if it never ran)
 458 |             openers_bottom[closer_char] = old_closer.previous?
 459 |             remove_delimiter(old_closer) if !old_closer.can_open?
 460 |           end
 461 |         end
 462 |       end
 463 | 
 464 |       # remove every delimiter that is still on the stack above the stack bottom
 465 |       while (curr_delimiter = @delimiters) && curr_delimiter != delimiter
 466 |         remove_delimiter(curr_delimiter)
 467 |       end
 468 |     end
 469 | 
 470 |     # Tries to read an autolink at `@pos` and append it to `node`.
 471 |     #
 472 |     # The angle-bracket forms (`Rule::EMAIL_AUTO_LINK`, `Rule::AUTO_LINK`) are
 473 |     # always tried; the GFM extended forms (www, protocol, xmpp, mailto and bare
 474 |     # e-mail) only when the autolink option is enabled.
 475 |     # Returns true when something was matched and appended, false otherwise.
 476 |     private def auto_link(node : Node)
 477 |       if (matched_text = match(Rule::EMAIL_AUTO_LINK))
 478 |         node.append_child(link(matched_text, true))
 479 |         return true
 480 |       elsif (matched_text = match(Rule::AUTO_LINK))
 481 |         node.append_child(link(matched_text, false))
 482 |         return true
 483 |       end
 484 | 
 485 |       # Everything below belongs to the autolink extension.
 486 |       return false unless @options.autolink?
 487 | 
 488 |       # `www.` links get an `http://` scheme prepended; the other forms carry their own.
 489 |       add_proto = true
 490 |       matched_text = match(Rule::WWW_AUTO_LINK)
 491 |       unless matched_text
 492 |         add_proto = false
 493 |         matched_text = match(Rule::PROTOCOL_AUTO_LINK) ||
 494 |                        match(Rule::XMPP_AUTO_LINK) ||
 495 |                        match(Rule::MAILTO_AUTO_LINK)
 496 |       end
 497 | 
 498 |       if matched_text
 499 |         clean_text = autolink_cleanup(matched_text)
 500 |         if clean_text.empty?
 501 |           node.append_child(text(matched_text))
 502 |         else
 503 |           node.append_child(link(clean_text, false, add_proto))
 504 |           # Re-emit only what the cleanup trimmed off this match. `@pos` already
 505 |           # sits right after `matched_text`, so the rest of the input is parsed by
 506 |           # the caller; taking the tail from the whole `@text` would emit that
 507 |           # text twice and would split at an earlier identical URL.
 508 |           trimmed = matched_text.partition(clean_text)[2]
 509 |           node.append_child(text(trimmed)) unless trimmed.empty?
 510 |         end
 511 |         return true
 512 |       end
 513 | 
 514 |       if (matched_text = match(Rule::EXTENDED_EMAIL_AUTO_LINK))
 515 |         # Emails that end in - or _ are declared not to be links by the spec:
 516 |         #
 517 |         # `.`, `-`, and `_` can occur on both sides of the `@`, but only `.` may occur at
 518 |         # the end of the email address, in which case it will not be considered part of
 519 |         # the address:
 520 |         #
 521 |         # a.b-c_d@a.b_  => <p>a.b-c_d@a.b_</p>
 522 |         if "-_".includes?(matched_text[-1])
 523 |           node.append_child(text(matched_text))
 524 |         else
 525 |           node.append_child(link(matched_text, true, false))
 526 |         end
 527 |         return true
 528 |       end
 529 | 
 530 |       false
 531 |     end
 525 | 
 526 |     # Appends an inline HTML node when `Rule::HTML_TAG` matches at the current
 527 |     # position; with the tagfilter option the tag text is passed through
 528 |     # `Rule::HTMLBlock.escape_disallowed_html` first. Returns whether a tag matched.
 529 |     private def html_tag(node : Node)
 530 |       raw = match(Rule::HTML_TAG)
 531 |       return false unless raw
 532 | 
 533 |       raw = Rule::HTMLBlock.escape_disallowed_html(raw) if @options.tagfilter?
 534 | 
 535 |       inline = Node.new(Node::Type::HTMLInline)
 536 |       inline.text = raw
 537 |       node.append_child(inline)
 538 |       true
 539 |     end
 541 | 
 542 |     # Reads an HTML entity starting at `&` and appends its decoded text to `node`.
 543 |     # Returns false when no entity could be read.
 544 |     private def entity(node : Node)
 545 |       return false unless char_at?(@pos) == '&'
 546 | 
 547 |       if char_at?(@pos + 1) == '#'
 548 |         numeric = match(Rule::NUMERIC_HTML_ENTITY)
 549 |         return false unless numeric
 550 |         # Strip the leading `&` and the trailing `;`.
 551 |         name = numeric.byte_slice(1, numeric.bytesize - 2)
 552 |       else
 553 |         # Named entity: scan forward to the terminating `;`.
 554 |         name_start = @pos + 1
 555 |         cursor = name_start
 556 |         loop do
 557 |           char = char_at?(cursor)
 558 |           cursor += 1
 559 |           break if char == ';'
 560 |           return false if char.nil? || char == Char::ZERO
 561 |         end
 562 |         name = @text.byte_slice(name_start, (cursor - 1) - name_start)
 563 |         @pos = cursor
 564 |       end
 565 | 
 566 |       node.append_child(text(HTML.decode_entity(name)))
 567 |       true
 568 |     end
 570 | 
 571 |     # Replaces a `:shortcode:` starting at `@pos` with its entry from
 572 |     # `EmojiEntities::EMOJI_MAPPINGS` when the emoji option is enabled.
 573 |     # Returns false, leaving `@pos` untouched, when there is no known shortcode.
 574 |     private def emoji(node : Node)
 575 |       return false unless @options.emoji?
 576 |       return false unless char_at?(@pos) == ':'
 577 | 
 578 |       # Scan to the closing `:`; only ASCII letters, digits, `+`, `-` and `_`
 579 |       # may appear in between.
 580 |       cursor = @pos + 1
 581 |       loop do
 582 |         char = char_at?(cursor)
 583 |         cursor += 1
 584 |         break if char == ':'
 585 |         return false unless char && (char.ascii_alphanumeric? || "+-_".includes?(char))
 586 |       end
 587 | 
 588 |       name = @text.byte_slice((@pos + 1), (cursor - 1) - (@pos + 1))
 589 |       replacement = EmojiEntities::EMOJI_MAPPINGS[name]?
 590 |       return false unless replacement
 591 | 
 592 |       @pos = cursor
 593 |       node.append_child(text(replacement))
 594 |       true
 595 |     end
 605 | 
 606 |     private def string(node : Node)
 607 |       if (text = match_main)
 608 |         if @options.smart?
 609 |           text = text.gsub(Rule::ELLIPSIS, '\u{2026}')
 610 |             .gsub(Rule::DASH) do |chars|
 611 |               en_count = em_count = 0
 612 |               chars_length = chars.size
 613 | 
 614 |               if chars_length % 3 == 0
 615 |                 em_count = chars_length // 3
 616 |               elsif chars_length % 2 == 0
 617 |                 en_count = chars_length // 2
 618 |               elsif chars_length % 3 == 2
 619 |                 en_count = 1
 620 |                 em_count = (chars_length - 2) // 3
 621 |               else
 622 |                 en_count = 2
 623 |                 em_count = (chars_length - 4) // 3
 624 |               end
 625 | 
 626 |               "\u{2014}" * em_count + "\u{2013}" * en_count
 627 |             end
 628 |         end
 629 |         node.append_child(text(text))
 630 |         true
 631 |       else
 632 |         false
 633 |       end
 634 |     end
 635 | 
 636 |     private def link(match : String, email = false, add_proto = false) : Node
 637 |       dest = match.lstrip("<").rstrip(">")
 638 |       destination = email ? "mailto:#{dest}" : dest
 639 |       if add_proto
 640 |         destination = "http://#{destination}"
 641 |       end
 642 | 
 643 |       node = Node.new(Node::Type::Link)
 644 |       node.data["title"] = ""
 645 |       node.data["destination"] = normalize_uri(destination)
 646 |       node.append_child(text(dest))
 647 |       node
 648 |     end
 649 | 
 650 |     private def link_label
 651 |       text = match(Rule::LINK_LABEL)
 652 |       if text && text.size <= 1001 && (!text.ends_with?("\\]") || text[-3]? == '\\')
 653 |         text.bytesize - 1
 654 |       else
 655 |         0
 656 |       end
 657 |     end
 658 | 
 659 |     private def link_title
 660 |       title = match(Rule::LINK_TITLE)
 661 |       return unless title
 662 | 
 663 |       Utils.decode_entities_string(title[1..-2])
 664 |     end
 665 | 
 666 |     private def link_destination
 667 |       dest = if (text = match(Rule::LINK_DESTINATION_BRACES))
 668 |                text[1..-2]
 669 |              elsif char_at?(@pos) != '<'
 670 |                save_pos = @pos
 671 |                open_parens = 0
 672 |                while (char = char_at?(@pos))
 673 |                  case char
 674 |                  when '\\'
 675 |                    @pos += 1
 676 |                    match(Rule::ESCAPABLE)
 677 |                  when '('
 678 |                    @pos += 1
 679 |                    open_parens += 1
 680 |                  when ')'
 681 |                    break if open_parens < 1
 682 | 
 683 |                    @pos += 1
 684 |                    open_parens -= 1
 685 |                  when .ascii_whitespace?
 686 |                    break
 687 |                  else
 688 |                    @pos += 1
 689 |                  end
 690 |                end
 691 | 
 692 |                @text.byte_slice(save_pos, @pos - save_pos)
 693 |              end
 694 | 
 695 |       normalize_uri(Utils.decode_entities_string(dest)) if dest
 696 |     end
 697 | 
 698 |     private def handle_delim(char : Char, node : Node)
 699 |       res = scan_delims(char)
 700 |       return false unless res
 701 | 
 702 |       num_delims = res[:num_delims]
 703 |       start_pos = @pos
 704 |       @pos += num_delims
 705 |       text = case char
 706 |              when '\''
 707 |                "\u{2019}"
 708 |              when '"'
 709 |                "\u{201C}"
 710 |              else
 711 |                @text.byte_slice(start_pos, @pos - start_pos)
 712 |              end
 713 | 
 714 |       child = text(text)
 715 |       node.append_child(child)
 716 | 
 717 |       delimiter = Delimiter.new(char, num_delims, num_delims, child, @delimiters, nil, res[:can_open], res[:can_close])
 718 | 
 719 |       if (prev = delimiter.previous?)
 720 |         prev.next = delimiter
 721 |       end
 722 | 
 723 |       @delimiters = delimiter
 724 | 
 725 |       true
 726 |     end
 727 | 
 728 |     private def remove_delimiter(delimiter : Delimiter)
 729 |       if (prev = delimiter.previous?)
 730 |         prev.next = delimiter.next?
 731 |       end
 732 | 
 733 |       if (nxt = delimiter.next?)
 734 |         nxt.previous = delimiter.previous?
 735 |       else
 736 |         # top of stack
 737 |         @delimiters = delimiter.previous?
 738 |       end
 739 |     end
 740 | 
 741 |     private def remove_delimiter_between(bottom : Delimiter, top : Delimiter)
 742 |       if bottom.next? != top
 743 |         bottom.next = top
 744 |         top.previous = bottom
 745 |       end
 746 |     end
 747 | 
 748 |     private def scan_delims(char : Char)
 749 |       num_delims = 0
 750 |       start_pos = @pos
 751 |       if char == '\'' || char == '"'
 752 |         num_delims += 1
 753 |         @pos += 1
 754 |       else
 755 |         while char_at?(@pos) == char
 756 |           num_delims += 1
 757 |           @pos += 1
 758 |         end
 759 |       end
 760 | 
 761 |       return if num_delims == 0
 762 | 
 763 |       char_before = start_pos == 0 ? '\n' : previous_unicode_char_at(start_pos)
 764 |       char_after = unicode_char_at?(@pos) || '\n'
 765 | 
 766 |       # Match ASCII code 160 => \xA0 (See http://www.adamkoch.com/2009/07/25/white-space-and-character-160/)
 767 |       after_is_whitespace = char_after.ascii_whitespace? || char_after == '\u00A0'
 768 |       after_is_punctuation = !!char_after.to_s.match(Rule::PUNCTUATION)
 769 |       before_is_whitespace = char_before.ascii_whitespace? || char_after == '\u00A0'
 770 |       before_is_punctuation = !!char_before.to_s.match(Rule::PUNCTUATION)
 771 | 
 772 |       left_flanking = !after_is_whitespace &&
 773 |                       (!after_is_punctuation || before_is_whitespace || before_is_punctuation)
 774 |       right_flanking = !before_is_whitespace &&
 775 |                        (!before_is_punctuation || after_is_whitespace || after_is_punctuation)
 776 | 
 777 |       case char
 778 |       when '_'
 779 |         can_open = left_flanking && (!right_flanking || before_is_punctuation)
 780 |         can_close = right_flanking && (!left_flanking || after_is_punctuation)
 781 |       when '\'', '"'
 782 |         can_open = left_flanking && !right_flanking
 783 |         can_close = right_flanking
 784 |       else
 785 |         can_open = left_flanking
 786 |         can_close = right_flanking
 787 |       end
 788 | 
 789 |       @pos = start_pos
 790 | 
 791 |       {
 792 |         num_delims: num_delims,
 793 |         can_open:   can_open,
 794 |         can_close:  can_close,
 795 |       }
 796 |     end
 797 | 
 798 |     def reference(text : String, refmap)
 799 |       @text = text
 800 |       @pos = 0
 801 | 
 802 |       startpos = @pos
 803 |       match_chars = link_label
 804 | 
 805 |       # label
 806 |       return 0 if match_chars == 0
 807 |       raw_label = @text.byte_slice(0, match_chars + 1)
 808 | 
 809 |       # colon
 810 |       if char_at?(@pos) == ':'
 811 |         @pos += 1
 812 |       else
 813 |         @pos = startpos
 814 |         return 0
 815 |       end
 816 | 
 817 |       # link url
 818 |       spnl
 819 | 
 820 |       save_pos = @pos
 821 |       dest = link_destination
 822 | 
 823 |       if !dest || (dest.size == 0 && !(@pos == save_pos + 2 && @text.byte_slice(save_pos, 2) == "<>"))
 824 |         @pos = startpos
 825 |         return 0
 826 |       end
 827 | 
 828 |       before_title = @pos
 829 |       spnl
 830 |       if @pos != before_title
 831 |         title = link_title
 832 |       end
 833 | 
 834 |       unless title
 835 |         title = ""
 836 |         @pos = before_title
 837 |       end
 838 | 
 839 |       at_line_end = true
 840 |       unless space_at_end_of_line?
 841 |         if title.empty?
 842 |           at_line_end = false
 843 |         else
 844 |           title = ""
 845 |           @pos = before_title
 846 |           at_line_end = space_at_end_of_line?
 847 |         end
 848 |       end
 849 | 
 850 |       unless at_line_end
 851 |         @pos = startpos
 852 |         return 0
 853 |       end
 854 | 
 855 |       normal_label = normalize_reference(raw_label)
 856 |       if normal_label.empty?
 857 |         @pos = startpos
 858 |         return 0
 859 |       end
 860 | 
 861 |       unless refmap[normal_label]?
 862 |         refmap[normal_label] = {
 863 |           "destination" => dest,
 864 |           "title"       => title,
 865 |         }
 866 |       end
 867 | 
 868 |       @pos - startpos
 869 |     end
 870 | 
 871 |     private def space_at_end_of_line?
 872 |       while char_at?(@pos) == ' '
 873 |         @pos += 1
 874 |       end
 875 | 
 876 |       case char_at?(@pos)
 877 |       when '\n'
 878 |         @pos += 1
 879 |       when Char::ZERO
 880 |       else
 881 |         return false
 882 |       end
 883 | 
 884 |       true
 885 |     end
 886 | 
 887 |     # Parse zero or more space characters, including at most one newline
 888 |     private def spnl
 889 |       seen_newline = false
 890 |       while (c = char_at?(@pos))
 891 |         if !seen_newline && c == '\n'
 892 |           seen_newline = true
 893 |         elsif c != ' '
 894 |           break
 895 |         end
 896 | 
 897 |         @pos += 1
 898 |       end
 899 | 
 900 |       true
 901 |     end
 902 | 
 903 |     private def match(regex : Regex) : String?
 904 |       text = @text.byte_slice(@pos)
 905 |       if (match = text.match(regex))
 906 |         @pos += match.byte_end.not_nil!
 907 |         return match[0]
 908 |       end
 909 |     end
 910 | 
 911 |     # This function advances @pos as far as possible until it finds a
 912 |     # "special" character, such as '<', ']', or a special string (like a URL).
 913 |     #
 914 |     # Then it returns the chunk before it found that match, or in the case
 915 |     # of special strings, the chunk matched.
 916 | 
 917 |     private def match_main : String?
 918 |       start_pos = @pos
 919 |       while (char = char_at?(@pos))
 920 |         # If we detected a special string (like a URL), and it's
 921 |         # not the beggining of the string, we need to break right away.
 922 |         #
 923 |         # If we are at the beginning of the string, then we return
 924 |         # the chunk matched
 925 |         if @options.autolink?
 926 |           advance = special_string?(@text, @pos)
 927 |           if advance > 0
 928 |             if @pos > start_pos
 929 |               break
 930 |             else
 931 |               @pos += advance
 932 |               break
 933 |             end
 934 |           end
 935 |         end
 936 | 
 937 |         # If we detect a special character, we need to break
 938 |         break if !main_char?(char)
 939 |         @pos += 1
 940 |       end
 941 | 
 942 |       if start_pos == @pos
 943 |         nil
 944 |       else
 945 |         @text.byte_slice(start_pos, @pos - start_pos)
 946 |       end
 947 |     end
 948 | 
 949 |     # Identify "special" strings by matching against
 950 |     # regular expressions. It returns the number of characters
 951 |     # that were matched.
 952 | 
 953 |     private def special_string?(full_text : String, pos : Int) : Int
 954 |       text = full_text.byte_slice(pos)
 955 |       # All such recognized autolinks can only come at the beginning of
 956 |       # a line, after whitespace, or any of the delimiting characters `*`, `_`, `~`,
 957 |       # and `(`.
 958 |       if pos > 0 && !("*_~( \n\t".includes? char_at(pos - 1))
 959 |         0
 960 |       elsif text.starts_with?("http://") || text.starts_with?("https://") || text.starts_with?("ftp://")
 961 |         # This should not be an autolink:
 962 |         # < ftp://example.com >
 963 |         if full_text[...pos].includes?("<") && full_text[...pos].matches?(/<\s*$/)
 964 |           return 0
 965 |         end
 966 | 
 967 |         m = autolink_cleanup(text.match(Rule::PROTOCOL_AUTO_LINK).to_s)
 968 |         m.size
 969 |       elsif text.starts_with?("www.") && text.matches?(Rule::WWW_AUTO_LINK)
 970 |         m = autolink_cleanup(text.match(Rule::WWW_AUTO_LINK).to_s)
 971 |         m.size
 972 |       elsif text.includes?("@") && text.matches?(Rule::EXTENDED_EMAIL_AUTO_LINK)
 973 |         # m = autolink_cleanup(text.match(Rule::EMAIL_AUTO_LINK).to_s)
 974 |         matched_text = text.match(Rule::EMAIL_AUTO_LINK).to_s
 975 | 
 976 |         # `.`, `-`, and `_` can occur on both sides of the `@`, but only `.` may occur at
 977 |         # the end of the email address, in which case it will not be considered part of
 978 |         # the address:
 979 | 
 980 |         if "-_".includes? char_at(pos + matched_text.size + 1)
 981 |           return 0
 982 |         end
 983 |         matched_text.size
 984 |       else
 985 |         0
 986 |       end
 987 |     end
 988 | 
 989 |     # These cleanups are defined in the spec
 990 | 
 991 |     private def autolink_cleanup(text : String) : String
 992 |       return text if text.empty?
 993 |       # When an autolink ends in `)`, we scan the entire autolink for the total number
 994 |       # of parentheses.  If there is a greater number of closing parentheses than
 995 |       # opening ones, we don't consider the unmatched trailing parentheses part of the
 996 |       # autolink, in order to facilitate including an autolink inside a parenthesis:
 997 |       while text.ends_with?(")") && text.count(")") != text.count("(")
 998 |         text = text[0..-2]
 999 |       end
1000 | 
1001 |       # Trailing punctuation (specifically, `?`, `!`, `.`, `,`, `:`, `*`, `_`, and `~`)
1002 |       # will not be considered part of the autolink, though they may be included in the
1003 |       # interior of the link
1004 |       while "\"'?!.,:*~_".includes?(text[-1])
1005 |         text = text[0..-2]
1006 |       end
1007 | 
1008 |       # If an autolink ends in a semicolon (`;`), we check to see if it appears to
1009 |       # resemble an [entity reference][entity references]; if the preceding text is `&`
1010 |       # followed by one or more alphanumeric characters.  If so, it is excluded from
1011 |       # the autolink:
1012 | 
1013 |       if text.ends_with?(";") && text.includes?("&")
1014 |         parts = text.split("&")
1015 |         if "&#{parts[-1]}".matches?(Rule::HTML_ENTITY)
1016 |           text = parts[0..-2].join("&")
1017 |         end
1018 |       end
1019 | 
1020 |       # If the autolink has a domain and the last component has a `_` then
1021 |       # it's invalid.
1022 |       if text.starts_with?("www.")
1023 |         uri = URI.parse("http://#{text}")
1024 |       else
1025 |         uri = URI.parse(text)
1026 |       end
1027 |       if uri.host && !uri.host.to_s.match(Rule::VALID_DOMAIN_NAME)
1028 |         text = ""
1029 |       end
1030 | 
1031 |       text
1032 |     end
1033 | 
1034 |     # This is the same as match(/^[^\n`\[\]\\!<&*_'":]+/m) but done manually (faster)
1035 |     private def main_char?(char)
1036 |       case char
1037 |       when '\n', '`', '[', ']', '\\', '!', '<', '&', '*', '_', '\'', '"', ':', 'w'
1038 |         false
1039 |       when '~'
1040 |         !@options.gfm?
1041 |       else
1042 |         true
1043 |       end
1044 |     end
1045 | 
1046 |     private def text(text) : Node
1047 |       node = Node.new(Node::Type::Text)
1048 |       node.text = text.to_s
1049 |       node
1050 |     end
1051 | 
1052 |     private def char_at?(byte_index)
1053 |       @text.byte_at?(byte_index).try &.unsafe_chr
1054 |     end
1055 | 
1056 |     private def char_at(byte_index)
1057 |       @text.byte_at(byte_index).unsafe_chr
1058 |     end
1059 | 
1060 |     private def previous_unicode_char_at(byte_index)
1061 |       reader = Char::Reader.new(@text, byte_index)
1062 |       reader.previous_char
1063 |     end
1064 | 
1065 |     private def unicode_char_at?(byte_index)
1066 |       if byte_index < @text.bytesize
1067 |         reader = Char::Reader.new(@text, byte_index)
1068 |         reader.current_char
1069 |       end
1070 |     end
1071 | 
1072 |     # Normalize reference label: collapse internal whitespace
1073 |     # to single space, remove leading/trailing whitespace, case fold.
1074 |     def normalize_reference(text : String)
1075 |       text[1..-2].strip.downcase.gsub("\n", " ")
1076 |     end
1077 | 
1078 |     private RESERVED_CHARS = ['&', '+', ',', '(', ')', '\'', '#', '*', '!', '#', '$', '/', ':', ';', '?', '@', '=']
1079 | 
1080 |     def normalize_uri(uri : String)
1081 |       String.build(capacity: uri.bytesize) do |io|
1082 |         URI.encode(decode_uri(uri), io) do |byte|
1083 |           URI.unreserved?(byte) || RESERVED_CHARS.includes?(byte.chr)
1084 |         end
1085 |       end
1086 |     end
1087 | 
1088 |     def decode_uri(text : String)
1089 |       decoded = URI.decode(text)
1090 |       if decoded.includes?('&') && decoded.includes?(';')
1091 |         decoded = decoded.gsub(/^&(\w+);$/) { |chars| HTML.decode_entities(chars) }
1092 |       end
1093 |       decoded
1094 |     end
1095 | 
1096 |     class Bracket
1097 |       property node : Node
1098 |       property! previous : Bracket?
1099 |       property previous_delimiter : Delimiter?
1100 |       property index : Int32
1101 |       property? image : Bool
1102 |       property? active : Bool
1103 |       property? bracket_after : Bool
1104 | 
1105 |       def initialize(@node, @previous, @previous_delimiter, @index, @image, @active = true)
1106 |         @bracket_after = false
1107 |       end
1108 |     end
1109 | 
1110 |     class Delimiter
1111 |       property char : Char
1112 |       property num_delims : Int32
1113 |       property orig_delims : Int32
1114 |       property node : Node
1115 |       property! previous : Delimiter?
1116 |       property! next : Delimiter?
1117 |       property? can_open : Bool
1118 |       property? can_close : Bool
1119 | 
1120 |       def initialize(@char, @num_delims, @orig_delims, @node,
1121 |                      @previous, @next, @can_open, @can_close)
1122 |       end
1123 |     end
1124 |   end
1125 | end
1126 | 


--------------------------------------------------------------------------------