├── src ├── markd │ ├── version.cr │ ├── parser.cr │ ├── rules │ │ ├── document.cr │ │ ├── paragraph.cr │ │ ├── thematic_break.cr │ │ ├── item.cr │ │ ├── block_quote.cr │ │ ├── html_block.cr │ │ ├── heading.cr │ │ ├── code_block.cr │ │ ├── table.cr │ │ └── list.cr │ ├── utils.cr │ ├── mappings │ │ ├── decode.cr │ │ └── legacy.cr │ ├── html_entities.cr │ ├── options.cr │ ├── node.cr │ ├── renderer.cr │ ├── rule.cr │ ├── parsers │ │ ├── block.cr │ │ └── inline.cr │ └── renderers │ │ └── html_renderer.cr └── markd.cr ├── .github ├── trafico.yml └── workflows │ ├── release.yml │ └── ci.yml ├── shard.yml ├── .gitignore ├── .vscode └── launch.json ├── .ameba.yml ├── spec ├── fixtures │ ├── emoji.txt │ ├── alert.txt │ ├── regression.txt │ ├── smart_punct.txt │ ├── gfm-regression.txt │ └── gfm-extensions.txt ├── markd_spec.cr ├── api_spec.cr └── spec_helper.cr ├── LICENSE ├── CHANGELOG.md └── README.md /src/markd/version.cr: -------------------------------------------------------------------------------- 1 | module Markd 2 | VERSION = "0.5.0" 3 | end 4 | -------------------------------------------------------------------------------- /.github/trafico.yml: -------------------------------------------------------------------------------- 1 | addWipLabel: true 2 | reviewers: 3 | icyleaf: 4 | name: "icyleaf" 5 | color: "#000000" 6 | -------------------------------------------------------------------------------- /shard.yml: -------------------------------------------------------------------------------- 1 | name: markd 2 | version: 0.5.0 3 | 4 | authors: 5 | - icyleaf 6 | 7 | crystal: ">= 0.36.1, < 2.0.0" 8 | 9 | license: MIT 10 | -------------------------------------------------------------------------------- /src/markd/parser.cr: -------------------------------------------------------------------------------- 1 | module Markd 2 | module Parser 3 | def self.parse(source : String, options = Options.new) 4 | Block.parse(source, options) 5 | end 6 | end 7 | end 8 | 9 | require 
"./parsers/*" 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /doc/ 2 | /lib/ 3 | /bin/ 4 | /.shards/ 5 | /src/main.cr 6 | 7 | # Libraries don't need dependency lock 8 | # Dependencies will be locked in application that uses them 9 | /shard.lock 10 | 11 | # vscode 12 | /.history/ 13 | /.vscode/settings.json 14 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "type": "lldb", 6 | "request": "launch", 7 | "name": "Launch", 8 | "program": "${workspaceRoot}/bin/main", 9 | "args": [], 10 | "cwd": "${workspaceRoot}" 11 | } 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /.ameba.yml: -------------------------------------------------------------------------------- 1 | Metrics/CyclomaticComplexity: 2 | Excluded: 3 | - spec/**/* 4 | - src/markd/utils.cr 5 | - src/markd/rules/heading.cr 6 | - src/markd/rules/list.cr 7 | - src/markd/parsers/inline.cr 8 | - src/markd/parsers/block.cr 9 | - src/markd/renderer.cr 10 | 11 | Naming/BlockParameterName: 12 | Enabled: false 13 | 14 | Style/ParenthesesAroundCondition: 15 | Enabled: true 16 | AllowSafeAssignment: true 17 | 18 | Lint/NotNil: 19 | Excluded: 20 | - src/markd/parsers/inline.cr 21 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Deploy new release 2 | 3 | on: 4 | push: 5 | tags: 6 | - "v*" 7 | 8 | jobs: 9 | deploy: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout 13 | uses: actions/checkout@master 14 | 15 | - name: Create Release 16 | id: create_release 17 | uses: 
actions/create-release@v1 18 | env: 19 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 20 | with: 21 | tag_name: ${{ github.ref }} 22 | release_name: Release ${{ github.ref }} 23 | draft: false 24 | prerelease: false 25 | 26 | -------------------------------------------------------------------------------- /src/markd/rules/document.cr: -------------------------------------------------------------------------------- 1 | module Markd::Rule 2 | struct Document 3 | include Rule 4 | 5 | def match(parser : Parser, container : Node) : MatchValue 6 | MatchValue::None 7 | end 8 | 9 | def continue(parser : Parser, container : Node) : ContinueStatus 10 | ContinueStatus::Continue 11 | end 12 | 13 | def token(parser : Parser, container : Node) : Nil 14 | # do nothing 15 | end 16 | 17 | def can_contain?(type : Node::Type) : Bool 18 | !type.item? 19 | end 20 | 21 | def accepts_lines? : Bool 22 | false 23 | end 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /src/markd/utils.cr: -------------------------------------------------------------------------------- 1 | require "json" 2 | 3 | module Markd 4 | module Utils 5 | def self.timer(label : String, measure_time : Bool, &) 6 | return yield unless measure_time 7 | 8 | start_time = Time.utc 9 | yield 10 | 11 | puts "#{label}: #{(Time.utc - start_time).total_milliseconds}ms" 12 | end 13 | 14 | DECODE_ENTITIES_REGEX = Regex.new("\\\\" + Rule::ESCAPABLE_STRING, Regex::Options::IGNORE_CASE) 15 | 16 | def self.decode_entities_string(text : String) : String 17 | HTML.decode_entities(text).gsub(DECODE_ENTITIES_REGEX, &.[1].to_s) 18 | end 19 | end 20 | end 21 | -------------------------------------------------------------------------------- /src/markd/mappings/decode.cr: -------------------------------------------------------------------------------- 1 | module Markd::HTMLEntities 2 | DECODE_MAPPINGS = { 3 | 0 => 65533, 4 | 128 => 8364, 5 | 130 => 8218, 6 | 131 => 402, 7 | 132 => 8222, 8 | 
133 => 8230, 9 | 134 => 8224, 10 | 135 => 8225, 11 | 136 => 710, 12 | 137 => 8240, 13 | 138 => 352, 14 | 139 => 8249, 15 | 140 => 338, 16 | 142 => 381, 17 | 145 => 8216, 18 | 146 => 8217, 19 | 147 => 8220, 20 | 148 => 8221, 21 | 149 => 8226, 22 | 150 => 8211, 23 | 151 => 8212, 24 | 152 => 732, 25 | 153 => 8482, 26 | 154 => 353, 27 | 155 => 8250, 28 | 156 => 339, 29 | 158 => 382, 30 | 159 => 376, 31 | } 32 | end 33 | -------------------------------------------------------------------------------- /spec/fixtures/emoji.txt: -------------------------------------------------------------------------------- 1 | ## Emoji 2 | 3 | ```````````````````````````````` example emoji 4 | :100: 5 | . 6 |

💯

7 | ```````````````````````````````` 8 | 9 | ```````````````````````````````` example emoji 10 | :gb: 11 | . 12 |

🇬🇧

13 | ```````````````````````````````` 14 | 15 | ```````````````````````````````` example emoji 16 | :people_holding_hands: 17 | . 18 |

🧑‍🤝‍🧑

19 | ```````````````````````````````` 20 | 21 | ```````````````````````````````` example emoji 22 | :scotland: 23 | . 24 |

🏴󠁧󠁢󠁳󠁣󠁴󠁿

25 | ```````````````````````````````` 26 | 27 | ```````````````````````````````` example emoji 28 | :emoji_doesnt_exist: 29 | . 30 |

:emoji_doesnt_exist:

31 | ```````````````````````````````` 32 | 33 | ```````````````````````````````` example emoji 34 | :family_man_boy_boy: 35 | . 36 |

👨‍👦‍👦

37 | ```````````````````````````````` 38 | -------------------------------------------------------------------------------- /src/markd/rules/paragraph.cr: -------------------------------------------------------------------------------- 1 | module Markd::Rule 2 | struct Paragraph 3 | include Rule 4 | 5 | def match(parser : Parser, container : Node) : MatchValue 6 | MatchValue::None 7 | end 8 | 9 | def continue(parser : Parser, container : Node) : ContinueStatus 10 | parser.blank ? ContinueStatus::Stop : ContinueStatus::Continue 11 | end 12 | 13 | def token(parser : Parser, container : Node) : Nil 14 | has_reference_defs = false 15 | 16 | while container.text[0]? == '[' && 17 | (pos = parser.inline_lexer.reference(container.text, parser.refmap)) && pos > 0 18 | container.text = container.text.byte_slice(pos) 19 | has_reference_defs = true 20 | end 21 | 22 | container.unlink if has_reference_defs && container.text.each_char.all? &.ascii_whitespace? 23 | end 24 | 25 | def can_contain?(type) 26 | false 27 | end 28 | 29 | def accepts_lines? : Bool 30 | true 31 | end 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /spec/markd_spec.cr: -------------------------------------------------------------------------------- 1 | require "./spec_helper" 2 | 3 | # Commonmark spec examples 4 | describe_spec("fixtures/spec.txt") 5 | 6 | # Smart punctuation examples 7 | describe_spec("fixtures/smart_punct.txt", smart: true) 8 | 9 | # Regression examples 10 | describe_spec("fixtures/regression.txt") 11 | 12 | describe_spec("fixtures/emoji.txt") 13 | 14 | describe_spec("fixtures/gfm-spec.txt", gfm: true) 15 | 16 | describe_spec("fixtures/gfm-extensions.txt", gfm: true) 17 | 18 | describe_spec("fixtures/gfm-regression.txt", gfm: true) 19 | 20 | # Alert spec examples 21 | describe_spec("fixtures/alert.txt", gfm: true) 22 | 23 | describe Markd do 24 | # Thanks Ryan Westlund feedback via email. 
25 | it "should escape unsafe html" do 26 | raw = %Q(```">\n```) 27 | html = %Q(
\n) 28 | 29 | Markd.to_html(raw).should eq(html) 30 | end 31 | end 32 | -------------------------------------------------------------------------------- /spec/api_spec.cr: -------------------------------------------------------------------------------- 1 | require "spec" 2 | require "../src/markd" 3 | 4 | describe Markd::Options do 5 | describe "#base_url" do 6 | it "it disabled by default" do 7 | options = Markd::Options.new 8 | Markd.to_html("[foo](bar)", options).should eq %(

foo

\n) 9 | Markd.to_html("![](bar)", options).should eq %(

\n) 10 | end 11 | 12 | it "absolutizes relative urls" do 13 | options = Markd::Options.new 14 | options.base_url = URI.parse("http://example.com") 15 | Markd.to_html("[foo](bar)", options).should eq %(

foo

\n) 16 | Markd.to_html("[foo](https://example.com/baz)", options).should eq %(

foo

\n) 17 | Markd.to_html("![](bar)", options).should eq %(

\n) 18 | Markd.to_html("![](https://example.com/baz)", options).should eq %(

\n) 19 | end 20 | end 21 | end 22 | -------------------------------------------------------------------------------- /src/markd/rules/thematic_break.cr: -------------------------------------------------------------------------------- 1 | module Markd::Rule 2 | struct ThematicBreak 3 | include Rule 4 | 5 | THEMATIC_BREAK = /^(?:(?:\*[ \t]*){3,}|(?:_[ \t]*){3,}|(?:-[ \t]*){3,})[ \t]*$/ 6 | 7 | def match(parser : Parser, container : Node) : MatchValue 8 | if !parser.indented && parser.line[parser.next_nonspace..-1].match(THEMATIC_BREAK) 9 | parser.close_unmatched_blocks 10 | parser.add_child(Node::Type::ThematicBreak, parser.next_nonspace) 11 | parser.advance_offset(parser.line.size - parser.offset, false) 12 | MatchValue::Leaf 13 | else 14 | MatchValue::None 15 | end 16 | end 17 | 18 | def continue(parser : Parser, container : Node) : ContinueStatus 19 | # a thematic break can never contain > 1 line, so fail to match: 20 | ContinueStatus::Stop 21 | end 22 | 23 | def token(parser : Parser, container : Node) : Nil 24 | # do nothing 25 | end 26 | 27 | def can_contain?(type) # always false: no node type is accepted as a child 28 | false 29 | end 30 | 31 | def accepts_lines? : Bool # always false for this rule 32 | false 33 | end 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /src/markd/rules/item.cr: -------------------------------------------------------------------------------- 1 | module Markd::Rule 2 | struct Item 3 | include Rule 4 | 5 | def match(parser : Parser, container : Node) : MatchValue 6 | # match and parse in Rule::List 7 | MatchValue::None 8 | end 9 | 10 | def continue(parser : Parser, container : Node) : ContinueStatus 11 | indent_offset = container.data["marker_offset"].as(Int32) + container.data["padding"].as(Int32) 12 | 13 | if parser.blank 14 | if container.first_child? 
15 | parser.advance_next_nonspace 16 | else 17 | # Blank line after empty list item 18 | return ContinueStatus::Stop 19 | end 20 | elsif parser.indent >= indent_offset 21 | parser.advance_offset(indent_offset, true) 22 | else 23 | return ContinueStatus::Stop 24 | end 25 | 26 | ContinueStatus::Continue 27 | end 28 | 29 | def token(parser : Parser, container : Node) : Nil 30 | # do nothing 31 | end 32 | 33 | def can_contain?(type : Node::Type) 34 | !type.item? 35 | end 36 | 37 | def accepts_lines? : Bool 38 | false 39 | end 40 | end 41 | end 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017-present icyleaf 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/markd.cr: -------------------------------------------------------------------------------- 1 | require "./markd/html_entities" 2 | require "./markd/utils" 3 | require "./markd/node" 4 | require "./markd/rule" 5 | require "./markd/options" 6 | require "./markd/renderer" 7 | require "./markd/parser" 8 | require "./markd/version" 9 | 10 | module Markd 11 | {% if @top_level.has_constant?("Tartrazine") %} 12 | def self.to_html( 13 | source : String, 14 | options = Options.new, 15 | *, 16 | formatter : Tartrazine::Formatter | String = "catppuccin-macchiato", 17 | ) 18 | return "" if source.empty? 19 | 20 | if formatter.is_a?(String) 21 | formatter = Tartrazine::Html.new( 22 | theme: Tartrazine.theme(formatter), 23 | line_numbers: true, 24 | standalone: true, 25 | ) 26 | end 27 | 28 | document = Parser.parse(source, options) 29 | renderer = HTMLRenderer.new(options) 30 | renderer.render(document, formatter) 31 | end 32 | {% else %} 33 | def self.to_html( 34 | source : String, 35 | options = Options.new, 36 | formatter = nil, 37 | ) 38 | return "" if source.empty? 39 | 40 | document = Parser.parse(source, options) 41 | renderer = HTMLRenderer.new(options) 42 | renderer.render(document, formatter) 43 | end 44 | {% end %} 45 | end 46 | -------------------------------------------------------------------------------- /src/markd/rules/block_quote.cr: -------------------------------------------------------------------------------- 1 | module Markd::Rule 2 | struct BlockQuote 3 | include Rule 4 | 5 | def match(parser : Parser, container : Node) : MatchValue 6 | if match?(parser) 7 | seek(parser) 8 | parser.close_unmatched_blocks 9 | if parser.gfm? && (match = parser.line.match(Rule::ADMONITION_START)) 10 | node = parser.add_child(Node::Type::Alert, parser.next_nonspace) 11 | # This is an alert 12 | node.data["alert"] = match[1] 13 | node.data["title"] = (match[2]? && !match[2].strip.empty?) 
? match[2].strip : match[1] 14 | parser.advance_offset(parser.line.size, false) 15 | else 16 | parser.add_child(Node::Type::BlockQuote, parser.next_nonspace) 17 | end 18 | 19 | MatchValue::Container 20 | else 21 | MatchValue::None 22 | end 23 | end 24 | 25 | def continue(parser : Parser, container : Node) : ContinueStatus 26 | if match?(parser) 27 | seek(parser) 28 | ContinueStatus::Continue 29 | else 30 | ContinueStatus::Stop 31 | end 32 | end 33 | 34 | def token(parser : Parser, container : Node) : Nil 35 | # do nothing 36 | end 37 | 38 | def can_contain?(type : Node::Type) : Bool 39 | !type.item? 40 | end 41 | 42 | def accepts_lines? : Bool 43 | false 44 | end 45 | 46 | private def match?(parser) 47 | !parser.indented && parser.line[parser.next_nonspace]? == '>' 48 | end 49 | 50 | private def seek(parser : Parser) 51 | parser.advance_next_nonspace 52 | parser.advance_offset(1, false) 53 | 54 | if space_or_tab?(parser.line[parser.offset]?) 55 | parser.advance_offset(1, true) 56 | end 57 | end 58 | end 59 | end 60 | -------------------------------------------------------------------------------- /src/markd/rules/html_block.cr: -------------------------------------------------------------------------------- 1 | module Markd::Rule 2 | struct HTMLBlock 3 | include Rule 4 | 5 | def match(parser : Parser, container : Node) : MatchValue 6 | if !parser.indented && parser.line[parser.next_nonspace]? == '<' 7 | text = parser.line[parser.next_nonspace..-1] 8 | block_type_size = Rule::HTML_BLOCK_OPEN.size - 1 9 | 10 | Rule::HTML_BLOCK_OPEN.each_with_index do |regex, index| 11 | if text.match(regex) && 12 | (index < block_type_size || !container.type.paragraph?) 
13 | parser.close_unmatched_blocks 14 | # We don't adjust parser.offset; 15 | # spaces are part of the HTML block: 16 | node = parser.add_child(Node::Type::HTMLBlock, parser.offset) 17 | node.data["html_block_type"] = index 18 | 19 | return MatchValue::Leaf 20 | end 21 | end 22 | end 23 | 24 | MatchValue::None 25 | end 26 | 27 | def continue(parser : Parser, container : Node) : ContinueStatus 28 | (parser.blank && {5, 6}.includes?(container.data["html_block_type"])) ? ContinueStatus::Stop : ContinueStatus::Continue 29 | end 30 | 31 | def token(parser : Parser, container : Node) : Nil 32 | text = container.text.gsub(/(\n *)+$/, "") 33 | 34 | if parser.tagfilter? 35 | text = self.class.escape_disallowed_html(text) 36 | end 37 | 38 | container.text = text 39 | end 40 | 41 | def can_contain?(type) 42 | false 43 | end 44 | 45 | def accepts_lines? : Bool 46 | true 47 | end 48 | 49 | def self.escape_disallowed_html(text : String) : String 50 | String.build do |string| 51 | pos = 0 52 | 53 | text.scan(/<\/?\s*(#{GFM_DISALLOWED_HTML_TAGS.join('|')})\b/i) do |match| 54 | start = text.index(match[0], pos) 55 | next if start.nil? 
56 | 57 | string << text[pos...start] << "&lt;#{match[0][1..]}" # emit "&lt;" in place of the tag's leading "<" so the disallowed tag is rendered as text; the rest of the match is copied unchanged 58 | pos = start + match[0].size 59 | end 60 | 61 | string << text[pos..-1] 62 | end 63 | end 64 | end 65 | end 66 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | ameba_linter: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | ameba-version: [v1.6.4] 17 | name: Ameba ${{ matrix.ameba-version }} linter check 18 | steps: 19 | - name: Install latest Crystal 20 | uses: crystal-lang/install-crystal@v1 21 | - name: Check out repository code 22 | uses: actions/checkout@master 23 | - name: Install dependencies 24 | run: shards install --without-development 25 | - name: Cache Ameba binary 26 | id: cache-ameba 27 | uses: actions/cache@v3 28 | with: 29 | path: bin/ameba 30 | key: ${{ runner.os }}-ameba-${{ matrix.ameba-version }} 31 | 32 | - name: Build Ameba 33 | if: steps.cache-ameba.outputs.cache-hit != 'true' 34 | run: | 35 | git clone --branch ${{ matrix.ameba-version }} --single-branch https://github.com/crystal-ameba/ameba.git 36 | cd ameba 37 | make bin/ameba CRFLAGS='-Dpreview_mt --no-debug' 38 | mkdir -p ../bin 39 | mv bin/ameba ../bin/ameba 40 | cd .. 
41 | rm -rf ameba 42 | 43 | - name: Run Ameba Linter 44 | run: bin/ameba -c .ameba.yml 45 | specs: 46 | strategy: 47 | fail-fast: false 48 | matrix: 49 | include: 50 | - { os: ubuntu-latest, crystal: latest } 51 | - { os: ubuntu-latest, crystal: nightly } 52 | - { os: macos-latest } 53 | - { os: windows-latest } 54 | runs-on: ${{matrix.os}} 55 | 56 | name: Crystal ${{ matrix.crystal }} specs on ${{ matrix.os }} 57 | steps: 58 | - name: Checkout 59 | uses: actions/checkout@master 60 | 61 | - name: Install Crystal 62 | uses: crystal-lang/install-crystal@v1 63 | with: 64 | crystal: ${{ matrix.crystal }} 65 | 66 | - name: Install dependencies 67 | run: shards install --without-development 68 | 69 | - name: Run specs 70 | run: crystal spec --error-on-warnings --error-trace 71 | -------------------------------------------------------------------------------- /spec/fixtures/alert.txt: -------------------------------------------------------------------------------- 1 | ## Alert 2 | 3 | Alerts are a Markdown extension based on the blockquote syntax that 4 | you can use to emphasize critical information. On GitHub, they are 5 | displayed with distinctive colors and icons to indicate the significance 6 | of the content. 7 | 8 | Use alerts only when they are crucial for user success and limit them 9 | to one or two per article to prevent overloading the reader. Additionally, 10 | you should avoid placing alerts consecutively. Alerts cannot be nested 11 | within other elements. 12 | 13 | To add an alert, use a special blockquote line specifying the alert type 14 | and an optional title, followed by the alert information in a standard 15 | blockquote. 16 | 17 | There are five types of alert: 18 | 19 | * NOTE 20 | * TIP 21 | * IMPORTANT 22 | * WARNING 23 | * CAUTION 24 | 25 | ```````````````````````````````` example alert 26 | > [!NOTE] 27 | > Useful information that users should know, even when skimming content. 28 | . 29 |

NOTE

30 |

Useful information that users should know, even when skimming content.

31 |
32 | ```````````````````````````````` 33 | 34 | An optional title can be added after the closing bracket. 35 | 36 | ```````````````````````````````` example alert 37 | > [!NOTE] What is a note? 38 | > Useful information that users should know, even when skimming content. 39 | . 40 |

What is a note?

41 |

Useful information that users should know, even when skimming content.

42 |
43 | ```````````````````````````````` 44 | 45 | Empty spaces after the brackets are ignored. 46 | 47 | ```````````````````````````````` example alert 48 | > [!NOTE] 49 | > Useful information that users should know, even when skimming content. 50 | . 51 |

NOTE

52 |

Useful information that users should know, even when skimming content.

53 |
54 | ```````````````````````````````` 55 | 56 | Alert-like block quotes which don't use one of the five listed 57 | alert types are just block quotes. 58 | 59 | ```````````````````````````````` example alert 60 | > [!FOO] 61 | > Not a real alert. 62 | . 63 |
64 |

[!FOO]
65 | Not a real alert.

66 |
67 | ```````````````````````````````` 68 | -------------------------------------------------------------------------------- /src/markd/rules/heading.cr: -------------------------------------------------------------------------------- 1 | module Markd::Rule 2 | struct Heading 3 | include Rule 4 | 5 | ATX_HEADING_MARKER = /^\#{1,6}(?:[ \t]+|$)/ 6 | SETEXT_HEADING_MARKER = /^(?:=+|-+)[ \t]*$/ 7 | 8 | def match(parser : Parser, container : Node) : MatchValue 9 | if (match = match?(parser, ATX_HEADING_MARKER)) 10 | # ATX Heading matched 11 | parser.advance_next_nonspace 12 | parser.advance_offset(match[0].size, false) 13 | parser.close_unmatched_blocks 14 | 15 | container = parser.add_child(Node::Type::Heading, parser.next_nonspace) 16 | container.data["level"] = match[0].strip.size 17 | container.text = parser.line[parser.offset..-1] 18 | .sub(/^[ \t]*#+[ \t]*$/, "") 19 | .sub(/[ \t]+#+[ \t]*$/, "") 20 | 21 | parser.advance_offset(parser.line.size - parser.offset) 22 | 23 | MatchValue::Leaf 24 | elsif (match = match?(parser, SETEXT_HEADING_MARKER)) && 25 | container.type.paragraph? && (parent = container.parent?) && 26 | !parent.type.block_quote? 27 | # Setext Heading matched 28 | parser.close_unmatched_blocks 29 | 30 | while container.text[0]? == '[' && 31 | (pos = parser.inline_lexer.reference(container.text, parser.refmap)) && pos > 0 32 | container.text = container.text.byte_slice(pos) 33 | end 34 | return MatchValue::None if container.text.empty? 35 | 36 | heading = Node.new(Node::Type::Heading) 37 | heading.source_pos = container.source_pos 38 | heading.data["level"] = match[0][0] == '=' ? 
1 : 2 39 | heading.text = container.text 40 | 41 | container.insert_after(heading) 42 | container.unlink 43 | 44 | parser.tip = heading 45 | parser.advance_offset(parser.line.size - parser.offset, false) 46 | 47 | MatchValue::Leaf 48 | else 49 | MatchValue::None 50 | end 51 | end 52 | 53 | def token(parser : Parser, container : Node) : Nil 54 | # do nothing 55 | end 56 | 57 | def continue(parser : Parser, container : Node) : ContinueStatus 58 | # a heading can never contain > 1 line, so fail to match 59 | ContinueStatus::Stop 60 | end 61 | 62 | def can_contain?(type) 63 | false 64 | end 65 | 66 | def accepts_lines? : Bool 67 | false 68 | end 69 | 70 | private def match?(parser : Parser, regex : Regex) : Regex::MatchData? 71 | match = parser.line[parser.next_nonspace..-1].match(regex) 72 | !parser.indented && match ? match : nil 73 | end 74 | end 75 | end 76 | -------------------------------------------------------------------------------- /src/markd/html_entities.cr: -------------------------------------------------------------------------------- 1 | require "./mappings/*" 2 | 3 | module Markd::HTMLEntities 4 | module ExtendToHTML # extended by the top-level HTML module at the end of this file 5 | def decode_entities(source : String) 6 | Decoder.decode(source) 7 | end 8 | 9 | def decode_entity(source : String) 10 | Decoder.decode_entity(source) 11 | end 12 | 13 | def encode_entities(source) 14 | Encoder.encode(source) 15 | end 16 | end 17 | 18 | module Decoder # replaces entity references with the characters they stand for 19 | REGEX = /&(?:([a-zA-Z0-9]{2,32};)|(#[xX][\da-fA-F]+;?|#\d+;?))/ # named references require ';', numeric ones may omit it 20 | 21 | def self.decode(source) 22 | source.gsub(REGEX) do |chars| 23 | decode_entity(chars[1..].rchop(';')) # strip '&' and the ';' only if present; [1..-2] used to eat the last digit of "&#65" 24 | end 25 | end 26 | 27 | def self.decode_entity(chars) # chars is the reference without '&' and ';'; unresolved references come back re-wrapped in '&' and ';' 28 | if chars[0] == '#' 29 | if chars.size > 1 30 | if chars[1].downcase == 'x' 31 | if chars.size > 2 32 | return decode_codepoint(chars[2..-1].to_i?(16) || 0x110000) # digits that do not parse as an Int32 become an out-of-range value (U+FFFD) instead of raising 33 | end 34 | else 35 | return decode_codepoint(chars[1..-1].to_i?(10) || 0x110000) # same fallback for decimal references 36 | end 37 | end 38 | else 39 | entities_key = chars[0..-1] 40 | if (resolved_entity = 
Markd::HTMLEntities::ENTITIES_MAPPINGS[entities_key]?) 41 | return resolved_entity 42 | end 43 | end 44 | 45 | "&#{chars};" 46 | end 47 | 48 | def self.decode_codepoint(codepoint) # maps a numeric code point to its character; U+FFFD for anything that is not a valid scalar value 49 | return "\uFFFD" if codepoint < 0 || codepoint >= 0xD800 && codepoint <= 0xDFFF || codepoint > 0x10FFFF # U+10FFFF is the last Unicode code point (was 0x10FFF, which rejected most astral characters) 50 | 51 | if (decoded = Markd::HTMLEntities::DECODE_MAPPINGS[codepoint]?) 52 | codepoint = decoded # remap through DECODE_MAPPINGS when an entry exists 53 | end 54 | 55 | codepoint.unsafe_chr 56 | end 57 | end 58 | 59 | module Encoder # replaces characters with entity references 60 | ENTITIES_REGEX = Regex.union(HTMLEntities::ENTITIES_MAPPINGS.values) 61 | ASTRAL_REGEX = Regex.new("[\xED\xA0\x80-\xED\xAF\xBF][\xED\xB0\x80-\xED\xBF\xBF]") 62 | ENCODE_REGEX = /[^\x{20}-\x{7E}]/ 63 | 64 | def self.encode(source : String) 65 | source.gsub(ENTITIES_REGEX) { |chars| encode_entities(chars) } 66 | .gsub(ASTRAL_REGEX) { |chars| encode_astral(chars) } 67 | .gsub(ENCODE_REGEX) { |chars| encode_extend(chars) } 68 | end 69 | 70 | private def self.encode_entities(chars : String) 71 | entity = HTMLEntities::ENTITIES_MAPPINGS.key_for(chars) # reverse lookup (entity name for this value); Hash#key_for is the current name of this method 72 | "&#{entity};" 73 | end 74 | 75 | private def self.encode_astral(chars : String) # NOTE(review): expects a high/low surrogate pair; confirm such input can occur in a UTF-8 String 76 | high = chars.char_at(0).ord 77 | low = chars.char_at(1).ord # second unit of the pair (index 0 was read twice before) 78 | codepoint = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000 # standard UTF-16 surrogate-pair formula (factor was -0x400) 79 | 80 | "&#x#{codepoint.to_s(16).upcase};" 81 | end 82 | 83 | private def self.encode_extend(char : String) 84 | "&#x#{char[0].ord.to_s(16).upcase};" 85 | end 86 | end 87 | end 88 | 89 | module HTML 90 | extend Markd::HTMLEntities::ExtendToHTML 91 | end 92 | -------------------------------------------------------------------------------- /src/markd/rules/code_block.cr: -------------------------------------------------------------------------------- 1 | module Markd::Rule 2 | struct CodeBlock 3 | include Rule 4 | 5 | CODE_FENCE = /^`{3,}(?!.*`)|^~{3,}/ 6 | CLOSING_CODE_FENCE = /^(?:`{3,}|~{3,})(?= *$)/ 7 | 8 | def match(parser : Parser, container : Node) : MatchValue 9 | if !parser.indented && 10 | (match = parser.line[parser.next_nonspace..-1].match(CODE_FENCE)) 11 
| # fenced 12 | fence_length = match[0].size 13 | 14 | parser.close_unmatched_blocks 15 | node = parser.add_child(Node::Type::CodeBlock, parser.next_nonspace) 16 | node.fenced = true 17 | node.fence_length = fence_length 18 | node.fence_char = match[0][0].to_s 19 | node.fence_offset = parser.indent 20 | 21 | parser.advance_next_nonspace 22 | parser.advance_offset(fence_length, false) 23 | 24 | MatchValue::Leaf 25 | elsif parser.indented && !parser.blank && (tip = parser.tip) && 26 | !tip.type.paragraph? 27 | # indented 28 | parser.advance_offset(Rule::CODE_INDENT, true) 29 | parser.close_unmatched_blocks 30 | parser.add_child(Node::Type::CodeBlock, parser.offset) 31 | 32 | MatchValue::Leaf 33 | else 34 | MatchValue::None 35 | end 36 | end 37 | 38 | def continue(parser : Parser, container : Node) : ContinueStatus 39 | line = parser.line 40 | indent = parser.indent 41 | if container.fenced? 42 | # fenced 43 | match = indent <= 3 && 44 | line[parser.next_nonspace]? == container.fence_char[0] && 45 | line[parser.next_nonspace..-1].match(CLOSING_CODE_FENCE) 46 | 47 | if match && match.as(Regex::MatchData)[0].size >= container.fence_length 48 | # closing fence - we're at end of line, so we can return 49 | parser.token(container, parser.current_line) 50 | return ContinueStatus::Return 51 | else 52 | # skip optional spaces of fence offset 53 | index = container.fence_offset 54 | while index > 0 && space_or_tab?(parser.line[parser.offset]?) 55 | parser.advance_offset(1, true) 56 | index -= 1 57 | end 58 | end 59 | else 60 | # indented 61 | if indent >= Rule::CODE_INDENT 62 | parser.advance_offset(Rule::CODE_INDENT, true) 63 | elsif parser.blank 64 | parser.advance_next_nonspace 65 | else 66 | return ContinueStatus::Stop 67 | end 68 | end 69 | 70 | ContinueStatus::Continue 71 | end 72 | 73 | def token(parser : Parser, container : Node) : Nil 74 | if container.fenced? 
75 | # fenced 76 | first_line, _, text = container.text.partition('\n') 77 | 78 | container.fence_language = Utils.decode_entities_string(first_line.strip) 79 | container.text = text 80 | else 81 | # indented 82 | container.text = container.text.gsub(/(\n *)+$/, "\n") 83 | end 84 | end 85 | 86 | def can_contain?(type) 87 | false 88 | end 89 | 90 | def accepts_lines? : Bool 91 | true 92 | end 93 | end 94 | end 95 | -------------------------------------------------------------------------------- /src/markd/mappings/legacy.cr: -------------------------------------------------------------------------------- 1 | module Markd::HTMLEntities 2 | LEGACY_MAPPINGS = { 3 | "Aacute" => '\u00C1', 4 | "aacute" => '\u00E1', 5 | "Acirc" => '\u00C2', 6 | "acirc" => '\u00E2', 7 | "acute" => '\u00B4', 8 | "AElig" => '\u00C6', 9 | "aelig" => '\u00E6', 10 | "Agrave" => '\u00C0', 11 | "agrave" => '\u00E0', 12 | "amp" => '&', 13 | "AMP" => '&', 14 | "Aring" => '\u00C5', 15 | "aring" => '\u00E5', 16 | "Atilde" => '\u00C3', 17 | "atilde" => '\u00E3', 18 | "Auml" => '\u00C4', 19 | "auml" => '\u00E4', 20 | "brvbar" => '\u00A6', 21 | "Ccedil" => '\u00C7', 22 | "ccedil" => '\u00E7', 23 | "cedil" => '\u00B8', 24 | "cent" => '\u00A2', 25 | "copy" => '\u00A9', 26 | "COPY" => '\u00A9', 27 | "curren" => '\u00A4', 28 | "deg" => '\u00B0', 29 | "divide" => '\u00F7', 30 | "Eacute" => '\u00C9', 31 | "eacute" => '\u00E9', 32 | "Ecirc" => '\u00CA', 33 | "ecirc" => '\u00EA', 34 | "Egrave" => '\u00C8', 35 | "egrave" => '\u00E8', 36 | "ETH" => '\u00D0', 37 | "eth" => '\u00F0', 38 | "Euml" => '\u00CB', 39 | "euml" => '\u00EB', 40 | "frac12" => '\u00BD', 41 | "frac14" => '\u00BC', 42 | "frac34" => '\u00BE', 43 | "gt" => '>', 44 | "GT" => '>', 45 | "Iacute" => '\u00CD', 46 | "iacute" => '\u00ED', 47 | "Icirc" => '\u00CE', 48 | "icirc" => '\u00EE', 49 | "iexcl" => '\u00A1', 50 | "Igrave" => '\u00CC', 51 | "igrave" => '\u00EC', 52 | "iquest" => '\u00BF', 53 | "Iuml" => '\u00CF', 54 | "iuml" => '\u00EF', 55 | 
# Markdown rendering options.
#
# Every flag defaults to `false` and `base_url` defaults to `nil`
# (see `#initialize`).
class Options
  # If `true`, reports the time spent reading the source, parsing blocks,
  # and parsing inlines.
  property? time : Bool

  # Enables GitHub Flavored Markdown support.
  #
  # https://github.github.com/gfm/
  property? gfm : Bool

  # Not supported for now.
  property? toc : Bool

  # If `true`:
  # - straight quotes will be made curly
  # - `--` will be changed to an en dash
  # - `---` will be changed to an em dash
  # - `...` will be changed to ellipses
  property? smart : Bool

  # If `true`, source position information for block-level elements
  # will be rendered in the `data-sourcepos` attribute (for HTML).
  property? source_pos : Bool

  # If `true`, raw HTML will not be passed through to HTML output
  # (it will be replaced by comments).
  property? safe : Bool

  # If `true`, code tags generated by code blocks will have a
  # prettyprint class added to them, to be used by
  # [Google code-prettify](https://github.com/google/code-prettify).
  property? prettyprint : Bool

  # If `base_url` is not `nil`, it is used to resolve URLs of relative
  # links. It acts like HTML's `<base href="...">` in the context
  # of a Markdown document.
  property base_url : URI?

  # Enables GFM emoji support.
  #
  # For example:
  #
  # ```
  # @octocat :+1: This PR looks great - it's ready to merge! :ship:
  # ```
  #
  # Becomes:
  #
  # ```
  # @octocat 👍 This PR looks great - it's ready to merge! 🚢
  # ```
  # https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#using-emojis
  property? emoji : Bool

  # If `true`, the following HTML tags will be filtered when rendering HTML output:
  #
  # * `<title>`
  # * `<textarea>`
  # * `<style>`
  # * `<xmp>`
  # * `<iframe>`
  # * `<noembed>`
  # * `<noframes>`
  # * `<script>`
  # * `<plaintext>`
  #
  # All other HTML tags are left untouched.
  property? tagfilter : Bool

  # If `true`, more autolinks will be detected.
  # Setting to `false` does not disable autolink support as a whole.
  property? autolink : Bool

  # Creates a new set of options. All arguments are optional and are
  # normally passed by name, e.g. `Options.new(gfm: true, smart: true)`.
  def initialize(
    @time = false,
    @gfm = false,
    @toc = false,
    @smart = false,
    @source_pos = false,
    @safe = false,
    @prettyprint = false,
    @emoji = false,
    @tagfilter = false,
    @autolink = false,
    @base_url = nil,
  )
  end

  # Deprecated: non-predicate getters kept for backward compatibility.
  # Prefer the `?`-suffixed readers generated by `property?` above.

  @[Deprecated("Use `#time?` instead.")]
  getter time

  @[Deprecated("Use `#gfm?` instead.")]
  getter gfm

  @[Deprecated("Use `#smart?` instead.")]
  getter smart

  @[Deprecated("Use `#source_pos?` instead.")]
  getter source_pos

  @[Deprecated("Use `#safe?` instead.")]
  getter safe

  @[Deprecated("Use `#prettyprint?` instead.")]
  getter prettyprint
end
55 | <h2>hi</h2> 56 | ```````````````````````````````` 57 | 58 | Issue #108 - Chinese punctuation not recognized 59 | 60 | ```````````````````````````````` example 61 | **。**话 62 | . 63 | <p>**。**话</p> 64 | ```````````````````````````````` 65 | 66 | Issue jgm/cmark#177 - incorrect emphasis parsing 67 | 68 | ```````````````````````````````` example 69 | a***b* c* 70 | . 71 | <p>a*<em><em>b</em> c</em></p> 72 | ```````````````````````````````` 73 | 74 | Issue jgm/CommonMark#468 - backslash at end of link definition 75 | 76 | 77 | ```````````````````````````````` example 78 | [\]: test 79 | . 80 | <p>[]: test</p> 81 | ```````````````````````````````` 82 | 83 | Issue jgm/commonmark.js#121 - punctuation set different 84 | 85 | ```````````````````````````````` example 86 | ^_test_ 87 | . 88 | <p>^<em>test</em></p> 89 | ```````````````````````````````` 90 | 91 | Issue #116 - tabs before and after ATX closing heading 92 | ```````````````````````````````` example 93 | # foo→#→ 94 | . 95 | <h1>foo</h1> 96 | ```````````````````````````````` 97 | 98 | commonmark/CommonMark#493 - escaped space not allowed in link destination. 99 | 100 | ```````````````````````````````` example 101 | [link](a\ b) 102 | . 103 | <p>[link](a\ b)</p> 104 | ```````````````````````````````` 105 | 106 | Issue #527 - meta tags in inline contexts 107 | 108 | ```````````````````````````````` example 109 | City: 110 | <span itemprop="contentLocation" itemscope itemtype="https://schema.org/City"> 111 | <meta itemprop="name" content="Springfield"> 112 | </span> 113 | . 114 | <p>City: 115 | <span itemprop="contentLocation" itemscope itemtype="https://schema.org/City"> 116 | <meta itemprop="name" content="Springfield"> 117 | </span></p> 118 | ```````````````````````````````` 119 | 120 | Double-encoding. 121 | 122 | ```````````````````````````````` example 123 | [XSS](javascript&colon;alert%28'XSS'%29) 124 | . 
125 | <p><a href="javascript&colon;alert('XSS')">XSS</a></p> 126 | ```````````````````````````````` 127 | 128 | Issue commonmark#517 - script, pre, style close tag without 129 | opener. 130 | 131 | ```````````````````````````````` example 132 | </script> 133 | 134 | </pre> 135 | 136 | </style> 137 | . 138 | </script> 139 | </pre> 140 | </style> 141 | ```````````````````````````````` 142 | 143 | Issue #289. 144 | 145 | ```````````````````````````````` example 146 | [a](<b) c> 147 | . 148 | <p>[a](<b) c></p> 149 | ```````````````````````````````` 150 | 151 | icyleaf/markd issue #80 152 | 153 | ```````````````````````````````` example 154 | 1212121212121 155 | . 156 | <p>1212121212121</p> 157 | ```````````````````````````````` 158 | -------------------------------------------------------------------------------- /spec/spec_helper.cr: -------------------------------------------------------------------------------- 1 | require "spec" 2 | require "../src/markd" 3 | 4 | def describe_spec(file, smart = false, render = false, gfm = false) 5 | file = File.join(__DIR__, file) 6 | 7 | specs = extract_spec_tests(file) 8 | 9 | skip_examples = [] of Int32 10 | 11 | if render 12 | puts "Run [#{file}] examples" 13 | examples_count = 0 14 | section_count = 0 15 | specs.each_with_index do |(section, examples), index| 16 | section = "#{(index + 1).to_s.rjust(2)}. 
# Returns the record for example *index* of *section*, creating the section
# and the record on first use. *line_number* and *tags* are only consulted
# when the record has to be created.
private def spec_example_record(examples, section, index, line_number, tags)
  bucket = (examples[section] ||= {} of Int32 => Hash(String, String))
  bucket[index] ||= {
    "line"      => line_number.to_s,
    "markdown"  => "",
    "html"      => "",
    "test_tags" => (tags == "example" ? "" : tags),
  } of String => String
end

# Parses a CommonMark-style spec fixture and returns its examples grouped by
# section: `{section title => {example index => example}}`.
#
# Each example is a hash with the keys `"line"` (line number of the first
# recorded line of the example), `"markdown"`, `"html"` and `"test_tags"`
# (the last word of the opening fence line, or `""` when the fence carries no
# tags). Parsing stops at a line containing `<!-- END TESTS -->`. Sections
# without examples are dropped from the result.
def extract_spec_tests(file)
  examples = {} of String => Hash(Int32, Hash(String, String))

  # Examples that appear before any heading are grouped under "".
  current_section = ""
  example_count = 0
  test_start = false
  result_start = false
  test_tags = ""
  line_number = 0

  # `each_line` ends at EOF on its own, so no exception is needed to leave
  # the loop.
  File.each_line(file) do |raw_line|
    line_number += 1
    line = raw_line.gsub(/\r\n?/, "\n")
    break if line.includes?("<!-- END TESTS -->")

    if !test_start && !result_start && (match = line.match(/^\#{1,6}\s+(.*)$/))
      # A heading outside an example opens a new section.
      current_section = match[1]
      examples[current_section] = {} of Int32 => Hash(String, String)
      example_count = 0
    elsif !test_start && !result_start && line =~ /^`{32} example([a-z ])*$/
      # Opening fence: the markdown part starts on the next line.
      test_start = true
      test_tags = line[line.rindex!(' ') + 1..-1]
    elsif test_start && !result_start && line =~ /^\.$/
      # A lone dot separates the markdown from the expected HTML.
      test_start = false
      result_start = true
    elsif !test_start && result_start && line =~ /^`{32}/
      # Closing fence: the example is complete.
      result_start = false
      example_count += 1
    elsif test_start && !result_start
      record = spec_example_record(examples, current_section, example_count, line_number, test_tags)
      record["markdown"] += line + "\n"
    elsif !test_start && result_start
      # The record may not exist yet when the markdown part was empty.
      record = spec_example_record(examples, current_section, example_count, line_number, test_tags)
      record["html"] += line + "\n"
    end
  end

  # Remove sections that ended up without examples.
  examples.reject! { |_, section_examples| section_examples.empty? }
  examples
end
# Iterates over a node tree in document order. Container nodes are
# visited twice (once entering, once leaving); other nodes once.
private class Walker
  def initialize(@root : Node)
    @current = @root
    @entering = true
  end

  # Returns the next `{node, entering}` event, or `nil` once the walk has
  # moved past the root node.
  def next
    node = @current
    return if node.nil?

    was_entering = @entering

    if was_entering && node.type.container?
      # Descend into the first child, or turn around immediately when the
      # container is empty so it is reported again as "leaving".
      child = node.first_child?
      @current = child if child
      @entering = !child.nil?
    elsif node == @root
      # Leaving the root ends the walk.
      @current = nil
    elsif (sibling = node.next?)
      @current = sibling
      @entering = true
    else
      # No more siblings: go back up and report the parent as "leaving".
      @current = node.parent?
      @entering = false
    end

    {node, was_entering}
  end

  # Repositions the walker so that the next event is `{node, entering}`.
  def resume_at(node : Node, entering : Bool)
    @current = node
    @entering = entering
  end
end
48 | <p>Here is some quoted ‘<code>code</code>’ and a “<a href="url">quoted link</a>”.</p> 49 | ```````````````````````````````` 50 | 51 | Here the first `'` is treated as an apostrophe, not 52 | an open quote, because the final single quote is matched 53 | by the single quote before `jolly`: 54 | 55 | ```````````````````````````````` example 56 | 'tis the season to be 'jolly' 57 | . 58 | <p>’tis the season to be ‘jolly’</p> 59 | ```````````````````````````````` 60 | 61 | Multiple apostrophes should not be marked as open/closing quotes. 62 | 63 | ```````````````````````````````` example 64 | 'We'll use Jane's boat and John's truck,' Jenna said. 65 | . 66 | <p>‘We’ll use Jane’s boat and John’s truck,’ Jenna said.</p> 67 | ```````````````````````````````` 68 | 69 | An unmatched double quote will be interpreted as a 70 | left double quote, to facilitate this style: 71 | 72 | ```````````````````````````````` example 73 | "A paragraph with no closing quote. 74 | 75 | "Second paragraph by same speaker, in fiction." 76 | . 77 | <p>“A paragraph with no closing quote.</p> 78 | <p>“Second paragraph by same speaker, in fiction.”</p> 79 | ```````````````````````````````` 80 | 81 | Quotes that are escaped come out as literal straight 82 | quotes: 83 | 84 | ```````````````````````````````` example 85 | \"This is not smart.\" 86 | This isn\'t either. 87 | 5\'8\" 88 | . 89 | <p>"This is not smart." 90 | This isn't either. 91 | 5'8"</p> 92 | ```````````````````````````````` 93 | 94 | Two hyphens form an en-dash, three an em-dash. 95 | 96 | ```````````````````````````````` example 97 | Some dashes: em---em 98 | en--en 99 | em --- em 100 | en -- en 101 | 2--3 102 | . 103 | <p>Some dashes: em—em 104 | en–en 105 | em — em 106 | en – en 107 | 2–3</p> 108 | ```````````````````````````````` 109 | 110 | A sequence of more than three hyphens is 111 | parsed as a sequence of em and/or en dashes, 112 | with no hyphens. 
If possible, a homogeneous 113 | sequence of dashes is used (so, 10 hyphens 114 | = 5 en dashes, and 9 hyphens = 3 em dashes). 115 | When a heterogeneous sequence must be used, 116 | the em dashes come first, followed by the en 117 | dashes, and as few en dashes as possible are 118 | used (so, 7 hyphens = 2 em dashes and 1 en 119 | dash). 120 | 121 | ```````````````````````````````` example 122 | one- 123 | two-- 124 | three--- 125 | four---- 126 | five----- 127 | six------ 128 | seven------- 129 | eight-------- 130 | nine--------- 131 | thirteen-------------. 132 | . 133 | <p>one- 134 | two– 135 | three— 136 | four–– 137 | five—– 138 | six—— 139 | seven—–– 140 | eight–––– 141 | nine——— 142 | thirteen———––.</p> 143 | ```````````````````````````````` 144 | 145 | Hyphens can be escaped: 146 | 147 | ```````````````````````````````` example 148 | Escaped hyphens: \-- \-\-\-. 149 | . 150 | <p>Escaped hyphens: -- ---.</p> 151 | ```````````````````````````````` 152 | 153 | Three periods form an ellipsis: 154 | 155 | ```````````````````````````````` example 156 | Ellipses...and...and.... 157 | . 158 | <p>Ellipses…and…and….</p> 159 | ```````````````````````````````` 160 | 161 | Periods can be escaped if ellipsis-formation 162 | is not wanted: 163 | 164 | ```````````````````````````````` example 165 | No ellipses\.\.\. 166 | . 167 | <p>No ellipses...</p> 168 | ```````````````````````````````` 169 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) 6 | and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 
7 | 8 | ## [Unreleased] 9 | 10 | ### Added 11 | 12 | - Tables #[72](https://github.com/icyleaf/markd/pull/72) thanks @[ralsina](https://github.com/ralsina) 13 | - Alerts #[94](https://github.com/icyleaf/markd/pull/94) thanks @[ralsina](https://github.com/ralsina) 14 | - Extended Autolinks #[86](https://github.com/icyleaf/markd/pull/86) thanks @[ralsina](https://github.com/ralsina) 15 | - Tartrazine code block syntax highlighting #[67](https://github.com/icyleaf/markd/pull/81) thanks @[zw963](https://github.com/zw963). 16 | - Tagfilter option for GFM #[64](https://github.com/icyleaf/markd/pull/64) thanks @[nobodywasishere](https://github.com/nobodywasishere). 17 | - Task list / checkbox support for GFM #[63](https://github.com/icyleaf/markd/pull/63) thanks @[nobodywasishere](https://github.com/nobodywasishere). 18 | - Strikethrough support for GFM #[62](https://github.com/icyleaf/markd/pull/62) thanks @[nobodywasishere](https://github.com/nobodywasishere). 19 | - Emoji support for GFM #[61](https://github.com/icyleaf/markd/pull/61) thanks @[nobodywasishere](https://github.com/nobodywasishere). 20 | 21 | ### Fixed 22 | 23 | - Large numbers failing to be parsed as starts of lists #[81](https://github.com/icyleaf/markd/pull/81) 24 | 25 | ### TODO 26 | 27 | - Full GFM support 28 | 29 | ## [0.5.0] (2022-06-14) 30 | 31 | - Support CommonMark 0.29 #[50](https://github.com/icyleaf/markd/pull/50) thanks @[HertzDevil](https://github.com/HertzDevil). 32 | - Fix typos #[47](https://github.com/icyleaf/markd/pull/47) #[49](https://github.com/icyleaf/markd/pull/49) thanks @[kianmeng](https://github.com/kianmeng), @[jsoref](https://github.com/jsoref). 33 | 34 | ## [0.4.2] (2021-10-19) 35 | 36 | ### Added 37 | 38 | - Enable Table of Content (TOC) #[41](https://github.com/icyleaf/markd/pull/41) thanks @[Nephos](https://github.com/Nephos). 39 | 40 | ### Fixed 41 | 42 | - Fix byte slice negative #[43](https://github.com/icyleaf/markd/pull/43). 43 | - Compatibility with Crystal 1.2. 
44 | 45 | ## [0.4.1] (2021-09-27) 46 | 47 | ### Added 48 | 49 | - Refactor Options and change to a class #[36](https://github.com/icyleaf/markd/pull/36) thanks @[straight-shoota](https://github.com/straight-shoota). 50 | - Add `lang` parameter to `HTMLRenderer#code_block_body` #[38](https://github.com/icyleaf/markd/pull/38) thanks @[straight-shoota](https://github.com/straight-shoota). 51 | 52 | ## [0.4.0] (2021-03-23) 53 | 54 | - Compatibility with Crystal 1.0. #[34](https://github.com/icyleaf/markd/pull/34) thanks @[bcardiff](https://github.com/bcardiff). 55 | 56 | ## [0.3.0] (2021-03-02) 57 | 58 | No changelog. 59 | 60 | ## [0.2.1] (2020-08-24) 61 | 62 | ### Added 63 | 64 | - Add Options#base_url to allow resolving relative links. #[26](https://github.com/icyleaf/markd/pull/26), #[28](https://github.com/icyleaf/markd/pull/28) thanks @[straight-shoota](https://github.com/straight-shoota). 65 | 66 | ### Fixed 67 | 68 | - [high severity] escape unsafe html entry inline of code block. #[32](https://github.com/icyleaf/markd/pull/32). 69 | - Fixed some typos in README. #[29](https://github.com/icyleaf/markd/pull/29) thanks @[Calamari](https://github.com/Calamari). 70 | 71 | ## [0.2.0] (2019-10-08) 72 | 73 | ### Changed 74 | 75 | - Optimizations speed. many thanks @[asterite](https://github.com/asterite). #[19](https://github.com/icyleaf/markd/pull/19) 76 | 77 | ### Fixed 78 | 79 | - Compatibility with Crystal 0.31. #[22](https://github.com/icyleaf/markd/pull/22). 80 | 81 | ## [0.1.2] (2019-08-26) 82 | 83 | - Use Crystal v0.31.0 as default compiler. 84 | 85 | ## [0.1.1] (2017-12-26) 86 | 87 | - Minor refactoring and improving speed. thanks @[straight-shoota](https://github.com/straight-shoota). 88 | - Use Crystal v0.24.1 as default compiler. 
# Maps each HTML-significant character to the entity that replaces it.
# The values must be the entity references themselves: mapping a character
# to itself would leave the output unescaped.
private ESCAPES = {
  '&' => "&amp;",
  '"' => "&quot;",
  '<' => "&lt;",
  '>' => "&gt;",
}

# Returns *text* with `&`, `"`, `<` and `>` replaced by their HTML entities.
def escape(text)
  # If we can determine that the text has no escape chars
  # then we can return the text as is, avoiding an allocation
  # and a lot of processing in `String#gsub`.
  if has_escape_char?(text)
    text.gsub(ESCAPES)
  else
    text
  end
end

# Returns `true` if *text* contains at least one character that `escape`
# would replace. All four characters are ASCII, so a byte scan is enough.
private def has_escape_char?(text)
  text.each_byte do |byte|
    case byte
    when '&', '"', '<', '>'
      return true
    else
      next
    end
  end
  false
end
do 77 | walker = document.walker 78 | while (event = walker.next) 79 | node, entering = event 80 | 81 | case node.type 82 | when Node::Type::Heading 83 | heading(node, entering) 84 | when Node::Type::List 85 | list(node, entering) 86 | when Node::Type::Item 87 | item(node, entering) 88 | when Node::Type::BlockQuote 89 | block_quote(node, entering) 90 | when Node::Type::Alert 91 | alert(node, entering) 92 | when Node::Type::ThematicBreak 93 | thematic_break(node, entering) 94 | when Node::Type::CodeBlock 95 | code_block(node, entering, formatter) 96 | when Node::Type::Code 97 | code(node, entering) 98 | when Node::Type::HTMLBlock 99 | html_block(node, entering) 100 | when Node::Type::HTMLInline 101 | html_inline(node, entering) 102 | when Node::Type::Paragraph 103 | paragraph(node, entering) 104 | when Node::Type::Emphasis 105 | emphasis(node, entering) 106 | when Node::Type::SoftBreak 107 | soft_break(node, entering) 108 | when Node::Type::LineBreak 109 | line_break(node, entering) 110 | when Node::Type::Strong 111 | strong(node, entering) 112 | when Node::Type::Strikethrough 113 | strikethrough(node, entering) 114 | when Node::Type::Link 115 | link(node, entering) 116 | when Node::Type::Image 117 | image(node, entering) 118 | when Node::Type::Table 119 | table(node, entering) 120 | when Node::Type::TableRow 121 | table_row(node, entering) 122 | when Node::Type::TableCell 123 | table_cell(node, entering) 124 | else 125 | text(node, entering) 126 | end 127 | end 128 | end 129 | 130 | @output_io.to_s.sub("\n", "") 131 | end 132 | end 133 | end 134 | 135 | require "./renderers/*" 136 | -------------------------------------------------------------------------------- /src/markd/rules/table.cr: -------------------------------------------------------------------------------- 1 | module Markd::Rule 2 | struct Table 3 | include Rule 4 | 5 | # Detects the first row of a table, if the parser is in gfm mode 6 | 7 | def match(parser : Parser, container : Node) : MatchValue 8 | 
# Looks like the 1st line of a table and we have gfm enabled 9 | if parser.gfm? && match?(parser) 10 | parser.close_unmatched_blocks 11 | parser.add_child(Node::Type::Table, 0) 12 | 13 | MatchValue::Leaf 14 | else 15 | MatchValue::None 16 | end 17 | end 18 | 19 | # Decides if the table continues or if it ended before the current line 20 | 21 | def continue(parser : Parser, container : Node) : ContinueStatus 22 | # Only continue if line looks like a divider or a table row 23 | if match_continuation?(parser) 24 | ContinueStatus::Continue 25 | else 26 | ContinueStatus::Stop 27 | end 28 | end 29 | 30 | # Because of `match` and `continue` the `container` has 31 | # all the text of the table. We parse it here and 32 | # insert all `TableRow` and `TableCell` nodes from parsing. 33 | # 34 | # First, it will perform a sanity check, and if the 35 | # table is broken it will be converted into a `Paragraph` 36 | 37 | def token(parser : Parser, container : Node) : Nil 38 | lines = container.text.strip.split("\n") 39 | 40 | row_sizes = lines[...2].map do |line| 41 | strip_pipe(line.strip).split(TABLE_CELL_SEPARATOR).size 42 | end.uniq! 43 | 44 | # Do we have a real table? 45 | # * At least two lines 46 | # * Second line is a divider 47 | # * First two lines have the same number of cells 48 | 49 | if lines.size < 2 || !lines[1].match(TABLE_HEADING_SEPARATOR) || 50 | row_sizes.size != 1 51 | # Not enough table or a broken table. 
52 | # We need to convert it into a paragraph 53 | # NOTE(review): this retypes the node in place; the original author doubted that this is a supported operation 54 | container.type = Node::Type::Paragraph 55 | return 56 | end 57 | 58 | max_row_size = row_sizes[0] # cell count shared by the header and divider rows 59 | has_body = lines.size > 2 # only header + divider means there are no body rows 60 | container.data["has_body"] = has_body 61 | 62 | alignments = strip_pipe(lines[1].strip).split(TABLE_CELL_SEPARATOR).map do |cell| 63 | cell = cell.strip 64 | if cell.starts_with?(":") && cell.ends_with?(":") 65 | "center" 66 | elsif cell.starts_with?(":") 67 | "left" 68 | elsif cell.ends_with?(":") 69 | "right" 70 | else 71 | "" 72 | end 73 | end 74 | 75 | # Each line maps to a table row 76 | lines.each_with_index do |line, i| 77 | next if i == 1 # the divider line only supplies the alignments; no TableRow node is created for it 78 | row = Node.new(Node::Type::TableRow) 79 | row.data["heading"] = i == 0 80 | row.data["has_body"] = has_body 81 | container.append_child(row) 82 | # This splits on | but not on \| (escaped |); cells beyond the header width are dropped 83 | cells = strip_pipe(line.strip).split(TABLE_CELL_SEPARATOR)[...max_row_size] 84 | 85 | # Each row should have exactly the same size as the header. 86 | while cells.size < max_row_size 87 | cells << "" 88 | end 89 | 90 | # Create cells with text and metadata 91 | cells.each_with_index do |text, j| 92 | cell = Node.new(Node::Type::TableCell) 93 | # Cell text should be stripped and escaped pipes unescaped 94 | cell.text = text.strip.gsub("\\|", "|") 95 | cell.data["align"] = alignments[j] 96 | cell.data["heading"] = i == 0 97 | row.append_child(cell) 98 | end 99 | end 100 | end 101 | 102 | # Not really used because of how parsing is done 103 | def can_contain?(type : Node::Type) : Bool 104 | !type.container? 105 | end 106 | 107 | # Tables are multi-line 108 | def accepts_lines? 
: Bool 109 | true 110 | end 111 | 112 | # Match only lines that look like the first line of a table: 113 | # * Start with a | or look like multiple cells separated by | 114 | # * Is at least 3 characters long (smallest table starts are "|a|" or "a|b") 115 | 116 | private def match?(parser) 117 | !parser.indented && \ 118 | (parser.line[0]? == '|' || parser.line.match(TABLE_CELL_SEPARATOR)) && 119 | parser.line.size > 2 120 | end 121 | 122 | # Match lines that look like a table separator, 123 | # start with a | or look like multiple cells separated by | 124 | private def match_continuation?(parser : Parser) 125 | !parser.indented && (parser.line[0]? == '|' || 126 | parser.line.match(TABLE_HEADING_SEPARATOR) || 127 | parser.line.match(TABLE_CELL_SEPARATOR)) || 128 | # Lines that are not empty and are not the start of a 129 | # block-level structure are ALSO continuations (see gfm-spec.txt:3397) 130 | !(parser.line.strip.empty? || parser.line.matches?(/^(?:>|\#{1,6}|`{3}|\t{1}|\s{4}|(?:[*+-]\s)+|[0-9]+\.)+/)) # '-' is last in [*+-] so it is a literal; written as [*-+] it is a range that excludes '-' 131 | end 132 | 133 | private def strip_pipe(text : String) : String # removes the outer pipes of a row; a trailing escaped pipe (\|) is kept 134 | if text.ends_with?("\\|") 135 | text.lstrip("|") 136 | else 137 | text.strip("|") 138 | end 139 | end 140 | end 141 | end 142 | -------------------------------------------------------------------------------- /src/markd/rules/list.cr: -------------------------------------------------------------------------------- 1 | module Markd::Rule 2 | struct List 3 | include Rule 4 | 5 | BULLET_LIST_MARKERS = {'*', '+', '-'} 6 | ORDERED_LIST_MARKERS = {'.', ')'} 7 | 8 | def match(parser : Parser, container : Node) : MatchValue 9 | if !parser.indented || container.type.list? 10 | data = parse_list_marker(parser, container) 11 | return MatchValue::None if !data || data.empty? 12 | 13 | parser.close_unmatched_blocks 14 | if !parser.tip.type.list? 
|| !list_match?(container.data, data) 15 | list_node = parser.add_child(Node::Type::List, parser.next_nonspace) 16 | list_node.data = data 17 | end 18 | 19 | item_node = parser.add_child(Node::Type::Item, parser.next_nonspace) 20 | item_node.data = data 21 | 22 | MatchValue::Container 23 | else 24 | MatchValue::None 25 | end 26 | end 27 | 28 | def continue(parser : Parser, container : Node) : ContinueStatus 29 | ContinueStatus::Continue # a list always continues 30 | end 31 | 32 | def token(parser : Parser, container : Node) : Nil # marks the list as loose ("tight" = false) when a blank line separates items or an item's children 33 | item = container.first_child? 34 | while item 35 | if ends_with_blankline?(item) && item.next? 36 | container.data["tight"] = false 37 | break 38 | end 39 | 40 | subitem = item.first_child? 41 | while subitem 42 | if ends_with_blankline?(subitem) && (item.next? || subitem.next?) 43 | container.data["tight"] = false 44 | break 45 | end 46 | 47 | subitem = subitem.next? 48 | end 49 | 50 | item = item.next? 51 | end 52 | end 53 | 54 | def can_contain?(type) 55 | type.item? 56 | end 57 | 58 | def accepts_lines? : Bool 59 | false 60 | end 61 | 62 | private def list_match?(list_data, item_data) # same list only if type, delimiter and bullet character all agree 63 | list_data["type"] == item_data["type"] && 64 | list_data["delimiter"] == item_data["delimiter"] && 65 | list_data["bullet_char"] == item_data["bullet_char"] 66 | end 67 | 68 | private def parse_list_marker(parser : Parser, container : Node) : Node::DataType # returns the marker's data, or an empty hash when the line does not start a list item 69 | empty_data = {} of String => Node::DataValue 70 | if parser.indent >= 4 71 | return empty_data 72 | end 73 | 74 | data = { 75 | "delimiter" => 0, 76 | "marker_offset" => parser.indent, 77 | "bullet_char" => "", 78 | "tight" => true, # lists are tight by default 79 | "start" => 1, 80 | } of String => Node::DataValue 81 | 82 | line = parser.line[parser.next_nonspace..-1] 83 | 84 | if BULLET_LIST_MARKERS.includes?(line[0]) 85 | if parser.gfm? 
&& line[1..].strip.starts_with?("[ ]") 86 | data["type"] = "checkbox" 87 | data["bullet_char"] = line[0].to_s 88 | data["checked"] = false 89 | padding_checkbox = line.index!(']') # index of the closing bracket; used below to skip past the checkbox 90 | elsif parser.gfm? && line[1..].strip.starts_with?("[x]") 91 | data["type"] = "checkbox" 92 | data["bullet_char"] = line[0].to_s 93 | data["checked"] = true 94 | padding_checkbox = line.index!(']') 95 | else 96 | data["type"] = "bullet" 97 | data["bullet_char"] = line[0].to_s 98 | end 99 | 100 | first_match_size = 1 101 | else 102 | pos = 0 103 | while line[pos]?.try &.ascii_number? 104 | pos += 1 105 | end 106 | 107 | number = pos >= 1 ? line[0..pos - 1].to_i? : -1 # -1 is a placeholder for "no digits"; the pos >= 1 check below rejects that case 108 | if number.nil? 109 | return empty_data 110 | end 111 | 112 | if pos >= 1 && pos <= 9 && ORDERED_LIST_MARKERS.includes?(line[pos]?) && 113 | (!container.type.paragraph? || number == 1) 114 | data["type"] = "ordered" 115 | data["start"] = number 116 | data["delimiter"] = line[pos].to_s 117 | first_match_size = pos + 1 118 | else 119 | return empty_data 120 | end 121 | end 122 | 123 | next_char = parser.line[parser.next_nonspace + first_match_size]? 124 | unless next_char.nil? || space_or_tab?(next_char) # the marker must be followed by a space, a tab or the end of the line 125 | return empty_data 126 | end 127 | 128 | if container.type.paragraph? && 129 | parser.line[(parser.next_nonspace + first_match_size)..-1].each_char.all? &.ascii_whitespace? 130 | return empty_data 131 | end 132 | 133 | parser.advance_next_nonspace 134 | parser.advance_offset(first_match_size, true) 135 | 136 | # Skip past the checkbox brackets ([]) 137 | if parser.gfm? && padding_checkbox 138 | parser.advance_offset(padding_checkbox, true) 139 | end 140 | 141 | spaces_start_column = parser.column 142 | spaces_start_offset = parser.offset 143 | 144 | loop do 145 | parser.advance_offset(1, true) 146 | next_char = parser.line[parser.offset]? 147 | 148 | break unless parser.column - spaces_start_column < 5 && space_or_tab?(next_char) 149 | end 150 | 151 | blank_item = parser.line[parser.offset]?.nil? 
152 | spaces_after_marker = parser.column - spaces_start_column 153 | if spaces_after_marker >= 5 || spaces_after_marker < 1 || blank_item # too many spaces, none, or a blank item: padding is marker width + 1 and the position is rewound 154 | data["padding"] = first_match_size + 1 155 | parser.column = spaces_start_column 156 | parser.offset = spaces_start_offset 157 | 158 | parser.advance_offset(1, true) if space_or_tab?(parser.line[parser.offset]?) 159 | else 160 | data["padding"] = first_match_size + spaces_after_marker 161 | end 162 | 163 | data 164 | end 165 | 166 | private def ends_with_blankline?(container : Node) : Bool # follows last children through List/Item nodes looking for a node whose last line was blank 167 | while container 168 | return true if container.last_line_blank? 169 | 170 | break if container.last_line_checked? || !container.type.in?(Node::Type::List, Node::Type::Item) 171 | container.last_line_checked = true 172 | container = container.last_child? 173 | end 174 | 175 | false 176 | end 177 | end 178 | end 179 | -------------------------------------------------------------------------------- /src/markd/rule.cr: -------------------------------------------------------------------------------- 1 | module Markd 2 | module Rule 3 | ESCAPABLE_STRING = %Q([!"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]) 4 | ESCAPED_CHAR_STRING = %Q(\\\\) + ESCAPABLE_STRING 5 | 6 | NUMERIC_HTML_ENTITY = /^&#(?:[Xx][0-9a-fA-F]{1,6}|[0-9]{1,7});/ 7 | HTML_ENTITY = /^&[a-zA-Z0-9]+;/ 8 | 9 | TAG_NAME_STRING = %Q([A-Za-z][A-Za-z0-9-]*) 10 | ATTRIBUTE_NAME_STRING = %Q([a-zA-Z_:][a-zA-Z0-9:._-]*) 11 | UNQUOTED_VALUE_STRING = %Q([^"'=<>`\\x00-\\x20]+) 12 | SINGLE_QUOTED_VALUE_STRING = %Q('[^']*') 13 | DOUBLE_QUOTED_VALUE_STRING = %Q("[^"]*") 14 | ATTRIBUTE_VALUE_STRING = "(?:" + UNQUOTED_VALUE_STRING + "|" + SINGLE_QUOTED_VALUE_STRING + "|" + DOUBLE_QUOTED_VALUE_STRING + ")" 15 | ATTRIBUTE_VALUE_SPEC_STRING = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTE_VALUE_STRING + ")" 16 | ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTE_NAME_STRING + ATTRIBUTE_VALUE_SPEC_STRING + "?)" 17 | 18 | MAYBE_SPECIAL = {'#', '`', '~', '*', '+', '_', '=', '<', '>', '-', '|'} # first characters checked by the block parser's fast path before it tries the rules 19 | THEMATIC_BREAK = 
/^(?:(?:\*[ \t]*){3,}|(?:_[ \t]*){3,}|(?:-[ \t]*){3,})[ \t]*$/ 20 | 21 | ESCAPABLE = /^#{ESCAPABLE_STRING}/ 22 | 23 | TICKS = /`+/ 24 | 25 | ELLIPSIS = "..." 26 | DASH = /--+/ 27 | 28 | OPEN_TAG = "<" + TAG_NAME_STRING + ATTRIBUTE + "*" + "\\s*/?>" 29 | CLOSE_TAG = "</" + TAG_NAME_STRING + "\\s*[>]" 30 | 31 | OPEN_TAG_STRING = "<#{TAG_NAME_STRING}#{ATTRIBUTE}*" + "\\s*/?>" # same value as OPEN_TAG, built with interpolation 32 | CLOSE_TAG_STRING = "</#{TAG_NAME_STRING}\\s*[>]" # same value as CLOSE_TAG 33 | COMMENT_STRING = "<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->" 34 | PROCESSING_INSTRUCTION_STRING = "[<][?].*?[?][>]" 35 | DECLARATION_STRING = "<![A-Z]+" + "\\s+[^>]*>" 36 | CDATA_STRING = "<!\\[CDATA\\[[\\s\\S]*?\\]\\]>" 37 | HTML_TAG_STRING = "(?:#{OPEN_TAG_STRING}|#{CLOSE_TAG_STRING}|#{COMMENT_STRING}|#{PROCESSING_INSTRUCTION_STRING}|#{DECLARATION_STRING}|#{CDATA_STRING})" 38 | HTML_TAG = /^#{HTML_TAG_STRING}/i 39 | 40 | HTML_BLOCK_OPEN = [ # NOTE(review): presumably one start pattern per HTML block type, in the same order as HTML_BLOCK_CLOSE - confirm in rules/html_block.cr 41 | /^<(?:script|pre|style)(?:\s|>|$)/i, 42 | /^<!--/, 43 | /^<[?]/, 44 | /^<![A-Z]/, 45 | /^<!\[CDATA\[/, 46 | /^<[\/]?(?:address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|title|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)(?:\s|[\/]?[>]|$)/i, 47 | Regex.new("^(?:" + OPEN_TAG + "|" + CLOSE_TAG + ")\\s*$", Regex::Options::IGNORE_CASE), 48 | ] 49 | 50 | HTML_BLOCK_CLOSE = [ # end patterns; Block#match_html_block? indexes this array with the node's "html_block_type" (0..4) 51 | /<\/(?:script|pre|style)>/i, 52 | /-->/, 53 | /\?>/, 54 | />/, 55 | /\]\]>/, 56 | ] 57 | 58 | LINK_TITLE = Regex.new("^(?:\"(#{ESCAPED_CHAR_STRING}|[^\"\\x00])*\"" + 59 | "|'(#{ESCAPED_CHAR_STRING}|[^'\\x00])*'" + 60 | "|\\((#{ESCAPED_CHAR_STRING}|[^)\\x00])*\\))") 61 | 62 | LINK_LABEL = Regex.new("^\\[(?:[^\\\\\\[\\]]|" + ESCAPED_CHAR_STRING + "|\\\\){0,}\\]") 63 | 64 | LINK_DESTINATION_BRACES = Regex.new("^(?:[<](?:[^<>\\t\\n\\\\\\x00]|" + ESCAPED_CHAR_STRING + ")*[>])") 65 | 66 
| # A valid domain name is: 67 | # 68 | # segments of alphanumeric characters, underscores (_) and hyphens (-) 69 | # separated by periods (.). There must be at least one period, and no 70 | # underscores may be present in the last two segments of the domain. 71 | # 72 | # Alphanumeric characters in this context include emojis. 73 | LAST_DOMAIN_SEGMENT = /(?:[a-zA-Z0-9\-\p{Emoji_Presentation}\-]+)/ 74 | OTHER_DOMAIN_SEGMENTS = /(?:[a-zA-Z0-9\p{Emoji_Presentation}\-_]+)/ 75 | # The spec wants to capture greedily, even invalid domain names and then 76 | # reject the invalid ones later. 77 | # For example: www.xxx._yyy.zzz is never linked because of the 78 | # _ in the last segment. 79 | DOMAIN_NAME = /(?:#{OTHER_DOMAIN_SEGMENTS}\.)*#{OTHER_DOMAIN_SEGMENTS}/ 80 | VALID_DOMAIN_NAME = /^(?:#{OTHER_DOMAIN_SEGMENTS}\.)*(?:#{LAST_DOMAIN_SEGMENT}\.)+#{LAST_DOMAIN_SEGMENT}$/ 81 | VALID_URL_PATH = /(?:\/[^\s<]*)?/ 82 | 83 | AUTOLINK_PROTOCOLS = /^(?:http|https|ftp):\/\// 84 | 85 | EMAIL_AUTO_LINK = /^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/ 86 | EXTENDED_EMAIL_AUTO_LINK = /^([a-zA-Z0-9][a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)[-_]*/ 87 | AUTO_LINK = /^<[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*>/i 88 | WWW_AUTO_LINK = /^www\.#{DOMAIN_NAME}#{VALID_URL_PATH}/ 89 | XMPP_AUTO_LINK = /^xmpp:[A-Za-z0-9]+@#{DOMAIN_NAME}#{VALID_URL_PATH}/ 90 | MAILTO_AUTO_LINK = /^mailto:[A-Za-z0-9]+@#{DOMAIN_NAME}/ 91 | PROTOCOL_AUTO_LINK = /#{AUTOLINK_PROTOCOLS}#{DOMAIN_NAME}#{VALID_URL_PATH}[^\s?!.,:*_~]/ 92 | 93 | WHITESPACE_CHAR = /^[ \t\n\x0b\x0c\x0d]/ 94 | WHITESPACE = /[ \t\n\x0b\x0c\x0d]+/ 95 | LINE_ENDING = /\x0d\n|\n|\x0d/ 96 | PUNCTUATION = /[$+<=>^`|~\p{P}]/ # LINE_ENDING above tries CRLF first, otherwise the lone CR alternative wins and CRLF counts as two endings. Previous PUNCTUATION pattern: 
Regex.new("[!"#$%&'()*+,\-./:;<=>?@\[\]^_`{|}~\xA1\xA7\xAB\xB6\xB7\xBB\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E42\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]|\uD800[\uDD00-\uDD02\uDF9F\uDFD0]|\uD801\uDD6F|\uD802[\uDC57\uDD1F\uDD3F\uDE50-\uDE58\uDE7F\uDEF0-\uDEF6\uDF39-\uDF3F\uDF99-\uDF9C]|\uD804[\uDC47-\uDC4D\uDCBB\uDCBC\uDCBE-\uDCC1\uDD40-\uDD43\uDD74\uDD75\uDDC5-\uDDC9\uDDCD\uDDDB\uDDDD-\uDDDF\uDE38-\uDE3D\uDEA9]|\uD805[\uDCC6\uDDC1-\uDDD7\uDE41-\uDE43\uDF3C-\uDF3E]|\uD809[\uDC70-\uDC74]|\uD81A[\uDE6E\uDE6F\uDEF5\uDF37-\uDF3B\uDF44]|\uD82F\uDC9F|\uD836[\uDE87-\uDE8B]") 97 | 98 | UNSAFE_PROTOCOL = /^javascript:|vbscript:|file:|data:/i # NOTE(review): ^ binds to the first alternative only, so vbscript:, file: and data: match anywhere in the string - confirm this is intended 99 | UNSAFE_DATA_PROTOCOL = /^data:image\/(?:png|gif|jpeg|webp)/i # NOTE(review): despite the name this matches image data: URLs; presumably an exception to UNSAFE_PROTOCOL - verify against the renderer 100 | 101 | CODE_INDENT = 4 # indentation (in columns) from which Block#find_next_nonspace flags a line as indented; also the tab stop width in Block#advance_offset 102 | 103 | GFM_DISALLOWED_HTML_TAGS = %w[title textarea style xmp iframe noembed noframes script plaintext] 104 | 105 | TABLE_HEADING_SEPARATOR = 
/^(\|?\s*:{0,1}-:{0,1}+\s*)+(\||\s*)$/ 106 | TABLE_CELL_SEPARATOR = /(?<!\\)\|/ # a pipe that is not preceded by a backslash 107 | 108 | ADMONITION_START = /^> \[!((?:NOTE|TIP|IMPORTANT|CAUTION|WARNING)+)](\s*.*)?$/ 109 | 110 | # Match Value 111 | # 112 | # - None: no match 113 | # - Container: match container, keep going 114 | # - Leaf: match leaf, no more block starts 115 | enum MatchValue 116 | None 117 | Container 118 | Leaf 119 | end 120 | 121 | # Tries to start a block of this kind on the parser's current line 122 | abstract def match(parser : Parser, container : Node) : MatchValue 123 | 124 | # Finalizes a block when it is closed (called from Block#token) 125 | abstract def token(parser : Parser, container : Node) : Nil 126 | 127 | # Decides whether an open block of this kind continues on the current line 128 | abstract def continue(parser : Parser, container : Node) : ContinueStatus 129 | # Continue: the block matched, keep going; Stop: back up to the parent; Return: the line is done 130 | enum ContinueStatus 131 | Continue 132 | Stop 133 | Return 134 | end 135 | 136 | # Whether the block parser may append raw lines to this block's text 137 | abstract def accepts_lines? : Bool 138 | 139 | private def space_or_tab?(char : Char?) : Bool 140 | char == ' ' || char == '\t' 141 | end 142 | end 143 | end 144 | 145 | require "./rules/*" 146 | -------------------------------------------------------------------------------- /src/markd/parsers/block.cr: -------------------------------------------------------------------------------- 1 | module Markd::Parser 2 | class Block 3 | include Parser 4 | 5 | def self.parse(source : String, options = Options.new) 6 | new(options).parse(source) 7 | end 8 | 9 | RULES = { 10 | Node::Type::Document => Rule::Document.new, 11 | Node::Type::BlockQuote => Rule::BlockQuote.new, 12 | Node::Type::Alert => Rule::BlockQuote.new, # Alerts and BlockQuotes are the same 13 | Node::Type::Heading => Rule::Heading.new, 14 | Node::Type::CodeBlock => Rule::CodeBlock.new, 15 | Node::Type::HTMLBlock => Rule::HTMLBlock.new, 16 | Node::Type::ThematicBreak => Rule::ThematicBreak.new, 17 | Node::Type::List => Rule::List.new, 18 | Node::Type::Item => Rule::Item.new, 19 | Node::Type::Paragraph => Rule::Paragraph.new, 20 | Node::Type::Table => Rule::Table.new, 21 | } 22 | 23 | property! tip : Node? 
24 | property offset, column 25 | 26 | getter line, current_line, blank, inline_lexer, 27 | indent, indented, next_nonspace, refmap 28 | 29 | delegate gfm?, tagfilter?, to: @options 30 | 31 | def initialize(@options : Options) 32 | @inline_lexer = Inline.new(@options) 33 | 34 | @document = Node.new(Node::Type::Document) 35 | @tip = @document 36 | @oldtip = @tip 37 | @last_matched_container = @tip 38 | 39 | @line = "" 40 | 41 | @current_line = 0 42 | @offset = 0 43 | @column = 0 44 | @last_line_length = 0 45 | 46 | @next_nonspace = 0 47 | @next_nonspace_column = 0 48 | 49 | @indent = 0 50 | @indented = false 51 | @blank = false 52 | @partially_consumed_tab = false 53 | @all_closed = true 54 | @refmap = {} of String => Hash(String, String) | String 55 | end 56 | 57 | def parse(source : String) # runs the block pass, then the inline pass, and returns the document node 58 | Utils.timer("block parsing", @options.time?) do 59 | parse_blocks(source) 60 | end 61 | 62 | Utils.timer("inline parsing", @options.time?) do 63 | process_inlines 64 | end 65 | 66 | @document 67 | end 68 | 69 | private def parse_blocks(source) # feeds every line to process_line, then closes all blocks that are still open 70 | lines_size = 0 71 | source.each_line do |line| 72 | process_line(line) 73 | lines_size += 1 74 | end 75 | 76 | # `each_line` yields no extra empty line for a trailing newline, so 77 | # lines_size already is the number of the last line and needs no adjustment 78 | 79 | while (tip = tip?) 80 | token(tip, lines_size) 81 | end 82 | end 83 | 84 | private def process_line(line : String) 85 | container = @document 86 | @oldtip = tip 87 | @offset = 0 88 | @column = 0 89 | @blank = false 90 | @partially_consumed_tab = false 91 | @current_line += 1 92 | 93 | line = line.gsub(Char::ZERO, '\u{FFFD}') 94 | @line = line 95 | 96 | while (last_child = container.last_child?) && last_child.open? 
97 | container = last_child 98 | 99 | find_next_nonspace 100 | 101 | case RULES[container.type].continue(self, container) 102 | when Rule::ContinueStatus::Continue 103 | # we've matched, keep going 104 | when Rule::ContinueStatus::Stop 105 | # we've failed to match a block 106 | # back up to last matching block 107 | container = container.parent 108 | break 109 | when Rule::ContinueStatus::Return 110 | # we've hit end of line for fenced code close and can return 111 | @last_line_length = line.size 112 | return 113 | end 114 | end 115 | 116 | @all_closed = (container == @oldtip) # true when the deepest block that continued is the tip from before this line 117 | @last_matched_container = container 118 | 119 | matched_leaf = !container.type.paragraph? && RULES[container.type].accepts_lines? # a non-paragraph block that takes raw lines skips the search for new block starts 120 | 121 | while !matched_leaf 122 | find_next_nonspace 123 | 124 | # fast path: skip the rule scan unless the first character could start a block or the line contains a table cell separator 125 | unless @indented 126 | first_char = @line[@next_nonspace]? 127 | unless first_char && (Rule::MAYBE_SPECIAL.includes?(first_char) || first_char.ascii_number? || @line.match(Rule::TABLE_CELL_SEPARATOR)) 128 | advance_next_nonspace 129 | break 130 | end 131 | end 132 | 133 | matched = RULES.each_value do |rule| 134 | case rule.match(self, container) 135 | when Rule::MatchValue::Container 136 | container = tip 137 | break true 138 | when Rule::MatchValue::Leaf 139 | container = tip 140 | matched_leaf = true 141 | break true 142 | else 143 | false 144 | end 145 | end 146 | 147 | # nothing matched (each_value returns nil when no rule broke out of the loop) 148 | unless matched 149 | advance_next_nonspace 150 | break 151 | end 152 | end 153 | 154 | if !@all_closed && !@blank && tip.type.paragraph? 155 | # lazy paragraph continuation 156 | add_line 157 | else 158 | # not a lazy continuation 159 | close_unmatched_blocks 160 | if @blank && (last_child = container.last_child?) 161 | last_child.last_line_blank = true 162 | end 163 | 164 | container_type = container.type 165 | last_line_blank = @blank && 166 | !(container_type.block_quote? || 167 | (container_type.code_block? && container.fenced?) 
|| 168 | (container_type.item? && !container.first_child? && container.source_pos[0][0] == @current_line)) 169 | 170 | cont = container 171 | while cont 172 | cont.last_line_blank = last_line_blank 173 | cont = cont.parent? 174 | end 175 | 176 | if RULES[container_type].accepts_lines? 177 | add_line 178 | 179 | # if HtmlBlock, check for end condition 180 | if container_type.html_block? && match_html_block?(container) 181 | token(container, @current_line) 182 | end 183 | elsif @offset < line.size && !@blank 184 | # create paragraph container for line 185 | add_child(Node::Type::Paragraph, @offset) 186 | advance_next_nonspace 187 | add_line 188 | end 189 | end 190 | 191 | # recorded on every path, lazy continuation lines included, so end columns stay current 192 | @last_line_length = line.size 193 | nil 194 | end 195 | 196 | private def process_inlines # runs the inline lexer on every paragraph, heading and table cell when the walker leaves the node 197 | walker = @document.walker 198 | @inline_lexer.refmap = @refmap 199 | while (event = walker.next) 200 | node, entering = event 201 | if !entering && (node.type.paragraph? || node.type.heading? || node.type.table_cell?) 202 | @inline_lexer.parse(node) 203 | end 204 | end 205 | 206 | nil 207 | end 208 | 209 | def token(container : Node, line_number : Int32) 210 | container_parent = container.parent? 
211 | 212 | container.open = false 213 | container.source_pos = { 214 | container.source_pos[0], 215 | {line_number, @last_line_length}, 216 | } 217 | RULES[container.type].token(self, container) # let the node's rule finalize it 218 | 219 | @tip = container_parent # the parent (nil for the document) becomes the current tip 220 | 221 | nil 222 | end 223 | 224 | private def add_line # appends the rest of the current line, from @offset, to the tip's text 225 | if @partially_consumed_tab 226 | @offset += 1 # skip over tab 227 | # add space characters 228 | chars_to_tab = Rule::CODE_INDENT - (@column % 4) 229 | tip.text += " " * chars_to_tab 230 | end 231 | 232 | tip.text += @line[@offset..-1] + "\n" 233 | 234 | nil 235 | end 236 | 237 | def add_child(type : Node::Type, offset : Int32) : Node # closes tips that cannot contain `type`, then appends a new node and makes it the tip 238 | while !RULES[tip.type].can_contain?(type) 239 | token(tip, @current_line - 1) 240 | end 241 | 242 | column_number = offset + 1 # offset 0 = column 1 243 | 244 | node = Node.new(type) 245 | node.source_pos = { {@current_line, column_number}, {0, 0} } 246 | node.text = "" 247 | tip.append_child(node) 248 | @tip = node 249 | 250 | node 251 | end 252 | 253 | def close_unmatched_blocks # closes every block between the old tip and the last matched container, ending them on the previous line 254 | unless @all_closed 255 | while (oldtip = @oldtip) && oldtip != @last_matched_container 256 | parent = oldtip.parent? 257 | token(oldtip, @current_line - 1) 258 | @oldtip = parent 259 | end 260 | @all_closed = true 261 | end 262 | nil 263 | end 264 | 265 | private def find_next_nonspace 266 | offset = @offset 267 | column = @column 268 | 269 | if @line.empty? 270 | @blank = true 271 | else 272 | while (char = @line[offset]?) 
273 | case char 274 | when ' ' 275 | offset += 1 276 | column += 1 277 | when '\t' 278 | offset += 1 279 | column += (4 - (column % 4)) 280 | else 281 | break 282 | end 283 | end 284 | 285 | @blank = {nil, '\n', '\r'}.includes?(char) 286 | end 287 | 288 | @next_nonspace = offset 289 | @next_nonspace_column = column 290 | @indent = @next_nonspace_column - @column 291 | @indented = @indent >= Rule::CODE_INDENT 292 | 293 | nil 294 | end 295 | 296 | def advance_offset(count : Int32, columns = false) # advances by `count` characters, or by `count` columns when `columns` is true (a tab may then be consumed only partially) 297 | line = @line 298 | while count > 0 && (char = line[@offset]?) 299 | if char == '\t' 300 | chars_to_tab = Rule::CODE_INDENT - (@column % 4) 301 | if columns 302 | @partially_consumed_tab = chars_to_tab > count 303 | chars_to_advance = chars_to_tab > count ? count : chars_to_tab 304 | @column += chars_to_advance 305 | @offset += @partially_consumed_tab ? 0 : 1 306 | count -= chars_to_advance 307 | else 308 | @partially_consumed_tab = false 309 | @column += chars_to_tab 310 | @offset += 1 311 | count -= 1 312 | end 313 | else 314 | @partially_consumed_tab = false 315 | @column += 1 # assume ascii; block starts are ascii 316 | @offset += 1 317 | count -= 1 318 | end 319 | end 320 | 321 | nil 322 | end 323 | 324 | def advance_next_nonspace # jumps to the position found by find_next_nonspace 325 | @offset = @next_nonspace 326 | @column = @next_nonspace_column # assignment, so the column stays in step with the offset for later tab-stop arithmetic 327 | @partially_consumed_tab = false 328 | 329 | nil 330 | end 331 | 332 | private def match_html_block?(container : Node) 333 | if (block_type = container.data["html_block_type"]) 334 | block_type = block_type.as(Int32) 335 | block_type >= 0 && block_type <= 4 && Rule::HTML_BLOCK_CLOSE[block_type].match(@line[@offset..-1]) 336 | else 337 | false 338 | end 339 | end 340 | end 341 | end 342 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # markd 2 | 3 | 
[![Language](https://img.shields.io/badge/language-crystal-776791.svg)](https://github.com/crystal-lang/crystal) 4 | [![Tag](https://img.shields.io/github/tag/icyleaf/markd.svg)](https://github.com/icyleaf/markd/blob/master/CHANGELOG.md) 5 | [![Build Status](https://img.shields.io/circleci/project/github/icyleaf/markd/master.svg?style=flat)](https://circleci.com/gh/icyleaf/markd) 6 | 7 | 8 | **THIS PROJECT IS LOOKING FOR A MAINTAINER** 9 | 10 | Unfortunately, the maintainer no longer has the time and/or resources to work on markd further. This means that bugs will not be fixed and features will not be added unless someone else does so. 11 | 12 | If you're interested in fixing up markd, please [file an issue](https://github.com/icyleaf/markd/issues/new) to let me know. 13 | 14 | <hr /> 15 | 16 | Yet another markdown parser built for speed, written in [Crystal](https://crystal-lang.org) and compliant with the [CommonMark](http://spec.commonmark.org) specification (`v0.29`). Based on [commonmark.js](https://github.com/jgm/commonmark.js). 17 | 18 | ## Installation 19 | 20 | Add this to your application's `shard.yml`: 21 | 22 | ```yaml 23 | dependencies: 24 | markd: 25 | github: icyleaf/markd 26 | ``` 27 | 28 | ## Quick start 29 | 30 | ```crystal 31 | require "markd" 32 | 33 | markdown = <<-MD 34 | # Hello Markd 35 | 36 | > Yet another markdown parser built for speed, written in Crystal, Compliant to CommonMark specification. 37 | MD 38 | 39 | html = Markd.to_html(markdown) 40 | ``` 41 | 42 | There are also options to configure parsing and rendering. 
43 | 44 | ```crystal 45 | options = Markd::Options.new(smart: true, safe: true) 46 | Markd.to_html(markdown, options) 47 | ``` 48 | 49 | ## Options 50 | 51 | | Name | Type | Default value | Description | 52 | | ----------- | ------ | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 53 | | time | `Bool` | false | if **true**, report the time spent reading the source, parsing blocks and parsing inlines. | 54 | | smart | `Bool` | false | if **true**, straight quotes will be made curly,<br />`--` will be changed to an en dash,<br />`---` will be changed to an em dash, and<br />`...` will be changed to ellipses. | 55 | | source_pos | `Bool` | false | if **true**, source position information for block-level elements<br />will be rendered in the data-sourcepos attribute (for HTML) | 56 | | safe | `Bool` | false | if **true**, raw HTML will not be passed through to HTML output (it will be replaced by comments) | 57 | | prettyprint | `Bool` | false | if **true**, code tags generated by code blocks will have a `prettyprint` class added to them, to be used by [Google code-prettify](https://github.com/google/code-prettify). | 58 | | gfm | `Bool` | false | **Partial support** | 59 | | autolink | `Bool` | false | if **true**, more autolinks are detected, like bare email addresses or http links | 60 | | toc | `Bool` | false | **Not supported for now** | 61 | | base_url | `URI?` | nil | if not **nil**, relative URLs of links are resolved against this `URI`. It acts like HTML's `<base href="base_url">` in the context of a Markdown document. | 62 | 63 | ## Advanced 64 | 65 | If you want to use a custom renderer, you can! 66 | 67 | ```crystal 68 | 69 | class CustomRenderer < Markd::Renderer 70 | 71 | def strong(node, entering) 72 | end 73 | 74 | # more methods follow, one per node type handled in `render`. 
75 | end 76 | 77 | options = Markd::Options.new(time: true) 78 | document = Markd::Parser.parse(markdown, options) 79 | renderer = CustomRenderer.new(options) 80 | 81 | html = renderer.render(document) 82 | ``` 83 | 84 | ## Use the tartrazine shard to render code blocks 85 | 86 | Add [tartrazine](https://github.com/ralsina/tartrazine) to your dependencies and require it before markd; markd will then use it to render code blocks. 87 | 88 | By default, it uses a formatter like the following: 89 | 90 | ```crystal 91 | formatter = Tartrazine::Html.new( 92 | theme: Tartrazine.theme("catppuccin-macchiato"), 93 | line_numbers: true, 94 | standalone: true, 95 | ) 96 | ``` 97 | 98 | You can pass your own formatter instead. 99 | 100 | e.g. 101 | 102 | ```crystal 103 | require "tartrazine" # require it before markd 104 | require "markd" 105 | 106 | formatter = Tartrazine::Html.new( 107 | theme: Tartrazine.theme("emacs"), 108 | 109 | # Disable printing of line numbers 110 | line_numbers: false, 111 | 112 | # Set standalone to false for better performance. 113 | # 114 | # You need to generate the CSS file with `bin/tartrazine -f html -t "emacs" --css`, 115 | # then link it in your site. 116 | standalone: false, 117 | ) 118 | 119 | html = Markd.to_html(markdown,formatter: formatter) 120 | ``` 121 | 122 | If you don't care about the formatter config, you can just pass a string instead. 
123 | 124 | ```crystal 125 | require "tartrazine" # require it before markd 126 | require "markd" 127 | 128 | html = Markd.to_html(markdown, formatter: "emacs") 129 | ``` 130 | 131 | 132 | Tartrazine currently supports 247 languages and [331 themes](https://github.com/ralsina/tartrazine/tree/main/styles). You can retrieve the supported languages with `Tartrazine::LEXERS_BY_NAME.values.uniq.sort`; at the time of writing the result is: 133 | 134 | ```crystal 135 | [ 136 | "LiquidLexer", "VelocityLexer", 137 | 138 | "abap", "abnf", "actionscript", "actionscript_3", "ada", "agda", "al", "alloy", "angular2", 139 | "antlr", "apacheconf", "apl", "applescript", "arangodb_aql", "arduino", "armasm", 140 | "autohotkey", "autoit", "awk", 141 | 142 | "ballerina", "bash", "bash_session", "batchfile", "bbcode", "bibtex", "bicep", "blitzbasic", 143 | "bnf", "bqn", "brainfuck", 144 | 145 | "c", "c#", "c++", "cap_n_proto", "cassandra_cql", "ceylon", "cfengine3", "cfstatement", 146 | "chaiscript", "chapel", "cheetah", "clojure", "cmake", "cobol", "coffeescript", 147 | "common_lisp", "coq", "crystal", "css", "cue", "cython", 148 | 149 | "d", "dart", "dax", "desktop_entry", "diff", "django_jinja", "dns", "docker", "dtd", "dylan", 150 | 151 | "ebnf", "elixir", "elm", "emacslisp", "erlang", 152 | 153 | "factor", "fennel", "fish", "forth", "fortran", "fortranfixed", "fsharp", 154 | 155 | "gas", "gdscript", "gdscript3", "gherkin", "gleam", "glsl", "gnuplot", "go_template", 156 | "graphql", "groff", "groovy", 157 | 158 | "handlebars", "hare", "haskell", "hcl", "hexdump", "hlb", "hlsl", "holyc", "html", "hy", 159 | 160 | "idris", "igor", "ini", "io", "iscdhcpd", 161 | 162 | "j", "java", "javascript", "json", "jsonata", "julia", "jungle", 163 | 164 | "kotlin", 165 | 166 | "lighttpd_configuration_file", "llvm", "lua", 167 | 168 | "makefile", "mako", "markdown", "mason", "materialize_sql_dialect", "mathematica", "matlab", 169 | "mcfunction", "meson", "metal", "minizinc", "mlir", "modula-2", "moinwiki", 
"monkeyc", 170 | "morrowindscript", "myghty", "mysql", 171 | 172 | "nasm", "natural", "ndisasm", "newspeak", "nginx_configuration_file", "nim", "nix", 173 | 174 | "objective-c", "objectpascal", "ocaml", "octave", "odin", "onesenterprise", "openedge_abl", 175 | "openscad", "org_mode", 176 | 177 | "pacmanconf", "perl", "php", "pig", "pkgconfig", "pl_pgsql", "plaintext", "plutus_core", 178 | "pony", "postgresql_sql_dialect", "postscript", "povray", "powerquery", "powershell", 179 | "prolog", "promela", "promql", "properties", "protocol_buffer", "prql", "psl", "puppet", 180 | "python", "python_2", 181 | 182 | "qbasic", "qml", 183 | 184 | "r", "racket", "ragel", "react", "reasonml", "reg", "rego", "rexx", "rpm_spec", "rst", 185 | "ruby", "rust", 186 | 187 | "sas", "sass", "scala", "scheme", "scilab", "scss", "sed", "sieve", "smali", "smalltalk", 188 | "smarty", "snobol", "solidity", "sourcepawn", "sparql", "sql", "squidconf", "standard_ml", 189 | "stas", "stylus", "swift", "systemd", "systemverilog", 190 | 191 | "tablegen", "tal", "tasm", "tcl", "tcsh", "termcap", "terminfo", "terraform", "tex", 192 | "thrift", "toml", "tradingview", "transact-sql", "turing", "turtle", "twig", "typescript", 193 | "typoscript", "typoscriptcssdata", "typoscripthtmldata", 194 | 195 | "ucode", 196 | 197 | "v", "v_shell", "vala", "vb_net", "verilog", "vhdl", "vhs", "viml", "vue", "wdte", 198 | 199 | "webgpu_shading_language", "whiley", 200 | 201 | "xml", "xorg", 202 | 203 | "yaml", "yang", "z80_assembly", 204 | 205 | "zed", "zig" 206 | ] 207 | ``` 208 | 209 | For details usage, check [tartrazine](https://github.com/ralsina/tartrazine) documents. 
210 | 211 | ## Performance 212 | 213 | Here is the result of parsing [a sample markdown file](benchmarks/source.md) on a MacBook Pro Retina 2015 (2.2 GHz): 214 | 215 | ``` 216 | Crystal Markdown (no longer present) 3.28k (305.29µs) (± 0.92%) fastest 217 | Markd 305.36 ( 3.27ms) (± 5.52%) 10.73× slower 218 | ``` 219 | 220 | Recently, I have been working on a comparison with other popular CommonMark parsers; the code is stored in [benchmarks](/benchmarks). 221 | 222 | ## How to Contribute 223 | 224 | Your contributions are always welcome! Please submit a pull request or create an issue to add a new question, bug or feature to the list. 225 | 226 | All [Contributors](https://github.com/icyleaf/markd/graphs/contributors) are on the wall. 227 | 228 | ## You may also like 229 | 230 | - [halite](https://github.com/icyleaf/halite) - HTTP Requests Client with a chainable REST API, built-in sessions and middlewares. 231 | - [totem](https://github.com/icyleaf/totem) - Load and parse a configuration file or string in JSON, YAML, dotenv formats. 232 | - [poncho](https://github.com/icyleaf/poncho) - A .env parser/loader improved for performance. 233 | - [popcorn](https://github.com/icyleaf/popcorn) - Easy and Safe casting from one type to another. 234 | - [fast-crystal](https://github.com/icyleaf/fast-crystal) - 💨 Writing Fast Crystal 😍 -- Collect Common Crystal idioms. 
235 | 236 | ## License 237 | 238 | [MIT License](https://github.com/icyleaf/markd/blob/master/LICENSE) © icyleaf 239 | -------------------------------------------------------------------------------- /src/markd/renderers/html_renderer.cr: -------------------------------------------------------------------------------- 
require "uri"

module Markd
  # Renders a parsed document tree to HTML.
  #
  # Each public method handles one node type and is invoked twice for container
  # nodes: once with `entering` set to `true` and once with it set to `false`.
  # Helpers such as `newline`, `output`, `literal` and `escape` are not defined
  # in this class; they are presumably inherited from `Renderer`.
  class HTMLRenderer < Renderer
    # Nesting depth of image nodes currently being rendered. While it is
    # greater than zero, `tag` emits nothing.
    @disable_tag = 0
    @last_output = "\n"

    # Number of currently open `<strong>` elements (only tracked in GFM mode).
    @strong_stack = 0

    # Tag names indexed by heading level minus one.
    HEADINGS = %w[h1 h2 h3 h4 h5 h6]

    # Emits the opening or closing `<hN>` tag for a heading; when the `toc`
    # option is on, an anchor is written right after the opening tag.
    def heading(node : Node, entering : Bool) : Nil
      tag_name = HEADINGS[node.data["level"].as(Int32) - 1]
      if entering
        newline
        tag(tag_name, attrs(node))
        toc(node) if @options.toc?
      else
        tag(tag_name, end_tag: true)
        newline
      end
    end

    # Renders an inline code span wrapped in `<code>`.
    def code(node : Node, entering : Bool) : Nil
      tag("code") do
        code_body(node)
      end
    end

    # Writes the text of an inline code span. Kept separate so subclasses can
    # override only the body.
    def code_body(node : Node)
      output(node.text)
    end

    # Renders a code block. The implementation is picked at compile time:
    # if a top-level `Tartrazine` constant exists, *formatter* is used for
    # highlighting; otherwise a plain `<pre><code>` is emitted and *formatter*
    # is ignored.
    def code_block(node : Node, entering : Bool, formatter : T?) : Nil forall T
      {% if @top_level.has_constant?("Tartrazine") %}
        render_code_block_use_tartrazine(node, formatter)
      {% else %}
        render_code_block_use_code_tag(node)
      {% end %}
    end

    # Returns the first word of the fence info string, stripped, or `nil` when
    # there is none or it is blank.
    def code_block_language(languages)
      languages.try(&.first?).try(&.strip.presence)
    end

    # Writes the content of a code block. *lang* is unused here; it is
    # available to overriding implementations.
    def code_block_body(node : Node, lang : String?) : Nil
      output(node.text)
    end

    # Renders `<hr />` on its own line.
    def thematic_break(node : Node, entering : Bool) : Nil
      newline
      tag("hr", attrs(node), self_closing: true)
      newline
    end

    # Renders the opening or closing `<blockquote>` tag.
    def block_quote(node : Node, entering : Bool) : Nil
      newline
      if entering
        tag("blockquote", attrs(node))
      else
        tag("blockquote", end_tag: true)
      end
      newline
    end

    # Renders an alert as `<div class="alert alert-KIND">` followed by a
    # `<p class="alert-title">` holding the alert title.
    def alert(node : Node, entering : Bool) : Nil
      newline
      if entering
        tag("div", {"class" => "alert alert-#{node.data["alert"].to_s.downcase}"})
        tag("p", {"class" => "alert-title"}) do
          output(node.data["title"].as(String))
        end
      else
        tag("div", end_tag: true)
      end
      newline
    end

    # Renders `<table>` / `</table>`. The `<tbody>` is opened by `table_row`
    # after the heading row, so it is closed here when the table has a body.
    def table(node : Node, entering : Bool) : Nil
      has_body = node.data["has_body"]
      newline
      if entering
        tag("table", attrs(node))
      else
        if has_body
          tag("tbody", end_tag: true)
          newline
        end
        tag("table", end_tag: true)
      end
      newline
    end

    # Renders a table row. The heading row is wrapped in `<thead>`, and on
    # leaving it `<tbody>` is opened if body rows follow.
    def table_row(node : Node, entering : Bool) : Nil
      newline
      is_heading = node.data["heading"]
      has_body = node.data["has_body"]
      if entering
        if is_heading
          tag("thead")
          newline
        end
        tag("tr", attrs(node))
      else
        tag("tr", end_tag: true)
        newline
        if is_heading
          tag("thead", end_tag: true)
          newline
          if has_body
            tag("tbody")
            newline
          end
        end
      end
    end

    # Renders a `<th>` or `<td>` cell, adding an `align` attribute when the
    # cell carries a non-empty alignment.
    def table_cell(node : Node, entering : Bool) : Nil
      tag_name = node.data["heading"] ? "th" : "td"
      if !node.data["align"].to_s.empty?
        attrs = {"align" => node.data["align"]}
      else
        attrs = {} of String => String
      end
      if entering
        newline
        tag(tag_name, attrs)
      else
        tag(tag_name, end_tag: true)
        newline
      end
    end

    # Renders `<ol>` or `<ul>`. A `start` attribute is added only when the
    # list does not start at 1.
    def list(node : Node, entering : Bool) : Nil
      tag_name = node.data["type"] == "ordered" ? "ol" : "ul"

      newline
      if entering
        attrs = attrs(node)

        if (start = node.data["start"].as(Int32)) && start != 1
          attrs ||= {} of String => String
          attrs["start"] = start.to_s
        end

        tag(tag_name, attrs)
      else
        tag(tag_name, end_tag: true)
      end
      newline
    end

    # Renders `<li>`. Checkbox items additionally get a disabled
    # `<input type="checkbox">` (with `checked` when ticked) and a space.
    def item(node : Node, entering : Bool) : Nil
      if entering
        tag("li", attrs(node))

        if node.data["type"] == "checkbox"
          if node.data["checked"]?
            attributes = {
              "checked"  => "",
              "disabled" => "",
              "type"     => "checkbox",
            }
          else
            attributes = {
              "disabled" => "",
              "type"     => "checkbox",
            }
          end

          tag("input", attributes)
          literal(" ")
        end
      else
        tag("li", end_tag: true)
        newline
      end
    end

    # Renders `<a>`. In safe mode the `href` is dropped for destinations that
    # `potentially_unsafe` flags; otherwise the destination is resolved
    # against the configured base URL and escaped.
    def link(node : Node, entering : Bool) : Nil
      if entering
        attrs = attrs(node)
        destination = node.data["destination"].as(String)

        unless @options.safe? && potentially_unsafe(destination)
          attrs ||= {} of String => String
          destination = resolve_uri(destination, node)
          attrs["href"] = escape(destination)
        end

        if (title = node.data["title"].as(String)) && !title.empty?
          attrs ||= {} of String => String
          attrs["title"] = escape(title)
        end

        tag("a", attrs)
      else
        tag("a", end_tag: true)
      end
    end

    # Resolves a relative *destination* against `@options.base_url`.
    #
    # Returns *destination* unchanged when no base URL is configured, when the
    # destination is already absolute, or when it cannot be parsed as a URI.
    # The last case matters because destinations come straight from the
    # document: a malformed one must not abort rendering of the whole page.
    private def resolve_uri(destination, node)
      base_url = @options.base_url
      return destination unless base_url

      uri = URI.parse(destination)
      return destination if uri.absolute?

      base_url.resolve(uri).to_s
    rescue URI::Error
      destination
    end

    # Renders `<img>`. The opening part (up to `alt="`) is written on entry and
    # the title and closing `" />` on exit, so whatever the children emit in
    # between becomes the alt text. `@disable_tag` is raised while inside, which
    # makes `tag` a no-op, and nested images only write for the outermost one.
    def image(node : Node, entering : Bool) : Nil
      if entering
        if @disable_tag == 0
          destination = node.data["destination"].as(String)
          if @options.safe? && potentially_unsafe(destination)
            literal(%(<img src="" alt="))
          else
            destination = resolve_uri(destination, node)
            literal(%(<img src="#{escape(destination)}" alt="))
          end
        end
        @disable_tag += 1
      else
        @disable_tag -= 1
        if @disable_tag == 0
          if (title = node.data["title"].as(String)) && !title.empty?
            literal(%(" title="#{escape(title)}))
          end
          literal(%(" />))
        end
      end
    end

    # Writes a raw HTML block verbatim, or a placeholder comment in safe mode.
    def html_block(node : Node, entering : Bool) : Nil
      newline
      content = @options.safe? ? "<!-- raw HTML omitted -->" : node.text
      literal(content)
      newline
    end

    # Writes inline raw HTML verbatim, or a placeholder comment in safe mode.
    def html_inline(node : Node, entering : Bool) : Nil
      content = @options.safe? ? "<!-- raw HTML omitted -->" : node.text
      literal(content)
    end

    # Renders `<p>` / `</p>`, except for paragraphs whose grandparent is a
    # tight list: those are rendered without the wrapping tag.
    def paragraph(node : Node, entering : Bool) : Nil
      if (grand_parent = node.parent?.try &.parent?) && grand_parent.type.list?
        return if grand_parent.data["tight"]
      end

      if entering
        newline
        tag("p", attrs(node))
      else
        tag("p", end_tag: true)
        newline
      end
    end

    # Renders `<em>` / `</em>`. The strong-nesting counter is stashed on the
    # node and reset on entry, then restored on exit, so a `<strong>` inside
    # the emphasis is emitted even if one is already open outside it.
    def emphasis(node : Node, entering : Bool) : Nil
      if entering
        node.data["strong_stack"] = @strong_stack
        @strong_stack = 0
      end

      tag("em", end_tag: !entering)

      if !entering
        @strong_stack = node.data["strong_stack"].as(Int32)
      end
    end

    # Writes a soft line break as a literal newline.
    def soft_break(node : Node, entering : Bool) : Nil
      literal("\n")
    end

    # Writes a hard line break as `<br />` followed by a newline.
    def line_break(node : Node, entering : Bool) : Nil
      tag("br", self_closing: true)
      newline
    end

    # Renders `<strong>` / `</strong>`. In GFM mode directly nested strong
    # nodes collapse into one element: the tag is only written at depth zero.
    # The counter is decremented before the closing tag and incremented after
    # the opening one. Outside GFM mode it stays at zero, so every tag is written.
    def strong(node : Node, entering : Bool) : Nil
      @strong_stack -= 1 if @options.gfm? && !entering

      tag("strong", end_tag: !entering) if @strong_stack == 0

      @strong_stack += 1 if @options.gfm? && entering
    end

    # Renders `<del>` / `</del>`.
    def strikethrough(node : Node, entering : Bool) : Nil
      tag("del", end_tag: !entering)
    end

    # Writes a text node through `output`.
    def text(node : Node, entering : Bool) : Nil
      output(node.text)
    end

    # Writes a single tag to `@output_io`.
    #
    # Attribute values are written as given; callers escape them where needed.
    # Does nothing while `@disable_tag` is positive (inside an image).
    private def tag(name : String, attrs = nil, self_closing = false, end_tag = false)
      return if @disable_tag > 0

      @output_io << "<"
      @output_io << "/" if end_tag
      @output_io << name
      attrs.try &.each do |key, value|
        @output_io << ' ' << key << '=' << '"' << value << '"'
      end

      @output_io << " /" if self_closing
      @output_io << ">"
      @last_output = ">"
    end

    # Writes an opening tag, yields for the content, then writes the closing tag.
    private def tag(name : String, attrs = nil, &)
      tag(name, attrs)
      yield
      tag(name, end_tag: true)
    end

    # Truthy when *url* matches `Rule::UNSAFE_PROTOCOL` and does not match
    # `Rule::UNSAFE_DATA_PROTOCOL`.
    private def potentially_unsafe(url : String)
      url.match(Rule::UNSAFE_PROTOCOL) && !url.match(Rule::UNSAFE_DATA_PROTOCOL)
    end

    # Writes an empty `<a>` anchor for a heading, with id and href derived from
    # the URI-encoded text of the heading's first child.
    #
    # NOTE(review): `node.first_child` is used without a nil check; confirm how
    # a heading with no children behaves when the `toc` option is enabled.
    private def toc(node : Node)
      return unless node.type.heading?

      # Only the encoder call differs between Crystal versions.
      {% if compare_versions(Crystal::VERSION, "1.2.0") < 0 %}
        title = URI.encode(node.first_child.text)
      {% else %}
        title = URI.encode_path(node.first_child.text)
      {% end %}

      @output_io << %(<a id="anchor-) << title << %(" class="anchor" href="#anchor-) << title << %("></a>)
      @last_output = ">"
    end

    # Returns a `data-source-pos` attribute hash when the `source_pos` option
    # is on and the node has a position; otherwise `nil`.
    private def attrs(node : Node)
      if @options.source_pos? && (pos = node.source_pos)
        {"data-source-pos" => "#{pos[0][0]}:#{pos[0][1]}-#{pos[1][0]}:#{pos[1][1]}"}
      end
    end

    # Code block rendering used when Tartrazine is available. With a fence
    # language the formatter's output is written verbatim; without one the
    # block falls back to a plain `<pre><code>`.
    private def render_code_block_use_tartrazine(node : Node, formatter : Tartrazine::Formatter?)
      languages = node.fence_language ? node.fence_language.split : nil
      lang = code_block_language(languages)

      newline

      if lang
        lexer = Tartrazine.lexer(lang)

        literal(formatter.format(node.text.chomp, lexer))
      else
        code_tag_attrs = attrs(node)
        pre_tag_attrs = if @options.prettyprint?
                          {"class" => "prettyprint"}
                        end

        tag("pre", pre_tag_attrs) do
          tag("code", code_tag_attrs) do
            code_block_body(node, lang)
          end
        end
      end

      newline
    end

    # Code block rendering used when Tartrazine is not available: a
    # `<pre><code>` pair, with `class="language-LANG"` on `<code>` when a fence
    # language is present and `class="prettyprint"` on `<pre>` when the
    # `prettyprint` option is on.
    private def render_code_block_use_code_tag(node : Node)
      languages = node.fence_language ? node.fence_language.split : nil
      code_tag_attrs = attrs(node)
      pre_tag_attrs = if @options.prettyprint?
                        {"class" => "prettyprint"}
                      end

      lang = code_block_language(languages)
      if lang
        code_tag_attrs ||= {} of String => String
        code_tag_attrs["class"] = "language-#{escape(lang)}"
      end

      newline
      tag("pre", pre_tag_attrs) do
        tag("code", code_tag_attrs) do
          code_block_body(node, lang)
        end
      end
      newline
    end
  end
end
392 | -------------------------------------------------------------------------------- /spec/fixtures/gfm-regression.txt: -------------------------------------------------------------------------------- 1 | ### Regression tests 2 | 3 | Issue #113: EOL character weirdness on Windows 4 | (Important: first line ends with CR + CR + LF) 5 | 6 | ```````````````````````````````` example 7 | line1 8 | 9 | line2 10 | . 11 | <p>line1</p> 12 | <p>line2</p> 13 | ```````````````````````````````` 14 | 15 | Issue #114: cmark skipping first character in line 16 | (Important: the blank lines around "Repeatedly" contain a tab.) 17 | 18 | ```````````````````````````````` example 19 | By taking it apart 20 | 21 | - alternative solutions 22 | → 23 | Repeatedly solving 24 | → 25 | - how techniques 26 | . 27 | <p>By taking it apart</p> 28 | <ul> 29 | <li>alternative solutions</li> 30 | </ul> 31 | <p>Repeatedly solving</p> 32 | <ul> 33 | <li>how techniques</li> 34 | </ul> 35 | ```````````````````````````````` 36 | 37 | Issue jgm/CommonMark#430: h2..h6 not recognized as block tags. 38 | 39 | ```````````````````````````````` example 40 | <h1>lorem</h1> 41 | 42 | <h2>lorem</h2> 43 | 44 | <h3>lorem</h3> 45 | 46 | <h4>lorem</h4> 47 | 48 | <h5>lorem</h5> 49 | 50 | <h6>lorem</h6> 51 | . 52 | <h1>lorem</h1> 53 | <h2>lorem</h2> 54 | <h3>lorem</h3> 55 | <h4>lorem</h4> 56 | <h5>lorem</h5> 57 | <h6>lorem</h6> 58 | ```````````````````````````````` 59 | 60 | Issue jgm/commonmark.js#109 - tabs after setext header line 61 | 62 | 63 | ```````````````````````````````` example 64 | hi 65 | --→ 66 | . 
67 | <h2>hi</h2> 68 | ```````````````````````````````` 69 | 70 | Issue #177 - incorrect emphasis parsing 71 | 72 | ```````````````````````````````` example 73 | a***b* c* 74 | . 75 | <p>a*<em><em>b</em> c</em></p> 76 | ```````````````````````````````` 77 | 78 | Issue #193 - unescaped left angle brackets in link destination 79 | 80 | ```````````````````````````````` example 81 | [a] 82 | 83 | [a]: <te<st> 84 | . 85 | <p>[a]</p> 86 | <p>[a]: <te<st></p> 87 | ```````````````````````````````` 88 | 89 | Issue #192 - escaped spaces in link destination 90 | 91 | 92 | ```````````````````````````````` example 93 | [a](te\ st) 94 | . 95 | <p>[a](te\ st)</p> 96 | ```````````````````````````````` 97 | 98 | Issue github/github#76615: multiple delimiter combinations gets sketchy 99 | 100 | 101 | ```````````````````````````````` example strikethrough 102 | ~~**_`this`_**~~ 103 | ~~***`this`***~~ 104 | ~~___`this`___~~ 105 | 106 | **_`this`_** 107 | ***`this`*** 108 | ___`this`___ 109 | 110 | ~~**_this_**~~ 111 | ~~***this***~~ 112 | ~~___this___~~ 113 | 114 | **_this_** 115 | ***this*** 116 | ___this___ 117 | . 
118 | <p><del><strong><em><code>this</code></em></strong></del><br /> 119 | <del><em><strong><code>this</code></strong></em></del><br /> 120 | <del><em><strong><code>this</code></strong></em></del></p> 121 | <p><strong><em><code>this</code></em></strong><br /> 122 | <em><strong><code>this</code></strong></em><br /> 123 | <em><strong><code>this</code></strong></em></p> 124 | <p><del><strong><em>this</em></strong></del><br /> 125 | <del><em><strong>this</strong></em></del><br /> 126 | <del><em><strong>this</strong></em></del></p> 127 | <p><strong><em>this</em></strong><br /> 128 | <em><strong>this</strong></em><br /> 129 | <em><strong>this</strong></em></p> 130 | ```````````````````````````````` 131 | 132 | Issue #527 - meta tags in inline contexts 133 | 134 | ```````````````````````````````` example 135 | City: 136 | <span itemprop="contentLocation" itemscope itemtype="https://schema.org/City"> 137 | <meta itemprop="name" content="Springfield"> 138 | </span> 139 | . 140 | <p>City: 141 | <span itemprop="contentLocation" itemscope itemtype="https://schema.org/City"> 142 | <meta itemprop="name" content="Springfield"> 143 | </span></p> 144 | ```````````````````````````````` 145 | 146 | cmark-gfm strikethrough rules 147 | 148 | ```````````````````````````````` example strikethrough 149 | ~Hi~ Hello, world! 150 | . 151 | <p><del>Hi</del> Hello, world!</p> 152 | ```````````````````````````````` 153 | 154 | ```````````````````````````````` example strikethrough 155 | This ~text~ ~~is~~ ~~~curious~~~. 156 | . 157 | <p>This <del>text</del> <del>is</del> ~~~curious~~~.</p> 158 | ```````````````````````````````` 159 | 160 | `~` should not be escaped in href — https://github.com/github/markup/issues/311 161 | 162 | ```````````````````````````````` example 163 | [x](http://members.aon.at/~nkehrer/ibm_5110/emu5110.html) 164 | . 
165 | <p><a href="http://members.aon.at/~nkehrer/ibm_5110/emu5110.html">x</a></p> 166 | ```````````````````````````````` 167 | 168 | Footnotes in tables 169 | 170 | ```````````````````````````````` example table footnotes pending 171 | A footnote in a paragraph[^1] 172 | 173 | | Column1 | Column2 | 174 | | --------- | ------- | 175 | | foot [^1] | note | 176 | 177 | [^1]: a footnote 178 | . 179 | <p>A footnote in a paragraph<sup class="footnote-ref"><a href="#fn-1" id="fnref-1" data-footnote-ref>1</a></sup></p> 180 | <table> 181 | <thead> 182 | <tr> 183 | <th>Column1</th> 184 | <th>Column2</th> 185 | </tr> 186 | </thead> 187 | <tbody> 188 | <tr> 189 | <td>foot <sup class="footnote-ref"><a href="#fn-1" id="fnref-1-2" data-footnote-ref>1</a></sup></td> 190 | <td>note</td> 191 | </tr> 192 | </tbody> 193 | </table> 194 | <section class="footnotes" data-footnotes> 195 | <ol> 196 | <li id="fn-1"> 197 | <p>a footnote <a href="#fnref-1" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="1" aria-label="Back to reference 1">↩</a> <a href="#fnref-1-2" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="1-2" aria-label="Back to reference 1-2">↩<sup class="footnote-ref">2</sup></a></p> 198 | </li> 199 | </ol> 200 | </section> 201 | ```````````````````````````````` 202 | 203 | Issue #527 - meta tags in inline contexts 204 | 205 | ```````````````````````````````` example 206 | City: 207 | <span itemprop="contentLocation" itemscope itemtype="https://schema.org/City"> 208 | <meta itemprop="name" content="Springfield"> 209 | </span> 210 | . 211 | <p>City: 212 | <span itemprop="contentLocation" itemscope itemtype="https://schema.org/City"> 213 | <meta itemprop="name" content="Springfield"> 214 | </span></p> 215 | ```````````````````````````````` 216 | 217 | Issue #530 - link parsing corner cases 218 | 219 | ```````````````````````````````` example 220 | [a](\ b) 221 | 222 | [a](<<b) 223 | 224 | [a](<b 225 | ) 226 | . 
227 | <p>[a](\ b)</p> 228 | <p>[a](<<b)</p> 229 | <p>[a](<b 230 | )</p> 231 | ```````````````````````````````` 232 | 233 | Issue commonmark#526 - unescaped ( in link title 234 | 235 | ```````````````````````````````` example pending 236 | [link](url ((title)) 237 | . 238 | <p>[link](url ((title))</p> 239 | ```````````````````````````````` 240 | 241 | Issue commonamrk#517 - script, pre, style close tag without 242 | opener. 243 | 244 | ```````````````````````````````` example 245 | </script> 246 | 247 | </pre> 248 | 249 | </style> 250 | . 251 | </script> 252 | </pre> 253 | </style> 254 | ```````````````````````````````` 255 | 256 | Issue #289. 257 | 258 | ```````````````````````````````` example 259 | [a](<b) c> 260 | . 261 | <p>[a](<b) c></p> 262 | ```````````````````````````````` 263 | 264 | Pull request #128 - Buffer overread in tables extension 265 | 266 | ```````````````````````````````` example table 267 | | 268 | -| 269 | . 270 | <p>| 271 | -|</p> 272 | ```````````````````````````````` 273 | 274 | Footnotes may be nested inside other footnotes. 275 | 276 | ```````````````````````````````` example footnotes pending 277 | This is some text. It has a citation.[^citation] 278 | 279 | [^another-citation]: My second citation. 280 | 281 | [^citation]: This is a long winded parapgraph that also has another citation.[^another-citation] 282 | . 283 | <p>This is some text. 
It has a citation.<sup class="footnote-ref"><a href="#fn-citation" id="fnref-citation" data-footnote-ref>1</a></sup></p> 284 | <section class="footnotes" data-footnotes> 285 | <ol> 286 | <li id="fn-citation"> 287 | <p>This is a long winded parapgraph that also has another citation.<sup class="footnote-ref"><a href="#fn-another-citation" id="fnref-another-citation" data-footnote-ref>2</a></sup> <a href="#fnref-citation" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="1" aria-label="Back to reference 1">↩</a></p> 288 | </li> 289 | <li id="fn-another-citation"> 290 | <p>My second citation. <a href="#fnref-another-citation" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="2" aria-label="Back to reference 2">↩</a></p> 291 | </li> 292 | </ol> 293 | </section> 294 | ```````````````````````````````` 295 | 296 | Footnotes are similar to, but should not be confused with, link references 297 | 298 | ```````````````````````````````` example footnotes pending 299 | This is some text. It has two footnotes references, side-by-side without any spaces,[^footnote1][^footnote2] which are definitely not link references. 300 | 301 | [^footnote1]: Hello. 302 | 303 | [^footnote2]: Goodbye. 304 | . 305 | <p>This is some text. It has two footnotes references, side-by-side without any spaces,<sup class="footnote-ref"><a href="#fn-footnote1" id="fnref-footnote1" data-footnote-ref>1</a></sup><sup class="footnote-ref"><a href="#fn-footnote2" id="fnref-footnote2" data-footnote-ref>2</a></sup> which are definitely not link references.</p> 306 | <section class="footnotes" data-footnotes> 307 | <ol> 308 | <li id="fn-footnote1"> 309 | <p>Hello. <a href="#fnref-footnote1" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="1" aria-label="Back to reference 1">↩</a></p> 310 | </li> 311 | <li id="fn-footnote2"> 312 | <p>Goodbye. 
<a href="#fnref-footnote2" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="2" aria-label="Back to reference 2">↩</a></p> 313 | </li> 314 | </ol> 315 | </section> 316 | ```````````````````````````````` 317 | 318 | Footnotes may begin with or have a 'w' or a '_' in their reference label. 319 | 320 | ```````````````````````````````` example footnotes autolink pending 321 | This is some text. Sometimes the autolinker splits up text into multiple nodes, hoping it will find a hyperlink, so this text has a footnote whose reference label begins with a `w`.[^widely-cited] 322 | 323 | It has another footnote that contains many different characters (the autolinker was also breaking on `_`).[^sphinx-of-black-quartz_judge-my-vow-0123456789] 324 | 325 | [^sphinx-of-black-quartz_judge-my-vow-0123456789]: so does this. 326 | 327 | [^widely-cited]: this renders properly. 328 | . 329 | <p>This is some text. Sometimes the autolinker splits up text into multiple nodes, hoping it will find a hyperlink, so this text has a footnote whose reference label begins with a <code>w</code>.<sup class="footnote-ref"><a href="#fn-widely-cited" id="fnref-widely-cited" data-footnote-ref>1</a></sup></p> 330 | <p>It has another footnote that contains many different characters (the autolinker was also breaking on <code>_</code>).<sup class="footnote-ref"><a href="#fn-sphinx-of-black-quartz_judge-my-vow-0123456789" id="fnref-sphinx-of-black-quartz_judge-my-vow-0123456789" data-footnote-ref>2</a></sup></p> 331 | <section class="footnotes" data-footnotes> 332 | <ol> 333 | <li id="fn-widely-cited"> 334 | <p>this renders properly. <a href="#fnref-widely-cited" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="1" aria-label="Back to reference 1">↩</a></p> 335 | </li> 336 | <li id="fn-sphinx-of-black-quartz_judge-my-vow-0123456789"> 337 | <p>so does this. 
<a href="#fnref-sphinx-of-black-quartz_judge-my-vow-0123456789" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="2" aria-label="Back to reference 2">↩</a></p> 338 | </li> 339 | </ol> 340 | </section> 341 | ```````````````````````````````` 342 | 343 | Footnotes interacting with strikethrough should not lead to a use-after-free 344 | 345 | ```````````````````````````````` example footnotes autolink strikethrough table pending 346 | |Tot.....[^_a_]| 347 | . 348 | <p>|Tot.....[^_a_]|</p> 349 | ```````````````````````````````` 350 | 351 | Footnotes interacting with strikethrough should not lead to a use-after-free pt2 352 | 353 | ```````````````````````````````` example footnotes autolink strikethrough table pending 354 | [^~~is~~1] 355 | . 356 | <p>[^~~is~~1]</p> 357 | ```````````````````````````````` 358 | 359 | Adjacent unused footnotes definitions should not lead to a use after free 360 | 361 | ```````````````````````````````` example footnotes autolink strikethrough table 362 | Hello world 363 | 364 | 365 | [^a]:[^b]: 366 | . 367 | <p>Hello world</p> 368 | ```````````````````````````````` 369 | 370 | Issue #424 - emphasis before links 371 | 372 | ```````````````````````````````` example 373 | *text* [link](#section) 374 | . 375 | <p><em>text</em> <a href="#section">link</a></p> 376 | ```````````````````````````````` 377 | -------------------------------------------------------------------------------- /spec/fixtures/gfm-extensions.txt: -------------------------------------------------------------------------------- 1 | --- 2 | title: Extensions test 3 | author: Yuki Izumi 4 | version: 0.1 5 | date: '2016-08-31' 6 | license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)' 7 | ... 8 | 9 | ## Tables 10 | 11 | Here's a well-formed table, doing everything it should. 12 | 13 | ```````````````````````````````` example 14 | | abc | def | 15 | | --- | --- | 16 | | ghi | jkl | 17 | | mno | pqr | 18 | . 
19 | <table> 20 | <thead> 21 | <tr> 22 | <th>abc</th> 23 | <th>def</th> 24 | </tr> 25 | </thead> 26 | <tbody> 27 | <tr> 28 | <td>ghi</td> 29 | <td>jkl</td> 30 | </tr> 31 | <tr> 32 | <td>mno</td> 33 | <td>pqr</td> 34 | </tr> 35 | </tbody> 36 | </table> 37 | ```````````````````````````````` 38 | 39 | We're going to mix up the table now; we'll demonstrate that inline formatting 40 | works fine, but block elements don't. You can also have empty cells, and the 41 | textual alignment of the columns is shown to be irrelevant. 42 | 43 | ```````````````````````````````` example 44 | Hello! 45 | 46 | | _abc_ | セン | 47 | | ----- | ---- | 48 | | 1. Block elements inside cells don't work. | | 49 | | But _**inline elements do**_. | x | 50 | 51 | Hi! 52 | . 53 | <p>Hello!</p> 54 | <table> 55 | <thead> 56 | <tr> 57 | <th><em>abc</em></th> 58 | <th>セン</th> 59 | </tr> 60 | </thead> 61 | <tbody> 62 | <tr> 63 | <td>1. Block elements inside cells don't work.</td> 64 | <td></td> 65 | </tr> 66 | <tr> 67 | <td>But <em><strong>inline elements do</strong></em>.</td> 68 | <td>x</td> 69 | </tr> 70 | </tbody> 71 | </table> 72 | <p>Hi!</p> 73 | ```````````````````````````````` 74 | 75 | Here we demonstrate some edge cases about what is and isn't a table. 76 | 77 | ```````````````````````````````` example 78 | | Not enough table | to be considered table | 79 | 80 | | Not enough table | to be considered table | 81 | | Not enough table | to be considered table | 82 | 83 | | Just enough table | to be considered table | 84 | | ----------------- | ---------------------- | 85 | 86 | | ---- | --- | 87 | 88 | |x| 89 | |-| 90 | 91 | | xyz | 92 | | --- | 93 | . 
94 | <p>| Not enough table | to be considered table |</p> 95 | <p>| Not enough table | to be considered table | 96 | | Not enough table | to be considered table |</p> 97 | <table> 98 | <thead> 99 | <tr> 100 | <th>Just enough table</th> 101 | <th>to be considered table</th> 102 | </tr> 103 | </thead> 104 | </table> 105 | <p>| ---- | --- |</p> 106 | <table> 107 | <thead> 108 | <tr> 109 | <th>x</th> 110 | </tr> 111 | </thead> 112 | </table> 113 | <table> 114 | <thead> 115 | <tr> 116 | <th>xyz</th> 117 | </tr> 118 | </thead> 119 | </table> 120 | ```````````````````````````````` 121 | 122 | A "simpler" table, GFM style: 123 | 124 | ```````````````````````````````` example 125 | abc | def 126 | --- | --- 127 | xyz | ghi 128 | . 129 | <table> 130 | <thead> 131 | <tr> 132 | <th>abc</th> 133 | <th>def</th> 134 | </tr> 135 | </thead> 136 | <tbody> 137 | <tr> 138 | <td>xyz</td> 139 | <td>ghi</td> 140 | </tr> 141 | </tbody> 142 | </table> 143 | ```````````````````````````````` 144 | 145 | We are making the parser slighly more lax here. Here is a table with spaces at 146 | the end: 147 | 148 | ```````````````````````````````` example 149 | Hello! 150 | 151 | | _abc_ | セン | 152 | | ----- | ---- | 153 | | this row has a space at the end | | 154 | | But _**inline elements do**_. | x | 155 | 156 | Hi! 157 | . 158 | <p>Hello!</p> 159 | <table> 160 | <thead> 161 | <tr> 162 | <th><em>abc</em></th> 163 | <th>セン</th> 164 | </tr> 165 | </thead> 166 | <tbody> 167 | <tr> 168 | <td>this row has a space at the end</td> 169 | <td></td> 170 | </tr> 171 | <tr> 172 | <td>But <em><strong>inline elements do</strong></em>.</td> 173 | <td>x</td> 174 | </tr> 175 | </tbody> 176 | </table> 177 | <p>Hi!</p> 178 | ```````````````````````````````` 179 | 180 | Table alignment: 181 | 182 | ```````````````````````````````` example 183 | aaa | bbb | ccc | ddd | eee 184 | :-- | --- | :-: | --- | --: 185 | fff | ggg | hhh | iii | jjj 186 | . 
187 | <table> 188 | <thead> 189 | <tr> 190 | <th align="left">aaa</th> 191 | <th>bbb</th> 192 | <th align="center">ccc</th> 193 | <th>ddd</th> 194 | <th align="right">eee</th> 195 | </tr> 196 | </thead> 197 | <tbody> 198 | <tr> 199 | <td align="left">fff</td> 200 | <td>ggg</td> 201 | <td align="center">hhh</td> 202 | <td>iii</td> 203 | <td align="right">jjj</td> 204 | </tr> 205 | </tbody> 206 | </table> 207 | ```````````````````````````````` 208 | 209 | ### Table cell count mismatches 210 | 211 | The header and delimiter row must match. 212 | 213 | ```````````````````````````````` example 214 | | a | b | c | 215 | | --- | --- | 216 | | this | isn't | okay | 217 | . 218 | <p>| a | b | c | 219 | | --- | --- | 220 | | this | isn't | okay |</p> 221 | ```````````````````````````````` 222 | 223 | But any of the body rows can be shorter. Rows longer 224 | than the header are truncated. 225 | 226 | ```````````````````````````````` example 227 | | a | b | c | 228 | | --- | --- | --- 229 | | x 230 | | a | b 231 | | 1 | 2 | 3 | 4 | 5 | 232 | . 233 | <table> 234 | <thead> 235 | <tr> 236 | <th>a</th> 237 | <th>b</th> 238 | <th>c</th> 239 | </tr> 240 | </thead> 241 | <tbody> 242 | <tr> 243 | <td>x</td> 244 | <td></td> 245 | <td></td> 246 | </tr> 247 | <tr> 248 | <td>a</td> 249 | <td>b</td> 250 | <td></td> 251 | </tr> 252 | <tr> 253 | <td>1</td> 254 | <td>2</td> 255 | <td>3</td> 256 | </tr> 257 | </tbody> 258 | </table> 259 | ```````````````````````````````` 260 | 261 | ### Embedded pipes 262 | 263 | Tables with embedded pipes could be tricky. 264 | 265 | ```````````````````````````````` example 266 | | a | b | 267 | | --- | --- | 268 | | Escaped pipes are \|okay\|. | Like \| this. | 269 | | Within `\|code\| is okay` too. | 270 | | _**`c\|`**_ \| complex 271 | | don't **\_reparse\_** 272 | . 
273 | <table> 274 | <thead> 275 | <tr> 276 | <th>a</th> 277 | <th>b</th> 278 | </tr> 279 | </thead> 280 | <tbody> 281 | <tr> 282 | <td>Escaped pipes are |okay|.</td> 283 | <td>Like | this.</td> 284 | </tr> 285 | <tr> 286 | <td>Within <code>|code| is okay</code> too.</td> 287 | <td></td> 288 | </tr> 289 | <tr> 290 | <td><em><strong><code>c|</code></strong></em> | complex</td> 291 | <td></td> 292 | </tr> 293 | <tr> 294 | <td>don't <strong>_reparse_</strong></td> 295 | <td></td> 296 | </tr> 297 | </tbody> 298 | </table> 299 | ```````````````````````````````` 300 | 301 | ### Oddly-formatted markers 302 | 303 | This shouldn't assert. 304 | 305 | ```````````````````````````````` example 306 | | a | 307 | --- | 308 | . 309 | <table> 310 | <thead> 311 | <tr> 312 | <th>a</th> 313 | </tr> 314 | </thead> 315 | </table> 316 | ```````````````````````````````` 317 | 318 | ### Escaping 319 | 320 | ```````````````````````````````` example 321 | | a | b | 322 | | --- | --- | 323 | | \\ | `\\` | 324 | | \\\\ | `\\\\` | 325 | | \_ | `\_` | 326 | | \| | `\|` | 327 | | \a | `\a` | 328 | 329 | \\ `\\` 330 | 331 | \\\\ `\\\\` 332 | 333 | \_ `\_` 334 | 335 | \| `\|` 336 | 337 | \a `\a` 338 | . 
339 | <table> 340 | <thead> 341 | <tr> 342 | <th>a</th> 343 | <th>b</th> 344 | </tr> 345 | </thead> 346 | <tbody> 347 | <tr> 348 | <td>\</td> 349 | <td><code>\\</code></td> 350 | </tr> 351 | <tr> 352 | <td>\\</td> 353 | <td><code>\\\\</code></td> 354 | </tr> 355 | <tr> 356 | <td>_</td> 357 | <td><code>\_</code></td> 358 | </tr> 359 | <tr> 360 | <td>|</td> 361 | <td><code>|</code></td> 362 | </tr> 363 | <tr> 364 | <td>\a</td> 365 | <td><code>\a</code></td> 366 | </tr> 367 | </tbody> 368 | </table> 369 | <p>\ <code>\\</code></p> 370 | <p>\\ <code>\\\\</code></p> 371 | <p>_ <code>\_</code></p> 372 | <p>| <code>\|</code></p> 373 | <p>\a <code>\a</code></p> 374 | ```````````````````````````````` 375 | 376 | ### Embedded HTML 377 | 378 | ```````````````````````````````` example 379 | | a | 380 | | --- | 381 | | <strong>hello</strong> | 382 | | ok <br> sure | 383 | . 384 | <table> 385 | <thead> 386 | <tr> 387 | <th>a</th> 388 | </tr> 389 | </thead> 390 | <tbody> 391 | <tr> 392 | <td><strong>hello</strong></td> 393 | </tr> 394 | <tr> 395 | <td>ok <br> sure</td> 396 | </tr> 397 | </tbody> 398 | </table> 399 | ```````````````````````````````` 400 | 401 | ### Reference-style links 402 | 403 | ```````````````````````````````` example 404 | Here's a link to [Freedom Planet 2][]. 405 | 406 | | Here's a link to [Freedom Planet 2][] in a table header. | 407 | | --- | 408 | | Here's a link to [Freedom Planet 2][] in a table row. | 409 | 410 | [Freedom Planet 2]: http://www.freedomplanet2.com/ 411 | . 
412 | <p>Here's a link to <a href="http://www.freedomplanet2.com/">Freedom Planet 2</a>.</p> 413 | <table> 414 | <thead> 415 | <tr> 416 | <th>Here's a link to <a href="http://www.freedomplanet2.com/">Freedom Planet 2</a> in a table header.</th> 417 | </tr> 418 | </thead> 419 | <tbody> 420 | <tr> 421 | <td>Here's a link to <a href="http://www.freedomplanet2.com/">Freedom Planet 2</a> in a table row.</td> 422 | </tr> 423 | </tbody> 424 | </table> 425 | ```````````````````````````````` 426 | 427 | ### Sequential cells 428 | 429 | ```````````````````````````````` example 430 | | a | b | c | 431 | | --- | --- | --- | 432 | | d || e | 433 | . 434 | <table> 435 | <thead> 436 | <tr> 437 | <th>a</th> 438 | <th>b</th> 439 | <th>c</th> 440 | </tr> 441 | </thead> 442 | <tbody> 443 | <tr> 444 | <td>d</td> 445 | <td></td> 446 | <td>e</td> 447 | </tr> 448 | </tbody> 449 | </table> 450 | ```````````````````````````````` 451 | 452 | ### Interaction with emphasis 453 | 454 | ```````````````````````````````` example 455 | | a | b | 456 | | --- | --- | 457 | |***(a)***| 458 | . 459 | <table> 460 | <thead> 461 | <tr> 462 | <th>a</th> 463 | <th>b</th> 464 | </tr> 465 | </thead> 466 | <tbody> 467 | <tr> 468 | <td><em><strong>(a)</strong></em></td> 469 | <td></td> 470 | </tr> 471 | </tbody> 472 | </table> 473 | ```````````````````````````````` 474 | 475 | ### a table can be recognised when separated from a paragraph of text without an empty line 476 | 477 | ```````````````````````````````` example 478 | 123 479 | 456 480 | | a | b | 481 | | ---| --- | 482 | d | e 483 | . 484 | <p>123 485 | 456</p> 486 | <table> 487 | <thead> 488 | <tr> 489 | <th>a</th> 490 | <th>b</th> 491 | </tr> 492 | </thead> 493 | <tbody> 494 | <tr> 495 | <td>d</td> 496 | <td>e</td> 497 | </tr> 498 | </tbody> 499 | </table> 500 | ```````````````````````````````` 501 | 502 | ## Strikethroughs 503 | 504 | A well-formed strikethrough. 505 | 506 | ```````````````````````````````` example 507 | A proper ~strikethrough~. 
508 | . 509 | <p>A proper <del>strikethrough</del>.</p> 510 | ```````````````````````````````` 511 | 512 | Some strikethrough edge cases. 513 | 514 | ```````````````````````````````` example 515 | These are ~not strikethroughs. 516 | 517 | No, they are not~ 518 | 519 | This ~is ~ legit~ isn't ~ legit. 520 | 521 | This is not ~~~~~one~~~~~ huge strikethrough. 522 | 523 | ~one~ ~~two~~ ~~~three~~~ 524 | 525 | No ~mismatch~~ 526 | . 527 | <p>These are ~not strikethroughs.</p> 528 | <p>No, they are not~</p> 529 | <p>This <del>is ~ legit</del> isn't ~ legit.</p> 530 | <p>This is not ~~~~~one~~~~~ huge strikethrough.</p> 531 | <p><del>one</del> <del>two</del> ~~~three~~~</p> 532 | <p>No ~mismatch~~</p> 533 | ```````````````````````````````` 534 | 535 | Using 200 tilde since it overflows the internal buffer 536 | size (100) for parsing delimiters in inlines.c 537 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~striked~ 538 | 539 | ## Autolinks 540 | 541 | ```````````````````````````````` example autolink 542 | : http://google.com https://google.com 543 | 544 | <http://google.com/å> http://google.com/å 545 | 546 | scyther@pokemon.com 547 | 548 | scy.the_rbe-edr+ill@pokemon.com 549 | 550 | scyther@pokemon.com. 551 | 552 | scyther@pokemon.com/ 553 | 554 | scyther@pokemon.com/beedrill@pokemon.com 555 | 556 | mailto:scyther@pokemon.com 557 | 558 | This is a mailto:scyther@pokemon.com 559 | 560 | mailto:scyther@pokemon.com. 561 | 562 | mailto:scyther@pokemon.com/ 563 | 564 | mailto:scyther@pokemon.com/message 565 | 566 | mailto:scyther@pokemon.com/mailto:beedrill@pokemon.com 567 | 568 | xmpp:scyther@pokemon.com 569 | 570 | xmpp:scyther@pokemon.com. 571 | 572 | xmpp:scyther@pokemon.com/message 573 | 574 | xmpp:scyther@pokemon.com/message. 
575 | 576 | Email me at:scyther@pokemon.com 577 | 578 | www.github.com www.github.com/á 579 | 580 | www.google.com/a_b 581 | 582 | Underscores not allowed in host name www.xxx.yyy._zzz 583 | 584 | Underscores not allowed in host name www.xxx._yyy.zzz 585 | 586 | Underscores allowed in domain name www._xxx.yyy.zzz 587 | 588 | **Autolink and http://inlines.com** 589 | 590 | ![http://inline.com/image](http://inline.com/image) 591 | 592 | a.w@b.c 593 | 594 | Full stop outside parens shouldn't be included http://google.com/ok. 595 | 596 | (Full stop inside parens shouldn't be included http://google.com/ok.) 597 | 598 | "http://google.com" 599 | 600 | 'http://google.com' 601 | 602 | http://🍄.ga/ http://x🍄.ga/ 603 | . 604 | <p>: <a href="http://google.com">http://google.com</a> <a href="https://google.com">https://google.com</a></p> 605 | <p><a href="http://google.com/%C3%A5">http://google.com/å</a> <a href="http://google.com/%C3%A5">http://google.com/å</a></p> 606 | <p><a href="mailto:scyther@pokemon.com">scyther@pokemon.com</a></p> 607 | <p><a href="mailto:scy.the_rbe-edr+ill@pokemon.com">scy.the_rbe-edr+ill@pokemon.com</a></p> 608 | <p><a href="mailto:scyther@pokemon.com">scyther@pokemon.com</a>.</p> 609 | <p><a href="mailto:scyther@pokemon.com">scyther@pokemon.com</a>/</p> 610 | <p><a href="mailto:scyther@pokemon.com">scyther@pokemon.com</a>/<a href="mailto:beedrill@pokemon.com">beedrill@pokemon.com</a></p> 611 | <p><a href="mailto:scyther@pokemon.com">mailto:scyther@pokemon.com</a></p> 612 | <p>This is a <a href="mailto:scyther@pokemon.com">mailto:scyther@pokemon.com</a></p> 613 | <p><a href="mailto:scyther@pokemon.com">mailto:scyther@pokemon.com</a>.</p> 614 | <p><a href="mailto:scyther@pokemon.com">mailto:scyther@pokemon.com</a>/</p> 615 | <p><a href="mailto:scyther@pokemon.com">mailto:scyther@pokemon.com</a>/message</p> 616 | <p><a href="mailto:scyther@pokemon.com">mailto:scyther@pokemon.com</a>/<a 
href="mailto:beedrill@pokemon.com">mailto:beedrill@pokemon.com</a></p> 617 | <p><a href="xmpp:scyther@pokemon.com">xmpp:scyther@pokemon.com</a></p> 618 | <p><a href="xmpp:scyther@pokemon.com">xmpp:scyther@pokemon.com</a>.</p> 619 | <p><a href="xmpp:scyther@pokemon.com/message">xmpp:scyther@pokemon.com/message</a></p> 620 | <p><a href="xmpp:scyther@pokemon.com/message">xmpp:scyther@pokemon.com/message</a>.</p> 621 | <p>Email me at:<a href="mailto:scyther@pokemon.com">scyther@pokemon.com</a></p> 622 | <p><a href="http://www.github.com">www.github.com</a> <a href="http://www.github.com/%C3%A1">www.github.com/á</a></p> 623 | <p><a href="http://www.google.com/a_b">www.google.com/a_b</a></p> 624 | <p>Underscores not allowed in host name www.xxx.yyy._zzz</p> 625 | <p>Underscores not allowed in host name www.xxx._yyy.zzz</p> 626 | <p>Underscores allowed in domain name <a href="http://www._xxx.yyy.zzz">www._xxx.yyy.zzz</a></p> 627 | <p><strong>Autolink and <a href="http://inlines.com">http://inlines.com</a></strong></p> 628 | <p><img src="http://inline.com/image" alt="http://inline.com/image" /></p> 629 | <p><a href="mailto:a.w@b.c">a.w@b.c</a></p> 630 | <p>Full stop outside parens shouldn't be included <a href="http://google.com/ok">http://google.com/ok</a>.</p> 631 | <p>(Full stop inside parens shouldn't be included <a href="http://google.com/ok">http://google.com/ok</a>.)</p> 632 | <p>"<a href="http://google.com">http://google.com</a>"</p> 633 | <p>'<a href="http://google.com">http://google.com</a>'</p> 634 | <p><a href="http://%F0%9F%8D%84.ga/">http://🍄.ga/</a> <a href="http://x%F0%9F%8D%84.ga/">http://x🍄.ga/</a></p> 635 | ```````````````````````````````` 636 | 637 | ```````````````````````````````` example pending 638 | mmmmailto:scyther@pokemon.com 639 | . 
640 | <p>mmmmailto:<a href="mailto:scyther@pokemon.com">scyther@pokemon.com</a></p> 641 | ```````````````````````````````` 642 | 643 | ```````````````````````````````` example 644 | This shouldn't crash everything: (_A_@_.A 645 | . 646 | <IGNORE> 647 | ```````````````````````````````` 648 | 649 | ```````````````````````````````` example 650 | These should not link: 651 | 652 | * @a.b.c@. x 653 | * n@. b 654 | . 655 | <p>These should not link:</p> 656 | <ul> 657 | <li>@a.b.c@. x</li> 658 | <li>n@. b</li> 659 | </ul> 660 | ```````````````````````````````` 661 | 662 | ## HTML tag filter 663 | 664 | 665 | ```````````````````````````````` example tagfilter 666 | This is <xmp> not okay, but **this** <strong>is</strong>. 667 | 668 | <p>This is <xmp> not okay, but **this** <strong>is</strong>.</p> 669 | 670 | Nope, I won't have <textarea>. 671 | 672 | <p>No <textarea> here either.</p> 673 | 674 | <p>This <random /> <thing> is okay</thing> though.</p> 675 | 676 | Yep, <totally>okay</totally>. 677 | 678 | <!-- HTML comments are okay, though. --> 679 | <!- But we're strict. -> 680 | <! No nonsense. > 681 | <!-- Leave multiline comments the heck alone, though, okay? 682 | Even with {"x":"y"} or 1 > 2 or whatever. Even **markdown**. 683 | --> 684 | <!--- Support everything CommonMark's parser does. --> 685 | <!----> 686 | <!--thistoo--> 687 | . 688 | <p>This is <xmp> not okay, but <strong>this</strong> <strong>is</strong>.</p> 689 | <p>This is <xmp> not okay, but **this** <strong>is</strong>.</p> 690 | <p>Nope, I won't have <textarea>.</p> 691 | <p>No <textarea> here either.</p> 692 | <p>This <random /> <thing> is okay</thing> though.</p> 693 | <p>Yep, <totally>okay</totally>.</p> 694 | <!-- HTML comments are okay, though. --> 695 | <p><!- But we're strict. -> 696 | <! No nonsense. ></p> 697 | <!-- Leave multiline comments the heck alone, though, okay? 698 | Even with {"x":"y"} or 1 > 2 or whatever. Even **markdown**. 
699 | --> 700 | <!--- Support everything CommonMark's parser does. --> 701 | <!----> 702 | <!--thistoo--> 703 | ```````````````````````````````` 704 | 705 | ## Footnotes 706 | 707 | ```````````````````````````````` example pending 708 | This is some text![^1]. Other text.[^footnote]. 709 | 710 | Here's a thing[^other-note]. 711 | 712 | And another thing[^codeblock-note]. 713 | 714 | This doesn't have a referent[^nope]. 715 | 716 | 717 | [^other-note]: no code block here (spaces are stripped away) 718 | 719 | [^codeblock-note]: 720 | this is now a code block (8 spaces indentation) 721 | 722 | [^1]: Some *bolded* footnote definition. 723 | 724 | Hi! 725 | 726 | [^footnote]: 727 | > Blockquotes can be in a footnote. 728 | 729 | as well as code blocks 730 | 731 | or, naturally, simple paragraphs. 732 | 733 | [^unused]: This is unused. 734 | . 735 | <p>This is some text!<sup class="footnote-ref"><a href="#fn-1" id="fnref-1" data-footnote-ref>1</a></sup>. Other text.<sup class="footnote-ref"><a href="#fn-footnote" id="fnref-footnote" data-footnote-ref>2</a></sup>.</p> 736 | <p>Here's a thing<sup class="footnote-ref"><a href="#fn-other-note" id="fnref-other-note" data-footnote-ref>3</a></sup>.</p> 737 | <p>And another thing<sup class="footnote-ref"><a href="#fn-codeblock-note" id="fnref-codeblock-note" data-footnote-ref>4</a></sup>.</p> 738 | <p>This doesn't have a referent[^nope].</p> 739 | <p>Hi!</p> 740 | <section class="footnotes" data-footnotes> 741 | <ol> 742 | <li id="fn-1"> 743 | <p>Some <em>bolded</em> footnote definition. <a href="#fnref-1" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="1" aria-label="Back to reference 1">↩</a></p> 744 | </li> 745 | <li id="fn-footnote"> 746 | <blockquote> 747 | <p>Blockquotes can be in a footnote.</p> 748 | </blockquote> 749 | <pre><code>as well as code blocks 750 | </code></pre> 751 | <p>or, naturally, simple paragraphs. 
<a href="#fnref-footnote" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="2" aria-label="Back to reference 2">↩</a></p> 752 | </li> 753 | <li id="fn-other-note"> 754 | <p>no code block here (spaces are stripped away) <a href="#fnref-other-note" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="3" aria-label="Back to reference 3">↩</a></p> 755 | </li> 756 | <li id="fn-codeblock-note"> 757 | <pre><code>this is now a code block (8 spaces indentation) 758 | </code></pre> 759 | <a href="#fnref-codeblock-note" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="4" aria-label="Back to reference 4">↩</a> 760 | </li> 761 | </ol> 762 | </section> 763 | ```````````````````````````````` 764 | 765 | ## When a footnote is used multiple times, we insert multiple backrefs. 766 | 767 | ```````````````````````````````` example pending 768 | This is some text. It has a footnote[^a-footnote]. 769 | 770 | This footnote is referenced[^a-footnote] multiple times, in lots of different places.[^a-footnote] 771 | 772 | [^a-footnote]: This footnote definition should have three backrefs. 773 | . 774 | <p>This is some text. It has a footnote<sup class="footnote-ref"><a href="#fn-a-footnote" id="fnref-a-footnote" data-footnote-ref>1</a></sup>.</p> 775 | <p>This footnote is referenced<sup class="footnote-ref"><a href="#fn-a-footnote" id="fnref-a-footnote-2" data-footnote-ref>1</a></sup> multiple times, in lots of different places.<sup class="footnote-ref"><a href="#fn-a-footnote" id="fnref-a-footnote-3" data-footnote-ref>1</a></sup></p> 776 | <section class="footnotes" data-footnotes> 777 | <ol> 778 | <li id="fn-a-footnote"> 779 | <p>This footnote definition should have three backrefs. 
<a href="#fnref-a-footnote" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="1" aria-label="Back to reference 1">↩</a> <a href="#fnref-a-footnote-2" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="1-2" aria-label="Back to reference 1-2">↩<sup class="footnote-ref">2</sup></a> <a href="#fnref-a-footnote-3" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="1-3" aria-label="Back to reference 1-3">↩<sup class="footnote-ref">3</sup></a></p> 780 | </li> 781 | </ol> 782 | </section> 783 | ```````````````````````````````` 784 | 785 | ## Footnote reference labels are href escaped 786 | 787 | ```````````````````````````````` example pending 788 | Hello[^"><script>alert(1)</script>] 789 | 790 | [^"><script>alert(1)</script>]: pwned 791 | . 792 | <p>Hello<sup class="footnote-ref"><a href="#fn-%22%3E%3Cscript%3Ealert(1)%3C/script%3E" id="fnref-%22%3E%3Cscript%3Ealert(1)%3C/script%3E" data-footnote-ref>1</a></sup></p> 793 | <section class="footnotes" data-footnotes> 794 | <ol> 795 | <li id="fn-%22%3E%3Cscript%3Ealert(1)%3C/script%3E"> 796 | <p>pwned <a href="#fnref-%22%3E%3Cscript%3Ealert(1)%3C/script%3E" class="footnote-backref" data-footnote-backref data-footnote-backref-idx="1" aria-label="Back to reference 1">↩</a></p> 797 | </li> 798 | </ol> 799 | </section> 800 | ```````````````````````````````` 801 | 802 | ## Interop 803 | 804 | Autolink and strikethrough. 805 | 806 | ```````````````````````````````` example autolink 807 | ~~www.google.com~~ 808 | 809 | ~~http://google.com~~ 810 | . 811 | <p><del><a href="http://www.google.com">www.google.com</a></del></p> 812 | <p><del><a href="http://google.com">http://google.com</a></del></p> 813 | ```````````````````````````````` 814 | 815 | Autolink and tables. 816 | 817 | ```````````````````````````````` example autolink 818 | | a | b | 819 | | --- | --- | 820 | | https://github.com www.github.com | http://pokemon.com | 821 | . 
822 | <table> 823 | <thead> 824 | <tr> 825 | <th>a</th> 826 | <th>b</th> 827 | </tr> 828 | </thead> 829 | <tbody> 830 | <tr> 831 | <td><a href="https://github.com">https://github.com</a> <a href="http://www.github.com">www.github.com</a></td> 832 | <td><a href="http://pokemon.com">http://pokemon.com</a></td> 833 | </tr> 834 | </tbody> 835 | </table> 836 | ```````````````````````````````` 837 | 838 | ## Task lists 839 | 840 | ```````````````````````````````` example 841 | - [ ] foo 842 | - [x] bar 843 | . 844 | <ul> 845 | <li><input disabled="" type="checkbox"> foo</li> 846 | <li><input checked="" disabled="" type="checkbox"> bar</li> 847 | </ul> 848 | ```````````````````````````````` 849 | 850 | Show that a task list and a regular list get processed the same in 851 | the way that sublists are created. If something works in a list 852 | item, then it should work the same way with a task. The only 853 | difference should be the tasklist marker. So, if we use something 854 | other than a space or x, it won't be recognized as a task item, and 855 | so will be treated as a regular item. 856 | 857 | ```````````````````````````````` example 858 | - [x] foo 859 | - [ ] bar 860 | - [x] baz 861 | - [ ] bim 862 | 863 | Show a regular (non task) list to show that it has the same structure 864 | - [@] foo 865 | - [@] bar 866 | - [@] baz 867 | - [@] bim 868 | . 
869 | <ul> 870 | <li><input checked="" disabled="" type="checkbox"> foo 871 | <ul> 872 | <li><input disabled="" type="checkbox"> bar</li> 873 | <li><input checked="" disabled="" type="checkbox"> baz</li> 874 | </ul> 875 | </li> 876 | <li><input disabled="" type="checkbox"> bim</li> 877 | </ul> 878 | <p>Show a regular (non task) list to show that it has the same structure</p> 879 | <ul> 880 | <li>[@] foo 881 | <ul> 882 | <li>[@] bar</li> 883 | <li>[@] baz</li> 884 | </ul> 885 | </li> 886 | <li>[@] bim</li> 887 | </ul> 888 | ```````````````````````````````` 889 | Use a larger indent -- a task list and a regular list should produce 890 | the same structure. 891 | 892 | ```````````````````````````````` example 893 | - [x] foo 894 | - [ ] bar 895 | - [x] baz 896 | - [ ] bim 897 | 898 | Show a regular (non task) list to show that it has the same structure 899 | - [@] foo 900 | - [@] bar 901 | - [@] baz 902 | - [@] bim 903 | . 904 | <ul> 905 | <li><input checked="" disabled="" type="checkbox"> foo 906 | <ul> 907 | <li><input disabled="" type="checkbox"> bar</li> 908 | <li><input checked="" disabled="" type="checkbox"> baz</li> 909 | </ul> 910 | </li> 911 | <li><input disabled="" type="checkbox"> bim</li> 912 | </ul> 913 | <p>Show a regular (non task) list to show that it has the same structure</p> 914 | <ul> 915 | <li>[@] foo 916 | <ul> 917 | <li>[@] bar</li> 918 | <li>[@] baz</li> 919 | </ul> 920 | </li> 921 | <li>[@] bim</li> 922 | </ul> 923 | ```````````````````````````````` 924 | -------------------------------------------------------------------------------- /src/markd/parsers/inline.cr: -------------------------------------------------------------------------------- 1 | require "html" 2 | require "uri" 3 | 4 | module Markd::Parser 5 | class Inline 6 | include Parser 7 | 8 | property refmap 9 | private getter! brackets 10 | 11 | @delimiters : Delimiter? 
12 | 13 | def initialize(@options : Options) 14 | @text = "" 15 | @pos = 0 16 | @refmap = {} of String => Hash(String, String) | String 17 | end 18 | 19 | def parse(node : Node) 20 | @pos = 0 21 | @delimiters = nil 22 | @text = node.text.strip 23 | 24 | loop do 25 | break unless process_line(node) 26 | end 27 | 28 | node.text = "" 29 | process_delimiters(nil) 30 | end 31 | 32 | private def process_line(node : Node) 33 | char = char_at?(@pos) 34 | return false unless char && char != Char::ZERO 35 | 36 | res = case char 37 | when '\n' 38 | newline(node) 39 | when '\\' 40 | backslash(node) 41 | when '`' 42 | backtick(node) 43 | when '*', '_' 44 | handle_delim(char, node) 45 | when '~' 46 | if @options.gfm? 47 | handle_delim(char, node) 48 | else 49 | string(node) 50 | end 51 | when '\'', '"' 52 | @options.smart? && handle_delim(char, node) 53 | when '[' 54 | open_bracket(node) 55 | when '!' 56 | bang(node) 57 | when ']' 58 | close_bracket(node) 59 | when '<' 60 | auto_link(node) || html_tag(node) 61 | when 'w' 62 | # Catch www. autolinks for GFM 63 | # Do not match if it's http://www 64 | if @options.autolink? && (@pos == 0 || char_at?(@pos - 1) != '/') 65 | auto_link(node) 66 | else 67 | false 68 | end 69 | when 'h' 70 | # Catch http:// and https:// autolinks for GFM 71 | # Do not match ![http:// ... because that was matched by '!'] 72 | if @options.autolink? && (@pos == 0 || char_at?(@pos - 1) != '[') 73 | auto_link(node) 74 | else 75 | false 76 | end 77 | when 'f' 78 | # Catch ftp:// autolinks for GFM 79 | # Do not match if it's <ftp:// ... because that was matched by '<' 80 | if @options.autolink? && (@pos == 0 || char_at?(@pos - 1) != '<') 81 | auto_link(node) 82 | else 83 | false 84 | end 85 | when 'x' 86 | # Catch xmpp: autolinks for GFM 87 | if @options.autolink? && (@pos == 0 || char_at?(@pos - 1) != '<') 88 | auto_link(node) 89 | else 90 | false 91 | end 92 | when 'm' 93 | # Catch mailto: autolinks for GFM 94 | if @options.autolink? 
&& (@pos == 0 || char_at?(@pos - 1) != '<') 95 | auto_link(node) 96 | else 97 | false 98 | end 99 | when '&' 100 | entity(node) 101 | when ':' 102 | emoji(node) 103 | else 104 | if @options.autolink? && node.text.includes? '@' 105 | # Catch email autolinks for GFM 106 | auto_link(node) 107 | else 108 | string(node) 109 | end 110 | end 111 | 112 | unless res 113 | @pos += 1 114 | node.append_child(text(char)) 115 | end 116 | 117 | true 118 | end 119 | 120 | private def newline(node : Node) 121 | @pos += 1 # assume we're at a \n 122 | last_child = node.last_child? 123 | # check previous node for trailing spaces 124 | if last_child && last_child.type.text? && 125 | last_child.text.ends_with?(' ') 126 | hard_break = if last_child.text.size == 1 127 | false # Must be space 128 | else 129 | last_child.text[-2]? == ' ' 130 | end 131 | last_child.text = last_child.text.rstrip ' ' 132 | node.append_child(Node.new(hard_break ? Node::Type::LineBreak : Node::Type::SoftBreak)) 133 | else 134 | node.append_child(Node.new(Node::Type::SoftBreak)) 135 | end 136 | 137 | # gobble leading spaces in next line 138 | while char_at?(@pos) == ' ' 139 | @pos += 1 140 | end 141 | 142 | true 143 | end 144 | 145 | private def backslash(node : Node) 146 | @pos += 1 147 | 148 | char = @pos < @text.bytesize ? 
char_at(@pos).to_s : nil 149 | child = if char_at?(@pos) == '\n' 150 | @pos += 1 151 | Node.new(Node::Type::LineBreak) 152 | elsif char && char.match(Rule::ESCAPABLE) 153 | c = text(char) 154 | @pos += 1 155 | c 156 | else 157 | text("\\") 158 | end 159 | 160 | node.append_child(child) 161 | 162 | true 163 | end 164 | 165 | private def backtick(node : Node) 166 | start_pos = @pos 167 | while char_at?(@pos) == '`' 168 | @pos += 1 169 | end 170 | return false if start_pos == @pos 171 | 172 | num_ticks = @pos - start_pos 173 | after_open_ticks = @pos 174 | while (text = match(Rule::TICKS)) 175 | if text.bytesize == num_ticks 176 | child = Node.new(Node::Type::Code) 177 | child_text = @text.byte_slice(after_open_ticks, (@pos - num_ticks) - after_open_ticks).gsub(Rule::LINE_ENDING, " ") 178 | if child_text.bytesize >= 2 && child_text[0] == ' ' && child_text[-1] == ' ' && child_text.matches?(/[^ ]/) 179 | child_text = child_text.byte_slice(1, child_text.bytesize - 2) 180 | end 181 | child.text = child_text 182 | node.append_child(child) 183 | 184 | return true 185 | end 186 | end 187 | 188 | @pos = after_open_ticks 189 | node.append_child(text("`" * num_ticks)) 190 | 191 | true 192 | end 193 | 194 | private def bang(node : Node) 195 | start_pos = @pos 196 | @pos += 1 197 | if char_at?(@pos) == '[' 198 | @pos += 1 199 | child = text("![") 200 | node.append_child(child) 201 | 202 | add_bracket(child, start_pos + 1, true) 203 | else 204 | node.append_child(text("!")) 205 | end 206 | 207 | true 208 | end 209 | 210 | private def add_bracket(node : Node, index : Int32, image = false) 211 | brackets.bracket_after = true if brackets? 212 | @brackets = Bracket.new(node, @brackets, @delimiters, index, image, true) 213 | end 214 | 215 | private def remove_bracket 216 | @brackets = brackets.previous? 
217 | end 218 | 219 | private def open_bracket(node : Node) 220 | start_pos = @pos 221 | @pos += 1 222 | 223 | child = text("[") 224 | node.append_child(child) 225 | 226 | add_bracket(child, start_pos, false) 227 | 228 | true 229 | end 230 | 231 | private def close_bracket(node : Node) 232 | title = "" 233 | dest = "" 234 | matched = false 235 | @pos += 1 236 | start_pos = @pos 237 | 238 | # get last [ or ![ 239 | opener = @brackets 240 | unless opener 241 | # no matched opener, just return a literal 242 | node.append_child(text("]")) 243 | return true 244 | end 245 | 246 | unless opener.active? 247 | # no matched opener, just return a literal 248 | node.append_child(text("]")) 249 | # take opener off brackets stack 250 | remove_bracket 251 | return true 252 | end 253 | 254 | # If we got here, open is a potential opener 255 | is_image = opener.image? 256 | 257 | # Check to see if we have a link/image 258 | save_pos = @pos 259 | 260 | # Inline link? 261 | if char_at?(@pos) == '(' 262 | @pos += 1 263 | if spnl && (dest = link_destination) && 264 | spnl && (char_at?(@pos - 1).try(&.whitespace?) && 265 | (title = link_title) || true) && spnl && 266 | char_at?(@pos) == ')' 267 | @pos += 1 268 | matched = true 269 | else 270 | @pos = save_pos 271 | end 272 | end 273 | 274 | ref_label = nil 275 | unless matched 276 | # Next, see if there's a link label 277 | before_label = @pos 278 | label_size = link_label 279 | if label_size > 2 280 | ref_label = normalize_reference(@text.byte_slice(before_label, label_size + 1)) 281 | elsif !opener.bracket_after? 282 | # Empty or missing second label means to use the first label as the reference. 283 | # The reference must not contain a bracket. If we know there's a bracket, we don't even bother checking it. 284 | byte_count = start_pos - opener.index 285 | ref_label = byte_count > 0 ? 
normalize_reference(@text.byte_slice(opener.index, byte_count)) : nil 286 | end 287 | 288 | if label_size == 0 289 | # If shortcut reference link, rewind before spaces we skipped. 290 | @pos = save_pos 291 | end 292 | 293 | if ref_label && @refmap[ref_label]? 294 | # lookup rawlabel in refmap 295 | link = @refmap[ref_label].as(Hash) 296 | dest = link["destination"] if link["destination"] 297 | title = link["title"] if link["title"] 298 | matched = true 299 | end 300 | end 301 | 302 | if matched 303 | child = Node.new(is_image ? Node::Type::Image : Node::Type::Link) 304 | child.data["destination"] = dest.not_nil! 305 | child.data["title"] = title || "" 306 | 307 | tmp = opener.node.next? 308 | while tmp 309 | next_node = tmp.next? 310 | tmp.unlink 311 | child.append_child(tmp) 312 | tmp = next_node 313 | end 314 | 315 | node.append_child(child) 316 | process_delimiters(opener.previous_delimiter) 317 | remove_bracket 318 | opener.node.unlink 319 | 320 | unless is_image 321 | opener = @brackets 322 | while opener 323 | opener.active = false unless opener.image? 324 | opener = opener.previous? 325 | end 326 | end 327 | else 328 | remove_bracket 329 | @pos = start_pos 330 | node.append_child(text("]")) 331 | end 332 | 333 | true 334 | end 335 | 336 | private def process_delimiters(delimiter : Delimiter?) 337 | # find first closer above stack_bottom: 338 | closer = @delimiters 339 | while closer 340 | previous = closer.previous? 341 | break if previous == delimiter 342 | closer = previous 343 | end 344 | 345 | if closer 346 | openers_bottom = { 347 | '_' => delimiter, 348 | '*' => delimiter, 349 | '\'' => delimiter, 350 | '"' => delimiter, 351 | } of Char => Delimiter? 352 | 353 | openers_bottom['~'] = delimiter if @options.gfm? 354 | 355 | # move forward, looking for closers, and handling each 356 | while closer 357 | closer_char = closer.char 358 | 359 | unless closer.can_close? 360 | closer = closer.next? 361 | next 362 | end 363 | 364 | # found emphasis closer. 
now look back for first matching opener: 365 | opener = closer.previous? 366 | opener_found = false 367 | while opener && opener != delimiter && opener != openers_bottom[closer_char] 368 | odd_match = (closer.can_open? || opener.can_close?) && 369 | closer.orig_delims % 3 != 0 && 370 | (opener.orig_delims + closer.orig_delims) % 3 == 0 371 | if opener.char == closer.char && opener.can_open? && !odd_match 372 | opener_found = true 373 | break 374 | end 375 | opener = opener.previous? 376 | end 377 | opener = nil unless opener_found 378 | 379 | old_closer = closer 380 | 381 | case closer_char 382 | when '*', '_', '~' 383 | if closer_char != '~' || (closer_char == '~' && @options.gfm?) 384 | if opener 385 | # calculate actual number of delimiters used from closer 386 | use_delims = (closer.num_delims >= 2 && opener.num_delims >= 2) ? 2 : 1 387 | 388 | if closer_char == '~' && ( 389 | closer.num_delims > 2 || 390 | opener.num_delims > 2 || 391 | closer.num_delims != opener.num_delims 392 | ) 393 | closer = closer.next? 394 | next 395 | end 396 | 397 | opener_inl = opener.node 398 | closer_inl = closer.node 399 | 400 | # remove used delimiters from stack elts and inlines 401 | opener.num_delims -= use_delims 402 | closer.num_delims -= use_delims 403 | 404 | opener_inl.text = opener_inl.text[0..(-use_delims - 1)] 405 | closer_inl.text = closer_inl.text[0..(-use_delims - 1)] 406 | 407 | if closer_char == '~' 408 | emph = Node.new(Node::Type::Strikethrough) 409 | else 410 | # build contents for new emph element 411 | emph = Node.new((use_delims == 1) ? Node::Type::Emphasis : Node::Type::Strong) 412 | end 413 | 414 | tmp = opener_inl.next? 415 | while tmp && tmp != closer_inl 416 | next_node = tmp.next? 
# Tries every autolink form at the current position and, on success, appends
# the resulting node(s) to `node`.
#
# The angle-bracket forms (`Rule::EMAIL_AUTO_LINK`, `Rule::AUTO_LINK`) are
# always tried. The extended forms (www, protocol/xmpp/mailto, bare email)
# are only tried when `@options.autolink?` is enabled.
#
# Returns `true` when something was matched (`match` has then advanced
# `@pos`), `false` otherwise.
private def auto_link(node : Node)
  if (bracketed_email = match(Rule::EMAIL_AUTO_LINK))
    node.append_child(link(bracketed_email, true))
    return true
  end

  if (bracketed_uri = match(Rule::AUTO_LINK))
    node.append_child(link(bracketed_uri, false))
    return true
  end

  # Everything below belongs to the extended autolink extension.
  return false unless @options.autolink?

  # `www.` links get an `http://` prefix on their destination; links that
  # already carry a scheme do not.
  needs_proto = true
  candidate = match(Rule::WWW_AUTO_LINK)
  unless candidate
    needs_proto = false
    candidate = match(Rule::PROTOCOL_AUTO_LINK) ||
                match(Rule::XMPP_AUTO_LINK) ||
                match(Rule::MAILTO_AUTO_LINK)
  end

  if candidate
    cleaned = autolink_cleanup(candidate)
    if cleaned.empty?
      # Nothing linkable survived the cleanup: emit the raw match as text.
      node.append_child(text(candidate))
    else
      _, remainder = @text.split(cleaned, 2)
      node.append_child(link(cleaned, false, needs_proto))
      node.append_child(text(remainder)) if remainder.size > 0 && candidate != cleaned
    end
    return true
  end

  if (address = match(Rule::EXTENDED_EMAIL_AUTO_LINK))
    # Emails that end in - or _ are declared not to be links by the spec:
    #
    # `.`, `-`, and `_` can occur on both sides of the `@`, but only `.` may
    # occur at the end of the email address, in which case it will not be
    # considered part of the address:
    #
    # a.b-c_d@a.b_ => <p>a.b-c_d@a.b_</p>
    child = "-_".includes?(address[-1]) ? text(address) : link(address, true, false)
    node.append_child(child)
    return true
  end

  false
end
# Parses an HTML entity starting at `@pos` and appends its decoded form to
# `node` as a text node.
#
# Numeric entities (`&#...;`) are recognised through
# `Rule::NUMERIC_HTML_ENTITY`; named entities are scanned by hand up to the
# next `;`. Returns `false` when `@pos` is not on `&`, when the numeric rule
# does not match, or when the input ends (NUL or out of range) before a `;`.
private def entity(node : Node)
  return false unless char_at?(@pos) == '&'

  if char_at?(@pos + 1) == '#'
    raw = match(Rule::NUMERIC_HTML_ENTITY)
    return false unless raw
    # Drop the leading `&` and the trailing `;`.
    name = raw.byte_slice(1, raw.bytesize - 2)
  else
    name_start = @pos + 1
    cursor = name_start
    loop do
      current = char_at?(cursor)
      cursor += 1
      break if current == ';'
      return false if current.nil? || current == Char::ZERO
    end
    # `cursor` sits one past the `;`, so the name ends at `cursor - 1`.
    name = @text.byte_slice(name_start, (cursor - 1) - name_start)
    @pos = cursor
  end

  node.append_child(text(HTML.decode_entity(name)))
  true
end

# Parses a `:shortcode:` emoji at `@pos` when the emoji option is enabled.
#
# The shortcode may only contain ASCII letters, digits, `+`, `-` and `_`.
# `@pos` is advanced and a text node is appended only when the shortcode is
# present in `EmojiEntities::EMOJI_MAPPINGS`; otherwise `false` is returned
# and `@pos` is left untouched.
private def emoji(node : Node)
  return false unless @options.emoji?
  return false unless char_at?(@pos) == ':'

  name_start = @pos + 1
  cursor = name_start
  loop do
    current = char_at?(cursor)
    cursor += 1
    break if current == ':'
    # End of input, NUL, or any character outside the shortcode alphabet.
    return false unless current && (current.ascii_alphanumeric? || "+-_".includes?(current))
  end

  shortcode = @text.byte_slice(name_start, (cursor - 1) - name_start)
  glyph = EmojiEntities::EMOJI_MAPPINGS[shortcode]?
  return false unless glyph

  @pos = cursor
  node.append_child(text(glyph))
  true
end
# Builds a `Link` node for an autolink match.
#
# Surrounding `<` / `>` are stripped from `match` to obtain the visible
# label. The destination is the label, prefixed with `mailto:` when `email`
# is set and then with `http://` when `add_proto` is set, and finally passed
# through `normalize_uri`. The title is always empty.
private def link(match : String, email = false, add_proto = false) : Node
  label = match.lstrip("<").rstrip(">")

  target = label
  target = "mailto:#{target}" if email
  target = "http://#{target}" if add_proto

  Node.new(Node::Type::Link).tap do |anchor|
    anchor.data["title"] = ""
    anchor.data["destination"] = normalize_uri(target)
    anchor.append_child(text(label))
  end
end

# Matches a link label at `@pos`.
#
# Returns the label's byte size minus one, or `0` when there is no label,
# when it is longer than 1001 characters, or when it ends in an escaped `]`
# whose backslash is not itself preceded by a backslash.
private def link_label
  label = match(Rule::LINK_LABEL)
  return 0 unless label
  return 0 if label.size > 1001
  return 0 if label.ends_with?("\\]") && label[-3]? != '\\'

  label.bytesize - 1
end

# Matches a link title at `@pos` and returns it with its delimiters removed
# and entities decoded, or `nil` when no title is present.
private def link_title
  if (quoted = match(Rule::LINK_TITLE))
    Utils.decode_entities_string(quoted[1..-2])
  end
end
# Consumes a delimiter run of `char` at `@pos`, appends it to `node` as a
# text node and pushes a matching `Delimiter` on top of the delimiter stack.
#
# Quote characters are replaced by a typographic placeholder (`’` for `'`,
# `“` for `"`); any other delimiter keeps its literal text.
#
# Returns `false` when `scan_delims` finds no delimiter run, `true` otherwise.
private def handle_delim(char : Char, node : Node)
  res = scan_delims(char)
  return false unless res

  num_delims = res[:num_delims]
  start_pos = @pos
  @pos += num_delims
  text = case char
         when '\''
           "\u{2019}"
         when '"'
           "\u{201C}"
         else
           @text.byte_slice(start_pos, @pos - start_pos)
         end

  child = text(text)
  node.append_child(child)

  delimiter = Delimiter.new(char, num_delims, num_delims, child, @delimiters, nil, res[:can_open], res[:can_close])

  # Link the former top of the stack forward to the new entry.
  if (prev = delimiter.previous?)
    prev.next = delimiter
  end

  @delimiters = delimiter

  true
end

# Unlinks `delimiter` from the delimiter stack, updating `@delimiters` when
# the removed entry was the top of the stack.
private def remove_delimiter(delimiter : Delimiter)
  if (prev = delimiter.previous?)
    prev.next = delimiter.next?
  end

  if (nxt = delimiter.next?)
    nxt.previous = delimiter.previous?
  else
    # top of stack
    @delimiters = delimiter.previous?
  end
end

# Drops every stack entry strictly between `bottom` and `top` by linking the
# two directly to each other.
private def remove_delimiter_between(bottom : Delimiter, top : Delimiter)
  if bottom.next? != top
    bottom.next = top
    top.previous = bottom
  end
end

# Measures the delimiter run of `char` starting at `@pos` without consuming
# it (`@pos` is restored before returning).
#
# Quote characters always form a run of length one; other delimiters extend
# over every consecutive occurrence of `char`.
#
# Returns `nil` when there is no run, otherwise a named tuple with
# `num_delims`, `can_open` and `can_close`, derived from whether the run is
# left- and/or right-flanking with respect to the characters around it.
private def scan_delims(char : Char)
  num_delims = 0
  start_pos = @pos
  if char == '\'' || char == '"'
    num_delims += 1
    @pos += 1
  else
    while char_at?(@pos) == char
      num_delims += 1
      @pos += 1
    end
  end

  return if num_delims == 0

  # The start and the end of the input count as line endings.
  char_before = start_pos == 0 ? '\n' : previous_unicode_char_at(start_pos)
  char_after = unicode_char_at?(@pos) || '\n'

  # Match ASCII code 160 => \xA0 (See http://www.adamkoch.com/2009/07/25/white-space-and-character-160/)
  after_is_whitespace = char_after.ascii_whitespace? || char_after == '\u00A0'
  after_is_punctuation = !!char_after.to_s.match(Rule::PUNCTUATION)
  # The no-break-space check must look at the character *before* the run;
  # testing `char_after` here misclassified runs next to U+00A0.
  before_is_whitespace = char_before.ascii_whitespace? || char_before == '\u00A0'
  before_is_punctuation = !!char_before.to_s.match(Rule::PUNCTUATION)

  left_flanking = !after_is_whitespace &&
                  (!after_is_punctuation || before_is_whitespace || before_is_punctuation)
  right_flanking = !before_is_whitespace &&
                   (!before_is_punctuation || after_is_whitespace || after_is_punctuation)

  case char
  when '_'
    # Underscores additionally require punctuation on the "inner" side when
    # the run is both left- and right-flanking.
    can_open = left_flanking && (!right_flanking || before_is_punctuation)
    can_close = right_flanking && (!left_flanking || after_is_punctuation)
  when '\'', '"'
    can_open = left_flanking && !right_flanking
    can_close = right_flanking
  else
    can_open = left_flanking
    can_close = right_flanking
  end

  @pos = start_pos

  {
    num_delims: num_delims,
    can_open:   can_open,
    can_close:  can_close,
  }
end
# Skips spaces at `@pos` and reports whether the rest of the line is empty.
#
# A newline is consumed; a NUL character or the end of the input also counts
# as the end of the line. Returns `false` (with the spaces already skipped)
# when any other character follows.
private def space_at_end_of_line?
  while char_at?(@pos) == ' '
    @pos += 1
  end

  case char_at?(@pos)
  when '\n'
    @pos += 1
  when Char::ZERO, nil
    # End of input: `char_at?` yields nil past the last byte, so treat it
    # like the NUL sentinel, as the entity and emoji scanners do.
  else
    return false
  end

  true
end

# Parse zero or more space characters, including at most one newline.
# Always returns `true`.
private def spnl
  seen_newline = false
  while (c = char_at?(@pos))
    if !seen_newline && c == '\n'
      seen_newline = true
    elsif c != ' '
      break
    end

    @pos += 1
  end

  true
end

# Matches `regex` against the input from `@pos` onwards.
#
# On success `@pos` is advanced by the byte offset at which the match ends
# and the matched text is returned; otherwise `nil` is returned and `@pos`
# is unchanged.
private def match(regex : Regex) : String?
  remainder = @text.byte_slice(@pos)
  found = remainder.match(regex)
  return unless found

  @pos += found.byte_end.not_nil!
  found[0]
end

# This function advances @pos as far as possible until it finds a
# "special" character, such as '<', ']', or a special string (like a URL).
#
# Then it returns the chunk before it found that match, or in the case
# of special strings, the chunk matched. Returns `nil` when nothing was
# consumed.
private def match_main : String?
  start_pos = @pos
  while (char = char_at?(@pos))
    # If we detected a special string (like a URL), and it's
    # not the beginning of the chunk, we need to break right away.
    #
    # If we are at the beginning of the chunk, then we return
    # the special string itself.
    if @options.autolink?
      advance = special_string?(@text, @pos)
      if advance > 0
        @pos += advance if @pos == start_pos
        break
      end
    end

    # If we detect a special character, we need to break
    break unless main_char?(char)
    @pos += 1
  end

  return nil if start_pos == @pos

  @text.byte_slice(start_pos, @pos - start_pos)
end
# Applies the extended-autolink cleanups to a candidate link.
#
# Returns the part of `text` that should become the link; an empty string
# means the candidate must not be linked at all.
private def autolink_cleanup(text : String) : String
  return text if text.empty?

  # When an autolink ends in `)`, we scan the entire autolink for the total number
  # of parentheses. If there is a greater number of closing parentheses than
  # opening ones, we don't consider the unmatched trailing parentheses part of the
  # autolink, in order to facilitate including an autolink inside a parenthesis.
  # A trailing `)` is kept when opening parentheses are in the majority.
  while text.ends_with?(")") && text.count(")") > text.count("(")
    text = text.rchop
  end

  # Trailing punctuation (specifically, `?`, `!`, `.`, `,`, `:`, `*`, `_`, and `~`)
  # will not be considered part of the autolink, though they may be included in the
  # interior of the link. `[-1]?` keeps the loop safe once everything has
  # been stripped.
  while (last = text[-1]?) && "\"'?!.,:*~_".includes?(last)
    text = text.rchop
  end
  return text if text.empty?

  # If an autolink ends in a semicolon (`;`), we check to see if it appears to
  # resemble an entity reference; if the preceding text is `&`
  # followed by one or more alphanumeric characters. If so, it is excluded from
  # the autolink:
  if text.ends_with?(";") && text.includes?("&")
    parts = text.split("&")
    if "&#{parts[-1]}".matches?(Rule::HTML_ENTITY)
      text = parts[0..-2].join("&")
    end
  end

  # If the autolink has a domain and it does not match
  # `Rule::VALID_DOMAIN_NAME` then it's invalid. `www.` links carry no
  # scheme, so one is added to make the host parseable.
  uri = URI.parse(text.starts_with?("www.") ? "http://#{text}" : text)
  if (host = uri.host) && !host.match(Rule::VALID_DOMAIN_NAME)
    text = ""
  end

  text
end
# Wraps `text` (converted with `to_s`) in a new `Text` node.
private def text(text) : Node
  node = Node.new(Node::Type::Text)
  node.text = text.to_s
  node
end

# Returns the byte at `byte_index` of `@text` as a `Char`, or `nil` when the
# index is out of range. The byte is not decoded as UTF-8.
private def char_at?(byte_index)
  @text.byte_at?(byte_index).try &.unsafe_chr
end

# Same as `char_at?` but raises when `byte_index` is out of range.
private def char_at(byte_index)
  @text.byte_at(byte_index).unsafe_chr
end

# Returns the full (decoded) character that precedes `byte_index`.
private def previous_unicode_char_at(byte_index)
  reader = Char::Reader.new(@text, byte_index)
  reader.previous_char
end

# Returns the full (decoded) character starting at `byte_index`, or `nil`
# when the index is at or past the end of `@text`.
private def unicode_char_at?(byte_index)
  if byte_index < @text.bytesize
    reader = Char::Reader.new(@text, byte_index)
    reader.current_char
  end
end

# Normalize reference label: drop the surrounding brackets, remove
# leading/trailing whitespace, collapse every internal run of spaces, tabs
# and line endings to a single space, and downcase.
def normalize_reference(text : String)
  text[1..-2].strip.gsub(/[ \t\r\n]+/, " ").downcase
end

# Characters that `normalize_uri` leaves unescaped in addition to the
# unreserved set.
private RESERVED_CHARS = ['&', '+', ',', '(', ')', '\'', '#', '*', '!', '$', '/', ':', ';', '?', '@', '=']

# Decodes `uri` with `decode_uri` and percent-encodes the result again,
# leaving unreserved characters and `RESERVED_CHARS` untouched.
def normalize_uri(uri : String)
  String.build(capacity: uri.bytesize) do |io|
    URI.encode(decode_uri(uri), io) do |byte|
      URI.unreserved?(byte) || RESERVED_CHARS.includes?(byte.chr)
    end
  end
end

# Percent-decodes `text`. When the decoded text contains both `&` and `;`,
# a line consisting solely of an entity-like `&name;` token is additionally
# passed through `HTML.decode_entities`.
def decode_uri(text : String)
  decoded = URI.decode(text)
  if decoded.includes?('&') && decoded.includes?(';')
    decoded = decoded.gsub(/^&(\w+);$/) { |chars| HTML.decode_entities(chars) }
  end
  decoded
end
# One entry of the inline parser's delimiter stack: a run of delimiter
# characters together with the text node that currently holds it.
#
# Entries are linked in both directions through `previous` / `next`.
class Delimiter
  # The delimiter character of the run.
  property char : Char
  # Number of delimiters in the run that are still available.
  property num_delims : Int32
  # Length of the run as recorded at construction time.
  property orig_delims : Int32
  # Text node holding the run's characters.
  property node : Node
  # Neighbouring stack entries. `property!` generates a raising getter plus
  # the nilable `previous?` / `next?` variants.
  property! previous : Delimiter?
  property! next : Delimiter?
  # Whether the run may open / close a span.
  property? can_open : Bool
  property? can_close : Bool

  def initialize(@char, @num_delims, @orig_delims, @node,
                 @previous, @next, @can_open, @can_close)
  end
end