├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── cmd
    └── mark
    │   └── main.go
├── grammar.go
├── lexer.go
├── lexer_test.go
├── mark.go
├── mark_test.go
├── node.go
├── parser.go
├── parser_test.go
└── test
    ├── auto_links.html
    ├── auto_links.text
    ├── backslash_escapes.html
    ├── backslash_escapes.text
    ├── blockquote_list_item.html
    ├── blockquote_list_item.text
    ├── blockquotes_code_blocks.html
    ├── blockquotes_code_blocks.text
    ├── blockquotes_def.html
    ├── blockquotes_def.text
    ├── blockquotes_nested.html
    ├── blockquotes_nested.text
    ├── blockquotes_text.html
    ├── blockquotes_text.text
    ├── code_blocks.html
    ├── code_blocks.text
    ├── code_spans.html
    ├── code_spans.text
    ├── emphasis.html
    ├── emphasis.text
    ├── gfm_code_blocks.html
    ├── gfm_code_blocks.text
    ├── gfm_del.html
    ├── gfm_del.text
    ├── gfm_tables.html
    ├── gfm_tables.text
    ├── headers.html
    ├── headers.text
    ├── hr.html
    ├── hr.text
    ├── html_block.html
    ├── html_block.text
    ├── image_reference.html
    ├── image_reference.text
    ├── images.html
    ├── images.text
    ├── link_reference.html
    ├── link_reference.text
    ├── links_shortcut_references.html
    ├── links_shortcut_references.text
    ├── loose_list.html
    ├── loose_list.text
    ├── main.html
    ├── main.text
    ├── nested_emphasis.html
    ├── nested_emphasis.text
    ├── same_bullet.html
    ├── same_bullet.text
    ├── smartyfractions.html
    ├── smartyfractions.text
    ├── smartypants.html
    ├── smartypants.text
    ├── task_list.html
    ├── task_list.text
    ├── text_list.html
    ├── text_list.text
    ├── unordered_lists.html
    └── unordered_lists.text


/.gitignore:
--------------------------------------------------------------------------------
1 | draft*
2 | coverage/
3 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: go
 2 | go:
 3 |   - tip
 4 | before_install:
 5 |   - go get github.com/axw/gocov/gocov
 6 |   - go get github.com/mattn/goveralls
 7 |   - if ! go get code.google.com/p/go.tools/cmd/cover; then go get golang.org/x/tools/cmd/cover; fi
 8 | script:
 9 |     - $HOME/gopath/bin/goveralls -service=travis-ci
10 | 
11 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License
2 | 
3 | Copyright (c) 2015 Ariel Mashraki
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6 | 
7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8 | 
9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | Archived. Use https://github.com/russross/blackfriday instead.
  2 | 
  3 | # Mark [![Test coverage][coveralls-image]][coveralls-url] [![Build status][travis-image]][travis-url] [![Go doc][doc-image]][doc-url] [![license](http://img.shields.io/badge/license-MIT-blue.svg)](https://raw.githubusercontent.com/a8m/mark/master/LICENSE)
  4 | > A [markdown](http://daringfireball.net/projects/markdown/) processor written in Go. Built for fun.
  5 | 
  6 | Mark is a markdown processor that supports all the features of GFM, smartypants and smart-fractions rendering.  
  7 | It was built with a nice-ish concurrency model that is fully inspired by [Rob Pike - Lexical Scanning talk](https://www.youtube.com/watch?v=HxaD_trXwRE) and the [marked](https://github.com/chjj/marked) project.  
  8 | Please note that any contribution is welcome and appreciated, so feel free to pick up a task [here](#todo).
  9 | 
 10 | ## Table of contents:
 11 | - [Get Started](#get-started)
 12 | - [Examples](#examples)
 13 | - [Documentation](#documentation)
 14 |     - [Render](#render)
 15 |     - [type Mark](#mark)
 16 |         - [New](#new)
 17 |         - [AddRenderFn](#markaddrenderfn)
 18 |         - [Render](#markrender)
 19 |     - [smartypants and smartfractions](#smartypants-and-smartfractions)
 20 | - [Todo](#todo)
 21 | 
 22 | ### Get Started
 23 | #### Installation
 24 | ```sh
 25 | $ go get github.com/a8m/mark
 26 | ```
 27 | #### Examples
 28 | __Add to your project:__
 29 | ```go
 30 | import (
 31 | 	"fmt"
 32 | 	"github.com/a8m/mark"
 33 | )
 34 | 
 35 | func main() {
 36 | 	html := mark.Render("I am using __markdown__.")
 37 | 	fmt.Println(html)
 38 | 	// <p>I am using <strong>markdown</strong>.</p>
 39 | }
 40 | ```
 41 | 
 42 | __or using as a command line tool:__  
 43 | 
 44 | 1\. install:
 45 | ```sh
 46 | $ go get github.com/a8m/mark/cmd/mark
 47 | ```
 48 | 
 49 | 2\. usage:
 50 | ```sh
 51 | $ echo 'hello __world__...' | mark -smartypants
 52 | ```
 53 | or: 
 54 | ```sh
 55 | $ mark -i hello.text -o hello.html
 56 | ```
 57 | 
 58 | #### Documentation
 59 | ##### Render
 60 | Static rendering function.
 61 | ```go
 62 | html := mark.Render("I am using __markdown__.")
 63 | fmt.Println(html)
 64 | // <p>I am using <strong>markdown</strong>.</p>
 65 | ```
 66 | 
 67 | ##### Mark
 68 | ##### New
 69 | `New` takes a string as input and `mark.Options` as configuration, and returns a new `Mark`.
 70 | ```go
 71 | m := mark.New("hello world...", &mark.Options{
 72 |     Smartypants: true,
 73 | })
 74 | fmt.Println(m.Render())
 75 | // <p>hello world…</p>
 76 | // Note: you can instantiate it like so: mark.New("...", nil) to get the default options.
 77 | ```
 78 | 
 79 | ##### Mark.AddRenderFn
 80 | `AddRenderFn` lets you pass a `NodeType` and a `RenderFn` function to override the default `Node` rendering.  
 81 | For all node types and their fields/methods, see the full documentation: [go-doc](http://godoc.org/github.com/a8m/mark)  
 82 | 
 83 | Example 1:
 84 | ```go
 85 | m := mark.New("hello", nil)
 86 | m.AddRenderFn(mark.NodeParagraph, func(node mark.Node) (s string) {
 87 |     p, _ := node.(*mark.ParagraphNode)
 88 |     s += "<p class=\"mv-msg\">"
 89 |     for _, n := range p.Nodes {
 90 |         s += n.Render()
 91 |     }
 92 |     s += "</p>"
 93 |     return
 94 | })
 95 | fmt.Println(m.Render())
 96 | // <p class="mv-msg">hello</p>
 97 | ```
 98 | 
 99 | Example 2:
100 | ```go
101 | m := mark.New("# Hello world", &mark.Options{
102 | 	Smartypants: true,
103 | 	Fractions:   true,
104 | })
105 | m.AddRenderFn(mark.NodeHeading, func(node mark.Node) string {
106 | 	h, _ := node.(*mark.HeadingNode)
107 | 	return fmt.Sprintf("<angular-heading-directive level=\"%d\" text=\"%s\"/>", h.Level, h.Text)
108 | })
109 | fmt.Println(m.Render())
110 | // <angular-heading-directive level="1" text="Hello world"/>
111 | ```
112 | 
113 | ##### Mark.Render
114 | Parse and render input.
115 | ```go
116 | m := mark.New("hello", nil)
117 | fmt.Println(m.Render())
118 | // <p>hello</p>
119 | ```
120 | 
121 | #### Smartypants and Smartfractions
122 | Mark also supports [smartypants](http://daringfireball.net/projects/smartypants/) and smartfractions rendering.
123 | ```go
124 | func main() {
125 | 	opts := mark.DefaultOptions()
126 | 	opts.Smartypants = true
127 | 	opts.Fractions = true
128 | 	m := mark.New("'hello', 1/2 beer please...", opts)
129 | 	fmt.Println(m.Render())
130 | 	// ‘hello’, ½ beer please…
131 | }
132 | ```
133 | 
134 | ### Todo
135 | - Commonmark support v0.2
136 | - Expand documentation
137 | - Configuration options
138 | 	- gfm, table
139 | 	- heading(auto hashing)
140 | 
141 | ### License
142 | MIT
143 | 
144 | [travis-url]: https://travis-ci.org/a8m/mark
145 | [travis-image]: https://api.travis-ci.org/a8m/mark.svg
146 | [coveralls-image]: https://coveralls.io/repos/a8m/mark/badge.svg?branch=master&service=github
147 | [coveralls-url]: https://coveralls.io/r/a8m/mark
148 | [doc-image]: https://godoc.org/github.com/a8m/mark?status.svg
149 | [doc-url]: https://godoc.org/github.com/a8m/mark
150 | 


--------------------------------------------------------------------------------
/cmd/mark/main.go:
--------------------------------------------------------------------------------
 1 | // mark command line tool. available at https://github.com/a8m/mark
 2 | package main
 3 | 
 4 | import (
 5 | 	"bufio"
 6 | 	"flag"
 7 | 	"fmt"
 8 | 	"io"
 9 | 	"os"
10 | 
11 | 	"github.com/a8m/mark"
12 | )
13 | 
14 | var (
15 | 	input     = flag.String("i", "", "")
16 | 	output    = flag.String("o", "", "")
17 | 	smarty    = flag.Bool("smartypants", false, "")
18 | 	fractions = flag.Bool("fractions", false, "")
19 | )
20 | 
21 | var usage = `Usage: mark [options...] <input>
22 | 
23 | Options:
24 |   -i  Specify file input, otherwise use last argument as input file. 
25 |       If no input file is specified, read from stdin.
26 |   -o  Specify file output. If none is specified, write to stdout.
27 | 
28 |   -smartypants  Use "smart" typograhic punctuation for things like 
29 |                 quotes and dashes.
30 |   -fractions    Traslate fraction like to suitable HTML elements
31 | `
32 | 
33 | // main reads markdown from the -i file (or from stdin), renders it with mark
34 | // and writes the resulting HTML to the -o file (or to stdout).
35 | func main() {
36 | 	flag.Usage = func() {
37 | 		fmt.Fprint(os.Stderr, usage)
38 | 	}
39 | 	flag.Parse()
40 | 	// read
41 | 	var reader *bufio.Reader
42 | 	if *input != "" {
43 | 		file, err := os.Open(*input)
44 | 		if err != nil {
45 | 			usageAndExit(fmt.Sprintf("Error to open file input: %s.", *input))
46 | 		}
47 | 		defer file.Close()
48 | 		reader = bufio.NewReader(file)
49 | 	} else {
50 | 		stat, err := os.Stdin.Stat()
51 | 		if err != nil || (stat.Mode()&os.ModeCharDevice) != 0 {
52 | 			usageAndExit("")
53 | 		}
54 | 		reader = bufio.NewReader(os.Stdin)
55 | 	}
56 | 	// collect data
57 | 	var data string
58 | 	for {
59 | 		line, err := reader.ReadString('\n')
60 | 		// Keep the final line even when it is not newline-terminated (it arrives together with io.EOF).
61 | 		data += line
62 | 		if err == io.EOF {
63 | 			break
64 | 		} else if err != nil {
65 | 			usageAndExit("failed to reading input.")
66 | 		}
67 | 	}
68 | 	// write
69 | 	file := os.Stdout
70 | 	if *output != "" {
71 | 		var err error
72 | 		if file, err = os.Create(*output); err != nil {
73 | 			usageAndExit("error to create the wanted output file.")
74 | 		}
75 | 	}
76 | 	// mark rendering
77 | 	opts := mark.DefaultOptions()
78 | 	opts.Smartypants = *smarty
79 | 	opts.Fractions = *fractions
80 | 	m := mark.New(data, opts)
81 | 	if _, err := file.WriteString(m.Render()); err != nil {
82 | 		usageAndExit(fmt.Sprintf("error writing output to: %s.", file.Name()))
83 | 	}
84 | }
85 | 
86 | // usageAndExit prints msg verbatim (never as a format string) and the usage text to stderr, then exits with status 1.
87 | func usageAndExit(msg string) {
88 | 	if msg != "" {
89 | 		fmt.Fprint(os.Stderr, msg, "\n\n")
90 | 	}
91 | 	flag.Usage()
92 | 	fmt.Fprint(os.Stderr, "\n")
93 | 	os.Exit(1)
94 | }
95 | 


--------------------------------------------------------------------------------
/grammar.go:
--------------------------------------------------------------------------------
 1 | package mark
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"regexp"
 6 | )
 7 | 
 8 | // Block grammar: patterns the block lexer matches against the input at its current position.
 9 | var (
10 | 	reHr         = regexp.MustCompile(`^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *(?:\n+|$)`)
11 | 	reHeading    = regexp.MustCompile(`^ *(#{1,6})(?: +#*| +([^\n]*?)|)(?: +#*|) *(?:\n|$)`)
12 | 	reLHeading   = regexp.MustCompile(`^([^\n]+?) *\n {0,3}(=|-){1,} *(?:\n+|$)`)
13 | 	reBlockQuote = regexp.MustCompile(`^ *>[^\n]*(\n[^\n]+)*\n*`)
14 | 	reDefLink    = regexp.MustCompile(`(?s)^ *\[([^\]]+)\]: *\n? *<?([^\s>]+)>?(?: *\n? *["'(](.+?)['")])? *(?:\n+|$)`)
15 | 	reSpaceGen   = func(i int) *regexp.Regexp { // builds a pattern matching 1..i spaces at the start of every line
16 | 		return regexp.MustCompile(fmt.Sprintf(`(?m)^ {1,%d}`, i))
17 | 	}
18 | )
19 | 
20 | var reList = struct {
21 | 	item, marker, loose   *regexp.Regexp
22 | 	scanLine, scanNewLine func(src string) string
23 | }{
24 | 	regexp.MustCompile(`^( *)(?:[*+-]|\d{1,9}\.) (.*)(?:\n|)`), // item: indentation, bullet or number marker, rest of the line
25 | 	regexp.MustCompile(`^ *([*+-]|\d+\.) +`), // marker: captures the bullet or number marker itself
26 | 	regexp.MustCompile(`(?m)\n\n(.*)`), // loose: two newlines followed by the rest of that line
27 | 	regexp.MustCompile(`^(.*)(?:\n|)`).FindString, // scanLine: returns the first line, including its newline if present
28 | 	regexp.MustCompile(`^\n{1,}`).FindString, // scanNewLine: returns the leading run of newlines
29 | }
30 | 
31 | var reCodeBlock = struct {
32 | 	*regexp.Regexp
33 | 	trim func(src, repl string) string
34 | }{
35 | 	regexp.MustCompile(`^( {4}[^\n]+(?: *\n)*)+`), // one or more lines indented by four spaces
36 | 	regexp.MustCompile("(?m)^( {0,4})").ReplaceAllLiteralString, // trim: replaces up to four leading spaces of every line with repl
37 | }
38 | 
39 | var reGfmCode = struct {
40 | 	*regexp.Regexp
41 | 	endGen func(end string, i int) *regexp.Regexp
42 | }{
43 | 	regexp.MustCompile("^( {0,3})([`~]{3,}) *(\\S*)?(?:.*)"), // opening fence: indentation, fence run, first word after it
44 | 	func(end string, i int) *regexp.Regexp { // endGen: pattern capturing everything up to a fence of at least i `end` characters, or to the end of the text
45 | 		return regexp.MustCompile(fmt.Sprintf(`(?s)(.*?)(?:((?m)^ {0,3}%s{%d,} *$)|$)`, end, i))
46 | 	},
47 | }
48 | 
49 | var reTable = struct {
50 | 	item, itemLp *regexp.Regexp
51 | 	split        func(s string, n int) []string
52 | 	trim         func(src, repl string) string
53 | }{
54 | 	regexp.MustCompile(`^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*`), // item: header row, delimiter row, body rows
55 | 	regexp.MustCompile(`(^ *\|.+)\n( *\| *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*`), // itemLp: same layout, every row starting with a pipe
56 | 	regexp.MustCompile(` *\| *`).Split, // split: cuts a row at each pipe, dropping the surrounding spaces
57 | 	regexp.MustCompile(`^ *\| *| *\| *$`).ReplaceAllString, // trim: replaces the leading and trailing pipe of a row with repl
58 | }
59 | 
60 | var reHTML = struct {
61 | 	CDATA_OPEN, CDATA_CLOSE  string
62 | 	item, comment, tag, span *regexp.Regexp
63 | 	endTagGen                func(tag string) *regexp.Regexp
64 | }{
65 | 	`![CDATA[`, // CDATA_OPEN: the element "name" reHTML.item captures for a CDATA section
66 | 	"?\\]\\]", // CDATA_CLOSE: regexp fragment handed to endTagGen in place of a tag name
67 | 	regexp.MustCompile(`^<(\w+|!\[CDATA\[)(?:"[^"]*"|'[^']*'|[^'">])*?>`), // item: an opening tag; captures its name
68 | 	regexp.MustCompile(`(?sm)<!--.*?-->`), // comment: NOTE this pattern is not anchored to the start of the input
69 | 	regexp.MustCompile(`^<!--.*?-->|^<\/?\w+(?:"[^"]*"|'[^']*'|[^'">])*?>`), // tag: a comment or a single opening/closing tag
70 | 	// TODO: Add all span-tags and move to config.
71 | 	regexp.MustCompile(`^(a|em|strong|small|s|q|data|time|code|sub|sup|i|b|u|span|br|del|img)$`),
72 | 	func(tag string) *regexp.Regexp { // endTagGen: pattern matching everything up to and including </tag>
73 | 		return regexp.MustCompile(fmt.Sprintf(`(?s)(.+?)<\/%s> *`, tag))
74 | 	},
75 | }
76 | 
77 | // Inline grammar: patterns (and the string fragments they are built from) used by the inline lexer.
78 | var (
79 | 	reBr        = regexp.MustCompile(`^(?: {2,}|\\)\n`)
80 | 	reLinkText  = `(?:\[[^\]]*\]|[^\[\]]|\])*`
81 | 	reLinkHref  = `\s*<?(.*?)>?(?:\s+['"\(](.*?)['"\)])?\s*`
82 | 	reGfmLink   = regexp.MustCompile(`^(https?:\/\/[^\s<]+[^<.,:;"')\]\s])`)
83 | 	reLink      = regexp.MustCompile(fmt.Sprintf(`(?s)^!?\[(%s)\]\(%s\)`, reLinkText, reLinkHref))
84 | 	reAutoLink  = regexp.MustCompile(`^<([^ >]+(@|:\/)[^ >]+)>`)
85 | 	reRefLink   = regexp.MustCompile(`^!?\[((?:\[[^\]]*\]|[^\[\]]|\])*)\](?:\s*\[([^\]]*)\])?`)
86 | 	reImage     = regexp.MustCompile(fmt.Sprintf(`(?s)^!?\[(%s)\]\(%s\)`, reLinkText, reLinkHref)) // same pattern as reLink
87 | 	reCode      = regexp.MustCompile("(?s)^`{1,2}\\s*(.*?[^`])\\s*`{1,2}")
88 | 	reStrike    = regexp.MustCompile(`(?s)^~{2}(.+?)~{2}`)
89 | 	reEmphasise = `(?s)^_{%[1]d}(\S.*?_*)_{%[1]d}|^\*{%[1]d}(\S.*?\**)\*{%[1]d}`
90 | 	reItalic    = regexp.MustCompile(fmt.Sprintf(reEmphasise, 1))
91 | 	reStrong    = regexp.MustCompile(fmt.Sprintf(reEmphasise, 2))
92 | )
93 | 


--------------------------------------------------------------------------------
/lexer.go:
--------------------------------------------------------------------------------
  1 | package mark
  2 | 
  3 | import (
  4 | 	"regexp"
  5 | 	"strings"
  6 | 	"unicode/utf8"
  7 | )
  8 | 
  9 | // Pos is a position in the input string, counted in bytes.
 10 | type Pos int
 11 | 
 12 | // itemType identifies the type of lex items.
 13 | type itemType int
 14 | 
 15 | // item represents a token or text string returned from the scanner.
 16 | type item struct {
 17 | 	typ itemType // The type of this item.
 18 | 	pos Pos      // The starting position, in bytes, of this item in the input string.
 19 | 	val string   // The value of this item.
 20 | }
 21 | 
 22 | const eof = -1 // Sentinel rune returned by next once the input is exhausted
 23 | 
 24 | const (
 25 | 	itemError itemType = iota // Error occurred; value is text of error
 26 | 	itemEOF
 27 | 	itemNewLine
 28 | 	itemHTML
 29 | 	itemHeading
 30 | 	itemLHeading
 31 | 	itemBlockQuote
 32 | 	itemList
 33 | 	itemListItem
 34 | 	itemLooseItem
 35 | 	itemCodeBlock
 36 | 	itemGfmCodeBlock
 37 | 	itemHr
 38 | 	itemTable
 39 | 	itemLpTable
 40 | 	itemTableRow
 41 | 	itemTableCell
 42 | 	itemStrong
 43 | 	itemItalic
 44 | 	itemStrike
 45 | 	itemCode
 46 | 	itemLink
 47 | 	itemDefLink
 48 | 	itemRefLink
 49 | 	itemAutoLink
 50 | 	itemGfmLink
 51 | 	itemImage
 52 | 	itemRefImage
 53 | 	itemText
 54 | 	itemBr
 55 | 	itemPipe
 56 | 	itemIndent
 57 | )
 58 | 
 59 | // stateFn represents the state of the scanner as a function that returns the next state.
 60 | type stateFn func(*lexer) stateFn
 61 | 
 62 | // Lexer is the interface through which the parser pulls items, one at a time.
 63 | type Lexer interface {
 64 | 	nextItem() item
 65 | }
 66 | 
 67 | // lexer holds the state of the scanner.
 68 | type lexer struct {
 69 | 	input   string    // the string being scanned
 70 | 	state   stateFn   // the next lexing function to enter
 71 | 	pos     Pos       // current position in the input
 72 | 	start   Pos       // start position of this item
 73 | 	width   Pos       // width of last rune read from input
 74 | 	lastPos Pos       // position of most recent item returned by nextItem
 75 | 	items   chan item // channel of scanned items
 76 | }
 77 | 
 78 | // lex builds a block lexer over input and starts scanning it in its own
 79 | // goroutine; the scanned items are delivered on the items channel.
 80 | func lex(input string) *lexer {
 81 | 	scanner := new(lexer)
 82 | 	scanner.input = input
 83 | 	scanner.items = make(chan item)
 84 | 	go scanner.run()
 85 | 	return scanner
 86 | }
 87 | 
 88 | // lexInline builds a lexer over input and starts the single-pass inline
 89 | // scan in its own goroutine; items are delivered on the items channel.
 90 | func lexInline(input string) *lexer {
 91 | 	scanner := new(lexer)
 92 | 	scanner.input = input
 93 | 	scanner.items = make(chan item)
 94 | 	go scanner.lexInline()
 95 | 	return scanner
 96 | }
 97 | 
 98 | // run drives the state machine, starting at lexAny, until a state returns
 99 | // nil, and then closes the items channel.
100 | func (l *lexer) run() {
101 | 	for l.state = lexAny; l.state != nil; l.state = l.state(l) {
102 | 	}
103 | 	close(l.items)
104 | }
105 | 
106 | // next consumes and returns the next rune of the input, or eof (with a zero width) when none is left.
107 | func (l *lexer) next() rune {
108 | 	if len(l.input) <= int(l.pos) {
109 | 		l.width = 0
110 | 		return eof
111 | 	}
112 | 	char, size := utf8.DecodeRuneInString(l.input[l.pos:])
113 | 	l.width = Pos(size)
114 | 	l.pos += Pos(size)
115 | 	return char
116 | }
117 | 
118 | // lexAny is the dispatcher of the block lexer: it peeks at the next rune and
119 | // returns the scanner that should try to lex the block starting there.
120 | func lexAny(l *lexer) stateFn {
121 | 	switch r := l.peek(); r {
122 | 	case '*', '-', '_':
123 | 		return lexHr
124 | 	case '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
125 | 		return lexList
126 | 	case '<':
127 | 		return lexHTML
128 | 	case '>':
129 | 		return lexBlockQuote
130 | 	case '[':
131 | 		return lexDefLink
132 | 	case '#':
133 | 		return lexHeading
134 | 	case '`', '~':
135 | 		return lexGfmCode
136 | 	case ' ':
137 | 		if reCodeBlock.MatchString(l.input[l.pos:]) {
138 | 			return lexCode
139 | 		} else if reGfmCode.MatchString(l.input[l.pos:]) {
140 | 			return lexGfmCode
141 | 		}
142 | 		// Consume the whole run of spaces and emit it as a single indent item
143 | 		for ; r == l.peek(); r = l.next() {
144 | 		}
145 | 		l.emit(itemIndent)
146 | 		return lexAny
147 | 	case '|':
148 | 		if m := reTable.itemLp.MatchString(l.input[l.pos:]); m {
149 | 			l.emit(itemLpTable)
150 | 			return lexTable
151 | 		}
152 | 		fallthrough
153 | 	default:
154 | 		if m := reTable.item.MatchString(l.input[l.pos:]); m {
155 | 			l.emit(itemTable)
156 | 			return lexTable
157 | 		}
158 | 		return lexText
159 | 	}
160 | }
161 | 
162 | // lexHeading emits a heading item when the current position starts an ATX
163 | // heading and returns to lexAny; otherwise it hands over to lexText.
164 | func lexHeading(l *lexer) stateFn {
165 | 	heading := reHeading.FindString(l.input[l.pos:])
166 | 	if heading == "" {
167 | 		return lexText
168 | 	}
169 | 	l.pos += Pos(len(heading))
170 | 	l.emit(itemHeading)
171 | 	return lexAny
172 | }
173 | 
174 | // lexHr emits a horizontal-rule item when the current position starts one
175 | // and returns to lexAny; otherwise it hands over to lexList.
176 | func lexHr(l *lexer) stateFn {
177 | 	rule := reHr.FindString(l.input[l.pos:])
178 | 	if rule == "" {
179 | 		return lexList
180 | 	}
181 | 	l.pos += Pos(len(rule))
182 | 	l.emit(itemHr)
183 | 	return lexAny
184 | }
185 | 
186 | // lexGfmCode tests whether the current position starts a GFM fenced code block.
187 | // If so, it builds the closing-fence regexp from the fence character [`~] and
188 | // the fence length, scans up to that closing fence (or to the end of the input),
189 | // emits the code-block item and returns to the lexAny dispatcher.
190 | // Otherwise the input is lexed as plain text.
191 | func lexGfmCode(l *lexer) stateFn {
192 | 	if match := reGfmCode.FindStringSubmatch(l.input[l.pos:]); len(match) != 0 {
193 | 		l.pos += Pos(len(match[0]))
194 | 		fence := match[2]
195 | 		// Generate Regexp based on fence type[`~] and length
196 | 		reGfmEnd := reGfmCode.endGen(fence[0:1], len(fence))
197 | 		infoContainer := reGfmEnd.FindStringSubmatch(l.input[l.pos:])
198 | 		l.pos += Pos(len(infoContainer[0]))
199 | 		infoString := infoContainer[1]
200 | 		// Strip up to `indent` leading spaces (the opening fence's indentation) from every line
201 | 		if indent := len(match[1]); indent > 0 {
202 | 			reSpace := reSpaceGen(indent)
203 | 			infoString = reSpace.ReplaceAllString(infoString, "")
204 | 		}
205 | 		l.emit(itemGfmCodeBlock, match[0]+infoString)
206 | 		return lexAny
207 | 	}
208 | 	return lexText
209 | }
210 | 
211 | // lexCode consumes the indented code block at the current position and emits it.
212 | func lexCode(l *lexer) stateFn {
213 | 	block := reCodeBlock.FindString(l.input[l.pos:])
214 | 	l.pos = l.pos + Pos(len(block))
215 | 	l.emit(itemCodeBlock)
216 | 	return lexAny
217 | }
218 | 
219 | // lexText scans plain text up to the end of the line, a Setext heading or the end of the input.
220 | func lexText(l *lexer) stateFn {
221 | 	// Flush any pending text as itemText, then advance by pos and emit the given item
222 | 	emit := func(item itemType, pos Pos) {
223 | 		if l.pos > l.start {
224 | 			l.emit(itemText)
225 | 		}
226 | 		l.pos += pos
227 | 		l.emit(item)
228 | 	}
229 | Loop:
230 | 	for {
231 | 		switch r := l.peek(); r {
232 | 		case eof:
233 | 			emit(itemEOF, Pos(0))
234 | 			break Loop
235 | 		case '\n':
236 | 			// CM 4.4: An indented code block cannot interrupt a paragraph.
237 | 			if l.pos > l.start && strings.HasPrefix(l.input[l.pos+1:], "    ") {
238 | 				l.next()
239 | 				continue
240 | 			}
241 | 			emit(itemNewLine, l.width)
242 | 			break Loop
243 | 		default:
244 | 			// Test for Setext-style headers
245 | 			if m := reLHeading.FindString(l.input[l.pos:]); m != "" {
246 | 				emit(itemLHeading, Pos(len(m)))
247 | 				break Loop
248 | 			}
249 | 			l.next()
250 | 		}
251 | 	}
252 | 	return lexAny
253 | }
254 | 
255 | // backup rewinds the position by the width of the last rune read; call it at most once per call of next.
256 | func (l *lexer) backup() {
257 | 	l.pos = l.pos - l.width
258 | }
259 | 
260 | // peek reports the next rune of the input without consuming it: the rune is
261 | // read with next and the position is restored with backup.
262 | func (l *lexer) peek() rune {
263 | 	defer l.backup()
264 | 	return l.next()
265 | }
266 | 
267 | // emit sends an item of type t to the client; its value is s[0] when given, else the text scanned since the last emit.
268 | func (l *lexer) emit(t itemType, s ...string) {
269 | 	if len(s) == 0 {
270 | 		s = []string{l.input[l.start:l.pos]}
271 | 	}
272 | 	l.items <- item{typ: t, pos: l.start, val: s[0]}
273 | 	l.start = l.pos
274 | }
275 | 
276 | // nextItem returns the next item, called by the parser; lastPos records that item's own start position.
277 | func (l *lexer) nextItem() item {
278 | 	next := <-l.items
279 | 	l.lastPos = next.pos
280 | 	return next
281 | }
282 | 
283 | // lexInline scans the whole input in a single pass, emitting inline items, and closes the items channel when done.
284 | func (l *lexer) lexInline() {
285 | 	escape := regexp.MustCompile("^\\\\([\\`*{}\\[\\]()#+\\-.!_>~|])")
286 | 	// Flush any pending text as itemText, then advance by pos and emit the given item
287 | 	emit := func(item itemType, pos int) {
288 | 		if l.pos > l.start {
289 | 			l.emit(itemText)
290 | 		}
291 | 		l.pos += Pos(pos)
292 | 		l.emit(item)
293 | 	}
294 | Loop:
295 | 	for {
296 | 		switch r := l.peek(); r {
297 | 		case eof:
298 | 			if l.pos > l.start {
299 | 				l.emit(itemText)
300 | 			}
301 | 			break Loop
302 | 		// backslash escaping: emit the escaped character itself as text
303 | 		case '\\':
304 | 			if m := escape.FindStringSubmatch(l.input[l.pos:]); len(m) != 0 {
305 | 				if l.pos > l.start {
306 | 					l.emit(itemText)
307 | 				}
308 | 				l.pos += Pos(len(m[0]))
309 | 				l.emit(itemText, m[1])
310 | 				break
311 | 			}
312 | 			fallthrough
313 | 		case ' ':
314 | 			if m := reBr.FindString(l.input[l.pos:]); m != "" {
315 | 				// advance over the whole break marker, new-line included
316 | 				emit(itemBr, len(m))
317 | 				break
318 | 			}
319 | 			l.next()
320 | 		case '_', '*', '~', '`':
321 | 			input := l.input[l.pos:]
322 | 			// Strong
323 | 			if m := reStrong.FindString(input); m != "" {
324 | 				emit(itemStrong, len(m))
325 | 				break
326 | 			}
327 | 			// Italic
328 | 			if m := reItalic.FindString(input); m != "" {
329 | 				emit(itemItalic, len(m))
330 | 				break
331 | 			}
332 | 			// Strike
333 | 			if m := reStrike.FindString(input); m != "" {
334 | 				emit(itemStrike, len(m))
335 | 				break
336 | 			}
337 | 			// InlineCode
338 | 			if m := reCode.FindString(input); m != "" {
339 | 				emit(itemCode, len(m))
340 | 				break
341 | 			}
342 | 			l.next()
343 | 		// itemLink, itemImage, itemRefLink, itemRefImage
344 | 		case '[', '!':
345 | 			input := l.input[l.pos:]
346 | 			if m := reLink.FindString(input); m != "" {
347 | 				pos := len(m)
348 | 				if r == '[' {
349 | 					emit(itemLink, pos)
350 | 				} else {
351 | 					emit(itemImage, pos)
352 | 				}
353 | 				break
354 | 			}
355 | 			if m := reRefLink.FindString(input); m != "" {
356 | 				pos := len(m)
357 | 				if r == '[' {
358 | 					emit(itemRefLink, pos)
359 | 				} else {
360 | 					emit(itemRefImage, pos)
361 | 				}
362 | 				break
363 | 			}
364 | 			l.next()
365 | 		// itemAutoLink, itemHTML
366 | 		case '<':
367 | 			if m := reAutoLink.FindString(l.input[l.pos:]); m != "" {
368 | 				emit(itemAutoLink, len(m))
369 | 				break
370 | 			}
371 | 			if match, res := l.matchHTML(l.input[l.pos:]); match {
372 | 				emit(itemHTML, len(res))
373 | 				break
374 | 			}
375 | 			l.next()
376 | 		default:
377 | 			if m := reGfmLink.FindString(l.input[l.pos:]); m != "" {
378 | 				emit(itemGfmLink, len(m))
379 | 				break
380 | 			}
381 | 			l.next()
382 | 		}
383 | 	}
384 | 	close(l.items)
385 | }
386 | 
387 | // lexHTML emits an HTML item when matchHTML accepts the input at the current position, else falls back to lexText.
388 | func lexHTML(l *lexer) stateFn {
389 | 	next := stateFn(lexText)
390 | 	if ok, html := l.matchHTML(l.input[l.pos:]); ok {
391 | 		l.pos += Pos(len(html))
392 | 		l.emit(itemHTML)
393 | 		next = lexAny
394 | 	}
395 | 	return next
396 | }
396 | 
397 | // matchHTML reports whether input starts with an HTML block (comment, self-closed or closed element) and returns the matched text.
398 | func (l *lexer) matchHTML(input string) (bool, string) {
399 | 	// The comment pattern is unanchored, so only accept a match that starts at offset 0.
400 | 	if loc := reHTML.comment.FindStringIndex(input); loc != nil && loc[0] == 0 {
401 | 		return true, input[:loc[1]]
402 | 	}
403 | 	if m := reHTML.item.FindStringSubmatch(input); len(m) != 0 {
404 | 		el, name := m[0], m[1]
405 | 		// span-level tags are not treated as HTML blocks
406 | 		if reHTML.span.MatchString(name) {
407 | 			return false, ""
408 | 		}
409 | 		// if it's a self-closed html element, but not a itemAutoLink
410 | 		if strings.HasSuffix(el, "/>") && !reAutoLink.MatchString(el) {
411 | 			return true, el
412 | 		}
413 | 		if name == reHTML.CDATA_OPEN {
414 | 			name = reHTML.CDATA_CLOSE
415 | 		}
416 | 		if m := reHTML.endTagGen(name).FindString(input); m != "" {
417 | 			return true, m
418 | 		}
419 | 	}
420 | 	return false, ""
421 | }
422 | 
423 | // lexDefLink emits a link-definition item when one starts at the current position, else falls back to lexText.
424 | func lexDefLink(l *lexer) stateFn {
425 | 	next := stateFn(lexText)
426 | 	if def := reDefLink.FindString(l.input[l.pos:]); def != "" {
427 | 		l.pos += Pos(len(def))
428 | 		l.emit(itemDefLink)
429 | 		next = lexAny
430 | 	}
431 | 	return next
432 | }
432 | 
433 | // lexList scans ordered and unordered lists: it emits itemList, then one (loose) list item per entry.
434 | func lexList(l *lexer) stateFn {
435 | 	match, items := l.matchList(l.input[l.pos:])
436 | 	if !match {
437 | 		return lexText
438 | 	}
439 | 	var space int
440 | 	var typ itemType
441 | 	for i, item := range items {
442 | 		// Emit itemList once, carrying the marker of the first item as its value
443 | 		if i == 0 {
444 | 			l.emit(itemList, reList.marker.FindStringSubmatch(item)[1])
445 | 		}
446 | 		// Consume the raw item and strip its marker
447 | 		typ = itemListItem
448 | 		space = len(item)
449 | 		l.pos += Pos(space)
450 | 		item = reList.marker.ReplaceAllString(item, "")
451 | 		// Indented continuation lines: remove up to the marker's width in leading spaces
452 | 		if strings.Contains(item, "\n ") {
453 | 			space -= len(item)
454 | 			reSpace := reSpaceGen(space)
455 | 			item = reSpace.ReplaceAllString(item, "")
456 | 		}
457 | 		// If current is loose (note: the loop variable l shadows the lexer inside this loop)
458 | 		for _, l := range reList.loose.FindAllString(item, -1) {
459 | 			if len(strings.TrimSpace(l)) > 0 || i != len(items)-1 {
460 | 				typ = itemLooseItem
461 | 				break
462 | 			}
463 | 		}
464 | 		// or the previous item ended with a blank line
465 | 		if typ != itemLooseItem && i > 0 && strings.HasSuffix(items[i-1], "\n\n") {
466 | 			typ = itemLooseItem
467 | 		}
468 | 		l.emit(typ, strings.TrimSpace(item))
469 | 	}
470 | 	return lexAny
471 | }
472 | 
473 | func (l *lexer) matchList(input string) (bool, []string) { // reports whether input starts with a list and returns its raw items
474 | 	var res []string
475 | 	reItem := reList.item
476 | 	if !reItem.MatchString(input) {
477 | 		return false, res
478 | 	}
479 | 	// First item: its indentation defines the depth of this list
480 | 	m := reItem.FindStringSubmatch(input)
481 | 	item, depth := m[0], len(m[1])
482 | 	input = input[len(item):]
483 | 	// Consume the input line by line, growing the current item
484 | 	for len(input) > 0 {
485 | 		// Leading new-lines belong to the current item; they may also end the list
486 | 		if m := reList.scanNewLine(input); m != "" {
487 | 			item += m
488 | 			input = input[len(m):]
489 | 			if len(m) >= 2 || !reItem.MatchString(input) && !strings.HasPrefix(input, " ") {
490 | 				break
491 | 			}
492 | 		}
493 | 		// A link definition or a horizontal rule ends the list
494 | 		if reDefLink.MatchString(input) || reHr.MatchString(input) {
495 | 			break
496 | 		}
497 | 		// A list item at the same depth starts a new entry; anything else extends the current one
498 | 		if m := reItem.FindStringSubmatch(input); len(m) > 0 && len(m[1]) == depth {
499 | 			if item != "" {
500 | 				res = append(res, item)
501 | 			}
502 | 			item = m[0]
503 | 			input = input[len(item):]
504 | 		} else {
505 | 			m := reList.scanLine(input)
506 | 			item += m
507 | 			input = input[len(m):]
508 | 		}
509 | 	}
510 | 	// Flush the last item
511 | 	if item != "" {
512 | 		res = append(res, item)
513 | 	}
514 | 	return true, res
515 | }
516 | 
517 | // matchBlockQuote reports whether input starts with a blockquote and
518 | // returns the text that belongs to it.
519 | func (l *lexer) matchBlockQuote(input string) (bool, string) {
520 | 	quote := reBlockQuote.FindString(input)
521 | 	if quote == "" {
522 | 		return false, quote
523 | 	}
524 | 	lines := strings.Split(quote, "\n")
525 | 	for idx := range lines {
526 | 		// a link definition or a horizontal rule ends the quote just before that line
527 | 		if reDefLink.MatchString(lines[idx]) || reHr.MatchString(lines[idx]) {
528 | 			return true, strings.Join(lines[:idx], "\n")
529 | 		}
530 | 	}
531 | 	return true, quote
532 | }
533 | 
534 | // lexBlockQuote emits a blockquote item when one starts at the current position, else falls back to lexText.
535 | func lexBlockQuote(l *lexer) stateFn {
536 | 	next := stateFn(lexText)
537 | 	if ok, quote := l.matchBlockQuote(l.input[l.pos:]); ok {
538 | 		l.pos += Pos(len(quote))
539 | 		l.emit(itemBlockQuote)
540 | 		next = lexAny
541 | 	}
542 | 	return next
543 | }
543 | 
544 | // lexTable consumes the table at the current position (the itemLp expression is used when the next
545 | // rune is '|') and emits an itemTableRow for every non-empty row, each followed by its itemTableCell items.
546 | func lexTable(l *lexer) stateFn {
547 | 	pattern := reTable.item
548 | 	if l.peek() == '|' {
549 | 		pattern = reTable.itemLp
550 | 	}
551 | 	// NOTE(review): m is indexed without a nil check; presumably callers enter this state only after a match - confirm.
552 | 	m := pattern.FindStringSubmatch(l.input[l.pos:])
553 | 	l.pos += Pos(len(m[0]))
554 | 	l.start = l.pos
555 | 	// The whole match (m[0]) is not a row: groups 1 and 2 are single rows, group 3 is split into one row per line.
556 | 	rows := []string{m[1], m[2]}
557 | 	rows = append(rows, strings.Split(m[3], "\n")...)
558 | 	for _, row := range rows {
559 | 		if row == "" {
560 | 			continue
561 | 		}
562 | 		l.emit(itemTableRow)
563 | 		for _, cell := range reTable.split(reTable.trim(row, ""), -1) {
564 | 			l.emit(itemTableCell, cell)
565 | 		}
566 | 	}
567 | 	return lexAny
568 | }
569 | 


--------------------------------------------------------------------------------
/lexer_test.go:
--------------------------------------------------------------------------------
  1 | package mark
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"testing"
  6 | )
  7 | 
  8 | // itemName maps item types to readable names; it backs itemType.String.
  9 | // Types without an entry are rendered by String as "item<N>".
 10 | var itemName = map[itemType]string{
 11 | 	itemError:        "Error",
 12 | 	itemEOF:          "EOF",
 13 | 	itemNewLine:      "NewLine",
 14 | 	itemIndent:       "Indent", // used by the "text-4" block case
 15 | 	itemHTML:         "HTML",
 16 | 	itemHeading:      "Heading",
 17 | 	itemLHeading:     "LHeading",
 18 | 	itemBlockQuote:   "BlockQuote",
 19 | 	itemList:         "List",
 20 | 	itemListItem:     "ListItem",
 21 | 	itemLooseItem:    "LooseItem",
 22 | 	itemCodeBlock:    "CodeBlock",
 23 | 	itemGfmCodeBlock: "GfmCodeBlock",
 24 | 	itemHr:           "Hr",
 25 | 	itemTable:        "Table",
 26 | 	itemLpTable:      "LpTable",
 27 | 	itemTableRow:     "TableRow",
 28 | 	itemTableCell:    "TableCell",
 29 | 	itemText:         "Text",
 30 | 	itemLink:         "Link",
 31 | 	itemDefLink:      "DefLink",
 32 | 	itemRefLink:      "RefLink",
 33 | 	itemAutoLink:     "AutoLink",
 34 | 	itemGfmLink:      "GfmLink",
 35 | 	itemStrong:       "Strong",
 36 | 	itemItalic:       "Italic",
 37 | 	itemStrike:       "Strike",
 38 | 	itemCode:         "Code",
 39 | 	itemImage:        "Image",
 40 | 	itemRefImage:     "RefImage",
 41 | 	itemBr:           "Br",
 42 | 	itemPipe:         "Pipe",
 43 | }
 41 | 
 42 | // String returns the name registered in itemName for i, or "item<N>" when there is none.
 43 | func (i itemType) String() string {
 44 | 	if name := itemName[i]; name != "" {
 45 | 		return name
 46 | 	}
 47 | 	return fmt.Sprintf("item%d", int(i))
 48 | }
 49 | 
 50 | // lexTest is one lexer case: a name used in failure messages, the input and the expected items.
 51 | type lexTest struct {
 52 | 	name, input string
 53 | 	items       []item
 54 | }
 55 | 
 56 | var ( // expected items shared by the test tables; pos is left at zero and empty values are omitted
 57 | 	tEOF     = item{typ: itemEOF}
 58 | 	tNewLine = item{typ: itemNewLine, val: "\n"}
 59 | 	tBr      = item{typ: itemBr, val: "  \n"}
 60 | 	tPipe    = item{typ: itemPipe, val: "|"}
 61 | 	tTable   = item{typ: itemTable}
 62 | 	tLpTable = item{typ: itemLpTable}
 63 | 	tRow     = item{typ: itemTableRow}
 64 | )
 65 | 
 66 | var blockTests = []lexTest{ // cases for the block-level lexer; TestBlockLexer compares types and values only
 67 | 	{"empty", "", []item{tEOF}},
 68 | 	{"heading", "# Hello", []item{
 69 | 		{itemHeading, 0, "# Hello"},
 70 | 		tEOF,
 71 | 	}},
 72 | 	{"lheading", "Hello\n===", []item{
 73 | 		{itemLHeading, 0, "Hello\n==="},
 74 | 		tEOF,
 75 | 	}},
 76 | 	{"blockqoute", "> foo bar", []item{
 77 | 		{itemBlockQuote, 0, "> foo bar"},
 78 | 		tEOF,
 79 | 	}},
 80 | 	{"unordered list", "- foo\n- bar", []item{
 81 | 		{itemList, 0, "-"},
 82 | 		{itemListItem, 0, "foo"},
 83 | 		{itemListItem, 0, "bar"},
 84 | 		tEOF,
 85 | 	}},
 86 | 	{"ordered list", "1. foo\n2. bar", []item{
 87 | 		{itemList, 0, "1."},
 88 | 		{itemListItem, 0, "foo"},
 89 | 		{itemListItem, 0, "bar"},
 90 | 		tEOF,
 91 | 	}},
 92 | 	{"loose-items", "- foo\n\n- bar", []item{ // a blank line between items yields itemLooseItem instead of itemListItem
 93 | 		{itemList, 0, "-"},
 94 | 		{itemLooseItem, 0, "foo"},
 95 | 		{itemLooseItem, 0, "bar"},
 96 | 		tEOF,
 97 | 	}},
 98 | 	{"code-block", "    foo\n    bar", []item{
 99 | 		{itemCodeBlock, 0, "    foo\n    bar"},
100 | 		tEOF,
101 | 	}},
102 | 	{"gfm-code-block-1", "~~~js\nfoo\n~~~", []item{ // the closing fence is not part of the expected value
103 | 		{itemGfmCodeBlock, 0, "~~~js\nfoo\n"},
104 | 		tEOF,
105 | 	}},
106 | 	{"gfm-code-block-2", "```js\nfoo\n```", []item{
107 | 		{itemGfmCodeBlock, 0, "```js\nfoo\n"},
108 | 		tEOF,
109 | 	}},
110 | 	{"hr1", "* * *\n***", []item{ // the newline after the first rule belongs to its value
111 | 		{itemHr, 0, "* * *\n"},
112 | 		{itemHr, 0, "***"},
113 | 		tEOF,
114 | 	}},
115 | 	{"hr2", "- - -\n---", []item{
116 | 		{itemHr, 0, "- - -\n"},
117 | 		{itemHr, 0, "---"},
118 | 		tEOF,
119 | 	}},
120 | 	{"hr3", "_ _ _\n___", []item{
121 | 		{itemHr, 0, "_ _ _\n"},
122 | 		{itemHr, 0, "___"},
123 | 		tEOF,
124 | 	}},
125 | 	{"table", "Id | Name\n---|-----\n1 | Ariel", []item{ // the separator line is expected as an ordinary row
126 | 		tTable,
127 | 		tRow,
128 | 		{itemTableCell, 0, "Id"},
129 | 		{itemTableCell, 0, "Name"},
130 | 		tRow,
131 | 		{itemTableCell, 0, "---"},
132 | 		{itemTableCell, 0, "-----"},
133 | 		tRow,
134 | 		{itemTableCell, 0, "1"},
135 | 		{itemTableCell, 0, "Ariel"},
136 | 		tEOF,
137 | 	}},
138 | 	{"lp-table", "|Id | Name|\n|---|-----|\n|1 | Ariel|", []item{ // same table written with leading pipes
139 | 		tLpTable,
140 | 		tRow,
141 | 		{itemTableCell, 0, "Id"},
142 | 		{itemTableCell, 0, "Name"},
143 | 		tRow,
144 | 		{itemTableCell, 0, "---"},
145 | 		{itemTableCell, 0, "-----"},
146 | 		tRow,
147 | 		{itemTableCell, 0, "1"},
148 | 		{itemTableCell, 0, "Ariel"},
149 | 		tEOF,
150 | 	}},
151 | 	{"text-1", "hello\nworld", []item{
152 | 		{itemText, 0, "hello"},
153 | 		tNewLine,
154 | 		{itemText, 0, "world"},
155 | 		tEOF,
156 | 	}},
157 | 	{"text-2", "__hello__\n__world__", []item{ // inline markup is expected as plain text at block level
158 | 		{itemText, 0, "__hello__"},
159 | 		tNewLine,
160 | 		{itemText, 0, "__world__"},
161 | 		tEOF,
162 | 	}},
163 | 	{"text-3", "~**_hello world_**~", []item{
164 | 		{itemText, 0, "~**_hello world_**~"},
165 | 		tEOF,
166 | 	}},
167 | 	{"text-4", "  hello world", []item{ // leading spaces are expected as a separate itemIndent
168 | 		{itemIndent, 0, "  "},
169 | 		{itemText, 0, "hello world"},
170 | 		tEOF,
171 | 	}},
172 | 	{"deflink", "[1]: http://example.com", []item{
173 | 		{itemDefLink, 0, "[1]: http://example.com"},
174 | 		tEOF,
175 | 	}},
176 | }
177 | 
178 | var inlineTests = []lexTest{ // cases for the inline lexer (run by TestInlineLexer); no EOF item is expected
179 | 	{"text-1", "hello", []item{
180 | 		{itemText, 0, "hello"},
181 | 	}},
182 | 	{"text-2", "hello\nworld", []item{ // a plain newline stays inside the text item
183 | 		{itemText, 0, "hello\nworld"},
184 | 	}},
185 | 	{"br", "hello  \nworld", []item{ // two spaces before the newline are expected as itemBr
186 | 		{itemText, 0, "hello"},
187 | 		tBr,
188 | 		{itemText, 0, "world"},
189 | 	}},
190 | 	{"strong-1", "**hello**", []item{
191 | 		{itemStrong, 0, "**hello**"},
192 | 	}},
193 | 	{"strong-2", "__world__", []item{
194 | 		{itemStrong, 0, "__world__"},
195 | 	}},
196 | 	{"italic-1", "*hello*", []item{
197 | 		{itemItalic, 0, "*hello*"},
198 | 	}},
199 | 	{"italic-2", "_hello_", []item{
200 | 		{itemItalic, 0, "_hello_"},
201 | 	}},
202 | 	{"strike", "~~hello~~", []item{
203 | 		{itemStrike, 0, "~~hello~~"},
204 | 	}},
205 | 	{"code", "`hello`", []item{
206 | 		{itemCode, 0, "`hello`"},
207 | 	}},
208 | 	{"link-1", "[hello](world)", []item{
209 | 		{itemLink, 0, "[hello](world)"},
210 | 	}},
211 | 	{"link-2", "[hello](world 'title')", []item{
212 | 		{itemLink, 0, "[hello](world 'title')"},
213 | 	}},
214 | 	{"autolink-1", "<http://example.com/>", []item{
215 | 		{itemAutoLink, 0, "<http://example.com/>"},
216 | 	}},
217 | 	{"autolink-2", "<http://example.com/?foo=1&bar=2>", []item{
218 | 		{itemAutoLink, 0, "<http://example.com/?foo=1&bar=2>"},
219 | 	}},
220 | 	{"gfmlink-1", "link: http://example.com/?foo=1&bar=2", []item{ // a bare URL is split off the preceding text
221 | 		{itemText, 0, "link: "},
222 | 		{itemGfmLink, 0, "http://example.com/?foo=1&bar=2"},
223 | 	}},
224 | 	{"gfmlink-2", "http://example.com", []item{
225 | 		{itemGfmLink, 0, "http://example.com"},
226 | 	}},
227 | 	{"reflink-1", "[hello][world]", []item{
228 | 		{itemRefLink, 0, "[hello][world]"},
229 | 	}},
230 | 	{"reflink-2", "[hello]", []item{ // shortcut form without a second pair of brackets
231 | 		{itemRefLink, 0, "[hello]"},
232 | 	}},
233 | 	{"image-1", "![hello](world)", []item{
234 | 		{itemImage, 0, "![hello](world)"},
235 | 	}},
236 | 	{"image-2", "![hello](world 'title')", []item{
237 | 		{itemImage, 0, "![hello](world 'title')"},
238 | 	}},
239 | 	{"refimage-1", "![hello][world]", []item{
240 | 		{itemRefImage, 0, "![hello][world]"},
241 | 	}},
242 | 	{"refimage-2", "![hello]", []item{
243 | 		{itemRefImage, 0, "![hello]"},
244 | 	}},
245 | }
246 | 
247 | // collect lexes t.input and returns every item received, up to and including the first EOF or error item.
248 | // lex is called unconditionally; for inline cases its result is then replaced by lexInline's.
249 | func collect(t *lexTest, isInline bool) (items []item) {
250 | 	l := lex(t.input)
251 | 	if isInline {
252 | 		l = lexInline(t.input)
253 | 	}
254 | 	for tok := range l.items {
255 | 		items = append(items, tok)
256 | 		switch tok.typ {
257 | 		case itemEOF, itemError:
258 | 			return items
259 | 		}
260 | 	}
261 | 	return items
262 | }
261 | 
262 | // equal reports whether i1 and i2 hold the same items in the same order.
263 | // Types and values are always compared; positions only when checkPos is true.
264 | func equal(i1, i2 []item, checkPos bool) bool {
265 | 	if len(i1) != len(i2) {
266 | 		return false
267 | 	}
268 | 	for k, left := range i1 {
269 | 		right := i2[k]
270 | 		switch {
271 | 		case left.typ != right.typ, left.val != right.val:
272 | 			return false
273 | 		case checkPos && left.pos != right.pos:
274 | 			return false
275 | 		}
276 | 	}
277 | 	return true
278 | }
279 | 
280 | // TestBlockLexer runs every blockTests case through the block lexer and compares item types and values.
281 | func TestBlockLexer(t *testing.T) {
282 | 	for idx := range blockTests {
283 | 		tc := &blockTests[idx]
284 | 		got := collect(tc, false)
285 | 		if equal(got, tc.items, false) {
286 | 			continue
287 | 		}
288 | 		t.Errorf("%s: got\n\t%+v\nexpected\n\t%+v", tc.name, got, tc.items)
289 | 	}
290 | }
288 | 
289 | // TestInlineLexer runs every inlineTests case through the inline lexer and compares item types and values.
290 | func TestInlineLexer(t *testing.T) {
291 | 	for idx := range inlineTests {
292 | 		tc := &inlineTests[idx]
293 | 		got := collect(tc, true)
294 | 		if equal(got, tc.items, false) {
295 | 			continue
296 | 		}
297 | 		t.Errorf("%s: got\n\t%+v\nexpected\n\t%+v", tc.name, got, tc.items)
298 | 	}
299 | }
297 | 
298 | var lexPosTests = []lexTest{ // cases whose pos fields are compared as well (run by TestPos)
299 | 	{"empty", "", []item{tEOF}},
300 | 	{"text", "hello\nworld", []item{
301 | 		{itemText, 0, "hello"},
302 | 		{itemNewLine, 5, "\n"}, // "hello" covers offsets 0-4
303 | 		{itemText, 6, "world"},
304 | 		{itemEOF, 11, ""}, // 11 is the length of the input
305 | 	}},
306 | 	{"heading", "# hello\nworld", []item{
307 | 		{itemHeading, 0, "# hello\n"},
308 | 		{itemText, 8, "world"}, // the heading value "# hello\n" is 8 bytes long
309 | 		{itemEOF, 13, ""},
310 | 	}},
311 | }
312 | 
313 | // TestPos runs the lexPosTests cases through the block lexer and, unlike the
314 | // other lexer tests, also compares the position of every returned item.
315 | func TestPos(t *testing.T) {
316 | 	for idx := range lexPosTests {
317 | 		tc := &lexPosTests[idx]
318 | 		got := collect(tc, false)
319 | 		if equal(got, tc.items, true) {
320 | 			continue
321 | 		}
322 | 		t.Errorf("%s: got\n\t%+v\nexpected\n\t%+v", tc.name, got, tc.items)
323 | 	}
324 | }
322 | 


--------------------------------------------------------------------------------
/mark.go:
--------------------------------------------------------------------------------
 1 | package mark
 2 | 
 3 | import "strings"
 4 | 
 5 | // Mark bundles the input text with the embedded parse state that Render drives.
 6 | type Mark struct {
 7 | 	*parse
 8 | 	Input string // the text as stored by New, i.e. after tabs were replaced by spaces
 9 | }
10 | 
11 | // Options is used to configure a Mark object.
12 | // Set `Smartypants` and `Fractions` to true to enable
13 | // smartypants and smartfractions rendering.
14 | type Options struct {
15 | 	Gfm         bool // set to true by DefaultOptions
16 | 	Tables      bool // set to true by DefaultOptions
17 | 	Smartypants bool // opt-in: smartypants rendering
18 | 	Fractions   bool // opt-in: smartfractions rendering
19 | }
20 | 
21 | // DefaultOptions returns an Options value with the default configuration:
22 | // only Gfm and Tables are set to true.
23 | func DefaultOptions() *Options {
24 | 	opts := new(Options)
25 | 	opts.Gfm = true
26 | 	opts.Tables = true
27 | 	return opts
28 | }
29 | 
30 | // New returns a Mark for the given input.
31 | // A nil opts selects DefaultOptions.
32 | func New(input string, opts *Options) *Mark {
33 | 	if opts == nil {
34 | 		opts = DefaultOptions()
35 | 	}
36 | 	// Preprocessing: every tab becomes four spaces before the parser sees the text.
37 | 	expanded := strings.Replace(input, "\t", "    ", -1)
38 | 	m := &Mark{Input: expanded}
39 | 	m.parse = newParse(expanded, opts)
40 | 	return m
41 | }
42 | 
43 | // Render parses the input, renders the parsed result and returns the output.
44 | func (m *Mark) Render() string {
45 | 	m.parse.parse()
46 | 	m.render()
47 | 	return m.output
48 | }
49 | 
50 | // AddRenderFn stores fn as the render function for nodes of type typ,
51 | // overriding the default rendering of that node type.
52 | func (m *Mark) AddRenderFn(typ NodeType, fn RenderFn) {
53 | 	m.renderFn[typ] = fn
54 | }
55 | 
56 | // Render is a static shortcut: it renders input with the default options
57 | // and returns the result.
58 | func Render(input string) string {
59 | 	return New(input, nil).Render()
60 | }
61 | 


--------------------------------------------------------------------------------
/mark_test.go:
--------------------------------------------------------------------------------
   1 | package mark
   2 | 
   3 | import (
   4 | 	"io/ioutil"
   5 | 	"regexp"
   6 | 	"strings"
   7 | 	"testing"
   8 | )
   9 | 
  10 | func TestRender(t *testing.T) { // each input must render to exactly the HTML it is mapped to
  11 | 	cases := map[string]string{
  12 | 		"foobar":               "<p>foobar</p>",
  13 | 		"  foo bar":            "<p>foo bar</p>",
  14 | 		"|foo|bar":             "<p>|foo|bar</p>",
  15 | 		"foo  \nbar":           "<p>foo<br>bar</p>",
  16 | 		"__bar__ foo":          "<p><strong>bar</strong> foo</p>",
  17 | 		"**bar** foo __bar__":  "<p><strong>bar</strong> foo <strong>bar</strong></p>",
  18 | 		"**bar**__baz__":       "<p><strong>bar</strong><strong>baz</strong></p>",
  19 | 		"**bar**foo__bar__":    "<p><strong>bar</strong>foo<strong>bar</strong></p>",
  20 | 		"_bar_baz":             "<p><em>bar</em>baz</p>",
  21 | 		"_foo_~~bar~~ baz":     "<p><em>foo</em><del>bar</del> baz</p>",
  22 | 		"~~baz~~ _baz_":        "<p><del>baz</del> <em>baz</em></p>",
  23 | 		"`bool` and thats it.": "<p><code>bool</code> and thats it.</p>",
  24 | 		// HTML comments are expected to pass through unchanged
  25 | 		"<!--hello-->": "<!--hello-->",
  26 | 		// Mixed emphasis
  27 | 		"___foo___":       "<p><strong><em>foo</em></strong></p>",
  28 | 		"__foo _bar___":   "<p><strong>foo <em>bar</em></strong></p>",
  29 | 		"__*foo*__":       "<p><strong><em>foo</em></strong></p>",
  30 | 		"_**mixim**_":     "<p><em><strong>mixim</strong></em></p>",
  31 | 		"~~__*mixim*__~~": "<p><del><strong><em>mixim</em></strong></del></p>",
  32 | 		"~~*mixim*~~":     "<p><del><em>mixim</em></del></p>",
  33 | 		// Paragraphs: any run of blank lines separates two paragraphs
  34 | 		"1  \n2  \n3":        "<p>1<br>2<br>3</p>",
  35 | 		"1\n\n2":             "<p>1</p>\n<p>2</p>",
  36 | 		"1\n\n\n2":           "<p>1</p>\n<p>2</p>",
  37 | 		"1\n\n\n\n\n\n\n\n2": "<p>1</p>\n<p>2</p>",
  38 | 		// Headings: the expected id is the lower-cased heading text
  39 | 		"# 1\n## 2":                   "<h1 id=\"1\">1</h1>\n<h2 id=\"2\">2</h2>",
  40 | 		"# 1\np\n## 2\n### 3\n4\n===": "<h1 id=\"1\">1</h1>\n<p>p</p>\n<h2 id=\"2\">2</h2>\n<h3 id=\"3\">3</h3>\n<h1 id=\"4\">4</h1>",
  41 | 		"Hello\n===":                  "<h1 id=\"hello\">Hello</h1>",
  42 | 		// Links
  43 | 		"[text](link \"title\")": "<p><a href=\"link\" title=\"title\">text</a></p>",
  44 | 		"[text](link)":           "<p><a href=\"link\">text</a></p>",
  45 | 		"[](link)":               "<p><a href=\"link\"></a></p>",
  46 | 		"Link: [example](#)":     "<p>Link: <a href=\"#\">example</a></p>",
  47 | 		"Link: [not really":      "<p>Link: [not really</p>",
  48 | 		"http://localhost:3000":  "<p><a href=\"http://localhost:3000\">http://localhost:3000</a></p>",
  49 | 		"Link: http://yeah.com":  "<p>Link: <a href=\"http://yeah.com\">http://yeah.com</a></p>",
  50 | 		"<http://foo.com>":       "<p><a href=\"http://foo.com\">http://foo.com</a></p>",
  51 | 		"Link: <http://l.co>":    "<p>Link: <a href=\"http://l.co\">http://l.co</a></p>",
  52 | 		"Link: <not really":      "<p>Link: &lt;not really</p>",
  53 | 		// CodeBlock (New replaces each tab by four spaces before parsing)
  54 | 		"\tfoo\n\tbar": "<pre><code>foo\nbar</code></pre>",
  55 | 		"\tfoo\nbar":   "<pre><code>foo\n</code></pre>\n<p>bar</p>",
  56 | 		// GfmCodeBlock
  57 | 		"```js\nvar a;\n```":         "<pre><code class=\"lang-js\">\nvar a;\n</code></pre>",
  58 | 		"~~~\nvar b;~~let d = 1~~~~": "<pre><code>\nvar b;~~let d = 1~~~~</code></pre>",
  59 | 		"~~~js\n":                    "<pre><code class=\"lang-js\">\n</code></pre>",
  60 | 		// Hr
  61 | 		"foo\n****\nbar": "<p>foo</p>\n<hr>\n<p>bar</p>",
  62 | 		"foo\n___":       "<p>foo</p>\n<hr>",
  63 | 		// Images
  64 | 		"![name](url)":           "<p><img src=\"url\" alt=\"name\"></p>",
  65 | 		"![name](url \"title\")": "<p><img src=\"url\" alt=\"name\" title=\"title\"></p>",
  66 | 		"img: ![name]()":         "<p>img: <img src=\"\" alt=\"name\"></p>",
  67 | 		// Unordered lists
  68 | 		"- foo\n- bar": "<ul>\n<li>foo</li>\n<li>bar</li>\n</ul>",
  69 | 		"* foo\n* bar": "<ul>\n<li>foo</li>\n<li>bar</li>\n</ul>",
  70 | 		"+ foo\n+ bar": "<ul>\n<li>foo</li>\n<li>bar</li>\n</ul>",
  71 | 		// Ordered lists
  72 | 		"1. one\n2. two\n3. three": "<ol>\n<li>one</li>\n<li>two</li>\n<li>three</li>\n</ol>",
  73 | 		"1. one\n 1. one of one":   "<ol>\n<li>one<ol>\n<li>one of one</li>\n</ol></li>\n</ol>",
  74 | 		"2. two\n 3. three":        "<ol>\n<li>two<ol>\n<li>three</li>\n</ol></li>\n</ol>",
  75 | 		// Task list
  76 | 		"- [ ] foo\n- [ ] bar": "<ul>\n<li><input type=\"checkbox\">foo</li>\n<li><input type=\"checkbox\">bar</li>\n</ul>",
  77 | 		"- [x] foo\n- [x] bar": "<ul>\n<li><input type=\"checkbox\" checked>foo</li>\n<li><input type=\"checkbox\" checked>bar</li>\n</ul>",
  78 | 		"- [ ] foo\n- [x] bar": "<ul>\n<li><input type=\"checkbox\">foo</li>\n<li><input type=\"checkbox\" checked>bar</li>\n</ul>",
  79 | 		// Special characters escaping
  80 | 		"< hello":   "<p>&lt; hello</p>",
  81 | 		"hello >":   "<p>hello &gt;</p>",
  82 | 		"foo & bar": "<p>foo &amp; bar</p>",
  83 | 		"'foo'":     "<p>&#39;foo&#39;</p>",
  84 | 		"\"foo\"":   "<p>&quot;foo&quot;</p>",
  85 | 		"&copy;":    "<p>&copy;</p>",
  86 | 		// Backslash escaping
  87 | 		"\\**foo\\**":       "<p>*<em>foo*</em></p>",
  88 | 		"\\*foo\\*":         "<p>*foo*</p>",
  89 | 		"\\_underscores\\_": "<p>_underscores_</p>",
  90 | 		"\\## header":       "<p>## header</p>",
  91 | 		"header\n\\===":     "<p>header\n\\===</p>",
  92 | 	}
  93 | 	for input, expected := range cases { // map iteration: the cases run in no fixed order
  94 | 		if actual := Render(input); actual != expected {
  95 | 			t.Errorf("%s: got\n%+v\nexpected\n%+v", input, actual, expected)
  96 | 		}
  97 | 	}
  98 | }
  99 | 
 100 | // TestData renders every test/<name>.text fixture and compares the result, with
 101 | // newlines removed, against test/<name>.html. Fixtures whose path contains
 102 | // "smartypants" or "smartyfractions" are rendered with the matching option enabled.
 103 | func TestData(t *testing.T) {
 104 | 	files, err := ioutil.ReadDir("test")
 105 | 	if err != nil {
 106 | 		// Nothing can be checked without the fixture directory, so stop here.
 107 | 		t.Fatalf("Couldn't open 'test' directory: %v", err)
 108 | 	}
 109 | 	var testFiles []string
 110 | 	for _, file := range files {
 111 | 		if name := file.Name(); strings.HasSuffix(name, ".text") {
 112 | 			testFiles = append(testFiles, "test/"+strings.TrimSuffix(name, ".text"))
 113 | 		}
 114 | 	}
 115 | 	re := regexp.MustCompile(`\n`)
 116 | 	for _, file := range testFiles {
 117 | 		html, err := ioutil.ReadFile(file + ".html")
 118 | 		if err != nil {
 119 | 			t.Errorf("Error to read html file: %s", file)
 120 | 			continue // comparing against missing data would only add a misleading second failure
 121 | 		}
 122 | 		text, err := ioutil.ReadFile(file + ".text")
 123 | 		if err != nil {
 124 | 			t.Errorf("Error to read text file: %s", file)
 125 | 			continue
 126 | 		}
 127 | 		// Render once, with the options this fixture asks for. With unmodified
 128 | 		// default options this equals Render(text), because New falls back to them.
 129 | 		opts := DefaultOptions()
 130 | 		if strings.Contains(file, "smartypants") {
 131 | 			opts.Smartypants = true
 132 | 		}
 133 | 		if strings.Contains(file, "smartyfractions") {
 134 | 			opts.Fractions = true
 135 | 		}
 136 | 		output := New(string(text), opts).Render()
 137 | 		// Remove '\n' on both sides so the comparison ignores line breaks.
 138 | 		sHTML := re.ReplaceAllLiteralString(string(html), "")
 139 | 		sText := re.ReplaceAllLiteralString(output, "")
 140 | 		if sHTML != sText {
 141 | 			t.Errorf("%s: got\n\t%+v\nexpected\n\t%+v", file, sText, sHTML)
 142 | 		}
 143 | 	}
 144 | }
 139 | 
 140 | // TODO: Add more tests for it.
 141 | func TestRenderFn(t *testing.T) {
 142 | 	m := New("hello world", nil)
 143 | 	m.AddRenderFn(NodeParagraph, func(n Node) (s string) {
 144 | 		if p, ok := n.(*ParagraphNode); ok {
 145 | 			s += "<p class=\"mv-msg\">"
 146 | 			for _, pp := range p.Nodes {
 147 | 				s += pp.Render()
 148 | 			}
 149 | 			s += "</p>"
 150 | 		}
 151 | 		return
 152 | 	})
 153 | 	expected := "<p class=\"mv-msg\">hello world</p>"
 154 | 	if actual := m.Render(); actual != expected {
 155 | 		t.Errorf("RenderFn: got\n\t%+v\nexpected\n\t%+v", actual, expected)
 156 | 	}
 157 | }
 158 | 
 159 | // CommonMarkSpec is one entry of the CMCases table: a name (the entries use numbers,
 160 | // presumably CommonMark spec example numbers), the markdown input and the expected HTML.
 161 | type CommonMarkSpec struct {
 162 | 	name, input, expected string
 163 | }
 164 | 
 165 | var CMCases = []CommonMarkSpec{
 166 | 	{"6", "- `one\n- two`", "<ul><li>`one</li><li>two`</li></ul>"},
 167 | 	{"7", "***\n---\n___", "<hr><hr><hr>"},
 168 | 	{"8", "+++", "<p>+++</p>"},
 169 | 	{"9", "===", "<p>===</p>"},
 170 | 	{"10", "--\n**\n__", "<p>--**__</p>"},
 171 | 	{"11", " ***\n  ***\n   ***", "<hr><hr><hr>"},
 172 | 	{"12", "    ***", "<pre><code>***</code></pre>"},
 173 | 	{"14", "_____________________________________", "<hr>"},
 174 | 	{"15", " - - -", "<hr>"},
 175 | 	{"16", " **  * ** * ** * **", "<hr>"},
 176 | 	{"17", "-     -      -      -", "<hr>"},
 177 | 	{"18", "- - - -    ", "<hr>"},
 178 | 	{"20", " *-*", "<p><em>-</em></p>"},
 179 | 	{"21", "- foo\n***\n- bar", "<ul>\n<li>foo</li>\n</ul>\n<hr>\n<ul>\n<li>bar</li>\n</ul>"},
 180 | 	{"22", "Foo\n***\nbar", "<p>Foo</p><hr><p>bar</p>"},
 181 | 	{"23", "Foo\n---\nbar", "<h2>Foo</h2><p>bar</p>"},
 182 | 	{"24", "* Foo\n* * *\n* Bar", "<ul>\n<li>Foo</li>\n</ul>\n<hr>\n<ul>\n<li>Bar</li>\n</ul>"},
 183 | 	{"25", "- Foo\n- * * *", "<ul>\n<li>Foo</li>\n<li>\n<hr>\n</li>\n</ul>"},
 184 | 	{"26", `# foo
 185 | ## foo
 186 | ### foo
 187 | #### foo
 188 | ##### foo
 189 | ###### foo`, `<h1>foo</h1>
 190 | <h2>foo</h2>
 191 | <h3>foo</h3>
 192 | <h4>foo</h4>
 193 | <h5>foo</h5>
 194 | <h6>foo</h6>`},
 195 | 	{"27", "####### foo", "<p>####### foo</p>"},
 196 | 	{"28", "#5 bolt\n\n#foobar", "<p>#5 bolt</p>\n<p>#foobar</p>"},
 197 | 	{"29", "\\## foo", "<p>## foo</p>"},
 198 | 	{"30", "# foo *bar* \\*baz\\*", "<h1>foo <em>bar</em> *baz*</h1>"},
 199 | 	{"31", "#                  foo                     ", "<h1>foo</h1>"},
 200 | 	{"32", ` ### foo
 201 |   ## foo
 202 |    # foo`, `<h3>foo</h3>
 203 | <h2>foo</h2>
 204 | <h1>foo</h1>`},
 205 | 	{"33", "    # foo", "<pre><code># foo</code></pre>"},
 206 | 	{"34", `
 207 | foo
 208 |     # bar`, `
 209 | <p>foo
 210 | # bar</p>`},
 211 | 	{"35", `## foo ##
 212 |   ###   bar    ###`, `<h2>foo</h2>
 213 | <h3>bar</h3>`},
 214 | 	{"36", `# foo ##################################
 215 | ##### foo ##`, `<h1>foo</h1>
 216 | <h5>foo</h5>`},
 217 | 	{"37", "### foo ###     ", "<h3>foo</h3>"},
 218 | 	{"38", "### foo ### b", "<h3>foo ### b</h3>"},
 219 | 	{"39", "# foo#", "<h1>foo#</h1>"},
 220 | 	{"40", `
 221 | ### foo \###
 222 | ## foo #\##
 223 | # foo \#`, `
 224 | <h3>foo ###</h3>
 225 | <h2>foo ###</h2>
 226 | <h1>foo #</h1>`},
 227 | 	{"41", `****
 228 | ## foo
 229 | ****`, `<hr>
 230 | <h2>foo</h2>
 231 | <hr>`},
 232 | 	{"42", `Foo bar
 233 | # baz
 234 | Bar foo`, `<p>Foo bar</p>
 235 | <h1>baz</h1>
 236 | <p>Bar foo</p>`},
 237 | 	{"43", `
 238 | ## 
 239 | #
 240 | ### ###`, `
 241 | <h2></h2>
 242 | <h1></h1>
 243 | <h3></h3>`},
 244 | 	{"44", `
 245 | Foo *bar*
 246 | =========
 247 | 
 248 | Foo *bar*
 249 | ---------`, `
 250 | <h1>Foo <em>bar</em></h1>
 251 | <h2>Foo <em>bar</em></h2>`},
 252 | 	{"45", `Foo
 253 | -------------------------
 254 | 
 255 | Foo
 256 | =`, `<h2>Foo</h2>
 257 | <h1>Foo</h1>`},
 258 | 	{"46", `   Foo
 259 | ---
 260 | 
 261 |   Foo
 262 | -----
 263 | 
 264 |   Foo
 265 |   ===`, `<h2>Foo</h2>
 266 | <h2>Foo</h2>
 267 | <h1>Foo</h1>`},
 268 | 	{"47", `    Foo
 269 |     ---
 270 | 
 271 |     Foo
 272 | ---`, `<pre><code>Foo
 273 | ---
 274 | 
 275 | Foo
 276 | </code></pre>
 277 | <hr>`},
 278 | 	{"48", `Foo
 279 |    ----      `, "<h2>Foo</h2>"},
 280 | 	{"49", `
 281 |  Foo
 282 |     ---`, `
 283 | <p>Foo
 284 | ---</p>`},
 285 | 	{"50", `Foo
 286 | = =
 287 | 
 288 | Foo
 289 | --- -`, `<p>Foo
 290 | = =</p>
 291 | <p>Foo</p>
 292 | <hr>`},
 293 | 	{"51", `Foo  
 294 | -----`, "<h2>Foo</h2>"},
 295 | 	{"52", `Foo\
 296 | ----`, "<h2>Foo\\</h2>"},
 297 | 	{"53", "`Foo\n----\n`\n\n<a title=\"a lot\n---\nof dashes\"/>", "<h2>`Foo</h2>\n<p>`</p>\n<h2>&lt;a title=&quot;a lot</h2>\n<p>of dashes&quot;/&gt;</p>"},
 298 | 	{"54", `
 299 | > Foo
 300 | ---`, `
 301 | <blockquote>
 302 | <p>Foo</p>
 303 | </blockquote>
 304 | <hr>`},
 305 | 	{"55", `- Foo
 306 | ---`, `<ul>
 307 | <li>Foo</li>
 308 | </ul>
 309 | <hr>`},
 310 | 	{"57", `---
 311 | Foo
 312 | ---
 313 | Bar
 314 | ---
 315 | Baz`, `<hr>
 316 | <h2>Foo</h2>
 317 | <h2>Bar</h2>
 318 | <p>Baz</p>`},
 319 | 	{"58", "====", "<p>====</p>"},
 320 | 	{"59", `---
 321 | ---`, "<hr><hr>"},
 322 | 	{"60", `- foo
 323 | -----`, `<ul>
 324 | <li>foo</li>
 325 | </ul>
 326 | <hr>`},
 327 | 	{"61", `    foo
 328 | ---`, `<pre><code>foo
 329 | </code></pre>
 330 | <hr>`},
 331 | 	{"62", `
 332 | > foo
 333 | -----`, `
 334 | <blockquote>
 335 | <p>foo</p>
 336 | </blockquote>
 337 | <hr>`},
 338 | 	{"63", `
 339 | \> foo
 340 | ------`, `
 341 | <h2>&gt; foo</h2>`},
 342 | 	{"64", `    a simple
 343 |       indented code block`, `<pre><code>a simple
 344 |   indented code block
 345 | </code></pre>`},
 346 | 	{"65", `
 347 |   - foo
 348 | 
 349 |     bar`, `
 350 | <ul>
 351 | <li>
 352 | <p>foo</p>
 353 | <p>bar</p>
 354 | </li>
 355 | </ul>`},
 356 | 	{"66", `1.  foo
 357 | 
 358 |     - bar`, `<ol>
 359 | <li>
 360 | <p>foo</p>
 361 | <ul>
 362 | <li>bar</li>
 363 | </ul>
 364 | </li>
 365 | </ol>`},
 366 | 	{"67", `    <a/>
 367 |     *hi*
 368 | 
 369 |     - one`, `<pre><code>&lt;a/&gt;
 370 | *hi*
 371 | 
 372 | - one
 373 | </code></pre>`},
 374 | 	{"68", `
 375 |     chunk1
 376 | 
 377 |     chunk2
 378 |   
 379 |  
 380 |  
 381 |     chunk3`, `
 382 | <pre><code>chunk1
 383 | 
 384 | chunk2
 385 | 
 386 | 
 387 | 
 388 | chunk3
 389 | </code></pre>`},
 390 | 	{"69", `
 391 |     chunk1
 392 |       
 393 |       chunk2`, `
 394 | <pre><code>chunk1
 395 |   
 396 |   chunk2
 397 | </code></pre>`},
 398 | 	{"70", `
 399 | Foo
 400 |     bar`, `
 401 | <p>Foo
 402 | bar</p>`},
 403 | 	{"71", `    foo
 404 | bar`, `<pre><code>foo
 405 | </code></pre>
 406 | <p>bar</p>`},
 407 | 	{"72", `# Header
 408 |     foo
 409 | Header
 410 | ------
 411 |     foo
 412 | ----`, `<h1>Header</h1>
 413 | <pre><code>foo
 414 | </code></pre>
 415 | <h2>Header</h2>
 416 | <pre><code>foo
 417 | </code></pre>
 418 | <hr>`},
 419 | 	{"73", `        foo
 420 |     bar`, `<pre><code>    foo
 421 | bar
 422 | </code></pre>`},
 423 | 	{"74", `    
 424 |     foo
 425 |     `, `<pre><code>foo
 426 | </code></pre>`},
 427 | 	{"75", "    foo  ", `<pre><code>foo  
 428 | </code></pre>`},
 429 | 	{"76", "```\n< \n>\n```", `<pre><code>&lt;
 430 |  &gt;
 431 | </code></pre>`},
 432 | 	{"77", `~~~
 433 | <
 434 |  >
 435 | ~~~`, `<pre><code>&lt;
 436 |  &gt;
 437 | </code></pre>`},
 438 | 	{"78", "```\naaa\n~~~\n```", `<pre><code>aaa
 439 | ~~~
 440 | </code></pre>`},
 441 | 	{"79", "~~~\naaa\n```\n~~~", "<pre><code>aaa\n```\n</code></pre>"},
 442 | 	{"80", "````\naaa\n```\n``````", "<pre><code>aaa\n```\n</code></pre>"},
 443 | 	{"81", `
 444 | ~~~~
 445 | aaa
 446 | ~~~
 447 | ~~~~`, `
 448 | <pre><code>aaa
 449 | ~~~
 450 | </code></pre>`},
 451 | 	{"82", "```", "<pre><code></code></pre>"},
 452 | 	{"83", "`````\n\n```\naaa", "<pre><code>\n```\naaa\n</code></pre>"},
 453 | 	{"84", "> ```\n> aaa\n\nbbb", `
 454 | <blockquote>
 455 | <pre><code>aaa
 456 | </code></pre>
 457 | </blockquote>
 458 | <p>bbb</p>`},
 459 | 	{"85", "```\n\n  \n```", "<pre><code>\n  \n</code></pre>"},
 460 | 	{"86", "```\n```", `<pre><code></code></pre>`},
 461 | 	{"87", " ```\n aaa\naaa\n```", `
 462 | <pre><code>aaa
 463 | aaa
 464 | </code></pre>`},
 465 | 	{"88", "  ```\naaa\n  aaa\naaa\n  ```", `
 466 | <pre><code>aaa
 467 | aaa
 468 | aaa
 469 | </code></pre>`},
 470 | 	{"89", "   ```\n   aaa\n    aaa\n  aaa\n   ```", `
 471 | <pre><code>aaa
 472 |  aaa
 473 | aaa
 474 | </code></pre>`},
 475 | 	{"90", "    ```\n    aaa\n    ```", "<pre><code>```\naaa\n```\n</code></pre>"},
 476 | 	{"91", "```\naaa\n  ```", `<pre><code>aaa
 477 | </code></pre>`},
 478 | 	{"92", "   ```\naaa\n  ```", `<pre><code>aaa
 479 | </code></pre>`},
 480 | 	{"93", "```\naaa\n    ```", "<pre><code>aaa\n    ```\n</code></pre>"},
 481 | 	{"95", `
 482 | ~~~~~~
 483 | aaa
 484 | ~~~ ~~`, `
 485 | <pre><code>aaa
 486 | ~~~ ~~
 487 | </code></pre>`},
 488 | 	{"96", "foo\n```\nbar\n```\nbaz", `<p>foo</p>
 489 | <pre><code>bar
 490 | </code></pre>
 491 | <p>baz</p>`},
 492 | 	{"97", `foo
 493 | ---
 494 | ~~~
 495 | bar
 496 | ~~~
 497 | # baz`, `<h2>foo</h2>
 498 | <pre><code>bar
 499 | </code></pre>
 500 | <h1>baz</h1>`},
 501 | 	{"102", "```\n``` aaa\n```", "<pre><code>``` aaa\n</code></pre>"},
 502 | 	{"103", `
 503 | <table>
 504 |   <tr>
 505 |     <td>
 506 |            hi
 507 |     </td>
 508 |   </tr>
 509 | </table>
 510 | 
 511 | okay.`, `
 512 | <table>
 513 |   <tr>
 514 |     <td>
 515 |            hi
 516 |     </td>
 517 |   </tr>
 518 | </table>
 519 | <p>okay.</p>`},
 520 | 	// Move out the id, beacuse the regexp below
 521 | 	{"107", `
 522 | <div
 523 |   class="bar">
 524 | </div>`, `
 525 | <div
 526 |   class="bar">
 527 | </div>`},
 528 | 	{"108", `
 529 | <div class="bar
 530 |   baz">
 531 | </div>`, `
 532 | <div class="bar
 533 |   baz">
 534 | </div>`},
 535 | 	{"113", `<div><a href="bar">*foo*</a></div>`, `<div><a href="bar">*foo*</a></div>`},
 536 | 	{"114", `
 537 | <table><tr><td>
 538 | foo
 539 | </td></tr></table>`, `
 540 | <table><tr><td>
 541 | foo
 542 | </td></tr></table>`},
 543 | 	{"117", `
 544 | <Warning>
 545 | *bar*
 546 | </Warning>`, `
 547 | <Warning>
 548 | *bar*
 549 | </Warning>`},
 550 | 	{"121", "<del>*foo*</del>", "<p><del><em>foo</em></del></p>"},
 551 | 	{"122", `
 552 | <pre language="haskell"><code>
 553 | import Text.HTML.TagSoup
 554 | 
 555 | main :: IO ()
 556 | main = print $ parseTags tags
 557 | </code></pre>`, `
 558 | <pre language="haskell"><code>
 559 | import Text.HTML.TagSoup
 560 | 
 561 | main :: IO ()
 562 | main = print $ parseTags tags
 563 | </code></pre>`},
 564 | 	{"123", `
 565 | <script type="text/javascript">
 566 | // JavaScript example
 567 | 
 568 | document.getElementById("demo").innerHTML = "Hello JavaScript!";
 569 | </script>`, `
 570 | <script type="text/javascript">
 571 | // JavaScript example
 572 | 
 573 | document.getElementById("demo").innerHTML = "Hello JavaScript!";
 574 | </script>`},
 575 | 	{"124", `
 576 | <style
 577 |   type="text/css">
 578 | h1 {color:red;}
 579 | 
 580 | p {color:blue;}
 581 | </style>`, `
 582 | <style
 583 |   type="text/css">
 584 | h1 {color:red;}
 585 | 
 586 | p {color:blue;}
 587 | </style>`},
 588 | 	{"127", `
 589 | - <div>
 590 | - foo`, `
 591 | <ul>
 592 | <li>
 593 | <div>
 594 | </li>
 595 | <li>foo</li>
 596 | </ul>`},
 597 | 	{"137", `
 598 | Foo
 599 | <div>
 600 | bar
 601 | </div>`, `
 602 | <p>Foo</p>
 603 | <div>
 604 | bar
 605 | </div>`},
 606 | 	{"139", `
 607 | Foo
 608 | <a href="bar">
 609 | baz`, `
 610 | <p>Foo
 611 | <a href="bar">
 612 | baz</p>`},
 613 | 	{"141", `
 614 | <div>
 615 | *Emphasized* text.
 616 | </div>`, `
 617 | <div>
 618 | *Emphasized* text.
 619 | </div>
 620 | `},
 621 | 	{"142", `
 622 | <table>
 623 | 
 624 | <tr>
 625 | 
 626 | <td>
 627 | Hi
 628 | </td>
 629 | 
 630 | </tr>
 631 | 
 632 | </table>`, `
 633 | <table>
 634 | <tr>
 635 | <td>
 636 | Hi
 637 | </td>
 638 | </tr>
 639 | </table>
 640 | `},
 641 | 	{"144", `
 642 | [foo]: /url "title"
 643 | 
 644 | [foo]`, `<p><a href="/url" title="title">foo</a></p>`},
 645 | 	{"145", `
 646 |    [foo]: 
 647 |       /url  
 648 |            'the title'  
 649 | 
 650 | [foo]`, `<p><a href="/url" title="the title">foo</a></p>`},
 651 | 	{"148", `
 652 | [foo]: /url '
 653 | title
 654 | line1
 655 | line2
 656 | '
 657 | 
 658 | [foo]`, `
 659 | <p><a href="/url" title="
 660 | title
 661 | line1
 662 | line2
 663 | ">foo</a></p>`},
 664 | 	{"150", `
 665 | [foo]:
 666 | /url
 667 | 
 668 | [foo]`, `<p><a href="/url">foo</a></p>`},
 669 | 	{"151", `
 670 | [foo]:
 671 | 
 672 | [foo]`, `
 673 | <p>[foo]:</p>
 674 | <p>[foo]</p>`},
 675 | 	{"153", `
 676 | [foo]
 677 | 
 678 | [foo]: url`, `<p><a href="url">foo</a></p>`},
 679 | 	{"154", `
 680 | [foo]
 681 | 
 682 | [foo]: first
 683 | [foo]: second`, `<p><a href="first">foo</a></p>`},
 684 | 	{"155", `
 685 | [FOO]: /url
 686 | 
 687 | [Foo]`, `<p><a href="/url">Foo</a></p>`},
 688 | 	{"157", "[foo]: /url", ""},
 689 | 	{"158", `
 690 | [
 691 | foo
 692 | ]: /url
 693 | bar`, "<p>bar</p>"},
 694 | 	{"159", `[foo]: /url "title" ok`, "<p>[foo]: /url &quot;title&quot; ok</p>"},
 695 | 	{"160", `
 696 | [foo]: /url
 697 | "title" ok`, "<p>&quot;title&quot; ok</p>"},
 698 | 	{"161", `
 699 |     [foo]: /url "title"
 700 | 
 701 | [foo]`, `
 702 | <pre><code>[foo]: /url &quot;title&quot;
 703 | </code></pre>
 704 | <p>[foo]</p>`},
 705 | 	{"162", "```\n[foo]: /url\n```\n\n[foo]", `
 706 | <pre><code>[foo]: /url
 707 | </code></pre>
 708 | <p>[foo]</p>`},
 709 | 	{"166", `
 710 | [foo]
 711 | 
 712 | > [foo]: /url`, `
 713 | <p><a href="/url">foo</a></p>
 714 | <blockquote>
 715 | </blockquote>`},
 716 | 	{"167", `
 717 | aaa
 718 | 
 719 | bbb`, `
 720 | <p>aaa</p>
 721 | <p>bbb</p>`},
 722 | 	{"168", `
 723 | aaa
 724 | bbb
 725 | 
 726 | ccc
 727 | ddd`, `
 728 | <p>aaa
 729 | bbb</p>
 730 | <p>ccc
 731 | ddd</p>`},
 732 | 	{"169", `
 733 | aaa
 734 | 
 735 | 
 736 | bbb`, `
 737 | <p>aaa</p>
 738 | <p>bbb</p>`},
 739 | 	{"170", `
 740 |   aaa
 741 |  bbb`, `
 742 | <p>aaa
 743 | bbb</p>`},
 744 | 	{"171", `
 745 | aaa
 746 |              bbb
 747 |                                        ccc`, `
 748 | <p>aaa
 749 | bbb
 750 | ccc</p>`},
 751 | 	{"172", `
 752 |    aaa
 753 | bbb`, `
 754 | <p>aaa
 755 | bbb</p>`},
 756 | 	{"173", `
 757 |     aaa
 758 | bbb`, `
 759 | <pre><code>aaa
 760 | </code></pre>
 761 | <p>bbb</p>`},
 762 | 	{"174", `
 763 | aaa     
 764 | bbb     `, `
 765 | <p>aaa<br>
 766 | bbb</p>`},
 767 | 	{"175", `
 768 |   
 769 | 
 770 | aaa
 771 |   
 772 | 
 773 | # aaa
 774 | 
 775 |   `, `
 776 | <p>aaa</p>
 777 | <h1>aaa</h1>`},
 778 | 	{"176", `
 779 | > # Foo
 780 | > bar
 781 | > baz`, `
 782 | <blockquote>
 783 | <h1>Foo</h1>
 784 | <p>bar
 785 | baz</p>
 786 | </blockquote>`},
 787 | 	{"177", `
 788 | ># Foo
 789 | >bar
 790 | > baz`, `
 791 | <blockquote>
 792 | <h1>Foo</h1>
 793 | <p>bar
 794 | baz</p>
 795 | </blockquote>`},
 796 | 	{"178", `
 797 |    > # Foo
 798 |    > bar
 799 |  > baz`, `
 800 | <blockquote>
 801 | <h1>Foo</h1>
 802 | <p>bar
 803 | baz</p>
 804 | </blockquote>`},
 805 | 	{"179", `
 806 |     > # Foo
 807 |     > bar
 808 |     > baz`, `
 809 | <pre><code>&gt; # Foo
 810 | &gt; bar
 811 | &gt; baz
 812 | </code></pre>`},
 813 | 	{"180", `
 814 | > # Foo
 815 | > bar
 816 | baz`, `
 817 | <blockquote>
 818 | <h1>Foo</h1>
 819 | <p>bar
 820 | baz</p>
 821 | </blockquote>`},
 822 | 	{"181", `
 823 | > bar
 824 | baz
 825 | > foo`, `
 826 | <blockquote>
 827 | <p>bar
 828 | baz
 829 | foo</p>
 830 | </blockquote>`},
 831 | 	{"182", `
 832 | > foo
 833 | ---`, `
 834 | <blockquote>
 835 | <p>foo</p>
 836 | </blockquote>
 837 | <hr>`},
 838 | 	{"186", `
 839 | >`, `
 840 | <blockquote>
 841 | </blockquote>`},
 842 | 	{"187", `
 843 | >
 844 | >  
 845 | > `, `
 846 | <blockquote>
 847 | </blockquote>`},
 848 | 	{"188", `
 849 | >
 850 | > foo
 851 | >  `, `
 852 | <blockquote>
 853 | <p>foo</p>
 854 | </blockquote>`},
 855 | 	{"189", `
 856 | > foo
 857 | 
 858 | > bar`, `
 859 | <blockquote>
 860 | <p>foo</p>
 861 | </blockquote>
 862 | <blockquote>
 863 | <p>bar</p>
 864 | </blockquote>`},
 865 | 	{"190", `
 866 | > foo
 867 | > bar`, `
 868 | <blockquote>
 869 | <p>foo
 870 | bar</p>
 871 | </blockquote>`},
 872 | 	{"191", `
 873 | > foo
 874 | >
 875 | > bar`, `
 876 | <blockquote>
 877 | <p>foo</p>
 878 | <p>bar</p>
 879 | </blockquote>`},
 880 | 	{"192", `
 881 | foo
 882 | > bar`, `
 883 | <p>foo</p>
 884 | <blockquote>
 885 | <p>bar</p>
 886 | </blockquote>`},
 887 | 	{"193", `
 888 | > aaa
 889 | ***
 890 | > bbb`, `
 891 | <blockquote>
 892 | <p>aaa</p>
 893 | </blockquote>
 894 | <hr>
 895 | <blockquote>
 896 | <p>bbb</p>
 897 | </blockquote>`},
 898 | 	{"194", `
 899 | > bar
 900 | baz`, `
 901 | <blockquote>
 902 | <p>bar
 903 | baz</p>
 904 | </blockquote>`},
 905 | 	{"195", `
 906 | > bar
 907 | 
 908 | baz`, `
 909 | <blockquote>
 910 | <p>bar</p>
 911 | </blockquote>
 912 | <p>baz</p>`},
 913 | 	{"197", `
 914 | > > > foo
 915 | bar`, `
 916 | <blockquote>
 917 | <blockquote>
 918 | <blockquote>
 919 | <p>foo
 920 | bar</p>
 921 | </blockquote>
 922 | </blockquote>
 923 | </blockquote>`},
 924 | 	{"198", `
 925 | >>> foo
 926 | > bar
 927 | >>baz`, `
 928 | <blockquote>
 929 | <blockquote>
 930 | <blockquote>
 931 | <p>foo
 932 | bar
 933 | baz</p>
 934 | </blockquote>
 935 | </blockquote>
 936 | </blockquote>`},
 937 | 	{"199", `
 938 | >     code
 939 | 
 940 | >    not code`, `
 941 | <blockquote>
 942 | <pre><code>code
 943 | </code></pre>
 944 | </blockquote>
 945 | <blockquote>
 946 | <p>not code</p>
 947 | </blockquote>`},
 948 | 	{"200", `
 949 | A paragraph
 950 | with two lines.
 951 | 
 952 |     indented code
 953 | 
 954 | > A block quote.`, `
 955 | <p>A paragraph
 956 | with two lines.</p>
 957 | <pre><code>indented code
 958 | </code></pre>
 959 | <blockquote>
 960 | <p>A block quote.</p>
 961 | </blockquote>`},
 962 | 	{"201", `
 963 | 1.  A paragraph
 964 |     with two lines.
 965 | 
 966 |         indented code
 967 | 
 968 |     > A block quote.`, `
 969 | <ol>
 970 | <li>
 971 | <p>A paragraph
 972 | with two lines.</p>
 973 | <pre><code>indented code
 974 | </code></pre>
 975 | <blockquote>
 976 | <p>A block quote.</p>
 977 | </blockquote>
 978 | </li>
 979 | </ol>`},
 980 | 	{"203", `
 981 | - one
 982 | 
 983 |   two`, `
 984 | <ul>
 985 | <li>
 986 | <p>one</p>
 987 | <p>two</p>
 988 | </li>
 989 | </ul>`},
 990 | 	{"205", `
 991 |  -    one
 992 | 
 993 |       two`, `
 994 | <ul>
 995 | <li>
 996 | <p>one</p>
 997 | <p>two</p>
 998 | </li>
 999 | </ul>`},
1000 | 	{"206", `
1001 |    > > 1.  one
1002 | >>
1003 | >>     two`, `
1004 | <blockquote>
1005 | <blockquote>
1006 | <ol>
1007 | <li>
1008 | <p>one</p>
1009 | <p>two</p>
1010 | </li>
1011 | </ol>
1012 | </blockquote>
1013 | </blockquote>`},
1014 | 	{"207", `
1015 | >>- one
1016 | >>
1017 |   >  > two`, `
1018 | <blockquote>
1019 | <blockquote>
1020 | <ul>
1021 | <li>one</li>
1022 | </ul>
1023 | <p>two</p>
1024 | </blockquote>
1025 | </blockquote>`},
1026 | 	{"208", `-one
1027 | 
1028 | 2.two`, `
1029 | <p>-one</p>
1030 | <p>2.two</p>`},
1031 | 	{"210", `
1032 | 1.  foo
1033 | 
1034 |     ~~~
1035 |     bar
1036 |     ~~~
1037 | 
1038 |     baz
1039 | 
1040 |     > bam`, `
1041 | <ol>
1042 | <li>
1043 | <p>foo</p>
1044 | <pre><code>bar
1045 | </code></pre>
1046 | <p>baz</p>
1047 | <blockquote>
1048 | <p>bam</p>
1049 | </blockquote>
1050 | </li>
1051 | </ol>`},
1052 | 	{"212", `1234567890. not ok`, `<p>1234567890. not ok</p>`},
1053 | 	{"215", `-1. not ok`, `<p>-1. not ok</p>`},
1054 | 	{"216", `
1055 | - foo
1056 | 
1057 |       bar`, `
1058 | <ul>
1059 | <li>
1060 | <p>foo</p>
1061 | <pre><code>bar
1062 | </code></pre>
1063 | </li>
1064 | </ul>`},
1065 | 	{"218", `
1066 |     indented code
1067 | 
1068 | paragraph
1069 | 
1070 |     more code`, `
1071 | <pre><code>indented code
1072 | </code></pre>
1073 | <p>paragraph</p>
1074 | <pre><code>more code
1075 | </code></pre>`},
1076 | 	{"221", `
1077 |    foo
1078 | 
1079 | bar`, `
1080 | <p>foo</p>
1081 | <p>bar</p>`},
1082 | 	{"223", `
1083 | -  foo
1084 | 
1085 |    bar`, `
1086 | <ul>
1087 | <li>
1088 | <p>foo</p>
1089 | <p>bar</p>
1090 | </li>
1091 | </ul>`},
1092 | 	{"226", `
1093 | - foo
1094 | -   
1095 | - bar`, `
1096 | <ul>
1097 | <li>foo</li>
1098 | <li></li>
1099 | <li>bar</li>
1100 | </ul>`},
1101 | 	{"232", `
1102 |     1.  A paragraph
1103 |         with two lines.
1104 | 
1105 |             indented code
1106 | 
1107 |         > A block quote.`, `
1108 | <pre><code>1.  A paragraph
1109 |     with two lines.
1110 | 
1111 |         indented code
1112 | 
1113 |     &gt; A block quote.
1114 | </code></pre>`},
1115 | 	{"234", `
1116 |   1.  A paragraph
1117 |     with two lines.`, `
1118 | <ol>
1119 | <li>A paragraph
1120 | with two lines.</li>
1121 | </ol>`},
1122 | 	{"235", `
1123 | > 1. > Blockquote
1124 | continued here.`, `
1125 | <blockquote>
1126 | <ol>
1127 | <li>
1128 | <blockquote>
1129 | <p>Blockquote
1130 | continued here.</p>
1131 | </blockquote>
1132 | </li>
1133 | </ol>
1134 | </blockquote>`},
1135 | 	{"236", `
1136 | > 1. > Blockquote
1137 | continued here.`, `
1138 | <blockquote>
1139 | <ol>
1140 | <li>
1141 | <blockquote>
1142 | <p>Blockquote
1143 | continued here.</p>
1144 | </blockquote>
1145 | </li>
1146 | </ol>
1147 | </blockquote>`},
1148 | 	{"237", `
1149 | - foo
1150 |   - bar
1151 |     - baz`, `
1152 | <ul>
1153 | <li>foo
1154 | <ul>
1155 | <li>bar
1156 | <ul>
1157 | <li>baz</li>
1158 | </ul>
1159 | </li>
1160 | </ul>
1161 | </li>
1162 | </ul>`},
1163 | 	{"241", "- - foo", `
1164 | <ul>
1165 | <li>
1166 | <ul>
1167 | <li>foo</li>
1168 | </ul>
1169 | </li>
1170 | </ul>`},
1171 | 	{"243", `
1172 | - # Foo
1173 | - Bar
1174 |   ---
1175 |   baz`, `
1176 | <ul>
1177 | <li>
1178 | <h1>Foo</h1>
1179 | </li>
1180 | <li>
1181 | <h2>Bar</h2>
1182 | baz</li>
1183 | </ul>`},
1184 | 	{"246", `
1185 | Foo
1186 | - bar
1187 | - baz`, `
1188 | <p>Foo</p>
1189 | <ul>
1190 | <li>bar</li>
1191 | <li>baz</li>
1192 | </ul>`},
1193 | 	{"248", `
1194 | - foo
1195 | 
1196 | - bar
1197 | 
1198 | 
1199 | - baz`, `
1200 | <ul>
1201 | <li>
1202 | <p>foo</p>
1203 | </li>
1204 | <li>
1205 | <p>bar</p>
1206 | </li>
1207 | </ul>
1208 | <ul>
1209 | <li>baz</li>
1210 | </ul>`},
1211 | 	{"249", `
1212 | - foo
1213 | 
1214 | 
1215 |   bar
1216 | - baz`, `
1217 | <ul>
1218 | <li>foo</li>
1219 | </ul>
1220 | <p>bar</p>
1221 | <ul>
1222 | <li>baz</li>
1223 | </ul>`},
1224 | 	{"250", `
1225 | - foo
1226 |   - bar
1227 |     - baz
1228 | 
1229 | 
1230 |       bim`, `
1231 | <ul>
1232 | <li>foo
1233 | <ul>
1234 | <li>bar
1235 | <ul>
1236 | <li>baz</li>
1237 | </ul>
1238 | </li>
1239 | </ul>
1240 | </li>
1241 | </ul>
1242 | <pre><code>  bim
1243 | </code></pre>`},
1244 | 	{"251", `
1245 | - foo
1246 | - bar
1247 | 
1248 | 
1249 | - baz
1250 | - bim`, `
1251 | <ul>
1252 | <li>foo</li>
1253 | <li>bar</li>
1254 | </ul>
1255 | <ul>
1256 | <li>baz</li>
1257 | <li>bim</li>
1258 | </ul>`},
1259 | 	{"252", `
1260 | -   foo
1261 | 
1262 |     notcode
1263 | 
1264 | -   foo
1265 | 
1266 | 
1267 |     code`, `
1268 | <ul>
1269 | <li>
1270 | <p>foo</p>
1271 | <p>notcode</p>
1272 | </li>
1273 | <li>
1274 | <p>foo</p>
1275 | </li>
1276 | </ul>
1277 | <pre><code>code
1278 | </code></pre>`},
1279 | 	{"261", `
1280 | * a
1281 |   > b
1282 |   >
1283 | * c`, `
1284 | <ul>
1285 | <li>a
1286 | <blockquote>
1287 | <p>b</p>
1288 | </blockquote>
1289 | </li>
1290 | <li>c</li>
1291 | </ul>`},
1292 | 	{"263", "- a", `
1293 | <ul>
1294 | <li>a</li>
1295 | </ul>`},
1296 | 	{"264", `
1297 | - a
1298 |   - b`, `
1299 | <ul>
1300 | <li>a
1301 | <ul>
1302 | <li>b</li>
1303 | </ul>
1304 | </li>
1305 | </ul>`},
1306 | 	{"265", "\n1. ```\n   foo\n   ```\n\n   bar", `
1307 | <ol>
1308 | <li>
1309 | <pre><code>foo
1310 | </code></pre>
1311 | <p>bar</p>
1312 | </li>
1313 | </ol>`},
1314 | 	{"266", `
1315 | * foo
1316 |   * bar
1317 | 
1318 |   baz`, `
1319 | <ul>
1320 | <li>
1321 | <p>foo</p>
1322 | <ul>
1323 | <li>bar</li>
1324 | </ul>
1325 | <p>baz</p>
1326 | </li>
1327 | </ul>`},
1328 | 	{"267", `
1329 | - a
1330 |   - b
1331 |   - c
1332 | 
1333 | - d
1334 |   - e
1335 |   - f`, `
1336 | <ul>
1337 | <li>
1338 | <p>a</p>
1339 | <ul>
1340 | <li>b</li>
1341 | <li>c</li>
1342 | </ul>
1343 | </li>
1344 | <li>
1345 | <p>d</p>
1346 | <ul>
1347 | <li>e</li>
1348 | <li>f</li>
1349 | </ul>
1350 | </li>
1351 | </ul>`},
1352 | 	{"268", "`hi`lo`", "<p><code>hi</code>lo`</p>"},
1353 | 	{"273", `
1354 | foo\
1355 | bar
1356 | `, `
1357 | <p>foo<br>
1358 | bar</p>`},
1359 | 	{"275", `    \[\]`, `<pre><code>\[\]
1360 | </code></pre>`},
1361 | 	{"276", `
1362 | ~~~
1363 | \[\]
1364 | ~~~`, `
1365 | <pre><code>\[\]
1366 | </code></pre>`},
1367 | 	{"294", "`foo`", `<p><code>foo</code></p>`},
1368 | 	{"300", "`foo\\`bar`", "<p><code>foo\\</code>bar`</p>"},
1369 | 	{"303", "`<a href=\"`\">`", "<p><code>&lt;a href=&quot;</code>&quot;&gt;`</p>"},
1370 | 	{"308", "`foo", "<p>`foo</p>"},
1371 | 	{"309", "*foo bar*", "<p><em>foo bar</em></p>"},
1372 | 	{"310", "a * foo bar*", "<p>a * foo bar*</p>"},
1373 | 	{"313", "foo*bar*", "<p>foo<em>bar</em></p>"},
1374 | 	{"314", "5*6*78", "<p>5<em>6</em>78</p>"},
1375 | 	{"315", "_foo bar_", "<p><em>foo bar</em></p>"},
1376 | 	{"316", "_ foo bar_", "<p>_ foo bar_</p>"},
1377 | 	{"322", "foo-_(bar)_", "<p>foo-<em>(bar)</em></p>"},
1378 | 	{"323", "_foo*", "<p>_foo*</p>"},
1379 | 	{"328", "*foo*bar", "<p><em>foo</em>bar</p>"},
1380 | 	{"335", "_(bar)_.", "<p><em>(bar)</em>.</p>"},
1381 | 	{"336", "**foo bar**", "<p><strong>foo bar</strong></p>"},
1382 | 	{"339", "foo**bar**", "<p>foo<strong>bar</strong></p>"},
1383 | 	{"340", "__foo bar__", "<p><strong>foo bar</strong></p>"},
1384 | 	{"348", "foo-__(bar)__", "<p>foo-<strong>(bar)</strong></p>"},
1385 | 	{"352", "**Gomphocarpus (*Gomphocarpus physocarpus*, syn.*Asclepias physocarpa*)**",
1386 | 		"<p><strong>Gomphocarpus (<em>Gomphocarpus physocarpus</em>, syn.<em>Asclepias physocarpa</em>)</strong></p>"},
1387 | 	{"353", "**foo \"*bar*\" foo**", "<p><strong>foo &quot;<em>bar</em>&quot; foo</strong></p>"},
1388 | 	{"354", "**foo**bar", "<p><strong>foo</strong>bar</p>"},
1389 | 	{"361", "__(bar)__.", "<p><strong>(bar)</strong>.</p>"},
1390 | 	{"362", "*foo [bar](/url)*", "<p><em>foo <a href=\"/url\">bar</a></em></p>"},
1391 | 	{"363", "*foo\nbar*", "<p><em>foo\nbar</em></p>"},
1392 | 	{"375", "** is not an empty emphasis", "<p>** is not an empty emphasis</p>"},
1393 | 	{"377", "**foo [bar](/url)**", "<p><strong>foo <a href=\"/url\">bar</a></strong></p>"},
1394 | 	{"378", "**foo\nbar**", "<p><strong>foo\nbar</strong></p>"},
1395 | 	{"379", "__foo _bar_ baz__", "<p><strong>foo <em>bar</em> baz</strong></p>"},
1396 | 	{"383", "**foo *bar* baz**", "<p><strong>foo <em>bar</em> baz</strong></p>"},
1397 | 	{"385", "***foo* bar**", "<p><strong><em>foo</em> bar</strong></p>"},
1398 | 	{"386", "**foo *bar***", "<p><strong>foo <em>bar</em></strong></p>"},
1399 | 	{"389", "__ is not an empty emphasis", "<p>__ is not an empty emphasis</p>"},
1400 | 	{"392", "foo *\\**", "<p>foo <em>*</em></p>"},
1401 | 	{"393", "foo *_*", "<p>foo <em>_</em></p>"},
1402 | 	{"395", "foo **\\***", "<p>foo <strong>*</strong></p>"},
1403 | 	{"396", "foo **_**", "<p>foo <strong>_</strong></p>"},
1404 | 	{"404", "foo _\\__", "<p>foo <em>_</em></p>"},
1405 | 	{"405", "foo _*_", "<p>foo <em>*</em></p>"},
1406 | 	{"407", "foo __\\___", "<p>foo <strong>_</strong></p>"},
1407 | 	{"408", "foo __*__", "<p>foo <strong>*</strong></p>"},
1408 | 	{"415", "**foo**", "<p><strong>foo</strong></p>"},
1409 | 	{"416", "*_foo_*", "<p><em><em>foo</em></em></p>"},
1410 | 	{"417", "__foo__", "<p><strong>foo</strong></p>"},
1411 | 	{"418", "_*foo*_", "<p><em><em>foo</em></em></p>"},
1412 | 	{"419", "****foo****", "<p><strong><strong>foo</strong></strong></p>"},
1413 | 	{"420", "____foo____", "<p><strong><strong>foo</strong></strong></p>"},
1414 | 	{"422", "***foo***", "<p><strong><em>foo</em></strong></p>"},
1415 | 	{"424", "*foo _bar* baz_", "<p><em>foo _bar</em> baz_</p>"},
1416 | 	{"438", "[link](/uri \"title\")", "<p><a href=\"/uri\" title=\"title\">link</a></p>"},
1417 | 	{"439", "[link](/uri)", "<p><a href=\"/uri\">link</a></p>"},
1418 | 	{"440", "[link]()", "<p><a href=\"\">link</a></p>"},
1419 | 	{"441", "[link](<>)", "<p><a href=\"\">link</a></p>"},
1420 | 	{"451", `
1421 | [link](#fragment)
1422 | 
1423 | [link](http://example.com#fragment)
1424 | 
1425 | [link](http://example.com?foo=bar&baz#fragment)`, `
1426 | <p><a href="#fragment">link</a></p>
1427 | <p><a href="http://example.com#fragment">link</a></p>
1428 | <p><a href="http://example.com?foo=bar&amp;baz#fragment">link</a></p>`},
1429 | 	{"455", `
1430 | [link](/url "title")
1431 | [link](/url 'title')
1432 | [link](/url (title))`, `
1433 | <p><a href="/url" title="title">link</a>
1434 | <a href="/url" title="title">link</a>
1435 | <a href="/url" title="title">link</a></p>`},
1436 | 	{"458", `[link](/url 'title "and" title')`, `<p><a href="/url" title="title &quot;and&quot; title">link</a></p>`},
1437 | 	{"460", "[link] (/uri)", "<p>[link] (/uri)</p>"},
1438 | 	{"461", "[link [foo [bar]]](/uri)", `<p><a href="/uri">link [foo [bar]]</a></p>`},
1439 | 	{"463", "[link [bar](/uri)", `<p>[link <a href="/uri">bar</a></p>`},
1440 | 	{"471", "[foo *bar](baz*)", `<p><a href="baz*">foo *bar</a></p>`},
1441 | 	{"472", "*foo [bar* baz]", "<p><em>foo [bar</em> baz]</p>"},
1442 | 	{"476", `
1443 | [foo][bar]
1444 | 
1445 | [bar]: /url "title"`, `<p><a href="/url" title="title">foo</a></p>`},
1446 | 	{"477", `
1447 | [link [foo [bar]]][ref]
1448 | 
1449 | [ref]: /uri`, `<p><a href="/uri">link [foo [bar]]</a></p>`},
1450 | 	{"484", `
1451 | [foo *bar][ref]
1452 | 
1453 | [ref]: /uri`, `<p><a href="/uri">foo *bar</a></p>`},
1454 | 	{"488", `
1455 | [foo][BaR]
1456 | 
1457 | [bar]: /url "title"`, `<p><a href="/url" title="title">foo</a></p>`},
1458 | 	{"489", `
1459 | [Толпой][Толпой] is a Russian word.
1460 | 
1461 | [ТОЛПОЙ]: /url`, `<p><a href="/url">Толпой</a> is a Russian word.</p>`},
1462 | 	{"491", `
1463 | [foo] [bar]
1464 | 
1465 | [bar]: /url "title"`, `<p><a href="/url" title="title">foo</a></p>`},
1466 | 	{"492", `
1467 | [foo]
1468 | [bar]
1469 | 
1470 | [bar]: /url "title"`, `<p><a href="/url" title="title">foo</a></p>`},
1471 | 	{"493", `
1472 | [foo]: /url1
1473 | 
1474 | [foo]: /url2
1475 | 
1476 | [bar][foo]`, `<p><a href="/url1">bar</a></p>`},
1477 | 	{"496", `
1478 | [foo][ref[bar]]
1479 | 
1480 | [ref[bar]]: /uri`, `
1481 | <p>[foo][ref[bar]]</p>
1482 | <p>[ref[bar]]: /uri</p>`},
1483 | 	{"497", `
1484 | [[[foo]]]
1485 | 
1486 | [[[foo]]]: /url`, `
1487 | <p>[[[foo]]]</p>
1488 | <p>[[[foo]]]: /url</p>`},
1489 | 	{"498", `
1490 | [foo][ref\[]
1491 | 
1492 | [ref\[]: /uri`, `<p><a href="/uri">foo</a></p>`},
1493 | 	{"499", `
1494 | []
1495 | 
1496 | []: /uri`, `
1497 | <p>[]</p>
1498 | <p>[]: /uri</p>`},
1499 | 	{"501", `
1500 | [foo][]
1501 | 
1502 | [foo]: /url "title"`, `<p><a href="/url" title="title">foo</a></p>`},
1503 | 	{"502", `
1504 | [*foo* bar][]
1505 | 
1506 | [*foo* bar]: /url "title"`, `
1507 | <p><a href="/url" title="title"><em>foo</em> bar</a></p>`},
1508 | 	{"503", `
1509 | [Foo][]
1510 | 
1511 | [foo]: /url "title"`, `<p><a href="/url" title="title">Foo</a></p>`},
1512 | 	{"504", `
1513 | [foo] 
1514 | []
1515 | 
1516 | [foo]: /url "title"`, `<p><a href="/url" title="title">foo</a></p>`},
1517 | 	{"505", `
1518 | [foo]
1519 | 
1520 | [foo]: /url "title"`, `<p><a href="/url" title="title">foo</a></p>`},
1521 | 	{"506", `
1522 | [*foo* bar]
1523 | 
1524 | [*foo* bar]: /url "title"`, `
1525 | <p><a href="/url" title="title"><em>foo</em> bar</a></p>`},
1526 | 	{"508", `
1527 | [[bar [foo]
1528 | 
1529 | [foo]: /url`, `<p>[[bar <a href="/url">foo</a></p>`},
1530 | 	{"509", `
1531 | [Foo]
1532 | 
1533 | [foo]: /url "title"`, `<p><a href="/url" title="title">Foo</a></p>`},
1534 | 	{"510", `
1535 | [foo] bar
1536 | 
1537 | [foo]: /url`, `<p><a href="/url">foo</a> bar</p>`},
1538 | 	{"511", `
1539 | \[foo]
1540 | 
1541 | [foo]: /url "title"`, `<p>[foo]</p>`},
1542 | 	{"513", `
1543 | [foo][bar]
1544 | 
1545 | [foo]: /url1
1546 | [bar]: /url2`, `<p><a href="/url2">foo</a></p>`},
1547 | 	{"515", `
1548 | [foo][bar][baz]
1549 | 
1550 | [baz]: /url1
1551 | [bar]: /url2`, `<p><a href="/url2">foo</a><a href="/url1">baz</a></p>`},
1552 | 	{"517", `![foo](/url "title")`, `<p><img src="/url" alt="foo" title="title"></p>`},
1553 | 	{"523", `![foo](train.jpg)`, `<p><img src="train.jpg" alt="foo"></p>`},
1554 | 	{"524", `My ![foo bar](/path/to/train.jpg  "title"   )`,
1555 | 		`<p>My <img src="/path/to/train.jpg" alt="foo bar" title="title"></p>`},
1556 | 	{"525", `![foo](<url>)`, `<p><img src="url" alt="foo"></p>`},
1557 | 	{"526", `![](/url)`, `<p><img src="/url" alt=""></p>`},
1558 | 	{"527", `
1559 | ![foo] [bar]
1560 | 
1561 | [bar]: /url`, `<p><img src="/url" alt="foo"></p>`},
1562 | 	{"528", `
1563 | ![foo] [bar]
1564 | 
1565 | [BAR]: /url`, `<p><img src="/url" alt="foo"></p>`},
1566 | 	{"529", `
1567 | ![foo][]
1568 | 
1569 | [foo]: /url "title"`, `<p><img src="/url" alt="foo" title="title"></p>`},
1570 | 	{"531", `
1571 | ![Foo][]
1572 | 
1573 | [foo]: /url "title"`, `<p><img src="/url" alt="Foo" title="title"></p>`},
1574 | 	{"532", `
1575 | ![foo] 
1576 | []
1577 | 
1578 | [foo]: /url "title"`, `<p><img src="/url" alt="foo" title="title"></p>`},
1579 | 	{"533", `
1580 | ![foo]
1581 | 
1582 | [foo]: /url "title"`, `<p><img src="/url" alt="foo" title="title"></p>`},
1583 | 	{"535", `
1584 | ![[foo]]
1585 | 
1586 | [[foo]]: /url "title"`, `
1587 | <p>![[foo]]</p>
1588 | <p>[[foo]]: /url &quot;title&quot;</p>`},
1589 | 	{"536", `
1590 | ![Foo]
1591 | 
1592 | [foo]: /url "title"`, `<p><img src="/url" alt="Foo" title="title"></p>`},
1593 | 	{"537", `
1594 | \!\[foo]
1595 | 
1596 | [foo]: /url "title"`, `<p>![foo]</p>`},
1597 | 	{"538", `
1598 | \![foo]
1599 | 
1600 | [foo]: /url "title"`, `<p>!<a href="/url" title="title">foo</a></p>`},
1601 | 	{"539", `<http://foo.bar.baz>`, `<p><a href="http://foo.bar.baz">http://foo.bar.baz</a></p>`},
1602 | 	{"540", `<http://foo.bar.baz/test?q=hello&id=22&boolean>`,
1603 | 		`<p><a href="http://foo.bar.baz/test?q=hello&amp;id=22&amp;boolean">http://foo.bar.baz/test?q=hello&amp;id=22&amp;boolean</a></p>`},
1604 | 	{"541", `<irc://foo.bar:2233/baz>`, `<p><a href="irc://foo.bar:2233/baz">irc://foo.bar:2233/baz</a></p>`},
1605 | 	{"542", `<MAILTO:FOO@BAR.BAZ>`, `<p><a href="MAILTO:FOO@BAR.BAZ">MAILTO:FOO@BAR.BAZ</a></p>`},
1606 | 	{"548", "<>", "<p>&lt;&gt;</p>"},
1607 | 	{"554", `foo@bar.example.com`, `<p>foo@bar.example.com</p>`},
1608 | 	{"555", "<a><bab><c2c>", "<p><a><bab><c2c></p>"},
1609 | 	{"556", "<a/><b2/>", "<p><a/><b2/></p>"},
1610 | 	{"557", `
1611 | <a  /><b2
1612 | data="foo" >`, `
1613 | <p><a  /><b2
1614 | data="foo" ></p>`},
1615 | 	{"558", `
1616 | <a foo="bar" bam = 'baz <em>"</em>'
1617 | _boolean zoop:33=zoop:33 />`, `
1618 | <p><a foo="bar" bam = 'baz <em>"</em>'
1619 | _boolean zoop:33=zoop:33 /></p>`},
1620 | 	{"572", "foo <![CDATA[>&<]]>", "<p>foo <![CDATA[>&<]]></p>"},
1621 | 	{"576", `
1622 | foo  
1623 | baz`, `
1624 | <p>foo<br>
1625 | baz</p>`},
1626 | 	{"577", `
1627 | foo\
1628 | baz`, `
1629 | <p>foo<br>
1630 | baz</p>`},
1631 | 	{"578", `
1632 | foo       
1633 | baz`, `<p>foo<br>baz</p>`},
1634 | 	{"581", `
1635 | *foo  
1636 | bar*`, `
1637 | <p><em>foo<br>
1638 | bar</em></p>`},
1639 | 	{"582", `
1640 | *foo\
1641 | bar*`, `
1642 | <p><em>foo<br>
1643 | bar</em></p>`},
1644 | 	{"587", `foo\`, `<p>foo\</p>`},
1645 | 	{"588", `foo  `, `<p>foo</p>`},
1646 | 	{"589", `### foo\`, `<h3>foo\</h3>`},
1647 | 	{"590", `### foo  `, `<h3>foo</h3>`},
1648 | 	{"591", `
1649 | foo
1650 | baz`, `
1651 | <p>foo
1652 | baz</p>`},
1653 | 	{"592", `
1654 | foo 
1655 |  baz`, `
1656 | <p>foo
1657 | baz</p>`},
1658 | 	{"594", `Foo χρῆν`, `<p>Foo χρῆν</p>`},
1659 | 	{"595", `Multiple     spaces`, `<p>Multiple     spaces</p>`},
1660 | }
1661 | 
1662 | func TestCommonMark(t *testing.T) {
1663 | 	reID := regexp.MustCompile(` +?id=".*"`)
1664 | 	for _, c := range CMCases {
1665 | 		// Remove the auto-hashing until it'll be in the configuration
1666 | 		actual := reID.ReplaceAllString(Render(c.input), "")
1667 | 		if strings.Replace(actual, "\n", "", -1) != strings.Replace(c.expected, "\n", "", -1) {
1668 | 			t.Errorf("\ninput:%s\ngot:\n%s\nexpected:\n%s\nlink: http://spec.commonmark.org/0.21/#example-%s\n",
1669 | 				c.input, actual, c.expected, c.name)
1670 | 		}
1671 | 	}
1672 | }
1673 | 


--------------------------------------------------------------------------------
/node.go:
--------------------------------------------------------------------------------
  1 | package mark
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"regexp"
  6 | 	"strconv"
  7 | 	"strings"
  8 | )
  9 | 
// A Node is an element in the parse tree.
// Every node reports its NodeType and can render itself to a string.
type Node interface {
	Type() NodeType // the kind of this node
	Render() string // the rendered representation of this node
}
 15 | 
 16 | // NodeType identifies the type of a parse tree node.
 17 | type NodeType int
 18 | 
 19 | // Type returns itself and provides an easy default implementation
 20 | // for embedding in a Node. Embedded in all non-trivial Nodes.
 21 | func (t NodeType) Type() NodeType {
 22 | 	return t
 23 | }
 24 | 
// RenderFn is the signature of a function that takes a Node and returns its
// rendered string; it is used for overriding the default rendering.
type RenderFn func(Node) string
 27 | 
// The node types. Values are assigned sequentially by iota, starting at 0
// with NodeText, so the order of this list determines each constant's value.
const (
	NodeText       NodeType = iota // Plain text
	NodeParagraph                  // A paragraph
	NodeEmphasis                   // An emphasis (strong, em, ...)
	NodeHeading                    // A heading (h1, h2, ...)
	NodeBr                         // A line break
	NodeHr                         // A horizontal rule
	NodeImage                      // An image
	NodeRefImage                   // An image reference
	NodeList                       // A list of ListItems
	NodeListItem                   // A list item node
	NodeLink                       // A link (href)
	NodeRefLink                    // A link reference
	NodeDefLink                    // A link definition
	NodeTable                      // A table of NodeRows
	NodeRow                        // A row of NodeCells
	NodeCell                       // A table cell (td)
	NodeCode                       // A code block (wrapped with pre)
	NodeBlockQuote                 // A blockquote
	NodeHTML                       // Inline HTML
	NodeCheckbox                   // A checkbox
)
 50 | 
 51 | // ParagraphNode hold simple paragraph node contains text
 52 | // that may be emphasis.
 53 | type ParagraphNode struct {
 54 | 	NodeType
 55 | 	Pos
 56 | 	Nodes []Node
 57 | }
 58 | 
 59 | // Render returns the html representation of ParagraphNode
 60 | func (n *ParagraphNode) Render() (s string) {
 61 | 	for _, node := range n.Nodes {
 62 | 		s += node.Render()
 63 | 	}
 64 | 	return wrap("p", s)
 65 | }
 66 | 
 67 | func (p *parse) newParagraph(pos Pos) *ParagraphNode {
 68 | 	return &ParagraphNode{NodeType: NodeParagraph, Pos: pos}
 69 | }
 70 | 
 71 | // TextNode holds plain text.
 72 | type TextNode struct {
 73 | 	NodeType
 74 | 	Pos
 75 | 	Text string
 76 | }
 77 | 
 78 | // Render returns the string representation of TexNode
 79 | func (n *TextNode) Render() string {
 80 | 	return n.Text
 81 | }
 82 | 
 83 | func (p *parse) newText(pos Pos, text string) *TextNode {
 84 | 	return &TextNode{NodeType: NodeText, Pos: pos, Text: p.text(text)}
 85 | }
 86 | 
 87 | // HTMLNode holds the raw html source.
 88 | type HTMLNode struct {
 89 | 	NodeType
 90 | 	Pos
 91 | 	Src string
 92 | }
 93 | 
 94 | // Render returns the src of the HTMLNode
 95 | func (n *HTMLNode) Render() string {
 96 | 	return n.Src
 97 | }
 98 | 
 99 | func (p *parse) newHTML(pos Pos, src string) *HTMLNode {
100 | 	return &HTMLNode{NodeType: NodeHTML, Pos: pos, Src: src}
101 | }
102 | 
103 | // HrNode represents horizontal rule
104 | type HrNode struct {
105 | 	NodeType
106 | 	Pos
107 | }
108 | 
109 | // Render returns the html representation of hr.
110 | func (n *HrNode) Render() string {
111 | 	return "<hr>"
112 | }
113 | 
114 | func (p *parse) newHr(pos Pos) *HrNode {
115 | 	return &HrNode{NodeType: NodeHr, Pos: pos}
116 | }
117 | 
118 | // BrNode represents a link-break element.
119 | type BrNode struct {
120 | 	NodeType
121 | 	Pos
122 | }
123 | 
124 | // Render returns the html representation of line-break.
125 | func (n *BrNode) Render() string {
126 | 	return "<br>"
127 | }
128 | 
129 | func (p *parse) newBr(pos Pos) *BrNode {
130 | 	return &BrNode{NodeType: NodeBr, Pos: pos}
131 | }
132 | 
133 | // EmphasisNode holds plain-text wrapped with style.
134 | // (strong, em, del, code)
135 | type EmphasisNode struct {
136 | 	NodeType
137 | 	Pos
138 | 	Style itemType
139 | 	Nodes []Node
140 | }
141 | 
142 | // Tag return the tagName based on the Style field.
143 | func (n *EmphasisNode) Tag() (s string) {
144 | 	switch n.Style {
145 | 	case itemStrong:
146 | 		s = "strong"
147 | 	case itemItalic:
148 | 		s = "em"
149 | 	case itemStrike:
150 | 		s = "del"
151 | 	case itemCode:
152 | 		s = "code"
153 | 	}
154 | 	return
155 | }
156 | 
157 | // Return the html representation of emphasis text.
158 | func (n *EmphasisNode) Render() string {
159 | 	var s string
160 | 	for _, node := range n.Nodes {
161 | 		s += node.Render()
162 | 	}
163 | 	return wrap(n.Tag(), s)
164 | }
165 | 
166 | func (p *parse) newEmphasis(pos Pos, style itemType) *EmphasisNode {
167 | 	return &EmphasisNode{NodeType: NodeEmphasis, Pos: pos, Style: style}
168 | }
169 | 
170 | // HeadingNode holds heaing element with specific level(1-6).
171 | type HeadingNode struct {
172 | 	NodeType
173 | 	Pos
174 | 	Level int
175 | 	Text  string
176 | 	Nodes []Node
177 | }
178 | 
179 | // Render returns the html representation based on heading level.
180 | func (n *HeadingNode) Render() (s string) {
181 | 	for _, node := range n.Nodes {
182 | 		s += node.Render()
183 | 	}
184 | 	re := regexp.MustCompile(`[^\w]+`)
185 | 	id := re.ReplaceAllString(n.Text, "-")
186 | 	// ToLowerCase
187 | 	id = strings.ToLower(id)
188 | 	return fmt.Sprintf("<%[1]s id=\"%s\">%s</%[1]s>", "h"+strconv.Itoa(n.Level), id, s)
189 | }
190 | 
191 | func (p *parse) newHeading(pos Pos, level int, text string) *HeadingNode {
192 | 	return &HeadingNode{NodeType: NodeHeading, Pos: pos, Level: level, Text: p.text(text)}
193 | }
194 | 
195 | // Code holds CodeBlock node with specific lang field.
196 | type CodeNode struct {
197 | 	NodeType
198 | 	Pos
199 | 	Lang, Text string
200 | }
201 | 
202 | // Return the html representation of codeBlock
203 | func (n *CodeNode) Render() string {
204 | 	var attr string
205 | 	if n.Lang != "" {
206 | 		attr = fmt.Sprintf(" class=\"lang-%s\"", n.Lang)
207 | 	}
208 | 	code := fmt.Sprintf("<%[1]s%s>%s</%[1]s>", "code", attr, n.Text)
209 | 	return wrap("pre", code)
210 | }
211 | 
212 | func (p *parse) newCode(pos Pos, lang, text string) *CodeNode {
213 | 	// DRY: see `escape()` below
214 | 	text = strings.NewReplacer("<", "&lt;", ">", "&gt;", "\"", "&quot;", "&", "&amp;").Replace(text)
215 | 	return &CodeNode{NodeType: NodeCode, Pos: pos, Lang: lang, Text: text}
216 | }
217 | 
218 | // Link holds a tag with optional title
219 | type LinkNode struct {
220 | 	NodeType
221 | 	Pos
222 | 	Title, Href string
223 | 	Nodes       []Node
224 | }
225 | 
226 | // Return the html representation of link node
227 | func (n *LinkNode) Render() (s string) {
228 | 	for _, node := range n.Nodes {
229 | 		s += node.Render()
230 | 	}
231 | 	attrs := fmt.Sprintf("href=\"%s\"", n.Href)
232 | 	if n.Title != "" {
233 | 		attrs += fmt.Sprintf(" title=\"%s\"", n.Title)
234 | 	}
235 | 	return fmt.Sprintf("<a %s>%s</a>", attrs, s)
236 | }
237 | 
238 | func (p *parse) newLink(pos Pos, title, href string, nodes ...Node) *LinkNode {
239 | 	return &LinkNode{NodeType: NodeLink, Pos: pos, Title: p.text(title), Href: p.text(href), Nodes: nodes}
240 | }
241 | 
242 | // RefLink holds link with refrence to link definition
243 | type RefNode struct {
244 | 	NodeType
245 | 	Pos
246 | 	tr             *parse
247 | 	Text, Ref, Raw string
248 | 	Nodes          []Node
249 | }
250 | 
251 | // rendering based type
252 | func (n *RefNode) Render() string {
253 | 	var node Node
254 | 	ref := strings.ToLower(n.Ref)
255 | 	if l, ok := n.tr.links[ref]; ok {
256 | 		if n.Type() == NodeRefLink {
257 | 			node = n.tr.newLink(n.Pos, l.Title, l.Href, n.Nodes...)
258 | 		} else {
259 | 			node = n.tr.newImage(n.Pos, l.Title, l.Href, n.Text)
260 | 		}
261 | 	} else {
262 | 		node = n.tr.newText(n.Pos, n.Raw)
263 | 	}
264 | 	return node.Render()
265 | }
266 | 
267 | // newRefLink create new RefLink that suitable for link
268 | func (p *parse) newRefLink(typ itemType, pos Pos, raw, ref string, text []Node) *RefNode {
269 | 	return &RefNode{NodeType: NodeRefLink, Pos: pos, tr: p.root(), Raw: raw, Ref: ref, Nodes: text}
270 | }
271 | 
272 | // newRefImage create new RefLink that suitable for image
273 | func (p *parse) newRefImage(typ itemType, pos Pos, raw, ref, text string) *RefNode {
274 | 	return &RefNode{NodeType: NodeRefImage, Pos: pos, tr: p.root(), Raw: raw, Ref: ref, Text: text}
275 | }
276 | 
277 | // DefLinkNode refresent single reference to link-definition
278 | type DefLinkNode struct {
279 | 	NodeType
280 | 	Pos
281 | 	Name, Href, Title string
282 | }
283 | 
284 | // Deflink have no representation(Transparent node)
285 | func (n *DefLinkNode) Render() string {
286 | 	return ""
287 | }
288 | 
289 | func (p *parse) newDefLink(pos Pos, name, href, title string) *DefLinkNode {
290 | 	return &DefLinkNode{NodeType: NodeLink, Pos: pos, Name: name, Href: href, Title: title}
291 | }
292 | 
293 | // ImageNode represents an image element with optional alt and title attributes.
294 | type ImageNode struct {
295 | 	NodeType
296 | 	Pos
297 | 	Title, Src, Alt string
298 | }
299 | 
300 | // Render returns the html representation on image node
301 | func (n *ImageNode) Render() string {
302 | 	attrs := fmt.Sprintf("src=\"%s\" alt=\"%s\"", n.Src, n.Alt)
303 | 	if n.Title != "" {
304 | 		attrs += fmt.Sprintf(" title=\"%s\"", n.Title)
305 | 	}
306 | 	return fmt.Sprintf("<img %s>", attrs)
307 | }
308 | 
309 | func (p *parse) newImage(pos Pos, title, src, alt string) *ImageNode {
310 | 	return &ImageNode{NodeType: NodeImage, Pos: pos, Title: p.text(title), Src: p.text(src), Alt: p.text(alt)}
311 | }
312 | 
313 | // ListNode holds list items nodes in ordered or unordered states.
314 | type ListNode struct {
315 | 	NodeType
316 | 	Pos
317 | 	Ordered bool
318 | 	Items   []*ListItemNode
319 | }
320 | 
321 | func (n *ListNode) append(item *ListItemNode) {
322 | 	n.Items = append(n.Items, item)
323 | }
324 | 
325 | // Render returns the html representation of orderd(ol) or unordered(ul) list.
326 | func (n *ListNode) Render() (s string) {
327 | 	tag := "ul"
328 | 	if n.Ordered {
329 | 		tag = "ol"
330 | 	}
331 | 	for _, item := range n.Items {
332 | 		s += "\n" + item.Render()
333 | 	}
334 | 	s += "\n"
335 | 	return wrap(tag, s)
336 | }
337 | 
338 | func (p *parse) newList(pos Pos, ordered bool) *ListNode {
339 | 	return &ListNode{NodeType: NodeList, Pos: pos, Ordered: ordered}
340 | }
341 | 
342 | // ListItem represents single item in ListNode that may contains nested nodes.
343 | type ListItemNode struct {
344 | 	NodeType
345 | 	Pos
346 | 	Nodes []Node
347 | }
348 | 
349 | func (l *ListItemNode) append(n Node) {
350 | 	l.Nodes = append(l.Nodes, n)
351 | }
352 | 
353 | // Render returns the html representation of list-item
354 | func (l *ListItemNode) Render() (s string) {
355 | 	for _, node := range l.Nodes {
356 | 		s += node.Render()
357 | 	}
358 | 	return wrap("li", s)
359 | }
360 | 
361 | func (p *parse) newListItem(pos Pos) *ListItemNode {
362 | 	return &ListItemNode{NodeType: NodeListItem, Pos: pos}
363 | }
364 | 
365 | // TableNode represents table element contains head and body
366 | type TableNode struct {
367 | 	NodeType
368 | 	Pos
369 | 	Rows []*RowNode
370 | }
371 | 
372 | func (n *TableNode) append(row *RowNode) {
373 | 	n.Rows = append(n.Rows, row)
374 | }
375 | 
376 | // Render returns the html representation of a table
377 | func (n *TableNode) Render() string {
378 | 	var s string
379 | 	for i, row := range n.Rows {
380 | 		s += "\n"
381 | 		switch i {
382 | 		case 0:
383 | 			s += wrap("thead", "\n"+row.Render()+"\n")
384 | 		case 1:
385 | 			s += "<tbody>\n"
386 | 			fallthrough
387 | 		default:
388 | 			s += row.Render()
389 | 		}
390 | 	}
391 | 	s += "\n</tbody>\n"
392 | 	return wrap("table", s)
393 | }
394 | 
395 | func (p *parse) newTable(pos Pos) *TableNode {
396 | 	return &TableNode{NodeType: NodeTable, Pos: pos}
397 | }
398 | 
399 | // RowNode represnt tr that holds list of cell-nodes
400 | type RowNode struct {
401 | 	NodeType
402 | 	Pos
403 | 	Cells []*CellNode
404 | }
405 | 
406 | func (r *RowNode) append(cell *CellNode) {
407 | 	r.Cells = append(r.Cells, cell)
408 | }
409 | 
410 | // Render returns the html representation of table-row
411 | func (r *RowNode) Render() string {
412 | 	var s string
413 | 	for _, cell := range r.Cells {
414 | 		s += "\n" + cell.Render()
415 | 	}
416 | 	s += "\n"
417 | 	return wrap("tr", s)
418 | }
419 | 
420 | func (p *parse) newRow(pos Pos) *RowNode {
421 | 	return &RowNode{NodeType: NodeRow, Pos: pos}
422 | }
423 | 
424 | // AlignType identifies the aligment-type of specfic cell.
425 | type AlignType int
426 | 
427 | // Align returns itself and provides an easy default implementation
428 | // for embedding in a Node.
429 | func (t AlignType) Align() AlignType {
430 | 	return t
431 | }
432 | 
433 | // Alignment
434 | const (
435 | 	None AlignType = iota
436 | 	Right
437 | 	Left
438 | 	Center
439 | )
440 | 
441 | // Cell types
442 | const (
443 | 	Header = iota
444 | 	Data
445 | )
446 | 
447 | // CellNode represents table-data/cell that holds simple text(may be emphasis)
448 | // Note: the text in <th> elements are bold and centered by default.
449 | type CellNode struct {
450 | 	NodeType
451 | 	Pos
452 | 	AlignType
453 | 	Kind  int
454 | 	Nodes []Node
455 | }
456 | 
457 | // Render returns the html reprenestation of table-cell
458 | func (c *CellNode) Render() string {
459 | 	var s string
460 | 	tag := "td"
461 | 	if c.Kind == Header {
462 | 		tag = "th"
463 | 	}
464 | 	for _, node := range c.Nodes {
465 | 		s += node.Render()
466 | 	}
467 | 	return fmt.Sprintf("<%[1]s%s>%s</%[1]s>", tag, c.Style(), s)
468 | }
469 | 
470 | // Style return the cell-style based on alignment field
471 | func (c *CellNode) Style() string {
472 | 	s := " style=\"text-align:"
473 | 	switch c.Align() {
474 | 	case Right:
475 | 		s += "right\""
476 | 	case Left:
477 | 		s += "left\""
478 | 	case Center:
479 | 		s += "center\""
480 | 	default:
481 | 		s = ""
482 | 	}
483 | 	return s
484 | }
485 | 
486 | func (p *parse) newCell(pos Pos, kind int, align AlignType) *CellNode {
487 | 	return &CellNode{NodeType: NodeCell, Pos: pos, Kind: kind, AlignType: align}
488 | }
489 | 
490 | // BlockQuote represents block-quote tag.
491 | type BlockQuoteNode struct {
492 | 	NodeType
493 | 	Pos
494 | 	Nodes []Node
495 | }
496 | 
497 | // Render returns the html representation of BlockQuote
498 | func (n *BlockQuoteNode) Render() string {
499 | 	var s string
500 | 	for _, node := range n.Nodes {
501 | 		s += node.Render()
502 | 	}
503 | 	return wrap("blockquote", s)
504 | }
505 | 
506 | func (p *parse) newBlockQuote(pos Pos) *BlockQuoteNode {
507 | 	return &BlockQuoteNode{NodeType: NodeBlockQuote, Pos: pos}
508 | }
509 | 
510 | // CheckboxNode represents checked and unchecked checkbox tag.
511 | // Used in task lists.
512 | type CheckboxNode struct {
513 | 	NodeType
514 | 	Pos
515 | 	Checked bool
516 | }
517 | 
518 | // Render returns the html representation of checked and unchecked CheckBox.
519 | func (n *CheckboxNode) Render() string {
520 | 	s := "<input type=\"checkbox\""
521 | 	if n.Checked {
522 | 		s += " checked"
523 | 	}
524 | 	return s + ">"
525 | }
526 | 
527 | func (p *parse) newCheckbox(pos Pos, checked bool) *CheckboxNode {
528 | 	return &CheckboxNode{NodeType: NodeCheckbox, Pos: pos, Checked: checked}
529 | }
530 | 
// wrap surrounds body with an opening and a closing tag of the given name.
func wrap(tag, body string) string {
	return fmt.Sprintf("<%s>%s</%s>", tag, body, tag)
}
535 | 
536 | // Group all text configuration in one place(escaping, smartypants, etc..)
537 | func (p *parse) text(input string) string {
538 | 	opts := p.root().options
539 | 	if opts.Smartypants {
540 | 		input = smartypants(input)
541 | 	}
542 | 	if opts.Fractions {
543 | 		input = smartyfractions(input)
544 | 	}
545 | 	return escape(input)
546 | }
547 | 
548 | // Helper escaper
549 | func escape(str string) (cpy string) {
550 | 	emp := regexp.MustCompile(`&\w+;`)
551 | 	for i := 0; i < len(str); i++ {
552 | 		switch s := str[i]; s {
553 | 		case '>':
554 | 			cpy += "&gt;"
555 | 		case '"':
556 | 			cpy += "&quot;"
557 | 		case '\'':
558 | 			cpy += "&#39;"
559 | 		case '<':
560 | 			if res := reHTML.tag.FindString(str[i:]); res != "" {
561 | 				cpy += res
562 | 				i += len(res) - 1
563 | 			} else {
564 | 				cpy += "&lt;"
565 | 			}
566 | 		case '&':
567 | 			if res := emp.FindString(str[i:]); res != "" {
568 | 				cpy += res
569 | 				i += len(res) - 1
570 | 			} else {
571 | 				cpy += "&amp;"
572 | 			}
573 | 		default:
574 | 			cpy += str[i : i+1]
575 | 		}
576 | 	}
577 | 	return
578 | }
579 | 
// Replacement tables used by smartypants. They are built once at package
// initialisation instead of on every call.
var (
	// em-dashes, en-dashes, ellipses
	smartyDashes = strings.NewReplacer("---", "\u2014", "--", "\u2013", "...", "\u2026")
	// a single quote at the start of the text or after an opening character
	reSmartyOpenSingle = regexp.MustCompile("(^|[-\u2014/(\\[{\"\\s])'")
	// a double quote at the start of the text or after an opening character
	reSmartyOpenDouble = regexp.MustCompile("(^|[-\u2014/(\\[{\u2018\\s])\"")
)

// smartypants converts plain ASCII punctuation to its typographic form
// (translated from marked.js): "---" to an em-dash, "--" to an en-dash,
// "..." to an ellipsis, and straight quotes to curly quotes.
//
// The order of the quote passes matters: opening quotes are converted first,
// so every straight quote still left afterwards is a closing quote or an
// apostrophe.
func smartypants(text string) string {
	text = smartyDashes.Replace(text)
	// opening singles
	text = reSmartyOpenSingle.ReplaceAllString(text, "$1\u2018")
	// closing singles & apostrophes
	text = strings.Replace(text, "'", "\u2019", -1)
	// opening doubles
	text = reSmartyOpenDouble.ReplaceAllString(text, "$1\u201c")
	// closing doubles
	text = strings.Replace(text, "\"", "\u201d", -1)
	return text
}
595 | 
// reSmartyFraction matches "a/b" with an optional third "/c" part. The
// third group is what tells a date-like "a/b/c" apart from a fraction.
// It is compiled once at package initialisation instead of on every call.
var reSmartyFraction = regexp.MustCompile(`(\d+)(/\d+)(/\d+|)`)

// smartyfractions rewrites fractions found in text as html.
//
// Fractions that have a named entity (1/2, 3/4, ...) become "&fracNM;".
// Any other "a/b" becomes "<sup>a</sup>&frasl;<sub>b</sub>". Date-like
// input such as "1/2/2015" is left untouched.
func smartyfractions(text string) string {
	return reSmartyFraction.ReplaceAllStringFunc(text, func(str string) string {
		match := reSmartyFraction.FindStringSubmatch(str)
		// If it's date like
		if match[3] != "" {
			return str
		}
		num, den := match[1], strings.TrimPrefix(match[2], "/")
		switch num + "/" + den {
		case "1/2", "1/3", "2/3", "1/4", "3/4", "1/5", "2/5", "3/5", "4/5",
			"1/6", "5/6", "1/7", "1/8", "3/8", "5/8", "7/8":
			return fmt.Sprintf("&frac%s;", num+den)
		default:
			return fmt.Sprintf("<sup>%s</sup>&frasl;<sub>%s</sub>", num, den)
		}
	})
}
615 | 


--------------------------------------------------------------------------------
/parser.go:
--------------------------------------------------------------------------------
  1 | package mark
  2 | 
  3 | import (
  4 | 	"regexp"
  5 | 	"strings"
  6 | 	"unicode"
  7 | 	"unicode/utf8"
  8 | )
  9 | 
 10 | // parse holds the state of the parser.
 11 | type parse struct {
 12 | 	Nodes     []Node
 13 | 	lex       Lexer
 14 | 	options   *Options
 15 | 	tr        *parse
 16 | 	output    string
 17 | 	peekCount int
 18 | 	token     [3]item                 // three-token lookahead for parser
 19 | 	links     map[string]*DefLinkNode // Deflink parsing, used RefLinks
 20 | 	renderFn  map[NodeType]RenderFn   // Custom overridden fns
 21 | }
 22 | 
 23 | // Return new parser
 24 | func newParse(input string, opts *Options) *parse {
 25 | 	return &parse{
 26 | 		lex:      lex(input),
 27 | 		options:  opts,
 28 | 		links:    make(map[string]*DefLinkNode),
 29 | 		renderFn: make(map[NodeType]RenderFn),
 30 | 	}
 31 | }
 32 | 
 33 | // parse convert the raw text to Nodeparse.
 34 | func (p *parse) parse() {
 35 | Loop:
 36 | 	for {
 37 | 		var n Node
 38 | 		switch t := p.peek(); t.typ {
 39 | 		case itemEOF, itemError:
 40 | 			break Loop
 41 | 		case itemNewLine:
 42 | 			p.next()
 43 | 		case itemHr:
 44 | 			n = p.newHr(p.next().pos)
 45 | 		case itemHTML:
 46 | 			t = p.next()
 47 | 			n = p.newHTML(t.pos, t.val)
 48 | 		case itemDefLink:
 49 | 			n = p.parseDefLink()
 50 | 		case itemHeading, itemLHeading:
 51 | 			n = p.parseHeading()
 52 | 		case itemCodeBlock, itemGfmCodeBlock:
 53 | 			n = p.parseCodeBlock()
 54 | 		case itemList:
 55 | 			n = p.parseList()
 56 | 		case itemTable, itemLpTable:
 57 | 			n = p.parseTable()
 58 | 		case itemBlockQuote:
 59 | 			n = p.parseBlockQuote()
 60 | 		case itemIndent:
 61 | 			space := p.next()
 62 | 			// If it isn't followed by itemText
 63 | 			if p.peek().typ != itemText {
 64 | 				continue
 65 | 			}
 66 | 			p.backup2(space)
 67 | 			fallthrough
 68 | 		// itemText
 69 | 		default:
 70 | 			tmp := p.newParagraph(t.pos)
 71 | 			tmp.Nodes = p.parseText(p.next().val + p.scanLines())
 72 | 			n = tmp
 73 | 		}
 74 | 		if n != nil {
 75 | 			p.append(n)
 76 | 		}
 77 | 	}
 78 | }
 79 | 
 80 | // Root getter
 81 | func (p *parse) root() *parse {
 82 | 	if p.tr == nil {
 83 | 		return p
 84 | 	}
 85 | 	return p.tr.root()
 86 | }
 87 | 
 88 | // Render parse nodes to the wanted output
 89 | func (p *parse) render() {
 90 | 	var output string
 91 | 	for i, node := range p.Nodes {
 92 | 		// If there's a custom render function, use it instead.
 93 | 		if fn, ok := p.renderFn[node.Type()]; ok {
 94 | 			output = fn(node)
 95 | 		} else {
 96 | 			output = node.Render()
 97 | 		}
 98 | 		p.output += output
 99 | 		if output != "" && i != len(p.Nodes)-1 {
100 | 			p.output += "\n"
101 | 		}
102 | 	}
103 | }
104 | 
105 | // append new node to nodes-list
106 | func (p *parse) append(n Node) {
107 | 	p.Nodes = append(p.Nodes, n)
108 | }
109 | 
110 | // next returns the next token
111 | func (p *parse) next() item {
112 | 	if p.peekCount > 0 {
113 | 		p.peekCount--
114 | 	} else {
115 | 		p.token[0] = p.lex.nextItem()
116 | 	}
117 | 	return p.token[p.peekCount]
118 | }
119 | 
120 | // peek returns but does not consume the next token.
121 | func (p *parse) peek() item {
122 | 	if p.peekCount > 0 {
123 | 		return p.token[p.peekCount-1]
124 | 	}
125 | 	p.peekCount = 1
126 | 	p.token[0] = p.lex.nextItem()
127 | 	return p.token[0]
128 | }
129 | 
130 | // backup backs the input stream tp one token
131 | func (p *parse) backup() {
132 | 	p.peekCount++
133 | }
134 | 
135 | // backup2 backs the input stream up two tokens.
136 | // The zeroth token is already there.
137 | func (p *parse) backup2(t1 item) {
138 | 	p.token[1] = t1
139 | 	p.peekCount = 2
140 | }
141 | 
142 | // parseText
143 | func (p *parse) parseText(input string) (nodes []Node) {
144 | 	// Trim whitespaces that not a line-break
145 | 	input = regexp.MustCompile(`(?m)^ +| +(\n|$)`).ReplaceAllStringFunc(input, func(s string) string {
146 | 		if reBr.MatchString(s) {
147 | 			return s
148 | 		}
149 | 		return strings.Replace(s, " ", "", -1)
150 | 	})
151 | 	l := lexInline(input)
152 | 	for token := range l.items {
153 | 		var node Node
154 | 		switch token.typ {
155 | 		case itemBr:
156 | 			node = p.newBr(token.pos)
157 | 		case itemStrong, itemItalic, itemStrike, itemCode:
158 | 			node = p.parseEmphasis(token.typ, token.pos, token.val)
159 | 		case itemLink, itemAutoLink, itemGfmLink:
160 | 			var title, href string
161 | 			var text []Node
162 | 			if token.typ == itemLink {
163 | 				match := reLink.FindStringSubmatch(token.val)
164 | 				text = p.parseText(match[1])
165 | 				href, title = match[2], match[3]
166 | 			} else {
167 | 				var match []string
168 | 				if token.typ == itemGfmLink {
169 | 					match = reGfmLink.FindStringSubmatch(token.val)
170 | 				} else {
171 | 					match = reAutoLink.FindStringSubmatch(token.val)
172 | 				}
173 | 				href = match[1]
174 | 				text = append(text, p.newText(token.pos, match[1]))
175 | 			}
176 | 			node = p.newLink(token.pos, title, href, text...)
177 | 		case itemImage:
178 | 			match := reImage.FindStringSubmatch(token.val)
179 | 			node = p.newImage(token.pos, match[3], match[2], match[1])
180 | 		case itemRefLink, itemRefImage:
181 | 			match := reRefLink.FindStringSubmatch(token.val)
182 | 			text, ref := match[1], match[2]
183 | 			if ref == "" {
184 | 				ref = text
185 | 			}
186 | 			if token.typ == itemRefLink {
187 | 				node = p.newRefLink(token.typ, token.pos, token.val, ref, p.parseText(text))
188 | 			} else {
189 | 				node = p.newRefImage(token.typ, token.pos, token.val, ref, text)
190 | 			}
191 | 		case itemHTML:
192 | 			node = p.newHTML(token.pos, token.val)
193 | 		default:
194 | 			node = p.newText(token.pos, token.val)
195 | 		}
196 | 		nodes = append(nodes, node)
197 | 	}
198 | 	return nodes
199 | }
200 | 
201 | // parse inline emphasis
202 | func (p *parse) parseEmphasis(typ itemType, pos Pos, val string) *EmphasisNode {
203 | 	var re *regexp.Regexp
204 | 	switch typ {
205 | 	case itemStrike:
206 | 		re = reStrike
207 | 	case itemStrong:
208 | 		re = reStrong
209 | 	case itemCode:
210 | 		re = reCode
211 | 	case itemItalic:
212 | 		re = reItalic
213 | 	}
214 | 	node := p.newEmphasis(pos, typ)
215 | 	match := re.FindStringSubmatch(val)
216 | 	text := match[len(match)-1]
217 | 	if text == "" {
218 | 		text = match[1]
219 | 	}
220 | 	node.Nodes = p.parseText(text)
221 | 	return node
222 | }
223 | 
224 | // parse heading block
225 | func (p *parse) parseHeading() (node *HeadingNode) {
226 | 	token := p.next()
227 | 	level := 1
228 | 	var text string
229 | 	if token.typ == itemHeading {
230 | 		match := reHeading.FindStringSubmatch(token.val)
231 | 		level, text = len(match[1]), match[2]
232 | 	} else {
233 | 		match := reLHeading.FindStringSubmatch(token.val)
234 | 		// using equal signs for first-level, and dashes for second-level.
235 | 		text = match[1]
236 | 		if match[2] == "-" {
237 | 			level = 2
238 | 		}
239 | 	}
240 | 	node = p.newHeading(token.pos, level, text)
241 | 	node.Nodes = p.parseText(text)
242 | 	return
243 | }
244 | 
245 | func (p *parse) parseDefLink() *DefLinkNode {
246 | 	token := p.next()
247 | 	match := reDefLink.FindStringSubmatch(token.val)
248 | 	name := strings.ToLower(match[1])
249 | 	// name(lowercase), href, title
250 | 	n := p.newDefLink(token.pos, name, match[2], match[3])
251 | 	// store in links
252 | 	links := p.root().links
253 | 	if _, ok := links[name]; !ok {
254 | 		links[name] = n
255 | 	}
256 | 	return n
257 | }
258 | 
259 | // parse codeBlock
260 | func (p *parse) parseCodeBlock() *CodeNode {
261 | 	var lang, text string
262 | 	token := p.next()
263 | 	if token.typ == itemGfmCodeBlock {
264 | 		codeStart := reGfmCode.FindStringSubmatch(token.val)
265 | 		lang = codeStart[3]
266 | 		text = token.val[len(codeStart[0]):]
267 | 	} else {
268 | 		text = reCodeBlock.trim(token.val, "")
269 | 	}
270 | 	return p.newCode(token.pos, lang, text)
271 | }
272 | 
273 | func (p *parse) parseBlockQuote() (n *BlockQuoteNode) {
274 | 	token := p.next()
275 | 	// replacer
276 | 	re := regexp.MustCompile(`(?m)^ *> ?`)
277 | 	raw := re.ReplaceAllString(token.val, "")
278 | 	// TODO(a8m): doesn't work right now with defLink(inside the blockQuote)
279 | 	tr := &parse{lex: lex(raw), tr: p}
280 | 	tr.parse()
281 | 	n = p.newBlockQuote(token.pos)
282 | 	n.Nodes = tr.Nodes
283 | 	return
284 | }
285 | 
286 | // parse list
287 | func (p *parse) parseList() *ListNode {
288 | 	token := p.next()
289 | 	list := p.newList(token.pos, isDigit(token.val))
290 | Loop:
291 | 	for {
292 | 		switch token = p.peek(); token.typ {
293 | 		case itemLooseItem, itemListItem:
294 | 			list.append(p.parseListItem())
295 | 		default:
296 | 			break Loop
297 | 		}
298 | 	}
299 | 	return list
300 | }
301 | 
302 | // parse listItem
303 | func (p *parse) parseListItem() *ListItemNode {
304 | 	token := p.next()
305 | 	item := p.newListItem(token.pos)
306 | 	token.val = strings.TrimSpace(token.val)
307 | 	if p.isTaskItem(token.val) {
308 | 		item.Nodes = p.parseTaskItem(token)
309 | 		return item
310 | 	}
311 | 	tr := &parse{lex: lex(token.val), tr: p}
312 | 	tr.parse()
313 | 	for _, node := range tr.Nodes {
314 | 		// wrap with paragraph only when it's a loose item
315 | 		if n, ok := node.(*ParagraphNode); ok && token.typ == itemListItem {
316 | 			item.Nodes = append(item.Nodes, n.Nodes...)
317 | 		} else {
318 | 			item.append(node)
319 | 		}
320 | 	}
321 | 	return item
322 | }
323 | 
324 | // parseTaskItem parses list item as a task item.
325 | func (p *parse) parseTaskItem(token item) []Node {
326 | 	checkbox := p.newCheckbox(token.pos, token.val[1] == 'x')
327 | 	token.val = strings.TrimSpace(token.val[3:])
328 | 	return append([]Node{checkbox}, p.parseText(token.val)...)
329 | }
330 | 
331 | // isTaskItem tests if the given string is list task item.
332 | func (p *parse) isTaskItem(s string) bool {
333 | 	if len(s) < 5 || s[0] != '[' || (s[1] != 'x' && s[1] != ' ') || s[2] != ']' {
334 | 		return false
335 | 	}
336 | 	return "" != strings.TrimSpace(s[3:])
337 | }
338 | 
339 | // parse table
340 | func (p *parse) parseTable() *TableNode {
341 | 	table := p.newTable(p.next().pos)
342 | 	// Align	[ None, Left, Right, ... ]
343 | 	// Header	[ Cells: [ ... ] ]
344 | 	// Data:	[ Rows: [ Cells: [ ... ] ] ]
345 | 	rows := struct {
346 | 		Align  []AlignType
347 | 		Header []item
348 | 		Cells  [][]item
349 | 	}{}
350 | Loop:
351 | 	for i := 0; ; {
352 | 		switch token := p.next(); token.typ {
353 | 		case itemTableRow:
354 | 			i++
355 | 			if i > 2 {
356 | 				rows.Cells = append(rows.Cells, []item{})
357 | 			}
358 | 		case itemTableCell:
359 | 			// Header
360 | 			if i == 1 {
361 | 				rows.Header = append(rows.Header, token)
362 | 				// Alignment
363 | 			} else if i == 2 {
364 | 				rows.Align = append(rows.Align, parseAlign(token.val))
365 | 				// Data
366 | 			} else {
367 | 				pos := i - 3
368 | 				rows.Cells[pos] = append(rows.Cells[pos], token)
369 | 			}
370 | 		default:
371 | 			p.backup()
372 | 			break Loop
373 | 		}
374 | 	}
375 | 	// Tranform to nodes
376 | 	table.append(p.parseCells(Header, rows.Header, rows.Align))
377 | 	// Table body
378 | 	for _, row := range rows.Cells {
379 | 		table.append(p.parseCells(Data, row, rows.Align))
380 | 	}
381 | 	return table
382 | }
383 | 
384 | // parse cells and return new row
385 | func (p *parse) parseCells(kind int, items []item, align []AlignType) *RowNode {
386 | 	var row *RowNode
387 | 	for i, item := range items {
388 | 		if i == 0 {
389 | 			row = p.newRow(item.pos)
390 | 		}
391 | 		cell := p.newCell(item.pos, kind, align[i])
392 | 		cell.Nodes = p.parseText(item.val)
393 | 		row.append(cell)
394 | 	}
395 | 	return row
396 | }
397 | 
398 | // Used to consume lines(itemText) for a continues paragraphs
399 | func (p *parse) scanLines() (s string) {
400 | 	for {
401 | 		tkn := p.next()
402 | 		if tkn.typ == itemText || tkn.typ == itemIndent {
403 | 			s += tkn.val
404 | 		} else if tkn.typ == itemNewLine {
405 | 			if t := p.peek().typ; t != itemText && t != itemIndent {
406 | 				p.backup2(tkn)
407 | 				break
408 | 			}
409 | 			s += tkn.val
410 | 		} else {
411 | 			p.backup()
412 | 			break
413 | 		}
414 | 	}
415 | 	return
416 | }
417 | 
418 | // get align-string and return the align type of it
419 | func parseAlign(s string) (typ AlignType) {
420 | 	sfx, pfx := strings.HasSuffix(s, ":"), strings.HasPrefix(s, ":")
421 | 	switch {
422 | 	case sfx && pfx:
423 | 		typ = Center
424 | 	case sfx:
425 | 		typ = Right
426 | 	case pfx:
427 | 		typ = Left
428 | 	}
429 | 	return
430 | }
431 | 
// isDigit reports whether the first rune of s is a digit. An empty string
// is not a digit.
func isDigit(s string) bool {
	first, size := utf8.DecodeRuneInString(s)
	if size == 0 {
		return false
	}
	return unicode.IsDigit(first)
}
437 | 


--------------------------------------------------------------------------------
/parser_test.go:
--------------------------------------------------------------------------------
  1 | package mark
  2 | 
  3 | import (
  4 | 	"testing"
  5 | )
  6 | 
  7 | type parseTest struct {
  8 | 	name  string
  9 | 	items []item
 10 | 	nodes []NodeType
 11 | }
 12 | 
 13 | type mockLexer struct {
 14 | 	items []item
 15 | }
 16 | 
 17 | func (l *mockLexer) nextItem() (t item) {
 18 | 	if len(l.items) == 0 {
 19 | 		return item{itemEOF, 0, ""}
 20 | 	}
 21 | 	t, l.items = l.items[0], l.items[1:]
 22 | 	return
 23 | }
 24 | 
 25 | func newMockLex(items []item) *mockLexer {
 26 | 	return &mockLexer{items: items}
 27 | }
 28 | 
 29 | var blockparseTests = []parseTest{
 30 | 	{"eof", []item{}, []NodeType{}},
 31 | 	{"text-1",
 32 | 		[]item{item{itemText, 0, "hello"}},
 33 | 		[]NodeType{NodeParagraph},
 34 | 	},
 35 | 	{"text-2",
 36 | 		[]item{
 37 | 			item{itemText, 0, "hello"},
 38 | 			item{itemNewLine, 0, "\n"},
 39 | 			item{itemText, 0, "world"},
 40 | 		},
 41 | 		[]NodeType{NodeParagraph},
 42 | 	},
 43 | 	{"text-3",
 44 | 		[]item{
 45 | 			item{itemText, 0, "hello"},
 46 | 			item{itemNewLine, 0, "\n"},
 47 | 			item{itemNewLine, 0, "\n\n"},
 48 | 			item{itemText, 0, "world"},
 49 | 		},
 50 | 		[]NodeType{NodeParagraph, NodeParagraph},
 51 | 	},
 52 | 	{"header",
 53 | 		[]item{
 54 | 			item{itemHeading, 0, "# Hello"},
 55 | 		},
 56 | 		[]NodeType{NodeHeading},
 57 | 	},
 58 | 	{"code-block",
 59 | 		[]item{
 60 | 			item{itemCodeBlock, 0, "    js\n    hello"},
 61 | 		},
 62 | 		[]NodeType{NodeCode},
 63 | 	},
 64 | 	{"table",
 65 | 		[]item{
 66 | 			item{itemTable, 0, ""},
 67 | 		},
 68 | 		[]NodeType{NodeTable},
 69 | 	},
 70 | 	{"list",
 71 | 		[]item{
 72 | 			item{itemList, 0, "-"},
 73 | 			item{itemListItem, 0, "hello"},
 74 | 		},
 75 | 		[]NodeType{NodeList},
 76 | 	},
 77 | 	{"HTML",
 78 | 		[]item{
 79 | 			item{itemHTML, 0, "<hello>\nworld</hello>"},
 80 | 		},
 81 | 		[]NodeType{NodeHTML},
 82 | 	},
 83 | }
 84 | 
 85 | func collectNodes(t *parseTest) []Node {
 86 | 	tr := &parse{
 87 | 		lex:     newMockLex(t.items),
 88 | 		links:   make(map[string]*DefLinkNode),
 89 | 		options: DefaultOptions(),
 90 | 	}
 91 | 	tr.parse()
 92 | 	return tr.Nodes
 93 | }
 94 | 
 95 | func equalTypes(n1 []Node, n2 []NodeType) bool {
 96 | 	if len(n1) != len(n2) {
 97 | 		return false
 98 | 	}
 99 | 	for i := range n1 {
100 | 		if n1[i].Type() != n2[i] {
101 | 			return false
102 | 		}
103 | 	}
104 | 	return true
105 | }
106 | 
107 | func TestBlocksparse(t *testing.T) {
108 | 	for _, test := range blockparseTests {
109 | 		nodes := collectNodes(&test)
110 | 		if !equalTypes(nodes, test.nodes) {
111 | 			t.Errorf("%s: got\n\t%+v\nexpected\n\t%+v", test.name, nodes, test.nodes)
112 | 		}
113 | 	}
114 | }
115 | 


--------------------------------------------------------------------------------
/test/auto_links.html:
--------------------------------------------------------------------------------
1 | <p>Link: <a href="http://example.com/">http://example.com/</a>.</p>
2 | 
3 | <ul>
4 | <li>In a list?</li>
5 | <li><a href="http://example.com/">http://example.com/</a></li>
6 | <li>It should.</li>
7 | </ul>


--------------------------------------------------------------------------------
/test/auto_links.text:
--------------------------------------------------------------------------------
1 | Link: <http://example.com/>.
2 | 
3 | * In a list?
4 | * <http://example.com/>
5 | * It should.


--------------------------------------------------------------------------------
/test/backslash_escapes.html:
--------------------------------------------------------------------------------
 1 | <p>These should all get escaped:</p>
 2 | 
 3 | <p>Backtick: `</p>
 4 | 
 5 | <p>Asterisk: *</p>
 6 | 
 7 | <p>Underscore: _</p>
 8 | 
 9 | <p>Left brace: {</p>
10 | 
11 | <p>Right brace: }</p>
12 | 
13 | <p>Left bracket: [</p>
14 | 
15 | <p>Right bracket: ]</p>
16 | 
17 | <p>Left paren: (</p>
18 | 
19 | <p>Right paren: )</p>
20 | 
21 | <p>Hash: #</p>
22 | 
23 | <p>Period: .</p>
24 | 
25 | <p>Bang: !</p>
26 | 
27 | <p>Plus: +</p>
28 | 
29 | <p>Minus: -</p>
30 | 
31 | <p>These should not, because they occur within a code block:</p>
32 | 
33 | <pre><code>Backslash: \\
34 | 
35 | Backtick: \`
36 | 
37 | Asterisk: \*
38 | 
39 | Underscore: \_
40 | 
41 | Left brace: \{
42 | 
43 | Right brace: \}
44 | 
45 | Left bracket: \[
46 | 
47 | Right bracket: \]
48 | 
49 | Left paren: \(
50 | 
51 | Right paren: \)
52 | 
53 | Greater-than: \&gt;
54 | 
55 | Hash: \#
56 | 
57 | Period: \.
58 | 
59 | Bang: \!
60 | 
61 | Plus: \+
62 | 
63 | Minus: \-
64 | </code></pre>
65 | 


--------------------------------------------------------------------------------
/test/backslash_escapes.text:
--------------------------------------------------------------------------------
 1 | These should all get escaped:
 2 | 
 3 | Backtick: \`
 4 | 
 5 | Asterisk: \*
 6 | 
 7 | Underscore: \_
 8 | 
 9 | Left brace: \{
10 | 
11 | Right brace: \}
12 | 
13 | Left bracket: \[
14 | 
15 | Right bracket: \]
16 | 
17 | Left paren: \(
18 | 
19 | Right paren: \)
20 | 
21 | Hash: \#
22 | 
23 | Period: \.
24 | 
25 | Bang: \!
26 | 
27 | Plus: \+
28 | 
29 | Minus: \-
30 | 
31 | These should not, because they occur within a code block:
32 | 
33 | 	Backslash: \\
34 | 
35 | 	Backtick: \`
36 | 
37 | 	Asterisk: \*
38 | 
39 | 	Underscore: \_
40 | 
41 | 	Left brace: \{
42 | 
43 | 	Right brace: \}
44 | 
45 | 	Left bracket: \[
46 | 
47 | 	Right bracket: \]
48 | 
49 | 	Left paren: \(
50 | 
51 | 	Right paren: \)
52 | 
53 | 	Greater-than: \>
54 | 
55 | 	Hash: \#
56 | 
57 | 	Period: \.
58 | 
59 | 	Bang: \!
60 | 
61 | 	Plus: \+
62 | 
63 | 	Minus: \-
64 | 


--------------------------------------------------------------------------------
/test/blockquote_list_item.html:
--------------------------------------------------------------------------------
 1 | <p>This fails in markdown.pl and upskirt:</p>
 2 | 
 3 | <ul>
 4 | 
 5 | <li><p>hello</p>
 6 | <blockquote>
 7 | <p>world</p>
 8 | </blockquote>
 9 | </li>
10 | 
11 | <li><p>foo</p>
12 | <blockquote>
13 | <p>bar</p>
14 | </blockquote>
15 | </li>
16 | 
17 | </ul>


--------------------------------------------------------------------------------
/test/blockquote_list_item.text:
--------------------------------------------------------------------------------
1 | This fails in markdown.pl and upskirt:
2 | 
3 | * hello
4 |   > world
5 | 
6 | * foo
7 | > bar


--------------------------------------------------------------------------------
/test/blockquotes_code_blocks.html:
--------------------------------------------------------------------------------
 1 | <blockquote>
 2 | <p>Example:</p>
 3 | <pre><code>sub status {
 4 |     print working
 5 | }
 6 | </code></pre><p>Or:</p>
 7 | <pre><code>sub status {
 8 |     return working
 9 | }
10 | </code></pre></blockquote>


--------------------------------------------------------------------------------
/test/blockquotes_code_blocks.text:
--------------------------------------------------------------------------------
 1 | > Example:
 2 | >
 3 | >     sub status {
 4 | >         print working
 5 | >     }
 6 | >
 7 | > Or:
 8 | >
 9 | >     sub status {
10 | >         return working
11 | >     }


--------------------------------------------------------------------------------
/test/blockquotes_def.html:
--------------------------------------------------------------------------------
1 | <blockquote>
2 | <p>foo
3 | bar</p>
4 | </blockquote>
5 | <blockquote>
6 | <p>bar</p>
7 | </blockquote>


--------------------------------------------------------------------------------
/test/blockquotes_def.text:
--------------------------------------------------------------------------------
1 | > foo
2 | > bar
3 | [1]: foo
4 | > bar


--------------------------------------------------------------------------------
/test/blockquotes_nested.html:
--------------------------------------------------------------------------------
1 | <blockquote>
2 | <p>foo</p>
3 | <blockquote>
4 | <p>bar</p>
5 | </blockquote>
6 | <p>foo</p>
7 | </blockquote>


--------------------------------------------------------------------------------
/test/blockquotes_nested.text:
--------------------------------------------------------------------------------
1 | > foo
2 | >
3 | > > bar
4 | >
5 | > foo


--------------------------------------------------------------------------------
/test/blockquotes_text.html:
--------------------------------------------------------------------------------
 1 | <p>Blockquotes Text</p>
 2 | 
 3 | <blockquote>
 4 | <p>Hello
 5 | World.
 6 | Ariel here</p>
 7 | </blockquote>
 8 | 
 9 | <hr>
10 | 
11 | <blockquote>
12 | <p>Hello
13 | World.
14 | Ariel here</p>
15 | </blockquote>
16 | 
17 | <hr>
18 | 
19 | <blockquote>
20 | <p><strong>foo</strong> bar <em>baz</em>
21 | hello</p>
22 | <h2 id="h2">h2</h2>
23 | </blockquote>


--------------------------------------------------------------------------------
/test/blockquotes_text.text:
--------------------------------------------------------------------------------
 1 | Blockquotes Text
 2 | 
 3 | > Hello
 4 | World.
 5 | Ariel here
 6 | 
 7 | ---
 8 | 
 9 | > Hello
10 | > World.
11 | > Ariel here
12 | 
13 | ___
14 | 
15 | > **foo** bar _baz_
16 | > hello
17 | > ## h2


--------------------------------------------------------------------------------
/test/code_blocks.html:
--------------------------------------------------------------------------------
 1 | <pre><code>code block on the first line
 2 | </code></pre>
 3 | 
 4 | <p>Regular text.</p>
 5 | 
 6 | <pre><code>code block indented by spaces
 7 | </code></pre>
 8 | 
 9 | <p>Regular text.</p>
10 | 
11 | <pre><code>the lines in this block  
12 | all contain trailing spaces  
13 | </code></pre>
14 | 
15 | <p>Regular Text.</p>
16 | 
17 | <pre><code>code block on the last line
18 | </code></pre>


--------------------------------------------------------------------------------
/test/code_blocks.text:
--------------------------------------------------------------------------------
 1 | 	code block on the first line
 2 | 	
 3 | Regular text.
 4 | 
 5 |     code block indented by spaces
 6 | 
 7 | Regular text.
 8 | 
 9 | 	the lines in this block  
10 | 	all contain trailing spaces  
11 | 
12 | Regular Text.
13 | 
14 | 	code block on the last line
15 | 


--------------------------------------------------------------------------------
/test/code_spans.html:
--------------------------------------------------------------------------------
1 | <p><code>test</code></p>
2 | 
3 | <p>This is paragraph contians <code>span block</code>.</p>
4 | 
5 | 


--------------------------------------------------------------------------------
/test/code_spans.text:
--------------------------------------------------------------------------------
1 | `test`
2 | 
3 | This is paragraph contians `span block`.
4 | 
5 | 
6 | 


--------------------------------------------------------------------------------
/test/emphasis.html:
--------------------------------------------------------------------------------
 1 | <p>hello <strong>world</strong></p>
 2 | 
 3 | <p>hello <strong>world</strong></p>
 4 | 
 5 | <p>hello <em>world</em></p>
 6 | 
 7 | <p>hello <em>world</em></p>
 8 | 
 9 | <p><strong><em>hello</em></strong> world</p>
10 | 
11 | <p><strong><em>hello</em></strong> world</p>
12 | 
13 | <p><strong><em>hello</em></strong> world</p>
14 | 
15 | <p><strong><em>hello</em></strong> world</p>
16 | 
17 | <p><strong>hello*</strong> world</p>
18 | 


--------------------------------------------------------------------------------
/test/emphasis.text:
--------------------------------------------------------------------------------
 1 | hello **world**
 2 | 
 3 | hello __world__
 4 | 
 5 | hello _world_
 6 | 
 7 | hello *world*
 8 | 
 9 | ***hello*** world
10 | 
11 | ___hello___ world
12 | 
13 | __*hello*__ world
14 | 
15 | **_hello_** world
16 | 
17 | __hello*__ world
18 | 
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/test/gfm_code_blocks.html:
--------------------------------------------------------------------------------
1 | <pre><code class="lang-js">var foo = 1
2 | </code></pre>
3 | 
4 | <pre><code class="lang-bash">echo hello
5 | </code></pre>


--------------------------------------------------------------------------------
/test/gfm_code_blocks.text:
--------------------------------------------------------------------------------
1 | ``` js
2 | var foo = 1
3 | ```
4 | 
5 | ~~~bash
6 | echo hello
7 | ~~~
8 | 


--------------------------------------------------------------------------------
/test/gfm_del.html:
--------------------------------------------------------------------------------
1 | <p>hello <del>world</del></p>
2 | 
3 | <p>foo ~bar~ <del>baz</del></p>
4 | 


--------------------------------------------------------------------------------
/test/gfm_del.text:
--------------------------------------------------------------------------------
1 | hello ~~world~~
2 | 
3 | foo ~bar~ ~~baz~~
4 | 
5 | 


--------------------------------------------------------------------------------
/test/gfm_tables.html:
--------------------------------------------------------------------------------
  1 | <table>
  2 | <thead>
  3 | <tr>
  4 | <th>Heading 1</th>
  5 | <th>Heading 2</th>
  6 | </tr>
  7 | </thead>
  8 | <tbody>
  9 | <tr>
 10 | <td>Cell 1</td>
 11 | <td>Cell 2</td>
 12 | </tr>
 13 | <tr>
 14 | <td>Cell 3</td>
 15 | <td>Cell 4</td>
 16 | </tr>
 17 | </tbody>
 18 | </table>
 19 | 
 20 | <table>
 21 | <thead>
 22 | <tr>
 23 | <th style="text-align:center">Header 1</th>
 24 | <th style="text-align:right">Header 2</th>
 25 | <th style="text-align:left">Header 3</th>
 26 | <th>Header 4</th>
 27 | </tr>
 28 | </thead>
 29 | <tbody>
 30 | <tr>
 31 | <td style="text-align:center">Cell 1</td>
 32 | <td style="text-align:right">Cell 2</td>
 33 | <td style="text-align:left">Cell 3</td>
 34 | <td>Cell 4</td>
 35 | </tr>
 36 | <tr>
 37 | <td style="text-align:center">Cell 5</td>
 38 | <td style="text-align:right">Cell 6</td>
 39 | <td style="text-align:left">Cell 7</td>
 40 | <td>Cell 8</td>
 41 | </tr>
 42 | </tbody>
 43 | </table>
 44 | 
 45 | <table>
 46 | <thead>
 47 | <tr>
 48 | <th>Header 1</th>
 49 | <th>Header 2</th>
 50 | </tr>
 51 | </thead>
 52 | <tbody>
 53 | <tr>
 54 | <td>Cell 1</td>
 55 | <td>Cell 2</td>
 56 | </tr>
 57 | <tr>
 58 | <td>Cell 3</td>
 59 | <td>Cell 4</td>
 60 | </tr>
 61 | </tbody>
 62 | </table>
 63 | 
 64 | <table>
 65 | <thead>
 66 | <tr>
 67 | <th style="text-align:left">Header 1</th>
 68 | <th style="text-align:center">Header 2</th>
 69 | <th style="text-align:right">Header 3</th>
 70 | <th>Header 4</th>
 71 | </tr>
 72 | </thead>
 73 | <tbody>
 74 | <tr>
 75 | <td style="text-align:left">Cell 1</td>
 76 | <td style="text-align:center">Cell 2</td>
 77 | <td style="text-align:right">Cell 3</td>
 78 | <td>Cell 4</td>
 79 | </tr>
 80 | <tr>
 81 | <td style="text-align:left"><em>Cell 5</em></td>
 82 | <td style="text-align:center">Cell 6</td>
 83 | <td style="text-align:right">Cell 7</td>
 84 | <td>Cell 8</td>
 85 | </tr>
 86 | </tbody>
 87 | </table>
 88 | 
 89 | <table>
 90 | <thead>
 91 | <tr>
 92 | <th>Id</th>
 93 | <th>Description</th>
 94 | </tr>
 95 | </thead>
 96 | <tbody>
 97 | <tr>
 98 | <td>12313</td>
 99 | <td><strong>foo</strong> <em>bar</em> <code>baz</code></td>
100 | </tr>
101 | <tr>
102 | <td>67522</td>
103 | <td><strong>foo</strong> <code>bar</code> <em>baz</em></td>
104 | </tr>
105 | </tbody>
106 | </table>
107 | 


--------------------------------------------------------------------------------
/test/gfm_tables.text:
--------------------------------------------------------------------------------
 1 | | Heading 1 | Heading 2
 2 | | --------- | ---------
 3 | | Cell 1    | Cell 2
 4 | | Cell 3    | Cell 4
 5 | 
 6 | | Header 1 | Header 2 | Header 3 | Header 4 |
 7 | | :------: | -------: | :------- | -------- |
 8 | | Cell 1   | Cell 2   | Cell 3   | Cell 4   |
 9 | | Cell 5   | Cell 6   | Cell 7   | Cell 8   |
10 | 
11 | Header 1 | Header 2
12 | -------- | --------
13 | Cell 1   | Cell 2
14 | Cell 3   | Cell 4
15 | 
16 | Header 1|Header 2|Header 3|Header 4
17 | :-------|:------:|-------:|--------
18 | Cell 1  |Cell 2  |Cell 3  |Cell 4
19 | *Cell 5*|Cell 6  |Cell 7  |Cell 8
20 | 
21 | Id       | Description
22 | ---------|-----------------------
23 | 12313    | **foo** _bar_ `baz`
24 | 67522    | __foo__ `bar` *baz*
25 | 


--------------------------------------------------------------------------------
/test/headers.html:
--------------------------------------------------------------------------------
 1 | <h1 id="hello">Hello</h1>
 2 | 
 3 | <h2 id="hello-this-is-heading">Hello this is heading</h2>
 4 | 
 5 | <h3 id="hello-hello">Hello         hello</h3>
 6 | 
 7 | <p>#foo</p>
 8 | 
 9 | <p>#bar</p>
10 | 
11 | <h1 id="hello">Hello</h1>
12 | 
13 | <h2 id="hello">Hello</h2>


--------------------------------------------------------------------------------
/test/headers.text:
--------------------------------------------------------------------------------
 1 | # Hello
 2 | 
 3 | ## Hello this is heading
 4 | 
 5 | ### Hello         hello
 6 | 
 7 | #foo
 8 | 
 9 | #bar
10 | 
11 | 
12 | Hello
13 | =====
14 | 
15 | Hello
16 | -----


--------------------------------------------------------------------------------
/test/hr.html:
--------------------------------------------------------------------------------
 1 | <p>Dash</p>
 2 | 
 3 | <hr>
 4 | 
 5 | <hr>
 6 | 
 7 | <hr>
 8 | 
 9 | <hr>
10 | 
11 | <hr>
12 | 
13 | <hr>
14 | 
15 | <pre><code>---
16 | </code></pre>


--------------------------------------------------------------------------------
/test/hr.text:
--------------------------------------------------------------------------------
 1 | Dash
 2 | 
 3 | ---
 4 | 
 5 | ___
 6 | 
 7 | * * *
 8 | 
 9 |    ---
10 | 
11 |    ***
12 | 
13 |    ___
14 | 
15 |     ---


--------------------------------------------------------------------------------
/test/html_block.html:
--------------------------------------------------------------------------------
1 | <p><span>foo</span>bar</p>
2 | <div>
3 |   hello bar
4 | </div>
5 | 
6 | <directive src="foo.img"/>
7 | 
8 | <p>hello <i>world</i></p>


--------------------------------------------------------------------------------
/test/html_block.text:
--------------------------------------------------------------------------------
1 | <span>foo</span>bar
2 | 
3 | <div>
4 |   hello bar
5 | </div>
6 | 
7 | <directive src="foo.img"/>
8 | 
9 | hello <i>world</i>


--------------------------------------------------------------------------------
/test/image_reference.html:
--------------------------------------------------------------------------------
1 | <p>Image Reference</p>
2 | 
3 | <p><img src="src" alt="hello" title="title"></p>
4 | 
5 | <p><img src="src" alt="1" title="title"></p>
6 | 
7 | <p><img src="src" alt="1" title="title"></p>


--------------------------------------------------------------------------------
/test/image_reference.text:
--------------------------------------------------------------------------------
1 | Image Reference
2 | 
3 | ![hello][1]
4 | 
5 | ![1]
6 | 
7 | ![1][]
8 | 
9 | [1]: src "title"


--------------------------------------------------------------------------------
/test/images.html:
--------------------------------------------------------------------------------
1 | <p><img src="/path/to/img.jpg" alt="Alt text"></p>
2 | 
3 | <p><img src="/path/to/img.jpg" alt="Alt text" title="Optional title"></p>


--------------------------------------------------------------------------------
/test/images.text:
--------------------------------------------------------------------------------
1 | ![Alt text](/path/to/img.jpg)
2 | 
3 | ![Alt text](/path/to/img.jpg "Optional title")


--------------------------------------------------------------------------------
/test/link_reference.html:
--------------------------------------------------------------------------------
 1 | <p>Foo <a href="/url/" title="Title">bar</a>.</p>
 2 | 
 3 | <p>Foo <a href="/url/" title="Title">bar</a>.</p>
 4 | 
 5 | <p>Foo <a href="/url/" title="Title">bar</a>.</p>
 6 | 
 7 | <p>With <a href="/url/">embedded [brackets]</a>.</p>
 8 | 
 9 | <p>Indented <a href="/url">once</a>.</p>
10 | 
11 | <p>Indented <a href="/url">twice</a>.</p>
12 | 
13 | <p>Indented <a href="/url">thrice</a>.</p>
14 | 
15 | <p>Indented [four][] times.</p>
16 | 
17 | <pre><code>[four]: /url
18 | </code></pre>
19 | 
20 | <hr>
21 | 
22 | <p><a href="foo">this</a> should work</p>
23 | 
24 | <p>So should <a href="foo">this</a>.</p>
25 | 
26 | <p>And <a href="foo">this</a>.</p>
27 | 
28 | <p>And <a href="foo">this</a>.</p>
29 | 
30 | <p>And <a href="foo">this</a>.</p>
31 | 
32 | <p>But not [that] [].</p>
33 | 
34 | <p>Nor [that][].</p>
35 | 
36 | <p>Nor [that].</p>
37 | 
38 | <p>In this case, <a href="/somethingelse/">this</a> points to something else.</p>


--------------------------------------------------------------------------------
/test/link_reference.text:
--------------------------------------------------------------------------------
 1 | Foo [bar] [1].
 2 | 
 3 | Foo [bar][1].
 4 | 
 5 | Foo [bar]
 6 | [1].
 7 | 
 8 | [1]: /url/  "Title"
 9 | 
10 | 
11 | With [embedded [brackets]] [b].
12 | 
13 | 
14 | Indented [once][].
15 | 
16 | Indented [twice][].
17 | 
18 | Indented [thrice][].
19 | 
20 | Indented [four][] times.
21 | 
22 |  [once]: /url
23 | 
24 |   [twice]: /url
25 | 
26 |    [thrice]: /url
27 | 
28 |     [four]: /url
29 | 
30 | 
31 | [b]: /url/
32 | 
33 | ***
34 | 
35 | [this] [this] should work
36 | 
37 | So should [this][this].
38 | 
39 | And [this] [].
40 | 
41 | And [this][].
42 | 
43 | And [this].
44 | 
45 | But not [that] [].
46 | 
47 | Nor [that][].
48 | 
49 | Nor [that].
50 | 
51 | In this case, [this](/somethingelse/) points to something else.
52 | 
53 | [this]: foo
54 | 


--------------------------------------------------------------------------------
/test/links_shortcut_references.html:
--------------------------------------------------------------------------------
1 | <p>This is the <a href="/simple">simple case</a>.</p>
2 | <p><a href="/that">this</a> and the <a href="/other">other</a></p>


--------------------------------------------------------------------------------
/test/links_shortcut_references.text:
--------------------------------------------------------------------------------
1 | This is the [simple case].
2 | 
3 | [simple case]: /simple
4 | 
5 | [this] [that] and the [other]
6 | 
7 | [this]: /this
8 | [that]: /that
9 | [other]: /other


--------------------------------------------------------------------------------
/test/loose_list.html:
--------------------------------------------------------------------------------
 1 | <p>Loose list:</p>
 2 | 
 3 | <ul>
 4 | <li><p>foo</p>
 5 | </li>
 6 | <li><p>foo</p>
 7 | </li>
 8 | <li><p>foo</p>
 9 | </li>
10 | </ul>
11 | 
12 | <ol>
13 | <li><p>hello</p>
14 | </li>
15 | <li><p>world</p>
16 | </li>
17 | <li><p>buddy</p>
18 | </li>
19 | </ol>
20 | 
21 | <ul>
22 | <li><p>should work</p>
23 | <ul>
24 | <li><p>in nested</p>
25 | </li>
26 | <li><p>haha, yap!</p>
27 | </li>
28 | </ul>
29 | </li>
30 | <li>not loose</li>
31 | </ul>


--------------------------------------------------------------------------------
/test/loose_list.text:
--------------------------------------------------------------------------------
 1 | Loose list:
 2 | 
 3 | - foo
 4 | 
 5 | - foo
 6 | 
 7 | * foo
 8 | 
 9 | 
10 | 1. hello
11 | 
12 | 2. world
13 | 
14 | 4. buddy
15 | 
16 | 
17 | * should work
18 |   - in nested
19 | 
20 |   - haha, yap!
21 | * not loose


--------------------------------------------------------------------------------
/test/main.html:
--------------------------------------------------------------------------------
 1 | <h1 id="a-heading">A heading</h1>
 2 | <p>Just a note, I&#39;ve found that I can&#39;t test my markdown parser vs others.
 3 | For example, both markdown.js and showdown code blocks in lists wrong. They&#39;re
 4 | also completely <a href="http://google.com/" title="Google">inconsistent</a> with regards to paragraphs in list items.</p>
 5 | <p>A link. Not anymore.</p>
 6 | <aside>This will make me fail the test because
 7 | markdown.js doesnt acknowledge arbitrary html blocks =/</aside>
 8 | 
 9 | <ul>
10 | <li><p>List Item 1</p>
11 | </li>
12 | <li><p>List Item 2</p>
13 | <ul>
14 | <li>New List Item 1
15 | Hi, this is a list item.</li>
16 | <li>
17 | <p>New List Item 2
18 | Another item</p>
19 | <pre><code>Code goes here.
20 | Lots of it...
21 | </code></pre></li>
22 | <li>New List Item 3
23 | The last item</li>
24 | </ul>
25 | </li>
26 | <li><p>List Item 3
27 | The final item.</p>
28 | </li>
29 | <li><p>List Item 4
30 | The real final item.</p>
31 | </li>
32 | </ul>
33 | <p>Paragraph.</p>
34 | <blockquote>
35 | <ul>
36 | <li>bq Item 1</li>
37 | <li>bq Item 2<ul>
38 | <li>New bq Item 1</li>
39 | <li>New bq Item 2
40 | Text here</li>
41 | </ul>
42 | </li>
43 | </ul>
44 | </blockquote>
45 | <hr>
46 | <blockquote>
47 | <p>Another blockquote!
48 | I really need to get
49 | more creative with
50 | mockup text..
51 | markdown.js breaks here again</p>
52 | </blockquote>
53 | <h2 id="another-heading">Another Heading</h2>
54 | <p>Hello <em>world</em>. Here is a <a href="//hello">link</a>.
55 | And an image <img src="src" alt="alt">.</p>
56 | <pre><code>Code goes here.
57 | Lots of it...
58 | </code></pre>


--------------------------------------------------------------------------------
/test/main.text:
--------------------------------------------------------------------------------
 1 | [test]: http://google.com/ "Google"
 2 | 
 3 | # A heading
 4 | 
 5 | Just a note, I've found that I can't test my markdown parser vs others.
 6 | For example, both markdown.js and showdown code blocks in lists wrong. They're
 7 | also completely [inconsistent][test] with regards to paragraphs in list items.
 8 | 
 9 | A link. Not anymore.
10 | 
11 | <aside>This will make me fail the test because
12 | markdown.js doesnt acknowledge arbitrary html blocks =/</aside>
13 | 
14 | * List Item 1
15 | 
16 | * List Item 2
17 |   * New List Item 1
18 |     Hi, this is a list item.
19 |   * New List Item 2
20 |     Another item
21 | 
22 |         Code goes here.
23 |         Lots of it...
24 |         
25 |   * New List Item 3
26 |     The last item
27 | 
28 | * List Item 3
29 | The final item.
30 | 
31 | * List Item 4
32 | The real final item.
33 | 
34 | Paragraph.
35 | 
36 | > * bq Item 1
37 | > * bq Item 2
38 | >   * New bq Item 1
39 | >   * New bq Item 2
40 | >   Text here
41 | 
42 | * * *
43 | 
44 | > Another blockquote!
45 | > I really need to get
46 | > more creative with
47 | > mockup text..
48 | > markdown.js breaks here again
49 | 
50 | Another Heading
51 | -------------
52 | 
53 | Hello *world*. Here is a [link](//hello).
54 | And an image ![alt](src).
55 | 
56 |     Code goes here.
57 |     Lots of it...


--------------------------------------------------------------------------------
/test/nested_emphasis.html:
--------------------------------------------------------------------------------
1 | <p><em>hello <strong>world</strong></em>
2 | <strong><em>hello</em></strong>
3 | <strong><em>world</em></strong>
4 | <em><strong>Ariel</strong></em>
5 | <em><strong>here</strong></em></p>


--------------------------------------------------------------------------------
/test/nested_emphasis.text:
--------------------------------------------------------------------------------
1 | _hello **world**_
2 | ___hello___
3 | ***world***
4 | *__Ariel__*
5 | _**here**_


--------------------------------------------------------------------------------
/test/same_bullet.html:
--------------------------------------------------------------------------------
1 | <ul>
2 | <li>test</li>
3 | <li>test</li>
4 | <li>test</li>
5 | <li>test</li>
6 | </ul>


--------------------------------------------------------------------------------
/test/same_bullet.text:
--------------------------------------------------------------------------------
1 | * test
2 | + test
3 | - test
4 | 1. test


--------------------------------------------------------------------------------
/test/smartyfractions.html:
--------------------------------------------------------------------------------
1 | <p>&frac12;, &frac14; and &frac34;; &frac14;th and &frac34;ths</p>
2 | 
3 | <p>1/2/2015, 1/4/2015, 3/4/2015; 2015/1/2, 2015/1/4, 2015/3/4</p>
4 | 
5 | <p>&frac12;, &frac23;, <sup>81</sup>&frasl;<sub>100</sub> and <sup>1000000</sup>&frasl;<sub>1048576</sub></p>


--------------------------------------------------------------------------------
/test/smartyfractions.text:
--------------------------------------------------------------------------------
1 | 1/2, 1/4 and 3/4; 1/4th and 3/4ths
2 | 
3 | 1/2/2015, 1/4/2015, 3/4/2015; 2015/1/2, 2015/1/4, 2015/3/4
4 | 
5 | 1/2, 2/3, 81/100 and 1000000/1048576


--------------------------------------------------------------------------------
/test/smartypants.html:
--------------------------------------------------------------------------------
1 | <p>Hello world ‘how’ “are” you – today…</p>
2 | 
3 | <p>“It’s a more ‘challenging’ smartypants test…”</p>
4 | 
5 | <p>‘And,’ as a bonus — “one
6 | multiline” test!</p>


--------------------------------------------------------------------------------
/test/smartypants.text:
--------------------------------------------------------------------------------
1 | Hello world 'how' "are" you -- today...
2 | 
3 | "It's a more 'challenging' smartypants test..."
4 | 
5 | 'And,' as a bonus --- "one
6 | multiline" test!


--------------------------------------------------------------------------------
/test/task_list.html:
--------------------------------------------------------------------------------
 1 | <ul>
 2 | <li><input type="checkbox">foo</li>
 3 | <li><input type="checkbox">bar</li>
 4 | <li><input type="checkbox" checked>baz</li>
 5 | <li>qux</li>
 6 | <li><input type="checkbox">foo again</li>
 7 | <li><input type="checkbox" checked>baz again</li>
 8 | </ul>
 9 | 
10 | <ul>
11 | <li><input type="checkbox"><code>foo</code></li>
12 | <li><input type="checkbox" checked><code>bar</code></li>
13 | <li><input type="checkbox">#### hello</li>
14 | </ul>
15 | 


--------------------------------------------------------------------------------
/test/task_list.text:
--------------------------------------------------------------------------------
 1 | - [ ] foo
 2 | - [ ] bar
 3 | - [x] baz
 4 | - qux
 5 | - [ ] foo again
 6 | - [x] baz again
 7 | 
 8 | 
 9 | - [ ] `foo`
10 | - [x] `bar`
11 | - [ ] #### hello
12 | 


--------------------------------------------------------------------------------
/test/text_list.html:
--------------------------------------------------------------------------------
 1 | <ul>
 2 | <li>one<ul>
 3 | <li>one of one<ul>
 4 | <li>one of one of one</li>
 5 | <li>two of one of one</li>
 6 | </ul>
 7 | </li>
 8 | <li>two of one</li>
 9 | <li>three of one</li>
10 | </ul>
11 | </li>
12 | <li>two<ul>
13 | <li>one of two</li>
14 | <li>two of two<ul>
15 | <li>one of two of two</li>
16 | </ul>
17 | </li>
18 | </ul>
19 | </li>
20 | <li>three</li>
21 | </ul>


--------------------------------------------------------------------------------
/test/text_list.text:
--------------------------------------------------------------------------------
 1 | - one
 2 |   - one of one
 3 |     - one of one of one
 4 |     - two of one of one
 5 |   - two of one
 6 |   - three of one
 7 | - two
 8 |   - one of two
 9 |   - two of two
10 |     - one of two of two
11 | - three
12 | 


--------------------------------------------------------------------------------
/test/unordered_lists.html:
--------------------------------------------------------------------------------
 1 | <p>Unordered lists:</p>
 2 | 
 3 | <ul>
 4 | <li>foo<ul>
 5 | <li>foo</li>
 6 | </ul>
 7 | </li>
 8 | <li>foo</li>
 9 | </ul>
10 | 
11 | <ul>
12 | <li>bar</li>
13 | <li>bar<ul>
14 | <li>bar</li>
15 | </ul>
16 | </li>
17 | </ul>
18 | 
19 | <ul>
20 | <li>baz<ul>
21 | <li>baz<ul>
22 | <li>baz</li>
23 | </ul>
24 | </li>
25 | </ul>
26 | </li>
27 | </ul>
28 | 
29 | <p>-paragraph</p>
30 | 
31 | <p>+paragraph</p>
32 | 
33 | <p>*paragraph</p>


--------------------------------------------------------------------------------
/test/unordered_lists.text:
--------------------------------------------------------------------------------
 1 | Unordered lists:
 2 | - foo
 3 |   - foo
 4 | - foo
 5 | 
 6 | 
 7 | * bar
 8 | * bar
 9 |   * bar
10 | 
11 | 
12 | + baz
13 |   + baz
14 |     + baz
15 | 
16 | 
17 | -paragraph
18 | 
19 | +paragraph
20 | 
21 | *paragraph


--------------------------------------------------------------------------------