├── .gitignore ├── .travis.yml ├── LICENSE.txt ├── README.md ├── dub.json └── source └── dmarkdown ├── html.d ├── markdown.d ├── package.d └── string.d /.gitignore: -------------------------------------------------------------------------------- 1 | /libdmarkdown.a 2 | /dub.selections.json 3 | /__test__library__ 4 | .dub 5 | docs.json 6 | __dummy.html 7 | *.o 8 | *.obj 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: d 2 | os: 3 | - linux 4 | d: 5 | - dmd 6 | - ldc 7 | branches: 8 | only: 9 | - master 10 | - /^v\d+\.\d+\.\d+([+-]\S*)*$/ 11 | script: 12 | - dub build --build=release --compiler=${DC} 13 | - dub test --compiler=${DC} 14 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012-2014 RejectedSoftware e.K. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | ## DMarkdown [![Build Status](https://travis-ci.org/dlang-community/dmarkdown.svg?branch=master)](https://travis-ci.org/dlang-community/dmarkdown) 3 | 4 | ### Introduction 5 | 6 | 7 | DMarkdown is an open source [Markdown](https://en.wikipedia.org/wiki/Markdown) to HTML processor for the [D programming language](https://dlang.org). 8 | It provides a simple, minimal API to generate HTML from markdown strings or ranges. 9 | 10 | DMarkdown code was originally written as a part of the [vibe.d](https://vibed.org/) project and its API (and most of its implementation) is identical to the `vibe.textfilter.markdown` module from vibe.d . 11 | 12 | 13 | ### License 14 | 15 | DMarkdown is released under the terms of the [MIT](https://en.wikipedia.org/wiki/MIT_License) license. 16 | This license allows you to use the source code in your own projects, open source or proprietary, 17 | and to modify it to suit your needs. 18 | 19 | Full text of the license can be found in file `LICENSE.txt` and is also displayed here 20 | 21 | Copyright (c) 2012-2014 RejectedSoftware e.K. 
22 | 23 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 24 | software and associated documentation files (the "Software"), to deal in the Software 25 | without restriction, including without limitation the rights to use, copy, modify, 26 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 27 | permit persons to whom the Software is furnished to do so, subject to the following 28 | conditions: 29 | 30 | The above copyright notice and this permission notice shall be included in all copies 31 | or substantial portions of the Software. 32 | 33 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 34 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR 35 | A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 36 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 37 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR 38 | THE USE OR OTHER DEALINGS IN THE SOFTWARE. 39 | 40 | ### Credits 41 | 42 | DMarkdown code was written as a part of the [vibe.d](https://vibed.org) project by rejectedsoftware e.K.. 43 | Modifications by Ferdinand Majerech. 
44 | -------------------------------------------------------------------------------- /dub.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "dmarkdown", 3 | "description": "Markdown to HTML processor", 4 | "copyright": "Copyright © 2012-2014 rejectedsoftware e.K.", 5 | "authors": ["Sönke Ludwig", "Martin Nowak", "Mike Wey", "cybevnm"], 6 | "importPaths": ["source"], 7 | "license": "MIT", 8 | "homepage": "https://github.com/dlang-community/dmarkdown", 9 | 10 | "buildTypes": 11 | { 12 | "debug": { "buildOptions": ["debugMode", "debugInfoC"] }, 13 | "release": { "buildOptions": ["releaseMode", "optimize", "inline", "noBoundsCheck"] }, 14 | "profile": { "buildOptions": ["releaseMode", "optimize", "noBoundsCheck", "debugInfoC"] } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /source/dmarkdown/html.d: -------------------------------------------------------------------------------- 1 | /** 2 | HTML character entity escaping. 3 | 4 | TODO: Make things @safe once Appender is. 5 | 6 | Copyright: © 2012-2014 RejectedSoftware e.K. 7 | License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file. 8 | Authors: Sönke Ludwig 9 | */ 10 | module dmarkdown.html; 11 | 12 | import std.array; 13 | import std.conv; 14 | import std.range; 15 | 16 | 17 | package: 18 | 19 | 20 | /** Writes the HTML escaped version of a given string to an output range. 21 | */ 22 | void filterHTMLEscape(R, S)(ref R dst, S str, HTMLEscapeFlags flags = HTMLEscapeFlags.escapeNewline) 23 | if (isOutputRange!(R, dchar) && isInputRange!S) 24 | { 25 | for (;!str.empty;str.popFront()) 26 | filterHTMLEscape(dst, str.front, flags); 27 | } 28 | 29 | /** Writes the HTML escaped version of a given string to an output range (also escapes double quotes). 
30 | */
void filterHTMLAttribEscape(R, S)(ref R dst, S str)
	if (isOutputRange!(R, dchar) && isInputRange!S)
{
	// Attribute values need quotes escaped in addition to the default newline escaping.
	enum attribFlags = HTMLEscapeFlags.escapeNewline|HTMLEscapeFlags.escapeQuotes;
	for (; !str.empty; str.popFront())
		filterHTMLEscape(dst, str.front, attribFlags);
}

/** Writes the HTML escaped version of a given string to an output range (escapes every character).

	Every element of `str` is emitted as a decimal numeric character reference
	("&#NNN;"), regardless of whether it would need escaping.
*/
void filterHTMLAllEscape(R, S)(ref R dst, S str)
	if (isOutputRange!(R, dchar) && isInputRange!S)
{
	for (; !str.empty; str.popFront()) {
		dst.put("&#");
		dst.put(to!string(cast(uint)str.front));
		dst.put(';');
	}
}

/**
	Writes the HTML escaped version of a character to an output range.

	Params:
		dst = Output range receiving the escaped text.
		ch = The character to escape.
		flags = Selects the optional escapes: `escapeQuotes` turns `"` and `'` into
			entities, `escapeNewline` turns CR/LF into numeric references and
			`escapeUnknown` does the same for every character that is not
			explicitly listed below. `<`, `>` and `&` are always escaped.
*/
void filterHTMLEscape(R)(ref R dst, dchar ch, HTMLEscapeFlags flags = HTMLEscapeFlags.escapeNewline )
{
	// Emits `ch` as a decimal numeric character reference, e.g. "&#10;".
	void putNumericReference()
	{
		dst.put("&#");
		dst.put(to!string(cast(uint)ch));
		dst.put(';');
	}

	switch (ch) {
		default:
			if (flags & HTMLEscapeFlags.escapeUnknown) putNumericReference();
			else dst.put(ch);
			break;
		case '"':
			if (flags & HTMLEscapeFlags.escapeQuotes) dst.put("&quot;");
			else dst.put('"');
			break;
		case '\'':
			if (flags & HTMLEscapeFlags.escapeQuotes) dst.put("&#39;");
			else dst.put('\'');
			break;
		case '\r', '\n':
			if (flags & HTMLEscapeFlags.escapeNewline) putNumericReference();
			else dst.put(ch);
			break;
		case 'a': .. case 'z': goto case;
		case 'A': .. case 'Z': goto case;
		case '0': .. case '9': goto case;
		case ' ', '\t', '-', '_', '.', ':', ',', ';',
			'#', '+', '*', '?', '=', '(', ')', '/', '!',
			'%' , '{', '}', '[', ']', '`', '´', '$', '^', '~':
			// Written as a dchar on purpose: '´' (U+00B4) is outside ASCII, so
			// narrowing to `char` would emit a lone, invalid UTF-8 code unit.
			dst.put(ch);
			break;
		case '<': dst.put("&lt;"); break;
		case '>': dst.put("&gt;"); break;
		case '&': dst.put("&amp;"); break;
	}
}


/// Bit flags selecting which optional escapes filterHTMLEscape applies.
enum HTMLEscapeFlags {
	escapeMinimal = 0,    /// only the always-escaped characters `<`, `>` and `&`
	escapeQuotes = 1<<0,  /// additionally escape `"` and `'`
	escapeNewline = 1<<1, /// additionally escape `\r` and `\n`
	escapeUnknown = 1<<2  /// additionally escape every character not explicitly listed
}
99 | -------------------------------------------------------------------------------- /source/dmarkdown/markdown.d: -------------------------------------------------------------------------------- 1 | /** 2 | Markdown parser implementation 3 | 4 | Copyright: © 2012-2014 RejectedSoftware e.K. 5 | License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file. 6 | Authors: Sönke Ludwig 7 | */ 8 | module dmarkdown.markdown; 9 | 10 | import dmarkdown.html; 11 | import dmarkdown.string; 12 | 13 | import std.algorithm : joiner, map, canFind, countUntil, min; 14 | import std.array; 15 | import std.ascii : isAlpha, isWhite; 16 | import std.format; 17 | import std.range; 18 | import std.string; 19 | 20 | /* 21 | TODO: 22 | detect inline HTML tags 23 | */ 24 | 25 | // TODO (dmarkdown) detailed API docs and examples for everything 26 | 27 | unittest 28 | { 29 | auto text = 30 | `======= 31 | Heading 32 | ======= 33 | 34 | **bold** *italic* 35 | 36 | List: 37 | 38 | * a 39 | * b 40 | * c 41 | `; 42 | 43 | import std.stdio; 44 | writeln("==========="); 45 | writeln(text); 46 | writeln("==========="); 47 | writeln(filterMarkdown(text)); 48 | } 49 | 50 | unittest 51 | { 52 | auto source = 53 | `Merged prototype. The prototype is not locked, allowing to add more components. 54 | To be used it must be locked by calling EntityPrototype.lockAndTrimMemory().`; 55 | auto expected = 56 | `

Merged prototype. The prototype is not locked, allowing to add more components. 57 | To be used it must be locked by calling EntityPrototype.lockAndTrimMemory(). 58 |

59 | `; 60 | string result = filterMarkdown(source); 61 | assert(result == expected); 62 | } 63 | 64 | unittest 65 | { 66 | auto source = `*stars* under_score_s`; 67 | auto expectedUnderscores = `

stars underscores 68 |

69 | `; 70 | auto expectedNoUnderscores = `

stars under_score_s 71 |

72 | `; 73 | 74 | string resultUnderscores = filterMarkdown(source); 75 | string resultNoUnderscores = filterMarkdown(source, MarkdownFlags.disableUnderscoreEmphasis); 76 | 77 | assert(resultUnderscores == expectedUnderscores, 78 | "'%s' != '%s'".format(resultUnderscores, expectedUnderscores)); 79 | assert(resultNoUnderscores == expectedNoUnderscores, 80 | "'%s' != '%s'".format(resultNoUnderscores, expectedNoUnderscores)); 81 | } 82 |

/** Converts a Markdown string to HTML and returns the result.

	The `flags` overload wraps the flags in a fresh MarkdownSettings object;
	the `settings` overload renders into an in-memory appender and returns
	its contents.
*/
string filterMarkdown()(string str, MarkdownFlags flags)
{
	scope cfg = new MarkdownSettings;
	cfg.flags = flags;
	return filterMarkdown(str, cfg);
}
/// ditto
string filterMarkdown()(string str, scope MarkdownSettings settings = null)
@trusted { // Appender not @safe as of 2.065
	auto html = appender!string();
	filterMarkdown(html, str, settings);
	return html.data;
}


/** Converts a Markdown string to HTML and writes the result to an output range.

	Params:
		dst = Output range receiving the generated HTML.
		src = The Markdown source text.
		flags = Parser capabilities; wrapped in a fresh MarkdownSettings object.
		settings = Parser configuration; a default-constructed MarkdownSettings
			is used when `null`.
*/
void filterMarkdown(R)(ref R dst, string src, MarkdownFlags flags)
{
	scope cfg = new MarkdownSettings;
	cfg.flags = flags;
	filterMarkdown(dst, src, cfg);
}
/// ditto
void filterMarkdown(R)(ref R dst, string src, scope MarkdownSettings settings = null)
{
	if (settings is null) settings = new MarkdownSettings;

	// Pipeline: split into lines, collect link references, classify the
	// lines, group them into a block tree, then render that tree.
	auto rawLines = splitLines(src);
	auto linkRefs = scanForReferences(rawLines);
	auto parsedLines = parseLines(rawLines, settings);
	Block document;
	parseBlocks(document, parsedLines, null, settings);
	writeBlock(dst, document, linkRefs, settings);
}

/**
	Returns the hierarchy of sections

	Only top-level header blocks are considered. Each header is appended below
	the most recently added section chain for as long as that chain's heading
	levels are strictly smaller than the header's own level.
*/
Section[] getMarkdownOutline(string markdown_source, scope MarkdownSettings settings = null)
{
	import std.conv : to;

	if (settings is null) settings = new MarkdownSettings;
	auto rawLines = splitLines(markdown_source);
	auto parsedLines = parseLines(rawLines, settings);
	Block document;
	parseBlocks(document, parsedLines, null, settings);

	Section outline;
	foreach (ref blk; document.blocks) {
		if (blk.type != BlockType.Header) continue;

		// Descend along the last-added sections while they outrank this header.
		Section* parent = &outline;
		while (parent.subSections.length != 0
			&& parent.subSections[$-1].headingLevel < blk.headerLevel)
		{
			parent = &parent.subSections[$-1];
		}
		parent.subSections ~= Section(blk.headerLevel, blk.text[0], blk.text[0].asSlug.to!string);
	}

	return outline.subSections;
}

///
unittest {
	import std.conv : to;
	assert(getMarkdownOutline("## first\n## second\n### third\n# fourth\n### fifth") ==
		[
			Section(2, " first", "first"),
			Section(2, " second", "second", [
				Section(3, " third", "third")
			]),
			Section(1, " fourth", "fourth", [
				Section(3, " fifth", "fifth")
			])
		]
	);
}

/// Configuration object passed through the parsing and rendering functions.
final class MarkdownSettings {
	/// Controls the capabilities of the parser.
	MarkdownFlags flags = MarkdownFlags.vanillaMarkdown;

	/// Heading tags will start at this level.
	size_t headingBaseLevel = 1;

	/// Called for every link/image URL to perform arbitrary transformations.
	string delegate(string url_or_path, bool is_image) urlFilter;

	/** An optional delegate to post-process code blocks and inline code.
	 *
	 * Useful to e.g. add code highlighting.
	 */
	string delegate(string) @safe nothrow processCode = null;
}
182 | // Unittest for code post-processing 183 | unittest 184 | { 185 | auto text = 186 | "`inline code`" ~ ` 187 | block: 188 | 189 | code block 190 | `; 191 | auto expected = 192 | `

AAAAAAAAAAA 193 | block: 194 |

195 |
AAAAAAAAAA
`; 196 | 197 | import std.algorithm: filter; 198 | string processCode(string input) @safe nothrow 199 | { 200 | import std.conv; 201 | import std.exception: assumeWontThrow; 202 | // ignore newlines generated by code block processing 203 | input = input.filter!(c => c != '\n').array.to!string.assumeWontThrow; 204 | return 'A'.repeat(input.length).array.to!string.assumeWontThrow; 205 | } 206 | auto settings = new MarkdownSettings; 207 | settings.processCode = &processCode; 208 | auto result = filterMarkdown(text, settings); 209 | 210 | assert(result == expected, "Unexpected code processing result:\n" ~ 211 | result ~ "\nExpected:\n" ~ expected); 212 | } 213 | 214 | enum MarkdownFlags { 215 | none = 0, 216 | keepLineBreaks = 1<<0, 217 | backtickCodeBlocks = 1<<1, 218 | noInlineHtml = 1<<2, 219 | //noLinks = 1<<3, 220 | //allowUnsafeHtml = 1<<4, 221 | /// If used, subheadings are underlined by stars ('*') instead of dashes ('-') 222 | alternateSubheaders = 1 << 5, 223 | /// If used, '_' may not be used for emphasis ('*' may still be used) 224 | disableUnderscoreEmphasis = 1 << 6, 225 | supportTables = 1 << 7, 226 | vanillaMarkdown = none, 227 | forumDefault = keepLineBreaks|backtickCodeBlocks|noInlineHtml, 228 | githubInspired = backtickCodeBlocks|supportTables, 229 | } 230 | 231 | struct Section { 232 | size_t headingLevel; 233 | string caption; 234 | string anchor; 235 | Section[] subSections; 236 | } 237 | 238 | private { 239 | immutable s_blockTags = ["div", "ol", "p", "pre", "section", "table", "ul"]; 240 | } 241 | 242 | private enum IndentType { 243 | White, 244 | Quote 245 | } 246 | 247 | private enum LineType { 248 | Undefined, 249 | Blank, 250 | Plain, 251 | Hline, 252 | AtxHeader, 253 | SetextHeader, 254 | UList, 255 | OList, 256 | HtmlBlock, 257 | CodeBlockDelimiter, 258 | Table, 259 | } 260 | 261 | private struct Line { 262 | LineType type; 263 | IndentType[] indent; 264 | string text; 265 | string unindented; 266 | 267 | string unindent(size_t n) 268 | 
pure @safe { 269 | assert(n <= indent.length); 270 | string ln = text; 271 | foreach( i; 0 .. n ){ 272 | final switch(indent[i]){ 273 | case IndentType.White: 274 | if( ln[0] == ' ' ) ln = ln[4 .. $]; 275 | else ln = ln[1 .. $]; 276 | break; 277 | case IndentType.Quote: 278 | ln = ln.stripLeft()[1 .. $]; 279 | break; 280 | } 281 | } 282 | return ln; 283 | } 284 | } 285 |

/**
	Classifies every raw source line and records its indentation.

	Consumes `lines` from the front until it is empty and returns one `Line`
	per input line, carrying the original text, the sequence of indentation
	levels (tab / four spaces / quote marker), the text with that indentation
	removed and the detected line type.
*/
private Line[] parseLines(ref string[] lines, scope MarkdownSettings settings)
pure @safe {
	Line[] ret;
	// Bit test: only the alternateSubheaders flag switches the underline
	// character. (Multiplying the flags would be non-zero for any flag set.)
	const subHeaderChar = (settings.flags & MarkdownFlags.alternateSubheaders) ? '*' : '-';
	while( !lines.empty ){
		auto ln = lines.front;
		lines.popFront();

		Line lninfo;
		lninfo.text = ln;

		// Strips leading indentation from `ln`, recording each level.
		void determineIndent() {
			while( ln.length > 0 ){
				if( ln[0] == '\t' ){
					lninfo.indent ~= IndentType.White;
					ln.popFront();
				} else if( ln.startsWith("    ") ){
					// One indentation level is exactly four spaces; all four
					// are verified before being dropped so that no text is
					// consumed (Line.unindent removes the same four).
					lninfo.indent ~= IndentType.White;
					ln.popFrontN(4);
				} else {
					ln = ln.stripLeft();
					if( ln.startsWith(">") ){
						lninfo.indent ~= IndentType.Quote;
						ln.popFront();
					} else break;
				}
			}
			lninfo.unindented = ln;
		}

		determineIndent();

		// The order of these checks decides ambiguous lines (first match wins).
		if( (settings.flags & MarkdownFlags.backtickCodeBlocks) && isCodeBlockDelimiter(ln) ) lninfo.type = LineType.CodeBlockDelimiter;
		else if( isAtxHeaderLine(ln) ) lninfo.type = LineType.AtxHeader;
		else if( isSetextHeaderLine(ln, subHeaderChar) ) lninfo.type = LineType.SetextHeader;
		else if( (settings.flags & MarkdownFlags.supportTables) && isTableRowLine!false(ln) ) lninfo.type = LineType.Table;
		else if( isHlineLine(ln) ) lninfo.type = LineType.Hline;
		else if( isOListLine(ln) ) lninfo.type = LineType.OList;
		else if( isUListLine(ln) ) lninfo.type = LineType.UList;
		else if( isLineBlank(ln) ) lninfo.type = LineType.Blank;
		else if( !(settings.flags & MarkdownFlags.noInlineHtml) && isHtmlBlockLine(ln) ) lninfo.type = LineType.HtmlBlock;
		else lninfo.type = LineType.Plain;

		ret ~= lninfo;
	}
	return ret;
}

/// Kind of node in the block tree built by parseBlocks.
private enum BlockType {
	Plain,
	Text,
	Paragraph,
	Header,
	OList,
	UList,
	ListItem,
	Code,
	Quote,
	Table,
	TableRow,
	TableHeader,
	TableData,
}

/// A node of the block tree: its own text lines plus nested child blocks.
private struct Block {
	BlockType type;
	string[] text;
	Block[] blocks;
	size_t headerLevel; // set by parseBlocks for BlockType.Header blocks

	// A human-readable toString for debugging.
	string toString()
	{
		return toStringNested;
	}

	// toString implementation; capable of indenting nested blocks.
	string toStringNested(uint depth = 0)
	{
		import std.conv: to;
		string indent = " ".repeat(depth * 2).joiner.array.to!string;
		return indent ~ "%s\n".format(type) ~
		       indent ~ "%s\n".format(text) ~
		       blocks.map!((ref b) => b.toStringNested(depth + 1)).joiner.array.to!string ~
		       indent ~ "%s\n".format(headerLevel);
	}
}

/**
	Groups classified lines into a tree of blocks below `root`.

	Lines are consumed from the front of `lines` for as long as their
	indentation starts with `base_indent`; the function returns at the first
	line that does not, or when `lines` is exhausted. Lines indented by exactly
	one additional whitespace level become a code block, any other deeper
	indentation is parsed recursively into a nested block.

	Params:
		root = Block receiving the parsed child blocks. Its type is set to
			Text for an empty `base_indent` and to Quote when the innermost
			indentation level is a quote marker.
		lines = Remaining input lines; consumed in place.
		base_indent = Indentation that every line of this block must share.
		settings = Parser settings, forwarded to recursive calls.
*/
private void parseBlocks(ref Block root, ref Line[] lines, IndentType[] base_indent, scope MarkdownSettings settings)
pure @safe {
	if( base_indent.length == 0 ) root.type = BlockType.Text;
	else if( base_indent[$-1] == IndentType.Quote ) root.type = BlockType.Quote;

	while( !lines.empty ){
		auto ln = lines.front;

		if( ln.type == LineType.Blank ){
			lines.popFront();
			continue;
		}

		if( ln.indent != base_indent ){
			// A line that is not nested inside this block ends it.
			if( ln.indent.length < base_indent.length || ln.indent[0 .. base_indent.length] != base_indent )
				return;

			auto cindent = base_indent ~ IndentType.White;
			if( ln.indent == cindent ){
				Block cblock;
				cblock.type = BlockType.Code;
				while( !lines.empty && lines.front.indent.length >= cindent.length
					&& lines.front.indent[0 .. cindent.length] == cindent)
				{
					cblock.text ~= lines.front.unindent(cindent.length);
					lines.popFront();
				}
				root.blocks ~= cblock;
			} else {
				Block subblock;
				parseBlocks(subblock, lines, ln.indent[0 .. base_indent.length+1], settings);
				root.blocks ~= subblock;
			}
		} else {
			Block b;
			// Reads a paragraph starting at the current front line.
			// Requires `lines` to be non-empty.
			void processPlain() {
				b.type = BlockType.Paragraph;
				b.text = skipText(lines, base_indent);
			}
			final switch(ln.type){
				case LineType.Undefined: assert(false);
				case LineType.Blank: assert(false);
				case LineType.Plain:
					if( lines.length >= 2 && lines[1].type == LineType.SetextHeader ){
						auto setln = lines[1].unindented;
						b.type = BlockType.Header;
						b.text = [ln.unindented];
						b.headerLevel = setln.strip()[0] == '=' ? 1 : 2;
						lines.popFrontN(2);
					} else {
						processPlain();
					}
					break;
				case LineType.Hline:
					b.type = BlockType.Plain;
					b.text = ["<hr>"];
					lines.popFront();
					break;
				case LineType.AtxHeader:
					b.type = BlockType.Header;
					string hl = ln.unindented;
					b.headerLevel = 0;
					// The number of leading '#' is the header level.
					while( hl.length > 0 && hl[0] == '#' ){
						b.headerLevel++;
						hl = hl[1 .. $];
					}
					// Drop an optional closing "###" and trailing spaces.
					while( hl.length > 0 && (hl[$-1] == '#' || hl[$-1] == ' ') )
						hl = hl[0 .. $-1];
					b.text = [hl];
					lines.popFront();
					break;
				case LineType.SetextHeader:
					// An underline without a preceding plain line is dropped.
					lines.popFront();
					break;
				case LineType.UList:
				case LineType.OList:
					b.type = ln.type == LineType.UList ? BlockType.UList : BlockType.OList;
					auto itemindent = base_indent ~ IndentType.White;
					bool firstItem = true, paraMode = false;
					while(!lines.empty && lines.front.type == ln.type && lines.front.indent == base_indent ){
						Block itm;
						itm.text = skipText(lines, itemindent);
						itm.text[0] = removeListPrefix(itm.text[0], ln.type);

						// wrap item text in a paragraph block if there are
						// blank lines between the items
						if( firstItem && !lines.empty && lines.front.type == LineType.Blank )
							paraMode = true;
						firstItem = false;
						if( paraMode ){
							Block para;
							para.type = BlockType.Paragraph;
							para.text = itm.text;
							itm.blocks ~= para;
							itm.text = null;
						}

						parseBlocks(itm, lines, itemindent, settings);
						itm.type = BlockType.ListItem;
						b.blocks ~= itm;
					}
					break;
				case LineType.HtmlBlock:
					int nestlevel = 0;
					auto starttag = parseHtmlBlockLine(ln.unindented);
					// NOTE(review): this exit does not consume the line; it
					// relies on isHtmlBlockLine() (not visible here) only
					// accepting opening block tags - confirm.
					if( !starttag.isHtmlBlock || !starttag.open )
						break;

					b.type = BlockType.Plain;
					while(!lines.empty){
						if( lines.front.indent.length < base_indent.length ) break;
						if( lines.front.indent[0 .. base_indent.length] != base_indent ) break;

						auto str = lines.front.unindent(base_indent.length);
						auto taginfo = parseHtmlBlockLine(str);
						b.text ~= str;
						lines.popFront();
						// Track nesting of the same tag to find the matching close.
						if( taginfo.isHtmlBlock && taginfo.tagName == starttag.tagName )
							nestlevel += taginfo.open ? 1 : -1;
						if( nestlevel <= 0 ) break;
					}
					break;
				case LineType.CodeBlockDelimiter:
					lines.popFront(); // TODO: get language from line
					b.type = BlockType.Code;
					while(!lines.empty){
						if( lines.front.indent.length < base_indent.length ) break;
						if( lines.front.indent[0 .. base_indent.length] != base_indent ) break;
						if( lines.front.type == LineType.CodeBlockDelimiter ){
							lines.popFront();
							break;
						}
						b.text ~= lines.front.unindent(base_indent.length);
						lines.popFront();
					}
					break;
				case LineType.Table:
					// A table needs a header row followed by a separator row.
					// Look ahead without consuming anything, so that a
					// non-table falls back to a paragraph that still contains
					// this first line (and never reads from an empty range).
					immutable bool hasHeaderSeparator = (
						lines.length >= 2
						&& (lines[1].type == LineType.Table)
						&& (lines[1].text.indexOf(" -") >= 0)
						&& (lines[1].text.indexOf("- ") >= 0)
						&& lines[1].text.allOf("-:| ")
					);
					if (!hasHeaderSeparator) {
						// Not a valid table header, so let's assume it's plain markdown
						processPlain();
						break;
					}
					b.type = BlockType.Table;
					// Parse header
					b.blocks ~= ln.splitTableRow!(BlockType.TableHeader)();
					// Skip the header row and its separator row
					lines.popFrontN(2);
					// Parse table rows
					while(!lines.empty) {
						ln = lines.front;
						if (ln.type != LineType.Table)
							break; // not a table row, so let's assume it's the end of the table
						b.blocks ~= ln.splitTableRow();
						lines.popFront();
					}
					break;
			}
			root.blocks ~= b;
		}
	}
}

547 | private string[] skipText(ref Line[] lines, IndentType[] indent) 548 | pure @safe { 549 | static bool matchesIndent(IndentType[] indent, IndentType[] base_indent) 550 | { 551 | // Any *plain* line with a higher indent should still be a part of 552 | // a paragraph read by skipText(). Returning false here resulted in 553 | // text such as: 554 | // --- 555 | // First line 556 | // Second line 557 | // --- 558 | // being interpreted as a paragraph followed by a code block, even though 559 | // other Markdown processors would interpret it as a single paragraph. 560 | 561 | // if( indent.length > base_indent.length ) return false; 562 | if( indent.length > base_indent.length ) return true; 563 | if( indent != base_indent[0 .. 
indent.length] ) return false; 564 | sizediff_t qidx = -1; 565 | foreach_reverse (i, tp; base_indent) if (tp == IndentType.Quote) { qidx = i; break; } 566 | if( qidx >= 0 ){ 567 | qidx = base_indent.length-1 - qidx; 568 | if( indent.length <= qidx ) return false; 569 | } 570 | return true; 571 | } 572 | 573 | string[] ret; 574 | 575 | while(true){ 576 | ret ~= lines.front.unindent(min(indent.length, lines.front.indent.length)); 577 | lines.popFront(); 578 | 579 | if( lines.empty || !matchesIndent(lines.front.indent, indent) || lines.front.type != LineType.Plain ) 580 | return ret; 581 | } 582 | } 583 | 584 | private Block splitTableRow(BlockType dataType = BlockType.TableData)(Line line) 585 | pure @safe { 586 | static assert(dataType == BlockType.TableHeader || dataType == BlockType.TableData); 587 | 588 | string ln = line.text.strip(); 589 | immutable size_t b = (ln[0..2] == "| ") ? 2 : 0; 590 | immutable size_t e = (ln[($ - 2) .. $] == " |") ? (ln.length - 2) : ln.length; 591 | Block ret; 592 | ret.type = BlockType.TableRow; 593 | foreach(txt; ln[b .. e].split(" | ")) 594 | { 595 | Block d; 596 | d.text = [txt.strip(" ")]; 597 | d.type = dataType; 598 | ret.blocks ~= d; 599 | } 600 | return ret; 601 | } 602 | 603 | /// private 604 | private void writeBlock(R)(ref R dst, ref const Block block, LinkRef[string] links, scope MarkdownSettings settings) 605 | { 606 | final switch(block.type){ 607 | case BlockType.Plain: 608 | foreach( ln; block.text ){ 609 | dst.put(ln); 610 | dst.put("\n"); 611 | } 612 | foreach(b; block.blocks) 613 | writeBlock(dst, b, links, settings); 614 | break; 615 | case BlockType.Text: 616 | writeMarkdownEscaped(dst, block, links, settings); 617 | foreach(b; block.blocks) 618 | writeBlock(dst, b, links, settings); 619 | break; 620 | case BlockType.Paragraph: 621 | assert(block.blocks.length == 0); 622 | dst.put("

"); 623 | writeMarkdownEscaped(dst, block, links, settings); 624 | dst.put("

\n"); 625 | break; 626 | case BlockType.Header: 627 | assert(block.blocks.length == 0); 628 | auto hlvl = block.headerLevel + (settings ? settings.headingBaseLevel-1 : 0); 629 | dst.formattedWrite("", hlvl, block.text[0].asSlug); 630 | assert(block.text.length == 1); 631 | writeMarkdownEscaped(dst, block.text[0], links, settings); 632 | dst.formattedWrite("\n", hlvl); 633 | break; 634 | case BlockType.OList: 635 | dst.put("
    \n"); 636 | foreach(b; block.blocks) 637 | writeBlock(dst, b, links, settings); 638 | dst.put("
\n"); 639 | break; 640 | case BlockType.UList: 641 | dst.put("\n"); 645 | break; 646 | case BlockType.ListItem: 647 | dst.put("
  • "); 648 | writeMarkdownEscaped(dst, block, links, settings); 649 | foreach(b; block.blocks) 650 | writeBlock(dst, b, links, settings); 651 | dst.put("
  • \n"); 652 | break; 653 | case BlockType.Code: 654 | assert(block.blocks.length == 0); 655 | dst.put("
    ");
     656 | 			if(settings.processCode is null)
     657 | 			{
     658 | 				foreach(ln; block.text){
     659 | 					filterHTMLEscape(dst, ln);
     660 | 					dst.put("\n");
     661 | 				}
     662 | 			}
     663 | 			else
     664 | 			{
     665 | 				auto temp = appender!string();
     666 | 				foreach(ln; block.text){
     667 | 					filterHTMLEscape(temp, ln);
     668 | 					temp.put("\n");
     669 | 				}
     670 | 				dst.put(settings.processCode(temp.data));
     671 | 			}
     672 | 			dst.put("
    "); 673 | break; 674 | case BlockType.Quote: 675 | dst.put("
    "); 676 | writeMarkdownEscaped(dst, block, links, settings); 677 | foreach(b; block.blocks) 678 | writeBlock(dst, b, links, settings); 679 | dst.put("
    \n"); 680 | break; 681 | case BlockType.Table: 682 | assert(block.blocks.length > 0); 683 | assert(block.blocks[0].type == BlockType.TableRow); 684 | dst.put("\n"); 685 | foreach(b; block.blocks[0].blocks) { 686 | assert(b.type == BlockType.TableHeader); 687 | dst.put(""); 690 | } 691 | dst.put("\n"); 692 | if (block.blocks.length > 1) { 693 | foreach(row; block.blocks[1 .. $]) { 694 | assert(row.type == BlockType.TableRow); 695 | dst.put(""); 696 | foreach(b; row.blocks) { 697 | assert(b.type == BlockType.TableData); 698 | dst.put(""); 701 | } 702 | dst.put("\n"); 703 | } 704 | } 705 | dst.put("
    "); 688 | writeMarkdownEscaped(dst, b.text[0], links, settings); 689 | dst.put("
    "); 699 | writeMarkdownEscaped(dst, b.text[0], links, settings); 700 | dst.put("
    \n"); 706 | break; 707 | case BlockType.TableRow: 708 | case BlockType.TableData: 709 | case BlockType.TableHeader: 710 | assert(0); 711 | } 712 | } 713 | 714 | private void writeMarkdownEscaped(R)(ref R dst, ref const Block block, in LinkRef[string] links, scope MarkdownSettings settings) 715 | { 716 | auto lines = cast(string[])block.text; 717 | auto text = settings.flags & MarkdownFlags.keepLineBreaks ? lines.join("
    ") : lines.join("\n"); 718 | writeMarkdownEscaped(dst, text, links, settings); 719 | if (lines.length) dst.put("\n"); 720 | } 721 | 722 | /// private 723 | private void writeMarkdownEscaped(R)(ref R dst, string ln, in LinkRef[string] linkrefs, scope MarkdownSettings settings) 724 | { 725 | string filterLink(string lnk, bool is_image) { 726 | return settings.urlFilter ? settings.urlFilter(lnk, is_image) : lnk; 727 | } 728 | 729 | bool br = ln.endsWith(" "); 730 | while( ln.length > 0 ){ 731 | switch( ln[0] ){ 732 | default: 733 | dst.put(ln[0]); 734 | ln = ln[1 .. $]; 735 | break; 736 | case '\\': 737 | if( ln.length >= 2 ){ 738 | switch(ln[1]){ 739 | default: 740 | dst.put(ln[0 .. 2]); 741 | ln = ln[2 .. $]; 742 | break; 743 | case '\'', '`', '*', '_', '{', '}', '[', ']', 744 | '(', ')', '#', '+', '-', '.', '!': 745 | dst.put(ln[1]); 746 | ln = ln[2 .. $]; 747 | break; 748 | } 749 | } else { 750 | dst.put(ln[0]); 751 | ln = ln[1 .. $]; 752 | } 753 | break; 754 | case '_': 755 | if(settings.flags & MarkdownFlags.disableUnderscoreEmphasis) 756 | { 757 | dst.put(ln[0]); 758 | ln = ln[1 .. $]; 759 | break; 760 | } 761 | goto case; 762 | case '*': 763 | string text; 764 | if( auto em = parseEmphasis(ln, text) ){ 765 | dst.put(em == 1 ? "" : em == 2 ? "" : ""); 766 | filterHTMLEscape(dst, text, HTMLEscapeFlags.escapeMinimal); 767 | dst.put(em == 1 ? "" : em == 2 ? "": ""); 768 | } else { 769 | dst.put(ln[0]); 770 | ln = ln[1 .. $]; 771 | } 772 | break; 773 | case '`': 774 | string code; 775 | if( parseInlineCode(ln, code) ){ 776 | dst.put(""); 777 | if(settings.processCode is null) 778 | { 779 | filterHTMLEscape(dst, code, HTMLEscapeFlags.escapeMinimal); 780 | } 781 | else 782 | { 783 | auto temp = appender!string(); 784 | filterHTMLEscape(temp, code, HTMLEscapeFlags.escapeMinimal); 785 | dst.put(settings.processCode(temp.data)); 786 | } 787 | dst.put(""); 788 | } else { 789 | dst.put(ln[0]); 790 | ln = ln[1 .. 
$]; 791 | } 792 | break; 793 | case '[': 794 | Link link; 795 | if( parseLink(ln, link, linkrefs) ){ 796 | dst.put(""); 805 | writeMarkdownEscaped(dst, link.text, linkrefs, settings); 806 | dst.put(""); 807 | } else { 808 | dst.put(ln[0]); 809 | ln = ln[1 .. $]; 810 | } 811 | break; 812 | case '!': 813 | Link link; 814 | if( parseLink(ln, link, linkrefs) ){ 815 | dst.put("\"");"); 826 | } else if( ln.length >= 2 ){ 827 | dst.put(ln[0 .. 2]); 828 | ln = ln[2 .. $]; 829 | } else { 830 | dst.put(ln[0]); 831 | ln = ln[1 .. $]; 832 | } 833 | break; 834 | case '>': 835 | if( settings.flags & MarkdownFlags.noInlineHtml ) dst.put(">"); 836 | else dst.put(ln[0]); 837 | ln = ln[1 .. $]; 838 | break; 839 | case '<': 840 | string url; 841 | if( parseAutoLink(ln, url) ){ 842 | bool is_email = url.startsWith("mailto:"); 843 | dst.put(""); 847 | if( is_email ) filterHTMLAllEscape(dst, url[7 .. $]); 848 | else filterHTMLEscape(dst, url, HTMLEscapeFlags.escapeMinimal); 849 | dst.put(""); 850 | } else { 851 | if (ln.startsWith("
    ")) { 852 | // always support line breaks, since we embed them here ourselves! 853 | dst.put("
    "); 854 | ln = ln[4 .. $]; 855 | } else if(ln.startsWith("
    ")) { 856 | dst.put("
    "); 857 | ln = ln[5 .. $]; 858 | } else { 859 | if( settings.flags & MarkdownFlags.noInlineHtml ) dst.put("<"); 860 | else dst.put(ln[0]); 861 | ln = ln[1 .. $]; 862 | } 863 | } 864 | break; 865 | } 866 | } 867 | if( br ) dst.put("
    "); 868 | } 869 | 870 | private bool isLineBlank(string ln) 871 | pure @safe { 872 | return allOf(ln, " \t"); 873 | } 874 | 875 | private bool isSetextHeaderLine(string ln, char subHeaderChar) 876 | pure @safe { 877 | ln = stripLeft(ln); 878 | if( ln.length < 1 ) return false; 879 | if( ln[0] == '=' ){ 880 | while(!ln.empty && ln.front == '=') ln.popFront(); 881 | return allOf(ln, " \t"); 882 | } 883 | if( ln[0] == subHeaderChar ){ 884 | while(!ln.empty && ln.front == subHeaderChar) ln.popFront(); 885 | return allOf(ln, " \t"); 886 | } 887 | return false; 888 | } 889 | 890 | private bool isAtxHeaderLine(string ln) 891 | pure @safe { 892 | ln = stripLeft(ln); 893 | size_t i = 0; 894 | while( i < ln.length && ln[i] == '#' ) i++; 895 | if( i < 1 || i > 6 || i >= ln.length ) return false; 896 | return ln[i] == ' '; 897 | } 898 | 899 | private bool isHlineLine(string ln) 900 | pure @safe { 901 | if( allOf(ln, " -") && count(ln, '-') >= 3 ) return true; 902 | if( allOf(ln, " *") && count(ln, '*') >= 3 ) return true; 903 | if( allOf(ln, " _") && count(ln, '_') >= 3 ) return true; 904 | return false; 905 | } 906 | 907 | private bool isQuoteLine(string ln) 908 | pure @safe { 909 | return ln.stripLeft().startsWith(">"); 910 | } 911 | 912 | private size_t getQuoteLevel(string ln) 913 | pure @safe { 914 | size_t level = 0; 915 | ln = stripLeft(ln); 916 | while( ln.length > 0 && ln[0] == '>' ){ 917 | level++; 918 | ln = stripLeft(ln[1 .. $]); 919 | } 920 | return level; 921 | } 922 | 923 | private bool isUListLine(string ln) 924 | pure @safe { 925 | ln = stripLeft(ln); 926 | if (ln.length < 2) return false; 927 | if (!canFind("*+-", ln[0])) return false; 928 | if (ln[1] != ' ' && ln[1] != '\t') return false; 929 | return true; 930 | } 931 | 932 | private bool isOListLine(string ln) 933 | pure @safe { 934 | ln = stripLeft(ln); 935 | if( ln.length < 1 ) return false; 936 | if( ln[0] < '0' || ln[0] > '9' ) return false; 937 | ln = ln[1 .. 
$]; 938 | while( ln.length > 0 && ln[0] >= '0' && ln[0] <= '9' ) 939 | ln = ln[1 .. $]; 940 | if( ln.length < 2 ) return false; 941 | if( ln[0] != '.' ) return false; 942 | if( ln[1] != ' ' && ln[1] != '\t' ) 943 | return false; 944 | return true; 945 | } 946 | 947 | private bool isTableRowLine(bool proper = false)(string ln) 948 | pure @safe { 949 | static if (proper) { 950 | return ( 951 | (ln.indexOf(" | ") >= 0) 952 | && !ln.isOListLine 953 | && !ln.isUListLine 954 | && !ln.isAtxHeaderLine 955 | ); 956 | } else { 957 | return (ln.indexOf(" | ") >= 0); 958 | } 959 | } 960 | 961 | private string removeListPrefix(string str, LineType tp) 962 | pure @safe { 963 | switch(tp){ 964 | default: assert(false); 965 | case LineType.OList: // skip bullets and output using normal escaping 966 | auto idx = str.indexOfCT('.'); 967 | assert(idx > 0); 968 | return str[idx+1 .. $].stripLeft(); 969 | case LineType.UList: 970 | return stripLeft(str.stripLeft()[1 .. $]); 971 | } 972 | } 973 | 974 | 975 | private auto parseHtmlBlockLine(string ln) 976 | pure @safe { 977 | struct HtmlBlockInfo { 978 | bool isHtmlBlock; 979 | string tagName; 980 | bool open; 981 | } 982 | 983 | HtmlBlockInfo ret; 984 | ret.isHtmlBlock = false; 985 | ret.open = true; 986 | 987 | ln = strip(ln); 988 | if( ln.length < 3 ) return ret; 989 | if( ln[0] != '<' ) return ret; 990 | if( ln[1] == '/' ){ 991 | ret.open = false; 992 | ln = ln[1 .. $]; 993 | } 994 | if( !isAlpha(ln[1]) ) return ret; 995 | ln = ln[1 .. $]; 996 | size_t idx = 0; 997 | while( idx < ln.length && ln[idx] != ' ' && ln[idx] != '>' ) 998 | idx++; 999 | ret.tagName = ln[0 .. idx]; 1000 | ln = ln[idx .. 
$]; 1001 | 1002 | auto eidx = ln.indexOf('>'); 1003 | if( eidx < 0 ) return ret; 1004 | if( eidx != ln.length-1 ) return ret; 1005 | 1006 | if (!s_blockTags.canFind(ret.tagName)) return ret; 1007 | 1008 | ret.isHtmlBlock = true; 1009 | return ret; 1010 | } 1011 | 1012 | private bool isHtmlBlockLine(string ln) 1013 | pure @safe { 1014 | auto bi = parseHtmlBlockLine(ln); 1015 | return bi.isHtmlBlock && bi.open; 1016 | } 1017 | 1018 | private bool isHtmlBlockCloseLine(string ln) 1019 | pure @safe { 1020 | auto bi = parseHtmlBlockLine(ln); 1021 | return bi.isHtmlBlock && !bi.open; 1022 | } 1023 | 1024 | private bool isCodeBlockDelimiter(string ln) 1025 | pure @safe { 1026 | return ln.startsWith("```"); 1027 | } 1028 | 1029 | private string getHtmlTagName(string ln) 1030 | pure @safe { 1031 | return parseHtmlBlockLine(ln).tagName; 1032 | } 1033 | 1034 | private bool isLineIndented(string ln) 1035 | pure @safe { 1036 | return ln.startsWith("\t") || ln.startsWith(" "); 1037 | } 1038 | 1039 | private string unindentLine(string ln) 1040 | pure @safe { 1041 | if( ln.startsWith("\t") ) return ln[1 .. $]; 1042 | if( ln.startsWith(" ") ) return ln[4 .. $]; 1043 | assert(false); 1044 | } 1045 | 1046 | private int parseEmphasis(ref string str, ref string text) 1047 | pure @safe { 1048 | string pstr = str; 1049 | if( pstr.length < 3 ) return false; 1050 | 1051 | string ctag; 1052 | if( pstr.startsWith("***") ) ctag = "***"; 1053 | else if( pstr.startsWith("**") ) ctag = "**"; 1054 | else if( pstr.startsWith("*") ) ctag = "*"; 1055 | else if( pstr.startsWith("___") ) ctag = "___"; 1056 | else if( pstr.startsWith("__") ) ctag = "__"; 1057 | else if( pstr.startsWith("_") ) ctag = "_"; 1058 | else return false; 1059 | 1060 | pstr = pstr[ctag.length .. $]; 1061 | 1062 | auto cidx = () @trusted { return pstr.indexOf(ctag); }(); 1063 | if( cidx < 1 ) return false; 1064 | 1065 | text = pstr[0 .. cidx]; 1066 | 1067 | str = pstr[cidx+ctag.length .. 
$]; 1068 | return cast(int)ctag.length; 1069 | } 1070 | 1071 | private bool parseInlineCode(ref string str, ref string code) 1072 | pure @safe { 1073 | string pstr = str; 1074 | if( pstr.length < 3 ) return false; 1075 | string ctag; 1076 | if( pstr.startsWith("``") ) ctag = "``"; 1077 | else if( pstr.startsWith("`") ) ctag = "`"; 1078 | else return false; 1079 | pstr = pstr[ctag.length .. $]; 1080 | 1081 | auto cidx = () @trusted { return pstr.indexOf(ctag); }(); 1082 | if( cidx < 1 ) return false; 1083 | 1084 | code = pstr[0 .. cidx]; 1085 | str = pstr[cidx+ctag.length .. $]; 1086 | return true; 1087 | } 1088 | 1089 | private bool parseLink(ref string str, ref Link dst, in LinkRef[string] linkrefs) 1090 | pure @safe { 1091 | string pstr = str; 1092 | if( pstr.length < 3 ) return false; 1093 | // ignore img-link prefix 1094 | if( pstr[0] == '!' ) pstr = pstr[1 .. $]; 1095 | 1096 | // parse the text part [text] 1097 | if( pstr[0] != '[' ) return false; 1098 | auto cidx = pstr.matchBracket(); 1099 | if( cidx < 1 ) return false; 1100 | string refid; 1101 | dst.text = pstr[1 .. cidx]; 1102 | pstr = pstr[cidx+1 .. $]; 1103 | 1104 | // parse either (link '['"title"']') or '[' ']'[refid] 1105 | if( pstr.length < 2 ) return false; 1106 | if( pstr[0] == '('){ 1107 | cidx = pstr.matchBracket(); 1108 | if( cidx < 1 ) return false; 1109 | auto inner = pstr[1 .. cidx]; 1110 | immutable qidx = inner.indexOfCT('"'); 1111 | if( qidx > 1 && inner[qidx - 1].isWhite()){ 1112 | dst.url = inner[0 .. qidx].stripRight(); 1113 | immutable len = inner[qidx .. $].lastIndexOf('"'); 1114 | if( len == 0 ) return false; 1115 | assert(len > 0); 1116 | dst.title = inner[qidx + 1 .. qidx + len]; 1117 | } else { 1118 | dst.url = inner.stripRight(); 1119 | dst.title = null; 1120 | } 1121 | if (dst.url.startsWith("<") && dst.url.endsWith(">")) 1122 | dst.url = dst.url[1 .. $-1]; 1123 | pstr = pstr[cidx+1 .. $]; 1124 | } else { 1125 | if( pstr[0] == ' ' ) pstr = pstr[1 .. 
$]; 1126 | if( pstr[0] != '[' ) return false; 1127 | pstr = pstr[1 .. $]; 1128 | cidx = pstr.indexOfCT(']'); 1129 | if( cidx < 0 ) return false; 1130 | if( cidx == 0 ) refid = dst.text; 1131 | else refid = pstr[0 .. cidx]; 1132 | pstr = pstr[cidx+1 .. $]; 1133 | } 1134 | 1135 | 1136 | if( refid.length > 0 ){ 1137 | auto pr = toLower(refid) in linkrefs; 1138 | if( !pr ){ 1139 | // debug if (!__ctfe) logDebug("[LINK REF NOT FOUND: '%s'", refid); 1140 | return false; 1141 | } 1142 | dst.url = pr.url; 1143 | dst.title = pr.title; 1144 | } 1145 | 1146 | str = pstr; 1147 | return true; 1148 | } 1149 | 1150 | @safe unittest 1151 | { 1152 | static void testLink(string s, Link exp, in LinkRef[string] refs) 1153 | { 1154 | Link link; 1155 | assert(parseLink(s, link, refs), s); 1156 | assert(link == exp); 1157 | } 1158 | LinkRef[string] refs; 1159 | refs["ref"] = LinkRef("ref", "target", "title"); 1160 | 1161 | testLink(`[link](target)`, Link("link", "target"), null); 1162 | testLink(`[link](target "title")`, Link("link", "target", "title"), null); 1163 | testLink(`[link](target "title")`, Link("link", "target", "title"), null); 1164 | testLink(`[link](target "title" )`, Link("link", "target", "title"), null); 1165 | 1166 | testLink(`[link](target)`, Link("link", "target"), null); 1167 | testLink(`[link](target "title")`, Link("link", "target", "title"), null); 1168 | 1169 | testLink(`[link][ref]`, Link("link", "target", "title"), refs); 1170 | testLink(`[ref][]`, Link("ref", "target", "title"), refs); 1171 | 1172 | testLink(`[link[with brackets]](target)`, Link("link[with brackets]", "target"), null); 1173 | testLink(`[link[with brackets]][ref]`, Link("link[with brackets]", "target", "title"), refs); 1174 | 1175 | testLink(`[link](/target with spaces )`, Link("link", "/target with spaces"), null); 1176 | testLink(`[link](/target with spaces "title")`, Link("link", "/target with spaces", "title"), null); 1177 | 1178 | testLink(`[link](white-space "around title" )`, 
Link("link", "white-space", "around title"), null); 1179 | testLink(`[link](tabs "around title" )`, Link("link", "tabs", "around title"), null); 1180 | 1181 | testLink(`[link](target "")`, Link("link", "target", ""), null); 1182 | testLink(`[link](target-no-title"foo" )`, Link("link", "target-no-title\"foo\"", ""), null); 1183 | 1184 | testLink(`[link]()`, Link("link", "target"), null); 1185 | 1186 | auto failing = [ 1187 | `text`, `[link](target`, `[link]target)`, `[link]`, 1188 | `[link(target)`, `link](target)`, `[link] (target)`, 1189 | `[link][noref]`, `[noref][]` 1190 | ]; 1191 | Link link; 1192 | foreach (s; failing) 1193 | assert(!parseLink(s, link, refs), s); 1194 | } 1195 | 1196 | private bool parseAutoLink(ref string str, ref string url) 1197 | pure @safe { 1198 | string pstr = str; 1199 | if( pstr.length < 3 ) return false; 1200 | if( pstr[0] != '<' ) return false; 1201 | pstr = pstr[1 .. $]; 1202 | auto cidx = pstr.indexOf('>'); 1203 | if( cidx < 0 ) return false; 1204 | url = pstr[0 .. cidx]; 1205 | if( anyOf(url, " \t") ) return false; 1206 | if( !anyOf(url, ":@") ) return false; 1207 | str = pstr[cidx+1 .. $]; 1208 | if( url.indexOf('@') > 0 ) url = "mailto:"~url; 1209 | return true; 1210 | } 1211 | 1212 | private LinkRef[string] scanForReferences(ref string[] lines) 1213 | pure @safe { 1214 | LinkRef[string] ret; 1215 | bool[size_t] reflines; 1216 | 1217 | // search for reference definitions: 1218 | // [refid] link "opt text" 1219 | // [refid] "opt text" 1220 | // "opt text", 'opt text', (opt text) 1221 | // line must not be indented 1222 | foreach( lnidx, ln; lines ){ 1223 | if( isLineIndented(ln) ) continue; 1224 | ln = strip(ln); 1225 | if( !ln.startsWith("[") ) continue; 1226 | ln = ln[1 .. $]; 1227 | 1228 | auto idx = () @trusted { return ln.indexOf("]:"); }(); 1229 | if( idx < 0 ) continue; 1230 | string refid = ln[0 .. idx]; 1231 | ln = stripLeft(ln[idx+2 .. 
$]); 1232 | 1233 | string url; 1234 | if( ln.startsWith("<") ){ 1235 | idx = ln.indexOfCT('>'); 1236 | if( idx < 0 ) continue; 1237 | url = ln[1 .. idx]; 1238 | ln = ln[idx+1 .. $]; 1239 | } else { 1240 | idx = ln.indexOfCT(' '); 1241 | if( idx > 0 ){ 1242 | url = ln[0 .. idx]; 1243 | ln = ln[idx+1 .. $]; 1244 | } else { 1245 | idx = ln.indexOfCT('\t'); 1246 | if( idx < 0 ){ 1247 | url = ln; 1248 | ln = ln[$ .. $]; 1249 | } else { 1250 | url = ln[0 .. idx]; 1251 | ln = ln[idx+1 .. $]; 1252 | } 1253 | } 1254 | } 1255 | ln = stripLeft(ln); 1256 | 1257 | string title; 1258 | if( ln.length >= 3 ){ 1259 | if( ln[0] == '(' && ln[$-1] == ')' || ln[0] == '\"' && ln[$-1] == '\"' || ln[0] == '\'' && ln[$-1] == '\'' ) 1260 | title = ln[1 .. $-1]; 1261 | } 1262 | 1263 | ret[toLower(refid)] = LinkRef(refid, url, title); 1264 | reflines[lnidx] = true; 1265 | 1266 | // debug if (!__ctfe) logTrace("[detected ref on line %d]", lnidx+1); 1267 | } 1268 | 1269 | // remove all lines containing references 1270 | auto nonreflines = appender!(string[])(); 1271 | nonreflines.reserve(lines.length); 1272 | foreach( i, ln; lines ) 1273 | if( i !in reflines ) 1274 | nonreflines.put(ln); 1275 | lines = nonreflines.data(); 1276 | 1277 | return ret; 1278 | } 1279 | 1280 | 1281 | /** 1282 | Generates an identifier suitable to use as within a URL. 1283 | 1284 | The resulting string will contain only ASCII lower case alphabetic or 1285 | numeric characters, as well as dashes (-). Every sequence of 1286 | non-alphanumeric characters will be replaced by a single dash. No dashes 1287 | will be at either the front or the back of the result string. 1288 | */ 1289 | auto asSlug(R)(R text) 1290 | if (isInputRange!R && is(typeof(R.init.front) == dchar)) 1291 | { 1292 | static struct SlugRange { 1293 | private { 1294 | R _input; 1295 | bool _dash; 1296 | } 1297 | 1298 | this(R input) 1299 | { 1300 | _input = input; 1301 | skipNonAlphaNum(); 1302 | } 1303 | 1304 | @property bool empty() const { return _dash ? 
false : _input.empty; } 1305 | @property char front() const { 1306 | if (_dash) return '-'; 1307 | 1308 | char r = cast(char)_input.front; 1309 | if (r >= 'A' && r <= 'Z') return cast(char)(r + ('a' - 'A')); 1310 | return r; 1311 | } 1312 | 1313 | void popFront() 1314 | { 1315 | if (_dash) { 1316 | _dash = false; 1317 | return; 1318 | } 1319 | 1320 | _input.popFront(); 1321 | auto na = skipNonAlphaNum(); 1322 | if (na && !_input.empty) 1323 | _dash = true; 1324 | } 1325 | 1326 | private bool skipNonAlphaNum() 1327 | { 1328 | bool have_skipped = false; 1329 | while (!_input.empty) { 1330 | switch (_input.front) { 1331 | default: 1332 | _input.popFront(); 1333 | have_skipped = true; 1334 | break; 1335 | case 'a': .. case 'z': 1336 | case 'A': .. case 'Z': 1337 | case '0': .. case '9': 1338 | return have_skipped; 1339 | } 1340 | } 1341 | return have_skipped; 1342 | } 1343 | } 1344 | return SlugRange(text); 1345 | } 1346 | 1347 | unittest { 1348 | import std.algorithm : equal; 1349 | assert("".asSlug.equal("")); 1350 | assert(".,-".asSlug.equal("")); 1351 | assert("abc".asSlug.equal("abc")); 1352 | assert("aBc123".asSlug.equal("abc123")); 1353 | assert("....aBc...123...".asSlug.equal("abc-123")); 1354 | } 1355 | 1356 | private struct LinkRef { 1357 | string id; 1358 | string url; 1359 | string title; 1360 | } 1361 | 1362 | private struct Link { 1363 | string text; 1364 | string url; 1365 | string title; 1366 | } 1367 | 1368 | @safe unittest { // alt and title attributes 1369 | assert(filterMarkdown("![alt](http://example.org/image)") 1370 | == "

    \"alt\"\n

    \n"); 1371 | assert(filterMarkdown("![alt](http://example.org/image \"Title\")") 1372 | == "

    \"alt\"\n

    \n"); 1373 | } 1374 | 1375 | @safe unittest { // complex links 1376 | assert(filterMarkdown("their [install\ninstructions]() and") 1377 | == "

    their install\ninstructions and\n

    \n"); 1378 | assert(filterMarkdown("[![Build Status](https://travis-ci.org/rejectedsoftware/vibe.d.png)](https://travis-ci.org/rejectedsoftware/vibe.d)") 1379 | == "

    \"Build\n

    \n"); 1380 | } 1381 | 1382 | @safe unittest { // check CTFE-ability 1383 | enum res = filterMarkdown("### some markdown\n[foo][]\n[foo]: /bar"); 1384 | assert(res == "

    some markdown

    \n

    foo\n

    \n", res); 1385 | } 1386 | 1387 | @safe unittest { // correct line breaks in restrictive mode 1388 | auto res = filterMarkdown("hello\nworld", MarkdownFlags.forumDefault); 1389 | assert(res == "

    hello
    world\n

    \n", res); 1390 | } 1391 | 1392 | /*@safe unittest { // code blocks and blockquotes 1393 | assert(filterMarkdown("\tthis\n\tis\n\tcode") == 1394 | "
    this\nis\ncode
    \n"); 1395 | assert(filterMarkdown(" this\n is\n code") == 1396 | "
    this\nis\ncode
    \n"); 1397 | assert(filterMarkdown(" this\n is\n\tcode") == 1398 | "
    this\nis
    \n
    code
    \n"); 1399 | assert(filterMarkdown("\tthis\n\n\tcode") == 1400 | "
    this\n\ncode
    \n"); 1401 | assert(filterMarkdown("\t> this") == 1402 | "
    > this
    \n"); 1403 | assert(filterMarkdown("> this") == 1404 | "
    this
    \n"); 1405 | assert(filterMarkdown("> this\n is code") == 1406 | "
    this\nis code
    \n"); 1407 | }*/ 1408 | 1409 | @safe unittest { // test simple border-less table 1410 | auto res = filterMarkdown( 1411 | "Col 1 | Col 2 | Col 3\n -- | -- | --\n val 1 | val 2 | val 3\n *val 4* | val 5 | value 6", 1412 | MarkdownFlags.supportTables 1413 | ); 1414 | assert(res == "\n\n\n\n
    Col 1Col 2Col 3
    val 1val 2val 3
    val 4val 5value 6
    \n", res); 1415 | } 1416 | 1417 | @safe unittest { // test simple border'ed table 1418 | auto res = filterMarkdown( 1419 | "| Col 1 | Col 2 | Col 3 |\n| -- | -- | -- |\n| val 1 | val 2 | val 3 |\n| *val 4* | val 5 | value 6 |", 1420 | MarkdownFlags.supportTables 1421 | ); 1422 | assert(res == "\n\n\n\n
    Col 1Col 2Col 3
    val 1val 2val 3
    val 4val 5value 6
    \n", res); 1423 | } 1424 | 1425 | @safe unittest { 1426 | import std.stdio; 1427 | 1428 | string input = ` 1429 | Table: 1430 | 1431 | ID | Name | Address 1432 | - | ---- | --------- 1433 | 1 | Foo | Somewhere 1434 | 2 | Bar | Nowhere `; 1435 | auto res = filterMarkdown(input, MarkdownFlags.supportTables); 1436 | writeln("==========", input, "=====", res); 1437 | assert(res == "

    Table:\n

    \n\n\n\n\n
    IDNameAddress
    1FooSomewhere
    2BarNowhere
    \n", res); 1438 | } 1439 | -------------------------------------------------------------------------------- /source/dmarkdown/package.d: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright: © 2012-2014 RejectedSoftware e.K. 3 | License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file. 4 | Authors: Sönke Ludwig 5 | */ 6 | module dmarkdown; 7 | 8 | public import dmarkdown.markdown; 9 | -------------------------------------------------------------------------------- /source/dmarkdown/string.d: -------------------------------------------------------------------------------- 1 | /** 2 | Utility functions for string processing 3 | Copyright: © 2012-2014 RejectedSoftware e.K. 4 | License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file. 5 | Authors: Sönke Ludwig 6 | */ 7 | module dmarkdown.string; 8 | 9 | public import std.string; 10 | 11 | import std.algorithm; 12 | import std.array; 13 | import std.ascii; 14 | import std.format; 15 | import std.uni; 16 | import std.utf; 17 | import core.exception; 18 | 19 | 20 | package: 21 | 22 | /** 23 | Checks if all characters in 'str' are contained in 'chars'. 24 | */ 25 | bool allOf(string str, string chars) 26 | @safe pure { 27 | foreach (dchar ch; str) 28 | if (!chars.canFind(ch)) 29 | return false; 30 | return true; 31 | } 32 | 33 | ptrdiff_t indexOfCT(Char)(in Char[] s, dchar c, CaseSensitive cs = CaseSensitive.yes) 34 | @safe pure { 35 | if (__ctfe) { 36 | if (cs == CaseSensitive.yes) { 37 | foreach (i, dchar ch; s) 38 | if (ch == c) 39 | return i; 40 | } else { 41 | c = std.uni.toLower(c); 42 | foreach (i, dchar ch; s) 43 | if (std.uni.toLower(ch) == c) 44 | return i; 45 | } 46 | return -1; 47 | } else return std.string.indexOf(s, c, cs); 48 | } 49 | 50 | /** 51 | Checks if any character in 'str' is contained in 'chars'. 
52 | */ 53 | bool anyOf(string str, string chars) 54 | @safe pure { 55 | foreach (ch; str) 56 | if (chars.canFind(ch)) 57 | return true; 58 | return false; 59 | } 60 | 61 | /** 62 | Finds the closing bracket (works with any of '[', '$(LPAREN)', '<', '{'). 63 | 64 | Params: 65 | str = input string 66 | nested = whether to skip nested brackets 67 | Returns: 68 | The index of the closing bracket or -1 for unbalanced strings 69 | and strings that don't start with a bracket. 70 | */ 71 | sizediff_t matchBracket(string str, bool nested = true) 72 | @safe pure nothrow { 73 | if (str.length < 2) return -1; 74 | 75 | char open = str[0], close = void; 76 | switch (str[0]) { 77 | case '[': close = ']'; break; 78 | case '(': close = ')'; break; 79 | case '<': close = '>'; break; 80 | case '{': close = '}'; break; 81 | default: return -1; 82 | } 83 | 84 | size_t level = 1; 85 | foreach (i, char c; str[1 .. $]) { 86 | if (nested && c == open) ++level; 87 | else if (c == close) --level; 88 | if (level == 0) return i + 1; 89 | } 90 | return -1; 91 | } 92 | --------------------------------------------------------------------------------