[^\r]+?<\/pre>)/gm,function(_88,m1){ 294 | var pre=m1; 295 | pre=pre.replace(/^ /mg,"~0"); 296 | pre=pre.replace(/~0/g,""); 297 | return pre; 298 | }); 299 | return _1c("

\n"+bq+"\n

"); 300 | }); 301 | return _84; 302 | }; 303 | var _20=function(_8b){ 304 | _8b=_8b.replace(/^\n+/g,""); 305 | _8b=_8b.replace(/\n+$/g,""); 306 | var _8c=_8b.split(/\n{2,}/g); 307 | var _8d=new Array(); 308 | var end=_8c.length; 309 | for(var i=0;i=0){ 312 | _8d.push(str); 313 | }else{ 314 | if(str.search(/\S/)>=0){ 315 | str=_21(str); 316 | str=str.replace(/^([ \t]*)/g,"

"); 317 | str+="

"; 318 | _8d.push(str); 319 | } 320 | } 321 | } 322 | end=_8d.length; 323 | for(var i=0;i=0){ 325 | var _91=_3[RegExp.$1]; 326 | _91=_91.replace(/\$/g,"$$$$"); 327 | _8d[i]=_8d[i].replace(/~K\d+K/,_91); 328 | } 329 | } 330 | return _8d.join("\n\n"); 331 | }; 332 | var _11=function(_92){ 333 | _92=_92.replace(/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/g,"&"); 334 | _92=_92.replace(/<(?![a-z\/?\$!])/gi,"<"); 335 | return _92; 336 | }; 337 | var _25=function(_93){ 338 | _93=_93.replace(/\$\$/g,_94); 339 | _93=_93.replace(/\\([`*_{}\[\]()>#+-.!])/g,_94); 340 | return _93; 341 | }; 342 | var _28=function(_95){ 343 | _95=_95.replace(/<((https?|ftp|dict):[^'">\s]+)>/gi,"$1"); 344 | _95=_95.replace(/<(?:mailto:)?([-.\w]+\@[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+)>/gi,function(_96,m1){ 345 | return _98(_a(m1)); 346 | }); 347 | return _95; 348 | }; 349 | var _98=function(_99){ 350 | function char2hex(ch){ 351 | var _9b="0123456789ABCDEF"; 352 | var dec=ch.charCodeAt(0); 353 | return (_9b.charAt(dec>>4)+_9b.charAt(dec&15)); 354 | } 355 | var _9d=[function(ch){ 356 | return "&#"+ch.charCodeAt(0)+";"; 357 | },function(ch){ 358 | return "&#x"+char2hex(ch)+";"; 359 | },function(ch){ 360 | return ch; 361 | }]; 362 | _99="mailto:"+_99; 363 | _99=_99.replace(/./g,function(ch){ 364 | if(ch=="@"){ 365 | ch=_9d[Math.floor(Math.random()*2)](ch); 366 | }else{ 367 | if(ch!=":"){ 368 | var r=Math.random(); 369 | ch=(r>0.9?_9d[2](ch):r>0.45?_9d[1](ch):_9d[0](ch)); 370 | } 371 | } 372 | return ch; 373 | }); 374 | _99=""+_99+""; 375 | _99=_99.replace(/">.+:/g,"\">"); 376 | return _99; 377 | }; 378 | var _a=function(_a3){ 379 | _a3=_a3.replace(/~E(\d+)E/g,function(_a4,m1){ 380 | var _a6=parseInt(m1); 381 | return String.fromCharCode(_a6); 382 | }); 383 | return _a3; 384 | }; 385 | var _72=function(_a7){ 386 | _a7=_a7.replace(/^(\t|[ ]{1,4})/gm,"~0"); 387 | _a7=_a7.replace(/~0/g,""); 388 | return _a7; 389 | }; 390 | var _6=function(_a8){ 391 | _a8=_a8.replace(/\t(?=\t)/g," "); 392 | 
_a8=_a8.replace(/\t/g,"~A~B"); 393 | _a8=_a8.replace(/~B(.+?)~A/g,function(_a9,m1,m2){ 394 | var _ac=m1; 395 | var _ad=4-_ac.length%4; 396 | for(var i=0;i<_ad;i++){ 397 | _ac+=" "; 398 | } 399 | return _ac; 400 | }); 401 | _a8=_a8.replace(/~A/g," "); 402 | _a8=_a8.replace(/~B/g,""); 403 | return _a8; 404 | }; 405 | var _2e=function(_af,_b0,_b1){ 406 | var _b2="(["+_b0.replace(/([\[\]\\])/g,"\\$1")+"])"; 407 | if(_b1){ 408 | _b2="\\\\"+_b2; 409 | } 410 | var _b3=new RegExp(_b2,"g"); 411 | _af=_af.replace(_b3,_94); 412 | return _af; 413 | }; 414 | var _94=function(_b4,m1){ 415 | var _b6=m1.charCodeAt(0); 416 | return "~E"+_b6+"E"; 417 | }; 418 | }; 419 | 420 | -------------------------------------------------------------------------------- /lib/vendor/showdown/example/showdown-gui.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Showdown - Markdown in Javascript 6 | 7 | 8 | 243 | 244 | 245 | 246 | 247 |

248 |

Showdown

249 |

a javascript port of Markdown

250 |

251 | 252 |

253 |

254 | Input 255 |

256 |

Using this tool
257 | ---------------
258 | 
259 | This page lets you create HTML by entering text in a simple format that's easy to read and write.
260 | 
261 |   - Type Markdown text in the left window
262 |   - See the HTML in the right
263 | 
264 | Markdown is a lightweight markup language based on the formatting conventions that people naturally use in email.  As [John Gruber] writes on the [Markdown site] [1]:
265 | 
266 | > The overriding design goal for Markdown's
267 | > formatting syntax is to make it as readable 
268 | > as possible. The idea is that a
269 | > Markdown-formatted document should be
270 | > publishable as-is, as plain text, without
271 | > looking like it's been marked up with tags
272 | > or formatting instructions.
273 | 
274 | This document is written in Markdown; you can see the plain-text version on the left.  To get a feel for Markdown's syntax, type some text into the left window and watch the results in the right.  You can see a Markdown syntax guide by switching the right-hand window from *Preview* to *Syntax Guide*.
275 | 
276 | Showdown is a Javascript port of Markdown.  You can get the full [source code] by clicking on the version number at the bottom of the page.
277 | 
278 | **Start with a [blank page] or edit this document in the left window.**
279 | 
280 |   [john gruber]: http://daringfireball.net/
281 |   [1]: http://daringfireball.net/projects/markdown/
282 |   [source code]: http://www.attacklab.net/showdown-v0.9.zip
283 |   [blank page]: ?blank=1 "Clear all text"
284 | 
285 |

286 |

287 | 288 |

289 |

290 | 295 |

296 | 297 | 298 | 299 |

300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 |

309 | 
310 | Markdown Syntax Guide
311 | =====================
312 | 
313 | This is an overview of Markdown's syntax.  For more information, visit the [Markdown web site].
314 | 
315 |  [Markdown web site]:
316 |    http://daringfireball.net/projects/markdown/
317 | 
318 | 
319 | 
320 | 
321 | 
322 | 
323 | Italics and Bold
324 | ================
325 | 
326 | 
327 | *This is italicized*, and so is _this_.
328 | 
329 | **This is bold**, and so is __this__.
330 | 
331 | You can use ***italics and bold together*** if you ___have to___.
332 | 
333 | 
334 | 
335 | 
336 | 
337 | 
338 | Links
339 | =====
340 | 
341 | 
342 | Simple links
343 | ------------
344 | 
345 | There are three ways to write links.  Each is easier to read than the last:
346 | 
347 | Here's an inline link to [Google](http://www.google.com/).
348 | Here's a reference-style link to [Google] [1].
349 | Here's a very readable link to [Yahoo!].
350 | 
351 |   [1]: http://www.google.com/
352 |   [yahoo!]: http://www.yahoo.com/
353 | 
354 | The link definitions can appear anywhere in the document -- before or after the place where you use them.  The link definition names (`1` and `Yahoo!`) can be any unique string, and are case-insensitive; `[Yahoo!]` is the same as `[YAHOO!]`.
355 | 
356 | 
357 | Advanced links: Title attributes
358 | --------------------------------
359 | 
360 | You can also add a `title` attribute to a link, which will show up when the user holds the mouse pointer over it.  Title attributes are helpful if your link text is not descriptive enough to tell users where they're going.  (In reference links, you can optionally use parentheses for the link title instead of quotation marks.)
361 | 
362 | Here's a [poorly-named link](http://www.google.com/ "Google").
363 | Never write "[click here][^2]".
364 | Trust [me].
365 | 
366 |   [^2]: http://www.w3.org/QA/Tips/noClickHere
367 |         (Advice against the phrase "click here")
368 |   [me]: http://www.attacklab.net/ "Attacklab"
369 | 
370 | 
371 | Advanced links: Bare URLs
372 | -------------------------
373 | 
374 | You can write bare URLs by enclosing them in angle brackets:
375 | 
376 | My web site is at <http://www.attacklab.net>.
377 | 
378 | If you use this format for email addresses, Showdown will encode the address to make it harder for spammers to harvest.  Try it and look in the *HTML Output* pane to see the results:
379 | 
380 | Humans can read this, but most spam harvesting robots can't: <me@privacy.net>
381 | 
382 | 
383 | 
384 | 
385 | 
386 | 
387 | Headers
388 | =======
389 | 
390 | 
391 | There are two ways to do headers in Markdown.  (In these examples, Header 1 is the biggest, and Header 6 is the smallest.)
392 | 
393 | You can underline text to make the two top-level headers:
394 | 
395 | Header 1
396 | ========
397 | 
398 | Header 2
399 | --------
400 | 
401 | The number of `=` or `-` signs doesn't matter; you can get away with just one.  But using enough to underline the text makes your titles look better in plain text.
402 | 
403 | You can also use hash marks for all six levels of HTML headers:
404 | 
405 | # Header 1 #
406 | ## Header 2 ##
407 | ### Header 3 ###
408 | #### Header 4 ####
409 | ##### Header 5 #####
410 | ###### Header 6 ######
411 | 
412 | The closing `#` characters are optional.
413 | 
414 | 
415 | 
416 | 
417 | 
418 | 
419 | Horizontal Rules
420 | ================
421 | 
422 | 
423 | You can insert a horizontal rule by putting three or more hyphens, asterisks, or underscores on a line by themselves:
424 | 
425 | ---
426 | 
427 | *******
428 | ___
429 | 
430 | You can also use spaces between the characters:
431 | 
432 | -  -  -  -
433 | 
434 | All of these examples produce the same output.
435 | 
436 | 
437 | 
438 | 
439 | 
440 | 
441 | Lists
442 | =====
443 | 
444 | 
445 | Simple lists
446 | ------------
447 | 
448 | A bulleted list:
449 | 
450 | - You can use a minus sign for a bullet
451 | + Or plus sign
452 | * Or an asterisk
453 | 
454 | A numbered list:
455 | 
456 | 1. Numbered lists are easy
457 | 2. Markdown keeps track of the numbers for you
458 | 7. So this will be item 3.
459 | 
460 | A double-spaced list:
461 | 
462 | - This list gets wrapped in `<p>` tags
463 | 
464 | - So there will be extra space between items
465 | 
466 | 
467 | Advanced lists: Nesting
468 | -----------------------
469 | 
470 | You can put other Markdown blocks in a list; just indent four spaces for each nesting level.  So:
471 | 
472 | 1. Lists in a list item:
473 |     - Indented four spaces.
474 |         * indented eight spaces.
475 |     - Four spaces again.
476 | 
477 | 2.  Multiple paragraphs in a list item:
478 | 
479 |     It's best to indent the paragraphs four spaces
480 |     You can get away with three, but it can get
481 |     confusing when you nest other things.
482 |     Stick to four.
483 | 
484 |     We indented the first line an extra space to align
485 |     it with these paragraphs.  In real use, we might do
486 |     that to the entire list so that all items line up.
487 | 
488 |     This paragraph is still part of the list item, but it looks messy to humans.  So it's a good idea to wrap your nested paragraphs manually, as we did with the first two.
489 | 
490 | 3. Blockquotes in a list item:
491 | 
492 |     > Skip a line and
493 |     > indent the >'s four spaces.
494 | 
495 | 4. Preformatted text in a list item:
496 | 
497 |         Skip a line and indent eight spaces.
498 |         That's four spaces for the list
499 |         and four to trigger the code block.
500 | 
501 | 
502 | 
503 | 
504 | 
505 | 
506 | Blockquotes
507 | ===========
508 | 
509 | 
510 | Simple blockquotes
511 | ------------------
512 | 
513 | Blockquotes are indented:
514 | 
515 | > The syntax is based on the way email programs
516 | > usually do quotations. You don't need to hard-wrap
517 | > the paragraphs in your blockquotes, but it looks much nicer if you do.  Depends how lazy you feel.
518 | 
519 | 
520 | Advanced blockquotes: Nesting
521 | -----------------------------
522 | 
523 | You can put other Markdown blocks in a blockquote; just add a `>` followed by a space:
524 | 
525 | Paragraph breaks in a blockquote:
526 | 
527 | > The > on the blank lines is optional.
528 | > Include it or don't; Markdown doesn't care.
529 | >
530 | > But your plain text looks better to
531 | > humans if you include the extra `>`
532 | > between paragraphs.
533 | 
534 | 
535 | Blockquotes within a blockquote:
536 | 
537 | > A standard blockquote is indented
538 | > > A nested blockquote is indented more
539 | > > > > You can nest to any depth.
540 | 
541 | 
542 | Lists in a blockquote:
543 | 
544 | > - A list in a blockquote
545 | > - With a > and space in front of it
546 | >     * A sublist
547 | 
548 | Preformatted text in a blockquote:
549 | 
550 | >     Indent five spaces total.  The first
551 | >     one is part of the blockquote designator.
552 | 
553 | 
554 | 
555 | 
556 | 
557 | 
558 | Images
559 | ======
560 | 
561 | 
562 | Images are exactly like links, but they have an exclamation point in front of them:
563 | 
564 |  ![Valid XHTML] (http://w3.org/Icons/valid-xhtml10).
565 | 
566 | The word in square brackets is the alt text, which gets displayed if the browser can't show the image.  Be sure to include meaningful alt text for blind users' screen-reader software.
567 | 
568 | Just like links, images work with reference syntax and titles:
569 | 
570 |  This page is ![valid XHTML][checkmark].
571 | 
572 |  [checkmark]: http://w3.org/Icons/valid-xhtml10
573 |            "What are you smiling at?"
574 | 
575 | 
576 | **Note:**
577 | 
578 | Markdown does not currently support the shortest reference syntax for images:
579 | 
580 |   Here's a broken ![checkmark].
581 | 
582 | But you can use a slightly more verbose version of implicit reference names:
583 | 
584 |   This ![checkmark][] works.
585 | 
586 | The reference name (`checkmark`) is also used as the alt text.
587 | 
588 | 
589 | 
590 | 
591 | 
592 | 
593 | Inline HTML
594 | ===========
595 | 
596 | 
597 | If you need to do something that Markdown can't handle, you can always just use HTML:
598 | 
599 |  Strikethrough humor is <strike>funny</strike>.
600 | 
601 | Markdown is smart enough not to mangle your span-level HTML:
602 | 
603 | <u>Markdown works *fine* in here.</u>
604 | 
605 | Block-level HTML elements have a few restrictions:
606 | 
607 | 1. They must be separated from surrounding text by blank
608 |    lines.
609 | 2. The begin and end tags of the outermost block element
610 |    must not be indented.
611 | 3. You can't use Markdown within HTML blocks.
612 | 
613 | So:
614 | 
615 | <div style="background-color: lightgray">
616 |     You can <em>not</em> use Markdown in here.
617 | </div>
618 | 
619 | 
620 | 
621 | 
622 | 
623 | 
624 | Preformatted Text
625 | =================
626 | 
627 | 
628 | You can include preformatted text in a Markdown document.
629 | 
630 | To make a code block, indent four spaces:
631 | 
632 |     printf("goodbye world!");  /* his suicide note
633 |                                   was in C */
634 | 
635 | The text will be wrapped in `<pre>` and `<code>` tags, and the browser will display it in a monospaced typeface.  The first four spaces will be stripped off, but all other whitespace will be preserved.
636 | 
637 | You cannot use Markdown or HTML within a code block, which makes them a convenient way to show samples of Markdown or HTML syntax:
638 | 
639 |     <blink>
640 |        You would hate this if it weren't
641 |        wrapped in a code block.
642 |     </blink>
643 | 
644 | 
645 | 
646 | 
647 | 
648 | 
649 | Code Spans
650 | ==========
651 | 
652 | 
653 | You can make inline `<code>` tags by using code spans.  Use backticks to make a code span:
654 | 
655 |  Press the `<Tab>` key, then type a `$`.
656 | 
657 | (The backtick key is in the upper left corner of most keyboards.)
658 | 
659 | Like code blocks, code spans will be displayed in a monospaced typeface.  Markdown and HTML will not work within them:
660 | 
661 |  Markdown italicizes things like this: `I *love* it.`
662 | 
663 |  Don't use the `<font>` tag; use CSS instead.
664 | 
665 |

666 | 667 | 668 | 669 | 670 | 671 |

s around 211 | // "paragraphs" that are wrapped in non-block-level tags, such as anchors, 212 | // phrase emphasis, and spans. The list of tags we're looking for is 213 | // hard-coded: 214 | var block_tags_a = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del" 215 | var block_tags_b = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math" 216 | 217 | // First, look for nested blocks, e.g.: 218 | //

219 | //

220 | // tags for inner block must be indented. 221 | //

222 | //

223 | // 224 | // The outermost tags must start at the left margin for this to match, and 225 | // the inner nested divs must be indented. 226 | // We need to do this before the next, more liberal match, because the next 227 | // match will start at the first `

` and stop at the first `

`. 228 | 229 | // attacklab: This regex can be expensive when it fails. 230 | /* 231 | var text = text.replace(/ 232 | ( // save in $1 233 | ^ // start of line (with /m) 234 | <($block_tags_a) // start tag = $2 235 | \b // word break 236 | // attacklab: hack around khtml/pcre bug... 237 | [^\r]*?\n // any number of lines, minimally matching 238 | // the matching end tag 239 | [ \t]* // trailing spaces/tabs 240 | (?=\n+) // followed by a newline 241 | ) // attacklab: there are sentinel newlines at end of document 242 | /gm,function(){...}}; 243 | */ 244 | text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del)\b[^\r]*?\n<\/\2>[ \t]*(?=\n+))/gm,hashElement); 245 | 246 | // 247 | // Now match more liberally, simply from `\n` to `\n` 248 | // 249 | 250 | /* 251 | var text = text.replace(/ 252 | ( // save in $1 253 | ^ // start of line (with /m) 254 | <($block_tags_b) // start tag = $2 255 | \b // word break 256 | // attacklab: hack around khtml/pcre bug... 257 | [^\r]*? // any number of lines, minimally matching 258 | .* // the matching end tag 259 | [ \t]* // trailing spaces/tabs 260 | (?=\n+) // followed by a newline 261 | ) // attacklab: there are sentinel newlines at end of document 262 | /gm,function(){...}}; 263 | */ 264 | text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math)\b[^\r]*?.*<\/\2>[ \t]*(?=\n+)\n)/gm,hashElement); 265 | 266 | // Special case just for

. It was easier to make a special case than 267 | // to make the other regex more complicated. 268 | 269 | /* 270 | text = text.replace(/ 271 | ( // save in $1 272 | \n\n // Starting after a blank line 273 | [ ]{0,3} 274 | (<(hr) // start tag = $2 275 | \b // word break 276 | ([^<>])*? // 277 | \/?>) // the matching end tag 278 | [ \t]* 279 | (?=\n{2,}) // followed by a blank line 280 | ) 281 | /g,hashElement); 282 | */ 283 | text = text.replace(/(\n[ ]{0,3}(<(hr)\b([^<>])*?\/?>)[ \t]*(?=\n{2,}))/g,hashElement); 284 | 285 | // Special case for standalone HTML comments: 286 | 287 | /* 288 | text = text.replace(/ 289 | ( // save in $1 290 | \n\n // Starting after a blank line 291 | [ ]{0,3} // attacklab: g_tab_width - 1 292 | 295 | [ \t]* 296 | (?=\n{2,}) // followed by a blank line 297 | ) 298 | /g,hashElement); 299 | */ 300 | text = text.replace(/(\n\n[ ]{0,3}[ \t]*(?=\n{2,}))/g,hashElement); 301 | 302 | // PHP and ASP-style processor instructions ( and <%...%>) 303 | 304 | /* 305 | text = text.replace(/ 306 | (?: 307 | \n\n // Starting after a blank line 308 | ) 309 | ( // save in $1 310 | [ ]{0,3} // attacklab: g_tab_width - 1 311 | (?: 312 | <([?%]) // $2 313 | [^\r]*? 
314 | \2> 315 | ) 316 | [ \t]* 317 | (?=\n{2,}) // followed by a blank line 318 | ) 319 | /g,hashElement); 320 | */ 321 | text = text.replace(/(?:\n\n)([ ]{0,3}(?:<([?%])[^\r]*?\2>)[ \t]*(?=\n{2,}))/g,hashElement); 322 | 323 | // attacklab: Undo double lines (see comment at top of this function) 324 | text = text.replace(/\n\n/g,"\n"); 325 | return text; 326 | } 327 | 328 | var hashElement = function(wholeMatch,m1) { 329 | var blockText = m1; 330 | 331 | // Undo double lines 332 | blockText = blockText.replace(/\n\n/g,"\n"); 333 | blockText = blockText.replace(/^\n/,""); 334 | 335 | // strip trailing blank lines 336 | blockText = blockText.replace(/\n+$/g,""); 337 | 338 | // Replace the element text with a marker ("~KxK" where x is its key) 339 | blockText = "\n\n~K" + (g_html_blocks.push(blockText)-1) + "K\n\n"; 340 | 341 | return blockText; 342 | }; 343 | 344 | var _RunBlockGamut = function(text) { 345 | // 346 | // These are all the transformations that form block-level 347 | // tags like paragraphs, headers, and list items. 348 | // 349 | text = _DoHeaders(text); 350 | 351 | // Do Horizontal Rules: 352 | var key = hashBlock("

"); 353 | text = text.replace(/^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$/gm,key); 354 | text = text.replace(/^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$/gm,key); 355 | text = text.replace(/^[ ]{0,2}([ ]?\_[ ]?){3,}[ \t]*$/gm,key); 356 | 357 | text = _DoLists(text); 358 | text = _DoCodeBlocks(text); 359 | text = _DoBlockQuotes(text); 360 | 361 | // We already ran _HashHTMLBlocks() before, in Markdown(), but that 362 | // was to escape raw HTML in the original Markdown source. This time, 363 | // we're escaping the markup we've just created, so that we don't wrap 364 | //

" + _RunSpanGamut(m1) + "

");}); 668 | 669 | // atx-style headers: 670 | // # Header 1 671 | // ## Header 2 672 | // ## Header 2 with closing hashes ## 673 | // ... 674 | // ###### Header 6 675 | // 676 | 677 | /* 678 | text = text.replace(/ 679 | ^(\#{1,6}) // $1 = string of #'s 680 | [ \t]* 681 | (.+?) // $2 = Header text 682 | [ \t]* 683 | \#* // optional closing #'s (not counted) 684 | \n+ 685 | /gm, function() {...}); 686 | */ 687 | 688 | text = text.replace(/^(\#{1,6})[ \t]*(.+?)[ \t]*\#*\n+/gm, 689 | function(wholeMatch,m1,m2) { 690 | var h_level = m1.length; 691 | return hashBlock("" + _RunSpanGamut(m2) + ""); 692 | }); 693 | 694 | return text; 695 | } 696 | 697 | // This declaration keeps Dojo compressor from outputting garbage: 698 | var _ProcessListItems; 699 | 700 | var _DoLists = function(text) { 701 | // 702 | // Form HTML ordered (numbered) and unordered (bulleted) lists. 703 | // 704 | 705 | // attacklab: add sentinel to hack around khtml/safari bug: 706 | // http://bugs.webkit.org/show_bug.cgi?id=11231 707 | text += "~0"; 708 | 709 | // Re-usable pattern to match any entirel ul or ol list: 710 | 711 | /* 712 | var whole_list = / 713 | ( // $1 = whole list 714 | ( // $2 715 | [ ]{0,3} // attacklab: g_tab_width - 1 716 | ([*+-]|\d+[.]) // $3 = first list item marker 717 | [ \t]+ 718 | ) 719 | [^\r]+? 720 | ( // $4 721 | ~0 // sentinel for workaround; should be $ 722 | | 723 | \n{2,} 724 | (?=\S) 725 | (?! // Negative lookahead for another list item marker 726 | [ \t]* 727 | (?:[*+-]|\d+[.])[ \t]+ 728 | ) 729 | ) 730 | )/g 731 | */ 732 | var whole_list = /^(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/gm; 733 | 734 | if (g_list_level) { 735 | text = text.replace(whole_list,function(wholeMatch,m1,m2) { 736 | var list = m1; 737 | var list_type = (m2.search(/[*+-]/g)>-1) ? 
"ul" : "ol"; 738 | 739 | // Turn double returns into triple returns, so that we can make a 740 | // paragraph for the last item in a list, if necessary: 741 | list = list.replace(/\n{2,}/g,"\n\n\n");; 742 | var result = _ProcessListItems(list); 743 | 744 | // Trim any trailing whitespace, to put the closing `` 745 | // up on the preceding line, to get it past the current stupid 746 | // HTML block parser. This is a hack to work around the terrible 747 | // hack that is the HTML block parser. 748 | result = result.replace(/\s+$/,""); 749 | result = "<"+list_type+">" + result + "\n"; 750 | return result; 751 | }); 752 | } else { 753 | whole_list = /(\n\n|^\n?)(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/g; 754 | text = text.replace(whole_list,function(wholeMatch,m1,m2,m3) { 755 | var runup = m1; 756 | var list = m2; 757 | 758 | var list_type = (m3.search(/[*+-]/g)>-1) ? "ul" : "ol"; 759 | // Turn double returns into triple returns, so that we can make a 760 | // paragraph for the last item in a list, if necessary: 761 | var list = list.replace(/\n{2,}/g,"\n\n\n");; 762 | var result = _ProcessListItems(list); 763 | result = runup + "<"+list_type+">\n" + result + "\n"; 764 | return result; 765 | }); 766 | } 767 | 768 | // attacklab: strip sentinel 769 | text = text.replace(/~0/,""); 770 | 771 | return text; 772 | } 773 | 774 | _ProcessListItems = function(list_str) { 775 | // 776 | // Process the contents of a single ordered or unordered list, splitting it 777 | // into individual list items. 778 | // 779 | // The $g_list_level global keeps track of when we're inside a list. 780 | // Each time we enter a list, we increment it; when we leave a list, 781 | // we decrement. If it's zero, we're not in a list anymore. 782 | // 783 | // We do this because when we're not inside a list, we want to treat 784 | // something like this: 785 | // 786 | // I recommend upgrading to version 787 | // 8. 
Oops, now this line is treated 788 | // as a sub-list. 789 | // 790 | // As a single paragraph, despite the fact that the second line starts 791 | // with a digit-period-space sequence. 792 | // 793 | // Whereas when we're inside a list (or sub-list), that line will be 794 | // treated as the start of a sub-list. What a kludge, huh? This is 795 | // an aspect of Markdown's syntax that's hard to parse perfectly 796 | // without resorting to mind-reading. Perhaps the solution is to 797 | // change the syntax rules such that sub-lists must start with a 798 | // starting cardinal number; e.g. "1." or "a.". 799 | 800 | g_list_level++; 801 | 802 | // trim trailing blank lines: 803 | list_str = list_str.replace(/\n{2,}$/,"\n"); 804 | 805 | // attacklab: add sentinel to emulate \z 806 | list_str += "~0"; 807 | 808 | /* 809 | list_str = list_str.replace(/ 810 | (\n)? // leading line = $1 811 | (^[ \t]*) // leading whitespace = $2 812 | ([*+-]|\d+[.]) [ \t]+ // list marker = $3 813 | ([^\r]+? // list item text = $4 814 | (\n{1,2})) 815 | (?= \n* (~0 | \2 ([*+-]|\d+[.]) [ \t]+)) 816 | /gm, function(){...}); 817 | */ 818 | list_str = list_str.replace(/(\n)?(^[ \t]*)([*+-]|\d+[.])[ \t]+([^\r]+?(\n{1,2}))(?=\n*(~0|\2([*+-]|\d+[.])[ \t]+))/gm, 819 | function(wholeMatch,m1,m2,m3,m4){ 820 | var item = m4; 821 | var leading_line = m1; 822 | var leading_space = m2; 823 | 824 | if (leading_line || (item.search(/\n{2,}/)>-1)) { 825 | item = _RunBlockGamut(_Outdent(item)); 826 | } 827 | else { 828 | // Recursion for sub-lists: 829 | item = _DoLists(_Outdent(item)); 830 | item = item.replace(/\n$/,""); // chomp(item) 831 | item = _RunSpanGamut(item); 832 | } 833 | 834 | return "

" + item + "

\n"; 835 | } 836 | ); 837 | 838 | // attacklab: strip sentinel 839 | list_str = list_str.replace(/~0/g,""); 840 | 841 | g_list_level--; 842 | return list_str; 843 | } 844 | 845 | 846 | var _DoCodeBlocks = function(text) { 847 | // 848 | // Process Markdown `

` blocks.
 849 | //  
 850 | 
 851 | 	/*
 852 | 		text = text.replace(text,
 853 | 			/(?:\n\n|^)
 854 | 			(								// $1 = the code block -- one or more lines, starting with a space/tab
 855 | 				(?:
 856 | 					(?:[ ]{4}|\t)			// Lines must start with a tab or a tab-width of spaces - attacklab: g_tab_width
 857 | 					.*\n+
 858 | 				)+
 859 | 			)
 860 | 			(\n*[ ]{0,3}[^ \t\n]|(?=~0))	// attacklab: g_tab_width
 861 | 		/g,function(){...});
 862 | 	*/
 863 | 
 864 | 	// attacklab: sentinel workarounds for lack of \A and \Z, safari\khtml bug
 865 | 	text += "~0";
 866 | 	
 867 | 	text = text.replace(/(?:\n\n|^)((?:(?:[ ]{4}|\t).*\n+)+)(\n*[ ]{0,3}[^ \t\n]|(?=~0))/g,
 868 | 		function(wholeMatch,m1,m2) {
 869 | 			var codeblock = m1;
 870 | 			var nextChar = m2;
 871 | 		
 872 | 			codeblock = _EncodeCode( _Outdent(codeblock));
 873 | 			codeblock = _Detab(codeblock);
 874 | 			codeblock = codeblock.replace(/^\n+/g,""); // trim leading newlines
 875 | 			codeblock = codeblock.replace(/\n+$/g,""); // trim trailing whitespace
 876 | 
 877 | 			codeblock = "" + codeblock + "\n";
 878 | 
 879 | 			return hashBlock(codeblock) + nextChar;
 880 | 		}
 881 | 	);
 882 | 
 883 | 	// attacklab: strip sentinel
 884 | 	text = text.replace(/~0/,"");
 885 | 
 886 | 	return text;
 887 | }
 888 | 
 889 | var hashBlock = function(text) {
 890 | 	text = text.replace(/(^\n+|\n+$)/g,"");
 891 | 	return "\n\n~K" + (g_html_blocks.push(text)-1) + "K\n\n";
 892 | }
 893 | 
 894 | 
 895 | var _DoCodeSpans = function(text) {
 896 | //
 897 | //   *  Backtick quotes are used for  spans.
 898 | // 
 899 | //   *  You can use multiple backticks as the delimiters if you want to
 900 | //	 include literal backticks in the code span. So, this input:
 901 | //	 
 902 | //		 Just type ``foo `bar` baz`` at the prompt.
 903 | //	 
 904 | //	   Will translate to:
 905 | //	 
 906 | //		 Just type foo `bar` baz at the prompt.
 907 | //	 
 908 | //	There's no arbitrary limit to the number of backticks you
 909 | //	can use as delimiters. If you need three consecutive backticks
 910 | //	in your code, use four for delimiters, etc.
 911 | //
 912 | //  *  You can use spaces to get literal backticks at the edges:
 913 | //	 
 914 | //		 ... type `` `bar` `` ...
 915 | //	 
 916 | //	   Turns to:
 917 | //	 
 918 | //		 ... type `bar` ...
 919 | //
 920 | 
 921 | 	/*
 922 | 		text = text.replace(/
 923 | 			(^|[^\\])					// Character before opening ` can't be a backslash
 924 | 			(`+)						// $2 = Opening run of `
 925 | 			(							// $3 = The code block
 926 | 				[^\r]*?
 927 | 				[^`]					// attacklab: work around lack of lookbehind
 928 | 			)
 929 | 			\2							// Matching closer
 930 | 			(?!`)
 931 | 		/gm, function(){...});
 932 | 	*/
 933 | 
 934 | 	text = text.replace(/(^|[^\\])(`+)([^\r]*?[^`])\2(?!`)/gm,
 935 | 		function(wholeMatch,m1,m2,m3,m4) {
 936 | 			var c = m3;
 937 | 			c = c.replace(/^([ \t]*)/g,"");	// leading whitespace
 938 | 			c = c.replace(/[ \t]*$/g,"");	// trailing whitespace
 939 | 			c = _EncodeCode(c);
 940 | 			return m1+""+c+"";
 941 | 		});
 942 | 
 943 | 	return text;
 944 | }
 945 | 
 946 | 
 947 | var _EncodeCode = function(text) {
 948 | //
 949 | // Encode/escape certain characters inside Markdown code runs.
 950 | // The point is that in code, these characters are literals,
 951 | // and lose their special Markdown meanings.
 952 | //
 953 | 	// Encode all ampersands; HTML entities are not
 954 | 	// entities within a Markdown code span.
 955 | 	text = text.replace(/&/g,"&");
 956 | 
 957 | 	// Do the angle bracket song and dance:
 958 | 	text = text.replace(//g,">");
 960 | 
 961 | 	// Now, escape characters that are magic in Markdown:
 962 | 	text = escapeCharacters(text,"\*_{}[]\\",false);
 963 | 
 964 | // jj the line above breaks this:
 965 | //---
 966 | 
 967 | //* Item
 968 | 
 969 | //   1. Subitem
 970 | 
 971 | //            special char: *
 972 | //---
 973 | 
 974 | 	return text;
 975 | }
 976 | 
 977 | 
 978 | var _DoItalicsAndBold = function(text) {
 979 | 
 980 | 	//  must go first:
 981 | 	text = text.replace(/(\*\*|__)(?=\S)([^\r]*?\S[*_]*)\1/g,
 982 | 		"$2");
 983 | 
 984 | 	text = text.replace(/(\*|_)(?=\S)([^\r]*?\S)\1/g,
 985 | 		"$2");
 986 | 
 987 | 	return text;
 988 | }
 989 | 
 990 | 
 991 | var _DoBlockQuotes = function(text) {
 992 | 
 993 | 	/*
 994 | 		text = text.replace(/
 995 | 		(								// Wrap whole match in $1
 996 | 			(
 997 | 				^[ \t]*>[ \t]?			// '>' at the start of a line
 998 | 				.+\n					// rest of the first line
 999 | 				(.+\n)*					// subsequent consecutive lines
1000 | 				\n*						// blanks
1001 | 			)+
1002 | 		)
1003 | 		/gm, function(){...});
1004 | 	*/
1005 | 
1006 | 	text = text.replace(/((^[ \t]*>[ \t]?.+\n(.+\n)*\n*)+)/gm,
1007 | 		function(wholeMatch,m1) {
1008 | 			var bq = m1;
1009 | 
1010 | 			// attacklab: hack around Konqueror 3.5.4 bug:
1011 | 			// "----------bug".replace(/^-/g,"") == "bug"
1012 | 
1013 | 			bq = bq.replace(/^[ \t]*>[ \t]?/gm,"~0");	// trim one level of quoting
1014 | 
1015 | 			// attacklab: clean up hack
1016 | 			bq = bq.replace(/~0/g,"");
1017 | 
1018 | 			bq = bq.replace(/^[ \t]+$/gm,"");		// trim whitespace-only lines
1019 | 			bq = _RunBlockGamut(bq);				// recurse
1020 | 			
1021 | 			bq = bq.replace(/(^|\n)/g,"$1  ");
1022 | 			// These leading spaces screw with  content, so we need to fix that:
1023 | 			bq = bq.replace(
1024 | 					/(\s*[^\r]+?<\/pre>)/gm,
1025 | 				function(wholeMatch,m1) {
1026 | 					var pre = m1;
1027 | 					// attacklab: hack around Konqueror 3.5.4 bug:
1028 | 					pre = pre.replace(/^  /mg,"~0");
1029 | 					pre = pre.replace(/~0/g,"");
1030 | 					return pre;
1031 | 				});
1032 | 			
1033 | 			return hashBlock("\n" + bq + "\n");
1034 | 		});
1035 | 	return text;
1036 | }
1037 | 
1038 | 
1039 | var _FormParagraphs = function(text) {
1040 | //
1041 | //  Params:
1042 | //    $text - string to process with html <p> tags
1043 | //
1044 | 
1045 | 	// Strip leading and trailing lines:
1046 | 	text = text.replace(/^\n+/g,"");
1047 | 	text = text.replace(/\n+$/g,"");
1048 | 
1049 | 	var grafs = text.split(/\n{2,}/g);
1050 | 	var grafsOut = new Array();
1051 | 
1052 | 	//
1053 | 	// Wrap <p> tags.
1054 | 	//
1055 | 	var end = grafs.length;
1056 | 	for (var i=0; i= 0) {
1061 | 			grafsOut.push(str);
1062 | 		}
1063 | 		else if (str.search(/\S/) >= 0) {
1064 | 			str = _RunSpanGamut(str);
1065 | 			str = str.replace(/^([ \t]*)/g,"
");
1066 | 			str += ""
1067 | 			grafsOut.push(str);
1068 | 		}
1069 | 
1070 | 	}
1071 | 
1072 | 	//
1073 | 	// Unhashify HTML blocks
1074 | 	//
1075 | 	end = grafsOut.length;
1076 | 	for (var i=0; i= 0) {
1079 | 			var blockText = g_html_blocks[RegExp.$1];
1080 | 			blockText = blockText.replace(/\$/g,"$$$$"); // Escape any dollar signs
1081 | 			grafsOut[i] = grafsOut[i].replace(/~K\d+K/,blockText);
1082 | 		}
1083 | 	}
1084 | 
1085 | 	return grafsOut.join("\n\n");
1086 | }
1087 | 
1088 | 
1089 | var _EncodeAmpsAndAngles = function(text) {
1090 | // Smart processing for ampersands and angle brackets that need to be encoded.
1091 | 	
1092 | 	// Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
1093 | 	//   http://bumppo.net/projects/amputator/
1094 | 	text = text.replace(/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/g,"&");
1095 | 	
1096 | 	// Encode naked <'s
1097 | 	text = text.replace(/<(?![a-z\/?\$!])/gi,"<");
1098 | 	
1099 | 	return text;
1100 | }
1101 | 
1102 | 
1103 | var _EncodeBackslashEscapes = function(text) {
1104 | //
1105 | //   Parameter:  String.
1106 | //   Returns:    The string after processing the backslash escape
1107 | //               sequences below: a backslash followed by one of the
1108 | //               listed characters is handed to escapeCharacters_callback.
1109 | 
1110 | 	// attacklab: The polite way to do this is with the new
1111 | 	// escapeCharacters() function:
1112 | 	//
1113 | 	// 	text = escapeCharacters(text,"\\",true);
1114 | 	// 	text = escapeCharacters(text,"`*_{}[]()>#+-.!",true);
1115 | 	//
1116 | 	// ...but we're sidestepping its use of the (slow) RegExp constructor
1117 | 	// as an optimization for Firefox.  This function gets called a LOT.
1118 | 	// NOTE(review): inside the character class below, "+-." is a range (+ , - .), so "\," is matched as well -- confirm whether that is intended.
1119 | 	text = text.replace(/\\(\\)/g,escapeCharacters_callback); // escaped backslashes are handled first
1120 | 	text = text.replace(/\\([`*_{}\[\]()>#+-.!])/g,escapeCharacters_callback);
1121 | 	return text;
1122 | }
1123 | 
1124 | 
1125 | var _DoAutoLinks = function(text) {
1126 | 
1127 | 	text = text.replace(/<((https?|ftp|dict):[^'">\s]+)>/gi,"$1");
1128 | 
1129 | 	// Email addresses: <address@domain.foo>
1130 | 
1131 | 	/*
1132 | 		text = text.replace(/
1133 | 			<
1134 | 			(?:mailto:)?
1135 | 			(
1136 | 				[-.\w]+
1137 | 				\@
1138 | 				[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
1139 | 			)
1140 | 			>
1141 | 		/gi, _DoAutoLinks_callback());
1142 | 	*/
1143 | 	text = text.replace(/<(?:mailto:)?([-.\w]+\@[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+)>/gi,
1144 | 		function(wholeMatch,m1) {
1145 | 			return _EncodeEmailAddress( _UnescapeSpecialChars(m1) );
1146 | 		}
1147 | 	);
1148 | 
1149 | 	return text;
1150 | }
1151 | 
1152 | 
1153 | var _EncodeEmailAddress = function(addr) {
1154 | //
1155 | //  Input: an email address, e.g. "foo@example.com"
1156 | //
1157 | //  Output: the email address as a mailto link, with each character
1158 | //	of the address encoded as either a decimal or hex entity, in
1159 | //	the hopes of foiling most address harvesting spam bots. E.g.:
1160 | //
1161 | //	foo
1163 | //	   @example.com
1164 | //
1165 | //  Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
1166 | //  mailing list: 
1167 | //
1168 | 
1169 | 	// attacklab: why can't javascript speak hex?
1170 | 	function char2hex(ch) {
1171 | 		var hexDigits = '0123456789ABCDEF';
1172 | 		var dec = ch.charCodeAt(0);
1173 | 		return(hexDigits.charAt(dec>>4) + hexDigits.charAt(dec&15));
1174 | 	}
1175 | 
1176 | 	var encode = [
1177 | 		function(ch){return "&#"+ch.charCodeAt(0)+";";},
1178 | 		function(ch){return "&#x"+char2hex(ch)+";";},
1179 | 		function(ch){return ch;}
1180 | 	];
1181 | 
1182 | 	addr = "mailto:" + addr;
1183 | 
1184 | 	addr = addr.replace(/./g, function(ch) {
1185 | 		if (ch == "@") {
1186 | 		   	// this *must* be encoded. I insist.
1187 | 			ch = encode[Math.floor(Math.random()*2)](ch);
1188 | 		} else if (ch !=":") {
1189 | 			// leave ':' alone (to spot mailto: later)
1190 | 			var r = Math.random();
1191 | 			// roughly 10% raw, 45% hex, 45% dec
1192 | 			ch =  (
1193 | 					r > .9  ?	encode[2](ch)   :
1194 | 					r > .45 ?	encode[1](ch)   :
1195 | 								encode[0](ch)
1196 | 				);
1197 | 		}
1198 | 		return ch;
1199 | 	});
1200 | 
1201 | 	addr = "" + addr + "";
1202 | 	addr = addr.replace(/">.+:/g,"\">"); // strip the mailto: from the visible part
1203 | 
1204 | 	return addr;
1205 | }
1206 | 
1207 | 
1208 | var _UnescapeSpecialChars = function(text) {
1209 | // Swap back in all the special characters we've hidden:
1210 | // every "~E<digits>E" marker in text is replaced by the character
1211 | // whose char code is <digits>. Returns the restored string.
1212 | 	text = text.replace(/~E(\d+)E/g,
1213 | 		function(wholeMatch,m1) {
1214 | 			var charCodeToReplace = parseInt(m1); // m1 is digits only (\d+); no radix is passed here
1215 | 			return String.fromCharCode(charCodeToReplace);
1216 | 		}
1217 | 	);
1218 | 	return text;
1219 | }
1220 | 
1221 | 
1222 | var _Outdent = function(text) {
1223 | //
1224 | // Remove one level of line-leading indentation: a single tab or one to
1225 | // four spaces at the start of every line. Returns the outdented string.
1226 | 
1227 | 	// attacklab: hack around Konqueror 3.5.4 bug:
1228 | 	// "----------bug".replace(/^-/g,"") == "bug"
1229 | 	// So the removal is done in two steps: mark each indent with "~0", then delete the marks.
1230 | 	text = text.replace(/^(\t|[ ]{1,4})/gm,"~0"); // attacklab: g_tab_width
1231 | 
1232 | 	// attacklab: clean up hack (note: this statement relies on automatic semicolon insertion)
1233 | 	text = text.replace(/~0/g,"")
1234 | 
1235 | 	return text;
1236 | }
1237 | 
1238 | var _Detab = function(text) {
1239 | // attacklab: Detab's completely rewritten for speed.
1240 | // In perl we could fix it by anchoring the regexp with \G.
1241 | // In javascript we're less fortunate.
1242 | 
1243 | 	// expand first n-1 tabs
1244 | 	text = text.replace(/\t(?=\t)/g,"    "); // attacklab: g_tab_width
1245 | 
1246 | 	// replace the nth with two sentinels
1247 | 	text = text.replace(/\t/g,"~A~B");
1248 | 
1249 | 	// use the sentinel to anchor our regex so it doesn't explode
1250 | 	text = text.replace(/~B(.+?)~A/g,
1251 | 		function(wholeMatch,m1,m2) {
1252 | 			var leadingText = m1;
1253 | 			var numSpaces = 4 - leadingText.length % 4;  // attacklab: g_tab_width
1254 | 
1255 | 			// there *must* be a better way to do this:
1256 | 			for (var i=0; i
  11 | //
  12 | // Redistributable under a BSD-style open source license.
  13 | // See license.txt for more information.
  14 | //
  15 | // The full source distribution is at:
  16 | //
  17 | //				A A L
  18 | //				T C A
  19 | //				T K B
  20 | //
  21 | //   <http://www.attacklab.net/>
  22 | //
  23 | 
  24 | //
  25 | // Wherever possible, Showdown is a straight, line-by-line port
  26 | // of the Perl version of Markdown.
  27 | //
  28 | // This is not a normal parser design; it's basically just a
  29 | // series of string substitutions.  It's hard to read and
  30 | // maintain this way,  but keeping Showdown close to the original
  31 | // design makes it easier to port new features.
  32 | //
  33 | // More importantly, Showdown behaves like markdown.pl in most
  34 | // edge cases.  So web applications can do client-side preview
  35 | // in Javascript, and then build identical HTML on the server.
  36 | //
  37 | // This port needs the new RegExp functionality of ECMA 262,
  38 | // 3rd Edition (i.e. Javascript 1.5).  Most modern web browsers
  39 | // should do fine.  Even with the new regular expression features,
  40 | // We do a lot of work to emulate Perl's regex functionality.
  41 | // The tricky changes in this file mostly have the "attacklab:"
  42 | // label.  Major or self-explanatory changes don't.
  43 | //
  44 | // Smart diff tools like Araxis Merge will be able to match up
  45 | // this file with markdown.pl in a useful way.  A little tweaking
  46 | // helps: in a copy of markdown.pl, replace "#" with "//" and
  47 | // replace "$text" with "text".  Be sure to ignore whitespace
  48 | // and line endings.
  49 | //
  50 | 
  51 | 
  52 | //
  53 | // Showdown usage:
  54 | //
  55 | //   var text = "Markdown *rocks*.";
  56 | //
  57 | //   var converter = new Showdown.converter();
  58 | //   var html = converter.makeHtml(text);
  59 | //
  60 | //   alert(html);
  61 | //
  62 | // Note: move the sample code to the bottom of this
  63 | // file before uncommenting it.
  64 | //
  65 | 
  66 | 
  67 | //
  68 | // Showdown namespace
  69 | //
  70 | this.Showdown = {};
  71 | 
  72 | //
  73 | // converter
  74 | //
  75 | // Wraps all "globals" so that the only thing
  76 | // exposed is makeHtml().
  77 | //
  78 | this.Showdown.converter = function() {
  79 | 
  80 | //
  81 | // Globals:
  82 | //
  83 | 
  84 | // Global hashes, used by various utility routines
  85 | var g_urls;
  86 | var g_titles;
  87 | var g_html_blocks;
  88 | 
  89 | // Used to track when we're inside an ordered or unordered list
  90 | // (see _ProcessListItems() for details):
  91 | var g_list_level = 0;
  92 | 
  93 | 
  94 | this.makeHtml = function(text) {
  95 | //
  96 | // Main function. The order in which other subs are called here is
  97 | // essential. Link and image substitutions need to happen before
  98 | // _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a>
  99 | // and <img> tags get encoded.
 100 | // Takes the Markdown source string and returns the converted string.
 101 | 
 102 | 	// Clear the global hashes. If we don't clear these, you get conflicts
 103 | 	// from other articles when generating a page which contains more than
 104 | 	// one article (e.g. an index page that shows the N most recent
 105 | 	// articles):
 106 | 	g_urls = new Array();
 107 | 	g_titles = new Array();
 108 | 	g_html_blocks = new Array();
 109 | 
 110 | 	// attacklab: Replace ~ with ~T
 111 | 	// This lets us use tilde as an escape char to avoid md5 hashes
 112 | 	// The choice of character is arbitrary; anything that isn't
 113 |     // magic in Markdown will work.
 114 | 	text = text.replace(/~/g,"~T");
 115 | 
 116 | 	// attacklab: Replace $ with ~D
 117 | 	// RegExp interprets $ as a special character
 118 | 	// when it's in a replacement string
 119 | 	text = text.replace(/\$/g,"~D");
 120 | 
 121 | 	// Standardize line endings
 122 | 	text = text.replace(/\r\n/g,"\n"); // DOS to Unix
 123 | 	text = text.replace(/\r/g,"\n"); // Mac to Unix
 124 | 
 125 | 	// Make sure text begins and ends with a couple of newlines:
 126 | 	text = "\n\n" + text + "\n\n";
 127 | 
 128 | 	// Convert all tabs to spaces.
 129 | 	text = _Detab(text);
 130 | 
 131 | 	// Strip any lines consisting only of spaces and tabs.
 132 | 	// This makes subsequent regexen easier to write, because we can
 133 | 	// match consecutive blank lines with /\n+/ instead of something
 134 | 	// contorted like /[ \t]*\n+/ .
 135 | 	text = text.replace(/^[ \t]+$/mg,"");
 136 | 
 137 | 	// Turn block-level HTML blocks into hash entries
 138 | 	text = _HashHTMLBlocks(text);
 139 | 
 140 | 	// Strip link definitions, store in hashes.
 141 | 	text = _StripLinkDefinitions(text);
 142 | 
 143 | 	text = _RunBlockGamut(text); // block-level transformations (headers, lists, paragraphs, ...)
 144 | 
 145 | 	text = _UnescapeSpecialChars(text); // turn the ~E<code>E markers back into characters
 146 | 
 147 | 	// attacklab: Restore dollar signs
 148 | 	text = text.replace(/~D/g,"$$"); // "$$" in a replacement string yields one literal "$"
 149 | 
 150 | 	// attacklab: Restore tildes
 151 | 	text = text.replace(/~T/g,"~");
 152 | 
 153 | 	return text;
 154 | }
 155 | 
 156 | 
 157 | var _StripLinkDefinitions = function(text) {
 158 | //
 159 | // Strips link definitions from text, stores the URLs and titles in
 160 | // hash references.
 161 | //
 162 | 
 163 | 	// Link defs are in the form: ^[id]: url "optional title"
 164 | 
 165 | 	/*
 166 | 		var text = text.replace(/
 167 | 				^[ ]{0,3}\[(.+)\]:  // id = $1  attacklab: g_tab_width - 1
 168 | 				  [ \t]*
 169 | 				  \n?				// maybe *one* newline
 170 | 				  [ \t]*
 171 | 				?			// url = $2
 172 | 				  [ \t]*
 173 | 				  \n?				// maybe one newline
 174 | 				  [ \t]*
 175 | 				(?:
 176 | 				  (\n*)				// any lines skipped = $3 attacklab: lookbehind removed
 177 | 				  ["(]
 178 | 				  (.+?)				// title = $4
 179 | 				  [")]
 180 | 				  [ \t]*
 181 | 				)?					// title is optional
 182 | 				(?:\n+|$)
 183 | 			  /gm,
 184 | 			  function(){...});
 185 | 	*/
 186 | 	var text = text.replace(/^[ ]{0,3}\[(.+)\]:[ \t]*\n?[ \t]*?[ \t]*\n?[ \t]*(?:(\n*)["(](.+?)[")][ \t]*)?(?:\n+|\Z)/gm,
 187 | 		function (wholeMatch,m1,m2,m3,m4) {
 188 | 			m1 = m1.toLowerCase();
 189 | 			g_urls[m1] = _EncodeAmpsAndAngles(m2);  // Link IDs are case-insensitive
 190 | 			if (m3) {
 191 | 				// Oops, found blank lines, so it's not a title.
 192 | 				// Put back the parenthetical statement we stole.
 193 | 				return m3+m4;
 194 | 			} else if (m4) {
 195 | 				g_titles[m1] = m4.replace(/"/g,""");
 196 | 			}
 197 | 			
 198 | 			// Completely remove the definition from the text
 199 | 			return "";
 200 | 		}
 201 | 	);
 202 | 
 203 | 	return text;
 204 | }
 205 | 
 206 | 
 207 | var _HashHTMLBlocks = function(text) {
 208 | 	// attacklab: Double up blank lines to reduce lookaround
 209 | 	text = text.replace(/\n/g,"\n\n");
 210 | 
 211 | 	// Hashify HTML blocks:
 212 | 	// We only want to do this for block-level HTML tags, such as headers,
 213 | 	// lists, and tables. That's because we still want to wrap <p>s around
 214 | 	// "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 215 | 	// phrase emphasis, and spans. The list of tags we're looking for is
 216 | 	// hard-coded:
 217 | 	var block_tags_a = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del"
 218 | 	var block_tags_b = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math"
 219 | 
 220 | 	// First, look for nested blocks, e.g.:
 221 | 	//   <div>
 222 | 	//     <div>
 223 | 	//     tags for inner block must be indented.
 224 | 	//     </div>
 225 | 	//   </div>
 226 | 	//
 227 | 	// The outermost tags must start at the left margin for this to match, and
 228 | 	// the inner nested divs must be indented.
 229 | 	// We need to do this before the next, more liberal match, because the next
 230 | 	// match will start at the first `<div>` and stop at the first `</div>`.
 231 | 
 232 | 	// attacklab: This regex can be expensive when it fails.
 233 | 	/*
 234 | 		var text = text.replace(/
 235 | 		(						// save in $1
 236 | 			^					// start of line  (with /m)
 237 | 			<($block_tags_a)	// start tag = $2
 238 | 			\b					// word break
 239 | 								// attacklab: hack around khtml/pcre bug...
 240 | 			[^\r]*?\n			// any number of lines, minimally matching
 241 | 							// the matching end tag
 242 | 			[ \t]*				// trailing spaces/tabs
 243 | 			(?=\n+)				// followed by a newline
 244 | 		)						// attacklab: there are sentinel newlines at end of document
 245 | 		/gm,function(){...}};
 246 | 	*/
 247 | 	text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del)\b[^\r]*?\n<\/\2>[ \t]*(?=\n+))/gm,hashElement);
 248 | 
 249 | 	//
 250 | 	// Now match more liberally, simply from `\n<tag>` to `</tag>\n`
 251 | 	//
 252 | 
 253 | 	/*
 254 | 		var text = text.replace(/
 255 | 		(						// save in $1
 256 | 			^					// start of line  (with /m)
 257 | 			<($block_tags_b)	// start tag = $2
 258 | 			\b					// word break
 259 | 								// attacklab: hack around khtml/pcre bug...
 260 | 			[^\r]*?				// any number of lines, minimally matching
 261 | 			.*				// the matching end tag
 262 | 			[ \t]*				// trailing spaces/tabs
 263 | 			(?=\n+)				// followed by a newline
 264 | 		)						// attacklab: there are sentinel newlines at end of document
 265 | 		/gm,function(){...}};
 266 | 	*/
 267 | 	text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math)\b[^\r]*?.*<\/\2>[ \t]*(?=\n+)\n)/gm,hashElement);
 268 | 
 269 | 	// Special case just for <hr />. It was easier to make a special case than
 270 | 	// to make the other regex more complicated.  
 271 | 
 272 | 	/*
 273 | 		text = text.replace(/
 274 | 		(						// save in $1
 275 | 			\n\n				// Starting after a blank line
 276 | 			[ ]{0,3}
 277 | 			(<(hr)				// start tag = $2
 278 | 			\b					// word break
 279 | 			([^<>])*?			// 
 280 | 			\/?>)				// the matching end tag
 281 | 			[ \t]*
 282 | 			(?=\n{2,})			// followed by a blank line
 283 | 		)
 284 | 		/g,hashElement);
 285 | 	*/
 286 | 	text = text.replace(/(\n[ ]{0,3}(<(hr)\b([^<>])*?\/?>)[ \t]*(?=\n{2,}))/g,hashElement);
 287 | 
 288 | 	// Special case for standalone HTML comments:
 289 | 
 290 | 	/*
 291 | 		text = text.replace(/
 292 | 		(						// save in $1
 293 | 			\n\n				// Starting after a blank line
 294 | 			[ ]{0,3}			// attacklab: g_tab_width - 1
 295 | 			
 298 | 			[ \t]*
 299 | 			(?=\n{2,})			// followed by a blank line
 300 | 		)
 301 | 		/g,hashElement);
 302 | 	*/
 303 | 	text = text.replace(/(\n\n[ ]{0,3}[ \t]*(?=\n{2,}))/g,hashElement);
 304 | 
 305 | 	// PHP and ASP-style processor instructions (<?...?> and <%...%>)
 306 | 
 307 | 	/*
 308 | 		text = text.replace(/
 309 | 		(?:
 310 | 			\n\n				// Starting after a blank line
 311 | 		)
 312 | 		(						// save in $1
 313 | 			[ ]{0,3}			// attacklab: g_tab_width - 1
 314 | 			(?:
 315 | 				<([?%])			// $2
 316 | 				[^\r]*?
 317 | 				\2>
 318 | 			)
 319 | 			[ \t]*
 320 | 			(?=\n{2,})			// followed by a blank line
 321 | 		)
 322 | 		/g,hashElement);
 323 | 	*/
 324 | 	text = text.replace(/(?:\n\n)([ ]{0,3}(?:<([?%])[^\r]*?\2>)[ \t]*(?=\n{2,}))/g,hashElement);
 325 | 
 326 | 	// attacklab: Undo double lines (see comment at top of this function)
 327 | 	text = text.replace(/\n\n/g,"\n");
 328 | 	return text;
 329 | }
 330 | 
 331 | var hashElement = function(wholeMatch,m1) {
 332 | 	var blockText = m1;
 333 | 	// replace() callback: stores the captured block (m1) in g_html_blocks and returns a "~KnK" placeholder for it.
 334 | 	// Undo double lines
 335 | 	blockText = blockText.replace(/\n\n/g,"\n");
 336 | 	blockText = blockText.replace(/^\n/,""); // drop a single leading newline
 337 | 	
 338 | 	// strip trailing blank lines
 339 | 	blockText = blockText.replace(/\n+$/g,"");
 340 | 	
 341 | 	// Replace the element text with a marker ("~KxK" where x is its key)
 342 | 	blockText = "\n\n~K" + (g_html_blocks.push(blockText)-1) + "K\n\n"; // push() returns the new length, so length-1 is the index just stored
 343 | 	
 344 | 	return blockText;
 345 | };
 346 | 
 347 | var _RunBlockGamut = function(text) {
 348 | //
 349 | // These are all the transformations that form block-level
 350 | // tags like paragraphs, headers, and list items.
 351 | //
 352 | 	text = _DoHeaders(text);
 353 | 
 354 | 	// Do Horizontal Rules:
 355 | 	var key = hashBlock("");
 356 | 	text = text.replace(/^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$/gm,key);
 357 | 	text = text.replace(/^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$/gm,key);
 358 | 	text = text.replace(/^[ ]{0,2}([ ]?\_[ ]?){3,}[ \t]*$/gm,key);
 359 | 
 360 | 	text = _DoLists(text);
 361 | 	text = _DoCodeBlocks(text);
 362 | 	text = _DoBlockQuotes(text);
 363 | 
 364 | 	// We already ran _HashHTMLBlocks() before, in Markdown(), but that
 365 | 	// was to escape raw HTML in the original Markdown source. This time,
 366 | 	// we're escaping the markup we've just created, so that we don't wrap
 367 | 	// <p> tags around block-level tags.
 368 | 	text = _HashHTMLBlocks(text);
 369 | 	text = _FormParagraphs(text);
 370 | 
 371 | 	return text;
 372 | }
 373 | 
 374 | 
 375 | var _RunSpanGamut = function(text) {
 376 | //
 377 | // These are all the transformations that occur *within* block-level
 378 | // tags like paragraphs, headers, and list items.
 379 | //
 380 | 
 381 | 	text = _DoCodeSpans(text);
 382 | 	text = _EscapeSpecialCharsWithinTagAttributes(text);
 383 | 	text = _EncodeBackslashEscapes(text);
 384 | 
 385 | 	// Process anchor and image tags. Images must come first,
 386 | 	// because ![foo][f] looks like an anchor.
 387 | 	text = _DoImages(text);
 388 | 	text = _DoAnchors(text);
 389 | 
 390 | 	// Make links out of things like `<http://example.com/>`
 391 | 	// Must come after _DoAnchors(), because you can use < and >
 392 | 	// delimiters in inline links like [this](<url>).
 393 | 	text = _DoAutoLinks(text);
 394 | 	text = _EncodeAmpsAndAngles(text);
 395 | 	text = _DoItalicsAndBold(text);
 396 | 
 397 | 	// Do hard breaks:
 398 | 	text = text.replace(/  +\n/g," 
\n");
 399 | 
 400 | 	return text;
 401 | }
 402 | 
 403 | var _EscapeSpecialCharsWithinTagAttributes = function(text) {
 404 | //
 405 | // Within tags -- meaning between < and > -- encode [\ ` * _] so they
 406 | // don't conflict with their use in Markdown for code, italics and strong.
 407 | //
 408 | 
 409 | 	// Build a regex to find HTML tags and comments.  See Friedl's 
 410 | 	// "Mastering Regular Expressions", 2nd Ed., pp. 200-201.
 411 | 	var regex = /(<[a-z\/!$]("[^"]*"|'[^']*'|[^'">])*>|)/gi;
 412 | 
 413 | 	text = text.replace(regex, function(wholeMatch) {
 414 | 		var tag = wholeMatch.replace(/(.)<\/?code>(?=.)/g,"$1`");
 415 | 		tag = escapeCharacters(tag,"\\`*_");
 416 | 		return tag;
 417 | 	});
 418 | 
 419 | 	return text;
 420 | }
 421 | 
 422 | var _DoAnchors = function(text) {
 423 | //
 424 | // Turn Markdown link shortcuts into XHTML <a> tags.
 425 | //
 426 | 	//
 427 | 	// First, handle reference-style links: [link text] [id]
 428 | 	//
 429 | 
 430 | 	/*
 431 | 		text = text.replace(/
 432 | 		(							// wrap whole match in $1
 433 | 			\[
 434 | 			(
 435 | 				(?:
 436 | 					\[[^\]]*\]		// allow brackets nested one level
 437 | 					|
 438 | 					[^\[]			// or anything else
 439 | 				)*
 440 | 			)
 441 | 			\]
 442 | 
 443 | 			[ ]?					// one optional space
 444 | 			(?:\n[ ]*)?				// one optional newline followed by spaces
 445 | 
 446 | 			\[
 447 | 			(.*?)					// id = $3
 448 | 			\]
 449 | 		)()()()()					// pad remaining backreferences
 450 | 		/g,_DoAnchors_callback);
 451 | 	*/
 452 | 	text = text.replace(/(\[((?:\[[^\]]*\]|[^\[\]])*)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g,writeAnchorTag);
 453 | 
 454 | 	//
 455 | 	// Next, inline-style links: [link text](url "optional title")
 456 | 	//
 457 | 
 458 | 	/*
 459 | 		text = text.replace(/
 460 | 			(						// wrap whole match in $1
 461 | 				\[
 462 | 				(
 463 | 					(?:
 464 | 						\[[^\]]*\]	// allow brackets nested one level
 465 | 					|
 466 | 					[^\[\]]			// or anything else
 467 | 				)
 468 | 			)
 469 | 			\]
 470 | 			\(						// literal paren
 471 | 			[ \t]*
 472 | 			()						// no id, so leave $3 empty
 473 | 			?				// href = $4
 474 | 			[ \t]*
 475 | 			(						// $5
 476 | 				(['"])				// quote char = $6
 477 | 				(.*?)				// Title = $7
 478 | 				\6					// matching quote
 479 | 				[ \t]*				// ignore any spaces/tabs between closing quote and )
 480 | 			)?						// title is optional
 481 | 			\)
 482 | 		)
 483 | 		/g,writeAnchorTag);
 484 | 	*/
 485 | 	text = text.replace(/(\[((?:\[[^\]]*\]|[^\[\]])*)\]\([ \t]*()?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g,writeAnchorTag);
 486 | 
 487 | 	//
 488 | 	// Last, handle reference-style shortcuts: [link text]
 489 | 	// These must come last in case you've also got [link test][1]
 490 | 	// or [link test](/foo)
 491 | 	//
 492 | 
 493 | 	/*
 494 | 		text = text.replace(/
 495 | 		(		 					// wrap whole match in $1
 496 | 			\[
 497 | 			([^\[\]]+)				// link text = $2; can't contain '[' or ']'
 498 | 			\]
 499 | 		)()()()()()					// pad rest of backreferences
 500 | 		/g, writeAnchorTag);
 501 | 	*/
 502 | 	text = text.replace(/(\[([^\[\]]+)\])()()()()()/g, writeAnchorTag);
 503 | 
 504 | 	return text;
 505 | }
 506 | 
 507 | var writeAnchorTag = function(wholeMatch,m1,m2,m3,m4,m5,m6,m7) {
 508 | 	if (m7 == undefined) m7 = "";
 509 | 	var whole_match = m1;
 510 | 	var link_text   = m2;
 511 | 	var link_id	 = m3.toLowerCase();
 512 | 	var url		= m4;
 513 | 	var title	= m7;
 514 | 	
 515 | 	if (url == "") {
 516 | 		if (link_id == "") {
 517 | 			// lower-case and turn embedded newlines into spaces
 518 | 			link_id = link_text.toLowerCase().replace(/ ?\n/g," ");
 519 | 		}
 520 | 		url = "#"+link_id;
 521 | 		
 522 | 		if (g_urls[link_id] != undefined) {
 523 | 			url = g_urls[link_id];
 524 | 			if (g_titles[link_id] != undefined) {
 525 | 				title = g_titles[link_id];
 526 | 			}
 527 | 		}
 528 | 		else {
 529 | 			if (whole_match.search(/\(\s*\)$/m)>-1) {
 530 | 				// Special case for explicit empty url
 531 | 				url = "";
 532 | 			} else {
 533 | 				return whole_match;
 534 | 			}
 535 | 		}
 536 | 	}	
 537 | 	
 538 | 	url = escapeCharacters(url,"*_");
 539 | 	var result = "";
 548 | 	
 549 | 	return result;
 550 | }
 551 | 
 552 | 
 553 | var _DoImages = function(text) {
 554 | //
 555 | // Turn Markdown image shortcuts into <img> tags.
 556 | //
 557 | 
 558 | 	//
 559 | 	// First, handle reference-style labeled images: ![alt text][id]
 560 | 	//
 561 | 
 562 | 	/*
 563 | 		text = text.replace(/
 564 | 		(						// wrap whole match in $1
 565 | 			!\[
 566 | 			(.*?)				// alt text = $2
 567 | 			\]
 568 | 
 569 | 			[ ]?				// one optional space
 570 | 			(?:\n[ ]*)?			// one optional newline followed by spaces
 571 | 
 572 | 			\[
 573 | 			(.*?)				// id = $3
 574 | 			\]
 575 | 		)()()()()				// pad rest of backreferences
 576 | 		/g,writeImageTag);
 577 | 	*/
 578 | 	text = text.replace(/(!\[(.*?)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g,writeImageTag);
 579 | 
 580 | 	//
 581 | 	// Next, handle inline images:  ![alt text](url "optional title")
 582 | 	// Don't forget: encode * and _
 583 | 
 584 | 	/*
 585 | 		text = text.replace(/
 586 | 		(						// wrap whole match in $1
 587 | 			!\[
 588 | 			(.*?)				// alt text = $2
 589 | 			\]
 590 | 			\s?					// One optional whitespace character
 591 | 			\(					// literal paren
 592 | 			[ \t]*
 593 | 			()					// no id, so leave $3 empty
 594 | 			?			// src url = $4
 595 | 			[ \t]*
 596 | 			(					// $5
 597 | 				(['"])			// quote char = $6
 598 | 				(.*?)			// title = $7
 599 | 				\6				// matching quote
 600 | 				[ \t]*
 601 | 			)?					// title is optional
 602 | 		\)
 603 | 		)
 604 | 		/g,writeImageTag);
 605 | 	*/
 606 | 	text = text.replace(/(!\[(.*?)\]\s?\([ \t]*()?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g,writeImageTag);
 607 | 
 608 | 	return text;
 609 | }
 610 | 
 611 | var writeImageTag = function(wholeMatch,m1,m2,m3,m4,m5,m6,m7) {
 612 | 	var whole_match = m1;
 613 | 	var alt_text   = m2;
 614 | 	var link_id	 = m3.toLowerCase();
 615 | 	var url		= m4;
 616 | 	var title	= m7;
 617 | 
 618 | 	if (!title) title = "";
 619 | 	
 620 | 	if (url == "") {
 621 | 		if (link_id == "") {
 622 | 			// lower-case and turn embedded newlines into spaces
 623 | 			link_id = alt_text.toLowerCase().replace(/ ?\n/g," ");
 624 | 		}
 625 | 		url = "#"+link_id;
 626 | 		
 627 | 		if (g_urls[link_id] != undefined) {
 628 | 			url = g_urls[link_id];
 629 | 			if (g_titles[link_id] != undefined) {
 630 | 				title = g_titles[link_id];
 631 | 			}
 632 | 		}
 633 | 		else {
 634 | 			return whole_match;
 635 | 		}
 636 | 	}	
 637 | 	
 638 | 	alt_text = alt_text.replace(/"/g,""");
 639 | 	url = escapeCharacters(url,"*_");
 640 | 	var result = "" + _RunSpanGamut(m1) + "");});
 668 | 
 669 | 	text = text.replace(/^(.+)[ \t]*\n-+[ \t]*\n+/gm,
 670 | 		function(matchFound,m1){return hashBlock("
" + _RunSpanGamut(m1) + "
");});
 671 | 
 672 | 	// atx-style headers:
 673 | 	//  # Header 1
 674 | 	//  ## Header 2
 675 | 	//  ## Header 2 with closing hashes ##
 676 | 	//  ...
 677 | 	//  ###### Header 6
 678 | 	//
 679 | 
 680 | 	/*
 681 | 		text = text.replace(/
 682 | 			^(\#{1,6})				// $1 = string of #'s
 683 | 			[ \t]*
 684 | 			(.+?)					// $2 = Header text
 685 | 			[ \t]*
 686 | 			\#*						// optional closing #'s (not counted)
 687 | 			\n+
 688 | 		/gm, function() {...});
 689 | 	*/
 690 | 
 691 | 	text = text.replace(/^(\#{1,6})[ \t]*(.+?)[ \t]*\#*\n+/gm,
 692 | 		function(wholeMatch,m1,m2) {
 693 | 			var h_level = m1.length;
 694 | 			return hashBlock("" + _RunSpanGamut(m2) + "");
 695 | 		});
 696 | 
 697 | 	return text;
 698 | }
 699 | 
 700 | // This declaration keeps Dojo compressor from outputting garbage:
 701 | var _ProcessListItems;
 702 | 
 703 | var _DoLists = function(text) {
 704 | //
 705 | // Form HTML ordered (numbered) and unordered (bulleted) lists.
 706 | //
 707 | 
 708 | 	// attacklab: add sentinel to hack around khtml/safari bug:
 709 | 	// http://bugs.webkit.org/show_bug.cgi?id=11231
 710 | 	text += "~0";
 711 | 
 712 | 	// Re-usable pattern to match any entire ul or ol list:
 713 | 
 714 | 	/*
 715 | 		var whole_list = /
 716 | 		(									// $1 = whole list
 717 | 			(								// $2
 718 | 				[ ]{0,3}					// attacklab: g_tab_width - 1
 719 | 				([*+-]|\d+[.])				// $3 = first list item marker
 720 | 				[ \t]+
 721 | 			)
 722 | 			[^\r]+?
 723 | 			(								// $4
 724 | 				~0							// sentinel for workaround; should be $
 725 | 			|
 726 | 				\n{2,}
 727 | 				(?=\S)
 728 | 				(?!							// Negative lookahead for another list item marker
 729 | 					[ \t]*
 730 | 					(?:[*+-]|\d+[.])[ \t]+
 731 | 				)
 732 | 			)
 733 | 		)/g
 734 | 	*/
 735 | 	var whole_list = /^(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/gm;
 736 | 
 737 | 	if (g_list_level) {
 738 | 		text = text.replace(whole_list,function(wholeMatch,m1,m2) {
 739 | 			var list = m1;
 740 | 			var list_type = (m2.search(/[*+-]/g)>-1) ? "ul" : "ol";
 741 | 
 742 | 			// Turn double returns into triple returns, so that we can make a
 743 | 			// paragraph for the last item in a list, if necessary:
 744 | 			list = list.replace(/\n{2,}/g,"\n\n\n");;
 745 | 			var result = _ProcessListItems(list);
 746 | 	
 747 | 			// Trim any trailing whitespace, to put the closing `</$list_type>`
 748 | 			// up on the preceding line, to get it past the current stupid
 749 | 			// HTML block parser. This is a hack to work around the terrible
 750 | 			// hack that is the HTML block parser.
 751 | 			result = result.replace(/\s+$/,"");
 752 | 			result = "<"+list_type+">" + result + "\n";
 753 | 			return result;
 754 | 		});
 755 | 	} else {
 756 | 		whole_list = /(\n\n|^\n?)(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/g;
 757 | 		text = text.replace(whole_list,function(wholeMatch,m1,m2,m3) {
 758 | 			var runup = m1;
 759 | 			var list = m2;
 760 | 
 761 | 			var list_type = (m3.search(/[*+-]/g)>-1) ? "ul" : "ol";
 762 | 			// Turn double returns into triple returns, so that we can make a
 763 | 			// paragraph for the last item in a list, if necessary:
 764 | 			var list = list.replace(/\n{2,}/g,"\n\n\n");;
 765 | 			var result = _ProcessListItems(list);
 766 | 			result = runup + "<"+list_type+">\n" + result + "\n";	
 767 | 			return result;
 768 | 		});
 769 | 	}
 770 | 
 771 | 	// attacklab: strip sentinel
 772 | 	text = text.replace(/~0/,"");
 773 | 
 774 | 	return text;
 775 | }
 776 | 
 777 | _ProcessListItems = function(list_str) {
 778 | //
 779 | //  Process the contents of a single ordered or unordered list, splitting it
 780 | //  into individual list items.
 781 | //
 782 | 	// The $g_list_level global keeps track of when we're inside a list.
 783 | 	// Each time we enter a list, we increment it; when we leave a list,
 784 | 	// we decrement. If it's zero, we're not in a list anymore.
 785 | 	//
 786 | 	// We do this because when we're not inside a list, we want to treat
 787 | 	// something like this:
 788 | 	//
 789 | 	//    I recommend upgrading to version
 790 | 	//    8. Oops, now this line is treated
 791 | 	//    as a sub-list.
 792 | 	//
 793 | 	// As a single paragraph, despite the fact that the second line starts
 794 | 	// with a digit-period-space sequence.
 795 | 	//
 796 | 	// Whereas when we're inside a list (or sub-list), that line will be
 797 | 	// treated as the start of a sub-list. What a kludge, huh? This is
 798 | 	// an aspect of Markdown's syntax that's hard to parse perfectly
 799 | 	// without resorting to mind-reading. Perhaps the solution is to
 800 | 	// change the syntax rules such that sub-lists must start with a
 801 | 	// starting cardinal number; e.g. "1." or "a.".
 802 | 
 803 | 	g_list_level++;
 804 | 
 805 | 	// trim trailing blank lines:
 806 | 	list_str = list_str.replace(/\n{2,}$/,"\n");
 807 | 
 808 | 	// attacklab: add sentinel to emulate \z
 809 | 	list_str += "~0";
 810 | 
 811 | 	/*
 812 | 		list_str = list_str.replace(/
 813 | 			(\n)?							// leading line = $1
 814 | 			(^[ \t]*)						// leading whitespace = $2
 815 | 			([*+-]|\d+[.]) [ \t]+			// list marker = $3
 816 | 			([^\r]+?						// list item text   = $4
 817 | 			(\n{1,2}))
 818 | 			(?= \n* (~0 | \2 ([*+-]|\d+[.]) [ \t]+))
 819 | 		/gm, function(){...});
 820 | 	*/
 821 | 	list_str = list_str.replace(/(\n)?(^[ \t]*)([*+-]|\d+[.])[ \t]+([^\r]+?(\n{1,2}))(?=\n*(~0|\2([*+-]|\d+[.])[ \t]+))/gm,
 822 | 		function(wholeMatch,m1,m2,m3,m4){
 823 | 			var item = m4;
 824 | 			var leading_line = m1;
 825 | 			var leading_space = m2;
 826 | 
 827 | 			if (leading_line || (item.search(/\n{2,}/)>-1)) {
 828 | 				item = _RunBlockGamut(_Outdent(item));
 829 | 			}
 830 | 			else {
 831 | 				// Recursion for sub-lists:
 832 | 				item = _DoLists(_Outdent(item));
 833 | 				item = item.replace(/\n$/,""); // chomp(item)
 834 | 				item = _RunSpanGamut(item);
 835 | 			}
 836 | 
 837 | 			return  "" + item + "\n";
 838 | 		}
 839 | 	);
 840 | 
 841 | 	// attacklab: strip sentinel
 842 | 	list_str = list_str.replace(/~0/g,"");
 843 | 
 844 | 	g_list_level--;
 845 | 	return list_str;
 846 | }
 847 | 
 848 | 
 849 | var _DoCodeBlocks = function(text) {
 850 | //
 851 | //  Process Markdown `` blocks.
 852 | //  
 853 | 
 854 | 	/*
 855 | 		text = text.replace(text,
 856 | 			/(?:\n\n|^)
 857 | 			(								// $1 = the code block -- one or more lines, starting with a space/tab
 858 | 				(?:
 859 | 					(?:[ ]{4}|\t)			// Lines must start with a tab or a tab-width of spaces - attacklab: g_tab_width
 860 | 					.*\n+
 861 | 				)+
 862 | 			)
 863 | 			(\n*[ ]{0,3}[^ \t\n]|(?=~0))	// attacklab: g_tab_width
 864 | 		/g,function(){...});
 865 | 	*/
 866 | 
 867 | 	// attacklab: sentinel workarounds for lack of \A and \Z, safari\khtml bug
 868 | 	text += "~0";
 869 | 	
 870 | 	text = text.replace(/(?:\n\n|^)((?:(?:[ ]{4}|\t).*\n+)+)(\n*[ ]{0,3}[^ \t\n]|(?=~0))/g,
 871 | 		function(wholeMatch,m1,m2) {
 872 | 			var codeblock = m1;
 873 | 			var nextChar = m2;
 874 | 		
 875 | 			codeblock = _EncodeCode( _Outdent(codeblock));
 876 | 			codeblock = _Detab(codeblock);
 877 | 			codeblock = codeblock.replace(/^\n+/g,""); // trim leading newlines
 878 | 			codeblock = codeblock.replace(/\n+$/g,""); // trim trailing whitespace
 879 | 
 880 | 			codeblock = "" + codeblock + "\n";
 881 | 
 882 | 			return hashBlock(codeblock) + nextChar;
 883 | 		}
 884 | 	);
 885 | 
 886 | 	// attacklab: strip sentinel
 887 | 	text = text.replace(/~0/,"");
 888 | 
 889 | 	return text;
 890 | }
 891 | 
 892 | var hashBlock = function(text) {
 893 | 	text = text.replace(/(^\n+|\n+$)/g,"");
 894 | 	return "\n\n~K" + (g_html_blocks.push(text)-1) + "K\n\n";
 895 | }
 896 | 
 897 | 
 898 | var _DoCodeSpans = function(text) {
 899 | //
 900 | //   *  Backtick quotes are used for  spans.
 901 | // 
 902 | //   *  You can use multiple backticks as the delimiters if you want to
 903 | //	 include literal backticks in the code span. So, this input:
 904 | //	 
 905 | //		 Just type ``foo `bar` baz`` at the prompt.
 906 | //	 
 907 | //	   Will translate to:
 908 | //	 
 909 | //		 Just type foo `bar` baz at the prompt.
 910 | //	 
 911 | //	There's no arbitrary limit to the number of backticks you
 912 | //	can use as delimters. If you need three consecutive backticks
 913 | //	in your code, use four for delimiters, etc.
 914 | //
 915 | //  *  You can use spaces to get literal backticks at the edges:
 916 | //	 
 917 | //		 ... type `` `bar` `` ...
 918 | //	 
 919 | //	   Turns to:
 920 | //	 
 921 | //		 ... type `bar` ...
 922 | //
 923 | 
 924 | 	/*
 925 | 		text = text.replace(/
 926 | 			(^|[^\\])					// Character before opening ` can't be a backslash
 927 | 			(`+)						// $2 = Opening run of `
 928 | 			(							// $3 = The code block
 929 | 				[^\r]*?
 930 | 				[^`]					// attacklab: work around lack of lookbehind
 931 | 			)
 932 | 			\2							// Matching closer
 933 | 			(?!`)
 934 | 		/gm, function(){...});
 935 | 	*/
 936 | 
 937 | 	text = text.replace(/(^|[^\\])(`+)([^\r]*?[^`])\2(?!`)/gm,
 938 | 		function(wholeMatch,m1,m2,m3,m4) {
 939 | 			var c = m3;
 940 | 			c = c.replace(/^([ \t]*)/g,"");	// leading whitespace
 941 | 			c = c.replace(/[ \t]*$/g,"");	// trailing whitespace
 942 | 			c = _EncodeCode(c);
 943 | 			return m1+""+c+"";
 944 | 		});
 945 | 
 946 | 	return text;
 947 | }
 948 | 
 949 | 
 950 | var _EncodeCode = function(text) {
 951 | //
 952 | // Encode/escape certain characters inside Markdown code runs.
 953 | // The point is that in code, these characters are literals,
 954 | // and lose their special Markdown meanings.
 955 | //
 956 | 	// Encode all ampersands; HTML entities are not
 957 | 	// entities within a Markdown code span.
 958 | 	text = text.replace(/&/g,"&");
 959 | 
 960 | 	// Do the angle bracket song and dance:
 961 | 	text = text.replace(//g,">");
 963 | 
 964 | 	// Now, escape characters that are magic in Markdown:
 965 | 	text = escapeCharacters(text,"\*_{}[]\\",false);
 966 | 
 967 | // jj the line above breaks this:
 968 | //---
 969 | 
 970 | //* Item
 971 | 
 972 | //   1. Subitem
 973 | 
 974 | //            special char: *
 975 | //---
 976 | 
 977 | 	return text;
 978 | }
 979 | 
 980 | 
 981 | var _DoItalicsAndBold = function(text) {
 982 | 
 983 | 	//  must go first:
 984 | 	text = text.replace(/(\*\*|__)(?=\S)([^\r]*?\S[*_]*)\1/g,
 985 | 		"$2");
 986 | 
 987 | 	text = text.replace(/(\*|_)(?=\S)([^\r]*?\S)\1/g,
 988 | 		"$2");
 989 | 
 990 | 	return text;
 991 | }
 992 | 
 993 | 
 994 | var _DoBlockQuotes = function(text) {
 995 | 
 996 | 	/*
 997 | 		text = text.replace(/
 998 | 		(								// Wrap whole match in $1
 999 | 			(
1000 | 				^[ \t]*>[ \t]?			// '>' at the start of a line
1001 | 				.+\n					// rest of the first line
1002 | 				(.+\n)*					// subsequent consecutive lines
1003 | 				\n*						// blanks
1004 | 			)+
1005 | 		)
1006 | 		/gm, function(){...});
1007 | 	*/
1008 | 
1009 | 	text = text.replace(/((^[ \t]*>[ \t]?.+\n(.+\n)*\n*)+)/gm,
1010 | 		function(wholeMatch,m1) {
1011 | 			var bq = m1;
1012 | 
1013 | 			// attacklab: hack around Konqueror 3.5.4 bug:
1014 | 			// "----------bug".replace(/^-/g,"") == "bug"
1015 | 
1016 | 			bq = bq.replace(/^[ \t]*>[ \t]?/gm,"~0");	// trim one level of quoting
1017 | 
1018 | 			// attacklab: clean up hack
1019 | 			bq = bq.replace(/~0/g,"");
1020 | 
1021 | 			bq = bq.replace(/^[ \t]+$/gm,"");		// trim whitespace-only lines
1022 | 			bq = _RunBlockGamut(bq);				// recurse
1023 | 			
1024 | 			bq = bq.replace(/(^|\n)/g,"$1  ");
1025 | 			// These leading spaces screw with  content, so we need to fix that:
1026 | 			bq = bq.replace(
1027 | 					/(\s*[^\r]+?<\/pre>)/gm,
1028 | 				function(wholeMatch,m1) {
1029 | 					var pre = m1;
1030 | 					// attacklab: hack around Konqueror 3.5.4 bug:
1031 | 					pre = pre.replace(/^  /mg,"~0");
1032 | 					pre = pre.replace(/~0/g,"");
1033 | 					return pre;
1034 | 				});
1035 | 			
1036 | 			return hashBlock("\n" + bq + "\n");
1037 | 		});
1038 | 	return text;
1039 | }
1040 | 
1041 | 
var _FormParagraphs = function(text) {
//
//  Params:
//    text - string to process with html <p> tags
//
//  Returns:
//    the text with every plain paragraph wrapped in <p>...</p> and every
//    ~K<n>K placeholder swapped back for the block stored in g_html_blocks.
//

	// Strip leading and trailing lines:
	text = text.replace(/^\n+/g,"");
	text = text.replace(/\n+$/g,"");

	var grafs = text.split(/\n{2,}/g);
	var grafsOut = [];

	//
	// Wrap <p> tags.
	//
	var end = grafs.length;
	for (var i=0; i<end; i++) {
		var str = grafs[i];

		// if this is an HTML marker, copy it
		if (str.search(/~K(\d+)K/g) >= 0) {
			grafsOut.push(str);
		}
		else if (str.search(/\S/) >= 0) {
			str = _RunSpanGamut(str);
			str = str.replace(/^([ \t]*)/g,"<p>");
			str += "</p>";
			grafsOut.push(str);
		}
		// whitespace-only chunks are dropped
	}

	//
	// Unhashify HTML blocks
	//
	end = grafsOut.length;
	for (var j=0; j<end; j++) {
		// if this is a marker for an html block...
		var marker;
		while ((marker = /~K(\d+)K/.exec(grafsOut[j])) !== null) {
			var blockText = g_html_blocks[marker[1]];
			// Escape any dollar signs so they are not read as
			// replacement patterns ($1, $&, ...) by replace().
			blockText = blockText.replace(/\$/g,"$$$$");
			grafsOut[j] = grafsOut[j].replace(/~K\d+K/,blockText);
		}
	}

	return grafsOut.join("\n\n");
}
1090 | 
1091 | 
var _EncodeAmpsAndAngles = function(text) {
// Smart processing for ampersands and angle brackets that need to be encoded.
//
// Parameter:  text - span-level text that may already contain entities/tags.
// Returns:    the text with bare '&' turned into &amp; and bare '<' into &lt;.

	// Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
	//   http://bumppo.net/projects/amputator/
	// The lookahead leaves anything that already looks like an entity alone.
	text = text.replace(/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/g,"&amp;");

	// Encode naked <'s (those that cannot be the start of a tag)
	text = text.replace(/<(?![a-z\/?\$!])/gi,"&lt;");

	return text;
}
1104 | 
1105 | 
var _EncodeBackslashEscapes = function(text) {
//
//   Parameter:  String.
//   Returns:	The string, after processing the following backslash
//			   escape sequences:  \\ \` \* \_ \{ \} \[ \] \( \) \> \# \+ \- \. \!
//

	// attacklab: The polite way to do this is with the new
	// escapeCharacters() function:
	//
	// 	text = escapeCharacters(text,"\\",true);
	// 	text = escapeCharacters(text,"`*_{}[]()>#+-.!",true);
	//
	// ...but we're sidestepping its use of the (slow) RegExp constructor
	// as an optimization for Firefox.  This function gets called a LOT.

	// Escaped backslashes go first so that "\\*" is read as an escaped
	// backslash followed by a star, not a backslash and an escaped star.
	text = text.replace(/\\(\\)/g,escapeCharacters_callback);

	// The hyphen is escaped inside the class: written bare as "+-." it
	// forms a range that would also match ','.
	text = text.replace(/\\([`*_{}\[\]()>#+\-.!])/g,escapeCharacters_callback);
	return text;
}
1126 | 
1127 | 
var _DoAutoLinks = function(text) {
//
// Turn <http://example.com/> and <address@example.com> into links.
//
// Parameter:  text - span-level Markdown source.
// Returns:    the text with automatic links replaced by <a> elements.
//

	text = text.replace(/<((https?|ftp|dict):[^'">\s]+)>/gi,"<a href=\"$1\">$1</a>");

	// Email addresses: <address@domain.foo>

	/*
		<
		(?:mailto:)?
		(
			[-.\w]+
			\@
			[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
		)
		>
	*/
	text = text.replace(/<(?:mailto:)?([-.\w]+\@[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+)>/gi,
		function(wholeMatch,m1) {
			// Undo any ~E<n>E escapes first so the real characters get encoded.
			return _EncodeEmailAddress( _UnescapeSpecialChars(m1) );
		}
	);

	return text;
}
1154 | 
1155 | 
var _EncodeEmailAddress = function(addr) {
//
//  Input: an email address, e.g. "foo@example.com"
//
//  Output: the email address as a mailto link, with each character
//	of the address encoded as either a decimal or hex entity, in
//	the hopes of foiling most address harvesting spam bots. E.g.:
//
//	<a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;...">
//	   &#102;&#111;&#111;&#64;...</a>
//
//  The choice of encoding per character is random, so the exact output
//  differs from call to call.
//
//  Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
//  mailing list.
//

	// Two-digit (or longer) upper-case hex for a character's code unit.
	function char2hex(ch) {
		var hex = ch.charCodeAt(0).toString(16).toUpperCase();
		return (hex.length < 2) ? "0" + hex : hex;
	}

	var encode = [
		function(ch){return "&#"+ch.charCodeAt(0)+";";},
		function(ch){return "&#x"+char2hex(ch)+";";},
		function(ch){return ch;}
	];

	addr = "mailto:" + addr;

	addr = addr.replace(/./g, function(ch) {
		if (ch == "@") {
		   	// this *must* be encoded. I insist.
			ch = encode[Math.floor(Math.random()*2)](ch);
		} else if (ch !=":") {
			// leave ':' alone (to spot mailto: later)
			var r = Math.random();
			// roughly 10% raw, 45% hex, 45% dec
			ch =  (
					r > .9  ?	encode[2](ch)   :
					r > .45 ?	encode[1](ch)   :
								encode[0](ch)
				);
		}
		return ch;
	});

	addr = "<a href=\"" + addr + "\">" + addr + "</a>";
	addr = addr.replace(/">.+:/g,"\">"); // strip the mailto: from the visible part

	return addr;
}
1209 | 
1210 | 
var _UnescapeSpecialChars = function(text) {
//
// Swap back in all the special characters we've hidden.
//
// Parameter:  text - string containing ~E<charCode>E placeholders.
// Returns:    the string with each placeholder replaced by its character.
//
	text = text.replace(/~E(\d+)E/g,
		function(wholeMatch,m1) {
			// Explicit radix: the code is always written in decimal.
			var charCodeToReplace = parseInt(m1, 10);
			return String.fromCharCode(charCodeToReplace);
		}
	);
	return text;
}
1223 | 
1224 | 
var _Outdent = function(text) {
//
// Strip one indentation level (a tab, or one to four spaces) from the
// start of every line.
//

	// attacklab: Konqueror 3.5.4 mishandles replace(/^.../g,""), so the
	// indent is first swapped for a "~0" marker and the marker is removed
	// in a second pass.
	var marked = text.replace(/^(\t|[ ]{1,4})/gm,"~0"); // attacklab: g_tab_width

	return marked.replace(/~0/g,"");
}
1240 | 
1241 | var _Detab = function(text) {
1242 | // attacklab: Detab's completely rewritten for speed.
1243 | // In perl we could fix it by anchoring the regexp with \G.
1244 | // In javascript we're less fortunate.
1245 | 
1246 | 	// expand first n-1 tabs
1247 | 	text = text.replace(/\t(?=\t)/g,"    "); // attacklab: g_tab_width
1248 | 
1249 | 	// replace the nth with two sentinels
1250 | 	text = text.replace(/\t/g,"~A~B");
1251 | 
1252 | 	// use the sentinel to anchor our regex so it doesn't explode
1253 | 	text = text.replace(/~B(.+?)~A/g,
1254 | 		function(wholeMatch,m1,m2) {
1255 | 			var leadingText = m1;
1256 | 			var numSpaces = 4 - leadingText.length % 4;  // attacklab: g_tab_width
1257 | 
1258 | 			// there *must* be a better way to do this:
1259 | 			for (var i=0; i
   8 | #
   9 | 
  10 | 
  11 | package Markdown;
  12 | require 5.006_000;
  13 | use strict;
  14 | use warnings;
  15 | 
  16 | use Digest::MD5 qw(md5_hex);
  17 | use vars qw($VERSION);
  18 | $VERSION = '1.0.2b2';
  19 | # Sat 26 Mar 2005
  20 | 
  21 | ## Disabled; causes problems under Perl 5.6.1:
  22 | # use utf8;
  23 | # binmode( STDOUT, ":utf8" );  # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html
  24 | 
  25 | 
  26 | #
  27 | # Global default settings:
  28 | #
  29 | my $g_empty_element_suffix = " />";     # Change to ">" for HTML output
  30 | my $g_tab_width = 4;
  31 | 
  32 | 
  33 | #
  34 | # Globals:
  35 | #
  36 | 
  37 | # Regex to match balanced [brackets]. See Friedl's
  38 | # "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
  39 | my $g_nested_brackets;
  40 | $g_nested_brackets = qr{
  41 |   (?>                 # Atomic matching
  42 |      [^\[\]]+              # Anything other than brackets
  43 |    | 
  44 |      \[
  45 |      (??{ $g_nested_brackets })    # Recursive set of nested brackets
  46 |      \]
  47 |   )*
  48 | }x;
  49 | 
  50 | 
  51 | # Table of hash values for escaped characters:
  52 | my %g_escape_table;
  53 | foreach my $char (split //, '\\`*_{}[]()>#+-.!') {
  54 |   $g_escape_table{$char} = md5_hex($char);
  55 | }
  56 | 
  57 | 
  58 | # Global hashes, used by various utility routines
  59 | my %g_urls;
  60 | my %g_titles;
  61 | my %g_html_blocks;
  62 | 
  63 | # Used to track when we're inside an ordered or unordered list
  64 | # (see _ProcessListItems() for details):
  65 | my $g_list_level = 0;
  66 | 
  67 | 
  68 | #### Blosxom plug-in interface ##########################################
  69 | 
  70 | # Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine
  71 | # which posts Markdown should process, using a "meta-markup: markdown"
  72 | # header. If it's set to 0 (the default), Markdown will process all
  73 | # entries.
  74 | my $g_blosxom_use_meta = 0;
  75 | 
  76 | sub start { 1; }
  77 | sub story {
  78 |   my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
  79 | 
  80 |   if ( (! $g_blosxom_use_meta) or
  81 |        (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i))
  82 |        ){
  83 |       $$body_ref  = Markdown($$body_ref);
  84 |      }
  85 |      1;
  86 | }
  87 | 
  88 | 
  89 | #### Movable Type plug-in interface #####################################
  90 | eval {require MT};  # Test to see if we're running in MT.
       # $@ is empty when the require above succeeded, i.e. MT is available;
       # otherwise control falls through to the command-line branch below.
  91 | unless ($@) {
  92 |     require MT;
  93 |     import  MT;
  94 |     require MT::Template::Context;
  95 |     import  MT::Template::Context;
  96 | 
  97 |   eval {require MT::Plugin};  # Test to see if we're running >= MT 3.0.
  98 |   unless ($@) {
  99 |     require MT::Plugin;
 100 |     import  MT::Plugin;
 101 |     my $plugin = new MT::Plugin({
 102 |       name => "Markdown",
 103 |       description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)",
 104 |       doc_link => 'http://daringfireball.net/projects/markdown/'
 105 |     });
 106 |     MT->add_plugin( $plugin );
 107 |   }
 108 | 
       # Container tag: records the lower-cased 'output' argument in the
       # context stash under 'markdown_output' (read by the text filters
       # below), then builds and returns the enclosed tokens.
 109 |   MT::Template::Context->add_container_tag(MarkdownOptions => sub {
 110 |     my $ctx   = shift;
 111 |     my $args = shift;
 112 |     my $builder = $ctx->stash('builder');
 113 |     my $tokens = $ctx->stash('tokens');
 114 | 
 115 |     if (defined ($args->{'output'}) ) {
 116 |       $ctx->stash('markdown_output', lc $args->{'output'});
 117 |     }
 118 | 
 119 |     defined (my $str = $builder->build($ctx, $tokens) )
 120 |       or return $ctx->error($builder->errstr);
 121 |     $str;    # return value
 122 |   });
 123 | 
       # Plain Markdown text filter. The stashed 'markdown_output' value
       # selects the mode: a value starting with "html" switches the empty
       # element suffix to ">", "raw" returns the text unprocessed, and
       # anything else restores the " />" suffix.
 124 |   MT->add_text_filter('markdown' => {
 125 |     label     => 'Markdown',
 126 |     docs      => 'http://daringfireball.net/projects/markdown/',
 127 |     on_format => sub {
 128 |       my $text = shift;
 129 |       my $ctx  = shift;
 130 |       my $raw  = 0;
 131 |         if (defined $ctx) {
 132 |           my $output = $ctx->stash('markdown_output'); 
 133 |         if (defined $output  &&  $output =~ m/^html/i) {
 134 |           $g_empty_element_suffix = ">";
 135 |           $ctx->stash('markdown_output', '');
 136 |         }
 137 |         elsif (defined $output  &&  $output eq 'raw') {
 138 |           $raw = 1;
 139 |           $ctx->stash('markdown_output', '');
 140 |         }
 141 |         else {
 142 |           $raw = 0;
 143 |           $g_empty_element_suffix = " />";
 144 |         }
 145 |       }
 146 |       $text = $raw ? $text : Markdown($text);
 147 |       $text;
 148 |     },
 149 |   });
 150 | 
 151 |   # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter:
 152 |   my $smartypants;
 153 | 
 154 |   {
 155 |     no warnings "once";
 156 |     $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'};
 157 |   }
 158 | 
 159 |   if ($smartypants) {
 160 |     MT->add_text_filter('markdown_with_smartypants' => {
 161 |       label     => 'Markdown With SmartyPants',
 162 |       docs      => 'http://daringfireball.net/projects/markdown/',
 163 |       on_format => sub {
 164 |         my $text = shift;
 165 |         my $ctx  = shift;
 166 |         if (defined $ctx) {
 167 |           my $output = $ctx->stash('markdown_output'); 
               # NOTE(review): unlike the plain filter above, this one
               # compares with eq 'html' (not m/^html/i), has no 'raw'
               # mode and does not clear the stash -- confirm intended.
 168 |           if (defined $output  &&  $output eq 'html') {
 169 |             $g_empty_element_suffix = ">";
 170 |           }
 171 |           else {
 172 |             $g_empty_element_suffix = " />";
 173 |           }
 174 |         }
 175 |         $text = Markdown($text);
 176 |         $text = $smartypants->($text, '1');
 177 |       },
 178 |     });
 179 |   }
 180 | }
 181 | else {
 182 | #### BBEdit/command-line text filter interface ##########################
 183 | # Needs to be hidden from MT (and Blosxom when running in static mode).
 184 | 
 185 |     # We're only using $blosxom::version once; tell Perl not to warn us:
 186 |   no warnings 'once';
 187 |     unless ( defined($blosxom::version) ) {
 188 |     use warnings;
 189 | 
 190 |     #### Check for command-line switches: #################
 191 |     my %cli_opts;
 192 |     use Getopt::Long;
           # pass_through: options not listed below are left in @ARGV
           # instead of being reported as errors.
 193 |     Getopt::Long::Configure('pass_through');
 194 |     GetOptions(\%cli_opts,
 195 |       'version',
 196 |       'shortversion',
 197 |       'html4tags',
 198 |     );
 199 |     if ($cli_opts{'version'}) {    # Version info
 200 |       print "\nThis is Markdown, version $VERSION.\n";
 201 |       print "Copyright 2004 John Gruber\n";
 202 |       print "http://daringfireball.net/projects/markdown/\n\n";
 203 |       exit 0;
 204 |     }
 205 |     if ($cli_opts{'shortversion'}) {    # Just the version number string.
 206 |       print $VERSION;
 207 |       exit 0;
 208 |     }
 209 |     if ($cli_opts{'html4tags'}) {      # Use HTML tag style instead of XHTML
 210 |       $g_empty_element_suffix = ">";
 211 |     }
 212 | 
 213 | 
 214 |     #### Process incoming text: ###########################
 215 |     my $text;
 216 |     {
           # Undefining the input record separator makes <> return all
           # remaining input in one read.
 217 |       local $/;               # Slurp the whole file
 218 |       $text = <>;
 219 |     }
 220 |         print Markdown($text);
 221 |     }
 222 | }
 223 | 
 224 | 
 225 | 
 226 | sub Markdown {
 227 | #
 228 | # Main function. The order in which other subs are called here is
 229 | # essential. Link and image substitutions need to happen before
 230 | # _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the 
 231 | # and  tags get encoded.
 232 | #
 233 |   my $text = shift;
 234 | 
 235 |   # Clear the global hashes. If we don't clear these, you get conflicts
 236 |   # from other articles when generating a page which contains more than
 237 |   # one article (e.g. an index page that shows the N most recent
 238 |   # articles):
 239 |   %g_urls = ();
 240 |   %g_titles = ();
 241 |   %g_html_blocks = ();
 242 | 
 243 | 
 244 |   # Standardize line endings:
 245 |   $text =~ s{\r\n}{\n}g;   # DOS to Unix
 246 |   $text =~ s{\r}{\n}g;   # Mac to Unix
 247 | 
 248 |   # Make sure $text ends with a couple of newlines:
 249 |   $text .= "\n\n";
 250 | 
 251 |   # Convert all tabs to spaces.
 252 |   $text = _Detab($text);
 253 | 
 254 |   # Strip any lines consisting only of spaces and tabs.
 255 |   # This makes subsequent regexen easier to write, because we can
 256 |   # match consecutive blank lines with /\n+/ instead of something
 257 |   # contorted like /[ \t]*\n+/ .
 258 |   $text =~ s/^[ \t]+$//mg;
 259 | 
 260 |   # Turn block-level HTML blocks into hash entries
 261 |   $text = _HashHTMLBlocks($text);
 262 | 
 263 |   # Strip link definitions, store in hashes.
 264 |   $text = _StripLinkDefinitions($text);
 265 | 
 266 |   $text = _RunBlockGamut($text);
 267 | 
 268 |   $text = _UnescapeSpecialChars($text);
 269 | 
 270 |   return $text . "\n";
 271 | }
 272 | 
 273 | 
 274 | sub _StripLinkDefinitions {
 275 | #
 276 | # Strips link definitions from text, stores the URLs and titles in
 277 | # hash references.
 278 | #
 279 |   my $text = shift;
 280 |   my $less_than_tab = $g_tab_width - 1;
 281 | 
 282 |   # Link defs are in the form: ^[id]: url "optional title"
 283 |   while ($text =~ s{
 284 |             ^[ ]{0,$less_than_tab}\[(.+)\]:  # id = $1
 285 |               [ \t]*
 286 |               \n?        # maybe *one* newline
 287 |               [ \t]*
 288 |             ?      # url = $2
 289 |               [ \t]*
 290 |               \n?        # maybe one newline
 291 |               [ \t]*
 292 |             (?:
 293 |               (?<=\s)      # lookbehind for whitespace
 294 |               ["(]
 295 |               (.+?)      # title = $3
 296 |               [")]
 297 |               [ \t]*
 298 |             )?  # title is optional
 299 |             (?:\n+|\Z)
 300 |           }
 301 |           {}mx) {
 302 |     $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 );  # Link IDs are case-insensitive
 303 |     if ($3) {
 304 |       $g_titles{lc $1} = $3;
 305 |       $g_titles{lc $1} =~ s/"/"/g;
 306 |     }
 307 |   }
 308 | 
 309 |   return $text;
 310 | }
 311 | 
 312 | 
 313 | sub _HashHTMLBlocks {
 314 |   my $text = shift;
 315 |   my $less_than_tab = $g_tab_width - 1;
 316 | 
 317 |   # Hashify HTML blocks:
 318 |   # We only want to do this for block-level HTML tags, such as headers,
 319 |   # lists, and tables. That's because we still want to wrap s around
 320 |   # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 321 |   # phrase emphasis, and spans. The list of tags we're looking for is
 322 |   # hard-coded:
 323 |   my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/;
 324 |   my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/;
 325 | 
 326 |   # First, look for nested blocks, e.g.:
 327 |   #   

 328 |   #     
 329 |   #     tags for inner block must be indented.
 330 |   #     
 331 |   #   
 332 |   #
 333 |   # The outermost tags must start at the left margin for this to match, and
 334 |   # the inner nested divs must be indented.
 335 |   # We need to do this before the next, more liberal match, because the next
 336 |   # match will start at the first `` and stop at the first ``.
 337 |   $text =~ s{
 338 |         (            # save in $1
 339 |           ^          # start of line  (with /m)
 340 |           <($block_tags_a)  # start tag = $2
 341 |           \b          # word break
 342 |           (.*\n)*?      # any number of lines, minimally matching
 343 |                   # the matching end tag
 344 |           [ \t]*        # trailing spaces/tabs
 345 |           (?=\n+|\Z)  # followed by a newline or end of document
 346 |         )
 347 |       }{
 348 |         my $key = md5_hex($1);
 349 |         $g_html_blocks{$key} = $1;
 350 |         "\n\n" . $key . "\n\n";
 351 |       }egmx;
 352 | 
 353 | 
 354 |   #
 355 |   # Now match more liberally, simply from `\n` to `\n`
 356 |   #
 357 |   $text =~ s{
 358 |         (            # save in $1
 359 |           ^          # start of line  (with /m)
 360 |           <($block_tags_b)  # start tag = $2
 361 |           \b          # word break
 362 |           (.*\n)*?      # any number of lines, minimally matching
 363 |           .*        # the matching end tag
 364 |           [ \t]*        # trailing spaces/tabs
 365 |           (?=\n+|\Z)  # followed by a newline or end of document
 366 |         )
 367 |       }{
 368 |         my $key = md5_hex($1);
 369 |         $g_html_blocks{$key} = $1;
 370 |         "\n\n" . $key . "\n\n";
 371 |       }egmx;
 372 |   # Special case just for . It was easier to make a special case than
 373 |   # to make the other regex more complicated.  
 374 |   $text =~ s{
 375 |         (?:
 376 |           (?<=\n\n)    # Starting after a blank line
 377 |           |        # or
 378 |           \A\n?      # the beginning of the doc
 379 |         )
 380 |         (            # save in $1
 381 |           [ ]{0,$less_than_tab}
 382 |           <(hr)        # start tag = $2
 383 |           \b          # word break
 384 |           ([^<>])*?      # 
 385 |           /?>          # the matching end tag
 386 |           [ \t]*
 387 |           (?=\n{2,}|\Z)    # followed by a blank line or end of document
 388 |         )
 389 |       }{
 390 |         my $key = md5_hex($1);
 391 |         $g_html_blocks{$key} = $1;
 392 |         "\n\n" . $key . "\n\n";
 393 |       }egx;
 394 | 
 395 |   # Special case for standalone HTML comments:
 396 |   $text =~ s{
 397 |         (?:
 398 |           (?<=\n\n)    # Starting after a blank line
 399 |           |        # or
 400 |           \A\n?      # the beginning of the doc
 401 |         )
 402 |         (            # save in $1
 403 |           [ ]{0,$less_than_tab}
 404 |           (?s:
 405 |             
 408 |           )
 409 |           [ \t]*
 410 |           (?=\n{2,}|\Z)    # followed by a blank line or end of document
 411 |         )
 412 |       }{
 413 |         my $key = md5_hex($1);
 414 |         $g_html_blocks{$key} = $1;
 415 |         "\n\n" . $key . "\n\n";
 416 |       }egx;
 417 | 
 418 | 
 419 |   return $text;
 420 | }
 421 | 
 422 | 
 423 | sub _RunBlockGamut {
       # NOTE(review): this copy of the sub is incomplete. The embedded
       # numbering jumps from 433 to 445 and the substitution that starts
       # on line 433 is never terminated, so the horizontal-rule handling
       # and whatever block-level steps followed it are missing here.
       # Restore them from a pristine copy before relying on this sub.
 424 | #
 425 | # These are all the transformations that form block-level
 426 | # tags like paragraphs, headers, and list items.
 427 | #
 428 |   my $text = shift;
 429 | 
 430 |   $text = _DoHeaders($text);
 431 | 
 432 |   # Do Horizontal Rules:
 433 |   $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n tags around block-level tags.
       # HTML blocks are hashed again before paragraphs are formed.
 445 |   $text = _HashHTMLBlocks($text);
 446 |   $text = _FormParagraphs($text);
 447 | 
 448 |   return $text;
 449 | }
 450 | 
 451 | 
 452 | sub _RunSpanGamut {
       # NOTE(review): this copy is incomplete. The embedded numbering
       # jumps from 476 to 485, so the end of this sub and the header of
       # the following one (the code below calls _TokenizeHTML and escapes
       # \ ` * _ inside tag tokens) have been fused together. Restore the
       # missing lines from a pristine copy before relying on this code.
 453 | #
 454 | # These are all the transformations that occur *within* block-level
 455 | # tags like paragraphs, headers, and list items.
 456 | #
 457 |   my $text = shift;
 458 | 
 459 |   $text = _EscapeSpecialCharsWithinTagAttributes($text);
 460 |   $text = _DoCodeSpans($text);
 461 |   $text = _EncodeBackslashEscapes($text);
 462 | 
 463 |   # Process anchor and image tags. Images must come first,
 464 |   # because ![foo][f] looks like an anchor.
 465 |   $text = _DoImages($text);
 466 |   $text = _DoAnchors($text);
 467 | 
 468 |   # Make links out of things like ``
 469 |   # Must come after _DoAnchors(), because you can use < and >
 470 |   # delimiters in inline links like [this]().
 471 |   $text = _DoAutoLinks($text);
 472 |   $text = _EncodeAmpsAndAngles($text);
 473 |   $text = _DoItalicsAndBold($text);
 474 | 
 475 |   # Do hard breaks:
 476 |   $text =~ s/ {2,}\n/  -- encode [\ ` * _] so they
 485 | # don't conflict with their use in Markdown for code, italics and strong.
 486 | # We're replacing each such character with its corresponding MD5 checksum
 487 | # value; this is likely overkill, but it should prevent us from colliding
 488 | # with the escape values by accident.
 489 | #
 490 |   my $text = shift;
 491 |   my $tokens ||= _TokenizeHTML($text);
 492 |   $text = '';   # rebuild $text from the tokens
 493 | 
       # Only tokens whose type is "tag" are rewritten; every token's text
       # is appended back in order.
 494 |   foreach my $cur_token (@$tokens) {
 495 |     if ($cur_token->[0] eq "tag") {
 496 |       $cur_token->[1] =~  s! \\ !$g_escape_table{'\\'}!gx;
 497 |       $cur_token->[1] =~  s! `  !$g_escape_table{'`'}!gx;
 498 |       $cur_token->[1] =~  s! \* !$g_escape_table{'*'}!gx;
 499 |       $cur_token->[1] =~  s! _  !$g_escape_table{'_'}!gx;
 500 |     }
 501 |     $text .= $cur_token->[1];
 502 |   }
 503 |   return $text;
 504 | }
 505 | 
 506 | 
 507 | sub _DoAnchors {
 508 | #
 509 | # Turn Markdown link shortcuts into XHTML  tags.
 510 | #
 511 |   my $text = shift;
 512 | 
 513 |   #
 514 |   # First, handle reference-style links: [link text] [id]
 515 |   #
 516 |   $text =~ s{
 517 |     (          # wrap whole match in $1
 518 |       \[
 519 |         ($g_nested_brackets)  # link text = $2
 520 |       \]
 521 | 
 522 |       [ ]?        # one optional space
 523 |       (?:\n[ ]*)?    # one optional newline followed by spaces
 524 | 
 525 |       \[
 526 |         (.*?)    # id = $3
 527 |       \]
 528 |     )
 529 |   }{
 530 |     my $result;
 531 |     my $whole_match = $1;
 532 |     my $link_text   = $2;
 533 |     my $link_id     = lc $3;
 534 | 
 535 |     if ($link_id eq "") {
 536 |       $link_id = lc $link_text;     # for shortcut links like [this][].
 537 |     }
 538 | 
 539 |     if (defined $g_urls{$link_id}) {
 540 |       my $url = $g_urls{$link_id};
 541 |       $url =~ s! \* !$g_escape_table{'*'}!gx;    # We've got to encode these to avoid
 542 |       $url =~ s!  _ !$g_escape_table{'_'}!gx;    # conflicting with italics/bold.
 543 |       $result = "?  # href = $3
 569 |         [ \t]*
 570 |       (      # $4
 571 |         (['"])  # quote char = $5
 572 |         (.*?)    # Title = $6
 573 |         \5    # matching quote
 574 |       )?      # title is optional
 575 |       \)
 576 |     )
 577 |   }{
 578 |     my $result;
 579 |     my $whole_match = $1;
 580 |     my $link_text   = $2;
 581 |     my $url        = $3;
 582 |     my $title    = $6;
 583 | 
 584 |     $url =~ s! \* !$g_escape_table{'*'}!gx;    # We've got to encode these to avoid
 585 |     $url =~ s!  _ !$g_escape_table{'_'}!gx;    # conflicting with italics/bold.
 586 |     $result = " tags.
 643 | #
 644 |   my $text = shift;
 645 | 
 646 |   #
 647 |   # First, handle reference-style labeled images: ![alt text][id]
 648 |   #
 649 |   $text =~ s{
 650 |     (        # wrap whole match in $1
 651 |       !\[
 652 |         (.*?)    # alt text = $2
 653 |       \]
 654 | 
 655 |       [ ]?        # one optional space
 656 |       (?:\n[ ]*)?    # one optional newline followed by spaces
 657 | 
 658 |       \[
 659 |         (.*?)    # id = $3
 660 |       \]
 661 | 
 662 |     )
 663 |   }{
 664 |     my $result;
 665 |     my $whole_match = $1;
 666 |     my $alt_text    = $2;
 667 |     my $link_id     = lc $3;
 668 | 
 669 |     if ($link_id eq "") {
 670 |       $link_id = lc $alt_text;     # for shortcut links like ![this][].
 671 |     }
 672 | 
 673 |     $alt_text =~ s/"/"/g;
 674 |     if (defined $g_urls{$link_id}) {
 675 |       my $url = $g_urls{$link_id};
 676 |       $url =~ s! \* !$g_escape_table{'*'}!gx;    # We've got to encode these to avoid
 677 |       $url =~ s!  _ !$g_escape_table{'_'}!gx;    # conflicting with italics/bold.
 678 |       $result = "?  # src url = $3
 707 |         [ \t]*
 708 |       (      # $4
 709 |         (['"])  # quote char = $5
 710 |         (.*?)    # title = $6
 711 |         \5    # matching quote
 712 |         [ \t]*
 713 |       )?      # title is optional
 714 |       \)
 715 |     )
 716 |   }{
 717 |     my $result;
 718 |     my $whole_match = $1;
 719 |     my $alt_text    = $2;
 720 |     my $url        = $3;
 721 |     my $title    = '';
 722 |     if (defined($6)) {
 723 |       $title    = $6;
 724 |     }
 725 | 
 726 |     $alt_text =~ s/"/"/g;
 727 |     $title    =~ s/"/"/g;
 728 |     $url =~ s! \* !$g_escape_table{'*'}!gx;    # We've got to encode these to avoid
 729 |     $url =~ s!  _ !$g_escape_table{'_'}!gx;    # conflicting with italics/bold.
 730 |     $result = ""  .  _RunSpanGamut($1)  .  "\n\n";
 757 |   }egmx;
 758 | 
 759 |   $text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{
 760 |     ""  .  _RunSpanGamut($1)  .  "\n\n";
 761 |   }egmx;
 762 | 
 763 | 
 764 |   # atx-style headers:
 765 |   #  # Header 1
 766 |   #  ## Header 2
 767 |   #  ## Header 2 with closing hashes ##
 768 |   #  ...
 769 |   #  ###### Header 6
 770 |   #
 771 |   $text =~ s{
 772 |       ^(\#{1,6})  # $1 = string of #'s
 773 |       [ \t]*
 774 |       (.+?)    # $2 = Header text
 775 |       [ \t]*
 776 |       \#*      # optional closing #'s (not counted)
 777 |       \n+
 778 |     }{
 779 |       my $h_level = length($1);
 780 |       ""  .  _RunSpanGamut($2)  .  "\n\n";
 781 |     }egmx;
 782 | 
 783 |   return $text;
 784 | }
 785 | 
 786 | 
 787 | sub _DoLists {
 788 | # Form HTML ordered (numbered) and unordered (bulleted) lists.
 789 | # Takes the text to scan and returns it with every list replaced by
 790 | # <ul>/<ol> markup whose items come from _ProcessListItems().
 791 |   my $text = shift;
 792 |   my $less_than_tab = $g_tab_width - 1;
 793 | 
 794 |   # Re-usable patterns to match list item bullets and number markers:
 795 |   my $marker_ul  = qr/[*+-]/;
 796 |   my $marker_ol  = qr/\d+[.]/;
 797 |   my $marker_any = qr/(?:$marker_ul|$marker_ol)/;
 798 | 
 799 |   # Re-usable pattern to match any entire ul or ol list:
 800 |   my $whole_list = qr{
 801 |     (                # $1 = whole list
 802 |       (                # $2
 803 |       [ ]{0,$less_than_tab}
 804 |       (${marker_any})        # $3 = first list item marker
 805 |       [ \t]+
 806 |       )
 807 |       (?s:.+?)
 808 |       (                # $4
 809 |         \z
 810 |       |
 811 |         \n{2,}
 812 |         (?=\S)
 813 |         (?!            # Negative lookahead for another list item marker
 814 |         [ \t]*
 815 |         ${marker_any}[ \t]+
 816 |         )
 817 |       )
 818 |     )
 819 |   }mx;
 820 | 
 821 |   # We use a different prefix before nested lists than top-level lists.
 822 |   # See extended comment in _ProcessListItems().
 823 |   #
 824 |   # Note: There's a bit of duplication here. My original implementation
 825 |   # created a scalar regex pattern as the conditional result of the test on
 826 |   # $g_list_level, and then only ran the $text =~ s{...}{...}egmx
 827 |   # substitution once, using the scalar as the pattern. This worked,
 828 |   # everywhere except when running under MT on my hosting account at Pair
 829 |   # Networks. There, this caused all rebuilds to be killed by the reaper (or
 830 |   # perhaps they crashed, but that seems incredibly unlikely given that the
 831 |   # same script on the same server ran fine *except* under MT). I've spent
 832 |   # more time trying to figure out why this is happening than I'd like to
 833 |   # admit. My only guess, backed up by the fact that this workaround works,
 834 |   # is that Perl optimizes the substitution when it can figure out that the
 835 |   # pattern will never change, and when this optimization isn't on, we run
 836 |   # afoul of the reaper. Thus, the slightly redundant code that uses two
 837 |   # static s/// patterns rather than one conditional pattern.
 838 | 
 839 |   if ($g_list_level) {
 840 |     $text =~ s{
 841 |         ^
 842 |         $whole_list
 843 |       }{
 844 |         my $list = $1;
 845 |         my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
 846 | 
 847 |         # Turn double returns into triple returns, so that we can make a
 848 |         # paragraph for the last item in a list, if necessary:
 849 |         $list =~ s/\n{2,}/\n\n\n/g;
 850 |         my $result = _ProcessListItems($list, $marker_any);
 851 | 
 852 |         # Trim any trailing whitespace, to put the closing `</$list_type>`
 853 |         # up on the preceding line, to get it past the current stupid
 854 |         # HTML block parser. This is a hack to work around the terrible
 855 |         # hack that is the HTML block parser.
 856 |         $result =~ s{\s+$}{};
 857 |         $result = "<$list_type>" . $result . "</$list_type>\n";
 858 |         $result;
 859 |       }egmx;
 860 |   }
 861 |   else {
 862 |     $text =~ s{
 863 |         (?:(?<=\n\n)|\A\n?)
 864 |         $whole_list
 865 |       }{
 866 |         my $list = $1;
 867 |         my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
 868 |         # Turn double returns into triple returns, so that we can make a
 869 |         # paragraph for the last item in a list, if necessary:
 870 |         $list =~ s/\n{2,}/\n\n\n/g;
 871 |         my $result = _ProcessListItems($list, $marker_any);
 872 |         $result = "<$list_type>\n" . $result . "</$list_type>\n";
 873 |         $result;
 874 |       }egmx;
 875 |   }
 876 | 
 877 | 
 878 |   return $text;
 879 | }
 880 | 
 881 | 
 882 | sub _ProcessListItems {
 883 | #
 884 | #  Process the contents of a single ordered or unordered list, splitting it
 885 | #  into individual list items.
 886 | #  Params: the list text, then the compiled pattern matching any item marker.
 887 | 
 888 |   my $list_str = shift;
 889 |   my $marker_any = shift;
 890 | 
 891 | 
 892 |   # The $g_list_level global keeps track of when we're inside a list.
 893 |   # Each time we enter a list, we increment it; when we leave a list,
 894 |   # we decrement. If it's zero, we're not in a list anymore.
 895 |   #
 896 |   # We do this because when we're not inside a list, we want to treat
 897 |   # something like this:
 898 |   #
 899 |   #    I recommend upgrading to version
 900 |   #    8. Oops, now this line is treated
 901 |   #    as a sub-list.
 902 |   #
 903 |   # As a single paragraph, despite the fact that the second line starts
 904 |   # with a digit-period-space sequence.
 905 |   #
 906 |   # Whereas when we're inside a list (or sub-list), that line will be
 907 |   # treated as the start of a sub-list. What a kludge, huh? This is
 908 |   # an aspect of Markdown's syntax that's hard to parse perfectly
 909 |   # without resorting to mind-reading. Perhaps the solution is to
 910 |   # change the syntax rules such that sub-lists must start with a
 911 |   # starting cardinal number; e.g. "1." or "a.".
 912 | 
 913 |   $g_list_level++;
 914 | 
 915 |   # trim trailing blank lines:
 916 |   $list_str =~ s/\n{2,}\z/\n/;
 917 | 
 918 | 
 919 |   $list_str =~ s{
 920 |     (\n)?              # leading line = $1
 921 |     (^[ \t]*)            # leading whitespace = $2
 922 |     ($marker_any) [ \t]+      # list marker = $3
 923 |     ((?s:.+?)            # list item text   = $4
 924 |     (\n{1,2}))
 925 |     (?= \n* (\z | \2 ($marker_any) [ \t]+))
 926 |   }{
 927 |     my $item = $4;
 928 |     my $leading_line = $1;
 929 |     my $leading_space = $2;
 930 | 
 931 |     if ($leading_line or ($item =~ m/\n{2,}/)) {
 932 |       $item = _RunBlockGamut(_Outdent($item));
 933 |     }
 934 |     else {
 935 |       # Recursion for sub-lists:
 936 |       $item = _DoLists(_Outdent($item));
 937 |       chomp $item;
 938 |       $item = _RunSpanGamut($item);
 939 |     }
 940 |     # Each processed item is emitted as one <li> element on its own line.
 941 |     "<li>" . $item . "</li>\n";
 942 |   }egmx;
 943 | 
 944 |   $g_list_level--;
 945 |   return $list_str;
 946 | }
 947 | 
 948 | 
 949 | 
 950 | sub _DoCodeBlocks {
 951 | #
 952 | #  Process Markdown `<pre><code>` blocks.
 953 | #  Takes the text to scan; returns it with each indented block wrapped.
 954 | 
 955 |   my $text = shift;
 956 | 
 957 |   $text =~ s{
 958 |       (?:\n\n|\A)
 959 |       (              # $1 = the code block -- one or more lines, starting with a space/tab
 960 |         (?:
 961 |           (?:[ ]{$g_tab_width} | \t)  # Lines must start with a tab or a tab-width of spaces
 962 |           .*\n+
 963 |         )+
 964 |       )
 965 |       ((?=^[ ]{0,$g_tab_width}\S)|\Z)  # Lookahead for non-space at line-start, or end of doc
 966 |     }{
 967 |       my $codeblock = $1;
 968 |       my $result; # return value
 969 | 
 970 |       $codeblock = _EncodeCode(_Outdent($codeblock));
 971 |       $codeblock = _Detab($codeblock);
 972 |       $codeblock =~ s/\A\n+//; # trim leading newlines
 973 |       $codeblock =~ s/\s+\z//; # trim trailing whitespace
 974 | 
 975 |       $result = "\n\n<pre><code>" . $codeblock . "\n</code></pre>\n\n";
 976 | 
 977 |       $result;
 978 |     }egmx;
 979 | 
 980 |   return $text;
 981 | }
 982 | 
 983 | 
 984 | sub _DoCodeSpans {
 985 | #
 986 | #   *  Backtick quotes are used for <code></code> spans.
 987 | # 
 988 | #   *  You can use multiple backticks as the delimiters if you want to
 989 | #     include literal backticks in the code span. So, this input:
 990 | #     
 991 | #         Just type ``foo `bar` baz`` at the prompt.
 992 | #     
 993 | #       Will translate to:
 994 | #     
 995 | #         Just type <code>foo `bar` baz</code> at the prompt.
 996 | #     
 997 | #    There's no arbitrary limit to the number of backticks you
 998 | #    can use as delimiters. If you need three consecutive backticks
 999 | #    in your code, use four for delimiters, etc.
1000 | #
1001 | #  *  You can use spaces to get literal backticks at the edges:
1002 | #     
1003 | #         ... type `` `bar` `` ...
1004 | #     
1005 | #       Turns to:
1006 | #     
1007 | #         ... type <code>`bar`</code> ...
1008 | #
1009 | 
1010 |   my $text = shift;
1011 | 
1012 |   $text =~ s@
1013 |       (?$c";
1025 |     @egsx;
1026 | 
1027 |   return $text;
1028 | }
1029 | 
1030 | 
1031 | sub _EncodeCode {
1032 | #
1033 | # Encode/escape certain characters inside Markdown code runs.
1034 | # The point is that in code, these characters are literals,
1035 | # and lose their special Markdown meanings.
1036 | # Takes the code text and returns the encoded copy.
1037 |     local $_ = shift;
1038 | 
1039 |   # Encode all ampersands; HTML entities are not
1040 |   # entities within a Markdown code span.
1041 |   s/&/&amp;/g;
1042 | 
1043 |   # Encode $'s, but only if we're running under Blosxom.
1044 |   # (Blosxom interpolates Perl variables in article bodies.)
1045 |   {
1046 |     no warnings 'once';
1047 |       if (defined($blosxom::version)) {
1048 |         s/\$/&#036;/g;
1049 |       }
1050 |     }
1051 | 
1052 | 
1053 |   # Do the angle bracket song and dance:
1054 |   s! <  !&lt;!gx;
1055 |   s! >  !&gt;!gx;
1056 | 
1057 |   # Now, escape characters that are magic in Markdown:
1058 |   s! \* !$g_escape_table{'*'}!gx;
1059 |   s! _  !$g_escape_table{'_'}!gx;
1060 |   s! {  !$g_escape_table{'{'}!gx;
1061 |   s! }  !$g_escape_table{'}'}!gx;
1062 |   s! \[ !$g_escape_table{'['}!gx;
1063 |   s! \] !$g_escape_table{']'}!gx;
1064 |   s! \\ !$g_escape_table{'\\'}!gx;
1065 | 
1066 |   return $_;
1067 | }
1068 | 
1069 | 
1070 | sub _DoItalicsAndBold {
1071 |   my $text = shift;
1072 |   # Doubled delimiters (** or __) become <strong>; single ones become <em>.
1073 |   # <strong> must go first:
1074 |   $text =~ s{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }
1075 |     {<strong>$2</strong>}gsx;
1076 | 
1077 |   $text =~ s{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }
1078 |     {<em>$2</em>}gsx;
1079 | 
1080 |   return $text;
1081 | }
1082 | 
1083 | 
1084 | sub _DoBlockQuotes {
1085 |   my $text = shift;
1086 |   # Wrap each run of '>'-prefixed lines in <blockquote>, recursing on its body.
1087 |   $text =~ s{
1088 |       (                # Wrap whole match in $1
1089 |       (
1090 |         ^[ \t]*>[ \t]?      # '>' at the start of a line
1091 |           .+\n          # rest of the first line
1092 |         (.+\n)*          # subsequent consecutive lines
1093 |         \n*            # blanks
1094 |       )+
1095 |       )
1096 |     }{
1097 |       my $bq = $1;
1098 |       $bq =~ s/^[ \t]*>[ \t]?//gm;  # trim one level of quoting
1099 |       $bq =~ s/^[ \t]+$//mg;      # trim whitespace-only lines
1100 |       $bq = _RunBlockGamut($bq);    # recurse
1101 | 
1102 |       $bq =~ s/^/  /gm;           # indent every line (needs /m), matching the per-line un-indent below
1103 |       # These leading spaces screw with <pre> content, so we need to fix that:
1104 |       $bq =~ s{
1105 |           (\s*<pre>.+?</pre>)
1106 |         }{
1107 |           my $pre = $1;
1108 |           $pre =~ s/^  //mg;
1109 |           $pre;
1110 |         }egsx;
1111 | 
1112 |       "<blockquote>\n$bq\n</blockquote>\n\n";
1113 |     }egmx;
1114 | 
1115 | 
1116 |   return $text;
1117 | }
1118 | 
1119 | 
1120 | sub _FormParagraphs {
1121 | #
1122 | #  Params:
1123 | #    $text - string to process with html <p> tags
1124 | #
1125 |   my $text = shift;
1126 | 
1127 |   # Strip leading and trailing lines:
1128 |   $text =~ s/\A\n+//;
1129 |   $text =~ s/\n+\z//;
1130 | 
1131 |   my @grafs = split(/\n{2,}/, $text);
1132 | 
1133 |   #
1134 |   # Wrap <p> tags.
1135 |   #
1136 |   foreach (@grafs) {
1137 |     unless (defined( $g_html_blocks{$_} )) {
1138 |       $_ = _RunSpanGamut($_);
1139 |       s/^([ \t]*)/<p>/;
1140 |       $_ .= "</p>";
1141 |     }
1142 |   }
1143 | 
1144 |   #
1145 |   # Unhashify HTML blocks
1146 |   #
1147 |   foreach (@grafs) {
1148 |     if (defined( $g_html_blocks{$_} )) {
1149 |       $_ = $g_html_blocks{$_};
1150 |     }
1151 |   }
1152 | 
1153 |   return join "\n\n", @grafs;
1154 | }
1155 | 
1156 | 
1157 | sub _EncodeAmpsAndAngles {
1158 | # Smart processing for ampersands and angle brackets that need to be encoded.
1159 | # Takes the text and returns it with bare '&' and '<' turned into entities.
1160 |   my $text = shift;
1161 | 
1162 |   # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
1163 |   #   http://bumppo.net/projects/amputator/
1164 |    $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&amp;/g;
1165 | 
1166 |   # Encode naked <'s
1167 |    $text =~ s{<(?![a-z/?\$!])}{&lt;}gi;
1168 | 
1169 |   return $text;
1170 | }
1172 | 
1173 | sub _EncodeBackslashEscapes {
1174 | #
1175 | #   Parameter:  String.
1176 | #   Returns:    The string, with each of the following backslash escape
1177 | #               sequences replaced by its %g_escape_table value.
1178 | #
1179 |     local $_ = shift;
1180 | 
1181 |     s! \\\\  !$g_escape_table{'\\'}!gx;    # Must process escaped backslashes first.
1182 |     s! \\`   !$g_escape_table{'`'}!gx;
1183 |     s! \\\*  !$g_escape_table{'*'}!gx;
1184 |     s! \\_   !$g_escape_table{'_'}!gx;
1185 |     s! \\\{  !$g_escape_table{'{'}!gx;
1186 |     s! \\\}  !$g_escape_table{'}'}!gx;
1187 |     s! \\\[  !$g_escape_table{'['}!gx;
1188 |     s! \\\]  !$g_escape_table{']'}!gx;
1189 |     s! \\\(  !$g_escape_table{'('}!gx;
1190 |     s! \\\)  !$g_escape_table{')'}!gx;
1191 |     s! \\>   !$g_escape_table{'>'}!gx;
1192 |     s! \\\#  !$g_escape_table{'#'}!gx;
1193 |     s! \\\+  !$g_escape_table{'+'}!gx;
1194 |     s! \\\-  !$g_escape_table{'-'}!gx;
1195 |     s! \\\.  !$g_escape_table{'.'}!gx;
1196 |     s{ \\!  }{$g_escape_table{'!'}}gx;    # s{}{} delimiters here because '!' is the character being matched
1197 | 
1198 |     return $_;
1199 | }
1201 | 
1202 | sub _DoAutoLinks {
1203 |   my $text = shift;
1204 |   # URLs in angle brackets become links whose text is the URL itself.
1205 |   $text =~ s{<((https?|ftp):[^'">\s]+)>}{<a href="$1">$1</a>}gi;
1206 | 
1207 |   # Email addresses: <address@domain.foo>
1208 |   $text =~ s{
1209 |     <
1210 |         (?:mailto:)?
1211 |     (
1212 |       [-.\w]+
1213 |       \@
1214 |       [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
1215 |     )
1216 |     >
1217 |   }{
1218 |     _EncodeEmailAddress( _UnescapeSpecialChars($1) );
1219 |   }egix;
1220 | 
1221 |   return $text;
1222 | }
1224 | 
1225 | sub _EncodeEmailAddress {
1226 | #
1227 | #  Input: an email address, e.g. "foo@example.com"
1228 | #
1229 | #  Output: the email address as a mailto link, with each character
1230 | #    of the address encoded as either a decimal or hex entity, in
1231 | #    the hopes of foiling most address harvesting spam bots. E.g.:
1232 | #
1233 | #    <a href="[entity-encoded mailto]:[entity-encoded address]">
1235 | #       [entity-encoded address]</a>
1236 | #
1237 | #  Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
1238 | #  mailing list.
1239 | #
1240 | 
1241 |   my $addr = shift;
1242 | 
1243 |   srand;
1244 |   my @encode = (
1245 |     sub { '&#' .                 ord(shift)   . ';' },
1246 |     sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' },
1247 |     sub {                            shift          },
1248 |   );
1249 | 
1250 |   $addr = "mailto:" . $addr;
1251 | 
1252 |   $addr =~ s{(.)}{
1253 |     my $char = $1;
1254 |     if ( $char eq '@' ) {
1255 |       # this *must* be encoded. I insist. (int rand 2 picks dec or hex.)
1256 |       $char = $encode[int rand 2]->($char);
1257 |     } elsif ( $char ne ':' ) {
1258 |       # leave ':' alone (to spot mailto: later)
1259 |       my $r = rand;
1260 |       # roughly 10% raw, 45% hex, 45% dec
1261 |       $char = (
1262 |         $r > .9   ?  $encode[2]->($char)  :
1263 |         $r < .45  ?  $encode[1]->($char)  :
1264 |                $encode[0]->($char)
1265 |       );
1266 |     }
1267 |     $char;
1268 |   }gex;
1269 | 
1270 |   $addr = qq{<a href="$addr">$addr</a>};
1271 |   $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part
1272 | 
1273 |   return $addr;
1274 | }
1276 | 
1277 | sub _UnescapeSpecialChars {
1278 | #
1279 | # Swap back in all the special characters we've hidden.
1280 | # Takes the text and returns it with the substitutions applied.
1281 |   my $text = shift;
1282 |   # Replace every %g_escape_table value found in the text with its key.
1283 |   while( my($char, $hash) = each(%g_escape_table) ) {
1284 |     $text =~ s/$hash/$char/g;
1285 |   }
1286 |     return $text;
1287 | }
1289 | 
1290 | sub _TokenizeHTML {
1291 | #
1292 | #   Parameter:  String containing HTML markup.
1293 | #   Returns:    Reference to an array of the tokens comprising the input
1294 | #               string. Each token is either a tag (possibly with nested
1295 | #               tags contained therein, such as <a href="<MTFoo>">), or a
1296 | #               run of text between tags. Each element of the array is a
1297 | #               two-element array; the first is either 'tag' or 'text';
1298 | #               the second is the actual value.
1299 | #
1300 | #
1301 | #   Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
1302 | #       
1303 | #
1304 | 
1305 |     my $str = shift;
1306 |     my $pos = 0;
1307 |     my $len = length $str;
1308 |     my @tokens;
1309 | 
1310 |     my $depth = 6;
1311 |     my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x  $depth);
1312 |     my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) |  # comment
1313 |                    (?s: <\? .*? \?> ) |              # processing instruction
1314 |                    $nested_tags/ix;                   # nested tags
1315 | 
1316 |     while ($str =~ m/($match)/g) {
1317 |         my $whole_tag = $1;
1318 |         my $sec_start = pos $str;
1319 |         my $tag_start = $sec_start - length $whole_tag;
1320 |         if ($pos < $tag_start) {
1321 |             push @tokens, ['text', substr($str, $pos, $tag_start - $pos)];
1322 |         }
1323 |         push @tokens, ['tag', $whole_tag];
1324 |         $pos = pos $str;
1325 |     }
1326 |     push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len;
1327 |     \@tokens;
1328 | }
1330 | 
1331 | sub _Outdent {
1332 | #
1333 | # Remove one level of line-leading tabs or spaces
1334 | #
1335 |   my $text = shift;
1336 |   # Strip one tab, or one to $g_tab_width spaces, from the start of each line.
1337 |   $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm;
1338 |   return $text;
1339 | }
1341 | 
1342 | sub _Detab {
1343 | #
1344 | # Cribbed from a post by Bart Lateur:
1345 | # 
1346 | #
1347 |   my $text = shift;
1348 |   # Swap each tab for spaces, sized from the length of the text matched before it.
1349 |   $text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge;
1350 |   return $text;
1351 | }
1353 | 
1354 | 1;
1355 | 
1356 | __END__
1357 | 
1358 | 
1359 | =pod
1360 | 
1361 | =head1 NAME
1362 | 
1363 | B<Markdown>
1364 | 
1365 | 
1366 | =head1 SYNOPSIS
1367 | 
1368 | B<Markdown.pl> [ B<--html4tags> ] [ B<--version> ] [ B<--shortversion> ]
1369 |     [ I<file> ... ]
1370 | 
1371 | 
1372 | =head1 DESCRIPTION
1373 | 
1374 | Markdown is a text-to-HTML filter; it translates an easy-to-read /
1375 | easy-to-write structured text format into HTML. Markdown's text format
1376 | is most similar to that of plain text email, and supports features such
1377 | as headers, *emphasis*, code blocks, blockquotes, and links.
1378 | 
1379 | Markdown's syntax is designed not as a generic markup language, but
1380 | specifically to serve as a front-end to (X)HTML. You can use span-level
1381 | HTML tags anywhere in a Markdown document, and you can use block level
1382 | HTML tags (like <div> and <table> as well).
1383 | 
1384 | For more information about Markdown's syntax, see:
1385 | 
1386 |     http://daringfireball.net/projects/markdown/
1387 | 
1388 | 
1389 | =head1 OPTIONS
1390 | 
1391 | Use "--" to end switch parsing. For example, to open a file named "-z", use:
1392 | 
1393 |   Markdown.pl -- -z
1394 | 
1395 | =over 4
1396 | 
1397 | 
1398 | =item B<--html4tags>
1399 | 
1400 | Use HTML 4 style for empty element tags, e.g.:
1401 | 
1402 |     <br>
1403 | 
1404 | instead of Markdown's default XHTML style tags, e.g.:
1405 | 
1406 |     <br />
1407 | 
1408 | 
1409 | =item B<-v>, B<--version>
1410 | 
1411 | Display Markdown's version number and copyright information.
1412 | 
1413 | 
1414 | =item B<-s>, B<--shortversion>
1415 | 
1416 | Display the short-form version number.
1417 | 
1418 | 
1419 | =back
1420 | 
1421 | 
1422 | 
1423 | =head1 BUGS
1424 | 
1425 | To file bug reports or feature requests (other than topics listed in the
1426 | Caveats section above) please send email to:
1427 | 
1428 |     support@daringfireball.net
1429 | 
1430 | Please include with your report: (1) the example input; (2) the output
1431 | you expected; (3) the output Markdown actually produced.
1432 | 
1433 | 
1434 | =head1 VERSION HISTORY
1435 | 
1436 | See the readme file for detailed release notes for this version.
1437 | 
1438 | 1.0.2b2 - 20 Mar 2005
1439 | 
1440 |   +  Fix for nested sub-lists in list-paragraph mode. Previously we got
1441 |     a spurious extra level of `<p>` tags for something like this:
1442 |     
1443 |       *  this
1444 |       
1445 |         *  sub
1446 |       
1447 |         that
1448 |     
1449 |   +  Experimental support for [this] as a synonym for [this][].
1450 |     (Note to self: No test yet for this.)
1451 |     Be sure to test, e.g.: [permutations of this sort of [thing][].]
1452 | 
1453 | 
1454 | 1.0.2b1 - 28  Feb 2005
1455 | 
1456 |   +  Fix for backticks within HTML tag: <span attr='`ticks`'>like this</span>
1457 | 
1458 |   +  Fix for escaped backticks still triggering code spans:
1459 |   
1460 |       There are two raw backticks here: \` and here: \`, not a code span
1461 | 
1462 | 1.0.1 - 14 Dec 2004
1463 | 
1464 | 1.0 - 28 Aug 2004
1465 | 
1466 | 
1467 | =head1 AUTHOR
1468 | 
1469 |     John Gruber
1470 |     http://daringfireball.net
1471 | 
1472 |     PHP port and other contributions by Michel Fortin
1473 |     http://michelf.com
1474 | 
1475 | 
1476 | =head1 COPYRIGHT AND LICENSE
1477 | 
1478 | Copyright (c) 2003-2005 John Gruber   
1479 | <http://daringfireball.net/>   
1480 | All rights reserved.
1481 | 
1482 | Redistribution and use in source and binary forms, with or without
1483 | modification, are permitted provided that the following conditions are
1484 | met:
1485 | 
1486 | * Redistributions of source code must retain the above copyright notice,
1487 |   this list of conditions and the following disclaimer.
1488 | 
1489 | * Redistributions in binary form must reproduce the above copyright
1490 |   notice, this list of conditions and the following disclaimer in the
1491 |   documentation and/or other materials provided with the distribution.
1492 | 
1493 | * Neither the name "Markdown" nor the names of its contributors may
1494 |   be used to endorse or promote products derived from this software
1495 |   without specific prior written permission.
1496 | 
1497 | This software is provided by the copyright holders and contributors "as
1498 | is" and any express or implied warranties, including, but not limited
1499 | to, the implied warranties of merchantability and fitness for a
1500 | particular purpose are disclaimed. In no event shall the copyright owner
1501 | or contributors be liable for any direct, indirect, incidental, special,
1502 | exemplary, or consequential damages (including, but not limited to,
1503 | procurement of substitute goods or services; loss of use, data, or
1504 | profits; or business interruption) however caused and on any theory of
1505 | liability, whether in contract, strict liability, or tort (including
1506 | negligence or otherwise) arising in any way out of the use of this
1507 | software, even if advised of the possibility of such damage.
1508 | 
1509 | =cut
1510 | 


--------------------------------------------------------------------------------

"+_21(m1)+"

Showdown

a javascript port of Markdown

" + _RunSpanGamut(m1) + "

" + _RunSpanGamut(m1) + "

" . _RunSpanGamut($1) . "