234 | # Hello World. <-- Is this a Markdown code block or text?
235 | #
237 | #
238 | # If you don't like this, just don't indent the tag on which
239 | # you apply the markdown="1" attribute.
240 | #
241 | # * If $enclosing_tag_re is not empty, stops at the first unmatched closing
242 | # tag with that name. Nested tags supported.
243 | #
244 | # * If $span is true, text inside must treated as span. So any double
245 | # newline will be replaced by a single newline so that it does not create
246 | # paragraphs.
247 | #
248 | # Returns an array of that form: ( processed text , remaining text )
249 | #
250 | if ($text === '') return array('', '');
251 |
252 | # Regex to check for the presense of newlines around a block tag.
253 | $newline_before_re = '/(?:^\n?|\n\n)*$/';
254 | $newline_after_re =
255 | '{
256 | ^ # Start of text following the tag.
257 | (?>[ ]*)? # Optional comment.
258 | [ ]*\n # Must be followed by newline.
259 | }xs';
260 |
261 | # Regex to match any tag.
262 | $block_tag_re =
263 | '{
264 | ( # $2: Capture hole tag.
265 | ? # Any opening or closing tag.
266 | (?> # Tag name.
267 | '.$this->block_tags_re.' |
268 | '.$this->context_block_tags_re.' |
269 | '.$this->clean_tags_re.' |
270 | (?!\s)'.$enclosing_tag_re.'
271 | )
272 | (?:
273 | (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
274 | (?>
275 | ".*?" | # Double quotes (can contain `>`)
276 | \'.*?\' | # Single quotes (can contain `>`)
277 | .+? # Anything but quotes and `>`.
278 | )*?
279 | )?
280 | > # End of tag.
281 | |
282 | # HTML Comment
283 | |
284 | <\?.*?\?> | <%.*?%> # Processing instruction
285 | |
286 | # CData Block
287 | |
288 | # Code span marker
289 | `+
290 | '. ( !$span ? ' # If not in span.
291 | |
292 | # Indented code block
293 | (?: ^[ ]*\n | ^ | \n[ ]*\n )
294 | [ ]{'.($indent+4).'}[^\n]* \n
295 | (?>
296 | (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
297 | )*
298 | |
299 | # Fenced code block marker
300 | (?> ^ | \n )
301 | [ ]{0,'.($indent).'}~~~+[ ]*\n
302 | ' : '' ). ' # End (if not is span).
303 | )
304 | }xs';
305 |
306 |
307 | $depth = 0; # Current depth inside the tag tree.
308 | $parsed = ""; # Parsed text that will be returned.
309 |
310 | #
311 | # Loop through every tag until we find the closing tag of the parent
312 | # or loop until reaching the end of text if no parent tag specified.
313 | #
314 | do {
315 | #
316 | # Split the text using the first $tag_match pattern found.
317 | # Text before pattern will be first in the array, text after
318 | # pattern will be at the end, and between will be any catches made
319 | # by the pattern.
320 | #
321 | $parts = preg_split($block_tag_re, $text, 2,
322 | PREG_SPLIT_DELIM_CAPTURE);
323 |
324 | # If in Markdown span mode, add a empty-string span-level hash
325 | # after each newline to prevent triggering any block element.
326 | if ($span) {
327 | $void = $this->hashPart("", ':');
328 | $newline = "$void\n";
329 | $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
330 | }
331 |
332 | $parsed .= $parts[0]; # Text before current tag.
333 |
334 | # If end of $text has been reached. Stop loop.
335 | if (count($parts) < 3) {
336 | $text = "";
337 | break;
338 | }
339 |
340 | $tag = $parts[1]; # Tag to handle.
341 | $text = $parts[2]; # Remaining text after current tag.
342 | $tag_re = preg_quote($tag); # For use in a regular expression.
343 |
344 | #
345 | # Check for: Code span marker
346 | #
347 | if ($tag{0} == "`") {
348 | # Find corresponding end marker.
349 | $tag_re = preg_quote($tag);
350 | if (preg_match('{^(?>.+?|\n(?!\n))*?(?.*\n)+?[ ]{0,'.($indent).'}'.$tag_re.'[ ]*\n}', $text,
385 | $matches))
386 | {
387 | # End marker found: pass text unchanged until marker.
388 | $parsed .= $tag . $matches[0];
389 | $text = substr($text, strlen($matches[0]));
390 | }
391 | else {
392 | # No end marker: just skip it.
393 | $parsed .= $tag;
394 | }
395 | }
396 | #
397 | # Check for: Indented code block.
398 | #
399 | else if ($tag{0} == "\n" || $tag{0} == " ") {
400 | # Indented code block: pass it unchanged, will be handled
401 | # later.
402 | $parsed .= $tag;
403 | }
404 | #
405 | # Check for: Opening Block level tag or
406 | # Opening Context Block tag (like ins and del)
407 | # used as a block tag (tag is alone on it's line).
408 | #
409 | else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
410 | ( preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
411 | preg_match($newline_before_re, $parsed) &&
412 | preg_match($newline_after_re, $text) )
413 | )
414 | {
415 | # Need to parse tag and following text using the HTML parser.
416 | list($block_text, $text) =
417 | $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
418 |
419 | # Make sure it stays outside of any paragraph by adding newlines.
420 | $parsed .= "\n\n$block_text\n\n";
421 | }
422 | #
423 | # Check for: Clean tag (like script, math)
424 | # HTML Comments, processing instructions.
425 | #
426 | else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
427 | $tag{1} == '!' || $tag{1} == '?')
428 | {
429 | # Need to parse tag and following text using the HTML parser.
430 | # (don't check for markdown attribute)
431 | list($block_text, $text) =
432 | $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
433 |
434 | $parsed .= $block_text;
435 | }
436 | #
437 | # Check for: Tag with same name as enclosing tag.
438 | #
439 | else if ($enclosing_tag_re !== '' &&
440 | # Same name as enclosing tag.
441 | preg_match('{^?(?:'.$enclosing_tag_re.')\b}', $tag))
442 | {
443 | #
444 | # Increase/decrease nested tag count.
445 | #
446 | if ($tag{1} == '/') $depth--;
447 | else if ($tag{strlen($tag)-2} != '/') $depth++;
448 |
449 | if ($depth < 0) {
450 | #
451 | # Going out of parent element. Clean up and break so we
452 | # return to the calling function.
453 | #
454 | $text = $tag . $text;
455 | break;
456 | }
457 |
458 | $parsed .= $tag;
459 | }
460 | else {
461 | $parsed .= $tag;
462 | }
463 | } while ($depth >= 0);
464 |
465 | return array($parsed, $text);
466 | }
467 | function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
468 | #
469 | # Parse HTML starting at an opening tag, calling _hashHTMLBlocks_inMarkdown
470 | # for the content of any tag that carries a usable `markdown` attribute.
471 | # * Calls the method named by $hash_method to hash each finished block.
472 | # * Stops when the first opening tag closes (depth is back to zero).
473 | # * $md_attr indicates whether the `markdown="1"` attribute is honoured
474 | # (it is not inside clean tags).
475 | #
476 | # Returns an array of the form: ( processed text , remaining text )
477 | #
478 | if ($text === '') return array('', '');
479 |
480 | # Regex to match the `markdown` attribute inside of a tag.
481 | $markdown_attr_re = '
482 | {
483 | \s* # Eat whitespace before the `markdown` attribute
484 | markdown
485 | \s*=\s*
486 | (?>
487 | (["\']) # $1: quote delimiter
488 | (.*?) # $2: attribute value
489 | \1 # matching delimiter
490 | |
491 | ([^\s>]*) # $3: unquoted attribute value
492 | )
493 | () # $4: make $3 always defined (avoid warnings)
494 | }xs';
495 |
496 | # Regex to match any tag, comment, processing instruction or CDATA block.
497 | $tag_re = '{
498 | ( # $2: Capture hole tag.
499 | ? # Any opening or closing tag.
500 | [\w:$]+ # Tag name.
501 | (?:
502 | (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
503 | (?>
504 | ".*?" | # Double quotes (can contain `>`)
505 | \'.*?\' | # Single quotes (can contain `>`)
506 | .+? # Anything but quotes and `>`.
507 | )*?
508 | )?
509 | > # End of tag.
510 | |
511 | # HTML Comment
512 | |
513 | <\?.*?\?> | <%.*?%> # Processing instruction
514 | |
515 | # CData Block
516 | )
517 | }xs';
518 |
519 | $original_text = $text; # Save original text in case of failure.
520 |
521 | $depth = 0; # Current depth inside the tag tree.
522 | $block_text = ""; # Text accumulated since the last hashed block.
523 | $parsed = ""; # Parsed text that will be returned.
524 |
525 | #
526 | # Get the name of the starting tag.
527 | # (This pattern makes $base_tag_name_re safe without quoting.)
528 | # NOTE(review): $base_tag_name_re stays undefined if this match fails.
529 | if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
530 | $base_tag_name_re = $matches[1];
531 |
532 | #
533 | # Loop through every tag until we find the corresponding closing tag.
534 | #
535 | do {
536 | #
537 | # Split the text at the first match of $tag_re.
538 | # Text before the match will be first in the array, text after
539 | # the match will be at the end, and between them will be the
540 | # captured tag.
541 | #
542 | $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
543 |
544 | if (count($parts) < 3) {
545 | #
546 | # End of $text reached with unbalanced tag(s).
547 | # In that case, we return the original text unchanged and pass the
548 | # first character as filtered to prevent an infinite loop in the
549 | # parent function.
550 | #
551 | return array($original_text{0}, substr($original_text, 1));
552 | }
553 |
554 | $block_text .= $parts[0]; # Text before current tag.
555 | $tag = $parts[1]; # Tag to handle.
556 | $text = $parts[2]; # Remaining text after current tag.
557 |
558 | #
559 | # Check for: Auto-close tag (like
)
560 | # Comments and Processing Instructions.
561 | # NOTE(review): $tag{1} curly-brace offsets were removed in PHP 8.0; $tag[1] is the supported form.
562 | if (preg_match('{^?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
563 | $tag{1} == '!' || $tag{1} == '?')
564 | {
565 | # Just add the tag to the block as if it was text.
566 | $block_text .= $tag;
567 | }
568 | else {
569 | #
570 | # Increase/decrease nested tag count. Only do so if
571 | # the tag's name matches the base tag's; self-closing tags are not counted.
572 | #
573 | if (preg_match('{^?'.$base_tag_name_re.'\b}', $tag)) {
574 | if ($tag{1} == '/') $depth--;
575 | else if ($tag{strlen($tag)-2} != '/') $depth++;
576 | }
577 |
578 | #
579 | # Check for `markdown="1"` attribute and handle it.
580 | # NOTE(review): '/^1|block|span$/' anchors only its first and last alternative; confirm that is intended.
581 | if ($md_attr &&
582 | preg_match($markdown_attr_re, $tag, $attr_m) &&
583 | preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
584 | {
585 | # Remove `markdown` attribute from opening tag.
586 | $tag = preg_replace($markdown_attr_re, '', $tag);
587 |
588 | # Check if text inside this tag must be parsed in span mode.
589 | $this->mode = $attr_m[2] . $attr_m[3];
590 | $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
591 | preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);
592 |
593 | # Calculate indent before tag (leading spaces of the last line of $block_text).
594 | if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
595 | $strlen = $this->utf8_strlen;
596 | $indent = $strlen($matches[1], 'UTF-8');
597 | } else {
598 | $indent = 0;
599 | }
600 |
601 | # End preceding block with this tag.
602 | $block_text .= $tag;
603 | $parsed .= $this->$hash_method($block_text);
604 |
605 | # Get enclosing tag name for the ParseMarkdown function.
606 | # (This pattern makes $tag_name_re safe without quoting.)
607 | preg_match('/^<([\w:$]*)\b/', $tag, $matches);
608 | $tag_name_re = $matches[1];
609 |
610 | # Parse the content using the HTML-in-Markdown parser.
611 | list ($block_text, $text)
612 | = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
613 | $tag_name_re, $span_mode);
614 |
615 | # Outdent markdown text by up to $indent spaces per line.
616 | if ($indent > 0) {
617 | $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
618 | $block_text);
619 | }
620 |
621 | # Append tag content to parsed text (blank lines around it unless in span mode).
622 | if (!$span_mode) $parsed .= "\n\n$block_text\n\n";
623 | else $parsed .= "$block_text";
624 |
625 | # Start over a new block.
626 | $block_text = "";
627 | }
628 | else $block_text .= $tag;
629 | }
630 |
631 | } while ($depth > 0);
632 |
633 | #
634 | # Hash last block text that wasn't processed inside the loop.
635 | #
636 | $parsed .= $this->$hash_method($block_text);
637 |
638 | return array($parsed, $text);
639 | }
640 |
641 |
function hashClean($text) {
	#
	# Hash a "clean" tag: the text is passed to hashPart with the 'C'
	# boundary so that it is escaped as a unit, which blocks invalid
	# nested overlap.
	#
	$boundary = 'C';
	$hashed = $this->hashPart($text, $boundary);
	return $hashed;
}
650 |
651 |
652 | function doHeaders($text) {
653 | #
654 | # Redefined to add id attribute support: an optional trailing {#id} is captured for the callbacks.
655 | #
656 | # Setext-style headers (handled by _doHeaders_callback_setext):
657 | # Header 1 {#header1}
658 | # ========
659 | #
660 | # Header 2 {#header2}
661 | # --------
662 | #
663 | $text = preg_replace_callback(
664 | '{
665 | (^.+?) # $1: Header text
666 | (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # $2: Id attribute
667 | [ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer
668 | }mx',
669 | array(&$this, '_doHeaders_callback_setext'), $text);
670 |
671 | # atx-style headers (handled by _doHeaders_callback_atx):
672 | # # Header 1 {#header1}
673 | # ## Header 2 {#header2}
674 | # ## Header 2 with closing hashes ## {#header3}
675 | # ...
676 | # ###### Header 6 {#header6}
677 | #
678 | $text = preg_replace_callback('{
679 | ^(\#{1,6}) # $1 = string of #\'s
680 | [ ]*
681 | (.+?) # $2 = Header text
682 | [ ]*
683 | \#* # optional closing #\'s (not counted)
684 | (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
685 | [ ]*
686 | \n+
687 | }xm',
688 | array(&$this, '_doHeaders_callback_atx'), $text);
689 |
690 | return $text;
691 | }
function _doHeaders_attr($attr) {
	#
	# Build the ` id="..."` attribute text for a header.
	#
	# $attr is the id captured from a `{#id}` suffix, or null/'' when the
	# header has none. Returns '' when there is no id, otherwise the
	# attribute preceded by a single space.
	#
	# empty("0") is true in PHP, so a plain empty() test would silently
	# drop the id of a header written as `{#0}`; the string "0" is
	# therefore let through explicitly.
	#
	if (empty($attr) && $attr !== '0') return "";
	return " id=\"$attr\"";
}
696 | function _doHeaders_callback_setext($matches) {
697 | if ($matches[3] == '-' && preg_match('{^- }', $matches[1])) # Single '-' under text starting with "- ":
698 | return $matches[0]; # leave the match untouched.
699 | $level = $matches[3]{0} == '=' ? 1 : 2; # '=' underline is level 1, '-' is level 2. NOTE(review): {0} offsets were removed in PHP 8.0.
700 | $attr = $this->_doHeaders_attr($id =& $matches[2]); # presumably =& avoids a notice when no id was captured
701 | $block = "
".$this->runSpanGamut($matches[1]).""; # NOTE(review): $level and $attr are not referenced in this literal as shown; header markup may be missing from this copy.
702 | return "\n" . $this->hashBlock($block) . "\n\n";
703 | }
704 | function _doHeaders_callback_atx($matches) {
705 | $level = strlen($matches[1]); # Header level is the number of leading '#' characters.
706 | $attr = $this->_doHeaders_attr($id =& $matches[3]); # presumably =& avoids a notice when no id was captured
707 | $block = "
".$this->runSpanGamut($matches[2]).""; # NOTE(review): $level and $attr are not referenced in this literal as shown; header markup may be missing from this copy.
708 | return "\n" . $this->hashBlock($block) . "\n\n";
709 | }
710 |
711 |
712 | function doTables($text) {
713 | # Form HTML tables from pipe-delimited rows.
714 | # Tables with a leading pipe are matched first, then those without.
715 | #
716 | $less_than_tab = $this->tab_width - 1;
717 | #
718 | # Find tables with leading pipe.
719 | #
720 | # | Header 1 | Header 2
721 | # | -------- | --------
722 | # | Cell 1 | Cell 2
723 | # | Cell 3 | Cell 4
724 | #
725 | $text = preg_replace_callback('
726 | {
727 | ^ # Start of a line
728 | [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
729 | [|] # Optional leading pipe (present)
730 | (.+) \n # $1: Header row (at least one pipe)
731 |
732 | [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
733 | [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline
734 |
735 | ( # $3: Cells
736 | (?>
737 | [ ]* # Allowed whitespace.
738 | [|] .* \n # Row content.
739 | )*
740 | )
741 | (?=\n|\Z) # Stop at final double newline.
742 | }xm',
743 | array(&$this, '_doTable_leadingPipe_callback'), $text);
744 |
745 | #
746 | # Find tables without leading pipe.
747 | #
748 | # Header 1 | Header 2
749 | # -------- | --------
750 | # Cell 1 | Cell 2
751 | # Cell 3 | Cell 4
752 | #
753 | $text = preg_replace_callback('
754 | {
755 | ^ # Start of a line
756 | [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
757 | (\S.*[|].*) \n # $1: Header row (at least one pipe)
758 |
759 | [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
760 | ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline
761 |
762 | ( # $3: Cells
763 | (?>
764 | .* [|] .* \n # Row content
765 | )*
766 | )
767 | (?=\n|\Z) # Stop at final double newline.
768 | }xm',
769 | array(&$this, '_DoTable_callback'), $text); # Declared as _doTable_callback; PHP method names are case-insensitive.
770 |
771 | return $text;
772 | }
function _doTable_leadingPipe_callback($matches) {
	#
	# Callback for tables written with a leading pipe: strip that pipe
	# (and any spaces before it) from every body row, then delegate to
	# the generic table callback with the same four-element match array.
	#
	$rows_without_pipe = preg_replace('/^ *[|]/m', '', $matches[3]);

	$normalized = array(
		$matches[0],
		$matches[1],
		$matches[2],
		$rows_without_pipe,
	);
	return $this->_doTable_callback($normalized);
}
783 | function _doTable_callback($matches) {
784 | $head = $matches[1];
785 | $underline = $matches[2];
786 | $content = $matches[3];
787 |
788 | # Remove any trailing pipes for each line.
789 | $head = preg_replace('/[|] *$/m', '', $head);
790 | $underline = preg_replace('/[|] *$/m', '', $underline);
791 | $content = preg_replace('/[|] *$/m', '', $content);
792 |
793 | # Reading alignment from header underline (colon position per column).
794 | $separators = preg_split('/ *[|] */', $underline);
795 | foreach ($separators as $n => $s) {
796 | if (preg_match('/^ *-+: *$/', $s)) $attr[$n] = ' align="right"';
797 | else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"';
798 | else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"';
799 | else $attr[$n] = '';
800 | }
801 |
802 | # Parsing span elements, including code spans, character escapes,
803 | # and inline HTML tags, so that pipes inside those get ignored.
804 | $head = $this->parseSpan($head);
805 | $headers = preg_split('/ *[|] */', $head);
806 | $col_count = count($headers);
807 |
808 | # Write column headers. NOTE(review): $attr is not referenced in the literals below as shown; table markup may be missing from this copy.
809 | $text = "
\n";
810 | $text .= "\n";
811 | $text .= "\n";
812 | foreach ($headers as $n => $header)
813 | $text .= " ".$this->runSpanGamut(trim($header))." | \n";
814 | $text .= "
\n";
815 | $text .= "\n";
816 |
817 | # Split content by row.
818 | $rows = explode("\n", trim($content, "\n"));
819 |
820 | $text .= "\n";
821 | foreach ($rows as $row) {
822 | # Parsing span elements, including code spans, character escapes,
823 | # and inline HTML tags, so that pipes inside those get ignored.
824 | $row = $this->parseSpan($row);
825 |
826 | # Split row by cell: at most $col_count cells, padded with '' when short.
827 | $row_cells = preg_split('/ *[|] */', $row, $col_count);
828 | $row_cells = array_pad($row_cells, $col_count, '');
829 |
830 | $text .= "\n";
831 | foreach ($row_cells as $n => $cell)
832 | $text .= " ".$this->runSpanGamut(trim($cell))." | \n";
833 | $text .= "
\n";
834 | }
835 | $text .= "\n";
836 | $text .= "
";
837 |
838 | return $this->hashBlock($text) . "\n";
839 | }
840 |
841 |
842 | function doDefLists($text) {
843 | #
844 | # Form HTML definition lists: each whole list found is passed to _doDefLists_callback.
845 | #
846 | $less_than_tab = $this->tab_width - 1;
847 |
848 | # Re-usable pattern to match any entire dl list (used with the mx modifiers below):
849 | $whole_list_re = '(?>
850 | ( # $1 = whole list
851 | ( # $2
852 | [ ]{0,'.$less_than_tab.'}
853 | ((?>.*\S.*\n)+) # $3 = defined term
854 | \n?
855 | [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
856 | )
857 | (?s:.+?)
858 | ( # $4
859 | \z
860 | |
861 | \n{2,}
862 | (?=\S)
863 | (?! # Negative lookahead for another term
864 | [ ]{0,'.$less_than_tab.'}
865 | (?: \S.*\n )+? # defined term
866 | \n?
867 | [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
868 | )
869 | (?! # Negative lookahead for another definition
870 | [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
871 | )
872 | )
873 | )
874 | )'; // mx
875 |
876 | $text = preg_replace_callback('{
877 | (?>\A\n?|(?<=\n\n))
878 | '.$whole_list_re.'
879 | }mx',
880 | array(&$this, '_doDefLists_callback'), $text);
881 |
882 | return $text;
883 | }
884 | function _doDefLists_callback($matches) {
885 | # $matches[1] holds the whole definition list matched by doDefLists.
886 | $list = $matches[1];
887 |
888 | # Convert the terms and definitions, trim the result, then wrap it and
889 | # hash it as a block followed by a blank line.
890 | $result = trim($this->processDefListItems($list));
891 | $result = "
\n" . $result . "\n
";
892 | return $this->hashBlock($result) . "\n\n";
893 | }
894 |
895 |
896 | function processDefListItems($list_str) {
897 | #
898 | # Process the contents of a single definition list, splitting it
899 | # into individual term and definition list items.
900 | # Terms are converted first, then definitions; returns the converted string.
901 | $less_than_tab = $this->tab_width - 1;
902 |
903 | # Trim trailing blank lines, leaving a single final newline:
904 | $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
905 |
906 | # Process definition terms. NOTE(review): the lookahead below hard-codes [ ]{0,3} instead of using $less_than_tab.
907 | $list_str = preg_replace_callback('{
908 | (?>\A\n?|\n\n+) # leading line
909 | ( # definition terms = $1
910 | [ ]{0,'.$less_than_tab.'} # leading whitespace
911 | (?![:][ ]|[ ]) # negative lookahead for a definition
912 | # mark (colon) or more whitespace.
913 | (?> \S.* \n)+? # actual term (not whitespace).
914 | )
915 | (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed
916 | # with a definition mark.
917 | }xm',
918 | array(&$this, '_processDefListItems_callback_dt'), $list_str);
919 |
920 | # Process actual definitions (each one starts at a colon marker).
921 | $list_str = preg_replace_callback('{
922 | \n(\n+)? # leading line = $1
923 | ( # marker space = $2
924 | [ ]{0,'.$less_than_tab.'} # whitespace before colon
925 | [:][ ]+ # definition mark (colon)
926 | )
927 | ((?s:.+?)) # definition text = $3
928 | (?= \n+ # stop at next definition mark,
929 | (?: # next term or end of text
930 | [ ]{0,'.$less_than_tab.'} [:][ ] |
931 |
| \z
932 | )
933 | )
934 | }xm',
935 | array(&$this, '_processDefListItems_callback_dd'), $list_str);
936 |
937 | return $list_str;
938 | }
939 | function _processDefListItems_callback_dt($matches) {
940 | $terms = explode("\n", trim($matches[1])); # One term per line of the matched group.
941 | $text = '';
942 | foreach ($terms as $term) {
943 | $term = $this->runSpanGamut(trim($term)); # Each term is span-processed on its own.
944 | $text .= "\n" . $term . "";
945 | }
946 | return $text . "\n";
947 | }
948 | function _processDefListItems_callback_dd($matches) {
949 | $leading_line = $matches[1];
950 | $marker_space = $matches[2];
951 | $def = $matches[3];
952 |
953 | if ($leading_line || preg_match('/\n{2,}/', $def)) { # Preceded by a blank line, or containing one: block-level definition.
954 | # Replace marker with the appropriate whitespace indentation
955 | $def = str_repeat(' ', strlen($marker_space)) . $def;
956 | $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
957 | $def = "\n". $def ."\n";
958 | }
959 | else { # Otherwise the definition is processed as span-level text.
960 | $def = rtrim($def);
961 | $def = $this->runSpanGamut($this->outdent($def));
962 | }
963 |
964 | return "\n" . $def . "\n";
965 | }
966 |
967 |
968 | function doFencedCodeBlocks($text) {
969 | #
970 | # Adding the fenced code block syntax to regular Markdown:
971 | #
972 | # ~~~
973 | # Code block
974 | # ~~~
975 | # The closing marker must repeat the opening marker exactly (backreference \1).
976 | $less_than_tab = $this->tab_width; # NOTE(review): assigned but not used in this function.
977 |
978 | $text = preg_replace_callback('{
979 | (?:\n|\A)
980 | # 1: Opening marker
981 | (
982 | ~{3,} # Marker: three tilde or more.
983 | )
984 | [ ]* \n # Whitespace and newline following marker.
985 |
986 | # 2: Content
987 | (
988 | (?>
989 | (?!\1 [ ]* \n) # Not a closing marker.
990 | .*\n+
991 | )+
992 | )
993 |
994 | # Closing marker.
995 | \1 [ ]* \n
996 | }xm',
997 | array(&$this, '_doFencedCodeBlocks_callback'), $text);
998 |
999 | return $text;
1000 | }
1001 | function _doFencedCodeBlocks_callback($matches) {
1002 | $codeblock = $matches[2]; # Content between the opening and closing markers.
1003 | $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES); # ENT_NOQUOTES leaves quote characters unconverted.
1004 | $codeblock = preg_replace_callback('/^\n+/', # Leading newlines are rewritten by _doFencedCodeBlocks_newlines.
1005 | array(&$this, '_doFencedCodeBlocks_newlines'), $codeblock);
1006 | $codeblock = "
$codeblock
";
1007 | return "\n\n".$this->hashBlock($codeblock)."\n\n";
1008 | }
1009 | function _doFencedCodeBlocks_newlines($matches) {
1010 | return str_repeat("
empty_element_suffix",
1011 | strlen($matches[0])); # One repetition per character of the matched run. NOTE(review): the repeated literal looks truncated in this copy.
1012 | }
1013 |
1014 |
1015 | #
1016 | # Redefining emphasis markers so that emphasis by underscore does not
1017 | # work in the middle of a word.
1018 | #
1019 | var $em_relist = array(
1020 | '' => '(?:(? '(?<=\S|^)(? '(?<=\S|^)(? '(?:(? '(?<=\S|^)(? '(?<=\S|^)(? '(?:(? '(?<=\S|^)(? '(?<=\S|^)(? tags
1040 | #
1041 | # Strip leading and trailing lines:
1042 | $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1043 |
1044 | $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1045 |
1046 | #
1047 | # Wrap
tags and unhashify HTML blocks
1048 | #
1049 | foreach ($grafs as $key => $value) {
1050 | $value = trim($this->runSpanGamut($value));
1051 |
1052 | # Check if this should be enclosed in a paragraph.
1053 | # Clean tag hashes & block tag hashes are left alone.
1054 | $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
1055 |
1056 | if ($is_p) {
1057 | $value = "
$value
";
1058 | }
1059 | $grafs[$key] = $value;
1060 | }
1061 |
1062 | # Join grafs in one text, then unhash HTML tags.
1063 | $text = implode("\n\n", $grafs);
1064 |
1065 | # Finish by removing any tag hashes still present in $text.
1066 | $text = $this->unhash($text);
1067 |
1068 | return $text;
1069 | }
1070 |
1071 |
1072 | ### Footnotes
1073 |
1074 | function stripFootnotes($text) {
1075 | #
1076 | # Strips footnote definitions from text; the callback stores each
1077 | # footnote's text in $this->footnotes under its id.
1078 | #
1079 | $less_than_tab = $this->tab_width - 1;
1080 |
1081 | # Footnote defs are in the form: [^id]: footnote text
1082 | $text = preg_replace_callback('{
1083 | ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?: # note_id = $1
1084 | [ ]*
1085 | \n? # maybe *one* newline
1086 | ( # text = $2 (no blank lines allowed)
1087 | (?:
1088 | .+ # actual text
1089 | |
1090 | \n # newlines but
1091 | (?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
1092 | (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
1093 | # by non-indented content
1094 | )*
1095 | )
1096 | }xm',
1097 | array(&$this, '_stripFootnotes_callback'),
1098 | $text);
1099 | return $text;
1100 | }
function _stripFootnotes_callback($matches) {
	#
	# Record one footnote definition: the outdented footnote text is
	# stored in $this->footnotes under the id prefixed with
	# $this->fn_id_prefix, and the definition itself is replaced by
	# an empty string.
	#
	$key = $this->fn_id_prefix . $matches[1];
	$body = $this->outdent($matches[2]);
	$this->footnotes[$key] = $body;

	return '';
}
1106 |
1107 |
function doFootnotes($text) {
	#
	# Swap every footnote reference [^id] in $text for a special
	# text-token; appendFootnotes later turns the token into the actual
	# footnote marker. Text is returned untouched while inside an anchor.
	#
	if ($this->in_anchor) {
		return $text;
	}

	return preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
}
1118 |
1119 |
1120 | function appendFootnotes($text) {
1121 | #
1122 | # Replace footnote tokens with markers, then append the footnote list to text.
1123 | # NOTE(review): this copy jumps from line 1129 to 1174; the code that builds the list is not shown here.
1124 | $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
1125 | array(&$this, '_appendFootnotes_callback'), $text);
1126 |
1127 | if (!empty($this->footnotes_ordered)) {
1128 | $text .= "\n\n";
1129 | $text .= "";
1174 | }
1175 | return $text;
1176 | }
1177 | function _appendFootnotes_callback($matches) {
1178 | $node_id = $this->fn_id_prefix . $matches[1];
1179 |
1180 | # Create footnote marker only if it has a corresponding footnote *and*
1181 | # the footnote hasn't been used by another marker.
1182 | if (isset($this->footnotes[$node_id])) {
1183 | # Transfer footnote content to the ordered list.
1184 | $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
1185 | unset($this->footnotes[$node_id]);
1186 |
1187 | $num = $this->footnote_counter++; # Post-increment: this marker gets the current counter value.
1188 | $attr = " rel=\"footnote\"";
1189 | if ($this->fn_link_class != "") {
1190 | $class = $this->fn_link_class;
1191 | $class = $this->encodeAttribute($class);
1192 | $attr .= " class=\"$class\"";
1193 | }
1194 | if ($this->fn_link_title != "") {
1195 | $title = $this->fn_link_title;
1196 | $title = $this->encodeAttribute($title);
1197 | $attr .= " title=\"$title\"";
1198 | }
1199 |
1200 | $attr = str_replace("%%", $num, $attr); # "%%" in the class/title text is replaced by the footnote number.
1201 | $node_id = $this->encodeAttribute($node_id);
1202 |
1203 | return
1204 | "
".
1205 | "$num".
1206 | "";
1207 | }
1208 |
1209 | return "[^".$matches[1]."]"; # No unused footnote with that id: put the literal reference back.
1210 | }
1211 |
1212 |
1213 | ### Abbreviations ###
1214 |
1215 | function stripAbbreviations($text) {
1216 | #
1217 | # Strips abbreviation definitions from text; the callback records each word and its description.
1218 | #
1219 | $less_than_tab = $this->tab_width - 1;
1220 |
1221 | # Abbreviation defs are in the form: *[id]: description
1222 | $text = preg_replace_callback('{
1223 | ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?: # abbr_id = $1
1224 | (.*) # text = $2 (no blank lines allowed)
1225 | }xm',
1226 | array(&$this, '_stripAbbreviations_callback'),
1227 | $text);
1228 | return $text;
1229 | }
function _stripAbbreviations_callback($matches) {
	#
	# Record one abbreviation definition: the quoted word is appended to
	# the $this->abbr_word_re alternation (with a '|' separator when the
	# alternation already has content) and its trimmed description is
	# stored under the word. The definition is replaced by an empty string.
	#
	$word = $matches[1];

	$separator = $this->abbr_word_re ? '|' : '';
	$this->abbr_word_re .= $separator . preg_quote($word);

	$this->abbr_desciptions[$word] = trim($matches[2]);

	return '';
}
1239 |
1240 |
1241 | function doAbbreviations($text) {
1242 | #
1243 | # Find defined abbreviations in text and wrap them in
elements.
1244 | #
1245 | if ($this->abbr_word_re) { # Skipped entirely when no abbreviation pattern has been built.
1246 | // cannot use the /x modifier because abbr_word_re may
1247 | // contain significant spaces:
1248 | $text = preg_replace_callback('{'.
1249 | '(?abbr_word_re.')'.
1251 | '(?![\w\x1A])'.
1252 | '}',
1253 | array(&$this, '_doAbbreviations_callback'), $text);
1254 | }
1255 | return $text;
1256 | }
1257 | function _doAbbreviations_callback($matches) {
1258 | $abbr = $matches[0];
1259 | if (isset($this->abbr_desciptions[$abbr])) { # Descriptions are keyed by the exact matched text.
1260 | $desc = $this->abbr_desciptions[$abbr];
1261 | if (empty($desc)) { # NOTE(review): both branches hash the same literal as shown; the wrapping markup may be missing from this copy.
1262 | return $this->hashPart("$abbr");
1263 | } else {
1264 | $desc = $this->encodeAttribute($desc);
1265 | return $this->hashPart("$abbr");
1266 | }
1267 | } else {
1268 | return $matches[0]; # No stored description for this text: leave it unchanged.
1269 | }
1270 | }
1271 |
1272 | }
1273 |
1274 |
1275 | /*
1276 |
1277 | PHP Markdown Extra
1278 | ==================
1279 |
1280 | Description
1281 | -----------
1282 |
1283 | This is a PHP port of the original Markdown formatter written in Perl
1284 | by John Gruber. This special "Extra" version of PHP Markdown features
1285 | further enhancements to the syntax for making additional constructs
1286 | such as tables and definition list.
1287 |
1288 | Markdown is a text-to-HTML filter; it translates an easy-to-read /
1289 | easy-to-write structured text format into HTML. Markdown's text format
1290 | is most similar to that of plain text email, and supports features such
1291 | as headers, *emphasis*, code blocks, blockquotes, and links.
1292 |
1293 | Markdown's syntax is designed not as a generic markup language, but
1294 | specifically to serve as a front-end to (X)HTML. You can use span-level
1295 | HTML tags anywhere in a Markdown document, and you can use block level
1296 | HTML tags (like <div> and <table> as well).
1297 |
1298 | For more information about Markdown's syntax, see:
1299 |
1300 |
1301 |
1302 |
1303 | Bugs
1304 | ----
1305 |
1306 | To file bug reports please send email to:
1307 |
1308 |
1309 |
1310 | Please include with your report: (1) the example input; (2) the output you
1311 | expected; (3) the output Markdown actually produced.
1312 |
1313 |
1314 | Version History
1315 | ---------------
1316 |
1317 | See the readme file for detailed release notes for this version.
1318 |
1319 |
1320 | Copyright and License
1321 | ---------------------
1322 |
1323 | PHP Markdown & Extra
1324 | Copyright (c) 2004-2009 Michel Fortin
1325 |
1326 | All rights reserved.
1327 |
1328 | Based on Markdown
1329 | Copyright (c) 2003-2006 John Gruber
1330 |
1331 | All rights reserved.
1332 |
1333 | Redistribution and use in source and binary forms, with or without
1334 | modification, are permitted provided that the following conditions are
1335 | met:
1336 |
1337 | * Redistributions of source code must retain the above copyright notice,
1338 | this list of conditions and the following disclaimer.
1339 |
1340 | * Redistributions in binary form must reproduce the above copyright
1341 | notice, this list of conditions and the following disclaimer in the
1342 | documentation and/or other materials provided with the distribution.
1343 |
1344 | * Neither the name "Markdown" nor the names of its contributors may
1345 | be used to endorse or promote products derived from this software
1346 | without specific prior written permission.
1347 |
1348 | This software is provided by the copyright holders and contributors "as
1349 | is" and any express or implied warranties, including, but not limited
1350 | to, the implied warranties of merchantability and fitness for a
1351 | particular purpose are disclaimed. In no event shall the copyright owner
1352 | or contributors be liable for any direct, indirect, incidental, special,
1353 | exemplary, or consequential damages (including, but not limited to,
1354 | procurement of substitute goods or services; loss of use, data, or
1355 | profits; or business interruption) however caused and on any theory of
1356 | liability, whether in contract, strict liability, or tort (including
1357 | negligence or otherwise) arising in any way out of the use of this
1358 | software, even if advised of the possibility of such damage.
1359 |
1360 | */
1361 | ?>
--------------------------------------------------------------------------------
/src/dflydev/markdown/MarkdownParser.php:
--------------------------------------------------------------------------------
1 | ";
21 |
22 | /**
23 | * Default tab width for code blocks
24 | * @var integer
25 | */
26 | const DEFAULT_TAB_WIDTH = 4;
27 |
28 | /**
29 | * Configuration key for changing the empty element suffix
30 | * @var string
31 | */
32 | const CONFIG_EMPTY_ELEMENT_SUFFIX = 'config.emptyElementSuffix';
33 |
34 | /**
35 | * Configuration key for changing the tab width for code blocks
36 | * @var string
37 | */
38 | const CONFIG_TAB_WIDTH = 'config.tabWidth';
39 |
40 | # Regex to match balanced [brackets].
41 | # Needed to insert a maximum bracket depth while converting to PHP.
42 | var $nested_brackets_depth = 6;
43 | var $nested_brackets_re;
44 |
45 | var $nested_url_parenthesis_depth = 4;
46 | var $nested_url_parenthesis_re;
47 |
48 | # Table of hash values for escaped characters:
49 | var $escape_chars = '\`*_{}[]()>#+-.!';
50 | var $escape_chars_re;
51 |
52 | # Change to ">" for HTML output.
53 | var $empty_element_suffix = self::DEFAULT_EMPTY_ELEMENT_SUFFIX;
54 | var $tab_width = self::DEFAULT_TAB_WIDTH;
55 |
56 | # Change to `true` to disallow markup or entities.
57 | var $no_markup = false;
58 | var $no_entities = false;
59 |
60 | # Predefined urls and titles for reference links and images.
61 | var $predef_urls = array();
62 | var $predef_titles = array();
63 |
64 |
65 | function __construct(array $configuration = null)
66 | {
67 | #
68 | # Constructor. Calls _initDetab() and prepareItalicsAndBold(), builds the
69 | # depth-limited bracket/parenthesis regexes, sorts the gamuts and applies $configuration.
70 | $this->_initDetab();
71 | $this->prepareItalicsAndBold();
72 |
73 | $this->nested_brackets_re =
74 | str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
75 | str_repeat('\])*', $this->nested_brackets_depth);
76 |
77 | $this->nested_url_parenthesis_re =
78 | str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
79 | str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
80 |
81 | $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
82 |
83 | # Sort document, block, and span gamut in ascending priority order.
84 | asort($this->document_gamut);
85 | asort($this->block_gamut);
86 | asort($this->span_gamut);
87 | if ($configuration) { # null or an empty array: nothing to apply
88 | foreach ($configuration as $key => $value) {
89 | $this->configureMarkdownParser($key, $value);
90 | }
91 | }
92 | }
93 |
94 | /**
95 | * Apply a single configuration setting; unrecognised keys are silently ignored.
96 | * @param string $key One of the self::CONFIG_* keys
97 | * @param mixed $value Value stored as-is on the matching property (not validated)
98 | */
99 | public function configureMarkdownParser($key, $value)
100 | {
101 | switch($key) {
102 | case self::CONFIG_TAB_WIDTH:
103 | $this->tab_width = $value;
104 | break;
105 | case self::CONFIG_EMPTY_ELEMENT_SUFFIX:
106 | $this->empty_element_suffix = $value;
107 | break;
108 | default:
109 | // TODO: Warning? Unknown keys currently fall through without any report.
110 | break;
111 | }
112 | //
113 | }
114 |
115 | /**
116 | * Public entry point; returns the result of transform($text) unchanged.
117 | * @see dflydev\markdown.IMarkdownParser::transformMarkdown()
118 | */
119 | public function transformMarkdown($text)
120 | {
121 | return $this->transform($text);
122 | }
123 |
124 |
125 | # Internal hashes used during transformation.
126 | var $urls = array();
127 | var $titles = array();
128 | var $html_hashes = array();
129 |
130 | # Status flag to avoid invalid nesting.
131 | var $in_anchor = false;
132 |
133 |
134 | function setup() {
135 | #
136 | # Called before the transformation process starts to setup parser
137 | # states: seeds the URL/title tables from the predefined ones, empties
138 | # the HTML hash table and clears the in-anchor flag.
139 | #
140 | $this->urls = $this->predef_urls;
141 | $this->titles = $this->predef_titles;
142 | $this->html_hashes = array();
143 |
144 | $this->in_anchor = false; # was a throw-away local, so the property was never reset
145 | }
146 |
147 | function teardown() {
148 | #
149 | # Called after the transformation process to clear any variable
150 | # which may be taking up memory unnecessarily (urls, titles, html_hashes).
151 | #
152 | $this->urls = array();
153 | $this->titles = array();
154 | $this->html_hashes = array();
155 | }
156 |
157 |
158 | function transform($text) {
159 | #
160 | # Main function. Performs some preprocessing on the input text,
161 | # passes it through the document gamut and returns the result plus "\n".
162 | #
163 | $this->setup();
164 |
165 | # Remove a leading UTF-8 BOM and any \x1A byte (hashPart() uses \x1A inside its tokens).
166 | $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
167 |
168 | # Standardize line endings:
169 | # DOS to Unix and Mac to Unix
170 | $text = preg_replace('{\r\n?}', "\n", $text);
171 |
172 | # Make sure $text ends with a couple of newlines:
173 | $text .= "\n\n";
174 |
175 | # Convert all tabs to spaces.
176 | $text = $this->detab($text);
177 |
178 | # Turn block-level HTML blocks into hash entries
179 | $text = $this->hashHTMLBlocks($text);
180 |
181 | # Strip any lines consisting only of spaces (tabs were converted above).
182 | # This makes subsequent regexen easier to write, because we can
183 | # match consecutive blank lines with /\n+/ instead of something
184 | # contorted like /[ ]*\n+/ .
185 | $text = preg_replace('/^[ ]+$/m', '', $text);
186 |
187 | # Run document gamut methods (array keys are method names on $this).
188 | foreach ($this->document_gamut as $method => $priority) {
189 | $text = $this->$method($text);
190 | }
191 |
192 | $this->teardown();
193 |
194 | return $text . "\n";
195 | }
196 |
197 | var $document_gamut = array( # method name => priority; asort()ed in the constructor, so lower values run first
198 | # Strip link definitions, store in hashes.
199 | "stripLinkDefinitions" => 20,
200 |
201 | "runBasicBlockGamut" => 30,
202 | );
203 |
204 |
205 | function stripLinkDefinitions($text) {
206 | #
207 | # Strips link definitions from text, stores the URLs and titles in
208 | # $this->urls / $this->titles (via the callback) and returns the remaining text.
209 | #
210 | $less_than_tab = $this->tab_width - 1;
211 |
212 | # Link defs are in the form: ^[id]: url "optional title"
213 | $text = preg_replace_callback('{
214 | ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
215 | [ ]*
216 | \n? # maybe *one* newline
217 | [ ]*
218 | (?:
219 | <(.+?)> # url = $2
220 | |
221 | (\S+?) # url = $3
222 | )
223 | [ ]*
224 | \n? # maybe one newline
225 | [ ]*
226 | (?:
227 | (?<=\s) # lookbehind for whitespace
228 | ["(]
229 | (.*?) # title = $4
230 | [")]
231 | [ ]*
232 | )? # title is optional
233 | (?:\n+|\Z)
234 | }xm',
235 | array(&$this, '_stripLinkDefinitions_callback'),
236 | $text);
237 | return $text;
238 | }
239 | function _stripLinkDefinitions_callback($matches) { # preg callback used by stripLinkDefinitions()
240 | $link_id = strtolower($matches[1]); # ids are stored lower-cased
241 | $url = $matches[2] == '' ? $matches[3] : $matches[2]; # the <url> form ($2) wins over the bare form ($3)
242 | $this->urls[$link_id] = $url;
243 | $this->titles[$link_id] =& $matches[4]; # by reference: the optional title group may be absent
244 | return ''; # String that will replace the block
245 | }
246 |
247 |
248 | function hashHTMLBlocks($text) { # Replaces block-level HTML in $text with hash tokens; returns the new text
249 | if ($this->no_markup) return $text;
250 |
251 | $less_than_tab = $this->tab_width - 1;
252 |
253 | # Hashify HTML blocks:
254 | # We only want to do this for block-level HTML tags, such as headers,
255 | # lists, and tables. That's because we still want to wrap <p>s around
256 | # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
257 | # phrase emphasis, and spans. The list of tags we're looking for is
258 | # hard-coded:
259 | #
260 | # * List "a" is made of tags which can be both inline or block-level.
261 | # These will be treated block-level when the start tag is alone on
262 | # its line, otherwise they're not matched here and will be taken as
263 | # inline later.
264 | # * List "b" is made of tags which are always block-level;
265 | #
266 | $block_tags_a_re = 'ins|del';
267 | $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
268 | 'script|noscript|form|fieldset|iframe|math';
269 |
270 | # Regular expression for the content of a block tag.
271 | $nested_tags_level = 4;
272 | $attr = '
273 | (?> # optional tag attributes
274 | \s # starts with whitespace
275 | (?>
276 | [^>"/]+ # text outside quotes
277 | |
278 | /+(?!>) # slash not followed by ">"
279 | |
280 | "[^"]*" # text inside double quotes (tolerate ">")
281 | |
282 | \'[^\']*\' # text inside single quotes (tolerate ">")
283 | )*
284 | )?
285 | ';
286 | $content =
287 | str_repeat('
288 | (?>
289 | [^<]+ # content without tag
290 | |
291 | <\2 # nested opening tag
292 | '.$attr.' # attributes
293 | (?>
294 | />
295 | |
296 | >', $nested_tags_level). # end of opening tag
297 | '.*?'. # last level nested tag content
298 | str_repeat('
299 | </\2\s*> # closing nested tag
300 | )
301 | |
302 | <(?!/\2\s*> # other tags with a different name
303 | )
304 | )*',
305 | $nested_tags_level);
306 | $content2 = str_replace('\2', '\3', $content);
307 |
308 | # First, look for nested blocks, e.g.:
309 | # <div>
310 | # <div>
311 | # tags for inner block must be indented.
312 | # </div>
313 | # </div>
314 | #
315 | # The outermost tags must start at the left margin for this to match, and
316 | # the inner nested divs must be indented.
317 | # We need to do this before the next, more liberal match, because the next
318 | # match will start at the first `<div>` and stop at the first `</div>`.
319 | $text = preg_replace_callback('{(?>
320 | (?>
321 | (?<=\n\n) # Starting after a blank line
322 | | # or
323 | \A\n? # the beginning of the doc
324 | )
325 | ( # save in $1
326 |
327 | # Match from `\n<tag>` to `</tag>\n`, handling nested tags
328 | # in between.
329 |
330 | [ ]{0,'.$less_than_tab.'}
331 | <('.$block_tags_b_re.')# start tag = $2
332 | '.$attr.'> # attributes followed by > and \n
333 | '.$content.' # content, support nesting
334 | </\2> # the matching end tag
335 | [ ]* # trailing spaces/tabs
336 | (?=\n+|\Z) # followed by a newline or end of document
337 |
338 | | # Special version for tags of group a.
339 |
340 | [ ]{0,'.$less_than_tab.'}
341 | <('.$block_tags_a_re.')# start tag = $3
342 | '.$attr.'>[ ]*\n # attributes followed by >
343 | '.$content2.' # content, support nesting
344 | </\3> # the matching end tag
345 | [ ]* # trailing spaces/tabs
346 | (?=\n+|\Z) # followed by a newline or end of document
347 |
348 | | # Special case just for <hr />. It was easier to make a special
349 | # case than to make the other regex more complicated.
350 |
351 | [ ]{0,'.$less_than_tab.'}
352 | <(hr) # start tag = $2
353 | '.$attr.' # attributes
354 | /?> # the matching end tag
355 | [ ]*
356 | (?=\n{2,}|\Z) # followed by a blank line or end of document
357 |
358 | | # Special case for standalone HTML comments:
359 |
360 | [ ]{0,'.$less_than_tab.'}
361 | (?s:
362 | <!-- .*? -->
363 | )
364 | [ ]*
365 | (?=\n{2,}|\Z) # followed by a blank line or end of document
366 |
367 | | # PHP and ASP-style processor instructions (<? and <%)
368 |
369 | [ ]{0,'.$less_than_tab.'}
370 | (?s:
371 | <([?%]) # $2
372 | .*?
373 | \2>
374 | )
375 | [ ]*
376 | (?=\n{2,}|\Z) # followed by a blank line or end of document
377 |
378 | )
379 | )}Sxmi',
380 | array(&$this, '_hashHTMLBlocks_callback'),
381 | $text);
382 |
383 | return $text;
384 | }
385 | function _hashHTMLBlocks_callback($matches) { # preg callback used by hashHTMLBlocks()
386 | $text = $matches[1]; # $1 is the whole HTML block
387 | $key = $this->hashBlock($text);
388 | return "\n\n$key\n\n"; # blank lines around the token keep it a separate block
389 | }
390 |
391 |
392 | function hashPart($text, $boundary = 'X') {
393 | #
394 | # Called whenever a tag must be hashed when a function inserts an atomic
395 | # element in the text stream. Passing $text through this function gives
396 | # a unique text-token which will be reverted back when calling unhash.
397 | #
398 | # The $boundary argument specifies what character should be used to surround
399 | # the token. By convention, "B" is used for block elements that need not
400 | # be wrapped into paragraph tags at the end, ":" is used for elements
401 | # that are word separators and "X" is used in the general case.
402 | #
403 | # Swap back any tag hash found in $text so we do not have to `unhash`
404 | # multiple times at the end.
405 | $text = $this->unhash($text);
406 |
407 | # Then hash the block. The counter is a function-level static, so it keeps growing across calls.
408 | static $i = 0;
409 | $key = "$boundary\x1A" . ++$i . $boundary;
410 | $this->html_hashes[$key] = $text;
411 | return $key; # String that will replace the tag.
412 | }
413 |
414 |
415 | function hashBlock($text) {
416 | #
417 | # Shortcut function for hashPart with block-level boundaries ('B'); returns the token.
418 | #
419 | return $this->hashPart($text, 'B');
420 | }
421 |
422 |
423 | var $block_gamut = array( # method name => priority; asort()ed in the constructor, so lower values run first
424 | #
425 | # These are all the transformations that form block-level
426 | # tags like paragraphs, headers, and list items.
427 | #
428 | "doHeaders" => 10,
429 | "doHorizontalRules" => 20,
430 |
431 | "doLists" => 40,
432 | "doCodeBlocks" => 50,
433 | "doBlockQuotes" => 60,
434 | );
435 |
436 | function runBlockGamut($text) {
437 | #
438 | # Run block gamut transformations.
439 | #
440 | # We need to escape raw HTML in Markdown source before doing anything
441 | # else. This needs to be done for each block, and not only at the
442 | # beginning in the Markdown function since hashed blocks can be part of
443 | # list items and could have been indented. Indented blocks would have
444 | # been seen as a code block in a previous pass of hashHTMLBlocks.
445 | $text = $this->hashHTMLBlocks($text);
446 |
447 | return $this->runBasicBlockGamut($text);
448 | }
449 |
450 | function runBasicBlockGamut($text) {
451 | #
452 | # Run block gamut transformations, without hashing HTML blocks. This is
453 | # useful when HTML blocks are known to be already hashed, like in the first
454 | # whole-document pass.
455 | #
456 | foreach ($this->block_gamut as $method => $priority) { # keys are method names on $this
457 | $text = $this->$method($text);
458 | }
459 |
460 | # Finally form paragraph and restore hashed blocks.
461 | $text = $this->formParagraphs($text);
462 |
463 | return $text;
464 | }
465 |
466 |
467 | function doHorizontalRules($text) {
468 | # Do Horizontal Rules: replace each rule line with one hashed <hr> element (built once, reused for every match).
469 | return preg_replace(
470 | '{
471 | ^[ ]{0,3} # Leading space
472 | ([-*_]) # $1: First marker
473 | (?> # Repeated marker group
474 | [ ]{0,2} # Zero, one, or two spaces.
475 | \1 # Marker character
476 | ){2,} # Group repeated at least twice
477 | [ ]* # Tailing spaces
478 | $ # End of line.
479 | }mx',
480 | "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
481 | $text);
482 | }
483 |
484 |
485 | var $span_gamut = array( # method name => priority; asort()ed in the constructor, so lower values run first
486 | #
487 | # These are all the transformations that occur *within* block-level
488 | # tags like paragraphs, headers, and list items.
489 | #
490 | # Process character escapes, code spans, and inline HTML
491 | # in one shot.
492 | "parseSpan" => -30,
493 |
494 | # Process anchor and image tags. Images must come first,
495 | # because ![foo][f] looks like an anchor.
496 | "doImages" => 10,
497 | "doAnchors" => 20,
498 |
499 | # Make links out of things like `<http://example.com/>`
500 | # Must come after doAnchors, because you can use < and >
501 | # delimiters in inline links like [this](<url>).
502 | "doAutoLinks" => 30,
503 | "encodeAmpsAndAngles" => 40,
504 |
505 | "doItalicsAndBold" => 50,
506 | "doHardBreaks" => 60,
507 | );
508 |
509 | function runSpanGamut($text) {
510 | #
511 | # Run span gamut transformations and return the transformed text.
512 | #
513 | foreach ($this->span_gamut as $method => $priority) { # keys are method names on $this
514 | $text = $this->$method($text);
515 | }
516 |
517 | return $text;
518 | }
519 |
520 |
521 | function doHardBreaks($text) {
522 | # Do hard breaks: two or more spaces before a newline are handed to the callback.
523 | return preg_replace_callback('/ {2,}\n/',
524 | array(&$this, '_doHardBreaks_callback'), $text);
525 | }
526 | function _doHardBreaks_callback($matches) { # preg callback: returns a hashed <br> element followed by a newline
527 | return $this->hashPart("<br$this->empty_element_suffix\n");
528 | }
529 |
530 |
531 | function doAnchors($text) {
532 | #
533 | # Turn Markdown link shortcuts into XHTML <a> tags.
534 | #
535 | if ($this->in_anchor) return $text; # already inside a link: leave the text alone to avoid nested anchors
536 | $this->in_anchor = true;
537 |
538 | #
539 | # First, handle reference-style links: [link text] [id]
540 | #
541 | $text = preg_replace_callback('{
542 | ( # wrap whole match in $1
543 | \[
544 | ('.$this->nested_brackets_re.') # link text = $2
545 | \]
546 |
547 | [ ]? # one optional space
548 | (?:\n[ ]*)? # one optional newline followed by spaces
549 |
550 | \[
551 | (.*?) # id = $3
552 | \]
553 | )
554 | }xs',
555 | array(&$this, '_doAnchors_reference_callback'), $text);
556 |
557 | #
558 | # Next, inline-style links: [link text](url "optional title")
559 | #
560 | $text = preg_replace_callback('{
561 | ( # wrap whole match in $1
562 | \[
563 | ('.$this->nested_brackets_re.') # link text = $2
564 | \]
565 | \( # literal paren
566 | [ \n]*
567 | (?:
568 | <(.+?)> # href = $3
569 | |
570 | ('.$this->nested_url_parenthesis_re.') # href = $4
571 | )
572 | [ \n]*
573 | ( # $5
574 | ([\'"]) # quote char = $6
575 | (.*?) # Title = $7
576 | \6 # matching quote
577 | [ \n]* # ignore any spaces/tabs between closing quote and )
578 | )? # title is optional
579 | \)
580 | )
581 | }xs',
582 | array(&$this, '_doAnchors_inline_callback'), $text);
583 |
584 | #
585 | # Last, handle reference-style shortcuts: [link text]
586 | # These must come last in case you've also got [link text][1]
587 | # or [link text](/foo)
588 | #
589 | $text = preg_replace_callback('{
590 | ( # wrap whole match in $1
591 | \[
592 | ([^\[\]]+) # link text = $2; can\'t contain [ or ]
593 | \]
594 | )
595 | }xs',
596 | array(&$this, '_doAnchors_reference_callback'), $text);
597 |
598 | $this->in_anchor = false;
599 | return $text;
600 | }
601 | function _doAnchors_reference_callback($matches) { # preg callback: builds a hashed <a> for a known link id, else returns the match untouched
602 | $whole_match = $matches[1];
603 | $link_text = $matches[2];
604 | $link_id =& $matches[3]; # by reference: group 3 is absent for the [text] shortcut form
605 |
606 | if ($link_id == "") {
607 | # for shortcut links like [this][] or [this].
608 | $link_id = $link_text;
609 | }
610 |
611 | # lower-case and turn embedded newlines into spaces
612 | $link_id = strtolower($link_id);
613 | $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
614 |
615 | if (isset($this->urls[$link_id])) {
616 | $url = $this->urls[$link_id];
617 | $url = $this->encodeAttribute($url);
618 |
619 | $result = "<a href=\"$url\"";
620 | if ( isset( $this->titles[$link_id] ) ) {
621 | $title = $this->titles[$link_id];
622 | $title = $this->encodeAttribute($title);
623 | $result .= " title=\"$title\"";
624 | }
625 |
626 | $link_text = $this->runSpanGamut($link_text);
627 | $result .= ">$link_text</a>";
628 | $result = $this->hashPart($result);
629 | }
630 | else {
631 | $result = $whole_match;
632 | }
633 | return $result;
634 | }
635 | function _doAnchors_inline_callback($matches) { # preg callback: builds a hashed <a> element for [text](url "title")
636 | $whole_match = $matches[1];
637 | $link_text = $this->runSpanGamut($matches[2]);
638 | $url = $matches[3] == '' ? $matches[4] : $matches[3]; # the <url> form ($3) wins over the bare form ($4)
639 | $title =& $matches[7]; # by reference: the title group is optional
640 |
641 | $url = $this->encodeAttribute($url);
642 |
643 | $result = "<a href=\"$url\"";
644 | if (isset($title)) {
645 | $title = $this->encodeAttribute($title);
646 | $result .= " title=\"$title\"";
647 | }
648 |
649 | $link_text = $this->runSpanGamut($link_text);
650 | $result .= ">$link_text</a>";
651 |
652 | return $this->hashPart($result);
653 | }
654 |
655 |
656 | function doImages($text) {
657 | #
658 | # Turn Markdown image shortcuts into <img> tags.
659 | #
660 | #
661 | # First, handle reference-style labeled images: ![alt text][id]
662 | #
663 | $text = preg_replace_callback('{
664 | ( # wrap whole match in $1
665 | !\[
666 | ('.$this->nested_brackets_re.') # alt text = $2
667 | \]
668 |
669 | [ ]? # one optional space
670 | (?:\n[ ]*)? # one optional newline followed by spaces
671 |
672 | \[
673 | (.*?) # id = $3
674 | \]
675 |
676 | )
677 | }xs',
678 | array(&$this, '_doImages_reference_callback'), $text);
679 |
680 | #
681 | # Next, handle inline images: ![alt text](url "optional title")
682 | # Don't forget: encode * and _
683 | #
684 | $text = preg_replace_callback('{
685 | ( # wrap whole match in $1
686 | !\[
687 | ('.$this->nested_brackets_re.') # alt text = $2
688 | \]
689 | \s? # One optional whitespace character
690 | \( # literal paren
691 | [ \n]*
692 | (?:
693 | <(\S*)> # src url = $3
694 | |
695 | ('.$this->nested_url_parenthesis_re.') # src url = $4
696 | )
697 | [ \n]*
698 | ( # $5
699 | ([\'"]) # quote char = $6
700 | (.*?) # title = $7
701 | \6 # matching quote
702 | [ \n]*
703 | )? # title is optional
704 | \)
705 | )
706 | }xs',
707 | array(&$this, '_doImages_inline_callback'), $text);
708 |
709 | return $text;
710 | }
711 | function _doImages_reference_callback($matches) { # preg callback: builds a hashed <img> for a known link id
712 | $whole_match = $matches[1];
713 | $alt_text = $matches[2];
714 | $link_id = strtolower($matches[3]);
715 |
716 | if ($link_id == "") {
717 | $link_id = strtolower($alt_text); # for shortcut links like ![this][].
718 | }
719 |
720 | $alt_text = $this->encodeAttribute($alt_text);
721 | if (isset($this->urls[$link_id])) {
722 | $url = $this->encodeAttribute($this->urls[$link_id]);
723 | $result = "<img src=\"$url\" alt=\"$alt_text\"";
724 | if (isset($this->titles[$link_id])) {
725 | $title = $this->titles[$link_id];
726 | $title = $this->encodeAttribute($title);
727 | $result .= " title=\"$title\"";
728 | }
729 | $result .= $this->empty_element_suffix;
730 | $result = $this->hashPart($result);
731 | }
732 | else {
733 | # If there's no such link ID, leave intact:
734 | $result = $whole_match;
735 | }
736 |
737 | return $result;
738 | }
739 | function _doImages_inline_callback($matches) { # preg callback: builds a hashed <img> for ![alt](url "title")
740 | $whole_match = $matches[1];
741 | $alt_text = $matches[2];
742 | $url = $matches[3] == '' ? $matches[4] : $matches[3]; # the <url> form ($3) wins over the bare form ($4)
743 | $title =& $matches[7]; # by reference: the title group is optional
744 |
745 | $alt_text = $this->encodeAttribute($alt_text);
746 | $url = $this->encodeAttribute($url);
747 | $result = "<img src=\"$url\" alt=\"$alt_text\"";
748 | if (isset($title)) {
749 | $title = $this->encodeAttribute($title);
750 | $result .= " title=\"$title\""; # $title already quoted
751 | }
752 | $result .= $this->empty_element_suffix;
753 |
754 | return $this->hashPart($result);
755 | }
756 |
757 |
758 | function doHeaders($text) {
759 | # Setext-style headers (handled first, by _doHeaders_callback_setext):
760 | # Header 1
761 | # ========
762 | #
763 | # Header 2
764 | # --------
765 | #
766 | $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
767 | array(&$this, '_doHeaders_callback_setext'), $text);
768 |
769 | # atx-style headers (handled by _doHeaders_callback_atx):
770 | # # Header 1
771 | # ## Header 2
772 | # ## Header 2 with closing hashes ##
773 | # ...
774 | # ###### Header 6
775 | #
776 | $text = preg_replace_callback('{
777 | ^(\#{1,6}) # $1 = string of #\'s
778 | [ ]*
779 | (.+?) # $2 = Header text
780 | [ ]*
781 | \#* # optional closing #\'s (not counted)
782 | \n+
783 | }xm',
784 | array(&$this, '_doHeaders_callback_atx'), $text);
785 |
786 | return $text;
787 | }
788 | function _doHeaders_callback_setext($matches) { # preg callback: "=" underline gives <h1>, "-" underline gives <h2>
789 | # Terrible hack to check we haven't found an empty list item.
790 | if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
791 | return $matches[0];
792 |
793 | $level = $matches[2][0] == '=' ? 1 : 2; # [0], not {0}: curly-brace offsets are a fatal error on PHP 8
794 | $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
795 | return "\n" . $this->hashBlock($block) . "\n\n";
796 | }
797 | function _doHeaders_callback_atx($matches) { # preg callback: the number of leading #s is the header level
798 | $level = strlen($matches[1]);
799 | $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
800 | return "\n" . $this->hashBlock($block) . "\n\n";
801 | }
802 |
803 |
804 | function doLists($text) {
805 | #
806 | # Form HTML ordered (numbered) and unordered (bulleted) lists.
807 | #
808 | $less_than_tab = $this->tab_width - 1;
809 |
810 | # Re-usable patterns to match list item bullets and number markers:
811 | $marker_ul_re = '[*+-]';
812 | $marker_ol_re = '\d+[\.]';
813 | $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
814 |
815 | $markers_relist = array( # marker of the list being matched => marker of the other kind
816 | $marker_ul_re => $marker_ol_re,
817 | $marker_ol_re => $marker_ul_re,
818 | );
819 |
820 | foreach ($markers_relist as $marker_re => $other_marker_re) {
821 | # Re-usable pattern to match any entire ul or ol list:
822 | $whole_list_re = '
823 | ( # $1 = whole list
824 | ( # $2
825 | ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces
826 | ('.$marker_re.') # $4 = first list item marker
827 | [ ]+
828 | )
829 | (?s:.+?)
830 | ( # $5
831 | \z
832 | |
833 | \n{2,}
834 | (?=\S)
835 | (?! # Negative lookahead for another list item marker
836 | [ ]*
837 | '.$marker_re.'[ ]+
838 | )
839 | |
840 | (?= # Lookahead for another kind of list
841 | \n
842 | \3 # Must have the same indentation
843 | '.$other_marker_re.'[ ]+
844 | )
845 | )
846 | )
847 | '; // mx
848 |
849 | # We use a different prefix before nested lists than top-level lists.
850 | # See extended comment in processListItems().
851 |
852 | if ($this->list_level) {
853 | $text = preg_replace_callback('{
854 | ^
855 | '.$whole_list_re.'
856 | }mx',
857 | array(&$this, '_doLists_callback'), $text);
858 | }
859 | else {
860 | $text = preg_replace_callback('{
861 | (?:(?<=\n)\n|\A\n?) # Must eat the newline
862 | '.$whole_list_re.'
863 | }mx',
864 | array(&$this, '_doLists_callback'), $text);
865 | }
866 | }
867 |
868 | return $text;
869 | }
870 | function _doLists_callback($matches) { # preg callback: wraps the processed items in a hashed <ul> or <ol>
871 | # Re-usable patterns to match list item bullets and number markers:
872 | $marker_ul_re = '[*+-]';
873 | $marker_ol_re = '\d+[\.]';
874 | $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
875 |
876 | $list = $matches[1];
877 | $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
878 |
879 | $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re ); # only the markers of this list type split items
880 |
881 | $list .= "\n";
882 | $result = $this->processListItems($list, $marker_any_re);
883 |
884 | $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
885 | return "\n". $result ."\n\n";
886 | }
887 |
888 | var $list_level = 0;
889 |
890 | function processListItems($list_str, $marker_any_re) {
891 | #
892 | # Process the contents of a single ordered or unordered list, splitting it
893 | # into individual list items.
894 | #
895 | # The $this->list_level property keeps track of when we're inside a list.
896 | # Each time we enter a list, we increment it; when we leave a list,
897 | # we decrement. If it's zero, we're not in a list anymore.
898 | #
899 | # We do this because when we're not inside a list, we want to treat
900 | # something like this:
901 | #
902 | # I recommend upgrading to version
903 | # 8. Oops, now this line is treated
904 | # as a sub-list.
905 | #
906 | # As a single paragraph, despite the fact that the second line starts
907 | # with a digit-period-space sequence.
908 | #
909 | # Whereas when we're inside a list (or sub-list), that line will be
910 | # treated as the start of a sub-list. What a kludge, huh? This is
911 | # an aspect of Markdown's syntax that's hard to parse perfectly
912 | # without resorting to mind-reading. Perhaps the solution is to
913 | # change the syntax rules such that sub-lists must start with a
914 | # starting cardinal number; e.g. "1." or "a.".
915 |
916 | $this->list_level++;
917 |
918 | # trim trailing blank lines:
919 | $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
920 |
921 | $list_str = preg_replace_callback('{
922 | (\n)? # leading line = $1
923 | (^[ ]*) # leading whitespace = $2
924 | ('.$marker_any_re.' # list marker and space = $3
925 | (?:[ ]+|(?=\n)) # space only required if item is not empty
926 | )
927 | ((?s:.*?)) # list item text = $4
928 | (?:(\n+(?=\n))|\n) # tailing blank line = $5
929 | (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
930 | }xm',
931 | array(&$this, '_processListItems_callback'), $list_str);
932 |
933 | $this->list_level--;
934 | return $list_str;
935 | }
936 | function _processListItems_callback($matches) { # preg callback: renders one list item as <li>...</li>
937 | $item = $matches[4];
938 | $leading_line =& $matches[1];
939 | $leading_space =& $matches[2];
940 | $marker_space = $matches[3];
941 | $tailing_blank_line =& $matches[5];
942 |
943 | if ($leading_line || $tailing_blank_line ||
944 | preg_match('/\n{2,}/', $item))
945 | { # blank lines around or inside the item: process it as block-level content
946 | # Replace marker with the appropriate whitespace indentation
947 | $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
948 | $item = $this->runBlockGamut($this->outdent($item)."\n");
949 | }
950 | else {
951 | # Recursion for sub-lists:
952 | $item = $this->doLists($this->outdent($item));
953 | $item = preg_replace('/\n+$/', '', $item);
954 | $item = $this->runSpanGamut($item);
955 | }
956 |
957 | return "<li>" . $item . "</li>\n";
958 | }
959 |
960 |
961 | function doCodeBlocks($text) {
962 | #
963 | # Process Markdown `<pre><code>` blocks.
964 | #
965 | $text = preg_replace_callback('{
966 | (?:\n\n|\A\n?)
967 | ( # $1 = the code block -- one or more lines, starting with a space/tab
968 | (?>
969 | [ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces
970 | .*\n+
971 | )+
972 | )
973 | ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
974 | }xm',
975 | array(&$this, '_doCodeBlocks_callback'), $text);
976 |
977 | return $text;
978 | }
979 | function _doCodeBlocks_callback($matches) { # preg callback: returns a hashed <pre><code> block
980 | $codeblock = $matches[1];
981 |
982 | $codeblock = $this->outdent($codeblock);
983 | $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
984 |
985 | # trim leading newlines and trailing newlines
986 | $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
987 |
988 | $codeblock = "<pre><code>$codeblock\n</code></pre>";
989 | return "\n\n".$this->hashBlock($codeblock)."\n\n";
990 | }
992 |
993 | function makeCodeSpan($code) {
994 | #
995 | # Create a code span markup for $code: trims and HTML-escapes it, wraps it in <code> and returns the hash token.
996 | #
997 | $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
998 | return $this->hashPart("<code>$code</code>");
999 | }
1000 |
1001 |
1002 | var $em_relist = array(
1003 | '' => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S|$)(?![\.,:;]\s)',
1004 | '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
1005 | '_' => '(?<=\S|^)(?<!_)_(?!_)',
1006 | );
1007 | var $strong_relist = array(
1008 | '' => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S|$)(?![\.,:;]\s)',
1009 | '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
1010 | '__' => '(?<=\S|^)(?<!_)__(?!_)',
1011 | );
1012 | var $em_strong_relist = array(
1013 | '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S|$)(?![\.,:;]\s)',
1014 | '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
1015 | '___' => '(?<=\S|^)(?<!_)___(?!_)',
1016 | );
1017 | var $em_strong_prepared_relist;
1018 |
1019 | function prepareItalicsAndBold() {
1020 | #
1021 | # Prepare regular expressions for searching emphasis tokens in any
1022 | # context; one combined pattern is stored per "$em$strong" state key.
1023 | #
1024 | foreach ($this->em_relist as $em => $em_re) {
1025 | foreach ($this->strong_relist as $strong => $strong_re) {
1026 | # Construct list of allowed token expressions.
1027 | $token_relist = array();
1028 | if (isset($this->em_strong_relist["$em$strong"])) {
1029 | $token_relist[] = $this->em_strong_relist["$em$strong"];
1030 | }
1031 | $token_relist[] = $em_re;
1032 | $token_relist[] = $strong_re;
1033 |
1034 | # Construct master expression from list.
1035 | $token_re = '{('. implode('|', $token_relist) .')}';
1036 | $this->em_strong_prepared_relist["$em$strong"] = $token_re;
1037 | }
1038 | }
1039 | }
1040 |
1041 | function doItalicsAndBold($text) { # Converts emphasis markers in $text into hashed <em>/<strong> spans; returns the new text
1042 | $token_stack = array('');
1043 | $text_stack = array('');
1044 | $em = '';
1045 | $strong = '';
1046 | $tree_char_em = false;
1047 |
1048 | while (1) {
1049 | #
1050 | # Get prepared regular expression for searching emphasis tokens
1051 | # in current context.
1052 | #
1053 | $token_re = $this->em_strong_prepared_relist["$em$strong"];
1054 |
1055 | #
1056 | # Each loop iteration searches for the next emphasis token and
1057 | # handles it according to the current $em / $strong state.
1058 | #
1059 | $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1060 | $text_stack[0] .= $parts[0];
1061 | $token =& $parts[1];
1062 | $text =& $parts[2];
1063 |
1064 | if (empty($token)) {
1065 | # Reached end of text span: empty stack without emitting.
1066 | # any more emphasis.
1067 | while ($token_stack[0]) {
1068 | $text_stack[1] .= array_shift($token_stack);
1069 | $text_stack[0] .= array_shift($text_stack);
1070 | }
1071 | break;
1072 | }
1073 |
1074 | $token_len = strlen($token);
1075 | if ($tree_char_em) {
1076 | # Reached closing marker while inside a three-char emphasis.
1077 | if ($token_len == 3) {
1078 | # Three-char closing marker, close em and strong.
1079 | array_shift($token_stack);
1080 | $span = array_shift($text_stack);
1081 | $span = $this->runSpanGamut($span);
1082 | $span = "<strong><em>$span</em></strong>";
1083 | $text_stack[0] .= $this->hashPart($span);
1084 | $em = '';
1085 | $strong = '';
1086 | } else {
1087 | # Other closing marker: close one em or strong and
1088 | # change current token state to match the other
1089 | $token_stack[0] = str_repeat($token[0], 3-$token_len);
1090 | $tag = $token_len == 2 ? "strong" : "em";
1091 | $span = $text_stack[0];
1092 | $span = $this->runSpanGamut($span);
1093 | $span = "<$tag>$span</$tag>";
1094 | $text_stack[0] = $this->hashPart($span);
1095 | $$tag = ''; # $$tag stands for $em or $strong
1096 | }
1097 | $tree_char_em = false;
1098 | } else if ($token_len == 3) {
1099 | if ($em) {
1100 | # Reached closing marker for both em and strong.
1101 | # Closing strong marker:
1102 | for ($i = 0; $i < 2; ++$i) {
1103 | $shifted_token = array_shift($token_stack);
1104 | $tag = strlen($shifted_token) == 2 ? "strong" : "em";
1105 | $span = array_shift($text_stack);
1106 | $span = $this->runSpanGamut($span);
1107 | $span = "<$tag>$span</$tag>";
1108 | $text_stack[0] .= $this->hashPart($span);
1109 | $$tag = ''; # $$tag stands for $em or $strong
1110 | }
1111 | } else {
1112 | # Reached opening three-char emphasis marker. Push on token
1113 | # stack; will be handled by the special condition above.
1114 | $em = $token[0];
1115 | $strong = "$em$em";
1116 | array_unshift($token_stack, $token);
1117 | array_unshift($text_stack, '');
1118 | $tree_char_em = true;
1119 | }
1120 | } else if ($token_len == 2) {
1121 | if ($strong) {
1122 | # Unwind any dangling emphasis marker:
1123 | if (strlen($token_stack[0]) == 1) {
1124 | $text_stack[1] .= array_shift($token_stack);
1125 | $text_stack[0] .= array_shift($text_stack);
1126 | }
1127 | # Closing strong marker:
1128 | array_shift($token_stack);
1129 | $span = array_shift($text_stack);
1130 | $span = $this->runSpanGamut($span);
1131 | $span = "<strong>$span</strong>";
1132 | $text_stack[0] .= $this->hashPart($span);
1133 | $strong = '';
1134 | } else {
1135 | array_unshift($token_stack, $token);
1136 | array_unshift($text_stack, '');
1137 | $strong = $token;
1138 | }
1139 | } else {
1140 | # Here $token_len == 1
1141 | if ($em) {
1142 | if (strlen($token_stack[0]) == 1) {
1143 | # Closing emphasis marker:
1144 | array_shift($token_stack);
1145 | $span = array_shift($text_stack);
1146 | $span = $this->runSpanGamut($span);
1147 | $span = "<em>$span</em>";
1148 | $text_stack[0] .= $this->hashPart($span);
1149 | $em = '';
1150 | } else {
1151 | $text_stack[0] .= $token;
1152 | }
1153 | } else {
1154 | array_unshift($token_stack, $token);
1155 | array_unshift($text_stack, '');
1156 | $em = $token;
1157 | }
1158 | }
1159 | }
1160 | return $text_stack[0];
1161 | }
1162 |
1163 |
function doBlockQuotes($text) {
#
# Locate every run of consecutive ">"-prefixed lines (plus the lines and
# blank lines that follow within the same run) and replace it with the
# result of _doBlockQuotes_callback.
#
    $blockquote_re = '/
        (                           # Wrap whole match in $1
            (?>
                ^[ ]*>[ ]?          # ">" at the start of a line
                .+\n                # rest of the first line
                (.+\n)*             # subsequent consecutive lines
                \n*                 # blanks
            )+
        )
        /xm';

    return preg_replace_callback(
        $blockquote_re, array($this, '_doBlockQuotes_callback'), $text);
}
function _doBlockQuotes_callback($matches) {
#
# Convert one matched run of quoted lines into a hashed <blockquote> block.
#
# Params:
#   $matches - preg match array; $matches[1] is the quoted text with its
#              ">" markers still in place.
#
# Returns the hashed block surrounded by newlines.
#
    $bq = $matches[1];
    # Trim one level of quoting and empty out whitespace-only lines.
    $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
    $bq = $this->runBlockGamut($bq); # recurse

    # Indent every line of the rendered content.
    $bq = preg_replace('/^/m', " ", $bq);
    # These leading spaces cause problems with <pre> content,
    # so we need to fix that:
    $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
        array($this, '_doBlockQuotes_callback2'), $bq);

    return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
}
function _doBlockQuotes_callback2($matches) {
#
# Remove the single leading space from each line of the captured text
# ($matches[1]) and return the result.
#
    return preg_replace('/^ /m', '', $matches[1]);
}
1198 |
1199 |
function formParagraphs($text) {
#
# Params:
#   $text - string to process with html <p> tags
#
# Splits the text on blank lines. Chunks that are a block hash key
# (B, \x1A, digits, B) are replaced by the HTML stored in html_hashes;
# every other chunk is run through the span gamut and wrapped in <p>.
#
# Returns the chunks joined by a blank line.
#
    # Strip leading and trailing lines:
    $text = preg_replace('/\A\n+|\n+\z/', '', $text);

    $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    #
    # Wrap <p> tags and unhashify HTML blocks
    #
    foreach ($grafs as $key => $value) {
        if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
            # Is a paragraph.
            $value = $this->runSpanGamut($value);
            # Leading spaces are dropped in favour of the opening tag.
            $value = preg_replace('/^([ ]*)/', "<p>", $value);
            $value .= "</p>";
            $grafs[$key] = $this->unhash($value);
        }
        else {
            # Is a block: swap the hash key for the stored HTML.
            $grafs[$key] = $this->html_hashes[$value];
        }
    }

    return implode("\n\n", $grafs);
}
1268 |
1269 |
function encodeAttribute($text) {
#
# Encode text for a double-quoted HTML attribute. This function
# is *not* suitable for attributes enclosed in single quotes.
#
# Ampersands and angle brackets are handled by encodeAmpsAndAngles;
# double quotes are then replaced by the &quot; entity.
#
    $text = $this->encodeAmpsAndAngles($text);
    # A raw double quote would terminate the attribute value.
    $text = str_replace('"', '&quot;', $text);
    return $text;
}
1279 |
1280 |
function encodeAmpsAndAngles($text) {
#
# Smart processing for ampersands and angle brackets that need to
# be encoded. Valid character entities are left alone unless the
# no-entities mode is set.
#
# Returns the text with "&" replaced by "&amp;" (see above) and every
# "<" replaced by "&lt;".
#
    if ($this->no_entities) {
        $text = str_replace('&', '&amp;', $text);
    } else {
        # Ampersand-encoding based entirely on Nat Irons's Amputator
        # MT plugin: only encode "&" when it does not start something
        # that already looks like an entity.
        $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
                             '&amp;', $text);
    }
    # Encode remaining <'s
    $text = str_replace('<', '&lt;', $text);

    return $text;
}
1300 |
1301 |
function doAutoLinks($text) {
#
# Replace <scheme:...> URLs and <address@host> e-mail addresses written
# between angle brackets, delegating each match to the matching
# _doAutoLinks_*_callback method.
#
    $url_re = '{<((https?|ftp|dict):[^\'">\s]+)>}i';

    # Email addresses:
    $email_re = '{
        <
        (?:mailto:)?
        (
            (?:
                [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
            |
                ".*?"
            )
            \@
            (?:
                [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
            |
                \[[\d.a-fA-F:]+\]   # IPv4 & IPv6
            )
        )
        >
        }xi';

    $with_urls = preg_replace_callback(
        $url_re, array($this, '_doAutoLinks_url_callback'), $text);

    return preg_replace_callback(
        $email_re, array($this, '_doAutoLinks_email_callback'), $with_urls);
}
function _doAutoLinks_url_callback($matches) {
#
# Build the anchor for an automatic URL link. $matches[1] is the URL found
# between the angle brackets; it is attribute-encoded and used both as the
# href and as the link text. Returns the hashed anchor.
#
    $url = $this->encodeAttribute($matches[1]);
    $link = "<a href=\"$url\">$url</a>";
    return $this->hashPart($link);
}
function _doAutoLinks_email_callback($matches) {
#
# Encode the matched e-mail address ($matches[1]) with encodeEmailAddress
# and return the hashed result.
#
    return $this->hashPart($this->encodeEmailAddress($matches[1]));
}
1339 |
1340 |
function encodeEmailAddress($addr) {
#
# Input: an email address, e.g. "foo@example.com"
#
# Output: the email address as a mailto link, with each character
#   of the address encoded as either a decimal or hex entity, in
#   the hopes of foiling most address harvesting spam bots. The result
#   is an <a href="mailto:..."> element whose href and text are made of
#   a mix of raw characters, &#NNN; and &#xHH; entities.
#
# Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
# With some optimizations by Milian Wolff.
#
    $addr = "mailto:" . $addr;
    # Split into single characters (bytes).
    $chars = preg_split('/(?<!^)(?!$)/', $addr);
    # Deterministic seed, so the same address always encodes the same way.
    $seed = (int)abs(crc32($addr) / strlen($addr));

    foreach ($chars as $key => $char) {
        $ord = ord($char);
        # Ignore non-ascii chars.
        if ($ord < 128) {
            $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
            # roughly 10% raw, 45% hex, 45% dec
            # '@' *must* be encoded. I insist.
            if ($r > 90 && $char != '@') {
                # Leave the character as is.
            } else if ($r < 45) {
                $chars[$key] = '&#x'.dechex($ord).';';
            } else {
                $chars[$key] = '&#'.$ord.';';
            }
        }
    }

    $addr = implode('', $chars);
    $text = implode('', array_slice($chars, 7)); # text without `mailto:`
    $addr = "<a href=\"$addr\">$text</a>";

    return $addr;
}
1380 |
1381 |
function parseSpan($str) {
#
# Take the string $str and parse it into tokens, hashing embedded HTML,
# escaped characters and handling code spans.
#
# Returns the text with every recognized token replaced by whatever
# handleSpanToken returned for it.
#
    $output = '';

    $span_re = '{
            (
                \\\\'.$this->escape_chars_re.'
            |
                (?<![`\\\\])
                `+                      # code span marker
        '.( $this->no_markup ? '' : '
            |
                <!--    .*?     -->     # comment
            |
                <\?.*?\?> | <%.*?%>     # processing instruction
            |
                <[/!$]?[-a-zA-Z0-9:_]+  # regular tags
                (?>
                    \s
                    (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
                )?
                >
        ').'
            )
            }xs';

    while (1) {
        #
        # Each loop iteration searches for either the next tag, the next
        # opening code span marker, or the next escaped character.
        # Each token is then passed to handleSpanToken.
        #
        $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

        # Copy the text preceding the token verbatim.
        if ($parts[0] != "") {
            $output .= $parts[0];
        }

        # Check if we reach the end.
        if (isset($parts[1])) {
            # handleSpanToken takes the remaining text by reference and
            # may consume part of it (closing code span marker).
            $output .= $this->handleSpanToken($parts[1], $parts[2]);
            $str = $parts[2];
        }
        else {
            break;
        }
    }

    return $output;
}
1436 |
1437 |
function handleSpanToken($token, &$str) {
#
# Handle $token provided by parseSpan by determining its nature and
# returning the corresponding value that should replace it.
#
# Params:
#   $token - the matched token (escape sequence, backtick run or tag)
#   $str   - text following the token, by reference; when a code span
#            is closed, it is advanced past the closing marker.
#
    switch ($token[0]) {
        case "\\":
            # Escaped character: emit it as a numeric entity.
            return $this->hashPart("&#". ord($token[1]). ";");
        case "`":
            # Search for end marker in remaining text.
            if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
                $str, $matches))
            {
                $str = $matches[2];
                $codespan = $this->makeCodeSpan($matches[1]);
                return $this->hashPart($codespan);
            }
            return $token; // return as text since no ending marker found.
        default:
            return $this->hashPart($token);
    }
}
1460 |
1461 |
function outdent($text) {
#
# Strip one level of indentation from the start of every line: either a
# single tab or between one and tab_width spaces.
#
    $indent_re = '/^(\t|[ ]{1,'.$this->tab_width.'})/m';
    return preg_replace($indent_re, '', $text);
}
1468 |
1469 |
1470 | # String length function for detab. `_initDetab` will create a function to
1471 | # handle UTF-8 if the default function does not exist.
1472 | var $utf8_strlen = 'mb_strlen';
1473 |
function detab($text) {
#
# Replace tabs with the appropriate amount of space.
#
# Only lines that contain at least one tab are touched; each of them is
# rebuilt by _detab_callback, which pads every tab-delimited block out to
# the next tab stop.
#
    return preg_replace_callback(
        '/^.*\t.*$/m', array($this, '_detab_callback'), $text);
}
function _detab_callback($matches) {
#
# Expand the tabs of one line ($matches[0]): the line is cut at each tab
# and glued back together with enough spaces to reach the next multiple
# of tab_width, measured with the function named in utf8_strlen.
#
    $measure = $this->utf8_strlen; # strlen function for UTF-8.
    $width = $this->tab_width;

    $segments = explode("\t", $matches[0]);
    # Start from the first segment, then append the others one by one.
    $expanded = array_shift($segments);
    foreach ($segments as $segment) {
        $padding = $width - $measure($expanded, 'UTF-8') % $width;
        $expanded .= str_repeat(" ", $padding) . $segment;
    }
    return $expanded;
}
function _initDetab() {
#
# Check for the availability of the function in the `utf8_strlen` property
# (initially `mb_strlen`). If the function is not available, install a
# closure that will loosely count the number of UTF-8 characters with a
# regular expression.
#
# A closure replaces the former create_function() call, which no longer
# exists in PHP 8. is_callable() is used for the check so that calling
# this method again, once the closure is installed, is harmless.
#
    if (is_callable($this->utf8_strlen)) return;
    $this->utf8_strlen = function ($text) {
        # One match per ASCII/continuation byte or per lead byte with
        # its continuation bytes; the match count is the length.
        return preg_match_all(
            '/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
            $text, $m);
    };
}
1516 |
1517 |
function unhash($text) {
#
# Put back the content stored under every hash key found in the text.
# A key is a character, \x1A, digits, then the same character again;
# each one is resolved by _unhash_callback.
#
    $hash_key_re = '/(.)\x1A[0-9]+\1/';
    return preg_replace_callback(
        $hash_key_re, array($this, '_unhash_callback'), $text);
}
function _unhash_callback($matches) {
    # The whole match is the hash key; look up what was stored under it.
    $hash_key = $matches[0];
    return $this->html_hashes[$hash_key];
}
1528 |
1529 | }
1530 |
--------------------------------------------------------------------------------
/tests/bootstrap.php:
--------------------------------------------------------------------------------
1 | add('dflydev\\tests\\markdown', 'tests');
13 |
--------------------------------------------------------------------------------
/tests/dflydev/tests/markdown/MarkdownExtraParserTest.php:
--------------------------------------------------------------------------------
1 | createParser();
41 | $html = $markdownParser->transformMarkdown('#Hello World');
42 | $this->assertEquals("Hello World
\n", $html, 'Simple H1 works');
43 | }
44 |
/**
 * Test tab width for code blocks
 *
 * Checks that the number of leading spaces that starts a code block
 * follows the configured tab width: the default of 4, then 6 set through
 * configureTabWidth(), then 8 set through the constructor options.
 */
public function testTabWidth()
{
    $markdownParser = $this->createParser();
    $html = $markdownParser->transformMarkdown('    Hello World');
    $this->assertEquals("<pre><code>Hello World\n</code></pre>\n", $html, 'Default 4 space tab code block works');
    $this->configureTabWidth($markdownParser, 6);
    // Four spaces are no longer enough: the line is an ordinary paragraph.
    $html = $markdownParser->transformMarkdown('    Hello World');
    $this->assertEquals("<p>Hello World</p>\n", $html, 'Default 4 space tab code block not triggered when tab width set to 6');
    $html = $markdownParser->transformMarkdown('      Hello World');
    $this->assertEquals("<pre><code>Hello World\n</code></pre>\n", $html, 'Setting 6 space tab code block (via method) works');
    $markdownParser = $this->createParser(array($this->configKeyTabWidth => 8));
    $html = $markdownParser->transformMarkdown('        Hello World');
    $this->assertEquals("<pre><code>Hello World\n</code></pre>\n", $html, 'Setting 8 space tab code block (via constructor) works');
}
62 |
/**
 * Apply a tab width to an existing Markdown parser.
 *
 * The value is stored under the configuration key held in
 * $this->configKeyTabWidth.
 *
 * @param \dflydev\markdown\MarkdownParser $markdownParser parser to configure
 * @param integer $width tab width passed to the parser
 */
protected function configureTabWidth(MarkdownParser $markdownParser, $width)
{
    $tabWidthKey = $this->configKeyTabWidth;
    $markdownParser->configureMarkdownParser($tabWidthKey, $width);
}
72 |
73 | }
74 |
--------------------------------------------------------------------------------