├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── composer.json ├── phpunit.xml.dist ├── src └── dflydev │ └── markdown │ ├── IMarkdownParser.php │ ├── MarkdownExtraParser.php │ └── MarkdownParser.php └── tests ├── bootstrap.php └── dflydev └── tests └── markdown ├── MarkdownExtraParserTest.php └── MarkdownParserTest.php /.gitignore: -------------------------------------------------------------------------------- 1 | vendor 2 | composer.lock 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: php 2 | 3 | php: 4 | - 5.3.3 5 | - 5.3 6 | - 5.4 7 | - 5.5 8 | 9 | before_script: 10 | - composer self-update 11 | - composer install --no-interaction --prefer-source --dev 12 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # This Project Has Been Deprecated 2 | 3 | Thank you for your interest in contributing to this package. Unfortunately, **I do not plan to continue maintaining this package.** 4 | 5 | The original purpose of this package was to provide a way to get a PSR-0 and Composer friendly port of Michel Fortin's PHP Markdown library on Packagist. Michel was originally uninterested in Composer and did not want to make changes and said I was free to fork and maintain a special Composer package. 6 | 7 | In January of 2013 (or around then) Michel finally joined the Composer bandwagon. More details can be found [here][1]. This means that the major reason this port existed (to be a Composer friendly port) effectively went away. This corresponded with the 1.3 release of PHP Markdown. 8 | 9 | Additionally, other versions of PHP Markdown parsers have come into existence and are quite nice. 
10 | 11 | Please feel free to continue to use it as-is but if you are starting a new project I would recommend finding another library. If you have a project already depending on this package, I would recommend planning to migrate to something that is better maintained. 12 | 13 | Thanks for your support and using this library over the years. 14 | 15 | [1]: https://github.com/michelf/php-markdown/issues/31 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | PHP Markdown & Extra 2 | Copyright (c) 2011, Dragonfly Development Inc 3 | All rights reserved. 4 | 5 | Based on PHP Markdown & Extra 6 | Copyright (c) 2004-2009 Michel Fortin 7 | 8 | All rights reserved. 9 | 10 | Based on Markdown 11 | Copyright (c) 2003-2006 John Gruber 12 | 13 | All rights reserved. 14 | 15 | Redistribution and use in source and binary forms, with or without 16 | modification, are permitted provided that the following conditions are 17 | met: 18 | 19 | * Redistributions of source code must retain the above copyright notice, 20 | this list of conditions and the following disclaimer. 21 | 22 | * Redistributions in binary form must reproduce the above copyright 23 | notice, this list of conditions and the following disclaimer in the 24 | documentation and/or other materials provided with the distribution. 25 | 26 | * Neither the name "Markdown" nor the names of its contributors may 27 | be used to endorse or promote products derived from this software 28 | without specific prior written permission. 29 | 30 | This software is provided by the copyright holders and contributors "as 31 | is" and any express or implied warranties, including, but not limited 32 | to, the implied warranties of merchantability and fitness for a 33 | particular purpose are disclaimed. 
In no event shall the copyright owner 34 | or contributors be liable for any direct, indirect, incidental, special, 35 | exemplary, or consequential damages (including, but not limited to, 36 | procurement of substitute goods or services; loss of use, data, or 37 | profits; or business interruption) however caused and on any theory of 38 | liability, whether in contract, strict liability, or tort (including 39 | negligence or otherwise) arising in any way out of the use of this 40 | software, even if advised of the possibility of such damage. 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | PHP Markdown & Extra 2 | ==================== 3 | 4 | An updated and stripped version of the original [PHP Markdown](http://michelf.com/projects/php-markdown/) 5 | by [Michel Fortin](http://michelf.com/). Works quite well with PSR-0 6 | autoloaders and is [Composer](http://packagist.org/) friendly. 7 | 8 | This Project Has Been Deprecated 9 | ================================ 10 | 11 | Thank you for your interest in contributing to this package. Unfortunately, **I 12 | do not plan to continue maintaining this package.** 13 | 14 | The original purpose of this package was to provide a way to get a PSR-0 and 15 | Composer friendly port of Michel Fortin's PHP Markdown library on Packagist. 16 | Michel was originally uninterested in Composer and did not want to make changes 17 | and said I was free to fork and maintain a special Composer package. 18 | 19 | In January of 2013 (or around then) Michel finally joined the Composer 20 | bandwagon. More details can be found [here][1]. This means that the major reason 21 | this port existed (to be a Composer friendly port) effectively went away. This 22 | corresponded with the 1.3 release of PHP Markdown. 
23 | 24 | Additionally, other versions of PHP Markdown parsers have come into existence 25 | and are quite nice. 26 | 27 | Please feel free to continue to use it as-is but if you are starting a new 28 | project I would recommend finding another library. If you have a project already 29 | depending on this package, I would recommend planning to migrate to something 30 | that is better maintained. 31 | 32 | Thanks for your support and using this library over the years. 33 | 34 | --- 35 | 36 | 37 | Changes from the official PHP Markdown & Extra 38 | ---------------------------------------------- 39 | 40 | The initial pass at updating PHP Markdown & Extra left the core of 41 | the code more or less intact but the changes to the organization 42 | and naming were quite substantial. This effectively makes this package 43 | a hard fork from Markdown 1.0.1n and MarkdownExtra 1.2.4. 44 | 45 | Updated in the following ways: 46 | 47 | * Moved parser classes into their own files 48 | * Using PHP 5.3 namespaces 49 | * Following [PSR-0](https://github.com/php-fig/fig-standards/blob/master/accepted/PSR-0.md) standards 50 | * Replaced `@define` configuration variables with class `const` variables 51 | * Integrated with [Travis CI](http://travis-ci.org/) 52 | * Made [Composer](http://packagist.org/) friendly 53 | 54 | Stripped in the following ways: 55 | 56 | * No more embedded plugin code (WordPress, bBlog, etc.) 57 | * No more top level function calls (`Markdown()`, etc.) 58 | 59 | Last synced with: 60 | 61 | * PHP Markdown v1.0.1o 62 | * PHP Markdown Extra v1.2.5 63 | 64 | 65 | Requirements 66 | ------------ 67 | 68 | * PHP 5.3+ 69 | 70 | Installation 71 | ------------ 72 | 73 | To add this package as a local, per-project dependency to your project, simply add a 74 | dependency on `dflydev/markdown` to your project's `composer.json` file. 
75 | Here is a minimal example of a `composer.json` file that just defines a dependency on 76 | version 1.0 of this package: 77 | 78 | { 79 | "require": { 80 | "dflydev/markdown": "1.0.*" 81 | } 82 | } 83 | 84 | Usage 85 | ----- 86 | 87 | Simple usage for the standard Markdown ([details](http://michelf.com/projects/php-markdown/)) parser: 88 | 89 | <?php 90 | use dflydev\markdown\MarkdownParser; 91 | 92 | $markdownParser = new MarkdownParser(); 93 | 94 | // Will return <h1>Hello World</h1> 95 | $markdownParser->transformMarkdown("#Hello World"); 96 | 97 | Simple usage for the Markdown Extra ([details](http://michelf.com/projects/php-markdown/extra/)) parser: 98 | 99 | <?php 100 | use dflydev\markdown\MarkdownExtraParser; 101 | 102 | $markdownParser = new MarkdownExtraParser(); 103 | 104 | // Will return <h1>Hello World</h1> 105 | $markdownParser->transformMarkdown("#Hello World"); 106 | 107 | 108 | License 109 | ------- 110 | 111 | This library is licensed under the New BSD License - see the LICENSE file for details. 112 | 113 | 114 | Community 115 | --------- 116 | 117 | If you have questions or want to help out, join us in the 118 | [#dflydev](irc://irc.freenode.net/#dflydev) channel on irc.freenode.net. 119 | 120 | 121 | Not Invented Here 122 | ----------------- 123 | 124 | The original [PHP Markdown](http://michelf.com/projects/php-markdown/) was 125 | quite excellent but was not as easy to use as it could be in more modern PHP 126 | applications. Having started to use [Composer](http://packagist.org/) for a 127 | few newer applications that needed to transform Markdown, I decided to strip 128 | and update the original PHP Markdown so that it could be more easily managed 129 | by the likes of Composer. 130 | 131 | All of the initial work done for this library (which I can only assume 132 | was quite substantial after having looked at the code) was done by 133 | [Michel Fortin](http://michelf.com/) during the original port from Perl to 134 | PHP.
135 | 136 | If you do not need to install PHP Markdown by way of Composer or need to 137 | leverage PSR-0 autoloading, I suggest you continue to use the official and 138 | likely more stable and well used original version of 139 | [PHP Markdown](http://michelf.com/projects/php-markdown/) 140 | 141 | [1]: https://github.com/michelf/php-markdown/issues/31 142 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "dflydev/markdown", 3 | "type": "library", 4 | "description": "PHP Markdown & Extra - DEPRECATED", 5 | "homepage": "http://github.com/dflydev/dflydev-markdown", 6 | "keywords": ["markdown"], 7 | "license": "BSD-3-Clause", 8 | "authors": [ 9 | { 10 | "name": "Dragonfly Development Inc.", 11 | "email": "info@dflydev.com", 12 | "homepage": "http://dflydev.com" 13 | }, 14 | { 15 | "name": "Beau Simensen", 16 | "email": "beau@dflydev.com", 17 | "homepage": "http://beausimensen.com" 18 | }, 19 | { 20 | "name": "Michel Fortin", 21 | "homepage": "http://michelf.com" 22 | }, 23 | { 24 | "name": "John Gruber", 25 | "homepage": "http://daringfireball.net" 26 | } 27 | ], 28 | "require": { 29 | "php": ">=5.3" 30 | }, 31 | "autoload": { 32 | "psr-0": { "dflydev\\markdown": "src" } 33 | }, 34 | "extra": { 35 | "branch-alias": { 36 | "dev-master": "1.0-dev" 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /phpunit.xml.dist: -------------------------------------------------------------------------------- 1 | 2 | 13 | 14 | 15 | ./tests/dflydev/ 16 | 17 | 18 | 19 | 20 | 21 | ./src/dflydev/ 22 | 23 | ./src/dflydev/*/resources 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /src/dflydev/markdown/IMarkdownParser.php: -------------------------------------------------------------------------------- 1 | escape_chars .= 
':|'; 86 | 87 | # Insert extra document, block, and span transformations. 88 | # Parent constructor will do the sorting. 89 | $this->document_gamut += array( 90 | "doFencedCodeBlocks" => 5, 91 | "stripFootnotes" => 15, 92 | "stripAbbreviations" => 25, 93 | "appendFootnotes" => 50, 94 | ); 95 | $this->block_gamut += array( 96 | "doFencedCodeBlocks" => 5, 97 | "doTables" => 15, 98 | "doDefLists" => 45, 99 | ); 100 | $this->span_gamut += array( 101 | "doFootnotes" => 5, 102 | "doAbbreviations" => 70, 103 | ); 104 | 105 | parent::__construct($configuration); 106 | } 107 | 108 | /** 109 | * (non-PHPdoc) 110 | * @see dflydev\markdown.IMarkdownParser::configureMarkdownParser() 111 | */ 112 | public function configureMarkdownParser($key, $value) 113 | { 114 | switch($key) { 115 | case self::CONFIG_FOOTNOTE_LINK_TITLE: 116 | $this->fn_link_title = $value; 117 | break; 118 | case self::CONFIG_FOOTNOTE_LINK_CLASS: 119 | $this->fn_link_class = $value; 120 | break; 121 | case self::CONFIG_FOOTNOTE_BACKLINK_TITLE: 122 | $this->fn_backlink_title = $value; 123 | break; 124 | case self::CONFIG_FOOTNOTE_BACKLINK_CLASS: 125 | $this->fn_backlink_class = $value; 126 | break; 127 | default: 128 | // Try to handle this in our parent. 129 | parent::configureMarkdownParser($key, $value); 130 | break; 131 | } 132 | // 133 | } 134 | 135 | # Extra variables used during extra transformations. 136 | var $footnotes = array(); 137 | var $footnotes_ordered = array(); 138 | var $abbr_desciptions = array(); 139 | var $abbr_word_re = ''; 140 | 141 | # Give the current footnote number. 142 | var $footnote_counter = 1; 143 | 144 | 145 | function setup() { 146 | # 147 | # Setting up Extra-specific variables. 
148 | # 149 | parent::setup(); 150 | 151 | $this->footnotes = array(); 152 | $this->footnotes_ordered = array(); 153 | $this->abbr_desciptions = array(); 154 | $this->abbr_word_re = ''; 155 | $this->footnote_counter = 1; 156 | 157 | foreach ($this->predef_abbr as $abbr_word => $abbr_desc) { 158 | if ($this->abbr_word_re) 159 | $this->abbr_word_re .= '|'; 160 | $this->abbr_word_re .= preg_quote($abbr_word); 161 | $this->abbr_desciptions[$abbr_word] = trim($abbr_desc); 162 | } 163 | } 164 | 165 | function teardown() { 166 | # 167 | # Clearing Extra-specific variables. 168 | # 169 | $this->footnotes = array(); 170 | $this->footnotes_ordered = array(); 171 | $this->abbr_desciptions = array(); 172 | $this->abbr_word_re = ''; 173 | 174 | parent::teardown(); 175 | } 176 | 177 | 178 | ### HTML Block Parser ### 179 | 180 | # Tags that are always treated as block tags: 181 | var $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend'; 182 | 183 | # Tags treated as block tags only if the opening tag is alone on it's line: 184 | var $context_block_tags_re = 'script|noscript|math|ins|del'; 185 | 186 | # Tags where markdown="1" default to span mode: 187 | var $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address'; 188 | 189 | # Tags which must not have their contents modified, no matter where 190 | # they appear: 191 | var $clean_tags_re = 'script|math'; 192 | 193 | # Tags that do not need to be closed. 194 | var $auto_close_tags_re = 'hr|img'; 195 | 196 | 197 | function hashHTMLBlocks($text) { 198 | # 199 | # Hashify HTML Blocks and "clean tags". 200 | # 201 | # We only want to do this for block-level HTML tags, such as headers, 202 | # lists, and tables. That's because we still want to wrap

s around 203 | # "paragraphs" that are wrapped in non-block-level tags, such as anchors, 204 | # phrase emphasis, and spans. The list of tags we're looking for is 205 | # hard-coded. 206 | # 207 | # This works by calling _HashHTMLBlocks_InMarkdown, which then calls 208 | # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" 209 | # attribute is found whitin a tag, _HashHTMLBlocks_InHTML calls back 210 | # _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag. 211 | # These two functions are calling each other. It's recursive! 212 | # 213 | if ($this->no_markup) return $text; 214 | 215 | # 216 | # Call the HTML-in-Markdown hasher. 217 | # 218 | list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text); 219 | 220 | return $text; 221 | } 222 | function _hashHTMLBlocks_inMarkdown($text, $indent = 0, 223 | $enclosing_tag_re = '', $span = false) 224 | { 225 | # 226 | # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags. 227 | # 228 | # * $indent is the number of space to be ignored when checking for code 229 | # blocks. This is important because if we don't take the indent into 230 | # account, something like this (which looks right) won't work as expected: 231 | # 232 | #

233 | #
234 | # Hello World. <-- Is this a Markdown code block or text? 235 | #
<-- Is this a Markdown code block or a real tag? 236 | #
237 | # 238 | # If you don't like this, just don't indent the tag on which 239 | # you apply the markdown="1" attribute. 240 | # 241 | # * If $enclosing_tag_re is not empty, stops at the first unmatched closing 242 | # tag with that name. Nested tags supported. 243 | # 244 | # * If $span is true, text inside must treated as span. So any double 245 | # newline will be replaced by a single newline so that it does not create 246 | # paragraphs. 247 | # 248 | # Returns an array of that form: ( processed text , remaining text ) 249 | # 250 | if ($text === '') return array('', ''); 251 | 252 | # Regex to check for the presense of newlines around a block tag. 253 | $newline_before_re = '/(?:^\n?|\n\n)*$/'; 254 | $newline_after_re = 255 | '{ 256 | ^ # Start of text following the tag. 257 | (?>[ ]*)? # Optional comment. 258 | [ ]*\n # Must be followed by newline. 259 | }xs'; 260 | 261 | # Regex to match any tag. 262 | $block_tag_re = 263 | '{ 264 | ( # $2: Capture hole tag. 265 | # Tag name. 267 | '.$this->block_tags_re.' | 268 | '.$this->context_block_tags_re.' | 269 | '.$this->clean_tags_re.' | 270 | (?!\s)'.$enclosing_tag_re.' 271 | ) 272 | (?: 273 | (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name. 274 | (?> 275 | ".*?" | # Double quotes (can contain `>`) 276 | \'.*?\' | # Single quotes (can contain `>`) 277 | .+? # Anything but quotes and `>`. 278 | )*? 279 | )? 280 | > # End of tag. 281 | | 282 | # HTML Comment 283 | | 284 | <\?.*?\?> | <%.*?%> # Processing instruction 285 | | 286 | # CData Block 287 | | 288 | # Code span marker 289 | `+ 290 | '. ( !$span ? ' # If not in span. 291 | | 292 | # Indented code block 293 | (?: ^[ ]*\n | ^ | \n[ ]*\n ) 294 | [ ]{'.($indent+4).'}[^\n]* \n 295 | (?> 296 | (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n 297 | )* 298 | | 299 | # Fenced code block marker 300 | (?> ^ | \n ) 301 | [ ]{0,'.($indent).'}~~~+[ ]*\n 302 | ' : '' ). ' # End (if not is span). 
303 | ) 304 | }xs'; 305 | 306 | 307 | $depth = 0; # Current depth inside the tag tree. 308 | $parsed = ""; # Parsed text that will be returned. 309 | 310 | # 311 | # Loop through every tag until we find the closing tag of the parent 312 | # or loop until reaching the end of text if no parent tag specified. 313 | # 314 | do { 315 | # 316 | # Split the text using the first $tag_match pattern found. 317 | # Text before pattern will be first in the array, text after 318 | # pattern will be at the end, and between will be any catches made 319 | # by the pattern. 320 | # 321 | $parts = preg_split($block_tag_re, $text, 2, 322 | PREG_SPLIT_DELIM_CAPTURE); 323 | 324 | # If in Markdown span mode, add a empty-string span-level hash 325 | # after each newline to prevent triggering any block element. 326 | if ($span) { 327 | $void = $this->hashPart("", ':'); 328 | $newline = "$void\n"; 329 | $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void; 330 | } 331 | 332 | $parsed .= $parts[0]; # Text before current tag. 333 | 334 | # If end of $text has been reached. Stop loop. 335 | if (count($parts) < 3) { 336 | $text = ""; 337 | break; 338 | } 339 | 340 | $tag = $parts[1]; # Tag to handle. 341 | $text = $parts[2]; # Remaining text after current tag. 342 | $tag_re = preg_quote($tag); # For use in a regular expression. 343 | 344 | # 345 | # Check for: Code span marker 346 | # 347 | if ($tag{0} == "`") { 348 | # Find corresponding end marker. 349 | $tag_re = preg_quote($tag); 350 | if (preg_match('{^(?>.+?|\n(?!\n))*?(?.*\n)+?[ ]{0,'.($indent).'}'.$tag_re.'[ ]*\n}', $text, 385 | $matches)) 386 | { 387 | # End marker found: pass text unchanged until marker. 388 | $parsed .= $tag . $matches[0]; 389 | $text = substr($text, strlen($matches[0])); 390 | } 391 | else { 392 | # No end marker: just skip it. 393 | $parsed .= $tag; 394 | } 395 | } 396 | # 397 | # Check for: Indented code block. 
398 | # 399 | else if ($tag{0} == "\n" || $tag{0} == " ") { 400 | # Indented code block: pass it unchanged, will be handled 401 | # later. 402 | $parsed .= $tag; 403 | } 404 | # 405 | # Check for: Opening Block level tag or 406 | # Opening Context Block tag (like ins and del) 407 | # used as a block tag (tag is alone on it's line). 408 | # 409 | else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) || 410 | ( preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) && 411 | preg_match($newline_before_re, $parsed) && 412 | preg_match($newline_after_re, $text) ) 413 | ) 414 | { 415 | # Need to parse tag and following text using the HTML parser. 416 | list($block_text, $text) = 417 | $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true); 418 | 419 | # Make sure it stays outside of any paragraph by adding newlines. 420 | $parsed .= "\n\n$block_text\n\n"; 421 | } 422 | # 423 | # Check for: Clean tag (like script, math) 424 | # HTML Comments, processing instructions. 425 | # 426 | else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) || 427 | $tag{1} == '!' || $tag{1} == '?') 428 | { 429 | # Need to parse tag and following text using the HTML parser. 430 | # (don't check for markdown attribute) 431 | list($block_text, $text) = 432 | $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false); 433 | 434 | $parsed .= $block_text; 435 | } 436 | # 437 | # Check for: Tag with same name as enclosing tag. 438 | # 439 | else if ($enclosing_tag_re !== '' && 440 | # Same name as enclosing tag. 441 | preg_match('{^= 0); 464 | 465 | return array($parsed, $text); 466 | } 467 | function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) { 468 | # 469 | # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags. 470 | # 471 | # * Calls $hash_method to convert any blocks. 472 | # * Stops when the first opening tag closes. 473 | # * $md_attr indicate if the use of the `markdown="1"` attribute is allowed. 
474 | # (it is not inside clean tags) 475 | # 476 | # Returns an array of that form: ( processed text , remaining text ) 477 | # 478 | if ($text === '') return array('', ''); 479 | 480 | # Regex to match `markdown` attribute inside of a tag. 481 | $markdown_attr_re = ' 482 | { 483 | \s* # Eat whitespace before the `markdown` attribute 484 | markdown 485 | \s*=\s* 486 | (?> 487 | (["\']) # $1: quote delimiter 488 | (.*?) # $2: attribute value 489 | \1 # matching delimiter 490 | | 491 | ([^\s>]*) # $3: unquoted attribute value 492 | ) 493 | () # $4: make $3 always defined (avoid warnings) 494 | }xs'; 495 | 496 | # Regex to match any tag. 497 | $tag_re = '{ 498 | ( # $2: Capture hole tag. 499 | 504 | ".*?" | # Double quotes (can contain `>`) 505 | \'.*?\' | # Single quotes (can contain `>`) 506 | .+? # Anything but quotes and `>`. 507 | )*? 508 | )? 509 | > # End of tag. 510 | | 511 | # HTML Comment 512 | | 513 | <\?.*?\?> | <%.*?%> # Processing instruction 514 | | 515 | # CData Block 516 | ) 517 | }xs'; 518 | 519 | $original_text = $text; # Save original text in case of faliure. 520 | 521 | $depth = 0; # Current depth inside the tag tree. 522 | $block_text = ""; # Temporary text holder for current text. 523 | $parsed = ""; # Parsed text that will be returned. 524 | 525 | # 526 | # Get the name of the starting tag. 527 | # (This pattern makes $base_tag_name_re safe without quoting.) 528 | # 529 | if (preg_match('/^<([\w:$]*)\b/', $text, $matches)) 530 | $base_tag_name_re = $matches[1]; 531 | 532 | # 533 | # Loop through every tag until we find the corresponding closing tag. 534 | # 535 | do { 536 | # 537 | # Split the text using the first $tag_match pattern found. 538 | # Text before pattern will be first in the array, text after 539 | # pattern will be at the end, and between will be any catches made 540 | # by the pattern. 
541 | # 542 | $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); 543 | 544 | if (count($parts) < 3) { 545 | # 546 | # End of $text reached with unbalenced tag(s). 547 | # In that case, we return original text unchanged and pass the 548 | # first character as filtered to prevent an infinite loop in the 549 | # parent function. 550 | # 551 | return array($original_text{0}, substr($original_text, 1)); 552 | } 553 | 554 | $block_text .= $parts[0]; # Text before current tag. 555 | $tag = $parts[1]; # Tag to handle. 556 | $text = $parts[2]; # Remaining text after current tag. 557 | 558 | # 559 | # Check for: Auto-close tag (like
) 560 | # Comments and Processing Instructions. 561 | # 562 | if (preg_match('{^auto_close_tags_re.')\b}', $tag) || 563 | $tag{1} == '!' || $tag{1} == '?') 564 | { 565 | # Just add the tag to the block as if it was text. 566 | $block_text .= $tag; 567 | } 568 | else { 569 | # 570 | # Increase/decrease nested tag count. Only do so if 571 | # the tag's name match base tag's. 572 | # 573 | if (preg_match('{^mode = $attr_m[2] . $attr_m[3]; 590 | $span_mode = $this->mode == 'span' || $this->mode != 'block' && 591 | preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag); 592 | 593 | # Calculate indent before tag. 594 | if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) { 595 | $strlen = $this->utf8_strlen; 596 | $indent = $strlen($matches[1], 'UTF-8'); 597 | } else { 598 | $indent = 0; 599 | } 600 | 601 | # End preceding block with this tag. 602 | $block_text .= $tag; 603 | $parsed .= $this->$hash_method($block_text); 604 | 605 | # Get enclosing tag name for the ParseMarkdown function. 606 | # (This pattern makes $tag_name_re safe without quoting.) 607 | preg_match('/^<([\w:$]*)\b/', $tag, $matches); 608 | $tag_name_re = $matches[1]; 609 | 610 | # Parse the content using the HTML-in-Markdown parser. 611 | list ($block_text, $text) 612 | = $this->_hashHTMLBlocks_inMarkdown($text, $indent, 613 | $tag_name_re, $span_mode); 614 | 615 | # Outdent markdown text. 616 | if ($indent > 0) { 617 | $block_text = preg_replace("/^[ ]{1,$indent}/m", "", 618 | $block_text); 619 | } 620 | 621 | # Append tag content to parsed text. 622 | if (!$span_mode) $parsed .= "\n\n$block_text\n\n"; 623 | else $parsed .= "$block_text"; 624 | 625 | # Start over a new block. 626 | $block_text = ""; 627 | } 628 | else $block_text .= $tag; 629 | } 630 | 631 | } while ($depth > 0); 632 | 633 | # 634 | # Hash last block text that wasn't processed inside the loop. 
635 | # 636 | $parsed .= $this->$hash_method($block_text); 637 | 638 | return array($parsed, $text); 639 | } 640 | 641 | 642 | function hashClean($text) { 643 | # 644 | # Called whenever a tag must be hashed when a function insert a "clean" tag 645 | # in $text, it pass through this function and is automaticaly escaped, 646 | # blocking invalid nested overlap. 647 | # 648 | return $this->hashPart($text, 'C'); 649 | } 650 | 651 | 652 | function doHeaders($text) { 653 | # 654 | # Redefined to add id attribute support. 655 | # 656 | # Setext-style headers: 657 | # Header 1 {#header1} 658 | # ======== 659 | # 660 | # Header 2 {#header2} 661 | # -------- 662 | # 663 | $text = preg_replace_callback( 664 | '{ 665 | (^.+?) # $1: Header text 666 | (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # $2: Id attribute 667 | [ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer 668 | }mx', 669 | array(&$this, '_doHeaders_callback_setext'), $text); 670 | 671 | # atx-style headers: 672 | # # Header 1 {#header1} 673 | # ## Header 2 {#header2} 674 | # ## Header 2 with closing hashes ## {#header3} 675 | # ... 676 | # ###### Header 6 {#header2} 677 | # 678 | $text = preg_replace_callback('{ 679 | ^(\#{1,6}) # $1 = string of #\'s 680 | [ ]* 681 | (.+?) # $2 = Header text 682 | [ ]* 683 | \#* # optional closing #\'s (not counted) 684 | (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute 685 | [ ]* 686 | \n+ 687 | }xm', 688 | array(&$this, '_doHeaders_callback_atx'), $text); 689 | 690 | return $text; 691 | } 692 | function _doHeaders_attr($attr) { 693 | if (empty($attr)) return ""; 694 | return " id=\"$attr\""; 695 | } 696 | function _doHeaders_callback_setext($matches) { 697 | if ($matches[3] == '-' && preg_match('{^- }', $matches[1])) 698 | return $matches[0]; 699 | $level = $matches[3]{0} == '=' ? 1 : 2; 700 | $attr = $this->_doHeaders_attr($id =& $matches[2]); 701 | $block = "".$this->runSpanGamut($matches[1]).""; 702 | return "\n" . $this->hashBlock($block) . 
"\n\n"; 703 | } 704 | function _doHeaders_callback_atx($matches) { 705 | $level = strlen($matches[1]); 706 | $attr = $this->_doHeaders_attr($id =& $matches[3]); 707 | $block = "".$this->runSpanGamut($matches[2]).""; 708 | return "\n" . $this->hashBlock($block) . "\n\n"; 709 | } 710 | 711 | 712 | function doTables($text) { 713 | # 714 | # Form HTML tables. 715 | # 716 | $less_than_tab = $this->tab_width - 1; 717 | # 718 | # Find tables with leading pipe. 719 | # 720 | # | Header 1 | Header 2 721 | # | -------- | -------- 722 | # | Cell 1 | Cell 2 723 | # | Cell 3 | Cell 4 724 | # 725 | $text = preg_replace_callback(' 726 | { 727 | ^ # Start of a line 728 | [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 729 | [|] # Optional leading pipe (present) 730 | (.+) \n # $1: Header row (at least one pipe) 731 | 732 | [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 733 | [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline 734 | 735 | ( # $3: Cells 736 | (?> 737 | [ ]* # Allowed whitespace. 738 | [|] .* \n # Row content. 739 | )* 740 | ) 741 | (?=\n|\Z) # Stop at final double newline. 742 | }xm', 743 | array(&$this, '_doTable_leadingPipe_callback'), $text); 744 | 745 | # 746 | # Find tables without leading pipe. 747 | # 748 | # Header 1 | Header 2 749 | # -------- | -------- 750 | # Cell 1 | Cell 2 751 | # Cell 3 | Cell 4 752 | # 753 | $text = preg_replace_callback(' 754 | { 755 | ^ # Start of a line 756 | [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 757 | (\S.*[|].*) \n # $1: Header row (at least one pipe) 758 | 759 | [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 760 | ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline 761 | 762 | ( # $3: Cells 763 | (?> 764 | .* [|] .* \n # Row content 765 | )* 766 | ) 767 | (?=\n|\Z) # Stop at final double newline. 
768 | }xm', 769 | array(&$this, '_DoTable_callback'), $text); 770 | 771 | return $text; 772 | } 773 | function _doTable_leadingPipe_callback($matches) { 774 | $head = $matches[1]; 775 | $underline = $matches[2]; 776 | $content = $matches[3]; 777 | 778 | # Remove leading pipe for each row. 779 | $content = preg_replace('/^ *[|]/m', '', $content); 780 | 781 | return $this->_doTable_callback(array($matches[0], $head, $underline, $content)); 782 | } 783 | function _doTable_callback($matches) { 784 | $head = $matches[1]; 785 | $underline = $matches[2]; 786 | $content = $matches[3]; 787 | 788 | # Remove any tailing pipes for each line. 789 | $head = preg_replace('/[|] *$/m', '', $head); 790 | $underline = preg_replace('/[|] *$/m', '', $underline); 791 | $content = preg_replace('/[|] *$/m', '', $content); 792 | 793 | # Reading alignement from header underline. 794 | $separators = preg_split('/ *[|] */', $underline); 795 | foreach ($separators as $n => $s) { 796 | if (preg_match('/^ *-+: *$/', $s)) $attr[$n] = ' align="right"'; 797 | else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"'; 798 | else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"'; 799 | else $attr[$n] = ''; 800 | } 801 | 802 | # Parsing span elements, including code spans, character escapes, 803 | # and inline HTML tags, so that pipes inside those gets ignored. 804 | $head = $this->parseSpan($head); 805 | $headers = preg_split('/ *[|] */', $head); 806 | $col_count = count($headers); 807 | 808 | # Write column headers. 809 | $text = "\n"; 810 | $text .= "\n"; 811 | $text .= "\n"; 812 | foreach ($headers as $n => $header) 813 | $text .= " ".$this->runSpanGamut(trim($header))."\n"; 814 | $text .= "\n"; 815 | $text .= "\n"; 816 | 817 | # Split content by row. 
818 | $rows = explode("\n", trim($content, "\n")); 819 | 820 | $text .= "\n"; 821 | foreach ($rows as $row) { 822 | # Parsing span elements, including code spans, character escapes, 823 | # and inline HTML tags, so that pipes inside those gets ignored. 824 | $row = $this->parseSpan($row); 825 | 826 | # Split row by cell. 827 | $row_cells = preg_split('/ *[|] */', $row, $col_count); 828 | $row_cells = array_pad($row_cells, $col_count, ''); 829 | 830 | $text .= "\n"; 831 | foreach ($row_cells as $n => $cell) 832 | $text .= " ".$this->runSpanGamut(trim($cell))."\n"; 833 | $text .= "\n"; 834 | } 835 | $text .= "\n"; 836 | $text .= "
"; 837 | 838 | return $this->hashBlock($text) . "\n"; 839 | } 840 | 841 | 842 | function doDefLists($text) { 843 | # 844 | # Form HTML definition lists. 845 | # 846 | $less_than_tab = $this->tab_width - 1; 847 | 848 | # Re-usable pattern to match any entire dl list: 849 | $whole_list_re = '(?> 850 | ( # $1 = whole list 851 | ( # $2 852 | [ ]{0,'.$less_than_tab.'} 853 | ((?>.*\S.*\n)+) # $3 = defined term 854 | \n? 855 | [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition 856 | ) 857 | (?s:.+?) 858 | ( # $4 859 | \z 860 | | 861 | \n{2,} 862 | (?=\S) 863 | (?! # Negative lookahead for another term 864 | [ ]{0,'.$less_than_tab.'} 865 | (?: \S.*\n )+? # defined term 866 | \n? 867 | [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition 868 | ) 869 | (?! # Negative lookahead for another definition 870 | [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition 871 | ) 872 | ) 873 | ) 874 | )'; // mx 875 | 876 | $text = preg_replace_callback('{ 877 | (?>\A\n?|(?<=\n\n)) 878 | '.$whole_list_re.' 879 | }mx', 880 | array(&$this, '_doDefLists_callback'), $text); 881 | 882 | return $text; 883 | } 884 | function _doDefLists_callback($matches) { 885 | # Re-usable patterns to match list item bullets and number markers: 886 | $list = $matches[1]; 887 | 888 | # Turn double returns into triple returns, so that we can make a 889 | # paragraph for the last item in a list, if necessary: 890 | $result = trim($this->processDefListItems($list)); 891 | $result = "
\n" . $result . "\n
"; 892 | return $this->hashBlock($result) . "\n\n"; 893 | } 894 | 895 | 896 | function processDefListItems($list_str) { 897 | # 898 | # Process the contents of a single definition list, splitting it 899 | # into individual term and definition list items. 900 | # 901 | $less_than_tab = $this->tab_width - 1; 902 | 903 | # trim trailing blank lines: 904 | $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); 905 | 906 | # Process definition terms. 907 | $list_str = preg_replace_callback('{ 908 | (?>\A\n?|\n\n+) # leading line 909 | ( # definition terms = $1 910 | [ ]{0,'.$less_than_tab.'} # leading whitespace 911 | (?![:][ ]|[ ]) # negative lookahead for a definition 912 | # mark (colon) or more whitespace. 913 | (?> \S.* \n)+? # actual term (not whitespace). 914 | ) 915 | (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed 916 | # with a definition mark. 917 | }xm', 918 | array(&$this, '_processDefListItems_callback_dt'), $list_str); 919 | 920 | # Process actual definitions. 921 | $list_str = preg_replace_callback('{ 922 | \n(\n+)? # leading line = $1 923 | ( # marker space = $2 924 | [ ]{0,'.$less_than_tab.'} # whitespace before colon 925 | [:][ ]+ # definition mark (colon) 926 | ) 927 | ((?s:.+?)) # definition text = $3 928 | (?= \n+ # stop at next definition mark, 929 | (?: # next term or end of text 930 | [ ]{0,'.$less_than_tab.'} [:][ ] | 931 |
| \z 932 | ) 933 | ) 934 | }xm', 935 | array(&$this, '_processDefListItems_callback_dd'), $list_str); 936 | 937 | return $list_str; 938 | } 939 | function _processDefListItems_callback_dt($matches) { 940 | $terms = explode("\n", trim($matches[1])); 941 | $text = ''; 942 | foreach ($terms as $term) { 943 | $term = $this->runSpanGamut(trim($term)); 944 | $text .= "\n
" . $term . "
"; 945 | } 946 | return $text . "\n"; 947 | } 948 | function _processDefListItems_callback_dd($matches) { 949 | $leading_line = $matches[1]; 950 | $marker_space = $matches[2]; 951 | $def = $matches[3]; 952 | 953 | if ($leading_line || preg_match('/\n{2,}/', $def)) { 954 | # Replace marker with the appropriate whitespace indentation 955 | $def = str_repeat(' ', strlen($marker_space)) . $def; 956 | $def = $this->runBlockGamut($this->outdent($def . "\n\n")); 957 | $def = "\n". $def ."\n"; 958 | } 959 | else { 960 | $def = rtrim($def); 961 | $def = $this->runSpanGamut($this->outdent($def)); 962 | } 963 | 964 | return "\n
" . $def . "
\n"; 965 | } 966 | 967 | 968 | function doFencedCodeBlocks($text) { 969 | # 970 | # Adding the fenced code block syntax to regular Markdown: 971 | # 972 | # ~~~ 973 | # Code block 974 | # ~~~ 975 | # 976 | $less_than_tab = $this->tab_width; 977 | 978 | $text = preg_replace_callback('{ 979 | (?:\n|\A) 980 | # 1: Opening marker 981 | ( 982 | ~{3,} # Marker: three tilde or more. 983 | ) 984 | [ ]* \n # Whitespace and newline following marker. 985 | 986 | # 2: Content 987 | ( 988 | (?> 989 | (?!\1 [ ]* \n) # Not a closing marker. 990 | .*\n+ 991 | )+ 992 | ) 993 | 994 | # Closing marker. 995 | \1 [ ]* \n 996 | }xm', 997 | array(&$this, '_doFencedCodeBlocks_callback'), $text); 998 | 999 | return $text; 1000 | } 1001 | function _doFencedCodeBlocks_callback($matches) { 1002 | $codeblock = $matches[2]; 1003 | $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES); 1004 | $codeblock = preg_replace_callback('/^\n+/', 1005 | array(&$this, '_doFencedCodeBlocks_newlines'), $codeblock); 1006 | $codeblock = "
$codeblock
"; 1007 | return "\n\n".$this->hashBlock($codeblock)."\n\n"; 1008 | } 1009 | function _doFencedCodeBlocks_newlines($matches) { 1010 | return str_repeat("empty_element_suffix", 1011 | strlen($matches[0])); 1012 | } 1013 | 1014 | 1015 | # 1016 | # Redefining emphasis markers so that emphasis by underscore does not 1017 | # work in the middle of a word. 1018 | # 1019 | var $em_relist = array( 1020 | '' => '(?:(? '(?<=\S|^)(? '(?<=\S|^)(? '(?:(? '(?<=\S|^)(? '(?<=\S|^)(? '(?:(? '(?<=\S|^)(? '(?<=\S|^)(? tags 1040 | # 1041 | # Strip leading and trailing lines: 1042 | $text = preg_replace('/\A\n+|\n+\z/', '', $text); 1043 | 1044 | $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); 1045 | 1046 | # 1047 | # Wrap

tags and unhashify HTML blocks 1048 | # 1049 | foreach ($grafs as $key => $value) { 1050 | $value = trim($this->runSpanGamut($value)); 1051 | 1052 | # Check if this should be enclosed in a paragraph. 1053 | # Clean tag hashes & block tag hashes are left alone. 1054 | $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value); 1055 | 1056 | if ($is_p) { 1057 | $value = "

$value

"; 1058 | } 1059 | $grafs[$key] = $value; 1060 | } 1061 | 1062 | # Join grafs in one text, then unhash HTML tags. 1063 | $text = implode("\n\n", $grafs); 1064 | 1065 | # Finish by removing any tag hashes still present in $text. 1066 | $text = $this->unhash($text); 1067 | 1068 | return $text; 1069 | } 1070 | 1071 | 1072 | ### Footnotes 1073 | 1074 | function stripFootnotes($text) { 1075 | # 1076 | # Strips link definitions from text, stores the URLs and titles in 1077 | # hash references. 1078 | # 1079 | $less_than_tab = $this->tab_width - 1; 1080 | 1081 | # Link defs are in the form: [^id]: url "optional title" 1082 | $text = preg_replace_callback('{ 1083 | ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?: # note_id = $1 1084 | [ ]* 1085 | \n? # maybe *one* newline 1086 | ( # text = $2 (no blank lines allowed) 1087 | (?: 1088 | .+ # actual text 1089 | | 1090 | \n # newlines but 1091 | (?!\[\^.+?\]:\s)# negative lookahead for footnote marker. 1092 | (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed 1093 | # by non-indented content 1094 | )* 1095 | ) 1096 | }xm', 1097 | array(&$this, '_stripFootnotes_callback'), 1098 | $text); 1099 | return $text; 1100 | } 1101 | function _stripFootnotes_callback($matches) { 1102 | $note_id = $this->fn_id_prefix . $matches[1]; 1103 | $this->footnotes[$note_id] = $this->outdent($matches[2]); 1104 | return ''; # String that will replace the block 1105 | } 1106 | 1107 | 1108 | function doFootnotes($text) { 1109 | # 1110 | # Replace footnote references in $text [^id] with a special text-token 1111 | # which will be replaced by the actual footnote marker in appendFootnotes. 1112 | # 1113 | if (!$this->in_anchor) { 1114 | $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text); 1115 | } 1116 | return $text; 1117 | } 1118 | 1119 | 1120 | function appendFootnotes($text) { 1121 | # 1122 | # Append footnote list to text. 
1123 | # 1124 | $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 1125 | array(&$this, '_appendFootnotes_callback'), $text); 1126 | 1127 | if (!empty($this->footnotes_ordered)) { 1128 | $text .= "\n\n"; 1129 | $text .= "
\n"; 1130 | $text .= "empty_element_suffix ."\n"; 1131 | $text .= "
    \n\n"; 1132 | 1133 | $attr = " rev=\"footnote\""; 1134 | if ($this->fn_backlink_class != "") { 1135 | $class = $this->fn_backlink_class; 1136 | $class = $this->encodeAttribute($class); 1137 | $attr .= " class=\"$class\""; 1138 | } 1139 | if ($this->fn_backlink_title != "") { 1140 | $title = $this->fn_backlink_title; 1141 | $title = $this->encodeAttribute($title); 1142 | $attr .= " title=\"$title\""; 1143 | } 1144 | $num = 0; 1145 | 1146 | while (!empty($this->footnotes_ordered)) { 1147 | $footnote = reset($this->footnotes_ordered); 1148 | $note_id = key($this->footnotes_ordered); 1149 | unset($this->footnotes_ordered[$note_id]); 1150 | 1151 | $footnote .= "\n"; # Need to append newline before parsing. 1152 | $footnote = $this->runBlockGamut("$footnote\n"); 1153 | $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 1154 | array(&$this, '_appendFootnotes_callback'), $footnote); 1155 | 1156 | $attr = str_replace("%%", ++$num, $attr); 1157 | $note_id = $this->encodeAttribute($note_id); 1158 | 1159 | # Add backlink to last paragraph; create new paragraph if needed. 1160 | $backlink = ""; 1161 | if (preg_match('{

    $}', $footnote)) { 1162 | $footnote = substr($footnote, 0, -4) . " $backlink

    "; 1163 | } else { 1164 | $footnote .= "\n\n

    $backlink

    "; 1165 | } 1166 | 1167 | $text .= "
  1. \n"; 1168 | $text .= $footnote . "\n"; 1169 | $text .= "
  2. \n\n"; 1170 | } 1171 | 1172 | $text .= "
\n"; 1173 | $text .= "
"; 1174 | } 1175 | return $text; 1176 | } 1177 | function _appendFootnotes_callback($matches) { 1178 | $node_id = $this->fn_id_prefix . $matches[1]; 1179 | 1180 | # Create footnote marker only if it has a corresponding footnote *and* 1181 | # the footnote hasn't been used by another marker. 1182 | if (isset($this->footnotes[$node_id])) { 1183 | # Transfert footnote content to the ordered list. 1184 | $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id]; 1185 | unset($this->footnotes[$node_id]); 1186 | 1187 | $num = $this->footnote_counter++; 1188 | $attr = " rel=\"footnote\""; 1189 | if ($this->fn_link_class != "") { 1190 | $class = $this->fn_link_class; 1191 | $class = $this->encodeAttribute($class); 1192 | $attr .= " class=\"$class\""; 1193 | } 1194 | if ($this->fn_link_title != "") { 1195 | $title = $this->fn_link_title; 1196 | $title = $this->encodeAttribute($title); 1197 | $attr .= " title=\"$title\""; 1198 | } 1199 | 1200 | $attr = str_replace("%%", $num, $attr); 1201 | $node_id = $this->encodeAttribute($node_id); 1202 | 1203 | return 1204 | "". 1205 | "$num". 1206 | ""; 1207 | } 1208 | 1209 | return "[^".$matches[1]."]"; 1210 | } 1211 | 1212 | 1213 | ### Abbreviations ### 1214 | 1215 | function stripAbbreviations($text) { 1216 | # 1217 | # Strips abbreviations from text, stores titles in hash references. 
1218 | # 1219 | $less_than_tab = $this->tab_width - 1; 1220 | 1221 | # Link defs are in the form: [id]*: url "optional title" 1222 | $text = preg_replace_callback('{ 1223 | ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?: # abbr_id = $1 1224 | (.*) # text = $2 (no blank lines allowed) 1225 | }xm', 1226 | array(&$this, '_stripAbbreviations_callback'), 1227 | $text); 1228 | return $text; 1229 | } 1230 | function _stripAbbreviations_callback($matches) { 1231 | $abbr_word = $matches[1]; 1232 | $abbr_desc = $matches[2]; 1233 | if ($this->abbr_word_re) 1234 | $this->abbr_word_re .= '|'; 1235 | $this->abbr_word_re .= preg_quote($abbr_word); 1236 | $this->abbr_desciptions[$abbr_word] = trim($abbr_desc); 1237 | return ''; # String that will replace the block 1238 | } 1239 | 1240 | 1241 | function doAbbreviations($text) { 1242 | # 1243 | # Find defined abbreviations in text and wrap them in elements. 1244 | # 1245 | if ($this->abbr_word_re) { 1246 | // cannot use the /x modifier because abbr_word_re may 1247 | // contain significant spaces: 1248 | $text = preg_replace_callback('{'. 1249 | '(?abbr_word_re.')'. 1251 | '(?![\w\x1A])'. 1252 | '}', 1253 | array(&$this, '_doAbbreviations_callback'), $text); 1254 | } 1255 | return $text; 1256 | } 1257 | function _doAbbreviations_callback($matches) { 1258 | $abbr = $matches[0]; 1259 | if (isset($this->abbr_desciptions[$abbr])) { 1260 | $desc = $this->abbr_desciptions[$abbr]; 1261 | if (empty($desc)) { 1262 | return $this->hashPart("$abbr"); 1263 | } else { 1264 | $desc = $this->encodeAttribute($desc); 1265 | return $this->hashPart("$abbr"); 1266 | } 1267 | } else { 1268 | return $matches[0]; 1269 | } 1270 | } 1271 | 1272 | } 1273 | 1274 | 1275 | /* 1276 | 1277 | PHP Markdown Extra 1278 | ================== 1279 | 1280 | Description 1281 | ----------- 1282 | 1283 | This is a PHP port of the original Markdown formatter written in Perl 1284 | by John Gruber. 
This special "Extra" version of PHP Markdown features 1285 | further enhancements to the syntax for making additional constructs 1286 | such as tables and definition lists. 1287 | 1288 | Markdown is a text-to-HTML filter; it translates an easy-to-read / 1289 | easy-to-write structured text format into HTML. Markdown's text format 1290 | is most similar to that of plain text email, and supports features such 1291 | as headers, *emphasis*, code blocks, blockquotes, and links. 1292 | 1293 | Markdown's syntax is designed not as a generic markup language, but 1294 | specifically to serve as a front-end to (X)HTML. You can use span-level 1295 | HTML tags anywhere in a Markdown document, and you can use block-level 1296 | HTML tags (like
<div> and <table> as well). 1297 | 1298 | For more information about Markdown's syntax, see: 1299 | 1300 | <http://daringfireball.net/projects/markdown/> 1301 | 1302 | 1303 | Bugs 1304 | ---- 1305 | 1306 | To file bug reports please send email to: 1307 | 1308 | 1309 | 1310 | Please include with your report: (1) the example input; (2) the output you 1311 | expected; (3) the output Markdown actually produced. 1312 | 1313 | 1314 | Version History 1315 | --------------- 1316 | 1317 | See the readme file for detailed release notes for this version. 1318 | 1319 | 1320 | Copyright and License 1321 | --------------------- 1322 | 1323 | PHP Markdown & Extra 1324 | Copyright (c) 2004-2009 Michel Fortin 1325 | 1326 | All rights reserved. 1327 | 1328 | Based on Markdown 1329 | Copyright (c) 2003-2006 John Gruber 1330 | 1331 | All rights reserved. 1332 | 1333 | Redistribution and use in source and binary forms, with or without 1334 | modification, are permitted provided that the following conditions are 1335 | met: 1336 | 1337 | * Redistributions of source code must retain the above copyright notice, 1338 | this list of conditions and the following disclaimer. 1339 | 1340 | * Redistributions in binary form must reproduce the above copyright 1341 | notice, this list of conditions and the following disclaimer in the 1342 | documentation and/or other materials provided with the distribution. 1343 | 1344 | * Neither the name "Markdown" nor the names of its contributors may 1345 | be used to endorse or promote products derived from this software 1346 | without specific prior written permission. 1347 | 1348 | This software is provided by the copyright holders and contributors "as 1349 | is" and any express or implied warranties, including, but not limited 1350 | to, the implied warranties of merchantability and fitness for a 1351 | particular purpose are disclaimed.
In no event shall the copyright owner 1352 | or contributors be liable for any direct, indirect, incidental, special, 1353 | exemplary, or consequential damages (including, but not limited to, 1354 | procurement of substitute goods or services; loss of use, data, or 1355 | profits; or business interruption) however caused and on any theory of 1356 | liability, whether in contract, strict liability, or tort (including 1357 | negligence or otherwise) arising in any way out of the use of this 1358 | software, even if advised of the possibility of such damage. 1359 | 1360 | */ 1361 | ?> -------------------------------------------------------------------------------- /src/dflydev/markdown/MarkdownParser.php: -------------------------------------------------------------------------------- 1 | "; 21 | 22 | /** 23 | * Default tab width for code blocks 24 | * @var integer 25 | */ 26 | const DEFAULT_TAB_WIDTH = 4; 27 | 28 | /** 29 | * Configuration key for changing the empty element suffix 30 | * @var string 31 | */ 32 | const CONFIG_EMPTY_ELEMENT_SUFFIX = 'config.emptyElementSuffix'; 33 | 34 | /** 35 | * Configuration key for changing the tab width for code blocks 36 | * @var string 37 | */ 38 | const CONFIG_TAB_WIDTH = 'config.tabWidth'; 39 | 40 | # Regex to match balanced [brackets]. 41 | # Needed to insert a maximum bracked depth while converting to PHP. 42 | var $nested_brackets_depth = 6; 43 | var $nested_brackets_re; 44 | 45 | var $nested_url_parenthesis_depth = 4; 46 | var $nested_url_parenthesis_re; 47 | 48 | # Table of hash values for escaped characters: 49 | var $escape_chars = '\`*_{}[]()>#+-.!'; 50 | var $escape_chars_re; 51 | 52 | # Change to ">" for HTML output. 53 | var $empty_element_suffix = self::DEFAULT_EMPTY_ELEMENT_SUFFIX; 54 | var $tab_width = self::DEFAULT_TAB_WIDTH; 55 | 56 | # Change to `true` to disallow markup or entities. 
var $no_markup = false;
var $no_entities = false;

# Predefined urls and titles for reference links and images.
var $predef_urls = array();
var $predef_titles = array();


/**
 * Build the parser: prepare the helper regular expressions, sort the
 * gamuts by priority, then apply any caller-supplied configuration.
 *
 * @param array|null $configuration Optional map of configuration key to
 *                                  value; every entry is forwarded to
 *                                  configureMarkdownParser().
 */
function __construct(array $configuration = null)
{
    $this->_initDetab();
    $this->prepareItalicsAndBold();

    # Balanced [brackets] pattern, nested up to nested_brackets_depth levels.
    $this->nested_brackets_re =
        str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
        str_repeat('\])*', $this->nested_brackets_depth);

    # Balanced (parentheses) pattern for URLs, nested up to
    # nested_url_parenthesis_depth levels.
    $this->nested_url_parenthesis_re =
        str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
        str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);

    # Character class matching any one of the escapable characters.
    $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';

    # Sort document, block, and span gamut in ascending priority order.
    asort($this->document_gamut);
    asort($this->block_gamut);
    asort($this->span_gamut);

    if ($configuration) {
        foreach ($configuration as $key => $value) {
            $this->configureMarkdownParser($key, $value);
        }
    }
}

/**
 * Configure parser
 *
 * Recognised keys are self::CONFIG_TAB_WIDTH and
 * self::CONFIG_EMPTY_ELEMENT_SUFFIX; any other key is ignored.
 *
 * @param string $key
 * @param mixed $value
 */
public function configureMarkdownParser($key, $value)
{
    # Keys are compared strictly. A `switch` compares loosely, and on
    # PHP < 8 an integer key such as 0 (from a list-style array) is
    # loosely equal to any non-numeric string, which would silently
    # overwrite tab_width.
    if ($key === self::CONFIG_TAB_WIDTH) {
        $this->tab_width = $value;
    } elseif ($key === self::CONFIG_EMPTY_ELEMENT_SUFFIX) {
        $this->empty_element_suffix = $value;
    }
    // TODO: Warning for unknown keys?
}

/**
 * (non-PHPdoc)
 * @see dflydev\markdown.IMarkdownParser::transformMarkdown()
 */
public function transformMarkdown($text)
{
    return $this->transform($text);
}


# Internal hashes used during transformation.
var $urls = array();
var $titles = array();
var $html_hashes = array();

# Status flag to avoid invalid nesting.
var $in_anchor = false;


function setup() {
    #
    # Called before the transformation process starts to setup parser
    # states.
    #
    # Clear global hashes.
    $this->urls = $this->predef_urls;
    $this->titles = $this->predef_titles;
    $this->html_hashes = array();

    # Reset the nesting flag on the instance. The previous code assigned
    # a local variable here, so the flag was never actually reset.
    $this->in_anchor = false;
}

function teardown() {
    #
    # Called after the transformation process to clear any variable
    # which may be taking up memory unnecessarily.
    #
    $this->urls = array();
    $this->titles = array();
    $this->html_hashes = array();
}


function transform($text) {
    #
    # Main function. Performs some preprocessing on the input text
    # and pass it through the document gamut.
    #
    $this->setup();

    # Remove UTF-8 BOM and marker character in input, if present.
    $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

    # Standardize line endings:
    #   DOS to Unix and Mac to Unix
    $text = preg_replace('{\r\n?}', "\n", $text);

    # Make sure $text ends with a couple of newlines:
    $text .= "\n\n";

    # Convert all tabs to spaces.
    $text = $this->detab($text);

    # Turn block-level HTML blocks into hash entries
    $text = $this->hashHTMLBlocks($text);

    # Strip any lines consisting only of spaces and tabs.
    # This makes subsequent regexen easier to write, because we can
    # match consecutive blank lines with /\n+/ instead of something
    # contorted like /[ ]*\n+/ .
    $text = preg_replace('/^[ ]+$/m', '', $text);

    # Run document gamut methods.
188 | foreach ($this->document_gamut as $method => $priority) { 189 | $text = $this->$method($text); 190 | } 191 | 192 | $this->teardown(); 193 | 194 | return $text . "\n"; 195 | } 196 | 197 | var $document_gamut = array( 198 | # Strip link definitions, store in hashes. 199 | "stripLinkDefinitions" => 20, 200 | 201 | "runBasicBlockGamut" => 30, 202 | ); 203 | 204 | 205 | function stripLinkDefinitions($text) { 206 | # 207 | # Strips link definitions from text, stores the URLs and titles in 208 | # hash references. 209 | # 210 | $less_than_tab = $this->tab_width - 1; 211 | 212 | # Link defs are in the form: ^[id]: url "optional title" 213 | $text = preg_replace_callback('{ 214 | ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1 215 | [ ]* 216 | \n? # maybe *one* newline 217 | [ ]* 218 | (?: 219 | <(.+?)> # url = $2 220 | | 221 | (\S+?) # url = $3 222 | ) 223 | [ ]* 224 | \n? # maybe one newline 225 | [ ]* 226 | (?: 227 | (?<=\s) # lookbehind for whitespace 228 | ["(] 229 | (.*?) # title = $4 230 | [")] 231 | [ ]* 232 | )? # title is optional 233 | (?:\n+|\Z) 234 | }xm', 235 | array(&$this, '_stripLinkDefinitions_callback'), 236 | $text); 237 | return $text; 238 | } 239 | function _stripLinkDefinitions_callback($matches) { 240 | $link_id = strtolower($matches[1]); 241 | $url = $matches[2] == '' ? $matches[3] : $matches[2]; 242 | $this->urls[$link_id] = $url; 243 | $this->titles[$link_id] =& $matches[4]; 244 | return ''; # String that will replace the block 245 | } 246 | 247 | 248 | function hashHTMLBlocks($text) { 249 | if ($this->no_markup) return $text; 250 | 251 | $less_than_tab = $this->tab_width - 1; 252 | 253 | # Hashify HTML blocks: 254 | # We only want to do this for block-level HTML tags, such as headers, 255 | # lists, and tables. That's because we still want to wrap

s around 256 | # "paragraphs" that are wrapped in non-block-level tags, such as anchors, 257 | # phrase emphasis, and spans. The list of tags we're looking for is 258 | # hard-coded: 259 | # 260 | # * List "a" is made of tags which can be both inline or block-level. 261 | # These will be treated block-level when the start tag is alone on 262 | # its line, otherwise they're not matched here and will be taken as 263 | # inline later. 264 | # * List "b" is made of tags which are always block-level; 265 | # 266 | $block_tags_a_re = 'ins|del'; 267 | $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'. 268 | 'script|noscript|form|fieldset|iframe|math'; 269 | 270 | # Regular expression for the content of a block tag. 271 | $nested_tags_level = 4; 272 | $attr = ' 273 | (?> # optional tag attributes 274 | \s # starts with whitespace 275 | (?> 276 | [^>"/]+ # text outside quotes 277 | | 278 | /+(?!>) # slash not followed by ">" 279 | | 280 | "[^"]*" # text inside double quotes (tolerate ">") 281 | | 282 | \'[^\']*\' # text inside single quotes (tolerate ">") 283 | )* 284 | )? 285 | '; 286 | $content = 287 | str_repeat(' 288 | (?> 289 | [^<]+ # content without tag 290 | | 291 | <\2 # nested opening tag 292 | '.$attr.' # attributes 293 | (?> 294 | /> 295 | | 296 | >', $nested_tags_level). # end of opening tag 297 | '.*?'. # last level nested tag content 298 | str_repeat(' 299 | # closing nested tag 300 | ) 301 | | 302 | <(?!/\2\s*> # other tags with a different name 303 | ) 304 | )*', 305 | $nested_tags_level); 306 | $content2 = str_replace('\2', '\3', $content); 307 | 308 | # First, look for nested blocks, e.g.: 309 | #

310 | #
311 | # tags for inner block must be indented. 312 | #
313 | #
314 | # 315 | # The outermost tags must start at the left margin for this to match, and 316 | # the inner nested divs must be indented. 317 | # We need to do this before the next, more liberal match, because the next 318 | # match will start at the first `
` and stop at the first `
`. 319 | $text = preg_replace_callback('{(?> 320 | (?> 321 | (?<=\n\n) # Starting after a blank line 322 | | # or 323 | \A\n? # the beginning of the doc 324 | ) 325 | ( # save in $1 326 | 327 | # Match from `\n` to `\n`, handling nested tags 328 | # in between. 329 | 330 | [ ]{0,'.$less_than_tab.'} 331 | <('.$block_tags_b_re.')# start tag = $2 332 | '.$attr.'> # attributes followed by > and \n 333 | '.$content.' # content, support nesting 334 | # the matching end tag 335 | [ ]* # trailing spaces/tabs 336 | (?=\n+|\Z) # followed by a newline or end of document 337 | 338 | | # Special version for tags of group a. 339 | 340 | [ ]{0,'.$less_than_tab.'} 341 | <('.$block_tags_a_re.')# start tag = $3 342 | '.$attr.'>[ ]*\n # attributes followed by > 343 | '.$content2.' # content, support nesting 344 | # the matching end tag 345 | [ ]* # trailing spaces/tabs 346 | (?=\n+|\Z) # followed by a newline or end of document 347 | 348 | | # Special case just for
. It was easier to make a special 349 | # case than to make the other regex more complicated. 350 | 351 | [ ]{0,'.$less_than_tab.'} 352 | <(hr) # start tag = $2 353 | '.$attr.' # attributes 354 | /?> # the matching end tag 355 | [ ]* 356 | (?=\n{2,}|\Z) # followed by a blank line or end of document 357 | 358 | | # Special case for standalone HTML comments: 359 | 360 | [ ]{0,'.$less_than_tab.'} 361 | (?s: 362 | 363 | ) 364 | [ ]* 365 | (?=\n{2,}|\Z) # followed by a blank line or end of document 366 | 367 | | # PHP and ASP-style processor instructions ( 374 | ) 375 | [ ]* 376 | (?=\n{2,}|\Z) # followed by a blank line or end of document 377 | 378 | ) 379 | )}Sxmi', 380 | array(&$this, '_hashHTMLBlocks_callback'), 381 | $text); 382 | 383 | return $text; 384 | } 385 | function _hashHTMLBlocks_callback($matches) { 386 | $text = $matches[1]; 387 | $key = $this->hashBlock($text); 388 | return "\n\n$key\n\n"; 389 | } 390 | 391 | 392 | function hashPart($text, $boundary = 'X') { 393 | # 394 | # Called whenever a tag must be hashed when a function insert an atomic 395 | # element in the text stream. Passing $text to through this function gives 396 | # a unique text-token which will be reverted back when calling unhash. 397 | # 398 | # The $boundary argument specify what character should be used to surround 399 | # the token. By convension, "B" is used for block elements that needs not 400 | # to be wrapped into paragraph tags at the end, ":" is used for elements 401 | # that are word separators and "X" is used in the general case. 402 | # 403 | # Swap back any tag hash found in $text so we do not have to `unhash` 404 | # multiple times at the end. 405 | $text = $this->unhash($text); 406 | 407 | # Then hash the block. 408 | static $i = 0; 409 | $key = "$boundary\x1A" . ++$i . $boundary; 410 | $this->html_hashes[$key] = $text; 411 | return $key; # String that will replace the tag. 
412 | } 413 | 414 | 415 | function hashBlock($text) { 416 | # 417 | # Shortcut function for hashPart with block-level boundaries. 418 | # 419 | return $this->hashPart($text, 'B'); 420 | } 421 | 422 | 423 | var $block_gamut = array( 424 | # 425 | # These are all the transformations that form block-level 426 | # tags like paragraphs, headers, and list items. 427 | # 428 | "doHeaders" => 10, 429 | "doHorizontalRules" => 20, 430 | 431 | "doLists" => 40, 432 | "doCodeBlocks" => 50, 433 | "doBlockQuotes" => 60, 434 | ); 435 | 436 | function runBlockGamut($text) { 437 | # 438 | # Run block gamut tranformations. 439 | # 440 | # We need to escape raw HTML in Markdown source before doing anything 441 | # else. This need to be done for each block, and not only at the 442 | # begining in the Markdown function since hashed blocks can be part of 443 | # list items and could have been indented. Indented blocks would have 444 | # been seen as a code block in a previous pass of hashHTMLBlocks. 445 | $text = $this->hashHTMLBlocks($text); 446 | 447 | return $this->runBasicBlockGamut($text); 448 | } 449 | 450 | function runBasicBlockGamut($text) { 451 | # 452 | # Run block gamut tranformations, without hashing HTML blocks. This is 453 | # useful when HTML blocks are known to be already hashed, like in the first 454 | # whole-document pass. 455 | # 456 | foreach ($this->block_gamut as $method => $priority) { 457 | $text = $this->$method($text); 458 | } 459 | 460 | # Finally form paragraph and restore hashed blocks. 461 | $text = $this->formParagraphs($text); 462 | 463 | return $text; 464 | } 465 | 466 | 467 | function doHorizontalRules($text) { 468 | # Do Horizontal Rules: 469 | return preg_replace( 470 | '{ 471 | ^[ ]{0,3} # Leading space 472 | ([-*_]) # $1: First marker 473 | (?> # Repeated marker group 474 | [ ]{0,2} # Zero, one, or two spaces. 475 | \1 # Marker character 476 | ){2,} # Group repeated at least twice 477 | [ ]* # Tailing spaces 478 | $ # End of line. 
479 | }mx', 480 | "\n".$this->hashBlock("empty_element_suffix")."\n", 481 | $text); 482 | } 483 | 484 | 485 | var $span_gamut = array( 486 | # 487 | # These are all the transformations that occur *within* block-level 488 | # tags like paragraphs, headers, and list items. 489 | # 490 | # Process character escapes, code spans, and inline HTML 491 | # in one shot. 492 | "parseSpan" => -30, 493 | 494 | # Process anchor and image tags. Images must come first, 495 | # because ![foo][f] looks like an anchor. 496 | "doImages" => 10, 497 | "doAnchors" => 20, 498 | 499 | # Make links out of things like `` 500 | # Must come after doAnchors, because you can use < and > 501 | # delimiters in inline links like [this](). 502 | "doAutoLinks" => 30, 503 | "encodeAmpsAndAngles" => 40, 504 | 505 | "doItalicsAndBold" => 50, 506 | "doHardBreaks" => 60, 507 | ); 508 | 509 | function runSpanGamut($text) { 510 | # 511 | # Run span gamut tranformations. 512 | # 513 | foreach ($this->span_gamut as $method => $priority) { 514 | $text = $this->$method($text); 515 | } 516 | 517 | return $text; 518 | } 519 | 520 | 521 | function doHardBreaks($text) { 522 | # Do hard breaks: 523 | return preg_replace_callback('/ {2,}\n/', 524 | array(&$this, '_doHardBreaks_callback'), $text); 525 | } 526 | function _doHardBreaks_callback($matches) { 527 | return $this->hashPart("empty_element_suffix\n"); 528 | } 529 | 530 | 531 | function doAnchors($text) { 532 | # 533 | # Turn Markdown link shortcuts into XHTML tags. 534 | # 535 | if ($this->in_anchor) return $text; 536 | $this->in_anchor = true; 537 | 538 | # 539 | # First, handle reference-style links: [link text] [id] 540 | # 541 | $text = preg_replace_callback('{ 542 | ( # wrap whole match in $1 543 | \[ 544 | ('.$this->nested_brackets_re.') # link text = $2 545 | \] 546 | 547 | [ ]? # one optional space 548 | (?:\n[ ]*)? # one optional newline followed by spaces 549 | 550 | \[ 551 | (.*?) 
# id = $3 552 | \] 553 | ) 554 | }xs', 555 | array(&$this, '_doAnchors_reference_callback'), $text); 556 | 557 | # 558 | # Next, inline-style links: [link text](url "optional title") 559 | # 560 | $text = preg_replace_callback('{ 561 | ( # wrap whole match in $1 562 | \[ 563 | ('.$this->nested_brackets_re.') # link text = $2 564 | \] 565 | \( # literal paren 566 | [ \n]* 567 | (?: 568 | <(.+?)> # href = $3 569 | | 570 | ('.$this->nested_url_parenthesis_re.') # href = $4 571 | ) 572 | [ \n]* 573 | ( # $5 574 | ([\'"]) # quote char = $6 575 | (.*?) # Title = $7 576 | \6 # matching quote 577 | [ \n]* # ignore any spaces/tabs between closing quote and ) 578 | )? # title is optional 579 | \) 580 | ) 581 | }xs', 582 | array(&$this, '_doAnchors_inline_callback'), $text); 583 | 584 | # 585 | # Last, handle reference-style shortcuts: [link text] 586 | # These must come last in case you've also got [link text][1] 587 | # or [link text](/foo) 588 | # 589 | $text = preg_replace_callback('{ 590 | ( # wrap whole match in $1 591 | \[ 592 | ([^\[\]]+) # link text = $2; can\'t contain [ or ] 593 | \] 594 | ) 595 | }xs', 596 | array(&$this, '_doAnchors_reference_callback'), $text); 597 | 598 | $this->in_anchor = false; 599 | return $text; 600 | } 601 | function _doAnchors_reference_callback($matches) { 602 | $whole_match = $matches[1]; 603 | $link_text = $matches[2]; 604 | $link_id =& $matches[3]; 605 | 606 | if ($link_id == "") { 607 | # for shortcut links like [this][] or [this]. 
608 | $link_id = $link_text; 609 | } 610 | 611 | # lower-case and turn embedded newlines into spaces 612 | $link_id = strtolower($link_id); 613 | $link_id = preg_replace('{[ ]?\n}', ' ', $link_id); 614 | 615 | if (isset($this->urls[$link_id])) { 616 | $url = $this->urls[$link_id]; 617 | $url = $this->encodeAttribute($url); 618 | 619 | $result = "titles[$link_id] ) ) { 621 | $title = $this->titles[$link_id]; 622 | $title = $this->encodeAttribute($title); 623 | $result .= " title=\"$title\""; 624 | } 625 | 626 | $link_text = $this->runSpanGamut($link_text); 627 | $result .= ">$link_text"; 628 | $result = $this->hashPart($result); 629 | } 630 | else { 631 | $result = $whole_match; 632 | } 633 | return $result; 634 | } 635 | function _doAnchors_inline_callback($matches) { 636 | $whole_match = $matches[1]; 637 | $link_text = $this->runSpanGamut($matches[2]); 638 | $url = $matches[3] == '' ? $matches[4] : $matches[3]; 639 | $title =& $matches[7]; 640 | 641 | $url = $this->encodeAttribute($url); 642 | 643 | $result = "encodeAttribute($title); 646 | $result .= " title=\"$title\""; 647 | } 648 | 649 | $link_text = $this->runSpanGamut($link_text); 650 | $result .= ">$link_text"; 651 | 652 | return $this->hashPart($result); 653 | } 654 | 655 | 656 | function doImages($text) { 657 | # 658 | # Turn Markdown image shortcuts into tags. 659 | # 660 | # 661 | # First, handle reference-style labeled images: ![alt text][id] 662 | # 663 | $text = preg_replace_callback('{ 664 | ( # wrap whole match in $1 665 | !\[ 666 | ('.$this->nested_brackets_re.') # alt text = $2 667 | \] 668 | 669 | [ ]? # one optional space 670 | (?:\n[ ]*)? # one optional newline followed by spaces 671 | 672 | \[ 673 | (.*?) 
# id = $3 674 | \] 675 | 676 | ) 677 | }xs', 678 | array(&$this, '_doImages_reference_callback'), $text); 679 | 680 | # 681 | # Next, handle inline images: ![alt text](url "optional title") 682 | # Don't forget: encode * and _ 683 | # 684 | $text = preg_replace_callback('{ 685 | ( # wrap whole match in $1 686 | !\[ 687 | ('.$this->nested_brackets_re.') # alt text = $2 688 | \] 689 | \s? # One optional whitespace character 690 | \( # literal paren 691 | [ \n]* 692 | (?: 693 | <(\S*)> # src url = $3 694 | | 695 | ('.$this->nested_url_parenthesis_re.') # src url = $4 696 | ) 697 | [ \n]* 698 | ( # $5 699 | ([\'"]) # quote char = $6 700 | (.*?) # title = $7 701 | \6 # matching quote 702 | [ \n]* 703 | )? # title is optional 704 | \) 705 | ) 706 | }xs', 707 | array(&$this, '_doImages_inline_callback'), $text); 708 | 709 | return $text; 710 | } 711 | function _doImages_reference_callback($matches) { 712 | $whole_match = $matches[1]; 713 | $alt_text = $matches[2]; 714 | $link_id = strtolower($matches[3]); 715 | 716 | if ($link_id == "") { 717 | $link_id = strtolower($alt_text); # for shortcut links like ![this][]. 718 | } 719 | 720 | $alt_text = $this->encodeAttribute($alt_text); 721 | if (isset($this->urls[$link_id])) { 722 | $url = $this->encodeAttribute($this->urls[$link_id]); 723 | $result = "\"$alt_text\"";titles[$link_id])) { 725 | $title = $this->titles[$link_id]; 726 | $title = $this->encodeAttribute($title); 727 | $result .= " title=\"$title\""; 728 | } 729 | $result .= $this->empty_element_suffix; 730 | $result = $this->hashPart($result); 731 | } 732 | else { 733 | # If there's no such link ID, leave intact: 734 | $result = $whole_match; 735 | } 736 | 737 | return $result; 738 | } 739 | function _doImages_inline_callback($matches) { 740 | $whole_match = $matches[1]; 741 | $alt_text = $matches[2]; 742 | $url = $matches[3] == '' ? 
$matches[4] : $matches[3]; 743 | $title =& $matches[7]; 744 | 745 | $alt_text = $this->encodeAttribute($alt_text); 746 | $url = $this->encodeAttribute($url); 747 | $result = "\"$alt_text\"";encodeAttribute($title); 750 | $result .= " title=\"$title\""; # $title already quoted 751 | } 752 | $result .= $this->empty_element_suffix; 753 | 754 | return $this->hashPart($result); 755 | } 756 | 757 | 758 | function doHeaders($text) { 759 | # Setext-style headers: 760 | # Header 1 761 | # ======== 762 | # 763 | # Header 2 764 | # -------- 765 | # 766 | $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx', 767 | array(&$this, '_doHeaders_callback_setext'), $text); 768 | 769 | # atx-style headers: 770 | # # Header 1 771 | # ## Header 2 772 | # ## Header 2 with closing hashes ## 773 | # ... 774 | # ###### Header 6 775 | # 776 | $text = preg_replace_callback('{ 777 | ^(\#{1,6}) # $1 = string of #\'s 778 | [ ]* 779 | (.+?) # $2 = Header text 780 | [ ]* 781 | \#* # optional closing #\'s (not counted) 782 | \n+ 783 | }xm', 784 | array(&$this, '_doHeaders_callback_atx'), $text); 785 | 786 | return $text; 787 | } 788 | function _doHeaders_callback_setext($matches) { 789 | # Terrible hack to check we haven't found an empty list item. 790 | if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1])) 791 | return $matches[0]; 792 | 793 | $level = $matches[2]{0} == '=' ? 1 : 2; 794 | $block = "".$this->runSpanGamut($matches[1]).""; 795 | return "\n" . $this->hashBlock($block) . "\n\n"; 796 | } 797 | function _doHeaders_callback_atx($matches) { 798 | $level = strlen($matches[1]); 799 | $block = "".$this->runSpanGamut($matches[2]).""; 800 | return "\n" . $this->hashBlock($block) . "\n\n"; 801 | } 802 | 803 | 804 | function doLists($text) { 805 | # 806 | # Form HTML ordered (numbered) and unordered (bulleted) lists. 
807 | # 808 | $less_than_tab = $this->tab_width - 1; 809 | 810 | # Re-usable patterns to match list item bullets and number markers: 811 | $marker_ul_re = '[*+-]'; 812 | $marker_ol_re = '\d+[\.]'; 813 | $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)"; 814 | 815 | $markers_relist = array( 816 | $marker_ul_re => $marker_ol_re, 817 | $marker_ol_re => $marker_ul_re, 818 | ); 819 | 820 | foreach ($markers_relist as $marker_re => $other_marker_re) { 821 | # Re-usable pattern to match any entirel ul or ol list: 822 | $whole_list_re = ' 823 | ( # $1 = whole list 824 | ( # $2 825 | ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces 826 | ('.$marker_re.') # $4 = first list item marker 827 | [ ]+ 828 | ) 829 | (?s:.+?) 830 | ( # $5 831 | \z 832 | | 833 | \n{2,} 834 | (?=\S) 835 | (?! # Negative lookahead for another list item marker 836 | [ ]* 837 | '.$marker_re.'[ ]+ 838 | ) 839 | | 840 | (?= # Lookahead for another kind of list 841 | \n 842 | \3 # Must have the same indentation 843 | '.$other_marker_re.'[ ]+ 844 | ) 845 | ) 846 | ) 847 | '; // mx 848 | 849 | # We use a different prefix before nested lists than top-level lists. 850 | # See extended comment in _ProcessListItems(). 851 | 852 | if ($this->list_level) { 853 | $text = preg_replace_callback('{ 854 | ^ 855 | '.$whole_list_re.' 856 | }mx', 857 | array(&$this, '_doLists_callback'), $text); 858 | } 859 | else { 860 | $text = preg_replace_callback('{ 861 | (?:(?<=\n)\n|\A\n?) # Must eat the newline 862 | '.$whole_list_re.' 863 | }mx', 864 | array(&$this, '_doLists_callback'), $text); 865 | } 866 | } 867 | 868 | return $text; 869 | } 870 | function _doLists_callback($matches) { 871 | # Re-usable patterns to match list item bullets and number markers: 872 | $marker_ul_re = '[*+-]'; 873 | $marker_ol_re = '\d+[\.]'; 874 | $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)"; 875 | 876 | $list = $matches[1]; 877 | $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? 
"ul" : "ol"; 878 | 879 | $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re ); 880 | 881 | $list .= "\n"; 882 | $result = $this->processListItems($list, $marker_any_re); 883 | 884 | $result = $this->hashBlock("<$list_type>\n" . $result . ""); 885 | return "\n". $result ."\n\n"; 886 | } 887 | 888 | var $list_level = 0; 889 | 890 | function processListItems($list_str, $marker_any_re) { 891 | # 892 | # Process the contents of a single ordered or unordered list, splitting it 893 | # into individual list items. 894 | # 895 | # The $this->list_level global keeps track of when we're inside a list. 896 | # Each time we enter a list, we increment it; when we leave a list, 897 | # we decrement. If it's zero, we're not in a list anymore. 898 | # 899 | # We do this because when we're not inside a list, we want to treat 900 | # something like this: 901 | # 902 | # I recommend upgrading to version 903 | # 8. Oops, now this line is treated 904 | # as a sub-list. 905 | # 906 | # As a single paragraph, despite the fact that the second line starts 907 | # with a digit-period-space sequence. 908 | # 909 | # Whereas when we're inside a list (or sub-list), that line will be 910 | # treated as the start of a sub-list. What a kludge, huh? This is 911 | # an aspect of Markdown's syntax that's hard to parse perfectly 912 | # without resorting to mind-reading. Perhaps the solution is to 913 | # change the syntax rules such that sub-lists must start with a 914 | # starting cardinal number; e.g. "1." or "a.". 915 | 916 | $this->list_level++; 917 | 918 | # trim trailing blank lines: 919 | $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); 920 | 921 | $list_str = preg_replace_callback('{ 922 | (\n)? # leading line = $1 923 | (^[ ]*) # leading whitespace = $2 924 | ('.$marker_any_re.' 
# list marker and space = $3 925 | (?:[ ]+|(?=\n)) # space only required if item is not empty 926 | ) 927 | ((?s:.*?)) # list item text = $4 928 | (?:(\n+(?=\n))|\n) # tailing blank line = $5 929 | (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n)))) 930 | }xm', 931 | array(&$this, '_processListItems_callback'), $list_str); 932 | 933 | $this->list_level--; 934 | return $list_str; 935 | } 936 | function _processListItems_callback($matches) { 937 | $item = $matches[4]; 938 | $leading_line =& $matches[1]; 939 | $leading_space =& $matches[2]; 940 | $marker_space = $matches[3]; 941 | $tailing_blank_line =& $matches[5]; 942 | 943 | if ($leading_line || $tailing_blank_line || 944 | preg_match('/\n{2,}/', $item)) 945 | { 946 | # Replace marker with the appropriate whitespace indentation 947 | $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item; 948 | $item = $this->runBlockGamut($this->outdent($item)."\n"); 949 | } 950 | else { 951 | # Recursion for sub-lists: 952 | $item = $this->doLists($this->outdent($item)); 953 | $item = preg_replace('/\n+$/', '', $item); 954 | $item = $this->runSpanGamut($item); 955 | } 956 | 957 | return "
  • " . $item . "
  • \n"; 958 | } 959 | 960 | 961 | function doCodeBlocks($text) { 962 | # 963 | # Process Markdown `
    ` blocks.
     964 | 	#
     965 | 		$text = preg_replace_callback('{
     966 | 				(?:\n\n|\A\n?)
     967 | 				(	            # $1 = the code block -- one or more lines, starting with a space/tab
     968 | 				  (?>
     969 | 					[ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
     970 | 					.*\n+
     971 | 				  )+
     972 | 				)
     973 | 				((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
     974 | 			}xm',
     975 | 			array(&$this, '_doCodeBlocks_callback'), $text);
     976 | 
     977 | 		return $text;
     978 | 	}
     979 | 	function _doCodeBlocks_callback($matches) {
     980 | 		$codeblock = $matches[1];
     981 | 
     982 | 		$codeblock = $this->outdent($codeblock);
     983 | 		$codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
     984 | 
     985 | 		# trim leading newlines and trailing newlines
     986 | 		$codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
     987 | 
     988 | 		$codeblock = "
    $codeblock\n
    "; 989 | return "\n\n".$this->hashBlock($codeblock)."\n\n"; 990 | } 991 | 992 | 993 | function makeCodeSpan($code) { 994 | # 995 | # Create a code span markup for $code. Called from handleSpanToken. 996 | # 997 | $code = htmlspecialchars(trim($code), ENT_NOQUOTES); 998 | return $this->hashPart("$code"); 999 | } 1000 | 1001 | 1002 | var $em_relist = array( 1003 | '' => '(?:(? '(?<=\S|^)(? '(?<=\S|^)(? '(?:(? '(?<=\S|^)(? '(?<=\S|^)(? '(?:(? '(?<=\S|^)(? '(?<=\S|^)(?em_relist as $em => $em_re) { 1025 | foreach ($this->strong_relist as $strong => $strong_re) { 1026 | # Construct list of allowed token expressions. 1027 | $token_relist = array(); 1028 | if (isset($this->em_strong_relist["$em$strong"])) { 1029 | $token_relist[] = $this->em_strong_relist["$em$strong"]; 1030 | } 1031 | $token_relist[] = $em_re; 1032 | $token_relist[] = $strong_re; 1033 | 1034 | # Construct master expression from list. 1035 | $token_re = '{('. implode('|', $token_relist) .')}'; 1036 | $this->em_strong_prepared_relist["$em$strong"] = $token_re; 1037 | } 1038 | } 1039 | } 1040 | 1041 | function doItalicsAndBold($text) { 1042 | $token_stack = array(''); 1043 | $text_stack = array(''); 1044 | $em = ''; 1045 | $strong = ''; 1046 | $tree_char_em = false; 1047 | 1048 | while (1) { 1049 | # 1050 | # Get prepared regular expression for seraching emphasis tokens 1051 | # in current context. 1052 | # 1053 | $token_re = $this->em_strong_prepared_relist["$em$strong"]; 1054 | 1055 | # 1056 | # Each loop iteration search for the next emphasis token. 1057 | # Each token is then passed to handleSpanToken. 1058 | # 1059 | $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); 1060 | $text_stack[0] .= $parts[0]; 1061 | $token =& $parts[1]; 1062 | $text =& $parts[2]; 1063 | 1064 | if (empty($token)) { 1065 | # Reached end of text span: empty stack without emitting. 1066 | # any more emphasis. 
1067 | while ($token_stack[0]) { 1068 | $text_stack[1] .= array_shift($token_stack); 1069 | $text_stack[0] .= array_shift($text_stack); 1070 | } 1071 | break; 1072 | } 1073 | 1074 | $token_len = strlen($token); 1075 | if ($tree_char_em) { 1076 | # Reached closing marker while inside a three-char emphasis. 1077 | if ($token_len == 3) { 1078 | # Three-char closing marker, close em and strong. 1079 | array_shift($token_stack); 1080 | $span = array_shift($text_stack); 1081 | $span = $this->runSpanGamut($span); 1082 | $span = "$span"; 1083 | $text_stack[0] .= $this->hashPart($span); 1084 | $em = ''; 1085 | $strong = ''; 1086 | } else { 1087 | # Other closing marker: close one em or strong and 1088 | # change current token state to match the other 1089 | $token_stack[0] = str_repeat($token{0}, 3-$token_len); 1090 | $tag = $token_len == 2 ? "strong" : "em"; 1091 | $span = $text_stack[0]; 1092 | $span = $this->runSpanGamut($span); 1093 | $span = "<$tag>$span"; 1094 | $text_stack[0] = $this->hashPart($span); 1095 | $$tag = ''; # $$tag stands for $em or $strong 1096 | } 1097 | $tree_char_em = false; 1098 | } else if ($token_len == 3) { 1099 | if ($em) { 1100 | # Reached closing marker for both em and strong. 1101 | # Closing strong marker: 1102 | for ($i = 0; $i < 2; ++$i) { 1103 | $shifted_token = array_shift($token_stack); 1104 | $tag = strlen($shifted_token) == 2 ? "strong" : "em"; 1105 | $span = array_shift($text_stack); 1106 | $span = $this->runSpanGamut($span); 1107 | $span = "<$tag>$span"; 1108 | $text_stack[0] .= $this->hashPart($span); 1109 | $$tag = ''; # $$tag stands for $em or $strong 1110 | } 1111 | } else { 1112 | # Reached opening three-char emphasis marker. Push on token 1113 | # stack; will be handled by the special condition above. 
1114 | $em = $token{0}; 1115 | $strong = "$em$em"; 1116 | array_unshift($token_stack, $token); 1117 | array_unshift($text_stack, ''); 1118 | $tree_char_em = true; 1119 | } 1120 | } else if ($token_len == 2) { 1121 | if ($strong) { 1122 | # Unwind any dangling emphasis marker: 1123 | if (strlen($token_stack[0]) == 1) { 1124 | $text_stack[1] .= array_shift($token_stack); 1125 | $text_stack[0] .= array_shift($text_stack); 1126 | } 1127 | # Closing strong marker: 1128 | array_shift($token_stack); 1129 | $span = array_shift($text_stack); 1130 | $span = $this->runSpanGamut($span); 1131 | $span = "$span"; 1132 | $text_stack[0] .= $this->hashPart($span); 1133 | $strong = ''; 1134 | } else { 1135 | array_unshift($token_stack, $token); 1136 | array_unshift($text_stack, ''); 1137 | $strong = $token; 1138 | } 1139 | } else { 1140 | # Here $token_len == 1 1141 | if ($em) { 1142 | if (strlen($token_stack[0]) == 1) { 1143 | # Closing emphasis marker: 1144 | array_shift($token_stack); 1145 | $span = array_shift($text_stack); 1146 | $span = $this->runSpanGamut($span); 1147 | $span = "$span"; 1148 | $text_stack[0] .= $this->hashPart($span); 1149 | $em = ''; 1150 | } else { 1151 | $text_stack[0] .= $token; 1152 | } 1153 | } else { 1154 | array_unshift($token_stack, $token); 1155 | array_unshift($text_stack, ''); 1156 | $em = $token; 1157 | } 1158 | } 1159 | } 1160 | return $text_stack[0]; 1161 | } 1162 | 1163 | 1164 | function doBlockQuotes($text) { 1165 | $text = preg_replace_callback('/ 1166 | ( # Wrap whole match in $1 1167 | (?> 1168 | ^[ ]*>[ ]? 
# ">" at the start of a line 1169 | .+\n # rest of the first line 1170 | (.+\n)* # subsequent consecutive lines 1171 | \n* # blanks 1172 | )+ 1173 | ) 1174 | /xm', 1175 | array(&$this, '_doBlockQuotes_callback'), $text); 1176 | 1177 | return $text; 1178 | } 1179 | function _doBlockQuotes_callback($matches) { 1180 | $bq = $matches[1]; 1181 | # trim one level of quoting - trim whitespace-only lines 1182 | $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq); 1183 | $bq = $this->runBlockGamut($bq); # recurse 1184 | 1185 | $bq = preg_replace('/^/m', " ", $bq); 1186 | # These leading spaces cause problem with
     content, 
    1187 | 		# so we need to fix that:
    1188 | 		$bq = preg_replace_callback('{(\s*
    .+?
    )}sx', 1189 | array(&$this, '_doBlockQuotes_callback2'), $bq); 1190 | 1191 | return "\n". $this->hashBlock("
    \n$bq\n
    ")."\n\n"; 1192 | } 1193 | function _doBlockQuotes_callback2($matches) { 1194 | $pre = $matches[1]; 1195 | $pre = preg_replace('/^ /m', '', $pre); 1196 | return $pre; 1197 | } 1198 | 1199 | 1200 | function formParagraphs($text) { 1201 | # 1202 | # Params: 1203 | # $text - string to process with html

    tags 1204 | # 1205 | # Strip leading and trailing lines: 1206 | $text = preg_replace('/\A\n+|\n+\z/', '', $text); 1207 | 1208 | $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); 1209 | 1210 | # 1211 | # Wrap

    tags and unhashify HTML blocks 1212 | # 1213 | foreach ($grafs as $key => $value) { 1214 | if (!preg_match('/^B\x1A[0-9]+B$/', $value)) { 1215 | # Is a paragraph. 1216 | $value = $this->runSpanGamut($value); 1217 | $value = preg_replace('/^([ ]*)/', "

    ", $value); 1218 | $value .= "

    "; 1219 | $grafs[$key] = $this->unhash($value); 1220 | } 1221 | else { 1222 | # Is a block. 1223 | # Modify elements of @grafs in-place... 1224 | $graf = $value; 1225 | $block = $this->html_hashes[$graf]; 1226 | $graf = $block; 1227 | // if (preg_match('{ 1228 | // \A 1229 | // ( # $1 =
    tag 1230 | //
    ]* 1232 | // \b 1233 | // markdown\s*=\s* ([\'"]) # $2 = attr quote char 1234 | // 1 1235 | // \2 1236 | // [^>]* 1237 | // > 1238 | // ) 1239 | // ( # $3 = contents 1240 | // .* 1241 | // ) 1242 | // (
    ) # $4 = closing tag 1243 | // \z 1244 | // }xs', $block, $matches)) 1245 | // { 1246 | // list(, $div_open, , $div_content, $div_close) = $matches; 1247 | // 1248 | // # We can't call Markdown(), because that resets the hash; 1249 | // # that initialization code should be pulled into its own sub, though. 1250 | // $div_content = $this->hashHTMLBlocks($div_content); 1251 | // 1252 | // # Run document gamut methods on the content. 1253 | // foreach ($this->document_gamut as $method => $priority) { 1254 | // $div_content = $this->$method($div_content); 1255 | // } 1256 | // 1257 | // $div_open = preg_replace( 1258 | // '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open); 1259 | // 1260 | // $graf = $div_open . "\n" . $div_content . "\n" . $div_close; 1261 | // } 1262 | $grafs[$key] = $graf; 1263 | } 1264 | } 1265 | 1266 | return implode("\n\n", $grafs); 1267 | } 1268 | 1269 | 1270 | function encodeAttribute($text) { 1271 | # 1272 | # Encode text for a double-quoted HTML attribute. This function 1273 | # is *not* suitable for attributes enclosed in single quotes. 1274 | # 1275 | $text = $this->encodeAmpsAndAngles($text); 1276 | $text = str_replace('"', '"', $text); 1277 | return $text; 1278 | } 1279 | 1280 | 1281 | function encodeAmpsAndAngles($text) { 1282 | # 1283 | # Smart processing for ampersands and angle brackets that need to 1284 | # be encoded. Valid character entities are left alone unless the 1285 | # no-entities mode is set. 
1286 | # 1287 | if ($this->no_entities) { 1288 | $text = str_replace('&', '&', $text); 1289 | } else { 1290 | # Ampersand-encoding based entirely on Nat Irons's Amputator 1291 | # MT plugin: 1292 | $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', 1293 | '&', $text);; 1294 | } 1295 | # Encode remaining <'s 1296 | $text = str_replace('<', '<', $text); 1297 | 1298 | return $text; 1299 | } 1300 | 1301 | 1302 | function doAutoLinks($text) { 1303 | $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i', 1304 | array(&$this, '_doAutoLinks_url_callback'), $text); 1305 | 1306 | # Email addresses: 1307 | $text = preg_replace_callback('{ 1308 | < 1309 | (?:mailto:)? 1310 | ( 1311 | (?: 1312 | [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+ 1313 | | 1314 | ".*?" 1315 | ) 1316 | \@ 1317 | (?: 1318 | [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+ 1319 | | 1320 | \[[\d.a-fA-F:]+\] # IPv4 & IPv6 1321 | ) 1322 | ) 1323 | > 1324 | }xi', 1325 | array(&$this, '_doAutoLinks_email_callback'), $text); 1326 | 1327 | return $text; 1328 | } 1329 | function _doAutoLinks_url_callback($matches) { 1330 | $url = $this->encodeAttribute($matches[1]); 1331 | $link = "$url"; 1332 | return $this->hashPart($link); 1333 | } 1334 | function _doAutoLinks_email_callback($matches) { 1335 | $address = $matches[1]; 1336 | $link = $this->encodeEmailAddress($address); 1337 | return $this->hashPart($link); 1338 | } 1339 | 1340 | 1341 | function encodeEmailAddress($addr) { 1342 | # 1343 | # Input: an email address, e.g. "foo@example.com" 1344 | # 1345 | # Output: the email address as a mailto link, with each character 1346 | # of the address encoded as either a decimal or hex entity, in 1347 | # the hopes of foiling most address harvesting spam bots. E.g.: 1348 | # 1349 | #

    foo@exampl 1352 | # e.com

    1353 | # 1354 | # Based by a filter by Matthew Wickline, posted to BBEdit-Talk. 1355 | # With some optimizations by Milian Wolff. 1356 | # 1357 | $addr = "mailto:" . $addr; 1358 | $chars = preg_split('/(? $char) { 1362 | $ord = ord($char); 1363 | # Ignore non-ascii chars. 1364 | if ($ord < 128) { 1365 | $r = ($seed * (1 + $key)) % 100; # Pseudo-random function. 1366 | # roughly 10% raw, 45% hex, 45% dec 1367 | # '@' *must* be encoded. I insist. 1368 | if ($r > 90 && $char != '@') /* do nothing */; 1369 | else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';'; 1370 | else $chars[$key] = '&#'.$ord.';'; 1371 | } 1372 | } 1373 | 1374 | $addr = implode('', $chars); 1375 | $text = implode('', array_slice($chars, 7)); # text without `mailto:` 1376 | $addr = "$text"; 1377 | 1378 | return $addr; 1379 | } 1380 | 1381 | 1382 | function parseSpan($str) { 1383 | # 1384 | # Take the string $str and parse it into tokens, hashing embeded HTML, 1385 | # escaped characters and handling code spans. 1386 | # 1387 | $output = ''; 1388 | 1389 | $span_re = '{ 1390 | ( 1391 | \\\\'.$this->escape_chars_re.' 1392 | | 1393 | (?no_markup ? '' : ' 1396 | | 1397 | # comment 1398 | | 1399 | <\?.*?\?> | <%.*?%> # processing instruction 1400 | | 1401 | <[/!$]?[-a-zA-Z0-9:_]+ # regular tags 1402 | (?> 1403 | \s 1404 | (?>[^"\'>]+|"[^"]*"|\'[^\']*\')* 1405 | )? 1406 | > 1407 | ').' 1408 | ) 1409 | }xs'; 1410 | 1411 | while (1) { 1412 | # 1413 | # Each loop iteration seach for either the next tag, the next 1414 | # openning code span marker, or the next escaped character. 1415 | # Each token is then passed to handleSpanToken. 1416 | # 1417 | $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE); 1418 | 1419 | # Create token from text preceding tag. 1420 | if ($parts[0] != "") { 1421 | $output .= $parts[0]; 1422 | } 1423 | 1424 | # Check if we reach the end. 
1425 | if (isset($parts[1])) { 1426 | $output .= $this->handleSpanToken($parts[1], $parts[2]); 1427 | $str = $parts[2]; 1428 | } 1429 | else { 1430 | break; 1431 | } 1432 | } 1433 | 1434 | return $output; 1435 | } 1436 | 1437 | 1438 | function handleSpanToken($token, &$str) { 1439 | # 1440 | # Handle $token provided by parseSpan by determining its nature and 1441 | # returning the corresponding value that should replace it. 1442 | # 1443 | switch ($token{0}) { 1444 | case "\\": 1445 | return $this->hashPart("&#". ord($token{1}). ";"); 1446 | case "`": 1447 | # Search for end marker in remaining text. 1448 | if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', 1449 | $str, $matches)) 1450 | { 1451 | $str = $matches[2]; 1452 | $codespan = $this->makeCodeSpan($matches[1]); 1453 | return $this->hashPart($codespan); 1454 | } 1455 | return $token; // return as text since no ending marker found. 1456 | default: 1457 | return $this->hashPart($token); 1458 | } 1459 | } 1460 | 1461 | 1462 | function outdent($text) { 1463 | # 1464 | # Remove one level of line-leading tabs or spaces 1465 | # 1466 | return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text); 1467 | } 1468 | 1469 | 1470 | # String length function for detab. `_initDetab` will create a function to 1471 | # hanlde UTF-8 if the default function does not exist. 1472 | var $utf8_strlen = 'mb_strlen'; 1473 | 1474 | function detab($text) { 1475 | # 1476 | # Replace tabs with the appropriate amount of space. 1477 | # 1478 | # For each line we separate the line in blocks delemited by 1479 | # tab characters. Then we reconstruct every line by adding the 1480 | # appropriate number of space between each blocks. 1481 | 1482 | $text = preg_replace_callback('/^.*\t.*$/m', 1483 | array(&$this, '_detab_callback'), $text); 1484 | 1485 | return $text; 1486 | } 1487 | function _detab_callback($matches) { 1488 | $line = $matches[0]; 1489 | $strlen = $this->utf8_strlen; # strlen function for UTF-8. 
1490 | 1491 | # Split in blocks. 1492 | $blocks = explode("\t", $line); 1493 | # Add each blocks to the line. 1494 | $line = $blocks[0]; 1495 | unset($blocks[0]); # Do not add first block twice. 1496 | foreach ($blocks as $block) { 1497 | # Calculate amount of space, insert spaces, insert block. 1498 | $amount = $this->tab_width - 1499 | $strlen($line, 'UTF-8') % $this->tab_width; 1500 | $line .= str_repeat(" ", $amount) . $block; 1501 | } 1502 | return $line; 1503 | } 1504 | function _initDetab() { 1505 | # 1506 | # Check for the availability of the function in the `utf8_strlen` property 1507 | # (initially `mb_strlen`). If the function is not available, create a 1508 | # function that will loosely count the number of UTF-8 characters with a 1509 | # regular expression. 1510 | # 1511 | if (function_exists($this->utf8_strlen)) return; 1512 | $this->utf8_strlen = create_function('$text', 'return preg_match_all( 1513 | "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", 1514 | $text, $m);'); 1515 | } 1516 | 1517 | 1518 | function unhash($text) { 1519 | # 1520 | # Swap back in all the tags hashed by _HashHTMLBlocks. 1521 | # 1522 | return preg_replace_callback('/(.)\x1A[0-9]+\1/', 1523 | array(&$this, '_unhash_callback'), $text); 1524 | } 1525 | function _unhash_callback($matches) { 1526 | return $this->html_hashes[$matches[0]]; 1527 | } 1528 | 1529 | } 1530 | -------------------------------------------------------------------------------- /tests/bootstrap.php: -------------------------------------------------------------------------------- 1 | add('dflydev\\tests\\markdown', 'tests'); 13 | -------------------------------------------------------------------------------- /tests/dflydev/tests/markdown/MarkdownExtraParserTest.php: -------------------------------------------------------------------------------- 1 | createParser(); 41 | $html = $markdownParser->transformMarkdown('#Hello World'); 42 | $this->assertEquals("

    Hello World

    \n", $html, 'Simple H1 works'); 43 | } 44 | 45 | /** 46 | * Test tab width for code blocks 47 | */ 48 | public function testTabWidth() 49 | { 50 | $markdownParser = $this->createParser(); 51 | $html = $markdownParser->transformMarkdown(' Hello World'); 52 | $this->assertEquals("
    Hello World\n
    \n", $html, 'Default 4 space tab code block works'); 53 | $this->configureTabWidth($markdownParser, 6); 54 | $html = $markdownParser->transformMarkdown(' Hello World'); 55 | $this->assertEquals("

    Hello World

    \n", $html, 'Default 4 space tab code block not triggered when tab width set to 6'); 56 | $html = $markdownParser->transformMarkdown(' Hello World'); 57 | $this->assertEquals("
    Hello World\n
    \n", $html, 'Setting 6 space tab code block (via method) works'); 58 | $markdownParser = $this->createParser(array($this->configKeyTabWidth => 8)); 59 | $html = $markdownParser->transformMarkdown(' Hello World'); 60 | $this->assertEquals("
    Hello World\n
    \n", $html, 'Setting 8 space tab code block (via constructor) works'); 61 | } 62 | 63 | /** 64 | * Configure a Markdown parser for a specific tab width 65 | * @param \dflydev\markdown\MarkdownParser $markdownParser 66 | * @param integer $width 67 | */ 68 | protected function configureTabWidth(MarkdownParser $markdownParser, $width) 69 | { 70 | $markdownParser->configureMarkdownParser($this->configKeyTabWidth, $width); 71 | } 72 | 73 | } 74 | --------------------------------------------------------------------------------