├── cache └── .gitignore ├── php-markdown ├── Michelf │ ├── MarkdownInterface.inc.php │ ├── Markdown.inc.php │ ├── MarkdownExtra.inc.php │ ├── MarkdownInterface.php │ ├── MarkdownExtra.php │ └── Markdown.php ├── composer.json ├── Readme.php ├── License.md └── Readme.md ├── README.md └── issues.php /cache/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | -------------------------------------------------------------------------------- /php-markdown/Michelf/MarkdownInterface.inc.php: -------------------------------------------------------------------------------- 1 | =5.3.0" 22 | }, 23 | "autoload": { 24 | "psr-0": { "Michelf": "" } 25 | }, 26 | "extra": { 27 | "branch-alias": { 28 | "dev-lib": "1.4.x-dev" 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /php-markdown/Readme.php: -------------------------------------------------------------------------------- 1 | 20 | 21 | 22 | 23 | PHP Markdown Lib - Readme 24 | 25 | 26 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /php-markdown/Michelf/MarkdownInterface.php: -------------------------------------------------------------------------------- 1 | 8 | # 9 | # Original Markdown 10 | # Copyright (c) 2004-2006 John Gruber 11 | # 12 | # 13 | namespace Michelf; 14 | 15 | 16 | # 17 | # Markdown Parser Interface 18 | # 19 | 20 | interface MarkdownInterface { 21 | 22 | # 23 | # Initialize the parser and return the result of its transform method. 24 | # This will work fine for derived classes too. 25 | # 26 | public static function defaultTransform($text); 27 | 28 | # 29 | # Main function. Performs some preprocessing on the input text 30 | # and pass it through the document gamut. 
31 | # 32 | public function transform($text); 33 | 34 | } 35 | 36 | 37 | ?> -------------------------------------------------------------------------------- /php-markdown/Michelf/MarkdownExtra.php: -------------------------------------------------------------------------------- 1 | 8 | # 9 | # Original Markdown 10 | # Copyright (c) 2004-2006 John Gruber 11 | # 12 | # 13 | namespace Michelf; 14 | 15 | 16 | # Just force Michelf/Markdown.php to load. This is needed to load 17 | # the temporary implementation class. See below for details. 18 | \Michelf\Markdown::MARKDOWNLIB_VERSION; 19 | 20 | # 21 | # Markdown Extra Parser Class 22 | # 23 | # Note: Currently the implementation resides in the temporary class 24 | # \Michelf\MarkdownExtra_TmpImpl (in the same file as \Michelf\Markdown). 25 | # This makes it easier to propagate the changes between the three different 26 | # packaging styles of PHP Markdown. Once this issue is resolved, the 27 | # _MarkdownExtra_TmpImpl will disappear and this one will contain the code. 28 | # 29 | 30 | class MarkdownExtra extends \Michelf\_MarkdownExtra_TmpImpl { 31 | 32 | ### Parser Implementation ### 33 | 34 | # Temporarily, the implemenation is in the _MarkdownExtra_TmpImpl class. 35 | # See note above. 36 | 37 | } 38 | 39 | -------------------------------------------------------------------------------- /php-markdown/License.md: -------------------------------------------------------------------------------- 1 | PHP Markdown Lib 2 | Copyright (c) 2004-2013 Michel Fortin 3 | 4 | All rights reserved. 5 | 6 | Based on Markdown 7 | Copyright (c) 2003-2006 John Gruber 8 | 9 | All rights reserved. 10 | 11 | Redistribution and use in source and binary forms, with or without 12 | modification, are permitted provided that the following conditions are 13 | met: 14 | 15 | * Redistributions of source code must retain the above copyright notice, 16 | this list of conditions and the following disclaimer. 
17 | 18 | * Redistributions in binary form must reproduce the above copyright 19 | notice, this list of conditions and the following disclaimer in the 20 | documentation and/or other materials provided with the distribution. 21 | 22 | * Neither the name "Markdown" nor the names of its contributors may 23 | be used to endorse or promote products derived from this software 24 | without specific prior written permission. 25 | 26 | This software is provided by the copyright holders and contributors "as 27 | is" and any express or implied warranties, including, but not limited 28 | to, the implied warranties of merchantability and fitness for a 29 | particular purpose are disclaimed. In no event shall the copyright owner 30 | or contributors be liable for any direct, indirect, incidental, special, 31 | exemplary, or consequential damages (including, but not limited to, 32 | procurement of substitute goods or services; loss of use, data, or 33 | profits; or business interruption) however caused and on any theory of 34 | liability, whether in contract, strict liability, or tort (including 35 | negligence or otherwise) arising in any way out of the use of this 36 | software, even if advised of the possibility of such damage. 37 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Github Issues MediaWiki Extension 2 | ================================= 3 | 4 | Adds a `<githubissues>` tag to render a list of Github issues inline. 5 | 6 | Usage 7 | ----- 8 | 9 | ```html 10 | <githubissues src="https://github.com/aaronpk/p3k/issues?labels=priority%3Aitching" /> 11 | ``` 12 | 13 | The base URL must be of the format `https://github.com/{user}/{repo}/issues`. Any of 14 | the filters on the [issues list API endpoint](http://developer.github.com/v3/issues/#list-issues-for-a-repository) 15 | are accepted in the query string. 16 | 17 | When the page is rendered, the titles and descriptions of all referenced issues will 18 | be rendered in place of the tag. 
You can set the header level for the titles with 19 | the `header` attribute (the default is h3). For example: 20 | 21 | ```html 22 | 23 | ``` 24 | 25 | You can specify how long you would like to cache the list from Github by specifying the 26 | number of hours in the `cache` attribute. The default is 2 hours. 27 | 28 | ```html 29 | 30 | ``` 31 | 32 | 33 | 34 | License 35 | ------- 36 | 37 | Copyright 2013 by Aaron Parecki 38 | 39 | Licensed under the Apache License, Version 2.0 (the "License"); 40 | you may not use this file except in compliance with the License. 41 | You may obtain a copy of the License at 42 | 43 | http://www.apache.org/licenses/LICENSE-2.0 44 | 45 | Unless required by applicable law or agreed to in writing, software 46 | distributed under the License is distributed on an "AS IS" BASIS, 47 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 48 | See the License for the specific language governing permissions and 49 | limitations under the License. 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /issues.php: -------------------------------------------------------------------------------- 1 | 'Github Issues', 8 | 'author' => 'Aaron Parecki', 9 | 'description' => 'Adds tag to embed github issues in the wiki', 10 | 'url' => 'https://github.com/aaronpk/MediaWiki-Github-Issues' 11 | ); 12 | 13 | function wikiGithubIssues() { 14 | global $wgParser; 15 | $wgParser->setHook("githubissues", "embedGithubIssues"); 16 | } 17 | 18 | function embedGithubIssues($input, $args) { 19 | global $wgParser; 20 | 21 | $wgParser->disableCache(); 22 | 23 | ob_start(); 24 | 25 | if(!array_key_exists('src', $args)) { 26 | echo 'Error! 
Usage: '; 27 | } else { 28 | 29 | if(preg_match('/https:\/\/github.com\/([^\/]+)\/([^\/]+)\/issues(\?.+)?/', $args['src'], $match)) { 30 | $username = $match[1]; 31 | $repo = $match[2]; 32 | $query = $match[3]; 33 | 34 | $cacheHours = 2; 35 | if(array_key_exists('cache', $args)) { 36 | $cacheHours = $args['cache']; 37 | } 38 | 39 | $cacheFile = dirname(__FILE__).'/cache/'.md5(implode($args)) . '.html'; 40 | if(file_exists($cacheFile) && filemtime($cacheFile) >= (time() - (60*60*$cacheHours))) { 41 | echo file_get_contents($cacheFile); 42 | } else { 43 | $ch = curl_init(); 44 | curl_setopt($ch, CURLOPT_URL, 'https://api.github.com/repos/'.$username.'/'.$repo.'/issues'.$query); 45 | curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); 46 | curl_setopt($ch, CURLOPT_USERAGENT, 'MediaWiki Github Issues Extension'); 47 | $response = curl_exec($ch); 48 | if($response) { 49 | $issues = json_decode($response); 50 | 51 | if($issues) { 52 | $headerTag = 'h3'; 53 | if(array_key_exists('header', $args)) { 54 | $headerTag = $args['header']; 55 | } 56 | 57 | $html = ''; 58 | foreach($issues as $issue) { 59 | $html .= '<'.$headerTag.'>' . $issue->title . ''; 60 | $body = MarkdownExtra::defaultTransform($issue->body); 61 | $html .= '

' . $body . '

'; 62 | if($issue->comments) { 63 | $html .= '

'; 64 | $html .= $issue->comments . ' comment' . ($issue->comments == 1 ? '' : 's'); 65 | $html .= '

'; 66 | } 67 | } 68 | 69 | file_put_contents($cacheFile, $html); 70 | echo $html; 71 | } else { 72 | echo '

Error retrieving content from GitHub. Malformed JSON was returned from the API.

'; 73 | echo '

URL is: ' . $args['src'] . '

'; 74 | } 75 | } else { 76 | echo 'Error retrieving content from GitHub'; 77 | echo '

URL is: ' . $args['src'] . '

'; 78 | } 79 | } 80 | 81 | } else { 82 | echo 'Error! src must be a URL like the following: https://github.com/aaronpk/p3k/issues?labels=priority%3Aitching'; 83 | } 84 | 85 | } 86 | 87 | return array(ob_get_clean(), 'noparse' => true, 'isHTML' => true); 88 | } -------------------------------------------------------------------------------- /php-markdown/Readme.md: -------------------------------------------------------------------------------- 1 | PHP Markdown 2 | ============ 3 | 4 | PHP Markdown Lib 1.4.0 - 29 Nov 2013 5 | 6 | by Michel Fortin 7 | 8 | 9 | based on Markdown by John Gruber 10 | 11 | 12 | 13 | Introduction 14 | ------------ 15 | 16 | This is a library package that includes the PHP Markdown parser and its 17 | sibling PHP Markdown Extra with additional features. 18 | 19 | Markdown is a text-to-HTML conversion tool for web writers. Markdown 20 | allows you to write using an easy-to-read, easy-to-write plain text 21 | format, then convert it to structurally valid XHTML (or HTML). 22 | 23 | "Markdown" is actually two things: a plain text markup syntax, and a 24 | software tool, originally written in Perl, that converts the plain text 25 | markup to HTML. PHP Markdown is a port to PHP of the original Markdown 26 | program by John Gruber. 27 | 28 | * [Full documentation of the Markdown syntax]() 29 | - Daring Fireball (John Gruber) 30 | * [Markdown Extra syntax additions]() 31 | - Michel Fortin 32 | 33 | 34 | Requirement 35 | ----------- 36 | 37 | This library package requires PHP 5.3 or later. 38 | 39 | Note: The older plugin/library hybrid package for PHP Markdown and 40 | PHP Markdown Extra is still maintained and will work with PHP 4.0.5 and later. 41 | 42 | Before PHP 5.3.7, pcre.backtrack_limit defaults to 100 000, which is too small 43 | in many situations. You might need to set it to higher values. Later PHP 44 | releases defaults to 1 000 000, which is usually fine. 
45 | 46 | 47 | Usage 48 | ----- 49 | 50 | This library package is meant to be used with class autoloading. For autoloading 51 | to work, your project needs to have a PSR-0-compatible autoloader set up. See the 52 | included Readme.php file for a minimal autoloader setup. (If you cannot use 53 | autoloading, see below.) 54 | 55 | With class autoloading in place, putting the 'Michelf' folder in your 56 | include path should be enough for this to work: 57 | 58 | use \Michelf\Markdown; 59 | $my_html = Markdown::defaultTransform($my_text); 60 | 61 | Markdown Extra syntax is also available the same way: 62 | 63 | use \Michelf\MarkdownExtra; 64 | $my_html = MarkdownExtra::defaultTransform($my_text); 65 | 66 | If you wish to use PHP Markdown with another text filter function 67 | built to parse HTML, you should filter the text *after* the `transform` 68 | function call. This is an example with [PHP SmartyPants][psp]: 69 | 70 | use \Michelf\Markdown, \Michelf\SmartyPants; 71 | $my_html = Markdown::defaultTransform($my_text); 72 | $my_html = SmartyPants::defaultTransform($my_html); 73 | 74 | All these examples use the static `defaultTransform` function 75 | found inside the parser class. If you want to customize the parser 76 | configuration, you can also instantiate it directly and change some 77 | configuration variables: 78 | 79 | use \Michelf\MarkdownExtra; 80 | $parser = new MarkdownExtra; 81 | $parser->fn_id_prefix = "post22-"; 82 | $my_html = $parser->transform($my_text); 83 | 84 | To learn more, see the full list of [configuration variables]. 85 | 86 | [configuration variables]: http://michelf.ca/projects/php-markdown/configuration/ 87 | 88 | 89 | ### Usage without an autoloader 90 | 91 | If you cannot use class autoloading, you can still use `include` or `require` 92 | to access the parser. 
To load the `\Michelf\Markdown` parser, do it this way: 93 | 94 | require_once 'Michelf/Markdown.inc.php'; 95 | 96 | Or, if you need the `\Michelf\MarkdownExtra` parser: 97 | 98 | require_once 'Michelf/MarkdownExtra.inc.php'; 99 | 100 | While the plain `.php` files depend on autoloading to work correctly, using the 101 | `.inc.php` files instead will eagerly load the dependencies that would be 102 | loaded on demand if you were using autoloading. 103 | 104 | 105 | Public API and Versioning Policy 106 | --------------------------------- 107 | 108 | Version numbers are of the form *major*.*minor*.*patch*. 109 | 110 | The public API of PHP Markdown consists of the two parser classes `Markdown` 111 | and `MarkdownExtra`, their constructors, the `transform` and `defaultTransform` 112 | functions and their configuration variables. The public API is stable for 113 | a given major version number. It might get additions when the minor version 114 | number increments. 115 | 116 | **Protected members are not considered public API.** This is unconventional 117 | and deserves an explanation. Incrementing the major version number every time 118 | the underlying implementation of something changes is going to give 119 | nonessential version numbers for the vast majority of people who just use the 120 | parser. Protected members are meant to create parser subclasses that behave in 121 | different ways. Very few people create parser subclasses. I don't want to 122 | discourage it by making everything private, but at the same time I can't 123 | guarantee any stable hook between versions if you use protected members. 124 | 125 | **Syntax changes** will increment the minor number for new features, and the 126 | patch number for small corrections. A *new feature* is something that needs a 127 | change in the syntax documentation. Note that since PHP Markdown Lib includes 128 | two parsers, a syntax change for either of them will increment the minor 129 | number. 
Also note that there is nothing perfectly backward-compatible with the 130 | Markdown syntax: all inputs are always valid, so new features always replace 131 | something that was previously legal, although generally nonsensical to do. 132 | 133 | 134 | Bugs 135 | ---- 136 | 137 | To file bug reports please send email to: 138 | 139 | 140 | Please include with your report: (1) the example input; (2) the output you 141 | expected; (3) the output PHP Markdown actually produced. 142 | 143 | If you have a problem where Markdown gives you an empty result, first check 144 | that the backtrack limit is not too low by running `php --info | grep pcre`. 145 | See Requirement above for details. 146 | 147 | 148 | Development and Testing 149 | ----------------------- 150 | 151 | Pull requests for fixing bugs are welcome. Proposed new features are 152 | going to be meticulously reviewed -- taking into account backward compatibility, 153 | potential side effects, and future extensibility -- before deciding on 154 | acceptance or rejection. 155 | 156 | If you make a pull request that includes changes to the parser please add 157 | tests for what is being changed to [MDTest][] and make a pull request there 158 | too. 159 | 160 | [MDTest]: https://github.com/michelf/mdtest/ 161 | 162 | 163 | Version History 164 | --------------- 165 | 166 | PHP Markdown Lib 1.4.0 (29 Nov 2013) 167 | 168 | * Added support for the `tel:` URL scheme in automatic links. 169 | 170 | <tel:+1-111-111-1111> 171 | 172 | It gets converted to this (note the `tel:` prefix becomes invisible): 173 | 174 | <a href="tel:+1-111-111-1111">+1-111-111-1111</a> 175 | 176 | * Added backtick fenced code blocks to MarkdownExtra, originally from 177 | Github-Flavored Markdown. 178 | 179 | * Added an interface called MarkdownInterface implemented by both 180 | the Markdown and MarkdownExtra parsers. You can use the interface if 181 | you want to create a mockup parser object for unit testing. 
182 | 183 | * For those of you who cannot use class autoloading, you can now 184 | include `Michelf/Markdown.inc.php` or `Michelf/MarkdownExtra.inc.php` (note 185 | the `.inc.php` extension) to automatically include other files required 186 | by the parser. 187 | 188 | 189 | PHP Markdown Lib 1.3 (11 Apr 2013) 190 | 191 | This is the first release of PHP Markdown Lib. This package requires PHP 192 | version 5.3 or later and is designed to work with PSR-0 autoloading and, 193 | optionally, with Composer. Here is a list of the changes since 194 | PHP Markdown Extra 1.2.6: 195 | 196 | * Plugin interface for WordPress and other systems is no longer present in 197 | the Lib package. The classic package is still available if you need it: 198 | 199 | 200 | * Added `public` and `protected` protection attributes, plus a section about 201 | what is "public API" and what isn't in the Readme file. 202 | 203 | * Changed HTML output for footnotes: now instead of adding `rel` and `rev` 204 | attributes, footnote links have the class name `footnote-ref` and 205 | backlinks `footnote-backref`. 206 | 207 | * Fixed some regular expressions to make PCRE not shout warnings about POSIX 208 | collation classes (dependent on your version of PCRE). 209 | 210 | * Added optional class and id attributes to images and links using the same 211 | syntax as for headers: 212 | 213 | [link](url){#id .class} 214 | ![img](url){#id .class} 215 | 216 | It works too for reference-style links and images. In this case you need 217 | to put those attributes at the reference definition: 218 | 219 | [link][linkref] or [linkref] 220 | ![img][linkref] 221 | 222 | [linkref]: url "optional title" {#id .class} 223 | 224 | * Fixed a PHP notice message triggered when some table column separator 225 | markers are missing on the separator line below column headers. 226 | 227 | * Fixed a small mistake that could cause the parser to retain an invalid 228 | state related to parsing links across multiple runs. 
This was never 229 | observed (that I know of), but it's still worth fixing. 230 | 231 | 232 | Copyright and License 233 | --------------------- 234 | 235 | PHP Markdown Lib 236 | Copyright (c) 2004-2013 Michel Fortin 237 | 238 | All rights reserved. 239 | 240 | Based on Markdown 241 | Copyright (c) 2003-2005 John Gruber 242 | 243 | All rights reserved. 244 | 245 | Redistribution and use in source and binary forms, with or without 246 | modification, are permitted provided that the following conditions are 247 | met: 248 | 249 | * Redistributions of source code must retain the above copyright 250 | notice, this list of conditions and the following disclaimer. 251 | 252 | * Redistributions in binary form must reproduce the above copyright 253 | notice, this list of conditions and the following disclaimer in the 254 | documentation and/or other materials provided with the 255 | distribution. 256 | 257 | * Neither the name "Markdown" nor the names of its contributors may 258 | be used to endorse or promote products derived from this software 259 | without specific prior written permission. 260 | 261 | This software is provided by the copyright holders and contributors "as 262 | is" and any express or implied warranties, including, but not limited 263 | to, the implied warranties of merchantability and fitness for a 264 | particular purpose are disclaimed. In no event shall the copyright owner 265 | or contributors be liable for any direct, indirect, incidental, special, 266 | exemplary, or consequential damages (including, but not limited to, 267 | procurement of substitute goods or services; loss of use, data, or 268 | profits; or business interruption) however caused and on any theory of 269 | liability, whether in contract, strict liability, or tort (including 270 | negligence or otherwise) arising in any way out of the use of this 271 | software, even if advised of the possibility of such damage. 
272 | -------------------------------------------------------------------------------- /php-markdown/Michelf/Markdown.php: -------------------------------------------------------------------------------- 1 | 8 | # 9 | # Original Markdown 10 | # Copyright (c) 2004-2006 John Gruber 11 | # 12 | # 13 | namespace Michelf; 14 | 15 | 16 | # 17 | # Markdown Parser Class 18 | # 19 | 20 | class Markdown implements MarkdownInterface { 21 | 22 | ### Version ### 23 | 24 | const MARKDOWNLIB_VERSION = "1.4.0"; 25 | 26 | ### Simple Function Interface ### 27 | 28 | public static function defaultTransform($text) { 29 | # 30 | # Initialize the parser and return the result of its transform method. 31 | # This will work fine for derived classes too. 32 | # 33 | # Take parser class on which this function was called. 34 | $parser_class = \get_called_class(); 35 | 36 | # try to take parser from the static parser list 37 | static $parser_list; 38 | $parser =& $parser_list[$parser_class]; 39 | 40 | # create the parser it not already set 41 | if (!$parser) 42 | $parser = new $parser_class; 43 | 44 | # Transform text using parser. 45 | return $parser->transform($text); 46 | } 47 | 48 | ### Configuration Variables ### 49 | 50 | # Change to ">" for HTML output. 51 | public $empty_element_suffix = " />"; 52 | public $tab_width = 4; 53 | 54 | # Change to `true` to disallow markup or entities. 55 | public $no_markup = false; 56 | public $no_entities = false; 57 | 58 | # Predefined urls and titles for reference links and images. 59 | public $predef_urls = array(); 60 | public $predef_titles = array(); 61 | 62 | 63 | ### Parser Implementation ### 64 | 65 | # Regex to match balanced [brackets]. 66 | # Needed to insert a maximum bracked depth while converting to PHP. 
67 | protected $nested_brackets_depth = 6; 68 | protected $nested_brackets_re; 69 | 70 | protected $nested_url_parenthesis_depth = 4; 71 | protected $nested_url_parenthesis_re; 72 | 73 | # Table of hash values for escaped characters: 74 | protected $escape_chars = '\`*_{}[]()>#+-.!'; 75 | protected $escape_chars_re; 76 | 77 | 78 | public function __construct() { 79 | # 80 | # Constructor function. Initialize appropriate member variables. 81 | # 82 | $this->_initDetab(); 83 | $this->prepareItalicsAndBold(); 84 | 85 | $this->nested_brackets_re = 86 | str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth). 87 | str_repeat('\])*', $this->nested_brackets_depth); 88 | 89 | $this->nested_url_parenthesis_re = 90 | str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth). 91 | str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth); 92 | 93 | $this->escape_chars_re = '['.preg_quote($this->escape_chars).']'; 94 | 95 | # Sort document, block, and span gamut in ascendent priority order. 96 | asort($this->document_gamut); 97 | asort($this->block_gamut); 98 | asort($this->span_gamut); 99 | } 100 | 101 | 102 | # Internal hashes used during transformation. 103 | protected $urls = array(); 104 | protected $titles = array(); 105 | protected $html_hashes = array(); 106 | 107 | # Status flag to avoid invalid nesting. 108 | protected $in_anchor = false; 109 | 110 | 111 | protected function setup() { 112 | # 113 | # Called before the transformation process starts to setup parser 114 | # states. 115 | # 116 | # Clear global hashes. 117 | $this->urls = $this->predef_urls; 118 | $this->titles = $this->predef_titles; 119 | $this->html_hashes = array(); 120 | 121 | $this->in_anchor = false; 122 | } 123 | 124 | protected function teardown() { 125 | # 126 | # Called after the transformation process to clear any variable 127 | # which may be taking up memory unnecessarly. 
128 | # 129 | $this->urls = array(); 130 | $this->titles = array(); 131 | $this->html_hashes = array(); 132 | } 133 | 134 | 135 | public function transform($text) { 136 | # 137 | # Main function. Performs some preprocessing on the input text 138 | # and pass it through the document gamut. 139 | # 140 | $this->setup(); 141 | 142 | # Remove UTF-8 BOM and marker character in input, if present. 143 | $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text); 144 | 145 | # Standardize line endings: 146 | # DOS to Unix and Mac to Unix 147 | $text = preg_replace('{\r\n?}', "\n", $text); 148 | 149 | # Make sure $text ends with a couple of newlines: 150 | $text .= "\n\n"; 151 | 152 | # Convert all tabs to spaces. 153 | $text = $this->detab($text); 154 | 155 | # Turn block-level HTML blocks into hash entries 156 | $text = $this->hashHTMLBlocks($text); 157 | 158 | # Strip any lines consisting only of spaces and tabs. 159 | # This makes subsequent regexen easier to write, because we can 160 | # match consecutive blank lines with /\n+/ instead of something 161 | # contorted like /[ ]*\n+/ . 162 | $text = preg_replace('/^[ ]+$/m', '', $text); 163 | 164 | # Run document gamut methods. 165 | foreach ($this->document_gamut as $method => $priority) { 166 | $text = $this->$method($text); 167 | } 168 | 169 | $this->teardown(); 170 | 171 | return $text . "\n"; 172 | } 173 | 174 | protected $document_gamut = array( 175 | # Strip link definitions, store in hashes. 176 | "stripLinkDefinitions" => 20, 177 | 178 | "runBasicBlockGamut" => 30, 179 | ); 180 | 181 | 182 | protected function stripLinkDefinitions($text) { 183 | # 184 | # Strips link definitions from text, stores the URLs and titles in 185 | # hash references. 186 | # 187 | $less_than_tab = $this->tab_width - 1; 188 | 189 | # Link defs are in the form: ^[id]: url "optional title" 190 | $text = preg_replace_callback('{ 191 | ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1 192 | [ ]* 193 | \n? 
# maybe *one* newline 194 | [ ]* 195 | (?: 196 | <(.+?)> # url = $2 197 | | 198 | (\S+?) # url = $3 199 | ) 200 | [ ]* 201 | \n? # maybe one newline 202 | [ ]* 203 | (?: 204 | (?<=\s) # lookbehind for whitespace 205 | ["(] 206 | (.*?) # title = $4 207 | [")] 208 | [ ]* 209 | )? # title is optional 210 | (?:\n+|\Z) 211 | }xm', 212 | array(&$this, '_stripLinkDefinitions_callback'), 213 | $text); 214 | return $text; 215 | } 216 | protected function _stripLinkDefinitions_callback($matches) { 217 | $link_id = strtolower($matches[1]); 218 | $url = $matches[2] == '' ? $matches[3] : $matches[2]; 219 | $this->urls[$link_id] = $url; 220 | $this->titles[$link_id] =& $matches[4]; 221 | return ''; # String that will replace the block 222 | } 223 | 224 | 225 | protected function hashHTMLBlocks($text) { 226 | if ($this->no_markup) return $text; 227 | 228 | $less_than_tab = $this->tab_width - 1; 229 | 230 | # Hashify HTML blocks: 231 | # We only want to do this for block-level HTML tags, such as headers, 232 | # lists, and tables. That's because we still want to wrap

s around 233 | # "paragraphs" that are wrapped in non-block-level tags, such as anchors, 234 | # phrase emphasis, and spans. The list of tags we're looking for is 235 | # hard-coded: 236 | # 237 | # * List "a" is made of tags which can be both inline or block-level. 238 | # These will be treated block-level when the start tag is alone on 239 | # its line, otherwise they're not matched here and will be taken as 240 | # inline later. 241 | # * List "b" is made of tags which are always block-level; 242 | # 243 | $block_tags_a_re = 'ins|del'; 244 | $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'. 245 | 'script|noscript|form|fieldset|iframe|math|svg|'. 246 | 'article|section|nav|aside|hgroup|header|footer|'. 247 | 'figure'; 248 | 249 | # Regular expression for the content of a block tag. 250 | $nested_tags_level = 4; 251 | $attr = ' 252 | (?> # optional tag attributes 253 | \s # starts with whitespace 254 | (?> 255 | [^>"/]+ # text outside quotes 256 | | 257 | /+(?!>) # slash not followed by ">" 258 | | 259 | "[^"]*" # text inside double quotes (tolerate ">") 260 | | 261 | \'[^\']*\' # text inside single quotes (tolerate ">") 262 | )* 263 | )? 264 | '; 265 | $content = 266 | str_repeat(' 267 | (?> 268 | [^<]+ # content without tag 269 | | 270 | <\2 # nested opening tag 271 | '.$attr.' # attributes 272 | (?> 273 | /> 274 | | 275 | >', $nested_tags_level). # end of opening tag 276 | '.*?'. # last level nested tag content 277 | str_repeat(' 278 | # closing nested tag 279 | ) 280 | | 281 | <(?!/\2\s*> # other tags with a different name 282 | ) 283 | )*', 284 | $nested_tags_level); 285 | $content2 = str_replace('\2', '\3', $content); 286 | 287 | # First, look for nested blocks, e.g.: 288 | #

289 | #
290 | # tags for inner block must be indented. 291 | #
292 | #
293 | # 294 | # The outermost tags must start at the left margin for this to match, and 295 | # the inner nested divs must be indented. 296 | # We need to do this before the next, more liberal match, because the next 297 | # match will start at the first `
` and stop at the first `
`. 298 | $text = preg_replace_callback('{(?> 299 | (?> 300 | (?<=\n\n) # Starting after a blank line 301 | | # or 302 | \A\n? # the beginning of the doc 303 | ) 304 | ( # save in $1 305 | 306 | # Match from `\n` to `\n`, handling nested tags 307 | # in between. 308 | 309 | [ ]{0,'.$less_than_tab.'} 310 | <('.$block_tags_b_re.')# start tag = $2 311 | '.$attr.'> # attributes followed by > and \n 312 | '.$content.' # content, support nesting 313 | # the matching end tag 314 | [ ]* # trailing spaces/tabs 315 | (?=\n+|\Z) # followed by a newline or end of document 316 | 317 | | # Special version for tags of group a. 318 | 319 | [ ]{0,'.$less_than_tab.'} 320 | <('.$block_tags_a_re.')# start tag = $3 321 | '.$attr.'>[ ]*\n # attributes followed by > 322 | '.$content2.' # content, support nesting 323 | # the matching end tag 324 | [ ]* # trailing spaces/tabs 325 | (?=\n+|\Z) # followed by a newline or end of document 326 | 327 | | # Special case just for
. It was easier to make a special 328 | # case than to make the other regex more complicated. 329 | 330 | [ ]{0,'.$less_than_tab.'} 331 | <(hr) # start tag = $2 332 | '.$attr.' # attributes 333 | /?> # the matching end tag 334 | [ ]* 335 | (?=\n{2,}|\Z) # followed by a blank line or end of document 336 | 337 | | # Special case for standalone HTML comments: 338 | 339 | [ ]{0,'.$less_than_tab.'} 340 | (?s: 341 | 342 | ) 343 | [ ]* 344 | (?=\n{2,}|\Z) # followed by a blank line or end of document 345 | 346 | | # PHP and ASP-style processor instructions ( 353 | ) 354 | [ ]* 355 | (?=\n{2,}|\Z) # followed by a blank line or end of document 356 | 357 | ) 358 | )}Sxmi', 359 | array(&$this, '_hashHTMLBlocks_callback'), 360 | $text); 361 | 362 | return $text; 363 | } 364 | protected function _hashHTMLBlocks_callback($matches) { 365 | $text = $matches[1]; 366 | $key = $this->hashBlock($text); 367 | return "\n\n$key\n\n"; 368 | } 369 | 370 | 371 | protected function hashPart($text, $boundary = 'X') { 372 | # 373 | # Called whenever a tag must be hashed when a function insert an atomic 374 | # element in the text stream. Passing $text to through this function gives 375 | # a unique text-token which will be reverted back when calling unhash. 376 | # 377 | # The $boundary argument specify what character should be used to surround 378 | # the token. By convension, "B" is used for block elements that needs not 379 | # to be wrapped into paragraph tags at the end, ":" is used for elements 380 | # that are word separators and "X" is used in the general case. 381 | # 382 | # Swap back any tag hash found in $text so we do not have to `unhash` 383 | # multiple times at the end. 384 | $text = $this->unhash($text); 385 | 386 | # Then hash the block. 387 | static $i = 0; 388 | $key = "$boundary\x1A" . ++$i . $boundary; 389 | $this->html_hashes[$key] = $text; 390 | return $key; # String that will replace the tag. 
391 | } 392 | 393 | 394 | protected function hashBlock($text) { 395 | # 396 | # Shortcut function for hashPart with block-level boundaries. 397 | # 398 | return $this->hashPart($text, 'B'); 399 | } 400 | 401 | 402 | protected $block_gamut = array( 403 | # 404 | # These are all the transformations that form block-level 405 | # tags like paragraphs, headers, and list items. 406 | # 407 | "doHeaders" => 10, 408 | "doHorizontalRules" => 20, 409 | 410 | "doLists" => 40, 411 | "doCodeBlocks" => 50, 412 | "doBlockQuotes" => 60, 413 | ); 414 | 415 | protected function runBlockGamut($text) { 416 | # 417 | # Run block gamut tranformations. 418 | # 419 | # We need to escape raw HTML in Markdown source before doing anything 420 | # else. This need to be done for each block, and not only at the 421 | # begining in the Markdown function since hashed blocks can be part of 422 | # list items and could have been indented. Indented blocks would have 423 | # been seen as a code block in a previous pass of hashHTMLBlocks. 424 | $text = $this->hashHTMLBlocks($text); 425 | 426 | return $this->runBasicBlockGamut($text); 427 | } 428 | 429 | protected function runBasicBlockGamut($text) { 430 | # 431 | # Run block gamut tranformations, without hashing HTML blocks. This is 432 | # useful when HTML blocks are known to be already hashed, like in the first 433 | # whole-document pass. 434 | # 435 | foreach ($this->block_gamut as $method => $priority) { 436 | $text = $this->$method($text); 437 | } 438 | 439 | # Finally form paragraph and restore hashed blocks. 440 | $text = $this->formParagraphs($text); 441 | 442 | return $text; 443 | } 444 | 445 | 446 | protected function doHorizontalRules($text) { 447 | # Do Horizontal Rules: 448 | return preg_replace( 449 | '{ 450 | ^[ ]{0,3} # Leading space 451 | ([-*_]) # $1: First marker 452 | (?> # Repeated marker group 453 | [ ]{0,2} # Zero, one, or two spaces. 
454 | \1 # Marker character 455 | ){2,} # Group repeated at least twice 456 | [ ]* # Tailing spaces 457 | $ # End of line. 458 | }mx', 459 | "\n".$this->hashBlock("empty_element_suffix")."\n", 460 | $text); 461 | } 462 | 463 | 464 | protected $span_gamut = array( 465 | # 466 | # These are all the transformations that occur *within* block-level 467 | # tags like paragraphs, headers, and list items. 468 | # 469 | # Process character escapes, code spans, and inline HTML 470 | # in one shot. 471 | "parseSpan" => -30, 472 | 473 | # Process anchor and image tags. Images must come first, 474 | # because ![foo][f] looks like an anchor. 475 | "doImages" => 10, 476 | "doAnchors" => 20, 477 | 478 | # Make links out of things like `` 479 | # Must come after doAnchors, because you can use < and > 480 | # delimiters in inline links like [this](). 481 | "doAutoLinks" => 30, 482 | "encodeAmpsAndAngles" => 40, 483 | 484 | "doItalicsAndBold" => 50, 485 | "doHardBreaks" => 60, 486 | ); 487 | 488 | protected function runSpanGamut($text) { 489 | # 490 | # Run span gamut tranformations. 491 | # 492 | foreach ($this->span_gamut as $method => $priority) { 493 | $text = $this->$method($text); 494 | } 495 | 496 | return $text; 497 | } 498 | 499 | 500 | protected function doHardBreaks($text) { 501 | # Do hard breaks: 502 | return preg_replace_callback('/ {2,}\n/', 503 | array(&$this, '_doHardBreaks_callback'), $text); 504 | } 505 | protected function _doHardBreaks_callback($matches) { 506 | return $this->hashPart("empty_element_suffix\n"); 507 | } 508 | 509 | 510 | protected function doAnchors($text) { 511 | # 512 | # Turn Markdown link shortcuts into XHTML tags. 513 | # 514 | if ($this->in_anchor) return $text; 515 | $this->in_anchor = true; 516 | 517 | # 518 | # First, handle reference-style links: [link text] [id] 519 | # 520 | $text = preg_replace_callback('{ 521 | ( # wrap whole match in $1 522 | \[ 523 | ('.$this->nested_brackets_re.') # link text = $2 524 | \] 525 | 526 | [ ]? 
# one optional space 527 | (?:\n[ ]*)? # one optional newline followed by spaces 528 | 529 | \[ 530 | (.*?) # id = $3 531 | \] 532 | ) 533 | }xs', 534 | array(&$this, '_doAnchors_reference_callback'), $text); 535 | 536 | # 537 | # Next, inline-style links: [link text](url "optional title") 538 | # 539 | $text = preg_replace_callback('{ 540 | ( # wrap whole match in $1 541 | \[ 542 | ('.$this->nested_brackets_re.') # link text = $2 543 | \] 544 | \( # literal paren 545 | [ \n]* 546 | (?: 547 | <(.+?)> # href = $3 548 | | 549 | ('.$this->nested_url_parenthesis_re.') # href = $4 550 | ) 551 | [ \n]* 552 | ( # $5 553 | ([\'"]) # quote char = $6 554 | (.*?) # Title = $7 555 | \6 # matching quote 556 | [ \n]* # ignore any spaces/tabs between closing quote and ) 557 | )? # title is optional 558 | \) 559 | ) 560 | }xs', 561 | array(&$this, '_doAnchors_inline_callback'), $text); 562 | 563 | # 564 | # Last, handle reference-style shortcuts: [link text] 565 | # These must come last in case you've also got [link text][1] 566 | # or [link text](/foo) 567 | # 568 | $text = preg_replace_callback('{ 569 | ( # wrap whole match in $1 570 | \[ 571 | ([^\[\]]+) # link text = $2; can\'t contain [ or ] 572 | \] 573 | ) 574 | }xs', 575 | array(&$this, '_doAnchors_reference_callback'), $text); 576 | 577 | $this->in_anchor = false; 578 | return $text; 579 | } 580 | protected function _doAnchors_reference_callback($matches) { 581 | $whole_match = $matches[1]; 582 | $link_text = $matches[2]; 583 | $link_id =& $matches[3]; 584 | 585 | if ($link_id == "") { 586 | # for shortcut links like [this][] or [this]. 
587 | $link_id = $link_text; 588 | } 589 | 590 | # lower-case and turn embedded newlines into spaces 591 | $link_id = strtolower($link_id); 592 | $link_id = preg_replace('{[ ]?\n}', ' ', $link_id); 593 | 594 | if (isset($this->urls[$link_id])) { 595 | $url = $this->urls[$link_id]; 596 | $url = $this->encodeAttribute($url); 597 | 598 | $result = "titles[$link_id] ) ) { 600 | $title = $this->titles[$link_id]; 601 | $title = $this->encodeAttribute($title); 602 | $result .= " title=\"$title\""; 603 | } 604 | 605 | $link_text = $this->runSpanGamut($link_text); 606 | $result .= ">$link_text"; 607 | $result = $this->hashPart($result); 608 | } 609 | else { 610 | $result = $whole_match; 611 | } 612 | return $result; 613 | } 614 | protected function _doAnchors_inline_callback($matches) { 615 | $whole_match = $matches[1]; 616 | $link_text = $this->runSpanGamut($matches[2]); 617 | $url = $matches[3] == '' ? $matches[4] : $matches[3]; 618 | $title =& $matches[7]; 619 | 620 | $url = $this->encodeAttribute($url); 621 | 622 | $result = "encodeAttribute($title); 625 | $result .= " title=\"$title\""; 626 | } 627 | 628 | $link_text = $this->runSpanGamut($link_text); 629 | $result .= ">$link_text"; 630 | 631 | return $this->hashPart($result); 632 | } 633 | 634 | 635 | protected function doImages($text) { 636 | # 637 | # Turn Markdown image shortcuts into tags. 638 | # 639 | # 640 | # First, handle reference-style labeled images: ![alt text][id] 641 | # 642 | $text = preg_replace_callback('{ 643 | ( # wrap whole match in $1 644 | !\[ 645 | ('.$this->nested_brackets_re.') # alt text = $2 646 | \] 647 | 648 | [ ]? # one optional space 649 | (?:\n[ ]*)? # one optional newline followed by spaces 650 | 651 | \[ 652 | (.*?) 
# id = $3 653 | \] 654 | 655 | ) 656 | }xs', 657 | array(&$this, '_doImages_reference_callback'), $text); 658 | 659 | # 660 | # Next, handle inline images: ![alt text](url "optional title") 661 | # Don't forget: encode * and _ 662 | # 663 | $text = preg_replace_callback('{ 664 | ( # wrap whole match in $1 665 | !\[ 666 | ('.$this->nested_brackets_re.') # alt text = $2 667 | \] 668 | \s? # One optional whitespace character 669 | \( # literal paren 670 | [ \n]* 671 | (?: 672 | <(\S*)> # src url = $3 673 | | 674 | ('.$this->nested_url_parenthesis_re.') # src url = $4 675 | ) 676 | [ \n]* 677 | ( # $5 678 | ([\'"]) # quote char = $6 679 | (.*?) # title = $7 680 | \6 # matching quote 681 | [ \n]* 682 | )? # title is optional 683 | \) 684 | ) 685 | }xs', 686 | array(&$this, '_doImages_inline_callback'), $text); 687 | 688 | return $text; 689 | } 690 | protected function _doImages_reference_callback($matches) { 691 | $whole_match = $matches[1]; 692 | $alt_text = $matches[2]; 693 | $link_id = strtolower($matches[3]); 694 | 695 | if ($link_id == "") { 696 | $link_id = strtolower($alt_text); # for shortcut links like ![this][]. 697 | } 698 | 699 | $alt_text = $this->encodeAttribute($alt_text); 700 | if (isset($this->urls[$link_id])) { 701 | $url = $this->encodeAttribute($this->urls[$link_id]); 702 | $result = "\"$alt_text\"";titles[$link_id])) { 704 | $title = $this->titles[$link_id]; 705 | $title = $this->encodeAttribute($title); 706 | $result .= " title=\"$title\""; 707 | } 708 | $result .= $this->empty_element_suffix; 709 | $result = $this->hashPart($result); 710 | } 711 | else { 712 | # If there's no such link ID, leave intact: 713 | $result = $whole_match; 714 | } 715 | 716 | return $result; 717 | } 718 | protected function _doImages_inline_callback($matches) { 719 | $whole_match = $matches[1]; 720 | $alt_text = $matches[2]; 721 | $url = $matches[3] == '' ? 
$matches[4] : $matches[3]; 722 | $title =& $matches[7]; 723 | 724 | $alt_text = $this->encodeAttribute($alt_text); 725 | $url = $this->encodeAttribute($url); 726 | $result = "\"$alt_text\"";encodeAttribute($title); 729 | $result .= " title=\"$title\""; # $title already quoted 730 | } 731 | $result .= $this->empty_element_suffix; 732 | 733 | return $this->hashPart($result); 734 | } 735 | 736 | 737 | protected function doHeaders($text) { 738 | # Setext-style headers: 739 | # Header 1 740 | # ======== 741 | # 742 | # Header 2 743 | # -------- 744 | # 745 | $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx', 746 | array(&$this, '_doHeaders_callback_setext'), $text); 747 | 748 | # atx-style headers: 749 | # # Header 1 750 | # ## Header 2 751 | # ## Header 2 with closing hashes ## 752 | # ... 753 | # ###### Header 6 754 | # 755 | $text = preg_replace_callback('{ 756 | ^(\#{1,6}) # $1 = string of #\'s 757 | [ ]* 758 | (.+?) # $2 = Header text 759 | [ ]* 760 | \#* # optional closing #\'s (not counted) 761 | \n+ 762 | }xm', 763 | array(&$this, '_doHeaders_callback_atx'), $text); 764 | 765 | return $text; 766 | } 767 | protected function _doHeaders_callback_setext($matches) { 768 | # Terrible hack to check we haven't found an empty list item. 769 | if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1])) 770 | return $matches[0]; 771 | 772 | $level = $matches[2][0] == '=' ? 1 : 2; # "=" underline is level 1, "-" underline is level 2; [] offset because {} offsets are gone in PHP 8 773 | $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>"; 774 | return "\n" . $this->hashBlock($block) . "\n\n"; 775 | } 776 | protected function _doHeaders_callback_atx($matches) { 777 | $level = strlen($matches[1]); # number of leading #'s (1 to 6) is the header level 778 | $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>"; 779 | return "\n" . $this->hashBlock($block) . "\n\n"; 780 | } 781 | 782 | 783 | protected function doLists($text) { 784 | # 785 | # Form HTML ordered (numbered) and unordered (bulleted) lists. 
786 | # 787 | $less_than_tab = $this->tab_width - 1; 788 | 789 | # Re-usable patterns to match list item bullets and number markers: 790 | $marker_ul_re = '[*+-]'; 791 | $marker_ol_re = '\d+[\.]'; 792 | $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)"; 793 | 794 | $markers_relist = array( 795 | $marker_ul_re => $marker_ol_re, 796 | $marker_ol_re => $marker_ul_re, 797 | ); 798 | 799 | foreach ($markers_relist as $marker_re => $other_marker_re) { 800 | # Re-usable pattern to match any entirel ul or ol list: 801 | $whole_list_re = ' 802 | ( # $1 = whole list 803 | ( # $2 804 | ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces 805 | ('.$marker_re.') # $4 = first list item marker 806 | [ ]+ 807 | ) 808 | (?s:.+?) 809 | ( # $5 810 | \z 811 | | 812 | \n{2,} 813 | (?=\S) 814 | (?! # Negative lookahead for another list item marker 815 | [ ]* 816 | '.$marker_re.'[ ]+ 817 | ) 818 | | 819 | (?= # Lookahead for another kind of list 820 | \n 821 | \3 # Must have the same indentation 822 | '.$other_marker_re.'[ ]+ 823 | ) 824 | ) 825 | ) 826 | '; // mx 827 | 828 | # We use a different prefix before nested lists than top-level lists. 829 | # See extended comment in _ProcessListItems(). 830 | 831 | if ($this->list_level) { 832 | $text = preg_replace_callback('{ 833 | ^ 834 | '.$whole_list_re.' 835 | }mx', 836 | array(&$this, '_doLists_callback'), $text); 837 | } 838 | else { 839 | $text = preg_replace_callback('{ 840 | (?:(?<=\n)\n|\A\n?) # Must eat the newline 841 | '.$whole_list_re.' 842 | }mx', 843 | array(&$this, '_doLists_callback'), $text); 844 | } 845 | } 846 | 847 | return $text; 848 | } 849 | protected function _doLists_callback($matches) { 850 | # Re-usable patterns to match list item bullets and number markers: 851 | $marker_ul_re = '[*+-]'; 852 | $marker_ol_re = '\d+[\.]'; 853 | $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)"; 854 | 855 | $list = $matches[1]; 856 | $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? 
"ul" : "ol"; 857 | 858 | $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re ); 859 | 860 | $list .= "\n"; 861 | $result = $this->processListItems($list, $marker_any_re); 862 | 863 | $result = $this->hashBlock("<$list_type>\n" . $result . ""); 864 | return "\n". $result ."\n\n"; 865 | } 866 | 867 | protected $list_level = 0; 868 | 869 | protected function processListItems($list_str, $marker_any_re) { 870 | # 871 | # Process the contents of a single ordered or unordered list, splitting it 872 | # into individual list items. 873 | # 874 | # The $this->list_level global keeps track of when we're inside a list. 875 | # Each time we enter a list, we increment it; when we leave a list, 876 | # we decrement. If it's zero, we're not in a list anymore. 877 | # 878 | # We do this because when we're not inside a list, we want to treat 879 | # something like this: 880 | # 881 | # I recommend upgrading to version 882 | # 8. Oops, now this line is treated 883 | # as a sub-list. 884 | # 885 | # As a single paragraph, despite the fact that the second line starts 886 | # with a digit-period-space sequence. 887 | # 888 | # Whereas when we're inside a list (or sub-list), that line will be 889 | # treated as the start of a sub-list. What a kludge, huh? This is 890 | # an aspect of Markdown's syntax that's hard to parse perfectly 891 | # without resorting to mind-reading. Perhaps the solution is to 892 | # change the syntax rules such that sub-lists must start with a 893 | # starting cardinal number; e.g. "1." or "a.". 894 | 895 | $this->list_level++; 896 | 897 | # trim trailing blank lines: 898 | $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); 899 | 900 | $list_str = preg_replace_callback('{ 901 | (\n)? # leading line = $1 902 | (^[ ]*) # leading whitespace = $2 903 | ('.$marker_any_re.' 
# list marker and space = $3 904 | (?:[ ]+|(?=\n)) # space only required if item is not empty 905 | ) 906 | ((?s:.*?)) # list item text = $4 907 | (?:(\n+(?=\n))|\n) # tailing blank line = $5 908 | (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n)))) 909 | }xm', 910 | array(&$this, '_processListItems_callback'), $list_str); 911 | 912 | $this->list_level--; 913 | return $list_str; 914 | } 915 | protected function _processListItems_callback($matches) { 916 | $item = $matches[4]; 917 | $leading_line =& $matches[1]; 918 | $leading_space =& $matches[2]; 919 | $marker_space = $matches[3]; 920 | $tailing_blank_line =& $matches[5]; 921 | 922 | if ($leading_line || $tailing_blank_line || 923 | preg_match('/\n{2,}/', $item)) 924 | { 925 | # Replace marker with the appropriate whitespace indentation 926 | $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item; 927 | $item = $this->runBlockGamut($this->outdent($item)."\n"); 928 | } 929 | else { 930 | # Recursion for sub-lists: 931 | $item = $this->doLists($this->outdent($item)); 932 | $item = preg_replace('/\n+$/', '', $item); 933 | $item = $this->runSpanGamut($item); 934 | } 935 | 936 | return "
  • " . $item . "
  • \n"; 937 | } 938 | 939 | 940 | protected function doCodeBlocks($text) { 941 | # 942 | # Process Markdown `
    ` blocks.
     943 | 	#
     944 | 		$text = preg_replace_callback('{
     945 | 				(?:\n\n|\A\n?)
     946 | 				(	            # $1 = the code block -- one or more lines, starting with a space/tab
     947 | 				  (?>
     948 | 					[ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
     949 | 					.*\n+
     950 | 				  )+
     951 | 				)
     952 | 				((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
     953 | 			}xm',
     954 | 			array(&$this, '_doCodeBlocks_callback'), $text);
     955 | 
     956 | 		return $text;
     957 | 	}
     958 | 	protected function _doCodeBlocks_callback($matches) {
     959 | 		$codeblock = $matches[1];
     960 | 
     961 | 		$codeblock = $this->outdent($codeblock);
     962 | 		$codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
     963 | 
     964 | 		# trim leading newlines and trailing newlines
     965 | 		$codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
     966 | 
     967 | 		$codeblock = "
    $codeblock\n
    "; 968 | return "\n\n".$this->hashBlock($codeblock)."\n\n"; 969 | } 970 | 971 | 972 | protected function makeCodeSpan($code) { 973 | # 974 | # Create a code span markup for $code. Called from handleSpanToken. 975 | # 976 | $code = htmlspecialchars(trim($code), ENT_NOQUOTES); 977 | return $this->hashPart("$code"); 978 | } 979 | 980 | 981 | protected $em_relist = array( 982 | '' => '(?:(? '(?<=\S|^)(? '(?<=\S|^)(? '(?:(? '(?<=\S|^)(? '(?<=\S|^)(? '(?:(? '(?<=\S|^)(? '(?<=\S|^)(?em_relist as $em => $em_re) { 1004 | foreach ($this->strong_relist as $strong => $strong_re) { 1005 | # Construct list of allowed token expressions. 1006 | $token_relist = array(); 1007 | if (isset($this->em_strong_relist["$em$strong"])) { 1008 | $token_relist[] = $this->em_strong_relist["$em$strong"]; 1009 | } 1010 | $token_relist[] = $em_re; 1011 | $token_relist[] = $strong_re; 1012 | 1013 | # Construct master expression from list. 1014 | $token_re = '{('. implode('|', $token_relist) .')}'; 1015 | $this->em_strong_prepared_relist["$em$strong"] = $token_re; 1016 | } 1017 | } 1018 | } 1019 | 1020 | protected function doItalicsAndBold($text) { 1021 | $token_stack = array(''); 1022 | $text_stack = array(''); 1023 | $em = ''; 1024 | $strong = ''; 1025 | $tree_char_em = false; 1026 | 1027 | while (1) { 1028 | # 1029 | # Get prepared regular expression for seraching emphasis tokens 1030 | # in current context. 1031 | # 1032 | $token_re = $this->em_strong_prepared_relist["$em$strong"]; 1033 | 1034 | # 1035 | # Each loop iteration search for the next emphasis token. 1036 | # Each token is then passed to handleSpanToken. 1037 | # 1038 | $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); 1039 | $text_stack[0] .= $parts[0]; 1040 | $token =& $parts[1]; 1041 | $text =& $parts[2]; 1042 | 1043 | if (empty($token)) { 1044 | # Reached end of text span: empty stack without emitting. 1045 | # any more emphasis. 
1046 | while ($token_stack[0]) { 1047 | $text_stack[1] .= array_shift($token_stack); 1048 | $text_stack[0] .= array_shift($text_stack); 1049 | } 1050 | break; 1051 | } 1052 | 1053 | $token_len = strlen($token); 1054 | if ($tree_char_em) { 1055 | # Reached closing marker while inside a three-char emphasis. 1056 | if ($token_len == 3) { 1057 | # Three-char closing marker, close em and strong. 1058 | array_shift($token_stack); 1059 | $span = array_shift($text_stack); 1060 | $span = $this->runSpanGamut($span); 1061 | $span = "<strong><em>$span</em></strong>"; 1062 | $text_stack[0] .= $this->hashPart($span); 1063 | $em = ''; 1064 | $strong = ''; 1065 | } else { 1066 | # Other closing marker: close one em or strong and 1067 | # change current token state to match the other 1068 | $token_stack[0] = str_repeat($token[0], 3-$token_len); 1069 | $tag = $token_len == 2 ? "strong" : "em"; 1070 | $span = $text_stack[0]; 1071 | $span = $this->runSpanGamut($span); 1072 | $span = "<$tag>$span</$tag>"; 1073 | $text_stack[0] = $this->hashPart($span); 1074 | $$tag = ''; # $$tag stands for $em or $strong 1075 | } 1076 | $tree_char_em = false; 1077 | } else if ($token_len == 3) { 1078 | if ($em) { 1079 | # Reached closing marker for both em and strong. 1080 | # Closing strong marker: 1081 | for ($i = 0; $i < 2; ++$i) { 1082 | $shifted_token = array_shift($token_stack); 1083 | $tag = strlen($shifted_token) == 2 ? "strong" : "em"; 1084 | $span = array_shift($text_stack); 1085 | $span = $this->runSpanGamut($span); 1086 | $span = "<$tag>$span</$tag>"; 1087 | $text_stack[0] .= $this->hashPart($span); 1088 | $$tag = ''; # $$tag stands for $em or $strong 1089 | } 1090 | } else { 1091 | # Reached opening three-char emphasis marker. Push on token 1092 | # stack; will be handled by the special condition above. 
1093 | $em = $token[0]; # first marker character; [] offset because {} offsets are gone in PHP 8 1094 | $strong = "$em$em"; 1095 | array_unshift($token_stack, $token); 1096 | array_unshift($text_stack, ''); 1097 | $tree_char_em = true; 1098 | } 1099 | } else if ($token_len == 2) { 1100 | if ($strong) { 1101 | # Unwind any dangling emphasis marker: 1102 | if (strlen($token_stack[0]) == 1) { 1103 | $text_stack[1] .= array_shift($token_stack); 1104 | $text_stack[0] .= array_shift($text_stack); 1105 | } 1106 | # Closing strong marker: 1107 | array_shift($token_stack); 1108 | $span = array_shift($text_stack); 1109 | $span = $this->runSpanGamut($span); 1110 | $span = "<strong>$span</strong>"; 1111 | $text_stack[0] .= $this->hashPart($span); 1112 | $strong = ''; 1113 | } else { 1114 | array_unshift($token_stack, $token); 1115 | array_unshift($text_stack, ''); 1116 | $strong = $token; 1117 | } 1118 | } else { 1119 | # Here $token_len == 1 1120 | if ($em) { 1121 | if (strlen($token_stack[0]) == 1) { 1122 | # Closing emphasis marker: 1123 | array_shift($token_stack); 1124 | $span = array_shift($text_stack); 1125 | $span = $this->runSpanGamut($span); 1126 | $span = "<em>$span</em>"; 1127 | $text_stack[0] .= $this->hashPart($span); 1128 | $em = ''; 1129 | } else { 1130 | $text_stack[0] .= $token; 1131 | } 1132 | } else { 1133 | array_unshift($token_stack, $token); 1134 | array_unshift($text_stack, ''); 1135 | $em = $token; 1136 | } 1137 | } 1138 | } 1139 | return $text_stack[0]; 1140 | } 1141 | 1142 | 1143 | protected function doBlockQuotes($text) { 1144 | $text = preg_replace_callback('/ 1145 | ( # Wrap whole match in $1 1146 | (?> 1147 | ^[ ]*>[ ]? 
# ">" at the start of a line 1148 | .+\n # rest of the first line 1149 | (.+\n)* # subsequent consecutive lines 1150 | \n* # blanks 1151 | )+ 1152 | ) 1153 | /xm', 1154 | array(&$this, '_doBlockQuotes_callback'), $text); 1155 | 1156 | return $text; 1157 | } 1158 | protected function _doBlockQuotes_callback($matches) { 1159 | $bq = $matches[1]; 1160 | # trim one level of quoting - trim whitespace-only lines 1161 | $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq); 1162 | $bq = $this->runBlockGamut($bq); # recurse 1163 | 1164 | $bq = preg_replace('/^/m', " ", $bq); 1165 | # These leading spaces cause problem with
     content, 
    1166 | 		# so we need to fix that:
    1167 | 		$bq = preg_replace_callback('{(\s*
    .+?
    )}sx', 1168 | array(&$this, '_doBlockQuotes_callback2'), $bq); 1169 | 1170 | return "\n". $this->hashBlock("
    \n$bq\n
    ")."\n\n"; 1171 | } 1172 | protected function _doBlockQuotes_callback2($matches) { 1173 | $pre = $matches[1]; 1174 | $pre = preg_replace('/^ /m', '', $pre); 1175 | return $pre; 1176 | } 1177 | 1178 | 1179 | protected function formParagraphs($text) { 1180 | # 1181 | # Params: 1182 | # $text - string to process with html

    tags 1183 | # 1184 | # Strip leading and trailing lines: 1185 | $text = preg_replace('/\A\n+|\n+\z/', '', $text); 1186 | 1187 | $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); 1188 | 1189 | # 1190 | # Wrap

    tags and unhashify HTML blocks 1191 | # 1192 | foreach ($grafs as $key => $value) { 1193 | if (!preg_match('/^B\x1A[0-9]+B$/', $value)) { 1194 | # Is a paragraph. 1195 | $value = $this->runSpanGamut($value); 1196 | $value = preg_replace('/^([ ]*)/', "

    ", $value); 1197 | $value .= "

    "; 1198 | $grafs[$key] = $this->unhash($value); 1199 | } 1200 | else { 1201 | # Is a block. 1202 | # Modify elements of @grafs in-place... 1203 | $graf = $value; 1204 | $block = $this->html_hashes[$graf]; 1205 | $graf = $block; 1206 | // if (preg_match('{ 1207 | // \A 1208 | // ( # $1 =
    tag 1209 | //
    ]* 1211 | // \b 1212 | // markdown\s*=\s* ([\'"]) # $2 = attr quote char 1213 | // 1 1214 | // \2 1215 | // [^>]* 1216 | // > 1217 | // ) 1218 | // ( # $3 = contents 1219 | // .* 1220 | // ) 1221 | // (
    ) # $4 = closing tag 1222 | // \z 1223 | // }xs', $block, $matches)) 1224 | // { 1225 | // list(, $div_open, , $div_content, $div_close) = $matches; 1226 | // 1227 | // # We can't call Markdown(), because that resets the hash; 1228 | // # that initialization code should be pulled into its own sub, though. 1229 | // $div_content = $this->hashHTMLBlocks($div_content); 1230 | // 1231 | // # Run document gamut methods on the content. 1232 | // foreach ($this->document_gamut as $method => $priority) { 1233 | // $div_content = $this->$method($div_content); 1234 | // } 1235 | // 1236 | // $div_open = preg_replace( 1237 | // '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open); 1238 | // 1239 | // $graf = $div_open . "\n" . $div_content . "\n" . $div_close; 1240 | // } 1241 | $grafs[$key] = $graf; 1242 | } 1243 | } 1244 | 1245 | return implode("\n\n", $grafs); 1246 | } 1247 | 1248 | 1249 | protected function encodeAttribute($text) { 1250 | # 1251 | # Encode text for a double-quoted HTML attribute. This function 1252 | # is *not* suitable for attributes enclosed in single quotes. 1253 | # 1254 | $text = $this->encodeAmpsAndAngles($text); 1255 | $text = str_replace('"', '"', $text); 1256 | return $text; 1257 | } 1258 | 1259 | 1260 | protected function encodeAmpsAndAngles($text) { 1261 | # 1262 | # Smart processing for ampersands and angle brackets that need to 1263 | # be encoded. Valid character entities are left alone unless the 1264 | # no-entities mode is set. 
1265 | # 1266 | if ($this->no_entities) { 1267 | $text = str_replace('&', '&', $text); 1268 | } else { 1269 | # Ampersand-encoding based entirely on Nat Irons's Amputator 1270 | # MT plugin: 1271 | $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', 1272 | '&', $text); 1273 | } 1274 | # Encode remaining <'s 1275 | $text = str_replace('<', '<', $text); 1276 | 1277 | return $text; 1278 | } 1279 | 1280 | 1281 | protected function doAutoLinks($text) { 1282 | $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i', 1283 | array(&$this, '_doAutoLinks_url_callback'), $text); 1284 | 1285 | # Email addresses: 1286 | $text = preg_replace_callback('{ 1287 | < 1288 | (?:mailto:)? 1289 | ( 1290 | (?: 1291 | [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+ 1292 | | 1293 | ".*?" 1294 | ) 1295 | \@ 1296 | (?: 1297 | [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+ 1298 | | 1299 | \[[\d.a-fA-F:]+\] # IPv4 & IPv6 1300 | ) 1301 | ) 1302 | > 1303 | }xi', 1304 | array(&$this, '_doAutoLinks_email_callback'), $text); 1305 | $text = preg_replace_callback('{<(tel:([^\'">\s]+))>}i',array(&$this, '_doAutoLinks_tel_callback'), $text); 1306 | 1307 | return $text; 1308 | } 1309 | protected function _doAutoLinks_tel_callback($matches) { 1310 | $url = $this->encodeAttribute($matches[1]); 1311 | $tel = $this->encodeAttribute($matches[2]); 1312 | $link = "$tel"; 1313 | return $this->hashPart($link); 1314 | } 1315 | protected function _doAutoLinks_url_callback($matches) { 1316 | $url = $this->encodeAttribute($matches[1]); 1317 | $link = "$url"; 1318 | return $this->hashPart($link); 1319 | } 1320 | protected function _doAutoLinks_email_callback($matches) { 1321 | $address = $matches[1]; 1322 | $link = $this->encodeEmailAddress($address); 1323 | return $this->hashPart($link); 1324 | } 1325 | 1326 | 1327 | protected function encodeEmailAddress($addr) { 1328 | # 1329 | # Input: an email address, e.g. 
"foo@example.com" 1330 | # 1331 | # Output: the email address as a mailto link, with each character 1332 | # of the address encoded as either a decimal or hex entity, in 1333 | # the hopes of foiling most address harvesting spam bots. E.g.: 1334 | # 1335 | #

    foo@exampl 1338 | # e.com

    1339 | # 1340 | # Based by a filter by Matthew Wickline, posted to BBEdit-Talk. 1341 | # With some optimizations by Milian Wolff. 1342 | # 1343 | $addr = "mailto:" . $addr; 1344 | $chars = preg_split('/(? $char) { 1348 | $ord = ord($char); 1349 | # Ignore non-ascii chars. 1350 | if ($ord < 128) { 1351 | $r = ($seed * (1 + $key)) % 100; # Pseudo-random function. 1352 | # roughly 10% raw, 45% hex, 45% dec 1353 | # '@' *must* be encoded. I insist. 1354 | if ($r > 90 && $char != '@') /* do nothing */; 1355 | else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';'; 1356 | else $chars[$key] = '&#'.$ord.';'; 1357 | } 1358 | } 1359 | 1360 | $addr = implode('', $chars); 1361 | $text = implode('', array_slice($chars, 7)); # text without `mailto:` 1362 | $addr = "$text"; 1363 | 1364 | return $addr; 1365 | } 1366 | 1367 | 1368 | protected function parseSpan($str) { 1369 | # 1370 | # Take the string $str and parse it into tokens, hashing embeded HTML, 1371 | # escaped characters and handling code spans. 1372 | # 1373 | $output = ''; 1374 | 1375 | $span_re = '{ 1376 | ( 1377 | \\\\'.$this->escape_chars_re.' 1378 | | 1379 | (?no_markup ? '' : ' 1382 | | 1383 | # comment 1384 | | 1385 | <\?.*?\?> | <%.*?%> # processing instruction 1386 | | 1387 | <[!$]?[-a-zA-Z0-9:_]+ # regular tags 1388 | (?> 1389 | \s 1390 | (?>[^"\'>]+|"[^"]*"|\'[^\']*\')* 1391 | )? 1392 | > 1393 | | 1394 | <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag 1395 | | 1396 | # closing tag 1397 | ').' 1398 | ) 1399 | }xs'; 1400 | 1401 | while (1) { 1402 | # 1403 | # Each loop iteration seach for either the next tag, the next 1404 | # openning code span marker, or the next escaped character. 1405 | # Each token is then passed to handleSpanToken. 1406 | # 1407 | $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE); 1408 | 1409 | # Create token from text preceding tag. 1410 | if ($parts[0] != "") { 1411 | $output .= $parts[0]; 1412 | } 1413 | 1414 | # Check if we reach the end. 
1415 | if (isset($parts[1])) { 1416 | $output .= $this->handleSpanToken($parts[1], $parts[2]); 1417 | $str = $parts[2]; 1418 | } 1419 | else { 1420 | break; 1421 | } 1422 | } 1423 | 1424 | return $output; 1425 | } 1426 | 1427 | 1428 | protected function handleSpanToken($token, &$str) { 1429 | # 1430 | # Handle $token provided by parseSpan by determining its nature and 1431 | # returning the corresponding value that should replace it. $str is the text following the token, taken by reference: when a code span is matched, $str is reassigned to what remains after the closing marker. 1432 | # 1433 | switch ($token[0]) { 1434 | case "\\": 1435 | return $this->hashPart("&#". ord($token[1]). ";"); # [] offsets: the {} string-offset syntax is a parse error in PHP 8 1436 | case "`": 1437 | # Search for end marker in remaining text. 1438 | if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', 1439 | $str, $matches)) 1440 | { 1441 | $str = $matches[2]; 1442 | $codespan = $this->makeCodeSpan($matches[1]); 1443 | return $this->hashPart($codespan); 1444 | } 1445 | return $token; // return as text since no ending marker found. 1446 | default: 1447 | return $this->hashPart($token); 1448 | } 1449 | } 1450 | 1451 | 1452 | protected function outdent($text) { 1453 | # 1454 | # Remove one level of line-leading tabs or spaces 1455 | # 1456 | return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text); 1457 | } 1458 | 1459 | 1460 | # String length function for detab. `_initDetab` will create a function to 1461 | # handle UTF-8 if the default function does not exist. 1462 | protected $utf8_strlen = 'mb_strlen'; 1463 | 1464 | protected function detab($text) { 1465 | # 1466 | # Replace tabs with the appropriate amount of space. 1467 | # 1468 | # For each line we separate the line in blocks delimited by 1469 | # tab characters. Then we reconstruct every line by adding the 1470 | # appropriate number of space between each blocks. 
1471 | 1472 | $text = preg_replace_callback('/^.*\t.*$/m', 1473 | array(&$this, '_detab_callback'), $text); 1474 | 1475 | return $text; 1476 | } 1477 | protected function _detab_callback($matches) { 1478 | $line = $matches[0]; 1479 | $strlen = $this->utf8_strlen; # strlen function for UTF-8. 1480 | 1481 | # Split in blocks. 1482 | $blocks = explode("\t", $line); 1483 | # Add each blocks to the line. 1484 | $line = $blocks[0]; 1485 | unset($blocks[0]); # Do not add first block twice. 1486 | foreach ($blocks as $block) { 1487 | # Calculate amount of space, insert spaces, insert block. 1488 | $amount = $this->tab_width - 1489 | $strlen($line, 'UTF-8') % $this->tab_width; 1490 | $line .= str_repeat(" ", $amount) . $block; 1491 | } 1492 | return $line; 1493 | } 1494 | protected function _initDetab() { 1495 | # 1496 | # Check for the availability of the function in the `utf8_strlen` property 1497 | # (initially `mb_strlen`). If the function is not available, create a 1498 | # function that will loosely count the number of UTF-8 characters with a 1499 | # regular expression. 1500 | # 1501 | if (function_exists($this->utf8_strlen)) return; 1502 | $this->utf8_strlen = create_function('$text', 'return preg_match_all( 1503 | "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", 1504 | $text, $m);'); 1505 | } 1506 | 1507 | 1508 | protected function unhash($text) { 1509 | # 1510 | # Swap back in all the tags hashed by _HashHTMLBlocks. 1511 | # 1512 | return preg_replace_callback('/(.)\x1A[0-9]+\1/', 1513 | array(&$this, '_unhash_callback'), $text); 1514 | } 1515 | protected function _unhash_callback($matches) { 1516 | return $this->html_hashes[$matches[0]]; 1517 | } 1518 | 1519 | } 1520 | 1521 | 1522 | # 1523 | # Temporary Markdown Extra Parser Implementation Class 1524 | # 1525 | # NOTE: DON'T USE THIS CLASS 1526 | # Currently the implementation of of Extra resides here in this temporary class. 
1527 | # This makes it easier to propagate the changes between the three different 1528 | # packaging styles of PHP Markdown. When this issue is resolved, this 1529 | # MarkdownExtra_TmpImpl class here will disappear and \Michelf\MarkdownExtra 1530 | # will contain the code. So please use \Michelf\MarkdownExtra and ignore this 1531 | # one. 1532 | # 1533 | 1534 | abstract class _MarkdownExtra_TmpImpl extends \Michelf\Markdown { 1535 | 1536 | ### Configuration Variables ### 1537 | 1538 | # Prefix for footnote ids. 1539 | public $fn_id_prefix = ""; 1540 | 1541 | # Optional title attribute for footnote links and backlinks. 1542 | public $fn_link_title = ""; 1543 | public $fn_backlink_title = ""; 1544 | 1545 | # Optional class attribute for footnote links and backlinks. 1546 | public $fn_link_class = "footnote-ref"; 1547 | public $fn_backlink_class = "footnote-backref"; 1548 | 1549 | # Class name for table cell alignment (%% replaced left/center/right) 1550 | # For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center' 1551 | # If empty, the align attribute is used instead of a class name. 1552 | public $table_align_class_tmpl = ''; 1553 | 1554 | # Optional class prefix for fenced code block. 1555 | public $code_class_prefix = ""; 1556 | # Class attribute for code blocks goes on the `code` tag; 1557 | # setting this to true will put attributes on the `pre` tag instead. 1558 | public $code_attr_on_pre = false; 1559 | 1560 | # Predefined abbreviations. 1561 | public $predef_abbr = array(); 1562 | 1563 | 1564 | ### Parser Implementation ### 1565 | 1566 | public function __construct() { 1567 | # 1568 | # Constructor function. Initialize the parser object. 1569 | # 1570 | # Add extra escapable characters before parent constructor 1571 | # initialize the table. 1572 | $this->escape_chars .= ':|'; 1573 | 1574 | # Insert extra document, block, and span transformations. 1575 | # Parent constructor will do the sorting. 
1576 | $this->document_gamut += array( 1577 | "doFencedCodeBlocks" => 5, 1578 | "stripFootnotes" => 15, 1579 | "stripAbbreviations" => 25, 1580 | "appendFootnotes" => 50, 1581 | ); 1582 | $this->block_gamut += array( 1583 | "doFencedCodeBlocks" => 5, 1584 | "doTables" => 15, 1585 | "doDefLists" => 45, 1586 | ); 1587 | $this->span_gamut += array( 1588 | "doFootnotes" => 5, 1589 | "doAbbreviations" => 70, 1590 | ); 1591 | 1592 | parent::__construct(); 1593 | } 1594 | 1595 | 1596 | # Extra variables used during extra transformations. 1597 | protected $footnotes = array(); 1598 | protected $footnotes_ordered = array(); 1599 | protected $footnotes_ref_count = array(); 1600 | protected $footnotes_numbers = array(); 1601 | protected $abbr_desciptions = array(); 1602 | protected $abbr_word_re = ''; 1603 | 1604 | # Give the current footnote number. 1605 | protected $footnote_counter = 1; 1606 | 1607 | 1608 | protected function setup() { 1609 | # 1610 | # Setting up Extra-specific variables. 1611 | # 1612 | parent::setup(); 1613 | 1614 | $this->footnotes = array(); 1615 | $this->footnotes_ordered = array(); 1616 | $this->footnotes_ref_count = array(); 1617 | $this->footnotes_numbers = array(); 1618 | $this->abbr_desciptions = array(); 1619 | $this->abbr_word_re = ''; 1620 | $this->footnote_counter = 1; 1621 | 1622 | foreach ($this->predef_abbr as $abbr_word => $abbr_desc) { 1623 | if ($this->abbr_word_re) 1624 | $this->abbr_word_re .= '|'; 1625 | $this->abbr_word_re .= preg_quote($abbr_word); 1626 | $this->abbr_desciptions[$abbr_word] = trim($abbr_desc); 1627 | } 1628 | } 1629 | 1630 | protected function teardown() { 1631 | # 1632 | # Clearing Extra-specific variables. 
1633 | # 1634 | $this->footnotes = array(); 1635 | $this->footnotes_ordered = array(); 1636 | $this->footnotes_ref_count = array(); 1637 | $this->footnotes_numbers = array(); 1638 | $this->abbr_desciptions = array(); 1639 | $this->abbr_word_re = ''; 1640 | 1641 | parent::teardown(); 1642 | } 1643 | 1644 | 1645 | ### Extra Attribute Parser ### 1646 | 1647 | # Expression to use to catch attributes (includes the braces) 1648 | protected $id_class_attr_catch_re = '\{((?:[ ]*[#.][-_:a-zA-Z0-9]+){1,})[ ]*\}'; 1649 | # Expression to use when parsing in a context when no capture is desired 1650 | protected $id_class_attr_nocatch_re = '\{(?:[ ]*[#.][-_:a-zA-Z0-9]+){1,}[ ]*\}'; 1651 | 1652 | protected function doExtraAttributes($tag_name, $attr) { 1653 | # 1654 | # Parse attributes caught by the $this->id_class_attr_catch_re expression 1655 | # and return the HTML-formatted list of attributes. 1656 | # 1657 | # Currently supported attributes are .class and #id. 1658 | # 1659 | if (empty($attr)) return ""; 1660 | 1661 | # Split on components 1662 | preg_match_all('/[#.][-_:a-zA-Z0-9]+/', $attr, $matches); 1663 | $elements = $matches[0]; 1664 | 1665 | # handle classes and ids (only first id taken into account) 1666 | $classes = array(); 1667 | $id = false; 1668 | foreach ($elements as $element) { 1669 | if ($element{0} == '.') { 1670 | $classes[] = substr($element, 1); 1671 | } else if ($element{0} == '#') { 1672 | if ($id === false) $id = substr($element, 1); 1673 | } 1674 | } 1675 | 1676 | # compose attributes as string 1677 | $attr_str = ""; 1678 | if (!empty($id)) { 1679 | $attr_str .= ' id="'.$id.'"'; 1680 | } 1681 | if (!empty($classes)) { 1682 | $attr_str .= ' class="'.implode(" ", $classes).'"'; 1683 | } 1684 | return $attr_str; 1685 | } 1686 | 1687 | 1688 | protected function stripLinkDefinitions($text) { 1689 | # 1690 | # Strips link definitions from text, stores the URLs and titles in 1691 | # hash references. 
1692 | # 1693 | $less_than_tab = $this->tab_width - 1; 1694 | 1695 | # Link defs are in the form: ^[id]: url "optional title" 1696 | $text = preg_replace_callback('{ 1697 | ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1 1698 | [ ]* 1699 | \n? # maybe *one* newline 1700 | [ ]* 1701 | (?: 1702 | <(.+?)> # url = $2 1703 | | 1704 | (\S+?) # url = $3 1705 | ) 1706 | [ ]* 1707 | \n? # maybe one newline 1708 | [ ]* 1709 | (?: 1710 | (?<=\s) # lookbehind for whitespace 1711 | ["(] 1712 | (.*?) # title = $4 1713 | [")] 1714 | [ ]* 1715 | )? # title is optional 1716 | (?:[ ]* '.$this->id_class_attr_catch_re.' )? # $5 = extra id & class attr 1717 | (?:\n+|\Z) 1718 | }xm', 1719 | array(&$this, '_stripLinkDefinitions_callback'), 1720 | $text); 1721 | return $text; 1722 | } 1723 | protected function _stripLinkDefinitions_callback($matches) { 1724 | $link_id = strtolower($matches[1]); 1725 | $url = $matches[2] == '' ? $matches[3] : $matches[2]; 1726 | $this->urls[$link_id] = $url; 1727 | $this->titles[$link_id] =& $matches[4]; 1728 | $this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]); 1729 | return ''; # String that will replace the block 1730 | } 1731 | 1732 | 1733 | ### HTML Block Parser ### 1734 | 1735 | # Tags that are always treated as block tags: 1736 | protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption'; 1737 | 1738 | # Tags treated as block tags only if the opening tag is alone on its line: 1739 | protected $context_block_tags_re = 'script|noscript|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video'; 1740 | 1741 | # Tags where markdown="1" default to span mode: 1742 | protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address'; 1743 | 1744 | # Tags which must not have their contents modified, no matter where 1745 | # they appear: 1746 | protected $clean_tags_re = 'script|math|svg'; 1747 | 1748 
| # Tags that do not need to be closed. 1749 | protected $auto_close_tags_re = 'hr|img|param|source|track'; 1750 | 1751 | 1752 | protected function hashHTMLBlocks($text) { 1753 | # 1754 | # Hashify HTML Blocks and "clean tags". 1755 | # 1756 | # We only want to do this for block-level HTML tags, such as headers, 1757 | # lists, and tables. That's because we still want to wrap

    s around 1758 | # "paragraphs" that are wrapped in non-block-level tags, such as anchors, 1759 | # phrase emphasis, and spans. The list of tags we're looking for is 1760 | # hard-coded. 1761 | # 1762 | # This works by calling _HashHTMLBlocks_InMarkdown, which then calls 1763 | # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" 1764 | # attribute is found within a tag, _HashHTMLBlocks_InHTML calls back 1765 | # _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag. 1766 | # These two functions are calling each other. It's recursive! 1767 | # 1768 | if ($this->no_markup) return $text; 1769 | 1770 | # 1771 | # Call the HTML-in-Markdown hasher. 1772 | # 1773 | list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text); 1774 | 1775 | return $text; 1776 | } 1777 | protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0, 1778 | $enclosing_tag_re = '', $span = false) 1779 | { 1780 | # 1781 | # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags. 1782 | # 1783 | # * $indent is the number of space to be ignored when checking for code 1784 | # blocks. This is important because if we don't take the indent into 1785 | # account, something like this (which looks right) won't work as expected: 1786 | # 1787 | #

    1788 | #
    1789 | # Hello World. <-- Is this a Markdown code block or text? 1790 | #
    <-- Is this a Markdown code block or a real tag? 1791 | #
    1792 | # 1793 | # If you don't like this, just don't indent the tag on which 1794 | # you apply the markdown="1" attribute. 1795 | # 1796 | # * If $enclosing_tag_re is not empty, stops at the first unmatched closing 1797 | # tag with that name. Nested tags supported. 1798 | # 1799 | # * If $span is true, text inside must treated as span. So any double 1800 | # newline will be replaced by a single newline so that it does not create 1801 | # paragraphs. 1802 | # 1803 | # Returns an array of that form: ( processed text , remaining text ) 1804 | # 1805 | if ($text === '') return array('', ''); 1806 | 1807 | # Regex to check for the presense of newlines around a block tag. 1808 | $newline_before_re = '/(?:^\n?|\n\n)*$/'; 1809 | $newline_after_re = 1810 | '{ 1811 | ^ # Start of text following the tag. 1812 | (?>[ ]*)? # Optional comment. 1813 | [ ]*\n # Must be followed by newline. 1814 | }xs'; 1815 | 1816 | # Regex to match any tag. 1817 | $block_tag_re = 1818 | '{ 1819 | ( # $2: Capture whole tag. 1820 | # Tag name. 1822 | '.$this->block_tags_re.' | 1823 | '.$this->context_block_tags_re.' | 1824 | '.$this->clean_tags_re.' | 1825 | (?!\s)'.$enclosing_tag_re.' 1826 | ) 1827 | (?: 1828 | (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name. 1829 | (?> 1830 | ".*?" | # Double quotes (can contain `>`) 1831 | \'.*?\' | # Single quotes (can contain `>`) 1832 | .+? # Anything but quotes and `>`. 1833 | )*? 1834 | )? 1835 | > # End of tag. 1836 | | 1837 | # HTML Comment 1838 | | 1839 | <\?.*?\?> | <%.*?%> # Processing instruction 1840 | | 1841 | # CData Block 1842 | '. ( !$span ? ' # If not in span. 
1843 | | 1844 | # Indented code block 1845 | (?: ^[ ]*\n | ^ | \n[ ]*\n ) 1846 | [ ]{'.($indent+4).'}[^\n]* \n 1847 | (?> 1848 | (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n 1849 | )* 1850 | | 1851 | # Fenced code block marker 1852 | (?<= ^ | \n ) 1853 | [ ]{0,'.($indent+3).'}(?:~{3,}|`{3,}) 1854 | [ ]* 1855 | (?: 1856 | \.?[-_:a-zA-Z0-9]+ # standalone class name 1857 | | 1858 | '.$this->id_class_attr_nocatch_re.' # extra attributes 1859 | )? 1860 | [ ]* 1861 | (?= \n ) 1862 | ' : '' ). ' # End (if not is span). 1863 | | 1864 | # Code span marker 1865 | # Note, this regex needs to go after backtick fenced 1866 | # code blocks but it should also be kept outside of the 1867 | # "if not in span" condition adding backticks to the parser 1868 | `+ 1869 | ) 1870 | }xs'; 1871 | 1872 | 1873 | $depth = 0; # Current depth inside the tag tree. 1874 | $parsed = ""; # Parsed text that will be returned. 1875 | 1876 | # 1877 | # Loop through every tag until we find the closing tag of the parent 1878 | # or loop until reaching the end of text if no parent tag specified. 1879 | # 1880 | do { 1881 | # 1882 | # Split the text using the first $tag_match pattern found. 1883 | # Text before pattern will be first in the array, text after 1884 | # pattern will be at the end, and between will be any catches made 1885 | # by the pattern. 1886 | # 1887 | $parts = preg_split($block_tag_re, $text, 2, 1888 | PREG_SPLIT_DELIM_CAPTURE); 1889 | 1890 | # If in Markdown span mode, add a empty-string span-level hash 1891 | # after each newline to prevent triggering any block element. 1892 | if ($span) { 1893 | $void = $this->hashPart("", ':'); 1894 | $newline = "$void\n"; 1895 | $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void; 1896 | } 1897 | 1898 | $parsed .= $parts[0]; # Text before current tag. 1899 | 1900 | # If end of $text has been reached. Stop loop. 1901 | if (count($parts) < 3) { 1902 | $text = ""; 1903 | break; 1904 | } 1905 | 1906 | $tag = $parts[1]; # Tag to handle. 
1907 | $text = $parts[2]; # Remaining text after current tag. 1908 | $tag_re = preg_quote($tag); # For use in a regular expression. 1909 | 1910 | # 1911 | # Check for: Fenced code block marker. 1912 | # Note: need to recheck the whole tag to disambiguate backtick 1913 | # fences from code spans 1914 | # 1915 | if (preg_match('{^\n?([ ]{0,'.($indent+3).'})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+|'.$this->id_class_attr_nocatch_re.')?[ ]*\n?$}', $tag, $capture)) { 1916 | # Fenced code block marker: find matching end marker. 1917 | $fence_indent = strlen($capture[1]); # use captured indent in re 1918 | $fence_re = $capture[2]; # use captured fence in re 1919 | if (preg_match('{^(?>.*\n)*?[ ]{'.($fence_indent).'}'.$fence_re.'[ ]*(?:\n|$)}', $text, 1920 | $matches)) 1921 | { 1922 | # End marker found: pass text unchanged until marker. 1923 | $parsed .= $tag . $matches[0]; 1924 | $text = substr($text, strlen($matches[0])); 1925 | } 1926 | else { 1927 | # No end marker: just skip it. 1928 | $parsed .= $tag; 1929 | } 1930 | } 1931 | # 1932 | # Check for: Indented code block. 1933 | # 1934 | else if ($tag{0} == "\n" || $tag{0} == " ") { 1935 | # Indented code block: pass it unchanged, will be handled 1936 | # later. 1937 | $parsed .= $tag; 1938 | } 1939 | # 1940 | # Check for: Code span marker 1941 | # Note: need to check this after backtick fenced code blocks 1942 | # 1943 | else if ($tag{0} == "`") { 1944 | # Find corresponding end marker. 1945 | $tag_re = preg_quote($tag); 1946 | if (preg_match('{^(?>.+?|\n(?!\n))*?(?block_tags_re.')\b}', $tag) || 1964 | ( preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) && 1965 | preg_match($newline_before_re, $parsed) && 1966 | preg_match($newline_after_re, $text) ) 1967 | ) 1968 | { 1969 | # Need to parse tag and following text using the HTML parser. 1970 | list($block_text, $text) = 1971 | $this->_hashHTMLBlocks_inHTML($tag . 
$text, "hashBlock", true); 1972 | 1973 | # Make sure it stays outside of any paragraph by adding newlines. 1974 | $parsed .= "\n\n$block_text\n\n"; 1975 | } 1976 | # 1977 | # Check for: Clean tag (like script, math) 1978 | # HTML Comments, processing instructions. 1979 | # 1980 | else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) || 1981 | $tag{1} == '!' || $tag{1} == '?') 1982 | { 1983 | # Need to parse tag and following text using the HTML parser. 1984 | # (don't check for markdown attribute) 1985 | list($block_text, $text) = 1986 | $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false); 1987 | 1988 | $parsed .= $block_text; 1989 | } 1990 | # 1991 | # Check for: Tag with same name as enclosing tag. 1992 | # 1993 | else if ($enclosing_tag_re !== '' && 1994 | # Same name as enclosing tag. 1995 | preg_match('{^= 0); 2018 | 2019 | return array($parsed, $text); 2020 | } 2021 | protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) { 2022 | # 2023 | # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags. 2024 | # 2025 | # * Calls $hash_method to convert any blocks. 2026 | # * Stops when the first opening tag closes. 2027 | # * $md_attr indicate if the use of the `markdown="1"` attribute is allowed. 2028 | # (it is not inside clean tags) 2029 | # 2030 | # Returns an array of that form: ( processed text , remaining text ) 2031 | # 2032 | if ($text === '') return array('', ''); 2033 | 2034 | # Regex to match `markdown` attribute inside of a tag. 2035 | $markdown_attr_re = ' 2036 | { 2037 | \s* # Eat whitespace before the `markdown` attribute 2038 | markdown 2039 | \s*=\s* 2040 | (?> 2041 | (["\']) # $1: quote delimiter 2042 | (.*?) # $2: attribute value 2043 | \1 # matching delimiter 2044 | | 2045 | ([^\s>]*) # $3: unquoted attribute value 2046 | ) 2047 | () # $4: make $3 always defined (avoid warnings) 2048 | }xs'; 2049 | 2050 | # Regex to match any tag. 2051 | $tag_re = '{ 2052 | ( # $2: Capture whole tag. 
2053 | 2058 | ".*?" | # Double quotes (can contain `>`) 2059 | \'.*?\' | # Single quotes (can contain `>`) 2060 | .+? # Anything but quotes and `>`. 2061 | )*? 2062 | )? 2063 | > # End of tag. 2064 | | 2065 | # HTML Comment 2066 | | 2067 | <\?.*?\?> | <%.*?%> # Processing instruction 2068 | | 2069 | # CData Block 2070 | ) 2071 | }xs'; 2072 | 2073 | $original_text = $text; # Save original text in case of faliure. 2074 | 2075 | $depth = 0; # Current depth inside the tag tree. 2076 | $block_text = ""; # Temporary text holder for current text. 2077 | $parsed = ""; # Parsed text that will be returned. 2078 | 2079 | # 2080 | # Get the name of the starting tag. 2081 | # (This pattern makes $base_tag_name_re safe without quoting.) 2082 | # 2083 | if (preg_match('/^<([\w:$]*)\b/', $text, $matches)) 2084 | $base_tag_name_re = $matches[1]; 2085 | 2086 | # 2087 | # Loop through every tag until we find the corresponding closing tag. 2088 | # 2089 | do { 2090 | # 2091 | # Split the text using the first $tag_match pattern found. 2092 | # Text before pattern will be first in the array, text after 2093 | # pattern will be at the end, and between will be any catches made 2094 | # by the pattern. 2095 | # 2096 | $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); 2097 | 2098 | if (count($parts) < 3) { 2099 | # 2100 | # End of $text reached with unbalenced tag(s). 2101 | # In that case, we return original text unchanged and pass the 2102 | # first character as filtered to prevent an infinite loop in the 2103 | # parent function. 2104 | # 2105 | return array($original_text{0}, substr($original_text, 1)); 2106 | } 2107 | 2108 | $block_text .= $parts[0]; # Text before current tag. 2109 | $tag = $parts[1]; # Tag to handle. 2110 | $text = $parts[2]; # Remaining text after current tag. 2111 | 2112 | # 2113 | # Check for: Auto-close tag (like
    ) 2114 | # Comments and Processing Instructions. 2115 | # 2116 | if (preg_match('{^auto_close_tags_re.')\b}', $tag) || 2117 | $tag{1} == '!' || $tag{1} == '?') 2118 | { 2119 | # Just add the tag to the block as if it was text. 2120 | $block_text .= $tag; 2121 | } 2122 | else { 2123 | # 2124 | # Increase/decrease nested tag count. Only do so if 2125 | # the tag's name match base tag's. 2126 | # 2127 | if (preg_match('{^mode = $attr_m[2] . $attr_m[3]; 2144 | $span_mode = $this->mode == 'span' || $this->mode != 'block' && 2145 | preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag); 2146 | 2147 | # Calculate indent before tag. 2148 | if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) { 2149 | $strlen = $this->utf8_strlen; 2150 | $indent = $strlen($matches[1], 'UTF-8'); 2151 | } else { 2152 | $indent = 0; 2153 | } 2154 | 2155 | # End preceding block with this tag. 2156 | $block_text .= $tag; 2157 | $parsed .= $this->$hash_method($block_text); 2158 | 2159 | # Get enclosing tag name for the ParseMarkdown function. 2160 | # (This pattern makes $tag_name_re safe without quoting.) 2161 | preg_match('/^<([\w:$]*)\b/', $tag, $matches); 2162 | $tag_name_re = $matches[1]; 2163 | 2164 | # Parse the content using the HTML-in-Markdown parser. 2165 | list ($block_text, $text) 2166 | = $this->_hashHTMLBlocks_inMarkdown($text, $indent, 2167 | $tag_name_re, $span_mode); 2168 | 2169 | # Outdent markdown text. 2170 | if ($indent > 0) { 2171 | $block_text = preg_replace("/^[ ]{1,$indent}/m", "", 2172 | $block_text); 2173 | } 2174 | 2175 | # Append tag content to parsed text. 2176 | if (!$span_mode) $parsed .= "\n\n$block_text\n\n"; 2177 | else $parsed .= "$block_text"; 2178 | 2179 | # Start over with a new block. 2180 | $block_text = ""; 2181 | } 2182 | else $block_text .= $tag; 2183 | } 2184 | 2185 | } while ($depth > 0); 2186 | 2187 | # 2188 | # Hash last block text that wasn't processed inside the loop. 
2189 | # 2190 | $parsed .= $this->$hash_method($block_text); 2191 | 2192 | return array($parsed, $text); 2193 | } 2194 | 2195 | 2196 | protected function hashClean($text) { 2197 | # 2198 | # Called whenever a tag must be hashed when a function inserts a "clean" tag 2199 | # in $text, it passes through this function and is automaticaly escaped, 2200 | # blocking invalid nested overlap. 2201 | # 2202 | return $this->hashPart($text, 'C'); 2203 | } 2204 | 2205 | 2206 | protected function doAnchors($text) { 2207 | # 2208 | # Turn Markdown link shortcuts into XHTML tags. 2209 | # 2210 | if ($this->in_anchor) return $text; 2211 | $this->in_anchor = true; 2212 | 2213 | # 2214 | # First, handle reference-style links: [link text] [id] 2215 | # 2216 | $text = preg_replace_callback('{ 2217 | ( # wrap whole match in $1 2218 | \[ 2219 | ('.$this->nested_brackets_re.') # link text = $2 2220 | \] 2221 | 2222 | [ ]? # one optional space 2223 | (?:\n[ ]*)? # one optional newline followed by spaces 2224 | 2225 | \[ 2226 | (.*?) # id = $3 2227 | \] 2228 | ) 2229 | }xs', 2230 | array(&$this, '_doAnchors_reference_callback'), $text); 2231 | 2232 | # 2233 | # Next, inline-style links: [link text](url "optional title") 2234 | # 2235 | $text = preg_replace_callback('{ 2236 | ( # wrap whole match in $1 2237 | \[ 2238 | ('.$this->nested_brackets_re.') # link text = $2 2239 | \] 2240 | \( # literal paren 2241 | [ \n]* 2242 | (?: 2243 | <(.+?)> # href = $3 2244 | | 2245 | ('.$this->nested_url_parenthesis_re.') # href = $4 2246 | ) 2247 | [ \n]* 2248 | ( # $5 2249 | ([\'"]) # quote char = $6 2250 | (.*?) # Title = $7 2251 | \6 # matching quote 2252 | [ \n]* # ignore any spaces/tabs between closing quote and ) 2253 | )? # title is optional 2254 | \) 2255 | (?:[ ]? '.$this->id_class_attr_catch_re.' )? 
# $8 = id/class attributes 2256 | ) 2257 | }xs', 2258 | array(&$this, '_doAnchors_inline_callback'), $text); 2259 | 2260 | # 2261 | # Last, handle reference-style shortcuts: [link text] 2262 | # These must come last in case you've also got [link text][1] 2263 | # or [link text](/foo) 2264 | # 2265 | $text = preg_replace_callback('{ 2266 | ( # wrap whole match in $1 2267 | \[ 2268 | ([^\[\]]+) # link text = $2; can\'t contain [ or ] 2269 | \] 2270 | ) 2271 | }xs', 2272 | array(&$this, '_doAnchors_reference_callback'), $text); 2273 | 2274 | $this->in_anchor = false; 2275 | return $text; 2276 | } 2277 | protected function _doAnchors_reference_callback($matches) { 2278 | $whole_match = $matches[1]; 2279 | $link_text = $matches[2]; 2280 | $link_id =& $matches[3]; 2281 | 2282 | if ($link_id == "") { 2283 | # for shortcut links like [this][] or [this]. 2284 | $link_id = $link_text; 2285 | } 2286 | 2287 | # lower-case and turn embedded newlines into spaces 2288 | $link_id = strtolower($link_id); 2289 | $link_id = preg_replace('{[ ]?\n}', ' ', $link_id); 2290 | 2291 | if (isset($this->urls[$link_id])) { 2292 | $url = $this->urls[$link_id]; 2293 | $url = $this->encodeAttribute($url); 2294 | 2295 | $result = "titles[$link_id] ) ) { 2297 | $title = $this->titles[$link_id]; 2298 | $title = $this->encodeAttribute($title); 2299 | $result .= " title=\"$title\""; 2300 | } 2301 | if (isset($this->ref_attr[$link_id])) 2302 | $result .= $this->ref_attr[$link_id]; 2303 | 2304 | $link_text = $this->runSpanGamut($link_text); 2305 | $result .= ">$link_text"; 2306 | $result = $this->hashPart($result); 2307 | } 2308 | else { 2309 | $result = $whole_match; 2310 | } 2311 | return $result; 2312 | } 2313 | protected function _doAnchors_inline_callback($matches) { 2314 | $whole_match = $matches[1]; 2315 | $link_text = $this->runSpanGamut($matches[2]); 2316 | $url = $matches[3] == '' ? 
$matches[4] : $matches[3]; 2317 | $title =& $matches[7]; 2318 | $attr = $this->doExtraAttributes("a", $dummy =& $matches[8]); 2319 | 2320 | 2321 | $url = $this->encodeAttribute($url); 2322 | 2323 | $result = "encodeAttribute($title); 2326 | $result .= " title=\"$title\""; 2327 | } 2328 | $result .= $attr; 2329 | 2330 | $link_text = $this->runSpanGamut($link_text); 2331 | $result .= ">$link_text"; 2332 | 2333 | return $this->hashPart($result); 2334 | } 2335 | 2336 | 2337 | protected function doImages($text) { 2338 | # 2339 | # Turn Markdown image shortcuts into tags. 2340 | # 2341 | # 2342 | # First, handle reference-style labeled images: ![alt text][id] 2343 | # 2344 | $text = preg_replace_callback('{ 2345 | ( # wrap whole match in $1 2346 | !\[ 2347 | ('.$this->nested_brackets_re.') # alt text = $2 2348 | \] 2349 | 2350 | [ ]? # one optional space 2351 | (?:\n[ ]*)? # one optional newline followed by spaces 2352 | 2353 | \[ 2354 | (.*?) # id = $3 2355 | \] 2356 | 2357 | ) 2358 | }xs', 2359 | array(&$this, '_doImages_reference_callback'), $text); 2360 | 2361 | # 2362 | # Next, handle inline images: ![alt text](url "optional title") 2363 | # Don't forget: encode * and _ 2364 | # 2365 | $text = preg_replace_callback('{ 2366 | ( # wrap whole match in $1 2367 | !\[ 2368 | ('.$this->nested_brackets_re.') # alt text = $2 2369 | \] 2370 | \s? # One optional whitespace character 2371 | \( # literal paren 2372 | [ \n]* 2373 | (?: 2374 | <(\S*)> # src url = $3 2375 | | 2376 | ('.$this->nested_url_parenthesis_re.') # src url = $4 2377 | ) 2378 | [ \n]* 2379 | ( # $5 2380 | ([\'"]) # quote char = $6 2381 | (.*?) # title = $7 2382 | \6 # matching quote 2383 | [ \n]* 2384 | )? # title is optional 2385 | \) 2386 | (?:[ ]? '.$this->id_class_attr_catch_re.' )? 
# $8 = id/class attributes 2387 | ) 2388 | }xs', 2389 | array(&$this, '_doImages_inline_callback'), $text); 2390 | 2391 | return $text; 2392 | } 2393 | protected function _doImages_reference_callback($matches) { 2394 | $whole_match = $matches[1]; 2395 | $alt_text = $matches[2]; 2396 | $link_id = strtolower($matches[3]); 2397 | 2398 | if ($link_id == "") { 2399 | $link_id = strtolower($alt_text); # for shortcut links like ![this][]. 2400 | } 2401 | 2402 | $alt_text = $this->encodeAttribute($alt_text); 2403 | if (isset($this->urls[$link_id])) { 2404 | $url = $this->encodeAttribute($this->urls[$link_id]); 2405 | $result = "\"$alt_text\"";titles[$link_id])) { 2407 | $title = $this->titles[$link_id]; 2408 | $title = $this->encodeAttribute($title); 2409 | $result .= " title=\"$title\""; 2410 | } 2411 | if (isset($this->ref_attr[$link_id])) 2412 | $result .= $this->ref_attr[$link_id]; 2413 | $result .= $this->empty_element_suffix; 2414 | $result = $this->hashPart($result); 2415 | } 2416 | else { 2417 | # If there's no such link ID, leave intact: 2418 | $result = $whole_match; 2419 | } 2420 | 2421 | return $result; 2422 | } 2423 | protected function _doImages_inline_callback($matches) { 2424 | $whole_match = $matches[1]; 2425 | $alt_text = $matches[2]; 2426 | $url = $matches[3] == '' ? $matches[4] : $matches[3]; 2427 | $title =& $matches[7]; 2428 | $attr = $this->doExtraAttributes("img", $dummy =& $matches[8]); 2429 | 2430 | $alt_text = $this->encodeAttribute($alt_text); 2431 | $url = $this->encodeAttribute($url); 2432 | $result = "\"$alt_text\"";encodeAttribute($title); 2435 | $result .= " title=\"$title\""; # $title already quoted 2436 | } 2437 | $result .= $attr; 2438 | $result .= $this->empty_element_suffix; 2439 | 2440 | return $this->hashPart($result); 2441 | } 2442 | 2443 | 2444 | protected function doHeaders($text) { 2445 | # 2446 | # Redefined to add id and class attribute support. 
2447 | # 2448 | # Setext-style headers: 2449 | # Header 1 {#header1} 2450 | # ======== 2451 | # 2452 | # Header 2 {#header2 .class1 .class2} 2453 | # -------- 2454 | # 2455 | $text = preg_replace_callback( 2456 | '{ 2457 | (^.+?) # $1: Header text 2458 | (?:[ ]+ '.$this->id_class_attr_catch_re.' )? # $3 = id/class attributes 2459 | [ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer 2460 | }mx', 2461 | array(&$this, '_doHeaders_callback_setext'), $text); 2462 | 2463 | # atx-style headers: 2464 | # # Header 1 {#header1} 2465 | # ## Header 2 {#header2} 2466 | # ## Header 2 with closing hashes ## {#header3.class1.class2} 2467 | # ... 2468 | # ###### Header 6 {.class2} 2469 | # 2470 | $text = preg_replace_callback('{ 2471 | ^(\#{1,6}) # $1 = string of #\'s 2472 | [ ]* 2473 | (.+?) # $2 = Header text 2474 | [ ]* 2475 | \#* # optional closing #\'s (not counted) 2476 | (?:[ ]+ '.$this->id_class_attr_catch_re.' )? # $3 = id/class attributes 2477 | [ ]* 2478 | \n+ 2479 | }xm', 2480 | array(&$this, '_doHeaders_callback_atx'), $text); 2481 | 2482 | return $text; 2483 | } 2484 | protected function _doHeaders_callback_setext($matches) { 2485 | if ($matches[3] == '-' && preg_match('{^- }', $matches[1])) 2486 | return $matches[0]; 2487 | $level = $matches[3]{0} == '=' ? 1 : 2; 2488 | $attr = $this->doExtraAttributes("h$level", $dummy =& $matches[2]); 2489 | $block = "".$this->runSpanGamut($matches[1]).""; 2490 | return "\n" . $this->hashBlock($block) . "\n\n"; 2491 | } 2492 | protected function _doHeaders_callback_atx($matches) { 2493 | $level = strlen($matches[1]); 2494 | $attr = $this->doExtraAttributes("h$level", $dummy =& $matches[3]); 2495 | $block = "".$this->runSpanGamut($matches[2]).""; 2496 | return "\n" . $this->hashBlock($block) . "\n\n"; 2497 | } 2498 | 2499 | 2500 | protected function doTables($text) { 2501 | # 2502 | # Form HTML tables. 2503 | # 2504 | $less_than_tab = $this->tab_width - 1; 2505 | # 2506 | # Find tables with leading pipe. 
2507 | # 2508 | # | Header 1 | Header 2 2509 | # | -------- | -------- 2510 | # | Cell 1 | Cell 2 2511 | # | Cell 3 | Cell 4 2512 | # 2513 | $text = preg_replace_callback(' 2514 | { 2515 | ^ # Start of a line 2516 | [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2517 | [|] # Optional leading pipe (present) 2518 | (.+) \n # $1: Header row (at least one pipe) 2519 | 2520 | [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2521 | [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline 2522 | 2523 | ( # $3: Cells 2524 | (?> 2525 | [ ]* # Allowed whitespace. 2526 | [|] .* \n # Row content. 2527 | )* 2528 | ) 2529 | (?=\n|\Z) # Stop at final double newline. 2530 | }xm', 2531 | array(&$this, '_doTable_leadingPipe_callback'), $text); 2532 | 2533 | # 2534 | # Find tables without leading pipe. 2535 | # 2536 | # Header 1 | Header 2 2537 | # -------- | -------- 2538 | # Cell 1 | Cell 2 2539 | # Cell 3 | Cell 4 2540 | # 2541 | $text = preg_replace_callback(' 2542 | { 2543 | ^ # Start of a line 2544 | [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2545 | (\S.*[|].*) \n # $1: Header row (at least one pipe) 2546 | 2547 | [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2548 | ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline 2549 | 2550 | ( # $3: Cells 2551 | (?> 2552 | .* [|] .* \n # Row content 2553 | )* 2554 | ) 2555 | (?=\n|\Z) # Stop at final double newline. 2556 | }xm', 2557 | array(&$this, '_DoTable_callback'), $text); 2558 | 2559 | return $text; 2560 | } 2561 | protected function _doTable_leadingPipe_callback($matches) { 2562 | $head = $matches[1]; 2563 | $underline = $matches[2]; 2564 | $content = $matches[3]; 2565 | 2566 | # Remove leading pipe for each row. 
2567 | $content = preg_replace('/^ *[|]/m', '', $content); 2568 | 2569 | return $this->_doTable_callback(array($matches[0], $head, $underline, $content)); 2570 | } 2571 | protected function _doTable_makeAlignAttr($alignname) 2572 | { 2573 | if (empty($this->table_align_class_tmpl)) 2574 | return " align=\"$alignname\""; 2575 | 2576 | $classname = str_replace('%%', $alignname, $this->table_align_class_tmpl); 2577 | return " class=\"$classname\""; 2578 | } 2579 | protected function _doTable_callback($matches) { 2580 | $head = $matches[1]; 2581 | $underline = $matches[2]; 2582 | $content = $matches[3]; 2583 | 2584 | # Remove any tailing pipes for each line. 2585 | $head = preg_replace('/[|] *$/m', '', $head); 2586 | $underline = preg_replace('/[|] *$/m', '', $underline); 2587 | $content = preg_replace('/[|] *$/m', '', $content); 2588 | 2589 | # Reading alignement from header underline. 2590 | $separators = preg_split('/ *[|] */', $underline); 2591 | foreach ($separators as $n => $s) { 2592 | if (preg_match('/^ *-+: *$/', $s)) 2593 | $attr[$n] = $this->_doTable_makeAlignAttr('right'); 2594 | else if (preg_match('/^ *:-+: *$/', $s)) 2595 | $attr[$n] = $this->_doTable_makeAlignAttr('center'); 2596 | else if (preg_match('/^ *:-+ *$/', $s)) 2597 | $attr[$n] = $this->_doTable_makeAlignAttr('left'); 2598 | else 2599 | $attr[$n] = ''; 2600 | } 2601 | 2602 | # Parsing span elements, including code spans, character escapes, 2603 | # and inline HTML tags, so that pipes inside those gets ignored. 2604 | $head = $this->parseSpan($head); 2605 | $headers = preg_split('/ *[|] */', $head); 2606 | $col_count = count($headers); 2607 | $attr = array_pad($attr, $col_count, ''); 2608 | 2609 | # Write column headers. 2610 | $text = "\n"; 2611 | $text .= "\n"; 2612 | $text .= "\n"; 2613 | foreach ($headers as $n => $header) 2614 | $text .= " ".$this->runSpanGamut(trim($header))."\n"; 2615 | $text .= "\n"; 2616 | $text .= "\n"; 2617 | 2618 | # Split content by row. 
2619 | $rows = explode("\n", trim($content, "\n")); 2620 | 2621 | $text .= "\n"; 2622 | foreach ($rows as $row) { 2623 | # Parsing span elements, including code spans, character escapes, 2624 | # and inline HTML tags, so that pipes inside those gets ignored. 2625 | $row = $this->parseSpan($row); 2626 | 2627 | # Split row by cell. 2628 | $row_cells = preg_split('/ *[|] */', $row, $col_count); 2629 | $row_cells = array_pad($row_cells, $col_count, ''); 2630 | 2631 | $text .= "\n"; 2632 | foreach ($row_cells as $n => $cell) 2633 | $text .= " ".$this->runSpanGamut(trim($cell))."\n"; 2634 | $text .= "\n"; 2635 | } 2636 | $text .= "\n"; 2637 | $text .= "
    "; 2638 | 2639 | return $this->hashBlock($text) . "\n"; 2640 | } 2641 | 2642 | 2643 | protected function doDefLists($text) { 2644 | # 2645 | # Form HTML definition lists. 2646 | # 2647 | $less_than_tab = $this->tab_width - 1; 2648 | 2649 | # Re-usable pattern to match any entire dl list: 2650 | $whole_list_re = '(?> 2651 | ( # $1 = whole list 2652 | ( # $2 2653 | [ ]{0,'.$less_than_tab.'} 2654 | ((?>.*\S.*\n)+) # $3 = defined term 2655 | \n? 2656 | [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition 2657 | ) 2658 | (?s:.+?) 2659 | ( # $4 2660 | \z 2661 | | 2662 | \n{2,} 2663 | (?=\S) 2664 | (?! # Negative lookahead for another term 2665 | [ ]{0,'.$less_than_tab.'} 2666 | (?: \S.*\n )+? # defined term 2667 | \n? 2668 | [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition 2669 | ) 2670 | (?! # Negative lookahead for another definition 2671 | [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition 2672 | ) 2673 | ) 2674 | ) 2675 | )'; // mx 2676 | 2677 | $text = preg_replace_callback('{ 2678 | (?>\A\n?|(?<=\n\n)) 2679 | '.$whole_list_re.' 2680 | }mx', 2681 | array(&$this, '_doDefLists_callback'), $text); 2682 | 2683 | return $text; 2684 | } 2685 | protected function _doDefLists_callback($matches) { 2686 | # Re-usable patterns to match list item bullets and number markers: 2687 | $list = $matches[1]; 2688 | 2689 | # Turn double returns into triple returns, so that we can make a 2690 | # paragraph for the last item in a list, if necessary: 2691 | $result = trim($this->processDefListItems($list)); 2692 | $result = "
    \n" . $result . "\n
    "; 2693 | return $this->hashBlock($result) . "\n\n"; 2694 | } 2695 | 2696 | 2697 | protected function processDefListItems($list_str) { 2698 | # 2699 | # Process the contents of a single definition list, splitting it 2700 | # into individual term and definition list items. 2701 | # 2702 | $less_than_tab = $this->tab_width - 1; 2703 | 2704 | # trim trailing blank lines: 2705 | $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); 2706 | 2707 | # Process definition terms. 2708 | $list_str = preg_replace_callback('{ 2709 | (?>\A\n?|\n\n+) # leading line 2710 | ( # definition terms = $1 2711 | [ ]{0,'.$less_than_tab.'} # leading whitespace 2712 | (?!\:[ ]|[ ]) # negative lookahead for a definition 2713 | # mark (colon) or more whitespace. 2714 | (?> \S.* \n)+? # actual term (not whitespace). 2715 | ) 2716 | (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed 2717 | # with a definition mark. 2718 | }xm', 2719 | array(&$this, '_processDefListItems_callback_dt'), $list_str); 2720 | 2721 | # Process actual definitions. 2722 | $list_str = preg_replace_callback('{ 2723 | \n(\n+)? # leading line = $1 2724 | ( # marker space = $2 2725 | [ ]{0,'.$less_than_tab.'} # whitespace before colon 2726 | \:[ ]+ # definition mark (colon) 2727 | ) 2728 | ((?s:.+?)) # definition text = $3 2729 | (?= \n+ # stop at next definition mark, 2730 | (?: # next term or end of text 2731 | [ ]{0,'.$less_than_tab.'} \:[ ] | 2732 |
    | \z 2733 | ) 2734 | ) 2735 | }xm', 2736 | array(&$this, '_processDefListItems_callback_dd'), $list_str); 2737 | 2738 | return $list_str; 2739 | } 2740 | protected function _processDefListItems_callback_dt($matches) { 2741 | $terms = explode("\n", trim($matches[1])); 2742 | $text = ''; 2743 | foreach ($terms as $term) { 2744 | $term = $this->runSpanGamut(trim($term)); 2745 | $text .= "\n
    " . $term . "
    "; 2746 | } 2747 | return $text . "\n"; 2748 | } 2749 | protected function _processDefListItems_callback_dd($matches) { 2750 | $leading_line = $matches[1]; 2751 | $marker_space = $matches[2]; 2752 | $def = $matches[3]; 2753 | 2754 | if ($leading_line || preg_match('/\n{2,}/', $def)) { 2755 | # Replace marker with the appropriate whitespace indentation 2756 | $def = str_repeat(' ', strlen($marker_space)) . $def; 2757 | $def = $this->runBlockGamut($this->outdent($def . "\n\n")); 2758 | $def = "\n". $def ."\n"; 2759 | } 2760 | else { 2761 | $def = rtrim($def); 2762 | $def = $this->runSpanGamut($this->outdent($def)); 2763 | } 2764 | 2765 | return "\n
    " . $def . "
    \n"; 2766 | } 2767 | 2768 | 2769 | protected function doFencedCodeBlocks($text) { 2770 | # 2771 | # Adding the fenced code block syntax to regular Markdown: 2772 | # 2773 | # ~~~ 2774 | # Code block 2775 | # ~~~ 2776 | # 2777 | $less_than_tab = $this->tab_width; 2778 | 2779 | $text = preg_replace_callback('{ 2780 | (?:\n|\A) 2781 | # 1: Opening marker 2782 | ( 2783 | (?:~{3,}|`{3,}) # 3 or more tildes/backticks. 2784 | ) 2785 | [ ]* 2786 | (?: 2787 | \.?([-_:a-zA-Z0-9]+) # 2: standalone class name 2788 | | 2789 | '.$this->id_class_attr_catch_re.' # 3: Extra attributes 2790 | )? 2791 | [ ]* \n # Whitespace and newline following marker. 2792 | 2793 | # 4: Content 2794 | ( 2795 | (?> 2796 | (?!\1 [ ]* \n) # Not a closing marker. 2797 | .*\n+ 2798 | )+ 2799 | ) 2800 | 2801 | # Closing marker. 2802 | \1 [ ]* (?= \n ) 2803 | }xm', 2804 | array(&$this, '_doFencedCodeBlocks_callback'), $text); 2805 | 2806 | return $text; 2807 | } 2808 | protected function _doFencedCodeBlocks_callback($matches) { 2809 | $classname =& $matches[2]; 2810 | $attrs =& $matches[3]; 2811 | $codeblock = $matches[4]; 2812 | $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES); 2813 | $codeblock = preg_replace_callback('/^\n+/', 2814 | array(&$this, '_doFencedCodeBlocks_newlines'), $codeblock); 2815 | 2816 | if ($classname != "") { 2817 | if ($classname{0} == '.') 2818 | $classname = substr($classname, 1); 2819 | $attr_str = ' class="'.$this->code_class_prefix.$classname.'"'; 2820 | } else { 2821 | $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs); 2822 | } 2823 | $pre_attr_str = $this->code_attr_on_pre ? $attr_str : ''; 2824 | $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str; 2825 | $codeblock = "$codeblock
    "; 2826 | 2827 | return "\n\n".$this->hashBlock($codeblock)."\n\n"; 2828 | } 2829 | protected function _doFencedCodeBlocks_newlines($matches) { 2830 | return str_repeat("empty_element_suffix", 2831 | strlen($matches[0])); 2832 | } 2833 | 2834 | 2835 | # 2836 | # Redefining emphasis markers so that emphasis by underscore does not 2837 | # work in the middle of a word. 2838 | # 2839 | protected $em_relist = array( 2840 | '' => '(?:(? '(?<=\S|^)(? '(?<=\S|^)(? '(?:(? '(?<=\S|^)(? '(?<=\S|^)(? '(?:(? '(?<=\S|^)(? '(?<=\S|^)(? tags 2860 | # 2861 | # Strip leading and trailing lines: 2862 | $text = preg_replace('/\A\n+|\n+\z/', '', $text); 2863 | 2864 | $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); 2865 | 2866 | # 2867 | # Wrap

    tags and unhashify HTML blocks 2868 | # 2869 | foreach ($grafs as $key => $value) { 2870 | $value = trim($this->runSpanGamut($value)); 2871 | 2872 | # Check if this should be enclosed in a paragraph. 2873 | # Clean tag hashes & block tag hashes are left alone. 2874 | $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value); 2875 | 2876 | if ($is_p) { 2877 | $value = "

    $value

    "; 2878 | } 2879 | $grafs[$key] = $value; 2880 | } 2881 | 2882 | # Join grafs in one text, then unhash HTML tags. 2883 | $text = implode("\n\n", $grafs); 2884 | 2885 | # Finish by removing any tag hashes still present in $text. 2886 | $text = $this->unhash($text); 2887 | 2888 | return $text; 2889 | } 2890 | 2891 | 2892 | ### Footnotes 2893 | 2894 | protected function stripFootnotes($text) { 2895 | # 2896 | # Strips link definitions from text, stores the URLs and titles in 2897 | # hash references. 2898 | # 2899 | $less_than_tab = $this->tab_width - 1; 2900 | 2901 | # Link defs are in the form: [^id]: url "optional title" 2902 | $text = preg_replace_callback('{ 2903 | ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?: # note_id = $1 2904 | [ ]* 2905 | \n? # maybe *one* newline 2906 | ( # text = $2 (no blank lines allowed) 2907 | (?: 2908 | .+ # actual text 2909 | | 2910 | \n # newlines but 2911 | (?!\[\^.+?\]:\s)# negative lookahead for footnote marker. 2912 | (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed 2913 | # by non-indented content 2914 | )* 2915 | ) 2916 | }xm', 2917 | array(&$this, '_stripFootnotes_callback'), 2918 | $text); 2919 | return $text; 2920 | } 2921 | protected function _stripFootnotes_callback($matches) { 2922 | $note_id = $this->fn_id_prefix . $matches[1]; 2923 | $this->footnotes[$note_id] = $this->outdent($matches[2]); 2924 | return ''; # String that will replace the block 2925 | } 2926 | 2927 | 2928 | protected function doFootnotes($text) { 2929 | # 2930 | # Replace footnote references in $text [^id] with a special text-token 2931 | # which will be replaced by the actual footnote marker in appendFootnotes. 2932 | # 2933 | if (!$this->in_anchor) { 2934 | $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text); 2935 | } 2936 | return $text; 2937 | } 2938 | 2939 | 2940 | protected function appendFootnotes($text) { 2941 | # 2942 | # Append footnote list to text. 
2943 | # 2944 | $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 2945 | array(&$this, '_appendFootnotes_callback'), $text); 2946 | 2947 | if (!empty($this->footnotes_ordered)) { 2948 | $text .= "\n\n"; 2949 | $text .= "
    \n"; 2950 | $text .= "empty_element_suffix ."\n"; 2951 | $text .= "
      \n\n"; 2952 | 2953 | $attr = ""; 2954 | if ($this->fn_backlink_class != "") { 2955 | $class = $this->fn_backlink_class; 2956 | $class = $this->encodeAttribute($class); 2957 | $attr .= " class=\"$class\""; 2958 | } 2959 | if ($this->fn_backlink_title != "") { 2960 | $title = $this->fn_backlink_title; 2961 | $title = $this->encodeAttribute($title); 2962 | $attr .= " title=\"$title\""; 2963 | } 2964 | $num = 0; 2965 | 2966 | while (!empty($this->footnotes_ordered)) { 2967 | $footnote = reset($this->footnotes_ordered); 2968 | $note_id = key($this->footnotes_ordered); 2969 | unset($this->footnotes_ordered[$note_id]); 2970 | $ref_count = $this->footnotes_ref_count[$note_id]; 2971 | unset($this->footnotes_ref_count[$note_id]); 2972 | unset($this->footnotes[$note_id]); 2973 | 2974 | $footnote .= "\n"; # Need to append newline before parsing. 2975 | $footnote = $this->runBlockGamut("$footnote\n"); 2976 | $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 2977 | array(&$this, '_appendFootnotes_callback'), $footnote); 2978 | 2979 | $attr = str_replace("%%", ++$num, $attr); 2980 | $note_id = $this->encodeAttribute($note_id); 2981 | 2982 | # Prepare backlink, multiple backlinks if multiple references 2983 | $backlink = ""; 2984 | for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) { 2985 | $backlink .= " "; 2986 | } 2987 | # Add backlink to last paragraph; create new paragraph if needed. 2988 | if (preg_match('{

      $}', $footnote)) { 2989 | $footnote = substr($footnote, 0, -4) . " $backlink

      "; 2990 | } else { 2991 | $footnote .= "\n\n

      $backlink

      "; 2992 | } 2993 | 2994 | $text .= "
    1. \n"; 2995 | $text .= $footnote . "\n"; 2996 | $text .= "
    2. \n\n"; 2997 | } 2998 | 2999 | $text .= "
    \n"; 3000 | $text .= "
    "; 3001 | } 3002 | return $text; 3003 | } 3004 | protected function _appendFootnotes_callback($matches) { 3005 | $node_id = $this->fn_id_prefix . $matches[1]; 3006 | 3007 | # Create footnote marker only if it has a corresponding footnote *and* 3008 | # the footnote hasn't been used by another marker. 3009 | if (isset($this->footnotes[$node_id])) { 3010 | $num =& $this->footnotes_numbers[$node_id]; 3011 | if (!isset($num)) { 3012 | # Transfer footnote content to the ordered list and give it its 3013 | # number 3014 | $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id]; 3015 | $this->footnotes_ref_count[$node_id] = 1; 3016 | $num = $this->footnote_counter++; 3017 | $ref_count_mark = ''; 3018 | } else { 3019 | $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1; 3020 | } 3021 | 3022 | $attr = ""; 3023 | if ($this->fn_link_class != "") { 3024 | $class = $this->fn_link_class; 3025 | $class = $this->encodeAttribute($class); 3026 | $attr .= " class=\"$class\""; 3027 | } 3028 | if ($this->fn_link_title != "") { 3029 | $title = $this->fn_link_title; 3030 | $title = $this->encodeAttribute($title); 3031 | $attr .= " title=\"$title\""; 3032 | } 3033 | 3034 | $attr = str_replace("%%", $num, $attr); 3035 | $node_id = $this->encodeAttribute($node_id); 3036 | 3037 | return 3038 | "". 3039 | "$num". 3040 | ""; 3041 | } 3042 | 3043 | return "[^".$matches[1]."]"; 3044 | } 3045 | 3046 | 3047 | ### Abbreviations ### 3048 | 3049 | protected function stripAbbreviations($text) { 3050 | # 3051 | # Strips abbreviations from text, stores titles in hash references. 
3052 | # 3053 | $less_than_tab = $this->tab_width - 1; 3054 | 3055 | # Link defs are in the form: [id]*: url "optional title" 3056 | $text = preg_replace_callback('{ 3057 | ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?: # abbr_id = $1 3058 | (.*) # text = $2 (no blank lines allowed) 3059 | }xm', 3060 | array(&$this, '_stripAbbreviations_callback'), 3061 | $text); 3062 | return $text; 3063 | } 3064 | protected function _stripAbbreviations_callback($matches) { 3065 | $abbr_word = $matches[1]; 3066 | $abbr_desc = $matches[2]; 3067 | if ($this->abbr_word_re) 3068 | $this->abbr_word_re .= '|'; 3069 | $this->abbr_word_re .= preg_quote($abbr_word); 3070 | $this->abbr_desciptions[$abbr_word] = trim($abbr_desc); 3071 | return ''; # String that will replace the block 3072 | } 3073 | 3074 | 3075 | protected function doAbbreviations($text) { 3076 | # 3077 | # Find defined abbreviations in text and wrap them in elements. 3078 | # 3079 | if ($this->abbr_word_re) { 3080 | // cannot use the /x modifier because abbr_word_re may 3081 | // contain significant spaces: 3082 | $text = preg_replace_callback('{'. 3083 | '(?abbr_word_re.')'. 3085 | '(?![\w\x1A])'. 3086 | '}', 3087 | array(&$this, '_doAbbreviations_callback'), $text); 3088 | } 3089 | return $text; 3090 | } 3091 | protected function _doAbbreviations_callback($matches) { 3092 | $abbr = $matches[0]; 3093 | if (isset($this->abbr_desciptions[$abbr])) { 3094 | $desc = $this->abbr_desciptions[$abbr]; 3095 | if (empty($desc)) { 3096 | return $this->hashPart("$abbr"); 3097 | } else { 3098 | $desc = $this->encodeAttribute($desc); 3099 | return $this->hashPart("$abbr"); 3100 | } 3101 | } else { 3102 | return $matches[0]; 3103 | } 3104 | } 3105 | 3106 | } 3107 | --------------------------------------------------------------------------------