├── index.html ├── settings.inc.php ├── import-instagram.php ├── import-facebook.php └── markdown.inc.php /index.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /settings.inc.php: -------------------------------------------------------------------------------- 1 | 'Mr. H', 'Chewie' => 'Mr. C'); 38 | 39 | // ID of WordPress user to import posts as 40 | $wp_post_author_id = 1; 41 | 42 | // Should comments and pings be open or closed on the imported posts?... 43 | $wp_comment_status = 'open'; // or 'closed' 44 | $wp_ping_status = 'open'; // or 'closed' 45 | 46 | // ############################### 47 | // NOTHING MORE TO CONFIGURE BELOW 48 | // ############################### 49 | 50 | date_default_timezone_set('UTC'); 51 | ini_set('display_errors', '1'); 52 | error_reporting(-1); 53 | 54 | define('DOC_ROOT', realpath(dirname(__FILE__))); 55 | 56 | $wp_web_root = rtrim($wp_web_root, '/'); 57 | $wp_file_root = rtrim($wp_file_root, '/'); 58 | 59 | $fb_backup_path = rtrim($fb_backup_path, '/'); 60 | $instagram_backup_path = rtrim($instagram_backup_path, '/'); 61 | 62 | $db = mysqli_connect($db_host, $db_user, $db_pass) or die('Could not connect to database.'); 63 | mysqli_select_db($db, $db_name) or die('Could not select database.'); 64 | mysqli_set_charset($db, 'utf8mb4') or die('Could not set database charset to utf8mb4.'); 65 | 66 | $upload_dir = "$wp_file_root/wp-content/uploads/"; 67 | if(!file_exists($upload_dir)) { 68 | if(mkdir($upload_dir, 0777, true) === false) { 69 | die("Could not create WordPress uploads directory at $upload_dir"); 70 | } 71 | } else { 72 | if(!is_writable($upload_dir)) { 73 | die("WordPress uploads directory is not writable at $upload_dir"); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /import-instagram.php: 
-------------------------------------------------------------------------------- 1 | photos, false); 10 | combine_dates($combined_items, $media->videos, true); 11 | 12 | foreach($combined_items as $taken_at => $arr) { 13 | $ts = strtotime($taken_at); 14 | $date = date('Y-m-d H:i:s', $ts); 15 | $year = date('Y', $ts); 16 | $month = date('m', $ts); 17 | $day = date('d', $ts); 18 | 19 | // Create the WP upload directory... 20 | $dest_dir = "$wp_file_root/wp-content/uploads/$year/$month/$day"; 21 | if(!file_exists($dest_dir)) { 22 | if(mkdir($dest_dir, 0777, true) === false) { 23 | die("Could not create directory: $dest_dir"); 24 | } 25 | } 26 | 27 | // For lack of a better option, we'll make the new WordPress post's title the FB post's date 28 | $post_title = date('F j, Y g:ia', $ts); 29 | 30 | // Clear out the post data on each loop through... 31 | $post_content = ''; 32 | 33 | foreach($arr as $item) { 34 | $data = $item['data']; 35 | 36 | $caption = @$data->caption; 37 | $location = @$data->location; 38 | 39 | // Figure out where everything's gonna go... 40 | $path = $instagram_backup_path . '/' . $data->path; 41 | $img_path = DOC_ROOT . "/$path"; 42 | $fn = basename($img_path); 43 | $dest_fn = "$dest_dir/$fn"; 44 | 45 | // And copy the media into WP uploads... 46 | if(copy($path, $dest_fn) === false) { 47 | die("Could not copy $path to $dest_fn"); 48 | } 49 | 50 | // Build the media item's URL 51 | $item_url = "$wp_web_root/wp-content/uploads/$year/$month/$day/$fn"; 52 | 53 | // Add the media item into the post... 54 | if($item['video']) { 55 | $post_content .= "
"; 56 | } else { 57 | $post_content .= "$location
"; 69 | } 70 | } 71 | 72 | // Insert the new post into WordPress... 73 | $wp_post_author_id = intval($wp_post_author_id); 74 | $post_content = mysqli_real_escape_string($db, $post_content); 75 | $post_title = mysqli_real_escape_string($db, $post_title); 76 | $wp_comment_status = mysqli_real_escape_string($db, $wp_comment_status); 77 | $wp_ping_status = mysqli_real_escape_string($db, $wp_ping_status); 78 | 79 | $sql = 'INSERT INTO wp_posts (post_author, post_date, post_date_gmt, post_content, post_title, post_excerpt, post_status, comment_status, ping_status, post_modified, post_modified_gmt, post_parent, post_type, comment_count, to_ping, pinged, post_content_filtered) '; 80 | $sql .= "VALUES ($wp_post_author_id, '$date', '$date', '$post_content', '$post_title', '', 'publish', '$wp_comment_status', '$wp_ping_status', '$date', '$date', 0, 'post', 0, '', '', '')"; 81 | mysqli_query($db, $sql) or die('MySQL error: ' . mysqli_error($db)); 82 | $post_id = mysqli_insert_id($db); 83 | 84 | $post_name = $post_id; 85 | $guid = "$wp_web_root/?p=$post_id"; 86 | 87 | $sql = "UPDATE wp_posts SET post_name = '$post_name', guid = '$guid' WHERE ID = $post_id"; 88 | mysqli_query($db, $sql) or die('MySQL error: ' . mysqli_error($db)); 89 | 90 | echo "Imported Instagram post from $date$place
"; 120 | } 121 | 122 | // Insert the new post into WordPress... 123 | $wp_post_author_id = intval($wp_post_author_id); 124 | $post_content = mysqli_real_escape_string($db, $post_content); 125 | $post_title = mysqli_real_escape_string($db, $post_title); 126 | $wp_comment_status = mysqli_real_escape_string($db, $wp_comment_status); 127 | $wp_ping_status = mysqli_real_escape_string($db, $wp_ping_status); 128 | 129 | $sql = 'INSERT INTO wp_posts (post_author, post_date, post_date_gmt, post_content, post_title, post_excerpt, post_status, comment_status, ping_status, post_modified, post_modified_gmt, post_parent, post_type, comment_count, to_ping, pinged, post_content_filtered) '; 130 | $sql .= "VALUES ($wp_post_author_id, '$date', '$date', '$post_content', '$post_title', '', 'publish', '$wp_comment_status', '$wp_ping_status', '$date', '$date', 0, 'post', 0, '', '', '')"; 131 | mysqli_query($db, $sql) or die('MySQL error: ' . mysqli_error($db)); 132 | $post_id = mysqli_insert_id($db); 133 | 134 | $post_name = $post_id; 135 | $guid = "$wp_web_root/?p=$post_id"; 136 | 137 | $sql = "UPDATE wp_posts SET post_name = '$post_name', guid = '$guid' WHERE ID = $post_id"; 138 | mysqli_query($db, $sql) or die('MySQL error: ' . mysqli_error($db)); 139 | 140 | echo "Imported Facebook post from $date
'.$text.'
'; 121 | $text = preg_replace('{\n{2,}}', "\n\n", $text); 122 | } 123 | return $text; 124 | } 125 | 126 | function mdwp_strip_p($t) { return preg_replace('{?p>}i', '', $t); } 127 | 128 | function mdwp_hide_tags($text) { 129 | global $mdwp_hidden_tags, $mdwp_placeholders; 130 | return str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text); 131 | } 132 | function mdwp_show_tags($text) { 133 | global $mdwp_hidden_tags, $mdwp_placeholders; 134 | return str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text); 135 | } 136 | } 137 | 138 | 139 | ### bBlog Plugin Info ### 140 | 141 | function identify_modifier_markdown() { 142 | return array( 143 | 'name' => 'markdown', 144 | 'type' => 'modifier', 145 | 'nicename' => 'Markdown', 146 | 'description' => 'A text-to-HTML conversion tool for web writers', 147 | 'authors' => 'Michel Fortin and John Gruber', 148 | 'licence' => 'BSD-like', 149 | 'version' => MARKDOWN_VERSION, 150 | 'help' => 'Markdown syntax allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by John Gruber. More...' 151 | ); 152 | } 153 | 154 | 155 | ### Smarty Modifier Interface ### 156 | 157 | function smarty_modifier_markdown($text) { 158 | return Markdown($text); 159 | } 160 | 161 | 162 | ### Textile Compatibility Mode ### 163 | 164 | # Rename this file to "classTextile.php" and it can replace Textile everywhere. 165 | 166 | if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) { 167 | # Try to include PHP SmartyPants. Should be in the same directory. 168 | @include_once 'smartypants.php'; 169 | # Fake Textile class. It calls Markdown instead. 170 | class Textile { 171 | function TextileThis($text, $lite='', $encode='') { 172 | if ($lite == '' && $encode == '') $text = Markdown($text); 173 | if (function_exists('SmartyPants')) $text = SmartyPants($text); 174 | return $text; 175 | } 176 | # Fake restricted version: restrictions are not supported for now. 
177 | function TextileRestricted($text, $lite='', $noimage='') { 178 | return $this->TextileThis($text, $lite); 179 | } 180 | # Workaround to ensure compatibility with TextPattern 4.0.3. 181 | function blockLite($text) { return $text; } 182 | } 183 | } 184 | 185 | 186 | 187 | # 188 | # Markdown Parser Class 189 | # 190 | 191 | class Markdown_Parser { 192 | 193 | # Regex to match balanced [brackets]. 194 | # Needed to insert a maximum bracked depth while converting to PHP. 195 | var $nested_brackets_depth = 6; 196 | var $nested_brackets_re; 197 | 198 | var $nested_url_parenthesis_depth = 4; 199 | var $nested_url_parenthesis_re; 200 | 201 | # Table of hash values for escaped characters: 202 | var $escape_chars = '\`*_{}[]()>#+-.!'; 203 | var $escape_chars_re; 204 | 205 | # Change to ">" for HTML output. 206 | var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX; 207 | var $tab_width = MARKDOWN_TAB_WIDTH; 208 | 209 | # Change to `true` to disallow markup or entities. 210 | var $no_markup = false; 211 | var $no_entities = false; 212 | 213 | # Predefined urls and titles for reference links and images. 214 | var $predef_urls = array(); 215 | var $predef_titles = array(); 216 | 217 | 218 | function __construct() { 219 | # 220 | # Constructor function. Initialize appropriate member variables. 221 | # 222 | $this->_initDetab(); 223 | $this->prepareItalicsAndBold(); 224 | 225 | $this->nested_brackets_re = 226 | str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth). 227 | str_repeat('\])*', $this->nested_brackets_depth); 228 | 229 | $this->nested_url_parenthesis_re = 230 | str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth). 231 | str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth); 232 | 233 | $this->escape_chars_re = '['.preg_quote($this->escape_chars).']'; 234 | 235 | # Sort document, block, and span gamut in ascendent priority order. 
236 | asort($this->document_gamut); 237 | asort($this->block_gamut); 238 | asort($this->span_gamut); 239 | } 240 | 241 | 242 | # Internal hashes used during transformation. 243 | var $urls = array(); 244 | var $titles = array(); 245 | var $html_hashes = array(); 246 | 247 | # Status flag to avoid invalid nesting. 248 | var $in_anchor = false; 249 | 250 | 251 | function setup() { 252 | # 253 | # Called before the transformation process starts to setup parser 254 | # states. 255 | # 256 | # Clear global hashes. 257 | $this->urls = $this->predef_urls; 258 | $this->titles = $this->predef_titles; 259 | $this->html_hashes = array(); 260 | 261 | $in_anchor = false; 262 | } 263 | 264 | function teardown() { 265 | # 266 | # Called after the transformation process to clear any variable 267 | # which may be taking up memory unnecessarly. 268 | # 269 | $this->urls = array(); 270 | $this->titles = array(); 271 | $this->html_hashes = array(); 272 | } 273 | 274 | 275 | function transform($text) { 276 | # 277 | # Main function. Performs some preprocessing on the input text 278 | # and pass it through the document gamut. 279 | # 280 | $this->setup(); 281 | 282 | # Remove UTF-8 BOM and marker character in input, if present. 283 | $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text); 284 | 285 | # Standardize line endings: 286 | # DOS to Unix and Mac to Unix 287 | $text = preg_replace('{\r\n?}', "\n", $text); 288 | 289 | # Make sure $text ends with a couple of newlines: 290 | $text .= "\n\n"; 291 | 292 | # Convert all tabs to spaces. 293 | $text = $this->detab($text); 294 | 295 | # Turn block-level HTML blocks into hash entries 296 | $text = $this->hashHTMLBlocks($text); 297 | 298 | # Strip any lines consisting only of spaces and tabs. 299 | # This makes subsequent regexen easier to write, because we can 300 | # match consecutive blank lines with /\n+/ instead of something 301 | # contorted like /[ ]*\n+/ . 
302 | $text = preg_replace('/^[ ]+$/m', '', $text); 303 | 304 | # Run document gamut methods. 305 | foreach ($this->document_gamut as $method => $priority) { 306 | $text = $this->$method($text); 307 | } 308 | 309 | $this->teardown(); 310 | 311 | return $text . "\n"; 312 | } 313 | 314 | var $document_gamut = array( 315 | # Strip link definitions, store in hashes. 316 | "stripLinkDefinitions" => 20, 317 | 318 | "runBasicBlockGamut" => 30, 319 | ); 320 | 321 | 322 | function stripLinkDefinitions($text) { 323 | # 324 | # Strips link definitions from text, stores the URLs and titles in 325 | # hash references. 326 | # 327 | $less_than_tab = $this->tab_width - 1; 328 | 329 | # Link defs are in the form: ^[id]: url "optional title" 330 | $text = preg_replace_callback('{ 331 | ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1 332 | [ ]* 333 | \n? # maybe *one* newline 334 | [ ]* 335 | (?: 336 | <(.+?)> # url = $2 337 | | 338 | (\S+?) # url = $3 339 | ) 340 | [ ]* 341 | \n? # maybe one newline 342 | [ ]* 343 | (?: 344 | (?<=\s) # lookbehind for whitespace 345 | ["(] 346 | (.*?) # title = $4 347 | [")] 348 | [ ]* 349 | )? # title is optional 350 | (?:\n+|\Z) 351 | }xm', 352 | array(&$this, '_stripLinkDefinitions_callback'), 353 | $text); 354 | return $text; 355 | } 356 | function _stripLinkDefinitions_callback($matches) { 357 | $link_id = strtolower($matches[1]); 358 | $url = $matches[2] == '' ? $matches[3] : $matches[2]; 359 | $this->urls[$link_id] = $url; 360 | $this->titles[$link_id] =& $matches[4]; 361 | return ''; # String that will replace the block 362 | } 363 | 364 | 365 | function hashHTMLBlocks($text) { 366 | if ($this->no_markup) return $text; 367 | 368 | $less_than_tab = $this->tab_width - 1; 369 | 370 | # Hashify HTML blocks: 371 | # We only want to do this for block-level HTML tags, such as headers, 372 | # lists, and tables. That's because we still want to wrap
s around 373 | # "paragraphs" that are wrapped in non-block-level tags, such as anchors, 374 | # phrase emphasis, and spans. The list of tags we're looking for is 375 | # hard-coded: 376 | # 377 | # * List "a" is made of tags which can be both inline or block-level. 378 | # These will be treated block-level when the start tag is alone on 379 | # its line, otherwise they're not matched here and will be taken as 380 | # inline later. 381 | # * List "b" is made of tags which are always block-level; 382 | # 383 | $block_tags_a_re = 'ins|del'; 384 | $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'. 385 | 'script|noscript|form|fieldset|iframe|math'; 386 | 387 | # Regular expression for the content of a block tag. 388 | $nested_tags_level = 4; 389 | $attr = ' 390 | (?> # optional tag attributes 391 | \s # starts with whitespace 392 | (?> 393 | [^>"/]+ # text outside quotes 394 | | 395 | /+(?!>) # slash not followed by ">" 396 | | 397 | "[^"]*" # text inside double quotes (tolerate ">") 398 | | 399 | \'[^\']*\' # text inside single quotes (tolerate ">") 400 | )* 401 | )? 402 | '; 403 | $content = 404 | str_repeat(' 405 | (?> 406 | [^<]+ # content without tag 407 | | 408 | <\2 # nested opening tag 409 | '.$attr.' # attributes 410 | (?> 411 | /> 412 | | 413 | >', $nested_tags_level). # end of opening tag 414 | '.*?'. # last level nested tag content 415 | str_repeat(' 416 | \2\s*> # closing nested tag 417 | ) 418 | | 419 | <(?!/\2\s*> # other tags with a different name 420 | ) 421 | )*', 422 | $nested_tags_level); 423 | $content2 = str_replace('\2', '\3', $content); 424 | 425 | # First, look for nested blocks, e.g.: 426 | #
` blocks.
1081 | #
1082 | $text = preg_replace_callback('{
1083 | (?:\n\n|\A\n?)
1084 | ( # $1 = the code block -- one or more lines, starting with a space/tab
1085 | (?>
1086 | [ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces
1087 | .*\n+
1088 | )+
1089 | )
1090 | ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1091 | }xm',
1092 | array(&$this, '_doCodeBlocks_callback'), $text);
1093 |
1094 | return $text;
1095 | }
function _doCodeBlocks_callback($matches) {
    #
    # Callback for the code-block pattern: converts one matched indented
    # block into a <pre><code> element.
    #
    # Params:
    #   $matches - regex match array; $matches[1] is the raw indented block.
    #
    # Returns the element passed through hashBlock(), surrounded by blank
    # lines.
    #
    $codeblock = $matches[1];

    # Remove one level of indentation, then escape HTML-significant
    # characters (quotes are left untouched by ENT_NOQUOTES).
    $codeblock = $this->outdent($codeblock);
    $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);

    # trim leading newlines and trailing newlines
    $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);

    # Wrap in the code-block element; without these tags the block would be
    # emitted as bare text.
    $codeblock = "<pre><code>$codeblock\n</code></pre>";
    return "\n\n".$this->hashBlock($codeblock)."\n\n";
}
1108 |
1109 |
function makeCodeSpan($code) {
    #
    # Create a code span markup for $code. Called from handleSpanToken.
    #
    # Params:
    #   $code - raw text found between the backtick markers.
    #
    # Returns the <code> element passed through hashPart(). The text is
    # trimmed and HTML-escaped first (quotes are left as-is).
    #
    $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
    return $this->hashPart("<code>$code</code>");
}
1117 |
1118 |
1119 | var $em_relist = array(
1120 | '' => '(?:(? '(?<=\S|^)(? '(?<=\S|^)(? '(?:(? '(?<=\S|^)(? '(?<=\S|^)(? '(?:(? '(?<=\S|^)(? '(?<=\S|^)(?em_relist as $em => $em_re) {
1142 | foreach ($this->strong_relist as $strong => $strong_re) {
1143 | # Construct list of allowed token expressions.
1144 | $token_relist = array();
1145 | if (isset($this->em_strong_relist["$em$strong"])) {
1146 | $token_relist[] = $this->em_strong_relist["$em$strong"];
1147 | }
1148 | $token_relist[] = $em_re;
1149 | $token_relist[] = $strong_re;
1150 |
1151 | # Construct master expression from list.
1152 | $token_re = '{('. implode('|', $token_relist) .')}';
1153 | $this->em_strong_prepared_relist["$em$strong"] = $token_re;
1154 | }
1155 | }
1156 | }
1157 |
function doItalicsAndBold($text) {
    #
    # Convert emphasis markers in $text into <em> and <strong> elements.
    #
    # The text is scanned token by token. Two parallel stacks track the
    # emphasis markers currently open ($token_stack) and the text collected
    # since each marker was opened ($text_stack); index 0 is the innermost
    # level. $em and $strong hold the marker strings currently open ('' when
    # none) and select which prepared regular expression is used next.
    #
    # Returns the converted text; every generated element is passed through
    # hashPart().
    #
    $token_stack = array('');
    $text_stack = array('');
    $em = '';
    $strong = '';
    $tree_char_em = false;

    while (1) {
        #
        # Get prepared regular expression for searching emphasis tokens
        # in current context.
        #
        $token_re = $this->em_strong_prepared_relist["$em$strong"];

        #
        # Each loop iteration searches for the next emphasis token and
        # handles it below.
        #
        $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
        $text_stack[0] .= $parts[0];
        $token =& $parts[1];
        $text =& $parts[2];

        if (empty($token)) {
            # Reached end of text span: empty stack without emitting
            # any more emphasis. Unclosed markers are put back as text.
            while ($token_stack[0]) {
                $text_stack[1] .= array_shift($token_stack);
                $text_stack[0] .= array_shift($text_stack);
            }
            break;
        }

        $token_len = strlen($token);
        if ($tree_char_em) {
            # Reached closing marker while inside a three-char emphasis.
            if ($token_len == 3) {
                # Three-char closing marker, close em and strong.
                array_shift($token_stack);
                $span = array_shift($text_stack);
                $span = $this->runSpanGamut($span);
                $span = "<strong><em>$span</em></strong>";
                $text_stack[0] .= $this->hashPart($span);
                $em = '';
                $strong = '';
            } else {
                # Other closing marker: close one em or strong and
                # change current token state to match the other.
                # ($token[0] replaces the brace offset syntax, which
                # PHP 8 no longer parses.)
                $token_stack[0] = str_repeat($token[0], 3-$token_len);
                $tag = $token_len == 2 ? "strong" : "em";
                $span = $text_stack[0];
                $span = $this->runSpanGamut($span);
                $span = "<$tag>$span</$tag>";
                $text_stack[0] = $this->hashPart($span);
                $$tag = ''; # $$tag stands for $em or $strong
            }
            $tree_char_em = false;
        } else if ($token_len == 3) {
            if ($em) {
                # Reached closing marker for both em and strong.
                # Close the two open levels, innermost first.
                for ($i = 0; $i < 2; ++$i) {
                    $shifted_token = array_shift($token_stack);
                    $tag = strlen($shifted_token) == 2 ? "strong" : "em";
                    $span = array_shift($text_stack);
                    $span = $this->runSpanGamut($span);
                    $span = "<$tag>$span</$tag>";
                    $text_stack[0] .= $this->hashPart($span);
                    $$tag = ''; # $$tag stands for $em or $strong
                }
            } else {
                # Reached opening three-char emphasis marker. Push on token
                # stack; will be handled by the special condition above.
                $em = $token[0];
                $strong = "$em$em";
                array_unshift($token_stack, $token);
                array_unshift($text_stack, '');
                $tree_char_em = true;
            }
        } else if ($token_len == 2) {
            if ($strong) {
                # Unwind any dangling emphasis marker:
                if (strlen($token_stack[0]) == 1) {
                    $text_stack[1] .= array_shift($token_stack);
                    $text_stack[0] .= array_shift($text_stack);
                }
                # Closing strong marker:
                array_shift($token_stack);
                $span = array_shift($text_stack);
                $span = $this->runSpanGamut($span);
                $span = "<strong>$span</strong>";
                $text_stack[0] .= $this->hashPart($span);
                $strong = '';
            } else {
                # Opening strong marker.
                array_unshift($token_stack, $token);
                array_unshift($text_stack, '');
                $strong = $token;
            }
        } else {
            # Here $token_len == 1
            if ($em) {
                if (strlen($token_stack[0]) == 1) {
                    # Closing emphasis marker:
                    array_shift($token_stack);
                    $span = array_shift($text_stack);
                    $span = $this->runSpanGamut($span);
                    $span = "<em>$span</em>";
                    $text_stack[0] .= $this->hashPart($span);
                    $em = '';
                } else {
                    # Innermost open marker is not an em marker: keep the
                    # token as literal text.
                    $text_stack[0] .= $token;
                }
            } else {
                # Opening emphasis marker.
                array_unshift($token_stack, $token);
                array_unshift($text_stack, '');
                $em = $token;
            }
        }
    }
    return $text_stack[0];
}
1279 |
1280 |
function doBlockQuotes($text) {
    #
    # Locate every run of ">"-prefixed lines in $text and replace it with
    # whatever _doBlockQuotes_callback returns for that run.
    #
    $blockquote_re = '/
        (                       # Wrap whole match in $1
            (?>
                ^[ ]*>[ ]?      # ">" at the start of a line
                .+\n            # rest of the first line
                (.+\n)*         # subsequent consecutive lines
                \n*             # blanks
            )+
        )
        /xm';

    return preg_replace_callback(
        $blockquote_re,
        array($this, '_doBlockQuotes_callback'),
        $text
    );
}
function _doBlockQuotes_callback($matches) {
    #
    # Callback for doBlockQuotes: converts one matched run of quoted lines
    # into a <blockquote> element.
    #
    # Params:
    #   $matches - regex match array; $matches[1] is the quoted run.
    #
    # Returns the element passed through hashBlock(), preceded by one
    # newline and followed by two.
    #
    $bq = $matches[1];
    # trim one level of quoting - trim whitespace-only lines
    $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
    $bq = $this->runBlockGamut($bq); # recurse

    # Indent the generated markup inside the blockquote.
    $bq = preg_replace('/^/m', " ", $bq);
    # These leading spaces cause problem with <pre> content,
    # so we need to fix that:
    $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
        array($this, '_doBlockQuotes_callback2'), $bq);

    return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
}
function _doBlockQuotes_callback2($matches) {
    #
    # Strip the leading space from the start of every line of the first
    # capture group and return the result.
    #
    return preg_replace('/^ /m', '', $matches[1]);
}
1315 |
1316 |
function formParagraphs($text) {
    #
    # Split $text into blank-line separated chunks and turn each one into
    # either a <p> paragraph or the stored HTML block it stands for.
    #
    # Params:
    #   $text - string to process with html tags
    #
    # Returns the chunks joined with blank lines.
    #
    # Strip leading and trailing lines:
    $text = preg_replace('/\A\n+|\n+\z/', '', $text);

    $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    #
    # Wrap <p> tags and unhashify HTML blocks
    #
    foreach ($grafs as $key => $value) {
        if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
            # Is a paragraph: run span-level processing, replace any
            # leading spaces with the opening tag, close it, then swap
            # hashed parts back in.
            $value = $this->runSpanGamut($value);
            $value = preg_replace('/^([ ]*)/', "<p>", $value);
            $value .= "</p>";
            $grafs[$key] = $this->unhash($value);
        }
        else {
            # Is a block: the chunk is a block hash key, so substitute the
            # stored HTML as-is.
            $grafs[$key] = $this->html_hashes[$value];
        }
    }

    return implode("\n\n", $grafs);
}
1385 |
1386 |
function encodeAttribute($text) {
    #
    # Encode text for a double-quoted HTML attribute. This function
    # is *not* suitable for attributes enclosed in single quotes.
    #
    # Params:
    #   $text - raw attribute value.
    #
    # Returns the value with ampersands and "<" handled by
    # encodeAmpsAndAngles() and every double quote turned into &quot;.
    #
    $text = $this->encodeAmpsAndAngles($text);
    # A literal double quote would terminate the attribute early.
    $text = str_replace('"', '&quot;', $text);
    return $text;
}
1396 |
1397 |
function encodeAmpsAndAngles($text) {
    #
    # Smart processing for ampersands and angle brackets that need to
    # be encoded. Valid character entities are left alone unless the
    # no-entities mode is set.
    #
    # Params:
    #   $text - text to encode.
    #
    # Returns the text with "&" encoded as &amp; and "<" as &lt;.
    #
    if ($this->no_entities) {
        # Entities are disallowed: encode every ampersand.
        $text = str_replace('&', '&amp;', $text);
    } else {
        # Ampersand-encoding based entirely on Nat Irons's Amputator
        # MT plugin. Only ampersands that do not already start something
        # shaped like an entity are encoded.
        $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
            '&amp;', $text);
    }
    # Encode remaining <'s
    $text = str_replace('<', '&lt;', $text);

    return $text;
}
1417 |
1418 |
function doAutoLinks($text) {
    #
    # Replace <url> and <email address> constructs in $text using the two
    # autolink callbacks: URLs first, e-mail addresses second.
    #
    $url_re = '{<((https?|ftp|dict):[^\'">\s]+)>}i';

    $email_re = '{
        <
        (?:mailto:)?
        (
            (?:
                [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
            |
                ".*?"
            )
            \@
            (?:
                [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
            |
                \[[\d.a-fA-F:]+\]   # IPv4 & IPv6
            )
        )
        >
        }xi';

    $linked = preg_replace_callback(
        $url_re, array($this, '_doAutoLinks_url_callback'), $text);

    # Email addresses:
    return preg_replace_callback(
        $email_re, array($this, '_doAutoLinks_email_callback'), $linked);
}
function _doAutoLinks_url_callback($matches) {
    #
    # Callback for the URL pattern in doAutoLinks: builds an anchor whose
    # href and visible text are both the matched URL.
    #
    # Params:
    #   $matches - regex match array; $matches[1] is the URL without the
    #              surrounding angle brackets.
    #
    # Returns the anchor passed through hashPart().
    #
    $url = $this->encodeAttribute($matches[1]);
    $link = "<a href=\"$url\">$url</a>";
    return $this->hashPart($link);
}
function _doAutoLinks_email_callback($matches) {
    #
    # Callback for the e-mail pattern in doAutoLinks: encodes the captured
    # address with encodeEmailAddress() and passes the result through
    # hashPart().
    #
    return $this->hashPart($this->encodeEmailAddress($matches[1]));
}
1456 |
1457 |
1458 | function encodeEmailAddress($addr) {
1459 | #
1460 | # Input: an email address, e.g. "foo@example.com"
1461 | #
1462 | # Output: the email address as a mailto link, with each character
1463 | # of the address encoded as either a decimal or hex entity, in
1464 | # the hopes of foiling most address harvesting spam bots. E.g.:
1465 | #
1466 | #
1470 | #
1471 | # Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
1472 | # With some optimizations by Milian Wolff.
1473 | #
1474 | $addr = "mailto:" . $addr;
1475 | $chars = preg_split('/(? $char) {
1479 | $ord = ord($char);
1480 | # Ignore non-ascii chars.
1481 | if ($ord < 128) {
1482 | $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
1483 | # roughly 10% raw, 45% hex, 45% dec
1484 | # '@' *must* be encoded. I insist.
1485 | if ($r > 90 && $char != '@') /* do nothing */;
1486 | else if ($r < 45) $chars[$key] = ''.dechex($ord).';';
1487 | else $chars[$key] = ''.$ord.';';
1488 | }
1489 | }
1490 |
1491 | $addr = implode('', $chars);
1492 | $text = implode('', array_slice($chars, 7)); # text without `mailto:`
1493 | $addr = "$text";
1494 |
1495 | return $addr;
1496 | }
1497 |
1498 |
1499 | function parseSpan($str) {
1500 | #
1501 | # Take the string $str and parse it into tokens, hashing embeded HTML,
1502 | # escaped characters and handling code spans.
1503 | #
1504 | $output = '';
1505 |
1506 | $span_re = '{
1507 | (
1508 | \\\\'.$this->escape_chars_re.'
1509 | |
1510 | (?no_markup ? '' : '
1513 | |
1514 | # comment
1515 | |
1516 | <\?.*?\?> | <%.*?%> # processing instruction
1517 | |
1518 | <[/!$]?[-a-zA-Z0-9:_]+ # regular tags
1519 | (?>
1520 | \s
1521 | (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1522 | )?
1523 | >
1524 | ').'
1525 | )
1526 | }xs';
1527 |
1528 | while (1) {
1529 | #
1530 | # Each loop iteration seach for either the next tag, the next
1531 | # openning code span marker, or the next escaped character.
1532 | # Each token is then passed to handleSpanToken.
1533 | #
1534 | $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
1535 |
1536 | # Create token from text preceding tag.
1537 | if ($parts[0] != "") {
1538 | $output .= $parts[0];
1539 | }
1540 |
1541 | # Check if we reach the end.
1542 | if (isset($parts[1])) {
1543 | $output .= $this->handleSpanToken($parts[1], $parts[2]);
1544 | $str = $parts[2];
1545 | }
1546 | else {
1547 | break;
1548 | }
1549 | }
1550 |
1551 | return $output;
1552 | }
1553 |
1554 |
function handleSpanToken($token, &$str) {
    #
    # Handle $token provided by parseSpan by determining its nature and
    # returning the corresponding value that should replace it.
    #
    # Params:
    #   $token - the matched token; its first character selects the case.
    #   $str   - text following the token, passed by reference. For a code
    #            span it is advanced past the closing marker.
    #
    # Returns the replacement text for the token.
    #
    # String offsets use [] because the {} form no longer parses on PHP 8.
    switch ($token[0]) {
        case "\\":
            # Backslash escape: emit the escaped character as a decimal
            # character reference.
            return $this->hashPart("&#". ord($token[1]). ";");
        case "`":
            # Search for end marker in remaining text.
            if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
                $str, $matches))
            {
                $str = $matches[2];
                $codespan = $this->makeCodeSpan($matches[1]);
                return $this->hashPart($codespan);
            }
            return $token; // return as text since no ending marker found.
        default:
            # Any other token is passed through hashPart() unchanged.
            return $this->hashPart($token);
    }
}
1578 |
function outdent($text) {
    #
    # Strip one indentation level from the start of every line: either a
    # single tab or between one and tab_width spaces.
    #
    $indent_re = '/^(\t|[ ]{1,'.$this->tab_width.'})/m';
    return preg_replace($indent_re, '', $text);
}
1585 |
1586 |
1587 | # String length function for detab. `_initDetab` will create a function to
1588 | # handle UTF-8 if the default function does not exist.
1589 | var $utf8_strlen = 'mb_strlen';
1590 |
function detab($text) {
    #
    # Expand tab characters into spaces.
    #
    # Only lines that contain at least one tab are touched; each of them is
    # rebuilt by _detab_callback.
    #
    return preg_replace_callback(
        '/^.*\t.*$/m',
        array($this, '_detab_callback'),
        $text
    );
}
function _detab_callback($matches) {
    #
    # Rebuild one tab-containing line: the line is cut at each tab and the
    # pieces are re-joined with enough spaces to reach the next multiple of
    # tab_width.
    #
    $measure = $this->utf8_strlen; # strlen function for UTF-8.

    $pieces = explode("\t", $matches[0]);
    # Start from the text before the first tab.
    $rebuilt = array_shift($pieces);

    foreach ($pieces as $piece) {
        # Spaces needed to reach the next tab stop from the current length.
        $padding = $this->tab_width -
            $measure($rebuilt, 'UTF-8') % $this->tab_width;
        $rebuilt .= str_repeat(" ", $padding) . $piece;
    }
    return $rebuilt;
}
function _initDetab() {
    #
    # Check for the availability of the function in the `utf8_strlen` property
    # (initially `mb_strlen`). If the function is not available, install a
    # fallback that loosely counts the number of UTF-8 characters with a
    # regular expression.
    #
    # The fallback is a closure rather than create_function(), which was
    # removed in PHP 8.0. is_callable() is used for the check so the method
    # can safely run again after the closure has been installed.
    #
    if (is_callable($this->utf8_strlen)) return;

    # The caller passes an encoding as a second argument; the closure simply
    # ignores it. The return value is the number of matches found.
    $this->utf8_strlen = function ($text) {
        return preg_match_all(
            '/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
            $text, $m);
    };
}
1633 |
1634 |
function unhash($text) {
    #
    # Replace every hash placeholder in $text (a character, \x1A, digits,
    # then the same character again) with the value _unhash_callback
    # returns for it.
    #
    $placeholder_re = '/(.)\x1A[0-9]+\1/';
    return preg_replace_callback(
        $placeholder_re,
        array($this, '_unhash_callback'),
        $text
    );
}
function _unhash_callback($matches) {
    # The whole matched placeholder is the lookup key into html_hashes.
    $placeholder = $matches[0];
    return $this->html_hashes[$placeholder];
}
1645 |
1646 | }
1647 |
1648 | /*
1649 |
1650 | PHP Markdown
1651 | ============
1652 |
1653 | Description
1654 | -----------
1655 |
1656 | This is a PHP translation of the original Markdown formatter written in
1657 | Perl by John Gruber.
1658 |
1659 | Markdown is a text-to-HTML filter; it translates an easy-to-read /
1660 | easy-to-write structured text format into HTML. Markdown's text format
1661 | is most similar to that of plain text email, and supports features such
1662 | as headers, *emphasis*, code blocks, blockquotes, and links.
1663 |
1664 | Markdown's syntax is designed not as a generic markup language, but
1665 | specifically to serve as a front-end to (X)HTML. You can use span-level
1666 | HTML tags anywhere in a Markdown document, and you can use block level
1667 | HTML tags (like <div> and <table> as well).
1668 |
1669 | For more information about Markdown's syntax, see:
1670 |
1671 | <http://daringfireball.net/projects/markdown/>
1672 |
1673 |
1674 | Bugs
1675 | ----
1676 |
1677 | To file bug reports please send email to:
1678 |
1679 |
1680 |
1681 | Please include with your report: (1) the example input; (2) the output you
1682 | expected; (3) the output Markdown actually produced.
1683 |
1684 |
1685 | Version History
1686 | ---------------
1687 |
1688 | See the readme file for detailed release notes for this version.
1689 |
1690 |
1691 | Copyright and License
1692 | ---------------------
1693 |
1694 | PHP Markdown
1695 | Copyright (c) 2004-2009 Michel Fortin
1696 |
1697 | All rights reserved.
1698 |
1699 | Based on Markdown
1700 | Copyright (c) 2003-2006 John Gruber
1701 |
1702 | All rights reserved.
1703 |
1704 | Redistribution and use in source and binary forms, with or without
1705 | modification, are permitted provided that the following conditions are
1706 | met:
1707 |
1708 | * Redistributions of source code must retain the above copyright notice,
1709 | this list of conditions and the following disclaimer.
1710 |
1711 | * Redistributions in binary form must reproduce the above copyright
1712 | notice, this list of conditions and the following disclaimer in the
1713 | documentation and/or other materials provided with the distribution.
1714 |
1715 | * Neither the name "Markdown" nor the names of its contributors may
1716 | be used to endorse or promote products derived from this software
1717 | without specific prior written permission.
1718 |
1719 | This software is provided by the copyright holders and contributors "as
1720 | is" and any express or implied warranties, including, but not limited
1721 | to, the implied warranties of merchantability and fitness for a
1722 | particular purpose are disclaimed. In no event shall the copyright owner
1723 | or contributors be liable for any direct, indirect, incidental, special,
1724 | exemplary, or consequential damages (including, but not limited to,
1725 | procurement of substitute goods or services; loss of use, data, or
1726 | profits; or business interruption) however caused and on any theory of
1727 | liability, whether in contract, strict liability, or tort (including
1728 | negligence or otherwise) arising in any way out of the use of this
1729 | software, even if advised of the possibility of such damage.
1730 |
1731 | */
--------------------------------------------------------------------------------