Suggested: " 262 | msgstr "" 263 | 264 | #: html-import-options.php:242 265 | msgid "If you have data tables, also include:" 266 | msgstr "" 267 | 268 | #: html-import-options.php:256 269 | msgid "Allowed attributes" 270 | msgstr "" 271 | 272 | #: html-import-options.php:260 273 | msgid "" 274 | "Enter attributes separated by commas. All attributes not listed here will be " 275 | "removed.
Suggested: href,src,alt,title
\n" 276 | "\t\t\t \t\t\tIf you have data tables, also include: summary," 277 | "rowspan,colspan,span" 278 | msgstr "" 279 | 280 | #: html-import-options.php:273 281 | msgid "Select title by" 282 | msgstr "" 283 | 284 | #: html-import-options.php:302 285 | msgid "" 286 | "Leave blank to use a tag without an attribute, or when the attributes don't " 287 | "matter, such as <title>" 288 | msgstr "" 289 | 290 | #: html-import-options.php:320 291 | msgid "The name of the editable region (e.g. 'Page Title')" 292 | msgstr "" 293 | 294 | #: html-import-options.php:324 295 | msgid "Phrase to remove from page title: " 296 | msgstr "" 297 | 298 | #: html-import-options.php:327 299 | msgid "" 300 | "Any common title phrase (such as the site name, which most themes will print " 301 | "automatically)" 302 | msgstr "" 303 | 304 | #: html-import-options.php:339 305 | msgid "Import files as" 306 | msgstr "" 307 | 308 | #: html-import-options.php:360 309 | msgid "Set status to" 310 | msgstr "" 311 | 312 | #: html-import-options.php:363 313 | msgid "publish" 314 | msgstr "" 315 | 316 | #: html-import-options.php:364 317 | msgid "draft" 318 | msgstr "" 319 | 320 | #: html-import-options.php:365 321 | msgid "private" 322 | msgstr "" 323 | 324 | #: html-import-options.php:366 325 | msgid "pending" 326 | msgstr "" 327 | 328 | #: html-import-options.php:371 329 | msgid "Set timestamps to" 330 | msgstr "" 331 | 332 | #: html-import-options.php:374 333 | msgid "now" 334 | msgstr "" 335 | 336 | #: html-import-options.php:376 337 | msgid "last time the file was modified" 338 | msgstr "" 339 | 340 | #: html-import-options.php:381 341 | msgid "Set author to" 342 | msgstr "" 343 | 344 | #: html-import-options.php:387 345 | msgid "Import pages as children of: " 346 | msgstr "" 347 | 348 | #: html-import-options.php:390 html-import-options.php:391 349 | msgid "None (top level)" 350 | msgstr "" 351 | 352 | #: html-import-options.php:402 353 | msgid "Taxonomies" 354 | msgstr "" 355 | 356 | #: 
html-import-options.php:410 357 | msgid "" 358 | "Assign categories, tags, and custom taxonomy terms to your imported posts:" 359 | msgstr "" 360 | 361 | #: html-import-options.php:452 362 | msgid "Regenerate .htaccess redirects" 363 | msgstr "" 364 | 365 | #: html-import-options.php:453 366 | msgid "" 367 | "If you changed your permalink structure after you " 368 | "imported files, you can regenerate the redirects." 369 | msgstr "" 370 | 371 | #: html-import-options.php:456 372 | msgid "Other helpful plugins" 373 | msgstr "" 374 | 375 | #: html-import-options.php:458 376 | msgid "" 377 | "Broken Link Checker finds broken links and references to " 378 | "missing media files. Since the importer does not handle links or media files " 379 | "other than images, you should run this to see what else needs to be copied " 380 | "or updated from your old site." 381 | msgstr "" 382 | 383 | #: html-import-options.php:459 384 | msgid "" 385 | "Search and Replace helps you fix many broken links at " 386 | "once, if you have many links to the same files or if there is a pattern " 387 | "(like <a href=\"../../files\">) to your broken links." 388 | msgstr "" 389 | 390 | #: html-import-options.php:460 391 | msgid "" 392 | "Redirection provides a nice admin interface for managing " 393 | "redirects. If you would rather not edit your .htaccess file, or " 394 | "if you just want to redirect one or two of your old pages, you can ignore " 395 | "the redirects generated by the importer. Instead, copy the post's old URL " 396 | "from the custom fields and paste it into Redirection's options." 397 | msgstr "" 398 | 399 | #: html-import-options.php:461 400 | msgid "" 401 | "Add from Server lets you import media files that are on " 402 | "your server but not part of the WordPress media library." 
403 | msgstr "" 404 | 405 | #: html-import-options.php:462 406 | msgid "" 407 | "Add Linked Images to Gallery is helpful if you have " 408 | "imported data using other plugins and you would like to import linked " 409 | "images. However, it handles only images that are referenced with complete " 410 | "URLs; relative paths will not work." 411 | msgstr "" 412 | 413 | #: html-import-options.php:466 414 | msgid "Donate" 415 | msgstr "" 416 | 417 | #: html-import-options.php:468 418 | msgid "" 419 | "If this importer has saved you hours of copying and pasting, a donation toward future development would be much appreciated!" 421 | msgstr "" 422 | 423 | #: html-import-options.php:475 424 | msgid "Save settings" 425 | msgstr "" 426 | 427 | #: html-import-options.php:507 html-importer.php:677 428 | msgid "" 429 | "The beginning directory you entered is not an absolute path. Relative paths " 430 | "are not allowed here." 431 | msgstr "" 432 | 433 | #: html-import-options.php:554 434 | msgid "You did not enter an HTML content tag to import." 435 | msgstr "" 436 | 437 | #: html-import-options.php:556 438 | msgid "You did not enter a Dreamweaver content template region to import." 439 | msgstr "" 440 | 441 | #: html-import-options.php:558 442 | msgid "You did not enter an HTML title tag to import." 443 | msgstr "" 444 | 445 | #: html-import-options.php:560 446 | msgid "You did not enter a Dreamweaver title template region to import." 447 | msgstr "" 448 | 449 | #: html-import-options.php:596 450 | msgid "" 451 | "If you intend to set a permalink structure, you should do " 452 | "it \n" 453 | "\t\t\t\tbefore importing so the .htaccess redirects will be " 454 | "accurate." 455 | msgstr "" 456 | 457 | #: html-import-options.php:599 458 | msgid "Settings saved. %s Ready to import files?" 
459 | msgstr "" 460 | 461 | #: html-importer.php:24 462 | msgid "HTML Importer" 463 | msgstr "" 464 | 465 | #: html-importer.php:37 466 | msgid "" 467 | "It looks like you have not yet visited the HTML Import " 468 | "options page. Please do so now! You need to specify which portions of " 469 | "your HTML files should be imported before you proceed." 470 | msgstr "" 471 | 472 | #: html-importer.php:40 473 | msgid "What are you importing today?" 474 | msgstr "" 475 | 476 | #: html-importer.php:45 477 | msgid "a directory of files" 478 | msgstr "" 479 | 480 | #: html-importer.php:48 481 | msgid "a single file" 482 | msgstr "" 483 | 484 | #: html-importer.php:52 485 | msgid "Choose an HTML file from your computer:" 486 | msgstr "" 487 | 488 | #: html-importer.php:58 489 | msgid "" 490 | "Your files will be imported from %s. Change " 491 | "directories." 492 | msgstr "" 493 | 494 | #: html-importer.php:65 495 | msgid "Submit" 496 | msgstr "" 497 | 498 | #: html-importer.php:82 html-importer.php:644 499 | msgid ".htaccess Redirects" 500 | msgstr "" 501 | 502 | #: html-importer.php:83 503 | msgid "" 504 | "Copy these lines into your .htaccess above the WordPress " 505 | "section." 506 | msgstr "" 507 | 508 | #: html-importer.php:85 509 | msgid "" 510 | "All done! You can change your permalink structure and regenerate the redirects again, or start " 512 | "over." 513 | msgstr "" 514 | 515 | #: html-importer.php:87 516 | msgid "" 517 | "No posts were found with the URL_before_HTML_Import custom field. Could not " 518 | "generate rewrite rules." 519 | msgstr "" 520 | 521 | #: html-importer.php:387 522 | msgid "the uploaded file" 523 | msgstr "" 524 | 525 | #: html-importer.php:393 526 | msgid "%s (%s) has already been imported" 527 | msgstr "" 528 | 529 | #: html-importer.php:413 530 | msgid "Could not import %s. You should copy its contents manually." 531 | msgstr "" 532 | 533 | #: html-importer.php:448 534 | msgid "Imported the file as %s." 
535 | msgstr "" 536 | 537 | #: html-importer.php:471 538 | msgid "Sorry, this file type is not permitted for security reasons." 539 | msgstr "" 540 | 541 | #: html-importer.php:479 542 | msgid "" 543 | "Could not find the right path to %s (tried %s). It could not be imported. " 544 | "Please upload it manually." 545 | msgstr "" 546 | 547 | #: html-importer.php:562 548 | msgid "Found %d image in %s. Importing... " 549 | msgid_plural "Found %d images in %s. Importing... " 550 | msgstr[0] "" 551 | msgstr[1] "" 552 | 553 | #: html-importer.php:607 554 | msgid "done." 555 | msgstr "" 556 | 557 | #: html-importer.php:614 558 | msgid "Importing images..." 559 | msgstr "" 560 | 561 | #: html-importer.php:622 562 | msgid "All done. Go to the Media Library." 563 | msgstr "" 564 | 565 | #: html-importer.php:634 566 | msgid "ID" 567 | msgstr "" 568 | 569 | #: html-importer.php:635 570 | msgid "Old path" 571 | msgstr "" 572 | 573 | #: html-importer.php:636 574 | msgid "New path" 575 | msgstr "" 576 | 577 | #: html-importer.php:646 578 | msgid "" 579 | "If you need to change your permalink structure, you can " 580 | "regenerate the redirects (or do it later from the options screen under Tools)." 582 | msgstr "" 583 | 584 | #: html-importer.php:650 585 | msgid "All done. Have fun!" 586 | msgstr "" 587 | 588 | #: html-importer.php:666 589 | msgid "Importing HTML file..." 590 | msgstr "" 591 | 592 | #: html-importer.php:686 593 | msgid "Importing HTML files..." 594 | msgstr "" 595 | 596 | #: html-importer.php:693 597 | msgid "Your file upload didn't work. Try again?" 598 | msgstr "" 599 | 600 | #: html-importer.php:745 601 | msgid "HTML" 602 | msgstr "" 603 | 604 | #: html-importer.php:745 605 | msgid "" 606 | "Import the contents of HTML files as posts, pages, or any custom post type. " 607 | "Visit the options page first to select which portions of " 608 | "your documents should be imported." 
609 | msgstr "" 610 | -------------------------------------------------------------------------------- /html-import-options.php: -------------------------------------------------------------------------------- 1 | ABSPATH.__( 'html-files-to-import', 'import-html-pages' ), 6 | 'old_url' => '', 7 | 'index_file' => 'index.html', 8 | 'file_extensions' => 'html,htm,shtml', 9 | 'skipdirs' => __( 'images,includes,Templates', 'import-html-pages' ), 10 | 'preserve_slugs' => 0, 11 | 'status' => 'publish', 12 | 'root_parent' => 0, 13 | 'type' => 'page', 14 | 'timestamp' => 'filemtime', 15 | 'import_content' => 0, 16 | 'content_region' => '', 17 | 'content_tag' => __( 'div', 'import-html-pages' ), 18 | 'content_tagatt' => __( 'id', 'import-html-pages' ), 19 | 'content_attval' => __( 'content', 'import-html-pages' ), 20 | 'clean_html' => 0, 21 | 'encode' => 1, 22 | 'allow_tags' => '
.htaccess above the WordPress section.', 'import-html-pages' ); ?>
and regenerate the redirects again, or start over.', 'import-html-pages' ), 'options-permalink.php', wp_nonce_url( 'admin.php?import=html&step=2', 'html_import_regenerate' ), 'admin.php?import=html' ) ?>
85 | parent_directory( $path ), '/' );
92 |
93 | // create array of parent directories, starting with the index file's parent and moving up to the root directory
94 | while ( $parentdir != $options['root_directory'] ) {
95 | $parentarr[] = $parentdir;
96 | $parentdir = rtrim( $this->parent_directory( $parentdir ), '/' );
97 | }
98 | // reverse the array so we start at the root -- this way the parents can be found when we search in $this->get_post
99 | $parentarr = array_reverse( $parentarr );
100 |
101 | // DEBUG
102 | // echo '
',
23 | 'allow_attributes' => 'href,alt,title,src',
24 | 'import_images' => 0,
25 | 'remove_srcset' => 0,
26 | 'import_documents' => 0,
27 | 'document_mimes' => 'rtf,doc,docx,xls,xlsx,csv,ppt,pps,pptx,ppsx,pdf,zip,wmv,avi,flv,mov,mpeg,mp3,m4a,wav',
28 | 'fix_links' => 0,
29 | 'import_title' => 0,
30 | 'title_region' => '',
31 | 'title_tag' => __( 'title', 'import-html-pages' ),
32 | 'title_tagatt' => '',
33 | 'title_attval' => '',
34 | 'remove_from_title' => '',
35 | 'title_inside' => 0,
36 | 'meta_desc' => 1,
37 | 'user' => 0,
38 | 'page_template' => 0,
39 | 'firstrun' => true,
40 | 'import_date' => 0,
41 | 'date_region' => '',
42 | 'date_tag' => __( 'div', 'import-html-pages' ),
43 | 'date_tagatt' => __( 'id', 'import-html-pages' ),
44 | 'date_attval' => __( 'date', 'import-html-pages' ),
45 | 'import_field' => array( '0' ),
46 | 'customfield_name' => array( '' ),
47 | 'customfield_region' => array( '' ),
48 | 'customfield_tag' => array( __( 'div', 'import-html-pages' ) ),
49 | 'customfield_tagatt' => array( __( 'class', 'import-html-pages' ) ),
50 | 'customfield_attval' => array( __( 'fieldclass', 'import-html-pages' ) ),
51 | 'customfield_html' => array( '' )
52 | );
53 | $options = get_option( 'html_import' );
54 | if ( !is_array( $options ) ) $options = array();
55 | return array_merge( $defaults, $options );
56 | }
57 |
58 | function html_import_options_page() { ?>
59 |
', $msg );
898 |
899 | if ( empty( $msg ) ) {
900 |
901 | $linkstructure = get_option( 'permalink_structure' );
902 | if ( empty( $linkstructure ) )
903 | $linkmsg = sprintf( __( 'If you intend to set a permalink structure, you should do it
904 | before importing so the .htaccess redirects will be accurate.', 'import-html-pages' ), 'options-permalink.php' );
905 |
906 | $msg = sprintf( __( 'Settings saved. %s Ready to import files?', 'import-html-pages' ),
907 | $linkmsg, 'admin.php?import=html' );
908 | // $msg .= ''. print_r( $input, false ) .'
';
909 | $msgtype = 'updated';
910 | }
911 |
912 | add_settings_error( 'html_import', 'html_import', $msg, $msgtype );
913 | return $input;
914 | }
915 |
/**
 * Custom file validator that accommodates Win32 paths starting with a drive letter.
 *
 * Based on WordPress's validate_file(). The drive-letter check of that
 * function (return code 2) is intentionally left out here, so a path such as
 * "C:/sites/old-site" is accepted.
 *
 * @param string       $file          Path to validate.
 * @param array|string $allowed_files Optional. Allowed path, or list of allowed paths.
 *                                    An empty value disables the whitelist check.
 * @return int 0 when the path is acceptable, 1 when it contains '..' or './',
 *             3 when a whitelist was given and the path is not on it.
 */
function validate_import_file( $file, $allowed_files = '' ) {
	// Reject parent-directory ('..') and current-directory ('./') segments.
	if ( false !== strpos( $file, '..' ) || false !== strpos( $file, './' ) ) {
		return 1;
	}

	// The default is an empty string, so normalize to an array before searching.
	// Compare strictly: a loose comparison treats numeric-looking names as equal
	// ( '1e3' == '1000' ), which would let a file slip past the whitelist.
	if ( ! empty( $allowed_files ) && ! in_array( $file, (array) $allowed_files, true ) ) {
		return 3;
	}

	return 0;
}
933 |
934 | // custom walker so we can change the name attribute of the category checkboxes ( until #16437 is fixed )
935 | // mostly a duplicate of Walker_Category_Checklist
936 | class HTML_Import_Walker_Category_Checklist extends Walker {
937 | var $tree_type = 'category';
938 | var $db_fields = array ( 'parent' => 'parent', 'id' => 'term_id' );
939 |
940 | function start_lvl( &$output, $depth = 0, $args = array() ) {
941 | $indent = str_repeat( "\t", $depth );
942 | $output .= "$indent\n";
943 | }
944 |
945 | function end_lvl( &$output, $depth = 0, $args = array() ) {
946 | $indent = str_repeat( "\t", $depth );
947 | $output .= "$indent
\n";
948 | }
949 |
950 | function start_el( &$output, $object, $depth = 0, $args = array(), $current_object_id = 0 ) {
951 | extract( $args );
952 | if ( empty( $taxonomy ) )
953 | $taxonomy = 'category';
954 |
955 | // This is the part we changed
956 | $name = 'html_import['.$taxonomy.']';
957 |
958 | $class = in_array( $object->term_id, $popular_cats ) ? ' class="popular-category"' : '';
959 | $output .= "\n'.__( 'HTML Importer', 'import-html-pages' ).'
';
25 | }
26 |
27 | function footer() {
28 | echo '';
29 | }
30 |
31 | function greet() {
32 | $options = get_option( 'html_import' );
33 | ?>
34 | change your permalink structure
'.print_r( $parentarr, true ).''; 103 | 104 | foreach ( $parentarr as $parentdir ) { 105 | $parentID = array_search( $parentdir, $this->filearr ); 106 | if ( $parentID === false ) 107 | $this->get_post( $parentdir, true ); 108 | } 109 | 110 | // now fix the parent ID of the original index file ( in $postid ) 111 | // it's the next to last element in the array we want. ( The last one is the index file. ) If this doesn't exist, we don't need to fix the parent. 112 | $grandparent = count( $parentarr )-2; 113 | if ( isset( $parentarr[$grandparent] ) ) { 114 | $parentdir = $parentarr[$grandparent]; 115 | $my_post['ID'] = $postid; 116 | $my_post['post_parent'] = array_search( $parentdir, $this->filearr ); 117 | 118 | //echo "\n
The parent of $postid should be ".$my_post['post_parent'].""; 119 | 120 | if ( !empty( $my_post['post_parent'] ) ) 121 | wp_update_post( $my_post ); 122 | } 123 | } 124 | 125 | function parent_directory( $path ) { 126 | $win = false; 127 | if ( strpos( $path, '\\' ) !== FALSE ) { 128 | $win = true; 129 | $path = str_replace( '\\', '/', $path ); 130 | } 131 | if ( substr( $path, strlen( $path ) - 1 ) != '/' ) $path .= '/'; 132 | $path = substr( $path, 0, strlen( $path ) - 1 ); 133 | $path = substr( $path, 0, strrpos( $path, '/' ) ) . '/'; 134 | if ( $win ) $path = str_replace( '/', '\\', $path ); 135 | return $path; 136 | } 137 | 138 | function fix_internal_links( $content, $id ) { 139 | // find all href attributes 140 | preg_match_all( '/]* href=[\'"]?([^>\'" ]+ )/i', $content, $matches ); 141 | for ( $i=0; $i
Old path: '.$oldpath; 166 | $oldfile = strrchr( $oldpath, '/' ); 167 | $linkpath = str_replace( $oldfile, '/'.$href, $oldpath ); 168 | $linkpath = $this->remove_dot_segments( $linkpath ); 169 | //echo ' Link path: '.$linkpath . '
'; 170 | } 171 | 172 | $linkpath = rtrim( $linkpath, '/' ); 173 | // DEBUG 174 | //echo 'Old link: '.$href.' Full path: '.$linkpath; 175 | 176 | // now replace the old URL with the new permalink 177 | $postkey = array_search( $linkpath, $this->filearr ); 178 | 179 | // DEBUG 180 | //echo ' Post ID:'.$postkey.'.
'; 181 | if ( !empty( $postkey ) ) { 182 | 183 | // DEBUG 184 | //echo 'I think '.$linkpath.' has moved to '.get_permalink( $postkey ).'.
'; 185 | $content = str_replace( $href, get_permalink( $postkey ), $content ); 186 | } 187 | } // if #/mailto 188 | } // foreach 189 | } // if empty 190 | return $content; 191 | } 192 | 193 | function remove_dot_segments( $path ) { 194 | $inSegs = preg_split( '!/!u', $path ); 195 | $outSegs = array(); 196 | foreach ( $inSegs as $seg ) 197 | { 198 | if ( empty( $seg ) || $seg == '.' ) 199 | continue; 200 | if ( $seg == '..' ) 201 | array_pop( $outSegs ); 202 | else 203 | array_push( $outSegs, $seg ); 204 | } 205 | $outPath = implode( '/', $outSegs ); 206 | if ( isset( $path[0] ) && $path[0] == '/' ) 207 | $outPath = '/' . $outPath; 208 | if ( $outPath != '/' && 209 | ( mb_strlen( $path )-1 ) == mb_strrpos( $path, '/', 'UTF-8' ) ) 210 | $outPath .= '/'; 211 | $outPath = str_replace( 'http:/', 'http://', $outPath ); 212 | $outPath = str_replace( 'https:/', 'https://', $outPath ); 213 | $outPath = str_replace( ':///', '://', $outPath ); 214 | return rawurldecode( $outPath ); 215 | } 216 | 217 | function clean_html( $string, $allowtags = NULL, $allowattributes = NULL ) { 218 | // from: http://us3.php.net/manual/en/function.strip-tags.php#91498 219 | $string = strip_tags( $string,$allowtags ); 220 | if ( !is_null( $allowattributes ) ) { 221 | if( !is_array( $allowattributes ) ) 222 | $allowattributes = explode( ",",$allowattributes ); 223 | if( is_array( $allowattributes ) ) 224 | $allowattributes = implode( " )( ? 0 ) 226 | $allowattributes = "( ?]*>/i",create_function( 228 | '$matches', 229 | 'return preg_replace( "/ [^ =]*'.$allowattributes.'=( \"[^\"]*\"|\'[^\']*\' )/i", "", $matches[0] );' 230 | ),$string ); 231 | } 232 | // reduce line breaks and remove empty tags 233 | $string = str_replace( array( "\n", "\r", "\t" ), '', $string ); 234 | $string = preg_replace( "/<[^\/>]*>( [\s]? 
)*<\/[^>]*>/", ' ', $string ); 235 | // get rid of remaining newlines; basic HTML cleanup 236 | $string = str_replace( ' ', ' ', $string ); 237 | $string = preg_replace_callback( '|<( /?[A-Z]+ )|', create_function( '$match', 'return "<" . strtolower( $match[1] );' ), $string ); 238 | $string = str_replace( '', '
', $string ); 239 | $string = str_replace( '
', '
', $string ); 240 | return $string; 241 | } 242 | 243 | function handle_accents() { 244 | // from: http://www.php.net/manual/en/domdocument.loadhtml.php#91513 245 | $content = $this->file; 246 | if ( !empty( $content ) && function_exists( 'mb_convert_encoding' ) ) { 247 | mb_detect_order( "ASCII,UTF-8,ISO-8859-1,windows-1252,iso-8859-15" ); 248 | if ( empty( $encod ) ) 249 | $encod = mb_detect_encoding( $content ); 250 | $headpos = mb_strpos( $content,'' ); 251 | if ( FALSE === $headpos ) 252 | $headpos= mb_strpos( $content,'' ); 253 | if ( FALSE !== $headpos ) { 254 | $headpos+=6; 255 | $content = mb_substr( $content,0,$headpos ) . '' .mb_substr( $content,$headpos ); 256 | } 257 | $content = mb_convert_encoding( $content, 'HTML-ENTITIES', $encod ); 258 | } 259 | return $content; 260 | } 261 | 262 | function get_single_file( $txt = false ) { 263 | $importfile = file( $this->file ); // Read the file into an array 264 | $importfile = implode( '', $importfile ); // squish it 265 | // this strips whitespace out of
. Need to find a better way to handle that. For now, leave it alone.
266 | //$this->file = str_replace( array ( "\r\n", "\r" ), "\n", $importfile );
267 | $this->file = $importfile;
268 | $this->get_post( '', false );
269 | }
270 |
271 | function get_files_from_directory( $rootdir ) {
272 | $options = get_option( 'html_import' );
273 | $dir_content = scandir( $rootdir );
274 | foreach( $dir_content as $key => $val ) {
275 | set_time_limit( 30 );
276 | $path = $rootdir.'/'.$val;
277 | if( is_file( $path ) && is_readable( $path ) ) {
278 | $filename_parts = pathinfo( $path );
279 | $ext = '';
280 | if ( isset( $filename_parts['extension'] ) )
281 | $ext = strtolower( $filename_parts['extension'] );
282 | // allowed extensions only, please
283 | if ( !empty( $ext ) && in_array( $ext, $this->allowed ) ) {
284 | if ( filesize( $path ) > 0 ) { // silently skip empty files
285 | // read the HTML file
286 | $contents = @fopen( $path ); // read entire file
287 | if ( empty( $contents ) )
288 | $contents = @file_get_contents( $path );
289 | if ( !empty( $contents ) ) { // silently skip files we can't open
290 | $this->file = $contents;
291 | $this->get_post( $path, false ); // import the post
292 | }
293 | }
294 | }
295 | }
296 |
297 | elseif( is_dir( $path ) && is_readable( $path ) ) {
298 | if( !in_array( $val, $this->skip ) ) {
299 | $createpage = array();
300 | // get list of files in this directory only ( checking children )
301 | $files = scandir( $path );
302 | $exts = array();
303 | foreach ( $files as $file ) {
304 | $ext = '';
305 | $filename_parts = pathinfo( $file );
306 | if ( isset( $filename_parts['extension'] ) )
307 | $ext = strtolower( $filename_parts['extension'] );
308 | $ext = trim( $ext,'.' ); // dratted double dots
309 | if ( !empty( $ext ) ) $exts[] .= $ext;
310 | }
311 |
312 | // allowed extensions only, please. If there are files of the proper type, we should create a placeholder page
313 | $createpage = @array_intersect( $exts, $this->allowed ); // suppress warnings about not being an array
314 |
315 | if ( !empty( $createpage ) && is_post_type_hierarchical( $options['type'] ) ) {
316 | $this->get_post( $path, true );
317 | }
318 |
319 | // handle the files in this directory -- recurse!
320 | $this->get_files_from_directory( $path );
321 | }
322 | }
323 | } // end foreach
324 | }
325 |
326 | function get_post( $path = '', $placeholder = false ) {
327 | // this gets the content AND imports the post because we have to build $this->filearr as we go so we can find the new post IDs of files' parent directories
328 | set_time_limit( 540 );
329 | $options = get_option( 'html_import' );
330 | $updatepost = false;
331 |
332 | if ( $placeholder ) {
333 | $title = trim( strrchr( $path,'/' ),'/' );
334 | $title = str_replace( '_', ' ', $title );
335 | $title = str_replace( '-', ' ', $title );
336 | $my_post['post_title'] = ucwords( $title );
337 |
338 | if ( isset( $options['preserve_slugs'] ) && '1' == $options['preserve_slugs'] ) {
339 | $filename = basename( $path );
340 | $my_post['post_name'] = substr( $filename,0,strrpos( $filename,'.' ) );
341 | }
342 |
343 | if ( $options['timestamp'] == 'filemtime' )
344 | $date = filemtime( $path );
345 | else $date = time();
346 | $my_post['post_date'] = date( "Y-m-d H:i:s", $date );
347 | $my_post['post_date_gmt'] = date( "Y-m-d H:i:s", $date );
348 |
349 | $my_post['post_type'] = $options['type'];
350 |
351 | $parentdir = rtrim( $this->parent_directory( $path ), '/' );
352 |
353 | $my_post['post_parent'] = array_search( $parentdir, $this->filearr );
354 | if ( $my_post['post_parent'] === false )
355 | $my_post['post_parent'] = $options['root_parent'];
356 |
357 | $my_post['post_content'] = '';
358 | $my_post['post_status'] = $options['status'];
359 | $my_post['post_author'] = $options['user'];
360 | }
361 | else {
362 | $doc = new DOMDocument();
363 | $doc->strictErrorChecking = false; // ignore invalid HTML, we hope
364 | $doc->preserveWhiteSpace = false;
365 | $doc->formatOutput = false; // speed this up
366 | if ( !empty( $options['encode'] ) ) { // we have to deal with character encoding BEFORE calling loadHTML() - eureka!
367 | $content = $this->handle_accents();
368 | @$doc->loadHTML( $content );
369 | }
370 | else
371 | @$doc->loadHTML( $this->file );
372 | $xml = @simplexml_import_dom( $doc );
373 | // bail out if we got no XML to work with
374 | if ( $xml === NULL )
375 | return;
376 | // avoid asXML errors when it encounters character range issues
377 | libxml_clear_errors();
378 | libxml_use_internal_errors( false );
379 |
380 | // start building the WP post object to insert
381 | $my_post = array();
382 |
383 | // title
384 | if ( $options['import_title'] == "region" ) {
385 | // appending strings unnecessarily so this plugin can be edited in Dreamweaver if needed
386 | $titlematch = '/<'.'!-- InstanceBeginEditable name="'.$options['title_region'].'" --'.'>( .* )<'.'!-- InstanceEndEditable --'.'>/isU';
387 | preg_match( $titlematch, $this->file, $titlematches );
388 | $my_post['post_title'] = strip_tags( trim( $titlematches[1] ) );
389 | }
390 | else if ( $options['import_title'] == "filename" ) {
391 | $path_split = explode( '/',$path );
392 | $file_name = trim( end( $path_split ) );
393 | $file_name = preg_replace( '/\.[^.]*$/', '', $file_name ); // remove extension
394 | $parent_directory = trim( prev( $path_split ) );
395 |
396 | if( basename( $path ) == $options['index_file'] ) {
397 | $title = $parent_directory;
398 | } else {
399 | $title = $file_name;
400 | }
401 | $title = str_replace( '_', ' ', $title );
402 | $title = str_replace( '-', ' ', $title );
403 | $my_post['post_title'] = ucwords( $title );
404 | }
405 | else { // it's a tag
406 | $titletag = $options['title_tag'];
407 | $titletagatt = $options['title_tagatt'];
408 | $titleattval = $options['title_attval'];
409 | $titlequery = '//' . $titletag;
410 | if ( !empty( $titletagatt ) )
411 | $titlequery .= '[@'.$titletagatt.'="'.$titleattval.'"]';
412 | $title = $xml->xpath( $titlequery );
413 | if ( isset( $title[0] ) && is_object( $title[0] ) )
414 | $title = $title[0]->asXML(); // asXML() preserves HTML in content
415 | else { // fallback
416 | $title = $xml->xpath( '//title' );
417 | if ( isset( $title[0] ) )
418 | $title = $title[0];
419 | if ( empty( $title ) )
420 | $title = '';
421 | else
422 | $title = ( string )$title;
423 | }
424 | // last resort: filename
425 | if ( empty( $title ) ) {
426 | $path_split = explode( '/',$path );
427 | $title = trim( end( $path_split ) );
428 | }
429 | // replace break tags with spaces before we strip tags, to avoid accidentally concatenating words
430 | $title = str_replace( '
', ' ', $title );
431 | $my_post['post_title'] = trim( strip_tags( $title ) );
432 | }
433 |
434 | $remove = $options['remove_from_title'];
435 | if ( !empty( $remove ) )
436 | $my_post['post_title'] = str_replace( $remove, '', $my_post['post_title'] );
437 |
438 | // DEBUG
439 | //echo ''.$my_post['post_title'].'
'; exit;
440 |
441 | // slug
442 | if ( isset( $options['preserve_slugs'] ) && '1' == $options['preserve_slugs'] ) {
443 | // there is no path when we're working with a single uploaded file instead of a directory
444 | if ( '' == trim( $path ) )
445 | $filename = $this->filename;
446 | else
447 | $filename = basename( $path );
448 | $my_post['post_name'] = substr( $filename,0,strrpos( $filename,'.' ) );
449 | }
450 |
451 | // post type
452 | $my_post['post_type'] = $options['type'];
453 |
454 | if ( is_post_type_hierarchical( $my_post['post_type'] ) ) {
455 | if ( '' == trim( $path ) )
456 | $my_post['post_parent'] = $options['root_parent'];
457 | else {
458 | $parentdir = rtrim( $this->parent_directory( $path ), '/' );
459 | $my_post['post_parent'] = array_search( $parentdir, $this->filearr );
460 | if ( $my_post['post_parent'] === false )
461 | $my_post['post_parent'] = $options['root_parent'];
462 | }
463 | }
464 |
465 | // date
466 | if ( $options['timestamp'] == 'filemtime' && !empty( $path ) ) {
467 | $date = filemtime( $path );
468 | $my_post['post_date'] = date( "Y-m-d H:i:s", $date );
469 | $my_post['post_date_gmt'] = date( "Y-m-d H:i:s", $date );
470 | }
471 | else if ( $options['timestamp'] == 'customfield' ) {
472 | if ( $options['import_date'] == "region" ) {
473 | // appending strings unnecessarily so this plugin can be edited in Dreamweaver if needed
474 | $datematch = '/<'.'!-- InstanceBeginEditable name="'.$options['date_region'].'" --'.'>( .* )<'.'!-- InstanceEndEditable --'.'>/isU';
475 | preg_match( $datematch, $this->file, $datematches );
476 | $date = $datematches[1];
477 | }
478 | else { // it's a tag
479 | $tag = $options['date_tag'];
480 | $tagatt = $options['date_tagatt'];
481 | $attval = $options['date_attval'];
482 | $xquery = '//'.$tag;
483 | if ( !empty( $tagatt ) )
484 | $xquery .= '[@'.$tagatt.'="'.$attval.'"]';
485 | $date = $xml->xpath( $xquery );
486 | if ( is_array( $date ) && isset( $date[0] ) && is_object( $date[0] ) ) {
487 | if ( isset( $date[0] ) && is_object( $date[0] ) )
488 | $stripdate = $date[0]->asXML(); // asXML() preserves HTML in content
489 | $date = strip_tags( $date[0] );
490 | $date = strtotime( $date );
491 | //echo $date; exit;
492 | }
493 | else { // fallback
494 | $date = time();
495 | }
496 |
497 | }
498 | }
499 | else {
500 | $date = time();
501 | }
502 | $my_post['post_date'] = date( "Y-m-d H:i:s", $date );
503 | $my_post['post_date_gmt'] = date( "Y-m-d H:i:s", $date );
504 |
505 | // content
506 | if ( $options['import_content'] == "region" ) {
507 | // appending strings unnecessarily so this plugin can be edited in Dreamweaver if needed
508 | $contentmatch = '/<'.'!-- InstanceBeginEditable name="'.$options['content_region'].'" --'.'>( .* )<'.'!-- InstanceEndEditable --'.'>/isU';
509 | preg_match( $contentmatch, $this->file, $contentmatches );
510 | $my_post['post_content'] = $contentmatches[1];
511 | }
512 | else if ( $options['import_content'] == "file" ) { // import entire file
513 | $my_post['post_content'] = $this->file;
514 | }
515 | else { // it's a tag
516 | $tag = $options['content_tag'];
517 | $tagatt = $options['content_tagatt'];
518 | $attval = $options['content_attval'];
519 | $xquery = '//'.$tag;
520 | if ( !empty( $tagatt ) )
521 | $xquery .= '[@'.$tagatt.'="'.$attval.'"]';
522 | $content = $xml->xpath( $xquery );
523 | if ( is_array( $content ) && isset( $content[0] ) && is_object( $content[0] ) ) {
524 | $my_post['post_content'] = $content[0]->asXML(); // asXML() preserves HTML in content
525 | }
526 | else { // fallback
527 | $content = $xml->xpath( '//body' );
528 | if ( is_array( $content ) && isset( $content[0] ) && is_object( $content[0] ) )
529 | $my_post['post_content'] = $content[0]->asXML();
530 | else
531 | $my_post['post_content'] = '';
532 | }
533 | }
534 |
535 | // $my_post['post_content'] = (string) $my_post['post_content'];
536 |
537 | if ( $options['title_inside'] )
538 | $my_post['post_content'] = str_replace( $title, '', $my_post['post_content'] );
539 |
540 | if ( !empty( $my_post['post_content'] ) ) {
541 | if ( !empty( $options['clean_html'] ) )
542 | $my_post['post_content'] = $this->clean_html( $my_post['post_content'], $options['allow_tags'], $options['allow_attributes'] );
543 | }
544 |
545 | // custom fields
546 | $customfields = array();
547 | foreach ( $options['customfield_name'] as $index => $fieldname ) {
548 | if ( !empty( $fieldname ) ) {
549 | if ( $options['import_field'][$index] == "region" ) {
550 | // appending strings unnecessarily so this plugin can be edited in Dreamweaver if needed
551 | $custommatch = '/<'.'!-- InstanceBeginEditable name="'.$options['customfield_region'][$index].'" --'.'>( .* )<'.'!-- InstanceEndEditable --'.'>/isU';
552 | preg_match( $custommatch, $this->file, $custommatches );
553 | if ( isset( $custommatches[1] ) )
554 | $customfields[$fieldname] = $custommatches[1];
555 | }
556 | else { // it's a tag
557 | $tag = $options['customfield_tag'][$index];
558 | $tagatt = $options['customfield_tagatt'][$index];
559 | $attval = $options['customfield_attval'][$index];
560 | $xquery = '//'.$tag;
561 | if ( !empty( $tagatt ) )
562 | $xquery .= '[@'.$tagatt.'="'.$attval.'"]';
563 | $content = $xml->xpath( $xquery );
564 |
565 | if ( is_array( $content ) && isset( $content[0] ) && is_object( $content[0] ) ) {
566 | $fieldcontent = $content[0]->asXML();
567 | if ( !empty( $options['customfield_html'][$index] ) && !empty( $options['clean_html'] ) ) {
568 | $fieldcontent = $content[0]->asXML();
569 | $fieldcontent = $this->clean_html( $fieldcontent, $options['allow_tags'], $options['allow_attributes'] );
570 | if ( !empty( $fieldcontent ) )
571 | $customfields[$fieldname] = $fieldcontent;
572 | }
573 | else {
574 | $fieldcontent = trim( strip_tags( $fieldcontent ) );
575 | if ( !empty( $fieldcontent ) )
576 | $customfields[$fieldname] = $fieldcontent;
577 | }
578 | }
579 | }
580 | }
581 | }
582 |
583 | // excerpt
584 | $excerpt = $options['meta_desc'];
585 | if ( !empty( $excerpt ) ) {
586 | $my_post['post_excerpt'] = $xml->xpath( '//meta[@name="description"]' );
587 | if ( isset( $my_post['post_excerpt'][0] ) )
588 | $my_post['post_excerpt'] = $my_post['post_excerpt'][0]['content'];
589 | if ( is_array( $my_post['post_excerpt'] ) )
590 | $my_post['post_excerpt'] = implode( '',$my_post['post_excerpt'] );
591 | $my_post['post_excerpt'] = ( string )$my_post['post_excerpt'];
592 | }
593 |
594 | // status
595 | $my_post['post_status'] = $options['status'];
596 |
597 | // author
598 | $my_post['post_author'] = $options['user'];
599 | }
600 |
601 | // if it's a single file, we can use a substitute for $path from here on
602 | if ( '' == trim( $path ) )
603 | $handle = __( "the uploaded file", 'import-html-pages' );
604 | else
605 | $handle = $path;
606 |
607 | // see if the post already exists
608 | // but don't bother printing this message if we're doing an index file; we know its parent already exists
609 | if ( $post_id = post_exists( $my_post['post_title'], $my_post['post_content'], $my_post['post_date'] ) && basename( $path ) != $options['index_file'] )
610 | $this->table[] = "-- " . sprintf( __( "%s ( %s ) has already been imported", 'html-import-pages' ), $my_post['post_title'], $handle ) . " ";
611 |
612 | // if we're doing hierarchicals and this is an index file of a subdirectory, instead of importing this as a separate page, update the content of the placeholder page we created for the directory
613 | $index_files = explode( ',',$options['index_file'] );
614 | if ( is_post_type_hierarchical( $options['type'] ) && dirname( $path ) != $options['root_directory'] && in_array( basename( $path ), $index_files ) ) {
615 | $post_id = array_search( dirname( $path ), $this->filearr );
616 | if ( $post_id !== 0 )
617 | $updatepost = true;
618 | }
619 |
620 | // find old path
621 | if ( '' !== trim( $path ) && !$updatepost ) {
622 | $url = esc_url( $options['old_url'] );
623 | $url = rtrim( $url, '/' );
624 | if ( !empty( $url ) )
625 | $old_path = str_replace( $options['root_directory'], $url, $path );
626 | else $old_path = $path;
627 | }
628 |
629 | // see if this file has been previously imported based on path
630 | $previous_import = get_posts(
631 | array (
632 | 'post_type' => $my_post['post_type'],
633 | 'meta_key' => 'URL_before_HTML_Import',
634 | 'meta_value' => $old_path,
635 | 'posts_per_page' => 1
636 | )
637 | );
638 |
639 | // if so, set to update instead of import
640 | if ( !is_wp_error( $previous_import ) && !empty( $previous_import )
641 | && $previous_import->post_title = $my_post['post_title'] ) {
642 | $post_id = $previous_import->ID;
643 | $updatepost = true;
644 | }
645 |
646 | // insert or update post
647 | if ( $updatepost ) {
648 | $my_post['ID'] = $post_id;
649 | wp_update_post( $my_post );
650 | }
651 | else
652 | $post_id = wp_insert_post( $my_post );
653 |
654 | // handle errors
655 | if ( is_wp_error( $post_id ) )
656 | $this->table[] = "-- " . $post_id /* error msg */ . " ";
657 | if ( !$post_id )
658 | $this->table[] = "-- " . sprintf( __( "Could not import %s. You should copy its contents manually.", 'html-import-pages' ), $handle ) . " ";
659 |
660 | // if no errors, handle custom fields
661 | if ( isset( $customfields ) ) {
662 | foreach ( $customfields as $fieldname => $fieldvalue ) {
663 | // allow user to set tags via custom field named 'post_tag'
664 | if ( $fieldname == 'post_tag' )
665 | $customfieldtags = $fieldvalue;
666 | else
667 | add_post_meta( $post_id, $fieldname, $fieldvalue, true );
668 | }
669 | }
670 |
671 | // ... and all the taxonomies...
672 | $taxonomies = get_taxonomies( array( 'public' => true ), 'objects', 'and' );
673 | foreach ( $taxonomies as $tax ) {
674 | if ( isset( $options[$tax->name] ) )
675 | wp_set_post_terms( $post_id, $options[$tax->name], $tax->name, false );
676 | }
677 | if ( isset( $customfieldtags ) )
678 | wp_set_post_terms( $post_id, $customfieldtags, 'post_tag', false );
679 |
680 | // ...and set the page template, if any
681 | if ( isset( $options['page_template'] ) && !empty( $options['page_template'] ) )
682 | add_post_meta( $post_id, '_wp_page_template', $options['page_template'], true );
683 |
684 | // add redirects from old to new path; store old path in custom field
685 | if ( '' !== trim( $old_path ) && !$updatepost ) {
686 | $this->redirects .= "Redirect\t".$old_path."\t".get_permalink( $post_id )."\t[R=301,NC,L]\n";
687 | add_post_meta( $post_id, 'URL_before_HTML_Import', $old_path, true );
688 | }
689 |
690 | // store path so we can check for parents later ( even if it's empty; need that info for image imports ).
691 | // Don't store the index file updates; they'll screw up the parent search, and they can use their parents' path anyway
692 | if ( !$updatepost )
693 | $this->filearr[$post_id] = $path;
694 | else { // index files will have an incomplete hierarchy if there were empty directories in their path
695 | $this->fix_hierarchy( $post_id, $path );
696 | }
697 |
698 | // create the results table row AFTER fixing hierarchy
699 | if ( '' !== trim($path ) ) {
700 | if ( empty( $my_post['post_title'] ) )
701 | $my_post['post_title'] = __( '( no title )', 'html-import' );
702 | $this->table[$post_id] = " ".$post_id." ".$path." ".get_permalink( $post_id ).'
703 | '.esc_html( $my_post['post_title'] )." ";
704 | }
705 | else {
706 | $this->single_result = sprintf( __( 'Imported the file as %s.', 'import-html-pages' ), ''.$my_post['post_title'].'' );
707 | }
708 | }
709 |
/**
 * Copy one media file into the uploads directory and register it as an attachment.
 * Borrowed almost entirely from dd32's Add From Server plugin.
 *
 * A file whose path is already recorded in $this->filearr is not copied again;
 * the ID stored for it is returned instead.
 *
 * @param string $file    Path or URL of the file to import. A query string, if any,
 *                        is used for the copy but ignored for naming and lookups.
 * @param int    $post_id ID of the post the attachment belongs to.
 * @return int|WP_Error Attachment ID, or WP_Error when the uploads directory reports
 *                      an error, the copy fails, or the attachment cannot be inserted.
 */
function handle_import_media_file( $file, $post_id = 0 ) {

    // Keep the untouched reference for copy(); strip the query string for everything else.
    $src_file = $file;
    $file = preg_replace( '|\?.+$|', '', $file );

    // see if the attachment already exists
    $id = array_search( $file, $this->filearr, true );
    if ( $id !== false )
        return $id;

    set_time_limit( 120 );

    // The parent post may not exist (for instance with the default $post_id of 0),
    // so only read its date when get_post() actually returned a post object.
    $post = get_post( $post_id );
    $time = is_object( $post ) ? $post->post_date_gmt : null;

    // A writable uploads dir will pass this test. Again, there's no point overriding this one.
    $uploads = wp_upload_dir( $time );
    if ( ! ( $uploads && false === $uploads['error'] ) )
        return new WP_Error( 'upload_error', $uploads['error'] );

    $filename = wp_unique_filename( $uploads['path'], basename( $file ) );

    // copy the file to the uploads dir
    $new_file = $uploads['path'] . '/' . $filename;
    if ( false === @copy( $src_file, $new_file ) )
        return new WP_Error( 'upload_error', sprintf( __( 'Could not find the right path to %s ( tried %s ). It could not be imported. Please upload it manually.', 'html-import-pages' ), basename( $file ), $file ) );

    // Set correct file permissions, mirroring the read/write bits of the containing directory.
    // stat() returns false on failure, in which case the copied file keeps its default mode.
    $stat = stat( dirname( $new_file ) );
    if ( is_array( $stat ) ) {
        $perms = $stat['mode'] & 0000666;
        @chmod( $new_file, $perms );
    }

    // Compute the URL
    $url = $uploads['url'] . '/' . $filename;

    // Apply upload filters; a filter may move the file or rewrite its URL.
    $return = apply_filters( 'wp_handle_upload', array( 'file' => $new_file, 'url' => $url, 'type' => wp_check_filetype( $file, null ) ) );
    $new_file = $return['file'];
    $url = $return['url'];

    // Default title is the file name without its extension.
    $title = preg_replace( '!\.[^.]+$!', '', basename( $file ) );
    $content = '';

    // use image exif/iptc data for title and caption defaults if possible
    if ( $image_meta = @wp_read_image_metadata( $new_file ) ) {
        if ( '' != trim( $image_meta['title'] ) )
            $title = trim( $image_meta['title'] );
        if ( '' != trim( $image_meta['caption'] ) )
            $content = trim( $image_meta['caption'] );
    }

    // Date the attachment like its parent post when that date is known.
    if ( $time ) {
        $post_date_gmt = $time;
        $post_date = $time;
    }
    else {
        $post_date = current_time( 'mysql' );
        $post_date_gmt = current_time( 'mysql', 1 );
    }

    // Construct the attachment array
    $wp_filetype = wp_check_filetype( basename( $filename ), null );
    $attachment = array(
        'post_mime_type' => $wp_filetype['type'],
        'guid' => $url,
        'post_parent' => $post_id,
        'post_title' => $title,
        'post_name' => $title,
        'post_content' => $content,
        'post_date' => $post_date,
        'post_date_gmt' => $post_date_gmt
    );

    //Win32 fix:
    $new_file = str_replace( strtolower( str_replace( '\\', '/', $uploads['basedir'] ) ), $uploads['basedir'], $new_file );

    // Insert attachment
    $id = wp_insert_attachment( $attachment, $new_file, $post_id );
    if ( is_wp_error( $id ) )
        return $id;

    // wp_insert_attachment() can also signal failure with 0; report that as an error
    // instead of generating metadata for, and remembering the file under, ID 0.
    if ( !$id )
        return new WP_Error( 'upload_error', sprintf( __( 'Could not create an attachment for %s. Please upload it manually.', 'html-import-pages' ), basename( $file ) ) );

    $data = wp_generate_attachment_metadata( $id, $new_file );
    wp_update_attachment_metadata( $id, $data );
    $this->filearr[$id] = $file; // $file contains the original, absolute path to the file

    return $id;
}
800 |
/**
 * Remove responsive-image srcset attributes from the content of every entry
 * recorded in $this->filearr.
 *
 * Entries whose post can no longer be loaded, and posts whose content contains
 * no srcset attribute inside an <img> tag, are left untouched.
 */
function remove_srcsets() {

    foreach ( $this->filearr as $id => $path ) {

        $post = get_post( $id );
        if ( !is_object( $post ) )
            continue;

        // Capture everything from "<img" up to the attribute so that only the
        // srcset="..." part is dropped and the rest of the tag is put back.
        $content = preg_replace( '/(<img[^>]* )srcset=[\'"][^>\'"]+[\'"]/i', '$1', $post->post_content );

        // preg_replace() yields null on a PCRE error; never write that back,
        // and skip the database write when nothing was removed.
        if ( null === $content || $content === $post->post_content )
            continue;

        wp_update_post( array(
            'ID' => $id,
            'post_content' => $content
        ) );
    }
}
815 |
816 | // largely borrowed from the Add Linked Images to Gallery plugin, except we do a simple str_replace at the end
// Import the images referenced by one imported post and point the post at the new copies.
//
// $id   - ID of the post to scan; it is loaded with get_post().
// $path - path the post was imported from; used as the base for relative src values
//         ( its directory when it is a file, the path itself otherwise ). When empty,
//         relative src values are resolved against the 'root_directory' option.
//
// For each collected src value:
//   - an http:// or https:// URL is used unchanged,
//   - a value starting with '/' is appended to the 'root_directory' option,
//   - anything else is treated as relative ( see $path above ).
// The resulting path is passed through remove_dot_segments() and handed to
// handle_import_media_file(). When that does not return a WP_Error, every occurrence
// of the src value in the post content is replaced with wp_get_attachment_url() of the
// new attachment, and the post is saved once at the end if anything was replaced.
// Progress messages are echoed and the output is flushed. No return statement is visible.
//
// NOTE(review): this listing is damaged right after the first preg_match_all() call.
// The loops that fill $srcs, the definition of $custom and $count, the opening of the
// "if not empty" block and the echoed markup are not visible here - confirm against
// the original file before changing any code in this function.
817 | function import_images( $id, $path ) {
818 | $post = get_post( $id );
819 | $options = get_option( 'html_import' );
820 | $result = array();
821 | $srcs = array();
822 | $content = $post->post_content;
823 | $title = $post->post_title;
824 | if ( empty( $title ) ) $title = __( '( no title )', 'html-import' );
825 | $update = false;
826 |
827 | // find all src attributes
828 | preg_match_all( '/
]* src=[\'"]?([^>\'" ]+)/i', $post->post_content, $matches );
829 | for ( $i=0; $i]* src=[\'"]?([^>\'" ]+)[\'"]/i', $custom, $matches );
836 | for ( $i=0; $i";
844 | printf( _n( 'Found %d image in %s. Importing... ', 'Found %d images in %s. Importing... ', $count, 'html-import-pages' ), $count, get_permalink( $post->ID ), $title );
845 | foreach ( $srcs as $src ) {
846 | // src="http://foo.com/images/foo"
847 |
848 | if ( preg_match( '/^https?:\/\//', $src ) ) {
849 | $imgpath = $src;
850 | }
851 | // src="/images/foo"
852 | elseif ( '/' == substr( $src, 0, 1 ) ) {
853 | $imgpath = $options['root_directory'] . $src;
854 | }
855 | // src="../../images/foo" or src="images/foo" or no $path
856 | else {
857 | if ( empty( $path ) )
858 | $imgpath = $options['root_directory']. '/' . $src;
859 | else
860 | $imgpath = ( is_file( $path ) ? dirname( $path ) : $path ) . '/' . $src;
861 | }
862 | // intersect base path and src, or just clean up junk
863 | $_imgpath = $this->remove_dot_segments( $imgpath );
864 |
865 | // load the image from $imgpath
866 | $imgid = $this->handle_import_media_file( $_imgpath, $id );
867 | if ( is_wp_error( $imgid ) )
868 | echo '';
869 | else {
870 | $imgpath = wp_get_attachment_url( $imgid );
871 |
872 | // replace paths in the content
873 | if ( !is_wp_error( $imgpath ) ) {
874 |
875 | $content = str_replace( $src, $imgpath, $content );
876 | $custom = str_replace( $src, $imgpath, $custom );
877 | $update = true;
878 | }
879 |
880 | } // is_wp_error else
881 |
882 | } // foreach
883 |
884 | // update the post only once
885 | if ( $update == true ) {
886 | $my_post = array();
887 | $my_post['ID'] = $id;
888 | $my_post['post_content'] = $content;
889 | wp_update_post( $my_post );
890 | }
891 |
892 | _e( 'done.', 'html-import-images' );
893 | echo '';
894 | flush();
895 | } // if empty
896 | }
897 |
// Import the documents linked from one imported post and point the links at the new copies.
//
// $id   - ID of the post to scan; it is loaded with get_post().
// $path - accepted but not read anywhere in the lines visible here.
//
// Links starting with '#' or 'mailto:' are skipped. For the others a local path is derived:
//   - http:// and https:// links are mapped by replacing the 'old_url' option with the
//     'root_directory' option, but only when the stripos() test below passes,
//   - links starting with '/' are appended to 'root_directory',
//   - all other links are resolved against the post's 'URL_before_HTML_Import' meta value
//     ( with 'old_url' replaced by 'root_directory' ) by swapping its last path segment.
// A link is imported only when the text after the last '.' in the derived path, lowercased,
// appears in the comma-separated 'document_mimes' option. Importing goes through
// handle_import_media_file(); when that does not return a WP_Error, every occurrence of the
// href in the post content is replaced with wp_get_attachment_url() of the new attachment,
// and the post is saved once at the end if anything was replaced.
// Progress messages are echoed and the output is flushed. No return statement is visible.
//
// NOTE(review): stripos() takes ( haystack, needle ); the call below passes $site first, so
// it tests whether the whole link occurs inside the old site URL. The arguments look
// reversed - confirm the intent before relying on absolute links being recognised.
// NOTE(review): this listing is damaged right after preg_match_all(). The loop that fills
// $hrefs, the definition of $count, the opening of the "if not empty" block and the echoed
// markup are not visible here - confirm against the original file before changing code.
898 | function import_documents( $id, $path ) {
899 | $post = get_post( $id );
900 | $options = get_option( 'html_import' );
901 | $result = $srcs = array();
902 | $content = $post->post_content;
903 | $title = $post->post_title;
904 | if ( empty( $title ) ) $title = __( '( no title )', 'html-import' );
905 | $update = false;
906 | $mimes = explode( ',', $options['document_mimes'] );
907 |
908 | // find all href attributes
909 | preg_match_all( '/]* href=[\'"]?([^>\'" ]+)/i', $content, $matches );
910 | for ( $i=0; $i";
917 | printf( _n( 'Found %d link in %s. Checking file types... ', 'Found %d links in %s. Checking file types... ', $count, 'html-import-pages' ), $count, get_permalink( $post->ID ), $title );
918 |
919 | //echo 'Looking in '.get_permalink( $id ).'
';
920 | $options = get_option( 'html_import' );
921 | $site = $options['old_url'];
922 | $rootdir = $options['root_directory'];
923 | foreach ( $hrefs as $href ) {
924 | $linkpath = '';
925 | if ( '#' != substr( $href, 0, 1 ) && 'mailto:' != substr( $href, 0, 7 ) ) { // skip anchors and mailtos
926 | if ( preg_match( '/^http:\/\//', $href ) || preg_match( '/^https:\/\//', $href ) ) {
927 | // is it a link to something on this server?
928 | if ( stripos( $site, $href ) !== false )
929 | // if it's an internal link, let's get a local file path
930 | $linkpath = str_replace( $site, $rootdir, $href );
931 | }
932 | // href="/images/foo"
933 | elseif ( '/' == substr( $href, 0, 1 ) ) {
934 | $linkpath = $rootdir . $href;
935 | $linkpath = $this->remove_dot_segments( $linkpath );
936 | }
937 | // href="../../images/foo" or href="images/foo"
938 | else {
939 | // we need to know where we are in the hierarchy
940 | $oldpath = get_post_meta( $id, 'URL_before_HTML_Import', true );
941 | $oldpath = str_replace( $site, $rootdir, $oldpath );
942 | // DEBUG
943 | //echo 'Old path: '.$oldpath;
944 | $oldfile = strrchr( $oldpath, '/' );
945 | $linkpath = str_replace( $oldfile, '/'.$href, $oldpath );
946 | $linkpath = $this->remove_dot_segments( $linkpath );
947 | // DEBUG
948 | //echo ' Link path: '.$linkpath . '
';
949 | }
950 |
951 | if ( !empty( $linkpath ) ) { // then we found an internal link
952 | $linkpath = rtrim( $linkpath, '/' );
953 | // DEBUG
954 | //echo 'Old link: '.$href.' Full path: '.$linkpath;
955 |
956 | $filename_parts = explode( ".",$linkpath );
957 | $ext = strtolower( $filename_parts[count( $filename_parts ) - 1] );
958 |
959 | if ( in_array( $ext, $mimes ) ) { // allowed upload types only
960 | echo '
Importing '.ltrim( strrchr( $linkpath, '/' ), '/' ).'... ';
961 | // load the file from $linkpath
962 | $fileid = $this->handle_import_media_file( $linkpath, $id );
963 | if ( is_wp_error( $fileid ) )
964 | echo '';
965 | else {
966 | $filepath = wp_get_attachment_url( $fileid );
967 |
968 | // replace paths in the content
969 | if ( !is_wp_error( $filepath ) ) {
970 | $content = str_replace( $href, $filepath, $content );
971 | $update = true;
972 | }
973 |
974 | } // is_wp_error $fileid
975 | } // if in array
976 |
977 |
978 | } // if empty linkpath
979 | } // if #/mailto
980 | } // foreach
981 |
982 | // update the post only once
983 | if ( $update == true ) {
984 | $my_post = array();
985 | $my_post['ID'] = $id;
986 | $my_post['post_content'] = $content;
987 | wp_update_post( $my_post );
988 | }
989 |
990 | _e( 'done.', 'html-import-images' );
991 | echo '
';
992 | flush();
993 | } // if empty $hrefs
994 | }
995 |
// Rewrite internal links in every entry recorded in $this->filearr.
//
// Echoes two translated status messages, then for each ID in $this->filearr loads the
// post, passes its content and ID to fix_internal_links(), and saves the result with
// wp_update_post() when that result is not empty. The post ID is appended to $fixedlinks
// on every iteration, whether or not the post was saved ( the append is not part of the
// if statement above it ).
//
// NOTE(review): get_post() is dereferenced without a check, so an ID that no longer
// resolves to a post would fail here - confirm whether $this->filearr can hold such IDs.
// NOTE(review): the end of this function is damaged in this listing; whatever is printed
// once $fixedlinks is non-empty ( inline markup after the closing PHP tag ) is not
// visible - confirm against the original file before changing any code here.
996 | function find_internal_links() {
997 | echo ''.__( 'Fixing relative links...', 'import-html-pages' ).'
';
998 | echo ''.__( 'The importer is searching your imported posts for links. This might take a few minutes.', 'import-html-pages' ).'
';
999 |
1000 | $fixedlinks = array();
1001 | foreach ( $this->filearr as $id => $path ) {
1002 | $new_post = array();
1003 | $post = get_post( $id );
1004 | $new_post['ID'] = $post->ID;
1005 | $new_post['post_content'] = $this->fix_internal_links( $post->post_content, $post->ID );
1006 |
1007 | if ( !empty( $new_post['post_content'] ) )
1008 | wp_update_post( $new_post );
1009 | $fixedlinks[] .= $post->ID;
1010 | }
1011 | if ( !empty( $fixedlinks ) ) { ?>
1012 |
1013 | '.print_r( $this->filearr, true ).' ';
1017 | }
1018 |
1019 | function find_images() {
1020 | echo ''.__( 'Importing images...', 'import-html-pages' ).'
'; 1021 | $results = ''; 1022 | foreach ( $this->filearr as $id => $path ) { 1023 | $results .= $this->import_images( $id, $path ); 1024 | } 1025 | if ( !empty( $results ) ) 1026 | echo $results; 1027 | echo ''; 1028 | printf( __( 'All done. Go to the Media Library.' ), 'media.php' ); 1029 | echo '
'; 1030 | // DEBUG 1031 | //echo ''.print_r( $this->filearr, true ).''; 1032 | } 1033 | 1034 | function find_documents() { 1035 | echo '
'.__( 'Importing media files...', 'import-html-pages' ).'
'; 1036 | echo ''.__( 'The importer is searching your imported posts for links to media files. This might take a few minutes.', 'import-html-pages' ).'
'; 1037 | 1038 | $results = ''; 1039 | foreach ( $this->filearr as $id => $path ) { 1040 | $results .= $this->import_documents( $id, $path ); 1041 | } 1042 | if ( !empty( $results ) ) 1043 | echo $results; 1044 | echo ''; 1045 | printf( __( 'All done. Go to the Media Library.' ), 'media.php' ); 1046 | echo '
'; 1047 | // DEBUG 1048 | //echo ''.print_r( $this->filearr, true ).''; 1049 | } 1050 | 1051 | function print_results( $posttype ) { 1052 | if ( !empty( $this->single_result ) ) 1053 | echo $this->single_result; 1054 | else { 1055 | ?> 1056 |
| 1059 | | 1060 | | 1061 | | 1062 | |
|---|
change your permalink structure
'.print_r( $this->filearr, true ).''; 1079 | } 1080 | 1081 | function import() { 1082 | $options = get_option( 'html_import' ); 1083 | 1084 | if ( $_POST['import_files'] == 'file' ) { 1085 | // preserve original file name so we can use it for slugs later ( maybe ) 1086 | $this->filename = $_FILES['import']['name']; 1087 | 1088 | // upload the file 1089 | $file = wp_import_handle_upload(); 1090 | if ( isset( $file['error'] ) ) { 1091 | echo $file['error']; 1092 | return; 1093 | } 1094 | 1095 | echo '