65 | { __(
66 | 'The type of HTML content to be converted to Gutenberg blocks is specified here.'
67 | ) }
68 |
69 |
74 |
79 |
80 |
81 |
87 |
88 |
89 |
90 | );
91 | }
92 | }
93 |
94 | export default Settings;
95 |
--------------------------------------------------------------------------------
/lib/content-patcher/patchers/class-block-decode-patcher.php:
--------------------------------------------------------------------------------
1 | decode_post_content( $block_content );
26 | }
27 |
/**
 * Restore the base64-encoded top-level blocks found in a piece of post content.
 *
 * Content that does not contain the encoded-block anchor is returned untouched. Otherwise the
 * content is parsed into blocks, every top-level block whose innerHTML carries the anchor is
 * swapped for its decoded counterpart (when decoding succeeds), and the block list is
 * serialized back into a string.
 *
 * @param string $html_content String to decode.
 *
 * @return string The string with all blocks decoded.
 */
private function decode_post_content( string $html_content ): string {
	$anchor = BlockEncodePatcher::ENCODED_ANCHOR;
	if ( ! str_contains( $html_content, $anchor ) ) {
		return $html_content;
	}

	$blocks        = parse_blocks( $html_content );
	$found_encoded = false;

	foreach ( $blocks as $position => $block ) {
		if ( ! str_contains( $block['innerHTML'], $anchor ) ) {
			continue;
		}
		$found_encoded = true;

		// A block that fails to decode is left in place as it was parsed.
		$restored = $this->decode_block( $block['innerHTML'] );
		if ( ! empty( $restored ) ) {
			$blocks[ $position ] = $restored;
		}
	}

	// The anchor may appear in the raw string without being inside any top-level block's innerHTML;
	// in that case the original string is handed back instead of a re-serialized copy.
	return $found_encoded ? serialize_blocks( $blocks ) : $html_content;
}
54 |
/**
 * Decode a single block from its base64 placeholder.
 *
 * The placeholder is expected to look like the encoded anchor, followed by the base64 payload and
 * a closing square bracket. The payload is decoded in strict mode and parsed as block markup; the
 * first parsed block is returned when it has a block name.
 *
 * @param string $encoded_block Block to decode.
 *
 * @return array The decoded block, or an empty array if nothing could be decoded.
 */
private function decode_block( string $encoded_block ): array {
	// The leading backslash escapes the first character of the anchor (presumably "[" -- confirm
	// against BlockEncodePatcher::ENCODED_ANCHOR). The character class is the base64 alphabet,
	// see https://base64.guru/learn/base64-characters.
	$pattern = '/\\' . BlockEncodePatcher::ENCODED_ANCHOR . '([A-Za-z0-9+\\/=]+)\]/';

	// preg_match() returns 0 on no match and false on a PCRE error; both mean there is nothing to decode.
	if ( 1 !== preg_match( $pattern, $encoded_block, $matches ) || empty( $matches[1] ) ) {
		return [];
	}

	// Strict mode returns false for malformed payloads (e.g. misplaced padding). Bail out instead of
	// handing a non-string to parse_blocks().
	$decoded = base64_decode( $matches[1], true );
	if ( false === $decoded || '' === $decoded ) {
		return [];
	}

	$parsed = parse_blocks( $decoded );
	if ( ! empty( $parsed[0]['blockName'] ) ) {
		return $parsed[0];
	}

	return [];
}
77 | }
78 |
--------------------------------------------------------------------------------
/lib/content-patcher/patchers/class-block-encode-patcher.php:
--------------------------------------------------------------------------------
1 | encode_post_content( $html_content );
27 | }
28 |
29 | /**
30 | * Encode Gutenberg blocks in given string as base64.
31 | *
32 | * @param string $html The string content to encode.
33 | *
34 | * @return string The string with all blocks base64 encoded.
35 | */
36 | private function encode_post_content( $html ) {
37 | if ( ! str_contains( $html, '
25 | [pullquote author="Mary Filardo, president of the 21st Century School Fund" description="" style="new-pullquote"]“It’s just one more nail in the coffin of small towns that are already struggling. The county hospital closed, and the mom-and-pop shops are gone because Walmart opened. When you lose the schools, you lose the community.” [/pullquote]
26 |
27 | CONTENT;
28 | }
29 |
30 | /**
31 | * Get a patched pullquote shortcode.
32 | *
33 | * @return string Patched block content.
34 | */
35 | public static function get_patched_block_expected() {
36 | return <<
38 |
“It’s just one more nail in the coffin of small towns that are already struggling. The county hospital closed, and the mom-and-pop shops are gone because Walmart opened. When you lose the schools, you lose the community.”
Mary Filardo, president of the 21st Century School Fund
39 |
40 | CONTENT;
41 | }
42 |
43 | /**
44 | * Get an unpatched pullquote shortcode with no defined author.
45 | *
46 | * @return string Unpatched block content.
47 | */
48 | public static function get_unpatched_block_no_author() {
49 | return <<
51 | [pullquote author="" description="" style="new-pullquote"]The nation’s school districts spend about $46 billion less per year on facility upkeep than is needed to maintain “healthy and safe” learning environments, according to the 21st Century School Fund.[/pullquote]
52 |
53 | CONTENT;
54 | }
55 |
56 | /**
57 | * Get a patched pullquote shortcode with no defined author.
58 | *
59 | * @return string Patched block content.
60 | */
61 | public static function get_patched_block_no_author_expected() {
62 | return <<
64 |
The nation’s school districts spend about $46 billion less per year on facility upkeep than is needed to maintain “healthy and safe” learning environments, according to the 21st Century School Fund.
138 |
139 | { __(
140 | 'To convert another batch in parallel and increase conversion speed (depending on your computer performance, no more than 10 max parallel browser tabs are usually recommended), '
141 | ) }
142 | open an additional conversion tab.
143 |
144 |
174 | );
175 | }
176 | }
177 | }
178 |
179 | export default ContentConverter;
180 |
181 |
--------------------------------------------------------------------------------
/lib/content-patcher/elementManipulators/class-wpblockmanipulator.php:
--------------------------------------------------------------------------------
1 | # end of opening tag
26 | .*? # anything in the middle
27 | (\<\!-- # beginning of the closing tag
28 | \s # followed by a space
29 | / # one forward slash
30 | %1$s # element name/designation, should be substituted by using sprintf(), eg. sprintf( $this_pattern, \'wp:video\' );
31 | \s # followed by a space
32 | --\>) # end of block
33 | # "s" modifier also needed here to match accross multi-lines
34 | |xims';
35 |
36 | /**
37 | * Matches a self-closing block element -- which is one that does NOT have both an opening tag `<!-- wp:name -->` and a closing
38 | * tag `<!-- /wp:name -->`, but rather has just one "self-closing tag", e.g. `<!-- wp:name /-->`.
39 | */
40 | const PATTERN_WP_BLOCK_ELEMENT_SELFCLOSING = '|
41 | \<\!-- # beginning of the block element
42 | \s # followed by a space
43 | %s # element name/designation, should be substituted by using sprintf()
44 | .*? # anything in the middle
45 | \/--\> # ends with a self-closing tag
46 | |xims';
47 |
/**
 * Finds every block element (opening tag through closing tag) of the given name in the source.
 *
 * Builds the regular expression from PATTERN_WP_BLOCK_ELEMENT and runs preg_match_all() with the
 * PREG_OFFSET_CAPTURE option.
 *
 * @param string $block_name Block name to search for (match).
 * @param string $subject    Blocks content source in which to search for blocks.
 *
 * @return array|null The $matches array filled by preg_match_all(), or null when nothing matched
 *                    or the match failed.
 */
public function match_wp_block( $block_name, $subject ) {
	$found = preg_match_all(
		sprintf( self::PATTERN_WP_BLOCK_ELEMENT, $block_name ),
		$subject,
		$matches,
		PREG_OFFSET_CAPTURE
	);

	// A PCRE failure (false) and zero hits are both reported to the caller as null.
	if ( false === $found || 0 === $found ) {
		return null;
	}

	return $matches;
}
64 |
/**
 * Finds every self-closing block element of the given name in the source.
 *
 * Builds the regular expression from PATTERN_WP_BLOCK_ELEMENT_SELFCLOSING and runs
 * preg_match_all() with the PREG_OFFSET_CAPTURE option.
 *
 * @param string $block_name Block name/designation to search for.
 * @param string $subject    The block source in which to search for the block occurrences.
 *
 * @return array|null The `$matches` array as set by preg_match_all() with the PREG_OFFSET_CAPTURE
 *                    option, or null if no matches were found or the match failed.
 */
public function match_wp_block_selfclosing( $block_name, $subject ) {
	$found = preg_match_all(
		sprintf( self::PATTERN_WP_BLOCK_ELEMENT_SELFCLOSING, $block_name ),
		$subject,
		$matches,
		PREG_OFFSET_CAPTURE
	);

	// A PCRE failure (false) and zero hits are both reported to the caller as null.
	if ( false === $found || 0 === $found ) {
		return null;
	}

	return $matches;
}
82 |
/**
 * Gets an attribute's value from the block element's header.
 *
 * Only the first line of the element is inspected. Within it, the search is limited to the header
 * comment (everything before the first `-->`), and the JSON object is taken from the first `{` to
 * the LAST `}` of that header, so nested attribute objects and `}` characters inside string values
 * are read correctly.
 *
 * @param string $block_element  The block element, accepts a multiline string.
 * @param string $attribute_name Attribute name.
 *
 * @return mixed|null Attribute value as decoded from JSON, or null if the header has no attributes,
 *                    the attributes are not valid JSON, or the attribute is not set.
 */
public function get_attribute( $block_element, $attribute_name ) {
	$block_element_lines    = explode( "\n", $block_element );
	$block_element_1st_line = $block_element_lines[0];

	// Ignore anything following the header comment on the same line (e.g. a one-line block whose
	// content contains curly brackets).
	$header_end = strpos( $block_element_1st_line, '-->' );
	$header     = false === $header_end
		? $block_element_1st_line
		: substr( $block_element_1st_line, 0, $header_end );

	$curly_open_pos  = strpos( $header, '{' );
	$curly_close_pos = strrpos( $header, '}' );
	if ( false === $curly_open_pos || false === $curly_close_pos || $curly_close_pos < $curly_open_pos ) {
		return null;
	}

	$attributes_json = substr( $header, $curly_open_pos, $curly_close_pos - $curly_open_pos + 1 );
	$attributes      = json_decode( $attributes_json, true );
	if ( ! is_array( $attributes ) ) {
		// Invalid JSON, or JSON that is not an object.
		return null;
	}

	return $attributes[ $attribute_name ] ?? null;
}
106 |
/**
 * Adds an attribute to the block element's header.
 * It doesn't check whether the attribute already exists, simply appends it to the block definition.
 *
 * Only the first line of the element is modified. The attribute name and value are JSON-encoded
 * as strings, so quotes and backslashes in them cannot break the header's JSON. If the first line
 * contains neither a `}` nor a `-->`, the element is returned unchanged.
 *
 * @param string $block_element   The block element, accepts a multiline string.
 * @param string $attribute_name  Attribute name.
 * @param string $attribute_value Attribute value.
 *
 * @return string Updated block element.
 */
public function add_attribute( $block_element, $attribute_name, $attribute_value ) {
	$block_element_lines    = explode( "\n", $block_element );
	$block_element_1st_line = $block_element_lines[0];

	// Encode as JSON strings; slashes and non-ASCII characters are left as they are so that
	// ordinary values (URLs, plain text) come out exactly as with plain concatenation.
	$json_flags = JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE;
	$quote      = static function ( $text ) use ( $json_flags ) {
		$encoded = json_encode( (string) $text, $json_flags );
		// json_encode() fails on e.g. invalid UTF-8; fall back to plain quoting in that case.
		return false === $encoded ? '"' . $text . '"' : $encoded;
	};
	$pair = $quote( $attribute_name ) . ':' . $quote( $attribute_value );

	$pos_close_curly = strrpos( $block_element_1st_line, '}' );
	if ( false !== $pos_close_curly ) {
		// If some attributes already exist, append to those. No comma is needed when the
		// existing object is empty (`{}`).
		$before_curly                   = rtrim( substr( $block_element_1st_line, 0, $pos_close_curly ) );
		$separator                      = '{' === substr( $before_curly, -1 ) ? '' : ',';
		$block_element_1st_line_patched = substr_replace( $block_element_1st_line, $separator . $pair . '}', $pos_close_curly, 1 );
	} else {
		// Otherwise, add the curly brackets first.
		$pos_close_comment = strrpos( $block_element_1st_line, '-->' );
		if ( false === $pos_close_comment ) {
			// Not a block header; leave the element untouched rather than corrupting it.
			return $block_element;
		}
		$block_element_1st_line_patched = substr_replace( $block_element_1st_line, '{' . $pair . '} -->', $pos_close_comment, 3 );
	}

	$block_element_lines[0] = $block_element_1st_line_patched;

	return implode( "\n", $block_element_lines );
}
136 | }
137 |
--------------------------------------------------------------------------------
/tests/unit/content-patcher/patchers/test-video-patcher.php:
--------------------------------------------------------------------------------
1 | patcher = new VideoPatcher();
39 | $this->data_provider = new DataProviderVideoPatcher();
40 | }
41 |
/**
 * A video element lost during conversion should be patched back into the blocks content.
 */
public function test_should_patch_lost_video_element() {
	$provider      = $this->data_provider;
	$source_html   = $provider->get_lost_video_html();
	$source_blocks = $provider->get_lost_video_blocks_before_patching();
	$want          = $provider->get_lost_video_blocks_patched_expected();

	$this->assertSame( $want, $this->patcher->patch_blocks_contents( $source_blocks, $source_html, 1 ) );
}
54 |
/**
 * When the HTML and the non-patched blocks code are inconsistent with each other, the patcher
 * output must equal the provider's expected (unmodified) blocks code.
 */
public function test_should_not_modify_source_if_html_is_html_code_inconsistent_with_blocks_code() {
	$provider      = $this->data_provider;
	$source_html   = $provider->get_inconsistent_sources_html();
	$source_blocks = $provider->get_inconsistent_sources_before_patching();
	$want          = $provider->get_inconsistent_sources_blocks_patched_expected();

	$this->assertSame( $want, $this->patcher->patch_blocks_contents( $source_blocks, $source_html, 1 ) );
}
67 |
/**
 * HTML that this patcher is not supposed to handle must leave the blocks code as the provider expects.
 */
public function test_should_not_modify_source_if_html_not_pertinent_to_this_patcher() {
	$provider      = $this->data_provider;
	$source_html   = $provider->get_html_is_non_pertinent_html();
	$source_blocks = $provider->get_html_is_non_pertinent_before_patching();
	$want          = $provider->get_html_is_non_pertinent_blocks_patched_expected();

	$this->assertSame( $want, $this->patcher->patch_blocks_contents( $source_blocks, $source_html, 1 ) );
}
80 |
/**
 * Content containing several videos should have all of them patched.
 */
public function test_should_patch_multiple_video_elements() {
	$provider      = $this->data_provider;
	$source_html   = $provider->get_multiple_videos_html();
	$source_blocks = $provider->get_multiple_video_blocks_before_patching();
	$want          = $provider->get_multiple_video_blocks_patched_expected();

	$this->assertSame( $want, $this->patcher->patch_blocks_contents( $source_blocks, $source_html, 1 ) );
}
93 |
/**
 * Not every video in this fixture has the src attribute; those without it should not be patched.
 */
public function test_should_skip_patching_videos_which_dont_have_a_valid_src_attribute() {
	$provider      = $this->data_provider;
	$source_html   = $provider->get_some_skipped_videos_html();
	$source_blocks = $provider->get_some_skipped_videos_blocks_before_patching();
	$want          = $provider->get_some_skipped_videos_blocks_patched_expected();

	$this->assertSame( $want, $this->patcher->patch_blocks_contents( $source_blocks, $source_html, 1 ) );
}
106 |
/**
 * Not every video element in this fixture is valid; the invalid ones should not be patched.
 */
public function test_should_skip_patching_invalid_videos() {
	$provider      = $this->data_provider;
	$source_html   = $provider->get_some_invalid_videos_html();
	$source_blocks = $provider->get_some_invalid_videos_blocks_before_patching();
	$want          = $provider->get_some_invalid_videos_blocks_patched_expected();

	$this->assertSame( $want, $this->patcher->patch_blocks_contents( $source_blocks, $source_html, 1 ) );
}
119 |
/**
 * Guards against patching the same element twice. This becomes important once Gutenberg itself
 * fixes the conversion of this particular case.
 */
public function test_should_skip_patching_videos_that_already_have_the_dir_attribute() {
	$provider      = $this->data_provider;
	$source_html   = $provider->get_some_videos_ok_html();
	$source_blocks = $provider->get_some_videos_ok_blocks_before_patching();
	$want          = $provider->get_some_videos_ok_blocks_patched_expected();

	$this->assertSame( $want, $this->patcher->patch_blocks_contents( $source_blocks, $source_html, 1 ) );
}
133 |
/**
 * Runs the comprehensive fixture through the patcher and compares the result with the expected output.
 */
public function test_should_correctly_patch_a_comprehensive_video_conversion_example() {
	$provider      = $this->data_provider;
	$source_html   = $provider->get_comprehensive_html();
	$source_blocks = $provider->get_comprehensive_blocks_before_patching();
	$want          = $provider->get_comprehensive_blocks_patched_expected();

	$this->assertSame( $want, $this->patcher->patch_blocks_contents( $source_blocks, $source_html, 1 ) );
}
146 | }
147 |
--------------------------------------------------------------------------------
/tests/unit/content-patcher/patchers/test-audio-patcher.php:
--------------------------------------------------------------------------------
1 | patcher = new AudioPatcher();
39 | $this->data_provider = new DataProviderAudioPatcher();
40 | }
41 |
/**
 * An audio element lost during conversion should be patched back into the blocks content.
 */
public function test_should_patch_lost_audio_element() {
	$provider      = $this->data_provider;
	$source_html   = $provider->get_lost_audio_html();
	$source_blocks = $provider->get_lost_audio_blocks_before_patching();
	$want          = $provider->get_lost_audio_blocks_patched_expected();

	$this->assertSame( $want, $this->patcher->patch_blocks_contents( $source_blocks, $source_html, 1 ) );
}
54 |
/**
 * When the HTML and the non-patched blocks code are inconsistent with each other, the patcher
 * output must equal the provider's expected (unmodified) blocks code.
 */
public function test_should_not_modify_source_if_html_is_html_code_inconsistent_with_blocks_code() {
	$provider      = $this->data_provider;
	$source_html   = $provider->get_inconsistent_sources_html();
	$source_blocks = $provider->get_inconsistent_sources_before_patching();
	$want          = $provider->get_inconsistent_sources_blocks_patched_expected();

	$this->assertSame( $want, $this->patcher->patch_blocks_contents( $source_blocks, $source_html, 1 ) );
}
67 |
/**
 * HTML that this patcher is not supposed to handle must leave the blocks code as the provider expects.
 */
public function test_should_not_modify_source_if_html_not_pertinent_to_this_patcher() {
	$provider      = $this->data_provider;
	$source_html   = $provider->get_html_is_non_pertinent_html();
	$source_blocks = $provider->get_html_is_non_pertinent_before_patching();
	$want          = $provider->get_html_is_non_pertinent_blocks_patched_expected();

	$this->assertSame( $want, $this->patcher->patch_blocks_contents( $source_blocks, $source_html, 1 ) );
}
81 |
/**
 * Content containing several audio elements should have all of them patched.
 */
public function test_should_patch_multiple_audio_elements() {
	$provider      = $this->data_provider;
	$source_html   = $provider->get_multiple_audios_html();
	$source_blocks = $provider->get_multiple_audio_blocks_before_patching();
	$want          = $provider->get_multiple_audio_blocks_patched_expected();

	$this->assertSame( $want, $this->patcher->patch_blocks_contents( $source_blocks, $source_html, 1 ) );
}
94 |
/**
 * Not every audio element in this fixture has the src attribute; those without it should not be patched.
 */
public function test_should_skip_patching_audios_which_dont_have_a_valid_src_attribute() {
	$provider      = $this->data_provider;
	$source_html   = $provider->get_some_skipped_audios_html();
	$source_blocks = $provider->get_some_skipped_audios_blocks_before_patching();
	$want          = $provider->get_some_skipped_audios_blocks_patched_expected();

	$this->assertSame( $want, $this->patcher->patch_blocks_contents( $source_blocks, $source_html, 1 ) );
}
107 |
/**
 * Not every audio element in this fixture is valid; the invalid ones should not be patched.
 */
public function test_should_skip_patching_invalid_audios() {
	$provider      = $this->data_provider;
	$source_html   = $provider->get_some_invalid_audios_html();
	$source_blocks = $provider->get_some_invalid_audios_blocks_before_patching();
	$want          = $provider->get_some_invalid_audios_blocks_patched_expected();

	$this->assertSame( $want, $this->patcher->patch_blocks_contents( $source_blocks, $source_html, 1 ) );
}
120 |
/**
 * Guards against patching the same element twice. This becomes important once Gutenberg itself
 * fixes the conversion of this particular case.
 */
public function test_should_skip_patching_audios_that_already_have_the_dir_attribute() {
	$provider      = $this->data_provider;
	$source_html   = $provider->get_some_audios_ok_html();
	$source_blocks = $provider->get_some_audios_ok_blocks_before_patching();
	$want          = $provider->get_some_audios_ok_blocks_patched_expected();

	$this->assertSame( $want, $this->patcher->patch_blocks_contents( $source_blocks, $source_html, 1 ) );
}
134 |
/**
 * Runs the comprehensive fixture through the patcher and compares the result with the expected output.
 */
public function test_should_correctly_patch_a_comprehensive_audio_conversion_example() {
	$provider      = $this->data_provider;
	$source_html   = $provider->get_comprehensive_html();
	$source_blocks = $provider->get_comprehensive_blocks_before_patching();
	$want          = $provider->get_comprehensive_blocks_patched_expected();

	$this->assertSame( $want, $this->patcher->patch_blocks_contents( $source_blocks, $source_html, 1 ) );
}
147 | }
148 |
--------------------------------------------------------------------------------
/assets/src/conversion/index.js:
--------------------------------------------------------------------------------
1 | /**
2 | * WordPress dependencies.
3 | */
4 | import { Component, Fragment } from '@wordpress/element';
5 | import { __ } from '@wordpress/i18n';
6 | import {
7 | Button,
8 | Card,
9 | CardBody,
10 | CardFooter,
11 | Notice,
12 | TextControl
13 | } from '@wordpress/components';
14 |
15 | /**
16 | * Newspack dependencies.
17 | */
18 | import { NewspackIcon } from 'newspack-components';
19 |
20 | /**
21 | * Internal dependencies.
22 | */
23 | import {
24 | fetchConversionInfo,
25 | fetchPrepareConversion,
26 | fetchResetConversion,
27 | downloadListConvertedIds,
28 | downloadListUnsuccessfullyConvertedIds,
29 | } from '../utilities';
30 |
31 | class Conversion extends Component {
32 | constructor( props ) {
33 | super( props );
34 |
35 | this.state = {
36 | isConversionPrepared: false,
37 | unconvertedCount: '...',
38 | totalNumberOfBatches: '...',
39 | areThereSuccessfullyConvertedIds: false,
40 | areThereUnconvertedIds: false,
41 | minIdToProcess: -1,
42 | maxIdToProcess: -1,
43 | };
44 | }
45 |
46 | componentDidMount() {
47 | return fetchConversionInfo().then( response => {
48 | if ( response ) {
49 | const {
50 | isConversionPrepared,
51 | unconvertedCount,
52 | totalNumberOfBatches,
53 | areThereSuccessfullyConvertedIds,
54 | areThereUnconvertedIds,
55 | minIdToProcess,
56 | maxIdToProcess,
57 | } = response;
58 | this.setState( {
59 | isConversionPrepared,
60 | unconvertedCount,
61 | totalNumberOfBatches,
62 | areThereSuccessfullyConvertedIds,
63 | areThereUnconvertedIds,
64 | minIdToProcess,
65 | maxIdToProcess,
66 | } );
67 | }
68 | return new Promise( ( resolve, reject ) => resolve() );
69 | } );
70 | }
71 |
/**
 * Prepares the conversion and, when the response reports success, points the parent
 * window at the converter's post-new screen.
 */
handleOnClickRunConversion = () => {
	const converterUrl = '/wp-admin/post-new.php?newspack-content-converter';

	return fetchPrepareConversion().then( response => {
		const isPrepared = Boolean( response && response.success );
		if ( isPrepared ) {
			window.parent.location = converterUrl;
		}
	} );
};
79 |
// Delegates to the downloadListConvertedIds() utility; the handler itself returns nothing.
handleDownloadListConverted = () => void downloadListConvertedIds();
83 |
// Delegates to the downloadListUnsuccessfullyConvertedIds() utility; the handler itself returns nothing.
handleDownloadListUnsuccessfullyConverted = () => void downloadListUnsuccessfullyConvertedIds();
87 |
/**
 * Resets the conversion and reloads the page once a truthy response arrives.
 */
handleOnClickResetConversion = () =>
	fetchResetConversion().then( response => {
		if ( ! response ) {
			return;
		}
		location.reload();
	} );
95 |
96 | render() {
97 | const {
98 | isConversionPrepared,
99 | unconvertedCount,
100 | totalNumberOfBatches,
101 | areThereSuccessfullyConvertedIds,
102 | areThereUnconvertedIds,
103 | minIdToProcess,
104 | maxIdToProcess,
105 | } = this.state;
106 | if ( '1' == isConversionPrepared ) {
107 | return (
108 |
109 |
110 |
111 |
{ __( 'Content Converter / Converting...' ) }
112 |
113 |
114 |
115 |
116 | { __(
117 | 'Conversion of your content has already been started in a designated browser tab. In case it was terminated or closed unexpectedly, you can reset the conversion here and resume converting again.'
118 | ) }
119 |
120 |
121 | { __(
122 | 'Before attempting to see results on this page or to convert again, wait for the ongoing conversion to finish up.'
123 | ) }
124 |
141 |
142 |
143 |
144 | { __(
145 | 'Once started, the conversion should not be interrupted! Your browser tab needs to remain active until conversion is complete.'
146 | ) }
147 |
148 |
149 | { __( 'Conversion permanently modifies content so it is recommended to perform a full database backup before running it.' ) }
150 |
{ sprintf( __( 'Min post ID to process is set to %d' ), minIdToProcess) }
) }
165 | { ( maxIdToProcess > 0 ) && (
{ sprintf( __( 'Max post ID to process is set to %d' ), maxIdToProcess) }
) }
166 |
167 | ) }
168 | { ( areThereSuccessfullyConvertedIds || areThereUnconvertedIds )&& (
169 |
170 | { areThereSuccessfullyConvertedIds && (
171 | { __( 'Download IDs of all converted entries' ) }
172 | ) }
173 | { areThereSuccessfullyConvertedIds && areThereUnconvertedIds && (
174 |
175 | ) }
176 | { areThereUnconvertedIds && (
177 | { __( 'Download IDs of unconverted entries' ) }
178 | ) }
179 |
180 | ) }
181 |
182 |
185 |
186 |
187 |
188 | );
189 | }
190 | }
191 | }
192 |
193 | export default Conversion;
194 |
--------------------------------------------------------------------------------
/lib/content-patcher/patchers/class-audiopatcher.php:
--------------------------------------------------------------------------------
1 | square_brackets_element_manipulator = new SquareBracketsElementManipulator();
49 | $this->wp_block_manipulator = new WpBlockManipulator();
50 | $this->html_element_manipulator = new HtmlElementManipulator();
51 | }
52 |
/**
 * See the \NewspackContentConverter\ContentPatcher\Patchers\PatcherInterface::patch_blocks_contents for description.
 *
 * Matches the [audio] elements in the HTML source against the wp:audio blocks in the converted
 * content and patches each block with the result of patch_audio_src_attribute(). The blocks
 * content is returned unmodified when either side has no matches or when the two sets of matches
 * fail validation.
 *
 * @param string $source_blocks Block content as result of Gutenberg "conversion to blocks".
 * @param string $source_html   HTML source, original content being converted.
 * @param int    $post_id       Post ID.
 *
 * @return string|false
 */
public function patch_blocks_contents( $source_blocks, $source_html, $post_id ) {

	$html_matches = $this->square_brackets_element_manipulator->match_elements_with_closing_tags( 'audio', $source_html );
	if ( ! $html_matches ) {
		// TODO: DEBUG LOG 'no elements matched in HTML'.
		return $source_blocks;
	}

	$block_matches = $this->wp_block_manipulator->match_wp_block( 'wp:audio', $source_blocks );
	if ( null === $block_matches ) {
		return $source_blocks;
	}

	if ( ! $this->validate_html_and_block_matches( $html_matches[0], $block_matches[0] ) ) {
		// TODO: DEBUG LOG 'HTML and block matches do not correspond'.
		return $source_blocks;
	}

	// Work from the last match to the first: each replacement may change the length of the string,
	// and going backwards keeps the captured offsets of the not-yet-processed matches valid.
	$html_elements  = array_reverse( $html_matches[0] );
	$block_elements = array_reverse( $block_matches[0] );

	foreach ( $html_elements as $index => $html_match ) {
		$html_element  = $html_match[0];
		$block_element = $block_elements[ $index ][0];
		$block_offset  = $block_elements[ $index ][1];

		$replacement = $this->patch_audio_src_attribute( $html_element, $block_element );
		if ( ! $replacement ) {
			continue;
		}

		$source_blocks = substr_replace( $source_blocks, $replacement, $block_offset, strlen( $block_element ) );
	}

	return $source_blocks;
}
100 |
101 | /**
102 | * Patches the audio src attribute, by searching for it in the HTML element, then applying it to the block element.
103 | *
104 | * @param string $html_element HTML element.
105 | * @param string $block_element Block element.
106 | *
107 | * @return string|false Updated block element, or false.
108 | */
109 | private function patch_audio_src_attribute( $html_element, $block_element ) {
110 |
111 | // Extract the specific src attribute from HTML [audio][/audio] element.
112 | // Different possible names of the src attributes: https://en.support.wordpress.com/accepted-filetypes/#audio .
113 | $possible_src_attributes = [ 'mp3', 'm4a', 'ogg', 'wav' ];
114 |
115 | foreach ( $possible_src_attributes as $attribute_name ) {
116 | $attribute_value = $this->square_brackets_element_manipulator->get_attribute_value( $attribute_name, $html_element );
117 | if ( $attribute_value ) {
118 | break;
119 | }
120 | }
121 |
122 | if ( ! $attribute_value ) {
123 | // TODO: DEBUG LOG 'no src audio matched in HTML'.
124 | return false;
125 | }
126 |
127 | // The found src is to be patched as a new
25 |
AAA
26 |
The second paragraph doesn’t have any attributes, so it should be skipped by the patcher
27 |
BBB
28 |
The third paragraph has a different dir attribute, so it should also be patched
29 |
CCC
30 |
Some content in the end
31 | CONTENT;
32 | }
33 |
34 | /**
35 | * Blocks contents before patching, for a comprehensive example, containing multiple elements.
36 | *
37 | * @return string Gutenberg blocks contents before patching.
38 | */
39 | public static function get_comprehensive_blocks_before_patching() {
40 | return <<
42 |
Some content before
43 |
44 |
45 |
46 |
AAA
47 |
48 |
49 |
50 |
The second paragraph doesn't have any attributes, so it should be skipped by the patcher
51 |
52 |
53 |
54 |
BBB
55 |
56 |
57 |
58 |
The third paragraph has a different dir attribute, so it should also be patched
59 |
60 |
61 |
62 |
CCC
63 |
64 |
65 |
66 |
Some content in the end
67 |
68 | CONTENT;
69 | }
70 |
71 | /**
72 | * Expected blocks contents after patching, for a comprehensive example, containing multiple elements.
73 | *
74 | * @return string Expected Gutenberg blocks contents after patching.
75 | */
76 | public static function get_comprehensive_blocks_patched_expected() {
77 | return <<
79 |
Some content before
80 |
81 |
82 |
83 |
AAA
84 |
85 |
86 |
87 |
The second paragraph doesn't have any attributes, so it should be skipped by the patcher
88 |
89 |
90 |
91 |
BBB
92 |
93 |
94 |
95 |
The third paragraph has a different dir attribute, so it should also be patched
96 |
97 |
98 |
99 |
CCC
100 |
101 |
102 |
103 |
Some content in the end
104 |
105 | CONTENT;
106 | }
107 |
108 | /**
109 | * HTML source with multiple paragraphs.
110 | *
111 | * @return string HTML
112 | */
113 | public static function get_multiple_paragraphs_html() {
114 | return <<AAA
116 |
BBB
117 |
CCC
118 | CONTENT;
119 | }
120 |
121 | /**
122 | * Blocks contents before patching, for an example containing multiple paragraphs.
123 | *
124 | * @return string Gutenberg blocks contents before patching.
125 | */
126 | public static function get_multiple_paragraphs_blocks_before_patching() {
127 | return <<
129 |
AAA
130 |
131 |
132 |
133 |
BBB
134 |
135 |
136 |
137 |
CCC
138 |
139 | CONTENT;
140 | }
141 |
142 | /**
143 | * Expected blocks contents after patching, for an example containing multiple paragraphs.
144 | *
145 | * @return string Expected Gutenberg blocks contents after patching.
146 | */
147 | public static function get_multiple_paragraphs_blocks_patched_expected() {
148 | return <<
150 |
AAA
151 |
152 |
153 |
154 |
BBB
155 |
156 |
157 |
158 |
CCC
159 |
160 | CONTENT;
161 | }
162 |
163 | /**
164 | * HTML source with multiple paragraphs out of which some don't have the dir attribute.
165 | *
166 | * @return string HTML
167 | */
168 | public static function get_some_skipped_paragraphs_html() {
169 | return <<AAA
171 |
BBB
172 |
CCC
173 | CONTENT;
174 | }
175 |
176 | /**
177 | * Blocks contents before patching, for an example where some paragraphs don't have the dir attribute.
178 | *
179 | * @return string Gutenberg blocks contents before patching.
180 | */
181 | public static function get_some_skipped_paragraphs_blocks_before_patching() {
182 | return <<
184 |
AAA
185 |
186 |
187 |
188 |
BBB
189 |
190 |
191 |
192 |
CCC
193 |
194 | CONTENT;
195 | }
196 |
197 | /**
198 | * Expected blocks contents after patching, for an example where some paragraphs don't have the dir attribute.
199 | *
200 | * @return string Expected Gutenberg blocks contents after patching.
201 | */
202 | public static function get_some_skipped_paragraphs_blocks_patched_expected() {
203 | return <<
205 |
AAA
206 |
207 |
208 |
209 |
BBB
210 |
211 |
212 |
213 |
CCC
214 |
215 | CONTENT;
216 | }
217 |
218 | /**
219 | * HTML source with multiple paragraphs where some will not have their content lost.
220 | *
221 | * @return string HTML
222 | */
223 | public static function get_some_paragraphs_ok_html() {
224 | return <<AAA
226 |
BBB
227 |
CCC
228 | CONTENT;
229 | }
230 |
231 | /**
232 | * Blocks contents before patching, multiple paragraphs where some will not have their content lost.
233 | *
234 | * @return string Gutenberg blocks contents before patching.
235 | */
236 | public static function get_some_paragraphs_ok_blocks_before_patching() {
237 | return <<
239 |
AAA
240 |
241 |
242 |
243 |
BBB
244 |
245 |
246 |
247 |
CCC
248 |
249 | CONTENT;
250 | }
251 |
252 | /**
253 | * Expected blocks contents after patching, multiple paragraphs where some will not have their content lost.
254 | *
255 | * @return string Expected Gutenberg blocks contents after patching.
256 | */
257 | public static function get_some_paragraphs_ok_blocks_patched_expected() {
258 | return <<
260 |
AAA
261 |
262 |
263 |
264 |
BBB
265 |
266 |
267 |
268 |
CCC
269 |
270 | CONTENT;
271 | }
272 |
273 | /**
274 | * HTML source for inconsistent sources.
275 | *
276 | * @return string HTML
277 | */
278 | public static function get_inconsistent_sources_html() {
279 | return <<
281 | CONTENT;
282 | }
283 |
284 | /**
285 | * Blocks contents before patching for inconsistent sources.
286 | *
287 | * @return string Gutenberg blocks contents before patching.
288 | */
289 | public static function get_inconsistent_sources_before_patching() {
290 | return <<
292 |
Some inconsistent blocks content, not supposed to get modified
Some inconsistent blocks content, not supposed to get modified
306 |
307 | CONTENT;
308 | }
309 |
310 | /**
311 | * HTML source for non pertinent HTML.
312 | *
313 | * @return string HTML
314 | */
315 | public static function get_html_is_non_pertinent_html() {
316 | return <<This has nothing to do with the patcher
318 | CONTENT;
319 | }
320 |
321 | /**
322 | * Blocks contents before patching for non pertinent HTML.
323 | *
324 | * @return string Gutenberg blocks contents before patching.
325 | */
326 | public static function get_html_is_non_pertinent_before_patching() {
327 | return <<
329 |
This has nothing to do with the patcher
330 |
331 | CONTENT;
332 | }
333 |
334 | /**
335 | * Expected blocks contents after patching for non pertinent HTML.
336 | *
337 | * @return string Expected Gutenberg blocks contents after patching.
338 | */
339 | public static function get_html_is_non_pertinent_blocks_patched_expected() {
340 | return <<
342 |
This has nothing to do with the patcher
343 |
344 | CONTENT;
345 | }
346 | }
347 |
--------------------------------------------------------------------------------
/lib/content-patcher/elementManipulators/class-squarebracketselementmanipulator.php:
--------------------------------------------------------------------------------
1 | match_inner_text( $element_name, $subject );
102 |
103 | return isset( $inner_text_matches[1][0][0] ) ? $inner_text_matches[1][0][0] : null;
104 | }
105 |
/**
 * Extracts a single attribute value from a shortcode.
 *
 * The shortcode is parsed with WP's shortcode_parse_atts(). That function splits on spaces, so a
 * value containing spaces can come back as one keyed entry followed by numerically keyed leftovers.
 * Every numerically keyed entry that follows an already seen (truthy) key is appended to that key's
 * value, separated by a single space, before the requested attribute is looked up.
 *
 * @param string $attribute_name Attribute name.
 * @param string $shortcode      Shortcode element.
 *
 * @return string|null The attribute value, or null if the shortcode has no parsable attributes or
 *                     the requested attribute is not among them.
 */
public function get_shortcode_attribute( $attribute_name, $shortcode ) {
	$attributes_values = shortcode_parse_atts( $shortcode );

	// shortcode_parse_atts() is documented as returning a string in some WP versions when nothing
	// could be parsed; only a non-empty array is usable below.
	if ( ! is_array( $attributes_values ) || empty( $attributes_values ) ) {
		return null;
	}

	// The WP's shortcode_parse_atts() explodes the attributes' values using spaces as delimiters, so let's combine the whole attribute values from the result.
	$previous_key = null;
	foreach ( $attributes_values as $key => $value ) {
		if ( $previous_key && is_numeric( $key ) ) {
			// Leftover fragment: glue it back onto the value it was split from.
			$attributes_values[ $previous_key ] .= ' ' . $value;
			unset( $attributes_values[ $key ] );
			continue;
		}
		$previous_key = $key;
	}

	return isset( $attributes_values[ $attribute_name ] ) ? $attributes_values[ $attribute_name ] : null;
}
133 |
/**
 * Reads the value of an attribute from a square brackets element.
 *
 * @param string $attribute_name The attribute name.
 * @param string $element        The element.
 *
 * @return string|false The matched attribute value, or false when the attribute was not matched.
 */
public function get_attribute_value( $attribute_name, $element ) {
	$value_match = $this->get_element_square_brackets_attribute_value_preg_match( $attribute_name, $element );
	if ( false === $value_match ) {
		return false;
	}

	// Index 0 of a PREG_OFFSET_CAPTURE group is the matched text (index 1 is its offset).
	return $value_match[0];
}
147 |
/**
 * Finds the inner text of every occurrence of a square brackets element.
 * Uses preg_match_all() with the PREG_OFFSET_CAPTURE option.
 *
 * @param string $element_name Name of the square bracket element, e.g. "caption", for the [caption]...[/caption] element.
 * @param string $subject      Source in which to search for matches.
 *
 * @return array|null preg_match_all's $matches, or null when nothing matched or matching failed.
 */
private function match_inner_text( $element_name, $subject ) {
	$regex       = sprintf( self::PATTERN_SQUARE_BRACKETS_ELEMENT_INNER_TEXT, $element_name );
	$match_count = preg_match_all( $regex, $subject, $matches, PREG_OFFSET_CAPTURE );

	// preg_match_all() yields false on error and 0 when there are no matches.
	if ( false === $match_count || 0 === $match_count ) {
		return null;
	}

	return $matches;
}
163 |
/**
 * Matches an attribute value inside the opening part of a square brackets element.
 * Uses preg_match() with the PREG_OFFSET_CAPTURE option and returns only the first capture group.
 *
 * @param string $attribute_name Attribute name.
 * @param string $html_element   Attribute element.
 *
 * @return array|false The first capture group as [ matched text, offset ], or false when there is
 *                     no match or the match starts at/after the first closing square bracket.
 */
private function get_element_square_brackets_attribute_value_preg_match( $attribute_name, $html_element ) {
	$regex   = sprintf( self::PATTERN_ELEMENT_ATTRIBUTE_VALUE, $attribute_name );
	$matched = preg_match( $regex, $html_element, $match, PREG_OFFSET_CAPTURE );
	if ( 1 !== $matched ) {
		return false;
	}

	// Only accept a value located before the first "]", i.e. within the element's own tag and not
	// somewhere in its inner content.
	$value_offset           = $match[1][1];
	$closing_bracket_offset = strpos( $html_element, ']' );

	return $value_offset < $closing_bracket_offset ? $match[1] : false;
}
187 |
/**
 * Matches all shortcodes and captures their designations.
 *
 * A shortcode without any attributes (e.g. `[gallery]`) is matched too, with its full designation
 * captured. Closing tags such as `[/caption]` are not matched, because a designation can't contain
 * a forward slash.
 *
 * @param string $content Content.
 *
 * @return array `preg_match_all`'s $match array: index 0 holds the full shortcodes, index 1 their designations.
 */
public function match_all_shortcode_designations( $content ) {
	$matches                       = [];
	$pattern_shortcode_designation = '|
		\[              # shortcode opening bracket
		([^\s/\]]+)     # match the shortcode designation string (which is anything except space, forward slash, and closing bracket)
		[^\]]*          # zero or more of any char except closing bracket
		\]              # closing bracket
	|xim';
	preg_match_all( $pattern_shortcode_designation, $content, $matches );

	return $matches;
}
207 |
/**
 * Matches a specific shortcode, e.g `[video ...]`.
 *
 * The designation is treated as literal text. It is matched as a prefix of whatever follows the
 * opening bracket, so both `[video]` and `[video src="..."]` match for "video".
 *
 * @param string $shortcode_designation Shortcode designation, e.g. "video" in `[video ...]`.
 * @param string $content               Content to look for matches in.
 *
 * @return array `preg_match_all`'s $match array with all the matched shortcodes at index 0.
 */
public function match_shortcode_designations( $shortcode_designation, $content ) {
	$matches                       = [];
	$pattern_shortcode_designation = '|
		\[              # shortcode opening bracket
		%s              # shortcode designation
		[^\]]*          # zero or more of any char except closing bracket
		\]              # closing bracket
	|xim';

	// Escape regex meta characters (and the "|" delimiter) so the designation can only match literally.
	$pattern_shortcode_designation = sprintf(
		$pattern_shortcode_designation,
		preg_quote( $shortcode_designation, '|' )
	);
	preg_match_all( $pattern_shortcode_designation, $content, $matches );

	return $matches;
}
229 | }
230 |
--------------------------------------------------------------------------------
/tests/fixtures/unit/content-patcher/patchers/class-dataproviderblockquotepatcher.php:
--------------------------------------------------------------------------------
1 | Some content before
25 |
AAA
26 |
The second blockquote doesn’t have any attributes, so it should be skipped by the patcher
27 |
BBB
28 |
The third blockquotes has a different data-lang attribute, so it should also be patched
29 |
CCC
30 |
Some content in the end
31 | CONTENT;
32 | }
33 |
34 | /**
35 | * Blocks contents before patching, for a comprehensive example, containing multiple elements.
36 | *
37 | * @return string Gutenberg blocks contents before patching.
38 | */
39 | public static function get_comprehensive_blocks_before_patching() {
40 | return <<
42 |
Some content before
43 |
44 |
45 |
46 |
AAA
47 |
48 |
49 |
50 |
The second blockquote doesn't have any attributes, so it should be skipped by the patcher
51 |
52 |
53 |
54 |
BBB
55 |
56 |
57 |
58 |
The third blockquotes has a different data-lang attribute, so it should also be patched
59 |
60 |
61 |
62 |
CCC
63 |
64 |
65 |
66 |
Some content in the end
67 |
68 | CONTENT;
69 | }
70 |
71 | /**
72 | * Expected blocks contents after patching, for a comprehensive example, containing multiple elements.
73 | *
74 | * @return string Expected Gutenberg blocks contents after patching.
75 | */
76 | public static function get_comprehensive_blocks_patched_expected() {
77 | return <<
79 |
Some content before
80 |
81 |
82 |
83 |
AAA
84 |
85 |
86 |
87 |
The second blockquote doesn't have any attributes, so it should be skipped by the patcher
88 |
89 |
90 |
91 |
BBB
92 |
93 |
94 |
95 |
The third blockquotes has a different data-lang attribute, so it should also be patched
96 |
97 |
98 |
99 |
CCC
100 |
101 |
102 |
103 |
Some content in the end
104 |
105 | CONTENT;
106 | }
107 |
108 | /**
109 | * HTML source with multiple blockquotes.
110 | *
111 | * @return string HTML
112 | */
113 | public static function get_multiple_blockquotes_html() {
114 | return <<AAA
116 |
BBB
117 |
CCC
118 | CONTENT;
119 | }
120 |
121 | /**
122 | * Blocks contents before patching, for an example containing multiple blockquotes.
123 | *
124 | * @return string Gutenberg blocks contents before patching.
125 | */
126 | public static function get_multiple_blockquotes_blocks_before_patching() {
127 | return <<
129 |
AAA
130 |
131 |
132 |
133 |
BBB
134 |
135 |
136 |
137 |
CCC
138 |
139 | CONTENT;
140 | }
141 |
142 | /**
143 | * Expected blocks contents after patching, for an example containing multiple blockquotes.
144 | *
145 | * @return string Expected Gutenberg blocks contents after patching.
146 | */
147 | public static function get_multiple_blockquotes_blocks_patched_expected() {
148 | return <<
150 |
AAA
151 |
152 |
153 |
154 |
BBB
155 |
156 |
157 |
158 |
CCC
159 |
160 | CONTENT;
161 | }
162 |
163 | /**
164 | * HTML source with multiple blockquotes out of which some don't have the data-lang attribute.
165 | *
166 | * @return string HTML
167 | */
168 | public static function get_some_skipped_blockquotes_html() {
169 | return <<AAA
171 |
BBB
172 |
CCC
173 | CONTENT;
174 | }
175 |
176 | /**
177 | * Blocks contents before patching, for an example where some blockquotes don't have the data-lang attribute.
178 | *
179 | * @return string Gutenberg blocks contents before patching.
180 | */
181 | public static function get_some_skipped_blockquotes_blocks_before_patching() {
182 | return <<
184 |
AAA
185 |
186 |
187 |
188 |
BBB
189 |
190 |
191 |
192 |
CCC
193 |
194 | CONTENT;
195 | }
196 |
197 | /**
198 | * Expected blocks contents after patching, for an example where some blockquotes don't have the data-lang attribute.
199 | *
200 | * @return string Expected Gutenberg blocks contents after patching.
201 | */
202 | public static function get_some_skipped_blockquotes_blocks_patched_expected() {
203 | return <<
205 |
AAA
206 |
207 |
208 |
209 |
BBB
210 |
211 |
212 |
213 |
CCC
214 |
215 | CONTENT;
216 | }
217 |
218 | /**
219 | * HTML source with multiple blockquotes where some will not have their content lost.
220 | *
221 | * @return string HTML
222 | */
223 | public static function get_some_blockquotes_ok_html() {
224 | return <<AAA
226 |
BBB
227 |
CCC
228 | CONTENT;
229 | }
230 |
231 | /**
232 | * Blocks contents before patching, multiple blockquotes where some will not have their content lost.
233 | *
234 | * @return string Gutenberg blocks contents before patching.
235 | */
236 | public static function get_some_blockquotes_ok_blocks_before_patching() {
237 | return <<
239 |
AAA
240 |
241 |
242 |
243 |
BBB
244 |
245 |
246 |
247 |
CCC
248 |
249 | CONTENT;
250 | }
251 |
252 | /**
253 | * Expected blocks contents after patching, multiple blockquotes where some will not have their content lost.
254 | *
255 | * @return string Expected Gutenberg blocks contents after patching.
256 | */
257 | public static function get_some_blockquotes_ok_blocks_patched_expected() {
258 | return <<
260 |
AAA
261 |
262 |
263 |
264 |
BBB
265 |
266 |
267 |
268 |
CCC
269 |
270 | CONTENT;
271 | }
272 |
273 | /**
274 | * HTML source for inconsistent sources.
275 | *
276 | * @return string HTML
277 | */
278 | public static function get_inconsistent_sources_html() {
279 | return <<AAA
281 | CONTENT;
282 | }
283 |
284 | /**
285 | * Blocks contents before patching for inconsistent sources.
286 | *
287 | * @return string Gutenberg blocks contents before patching.
288 | */
289 | public static function get_inconsistent_sources_before_patching() {
290 | return <<
292 |
Some inconsistent blocks content, not supposed to get modified
Some inconsistent blocks content, not supposed to get modified
306 |
307 | CONTENT;
308 | }
309 |
310 | /**
311 | * HTML source for non pertinent HTML.
312 | *
313 | * @return string HTML
314 | */
315 | public static function get_html_is_non_pertinent_html() {
316 | return <<This has nothing to do with the patcher
318 | CONTENT;
319 | }
320 |
321 | /**
322 | * Blocks contents before patching for non pertinent HTML.
323 | *
324 | * @return string Gutenberg blocks contents before patching.
325 | */
326 | public static function get_html_is_non_pertinent_before_patching() {
327 | return <<
329 |
This has nothing to do with the patcher
330 |
331 | CONTENT;
332 | }
333 |
334 | /**
335 | * Expected blocks contents after patching for non pertinent HTML.
336 | *
337 | * @return string Expected Gutenberg blocks contents after patching.
338 | */
339 | public static function get_html_is_non_pertinent_blocks_patched_expected() {
340 | return <<
342 |
This has nothing to do with the patcher
343 |
344 | CONTENT;
345 | }
346 | }
347 |
--------------------------------------------------------------------------------
/assets/src/utilities/index.js:
--------------------------------------------------------------------------------
1 | /**
2 | * WordPress dependencies.
3 | */
4 | import apiFetch from '@wordpress/api-fetch';
5 | import { createBlock, parse, rawHandler, serialize } from '@wordpress/blocks';
6 | import { dispatch, select, resolveSelect } from '@wordpress/data';
7 | import { store as coreStore } from '@wordpress/core-data';
8 |
9 | const NEWSPACK_CONVERTER_API_BASE_URL = '/newspack-content-converter';
10 |
/**
 * Runs conversion of multiple Posts, one after another.
 *
 * Every Post's conversion is chained onto the previous one, so a conversion only starts once the
 * preceding one has settled.
 *
 * @param {Array} postIds Post IDs; each one is passed through parseInt() before conversion.
 * @returns {Promise} Promise which settles after the last Post in the list has been processed.
 */
export function runMultiplePosts( postIds ) {
	return postIds.reduce( ( chain, rawId, index ) => {
		const numericId = parseInt( rawId );

		return chain.then( () => {
			console.log( `converting ${ numericId }, ${ index + 1 }/${ postIds.length } ` );
			return runSinglePost( numericId );
		} );
	}, Promise.resolve() );
}
29 |
/**
 * Conversion of a single Post.
 *
 * The steps run strictly in sequence: clear the editor, fetch the Post's content, insert it into
 * the editor, convert it to blocks, read the resulting blocks content and save it.
 * Any error thrown along the way is logged to the console and swallowed.
 *
 * @param postId ID of the Post to convert.
 * @returns {Promise} Resolves with the updatePost() result, or with undefined if an error was caught.
 */
export async function runSinglePost( postId ) {
	try {
		await removeAllBlocks();
		const fetchedHtml = await getPostContentById( postId );
		const insertedHtml = await insertClassicBlockWithContent( fetchedHtml );
		const convertedHtml = await dispatchConvertClassicToBlocks( insertedHtml );
		const [ blocks, html ] = await getAllBlocksContents( postId, convertedHtml );

		return await updatePost( postId, blocks, html );
	} catch ( error ) {
		console.error( 'A conversion error occured in runSinglePost:' );
		console.error( error );
	}
}
49 |
/**
 * Clears all blocks from the Block Editor by resetting its blocks to an empty list.
 *
 * @returns {Promise} Resolves (with no value) once the reset action has been dispatched.
 */
export async function removeAllBlocks() {
	const blockEditor = dispatch( 'core/block-editor' );
	blockEditor.resetBlocks( [] );
}
60 |
/**
 * Fetches contents of a single Post or Page from the plugin's `get-post-content` endpoint.
 *
 * @param id Post or Page ID.
 * @returns {Promise} Resolves with the endpoint's response (runSinglePost() treats it as the Post's HTML).
 */
export async function getPostContentById( id ) {
	const response = await apiFetch( {
		path: `${ NEWSPACK_CONVERTER_API_BASE_URL }/get-post-content/${ id }`,
	} );

	return response;
}
74 |
/**
 * Parses the Post's content and inserts the result into the Block Editor.
 * (Presumably unconverted HTML ends up as a Classic Block, which the next conversion step looks for.)
 *
 * @param {string} html HTML source before conversion.
 * @returns {Promise} Resolves with the same, unmodified `html`.
 */
export async function insertClassicBlockWithContent( html ) {
	const parsedBlocks = parse( html );
	dispatch( 'core/block-editor' ).insertBlocks( parsedBlocks );

	return html;
}
90 |
/**
 * Triggers conversion of all Classic Blocks found in the Block Editor into Gutenberg Blocks.
 *
 * Gallery Blocks need to have their attachments data pulled from REST API and their inner blocks
 * need to be populated after that with the correct image data. Inner image blocks without an ID
 * are dropped from the gallery.
 *
 * Any failure (e.g. a rejected media request) rejects the returned Promise, so the caller's error
 * handling gets to run instead of the conversion stalling.
 *
 * @param {string} html HTML source before conversion.
 * @returns {Promise} Resolves with the same, unmodified `html` once all Classic Blocks are replaced.
 */
export async function dispatchConvertClassicToBlocks( html ) {
	const classicBlocks = select( 'core/block-editor' )
		.getBlocks()
		.filter( block => block.name === 'core/freeform' );

	for ( const classicBlock of classicBlocks ) {
		const convertedBlocks = rawHandler( {
			HTML: serialize( classicBlock ),
		} );

		const galleryBlocks = convertedBlocks.filter( block => block.name === 'core/gallery' );

		for ( const galleryBlock of galleryBlocks ) {
			const attachmentIds = galleryBlock.innerBlocks
				.map( imageBlock => imageBlock.attributes.id )
				.filter( id => Number.isInteger( id ) );

			// Fetch Image Data from API. Skipped when there is nothing to look up; a failed lookup
			// (null result) is treated the same as "no attachments found".
			const attachments = attachmentIds.length
				? ( await resolveSelect( coreStore ).getMediaItems( {
						include: attachmentIds,
						per_page: -1,
						orderby: 'include',
				  } ) ) || []
				: [];

			const { sizeSlug, linkTo } = galleryBlock.attributes;

			// Build a fresh list rather than splicing while iterating, which would skip the block
			// following every removed one.
			galleryBlock.innerBlocks = galleryBlock.innerBlocks
				// Image Blocks with an empty ID need to be removed.
				.filter( galleryImageBlock => galleryImageBlock.attributes.id )
				.map( galleryImageBlock => {
					const { id } = galleryImageBlock.attributes;
					const attachment = attachments.find( item => item.id === id );

					const imageBlock = createBlock( 'core/image', {
						url: attachment?.source_url,
						id: parseInt( id, 10 ),
						alt: attachment?.alt_text,
						sizeSlug,
						linkDestination: linkTo,
						caption: attachment?.caption?.raw,
					} );

					dispatch( 'core/block-editor' ).replaceBlocks(
						galleryImageBlock.clientId,
						imageBlock
					);

					return imageBlock;
				} );
		}

		dispatch( 'core/block-editor' ).replaceBlocks( classicBlock.clientId, convertedBlocks );
	}

	return html;
}
163 |
/**
 * Fetches all blocks' contents from the Block Editor.
 *
 * TODO: getEditedPostContent() works only on select("core/editor") but not on select("core/block-editor") -- might be deprecated; find another approach to do this.
 *
 * @param {number} postId Post ID (not used by this function).
 * @param {string} html   HTML source before conversion.
 * @returns {Promise} Resolves with a two-element array: [ edited post content, html ].
 */
export async function getAllBlocksContents( postId, html ) {
	const editedContent = select( 'core/editor' ).getEditedPostContent();

	return [ editedContent, html ];
}
179 |
/**
 * Updates Post content by POSTing both the converted blocks and the original HTML to the
 * plugin's `conversion/update-post` endpoint.
 *
 * @param {number} postId Post ID.
 * @param {string} blocks Blocks Post content.
 * @param {string} html   HTML source before conversion.
 * @returns {Promise} Resolves with the endpoint's response.
 * @throws {Error} Synchronously, if `blocks` is empty.
 */
export function updatePost( postId, blocks, html ) {
	if ( ! blocks ) {
		throw new Error( 'No resulting blocks content.' );
	}

	const request = {
		path: `${ NEWSPACK_CONVERTER_API_BASE_URL }/conversion/update-post`,
		method: 'POST',
		data: {
			post_id: postId,
			content_blocks: blocks,
			content_html: html,
		},
	};

	return apiFetch( request ).then( response => response );
}
202 |
/**
 * Requests the plugin's `conversion/get-batch-data` endpoint.
 *
 * @returns {Promise} Resolves with the endpoint's response.
 */
export async function fetchConversionBatch() {
	const response = await apiFetch( {
		path: `${ NEWSPACK_CONVERTER_API_BASE_URL }/conversion/get-batch-data`,
	} );

	return response;
}
208 |
/**
 * Requests the plugin's `settings/get-info` endpoint.
 *
 * @returns {Promise} Resolves with the endpoint's response.
 */
export async function fetchSettingsInfo() {
	const response = await apiFetch( {
		path: `${ NEWSPACK_CONVERTER_API_BASE_URL }/settings/get-info`,
	} );

	return response;
}
214 |
/**
 * Requests the plugin's `restore/get-info` endpoint.
 *
 * @returns {Promise} Resolves with the endpoint's response.
 */
export async function fetchRestoreInfo() {
	const response = await apiFetch( {
		path: `${ NEWSPACK_CONVERTER_API_BASE_URL }/restore/get-info`,
	} );

	return response;
}
220 |
/**
 * Requests the plugin's `conversion/get-info` endpoint.
 *
 * @returns {Promise} Resolves with the endpoint's response.
 */
export async function fetchConversionInfo() {
	const response = await apiFetch( {
		path: `${ NEWSPACK_CONVERTER_API_BASE_URL }/conversion/get-info`,
	} );

	return response;
}
226 |
/**
 * Requests the plugin's `conversion/prepare` endpoint.
 *
 * @returns {Promise} Resolves with the endpoint's response.
 */
export async function fetchPrepareConversion() {
	const response = await apiFetch( {
		path: `${ NEWSPACK_CONVERTER_API_BASE_URL }/conversion/prepare`,
	} );

	return response;
}
232 |
/**
 * Requests the plugin's `conversion/reset` endpoint.
 *
 * @returns {Promise} Resolves with the endpoint's response.
 */
export async function fetchResetConversion() {
	const response = await apiFetch( {
		path: `${ NEWSPACK_CONVERTER_API_BASE_URL }/conversion/reset`,
	} );

	return response;
}
238 |
/**
 * Restores post contents by POSTing the given IDs to the `restore/restore-post-contents` endpoint.
 *
 * @param postIds Post IDs, sent to the endpoint as `post_ids`.
 * @returns {Promise} Resolves with the endpoint's response.
 */
export async function fetchRestorePostContents( postIds ) {
	const response = await apiFetch( {
		path: `${ NEWSPACK_CONVERTER_API_BASE_URL }/restore/restore-post-contents`,
		method: 'POST',
		data: { post_ids: postIds },
	} );

	return response;
}
252 |
/**
 * Flush all meta backups, by requesting the `conversion/flush-all-meta-backups` endpoint.
 *
 * @returns {Promise} Resolves with the endpoint's response.
 */
export async function fetchFlushAllMetaBackups() {
	const response = await apiFetch( {
		path: `${ NEWSPACK_CONVERTER_API_BASE_URL }/conversion/flush-all-meta-backups`,
	} );

	return response;
}
262 |
/**
 * Fetches an endpoint whose response carries an `ids` value and offers that value to the user as a
 * CSV file download. Does nothing if the response is empty or has no `ids`.
 *
 * @param {string} path     API path to fetch.
 * @param {string} filename Name suggested for the downloaded file.
 * @returns {Promise} Resolves (with no value) after the download has been triggered or skipped.
 */
function downloadIdsAsCsv( path, filename ) {
	return apiFetch( { path } ).then( response => {
		if ( ! response || ! response.ids ) {
			return;
		}

		// Create a Blob from the CSV content with an URL.
		const blob = new Blob( [ response.ids ], { type: 'text/csv;charset=utf-8;' } );
		const url = URL.createObjectURL( blob );

		// Create a temporary anchor element with URL and filename.
		const link = document.createElement( 'a' );
		link.href = url;
		link.download = filename;

		// Click the anchor to start download.
		link.click();

		// Clean up.
		URL.revokeObjectURL( url );
	} );
}

/**
 * Downloads the list of converted Post IDs as `converted_ids.csv`.
 *
 * @returns {Promise} Resolves (with no value) after the download has been triggered or skipped.
 */
export function downloadListConvertedIds() {
	return downloadIdsAsCsv(
		NEWSPACK_CONVERTER_API_BASE_URL + '/conversion/get-all-converted-ids',
		'converted_ids.csv'
	);
}

/**
 * Downloads the list of unsuccessfully converted Post IDs as `unconverted_ids.csv`.
 *
 * @returns {Promise} Resolves (with no value) after the download has been triggered or skipped.
 */
export function downloadListUnsuccessfullyConvertedIds() {
	return downloadIdsAsCsv(
		NEWSPACK_CONVERTER_API_BASE_URL + '/conversion/get-all-unconverted-ids',
		'unconverted_ids.csv'
	);
}
310 |
// Default export bundling the conversion, info, restore, backup-flush and download helpers, for
// consumers importing the utilities as a single object.
export default {
	runSinglePost,
	runMultiplePosts,
	fetchConversionBatch,
	fetchSettingsInfo,
	fetchRestoreInfo,
	fetchConversionInfo,
	fetchPrepareConversion,
	fetchResetConversion,
	fetchRestorePostContents,
	fetchFlushAllMetaBackups,
	downloadListConvertedIds,
	downloadListUnsuccessfullyConvertedIds,
};
323 |
--------------------------------------------------------------------------------
/lib/content-patcher/elementManipulators/class-htmlelementmanipulator.php:
--------------------------------------------------------------------------------
1 | This can be matched by this regex
23 | */
24 | const PATTERN_HTML_ELEMENT = '|
25 | < # beginning of the HTML element
26 | %1$s # element name/designation, should be substituted by using sprintf(), eg. sprintf( $this_pattern, \'img\' );
27 | .*? # anything in the middle
28 | > # closing this part
29 | .*? # anything in the middle
30 | # beginning of the closing tag
31 | %1$s # element name/designation
32 | > # end of element
33 | |xims';
34 |
35 | /**
36 | * Regex pattern to match all occurences of a self closing HTML element.
37 | * The element name/designation needs to be substituted by sprintf().
38 | *
39 | * For example, can match all image elements:
40 | *
41 | *
42 | * Used for "self closing elements".
43 | * "Self closing elements" are those that don't have specific closing tags. For example,
uses a closing tag
:
44 | *
...
45 | * but doesn't, and it is a "self closing element":
46 | *
47 | */
48 | const PATTERN_HTML_ELEMENT_SELF_CLOSING = '/
49 | < # beginning of the HTML element
50 | %s # element name, should be substituted by using sprintf(), eg. sprintf( $this_pattern, \'img\' );
51 | .*? # anything in the middle
52 | > # > is the end of the element
53 | /xims';
54 |
55 | /**
56 | * Regex pattern to match the attribute in a HTML element. It matches two groups: the attribute name, and the value.
57 | * The element name/designation needs to be substituted by sprintf().
58 | */
59 | const PATTERN_HTML_ELEMENT_ATTRIBUTE_WITH_VALUE = '/
60 | .*? # Subject is HTML element, so beginning is not important
61 | (%s) # find and group the attribute name
62 | \s* # next zero or more spaces
63 | = # the equals char
64 | \s* # once again, with possible zero or more spaces
65 | ["\'] # the attribute value starts with one double or a single quote
66 | ([^"\']+) # capture (using parenthesis) the attr value, one or more chars except (and up to) double or single quote
67 | .*? # we captured all we were looking for, take the rest of the element.
68 |
69 | /xims';
70 |
/**
 * Match elements with closing tags.
 * For example, a paragraph is written with a separate closing tag, as in:
 *      <p>...</p>
 * whereas an image element doesn't use a closing tag:
 *      <img src="..." />
 *
 * @param string $element_name HTML designation, eg. 'p', or 'img'.
 * @param string $subject      HTML.
 *
 * @return array|null preg_match_all with PREG_OFFSET_CAPTURE matches, or null if no matches.
 */
public function match_elements_with_closing_tags( $element_name, $subject ) {
	$regex       = sprintf( self::PATTERN_HTML_ELEMENT, $element_name );
	$match_count = preg_match_all( $regex, $subject, $matches, PREG_OFFSET_CAPTURE );

	// Both a failed run (false) and an empty result (0) are reported as "no matches".
	if ( false === $match_count || 0 === $match_count ) {
		return null;
	}

	return $matches;
}
89 |
/**
 * Match elements with self closing tags.
 * For example, an image is an element with a self closing tag:
 *      <img src="..." />
 * while a paragraph uses a separate closing tag, as in:
 *      <p>...</p>
 *
 * @param string $element_name HTML designation, eg. 'p', or 'img'.
 * @param string $subject      HTML.
 *
 * @return array|null preg_match_all with PREG_OFFSET_CAPTURE matches, or null if no matches.
 */
public function match_elements_with_self_closing_tags( $element_name, $subject ) {
	$regex       = sprintf( self::PATTERN_HTML_ELEMENT_SELF_CLOSING, $element_name );
	$match_count = preg_match_all( $regex, $subject, $matches, PREG_OFFSET_CAPTURE );

	// Both a failed run (false) and an empty result (0) are reported as "no matches".
	if ( false === $match_count || 0 === $match_count ) {
		return null;
	}

	return $matches;
}
108 |
/**
 * Gets HTML element's attribute value.
 *
 * @param string $attribute_name The attribute name.
 * @param string $html_element   The HTML element.
 *
 * @return string|false The attribute value, or false if the attribute wasn't matched on the element.
 */
public function get_attribute_value( $attribute_name, $html_element ) {
	$attribute_match = $this->get_attribute_with_value_preg_match( $attribute_name, $html_element );
	if ( false === $attribute_match ) {
		return false;
	}

	// Group 2 is the attribute value; with PREG_OFFSET_CAPTURE its index 0 is the matched text.
	return $attribute_match[2][0];
}
123 |
/**
 * Gets the position of HTML element's attribute value.
 *
 * @param string $attribute_name The attribute name.
 * @param string $html_element   The HTML element.
 *
 * @return int|false Offset of the attribute value within the element, or false if the attribute
 *                   wasn't matched on the element.
 */
public function get_attribute_value_position( $attribute_name, $html_element ) {
	$attribute_match = $this->get_attribute_with_value_preg_match( $attribute_name, $html_element );
	if ( false === $attribute_match ) {
		return false;
	}

	// Group 2 is the attribute value; with PREG_OFFSET_CAPTURE its index 1 is the offset.
	return $attribute_match[2][1];
}
138 |
/**
 * Searches the HTML element for an attribute, using preg_match() with the PREG_OFFSET_CAPTURE
 * option. Two regex groups are matched: the attribute name, and the attribute value.
 *
 * @param string $attribute_name The attribute name.
 * @param string $html_element   HTML element.
 *
 * @return array|bool preg_match() with PREG_OFFSET_CAPTURE result, or false if nothing matched or
 *                    the matched value isn't located before the element's first ">".
 */
private function get_attribute_with_value_preg_match( $attribute_name, $html_element ) {
	$regex   = sprintf( self::PATTERN_HTML_ELEMENT_ATTRIBUTE_WITH_VALUE, $attribute_name );
	$matched = preg_match( $regex, $html_element, $match, PREG_OFFSET_CAPTURE );
	if ( 1 !== $matched ) {
		return false;
	}

	// The value must sit inside the element's own tag. Anything past the first ">" belongs to the
	// inner text, which could contain another element with its own attribute and value.
	$value_offset           = $match[2][1];
	$first_tag_close_offset = strpos( $html_element, '>' );

	return $value_offset < $first_tag_close_offset ? $match : false;
}
165 |
/**
 * Replaces the element's attribute value.
 * If the attribute (or its position) can't be found, the element is returned unchanged.
 *
 * @param string $element             HTML element.
 * @param string $attribute_name      Attribute name.
 * @param string $attribute_value_new Attribute value.
 *
 * @return string Updated element.
 */
public function replace_attribute_value( $element, $attribute_name, $attribute_value_new ) {
	$current_value = $this->get_attribute_value( $attribute_name, $element );

	// The position is only looked up once the value itself has been found.
	$value_offset = false === $current_value
		? false
		: $this->get_attribute_value_position( $attribute_name, $element );

	if ( false === $current_value || false === $value_offset ) {
		// TODO: DEBUG LOG 'attribute not found in element'.
		return $element;
	}

	// Swap exactly the bytes of the old value for the new one.
	return substr_replace( $element, $attribute_value_new, $value_offset, strlen( $current_value ) );
}
192 |
/**
 * Adds an attribute to the HTML element. Warning -- it doesn't search if attribute already exists.
 *
 * The attribute is inserted right before the end of the element's opening tag, which is either '>' or,
 * for a self-closing tag, '/>'. The value is inserted verbatim, without any escaping.
 *
 * If the element contains no '>' at all there is no tag end to insert before, and the element is
 * returned unchanged.
 *
 * @param string $element         The HTML element.
 * @param string $attribute_name  The attribute name.
 * @param string $attribute_value The attribute value.
 *
 * @return string The element with the attribute added, or the unchanged element if it has no closing bracket.
 */
public function add_attribute( $element, $attribute_name, $attribute_value ) {

	// Element tag could end with either '>' or '/>'.
	$search_1 = '>';
	$pos_1    = strpos( $element, $search_1 );
	if ( false === $pos_1 ) {
		// Without this guard, false would be used as the substr_replace() offset and the first
		// character of the element would get overwritten.
		return $element;
	}

	$search_2 = '/>';
	$pos_2    = strpos( $element, $search_2 );
	if ( false !== $pos_2 && $pos_2 <= $pos_1 ) {
		// The first '>' belongs to a self-closing '/>', so insert before the slash.
		$search = $search_2;
		$pos    = $pos_2;
	} else {
		$search = $search_1;
		$pos    = $pos_1;
	}

	// Replace the tag end with the new attribute followed by that same tag end.
	$replace = ' ' . $attribute_name . '="' . $attribute_value . '"' . $search;

	return substr_replace( $element, $replace, $pos, strlen( $search ) );
}
221 |
/**
 * The method reads the attribute value from the source element, and patches its value to the destination element.
 *
 * The destination element is returned unchanged when:
 *  - the attribute is not found on the source element,
 *  - the destination element already has the attribute (it is never overwritten),
 *  - the destination element has no '>' to insert the attribute before.
 *
 * "Not found" means get_attribute_value() returned false. Values such as "0" or an empty string count as
 * found, so they are copied from the source and are recognised as already present on the destination.
 *
 * @param string $element_source      Source element from which the attribute is read.
 * @param string $element_destination Destination element, to which the patch is applied.
 * @param string $attribute_name      The attribute name.
 *
 * @return string Patched HTML element.
 */
public function patch_attribute( $element_source, $element_destination, $attribute_name ) {

	$attribute_value = $this->get_attribute_value( $attribute_name, $element_source );
	if ( false === $attribute_value ) {
		// TODO: DEBUG LOG 'attribute not found in source HTML'.
		return $element_destination;
	}

	// Strict comparison on purpose: a truthiness check would treat an existing `attr="0"` or `attr=""`
	// as missing and a duplicate attribute would get added.
	if ( false !== $this->get_attribute_value( $attribute_name, $element_destination ) ) {
		// TODO: DEBUG LOG 'attribute already set on destination HTML'.
		return $element_destination;
	}

	// No closing angle bracket means there is no opening tag end to insert the attribute before.
	if ( false === strpos( $element_destination, '>' ) ) {
		return $element_destination;
	}

	// Inserts ` name="value"` before the tag's '>' or '/>'.
	return $this->add_attribute( $element_destination, $attribute_name, $attribute_value );
}
266 |
267 |
/**
 * This checks whether class is already assigned, and appends the class if not.
 *
 * If the element has no class attribute, a new `class="..."` attribute is added to its opening tag.
 * If it has one, the attribute is rewritten as `class="<existing> <new>"`, or left with its existing
 * value when the class is already listed. Existing classes are recognised when separated by any
 * whitespace (spaces, tabs, newlines) and are compared strictly as strings.
 *
 * The element is returned unchanged if it has neither a class attribute nor a '>' to insert one before.
 *
 * @param string $element     The HTML element.
 * @param string $class_value Class value.
 *
 * @return string The patched element.
 */
public function patch_class( $element, $class_value ) {
	$class_value_existing = $this->get_attribute_value( 'class', $element );

	// No class attribute yet -- set it anew.
	if ( false === $class_value_existing ) {
		// Without a closing angle bracket there is no tag end to insert before, so leave the element untouched.
		if ( false === strpos( $element, '>' ) ) {
			return $element;
		}

		// Inserts ` class="value"` before the tag's '>' or '/>'.
		return $this->add_attribute( $element, 'class', $class_value );
	}

	// This is determining the new class value. The existing value's text is kept as it is, and the new
	// class gets appended only if it isn't among the existing whitespace-separated classes.
	$existing_classes = preg_split( '/\s+/', $class_value_existing, -1, PREG_SPLIT_NO_EMPTY );
	if ( ! is_array( $existing_classes ) ) {
		$existing_classes = [];
	}

	if ( in_array( $class_value, $existing_classes, true ) ) {
		$classes_value_patched = $class_value_existing;
	} elseif ( empty( $existing_classes ) ) {
		// Existing attribute is empty or whitespace only, so avoid producing a leading space.
		$classes_value_patched = $class_value;
	} else {
		$classes_value_patched = $class_value_existing . ' ' . $class_value;
	}

	// This is updating the existing class attribute with the new value.
	$matches = $this->get_attribute_with_value_preg_match( 'class', $element );
	if ( false === $matches ) {
		// TODO: DEBUG LOG class not found -- not expected to be reached, since the value was found above.
		return $element;
	}

	// Replace everything from the start of the attribute name up to and including the closing quote.
	$pos_start = $matches[1][1];
	$pos_end   = $matches[2][1] + strlen( $class_value_existing ) + strlen( '"' );

	return substr_replace( $element, 'class="' . $classes_value_patched . '"', $pos_start, $pos_end - $pos_start );
}
331 | }
332 |
--------------------------------------------------------------------------------