├── lib
    ├── html5
    │   ├── HTML5
    │   │   ├── Exception.php
    │   │   ├── Parser
    │   │   │   ├── ParseError.php
    │   │   │   ├── FileInputStream.php
    │   │   │   ├── CharacterReference.php
    │   │   │   ├── README.md
    │   │   │   ├── InputStream.php
    │   │   │   ├── TreeBuildingRules.php
    │   │   │   ├── EventHandler.php
    │   │   │   ├── UTF8Utils.php
    │   │   │   ├── StringInputStream.php
    │   │   │   └── Scanner.php
    │   │   ├── Serializer
    │   │   │   ├── README.md
    │   │   │   ├── RulesInterface.php
    │   │   │   └── Traverser.php
    │   │   └── InstructionProcessor.php
    │   ├── autoloader.php
    │   ├── UPGRADING.md
    │   ├── RELEASE.md
    │   ├── HTML5.php
    │   └── README.md
    └── mf2
    │   └── LICENSE.md
├── includes
    ├── autoload.php
    ├── class-parse-this-json.php
    ├── class-parse-this-youtube.php
    ├── class-parse-this-opml.php
    ├── class-parse-this-twitter.php
    ├── class-parse-this-jsonfeed.php
    ├── class-rest-parse-this.php
    ├── class-parse-this-instagram.php
    ├── compat-functions.php
    ├── class-parse-this-base.php
    ├── class-parse-this-discovery.php
    ├── class-parse-this-rss.php
    ├── class-parse-this-restapi.php
    ├── class-parse-this-mf2-utils.php
    ├── class-parse-this-html.php
    └── class-parse-this.php
├── parse-this.php
└── readme.txt


/lib/html5/HTML5/Exception.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | namespace Masterminds\HTML5;
 4 | 
 5 | /**
 6 |  * The base exception for the HTML5 project; it adds no members to PHP's built-in \Exception.
 7 |  */
 8 | class Exception extends \Exception
 9 | {
10 | }
11 | 


--------------------------------------------------------------------------------
/lib/html5/HTML5/Parser/ParseError.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | namespace Masterminds\HTML5\Parser;
 4 | 
 5 | /**
 6 |  * Emitted when the parser has an error; extends PHP's built-in \Exception directly.
 7 |  */
 8 | class ParseError extends \Exception
 9 | {
10 | }
11 | 


--------------------------------------------------------------------------------
/lib/html5/autoloader.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | // Autoloader for the bundled Masterminds classes.
 4 | spl_autoload_register(
 5 | 	function ( $class ) {
 6 | 		// Require the namespace separator so "MastermindsFoo" is not claimed.
 7 | 		$prefix = 'Masterminds\\';
 8 | 		$len    = strlen( $prefix );
 9 | 		if ( strncmp( $prefix, $class, $len ) !== 0 ) {
10 | 			return;
11 | 		}
12 | 		// Namespace separators become directory separators below this directory.
13 | 		$file = __DIR__ . '/' . str_replace( '\\', '/', substr( $class, $len ) ) . '.php';
14 | 		if ( file_exists( $file ) ) {
15 | 			require $file;
16 | 		}
17 | 	}
18 | );
19 | 


--------------------------------------------------------------------------------
/includes/autoload.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | // Autoloader for classes whose names start with Parse_This.
 4 | spl_autoload_register(
 5 | 	function ( $class ) {
 6 | 		$directory = trailingslashit( __DIR__ );
 7 | 		foreach ( array( 'Parse_This' ) as $stem ) {
 8 | 			if ( 0 !== strncmp( $class, $stem, strlen( $stem ) ) ) {
 9 | 				continue;
10 | 			}
11 | 			// Parse_This_Foo maps to class-parse-this-foo.php in this directory.
12 | 			$path = $directory . 'class-' . strtolower( str_replace( '_', '-', $class ) ) . '.php';
13 | 			if ( file_exists( $path ) ) {
14 | 				require $path;
15 | 			}
16 | 		}
17 | 	}
18 | );
19 | 


--------------------------------------------------------------------------------
/lib/html5/UPGRADING.md:
--------------------------------------------------------------------------------
 1 | From 1.x to 2.x
 2 | =================
 3 | 
 4 | - All classes use the `Masterminds` namespace.
 5 | - All public static methods have been removed from the `HTML5` class, and the general API for accessing the HTML5 functionality has changed.
 6 | 
 7 |     Before:
 8 |     
 9 |         $dom = \HTML5::loadHTML('<html>....');
10 |         \HTML5::saveHTML($dom);
11 |         
12 |     After:
13 | 
14 |         use Masterminds\HTML5;
15 |         
16 |         $html5 = new HTML5();
17 |         
18 |         $dom = $html5->loadHTML('<html>....');
19 |         echo $html5->saveHTML($dom);
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/parse-this.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Plugin Name: Parse This
 4 |  * Plugin URI: https://github.com/dshanske/parse-this
 5 |  * Description:
 6 |  * Version: 1.0.1
 7 |  * Author: David Shanske
 8 |  * Author URI: https://david.shanske.com
 9 |  * Text Domain: parse-this
10 |  * Domain Path:  /languages
11 |  */
12 | 
13 | 
14 | /*
15 |  Parse This Load
16 |  */
17 | 
18 | if ( ! function_exists( 'parse_this_loader' ) ) {
19 | 	/**
20 | 	 * Requires the autoloader, the compatibility functions (not available in earlier
21 | 	 * versions of WordPress), the shared functions and the REST endpoint class, in that order.
22 | 	 */
23 | 	function parse_this_loader() {
24 | 		$plugin_dir = plugin_dir_path( __FILE__ );
25 | 		foreach ( array( 'includes/autoload.php', 'includes/compat-functions.php', 'includes/functions.php', 'includes/class-rest-parse-this.php' ) as $relative ) {
26 | 			require_once $plugin_dir . $relative;
27 | 		}
28 | 	}
29 | 	// Registered on plugins_loaded with priority 9.
30 | 	add_action( 'plugins_loaded', 'parse_this_loader', 9 );
31 | }
32 | 
33 | 


--------------------------------------------------------------------------------
/lib/html5/HTML5/Serializer/README.md:
--------------------------------------------------------------------------------
 1 | # The Serializer (Writer) Model
 2 | 
 3 | The serializer roughly follows sections _8.1 Writing HTML documents_ and section
 4 | _8.3 Serializing HTML fragments_ by converting DOMDocument, DOMDocumentFragment,
 5 | and DOMNodeList into HTML5.
 6 | 
 7 |        [ HTML5 ]   // Interface for saving.
 8 |           ||
 9 |      [ Traverser ]   // Walk the DOM
10 |           ||
11 |        [ Rules ]     // Convert DOM elements into strings.
12 |           ||
13 |        [ HTML5 ]     // HTML5 document or fragment in text.
14 | 
15 | 
16 | ## HTML5 Class
17 | 
18 | Provides the top level interface for saving.
19 | 
20 | ## The Traverser
21 | 
22 | Walks the DOM finding each element and passing it off to the output rules to
23 | convert to HTML5.
24 | 
25 | ## Output Rules
26 | 
27 | The output rules are defined in the RulesInterface which can have multiple
28 | implementations. Currently, the OutputRules is the default implementation that
29 | converts a DOM as is into HTML5.
30 | 
31 | ## HTML5 String
32 | 
33 | The output of the process is HTML5, either as a string or saved to a file.


--------------------------------------------------------------------------------
/lib/html5/HTML5/Parser/FileInputStream.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | namespace Masterminds\HTML5\Parser;
 4 | 
 5 | /**
 6 |  * An input stream whose data comes from a file.
 7 |  *
 8 |  * For now the file is read into a string in one go and that string is
 9 |  * then processed by StringInputStream. This was done largely for the
10 |  * sake of expediency of development, and because processing could be
11 |  * optimized toward arbitrarily large chunks of the input. A future
12 |  * rewrite would ideally handle lower level stream reads efficiently
13 |  * (and thus handle large documents efficiently).
14 |  *
15 |  * @deprecated since 2.4, to remove in 3.0. Use a string in the scanner instead.
16 |  */
17 | class FileInputStream extends StringInputStream implements InputStream
18 | {
19 |     /**
20 |      * Read the given file and hand its contents to the StringInputStream
21 |      * constructor.
22 |      *
23 |      * @param string $data     The file or url path to load.
24 |      * @param string $encoding The encoding to use for the data.
25 |      * @param string $debug    A fprintf format to use to echo the data on stdout.
26 |      */
27 |     public function __construct($data, $encoding = 'UTF-8', $debug = '')
28 |     {
29 |         // The file is read in full here; whatever file_get_contents()
30 |         // returns is passed to the parent constructor as-is.
31 |         parent::__construct(file_get_contents($data), $encoding, $debug);
32 |     }
33 | }
34 | 


--------------------------------------------------------------------------------
/includes/class-parse-this-json.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Parse This JSON class.
 4 |  */
 5 | class Parse_This_JSON extends Parse_This_Base {
 6 | 	/**
 7 | 	 * Builds jf2 from a page's embedded application/json script.
 8 | 	 *
 9 | 	 * Only the props.pageProps.article structure is read: its title, and
10 | 	 * the date and tags found under its meta key.
11 | 	 *
12 | 	 * @access public
13 | 	 *
14 | 	 * @param DOMDocument $doc  Document to search; a falsy value yields an empty array.
15 | 	 * @param string      $url  Not used by this parser.
16 | 	 * @param array       $args Not used by this parser.
17 | 	 * @return array jf2 properties with empty values removed.
18 | 	 */
19 | 	public static function parse( $doc, $url, $args ) {
20 | 		if ( ! $doc ) {
21 | 			return array();
22 | 		}
23 | 		$xpath = new DOMXPath( $doc );
24 | 
25 | 		$json = array();
26 | 		foreach ( $xpath->query( "//script[@type='application/json']" ) as $script ) {
27 | 			$json[] = json_decode( $script->textContent, true ); // phpcs:ignore
28 | 		}
29 | 		// array_filter() preserves keys, so re-index before reading index 0.
30 | 		$json = array_values( array_filter( $json ) );
31 | 
32 | 		$jf2 = array();
33 | 
34 | 		if ( 1 === count( $json ) ) {
35 | 			$json    = $json[0];
36 | 			$article = isset( $json['props']['pageProps']['article'] ) ? $json['props']['pageProps']['article'] : null;
37 | 			// Decoded JSON can be a scalar or hold scalars at any level; only arrays are walked.
38 | 			if ( is_array( $article ) ) {
39 | 				$jf2['type'] = 'entry';
40 | 				$jf2['name'] = ifset( $article['title'] );
41 | 				if ( isset( $article['meta'] ) && is_array( $article['meta'] ) ) {
42 | 					$jf2['published'] = normalize_iso8601( ifset( $article['meta']['date'] ) );
43 | 					$jf2['category']  = ifset( $article['meta']['tags'] );
44 | 				}
45 | 			}
46 | 		}
47 | 
48 | 		if ( WP_DEBUG ) {
49 | 			$jf2['_json'] = $json;
50 | 		}
51 | 		return array_filter( $jf2 );
52 | 	}
53 | }
54 | 


--------------------------------------------------------------------------------
/readme.txt:
--------------------------------------------------------------------------------
 1 | === Parse This ===
 2 | Contributors: dshanske
 3 | Tags: indieweb
 4 | Stable tag: trunk
 5 | Requires at least: 4.9
 6 | Requires PHP: 5.6
 7 | Tested up to: 5.6
 8 | License: GPLv2 or later
 9 | License URI: http://www.gnu.org/licenses/gpl-2.0.html
10 | 
11 | Parse This turns URLs into structured jf2 data
12 | 
13 | == Description == 
14 | 
15 | Parse This is based on a variety of projects including the parsing code from Press This, which was removed from WordPress. 
16 | 
17 | * It supports parsing from MF2 if present
18 | * For sites that are not marked up with Microformats 2(MF2) it will fall back onto parsing JSON-LD, then HTML/OpenGraph/Dublin Core Tags/etc. 
19 | * It supports parsing of JSONFeed and RSS/Atom feeds
20 | * It supports parsing of WordPress REST API endpoints to generate a site feed
21 | 
22 | The goal is to produce structured jf2 data that can be used for previewing links as well as feed readers and other options. It is also bundled in the Post Kinds and Yarns Microsub plugins as a library.
23 | 
24 | It can be installed as a standalone plugin which will provide the necessary libraries and functionality as well as the REST API endpoint for getting JF2 data from an arbitrary URL or a WordPress Post. 
25 | 
26 | 
27 | == Frequently Asked Questions ==
28 | 
29 | == Changelog ==
30 | 
31 | = 1.0.1 ( 2021-04-02 ) =
32 | * Remove SimplePie as a dependency as the latest version 1.5.6 is now bundled with WordPress as of 5.6.
33 | * Remove MB polyfill due to issues with PHP 8.0 compatibility in favor of a simpler solution.
34 | 
35 | = 1.0.0 ( 2020-12-15 ) =
36 | * First Official Release. Prior to this point it was in a point release.
37 | 


--------------------------------------------------------------------------------
/lib/html5/HTML5/Parser/CharacterReference.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | namespace Masterminds\HTML5\Parser;
 4 | 
 5 | use Masterminds\HTML5\Entities;
 6 | 
 7 | /**
 8 |  * Manage entity references.
 9 |  *
10 |  * This is a simple resolver for HTML5 character reference entities. See Entities for the list of supported entities.
11 |  */
12 | class CharacterReference
13 | {
14 |     protected static $numeric_mask = array( // Conversion map handed to mb_decode_numericentity() in lookupDecimal().
15 |         0x0,
16 |         0x2FFFF,
17 |         0,
18 |         0xFFFF,
19 |     );
20 | 
21 |     /**
22 |      * Given a name (e.g. 'amp'), lookup the UTF-8 character ('&').
23 |      *
24 |      * @param string $name The name to look up.
25 |      *
26 |      * @return string|null The character sequence (in UTF-8 this may be more than one byte), or null if the name is not in Entities::$byName.
27 |      */
28 |     public static function lookupName($name)
29 |     {
30 |         // Do we really want to return NULL here? or FFFD
31 |         return isset(Entities::$byName[$name]) ? Entities::$byName[$name] : null;
32 |     }
33 | 
34 |     /**
35 |      * Given a decimal number, return the UTF-8 character.
36 |      *
37 |      * @param int|string $int The decimal number; it is wrapped as a `&#N;` entity and then decoded.
38 |      *
39 |      * @return false|string|string[]|null
40 |      */
41 |     public static function lookupDecimal($int)
42 |     {
43 |         $entity = '&#' . $int . ';';
44 | 
45 |         // UNTESTED: This may fail on some planes. Couldn't find full documentation
46 |         // on the value of the mask array.
47 |         return mb_decode_numericentity($entity, static::$numeric_mask, 'utf-8');
48 |     }
49 | 
50 |     /**
51 |      * Given a hexadecimal number, return the UTF-8 character.
52 |      *
53 |      * @param string $hexdec The hexadecimal number; converted with hexdec() and passed to lookupDecimal().
54 |      *
55 |      * @return false|string|string[]|null
56 |      */
57 |     public static function lookupHex($hexdec)
58 |     {
59 |         return static::lookupDecimal(hexdec($hexdec));
60 |     }
61 | }
62 | 


--------------------------------------------------------------------------------
/includes/class-parse-this-youtube.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Parse This YouTube class.
 4 |  */
 5 | class Parse_This_YouTube extends Parse_This_Base {
 6 | 	/**
 7 | 	 * Builds jf2 from the ytInitialPlayerResponse JSON embedded in a page.
 8 | 	 *
 9 | 	 * @access public
10 | 	 *
11 | 	 * @param string $content Page markup; anything else yields an empty array.
12 | 	 * @param string $url     Not used by this parser.
13 | 	 * @param array  $args    Not used by this parser.
14 | 	 * @return array jf2 properties with empty values removed.
15 | 	 */
16 | 	public static function parse( $content, $url, $args ) {
17 | 		if ( ! $content || ! is_string( $content ) ) {
18 | 			return array();
19 | 		}
20 | 
21 | 		// Without a match $match[1] does not exist, so stop before decoding.
22 | 		if ( 1 !== preg_match( '#ytInitialPlayerResponse = (\{.+\});#U', $content, $match ) ) {
23 | 			return array();
24 | 		}
25 | 		$decode = json_decode( $match[1], true );
26 | 		if ( ! is_array( $decode ) || ! isset( $decode['videoDetails'] ) || ! is_array( $decode['videoDetails'] ) ) {
27 | 			return array();
28 | 		}
29 | 		$details     = $decode['videoDetails'];
30 | 		$microformat = array();
31 | 		if ( isset( $decode['microformat']['playerMicroformatRenderer'] ) && is_array( $decode['microformat']['playerMicroformatRenderer'] ) ) {
32 | 			$microformat = $decode['microformat']['playerMicroformatRenderer'];
33 | 		}
34 | 		$jf2           = array(
35 | 			// The player response spells this key "videoId"; array keys are case-sensitive.
36 | 			'uid'       => ifset( $details['videoId'] ),
37 | 			'name'      => ifset( $details['title'] ),
38 | 			'duration'  => seconds_to_iso8601( ifset( $details['lengthSeconds'] ) ),
39 | 			'category'  => ifset( $details['keywords'] ),
40 | 			'summary'   => ifset( $details['shortDescription'] ),
41 | 			'published' => normalize_iso8601( ifset( $microformat['publishDate'] ) ),
42 | 		);
43 | 		$author        = array(
44 | 			'type' => 'card',
45 | 			'url'  => ifset( $microformat['ownerProfileUrl'] ),
46 | 			'name' => ifset( $details['author'] ),
47 | 		);
48 | 		$jf2['author'] = array_filter( $author );
49 | 
50 | 		if ( isset( $details['thumbnail']['thumbnails'] ) && is_array( $details['thumbnail']['thumbnails'] ) ) {
51 | 			// The last listed thumbnail is used as the featured image.
52 | 			$thumbnail = end( $details['thumbnail']['thumbnails'] );
53 | 			if ( isset( $thumbnail['url'] ) ) {
54 | 				$jf2['featured'] = $thumbnail['url'];
55 | 			}
56 | 		}
57 | 		if ( isset( $microformat['embed'] ) && is_array( $microformat['embed'] ) ) {
58 | 			$jf2['video'] = ifset( $microformat['embed']['iframeUrl'] );
59 | 		}
60 | 		if ( WP_DEBUG ) {
61 | 			$jf2['_yt'] = $decode;
62 | 		}
63 | 		return array_filter( $jf2 );
64 | 	}
65 | }
66 | 


--------------------------------------------------------------------------------
/lib/html5/HTML5/Parser/README.md:
--------------------------------------------------------------------------------
 1 | # The Parser Model
 2 | 
 3 | The parser model here follows the model in section
 4 | [8.2.1](http://www.w3.org/TR/2012/CR-html5-20121217/syntax.html#parsing)
 5 | of the HTML5 specification, though we do not assume a networking layer.
 6 | 
 7 |      [ InputStream ]    // Generic support for reading input.
 8 |            ||
 9 |       [ Scanner ]       // Breaks down the stream into characters.
10 |            ||
11 |      [ Tokenizer ]      // Groups characters into syntactic units.
12 |            ||
13 |     [ Tree Builder ]    // Organizes units into a tree of objects
14 |            ||
15 |      [ DOM Document ]     // The final state of the parsed document.
16 | 
17 | 
18 | ## InputStream
19 | 
20 | This is an interface with at least two concrete implementations:
21 | 
22 | - StringInputStream: Reads an HTML5 string.
23 | - FileInputStream: Reads an HTML5 file.
24 | 
25 | ## Scanner
26 | 
27 | This is a mechanical piece of the parser.
28 | 
29 | ## Tokenizer
30 | 
31 | This follows section 8.4 of the HTML5 spec. It is (roughly) a recursive
32 | descent parser. (Though there are plenty of optimizations that are less
33 | than purely functional.)
34 | 
35 | ## EventHandler and DOMTree
36 | 
37 | EventHandler is the interface for tree builders. Since not all
38 | implementations will necessarily build trees, we've chosen a more
39 | generic name.
40 | 
41 | The event handler emits tokens during tokenization.
42 | 
43 | The DOMTree is an event handler that builds a DOM tree. The output of
44 | the DOMTree builder is a DOMDocument.
45 | 
46 | ## DOMDocument
47 | 
48 | PHP has a DOMDocument class built-in (technically, it's part of libxml.)
49 | We use that, thus rendering the output of this process compatible with
50 | SimpleXML, QueryPath, and many other XML/HTML processing tools.
51 | 
52 | For cases where the HTML5 is a fragment of a HTML5 document a
53 | DOMDocumentFragment is returned instead. This is another built-in class.
54 | 


--------------------------------------------------------------------------------
/lib/html5/HTML5/InstructionProcessor.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * A handler for processor instructions.
 4 |  */
 5 | 
 6 | namespace Masterminds\HTML5;
 7 | 
 8 | /**
 9 |  * Provide a processor to handle embedded instructions.
10 |  *
11 |  * XML defines a mechanism for inserting instructions (like PHP) into a
12 |  * document. These are called "Processor Instructions." The HTML5 parser
13 |  * provides an opportunity to handle these processor instructions during
14 |  * the tree-building phase (before the DOM is constructed), which makes
15 |  * it possible to alter the document as it is being created.
16 |  *
17 |  * One could, for example, use this mechanism to execute well-formed PHP
18 |  * code embedded inside of an HTML5 document.
19 |  */
20 | interface InstructionProcessor
21 | {
22 |     /**
23 |      * Process an individual processing instruction.
24 |      *
25 |      * The process() function is responsible for doing the following:
26 |      * - Determining whether $name is an instruction type it can handle.
27 |      * - Determining what to do with the data passed in.
28 |      * - Making any subsequent modifications to the DOM by modifying the
29 |      *   DOMElement or its attached DOM tree.
30 |      *
31 |      * @param \DOMElement $element The parent element for the current processing instruction.
32 |      * @param string      $name    The instruction's name. E.g. `&lt;?php` has the name `php`.
33 |      * @param string      $data    All of the data between the opening and closing PI marks.
34 |      *
35 |      * @return \DOMElement The element that should be considered "Current". This may just be
36 |      *                     the element passed in, but if the processor added more elements,
37 |      *                     it may choose to reset the current element to one of the elements
38 |      *                     it created. (When in doubt, return the element passed in.)
39 |      */
40 |     public function process(\DOMElement $element, $name, $data);
41 | }
42 | 


--------------------------------------------------------------------------------
/includes/class-parse-this-opml.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | class Parse_This_OPML {
 4 | 	/**
 5 | 	 * Returns $array[ $key ] when it is set, otherwise null.
 6 | 	 */
 7 | 	private static function ifset( $key, $array ) {
 8 | 		return isset( $array[ $key ] ) ? $array[ $key ] : null;
 9 | 	}
10 | 
11 | 	/**
12 | 	 * Downloads $url and returns the response body.
13 | 	 *
14 | 	 * A 403 or 415 response is retried once with a browser-like user-agent.
15 | 	 *
16 | 	 * @param string $url URL to fetch.
17 | 	 * @return WP_Error|string WP_Error when the URL is invalid or the request fails, otherwise the body.
18 | 	 */
19 | 	public function fetch( $url ) {
20 | 		if ( empty( $url ) || ! wp_http_validate_url( $url ) ) {
21 | 			return new WP_Error( 'invalid-url', __( 'A valid URL was not provided.', 'indieweb-post-kinds' ) );
22 | 		}
23 | 
24 | 		$args     = array(
25 | 			'timeout'             => 15,
26 | 			'limit_response_size' => 1048576,
27 | 			'redirection'         => 5,
28 | 		);
29 | 		$response = wp_safe_remote_get( $url, $args );
30 | 		if ( is_wp_error( $response ) ) {
31 | 			return $response;
32 | 		}
33 | 
34 | 		if ( in_array( wp_remote_retrieve_response_code( $response ), array( 403, 415 ), true ) ) {
35 | 			// Use an explicit user-agent for Parse This on the second attempt.
36 | 			$args['user-agent'] = 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:57.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36 Parse This/WP';
37 | 			$response           = wp_safe_remote_get( $url, $args );
38 | 			if ( is_wp_error( $response ) ) {
39 | 				return $response;
40 | 			}
41 | 			if ( in_array( wp_remote_retrieve_response_code( $response ), array( 403, 415 ), true ) ) {
42 | 				return new WP_Error( 'source_error', 'Unable to Retrieve' );
43 | 			}
44 | 		}
45 | 
46 | 		return wp_remote_retrieve_body( $response );
47 | 	}
48 | 
49 | 	/**
50 | 	 * Converts an OPML document into a list of titled outlines with their child feeds.
51 | 	 *
52 | 	 * @param string $content OPML markup.
53 | 	 * @return array One entry per top-level outline, or an empty array when $content cannot be parsed.
54 | 	 */
55 | 	public function convert( $content ) {
56 | 		// fetch() can return a WP_Error, which must not reach the XML parser.
57 | 		if ( ! is_string( $content ) ) {
58 | 			return array();
59 | 		}
60 | 		$xml = simplexml_load_string( $content );
61 | 		if ( false === $xml ) {
62 | 			return array();
63 | 		}
64 | 		$return = array();
65 | 		foreach ( $xml->body->outline as $outline ) {
66 | 			$top = array(
67 | 				'title'    => $outline['title'],
68 | 				'children' => array(),
69 | 			);
70 | 			foreach ( $outline as $feed ) {
71 | 				$top['children'][] = array(
72 | 					'name' => $feed['title'],
73 | 					'url'  => $feed['xmlUrl'],
74 | 				);
75 | 			}
76 | 			$return[] = $top;
77 | 		}
78 | 		return $return;
79 | 	}
80 | }
81 | 
82 | 
83 | 
84 | 


--------------------------------------------------------------------------------
/includes/class-parse-this-twitter.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Parse This Twitter class.
 4 |  */
 5 | class Parse_This_Twitter extends Parse_This_Base {
 6 | 	/**
 7 | 	 * Builds jf2 for a tweet from the publish.twitter.com oEmbed response.
 8 | 	 *
 9 | 	 * @access public
10 | 	 *
11 | 	 * @param string $url  Tweet URL; a URL without "status" in it yields an empty array.
12 | 	 * @param array  $args Not read; overwritten with this method's own request arguments.
13 | 	 * @return array jf2 properties with empty values removed.
14 | 	 */
15 | 	public static function parse( $url, $args ) {
16 | 		if ( false === strpos( $url, 'status' ) ) {
17 | 			return array();
18 | 		}
19 | 
20 | 		$args     = array(
21 | 			'timeout'             => 15,
22 | 			'limit_response_size' => 1048576,
23 | 			'redirection'         => 5,
24 | 			// Use an explicit user-agent for Parse This (the HTTP API key is 'user-agent').
25 | 			'user-agent'          => 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:57.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36 Parse This/WP',
26 | 		);
27 | 		// add_query_arg() expects values that are already URL-encoded.
28 | 		$url      = add_query_arg( 'url', rawurlencode( $url ), 'https://publish.twitter.com/oembed' );
29 | 		$response = wp_safe_remote_get( $url, $args );
30 | 		$oembed   = json_decode( wp_remote_retrieve_body( $response ), true );
31 | 		// A failed request or a non-JSON body does not decode to an array.
32 | 		if ( ! is_array( $oembed ) ) {
33 | 			return array();
34 | 		}
35 | 		$jf2 = array();
36 | 		if ( array_key_exists( 'url', $oembed ) ) {
37 | 			$jf2['url'] = $oembed['url'];
38 | 		}
39 | 		if ( array_key_exists( 'html', $oembed ) ) {
40 | 			$html = $oembed['html'];
41 | 			$dom  = pt_load_domdocument( $html );
42 | 			$html = explode( '&mdash;', $html );
43 | 			$html = $html[0];
44 | 			$text = wp_strip_all_tags( $html );
45 | 			$text = explode( '&mdash;', $text );
46 | 			$text = $text[0];
47 | 
48 | 			$links    = $dom->getElementsByTagName( 'a' );
49 | 			$names    = array();
50 | 			$category = array();
51 | 			foreach ( $links as $link ) {
52 | 				$key   = wp_strip_all_tags( $link->nodeValue ); // phpcs:ignore
53 | 				$value = $link->getAttribute( 'href' );
54 | 				$parse = wp_parse_url( $value );
55 | 				unset( $parse['query'] );
56 | 				$value = build_url( $parse );
57 | 				// substr() avoids an invalid string offset when the link text is empty.
58 | 				$first = substr( $key, 0, 1 );
59 | 				if ( '#' === $first ) {
60 | 					$category[] = str_replace( '#', '', $key );
61 | 				} elseif ( '@' === $first ) {
62 | 					$category[] = $value;
63 | 				} elseif ( isset( $jf2['url'] ) && $jf2['url'] === $value ) {
64 | 					$published        = new DateTime( $key );
65 | 					$jf2['published'] = $published->format( DATE_W3C );
66 | 				} else {
67 | 					$names[ wp_strip_all_tags( $key ) ] = normalize_url( $value ); // phpcs:ignore
68 | 				}
69 | 			}
70 | 			$jf2['links']    = $names;
71 | 			$jf2['category'] = $category;
72 | 			$jf2['content']  = array(
73 | 				'html'  => Parse_This::clean_content( $html, array( 'blockquote' => array() ) ),
74 | 				'value' => $text,
75 | 			);
76 | 			$jf2['summary']  = $jf2['content']['html'];
77 | 		}
78 | 		$jf2['author']      = array_filter(
79 | 			array(
80 | 				'type' => 'card',
81 | 				'name' => ifset( $oembed['author_name'] ),
82 | 				'url'  => ifset( $oembed['author_url'] ),
83 | 			)
84 | 		);
85 | 		$jf2['publication'] = 'Twitter';
86 | 		if ( WP_DEBUG ) {
87 | 			$jf2['_ombed'] = $oembed;
88 | 		}
89 | 
90 | 		return array_filter( $jf2 );
91 | 	}
92 | 
93 | }
94 | 


--------------------------------------------------------------------------------
/lib/html5/HTML5/Parser/InputStream.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | namespace Masterminds\HTML5\Parser;
 4 | 
 5 | /**
 6 |  * Interface for stream readers.
 7 |  *
 8 |  * The parser only reads from streams. Various input sources can write
 9 |  * an adapter to this InputStream.
10 |  *
11 |  * Currently provided InputStream implementations include
12 |  * FileInputStream and StringInputStream.
13 |  *
14 |  * @deprecated since 2.4, to remove in 3.0. Use a string in the scanner instead.
15 |  */
16 | interface InputStream extends \Iterator
17 | {
18 |     /**
19 |      * Returns the current line that is being consumed.
20 |      *
21 |      * TODO: Move this to the scanner.
22 |      */
23 |     public function currentLine();
24 | 
25 |     /**
26 |      * Returns the current column of the current line that the tokenizer is at.
27 |      *
28 |      * Newlines are column 0. The first char after a newline is column 1.
29 |      *
30 |      * @TODO Move this to the scanner.
31 |      *
32 |      * @return int The column number.
33 |      */
34 |     public function columnOffset();
35 | 
36 |     /**
37 |      * Get all characters until EOF.
38 |      *
39 |      * This consumes characters until the EOF.
40 |      */
41 |     public function remainingChars();
42 | 
43 |     /**
44 |      * Read to a particular match (or until $max bytes are consumed).
45 |      *
46 |      * This operates on byte sequences, not characters.
47 |      *
48 |      * Matches as far as possible until we reach a certain set of bytes
49 |      * and returns the matched substring.
50 |      *
51 |      * @see strcspn
52 |      *
53 |      * @param string $bytes Bytes to match.
54 |      * @param int    $max   Maximum number of bytes to scan.
55 |      *
56 |      * @return mixed Index or false if no match is found. You should use strong
57 |      *               equality when checking the result, since index could be 0.
58 |      */
59 |     public function charsUntil($bytes, $max = null);
60 | 
61 |     /**
62 |      * Returns the string so long as $bytes matches.
63 |      *
64 |      * Matches as far as possible with a certain set of bytes
65 |      * and returns the matched substring.
66 |      *
67 |      * @see strspn
68 |      *
69 |      * @param string $bytes A mask of bytes to match. If ANY byte in this mask matches the
70 |      *                      current char, the pointer advances and the char is part of the
71 |      *                      substring.
72 |      * @param int    $max   The max number of chars to read.
73 |      */
74 |     public function charsWhile($bytes, $max = null);
75 | 
76 |     /**
77 |      * Unconsume one or more characters (one by default).
78 |      *
79 |      * @param int $howMany The number of characters to move the pointer back.
80 |      */
81 |     public function unconsume($howMany = 1);
82 | 
83 |     /**
84 |      * Retrieve the next character without advancing the pointer.
85 |      */
86 |     public function peek();
87 | }
88 | 


--------------------------------------------------------------------------------
/lib/html5/HTML5/Serializer/RulesInterface.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * @file
  4 |  * The interface definition for Rules to generate output.
  5 |  */
  6 | 
  7 | namespace Masterminds\HTML5\Serializer;
  8 | 
  9 | /**
 10 |  * To create a new rule set for writing output the RulesInterface needs to be implemented.
 11 |  * The resulting class can be specified in the options with the key of rules.
 12 |  *
 13 |  * For an example implementation see Serializer\OutputRules.
 14 |  */
 15 | interface RulesInterface
 16 | {
 17 |     /**
 18 |      * The class constructor.
 19 |      *
 20 |      * Note, before the rules can be used a traverser must be registered.
 21 |      *
 22 |      * @param mixed $output  The output stream to write output to.
 23 |      * @param array $options An array of options.
 24 |      */
 25 |     public function __construct($output, $options = array());
 26 | 
 27 |     /**
 28 |      * Register the traverser used by the rules.
 29 |      *
 30 |      * Note, only one traverser can be used by the rules.
 31 |      *
 32 |      * @param Traverser $traverser The traverser used in the rules.
 33 |      *
 34 |      * @return RulesInterface $this for the current object.
 35 |      */
 36 |     public function setTraverser(Traverser $traverser);
 37 | 
 38 |     /**
 39 |      * Write a document element (\DOMDocument).
 40 |      *
 41 |      * Instead of returning the result write it to the output stream ($output)
 42 |      * that was passed into the constructor.
 43 |      *
 44 |      * @param \DOMDocument $dom
 45 |      */
 46 |     public function document($dom);
 47 | 
 48 |     /**
 49 |      * Write an element.
 50 |      *
 51 |      * Instead of returning the result write it to the output stream ($output)
 52 |      * that was passed into the constructor.
 53 |      *
 54 |      * @param mixed $ele
 55 |      */
 56 |     public function element($ele);
 57 | 
 58 |     /**
 59 |      * Write a text node.
 60 |      *
 61 |      * Instead of returning the result write it to the output stream ($output)
 62 |      * that was passed into the constructor.
 63 |      *
 64 |      * @param mixed $ele
 65 |      */
 66 |     public function text($ele);
 67 | 
 68 |     /**
 69 |      * Write a CDATA node.
 70 |      *
 71 |      * Instead of returning the result write it to the output stream ($output)
 72 |      * that was passed into the constructor.
 73 |      *
 74 |      * @param mixed $ele
 75 |      */
 76 |     public function cdata($ele);
 77 | 
 78 |     /**
 79 |      * Write a comment node.
 80 |      *
 81 |      * Instead of returning the result write it to the output stream ($output)
 82 |      * that was passed into the constructor.
 83 |      *
 84 |      * @param mixed $ele
 85 |      */
 86 |     public function comment($ele);
 87 | 
 88 |     /**
 89 |      * Write a processor instruction.
 90 |      *
 91 |      * To learn about processor instructions see InstructionProcessor.
 92 |      *
 93 |      * Instead of returning the result write it to the output stream ($output)
 94 |      * that was passed into the constructor.
 95 |      *
 96 |      * @param mixed $ele
 97 |      */
 98 |     public function processorInstruction($ele);
 99 | }
100 | 


--------------------------------------------------------------------------------
/includes/class-parse-this-jsonfeed.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | /**
  4 |  * Converts decoded JSON Feed data into a jf2 feed array.
  5 |  */
  6 | class Parse_This_JSONFeed extends Parse_This_Base {
  7 | 	/**
  8 | 	 * Returns $array[ $key ] when it is set and not null, otherwise null.
  9 | 	 *
 10 | 	 * @param string $key   Key to read.
 11 | 	 * @param mixed  $array Array to read from.
 12 | 	 * @return mixed|null
 13 | 	 */
 14 | 	private static function ifset( $key, $array ) {
 15 | 		return isset( $array[ $key ] ) ? $array[ $key ] : null;
 16 | 	}
 17 | 
 18 | 	/**
 19 | 	 * Extracts author data from a feed or item array.
 20 | 	 *
 21 | 	 * Reads 'authors', falling back to 'author' when 'authors' is not set.
 22 | 	 *
 23 | 	 * @param array $array Feed or item data.
 24 | 	 * @return array|null Null when neither key is set; a single author array when
 25 | 	 *                    exactly one non-empty author remains; otherwise a list
 26 | 	 *                    of author arrays, which may be empty.
 27 | 	 */
 28 | 	private static function get_author( $array ) {
 29 | 		if ( isset( $array['author'] ) && ! isset( $array['authors'] ) ) {
 30 | 			$array['authors'] = $array['author'];
 31 | 		}
 32 | 		if ( ! isset( $array['authors'] ) ) {
 33 | 			return null;
 34 | 		}
 35 | 		$author = $array['authors'];
 36 | 		if ( ! wp_is_numeric_array( $author ) ) {
 37 | 			$author = array( $author );
 38 | 		}
 39 | 		// Initialise up front: an empty 'authors' list would otherwise leave $return undefined.
 40 | 		$return = array();
 41 | 		foreach ( $author as $element ) {
 42 | 			$return[] = array_filter(
 43 | 				array(
 44 | 					'name'  => self::ifset( 'name', $element ),
 45 | 					'url'   => self::ifset( 'url', $element ),
 46 | 					'photo' => self::ifset( 'avatar', $element ),
 47 | 				)
 48 | 			);
 49 | 		}
 50 | 		// array_filter() keeps the original keys, so reindex before reading index 0.
 51 | 		$return = array_values( array_filter( $return ) );
 52 | 		if ( 1 === count( $return ) ) {
 53 | 			return $return[0];
 54 | 		}
 55 | 		return $return;
 56 | 	}
 57 | 
 58 | 	/**
 59 | 	 * Builds a jf2 feed array from decoded JSON Feed data.
 60 | 	 *
 61 | 	 * @param array  $content Decoded JSON Feed document.
 62 | 	 * @param string $url     Value stored as the feed's 'url' property.
 63 | 	 * @return array Feed properties plus 'items', '_last_published' and '_last_updated'.
 64 | 	 */
 65 | 	public static function to_jf2( $content, $url ) {
 66 | 		$return          = array_filter(
 67 | 			array(
 68 | 				'type'       => 'feed',
 69 | 				'_feed_type' => 'jsonfeed',
 70 | 				'name'       => self::ifset( 'title', $content ),
 71 | 				'url'        => $url,
 72 | 				'summary'    => self::ifset( 'description', $content ),
 73 | 				'photo'      => self::ifset( 'icon', $content ),
 74 | 				'author'     => self::get_author( $content ),
 75 | 				'language'   => self::ifset( 'language', $content ),
 76 | 			)
 77 | 		);
 78 | 		$return['items'] = array();
 79 | 		// A feed without a usable 'items' array yields an empty item list instead of warnings.
 80 | 		$items = self::ifset( 'items', $content );
 81 | 		if ( ! is_array( $items ) ) {
 82 | 			$items = array();
 83 | 		}
 84 | 		foreach ( $items as $item ) {
 85 | 			if ( ! is_array( $item ) ) {
 86 | 				continue;
 87 | 			}
 88 | 			$newitem = array_filter(
 89 | 				array(
 90 | 					'uid'         => self::ifset( 'id', $item ),
 91 | 					'url'         => self::ifset( 'url', $item ),
 92 | 					'in-reply-to' => self::ifset( 'external_url', $item ),
 93 | 					'name'        => self::ifset( 'title', $item ),
 94 | 					'content'     => array_filter(
 95 | 						array(
 96 | 							'html' => Parse_This::clean_content( self::ifset( 'content_html', $item ) ),
 97 | 							'text' => self::ifset( 'content_text', $item ),
 98 | 						)
 99 | 					),
100 | 					'summary'     => self::ifset( 'summary', $item ),
101 | 					'featured'    => self::ifset( 'image', $item ),
102 | 					'published'   => normalize_iso8601( self::ifset( 'date_published', $item ) ),
103 | 					'updated'     => normalize_iso8601( self::ifset( 'date_modified', $item ) ),
104 | 					'author'      => self::get_author( $item ),
105 | 					'category'    => self::ifset( 'tags', $item ),
106 | 					'language'    => self::ifset( 'language', $item ),
107 | 				)
108 | 			);
109 | 			$attachments = self::ifset( 'attachments', $item );
110 | 			if ( ! is_array( $attachments ) ) {
111 | 				$attachments = array();
112 | 			}
113 | 			foreach ( $attachments as $attachment ) {
114 | 				// Skip attachments lacking the members read below.
115 | 				if ( ! isset( $attachment['url'], $attachment['mime_type'] ) || ! is_string( $attachment['mime_type'] ) ) {
116 | 					continue;
117 | 				}
118 | 				// The media type is the part of the MIME type before the slash.
119 | 				$type = explode( '/', $attachment['mime_type'] );
120 | 				$type = array_shift( $type );
121 | 				switch ( $type ) {
122 | 					case 'audio':
123 | 						$newitem['audio'] = $attachment['url'];
124 | 						if ( isset( $attachment['duration_in_seconds'] ) ) {
125 | 							$newitem['duration'] = seconds_to_iso8601( $attachment['duration_in_seconds'] );
126 | 						}
127 | 						break;
128 | 					case 'image':
129 | 						$newitem['photo'] = $attachment['url'];
130 | 						break;
131 | 					case 'video':
132 | 						$newitem['video'] = $attachment['url'];
133 | 						if ( isset( $attachment['duration_in_seconds'] ) ) {
134 | 							$newitem['duration'] = seconds_to_iso8601( $attachment['duration_in_seconds'] );
135 | 						}
136 | 						break;
137 | 				}
138 | 			}
139 | 			$return['items'][] = $newitem;
140 | 		}
141 | 		$return['_last_published'] = self::find_last_published( $return['items'] );
142 | 		$return['_last_updated']   = self::find_last_updated( $return['items'] );
143 | 		return $return;
144 | 	}
145 | }
146 | 
147 | 
148 | 
149 | 


--------------------------------------------------------------------------------
/lib/html5/HTML5/Parser/TreeBuildingRules.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Masterminds\HTML5\Parser;
  4 | 
  5 | /**
  6 |  * Handles special-case rules for the DOM tree builder.
  7 |  *
  8 |  * Many tags have special rules that need to be accommodated on an
  9 |  * individual basis. This class handles those rules.
 10 |  *
 11 |  * See section 8.1.2.4 of the spec.
 12 |  *
 13 |  * @todo - colgroup and col special behaviors
 14 |  *       - body and head special behaviors
 15 |  */
 16 | class TreeBuildingRules
 17 | {
 18 |     /**
 19 |      * Tag names with special handling, stored as array keys.
 20 |      */
 21 |     protected static $tags = array(
 22 |         'li' => 1,
 23 |         'dd' => 1,
 24 |         'dt' => 1,
 25 |         'rt' => 1,
 26 |         'rp' => 1,
 27 |         'tr' => 1,
 28 |         'th' => 1,
 29 |         'td' => 1,
 30 |         'thead' => 1,
 31 |         'tfoot' => 1,
 32 |         'tbody' => 1,
 33 |         'table' => 1,
 34 |         'optgroup' => 1,
 35 |         'option' => 1,
 36 |     );
 37 | 
 38 |     /**
 39 |      * Tell whether static::$tags has an entry for the given tag name.
 40 |      */
 41 |     public function hasRules($tagname)
 42 |     {
 43 |         return isset(static::$tags[$tagname]);
 44 |     }
 45 | 
 46 |     /**
 47 |      * Apply the rule that belongs to the new element's tag name.
 48 |      *
 49 |      * When a rule applies, the new element is appended to the DOM and
 50 |      * returned; otherwise nothing is changed and $current is returned.
 51 |      *
 52 |      * @param mixed $new     The element to place; its tagName picks the rule.
 53 |      * @param mixed $current The current DOM element.
 54 |      *
 55 |      * @return \DOMElement The new Current DOM element.
 56 |      */
 57 |     public function evaluate($new, $current)
 58 |     {
 59 |         switch ($new->tagName) {
 60 |             case 'li':
 61 |                 $next = $this->handleLI($new, $current);
 62 |                 break;
 63 |             case 'dt':
 64 |             case 'dd':
 65 |                 $next = $this->handleDT($new, $current);
 66 |                 break;
 67 |             case 'rt':
 68 |             case 'rp':
 69 |                 $next = $this->handleRT($new, $current);
 70 |                 break;
 71 |             case 'optgroup':
 72 |                 $next = $this->closeIfCurrentMatches($new, $current, array('optgroup'));
 73 |                 break;
 74 |             case 'option':
 75 |                 $next = $this->closeIfCurrentMatches($new, $current, array('option'));
 76 |                 break;
 77 |             case 'tr':
 78 |                 $next = $this->closeIfCurrentMatches($new, $current, array('tr'));
 79 |                 break;
 80 |             case 'td':
 81 |             case 'th':
 82 |                 $next = $this->closeIfCurrentMatches($new, $current, array('th', 'td'));
 83 |                 break;
 84 |             case 'tbody':
 85 |             case 'thead':
 86 |             case 'tfoot':
 87 |             case 'table': // Spec isn't explicit about this, but it's necessary.
 88 |                 $next = $this->closeIfCurrentMatches($new, $current, array('thead', 'tfoot', 'tbody'));
 89 |                 break;
 90 |             default:
 91 |                 $next = $current;
 92 |                 break;
 93 |         }
 94 | 
 95 |         return $next;
 96 |     }
 97 | 
 98 |     /** Place an li element; it becomes a sibling of $current when that is an li. */
 99 |     protected function handleLI($ele, $current)
100 |     {
101 |         return $this->closeIfCurrentMatches($ele, $current, array('li'));
102 |     }
103 | 
104 |     /** Place a dt/dd element; it becomes a sibling of $current when that is a dt or dd. */
105 |     protected function handleDT($ele, $current)
106 |     {
107 |         return $this->closeIfCurrentMatches($ele, $current, array('dt', 'dd'));
108 |     }
109 | 
110 |     /** Place an rt/rp element; it becomes a sibling of $current when that is an rt or rp. */
111 |     protected function handleRT($ele, $current)
112 |     {
113 |         return $this->closeIfCurrentMatches($ele, $current, array('rt', 'rp'));
114 |     }
115 | 
116 |     /**
117 |      * Append $ele to $current's parent when $current's tag name is in $match, else to $current.
118 |      *
119 |      * @return mixed The appended element ($ele).
120 |      */
121 |     protected function closeIfCurrentMatches($ele, $current, $match)
122 |     {
123 |         $target = in_array($current->tagName, $match, true) ? $current->parentNode : $current;
124 |         $target->appendChild($ele);
125 |         return $ele;
126 |     }
127 | }
128 | 


--------------------------------------------------------------------------------
/lib/html5/HTML5/Parser/EventHandler.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Masterminds\HTML5\Parser;
  4 | 
  5 | /**
  6 |  * Standard events for HTML5.
  7 |  *
  8 |  * This is roughly analogous to a SAX2 or expat-style interface.
  9 |  * However, it is tuned specifically for HTML5, according to section 8
 10 |  * of the HTML5 specification.
 11 |  *
 12 |  * An event handler receives parser events. For a concrete
 13 |  * implementation, see DOMTreeBuilder.
 14 |  *
 15 |  * Quirks support in the parser is limited to close-in syntax (malformed
 16 |  * tags or attributes). Higher order syntax and semantic issues with a
 17 |  * document (e.g. mismatched tags, illegal nesting, etc.) are the
 18 |  * responsibility of the event handler implementation.
 19 |  *
 20 |  * See HTML5 spec section 8.2.4
 21 |  */
 22 | interface EventHandler
 23 | {
 24 |     const DOCTYPE_NONE = 0;
 25 | 
 26 |     const DOCTYPE_PUBLIC = 1;
 27 | 
 28 |     const DOCTYPE_SYSTEM = 2;
 29 | 
 30 |     /**
 31 |      * A doctype declaration.
 32 |      *
 33 |      * @param string $name   The name of the root element.
 34 |      * @param int    $idType One of DOCTYPE_NONE, DOCTYPE_PUBLIC, or DOCTYPE_SYSTEM
 35 |      * @param string $id     The identifier. For DOCTYPE_PUBLIC, this is the public ID. If DOCTYPE_SYSTEM,
 36 |      *                       then this is a system ID.
 37 |      * @param bool   $quirks Indicates whether the builder should enter quirks mode.
 38 |      */
 39 |     public function doctype($name, $idType = 0, $id = null, $quirks = false);
 40 | 
 41 |     /**
 42 |      * A start tag.
 43 |      *
 44 |      * IMPORTANT: The parser watches the return value of this event. If this returns
 45 |      * an integer, the parser will switch TEXTMODE patterns according to the int.
 46 |      *
 47 |      * This is how the Tree Builder can tell the Tokenizer when a certain tag should
 48 |      * cause the parser to go into RAW text mode.
 49 |      *
 50 |      * The HTML5 standard requires that the builder is the one that initiates this
 51 |      * step, and this is the only way short of a circular reference that we can
 52 |      * do that.
 53 |      *
 54 |      * Example: if a startTag event for a `script` name is fired, and the startTag()
 55 |      * implementation returns Tokenizer::TEXTMODE_RAW, then the tokenizer will
 56 |      * switch into RAW text mode and consume data until it reaches a closing
 57 |      * `script` tag.
 58 |      *
 59 |      * The textmode is automatically reset to Tokenizer::TEXTMODE_NORMAL when the
 60 |      * closing tag is encountered. **This behavior may change.**
 61 |      *
 62 |      * @param string $name        The tag name.
 63 |      * @param array  $attributes  An array with all of the tag's attributes.
 64 |      * @param bool   $selfClosing An indicator of whether or not this tag is self-closing (<foo/>).
 65 |      *
 66 |      * @return int one of the Tokenizer::TEXTMODE_* constants
 67 |      */
 68 |     public function startTag($name, $attributes = array(), $selfClosing = false);
 69 | 
 70 |     /**
 71 |      * An end-tag.
 72 |      */
 73 |     public function endTag($name);
 74 | 
 75 |     /**
 76 |      * A comment section (unparsed character data).
 77 |      */
 78 |     public function comment($cdata);
 79 | 
 80 |     /**
 81 |      * A unit of parsed character data.
 82 |      *
 83 |      * Entities in this text are *already decoded*.
 84 |      */
 85 |     public function text($cdata);
 86 | 
 87 |     /**
 88 |      * Indicates that the document has been entirely processed.
 89 |      */
 90 |     public function eof();
 91 | 
 92 |     /**
 93 |      * Emitted when the parser encounters an error condition.
 94 |      */
 95 |     public function parseError($msg, $line, $col);
 96 | 
 97 |     /**
 98 |      * A CDATA section.
 99 |      *
100 |      * @param string $data
101 |      *                     The unparsed character data.
102 |      */
103 |     public function cdata($data);
104 | 
105 |     /**
106 |      * This is a holdover from the XML spec.
107 |      *
108 |      * While user agents don't get PIs, server-side code does.
109 |      *
110 |      * @param string $name The name of the processor (e.g. 'php').
111 |      * @param string $data The unparsed data.
112 |      */
113 |     public function processingInstruction($name, $data = null);
114 | }
115 | 


--------------------------------------------------------------------------------
/lib/html5/HTML5/Serializer/Traverser.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Masterminds\HTML5\Serializer;
  4 | 
  5 | /**
  6 |  * Traverser for walking a DOM tree.
  7 |  *
  8 |  * This is a concrete traverser designed to convert a DOM tree into an
  9 |  * HTML5 document. It is not intended to be a generic DOMTreeWalker
 10 |  * implementation.
 11 |  *
 12 |  * @see http://www.w3.org/TR/2012/CR-html5-20121217/syntax.html#serializing-html-fragments
 13 |  */
 14 | class Traverser
 15 | {
 16 |     /**
 17 |      * Namespace URIs treated as "local" to HTML5, mapped to a short name.
 18 |      */
 19 |     protected static $local_ns = array(
 20 |         'http://www.w3.org/1999/xhtml' => 'html',
 21 |         'http://www.w3.org/1998/Math/MathML' => 'math',
 22 |         'http://www.w3.org/2000/svg' => 'svg',
 23 |     );
 24 | 
 25 |     /** The document, node or node list handed to the constructor. */
 26 |     protected $dom;
 27 | 
 28 |     /** The options array handed to the constructor. */
 29 |     protected $options;
 30 | 
 31 |     protected $encode = false;
 32 | 
 33 |     /** The rules object that nodes are dispatched to. */
 34 |     protected $rules;
 35 | 
 36 |     /** The output stream handed to the constructor; walk() returns it. */
 37 |     protected $out;
 38 | 
 39 |     /**
 40 |      * Set up a traverser and register it with the given rules.
 41 |      *
 42 |      * @param \DOMNode|\DOMNodeList $dom     The document or node to traverse.
 43 |      * @param resource              $out     A stream that allows writing; walk() returns it.
 44 |      * @param RulesInterface        $rules   The rules that each visited node is handed to.
 45 |      * @param array                 $options An array of options for the traverser as key/value pairs. These include:
 46 |      *                                       - encode_entities: A bool to specify if full encoding should happen for all named
 47 |      *                                       character references. Defaults to false which escapes &'<>".
 48 |      *                                       - output_rules: The path to the class handling the output rules.
 49 |      */
 50 |     public function __construct($dom, $out, RulesInterface $rules, $options = array())
 51 |     {
 52 |         $this->options = $options;
 53 |         $this->rules = $rules;
 54 |         $this->out = $out;
 55 |         $this->dom = $dom;
 56 | 
 57 |         $rules->setTraverser($this);
 58 |     }
 59 | 
 60 |     /**
 61 |      * Walk the DOM that was handed to the constructor.
 62 |      *
 63 |      * @return resource $out Returns the output stream.
 64 |      */
 65 |     public function walk()
 66 |     {
 67 |         $dom = $this->dom;
 68 | 
 69 |         if ($dom instanceof \DOMDocument) {
 70 |             $this->rules->document($dom);
 71 |         } elseif ($dom instanceof \DOMDocumentFragment) {
 72 |             // Only the children of a fragment are serialized; a fragment
 73 |             // without children produces nothing.
 74 |             if ($dom->hasChildNodes()) {
 75 |                 $this->children($dom->childNodes);
 76 |             }
 77 |         } elseif ($dom instanceof \DOMNodeList) {
 78 |             // If this is a NodeList of DOMDocuments this will not work.
 79 |             $this->children($dom);
 80 |         } else {
 81 |             // Anything else is assumed to be a DOMNode-like data structure.
 82 |             $this->node($dom);
 83 |         }
 84 | 
 85 |         return $this->out;
 86 |     }
 87 | 
 88 |     /**
 89 |      * Hand a single node to the rule method matching its node type.
 90 |      *
 91 |      * @param mixed $node A node implementing \DOMNode.
 92 |      */
 93 |     public function node($node)
 94 |     {
 95 |         // A listing of types is at http://php.net/manual/en/dom.constants.php
 96 |         switch ($node->nodeType) {
 97 |             case XML_ELEMENT_NODE:
 98 |                 $this->rules->element($node);
 99 |                 return;
100 |             case XML_TEXT_NODE:
101 |                 $this->rules->text($node);
102 |                 return;
103 |             case XML_CDATA_SECTION_NODE:
104 |                 $this->rules->cdata($node);
105 |                 return;
106 |             case XML_PI_NODE:
107 |                 $this->rules->processorInstruction($node);
108 |                 return;
109 |             case XML_COMMENT_NODE:
110 |                 $this->rules->comment($node);
111 |                 return;
112 |             // Every other node type is skipped; embedding DTDs is not supported.
113 |         }
114 |     }
115 | 
116 |     /**
117 |      * Run node() on each entry of a node list, in order.
118 |      *
119 |      * @param \DOMNodeList $nl A list of child elements to walk through.
120 |      */
121 |     public function children($nl)
122 |     {
123 |         foreach ($nl as $child) {
124 |             $this->node($child);
125 |         }
126 |     }
127 | 
128 |     /**
129 |      * Check whether an element's namespace is one of the local ones.
130 |      *
131 |      * @param mixed $ele An element that implements \DOMNode.
132 |      *
133 |      * @return bool true if local and false otherwise.
134 |      */
135 |     public function isLocalElement($ele)
136 |     {
137 |         // An empty namespace URI is never local.
138 |         $uri = $ele->namespaceURI;
139 | 
140 |         return !empty($uri) && isset(static::$local_ns[$uri]);
141 |     }
142 | }
143 | 


--------------------------------------------------------------------------------
/lib/html5/RELEASE.md:
--------------------------------------------------------------------------------
  1 | # Release Notes
  2 | 
  3 | 2.7.6  (2021-08-18)
  4 | 
  5 | - #218: Address comment handling issues 
  6 | 
  7 | 2.7.5  (2021-07-01)
  8 | 
  9 | - #204: Travis: Enable tests on PHP 8.0 
 10 | - #207: Fix PHP 8.1 deprecations 
 11 | 
 12 | 2.7.4  (2020-10-01)
 13 | 
 14 | - #191: Fix travisci build 
 15 | - #195: Add .gitattributes file with export-ignore rules 
 16 | - #194: Fix query parameter parsed as character entity
 17 | 
 18 | 2.7.3 (2020-07-05)
 19 | 
 20 | - #190: mitigate cyclic reference between output rules and the traverser objects 
 21 | 
 22 | 2.7.2 (2020-07-01)
 23 | 
 24 | - #187: Fixed memory leak in HTML5::saveHTML() 
 25 | - #186: Add special case for end tag </br>
 26 | 
 27 | 2.7.1 (2020-06-14)
 28 | 
 29 | - #171: add PHP 7.4 job 
 30 | - #178: Prevent infinite loop on un-terminated entity declaration at EOF 
 31 | 
 32 | 2.7.0 (2019-07-25)
 33 | 
 34 | - #164: Drop HHVM support
 35 | - #168: Set default encoding in the DOMDocument object
 36 | 
 37 | 2.6.0 (2019-03-10)
 38 | 
 39 | - #163: Allow to pass a charset to the Scanner
 40 | 
 41 | 2.5.0 (2018-12-27)
 42 | 
 43 | - #162, #161, #155, #154, #153, #151: big performance improvements
 44 | - #156: fixed typos
 45 | - #160: adopt and enforce code style
 46 | - #159: remove deprecated php unit base test case
 47 | - #150: backport changes from old master branch 
 48 | 
 49 | 2.4.0 (2018-11-17)
 50 | 
 51 | - #148: Improve performance by moving sequence matching 
 52 | - #147: Improve the Tokenizer performance 
 53 | - #146: Improve performance by relying on a native string instead of InputStream 
 54 | - #144: Add DOM extension in composer.json
 55 | - #145: Add more extensions on composer.json, improve phpdocs and remove dead code 
 56 | - #143: Remove experimental comment 
 57 | 
 58 | 2.3.1 (2018-10-18)
 59 | 
 60 | - #121: Audio is not a block tag (fixed by #141)
 61 | - #136: Handle illegal self-closing according to spec (fixed by #137)
 62 | - #141: Minor fixes in the README
 63 | 
 64 | 2.3.0 (2017-09-04)
 65 | 
 66 | - #129: image within inline svg breaks system (fixed by #133) 
 67 | - #131: &sup2; does not work (fixed by #132)
 68 | - #134: Improve tokenizer performance by 20% (alternative version of #130 thanks to @MichaelHeerklotz)
 69 | - #135: Raw & in attributes
 70 | 
 71 | 2.2.2 (2016-09-22)
 72 | 
 73 | - #116: In XML mode, tags are case sensitive
 74 | - #115: Fix PHP Notice in OutputRules
 75 | - #112: fix parsing of options of an optgroup
 76 | - #111: Adding test for the address tag
 77 | 
 78 | 2.2.1 (2016-05-10)
 79 | 
 80 | - #109: Fixed issue where address tag could be written without closing tag (thanks sylus)
 81 | 
 82 | 2.2.0 (2016-04-11)
 83 | 
 84 | - #105: Enable composer cache (for CI/CD)
 85 | - #100: Use mb_substitute_character instead of ini_set for environments where ini_set is disabled (e.g., shared hosting)
 86 | - #98: Allow link, meta, style tags in noscript tags
 87 | - #96: Fixed xml:href on svgs that use the "use" breaking
 88 | - #94: Counting UTF8 characters performance improvement
 89 | - #93: Use newer version of coveralls package
 90 | - #90: Remove duplicate test
 91 | - #87: Allow multiple root nodes
 92 | 
 93 | 2.1.2 (2015-06-07)
 94 | - #82: Support for PHP7
 95 | - #84: Improved boolean attribute handling
 96 | 
 97 | 2.1.1 (2015-03-23)
 98 | - #78: Fixes bug where unmatched entity like string drops everything after &.
 99 | 
100 | 2.1.0 (2015-02-01)
101 | - #74: Added `disable_html_ns` and `target_doc` dom parsing options
102 | - Unified option names
103 | - #73: Fixed alphabet, &szlig; now can be detected
104 | - #75 and #76: Allow whitespace in RCDATA tags
105 | - #77: Fixed parsing blunder for json embeds
106 | - #72: Add options to HTML methods
107 | 
108 | 2.0.2 (2014-12-17)
109 | - #50: empty document handling
110 | - #63: tags with strange capitalization
111 | - #65: dashes and underscores as allowed characters in tag names
112 | - #68: Fixed issue with non-inline elements inside inline containers
113 | 
114 | 2.0.1 (2014-09-23)
115 | - #59: Fixed issue parsing some fragments.
116 | - #56: Incorrectly saw 0 as empty string
117 | - Sami as new documentation generator
118 | 
119 | 2.0.0 (2014-07-28)
120 | - #53: Improved boolean attributes handling
121 | - #52: Facebook HHVM compatibility
122 | - #48: Adopted PSR-2 as coding standard
123 | - #47: Moved everything to Masterminds namespace
124 | - #45: Added custom namespaces
125 | - #44: Added support to XML-style namespaces
126 | - #37: Refactored HTML5 class removing static methods
127 | 
128 | 1.0.5 (2014-06-10)
129 | - #38: Set the dev-master branch as the 1.0.x branch for composer (goetas)
130 | - #34: Tests use PSR-4 for autoloading. (goetas)
131 | - #40, #41: Fix entity handling in RCDATA sections. (KitaitiMakoto)
132 | - #32: Fixed issue where character references were being incorrectly encoded in style tags.
133 | 
134 | 1.0.4 (2014-04-29)
135 | - #30/#31 Don't throw an exception for invalid tag names.
136 | 
137 | 1.0.3 (2014-02-28)
138 | - #23 and #29: Ignore attributes with illegal chars in name for the PHP DOM.
139 | 
140 | 1.0.2 (2014-02-12)
141 | - #23: Handle missing tag close in attribute list.
142 | - #25: Fixed text escaping in the serializer (HTML5 8.3).
143 | - #27: Fixed tests on Windows: changed "\n" -> PHP_EOL.
144 | - #28: Fixed infinite loop for char "&" in unquoted attribute in parser.
145 | - #26: Updated tag name case handling to deal with uppercase usage.
146 | - #24: Newlines and tabs are allowed inside quoted attributes (HTML5 8.2.4).
147 | - Fixed Travis CI testing.
148 | 
149 | 1.0.1 (2013-11-07)
150 | - CDATA encoding is improved. (Non-standard; Issue #19)
151 | - Some parser rules were not returning the new current element. (Issue #20)
152 | - Added, to the README, details on code test coverage and to packagist version.
153 | - Fixed processor instructions.
154 | - Improved test coverage and documentation coverage.
155 | 
156 | 1.0.0 (2013-10-02)
157 | - Initial release.
158 | 


--------------------------------------------------------------------------------
/includes/class-rest-parse-this.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Provides REST Endpoint to Retrieve the Parsed Data
  4 |  */
  5 | 
  6 | class REST_Parse_This {
  7 | 	/**
  8 | 	 * Hooks route registration and the admin page into WordPress.
  9 | 	 */
 10 | 	public function __construct() {
 11 | 		add_action( 'rest_api_init', array( $this, 'register_routes' ) );
 12 | 		add_action( 'admin_menu', array( $this, 'admin_menu' ) );
 13 | 	}
 14 | 
 15 | 	/**
 16 | 	 * Registers the debugger page through add_management_page().
 17 | 	 *
 18 | 	 * @access public
 19 | 	 */
 20 | 	public function admin_menu() {
 21 | 		add_management_page(
 22 | 			__( 'Parse This', 'indieweb-post-kinds' ), // page title
 23 | 			__( 'Parse This', 'indieweb-post-kinds' ), // menu title
 24 | 			'manage_options', // access capability
 25 | 			'parse_this',
 26 | 			array( $this, 'debug' )
 27 | 		);
 28 | 	}
 29 | 
 30 | 	/**
 31 | 	 * Outputs the debugger form, which submits via GET to the parse REST route.
 32 | 	 *
 33 | 	 * @access public
 34 | 	 */
 35 | 	public static function debug() {
 36 | 		?>
 37 | 				<div class="wrap">
 38 | 						<h2> <?php esc_html_e( 'Parse This Debugger', 'indieweb-post-kinds' ); ?> </h2>
 39 | 						<p> <?php esc_html_e( 'Test the Parse Tools Debugger. You can report sites to the developer for possibly improvement in future.', 'parse-this' ); ?>
 40 | 							<a href="https://github.com/dshanske/parse-this/issues"><?php esc_html_e( 'Open an Issue', 'parse-this' ); ?></a>
 41 | 						</p>
 42 | 
 43 | 							<p>
 44 | 							<?php
 45 | 							if ( is_plugin_active( 'parse-this/parse-this.php' ) ) {
 46 | 								esc_html_e( 'You are using the plugin version of Parse This as opposed to a version built into any plugin', 'parse-this' );
 47 | 							}
 48 | 							?>
 49 | 							</p>
 50 | 						<hr />
 51 | 			<form method="get" action="<?php echo esc_url( rest_url( '/parse-this/1.0/parse/' ) ); ?>">
 52 | 				<p>
 53 | 					<label for="url"><?php esc_html_e( 'URL', 'indieweb-post-kinds' ); ?></label><input type="url" class="widefat" name="url" id="url" />
 54 | 				</p>
 55 | 				<table class="form-table" role="presentation">
 56 | 					<tbody>
 57 | 					<tr>
 58 | 						<th scope="row">
 59 | 							<label for="mf2"><?php esc_html_e( 'MF2', 'indieweb-post-kinds' ); ?></label>
 60 | 						</th>
 61 | 						<td>
 62 | 							<input type="checkbox" name="mf2" id="mf2" />
 63 | 						</td>
 64 | 					</tr>
 65 | 					<tr>
 66 | 						<th scope="row">
 67 | 							<label for="discovery"><?php esc_html_e( 'Feed Discovery', 'indieweb-post-kinds' ); ?></label>
 68 | 						</th>
 69 | 						<td>
 70 | 							<input type="checkbox" name="discovery" id="discovery" />
 71 | 						</td>
 72 | 					</tr>
 73 | 					<tr>
 74 | 						<th scope="row">
 75 | 							<label for="references"><?php esc_html_e( 'References', 'indieweb-post-kinds' ); ?></label>
 76 | 						</th>
 77 | 						<td>
 78 | 							<input type="checkbox" name="references" id="references" checked />
 79 | 						</td>
 80 | 					</tr>
 81 | 					<tr>
 82 | 						<th scope="row">
 83 | 							<label for="location"><?php esc_html_e( 'Clean up Location', 'indieweb-post-kinds' ); ?></label>
 84 | 						</th>
 85 | 						<td>
 86 | 							<input type="checkbox" name="location" id="location" />
 87 | 						</td>
 88 | 					</tr>
 89 | 					<tr>
 90 | 						<th scope="row">
 91 | 							<label for="return"><?php esc_html_e( 'Return Type', 'indieweb-post-kinds' ); ?></label>
 92 | 						</th>
 93 | 						<td>
 94 | 							<select name="return" id="return">
 95 | 								<option value="single"><?php esc_html_e( 'Single', 'indieweb-post-kinds' ); ?></option>
 96 | 								<option value="feed"><?php esc_html_e( 'Feed', 'indieweb-post-kinds' ); ?></option>
 97 | 							</select>
 98 | 						</td>
 99 | 					</tr>
100 | 					<tr>
101 | 						<th scope="row">
102 | 							<label for="follow"><?php esc_html_e( 'Follow Author Links', 'indieweb-post-kinds' ); ?></label>
103 | 						</th>
104 | 						<td>
105 | 							<input type="checkbox" name="follow" id="follow" />
106 | 						</td>
107 | 					</tr>
108 | 					</tbody>
109 | 				</table>
110 | 			<?php wp_nonce_field( 'wp_rest' ); ?>
111 | 			<?php submit_button( __( 'Parse', 'indieweb-post-kinds' ) ); ?>
112 | 						</form>
113 | 				</div>
114 | 				<?php
115 | 	}
116 | 
117 | 	/**
118 | 	 * Register the Route.
119 | 	 */
120 | 	public static function register_routes() {
121 | 		$cls = get_called_class();
122 | 		register_rest_route(
123 | 			'parse-this/1.0',
124 | 			'/parse',
125 | 			array(
126 | 				array(
127 | 					'methods'             => WP_REST_Server::READABLE,
128 | 					'callback'            => array( $cls, 'read' ),
129 | 					'args'                => array(
130 | 						'url' => array(
131 | 							'required'          => true,
132 | 							'validate_callback' => array( $cls, 'is_valid_url' ),
133 | 							'sanitize_callback' => 'esc_url_raw',
134 | 						),
135 | 					),
136 | 					'permission_callback' => function () {
137 | 						return current_user_can( 'read' );
138 | 					},
139 | 				),
140 | 			)
141 | 		);
142 | 	}
143 | 
144 | 	/**
145 | 	 * Handles a parse request.
146 | 	 *
147 | 	 * With 'discovery' set, returns the result of Parse_This_Discovery::fetch().
148 | 	 * Otherwise fetches and parses the URL, returning the fetch error if there
149 | 	 * is one, the 'mf2' data when 'mf2' is set, or the default parsed data.
150 | 	 *
151 | 	 * @param mixed $request Request object; parameters are read with get_param().
152 | 	 * @return mixed
153 | 	 */
154 | 	public static function read( $request ) {
155 | 		$url       = $request->get_param( 'url' );
156 | 		$mf2       = $request->get_param( 'mf2' );
157 | 		$return    = $request->get_param( 'return' );
158 | 		$refs      = $request->get_param( 'references' );
159 | 		$discovery = $request->get_param( 'discovery' );
160 | 		$location  = $request->get_param( 'location' );
161 | 		$follow    = $request->get_param( 'follow' );
162 | 		if ( $discovery ) {
163 | 			$parse = new Parse_This_Discovery();
164 | 			return $parse->fetch( $url );
165 | 		}
166 | 		$parse = new Parse_This( $url );
167 | 		$r     = $parse->fetch();
168 | 
169 | 		if ( is_wp_error( $r ) ) {
170 | 			return $r;
171 | 		}
172 | 		$parse->parse(
173 | 			array(
174 | 				'return'     => $return,
175 | 				'follow'     => $follow,
176 | 				'references' => $refs,
177 | 				'location'   => $location,
178 | 			)
179 | 		);
180 | 		if ( $mf2 ) {
181 | 			return $parse->get( 'mf2' );
182 | 		}
183 | 		return $parse->get();
184 | 	}
185 | 
186 | 	/**
187 | 	 * Validation callback for the 'url' REST argument.
188 | 	 *
189 | 	 * @param string $url     URL to validate.
190 | 	 * @param mixed  $request Unused.
191 | 	 * @param mixed  $key     Unused.
192 | 	 *
193 | 	 * @return mixed The return value of wp_http_validate_url() for $url.
194 | 	 */
195 | 	public static function is_valid_url( $url, $request = null, $key = null ) {
196 | 		return wp_http_validate_url( $url );
197 | 	}
198 | 
199 | 	/**
200 | 	 * Prefixes $url with $scheme when wp_parse_url() reports no scheme (null).
201 | 	 *
202 | 	 * @param string $url    URL to check.
203 | 	 * @param string $scheme Prefix to add, including the '://' separator.
204 | 	 * @return string
205 | 	 */
206 | 	public static function addscheme( $url, $scheme = 'http://' ) {
207 | 		return wp_parse_url( $url, PHP_URL_SCHEME ) === null ? $scheme . $url : $url;
208 | 	}
209 | }
210 | 
211 | new REST_Parse_This();
212 | 


--------------------------------------------------------------------------------
/includes/class-parse-this-instagram.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Parse This Instagram class.
  4 |  */
  5 | class Parse_This_Instagram extends Parse_This_Base {
  6 | 	/**
  7 | 	 *
  8 | 	 * @access public
  9 | 	 */
 10 | 	public static function parse( $doc, $url, $args ) {
 11 | 		if ( ! $doc ) {
 12 | 			return array();
 13 | 		}
 14 | 		$xpath = new DOMXPath( $doc );
 15 | 		foreach ( $xpath->query( '//script' ) as $script ) {
 16 | 			if ( preg_match( '/window\._sharedData = ({.+});/', $script->textContent, $match ) ) { // phpcs:ignore
 17 | 				$data = json_decode( $match[1], true );
 18 | 			}
 19 | 		}
 20 | 		if ( empty( $data ) ) {
 21 | 			return array();
 22 | 		}
 23 | 
 24 | 		$jf2 = array();
 25 | 		if ( $data && is_array( $data ) && array_key_exists( 'entry_data', $data ) ) {
 26 | 			if ( is_array( $data['entry_data'] ) ) {
 27 | 				if ( array_key_exists( 'PostPage', $data['entry_data'] ) ) {
 28 | 					// Photo Page
 29 | 					$jf2 = self::html_photo( $data, $url );
 30 | 				} elseif ( array_key_exists( 'LocationsPage', $data['entry_data'] ) ) {
 31 | 					// Locations Page
 32 | 					$jf2 = self::html_location( $data, $url );
 33 | 				} elseif ( array_key_exists( 'LoginAndSignupPage', $data['entry_data'] ) ) {
 34 | 					return array();
 35 | 				}
 36 | 			}
 37 | 		}
 38 | 		if ( WP_DEBUG ) {
 39 | 			$jf2['_ig'] = $data;
 40 | 		}
 41 | 		return array_filter( $jf2 );
 42 | 	}
 43 | 
 44 | 	private static function html_location( $data, $url ) {
 45 | 		$post = $data['entry_data']['LocationsPage'];
 46 | 		if ( isset( $post[0]['graphql']['location'] ) ) {
 47 | 			$data = $post[0]['graphql']['location'];
 48 | 		} else {
 49 | 			return array();
 50 | 		}
 51 | 		return self::json_location( $data, $url );
 52 | 	}
 53 | 
 54 | 	private static function json_location( $data, $url ) {
 55 | 		$address = isset( $data['address_json'] ) ? json_decode( $data['address_json'], true ) : array();
 56 | 		$jf2     = array(
 57 | 			'address'        => $address,
 58 | 			'name'           => ifset( $data['name'] ),
 59 | 			'latitude'       => ifset( $data['lat'] ),
 60 | 			'longitude'      => ifset( $data['lng'] ),
 61 | 			'url'            => ifset( $data['website'] ),
 62 | 			'street_address' => ifset( $address['street_address'] ),
 63 | 			'postal_code'    => ifset( $address['zip_code'] ),
 64 | 			'region'         => ifset( $address['region_name'] ),
 65 | 			'country'        => ifset( $address['country_code'] ),
 66 | 		);
 67 | 		return array_filter( $jf2 );
 68 | 	}
 69 | 
 70 | 	private static function feed( $data, $url ) {
 71 | 		return self::profile( $data );
 72 | 	}
 73 | 
 74 | 	private static function html_photo( $data, $url ) {
 75 | 		$post = $data['entry_data']['PostPage'];
 76 | 		if ( isset( $post[0]['graphql']['shortcode_media'] ) ) {
 77 | 			$data = $post[0]['graphql']['shortcode_media'];
 78 | 		} elseif ( isset( $post[0]['graphql']['media'] ) ) {
 79 | 			$data = $post[0]['graphql']['media'];
 80 | 		} elseif ( isset( $post[0]['media'] ) ) {
 81 | 			$data = $post[0]['media'];
 82 | 		}
 83 | 		return self::json_photo( $data, $url );
 84 | 	}
 85 | 
 86 | 	public static function json_photo( $data, $url ) {
 87 | 		// Start building the h-entry
 88 | 		$entry = array(
 89 | 			'type' => 'entry',
 90 | 			'url'  => $url,
 91 | 		);
 92 | 
 93 | 		// Content and hashtags
 94 | 		$caption = false;
 95 | 
 96 | 		if ( isset( $data['caption'] ) ) {
 97 | 			  $caption = $data['caption'];
 98 | 		} elseif ( isset( $data['edge_media_to_caption']['edges'][0]['node']['text'] ) ) {
 99 | 			  $caption = $data['edge_media_to_caption']['edges'][0]['node']['text'];
100 | 		}
101 | 
102 | 		if ( $caption ) {
103 | 			if ( preg_match_all( '/#([a-z0-9_-]+)/i', $caption, $matches ) ) {
104 | 				$entry['category'] = array();
105 | 				foreach ( $matches[1] as $match ) {
106 | 					$entry['category'][] = $match;
107 | 				}
108 | 			}
109 | 
110 | 			$entry['content'] = array(
111 | 				'text' => $caption,
112 | 			);
113 | 		}
114 | 
115 | 		// Include the photo/video media URLs
116 | 		// (Always return arrays, even for single images)
117 | 		if ( array_key_exists( 'edge_sidecar_to_children', $data ) ) {
118 | 			$entry['photo'] = array();
119 | 			foreach ( $data['edge_sidecar_to_children']['edges'] as $edge ) {
120 | 				$entry['photo'][] = $edge['node']['display_url'];
121 | 			}
122 | 		} else {
123 | 			 // Single photo or video
124 | 			if ( array_key_exists( 'display_src', $data ) ) {
125 | 				$entry['photo'] = array( $data['display_src'] );
126 | 			} elseif ( array_key_exists( 'display_url', $data ) ) {
127 | 				$entry['photo'] = array( $data['display_url'] );
128 | 			}
129 | 
130 | 			if ( isset( $data['is_video'] ) && $data['is_video'] && isset( $data['video_url'] ) ) {
131 | 				$entry['video'] = array( $data['video_url'] );
132 | 			}
133 | 		}
134 | 
135 | 		// Published date
136 | 		$published = new Datetime();
137 | 		if ( isset( $data['taken_at_timestamp'] ) ) {
138 | 			  $published->setTimestamp( $data['taken_at_timestamp'] );
139 | 		} elseif ( isset( $data['date'] ) ) {
140 | 			  $published = new DateTime( $data['date'] );
141 | 		}
142 | 		$entry['published'] = $published->format( DATE_W3C );
143 | 		if ( isset( $data['location'] ) ) {
144 | 			$entry['location'] = array();
145 | 			if ( isset( $data['location']['address_json'] ) ) {
146 | 				$address           = json_decode( $data['location']['address_json'], true );
147 | 				$entry['location'] = array(
148 | 					'street_address' => $address['street_address'],
149 | 					'postal_code'    => $address['zip_code'],
150 | 					'region'         => $address['region_name'],
151 | 					'country'        => $address['country_code'],
152 | 				);
153 | 			}
154 | 			$entry['location']['name'] = $data['location']['name'];
155 | 			$entry['location']['url']  = sprintf( 'https://www.instagram.com/explore/locations/%1$s', $data['location']['id'] );
156 | 			$entry['location']         = array_filter( $entry['location'] );
157 | 		}
158 | 		if ( isset( $data['owner'] ) ) {
159 | 			$entry['author'] = array(
160 | 				'type'     => 'card',
161 | 				'name'     => ifset( $data['owner']['full_name'] ),
162 | 				'nickname' => ifset( $data['owner']['username'] ),
163 | 				'url'      => sprintf( 'https://www.instagram.com/%1$s/', $data['owner']['username'] ),
164 | 				'photo'    => ifset( $data['owner']['profile_pic_url'] ),
165 | 			);
166 | 		}
167 | 		return $entry;
168 | 	}
169 | 
170 | 	private static function profile( $data ) {
171 | 		if ( isset( $data['entry_data']['ProfilePage'][0] ) ) {
172 | 			$profile = $data['entry_data']['ProfilePage'][0];
173 | 			if ( $profile && isset( $profile['graphql']['user'] ) ) {
174 | 				$user = $profile['graphql']['user'];
175 | 				return $user;
176 | 			}
177 | 		}
178 | 		return array();
179 | 	}
180 | 
181 | 
182 | 
183 | }
184 | 


--------------------------------------------------------------------------------
/lib/mf2/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # Creative Commons Legal Code
 2 | 
 3 | ## CC0 1.0 Universal
 4 | 
 5 | http://creativecommons.org/publicdomain/zero/1.0
 6 | 
 7 | Official translations of this legal tool are available> CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER.
 8 | 
 9 | ### _Statement of Purpose_
10 | 
11 | The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work").
12 | 
13 | Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others.
14 | 
15 | For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights.
16 | 
17 | **1. Copyright and Related Rights.** A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following:
18 | 
19 | 1.  the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work;
20 | 2.  moral rights retained by the original author(s) and/or performer(s);
21 | 3.  publicity and privacy rights pertaining to a person's image or likeness depicted in a Work;
22 | 4.  rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below;
23 | 5.  rights protecting the extraction, dissemination, use and reuse of data in a Work;
24 | 6.  database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and
25 | 7.  other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof.
26 | 
27 | **2. Waiver.** To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose.
28 | 
29 | **3. Public License Fallback.** Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose.
30 | 
31 | **4. Limitations and Disclaimers.**
32 | 
33 | 1.  No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document.
34 | 2.  Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law.
35 | 3.  Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work.
36 | 4.  Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work.
37 | 


--------------------------------------------------------------------------------
/includes/compat-functions.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | 
  4 | if ( ! function_exists( 'current_datetime' ) ) {
  5 | 	/**
  6 | 	 * Retrieves the current time as an object with the timezone from settings.
  7 | 	 *
  8 | 	 * @since 5.3.0 - Backported to Parse This
  9 | 	 *
 10 | 	 * @return DateTime Date and time object.
 11 | 	 */
 12 | 	function current_datetime() {
 13 | 		return new DateTimeImmutable( 'now', wp_timezone() );
 14 | 	}
 15 | }
 16 | 
 17 | if ( ! function_exists( 'get_post_timestamp' ) ) {
 18 | 	/**
 19 | 	 * Retrieve post published or modified time as a Unix timestamp.
 20 | 	 *
 21 | 	 * Note that this function returns a true Unix timestamp, not summed with timezone offset
 22 | 	 * like older WP functions.
 23 | 	 *
 24 | 	 * @since 5.3.0 - backported to Parse This
 25 | 	 *
 26 | 	 * @param int|WP_Post $post  Optional. WP_Post object or ID. Default is global `$post` object.
 27 | 	 * @param string      $field Optional. Post field to use. Accepts 'date' or 'modified'.
 28 | 	 * @return int|false Unix timestamp on success, false on failure.
 29 | 	 */
 30 | 	function get_post_timestamp( $post = null, $field = 'date' ) {
 31 | 		$datetime = get_post_datetime( $post, $field );
 32 | 		if ( false === $datetime ) {
 33 | 			return false;
 34 | 		}
 35 | 		return $datetime->getTimestamp();
 36 | 	}
 37 | }
 38 | 
 39 | 
 40 | if ( ! function_exists( 'get_post_datetime' ) ) {
 41 | 	/**
 42 | 	 * Retrieve post published or modified time as a `DateTime` object instance.
 43 | 	 *
 44 | 	 * The object will be set to the timezone from WordPress settings.
 45 | 	 *
 46 | 	 * @since 5.3.0 - backported to Parse This
 47 | 	 *
 48 | 	 * @param int|WP_Post $post  Optional. WP_Post object or ID. Default is global `$post` object.
 49 | 	 * @param string      $field Optional. Post field to use. Accepts 'date' or 'modified'.
 50 | 	 * @return DateTime|false Time object on success, false on failure.
 51 | 	 */
 52 | 	function get_post_datetime( $post = null, $field = 'date' ) {
 53 | 		$post = get_post( $post );
 54 | 		if ( ! $post ) {
 55 | 			return false;
 56 | 		}
 57 | 		$time = ( 'modified' === $field ) ? $post->post_modified : $post->post_date;
 58 | 		if ( empty( $time ) || '0000-00-00 00:00:00' === $time ) {
 59 | 			return false;
 60 | 		}
 61 | 		return date_create_immutable_from_format( 'Y-m-d H:i:s', $time, wp_timezone() );
 62 | 	}
 63 | }
 64 | 
 65 | if ( ! function_exists( 'wp_timezone_string' ) ) {
 66 | 	/**
 67 | 	 * Retrieves the timezone from site settings as a string.
 68 | 	 *
 69 | 	 * Uses the `timezone_string` option to get a proper timezone if available,
 70 | 	 * otherwise falls back to an offset.
 71 | 	 *
 72 | 	 * @since 5.3.0 - backported into Parse This
 73 | 	 *
 74 | 	 * @return string PHP timezone string or a ±HH:MM offset.
 75 | 	 */
 76 | 	function wp_timezone_string() {
 77 | 		$timezone_string = get_option( 'timezone_string' );
 78 | 		if ( $timezone_string ) {
 79 | 			return $timezone_string;
 80 | 		}
 81 | 		$offset    = (float) get_option( 'gmt_offset' );
 82 | 		$hours     = (int) $offset;
 83 | 		$minutes   = ( $offset - $hours );
 84 | 		$sign      = ( $offset < 0 ) ? '-' : '+';
 85 | 		$abs_hour  = abs( $hours );
 86 | 		$abs_mins  = abs( $minutes * 60 );
 87 | 		$tz_offset = sprintf( '%s%02d:%02d', $sign, $abs_hour, $abs_mins );
 88 | 		return $tz_offset;
 89 | 	}
 90 | }
 91 | 
 92 | if ( ! function_exists( 'wp_timezone' ) ) {
 93 | 	/**
 94 | 	 * Retrieves the timezone from site settings as a `DateTimeZone` object.
 95 | 	 *
 96 | 	 * Timezone can be based on a PHP timezone string or a ±HH:MM offset.
 97 | 	 *
 98 | 	 * @since 5.3.0 - backported into Parse This
 99 | 	 *
100 | 	 * @return DateTimeZone Timezone object.
101 | 	 */
102 | 	function wp_timezone() {
103 | 		return new DateTimeZone( wp_timezone_string() );
104 | 	}
105 | }
106 | 
107 | 
if ( ! function_exists( 'wp_date' ) ) {
	/**
	 * Retrieves the date, in localized format.
	 *
	 * This is a newer function, intended to replace `date_i18n()` without legacy quirks in it.
	 *
	 * Note that, unlike `date_i18n()`, this function accepts a true Unix timestamp, not summed
	 * with timezone offset.
	 *
	 * @since 5.3.0 - backported to Parse This
	 *
	 * @param string       $format    PHP date format.
	 * @param int          $timestamp Optional. Unix timestamp. Defaults to current time.
	 * @param DateTimeZone $timezone  Optional. Timezone to output result in. Defaults to timezone
	 *                                from site settings.
	 * @return string|false The date, translated if locale specifies it. False on invalid timestamp input.
	 */
	function wp_date( $format, $timestamp = null, $timezone = null ) {
		global $wp_locale;
		if ( null === $timestamp ) {
			$timestamp = time();
		} elseif ( ! is_numeric( $timestamp ) ) {
			return false;
		}
		if ( ! $timezone ) {
			$timezone = wp_timezone();
		}
		$datetime = date_create( '@' . $timestamp );
		// Some numeric inputs (e.g. '1e3') pass is_numeric() but cannot be parsed as a timestamp.
		if ( false === $datetime ) {
			return false;
		}
		$datetime->setTimezone( $timezone );
		if ( empty( $wp_locale->month ) || empty( $wp_locale->weekday ) ) {
			// No locale data available: fall back to PHP's own (English) names.
			$date = $datetime->format( $format );
		} else {
			// We need to unpack shorthand `r` format because it has parts that might be localized.
			$format        = preg_replace( '/(?<!\\\\)r/', DATE_RFC2822, $format );
			$new_format    = '';
			$format_length = strlen( $format );
			$month         = $wp_locale->get_month( $datetime->format( 'm' ) );
			$weekday       = $wp_locale->get_weekday( $datetime->format( 'w' ) );
			for ( $i = 0; $i < $format_length; $i ++ ) {
				switch ( $format[ $i ] ) {
					case 'D':
						$new_format .= backslashit( $wp_locale->get_weekday_abbrev( $weekday ) );
						break;
					case 'F':
						$new_format .= backslashit( $month );
						break;
					case 'l':
						$new_format .= backslashit( $weekday );
						break;
					case 'M':
						$new_format .= backslashit( $wp_locale->get_month_abbrev( $month ) );
						break;
					case 'a':
						$new_format .= backslashit( $wp_locale->get_meridiem( $datetime->format( 'a' ) ) );
						break;
					case 'A':
						$new_format .= backslashit( $wp_locale->get_meridiem( $datetime->format( 'A' ) ) );
						break;
					case '\\':
						$new_format .= $format[ $i ];
						// If character follows a slash, we add it without translating.
						// Test $i + 1 so a trailing backslash does not read past the end of the format.
						if ( $i + 1 < $format_length ) {
							$new_format .= $format[ ++$i ];
						}
						break;
					default:
						$new_format .= $format[ $i ];
						break;
				}
			}
			$date = $datetime->format( $new_format );
			$date = wp_maybe_decline_date( $date );
		}
		/**
		 * Filters the date formatted based on the locale.
		 *
		 * @since 5.3.0 but backported to Parse This
		 *
		 * @param string       $date      Formatted date string.
		 * @param string       $format    Format to display the date.
		 * @param int          $timestamp Unix timestamp.
		 * @param DateTimeZone $timezone  Timezone.
		 */
		$date = apply_filters( 'wp_date', $date, $format, $timestamp, $timezone );
		return $date;
	}
}
195 | 
if ( ! function_exists( 'str_contains' ) ) {
	/**
	 * Polyfill for PHP 8's str_contains().
	 *
	 * Matches the native function: the search is byte-wise and case-sensitive,
	 * and an empty needle is contained in every string.
	 *
	 * @param string $haystack String to search in.
	 * @param string $needle   Substring to look for.
	 * @return bool True when $needle occurs in $haystack, false otherwise.
	 */
	function str_contains( $haystack, $needle ) {
		$haystack = (string) $haystack;
		$needle   = (string) $needle;

		// Native str_contains() returns true for an empty needle.
		if ( '' === $needle ) {
			return true;
		}

		// A byte-wise search is enough for containment and avoids requiring mbstring.
		return false !== strpos( $haystack, $needle );
	}
}
201 | 


--------------------------------------------------------------------------------
/lib/html5/HTML5/Parser/UTF8Utils.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Masterminds\HTML5\Parser;
  4 | 
  5 | /*
  6 | Portions based on code from html5lib files with the following copyright:
  7 | 
  8 | Copyright 2009 Geoffrey Sneddon <http://gsnedders.com/>
  9 | 
 10 | Permission is hereby granted, free of charge, to any person obtaining a
 11 | copy of this software and associated documentation files (the
 12 |     "Software"), to deal in the Software without restriction, including
 13 | without limitation the rights to use, copy, modify, merge, publish,
 14 | distribute, sublicense, and/or sell copies of the Software, and to
 15 | permit persons to whom the Software is furnished to do so, subject to
 16 | the following conditions:
 17 | 
 18 | The above copyright notice and this permission notice shall be included
 19 | in all copies or substantial portions of the Software.
 20 | 
 21 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 22 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 23 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 24 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 25 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 26 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 27 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 28 | */
 29 | 
 30 | use Masterminds\HTML5\Exception;
 31 | 
 32 | class UTF8Utils
 33 | {
 34 |     /**
 35 |      * The Unicode replacement character.
 36 |      */
 37 |     const FFFD = "\xEF\xBF\xBD";
 38 | 
 39 |     /**
 40 |      * Count the number of characters in a string.
 41 |      * UTF-8 aware. This will try (in order) iconv, MB, libxml, and finally a custom counter.
 42 |      *
 43 |      * @param string $string
 44 |      *
 45 |      * @return int
 46 |      */
 47 |     public static function countChars($string)
 48 |     {
 49 |         // Get the length for the string we need.
 50 |         if (function_exists('mb_strlen')) {
 51 |             return mb_strlen($string, 'utf-8');
 52 |         }
 53 | 
 54 |         if (function_exists('iconv_strlen')) {
 55 |             return iconv_strlen($string, 'utf-8');
 56 |         }
 57 | 
 58 |         if (function_exists('utf8_decode')) {
 59 |             // MPB: Will this work? Won't certain decodes lead to two chars
 60 |             // extrapolated out of 2-byte chars?
 61 |             return strlen(utf8_decode($string));
 62 |         }
 63 | 
 64 |         $count = count_chars($string);
 65 | 
 66 |         // 0x80 = 0x7F - 0 + 1 (one added to get inclusive range)
 67 |         // 0x33 = 0xF4 - 0x2C + 1 (one added to get inclusive range)
 68 |         return array_sum(array_slice($count, 0, 0x80)) + array_sum(array_slice($count, 0xC2, 0x33));
 69 |     }
 70 | 
 71 |     /**
 72 |      * Convert data from the given encoding to UTF-8.
 73 |      *
 74 |      * This has not yet been tested with charactersets other than UTF-8.
 75 |      * It should work with ISO-8859-1/-13 and standard Latin Win charsets.
 76 |      *
 77 |      * @param string $data     The data to convert
 78 |      * @param string $encoding A valid encoding. Examples: http://www.php.net/manual/en/mbstring.supported-encodings.php
 79 |      *
 80 |      * @return string
 81 |      */
 82 |     public static function convertToUTF8($data, $encoding = 'UTF-8')
 83 |     {
 84 |         /*
 85 |          * From the HTML5 spec: Given an encoding, the bytes in the input stream must be converted
 86 |          * to Unicode characters for the tokeniser, as described by the rules for that encoding,
 87 |          * except that the leading U+FEFF BYTE ORDER MARK character, if any, must not be stripped
 88 |          * by the encoding layer (it is stripped by the rule below). Bytes or sequences of bytes
 89 |          * in the original byte stream that could not be converted to Unicode characters must be
 90 |          * converted to U+FFFD REPLACEMENT CHARACTER code points.
 91 |          */
 92 | 
 93 |         // mb_convert_encoding is chosen over iconv because of a bug. The best
 94 |         // details for the bug are on http://us1.php.net/manual/en/function.iconv.php#108643
 95 |         // which contains links to the actual but reports as well as work around
 96 |         // details.
 97 |         if (function_exists('mb_convert_encoding')) {
 98 |             // mb library has the following behaviors:
 99 |             // - UTF-16 surrogates result in false.
100 |             // - Overlongs and outside Plane 16 result in empty strings.
101 | 
102 |             // Before we run mb_convert_encoding we need to tell it what to do with
103 |             // characters it does not know. This could be different than the parent
104 |             // application executing this library so we store the value, change it
105 |             // to our needs, and then change it back when we are done. This feels
106 |             // a little excessive and it would be great if there was a better way.
107 |             $save = mb_substitute_character();
108 |             mb_substitute_character('none');
109 |             $data = mb_convert_encoding($data, 'UTF-8', $encoding);
110 |             mb_substitute_character($save);
111 |         }
112 |         // @todo Get iconv running in at least some environments if that is possible.
113 |         elseif (function_exists('iconv') && 'auto' !== $encoding) {
114 |             // fprintf(STDOUT, "iconv found\n");
115 |             // iconv has the following behaviors:
116 |             // - Overlong representations are ignored.
117 |             // - Beyond Plane 16 is replaced with a lower char.
118 |             // - Incomplete sequences generate a warning.
119 |             $data = @iconv($encoding, 'UTF-8//IGNORE', $data);
120 |         } else {
121 |             throw new Exception('Not implemented, please install mbstring or iconv');
122 |         }
123 | 
124 |         /*
125 |          * One leading U+FEFF BYTE ORDER MARK character must be ignored if any are present.
126 |          */
127 |         if ("\xEF\xBB\xBF" === substr($data, 0, 3)) {
128 |             $data = substr($data, 3);
129 |         }
130 | 
131 |         return $data;
132 |     }
133 | 
134 |     /**
135 |      * Checks for Unicode code points that are not valid in a document.
136 |      *
137 |      * @param string $data A string to analyze
138 |      *
139 |      * @return array An array of (string) error messages produced by the scanning
140 |      */
141 |     public static function checkForIllegalCodepoints($data)
142 |     {
143 |         // Vestigal error handling.
144 |         $errors = array();
145 | 
146 |         /*
147 |          * All U+0000 null characters in the input must be replaced by U+FFFD REPLACEMENT CHARACTERs.
148 |          * Any occurrences of such characters is a parse error.
149 |          */
150 |         for ($i = 0, $count = substr_count($data, "\0"); $i < $count; ++$i) {
151 |             $errors[] = 'null-character';
152 |         }
153 | 
154 |         /*
155 |          * Any occurrences of any characters in the ranges U+0001 to U+0008, U+000B, U+000E to U+001F, U+007F
156 |          * to U+009F, U+D800 to U+DFFF , U+FDD0 to U+FDEF, and characters U+FFFE, U+FFFF, U+1FFFE, U+1FFFF,
157 |          * U+2FFFE, U+2FFFF, U+3FFFE, U+3FFFF, U+4FFFE, U+4FFFF, U+5FFFE, U+5FFFF, U+6FFFE, U+6FFFF, U+7FFFE,
158 |          * U+7FFFF, U+8FFFE, U+8FFFF, U+9FFFE, U+9FFFF, U+AFFFE, U+AFFFF, U+BFFFE, U+BFFFF, U+CFFFE, U+CFFFF,
159 |          * U+DFFFE, U+DFFFF, U+EFFFE, U+EFFFF, U+FFFFE, U+FFFFF, U+10FFFE, and U+10FFFF are parse errors.
160 |          * (These are all control characters or permanently undefined Unicode characters.)
161 |          */
162 |         // Check PCRE is loaded.
163 |         $count = preg_match_all(
164 |             '/(?:
165 |         [\x01-\x08\x0B\x0E-\x1F\x7F] # U+0001 to U+0008, U+000B,  U+000E to U+001F and U+007F
166 |       |
167 |         \xC2[\x80-\x9F] # U+0080 to U+009F
168 |       |
169 |         \xED(?:\xA0[\x80-\xFF]|[\xA1-\xBE][\x00-\xFF]|\xBF[\x00-\xBF]) # U+D800 to U+DFFFF
170 |       |
171 |         \xEF\xB7[\x90-\xAF] # U+FDD0 to U+FDEF
172 |       |
173 |         \xEF\xBF[\xBE\xBF] # U+FFFE and U+FFFF
174 |       |
175 |         [\xF0-\xF4][\x8F-\xBF]\xBF[\xBE\xBF] # U+nFFFE and U+nFFFF (1 <= n <= 10_{16})
176 |       )/x', $data, $matches);
177 |         for ($i = 0; $i < $count; ++$i) {
178 |             $errors[] = 'invalid-codepoint';
179 |         }
180 | 
181 |         return $errors;
182 |     }
183 | }
184 | 


--------------------------------------------------------------------------------
/includes/class-parse-this-base.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Parse This Base class.
  4 |  * Originally Derived from the Press This Class with Enhancements.
  5 |  */
  6 | class Parse_This_Base {
  7 | 
  8 | 
  9 | 
	/**
	 * Sorts a list of items newest-first by a date field.
	 *
	 * @param array  $items List of item arrays.
	 * @param string $field Optional. Key holding a strtotime()-parseable date string. Default 'published'.
	 *
	 * @return array|null The re-indexed, sorted list, or null when the list is empty
	 *                    or its first entry has no string value for the field.
	 */
	protected static function order_by_date( $items, $field = 'published' ) {
		// Nothing to inspect or sort in an empty or non-array list.
		if ( ! is_array( $items ) || empty( $items ) ) {
			return null;
		}

		// If the first entry does not have this field as a string return.
		$first = reset( $items );
		if ( ! is_array( $first ) || ! array_key_exists( $field, $first ) || ! is_string( $first[ $field ] ) ) {
			return null;
		}

		usort(
			$items,
			function( $a, $b ) use ( $field ) {
				// Entries without a usable date sort as timestamp 0, the same value an
				// unparseable date yields, without raising notices.
				$a_time = ( isset( $a[ $field ] ) && is_string( $a[ $field ] ) ) ? (int) strtotime( $a[ $field ] ) : 0;
				$b_time = ( isset( $b[ $field ] ) && is_string( $b[ $field ] ) ) ? (int) strtotime( $b[ $field ] ) : 0;
				return $b_time - $a_time;
			}
		);
		return $items;
	}
 29 | 
	/**
	 * Finds the most recent 'published' date in a list of items.
	 *
	 * @param array $items List of items, sorted internally with order_by_date().
	 * @return string|null The newest 'published' value formatted as DATE_W3C, or null when
	 *                     order_by_date() could not produce a sorted list.
	 */
	protected static function find_last_published( $items ) {
		$sorted = self::order_by_date( $items, 'published' );
		if ( $sorted ) {
			// The first entry is the newest; wp_timezone() is passed as the DateTime timezone argument.
			$newest = new DateTime( $sorted[0]['published'], wp_timezone() );
			return $newest->format( DATE_W3C );
		}

		return null;
	}
 41 | 
	/**
	 * Checks a value against PHP's FILTER_VALIDATE_EMAIL filter.
	 *
	 * @param mixed $email Value to check.
	 * @return mixed The result of filter_var(): the filtered value on success, false on failure.
	 */
	public static function validate_email( $email ) {
		$checked = filter_var( $email, FILTER_VALIDATE_EMAIL );

		return $checked;
	}
 45 | 
	/**
	 * Finds the most recent 'updated' date in a list of items.
	 *
	 * Mirrors find_last_published(): when the items cannot be ordered by their
	 * 'updated' field, null is returned instead of building a DateTime from a
	 * missing value (which would silently yield the current time).
	 *
	 * @param array $items List of items, sorted internally with order_by_date().
	 * @return string|null The newest 'updated' value formatted as DATE_W3C, or null when
	 *                     order_by_date() could not produce a sorted list.
	 */
	protected static function find_last_updated( $items ) {
		$items = self::order_by_date( $items, 'updated' );
		if ( ! $items ) {
			return null;
		}

		// The first entry is the newest; wp_timezone() is passed as the DateTime timezone argument.
		$return = new DateTime( $items[0]['updated'], wp_timezone() );
		return $return->format( DATE_W3C );
	}
 54 | 
	/**
	 * Caps an array at 100 entries.
	 *
	 * The limit was originally 50 but some sites are very detailed in their meta.
	 *
	 * @ignore
	 * @since 4.2.0
	 *
	 * @param array $value Array to limit.
	 * @return array The untouched array when it has at most 100 entries, its first 100 entries
	 *               (via array_slice()) when it has more, or an empty array for non-array input.
	 */
	protected static function limit_array( $value ) {
		if ( ! is_array( $value ) ) {
			return array();
		}

		// Only slice when needed: array_slice() renumbers integer keys, returning $value does not.
		return ( count( $value ) > 100 ) ? array_slice( $value, 0, 100 ) : $value;
	}
 76 | 
	/**
	 * Caps a string at 5,000 characters and sanitizes it.
	 *
	 * @ignore
	 * @since 4.2.0
	 *
	 * @param mixed $value Value to limit.
	 * @return bool|int|float|string Numeric values (including numeric strings) and booleans are
	 *                               returned untouched. Other strings are cut to 5,000 characters
	 *                               if longer, then passed through trim() and sanitize_text_field().
	 *                               Anything else yields an empty string.
	 */
	protected static function limit_string( $value ) {
		// Numbers and booleans pass through as-is.
		if ( is_numeric( $value ) || is_bool( $value ) ) {
			return $value;
		}

		if ( ! is_string( $value ) ) {
			return '';
		}

		$clipped = ( mb_strlen( $value ) > 5000 ) ? mb_substr( $value, 0, 5000 ) : $value;

		return sanitize_text_field( trim( $clipped ) );
	}
103 | 
104 | 	/**
105 | 	 * Utility method to limit a given URL to 2,048 characters.
106 | 	 *
107 | 	 * @ignore
108 | 	 * @since 4.2.0
109 | 	 *
110 | 	 * @param string $url URL to check for length and validity.
111 | 	 * @param string $source_url URL URL to use to resolve relative URLs
112 | 	 * @return string Escaped URL if of valid length (< 2048) and makeup. Empty string otherwise.
113 | 	 */
114 | 	protected static function limit_url( $url, $source_url ) {
115 | 		if ( ! is_string( $url ) ) {
116 | 			return '';
117 | 		}
118 | 
119 | 		// HTTP 1.1 allows 8000 chars but the "de-facto" standard supported in all current browsers is 2048.
120 | 		if ( strlen( $url ) > 2048 ) {
121 | 			return ''; // Return empty rather than a truncated/invalid URL
122 | 		}
123 | 
124 | 		// Does not look like a URL.
125 | 		if ( ! filter_var( $url, FILTER_VALIDATE_URL ) ) {
126 | 			return '';
127 | 		}
128 | 
129 | 		$url = pt_make_absolute_url( $url, $source_url );
130 | 
131 | 		return esc_url_raw( $url, array( 'http', 'https' ) );
132 | 	}
133 | 
134 | 	/**
135 | 	 * Utility method to limit image source URLs.
136 | 	 *
137 | 	 * Excluded URLs include share-this type buttons, loaders, spinners, spacers, WordPress interface images,
138 | 	 * tiny buttons or thumbs, mathtag.com or quantserve.com images, or the WordPress.com stats gif.
139 | 	 *
140 | 	 * @param string $src Image source URL.
141 | 	 * @return string If not matched an excluded URL type, the original URL, empty string otherwise.
142 | 	 */
143 | 	protected static function limit_img( $src, $source_url ) {
144 | 		$src = self::limit_url( $src, $source_url );
145 | 
146 | 		if ( preg_match( '!/ad[sx]?/!i', $src ) ) {
147 | 			// Ads
148 | 			return '';
149 | 		} elseif ( preg_match( '!(/share-?this[^.]+?\.[a-z0-9]{3,4})(\?.*)?$!i', $src ) ) {
150 | 			// Share-this type button
151 | 			return '';
152 | 		} elseif ( preg_match( '!/(spinner|loading|spacer|blank|rss)\.(gif|jpg|png)!i', $src ) ) {
153 | 			// Loaders, spinners, spacers
154 | 			return '';
155 | 		} elseif ( preg_match( '!/([^./]+[-_])?(spinner|loading|spacer|blank)s?([-_][^./]+)?\.[a-z0-9]{3,4}!i', $src ) ) {
156 | 			// Fancy loaders, spinners, spacers
157 | 			return '';
158 | 		} elseif ( preg_match( '!([^./]+[-_])?thumb[^.]*\.(gif|jpg|png)$!i', $src ) ) {
159 | 			// Thumbnails, too small, usually irrelevant to context
160 | 			return '';
161 | 		} elseif ( false !== stripos( $src, '/wp-includes/' ) ) {
162 | 			// Classic WordPress interface images
163 | 			return '';
164 | 		} elseif ( false !== stripos( $src, '/wp-content/themes' ) ) {
165 | 			// Anything within a WordPress theme directory
166 | 			return '';
167 | 		} elseif ( false !== stripos( $src, '/wp-content/plugins' ) ) {
168 | 			// Anything within a WordPress plugin directory
169 | 			return '';
170 | 		} elseif ( preg_match( '![^\d]\d{1,2}x\d+\.(gif|jpg|png)$!i', $src ) ) {
171 | 			// Most often tiny buttons/thumbs (< 100px wide)
172 | 			return '';
173 | 		} elseif ( preg_match( '!/pixel\.(mathtag|quantserve)\.com!i', $src ) ) {
174 | 			// See mathtag.com and https://www.quantcast.com/how-we-do-it/iab-standard-measurement/how-we-collect-data/
175 | 			return '';
176 | 		} elseif ( preg_match( '!/[gb]\.gif(\?.+)?$!i', $src ) ) {
177 | 			// WordPress.com stats gif
178 | 			return '';
179 | 		}
180 | 		// Optionally add additional limits
181 | 		return apply_filters( 'parse_this_img_filters', $src );
182 | 	}
183 | 
184 | 	/**
185 | 	 * Limit embed source URLs to specific providers.
186 | 	 *
187 | 	 * Not all core oEmbed providers are supported. Supported providers include YouTube, Vimeo,
188 | 	 * Vine, Daily Motion, SoundCloud, and Twitter.
189 | 	 *
190 | 	 * @param string $src Embed source URL.
191 | 	 * @param string $source_url Source URL
192 | 	 * @return string If not from a supported provider, an empty string. Otherwise, a reformatted embed URL.
193 | 	 */
194 | 	protected static function limit_embed( $src, $source_url ) {
195 | 		$src = self::limit_url( $src, $source_url );
196 | 
197 | 		if ( empty( $src ) ) {
198 | 			return '';
199 | 		}
200 | 
201 | 		if ( preg_match( '!//(m|www)\.youtube\.com/(embed|v)/([^?]+)\?.+$!i', $src, $src_matches ) ) {
202 | 			// Embedded Youtube videos (www or mobile)
203 | 			$src = 'https://www.youtube.com/watch?v=' . $src_matches[3];
204 | 		} elseif ( preg_match( '!//player\.vimeo\.com/video/([\d]+)([?/].*)?$!i', $src, $src_matches ) ) {
205 | 			// Embedded Vimeo iframe videos
206 | 			$src = 'https://vimeo.com/' . (int) $src_matches[1];
207 | 		} elseif ( preg_match( '!//vimeo\.com/moogaloop\.swf\?clip_id=([\d]+)$!i', $src, $src_matches ) ) {
208 | 			// Embedded Vimeo Flash videos
209 | 			$src = 'https://vimeo.com/' . (int) $src_matches[1];
210 | 		} elseif ( preg_match( '!//vine\.co/v/([^/]+)/embed!i', $src, $src_matches ) ) {
211 | 			// Embedded Vine videos
212 | 			$src = 'https://vine.co/v/' . $src_matches[1];
213 | 		} elseif ( preg_match( '!//(www\.)?dailymotion\.com/embed/video/([^/?]+)([/?].+)?!i', $src, $src_matches ) ) {
214 | 			// Embedded Daily Motion videos
215 | 			$src = 'https://www.dailymotion.com/video/' . $src_matches[2];
216 | 		} else {
217 | 			$oembed = _wp_oembed_get_object();
218 | 
219 | 			if ( ! $oembed->get_provider(
220 | 				$src,
221 | 				array(
222 | 					'discover' => false,
223 | 				)
224 | 			) ) {
225 | 				$src = '';
226 | 			}
227 | 		}
228 | 
229 | 		return $src;
230 | 	}
231 | 
232 | 	public static function set( $array, $key, $value ) {
233 | 		if ( ! isset( $array[ $key ] ) ) {
234 | 			$array[ $key ] = $value;
235 | 		} elseif ( is_string( $array[ $key ] ) ) {
236 | 			$array[ $key ] = array( $array[ $key ], $value );
237 | 		} elseif ( is_array( $array[ $key ] ) ) {
238 | 			$array[ $key ][] = $value;
239 | 		}
240 | 		return $array;
241 | 	}
242 | }
243 | 


--------------------------------------------------------------------------------
/lib/html5/HTML5.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Masterminds;
  4 | 
  5 | use Masterminds\HTML5\Parser\DOMTreeBuilder;
  6 | use Masterminds\HTML5\Parser\Scanner;
  7 | use Masterminds\HTML5\Parser\Tokenizer;
  8 | use Masterminds\HTML5\Serializer\OutputRules;
  9 | use Masterminds\HTML5\Serializer\Traverser;
 10 | 
 11 | /**
 12 |  * This class offers convenience methods for parsing and serializing HTML5.
 13 |  * It is roughly designed to mirror the \DOMDocument native class.
 14 |  */
 15 | class HTML5
 16 | {
 17 |     /**
 18 |      * Global options for the parser and serializer.
 19 |      *
 20 |      * @var array
 21 |      */
 22 |     private $defaultOptions = array(
 23 |         // Whether the serializer should aggressively encode all characters as entities.
 24 |         'encode_entities' => false,
 25 | 
 26 |         // Prevents the parser from automatically assigning the HTML5 namespace to the DOM document.
 27 |         'disable_html_ns' => false,
 28 |     );
 29 | 
 30 |     protected $errors = array();
 31 | 
 32 |     public function __construct(array $defaultOptions = array())
 33 |     {
 34 |         $this->defaultOptions = array_merge($this->defaultOptions, $defaultOptions);
 35 |     }
 36 | 
 37 |     /**
 38 |      * Get the current default options.
 39 |      *
 40 |      * @return array
 41 |      */
 42 |     public function getOptions()
 43 |     {
 44 |         return $this->defaultOptions;
 45 |     }
 46 | 
 47 |     /**
 48 |      * Load and parse an HTML file.
 49 |      *
 50 |      * This will apply the HTML5 parser, which is tolerant of many
 51 |      * varieties of HTML, including XHTML 1, HTML 4, and well-formed HTML
 52 |      * 3. Note that in these cases, not all of the old data will be
 53 |      * preserved. For example, XHTML's XML declaration will be removed.
 54 |      *
 55 |      * The rules governing parsing are set out in the HTML 5 spec.
 56 |      *
 57 |      * @param string|resource $file    The path to the file to parse. If this is a resource, it is
 58 |      *                                 assumed to be an open stream whose pointer is set to the first
 59 |      *                                 byte of input.
 60 |      * @param array           $options Configuration options when parsing the HTML.
 61 |      *
 62 |      * @return \DOMDocument A DOM document. These object type is defined by the libxml
 63 |      *                      library, and should have been included with your version of PHP.
 64 |      */
 65 |     public function load($file, array $options = array())
 66 |     {
 67 |         // Handle the case where file is a resource.
 68 |         if (is_resource($file)) {
 69 |             return $this->parse(stream_get_contents($file), $options);
 70 |         }
 71 | 
 72 |         return $this->parse(file_get_contents($file), $options);
 73 |     }
 74 | 
 75 |     /**
 76 |      * Parse a HTML Document from a string.
 77 |      *
 78 |      * Take a string of HTML 5 (or earlier) and parse it into a
 79 |      * DOMDocument.
 80 |      *
 81 |      * @param string $string  A html5 document as a string.
 82 |      * @param array  $options Configuration options when parsing the HTML.
 83 |      *
 84 |      * @return \DOMDocument A DOM document. DOM is part of libxml, which is included with
 85 |      *                      almost all distribtions of PHP.
 86 |      */
 87 |     public function loadHTML($string, array $options = array())
 88 |     {
 89 |         return $this->parse($string, $options);
 90 |     }
 91 | 
 92 |     /**
 93 |      * Convenience function to load an HTML file.
 94 |      *
 95 |      * This is here to provide backwards compatibility with the
 96 |      * PHP DOM implementation. It simply calls load().
 97 |      *
 98 |      * @param string $file    The path to the file to parse. If this is a resource, it is
 99 |      *                        assumed to be an open stream whose pointer is set to the first
100 |      *                        byte of input.
101 |      * @param array  $options Configuration options when parsing the HTML.
102 |      *
103 |      * @return \DOMDocument A DOM document. These object type is defined by the libxml
104 |      *                      library, and should have been included with your version of PHP.
105 |      */
106 |     public function loadHTMLFile($file, array $options = array())
107 |     {
108 |         return $this->load($file, $options);
109 |     }
110 | 
111 |     /**
112 |      * Parse a HTML fragment from a string.
113 |      *
114 |      * @param string $string  the HTML5 fragment as a string
115 |      * @param array  $options Configuration options when parsing the HTML
116 |      *
117 |      * @return \DOMDocumentFragment A DOM fragment. The DOM is part of libxml, which is included with
118 |      *                              almost all distributions of PHP.
119 |      */
120 |     public function loadHTMLFragment($string, array $options = array())
121 |     {
122 |         return $this->parseFragment($string, $options);
123 |     }
124 | 
125 |     /**
126 |      * Return all errors encountered into parsing phase.
127 |      *
128 |      * @return array
129 |      */
130 |     public function getErrors()
131 |     {
132 |         return $this->errors;
133 |     }
134 | 
135 |     /**
136 |      * Return true it some errors were encountered into parsing phase.
137 |      *
138 |      * @return bool
139 |      */
140 |     public function hasErrors()
141 |     {
142 |         return count($this->errors) > 0;
143 |     }
144 | 
145 |     /**
146 |      * Parse an input string.
147 |      *
148 |      * @param string $input
149 |      * @param array  $options
150 |      *
151 |      * @return \DOMDocument
152 |      */
153 |     public function parse($input, array $options = array())
154 |     {
155 |         $this->errors = array();
156 |         $options = array_merge($this->defaultOptions, $options);
157 |         $events = new DOMTreeBuilder(false, $options);
158 |         $scanner = new Scanner($input, !empty($options['encoding']) ? $options['encoding'] : 'UTF-8');
159 |         $parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML : Tokenizer::CONFORMANT_HTML);
160 | 
161 |         $parser->parse();
162 |         $this->errors = $events->getErrors();
163 | 
164 |         return $events->document();
165 |     }
166 | 
167 |     /**
168 |      * Parse an input stream where the stream is a fragment.
169 |      *
170 |      * Lower-level loading function. This requires an input stream instead
171 |      * of a string, file, or resource.
172 |      *
173 |      * @param string $input   The input data to parse in the form of a string.
174 |      * @param array  $options An array of options.
175 |      *
176 |      * @return \DOMDocumentFragment
177 |      */
178 |     public function parseFragment($input, array $options = array())
179 |     {
180 |         $options = array_merge($this->defaultOptions, $options);
181 |         $events = new DOMTreeBuilder(true, $options);
182 |         $scanner = new Scanner($input, !empty($options['encoding']) ? $options['encoding'] : 'UTF-8');
183 |         $parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML : Tokenizer::CONFORMANT_HTML);
184 | 
185 |         $parser->parse();
186 |         $this->errors = $events->getErrors();
187 | 
188 |         return $events->fragment();
189 |     }
190 | 
191 |     /**
192 |      * Save a DOM into a given file as HTML5.
193 |      *
194 |      * @param mixed           $dom     The DOM to be serialized.
195 |      * @param string|resource $file    The filename to be written or resource to write to.
196 |      * @param array           $options Configuration options when serializing the DOM. These include:
197 |      *                                 - encode_entities: Text written to the output is escaped by default and not all
198 |      *                                 entities are encoded. If this is set to true all entities will be encoded.
199 |      *                                 Defaults to false.
200 |      */
201 |     public function save($dom, $file, $options = array())
202 |     {
203 |         $close = true;
204 |         if (is_resource($file)) {
205 |             $stream = $file;
206 |             $close = false;
207 |         } else {
208 |             $stream = fopen($file, 'wb');
209 |         }
210 |         $options = array_merge($this->defaultOptions, $options);
211 |         $rules = new OutputRules($stream, $options);
212 |         $trav = new Traverser($dom, $stream, $rules, $options);
213 | 
214 |         $trav->walk();
215 |         /*
216 |          * release the traverser to avoid cyclic references and allow PHP to free memory without waiting for gc_collect_cycles
217 |          */
218 |         $rules->unsetTraverser();
219 |         if ($close) {
220 |             fclose($stream);
221 |         }
222 |     }
223 | 
224 |     /**
225 |      * Convert a DOM into an HTML5 string.
226 |      *
227 |      * @param mixed $dom     The DOM to be serialized.
228 |      * @param array $options Configuration options when serializing the DOM. These include:
229 |      *                       - encode_entities: Text written to the output is escaped by default and not all
230 |      *                       entities are encoded. If this is set to true all entities will be encoded.
231 |      *                       Defaults to false.
232 |      *
233 |      * @return string A HTML5 documented generated from the DOM.
234 |      */
235 |     public function saveHTML($dom, $options = array())
236 |     {
237 |         $stream = fopen('php://temp', 'wb');
238 |         $this->save($dom, $stream, array_merge($this->defaultOptions, $options));
239 | 
240 |         $html = stream_get_contents($stream, -1, 0);
241 | 
242 |         fclose($stream);
243 | 
244 |         return $html;
245 |     }
246 | }
247 | 


--------------------------------------------------------------------------------
/includes/class-parse-this-discovery.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | /**
  4 |  * Parse This Discovery class.
  5 |  */
  6 | class Parse_This_Discovery {
  7 | 	private function get_feed_type( $type ) {
  8 | 		switch ( $type ) {
  9 | 			case 'application/feed+json':
 10 | 				return 'jsonfeed';
 11 | 			case 'application/json':
 12 | 				return 'json';
 13 | 			case 'text/xml':
 14 | 			case 'application/rss+xml':
 15 | 				return 'rss';
 16 | 			case 'application/atom+xml':
 17 | 				return 'atom';
 18 | 			case 'application/jf2feed+json':
 19 | 				return 'jf2feed';
 20 | 			case 'text/mf2+html':
 21 | 				return 'microformats';
 22 | 			default:
 23 | 				return '';
 24 | 		}
 25 | 	}
 26 | 
	/**
	 * Tells whether a content type is one of the supported ones.
	 *
	 * The comparison is exact and case-sensitive; the value must not carry
	 * parameters such as a charset.
	 *
	 * @param string $content_type Content type to check.
	 * @return boolean True if supported, false otherwise.
	 */
	public function supported_content( $content_type ) {
		$supported = array(
			'application/mf2+json',
			'text/html',
			'application/json',
			'application/xml',
			'text/xml',
			'application/jf2+json',
			'application/jf2feed+json',
			'application/rss+xml',
			'application/atom+xml',
		);

		foreach ( $supported as $candidate ) {
			if ( $candidate === $content_type ) {
				return true;
			}
		}

		return false;
	}
 47 | 
 48 | 
	/**
	 * Downloads $url and returns the feeds discovered for it.
	 *
	 * Discovery sources, as implemented below:
	 * - a `Link` response header advertising the WordPress REST API,
	 * - a YouTube channel, user or playlist URL (mapped to its feed URL),
	 * - the response itself being an RSS/Atom document or a JSON Feed,
	 * - `<link>`/`<a>` elements with rel "alternate" or "feed" and a known feed type in an HTML page,
	 * - a `<link>`/`<a>` element with rel "https://api.w.org/" in an HTML page,
	 * - an h-feed found in the HTML page itself.
	 *
	 * @param string $url URL to scan.
	 * @return WP_Error|array WP_Error if the URL is invalid or cannot be retrieved, otherwise
	 *                        array( 'results' => list of feed descriptor arrays ). The list may be empty.
	 */
	public function fetch( $url ) {
		if ( empty( $url ) || ! wp_http_validate_url( $url ) ) {
			return new WP_Error( 'invalid-url', __( 'A valid URL was not provided.', 'indieweb-post-kinds' ) );
		}

		$user_agent = 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:57.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36 Parse This/WP';
		$args       = array(
			'timeout'             => 15,
			'limit_response_size' => 1048576,
			'redirection'         => 5,
		);
		$links      = array();

		$response      = wp_safe_remote_get( $url, $args );
		$response_code = wp_remote_retrieve_response_code( $response );
		if ( in_array( $response_code, array( 403, 415 ), true ) ) {
			// The first attempt was refused; retry once with an explicit browser-like user agent.
			$args['user-agent'] = $user_agent;
			$response           = wp_safe_remote_get( $url, $args );
			$response_code      = wp_remote_retrieve_response_code( $response );
			if ( in_array( $response_code, array( 403, 415 ), true ) ) {
				return new WP_Error( 'source_error', 'Unable to Retrieve' );
			}
		}

		// Transport-level failure (DNS, timeout, blocked host, ...): report it instead of carrying on.
		if ( is_wp_error( $response ) ) {
			return $response;
		}

		// Headers are read only now so that they describe the response that is actually used,
		// not a refused first attempt.
		$wprest = $this->find_rest_api_links( wp_remote_retrieve_header( $response, 'link' ), $url );

		$content_type = wp_remote_retrieve_header( $response, 'content-type' );
		if ( is_array( $content_type ) ) {
			// The header was sent more than once; use the first value.
			$content_type = reset( $content_type );
		}
		// Strip any character set off the content type
		$ct           = explode( ';', (string) $content_type );
		$content_type = strtolower( trim( $ct[0] ) );

		$content = wp_remote_retrieve_body( $response );

		// Find Youtube RSS Feeds
		if ( in_array( wp_parse_url( $url, PHP_URL_HOST ), array( 'www.youtube.com', 'm.youtube.com', 'youtube.com' ), true ) ) {
			$youtube_feed = self::youtube_rss( $url );
			// youtube_rss() yields nothing for URLs that are not a channel, user or playlist.
			if ( ! empty( $youtube_feed ) ) {
				$links[] = array(
					'url'        => $youtube_feed,
					'type'       => 'feed',
					'_feed_type' => 'atom',
					'name'       => 'YouTube Feed',
				);
			}
		}

		// This is an RSS or Atom Feed URL and if it is not we do not know how to deal with XML anyway
		if ( in_array( $content_type, array( 'application/rss+xml', 'application/atom+xml', 'text/xml', 'application/xml' ), true ) ) {
			if ( class_exists( 'Parse_This_RSS' ) ) {
				$feed = Parse_This::fetch_feed( $url );
				// NOTE(review): Parse_This::fetch_feed() is defined elsewhere; presumably it returns a
				// WP_Error on failure like core fetch_feed() - confirm.
				if ( is_object( $feed ) && ! is_wp_error( $feed ) ) {
					$links[] = array(
						'url'        => $url,
						'type'       => 'feed',
						'_feed_type' => Parse_This_RSS::get_type( $feed ),
						'name'       => $feed->get_title(),
					);
				}
			}
			return array( 'results' => $links );
		}

		if ( 'application/json' === $content_type ) {
			$json = json_decode( $content, true );
			if ( is_array( $json ) && isset( $json['version'] ) && 'https://jsonfeed.org/version/1' === $json['version'] ) {
				$links[] = array(
					'url'        => $url,
					'type'       => 'feed',
					'_feed_type' => 'jsonfeed',
				);
			}
			return array( 'results' => $links );
		}

		if ( 'text/html' === $content_type ) {
			$doc = pt_load_domdocument( $content );
			if ( $doc instanceof DOMDocument ) {
				$xpath = new DOMXPath( $doc );
				// Fetch and gather <link> data.
				$mf2 = false;
				foreach ( $xpath->query( '(//link|//a)[@rel and @href]' ) as $link ) {
					$rel   = $link->getAttribute( 'rel' );
					$href  = $link->getAttribute( 'href' );
					$title = $link->getAttribute( 'title' );
					$type  = $this->get_feed_type( $link->getAttribute( 'type' ) );
					if ( 'microformats' === $type ) {
						$mf2 = true;
					}

					if ( in_array( $rel, array( 'alternate', 'feed' ), true ) && ! empty( $type ) ) {
						$links[] = array_filter(
							array(
								'url'        => pt_make_absolute_url( $href, $url ),
								'type'       => 'feed',
								'_feed_type' => $type,
								'name'       => $title,
								'_mime-type' => $link->getAttribute( 'type' ),
								'_rel'       => $rel,
							)
						);
					}
					// The Link header takes precedence; the markup is only a fallback.
					if ( 'https://api.w.org/' === $rel && empty( $wprest ) ) {
						$wprest[] = array_filter(
							array(
								'url'        => untrailingslashit( pt_make_absolute_url( $href, $url ) ),
								'type'       => 'feed',
								'_feed_type' => 'wordpress',
								'name'       => 'WordPress REST API',
							)
						);
					}
				}

				// If an mf2 feed was found, do not check to see if this page is also one.
				if ( ! $mf2 ) {
					// Check to see if the current page is an h-feed
					$feeds = Parse_This_MF2::find_hfeed( $doc, $url );
					if ( ! is_array( $feeds ) ) {
						$feeds = array();
					}
					foreach ( $feeds as $feed ) {
						if ( ! Parse_This_MF2::is_microformat( $feed ) ) {
							continue;
						}
						// Only the feed's own properties are needed here, not its entries.
						if ( array_key_exists( 'children', $feed ) ) {
							unset( $feed['children'] );
						}
						$jf2 = mf2_to_jf2( $feed );
						if ( ! isset( $jf2['type'] ) || 'feed' !== $jf2['type'] ) {
							continue;
						}

						$author = array();
						if ( array_key_exists( 'author', $jf2 ) ) {
							if ( is_array( $jf2['author'] ) ) {
								$author = $jf2['author'];
							} elseif ( is_string( $jf2['author'] ) ) {
								// A bare string is either the author's URL or the author's name.
								$author = array(
									'type' => 'card',
								);
								if ( wp_http_validate_url( $jf2['author'] ) ) {
									$author['url'] = $jf2['author'];
								} else {
									$author['name'] = $jf2['author'];
								}
							}
						}
						$links[] = array_filter(
							array(
								// An h-feed without its own URL lives on the page that was fetched.
								'url'        => isset( $jf2['url'] ) ? $jf2['url'] : $url,
								'type'       => 'feed',
								'_feed_type' => 'microformats',
								'name'       => isset( $jf2['name'] ) ? $jf2['name'] : null,
								'author'     => $author,
							)
						);
					}
				}
			}
		}

		// Reached for HTML pages and for content types with no dedicated handling above.
		if ( ! empty( $wprest ) ) {
			$links = array_merge( $wprest, $links );
		}

		// Sort feeds by priority
		$rank     = array(
			'jf2feed'      => 0,
			'microformats' => 1,
			'jsonfeed'     => 2,
			'wordpress'    => 3,
			'atom'         => 4,
			'rss'          => 5,
		);
		$unranked = count( $rank ); // Feed types missing from $rank (e.g. 'json') sort last.
		usort(
			$links,
			function( $a, $b ) use ( $rank, $unranked ) {
				$rank_a = ( isset( $a['_feed_type'] ) && isset( $rank[ $a['_feed_type'] ] ) ) ? $rank[ $a['_feed_type'] ] : $unranked;
				$rank_b = ( isset( $b['_feed_type'] ) && isset( $rank[ $b['_feed_type'] ] ) ) ? $rank[ $b['_feed_type'] ] : $unranked;
				// usort() expects an integer, not a boolean.
				return $rank_a - $rank_b;
			}
		);

		return array( 'results' => $links );
	}

	/**
	 * Extracts WordPress REST API endpoints advertised in `Link` response header values.
	 *
	 * @param string|array $linkheaders One header value or a list of them; empty when the header is absent.
	 * @param string       $url         URL the response came from, used to resolve relative targets.
	 * @return array List of feed descriptors with '_feed_type' set to 'wordpress'. Empty if none found.
	 */
	private function find_rest_api_links( $linkheaders, $url ) {
		$found = array();
		if ( empty( $linkheaders ) ) {
			return $found;
		}

		foreach ( (array) $linkheaders as $header ) {
			if ( ! is_string( $header ) ) {
				continue;
			}
			if ( preg_match( '/<(.[^>]+)>;\s*rel\s?=\s?[\"\']?(https:\/\/)?api\.w\.org\/?[\"\']?/i', $header, $result ) ) {
				$found[] = array(
					'url'        => untrailingslashit( pt_make_absolute_url( $result[1], $url ) ),
					'type'       => 'feed',
					'_feed_type' => 'wordpress',
					'name'       => 'WordPress REST API',
				);
			}
		}

		return $found;
	}
256 | 
257 | 
258 | 	private static function youtube_rss( $url ) {
259 | 		$youtube_url_base = 'https://www.youtube.com/feeds/videos.xml';
260 | 		$preg_entities    = array(
261 | 			'channel_id'  => '\/channel\/(([^\/])+?)$', // match YouTube channel ID from url
262 | 			'user'        => '\/user\/(([^\/])+?)$', // match YouTube user from url
263 | 			'playlist_id' => '\/playlist\?list=(([^\/])+?)$',  // match YouTube playlist ID from url
264 | 		);
265 | 
266 | 		foreach ( $preg_entities as $key => $preg_entity ) {
267 | 			if ( preg_match( '/' . $preg_entity . '/', $url, $matches ) ) {
268 | 				if ( isset( $matches[1] ) ) {
269 | 						return $youtube_url_base . '?' . $key . '=' . $matches[1];
270 | 				}
271 | 			}
272 | 		}
273 | 	}
274 | }
275 | 


--------------------------------------------------------------------------------
/lib/html5/README.md:
--------------------------------------------------------------------------------
  1 | > # UKRAINE NEEDS YOUR HELP NOW!
  2 | >
  3 | > On 24 February 2022, Russian [President Vladimir Putin ordered an invasion of Ukraine by Russian Armed Forces](https://www.bbc.com/news/world-europe-60504334).
  4 | >
  5 | > Your support is urgently needed.
  6 | >
  7 | > - Donate to the volunteers. Here is the volunteer fund helping the Ukrainian army to provide all the necessary equipment:
  8 | >  https://bank.gov.ua/en/news/all/natsionalniy-bank-vidkriv-spetsrahunok-dlya-zboru-koshtiv-na-potrebi-armiyi or https://savelife.in.ua/en/donate/
  9 | > - Triple-check social media sources. Russian disinformation is attempting to cover up and distort the reality in Ukraine.
 10 | > - Help Ukrainian refugees who are fleeing Russian attacks and shellings: https://www.globalcitizen.org/en/content/ways-to-help-ukraine-conflict/
 11 | > -  Put pressure on your political representatives to provide help to Ukraine.
 12 | > -  Believe in the Ukrainian people, they will not surrender, they don't have another Ukraine.
 13 | >
 14 | > THANK YOU!
 15 | ----
 16 | 
 17 | # HTML5-PHP
 18 | 
 19 | HTML5 is a standards-compliant HTML5 parser and writer written entirely in PHP.
 20 | It is stable and used in many production websites, and has
 21 | well over [five million downloads](https://packagist.org/packages/masterminds/html5).
 22 | 
 23 | HTML5 provides the following features.
 24 | 
 25 | - An HTML5 serializer
 26 | - Support for PHP namespaces
 27 | - Composer support
 28 | - Event-based (SAX-like) parser
 29 | - A DOM tree builder
 30 | - Interoperability with [QueryPath](https://github.com/technosophos/querypath)
 31 | - Runs on **PHP** 5.3.0 or newer
 32 | 
 33 | [![Build Status](https://travis-ci.org/Masterminds/html5-php.png?branch=master)](https://travis-ci.org/Masterminds/html5-php)
 34 | [![Latest Stable Version](https://poser.pugx.org/masterminds/html5/v/stable.png)](https://packagist.org/packages/masterminds/html5)
 35 | [![Code Coverage](https://scrutinizer-ci.com/g/Masterminds/html5-php/badges/coverage.png?b=master)](https://scrutinizer-ci.com/g/Masterminds/html5-php/?branch=master)
 36 | [![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/Masterminds/html5-php/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/Masterminds/html5-php/?branch=master)
 37 | [![Stability: Sustained](https://masterminds.github.io/stability/sustained.svg)](https://masterminds.github.io/stability/sustained.html)
 38 | 
 39 | ## Installation
 40 | 
 41 | Install HTML5-PHP using [composer](http://getcomposer.org/).
 42 | 
 43 | By adding the `masterminds/html5` dependency to your `composer.json` file:
 44 | 
 45 | ```json
 46 | {
 47 |   "require" : {
 48 |     "masterminds/html5": "^2.0"
 49 |   },
 50 | }
 51 | ```
 52 | 
 53 | Or by invoking the `require` command via the composer executable:
 54 | 
 55 | ```bash
 56 | composer require masterminds/html5
 57 | ```
 58 | 
 59 | ## Basic Usage
 60 | 
 61 | HTML5-PHP has a high-level API and a low-level API.
 62 | 
 63 | Here is how you use the high-level `HTML5` library API:
 64 | 
 65 | ```php
 66 | <?php
 67 | // Assuming you installed from Composer:
 68 | require "vendor/autoload.php";
 69 | 
 70 | use Masterminds\HTML5;
 71 | 
 72 | // An example HTML document:
 73 | $html = <<< 'HERE'
 74 |   <html>
 75 |   <head>
 76 |     <title>TEST</title>
 77 |   </head>
 78 |   <body id='foo'>
 79 |     <h1>Hello World</h1>
 80 |     <p>This is a test of the HTML5 parser.</p>
 81 |   </body>
 82 |   </html>
 83 | HERE;
 84 | 
 85 | // Parse the document. $dom is a DOMDocument.
 86 | $html5 = new HTML5();
 87 | $dom = $html5->loadHTML($html);
 88 | 
 89 | // Render it as HTML5:
 90 | print $html5->saveHTML($dom);
 91 | 
 92 | // Or save it to a file:
 93 | $html5->save($dom, 'out.html');
 94 | ```
 95 | 
 96 | The `$dom` created by the parser is a full `DOMDocument` object. And the
 97 | `save()` and `saveHTML()` methods will take any DOMDocument.
 98 | 
 99 | ### Options
100 | 
101 | It is possible to pass in an array of configuration options when loading
102 | an HTML5 document.
103 | 
104 | ```php
105 | // An associative array of options
106 | $options = array(
107 |   'option_name' => 'option_value',
108 | );
109 | 
110 | // Provide the options to the constructor
111 | $html5 = new HTML5($options);
112 | 
113 | $dom = $html5->loadHTML($html);
114 | ```
115 | 
116 | The following options are supported:
117 | 
118 | * `encode_entities` (boolean): Indicates that the serializer should aggressively
119 |   encode characters as entities. Without this, it only encodes the bare
120 |   minimum.
121 | * `disable_html_ns` (boolean): Prevents the parser from automatically
122 |   assigning the HTML5 namespace to the DOM document. This is for
123 |   non-namespace aware DOM tools.
124 | * `target_document` (\DOMDocument): A DOM document that will be used as the
125 |   destination for the parsed nodes.
126 | * `implicit_namespaces` (array): An assoc array of namespaces that should be
127 |   used by the parser. Name is tag prefix, value is NS URI.
128 | 
129 | ## The Low-Level API
130 | 
131 | This library provides the following low-level APIs that you can use to
132 | create more customized HTML5 tools:
133 | 
134 | - A SAX-like event-based parser that you can hook into for special kinds
135 | of parsing.
136 | - A flexible error-reporting mechanism that can be tuned to document
137 | syntax checking.
138 | - A DOM implementation that uses PHP's built-in DOM library.
139 | 
140 | The unit tests exercise each piece of the API, and every public function
141 | is well-documented.
142 | 
143 | ### Parser Design
144 | 
145 | The parser is designed as follows:
146 | 
147 | - The `Scanner` handles scanning on behalf of the parser.
148 | - The `Tokenizer` requests data off of the scanner, parses it, classifies
149 | it, and sends it to an `EventHandler`. It is a *recursive descent parser.*
150 | - The `EventHandler` receives notifications and data for each specific
151 | semantic event that occurs during tokenization.
152 | - The `DOMBuilder` is an `EventHandler` that listens for tokenizing
153 | events and builds a document tree (`DOMDocument`) based on the events.
154 | 
155 | ### Serializer Design
156 | 
157 | The serializer takes a data structure (the `DOMDocument`) and transforms
158 | it into a character representation -- an HTML5 document.
159 | 
160 | The serializer is broken into three parts:
161 | 
162 | - The `OutputRules` contain the rules to turn DOM elements into strings. The
163 | rules are an implementation of the interface `RulesInterface` allowing for
164 | different rule sets to be used.
165 | - The `Traverser`, which is a special-purpose tree walker. It visits
166 | each node in the tree and uses the `OutputRules` to transform the node
167 | into a string.
168 | - `HTML5` manages the `Traverser` and stores the resultant data
169 | in the correct place.
170 | 
171 | The serializer (`save()`, `saveHTML()`) follows the
172 | [section 8.9 of the HTML 5.0 spec](http://www.w3.org/TR/2012/CR-html5-20121217/syntax.html#serializing-html-fragments).
173 | So tags are serialized according to these rules:
174 | 
175 | - A tag with children: &lt;foo&gt;CHILDREN&lt;/foo&gt;
176 | - A tag that cannot have content: &lt;foo&gt; (no closing tag)
177 | - A tag that could have content, but doesn't: &lt;foo&gt;&lt;/foo&gt;
178 | 
179 | ## Known Issues (Or, Things We Designed Against the Spec)
180 | 
181 | Please check the issue queue for a full list, but the following are
182 | known issues that are not presently on the roadmap:
183 | 
184 | - Namespaces: HTML5 only [supports a selected list of namespaces](http://www.w3.org/TR/html5/infrastructure.html#namespaces)
185 |   and they do not operate in the same way as XML namespaces. A `:` has no special
186 |   meaning.
187 |   By default the parser does not support XML style namespaces via `:`;
188 |   to enable XML namespaces, see the [XML Namespaces section](#xml-namespaces).
189 | - Scripts: This parser does not contain a JavaScript or a CSS
190 |   interpreter. While one may be supplied, not all features will be
191 |   supported.
192 | - Reentrance: The current parser is not re-entrant. (Thus you can't pause
193 |   the parser to modify the HTML string mid-parse.)
194 | - Validation: The current tree builder is **not** a validating parser.
195 |   While it will correct some HTML, it does not check that the HTML
196 |   conforms to the standard. (Should you wish, you can build a validating
197 |   parser by extending DOMTree or building your own EventHandler
198 |   implementation.)
199 |   * There is limited support for insertion modes.
200 |   * Some autocorrection is done automatically.
201 |   * Per the spec, many legacy tags are admitted and correctly handled,
202 |     even though they are technically not part of HTML5.
203 | - Attribute names and values: Due to the implementation details of the
204 |   PHP implementation of DOM, attribute names that do not follow the
205 |   XML 1.0 standard are not inserted into the DOM. (Effectively, they
206 |   are ignored.) If you've got a clever fix for this, jump in!
207 | - Processing Instructions: The HTML5 spec does not allow processing
208 |   instructions. We do. Since this is a server-side library, we think
209 |   this is useful. And that means, dear reader, that in some cases you
210 |   can parse the HTML from a mixed PHP/HTML document. This, however,
211 |   is an incidental feature, not a core feature.
212 | - HTML manifests: Unsupported.
213 | - PLAINTEXT: Unsupported.
214 | - Adoption Agency Algorithm: Not yet implemented. (8.2.5.4.7)
215 | 
216 | ## XML Namespaces
217 | 
218 | To use XML-style namespaces, you have to configure the main `HTML5` instance accordingly.
219 | 
220 | ```php
221 | use Masterminds\HTML5;
222 | $html = new HTML5(array(
223 |     "xmlNamespaces" => true
224 | ));
225 | 
226 | $dom = $html->loadHTML('<t:tag xmlns:t="http://www.example.com"/>');
227 | 
228 | $dom->documentElement->namespaceURI; // http://www.example.com
229 | 
230 | ```
231 | 
232 | You can also add some default prefixes that will not require the namespace declaration,
233 | but its elements will be namespaced.
234 | 
235 | ```php
236 | use Masterminds\HTML5;
237 | $html = new HTML5(array(
238 |     "implicitNamespaces"=>array(
239 |         "t"=>"http://www.example.com"
240 |     )
241 | ));
242 | 
243 | $dom = $html->loadHTML('<t:tag/>');
244 | 
245 | $dom->documentElement->namespaceURI; // http://www.example.com
246 | 
247 | ```
248 | 
249 | ## Thanks to...
250 | 
251 | The dedicated (and patient) contributors of patches small and large,
252 | who have already made this library better. See the CREDITS file for
253 | a list of contributors.
254 | 
255 | We owe a huge debt of gratitude to the original authors of html5lib.
256 | 
257 | While not much of the original parser remains, we learned a lot from
258 | reading the html5lib library. And some pieces remain here. In
259 | particular, much of the UTF-8 and Unicode handling is derived from the
260 | html5lib project.
261 | 
262 | ## License
263 | 
264 | This software is released under the MIT license. The original html5lib
265 | library was also released under the MIT license.
266 | 
267 | See LICENSE.txt
268 | 
269 | Certain files contain copyright assertions by specific individuals
270 | involved with html5lib. Those have been retained where appropriate.
271 | 


--------------------------------------------------------------------------------
/lib/html5/HTML5/Parser/StringInputStream.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Loads a string to be parsed.
  4 |  */
  5 | 
  6 | namespace Masterminds\HTML5\Parser;
  7 | 
  8 | /*
  9 |  *
 10 | * Based on code from html5lib:
 11 | 
 12 | Copyright 2009 Geoffrey Sneddon <http://gsnedders.com/>
 13 | 
 14 | Permission is hereby granted, free of charge, to any person obtaining a
 15 | copy of this software and associated documentation files (the
 16 |     "Software"), to deal in the Software without restriction, including
 17 | without limitation the rights to use, copy, modify, merge, publish,
 18 | distribute, sublicense, and/or sell copies of the Software, and to
 19 | permit persons to whom the Software is furnished to do so, subject to
 20 | the following conditions:
 21 | 
 22 | The above copyright notice and this permission notice shall be included
 23 | in all copies or substantial portions of the Software.
 24 | 
 25 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 26 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 27 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 28 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 29 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 30 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 31 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 32 | 
 33 | */
 34 | 
 35 | // Some conventions:
 36 | // - /* */ indicates verbatim text from the HTML 5 specification
 37 | //   MPB: Not sure which version of the spec. Moving from HTML5lib to
 38 | //   HTML5-PHP, I have been using this version:
 39 | //   http://www.w3.org/TR/2012/CR-html5-20121217/Overview.html#contents
 40 | //
 41 | // - // indicates regular comments
 42 | 
 43 | /**
 44 |  * An InputStream implementation that reads from an in-memory string.
 45 |  *
 46 |  * The constructor converts the input to UTF-8 and normalizes line endings;
 47 |  * every cursor position afterwards is a byte offset into that normalized data.
 48 |  *
 49 |  * The #[\ReturnTypeWillChange] attributes below are plain comments before
 50 |  * PHP 8 and silence the PHP 8.1 return type deprecation for Iterator methods.
 51 |  *
 52 |  * @deprecated since 2.4, to remove in 3.0. Use a string in the scanner instead.
 53 |  */
 54 | class StringInputStream implements InputStream
 55 | {
 56 |     /**
 57 |      * The string data we're parsing.
 58 |      */
 59 |     private $data;
 60 | 
 61 |     /**
 62 |      * The current integer byte position we are in $data.
 63 |      */
 64 |     private $char;
 65 | 
 66 |     /**
 67 |      * Length of $data in bytes; when $char === $EOF, we are at the end-of-file.
 68 |      */
 69 |     private $EOF;
 70 | 
 71 |     /**
 72 |      * Parse errors.
 73 |      */
 74 |     public $errors = array();
 75 | 
 76 |     /**
 77 |      * Create a new InputStream wrapper.
 78 |      *
 79 |      * @param string $data     Data to parse.
 80 |      * @param string $encoding The encoding to use for the data.
 81 |      * @param string $debug    A fprintf format to use to echo the data on stdout.
 82 |      *                         It is given the converted data and its byte length.
 83 |      */
 84 |     public function __construct($data, $encoding = 'UTF-8', $debug = '')
 85 |     {
 86 |         $data = UTF8Utils::convertToUTF8($data, $encoding);
 87 |         if ($debug) {
 88 |             // NOTE: PHP only defines the STDOUT constant under the CLI SAPI.
 89 |             fprintf(STDOUT, $debug, $data, strlen($data));
 90 |         }
 91 | 
 92 |         // There is good reason to question whether it makes sense to
 93 |         // do this here, since most of these checks are done during
 94 |         // parsing, and since this check doesn't actually *do* anything.
 95 |         $this->errors = UTF8Utils::checkForIllegalCodepoints($data);
 96 | 
 97 |         $data = $this->replaceLinefeeds($data);
 98 | 
 99 |         $this->data = $data;
100 |         $this->char = 0;
101 |         $this->EOF = strlen($data);
102 |     }
103 | 
104 |     /**
105 |      * Returns the normalized data held by this stream.
106 |      *
107 |      * @return string
108 |      */
109 |     public function __toString()
110 |     {
111 |         return $this->data;
112 |     }
113 | 
114 |     /**
115 |      * Replace linefeed characters according to the spec.
116 |      *
117 |      * NUL bytes are replaced with U+FFFD REPLACEMENT CHARACTER in the same pass.
118 |      *
119 |      * @param string $data The data to normalize.
120 |      *
121 |      * @return string The normalized data.
122 |      */
123 |     protected function replaceLinefeeds($data)
124 |     {
125 |         /*
126 |          * U+000D CARRIAGE RETURN (CR) characters and U+000A LINE FEED (LF) characters are treated specially.
127 |          * Any CR characters that are followed by LF characters must be removed, and any CR characters not
128 |          * followed by LF characters must be converted to LF characters. Thus, newlines in HTML DOMs are
129 |          * represented by LF characters, and there are never any CR characters in the input to the tokenization
130 |          * stage.
131 |          */
132 |         $crlfTable = array(
133 |             "\0" => "\xEF\xBF\xBD",
134 |             "\r\n" => "\n",
135 |             "\r" => "\n",
136 |         );
137 | 
138 |         return strtr($data, $crlfTable);
139 |     }
140 | 
141 |     /**
142 |      * Returns the current line that the tokenizer is at.
143 |      *
144 |      * @return int The line number, starting at 1.
145 |      */
146 |     public function currentLine()
147 |     {
148 |         if (empty($this->EOF) || 0 === $this->char) {
149 |             return 1;
150 |         }
151 |         // Count the newlines before the cursor (clamped to EOF) and add one,
152 |         // because line numbers start at 1.
153 |         return substr_count($this->data, "\n", 0, min($this->char, $this->EOF)) + 1;
154 |     }
155 | 
156 |     /**
157 |      * @deprecated
158 |      */
159 |     public function getCurrentLine()
160 |     {
161 |         return $this->currentLine();
162 |     }
163 | 
164 |     /**
165 |      * Returns the current column of the current line that the tokenizer is at.
166 |      * Newlines are column 0. The first char after a newline is column 1.
167 |      *
168 |      * @return int The column number.
169 |      */
170 |     public function columnOffset()
171 |     {
172 |         // Short circuit for the first char.
173 |         if (0 === $this->char) {
174 |             return 0;
175 |         }
176 |         // strrpos is weird, and the offset needs to be negative for what we
177 |         // want (i.e., the last \n before $this->char). This needs to not have
178 |         // one (to make it point to the next character, the one we want the
179 |         // position of) added to it because strrpos's behaviour includes the
180 |         // final offset byte.
181 |         $backwardFrom = $this->char - 1 - strlen($this->data);
182 |         $lastLine = strrpos($this->data, "\n", $backwardFrom);
183 | 
184 |         // Collect the bytes between that newline and the cursor;
185 |         // UTF8Utils::countChars() turns them into the column number.
186 |         if (false !== $lastLine) {
187 |             $findLengthOf = substr($this->data, $lastLine + 1, $this->char - 1 - $lastLine);
188 |         } else {
189 |             // No newline precedes the cursor: measure from the start of the data.
190 |             $findLengthOf = substr($this->data, 0, $this->char);
191 |         }
192 | 
193 |         return UTF8Utils::countChars($findLengthOf);
194 |     }
195 | 
196 |     /**
197 |      * @deprecated
198 |      */
199 |     public function getColumnOffset()
200 |     {
201 |         return $this->columnOffset();
202 |     }
203 | 
204 |     /**
205 |      * Get the current character.
206 |      *
207 |      * No bounds check is done here, so call valid() first.
208 |      *
209 |      * @return string The byte at the cursor.
210 |      */
211 |     #[\ReturnTypeWillChange]
212 |     public function current()
213 |     {
214 |         return $this->data[$this->char];
215 |     }
216 | 
217 |     /**
218 |      * Advance the pointer.
219 |      * This is part of the Iterator interface.
220 |      */
221 |     #[\ReturnTypeWillChange]
222 |     public function next()
223 |     {
224 |         ++$this->char;
225 |     }
226 | 
227 |     /**
228 |      * Rewind to the start of the string.
229 |      */
230 |     #[\ReturnTypeWillChange]
231 |     public function rewind()
232 |     {
233 |         $this->char = 0;
234 |     }
235 | 
236 |     /**
237 |      * Is the current pointer location valid.
238 |      *
239 |      * @return bool Whether the current pointer location is valid.
240 |      */
241 |     #[\ReturnTypeWillChange]
242 |     public function valid()
243 |     {
244 |         return $this->char < $this->EOF;
245 |     }
246 | 
247 |     /**
248 |      * Get all characters until EOF.
249 |      *
250 |      * This reads to the end of the file, and sets the read marker at the
251 |      * end of the file.
252 |      *
253 |      * Note this performs bounds checking.
254 |      *
255 |      * @return string Returns the remaining text. If called when the InputStream is
256 |      *                already exhausted, it returns an empty string.
257 |      */
258 |     public function remainingChars()
259 |     {
260 |         if ($this->char < $this->EOF) {
261 |             $data = substr($this->data, $this->char);
262 |             $this->char = $this->EOF;
263 | 
264 |             return $data;
265 |         }
266 | 
267 |         return '';
268 |     }
269 | 
270 |     /**
271 |      * Read to a particular match (or until $max bytes are consumed).
272 |      *
273 |      * This operates on byte sequences, not characters.
274 |      *
275 |      * Matches as far as possible until we reach a certain set of bytes
276 |      * and returns the matched substring.
277 |      *
278 |      * @param string $bytes Bytes to match.
279 |      * @param int    $max   Maximum number of bytes to scan.
280 |      *
281 |      * @return string|false The consumed bytes (possibly an empty string), or
282 |      *                      false if the stream is already at EOF.
283 |      */
284 |     public function charsUntil($bytes, $max = null)
285 |     {
286 |         if ($this->char >= $this->EOF) {
287 |             return false;
288 |         }
289 | 
290 |         if (0 === $max || $max) {
291 |             $len = strcspn($this->data, $bytes, $this->char, $max);
292 |         } else {
293 |             $len = strcspn($this->data, $bytes, $this->char);
294 |         }
295 | 
296 |         $string = (string) substr($this->data, $this->char, $len);
297 |         $this->char += $len;
298 | 
299 |         return $string;
300 |     }
301 | 
302 |     /**
303 |      * Returns the string so long as $bytes matches.
304 |      *
305 |      * Matches as far as possible with a certain set of bytes
306 |      * and returns the matched substring.
307 |      *
308 |      * @param string $bytes A mask of bytes to match. If ANY byte in this mask matches the
309 |      *                      current char, the pointer advances and the char is part of the
310 |      *                      substring.
311 |      * @param int    $max   The max number of chars to read.
312 |      *
313 |      * @return string|false The consumed bytes (possibly an empty string), or
314 |      *                      false if the stream is already at EOF.
315 |      */
316 |     public function charsWhile($bytes, $max = null)
317 |     {
318 |         if ($this->char >= $this->EOF) {
319 |             return false;
320 |         }
321 | 
322 |         if (0 === $max || $max) {
323 |             $len = strspn($this->data, $bytes, $this->char, $max);
324 |         } else {
325 |             $len = strspn($this->data, $bytes, $this->char);
326 |         }
327 |         $string = (string) substr($this->data, $this->char, $len);
328 |         $this->char += $len;
329 | 
330 |         return $string;
331 |     }
332 | 
333 |     /**
334 |      * Unconsume characters.
335 |      *
336 |      * A request that would move the cursor before the start is ignored.
337 |      *
338 |      * @param int $howMany The number of characters to unconsume.
339 |      */
340 |     public function unconsume($howMany = 1)
341 |     {
342 |         if (($this->char - $howMany) >= 0) {
343 |             $this->char -= $howMany;
344 |         }
345 |     }
346 | 
347 |     /**
348 |      * Look ahead without moving cursor.
349 |      *
350 |      * @return string|false The byte after the cursor, or false when the
351 |      *                      cursor is on the last byte or past it.
352 |      */
353 |     public function peek()
354 |     {
355 |         // Offset EOF is one past the last byte, so the comparison must be
356 |         // strict; "<=" would read an uninitialized string offset.
357 |         $next = $this->char + 1;
358 |         if ($next < $this->EOF) {
359 |             return $this->data[$next];
360 |         }
361 | 
362 |         return false;
363 |     }
364 | 
365 |     /**
366 |      * Returns the current byte position of the cursor.
367 |      *
368 |      * @return int
369 |      */
370 |     #[\ReturnTypeWillChange]
371 |     public function key()
372 |     {
373 |         return $this->char;
374 |     }
375 | }
376 | 


--------------------------------------------------------------------------------
/includes/class-parse-this-rss.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Helpers for Turning RSS/Atom into JF2
  4 |  **/
  5 | 
  6 | class Parse_This_RSS extends Parse_This_Base {
  7 | 
  8 | 	/**
  9 | 	 * Parse RSS/Atom into JF2
 10 | 	 *
 11 | 	 * @param SimplePie $feed Parsed feed object.
 12 | 	 * @param string    $url  Feed URL. Not used by this method.
 13 | 	 * @return array JF2 feed with empty properties removed.
 14 | 	 */
 15 | 	public static function parse( $feed, $url ) {
 16 | 		$items = array();
 17 | 		$title = $feed->get_title();
 18 | 		foreach ( $feed->get_items() as $item ) {
 19 | 			$items[] = self::get_item( $item, $title );
 20 | 		}
 21 | 		// An array literal keeps a single value per key, so the two candidate sources for
 22 | 		// '_last_updated' are combined here: the value derived from the items wins and the
 23 | 		// date declared by the feed itself is only a fallback.
 24 | 		$last_updated = self::find_last_updated( $items );
 25 | 		if ( ! $last_updated ) {
 26 | 			$last_updated = self::last_updated( $feed );
 27 | 		}
 28 | 		return array_filter(
 29 | 			array(
 30 | 				'type'            => 'feed',
 31 | 				'_feed_type'      => self::get_type( $feed ),
 32 | 				'_last_updated'   => $last_updated,
 33 | 				'_last_published' => self::find_last_published( $items ),
 34 | 				'summary'         => $feed->get_description(),
 35 | 				'author'          => self::get_authors( $feed->get_author() ),
 36 | 				'name'            => htmlspecialchars_decode( (string) $title, ENT_QUOTES ),
 37 | 				'url'             => $feed->get_permalink(),
 38 | 				'photo'           => $feed->get_image_url(),
 39 | 				'items'           => $items,
 40 | 			)
 41 | 		);
 42 | 	}
 43 | 
 44 | 	/**
 45 | 	 * Reads the last-updated date the feed declares for itself
 46 | 	 *
 47 | 	 * @param SimplePie $feed Parsed feed object.
 48 | 	 * @return string|null W3C formatted date, or null when absent or unparseable.
 49 | 	 */
 50 | 	public static function last_updated( $feed ) {
 51 | 		$type    = self::get_type( $feed );
 52 | 		$updated = null;
 53 | 		if ( 'RSS' === $type ) {
 54 | 			$updated = $feed->get_channel_tags( SIMPLEPIE_NAMESPACE_RSS_20, 'lastBuildDate' );
 55 | 		} elseif ( 'atom' === $type ) {
 56 | 			$updated = $feed->get_channel_tags( SIMPLEPIE_NAMESPACE_ATOM_10, 'updated' );
 57 | 		}
 58 | 		if ( $updated && isset( $updated[0]['data'] ) ) {
 59 | 			return self::rss_date_to_w3c( $updated[0]['data'] );
 60 | 		}
 61 | 		return null;
 62 | 	}
 63 | 
 64 | 	/**
 65 | 	 * Converts a raw date string taken from a feed into W3C format
 66 | 	 *
 67 | 	 * Empty values are rejected up front because the DateTime constructor would
 68 | 	 * silently turn them into the current time, and unparseable strings make it
 69 | 	 * throw; both cases yield null here.
 70 | 	 *
 71 | 	 * @param mixed $raw Date string as found in the feed.
 72 | 	 * @return string|null W3C formatted date or null.
 73 | 	 */
 74 | 	private static function rss_date_to_w3c( $raw ) {
 75 | 		if ( ! is_string( $raw ) || '' === trim( $raw ) ) {
 76 | 			return null;
 77 | 		}
 78 | 		try {
 79 | 			$datetime = new DateTime( trim( $raw ) );
 80 | 		} catch ( Exception $e ) {
 81 | 			return null;
 82 | 		}
 83 | 		return $datetime->format( DATE_W3C );
 84 | 	}
 85 | 
 86 | 	/**
 87 | 	 * Maps the SimplePie feed type bitmask to a short label
 88 | 	 *
 89 | 	 * @param SimplePie $feed Parsed feed object.
 90 | 	 * @return string 'RSS', 'atom' or 'unknown'.
 91 | 	 */
 92 | 	public static function get_type( $feed ) {
 93 | 		$type = $feed->get_type();
 94 | 		if ( $type & SIMPLEPIE_TYPE_RSS_ALL ) {
 95 | 			return 'RSS';
 96 | 		}
 97 | 		if ( $type & SIMPLEPIE_TYPE_ATOM_ALL ) {
 98 | 			return 'atom';
 99 | 		}
100 | 		// SIMPLEPIE_TYPE_NONE is the empty bitmask, so it cannot be detected with a
101 | 		// bitwise AND; it is simply what remains when no format bit matched.
102 | 		return 'unknown';
103 | 	}
104 | 
105 | 	/**
106 | 	 * Takes a SimplePie_Author object and Turns it into a JF2 Author property
107 | 	 *
108 | 	 * Names that contain several HTML links are split into one card per link.
109 | 	 *
110 | 	 * @param SimplePie_Author|SimplePie_Author[]|null $author
111 | 	 * @return array Empty array, a single card, or a list of cards.
112 | 	 */
113 | 	public static function get_authors( $author ) {
114 | 		if ( ! $author ) {
115 | 			return array();
116 | 		}
117 | 		if ( $author instanceof SimplePie_Author ) {
118 | 			$author = array( $author );
119 | 		}
120 | 		$return = array();
121 | 		foreach ( $author as $a ) {
122 | 			$r     = array(
123 | 				'type'  => 'card',
124 | 				'name'  => htmlspecialchars_decode( (string) $a->get_name() ),
125 | 				'url'   => $a->get_link(),
126 | 				'email' => self::validate_email( $a->get_email() ),
127 | 			);
128 | 			$dom   = pt_load_domdocument( $r['name'] );
129 | 			$names = array();
130 | 			foreach ( $dom->getElementsByTagName( 'a' ) as $link ) {
131 | 				$names[ wp_strip_all_tags( $link->nodeValue ) ] = $link->getAttribute( 'href' ); // phpcs:ignore
132 | 			}
133 | 			if ( count( $names ) > 1 ) {
134 | 				// Several links in one name field: emit one card per linked name.
135 | 				foreach ( $names as $name => $url ) {
136 | 					$return[] = array(
137 | 						'type' => 'card',
138 | 						'name' => (string) $name,
139 | 						'url'  => $url,
140 | 					);
141 | 				}
142 | 				continue;
143 | 			}
144 | 			if ( 1 === count( $names ) ) {
145 | 				// A single link: use its text as the name and, if the author has no
146 | 				// URL of its own, its href as the URL.
147 | 				$href      = reset( $names );
148 | 				$r['name'] = (string) key( $names );
149 | 				if ( empty( $r['url'] ) ) {
150 | 					$r['url'] = $href;
151 | 				}
152 | 			} else {
153 | 				$r['name'] = wp_strip_all_tags( $r['name'] );
154 | 			}
155 | 			$return[] = array_filter( $r );
156 | 		}
157 | 		if ( 1 === count( $return ) ) {
158 | 			$return = array_shift( $return );
159 | 		}
160 | 		return $return;
161 | 	}
162 | 
163 | 	/**
164 | 	 * Turns a SimplePie_Credit object into a JF2 card
165 | 	 *
166 | 	 * @param SimplePie_Credit $credit
167 | 	 * @return array|null Card, or null when $credit is not a SimplePie_Credit.
168 | 	 */
169 | 	public static function credit_to_card( $credit ) {
170 | 		if ( ! $credit instanceof SimplePie_Credit ) {
171 | 			return null;
172 | 		}
173 | 		return array(
174 | 			'type' => 'card',
175 | 			'role' => $credit->get_role(),
176 | 			'name' => $credit->get_name(),
177 | 		);
178 | 	}
179 | 
180 | 	/**
181 | 	 * Turns a SimplePie_Source object into a JF2 cite
182 | 	 *
183 | 	 * @param SimplePie_Source $source
184 | 	 * @return array|null Cite with empty properties removed, or null when $source is not a SimplePie_Source.
185 | 	 */
186 | 	public static function source_to_cite( $source ) {
187 | 		if ( ! $source instanceof SimplePie_Source ) {
188 | 			return null;
189 | 		}
190 | 		return array_filter(
191 | 			array(
192 | 				'type'    => 'cite',
193 | 				'name'    => $source->get_title(),
194 | 				'summary' => $source->get_description(),
195 | 				'url'     => $source->get_permalink(),
196 | 				'author'  => self::get_authors( $source->get_authors() ),
197 | 				'photo'   => $source->get_image_url(),
198 | 			)
199 | 		);
200 | 	}
201 | 
202 | 	/**
203 | 	 * Finds the source of an item
204 | 	 *
205 | 	 * An RSS 2.0 source tag on the item is preferred; otherwise the SimplePie source object is converted.
206 | 	 *
207 | 	 * @param SimplePie_Item $item
208 | 	 * @return array|null
209 | 	 */
210 | 	public static function get_source( $item ) {
211 | 		$return = $item->get_item_tags( SIMPLEPIE_NAMESPACE_RSS_20, 'source' );
212 | 		if ( $return ) {
213 | 			return array(
214 | 				// Either part may be missing from the tag, so neither index is read unchecked.
215 | 				'url'  => isset( $return[0]['attribs']['']['url'] ) ? $return[0]['attribs']['']['url'] : null,
216 | 				'name' => isset( $return[0]['data'] ) ? $return[0]['data'] : null,
217 | 			);
218 | 		}
219 | 		return self::source_to_cite( $item->get_source() );
220 | 	}
221 | 
222 | 	/**
223 | 	 * Returns the thumbnail URL of an item or enclosure
224 | 	 *
225 | 	 * @param object $item Any object; only consulted when it has a get_thumbnail() method.
226 | 	 * @return string|null
227 | 	 */
228 | 	public static function get_thumbnail( $item ) {
229 | 		if ( method_exists( $item, 'get_thumbnail' ) ) {
230 | 			$return = $item->get_thumbnail();
231 | 			if ( is_string( $return ) ) {
232 | 				return $return;
233 | 			}
234 | 			if ( is_array( $return ) && isset( $return['url'] ) ) {
235 | 				return $return['url'];
236 | 			}
237 | 		}
238 | 		return null;
239 | 	}
240 | 
241 | 	/**
242 | 	 * Takes a SimplePie_Item object and Turns it into a JF2 entry
243 | 	 *
244 | 	 * @param SimplePie_Item $item
245 | 	 * @param string         $title Title of the feed, stored as the entry's publication.
246 | 	 * @return array JF2 entry with empty properties removed.
247 | 	 */
248 | 	public static function get_item( $item, $title = '' ) {
249 | 		$content = Parse_This::clean_content( $item->get_content( true ) );
250 | 		$return  = array(
251 | 			'type'         => 'entry',
252 | 			'name'         => $item->get_title(),
253 | 			'author'       => self::get_authors( $item->get_authors() ),
254 | 			'contributors' => self::get_authors( $item->get_contributors() ),
255 | 			'publication'  => $title,
256 | 			'summary'      => wp_strip_all_tags( $item->get_description( true ) ),
257 | 			'content'      => array_filter(
258 | 				array(
259 | 					'html' => $content,
260 | 					'text' => wp_strip_all_tags( $content ),
261 | 				)
262 | 			),
263 | 			'_source'      => self::get_source( $item ),
264 | 			'published'    => self::get_date( $item ),
265 | 			'updated'      => self::get_updated_date( $item ),
266 | 			'url'          => $item->get_permalink(),
267 | 			'uid'          => $item->get_id(),
268 | 			'location'     => self::get_location( $item ),
269 | 			'category'     => self::get_categories( $item->get_categories() ),
270 | 			'featured'     => self::get_thumbnail( $item ),
271 | 		);
272 | 
273 | 		// To cover the non obvious types
274 | 		$medium_map = array(
275 | 			'application/x-shockwave-flash' => 'video',
276 | 		);
277 | 
278 | 		// Guard the loop in case get_enclosures() does not return an array (e.g. nothing attached).
279 | 		$enclosures = $item->get_enclosures();
280 | 		if ( ! is_array( $enclosures ) ) {
281 | 			$enclosures = array();
282 | 		}
283 | 		foreach ( $enclosures as $enclosure ) {
284 | 			$medium = $enclosure->get_type();
285 | 			if ( ! $medium ) {
286 | 				$medium = $enclosure->get_medium();
287 | 			} elseif ( array_key_exists( $medium, $medium_map ) ) {
288 | 				$medium = $medium_map[ $medium ];
289 | 			} else {
290 | 				// Keep only the part before the slash, e.g. 'audio' from 'audio/mpeg'.
291 | 				$medium = explode( '/', $medium );
292 | 				$medium = array_shift( $medium );
293 | 			}
294 | 			// Images are filed under 'photo'; 'audio' and 'video' keep their names.
295 | 			if ( 'image' === $medium ) {
296 | 				$medium = 'photo';
297 | 			}
298 | 			// Without a medium there is no property name to file the link under.
299 | 			if ( is_string( $medium ) && '' !== $medium ) {
300 | 				if ( ! array_key_exists( $medium, $return ) ) {
301 | 					$return[ $medium ] = $enclosure->get_link();
302 | 				} else {
303 | 					if ( is_string( $return[ $medium ] ) ) {
304 | 						$return[ $medium ] = array( $return[ $medium ] );
305 | 					}
306 | 					$return[ $medium ][] = $enclosure->get_link();
307 | 				}
308 | 			}
309 | 			$keywords = $enclosure->get_keywords();
310 | 			if ( is_array( $keywords ) ) {
311 | 				$return['category'] = array_merge( $return['category'], $keywords );
312 | 			}
313 | 			if ( ! isset( $return['duration'] ) ) {
314 | 				$duration = $enclosure->get_duration();
315 | 				if ( 0 < $duration ) {
316 | 					$return['duration'] = seconds_to_iso8601( $duration );
317 | 				}
318 | 			}
319 | 			if ( empty( $return['summary'] ) ) {
320 | 				$return['summary'] = $enclosure->get_description();
321 | 			}
322 | 			if ( empty( $return['featured'] ) ) {
323 | 				$return['featured'] = self::get_thumbnail( $enclosure );
324 | 			}
325 | 			$credits = $enclosure->get_credits();
326 | 			if ( is_array( $credits ) ) {
327 | 				foreach ( $credits as $credit ) {
328 | 					$card = self::credit_to_card( $credit );
329 | 					if ( $card ) {
330 | 						$return['credits'][] = $card;
331 | 					}
332 | 				}
333 | 			}
334 | 		}
335 | 		// If there is just one photo it is probably the featured image
336 | 		if ( isset( $return['photo'] ) && is_string( $return['photo'] ) && empty( $return['featured'] ) ) {
337 | 			$return['featured'] = $return['photo'];
338 | 			unset( $return['photo'] );
339 | 		}
340 | 		if ( empty( $return['featured'] ) ) {
341 | 			// Last resort: the href attribute of an image tag in the iTunes namespace.
342 | 			$image = $item->get_item_tags( SIMPLEPIE_NAMESPACE_ITUNES, 'image' );
343 | 			if ( is_array( $image ) ) {
344 | 				$image = array_shift( $image );
345 | 				if ( isset( $image['attribs'] ) && is_array( $image['attribs'] ) ) {
346 | 					$image = array_shift( $image['attribs'] );
347 | 					if ( isset( $image['href'] ) ) {
348 | 						$image = $image['href'];
349 | 					}
350 | 				}
351 | 			}
352 | 			if ( is_string( $image ) ) {
353 | 				$return['featured'] = $image;
354 | 			}
355 | 		}
356 | 		$return['post_type'] = post_type_discovery( $return );
357 | 		foreach ( array( 'category', 'video', 'audio' ) as $prop ) {
358 | 			if ( array_key_exists( $prop, $return ) && is_array( $return[ $prop ] ) ) {
359 | 				// array_unique() preserves keys; re-index so the result stays a plain list.
360 | 				$return[ $prop ] = array_values( array_unique( $return[ $prop ] ) );
361 | 			}
362 | 		}
363 | 		return array_filter( $return );
364 | 	}
365 | 
366 | 	/**
367 | 	 * Extracts the labels from a list of category objects
368 | 	 *
369 | 	 * @param array|null $categories Objects exposing get_label().
370 | 	 * @return array List of labels; empty when $categories is not an array.
371 | 	 */
372 | 	private static function get_categories( $categories ) {
373 | 		if ( ! is_array( $categories ) ) {
374 | 			return array();
375 | 		}
376 | 		$return = array();
377 | 		foreach ( $categories as $category ) {
378 | 			$return[] = $category->get_label();
379 | 		}
380 | 		return $return;
381 | 	}
382 | 
383 | 	/**
384 | 	 * Reads the W3C Basic Geo featureName tag of an item
385 | 	 *
386 | 	 * @param SimplePie_Item $item
387 | 	 * @return string|null
388 | 	 */
389 | 	private static function get_location_name( $item ) {
390 | 		$return = $item->get_item_tags( SIMPLEPIE_NAMESPACE_W3C_BASIC_GEO, 'featureName' );
391 | 		if ( $return && isset( $return[0]['data'] ) ) {
392 | 			return $return[0]['data'];
393 | 		}
394 | 		return null;
395 | 	}
396 | 
397 | 	/**
398 | 	 * Builds the JF2 location of an item
399 | 	 *
400 | 	 * @param SimplePie_Item $item
401 | 	 * @return array Latitude, longitude and name, with empty values removed.
402 | 	 */
403 | 	public static function get_location( $item ) {
404 | 		return array_filter(
405 | 			array(
406 | 				'latitude'  => $item->get_latitude(),
407 | 				'longitude' => $item->get_longitude(),
408 | 				'name'      => self::get_location_name( $item ),
409 | 			)
410 | 		);
411 | 	}
412 | 
413 | 	/**
414 | 	 * Returns the publication date of an item
415 | 	 *
416 | 	 * @param SimplePie_Item $item
417 | 	 * @return string|null W3C formatted date, or null when the item has no usable date.
418 | 	 */
419 | 	public static function get_date( $item ) {
420 | 		return self::rss_date_to_w3c( $item->get_date( null ) );
421 | 	}
422 | 
423 | 	/**
424 | 	 * Returns the updated date of an item
425 | 	 *
426 | 	 * @param SimplePie_Item $item
427 | 	 * @return string|null W3C formatted date, or null when the item has no usable date.
428 | 	 */
429 | 	public static function get_updated_date( $item ) {
430 | 		return self::rss_date_to_w3c( $item->get_updated_date( null ) );
431 | 	}
432 | 
433 | }
434 | 


--------------------------------------------------------------------------------
/includes/class-parse-this-restapi.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | class Parse_This_RESTAPI {
  4 | 	private static function ifset( $key, $array ) {
  5 | 		return isset( $array[ $key ] ) ? $array[ $key ] : null;
  6 | 	}
  7 | 
  8 | 	public static function get_rendered( $key, $item ) {
  9 | 		if ( ! array_key_exists( $key, $item ) ) {
 10 | 			return null;
 11 | 		}
 12 | 		if ( array_key_exists( 'rendered', $item[ $key ] ) ) {
 13 | 			return $item[ $key ]['rendered'];
 14 | 		}
 15 | 		return null;
 16 | 	}
 17 | 
 18 | 	public static function base64url_encode( $data ) {
 19 | 		return rtrim( strtr( base64_encode( $data ), '+/', '-_' ), '=' );
 20 | 	}
 21 | 
 22 | 	public static function get_rest_url( $rest_url, $path ) {
 23 | 		if ( ! wp_http_validate_url( $rest_url ) ) {
 24 | 			return false;
 25 | 		}
 26 | 		$path  = '/' . ltrim( $path, '/' );
 27 | 		$query = wp_parse_url( $rest_url, PHP_URL_QUERY );
 28 | 		if ( ! empty( $query ) ) {
 29 | 			$query = explode( '=', $query );
 30 | 			if ( array_key_exists( 'rest_route' ) ) {
 31 | 				return add_query_arg(
 32 | 					array(
 33 | 						'rest_route' => $path,
 34 | 						'_embed'     => 1,
 35 | 					),
 36 | 					trailingslashit( $rest_url )
 37 | 				);
 38 | 			}
 39 | 			return false;
 40 | 		}
 41 | 
 42 | 		$rest_url = untrailingslashit( $rest_url );
 43 | 		return add_query_arg( '_embed', 1, $rest_url . $path );
 44 | 	}
 45 | 
 46 | 	public static function get_rest_path( $rest_url, $url ) {
 47 | 		if ( ! wp_http_validate_url( $rest_url ) ) {
 48 | 			return false;
 49 | 		}
 50 | 		$query = wp_parse_url( $rest_url, PHP_URL_QUERY );
 51 | 		if ( ! empty( $query ) ) {
 52 | 			$query = explode( '=', $query );
 53 | 			if ( array_key_exists( 'rest_route' ) ) {
 54 | 				return $query['rest_route'];
 55 | 			}
 56 | 		}
 57 | 		$path = str_replace( $rest_url, '', $url );
 58 | 		return '/' . ltrim( $path, '/' );
 59 | 	}
 60 | 
 61 | 
 62 | 	public static function fetch( $rest_url, $path, $cache = false ) {
 63 | 		if ( empty( $rest_url ) || ! $rest_url ) {
 64 | 			return new WP_Error( 'no_url', __( 'No URL provided', 'parse-this' ) );
 65 | 		}
 66 | 
 67 | 		$url = self::get_rest_url( $rest_url, $path );
 68 | 		$key = 'pt_rest_' . self::base64url_encode( $url );
 69 | 		if ( $cache ) {
 70 | 			$transient = get_transient( $key );
 71 | 			if ( false !== $transient ) {
 72 | 				return json_decode( $transient, true );
 73 | 			}
 74 | 		}
 75 | 
 76 | 		$user_agent = 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:57.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36 Parse This/WP';
 77 | 		$args       = array(
 78 | 			'timeout'             => 15,
 79 | 			'limit_response_size' => 1048576,
 80 | 			'redirection'         => 5,
 81 | 			// Use an explicit user-agent for Parse This
 82 | 		);
 83 | 
 84 | 		$response = wp_safe_remote_get( $url, $args );
 85 | 		if ( is_wp_error( $response ) ) {
 86 | 			return $response;
 87 | 		}
 88 | 		$response_code = (int) wp_remote_retrieve_response_code( $response );
 89 | 		$content_type  = wp_remote_retrieve_header( $response, 'content-type' );
 90 | 		if ( in_array( $response_code, array( 404, 403, 415 ), true ) ) {
 91 | 			$args['user-agent'] = $user_agent;
 92 | 			$response           = wp_safe_remote_get( $url, $args );
 93 | 			$response_code      = wp_remote_retrieve_response_code( $response );
 94 | 			if ( in_array( $response_code, array( 404, 403, 415 ), true ) ) {
 95 | 				return new WP_Error( 'source_error', 'Unable to Retrieve' );
 96 | 			}
 97 | 		}
 98 | 
 99 | 		// Strip any character set off the content type
100 | 		$ct = explode( ';', $content_type );
101 | 		if ( is_array( $ct ) ) {
102 | 			$content_type = array_shift( $ct );
103 | 		}
104 | 		$content_type = trim( $content_type );
105 | 		// List of content types we know how to handle
106 | 		if ( 'application/json' !== $content_type ) {
107 | 			return new WP_Error( 'content-type', 'Retrieved incorrect page', array( 'content-type' => $content_type ) );
108 | 		}
109 | 
110 | 		$content = wp_remote_retrieve_body( $response );
111 | 		if ( $cache ) {
112 | 			set_transient( $key, $content, WEEK_IN_SECONDS );
113 | 		}
114 | 
115 | 		$content = json_decode( $content, true );
116 | 
117 | 		if ( wp_remote_retrieve_header( $response, 'x-wp-total' ) ) {
118 | 			$return           = array();
119 | 			$return['_total'] = wp_remote_retrieve_header( $response, 'x-wp-total' );
120 | 			$return['_pages'] = wp_remote_retrieve_header( $response, 'x-wp-totalpages' );
121 | 			$return['items']  = $content;
122 | 			return $return;
123 | 		} else {
124 | 			return $content;
125 | 		}
126 | 		return false;
127 | 
128 | 	}
129 | 
130 | 	public static function parse( $content, $rest_url, $args ) {
131 | 		if ( is_wp_error( $content ) ) {
132 | 			return $content;
133 | 		} 
134 | 		if ( array_key_exists( 'id', $content ) ) {
135 | 			return self::get_post( $content, $rest_url );
136 | 		// This is the REST URL itself if it has this.
137 | 		} elseif ( array_key_exists( 'namespaces', $content ) ) {
138 | 			// Return site data if single otherwise feed data.
139 | 			if ( 'single' === $args['return'] ) {
140 | 				$return       = array(
141 | 					'type' => 'card',
142 | 				);
143 | 				$timezone     = self::timezone( $content );
144 | 				$return['tz'] = $timezone->getName();
145 | 				if ( array_key_exists( '_embedded', $content ) ) {
146 | 					if ( array_key_exists( 'wp:featuredmedia', $content['_embedded'] ) ) {
147 | 						$photo = array();
148 | 						foreach ( $content['_embedded']['wp:featuredmedia'] as $media ) {
149 | 							$photo[] = ifset( $media['source_url'] );
150 | 						}
151 | 						$photo = array_unique( $photo );
152 | 						if ( 1 === count( $photo ) ) {
153 | 							$return['photo'] = array_pop( $photo );
154 | 						} else {
155 | 							$return['photo'] = array_filter( $photo );
156 | 						}
157 | 					}
158 | 				}
159 | 				$return['url']  = $content['url'];
160 | 				$return['name'] = $content['name'];
161 | 				$return['note'] = $content['description'];
162 | 				return $return;
163 | 			} else {
164 | 				$content = self::fetch( $rest_url, '/wp/v2/posts?_embed=1' );
165 | 
166 | 				$content = self::posts_to_feed( $content, $rest_url );
167 | 				return $content;
168 | 			}
169 | 		}
170 | 		return false;
171 | 	}
172 | 
173 | 	public static function get_author( $item ) {
174 | 		if ( ! array_key_exists( '_embedded', $item ) ) {
175 | 			return null;
176 | 		}
177 | 		$author      = $item['_embedded']['author'][0];
178 | 		if ( array_key_exists( 'code', $author ) ) {
179 | 			return null;
180 | 		}
181 | 		$avatar_urls = self::ifset( 'avatar_urls', $author );
182 | 		$avatar_urls = is_array( $avatar_urls ) ? end( $avatar_urls ) : null;
183 | 		$return      = array(
184 | 			'type'  => 'card',
185 | 			'name'  => self::ifset( 'name', $author ),
186 | 			'url'   => self::ifset( 'url', $author ),
187 | 			'note'  => self::ifset( 'description', $author ),
188 | 			'photo' => $avatar_urls,
189 | 			'me'    => self::ifset( 'me', $author ),
190 | 		);
191 | 		return array_filter( $return );
192 | 	}
193 | 
194 | 	public static function format_author( $json ) {
195 | 		$avatar_urls = self::ifset( 'avatar_urls', $json );
196 | 		$avatar_urls = is_array( $avatar_urls ) ? end( $avatar_urls ) : null;
197 | 		$return      = array(
198 | 			'type'  => 'card',
199 | 			'name'  => self::ifset( 'name', $json ),
200 | 			'url'   => self::ifset( 'url', $json ),
201 | 			'note'  => self::ifset( 'description', $json ),
202 | 			'photo' => $avatar_urls,
203 | 		);
204 | 		return $return;
205 | 	}
206 | 
207 | 	public static function get_datetime( $time, $timezone = null ) {
208 | 		$datetime = new DateTime( $time );
209 | 		if ( 'UTC' === $datetime->getTimeZone()->getName() ) {
210 | 			$datetime = new DateTime( $time, $timezone );
211 | 		}
212 | 		return $datetime->format( DATE_W3C );
213 | 	}
214 | 
215 | 	public static function site_data( $rest_url ) {
216 | 		$fetch = self::fetch( $rest_url, '', true );
217 | 		return wp_array_slice_assoc( $fetch, array( 'name', 'url', 'timezone_string', 'gmt_offset', 'description' ) );
218 | 	}
219 | 
220 | 	public static function timezone( $fetch ) {
221 | 		$timezone_string = self::ifset( 'timezone_string', $fetch );
222 | 		if ( $timezone_string ) {
223 | 				return new DateTimeZone( $timezone_string );
224 | 		}
225 | 
226 | 		$offset  = (float) self::ifset( 'gmt_offset', $fetch );
227 | 		$hours   = (int) $offset;
228 | 		$minutes = ( $offset - $hours );
229 | 
230 | 		$sign      = ( $offset < 0 ) ? '-' : '+';
231 | 		$abs_hour  = abs( $hours );
232 | 		$abs_mins  = abs( $minutes * 60 );
233 | 		$tz_offset = sprintf( '%s%02d:%02d', $sign, $abs_hour, $abs_mins );
234 | 		return new DateTimeZone( $tz_offset );
235 | 	}
236 | 
237 | 	public static function get_post( $item, $rest_url ) {
238 | 		$site_data = self::site_data( $rest_url );
239 | 		$author    = self::get_rest_path( $rest_url, $item['_links']['author'][0]['href'] );
240 | 		$timezone  = self::timezone( $site_data );
241 | 		$newitem   = array_filter(
242 | 			array(
243 | 				'uid'       => self::get_rendered( 'guid', $item ),
244 | 				'url'       => self::ifset( 'link', $item ),
245 | 				'name'      => self::get_rendered( 'title', $item ),
246 | 				'content'   => array_filter(
247 | 					array(
248 | 						'html' => Parse_This::clean_content( self::get_rendered( 'content', $item ) ),
249 | 						'text' => wp_strip_all_tags( self::get_rendered( 'content', $item ) ),
250 | 					)
251 | 				),
252 | 				'summary'   => self::get_rendered( 'excerpt', $item ),
253 | 				'published' => self::get_datetime( self::ifset( 'date', $item ), $timezone ),
254 | 				'updated'   => self::get_datetime( self::ifset( 'modified', $item ), $timezone ),
255 | 				'kind'      => self::ifset( 'kind', $item ),
256 | 			)
257 | 		);
258 | 
259 | 		if ( array_key_exists( '_embedded', $item ) ) {
260 | 			if ( array_key_exists( 'featured_media', $item ) && 0 !== $item['featured_media'] ) {
261 | 				$newitem['featured'] = $item['_embedded']['wp:featuredmedia'][0]['source_url'];
262 | 			}
263 | 			if ( array_key_exists( 'tags', $item ) && ! empty( $item['tags'] ) ) {
264 | 				foreach ( $item['_links']['wp:term'] as $term ) {
265 | 					if ( 'post_tag' === $term['taxonomy'] ) {
266 | 						$tag_path            = self::get_rest_path( $rest_url, $term['href'] );
267 | 						$tags                = self::fetch( $rest_url, $tag_path );
268 | 						$newitem['category'] = wp_list_pluck( $tags['items'], 'name' );
269 | 					}
270 | 				}
271 | 			}
272 | 			$newitem['author'] = self::get_author( $item );
273 | 		}
274 | 		return array_filter( $newitem );
275 | 	}
276 | 
277 | 	public static function posts_to_feed( $input, $url ) {
278 | 		$return            = array_filter(
279 | 			array(
280 | 				'type'       => 'feed',
281 | 				'_feed_type' => 'wordpress',
282 | 			)
283 | 		);
284 | 		$items             = $input['items'];
285 | 		$data              = self::site_data( $url );
286 | 		$timezone          = self::timezone( $data );
287 | 		$return['items']   = array();
288 | 		$return['name']    = self::ifset( 'name', $data );
289 | 		$return['summary'] = self::ifset( 'description', $data );
290 | 		$return['url']     = self::ifset( 'url', $data );
291 | 		foreach ( $items as $item ) {
292 | 			$newitem = array_filter(
293 | 				array(
294 | 					'uid'       => self::get_rendered( 'guid', $item ),
295 | 					'url'       => self::ifset( 'link', $item ),
296 | 					'name'      => self::get_rendered( 'title', $item ),
297 | 					'content'   => array_filter(
298 | 						array(
299 | 							'html' => Parse_This::clean_content( self::get_rendered( 'content', $item ) ),
300 | 							'text' => wp_strip_all_tags( self::get_rendered( 'content', $item ) ),
301 | 						)
302 | 					),
303 | 					'summary'   => self::get_rendered( 'excerpt', $item ),
304 | 					'published' => self::get_datetime( self::ifset( 'date', $item ), $timezone ),
305 | 					'updated'   => self::get_datetime( self::ifset( 'modified', $item ), $timezone ),
306 | 					'author'    => self::get_author( $item ),
307 | 					'kind'      => self::ifset( 'kind', $item ),
308 | 				)
309 | 			);
310 | 			if ( array_key_exists( '_embedded', $item ) ) {
311 | 				if ( array_key_exists( 'wp:term', $item['_embedded'] ) ) {
312 | 					$category = array();
313 | 					foreach ( $item['_embedded']['wp:term'] as $terms ) {
314 | 						foreach ( $terms as $term ) {
315 | 							if ( in_array( $term['taxonomy'], array( 'category', 'post_tags' ), true ) && 'Uncategorized' !== $term['name'] ) {
316 | 								$category[] = $term['name'];
317 | 							}
318 | 						}
319 | 					}
320 | 					$newitem['category'] = $category;
321 | 				}
322 | 				if ( array_key_exists( 'wp:featuredmedia', $item['_embedded'] ) ) {
323 | 					$newitem['featured'] = $item['_embedded']['wp:featuredmedia'][0]['source_url'];
324 | 				}
325 | 			}
326 | 			if ( WP_DEBUG ) {
327 | 				$newitem['_rest'] = $item;
328 | 			}
329 | 			$return['items'][] = array_filter( $newitem );
330 | 		}
331 | 		if ( array_key_exists( '_pages', $input ) ) {
332 | 			$return['_pages'] = $input['_pages'];
333 | 			$return['_total'] = $input['_total'];
334 | 		}
335 | 		return $return;
336 | 	}
337 | }
338 | 
339 | 
340 | 
341 | 


--------------------------------------------------------------------------------
/includes/class-parse-this-mf2-utils.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Helpers for processing microformats2 array structures.
  4 |  * Derived from https://github.com/barnabywalters/php-mf-cleaner
  5 |  * and https://github.com/aaronpk/XRay/blob/master/lib/Formats/Mf2.php
  6 |  * and https://github.com/pfefferle/wordpress-semantic-linkbacks/blob/master/includes/class-linkbacks-mf2-handler.php
  7 |  **/
  8 | 
  9 | class Parse_This_MF2_Utils extends Parse_This_Base {
 10 | 
 11 | 	/**
 12 | 	 * Verifies if $mf is an array without numeric keys, and has a 'properties' key.
 13 | 	 *
 14 | 	 * @param $mf
 15 | 	 * @return bool
 16 | 	 */
 17 | 	public static function is_microformat( $mf ) {
 18 | 		return ( is_array( $mf ) && ! wp_is_numeric_array( $mf ) && ! empty( $mf['type'] ) && isset( $mf['properties'] ) );
 19 | 	}
 20 | 
 21 | 	/**
 22 | 	 * Verifies if $mf is a microformat and has children
 23 | 	 *
 24 | 	 * @param $mf
 25 | 	 * @return bool
 26 | 	 */
 27 | 	public static function has_children( $mf ) {
 28 | 		return ( self::is_microformat( $mf ) && isset( $mf['children'] ) );
 29 | 	}
 30 | 
 31 | 	/**
 32 | 	 * Verifies if $mf has an 'items' key which is also an array, returns true.
 33 | 	 *
 34 | 	 * @param $mf
 35 | 	 * @return bool
 36 | 	 */
 37 | 	public static function is_microformat_array( $mf ) {
 38 | 		return ( is_array( $mf ) && isset( $mf['items'] ) && is_array( $mf['items'] ) );
 39 | 	}
 40 | 
 41 | 	/**
 42 | 	 * is this what type
 43 | 	 *
 44 | 	 * @param array  $mf Parsed Microformats Array
 45 | 	 * @param string $type Type
 46 | 	 * @return bool
 47 | 	 */
 48 | 	public static function is_type( $mf, $type ) {
 49 | 		return is_array( $mf ) && ! empty( $mf['type'] ) && is_array( $mf['type'] ) && in_array( $type, $mf['type'], true );
 50 | 	}
 51 | 
 52 | 	/**
 53 | 	 * Return Type of a Microformat.
 54 | 	 *
 55 | 	 * @param array $mf Parsed Microformats Array
 56 | 	 * @return string|false Return type if present or false if not a microformat.
 57 | 	 */
 58 | 	public static function get_type( $mf, $strip = false ) {
 59 | 		$type = false;
 60 | 		if ( self::is_microformat( $mf ) && is_array( $mf['type'] ) ) {
 61 | 			$type = $mf['type'][0];
 62 | 			if ( $strip ) {
 63 | 				$type = str_replace( 'h-', '', $type );
 64 | 			}
 65 | 		}
 66 | 		return $type;
 67 | 	}
 68 | 
 69 | 	/**
 70 | 	 * Parse Content
 71 | 	 *
 72 | 	 * @param array $mf Parsed Microformats Array.
 73 | 	 * @return array $data Content array consisting of text and html properties.
 74 | 	 */
 75 | 	public static function parse_html_value( $mf, $property ) {
 76 | 		if ( ! array_key_exists( $property, $mf['properties'] ) ) {
 77 | 			return null;
 78 | 		}
 79 | 		$textcontent = false;
 80 | 		$htmlcontent = false;
 81 | 		$content     = $mf['properties'][ $property ][0];
 82 | 		if ( is_string( $content ) ) {
 83 | 			$textcontent = $content;
 84 | 		} elseif ( ! is_string( $content ) && is_array( $content ) && array_key_exists( 'value', $content ) ) {
 85 | 			if ( array_key_exists( 'html', $content ) ) {
 86 | 				$htmlcontent = trim( Parse_This::clean_content( $content['html'] ) );
 87 | 				$textcontent = wp_strip_all_tags( $content['value'] );
 88 | 			} else {
 89 | 				$textcontent = trim( $content['value'] );
 90 | 			}
 91 | 		}
 92 | 		$data = array(
 93 | 			'text' => $textcontent,
 94 | 		);
 95 | 		if ( $htmlcontent && $textcontent !== $htmlcontent ) {
 96 | 			$data['html'] = $htmlcontent;
 97 | 		}
 98 | 		return $data;
 99 | 	}
100 | 
101 | 	/**
102 | 	 * Verifies if $p is an array without numeric keys and has key 'value' and 'html' set.
103 | 	 *
104 | 	 * @param $p
105 | 	 * @return bool
106 | 	 */
107 | 	public static function is_embedded_html( $p ) {
108 | 		return is_array( $p ) && ! wp_is_numeric_array( $p ) && isset( $p['value'] ) && isset( $p['html'] );
109 | 	}
110 | 
111 | 	/**
112 | 	 * Verifies if $p is an array without numeric keys and has key 'value' and 'alt' set.
113 | 	 *
114 | 	 * @param $p
115 | 	 * @return bool
116 | 	 */
117 | 	public static function is_embedded_img( $p ) {
118 | 		return is_array( $p ) && ! wp_is_numeric_array( $p ) && isset( $p['value'] ) && isset( $p['alt'] );
119 | 	}
120 | 
121 | 	/**
122 | 	 * Verifies if property named $propname is in array $mf.
123 | 	 *
124 | 	 * @param array    $mf
125 | 	 * @param $propname
126 | 	 * @return bool
127 | 	 */
128 | 	public static function has_prop( array $mf, $propname ) {
129 | 		return ! empty( $mf['properties'][ $propname ] ) && is_array( $mf['properties'][ $propname ] );
130 | 	}
131 | 
132 | 
133 | 	/**
134 | 	 * Verifies if rel named $relname is in array $mf.
135 | 	 *
136 | 	 * @param array   $mf
137 | 	 * @param $relname
138 | 	 * @return bool
139 | 	 */
140 | 	public static function has_rel( array $mf, $relname ) {
141 | 		return ! empty( $mf['rels'][ $relname ] ) && is_array( $mf['rels'][ $relname ] );
142 | 	}
143 | 
144 | 	/**
145 | 	 * Returns rel property $relname in array $mf.
146 | 	 *
147 | 	 * @param array   $mf
148 | 	 * @param $relname
149 | 	 * @return mixed
150 | 	 */
151 | 	public static function get_rel( array $mf, $relname ) {
152 | 		if ( self::has_rel( $mf, $relname ) ) {
153 | 			return $mf['rels'][ $relname ];
154 | 		}
155 | 		return false;
156 | 	}
157 | 
158 | 	/**
159 | 	 * Verifies if rel-url named $url is in array $mf.
160 | 	 *
161 | 	 * @param array   $mf
162 | 	 * @param $url
163 | 	 * @return bool
164 | 	 */
165 | 	public static function has_rel_urls( array $mf, $url ) {
166 | 		return ! empty( $mf['rel-urls'][ $url ] ) && is_array( $mf['rel-urls'][ $url ] );
167 | 	}
168 | 
169 | 	/**
170 | 	 * Returns rel-url property $url array $mf.
171 | 	 *
172 | 	 * @param array   $mf
173 | 	 * @param $url
174 | 	 * @return mixed
175 | 	 */
176 | 	public static function get_rel_urls( array $mf, $url ) {
177 | 		if ( self::has_rel_urls( $mf, $url ) ) {
178 | 			$return = array(
179 | 				'url' => array( $url ),
180 | 			);
181 | 			if ( array_key_exists( 'text', $mf['rel-urls'][ $url ] ) ) {
182 | 				$return['name'] = array( $mf['rel-urls'][ $url ]['text'] );
183 | 			} else {
184 | 				$return['name'] = array( $mf['rel-urls'][ $url ]['title'] );
185 | 			}
186 | 			return $return;
187 | 		}
188 | 		return false;
189 | 	}
190 | 
191 | 	/**
192 | 	 * shortcut for getPlaintext.
193 | 	 *
194 | 	 * @deprecated use getPlaintext from now on
195 | 	 * @param array       $mf
196 | 	 * @param $propname
197 | 	 * @param null|string $fallback
198 | 	 * @return mixed|null
199 | 	 */
200 | 	public static function get_prop( array $mf, $propname, $fallback = null ) {
201 | 		return self::get_plaintext( $mf, $propname, $fallback );
202 | 	}
203 | 
204 | 	/**
205 | 	 * If $v is a microformat or embedded html, return $v['value']. Else return v.
206 | 	 *
207 | 	 * @param $v
208 | 	 * @return mixed
209 | 	 */
210 | 	public static function to_plaintext( $v ) {
211 | 		if ( self::is_microformat( $v ) || self::is_embedded_html( $v ) || self::is_embedded_img( $v ) ) {
212 | 			return $v['value'];
213 | 		} elseif ( is_array( $v ) && isset( $v['text'] ) ) {
214 | 			return $v['text'];
215 | 		}
216 | 		return $v;
217 | 	}
218 | 
219 | 	/**
220 | 	 * Returns plaintext of $propname with optional $fallback
221 | 	 *
222 | 	 * @param array       $mf
223 | 	 * @param $propname
224 | 	 * @param null|string $fallback
225 | 	 * @return mixed|null
226 | 	 * @link http://php.net/manual/en/function.current.php
227 | 	 */
228 | 	public static function get_plaintext( array $mf, $propname, $fallback = null ) {
229 | 		if ( ! empty( $mf['properties'][ $propname ] ) && is_array( $mf['properties'][ $propname ] ) ) {
230 | 			return self::to_plaintext( current( $mf['properties'][ $propname ] ) );
231 | 		}
232 | 		return $fallback;
233 | 	}
234 | 
235 | 	/**
236 | 	 * Converts $propname in $mf into array_map plaintext, or $fallback if not valid.
237 | 	 *
238 | 	 * @param array       $mf
239 | 	 * @param $propname
240 | 	 * @param null|string $fallback
241 | 	 * @return null
242 | 	 */
243 | 	public static function get_plaintext_array( array $mf, $propname, $fallback = null ) {
244 | 		if ( ! empty( $mf['properties'][ $propname ] ) && is_array( $mf['properties'][ $propname ] ) ) {
245 | 			return array_map( array( static::class, 'to_plaintext' ), $mf['properties'][ $propname ] ); }
246 | 		return $fallback;
247 | 	}
248 | 
249 | 	/**
250 | 	 * Returns ['html'] element of $v, or ['value'] or just $v, in order of availablility.
251 | 	 *
252 | 	 * @param $v
253 | 	 * @return mixed
254 | 	 */
255 | 	public static function to_html( $v ) {
256 | 		if ( self::is_embedded_html( $v ) ) {
257 | 			return $v['html']; } elseif ( self::is_microformat( $v ) ) {
258 | 			return htmlspecialchars( $v['value'] ); }
259 | 			return htmlspecialchars( $v );
260 | 	}
261 | 
262 | 	/**
263 | 	 * Gets HTML of $propname or if not, $fallback
264 | 	 *
265 | 	 * @param array       $mf
266 | 	 * @param $propname
267 | 	 * @param null|string $fallback
268 | 	 * @return mixed|null
269 | 	 */
270 | 	public static function get_html( array $mf, $propname, $fallback = null ) {
271 | 		if ( ! empty( $mf['properties'][ $propname ] ) && is_array( $mf['properties'][ $propname ] ) ) {
272 | 			return self::to_html( current( $mf['properties'][ $propname ] ) ); }
273 | 		return $fallback;
274 | 	}
275 | 
276 | 
277 | 
278 | 	/**
279 | 	 * Returns 'summary' element of $mf or a truncated Plaintext of $mf['properties']['content'] with 19 chars and ellipsis.
280 | 	 *
281 | 	 * @deprecated as not often used
282 | 	 * @param array $mf
283 | 	 * @param array $content
284 | 	 * @return mixed|null|string
285 | 	 */
286 | 	public static function get_summary( array $mf, $content = null ) {
287 | 		if ( self::has_prop( $mf, 'summary' ) ) {
288 | 			return self::get_prop( $mf, 'summary' );
289 | 		}
290 | 		if ( ! $content ) {
291 | 			$content = self::parse_html_value( $mf, 'content' );
292 | 		}
293 | 		if ( is_array( $content ) && array_key_exists( 'text', $content ) ) {
294 | 			$summary = substr( $content['text'], 0, 300 );
295 | 			if ( 300 < strlen( $content['text'] ) ) {
296 | 				$summary .= '...';
297 | 			}
298 | 			return $summary;
299 | 		}
300 | 		return '';
301 | 	}
302 | 
303 | 
304 | 	/**
305 | 	 * Gets the date published of $mf array.
306 | 	 *
307 | 	 * @param array       $mf
308 | 	 * @param bool        $ensurevalid
309 | 	 * @param null|string $fallback optional result if date not available
310 | 	 * @return mixed|null
311 | 	 */
312 | 	public static function get_published( array $mf, $ensurevalid = false, $fallback = null ) {
313 | 		$date = self::get_datetime_property( 'published', $mf, $ensurevalid, $fallback );
314 | 		if ( $date instanceof DateTimeImmutable ) {
315 | 			return $date->format( DATE_W3C );
316 | 		}
317 | 		return null;
318 | 	}
319 | 
320 | 	/**
321 | 	 * Gets the date updated of $mf array.
322 | 	 *
323 | 	 * @param array $mf
324 | 	 * @param bool  $ensurevalid
325 | 	 * @param null  $fallback
326 | 	 * @return mixed|null
327 | 	 */
328 | 	public static function get_updated( array $mf, $ensurevalid = false, $fallback = null ) {
329 | 		$date = self::get_datetime_property( 'updated', $mf, $ensurevalid, $fallback );
330 | 		if ( $date instanceof DateTimeImmutable ) {
331 | 			return $date->format( DATE_W3C );
332 | 		}
333 | 		return null;
334 | 	}
335 | 
336 | 	/**
337 | 	 * Gets the DateTime properties including published or updated, depending on params.
338 | 	 *
339 | 	 * @param $name string updated or published
340 | 	 * @param array                            $mf
341 | 	 * @param bool                             $ensurevalid
342 | 	 * @param null|string                      $fallback
343 | 	 * @return DateTime|null
344 | 	 */
345 | 	public static function get_datetime_property( $name, array $mf, $ensurevalid = false, $fallback = null ) {
346 | 		$compliment = 'published' === $name ? 'updated' : 'published';
347 | 		if ( self::has_prop( $mf, $name ) ) {
348 | 			$return = self::get_prop( $mf, $name );
349 | 		} elseif ( self::has_prop( $mf, $compliment ) ) {
350 | 			$return = self::get_prop( $mf, $compliment );
351 | 		} else {
352 | 			return $fallback;
353 | 		}
354 | 		if ( ! $ensurevalid ) {
355 | 			return $return;
356 | 		} else {
357 | 			try {
358 | 				return new DateTimeImmutable( $return );
359 | 			} catch ( Exception $e ) {
360 | 				return $fallback;
361 | 			}
362 | 		}
363 | 	}
364 | 
365 | 	/**
366 | 	 * True if same hostname is parsed on both
367 | 	 *
368 | 	 * @param $u1 string url
369 | 	 * @param $u2 string url
370 | 	 * @return bool
371 | 	 * @link http://php.net/manual/en/function.parse-url.php
372 | 	 */
373 | 	public static function same_hostname( $u1, $u2 ) {
374 | 		return wp_parse_url( $u1, PHP_URL_HOST ) === wp_parse_url( $u2, PHP_URL_HOST );
375 | 	}
376 | 
377 | 	/**
378 | 	 * Returns array per parse_url standard with pathname key added.
379 | 	 *
380 | 	 * @param $url
381 | 	 * @return mixed
382 | 	 * @link http://php.net/manual/en/function.parse-url.php
383 | 	 */
384 | 	public static function parse_url( $url ) {
385 | 		$r             = wp_parse_url( $url );
386 | 		$r['pathname'] = empty( $r['path'] ) ? '/' : $r['path'];
387 | 		return $r;
388 | 	}
389 | 
390 | 
391 | 	/**
392 | 	 * See if urls match for each component of parsed urls. Return true if so.
393 | 	 *
394 | 	 * @param $url1
395 | 	 * @param $url2
396 | 	 * @return bool
397 | 	 * @see parseUrl()
398 | 	 */
399 | 	public static function urls_match( $url1, $url2 ) {
400 | 		return ( normalize_url( $url1 ) === normalize_url( $url2 ) );
401 | 	}
402 | }
403 | 


--------------------------------------------------------------------------------
/lib/html5/HTML5/Parser/Scanner.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Masterminds\HTML5\Parser;
  4 | 
  5 | use Masterminds\HTML5\Exception;
  6 | 
  7 | /**
  8 |  * The scanner scans over a given data input to react appropriately to characters.
  9 |  */
 10 | class Scanner
 11 | {
 12 |     const CHARS_HEX = 'abcdefABCDEF01234567890';
 13 |     const CHARS_ALNUM = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890';
 14 |     const CHARS_ALPHA = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
 15 | 
 16 |     /**
 17 |      * The string data we're parsing.
 18 |      */
 19 |     private $data;
 20 | 
 21 |     /**
 22 |      * The current integer byte position we are in $data.
 23 |      */
 24 |     private $char;
 25 | 
 26 |     /**
 27 |      * Length of $data; when $char === $data, we are at the end-of-file.
 28 |      */
 29 |     private $EOF;
 30 | 
 31 |     /**
 32 |      * Parse errors.
 33 |      */
 34 |     public $errors = array();
 35 | 
 36 |     /**
 37 |      * Create a new Scanner.
 38 |      *
 39 |      * @param string $data     Data to parse.
 40 |      * @param string $encoding The encoding to use for the data.
 41 |      *
 42 |      * @throws Exception If the given data cannot be encoded to UTF-8.
 43 |      */
 44 |     public function __construct($data, $encoding = 'UTF-8')
 45 |     {
 46 |         if ($data instanceof InputStream) {
 47 |             @trigger_error('InputStream objects are deprecated since version 2.4 and will be removed in 3.0. Use strings instead.', E_USER_DEPRECATED);
 48 |             $data = (string) $data;
 49 |         }
 50 | 
 51 |         $data = UTF8Utils::convertToUTF8($data, $encoding);
 52 | 
 53 |         // There is good reason to question whether it makes sense to
 54 |         // do this here, since most of these checks are done during
 55 |         // parsing, and since this check doesn't actually *do* anything.
 56 |         $this->errors = UTF8Utils::checkForIllegalCodepoints($data);
 57 | 
 58 |         $data = $this->replaceLinefeeds($data);
 59 | 
 60 |         $this->data = $data;
 61 |         $this->char = 0;
 62 |         $this->EOF = strlen($data);
 63 |     }
 64 | 
 65 |     /**
 66 |      * Check if upcomming chars match the given sequence.
 67 |      *
 68 |      * This will read the stream for the $sequence. If it's
 69 |      * found, this will return true. If not, return false.
 70 |      * Since this unconsumes any chars it reads, the caller
 71 |      * will still need to read the next sequence, even if
 72 |      * this returns true.
 73 |      *
 74 |      * Example: $this->scanner->sequenceMatches('</script>') will
 75 |      * see if the input stream is at the start of a
 76 |      * '</script>' string.
 77 |      *
 78 |      * @param string $sequence
 79 |      * @param bool   $caseSensitive
 80 |      *
 81 |      * @return bool
 82 |      */
 83 |     public function sequenceMatches($sequence, $caseSensitive = true)
 84 |     {
 85 |         $portion = substr($this->data, $this->char, strlen($sequence));
 86 | 
 87 |         return $caseSensitive ? $portion === $sequence : 0 === strcasecmp($portion, $sequence);
 88 |     }
 89 | 
 90 |     /**
 91 |      * Get the current position.
 92 |      *
 93 |      * @return int The current intiger byte position.
 94 |      */
 95 |     public function position()
 96 |     {
 97 |         return $this->char;
 98 |     }
 99 | 
100 |     /**
101 |      * Take a peek at the next character in the data.
102 |      *
103 |      * @return string The next character.
104 |      */
105 |     public function peek()
106 |     {
107 |         if (($this->char + 1) < $this->EOF) {
108 |             return $this->data[$this->char + 1];
109 |         }
110 | 
111 |         return false;
112 |     }
113 | 
114 |     /**
115 |      * Get the next character.
116 |      * Note: This advances the pointer.
117 |      *
118 |      * @return string The next character.
119 |      */
120 |     public function next()
121 |     {
122 |         ++$this->char;
123 | 
124 |         if ($this->char < $this->EOF) {
125 |             return $this->data[$this->char];
126 |         }
127 | 
128 |         return false;
129 |     }
130 | 
131 |     /**
132 |      * Get the current character.
133 |      * Note, this does not advance the pointer.
134 |      *
135 |      * @return string The current character.
136 |      */
137 |     public function current()
138 |     {
139 |         // At or beyond EOF there is no byte to hand back.
140 |         if ($this->char >= $this->EOF) {
141 |             return false;
142 |         }
143 |         return $this->data[$this->char];
144 |     }
145 | 
146 |     /**
147 |      * Silently consume N chars.
148 |      *
149 |      * @param int $count
150 |      */
151 |     public function consume($count = 1)
152 |     {
153 |         $this->char = $this->char + $count; // Unchecked: the cursor may end up past EOF.
154 |     }
155 | 
156 |     /**
157 |      * Unconsume some of the data.
158 |      * This moves the data pointer backwards.
159 |      *
160 |      * @param int $howMany The number of characters to move the pointer back.
161 |      */
162 |     public function unconsume($howMany = 1)
163 |     {
164 |         $rewound = $this->char - $howMany;
165 |         // A rewind that would cross the start of the data is ignored outright.
166 |         $this->char = $rewound >= 0 ? $rewound : $this->char;
167 |     }
168 | 
169 |     /**
170 |      * Get the next group of characters that are hex characters.
171 |      * Note, along with getting the characters the pointer in the data will be
172 |      * moved as well.
173 |      *
174 |      * @return string The next group that is hex characters.
175 |      */
176 |     public function getHex()
177 |     {
178 |         return $this->doCharsWhile(static::CHARS_HEX); // Mask is the CHARS_HEX class constant; false when already at EOF.
179 |     }
180 | 
181 |     /**
182 |      * Get the next group of characters that are ASCII Alpha characters.
183 |      * Note, along with getting the characters the pointer in the data will be
184 |      * moved as well.
185 |      *
186 |      * @return string The next group of ASCII alpha characters.
187 |      */
188 |     public function getAsciiAlpha()
189 |     {
190 |         return $this->doCharsWhile(static::CHARS_ALPHA); // Mask is the CHARS_ALPHA class constant; false when already at EOF.
191 |     }
192 | 
193 |     /**
194 |      * Get the next group of characters that are ASCII Alpha characters and numbers.
195 |      * Note, along with getting the characters the pointer in the data will be
196 |      * moved as well.
197 |      *
198 |      * @return string The next group of ASCII alpha characters and numbers.
199 |      */
200 |     public function getAsciiAlphaNum()
201 |     {
202 |         return $this->doCharsWhile(static::CHARS_ALNUM); // Mask is the CHARS_ALNUM class constant; false when already at EOF.
203 |     }
204 | 
205 |     /**
206 |      * Get the next group of numbers.
207 |      * Note, along with getting the characters the pointer in the data will be
208 |      * moved as well.
209 |      *
210 |      * @return string The next group of numbers.
211 |      */
212 |     public function getNumeric()
213 |     {
214 |         return $this->doCharsWhile('0123456789'); // Literal mask of the ten ASCII digits; false when already at EOF.
215 |     }
216 | 
217 |     /**
218 |      * Consume whitespace.
219 |      * Whitespace in HTML5 is: formfeed, tab, newline, space.
220 |      *
221 |      * @return int|false The number of whitespace bytes consumed, or false at EOF.
222 |      */
223 |     public function whitespace()
224 |     {
225 |         // At or past EOF there is nothing to skip; callers get false, not 0.
226 |         if ($this->EOF <= $this->char) {
227 |             return false;
228 |         }
229 |         // Length of the run of LF, TAB, FF and SPACE bytes starting at the cursor.
230 |         $skipped = strspn($this->data, "\n\t\f ", $this->char);
231 |         $this->char = $this->char + $skipped;
232 | 
233 |         return $skipped;
234 |     }
235 | 
236 |     /**
237 |      * Returns the current line that is being consumed.
238 |      *
239 |      * @return int The current line number.
240 |      */
241 |     public function currentLine()
242 |     {
243 |         if (0 === $this->char || empty($this->EOF)) {
244 |             return 1;
245 |         }
246 |         // Lines are 1-based: count the newlines in the bytes before the cursor,
247 |         // with the scanned length capped at $this->EOF.
248 |         $scanned = min($this->char, $this->EOF);
249 |         return 1 + substr_count($this->data, "\n", 0, $scanned);
250 |     }
251 | 
252 |     /**
253 |      * Read chars until something in the mask is encountered.
254 |      *
255 |      * @param string $mask
256 |      *
257 |      * @return mixed
258 |      */
259 |     public function charsUntil($mask)
260 |     {
261 |         return $this->doCharsUntil($mask); // No $max is passed, so the scan has no length cap; false when already at EOF.
262 |     }
263 | 
264 |     /**
265 |      * Read chars as long as the mask matches.
266 |      *
267 |      * @param string $mask
268 |      *
269 |      * @return string|false The matched characters, or false at EOF.
270 |      */
271 |     public function charsWhile($mask)
272 |     {
273 |         return $this->doCharsWhile($mask); // No $max is passed, so the scan has no length cap; false when already at EOF.
274 |     }
275 | 
276 |     /**
277 |      * Returns the current column of the current line that the tokenizer is at.
278 |      *
279 |      * Newlines are column 0. The first char after a newline is column 1.
280 |      *
281 |      * @return int The column number.
282 |      */
283 |     public function columnOffset()
284 |     {
285 |         // The very first byte is defined to be column 0.
286 |         if (0 === $this->char) {
287 |             return 0;
288 |         }
289 | 
290 |         // A negative strrpos() offset is counted back from the end of the
291 |         // string, so this looks for the last "\n" located at or before byte
292 |         // ($this->char - 1), i.e. the byte directly in front of the cursor.
293 |         $fromEnd = $this->char - 1 - strlen($this->data);
294 |         $newlineAt = strrpos($this->data, "\n", $fromEnd);
295 | 
296 |         if (false === $newlineAt) {
297 |             // No newline so far: still on the first line, measure from byte 0.
298 |             $lineSoFar = substr($this->data, 0, $this->char);
299 |         } else {
300 |             // Take the bytes after that newline, up to but excluding the cursor.
301 |             $lineSoFar = substr($this->data, $newlineAt + 1, $this->char - 1 - $newlineAt);
302 |         }
303 | 
304 |         // The byte slice is handed to UTF8Utils::countChars() for the final count.
305 |         $column = UTF8Utils::countChars($lineSoFar);
306 | 
307 |         return $column;
308 |     }
309 | 
310 |     /**
311 |      * Get all characters until EOF.
312 |      *
313 |      * This consumes characters until the EOF.
314 |      *
315 |      * @return string The remaining data, or an empty string when none is left.
316 |      */
317 |     public function remainingChars()
318 |     {
319 |         // Already at (or beyond) EOF: nothing left, and the cursor is untouched.
320 |         if ($this->char >= $this->EOF) {
321 |             return '';
322 |         }
323 |         $tail = substr($this->data, $this->char);
324 |         $this->char = $this->EOF; // Everything up to EOF has now been consumed.
325 | 
326 |         return $tail;
327 |     }
328 | 
329 |     /**
330 |      * Replace linefeed characters according to the spec.
331 |      *
332 |      * @param string $data
333 |      *
334 |      * @return string
335 |      */
336 |     private function replaceLinefeeds($data)
337 |     {
338 |         /*
339 |          * Input-stream preprocessing. No CR may reach the tokenizer: a CR LF
340 |          * pair collapses into a single LF and a lone CR is rewritten as LF.
341 |          * NUL bytes are swapped for the UTF-8 bytes of U+FFFD at the same time.
342 |          *
343 |          * strtr() given an array tries the longest key first and never rescans
344 |          * what it already replaced, so "\r\n" is matched before the bare "\r".
345 |          */
346 |         $replacements = array();
347 |         $replacements["\r\n"] = "\n";
348 |         $replacements["\r"] = "\n";
349 |         $replacements["\0"] = "\xEF\xBF\xBD";
350 | 
351 |         return strtr($data, $replacements);
352 |     }
353 | 
354 |     /**
355 |      * Read to a particular match (or until $max bytes are consumed).
356 |      *
357 |      * This operates on byte sequences, not characters.
358 |      *
359 |      * Matches as far as possible until we reach a certain set of bytes
360 |      * and returns the matched substring.
361 |      *
362 |      * @param string $bytes Bytes to match.
363 |      * @param int    $max   Maximum number of bytes to scan.
364 |      *
365 |      * @return string|false The bytes read (possibly an empty string), or false when
366 |      *                      already at EOF. Compare strictly: '' and '0' are falsy too.
367 |      */
368 |     private function doCharsUntil($bytes, $max = null)
369 |     {
370 |         if ($this->EOF <= $this->char) {
371 |             return false; // Nothing left to scan.
372 |         }
373 | 
374 |         // Hand strcspn() a length only for a usable cap: integer 0 counts as
375 |         // a cap, every other falsy value means "scan without a limit".
376 |         $capped = (0 === $max) || $max;
377 |         $run = $capped
378 |             ? strcspn($this->data, $bytes, $this->char, $max)
379 |             : strcspn($this->data, $bytes, $this->char);
380 | 
381 |         $start = $this->char;
382 |         $this->char += $run;
383 |         return (string) substr($this->data, $start, $run);
384 |     }
385 | 
386 |     /**
387 |      * Returns the string so long as $bytes matches.
388 |      *
389 |      * Matches as far as possible with a certain set of bytes
390 |      * and returns the matched substring.
391 |      *
392 |      * @param string $bytes A mask of bytes to match. If ANY byte in this mask matches the
393 |      *                      current char, the pointer advances and the char is part of the
394 |      *                      substring.
395 |      * @param int    $max   The max number of chars to read.
396 |      *
397 |      * @return string|false The matched bytes, or false when already at EOF.
398 |      */
399 |     private function doCharsWhile($bytes, $max = null)
400 |     {
401 |         if ($this->EOF <= $this->char) {
402 |             return false; // Nothing left to scan.
403 |         }
404 | 
405 |         // Hand strspn() a length only for a usable cap: integer 0 counts as
406 |         // a cap, every other falsy value means "scan without a limit".
407 |         $capped = (0 === $max) || $max;
408 |         $run = $capped
409 |             ? strspn($this->data, $bytes, $this->char, $max)
410 |             : strspn($this->data, $bytes, $this->char);
411 | 
412 |         $start = $this->char;
413 |         $this->char += $run;
414 |         return (string) substr($this->data, $start, $run);
415 |     }
416 | }
417 | 


--------------------------------------------------------------------------------
/includes/class-parse-this-html.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Parse This HTML class.
  4 |  * Originally Derived from the Press This Class with Enhancements.
  5 |  */
  6 | class Parse_This_HTML extends Parse_This_Base {
  7 | 	/**
  8 | 	 * Builds jf2 properties from the <meta>, <title>, <video>, <audio> and
  9 | 	 * <figure data-audio-url> elements of an HTML document.
 10 | 	 *
 11 | 	 * @access public
 12 | 	 *
 13 | 	 * @param DOMDocument|mixed $doc Document to inspect. Falsy input yields an empty
 14 | 	 *                               array; any other non-object is returned as is.
 15 | 	 * @param string            $url Document URL. Only the commented-out scan reads it.
 16 | 	 * @return array|mixed jf2 properties with empty values removed.
 17 | 	 */
 18 | 	public static function parse( $doc, $url ) {
 19 | 		if ( ! $doc ) {
 20 | 			return array();
 21 | 		}
 22 | 		if ( ! is_object( $doc ) ) {
 23 | 			return $doc;
 24 | 		}
 25 | 		$xpath = new DOMXPath( $doc );
 26 | 
 27 | 		$meta = array();
 28 | 		// Look for OGP properties
 29 | 		foreach ( $xpath->query( '//meta[(@name or @property or @itemprop) and @content]' ) as $tag ) {
 30 | 			$meta_name = self::limit_string( $tag->getAttribute( 'property' ) );
 31 | 			if ( ! $meta_name ) {
 32 | 				$meta_name = self::limit_string( $tag->getAttribute( 'name' ) );
 33 | 			}
 34 | 			if ( ! $meta_name ) {
 35 | 				$meta_name = self::limit_string( $tag->getAttribute( 'itemprop' ) );
 36 | 			}
 37 | 			$meta_value = $tag->getAttribute( 'content' );
 38 | 
 39 | 			// Sanity check. $meta_name is usually things like 'title', 'description', 'keywords', etc.
 40 | 			if ( strlen( $meta_name ) > 200 ) {
 41 | 				continue;
 42 | 			}
 43 | 			// Decode known JSON encoded properties
 44 | 			if ( 'parsely-metadata' === $meta_name ) {
 45 | 				$json = json_decode( $meta_value, true );
 46 | 				if ( is_array( $json ) ) {
 47 | 					$meta_value = $json;
 48 | 				}
 49 | 			}
 50 | 
 51 | 			// Parsely-page is deprecated but convert it to the new parsely format.
 52 | 			if ( 'parsely-page' === $meta_name ) {
 53 | 				$json = json_decode( $meta_value, true );
 54 | 				if ( is_array( $json ) ) {
 55 | 					foreach ( $json as $key => $value ) {
 56 | 						$key  = str_replace( '_', '-', $key );
 57 | 						$meta = self::set( $meta, 'parsely-' . $key, $value );
 58 | 					}
 59 | 					continue;
 60 | 				}
 61 | 			}
 62 | 			$meta = self::set( $meta, $meta_name, $meta_value );
 63 | 		}
 64 | 
 65 | 		// A document without <title> has no node at index 0; use '' instead of reading a property of null.
 66 | 		$title         = $xpath->query( '//title' )->item( 0 );
 67 | 		$meta['title'] = $title ? trim( $title->textContent ) : '';
 68 | 		$meta          = self::parse_meta( $meta );
 69 | 		if ( isset( $meta['og'] ) ) {
 70 | 			$meta['og'] = self::parse_meta( $meta['og'] );
 71 | 		}
 72 | 		$jf2 = self::meta_to_jf2( $meta );
 73 | 
 74 | 		if ( ! isset( $jf2['video'] ) ) {
 75 | 			// Fetch and gather <video> data.
 76 | 			$videos = array();
 77 | 			foreach ( $xpath->query( '//video' ) as $video ) {
 78 | 				$src = $video->getAttribute( 'src' );
 79 | 				if ( ! empty( $src ) ) {
 80 | 					// Append: a plain assignment replaced the list with a string, which array_unique() cannot take.
 81 | 					$videos[] = $src;
 82 | 				}
 83 | 			}
 84 | 			$jf2['video'] = array_unique( $videos );
 85 | 		}
 86 | 
 87 | 		if ( ! isset( $jf2['audio'] ) ) {
 88 | 			// Fetch and gather <audio> data.
 89 | 			$audios = array();
 90 | 
 91 | 			foreach ( $xpath->query( '//audio' ) as $audio ) {
 92 | 				$src = $audio->getAttribute( 'src' );
 93 | 				if ( ! empty( $src ) ) {
 94 | 					$audios[] = $src;
 95 | 				}
 96 | 			}
 97 | 
 98 | 			foreach ( $xpath->query( '//figure' ) as $audio ) {
 99 | 				$src = $audio->getAttribute( 'data-audio-url' );
100 | 				if ( ! empty( $src ) ) {
101 | 					$audios[] = $src;
102 | 				}
103 | 			}
104 | 			$jf2['audio'] = array_unique( $audios );
105 | 		}
106 | 
107 | 		/*
108 | 		 For now do not search every link embed etc
109 | 		// Fetch and gather <iframe> data.
110 | 		$embeds = array();
111 | 
112 | 		foreach ( $xpath->query( '//iframe[@src]' ) as $embed ) {
113 | 			$src = self::limit_embed( $embed->getAttribute( 'src' ), $url );
114 | 			if ( ! empty( $src ) ) {
115 | 				$embeds[] = $src;
116 | 			}
117 | 		}
118 | 
119 | 		// Fetch and gather <img> data.
120 | 		$images = array();
121 | 		foreach ( $xpath->query( '//img[@src]' ) as $image ) {
122 | 			$src = self::limit_img( $image->getAttribute( 'src' ), $url );
123 | 			if ( ! empty( $src ) ) {
124 | 				$images[] = $src;
125 | 			}
126 | 		}
127 | 		$images = array_unique( $images );
128 | 
129 | 		// Fetch and gather <link> data.
130 | 		$links = array();
131 | 
132 | 		foreach ( $xpath->query( '//link[@rel and @href]' ) as $link ) {
133 | 			$rel = $link->getAttribute( 'rel' );
134 | 			$url = self::limit_url( $link->getAttribute( 'href' ), $url );
135 | 			if ( ! empty( $url ) ) {
136 | 				$links[ $rel ] = $url;
137 | 			}
138 | 		}
139 | 
140 | 		$video_extensions = array(
141 | 			'mp4',
142 | 			'mkv',
143 | 			'webm',
144 | 			'ogv',
145 | 			'avi',
146 | 			'm4v',
147 | 			'mpg',
148 | 		);
149 | 		$audio_extensions = array(
150 | 			'mp3',
151 | 			'ogg',
152 | 			'm4a',
153 | 			'm4b',
154 | 			'flac',
155 | 			'aac',
156 | 		);
157 | 		$urls             = array();
158 | 		foreach ( $xpath->query( '//a' ) as $link ) {
159 | 			$u         = pt_make_absolute_url( $link->getAttribute( 'href' ), $url );
160 | 			$urls[]    = wp_http_validate_url( $u );
161 | 			$extension = pathinfo( wp_parse_url( $url, PHP_URL_PATH ), PATHINFO_EXTENSION );
162 | 			if ( in_array( $extension, $audio_extensions, true ) ) {
163 | 				$audios[] = $url;
164 | 			}
165 | 			if ( in_array( $extension, $video_extensions, true ) ) {
166 | 				$videos[] = $url;
167 | 			}
168 | 		} */
169 | 
170 | 		// Reading an undefined constant is an error in PHP 8, so check that WP_DEBUG exists first.
171 | 		if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) {
172 | 			$jf2['_meta'] = $meta;
173 | 		}
174 | 		return array_filter( $jf2 );
175 | 	}
176 | 
177 | 	/**
178 | 	 * Maps parsed meta data onto jf2 properties.
179 | 	 *
180 | 	 * Open Graph is read first, Dublin Core then overrides name, author and
181 | 	 * summary, and the remaining sources (citation_*, Parsely, playfoursquare,
182 | 	 * plain meta names) only fill properties that are still missing.
183 | 	 *
184 | 	 * @param array $meta Meta data grouped by prefix, in the shape parse_meta() returns.
185 | 	 * @return array jf2 properties. Entries may still be empty; parse() filters them.
186 | 	 */
187 | 	public static function meta_to_jf2( $meta ) {
188 | 		if ( empty( $meta ) ) {
189 | 			return array();
190 | 		}
191 | 		$jf2 = array();
192 | 		if ( isset( $meta['og'] ) ) {
193 | 			if ( isset( $meta['og']['url'] ) ) {
194 | 				$jf2['url'] = $meta['og']['url'];
195 | 			}
196 | 			if ( isset( $meta['og']['title'] ) ) {
197 | 				$jf2['name'] = $meta['og']['title'];
198 | 			}
199 | 			if ( isset( $meta['og']['description'] ) ) {
200 | 				$jf2['summary'] = $meta['og']['description'];
201 | 			}
202 | 			if ( isset( $meta['og']['image'] ) ) {
203 | 				$image = $meta['og']['image'];
204 | 				if ( is_string( $image ) ) {
205 | 					$jf2['featured'] = $image;
206 | 				} elseif ( is_array( $image ) ) {
207 | 					$jf2['featured'] = ifset( $image[0], ifset( $image['secure_url'] ) );
208 | 				}
209 | 			}
210 | 			if ( isset( $meta['og']['site_name'] ) ) {
211 | 				$jf2['publication'] = $meta['og']['site_name'];
212 | 			}
213 | 			if ( isset( $meta['og']['video'] ) ) {
214 | 				$video = $meta['og']['video'];
215 | 				if ( is_string( $video ) ) {
216 | 					$jf2['video'] = $video;
217 | 				} elseif ( is_array( $video ) ) {
218 | 					$jf2['video']    = ifset( $video['url'], ifset( $video[0] ) );
219 | 					$jf2['category'] = ifset( $video['tag'] );
220 | 				}
221 | 			}
222 | 			if ( isset( $meta['og']['audio'] ) ) {
223 | 				$jf2['audio'] = $meta['og']['audio'];
224 | 			}
225 | 			if ( isset( $meta['og']['locale'] ) ) {
226 | 				$jf2['locale'] = $meta['og']['locale'];
227 | 			}
228 | 			// Both coordinates are required: latitude used to be filled with the longitude value.
229 | 			if ( isset( $meta['og']['longitude'], $meta['og']['latitude'] ) ) {
230 | 				$jf2['location'] = array(
231 | 					'longitude' => $meta['og']['longitude'],
232 | 					'latitude'  => $meta['og']['latitude'],
233 | 				);
234 | 			}
235 | 			// A non-string og:type (e.g. an array of values) cannot be used as an array key below.
236 | 			if ( isset( $meta['og']['type'] ) && is_string( $meta['og']['type'] ) ) {
237 | 				$type = $meta['og']['type'];
238 | 				if ( isset( $meta[ $type ]['tag'] ) ) {
239 | 					$jf2['category'] = $meta[ $type ]['tag'];
240 | 				}
241 | 				if ( ! empty( $meta[ $type ]['author'] ) ) {
242 | 					$jf2['author'] = $meta[ $type ]['author'];
243 | 				}
244 | 				if ( 'article' === $type ) {
245 | 					$jf2['type'] = 'entry';
246 | 					$published   = ifset( $meta['article']['published_time'], ifset( $meta['article']['published'] ) );
247 | 					if ( $published ) {
248 | 						$jf2['published'] = normalize_iso8601( $published );
249 | 					}
250 | 					$modified = ifset( $meta['article']['modified_time'], ifset( $meta['article']['modified'] ) );
251 | 					if ( $modified ) {
252 | 						$jf2['modified'] = normalize_iso8601( $modified );
253 | 					}
254 | 					$jf2['category'] = ifset( $meta['article']['tag'] );
255 | 				}
256 | 				if ( 'book' === $type ) {
257 | 					$jf2['type'] = 'cite';
258 | 					if ( isset( $meta['book']['isbn'] ) ) {
259 | 						$jf2['uid'] = $meta['book']['isbn'];
260 | 					}
261 | 					// Test the key that is actually read; the guard used to look at $meta['release_date'].
262 | 					if ( isset( $meta['book']['release_date'] ) ) {
263 | 						$jf2['release_date'] = $meta['book']['release_date'];
264 | 					}
265 | 				}
266 | 				if ( 'profile' === $type ) {
267 | 					$jf2['type'] = 'card';
268 | 				}
269 | 				if ( 'music.song' === $type ) {
270 | 					$jf2['type'] = 'cite';
271 | 					if ( isset( $meta['music']['musician'] ) ) {
272 | 						$jf2['author'] = $meta['music']['musician'];
273 | 					}
274 | 					if ( isset( $meta['music']['duration'] ) ) {
275 | 						$jf2['duration'] = $meta['music']['duration'];
276 | 					}
277 | 					if ( isset( $meta['music']['release_date'] ) ) {
278 | 						$jf2['release_date'] = $meta['music']['release_date'];
279 | 					}
280 | 					if ( isset( $meta['music']['album'] ) ) {
281 | 						$jf2['publication'] = $meta['music']['album'];
282 | 					}
283 | 				}
284 | 				if ( in_array( $type, array( 'video.movie', 'video.episode' ), true ) ) {
285 | 					$jf2['type'] = 'cite';
286 | 					if ( isset( $meta['video']['tag'] ) ) {
287 | 						$jf2['category'] = $meta['video']['tag'];
288 | 					}
289 | 					if ( isset( $meta['video']['release_date'] ) ) {
290 | 						$jf2['release_date'] = $meta['video']['release_date'];
291 | 					}
292 | 					if ( isset( $meta['video']['duration'] ) ) {
293 | 						$jf2['duration'] = $meta['video']['duration'];
294 | 					}
295 | 				}
296 | 			}
297 | 		}
298 | 		if ( isset( $meta['dc'] ) ) {
299 | 			$dc = $meta['dc'];
300 | 			if ( isset( $dc['Title'] ) ) {
301 | 				$jf2['name'] = $dc['Title'];
302 | 			}
303 | 			if ( isset( $dc['Creator'] ) ) {
304 | 				if ( is_string( $dc['Creator'] ) ) {
305 | 					$jf2['author'] = $dc['Creator'];
306 | 				} else {
307 | 					$jf2['author'] = array();
308 | 					foreach ( $dc['Creator'] as $creator ) {
309 | 						$jf2['author'][] = array(
310 | 							'type' => 'card',
311 | 							'name' => $creator,
312 | 						);
313 | 					}
314 | 				}
315 | 			}
316 | 			if ( isset( $dc['Description'] ) ) {
317 | 				$jf2['summary'] = $dc['Description'];
318 | 			}
319 | 			if ( isset( $dc['Date'] ) && ! isset( $jf2['published'] ) ) {
320 | 				$jf2['published'] = normalize_iso8601( $dc['Date'] );
321 | 			}
322 | 		}
323 | 
324 | 		// Author fallbacks, in priority order; the first key that is present wins.
325 | 		if ( ! isset( $jf2['author'] ) ) {
326 | 			foreach ( array( 'citation_author', 'parsely-author', 'author' ) as $author ) {
327 | 				if ( isset( $meta[ $author ] ) ) {
328 | 					if ( is_string( $meta[ $author ] ) ) {
329 | 						$jf2['author'] = $meta[ $author ];
330 | 					} else {
331 | 						$jf2['author'] = array();
332 | 						foreach ( $meta[ $author ] as $a ) {
333 | 							$jf2['author'][] = $a;
334 | 						}
335 | 					}
336 | 					break;
337 | 				}
338 | 			}
339 | 		}
340 | 
341 | 		// A one-element author list is collapsed to that single element.
342 | 		if ( isset( $jf2['author'] ) && is_array( $jf2['author'] ) && 1 === count( $jf2['author'] ) ) {
343 | 			$jf2['author'] = array_pop( $jf2['author'] );
344 | 		}
345 | 
346 | 		if ( ! isset( $jf2['featured'] ) && isset( $meta['parsely-image-url'] ) ) {
347 | 			$jf2['featured'] = esc_url_raw( $meta['parsely-image-url'] );
348 | 		}
349 | 
350 | 		if ( empty( $jf2['category'] ) && isset( $meta['parsely-tags'] ) ) {
351 | 			if ( is_array( $meta['parsely-tags'] ) ) {
352 | 				$jf2['category'] = $meta['parsely-tags'];
353 | 			} else {
354 | 				$jf2['category'] = explode( ',', $meta['parsely-tags'] );
355 | 			}
356 | 		}
357 | 
358 | 		if ( ! isset( $jf2['latitude'] ) && isset( $meta['playfoursquare'] ) ) {
359 | 			$jf2['latitude']  = ifset( $meta['playfoursquare']['location:latitude'] );
360 | 			$jf2['longitude'] = ifset( $meta['playfoursquare']['location:longitude'] );
361 | 		}
362 | 
363 | 		if ( ! isset( $jf2['duration'] ) && isset( $meta['duration'] ) ) {
364 | 			$jf2['duration'] = $meta['duration'];
365 | 		}
366 | 		if ( ! isset( $jf2['published'] ) ) {
367 | 			foreach ( array( 'citation_date', 'datePublished', 'parsely-pub-date' ) as $date ) {
368 | 				if ( isset( $meta[ $date ] ) ) {
369 | 					$jf2['published'] = normalize_iso8601( $meta[ $date ] );
370 | 					break;
371 | 				}
372 | 			}
373 | 		}
374 | 
375 | 		// If Site Name is not set use domain name less www
376 | 		if ( ! isset( $jf2['publication'] ) && isset( $jf2['url'] ) ) {
377 | 			$jf2['publication'] = preg_replace( '/^www\./', '', wp_parse_url( $jf2['url'], PHP_URL_HOST ) );
378 | 		}
379 | 
380 | 		if ( ! isset( $jf2['name'] ) && isset( $meta['parsely-title'] ) ) {
381 | 			$jf2['name'] = $meta['parsely-title'];
382 | 		} elseif ( ! isset( $jf2['name'] ) && isset( $meta['name'] ) ) {
383 | 			$jf2['name'] = $meta['name'];
384 | 		}
385 | 
386 | 		if ( ! isset( $jf2['type'] ) && isset( $meta['parsely-type'] ) ) {
387 | 			$jf2['type'] = ( 'post' === $meta['parsely-type'] ) ? 'entry' : 'feed';
388 | 		}
389 | 
390 | 		return $jf2;
391 | 	}
392 | 
393 | 	/**
394 | 	 * Groups flat meta keys by their prefix.
395 | 	 *
396 | 	 * A key such as 'og:image:width' or 'dc.Title' is split on ':' (or on '.'
397 | 	 * when it contains no ':'). The first segment becomes a group and the key
398 | 	 * is stored inside it with every "prefix:" and "prefix." occurrence
399 | 	 * removed. Keys without a separator are copied unchanged.
400 | 	 *
401 | 	 * @param array $meta Flat key => value meta data. Non-arrays give an empty result.
402 | 	 * @return array Grouped meta data.
403 | 	 */
404 | 	public static function parse_meta( $meta ) {
405 | 		$return = array();
406 | 		if ( is_array( $meta ) ) {
407 | 			foreach ( $meta as $key => $value ) {
408 | 				$name = explode( ':', $key );
409 | 				if ( 1 === count( $name ) ) {
410 | 					$name = explode( '.', $key );
411 | 				}
412 | 				if ( 1 < count( $name ) ) {
413 | 					$name = $name[0];
414 | 					$key  = str_replace( $name . ':', '', $key );
415 | 					$key  = str_replace( $name . '.', '', $key );
416 | 					// Repeated values are de-duplicated and a single survivor is unwrapped.
417 | 					if ( is_array( $value ) ) {
418 | 						$value = array_unique( $value );
419 | 						if ( 1 === count( $value ) ) {
420 | 							$value = array_shift( $value );
421 | 						}
422 | 					}
423 | 					if ( ! isset( $return[ $name ] ) ) {
424 | 						$return[ $name ] = array(
425 | 							$key => $value,
426 | 						);
427 | 					} else {
428 | 						// A bare string stored earlier under the group name is wrapped so it can sit beside the keyed entries.
429 | 						if ( is_string( $return[ $name ] ) ) {
430 | 							$return[ $name ] = array( $return[ $name ] );
431 | 						}
432 | 						$return[ $name ][ $key ] = $value;
433 | 					}
434 | 				} else {
435 | 					$return[ $key ] = $value;
436 | 				}
437 | 			}
438 | 		}
439 | 		return $return;
440 | 	}
441 | 
442 | }
443 | 


--------------------------------------------------------------------------------
/includes/class-parse-this.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | /**
  4 |  * Parse This class.
  5 |  * Originally Derived from the Press This Class with Enhancements.
  6 |  */
  7 | class Parse_This {
  8 | 	private $url = ''; // Source URL after pt_secure_rewrite(); stays '' until a valid URL is supplied.
  9 | 	private $doc; // Result of pt_load_domdocument() for string content; assigned in set().
 10 | 	private $links = array(); // Result of pt_parse_header_links() on the response Link header; assigned in fetch().
 11 | 	private $jf2   = array(); // Preparsed data handed to set() with $jf2 true; what get() returns by default.
 12 | 
 13 | 	private $domain = ''; // Host part of the URL as given by wp_parse_url(); assigned in set().
 14 | 
 15 | 	private $content = ''; // Source content exactly as passed to set().
 16 | 
 17 | 	private $content_type = ''; // NOTE(review): not assigned in the code visible here - confirm where it is set.
 18 | 
 19 | 	/**
 20 | 	 * Constructor.
 21 | 	 *
 22 | 	 * @since x.x.x
 23 | 	 * @access public
 24 | 	 */
 25 | 	public function __construct( $url = null ) {
 26 | 		// An invalid or missing URL leaves $this->url at its current value.
 27 | 		$valid     = wp_http_validate_url( $url );
 28 | 		$this->url = $valid ? pt_secure_rewrite( $url ) : $this->url;
 29 | 	}
 30 | 
 31 | 	public function get( $key = 'jf2' ) {
 32 | 		if ( 'mf2' === $key ) {
 33 | 			return jf2_to_mf2( $this->jf2 ); // Derived on demand rather than stored.
 34 | 		}
 35 | 		// Look $key up among the property names; in_array() searched the property values instead.
 36 | 		$known = is_string( $key ) && array_key_exists( $key, get_object_vars( $this ) );
 37 | 		$key   = $known ? $key : 'jf2'; // Unknown keys fall back to the jf2 data.
 38 | 		return $this->$key;
 39 | 	}
 40 | 
 41 | 	/**
 42 | 	 * Cleans HTML content.
 43 | 	 *
 44 | 	 * @param string $content HTML content to be cleaned.
 45 | 	 * @param array $strip Any keys in this array will be removed from the allowed tags that are retained when cleaned.
 46 | 	 *
 47 | 	 * @return string Clean Content.
 48 | 	 */
 49 | 	public static function clean_content( $content, $strip = array() ) {
 50 | 		if ( ! is_string( $content ) ) {
 51 | 			return $content;
 52 | 		}
 53 | 		// Decode escaped entities so that they can be stripped
 54 | 		$content = html_entity_decode( $content, ENT_COMPAT | ENT_HTML401, 'UTF-8' );
 55 | 		// The dotall form avoids the per-character alternation of the old pattern; keep the input if PCRE still fails.
 56 | 		$uncommented = preg_replace( '/<!--.*?-->/s', '', $content );
 57 | 		$domdocument = pt_load_domdocument( is_string( $uncommented ) ? $uncommented : $content );
 58 | 		// getElementsByTagName() returns a live list: snapshot it, or each removal makes the loop skip the next node.
 59 | 		foreach ( iterator_to_array( $domdocument->getElementsByTagName( 'script' ) ) as $item ) {
 60 | 			$item->parentNode->removeChild( $item ); // phpcs:ignore
 61 | 		}
 62 | 		$content = $domdocument->saveHTML();
 63 | 		// Elements (keys) and their attributes that are allowed through wp_kses().
 64 | 		$allowed = array(
 65 | 			'a'          => array(
 66 | 				'href' => array(),
 67 | 				'name' => array(),
 68 | 			),
 69 | 			'abbr'       => array(),
 70 | 			'b'          => array(),
 71 | 			'br'         => array(),
 72 | 			'code'       => array(),
 73 | 			'ins'        => array(),
 74 | 			'del'        => array(),
 75 | 			'em'         => array(),
 76 | 			'i'          => array(),
 77 | 			'q'          => array(),
 78 | 			'strike'     => array(),
 79 | 			'strong'     => array(),
 80 | 			'time'       => array(
 81 | 				'datetime' => array(),
 82 | 			),
 83 | 			'blockquote' => array(),
 84 | 			'pre'        => array(),
 85 | 			'p'          => array(),
 86 | 			'h1'         => array(),
 87 | 			'h2'         => array(),
 88 | 			'h3'         => array(),
 89 | 			'h4'         => array(),
 90 | 			'h5'         => array(),
 91 | 			'h6'         => array(),
 92 | 			'ul'         => array(),
 93 | 			'li'         => array(),
 94 | 			'ol'         => array(),
 95 | 			'span'       => array(),
 96 | 			'img'        => array(
 97 | 				'src'    => array(),
 98 | 				'alt'    => array(),
 99 | 				'title'  => array(),
100 | 				'width'  => array(),
101 | 				'height' => array(),
102 | 				'srcset' => array(),
103 | 			),
104 | 			'figure'     => array(),
105 | 			'figcaption' => array(),
106 | 			'picture'    => array(
107 | 				'srcset' => array(),
108 | 				'type'   => array(),
109 | 			),
110 | 			'video'      => array(
111 | 				'poster' => array(),
112 | 				'src'    => array(),
113 | 			),
114 | 			'audio'      => array(
115 | 				'duration' => array(),
116 | 				'src'      => array(),
117 | 			),
118 | 			'track'      => array(
119 | 				'label'   => array(),
120 | 				'src'     => array(),
121 | 				'srclang' => array(),
122 | 				'kind'    => array(),
123 | 			),
124 | 			'source'     => array(
125 | 				'src'    => array(),
126 | 				'srcset' => array(),
127 | 				'type'   => array(),
128 | 
129 | 			),
130 | 			'hr'         => array(),
131 | 		);
132 | 		if ( ! empty( $strip ) ) {
133 | 			$allowed = array_diff_key( $allowed, $strip );
134 | 		}
135 | 		return trim( wp_kses( $content, $allowed ) );
136 | 	}
137 | 
138 | 	/**
139 | 	 * Sets the source.
140 | 	 *
141 | 	 * @since x.x.x
142 | 	 * @access public
143 | 	 *
144 | 	 * @param string $source_content source content.
145 | 	 * @param string $url Source URL
146 | 	 * @param bool   $jf2 If true, $source_content is stored directly as preparsed jf2.
147 | 	 */
148 | 	public function set( $source_content, $url, $jf2 = false ) {
149 | 		$this->content = $source_content;
150 | 		$url_is_valid  = wp_http_validate_url( $url );
151 | 		// The host is taken from the URL as given; only the stored URL goes through pt_secure_rewrite().
152 | 		$this->url    = $url_is_valid ? pt_secure_rewrite( $url ) : $this->url;
153 | 		$this->domain = $url_is_valid ? wp_parse_url( $url, PHP_URL_HOST ) : $this->domain;
154 | 		if ( $jf2 ) {
155 | 			$this->jf2 = $source_content; // Preparsed input is stored untouched.
156 | 			return;
157 | 		}
158 | 		$this->doc = is_string( $source_content ) ? pt_load_domdocument( $source_content ) : $this->doc;
159 | 	}
160 | 
161 | 	/*
162 | 	 Reproduced version of fetch_feed from core which calls bundled SimplePie instead of older version
163 | 	*/
164 | 	public static function fetch_feed( $url ) {
165 | 		$url = pt_secure_rewrite( $url ); // Same rewrite this class applies to the URLs it stores.
166 | 		if ( ! class_exists( 'SimplePie', false ) ) { // false: do not trigger autoloading for this check.
167 | 			require_once ABSPATH . WPINC . '/class-simplepie.php';
168 | 		}
169 | 		require_once ABSPATH . WPINC . '/class-wp-feed-cache-transient.php';
170 | 		require_once ABSPATH . WPINC . '/class-wp-simplepie-file.php';
171 | 		require_once ABSPATH . WPINC . '/class-wp-simplepie-sanitize-kses.php';
172 | 		$feed = new SimplePie();
173 | 
174 | 		// Register the cache handler using the recommended method for SimplePie 1.3 or later.
175 | 		if ( method_exists( 'SimplePie_Cache', 'register' ) ) {
176 | 			SimplePie_Cache::register( 'wp_transient', 'WP_Feed_Cache_Transient' );
177 | 			$feed->set_cache_location( 'wp_transient' );
178 | 		} else {
179 | 			// Back-compat for SimplePie 1.2.x.
180 | 			require_once ABSPATH . WPINC . '/class-wp-feed-cache.php';
181 | 			$feed->set_cache_class( 'WP_Feed_Cache' );
182 | 		}
183 | 
184 | 		$feed->set_file_class( 'WP_SimplePie_File' );
185 | 		$feed->enable_cache( false ); // Caching is switched off even though a cache handler was registered above.
186 | 		$feed->set_feed_url( $url );
187 | 		$feed->strip_htmltags( false ); // NOTE(review): presumably turns off SimplePie's tag stripping - confirm against the SimplePie API.
188 | 		/**
189 | 		 * Fires just before processing the SimplePie feed object.
190 | 		 *
191 | 		 * @since 3.0.0
192 | 		 *
193 | 		 * @param object $feed SimplePie feed object (passed by reference).
194 | 		 * @param mixed  $url  URL of feed to retrieve. If an array of URLs, the feeds are merged.
195 | 		 */
196 | 		do_action_ref_array( 'wp_feed_options', array( &$feed, $url ) );
197 | 		$feed->init();
198 | 		$feed->set_output_encoding( get_option( 'blog_charset' ) );
199 | 
200 | 		if ( $feed->error() ) { // Any SimplePie error is handed back as a WP_Error instead of the feed object.
201 | 			return new WP_Error( 'simplepie-error', $feed->error() );
202 | 		}
203 | 
204 | 		return $feed;
205 | 	}
206 | 
207 | 	/**
208 | 	 * Checks whether a content type is one of the supported types.
209 | 	 *
210 | 	 * @param string $content_type
211 | 	 * @return boolean if supported
212 | 	 */
213 | 	public function supported_content( $content_type ) {
214 | 		$supported = array(
215 | 			'application/mf2+json'     => true,
216 | 			'text/html'                => true,
217 | 			'application/json'         => true,
218 | 			'application/feed+json'    => true,
219 | 			'application/xml'          => true,
220 | 			'text/xml'                 => true,
221 | 			'application/jf2+json'     => true,
222 | 			'application/jf2feed+json' => true,
223 | 			'application/rss+xml'      => true,
224 | 			'application/atom+xml'     => true,
225 | 		);
226 | 		return is_string( $content_type ) && isset( $supported[ $content_type ] ); // Exact string match only.
227 | 	}
228 | 
229 | 	public static function redirect( $url, $allowlist = true ) {
230 | 		if ( empty( $url ) || ! wp_http_validate_url( $url ) ) {
231 | 			return new WP_Error( 'invalid-url', __( 'A valid URL was not provided.', 'indieweb-post-kinds' ) );
232 | 		}
233 | 		$url        = pt_secure_rewrite( $url );
234 | 		$domain     = wp_parse_url( $url, PHP_URL_HOST );
235 | 		$shorteners = array( 'fb.me', 't.co', 'youtu.be', 'ow.ly', 'bit.ly', 'tinyurl.com' );
236 | 		// With $allowlist false only the listed shortener hosts are probed; with true every host is.
237 | 		if ( ! $allowlist && ! in_array( $domain, $shorteners, true ) ) {
238 | 			return false;
239 | 		}
240 | 		// The never-used $user_agent and $response_code locals were removed; the request itself is unchanged.
241 | 		$args     = array(
242 | 			'timeout'             => 15,
243 | 			'limit_response_size' => 1048576,
244 | 			'redirection'         => 0,
245 | 		);
246 | 		$response = wp_safe_remote_get( $url, $args );
247 | 		$redirect = wp_remote_retrieve_header( $response, 'location' );
248 | 		if ( ! $redirect ) {
249 | 			return false;
250 | 		}
251 | 		return ( normalize_url( $redirect ) !== normalize_url( $url ) ) ? $redirect : false;
252 | 	}
253 | 
	/**
	 * Downloads the source for the given URL via a server-side call.
	 *
	 * @param string $url URL to scan. Defaults to the URL already stored on the object.
	 * @return WP_Error|boolean WP_Error if the URL is invalid or the response cannot be handled,
	 *                          false if an XML feed could not be loaded, true if successful.
	 */
260 | 	public function fetch( $url = null ) {
261 | 		if ( ! $url ) {
262 | 			$url = $this->url;
263 | 		}
264 | 		if ( empty( $url ) || ! wp_http_validate_url( $url ) ) {
265 | 			return new WP_Error( 'invalid-url', __( 'A valid URL was not provided.', 'indieweb-post-kinds' ) );
266 | 		}
267 | 		$user_agent = 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:57.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36 Parse This/WP';
268 | 		$args       = array(
269 | 			'timeout'             => 15,
270 | 			'limit_response_size' => 1048576,
271 | 			'redirection'         => 5,
272 | 		// Use an explicit user-agent for Parse This
273 | 		);
274 | 
275 | 		$response = wp_safe_remote_get( $url, $args );
276 | 
277 | 		$raw = wp_remote_retrieve_header( $response, 'link' );
278 | 		if ( is_string( $raw ) ) {
279 | 			$raw = explode( ',', $raw );
280 | 		}
281 | 
282 | 		if ( is_array( $raw ) && 1 <= count( $raw ) ) {
283 | 			$this->links = pt_parse_header_links( $raw );
284 | 		}
285 | 
286 | 		$response_code      = wp_remote_retrieve_response_code( $response );
287 | 		$this->content_type = wp_remote_retrieve_header( $response, 'content-type' );
288 | 		if ( in_array( $response_code, array( 403, 415 ), true ) ) {
289 | 			$args['user-agent'] = $user_agent;
290 | 			$response           = wp_safe_remote_get( $url, $args );
291 | 			$response_code      = wp_remote_retrieve_response_code( $response );
292 | 			if ( in_array( $response_code, array( 403, 415 ), true ) ) {
293 | 				return new WP_Error( 'source_error', 'Unable to Retrieve' );
294 | 			}
295 | 		}
296 | 		if ( is_array( $this->content_type ) ) {
297 | 			$this->content_type = array_pop( $this->content_type );
298 | 		}
299 | 						// Strip any character set off the content type
300 | 						$ct = explode( ';', $this->content_type );
301 | 		if ( is_array( $ct ) ) {
302 | 			$this->content_type = array_shift( $ct );
303 | 		}
304 | 						$this->content_type = trim( $this->content_type );
305 | 						// List of content types we know how to handle
306 | 		if ( ! self::supported_content( $this->content_type ) ) {
307 | 			return new WP_Error( 'content-type', 'Content Type is Not Supported', array( 'content-type' => $content_type ) );
308 | 		}
309 | 
310 | 		$content = wp_remote_retrieve_body( $response );
311 | 
312 | 		// This is an RSS or Atom Feed URL and if it is not we do not know how to deal with XML anyway
313 | 		if ( class_exists( 'Parse_This_RSS' ) && ( in_array( $this->content_type, array( 'application/rss+xml', 'application/atom+xml', 'text/xml', 'application/xml', 'text/xml' ), true ) ) ) {
314 | 			// Get a SimplePie feed object from the specified feed source.
315 | 			$content = self::fetch_feed( $url );
316 | 			if ( is_wp_error( $content ) ) {
317 | 				return false;
318 | 			}
319 | 
320 | 			$this->set( $content, $url, true );
321 | 			return true;
322 | 		}
323 | 
324 | 		if ( in_array( $this->content_type, array( 'application/mf2+json', 'application/jf2+json', 'application/jf2feed+json' ), true ) ) {
325 | 			$content = json_decode( $content, true );
326 | 			return true;
327 | 		}
328 | 
329 | 		if ( in_array( $this->content_type, array( 'application/feed+json', 'application/json' ), true ) ) {
330 | 			$content = json_decode( $content, true );
331 | 
332 | 			if ( class_exists( 'Parse_This_JSONFeed' ) && isset( $content['version'] ) && false !== strpos( $content['version'], 'https://jsonfeed.org/version/' ) ) {
333 | 				$content = Parse_This_JSONFeed::to_jf2( $content, $url );
334 | 				$this->set( $content, $url, true );
335 | 				// This means we are probing a specific REST Endpoint as they return this.
336 | 			} elseif ( wp_remote_retrieve_header( $response, 'x-wp-total' ) ) {
337 | 				$content           = Parse_This_RESTAPI::posts_to_feed( $content, $url );
338 | 				$content['_total'] = wp_remote_retrieve_header( $response, 'x-wp-total' );
339 | 				$content['_pages'] = wp_remote_retrieve_header( $response, 'x-wp-totalpages' );
340 | 
341 | 				$this->set( $content, $url, true );
342 | 			}
343 | 		}
344 | 
345 | 		$this->set( $content, $url, ( 'application/jf2+json' === $this->content_type ) );
346 | 		return true;
347 | 	}
348 | 
349 | 	public function parse( $args = array() ) {
350 | 		$defaults = array(
351 | 			'alternate'  => false, // check for rel-alternate jf2 or mf2 feed
352 | 			'return'     => 'single', // Options are single, feed or TBC mention
353 | 			'follow'     => false, // If set to true h-card and author properties with external urls will be retrieved parsed and merged into the return
354 | 			'limit'      => 150, // Limit the number of children returned.
355 | 			'jsonld'     => true,  // Try JSON-LD parsing
356 | 			'html'       => true, // If mf2 parsing does not work look for html parsing which includes OGP, meta tags, and title tags
357 | 			'references' => true, // Store nested citations as references per the JF2 spec
358 | 			'location'   => false, // Collapse location parameters in jf2. Specifically, location will be a string and latitude, longitude, and altitude will be set as h-entry properties.
359 | 		);
360 | 		$args     = wp_parse_args( $args, $defaults );
361 | 		// If not an option then revert to single
362 | 		if ( ! in_array( $args['return'], array( 'single', 'feed' ), true ) ) {
363 | 			$args['return'] = 'single';
364 | 		}
365 | 		if ( class_exists( 'Parse_This_RSS' ) && $this->content instanceof SimplePie ) {
366 | 			$this->jf2 = Parse_This_RSS::parse( $this->content, $this->url );
367 | 
368 | 			return;
369 | 		} elseif ( $this->doc instanceof DOMDocument ) {
370 | 			$content = $this->doc;
371 | 		} else {
372 | 			$content = $this->content;
373 | 		}
374 | 		if ( ! $content ) {
375 | 			return new WP_Error( 'Missing Content' );
376 | 		}
377 | 
378 | 		if ( 'application/json' === $this->content_type ) {
379 | 			$this->jf2 = Parse_This_RESTAPI::parse( $content, $this->url, $args );
380 | 			if ( ! empty( $this->jf2 ) ) {
381 | 				$this->jf2['_rest'] = $content;
382 | 				return;
383 | 			}
384 | 		}
385 | 
386 | 		if ( ! is_array( $this->jf2 ) ) {
387 | 			$this->jf2 = array(
388 | 				'raw' => $this->jf2,
389 | 				'url' => $this->url,
390 | 			);
391 | 			return;
392 | 		}
393 | 
394 | 		// Ensure not already preparsed
395 | 		if ( empty( $this->jf2 ) ) {
396 | 			$this->jf2 = Parse_This_MF2::parse( $content, $this->url, $args );
397 | 		}
398 | 
399 | 		$more = array();
400 | 
401 | 		// If No MF2 or if the parsed jf2 is missing any sort of content then try to find it in the HTML
402 | 		if ( isset( $this->jf2['type'] ) && 'card' === $this->jf2['type'] ) {
403 | 			$more = array_intersect( array_keys( $this->jf2 ), array( 'name', 'url', 'photo' ) );
404 | 		} else {
405 | 			$more = array_intersect( array_keys( $this->jf2 ), array( 'summary', 'content', 'refs', 'items' ) );
406 | 			if ( empty( $more ) ) {
407 | 				$this->set( array( '_jf2' => $this->jf2 ), $this->url, true );
408 | 			}
409 | 		}
410 | 		if ( ! isset( $this->jf2['url'] ) ) {
411 | 			$this->jf2['url'] = $this->url;
412 | 		}
413 | 
414 | 		if ( empty( $more ) ) {
415 | 			$alt = null;
416 | 			$jf2 = $this->jf2['_jf2'];
417 | 
418 | 			$empty = true;
419 | 
420 | 			if ( ! empty( $this->links ) ) {
421 | 				$endpoint = pt_find_rest_endpoint( $this->links );
422 | 				$rest     = pt_find_rest_alternate( $this->links );
423 | 				if ( $endpoint && $rest ) {
424 | 					$empty        = false;
425 | 					$path         = Parse_This_RESTAPI::get_rest_path( $endpoint, $rest );
426 | 					$fetch        = Parse_This_RESTAPI::fetch( $endpoint, $path );
427 | 					$alt          = Parse_This_RESTAPI::parse( $fetch, $endpoint, $args );
428 | 					$alt['_rest'] = $fetch;
429 | 				}
430 | 			}
431 | 
432 | 			if ( $empty && $args['jsonld'] ) {
433 | 				$alt = Parse_This_JSONLD::parse( $this->doc, $this->url, $args );
434 | 			}
435 | 
436 | 			if ( empty( $alt ) ) {
437 | 				$empty = true;
438 | 			} elseif ( is_countable( $alt ) && 1 === count( $alt ) && array_key_exists( '_jsonld', $alt ) ) {
439 | 				$empty = true;
440 | 			} else {
441 | 				$empty = false;
442 | 			}
443 | 			if ( $empty && $args['html'] ) {
444 | 				$args['alternate'] = true;
445 | 				if ( in_array( wp_parse_url( $this->url, PHP_URL_HOST ), array( 'youtube.com', 'www.youtube.com', 'm.youtube.com', 'youtu.be' ), true ) ) {
446 | 					$alt = Parse_This_YouTube::parse( $this->content, $this->url, $args );
447 | 				} elseif ( in_array( wp_parse_url( $this->url, PHP_URL_HOST ), array( 'www.instagram.com', 'instagram.com' ), true ) ) {
448 | 					$alt = Parse_This_Instagram::parse( $this->doc, $this->url, $args );
449 | 				} elseif ( in_array( wp_parse_url( $this->url, PHP_URL_HOST ), array( 'twitter.com', 'mobile.twitter.com' ), true ) ) {
450 | 					$alt = Parse_This_Twitter::parse( $this->url, $args );
451 | 				}
452 | 				if ( ! $alt ) {
453 | 					$alt = Parse_This_HTML::parse( $content, $this->url, $args );
454 | 				}
455 | 			}
456 | 			$json      = Parse_This_JSON::parse( $this->doc, $this->url, $args );
457 | 			$this->jf2 = array_merge( $this->jf2, $json );
458 | 			$this->jf2 = array_merge( $this->jf2, $alt );
459 | 			if ( ! empty( $jf2 ) ) {
460 | 				if ( isset( $jf2['author'] ) ) {
461 | 					if ( isset( $this->jf2['author'] ) && is_string( $this->jf2['author'] ) ) {
462 | 						$jf2['author']['name'] = $this->jf2['author'];
463 | 					}
464 | 					$this->jf2['author']   = $jf2['author'];
465 | 				}
466 | 			}
467 | 			if ( isset( $alt['author'] ) && is_array( $this->jf2['author'] ) && ! wp_is_numeric_array( $this->jf2['author'] ) && ! isset( $this->jf2['author']['name'] ) ) {
468 | 				$this->jf2['author']['name'] = $alt['author'];
469 | 			}  
470 | 		} 
471 | 		if ( ! isset( $this->jf2['url'] ) ) {
472 | 			$this->jf2['url'] = $this->url;
473 | 		}
474 | 			// Expand Short URLs in summary
475 | 		if ( isset( $this->jf2['summary'] ) ) {
476 | 			$urls = wp_extract_urls( $this->jf2['summary'] );
477 | 			foreach ( $urls as $url ) {
478 | 				$redirect = self::redirect( $url );
479 | 				if ( $redirect && ! is_wp_error( $redirect ) ) {
480 | 					$this->jf2['_urls'][] = $redirect;
481 | 					$this->jf2['summary'] = str_replace( $url, $redirect, $this->jf2['summary'] );
482 | 				}
483 | 			}
484 | 		}
485 | 		if ( isset( $this->jf2['location'] ) && $args['location'] ) {
486 | 			$this->jf2 = jf2_location( $this->jf2 );
487 | 		}
488 | 
489 | 		$this->jf2['_links'] = $this->links;
490 | 	}
491 | }
492 | 


--------------------------------------------------------------------------------