├── .gitignore ├── README.md ├── composer.json ├── example.php └── ogp └── Parser.php /.gitignore: -------------------------------------------------------------------------------- 1 | /nbproject/ 2 | .AppleDouble -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | PHP Open Graph Library 2 | ====================== 3 | 4 | This is a very very simple PHP open graph parser. 5 | 6 | Pass it the contents of a web page, and it'll spit back an associative array of open graph tags and their values. 7 | 8 | Usage 9 | ----- 10 | 11 | Include the library and call Parser's ::parse() function. 12 | 13 | Example: 14 | 15 | ```php 16 | 17 | require_once('ogp/Parser.php'); 18 | 19 | $content = file_get_contents("https://www.youtube.com/watch?v=EIGGsZZWzZA"); 20 | 21 | print_r(\ogp\Parser::parse($content)); 22 | ``` 23 | 24 | Installation 25 | ------------ 26 | 27 | To install this in your project: 28 | 29 | ``` 30 | composer require mapkyca/php-ogp 31 | ``` 32 | 33 | Author 34 | ------ 35 | 36 | * Marcus Povey 37 | 38 | See 39 | --- 40 | 41 | * Me 42 | * Open Graph 43 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mapkyca/php-ogp", 3 | "version": "1.1.0", 4 | "description": "Simple PHP Open Graph Protocol parser library", 5 | "license": "GPL-2.0-only", 6 | "keywords": ["opengraph"], 7 | "authors": [ 8 | { 9 | "name": "Marcus Povey", 10 | "email": "marcus@marcus-povey.co.uk", 11 | "homepage": "https://www.marcus-povey.co.uk", 12 | "role": "Developer" 13 | } 14 | ], 15 | "require": { 16 | "php": ">=5.6.0" 17 | }, 18 | "autoload": { 19 | "psr-4": { 20 | "ogp\\": "ogp/" 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /example.php: 
/**
 * Open Graph Protocol parser.
 *
 * Usage:
 *
 *     $content = file_get_contents("https://www.youtube.com/watch?v=EIGGsZZWzZA");
 *
 *     print_r(\ogp\Parser::parse($content));
 *
 * @author Marcus Povey
 * @licence GPL2
 */

namespace ogp {

    use DOMDocument;

    class Parser
    {
        /**
         * Parse an HTML page into an array of Open Graph style metadata.
         *
         * The returned array may contain the following keys, in this order:
         *  - 'og', 'fb', 'twitter': maps of the full tag name (e.g. 'og:title')
         *    to its content. A tag that occurs once maps to a string; a tag
         *    that occurs several times maps to a list of strings in document
         *    order.
         *  - 'oembed': discovered oEmbed endpoints, as lists of URLs under
         *    'jsonp', 'json' and/or 'xml'.
         *  - 'title': text between the first <title> tags, trimmed of spaces
         *    and newlines.
         *  - 'description', 'keywords': content of the matching <meta name>
         *    tags (last occurrence wins).
         *
         * Keys are only present when the corresponding data was found, so an
         * empty page yields an empty array.
         *
         * @param string $content The HTML to parse.
         * @return array
         */
        public static function parse($content)
        {
            $content = (string) $content;

            // Fudge to handle a situation when an encoding isn't present:
            // a leading XML encoding declaration makes libxml read the markup
            // as UTF-8. It also guarantees the string handed to loadHTML() is
            // never empty.
            if (strpos($content, 'xml encoding=') === false) {
                $content = '<?xml encoding="utf-8" ?>' . $content;
            }

            $doc = self::loadDocument($content);
            $metas = $doc->getElementsByTagName('meta');
            $links = $doc->getElementsByTagName('link');

            $ogp = self::collectGraphTags($metas, []);
            $ogp = self::collectOEmbedLinks($links, $ogp);
            $ogp = self::parseTwitterOEmbed($links, $ogp);

            // Basics
            foreach (['title'] as $basic) {
                $tag = preg_quote($basic, '#');
                // Attributes on the opening tag are tolerated; the closing tag
                // bounds the lazy capture so it cannot match an empty string.
                $pattern = '#<' . $tag . '\b[^>]*>(.*?)</' . $tag . '\s*>#siu';
                if (preg_match($pattern, $content, $matches)) {
                    $ogp[$basic] = trim($matches[1], " \n");
                }
            }

            foreach ($metas as $meta) {
                $name = strtolower($meta->getAttribute('name'));
                if ($name === 'description' || $name === 'keywords') {
                    $ogp[$name] = $meta->getAttribute('content');
                }
            }

            return $ogp;
        }

        /**
         * Load markup into a DOM document without emitting libxml warnings.
         *
         * Parse problems are buffered and discarded rather than silenced with
         * the @ operator; the caller's libxml error mode is restored afterwards.
         *
         * @param string $content Non-empty markup.
         * @return DOMDocument
         */
        private static function loadDocument($content)
        {
            $doc = new DOMDocument();

            $previous = libxml_use_internal_errors(true);
            $doc->loadHTML($content);
            libxml_clear_errors();
            libxml_use_internal_errors($previous);

            return $doc;
        }

        /**
         * Collect og:/fb:/twitter: meta tags, grouped by their prefix.
         *
         * @param \DOMNodeList $metas The document's <meta> elements.
         * @param array $ogp Result array to extend.
         * @return array
         */
        private static function collectGraphTags(\DOMNodeList $metas, array $ogp)
        {
            // Open graph namespaces we're interested in (open graph + extensions)
            $interestedIn = ['og', 'fb', 'twitter'];

            foreach ($metas as $meta) {
                // A tag may carry its key in "name", "property", or both. When
                // both hold the same key the tag must only be counted once.
                $keys = array_unique([
                    $meta->getAttribute('name'),
                    $meta->getAttribute('property'),
                ]);

                foreach ($keys as $key) {
                    $prefix = explode(':', $key)[0];
                    if (!in_array($prefix, $interestedIn, true)) {
                        continue;
                    }

                    $value = $meta->getAttribute('content');

                    if (!isset($ogp[$prefix][$key])) {
                        $ogp[$prefix][$key] = $value;
                    } elseif (is_array($ogp[$prefix][$key])) {
                        $ogp[$prefix][$key][] = $value;
                    } else {
                        // Second occurrence: promote the stored string to a list.
                        $ogp[$prefix][$key] = [$ogp[$prefix][$key], $value];
                    }
                }
            }

            return $ogp;
        }

        /**
         * Collect oEmbed discovery links (<link rel="alternate" type="...+oembed">).
         *
         * @param \DOMNodeList $links The document's <link> elements.
         * @param array $ogp Result array to extend.
         * @return array
         */
        private static function collectOEmbedLinks(\DOMNodeList $links, array $ogp)
        {
            // Link type => key under $ogp['oembed']. Note that the
            // application/json type is filed under 'jsonp'; the key names are
            // part of the returned structure and are kept as they are.
            $types = [
                'application/json+oembed' => 'jsonp',
                'text/json+oembed' => 'json',
                'text/xml+oembed' => 'xml',
            ];

            foreach ($links as $link) {
                if (strtolower($link->getAttribute('rel')) !== 'alternate') {
                    continue;
                }

                $type = strtolower($link->getAttribute('type'));
                if (isset($types[$type])) {
                    $ogp['oembed'][$types[$type]][] = $link->getAttribute('href');
                }
            }

            return $ogp;
        }

        /**
         * Derive a Twitter oEmbed endpoint from the page's canonical link.
         *
         * Only applies when no 'jsonp' oEmbed endpoint was discovered. The
         * first <link rel="canonical"> is inspected; if it points at
         * twitter.com (optionally www. or mobile.) over https, a
         * publish.twitter.com oEmbed URL is stored under $ogp['oembed']['jsonp'].
         * Other oEmbed entries already present are left untouched.
         *
         * For Twitter API reference,
         * see https://developer.twitter.com/en/docs/twitter-api/v1/tweets/post-and-engage/api-reference/get-statuses-oembed
         *
         * @param \DOMNodeList $metas The document's <link> elements.
         * @param array $ogp Result array to extend.
         * @return array
         */
        private static function parseTwitterOEmbed(\DOMNodeList $metas, array $ogp): array
        {
            if (isset($ogp['oembed']['jsonp'])) {
                return $ogp;
            }

            foreach ($metas as $link) {
                if (!$link instanceof \DOMElement
                    || strtolower($link->getAttribute('rel')) !== 'canonical'
                ) {
                    continue;
                }

                $href = $link->getAttribute('href');

                // The host must end right after "twitter.com" so that look-alike
                // hosts such as "twitter.com.example.org" are rejected.
                if (preg_match('~^https://(?:www\.|mobile\.)?twitter\.com(?:[/?#]|$)~i', $href) === 1) {
                    $ogp['oembed']['jsonp'] = [
                        'https://publish.twitter.com/oembed?url=' . $href . '&align=center',
                    ];
                }

                // Only the first canonical link is considered.
                break;
            }

            return $ogp;
        }
    }
}