├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── example ├── main.dart └── parse_document.dart ├── lib ├── metadata_fetch.dart └── src │ ├── metadata_fetch_base.dart │ ├── parsers │ ├── base_parser.dart │ ├── htmlmeta_parser.dart │ ├── jsonld_parser.dart │ ├── metadata_parser.dart │ ├── opengraph_parser.dart │ ├── parsers.dart │ └── twittercard_parser.dart │ └── utils │ └── util.dart ├── pubspec.yaml └── test └── metadata_fetch_test.dart /.gitignore: -------------------------------------------------------------------------------- 1 | .dart_tool/ 2 | .packages 3 | pubspec.lock 4 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.4.2 2 | - Updated SDK constraint to `">=2.12.0 <4.0.0"` 3 | - Updated dependencies: 4 | - `http` from `^0.13.3` to `^1.2.2` 5 | - `string_validator` from `^0.3.0` to `^1.1.0` 6 | - `html` from `^0.15.0` to `^0.15.4` 7 | - Replaced `pedantic` with `lints` `^4.0.0` 8 | - `test` from `^1.17.5` to `^1.25.8` 9 | 10 | ## 0.4.1 11 | - Improve Documentation 12 | 13 | ## 0.4.0 14 | 15 | - NNBD (Null Safety) enabled 16 | - Various API changes 17 | - `extract()` and `responsetoDocument` are now encapsulated inside the `MetadataFetch` class. This is to avoid global clashes when imported. 18 | - Removed the `requestURL` extension which served as a static variable attached to `http.Document`, which caused problems. 19 | - `Metadata.parse(document, url: myURL)` now accepts the `url` keyword argument. This is used as a fallback url in `Metadata.url` and to resolve relative (non-absolute URL) images. 
20 | 21 | ## 0.3.4 22 | 23 | - Fix resolution of relative URL for images 24 | 25 | ## 0.3.3 26 | 27 | - Relative image url now uses the absolute path 28 | 29 | ## 0.3.2 30 | 31 | - Improved JsonLD Parser 32 | 33 | ## 0.3.1 34 | 35 | - Added JSON serialization 36 | 37 | ## 0.3.0 38 | 39 | - Added Twitter Card Parser 40 | - Metadata structure now includes url 41 | 42 | ## 0.2.1 43 | 44 | - Minor Improvements 45 | 46 | ## 0.2.0 47 | 48 | - Improve API and generalized the Metadata Parser 49 | - Added more documentation 50 | 51 | ## 0.1.1 52 | 53 | - Various improvements throughout 54 | 55 | ## 0.1.0 56 | 57 | - Initial version, created by Stagehand 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2020 Jethro Lising 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Metadata Fetch 2 | A dart library for extracting metadata in web pages. Supports OpenGraph, Meta, Twitter Cards, and Structured Data (Json-LD) 3 | 4 | Available on Pub Dev: 5 | [Pub](https://pub.dev/packages/metadata_fetch) 6 | 7 | ## Metadata Structure 8 | 9 | ```yaml 10 | Metadata: 11 | - title 12 | - description 13 | - image 14 | - url 15 | ``` 16 | 17 | ## Usage 18 | 19 | 20 | ### Extract Metadata for a given URL 21 | 22 | ```dart 23 | import 'package:metadata_fetch/metadata_fetch.dart'; 24 | 25 | main() async { 26 | final myURL = 'https://flutter.dev'; 27 | 28 | // Use the `MetadataFetch.extract()` function to fetch data from the url 29 | var data = await MetadataFetch.extract(myURL); 30 | 31 | print(data.title) // Flutter - Beautiful native apps in record time 32 | 33 | print(data.description) // Flutter is Google's UI toolkit for crafting beautiful... 34 | 35 | print(data.image) // https://flutter.dev/images/flutter-logo-sharing.png 36 | 37 | print(data.url) // https://flutter.dev/ 38 | 39 | var dataAsMap = data.toMap(); 40 | 41 | 42 | } 43 | ``` 44 | 45 | ### Parsing Manually 46 | 47 | #### Get aggregated Metadata from a document 48 | 49 | This method prioritizes Open Graph data, followed by Twitter Card, JSON-LD and finally falls back to HTML metadata. 
50 | 51 | 52 | ```dart 53 | import 'package:metadata_fetch/metadata_fetch.dart'; 54 | import 'package:http/http.dart' as http; 55 | 56 | void main () async { 57 | 58 | final myURL = 'https://flutter.dev'; 59 | 60 | // makes a call 61 | var response = await http.get(Uri.parse(myURL)); 62 | 63 | // Convert Response to a Document. The utility function `MetadataFetch.responseToDocument` is provided or you can use your own decoder/parser. 64 | var document = MetadataFetch.responseToDocument(response); 65 | 66 | 67 | // get aggregated metadata 68 | var data = MetadataParser.parse(document); 69 | print(data); 70 | 71 | 72 | } 73 | 74 | ``` 75 | 76 | #### Manually specify which Metadata parser to use 77 | 78 | ```dart 79 | import 'package:metadata_fetch/metadata_fetch.dart'; 80 | import 'package:http/http.dart' as http; 81 | 82 | void main () async { 83 | 84 | final myURL = 'https://flutter.dev'; 85 | 86 | // Makes a call 87 | var response = await http.get(Uri.parse(myURL)); 88 | 89 | // Convert Response to a Document. The utility function `MetadataFetch.responseToDocument` is provided or you can use your own decoder/parser. 90 | var document = MetadataFetch.responseToDocument(response); 91 | 92 | 93 | // Get OpenGraph Metadata 94 | var ogData = MetadataParser.openGraph(document); 95 | print(ogData); 96 | 97 | // Get Html metadata 98 | var htmlData = MetadataParser.htmlMeta(document); 99 | print(htmlData); 100 | 101 | // Get Structured Data 102 | var structuredData = MetadataParser.jsonLdSchema(document); 103 | print(structuredData); 104 | 105 | // Get Twitter Cards Data 106 | var twitterCardData = MetadataParser.twitterCard(document); 107 | print(twitterCardData); 108 | 109 | } 110 | ``` 111 | 112 | #### Provide a fallback url when manually parsing 113 | 114 | If the parsers cannot extract a URL from the document, you may optionally provide a URL in `MetadataParser.parse()`. 115 | 116 | This URL will be added in the final `Metadata` structure, and is used to resolve images with relative URLs (non-absolute URLs). 
117 | 118 | ```dart 119 | import 'package:metadata_fetch/metadata_fetch.dart'; 120 | import 'package:http/http.dart' as http; 121 | 122 | void main () async { 123 | 124 | final myURL = 'https://flutter.dev'; 125 | 126 | // makes a call 127 | var response = await http.get(Uri.parse(myURL)); 128 | 129 | // Convert Response to a Document. The utility function `MetadataFetch.responseToDocument` is provided or you can use your own decoder/parser. 130 | var document = MetadataFetch.responseToDocument(response); 131 | 132 | 133 | // get aggregated metadata, supplying a fallback URL 134 | // Used for images with relative URLs 135 | var data = MetadataParser.parse(document, url:myURL); 136 | print(data); 137 | 138 | } 139 | 140 | ``` 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | ## Credit 149 | This library is inspired by [open_graph_parser](https://github.com/Patte1808/open_graph_parser). 150 | However this one tries to be more general. 151 | 152 | 153 | ## Roadmap 154 | - Weighted or Preferred Metadata. Can assign custom weights for each parser to provide a fallback priority system 155 | - Improve Documentation 156 | 157 | 158 | ## Questions, Bugs, and Feature Requests 159 | Please forward all queries about this project to the [issue tracker](https://github.com/jg-l/metadata_fetch/issues). 
160 | 161 | -------------------------------------------------------------------------------- /example/main.dart: -------------------------------------------------------------------------------- 1 | import 'package:metadata_fetch/metadata_fetch.dart'; 2 | 3 | void main() async { 4 | var data = await MetadataFetch.extract( 5 | 'https://flutter.dev'); // returns a Metadata object 6 | print(data); // Metadata.toString() 7 | print(data?.title); // Metadata.title 8 | print(data?.toMap()); // converts Metadata to map 9 | print(data?.toJson()); // converts Metadata to JSON 10 | } 11 | -------------------------------------------------------------------------------- /example/parse_document.dart: -------------------------------------------------------------------------------- 1 | import 'package:metadata_fetch/metadata_fetch.dart'; 2 | import 'package:http/http.dart' as http; 3 | 4 | void main() async { 5 | var url = 'https://flutter.dev'; 6 | var response = await http.get(Uri.parse(url)); 7 | var document = MetadataFetch.responseToDocument(response); 8 | 9 | // Provide a url fallback if no urls were extracted 10 | var data = MetadataParser.parse(document, url: url); 11 | print(data); 12 | 13 | // Just Opengraph 14 | var og = MetadataParser.openGraph(document); 15 | print(og); 16 | 17 | var hm = MetadataParser.htmlMeta(document); 18 | print(hm); 19 | 20 | var js = MetadataParser.jsonLdSchema(document); 21 | print(js); 22 | 23 | var twitter = MetadataParser.twitterCard(document); 24 | print(twitter); 25 | } 26 | -------------------------------------------------------------------------------- /lib/metadata_fetch.dart: -------------------------------------------------------------------------------- 1 | /// This library provides two Metadata Parsers and two utility functions for retriveing and parsing documents from a url. 
/// Entry points for fetching a page over HTTP and turning it into [Metadata].
class MetadataFetch {
  /// Fetches [url] and returns the [Metadata] extracted from the response.
  ///
  /// Returns `null` when [url] does not validate as a URL (`isURL`).
  ///
  /// When the response cannot be turned into a document (non-200 status or
  /// an undecodable body) a default [Metadata] is returned whose title is the
  /// value of `getDomain(url)` and whose description is [url] itself.
  ///
  /// When the response's `content-type` header starts with `image/`, the
  /// returned [Metadata] has an empty title and description and [url] as its
  /// image.
  ///
  /// Errors thrown by the HTTP request itself are not caught here.
  static Future<Metadata?> extract(String url) async {
    if (!isURL(url)) {
      return null;
    }

    // Sane defaults: the domain name as the title and the url itself as the
    // description.
    final defaultOutput = Metadata()
      ..title = getDomain(url)
      ..description = url;

    // Make our network call.
    final response = await http.get(Uri.parse(url));
    final headerContentType = response.headers['content-type'];

    // Media types are case-insensitive, so compare in lower case.
    if (headerContentType != null &&
        headerContentType.toLowerCase().startsWith('image/')) {
      // The url points straight at an image: there is no document to parse.
      defaultOutput.title = '';
      defaultOutput.description = '';
      defaultOutput.image = url;
      return defaultOutput;
    }

    final document = responseToDocument(response);
    if (document == null) {
      return defaultOutput;
    }

    // Hand the requested url to the parser as a fallback base, so images
    // given as relative paths can still be resolved when the page itself
    // declares no url.
    return _extractMetadata(document, url: url) ?? defaultOutput;
  }

  /// Decodes the body of [response] as UTF-8 and parses it into a [Document].
  ///
  /// Returns `null` when the status code is not 200 or when decoding or
  /// parsing fails.
  static Document? responseToDocument(http.Response response) {
    if (response.statusCode != 200) {
      return null;
    }

    try {
      return parser.parse(utf8.decode(response.bodyBytes));
    } catch (_) {
      // Deliberately best-effort: a body that cannot be decoded or parsed is
      // reported as "no document" instead of failing the caller.
      return null;
    }
  }

  /// Returns the [Metadata] extracted from [document].
  ///
  /// [url] is forwarded to [MetadataParser.parse] as a fallback for when the
  /// parsers extract no url from the document.
  ///
  /// Future: Can pass in a strategy i.e: to retrieve only OpenGraph, or
  /// OpenGraph and Json+LD only.
  static Metadata? _extractMetadata(Document document, {String? url}) {
    try {
      return MetadataParser.parse(document, url: url);
    } on FormatException {
      // The fallback url (or the image path resolved against it) was not a
      // parseable URI. Retry without the fallback so supplying it can never
      // make extraction fail where it would otherwise have succeeded.
      return MetadataParser.parse(document);
    }
  }
}
/// Takes a [Document] and parses [Metadata] from the `json-ld` data found in
/// the first `<script type="application/ld+json">` element of its head.
///
/// The decoded JSON may be a single object or a list of objects; for a list
/// only the first element is used.
class JsonLdParser with BaseMetadataParser {
  /// The [document] to be parsed.
  Document? document;

  /// The decoded JSON-LD payload, or `null` when missing or malformed.
  dynamic _jsonData;

  JsonLdParser(this.document) {
    _jsonData = _parseToJson(document);
  }

  /// Decodes the JSON-LD script of [document].
  ///
  /// Returns `null` when there is no such script or its content is not valid
  /// JSON, so that one malformed block does not break metadata extraction.
  dynamic _parseToJson(Document? document) {
    final data = document?.head
        ?.querySelector("script[type='application/ld+json']")
        ?.innerHtml;
    if (data == null) {
      return null;
    }
    try {
      return jsonDecode(data);
    } on FormatException {
      return null;
    }
  }

  /// The JSON object the metadata is read from: the payload itself when it is
  /// an object, or the first element when it is a non-empty list whose first
  /// element is an object. `null` otherwise.
  Map? get _node {
    final data = _jsonData;
    if (data is Map) {
      return data;
    }
    if (data is List && data.isNotEmpty) {
      final first = data.first;
      if (first is Map) {
        return first;
      }
    }
    return null;
  }

  /// Returns the value stored under [key] as a string.
  ///
  /// A list value is represented by its first element. Returns `null` (never
  /// the string 'null') when the key is absent, its value is null, or the
  /// list is empty, so callers can chain fallbacks with `??`.
  String? _text(String key) {
    var value = _node?.getDynamic(key);
    if (value is List) {
      value = value.isEmpty ? null : value.first;
    }
    return value?.toString();
  }

  /// Get the [Metadata.title] from the `name` property, falling back to
  /// `headline`.
  @override
  String? get title => _text('name') ?? _text('headline');

  /// Get the [Metadata.description] from the `description` property, falling
  /// back to `headline`.
  @override
  String? get description => _text('description') ?? _text('headline');

  /// Get the [Metadata.image] from the `logo` property, falling back to
  /// `image` when there is no `logo`.
  @override
  String? get image {
    final node = _node;
    if (node == null) {
      return null;
    }
    return _imageResultToString(
        node.getDynamic('logo') ?? node.getDynamic('image'));
  }

  /// Normalises an image value to a url string.
  ///
  /// Accepts a plain string, an object carrying a string `url` or
  /// `contentUrl` (the schema.org `ImageObject` shape), or a list of either,
  /// in which case the first element is used. Returns `null` otherwise.
  String? _imageResultToString(dynamic result) {
    if (result is List && result.isNotEmpty) {
      result = result.first;
    }

    if (result is String) {
      return result;
    }

    if (result is Map) {
      final nested = result['url'] ?? result['contentUrl'];
      if (nested is String) {
        return nested;
      }
    }

    return null;
  }

  @override
  String toString() => parse().toString();
}
document) { 43 | return OpenGraphParser(document).parse(); 44 | } 45 | 46 | static Metadata htmlMeta(Document? document) { 47 | return HtmlMetaParser(document).parse(); 48 | } 49 | 50 | static Metadata jsonLdSchema(Document? document) { 51 | return JsonLdParser(document).parse(); 52 | } 53 | 54 | static Metadata twitterCard(Document? document) { 55 | return TwitterCardParser(document).parse(); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /lib/src/parsers/opengraph_parser.dart: -------------------------------------------------------------------------------- 1 | import 'package:html/dom.dart'; 2 | import 'package:metadata_fetch/src/utils/util.dart'; 3 | 4 | import 'base_parser.dart'; 5 | 6 | /// Takes a [http.Document] and parses [Metadata] from [<meta property='og:*'>] tags 7 | class OpenGraphParser with BaseMetadataParser { 8 | final Document? _document; 9 | OpenGraphParser(this._document); 10 | 11 | /// Get [Metadata.title] from 'og:title' 12 | @override 13 | String? get title => getProperty( 14 | _document, 15 | property: 'og:title', 16 | ); 17 | 18 | /// Get [Metadata.description] from 'og:description' 19 | @override 20 | String? get description => getProperty( 21 | _document, 22 | property: 'og:description', 23 | ); 24 | 25 | /// Get [Metadata.image] from 'og:image' 26 | @override 27 | String? get image => getProperty( 28 | _document, 29 | property: 'og:image', 30 | ); 31 | 32 | /// Get [Metadata.url] from 'og:url' 33 | @override 34 | String? 
get url => getProperty( 35 | _document, 36 | property: 'og:url', 37 | ); 38 | 39 | @override 40 | String toString() => parse().toString(); 41 | } 42 | -------------------------------------------------------------------------------- /lib/src/parsers/parsers.dart: -------------------------------------------------------------------------------- 1 | // The parsers used by the library 2 | export 'base_parser.dart'; 3 | export 'metadata_parser.dart'; 4 | export 'htmlmeta_parser.dart'; 5 | export 'opengraph_parser.dart'; 6 | export 'jsonld_parser.dart'; 7 | export 'twittercard_parser.dart'; 8 | -------------------------------------------------------------------------------- /lib/src/parsers/twittercard_parser.dart: -------------------------------------------------------------------------------- 1 | import 'package:html/dom.dart'; 2 | import 'package:metadata_fetch/metadata_fetch.dart'; 3 | import 'package:metadata_fetch/src/utils/util.dart'; 4 | 5 | import 'base_parser.dart'; 6 | 7 | /// Takes a [http.Document] and parses [Metadata] from [<meta property='twitter:*'>] tags 8 | class TwitterCardParser with BaseMetadataParser { 9 | final Document? _document; 10 | TwitterCardParser(this._document); 11 | 12 | /// Get [Metadata.title] from 'twitter:title' 13 | @override 14 | String? get title => 15 | getProperty( 16 | _document, 17 | attribute: 'name', 18 | property: 'twitter:title', 19 | ) ?? 20 | getProperty( 21 | _document, 22 | property: 'twitter:title', 23 | ); 24 | 25 | /// Get [Metadata.description] from 'twitter:description' 26 | @override 27 | String? get description => 28 | getProperty( 29 | _document, 30 | attribute: 'name', 31 | property: 'twitter:description', 32 | ) ?? 33 | getProperty( 34 | _document, 35 | property: 'twitter:description', 36 | ); 37 | 38 | /// Get [Metadata.image] from 'twitter:image' 39 | @override 40 | String? get image => 41 | getProperty( 42 | _document, 43 | attribute: 'name', 44 | property: 'twitter:image', 45 | ) ?? 
/// Convenience lookups for loosely typed maps such as decoded JSON.
extension GetMethod on Map {
  /// Returns the value stored under [key] as a string.
  ///
  /// A list value is represented by its first element. Returns `null` when
  /// the key is absent, the value is `null`, or the value is an empty list.
  String? get(dynamic key) {
    var value = this[key];
    if (value is List) {
      // `first` throws on an empty list; treat that as "no value".
      if (value.isEmpty) return null;
      value = value.first;
    }
    // `null.toString()` is the four-letter string 'null', which would defeat
    // every `get(a) ?? get(b)` fallback; the null-aware call keeps null null.
    return value?.toString();
  }

  /// Returns the raw value stored under [key], without any conversion.
  dynamic getDynamic(dynamic key) {
    return this[key];
  }
}
property, 24 | String key = 'content', 25 | }) { 26 | return document 27 | ?.getElementsByTagName(tag) 28 | .cast<Element?>() 29 | .firstWhere((element) => element?.attributes[attribute] == property, 30 | orElse: () => null) 31 | ?.attributes 32 | .get(key); 33 | } 34 | -------------------------------------------------------------------------------- /pubspec.yaml: -------------------------------------------------------------------------------- 1 | name: metadata_fetch 2 | description: A dart library for extracting metadata on web pages such as OpenGraph, Meta, Twitter Cards, and Structured Data (Json-LD) 3 | version: 0.4.2 4 | homepage: https://github.com/jg-l/metadata_fetch 5 | 6 | environment: 7 | sdk: ">=2.12.0 <4.0.0" 8 | 9 | dependencies: 10 | http: ^1.2.2 11 | string_validator: ^1.1.0 12 | html: ^0.15.4 13 | 14 | dev_dependencies: 15 | lints: ^4.0.0 16 | test: ^1.25.8 17 | -------------------------------------------------------------------------------- /test/metadata_fetch_test.dart: -------------------------------------------------------------------------------- 1 | import 'package:metadata_fetch/metadata_fetch.dart'; 2 | import 'package:html/parser.dart' as html; 3 | import 'package:http/http.dart' as http; 4 | import 'package:metadata_fetch/src/parsers/jsonld_parser.dart'; 5 | import 'package:metadata_fetch/src/parsers/parsers.dart'; 6 | import 'package:test/test.dart'; 7 | 8 | // TODO: Use a Mock Server for testing 9 | // TODO: Improve testing 10 | void main() { 11 | test('JSON Serialization', () async { 12 | final url = 'https://flutter.dev'; 13 | final response = await http.get(Uri.parse(url)); 14 | final document = MetadataFetch.responseToDocument(response); 15 | final data = MetadataParser.parse(document); 16 | print(data.toJson()); 17 | expect(data.toJson().isNotEmpty, true); 18 | }); 19 | 20 | test('Metadata Parser', () async { 21 | final url = 'https://flutter.dev'; 22 | final response = await http.get(Uri.parse(url)); 23 | final document = 
MetadataFetch.responseToDocument(response); 24 | 25 | final data = MetadataParser.parse(document); 26 | print(data); 27 | 28 | // Just Opengraph 29 | final og = MetadataParser.openGraph(document); 30 | print('OG $og'); 31 | 32 | // Just Html 33 | final hm = MetadataParser.htmlMeta(document); 34 | print('Html $hm'); 35 | 36 | // Just Json-ld schema 37 | final js = MetadataParser.jsonLdSchema(document); 38 | print('JSON $js'); 39 | 40 | final twitter = MetadataParser.twitterCard(document); 41 | print('Twitter $twitter'); 42 | }); 43 | group('Metadata parsers', () { 44 | test('JSONLD', () async { 45 | final url = 'https://www.epicurious.com/'; 46 | final response = await http.get(Uri.parse(url)); 47 | final document = MetadataFetch.responseToDocument(response); 48 | // print(response.statusCode); 49 | 50 | print(JsonLdParser(document)); 51 | }); 52 | 53 | test('JSONLD II', () async { 54 | final url = 55 | 'https://www.epicurious.com/expert-advice/best-soy-sauce-chefs-pick-article'; 56 | final response = await http.get(Uri.parse(url)); 57 | final document = MetadataFetch.responseToDocument(response); 58 | // print(response.statusCode); 59 | 60 | print(JsonLdParser(document)); 61 | }); 62 | 63 | test('JSONLD III', () async { 64 | final url = 65 | 'https://medium.com/@quicky316/install-flutter-sdk-on-windows-without-android-studio-102fdf567ce4'; 66 | final response = await http.get(Uri.parse(url)); 67 | final document = MetadataFetch.responseToDocument(response); 68 | // print(response.statusCode); 69 | 70 | print(JsonLdParser(document)); 71 | }); 72 | 73 | test('JSONLD IV', () async { 74 | final url = 'https://www.distilled.net/'; 75 | final response = await http.get(Uri.parse(url)); 76 | final document = MetadataFetch.responseToDocument(response); 77 | // print(response.statusCode); 78 | 79 | print(JsonLdParser(document)); 80 | }); 81 | test('HTML', () async { 82 | final url = 'https://flutter.dev'; 83 | final response = await http.get(Uri.parse(url)); 84 | final 
document = MetadataFetch.responseToDocument(response); 85 | print(response.statusCode); 86 | 87 | print(HtmlMetaParser(document).title); 88 | print(HtmlMetaParser(document).description); 89 | print(HtmlMetaParser(document).image); 90 | }); 91 | 92 | test('OpenGraph Parser', () async { 93 | final url = 'https://flutter.dev'; 94 | final response = await http.get(Uri.parse(url)); 95 | final document = MetadataFetch.responseToDocument(response); 96 | print(response.statusCode); 97 | 98 | print(OpenGraphParser(document)); 99 | print(OpenGraphParser(document).title); 100 | print(OpenGraphParser(document).description); 101 | print(OpenGraphParser(document).image); 102 | }); 103 | 104 | test('OpenGraph Youtube Test', () async { 105 | String url = 'https://www.youtube.com/watch?v=0jz0GAFNNIo'; 106 | final response = await http.get(Uri.parse(url)); 107 | final document = MetadataFetch.responseToDocument(response); 108 | print(OpenGraphParser(document)); 109 | print(OpenGraphParser(document).title); 110 | Metadata data = OpenGraphParser(document).parse(); 111 | expect(data.title, 'Drake - When To Say When & Chicago Freestyle'); 112 | expect( 113 | data.image, 'https://i.ytimg.com/vi/0jz0GAFNNIo/maxresdefault.jpg'); 114 | }); 115 | 116 | test('TwitterCard Parser', () async { 117 | final url = 118 | 'https://www.epicurious.com/expert-advice/best-soy-sauce-chefs-pick-article'; 119 | final response = await http.get(Uri.parse(url)); 120 | final document = MetadataFetch.responseToDocument(response); 121 | print(response.statusCode); 122 | 123 | print(TwitterCardParser(document)); 124 | print(TwitterCardParser(document).title); 125 | print(TwitterCardParser(document).description); 126 | print(TwitterCardParser(document).image); 127 | // Test the url 128 | print(TwitterCardParser(document).url); 129 | }); 130 | 131 | test('Faulty', () async { 132 | final url = 'https://google.ca'; 133 | final response = await http.get(Uri.parse(url)); 134 | final document = 
MetadataFetch.responseToDocument(response); 135 | print(response.statusCode); 136 | 137 | print(OpenGraphParser(document).title); 138 | print(OpenGraphParser(document).description); 139 | print(OpenGraphParser(document).image); 140 | 141 | print(HtmlMetaParser(document).title); 142 | print(HtmlMetaParser(document).description); 143 | print(HtmlMetaParser(document).image); 144 | }); 145 | }); 146 | 147 | group('MetadataFetch.extract()', () { 148 | test('First Test', () async { 149 | final url = 'https://flutter.dev'; 150 | final data = await MetadataFetch.extract(url); 151 | print(data); 152 | print(data?.description); 153 | print(data?.url); 154 | expect(data?.toMap().isEmpty, false); 155 | expect(data?.url, url + "/"); 156 | }); 157 | 158 | test('FB Test', () async { 159 | final data = await MetadataFetch.extract('https://facebook.com/'); 160 | expect(data?.toMap().isEmpty, false); 161 | }); 162 | 163 | test('Youtube Test', () async { 164 | Metadata? data = await MetadataFetch.extract( 165 | 'https://www.youtube.com/watch?v=0jz0GAFNNIo'); 166 | expect(data?.title, 'Drake - When To Say When & Chicago Freestyle'); 167 | expect( 168 | data?.image, 'https://i.ytimg.com/vi/0jz0GAFNNIo/maxresdefault.jpg'); 169 | }); 170 | 171 | test('Unicode Test', () async { 172 | final data = await MetadataFetch.extract('https://www.jpf.go.jp/'); 173 | expect(data?.toMap().isEmpty, false); 174 | }); 175 | 176 | test('Gooogle Test', () async { 177 | final data = await MetadataFetch.extract('https://google.ca'); 178 | expect(data?.toMap().isEmpty, false); 179 | expect(data?.title, 'google'); 180 | }); 181 | 182 | test('Invalid Url Test', () async { 183 | final data = await MetadataFetch.extract('https://google'); 184 | expect(data == null, true); 185 | }); 186 | 187 | final htmlPage = ''' 188 | <html> 189 | <head> 190 | <title>Test 191 | 192 | 193 | 194 | 195 | 196 | '''; 197 | 198 | test( 199 | "Image url without slash at beginning still results in valid url when falling back to html 
parser", 200 | () { 201 | final doc = html.parse(htmlPage); 202 | // Provide a url to be used as a fallback, when no url metadata is extracted from the Document. 203 | // Useful for relative images 204 | var data = MetadataParser.parse(doc, url: 'https://example.com/some/'); 205 | expect(data.image, equals('https://example.com/some/this/is/a/test.png')); 206 | }); 207 | 208 | test( 209 | "MetadataParser.parse(doc) works without a doc.requestUrl (relative URLs are just not resolved)", 210 | () { 211 | final doc = html.parse(htmlPage); 212 | final data = MetadataParser.parse(doc); 213 | expect(data.image, equals('this/is/a/test.png')); 214 | }); 215 | }); 216 | } 217 | --------------------------------------------------------------------------------