├── logos ├── logo-box-builtby.png └── logo-box-madefor.png ├── filters.js ├── .gitignore ├── .eslintrc ├── package.json ├── lib └── fb.js ├── CHANGELOG.md ├── oembed.js ├── index.js ├── tests └── test.js └── README.md /logos/logo-box-builtby.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apostrophecms/oembetter/main/logos/logo-box-builtby.png -------------------------------------------------------------------------------- /logos/logo-box-madefor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apostrophecms/oembetter/main/logos/logo-box-madefor.png -------------------------------------------------------------------------------- /filters.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | before: [], 3 | after: [ 4 | require('./lib/fb.js') 5 | ], 6 | fallback: [] 7 | }; 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Exclude the lock file so dependencies stay up-to-date. 
2 | package-lock.json 3 | npm-debug.log 4 | *.DS_Store 5 | node_modules 6 | -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | "apostrophe" 4 | ], 5 | "env": { 6 | "mocha": true 7 | }, 8 | "rules": { 9 | "no-var": "error", 10 | "no-console": 0, 11 | "multiline-ternary": "off", 12 | "no-unused-vars": [ 13 | "error", 14 | { "varsIgnorePattern": "^_.", "args": "none" } 15 | ] 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "oembetter", 3 | "version": "1.1.4", 4 | "description": "A modern oembed client. Allows you to register filters to improve or supply oembed support for sites that don't normally have it. You can also supply a allowlist of services you trust to prevent XSS attacks.", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "eslint --ext .js . 
&& eslint && cd tests && mocha test" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "https://github.com/apostrophecms/oembetter" 12 | }, 13 | "keywords": [ 14 | "oembed", 15 | "oembetter", 16 | "media", 17 | "embed" 18 | ], 19 | "author": "Apostrophe Technologies", 20 | "license": "MIT", 21 | "bugs": { 22 | "url": "https://github.com/apostrophecms/oembetter/issues" 23 | }, 24 | "homepage": "https://github.com/apostrophecms/oembetter", 25 | "dependencies": { 26 | "async": "^0.9.0", 27 | "cheerio": "^1.0.0-rc.10", 28 | "fast-xml-parser": "^4.2.2", 29 | "node-fetch": "^2.6.7", 30 | "urls": "0.0.4" 31 | }, 32 | "devDependencies": { 33 | "eslint": "^7.25.0", 34 | "eslint-config-apostrophe": "^3.4.0", 35 | "eslint-plugin-n": "^15.2.1", 36 | "eslint-plugin-node": "^11.1.0", 37 | "eslint-plugin-promise": "^5.1.0", 38 | "mocha": "^10.0.0" 39 | } 40 | } -------------------------------------------------------------------------------- /lib/fb.js: -------------------------------------------------------------------------------- 1 | module.exports = function(url, options, response, callback) { 2 | 3 | // if it's a Facebook video, manipulate the response 4 | if (!(response.provider_name === 'Facebook')) { 5 | return setImmediate(callback); 6 | } 7 | 8 | // check if "fb-root" exists, if it doesn't, add it to the DOM 9 | response.html = response.html.replace(/
<\/div>\s*/, 10 | ''); 17 | 18 | // call FB.XFBML.parse() to render special Facebook markup (XFBML) 19 | response.html = response.html + 20 | ''; 23 | 24 | // FB doesn't include a thumbnail image URL, pull it manually from their graph API 25 | if (response.url) { 26 | const matches = response.url.match(/\/videos\/[^\d]*(\d+)/); 27 | if (matches) { 28 | response.thumbnail_url = 'https://graph.facebook.com/' + matches[1] + '/picture'; 29 | } 30 | } 31 | return callback(null, response); 32 | }; 33 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 1.1.4 (2024-08-07) 4 | 5 | - Followup to 1.1.3: also hardcode the YouTube oembed endpoint for sharing URLs (`youtu.be`). 6 | 7 | ## 1.1.3 (2024-08-07) 8 | 9 | - Hardcode the well-known YouTube oembed endpoint. While YouTube still does output oembed metadata, today many users saw broken videos due to broken canonical tags on YouTube pages (`link rel="undefined"`), which hampered discovery. Also this heavily used service will benefit from one less request per video. 10 | 11 | ## 1.1.2 (2024-07-10) 12 | 13 | - Removes `path` regex check on the vimeo endpoint to allow urls with and without `/video`. 14 | 15 | ## 1.1.1 - 2023-09-22 16 | 17 | - Hardcode the oembed endpoint for vimeo, which stopped offering oembed metadata on pages. 18 | 19 | ## 1.1.0 - 2023-05-03 20 | 21 | - Switched to `fast-xml-parser`, eliminating installation warnings about `xml2js`. 22 | 23 | ## 1.0.2 - 2022-12-21 24 | 25 | - Switched to `node-fetch`, eliminating installation warnings about the unsupported `request` module. 26 | - Switched to mocha 10.x, eliminating another set of installation warnings. 27 | - Internal implementation of `oembed.js` now uses `async/await` for better maintainability. 
28 | 29 | ## 1.0.1 - 2020-06-30 30 | 31 | ### Fixes 32 | 33 | - Updates `cheerio` to the 1.0.0-rc version series to address a security vulnerability. 34 | 35 | ## 1.0.0 36 | renamed the `whitelist` and `suggestedWhitelist` properties to `allowlist` and `suggestedAllowlist`, respectively. Also introduced support for `options.headers`. 37 | 38 | ## 0.1.23 39 | workaround for YouTube bug in which video pages contain `http:` recommendations for oembed URLs, but an `http:` request is rejected with a 403 error. Force `https:` for YouTube. 40 | 41 | ## 0.1.22 42 | fixed URL parsing bugs impacting use of preconfigured endpoints that already contain some query string parameters. 43 | 44 | ## 0.1.21 45 | Updated links and information in the README. 46 | 47 | ## 0.1.20 48 | fixed a nuisance error that was appearing when Facebook was present but `window` was not the default object. 49 | 50 | ## 0.1.19 51 | unnecessary Facebook API logic was running on non-Facebook embeds due to a syntax mistake in 0.1.17. 52 | 53 | ## 0.1.18 54 | report HTTP errors properly rather than attempting to parse a nonexistent JSON body. Also, always try/catch when parsing JSON and report the exception as the callback error if necessary. 55 | 56 | ## 0.1.17 57 | Facebook oembed filter works regardless of whether Facebook's API has been initialized yet or not. 58 | 59 | ## 0.1.16 60 | Built in filter that handles Facebook oembed responses. 61 | 62 | ## 0.1.15 63 | allowlisted `facebook.com`, which has extensive oembed these days. 64 | 65 | ## 0.1.14 66 | bumped `cheerio` dependency to fix deprecation warnings. No behavior changes. 67 | 68 | ## 0.1.13 69 | relative URLs work with discovery. Thanks to Alejandro Torrado. 70 | 71 | ## 0.1.12 72 | (unchanged, npm publishing issue) 73 | 74 | ## 0.1.11 75 | don't crash when evaluating allowlists if `parsed.hostname` somehow manages not to be set. 76 | 77 | ## 0.1.10 78 | user agent string to please Facebook. Thanks to `equinox7`. 
79 | 80 | ## 0.1.9 81 | the new `endpoints` option allows you to configure custom oembed API endpoints for services that don't advertise an endpoint or advertise it incorrectly. 82 | 83 | ## 0.1.7-0.1.8 84 | support SoundCloud. Added it to the suggested allowlist and added tolerance for their incorrect JSON content type. 85 | 86 | ## 0.1.6 87 | security improvement: 88 | reject all URLs that are not `http:` or `https:` completely, right up front. This means you don't have to protect against these obvious hacks in your `before` and `after` handlers. 89 | 90 | ## 0.1.5 91 | packaging issues, no changes. 92 | 93 | ## 0.1.4 94 | if the URL leads to a page with no oembed metadata, look for a `link rel="canonical"` tag and try that URL instead. Don't pursue this more than one step. 95 | 96 | Also, specify a user agent so that certain hosts don't give us watered-down HTML. 97 | 98 | ## 0.1.3 99 | added `youtu.be` to the suggested allowlist. 100 | -------------------------------------------------------------------------------- /oembed.js: -------------------------------------------------------------------------------- 1 | const fetch = require('node-fetch'); 2 | const { XMLParser } = require('fast-xml-parser'); 3 | 4 | const cheerio = require('cheerio'); 5 | 6 | let forceXml = false; 7 | 8 | module.exports = oembed; 9 | 10 | // The _canonical option is used internally to prevent 11 | // infinite recursion when retrying with a canonical URL. 12 | // Don't worry about it in your code. 
13 | 14 | async function oembed(url, options, endpoint, callback, _canonical) { 15 | 16 | let oUrl; 17 | 18 | try { 19 | const { canonical, url } = await discover(); 20 | oUrl = url; 21 | if (canonical) { 22 | return oembed(canonical, options, endpoint, callback, true); 23 | } 24 | return callback(null, await retrieve()); 25 | } catch (e) { 26 | return callback(e); 27 | } 28 | 29 | async function discover() { 30 | let resultUrl; 31 | // if we're being told the endpoint, use it 32 | if (endpoint) { 33 | if (!options) { 34 | options = {}; 35 | } 36 | 37 | options.url = url; 38 | return { url: endpoint }; 39 | } 40 | 41 | // otherwise discover it 42 | const body = await get(url, { 43 | headers: Object.assign({ 44 | 'User-Agent': 'oembetter' 45 | }, options.headers || {}) 46 | }); 47 | const $ = cheerio.load(body); 48 | 49 | // 50 | 51 | // Allow for all the dumb stuff we've seen. 52 | // (Only application/json+oembed and 53 | // text/xml+oembed are in the standard.) 54 | 55 | const ideas = [ 56 | 'link[type="application/json+oembed"]', 57 | 'link[type="text/json+oembed"]', 58 | 'link[type="application/xml+oembed"]', 59 | 'link[type="text/xml+oembed"]' 60 | ]; 61 | 62 | for (let i = 0; (i < ideas.length); i++) { 63 | const linkUrl = $(ideas[i]).attr('href'); 64 | if (linkUrl) { 65 | resultUrl = new URL(linkUrl, url); 66 | if (resultUrl.protocol === 'http:') { 67 | // Fix for YouTube's bug 12/15/20: issuing HTTP discovery URLs 68 | // but flunking them with a 403 when they arrive 69 | if (resultUrl.hostname.match(/youtube/)) { 70 | resultUrl.protocol = 'https'; 71 | } 72 | } 73 | break; 74 | } 75 | } 76 | 77 | if (!resultUrl) { 78 | if (!_canonical) { 79 | // No oembed information here, however if 80 | // there is a canonical URL retry with that instead 81 | const canonical = $('link[rel="canonical"]').attr('href'); 82 | if (canonical && (canonical !== url)) { 83 | return { canonical }; 84 | } 85 | } 86 | throw new Error('no oembed 
discovery information available'); 87 | } 88 | return { url: resultUrl.toString() }; 89 | } 90 | 91 | async function retrieve() { 92 | // Just for testing - a lot of modern services 93 | // default to JSON and we want to make sure we 94 | // still test XML too 95 | if (forceXml) { 96 | oUrl = oUrl.replace('json', 'xml'); 97 | } 98 | if (options) { 99 | // make sure parsed.query is an object by passing true as 100 | // second argument 101 | const parsed = new URL(oUrl, url); 102 | const keys = Object.keys(options); 103 | keys.forEach(function(key) { 104 | if (key !== 'headers') { 105 | parsed.searchParams.set(key, options[key]); 106 | } 107 | }); 108 | // Clean up things url.format defaults to if they are already there, 109 | // ensuring that parsed.query is actually used 110 | delete parsed.href; 111 | delete parsed.search; 112 | oUrl = parsed.toString(); 113 | } 114 | const body = await get(oUrl, { 115 | headers: Object.assign({ 116 | 'User-Agent': 'oembetter' 117 | }, options.headers || {}) 118 | }); 119 | if (body[0] === '<') { 120 | return parseXmlString(body); 121 | } else { 122 | return JSON.parse(body); 123 | } 124 | } 125 | }; 126 | 127 | async function get(url, options) { 128 | const response = await fetch(url, options); 129 | if (response.status >= 400) { 130 | throw response; 131 | } 132 | return response.text(); 133 | } 134 | 135 | async function parseXmlString(body) { 136 | const parser = new XMLParser(); 137 | const response = parser.parse(body); 138 | if (!response.oembed) { 139 | throw new Error('XML response lacks oembed element'); 140 | } 141 | const result = response.oembed; 142 | result._xml = true; 143 | return result; 144 | } 145 | 146 | // For testing 147 | module.exports.setForceXml = function(flag) { 148 | forceXml = flag; 149 | }; 150 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | const oembed = 
require('./oembed.js'); 2 | const async = require('async'); 3 | const filters = require('./filters.js'); 4 | 5 | module.exports = function(options) { 6 | 7 | const self = {}; 8 | 9 | if (!options) { 10 | options = {}; 11 | } 12 | 13 | self.before = filters.before.concat(options.before || []); 14 | self.after = filters.after.concat(options.after || []); 15 | self.fallback = filters.fallback.concat(options.fallback || []); 16 | 17 | self.fetch = function(url, options, callback) { 18 | let i; 19 | if (arguments.length === 2) { 20 | callback = options; 21 | options = {}; 22 | } 23 | 24 | if (url.match(/^\/\//)) { 25 | // Protocol-relative URLs are commonly found 26 | // in markup these days and can be upgraded 27 | // to https so that they work 28 | url = 'https:' + url; 29 | } 30 | let response; 31 | const warnings = []; 32 | let parsed; 33 | try { 34 | parsed = new URL(url); 35 | } catch (e) { 36 | return callback(new Error('oembetter: invalid URL: ' + url)); 37 | } 38 | if ((parsed.protocol !== 'http:') && (parsed.protocol !== 'https:')) { 39 | return callback(new Error('oembetter: URL is neither http nor https: ' + url)); 40 | } 41 | if (self._allowlist) { 42 | let good = false; 43 | for (i = 0; (i < self._allowlist.length); i++) { 44 | if (!parsed.hostname) { 45 | continue; 46 | } 47 | if (self.inDomain(self._allowlist[i], parsed.hostname)) { 48 | good = true; 49 | break; 50 | } 51 | } 52 | if (!good) { 53 | return callback(new Error('oembetter: ' + url + ' is not in an allowed domain.')); 54 | } 55 | } 56 | let endpoint = false; 57 | if (self._endpoints) { 58 | for (i = 0; i < self._endpoints.length; i++) { 59 | if (!parsed.hostname) { 60 | continue; 61 | } 62 | if (!self.inDomain(self._endpoints[i].domain, parsed.hostname)) { 63 | continue; 64 | } 65 | if (self._endpoints[i].path) { 66 | if ((!parsed.pathname) || (!parsed.pathname.match(self._endpoints[i].path))) { 67 | continue; 68 | } 69 | } 70 | endpoint = self._endpoints[i].endpoint; 71 | break; 72 | } 73 | 
} 74 | return async.series({ 75 | before: function(callback) { 76 | return async.eachSeries(self.before, function(before, callback) { 77 | return before(url, options, response, function(err, _url, _options, _response) { 78 | // Nonfatal 79 | if (err) { 80 | warnings.push(err); 81 | return callback(null); 82 | } 83 | url = _url || url; 84 | options = _options || options; 85 | response = _response || response; 86 | return callback(null); 87 | }); 88 | }, callback); 89 | }, 90 | fetch: function(callback) { 91 | if (response) { 92 | // Preempted by a before 93 | return callback(null); 94 | } 95 | return oembed(url, options, endpoint, function (err, result) { 96 | 97 | response = result; 98 | if (err) { 99 | // not necessarily fatal 100 | warnings.push(err); 101 | } 102 | return callback(null); 103 | }); 104 | }, 105 | fallback: function(fallbackCallback) { 106 | if (response) { 107 | return setImmediate(fallbackCallback); 108 | } 109 | return async.eachSeries(self.fallback, function(fallback, callback) { 110 | return fallback(url, options, function(err, _response) { 111 | if (err) { 112 | warnings.push(err); 113 | return callback(err); 114 | } 115 | response = _response || response; 116 | if (response) { 117 | // Stop trying fallbacks, we got one 118 | return fallbackCallback(null); 119 | } 120 | return callback(null); 121 | }); 122 | }, fallbackCallback); 123 | }, 124 | after: function(callback) { 125 | if (!response) { 126 | return setImmediate(callback); 127 | } 128 | return async.eachSeries(self.after, function(after, callback) { 129 | return after(url, options, response, function(err, _response) { 130 | if (err) { 131 | warnings.push(err); 132 | return callback(err); 133 | } 134 | response = _response || response; 135 | return callback(null); 136 | }); 137 | }, callback); 138 | } 139 | }, function(err) { 140 | // Handle fatal errors 141 | if (err) { 142 | return callback(err); 143 | } 144 | // If there is no response, treat the first 145 | // warning as a fatal 
error 146 | if (!response) { 147 | if (warnings.length) { 148 | return callback(warnings[0], warnings); 149 | } 150 | } 151 | 152 | // If there is a response, make the warnings available as the 153 | // third argument 154 | return callback(null, response, warnings); 155 | }); 156 | }; 157 | 158 | self.addBefore = function(fn) { 159 | self.before.push(fn); 160 | }; 161 | 162 | self.addAfter = function(fn) { 163 | self.after.push(fn); 164 | }; 165 | 166 | self.addFallback = function(fn) { 167 | self.fallback.push(fn); 168 | }; 169 | 170 | self.inDomain = function(domain, hostname) { 171 | 172 | hostname = hostname.toLowerCase(); 173 | domain = domain.toLowerCase(); 174 | if (hostname === domain) { 175 | return true; 176 | } 177 | if (hostname.substr(-domain.length - 1) === ('.' + domain)) { 178 | return true; 179 | } 180 | return false; 181 | }; 182 | 183 | self.allowlist = function(_allowlist) { 184 | self._allowlist = _allowlist; 185 | }; 186 | 187 | self.suggestedAllowlist = [ 188 | 'youtube.com', 189 | 'youtu.be', 190 | 'blip.tv', 191 | 'dailymotion.com', 192 | 'flickr.com', 193 | 'hulu.com', 194 | 'nfb.ca', 195 | 'qik.com', 196 | 'revision3.com', 197 | 'scribd.com', 198 | 'viddler.com', 199 | 'vimeo.com', 200 | 'youtube.com', 201 | 'dotsub.com', 202 | 'yfrog.com', 203 | 'photobucket.com', 204 | 'soundcloud.com', 205 | 'instagram.com', 206 | 'twitter.com', 207 | 'facebook.com' 208 | ]; 209 | 210 | self.suggestedEndpoints = [ 211 | { 212 | domain: 'instagram.com', 213 | endpoint: 'http://api.instagram.com/oembed' 214 | }, 215 | { 216 | domain: 'facebook.com', 217 | path: /\/videos\//, 218 | endpoint: 'https://www.facebook.com/plugins/video/oembed.json/' 219 | }, 220 | { 221 | domain: 'facebook.com', 222 | path: /\/posts\//, 223 | endpoint: 'https://www.facebook.com/plugins/post/oembed.json/' 224 | }, 225 | { 226 | domain: 'vimeo.com', 227 | endpoint: 'https://vimeo.com/api/oembed.json' 228 | }, 229 | { 230 | domain: 'youtube.com', 231 | endpoint: 
'https://www.youtube.com/oembed' 232 | }, 233 | { 234 | domain: 'youtu.be', 235 | endpoint: 'https://www.youtube.com/oembed' 236 | } 237 | ]; 238 | 239 | self.endpoints = function(_endpoints) { 240 | self._endpoints = _endpoints; 241 | }; 242 | 243 | return self; 244 | }; 245 | -------------------------------------------------------------------------------- /tests/test.js: -------------------------------------------------------------------------------- 1 | 2 | const assert = require('assert'); 3 | const oembetter = require('../index.js')(); 4 | 5 | // For testing custom before filters 6 | oembetter.addBefore(function(url, options, response, callback) { 7 | const parsed = new URL(url); 8 | if (!oembetter.inDomain('hootenanny.com', parsed.hostname)) { 9 | return setImmediate(callback); 10 | } 11 | const matches = parsed.pathname.match(/pages\/(\d+).html/); 12 | if (!matches) { 13 | return setImmediate(callback); 14 | } 15 | const id = matches[1]; 16 | const newResponse = { 17 | thumbnail_url: 'http://hootenanny.com/thumbnails/' + id + '.jpg', 18 | html: '' 19 | }; 20 | return callback(null, url, options, newResponse); 21 | }); 22 | 23 | // For testing a before filter that just adjusts the URL 24 | oembetter.addBefore(function(url, options, response, callback) { 25 | const parsed = new URL(url); 26 | if (!oembetter.inDomain('wiggypants.com', parsed.hostname)) { 27 | return setImmediate(callback); 28 | } 29 | url = url.replace(/wiggypants\.com/g, 'jiggypants.com'); 30 | return callback(null, url); 31 | }); 32 | 33 | // just verifying that wiggypants became jiggypants 34 | oembetter.addBefore(function(url, options, response, callback) { 35 | const parsed = new URL(url); 36 | if (!oembetter.inDomain('jiggypants.com', parsed.hostname)) { 37 | return setImmediate(callback); 38 | } 39 | return callback(null, url, options, { html: 'so jiggy' }); 40 | }); 41 | 42 | // "after" filter can change a response 43 | oembetter.addAfter(function(url, options, response, callback) { 44 
| const parsed = new URL(url); 45 | if (!oembetter.inDomain('jiggypants.com', parsed.hostname)) { 46 | return setImmediate(callback); 47 | } 48 | response.extra = 'extra'; 49 | return callback(null); 50 | }); 51 | 52 | // "fallback" filter can create a response when oembed fails 53 | oembetter.addFallback(function(url, options, callback) { 54 | const parsed = new URL(url); 55 | if (!oembetter.inDomain('wonkypants83742938.com', parsed.hostname)) { 56 | return setImmediate(callback); 57 | } 58 | return callback(null, { html: 'so wonky' }); 59 | }); 60 | 61 | // fallback filter for a working domain has no effect 62 | oembetter.addFallback(function(url, options, callback) { 63 | const parsed = new URL(url); 64 | if (!oembetter.inDomain('youtube.com', parsed.hostname)) { 65 | return setImmediate(callback); 66 | } 67 | return callback(null, { html: 'oopsie' }); 68 | }); 69 | 70 | describe('oembetter', function() { 71 | // youtube oembed can be sluggish 72 | this.timeout(10000); 73 | it('should be an object', function() { 74 | assert(oembetter); 75 | }); 76 | it('should return no response gracefully for apostrophecms.com', function(done) { 77 | oembetter.fetch('http://apostrophecms.com/', function(err, response) { 78 | assert(err); 79 | return done(); 80 | }); 81 | }); 82 | it('should return an oembed response for youtube full links', function(done) { 83 | const oembetter = require('../index.js')(); 84 | // Use the suggested endpoints, youtube sometimes has discovery issues 85 | // so we always do this in production 86 | oembetter.endpoints(oembetter.suggestedEndpoints); 87 | oembetter.fetch('https://www.youtube.com/watch?v=zsl_auoGuy4', function(err, response) { 88 | assert(!err); 89 | assert(response); 90 | assert(response.html); 91 | done(); 92 | }); 93 | }); 94 | it('should return an oembed response for youtube sharing links', function(done) { 95 | const oembetter = require('../index.js')(); 96 | // Use the suggested endpoints, youtube sometimes has discovery issues 
97 | // so we always do this in production 98 | oembetter.endpoints(oembetter.suggestedEndpoints); 99 | oembetter.fetch('https://youtu.be/RRfHbyCQDCo?si=U5yxvQeXgACwajqa', function(err, response) { 100 | assert(!err); 101 | assert(response); 102 | assert(response.html); 103 | done(); 104 | }); 105 | }); 106 | it('should return an oembed response for youtube with forced use of XML', function(done) { 107 | require('../oembed.js').setForceXml(true); 108 | oembetter.fetch('https://www.youtube.com/watch?v=zsl_auoGuy4', function(err, response) { 109 | require('../oembed.js').setForceXml(false); 110 | assert(!err); 111 | assert(response); 112 | assert(response.html); 113 | assert(response._xml); 114 | done(); 115 | }); 116 | }); 117 | it('should respect a custom before filter', function(done) { 118 | oembetter.fetch('http://hootenanny.com/pages/50.html', function(err, response) { 119 | assert(!err); 120 | assert(response); 121 | assert(response.html); 122 | assert(response.html === ''); 123 | return done(); 124 | }); 125 | }); 126 | it('inDomain method should handle a subdomain properly', function(done) { 127 | oembetter.fetch('http://www.hootenanny.com/pages/50.html', function(err, response) { 128 | assert(!err); 129 | assert(response); 130 | assert(response.html); 131 | assert(response.html === ''); 132 | return done(); 133 | }); 134 | }); 135 | it('inDomain method should flunk a bad domain', function(done) { 136 | oembetter.fetch('http://flhootenanny.com/pages/50.html', function(err, response) { 137 | assert(err); 138 | return done(); 139 | }); 140 | }); 141 | it('before filter can adjust URL', function(done) { 142 | oembetter.fetch('http://wiggypants.com/whatever', function(err, response) { 143 | assert(!err); 144 | assert(response); 145 | assert(response.html === 'so jiggy'); 146 | return done(); 147 | }); 148 | }); 149 | it('after filter can change response', function(done) { 150 | oembetter.fetch('http://jiggypants.com/whatever', function(err, response) { 151 | 
assert(!err); 152 | assert(response); 153 | assert(response.extra === 'extra'); 154 | assert(response.html === 'so jiggy'); 155 | return done(); 156 | }); 157 | }); 158 | it('fallback filter can provide last ditch response', function(done) { 159 | oembetter.fetch('http://wonkypants83742938.com/purple', function(err, response) { 160 | assert(!err); 161 | assert(response); 162 | assert(response.html === 'so wonky'); 163 | return done(); 164 | }); 165 | }); 166 | it('fallback filter for a working oembed service has no effect', function(done) { 167 | oembetter.fetch('https://www.youtube.com/watch?v=zsl_auoGuy4', function(err, response) { 168 | assert(!err); 169 | assert(response); 170 | assert(response.html !== 'oopsie'); 171 | return done(); 172 | }); 173 | }); 174 | it('setting allowlist does not crash', function() { 175 | oembetter.allowlist([ 'jiggypants.com' ]); 176 | }); 177 | it('allowlisted domains work', function(done) { 178 | oembetter.fetch('http://jiggypants.com/whatever', function(err, response) { 179 | assert(!err); 180 | assert(response); 181 | assert(response.html === 'so jiggy'); 182 | return done(); 183 | }); 184 | }); 185 | it('does not allow domains not on the allowlist', function(done) { 186 | oembetter.fetch('http://wiggypants.com/whatever', function(err, response) { 187 | assert(err); 188 | return done(); 189 | }); 190 | }); 191 | it('suggested allowlist is available', function() { 192 | assert(Array.isArray(oembetter.suggestedAllowlist)); 193 | }); 194 | it('non-http URLs fail up front with the appropriate error', function(done) { 195 | oembetter.fetch('test://jiggypants.com/whatever', function(err, response) { 196 | assert(err); 197 | assert(err.message === 'oembetter: URL is neither http nor https: test://jiggypants.com/whatever'); 198 | return done(); 199 | }); 200 | }); 201 | it('We can set the suggested endpoints and allowlist', function() { 202 | oembetter.allowlist(oembetter.suggestedAllowlist); 203 | 
oembetter.endpoints(oembetter.suggestedEndpoints); 204 | }); 205 | if (process.env.VIMEO_PRIVATE_URL) { 206 | it('Can embed vimeo private video with full metadata', function(done) { 207 | oembetter.fetch(process.env.VIMEO_PRIVATE_URL, { 208 | headers: { 209 | Referer: process.env.VIMEO_PRIVATE_REFERER 210 | } 211 | }, function(err, response) { 212 | assert(!err); 213 | assert(response); 214 | assert(response.html); 215 | assert(response.thumbnail_url); 216 | done(); 217 | }); 218 | }); 219 | } 220 | }); 221 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # oembetter 2 | 3 | 4 | 5 | `oembetter` is a modern [oembed](http://oembed.com) client which allows you to add filters that provide or improve oembed support for services that either don't have it or don't do it very well. 6 | 7 | `oembetter` fully supports the `oembed` standard including both XML and JSON responses from servers, and delivers the result as a neatly parsed JavaScript object. 8 | 9 | `oembetter` also allows you to allowlist trusted `oembed` domains. We strongly recommend this to prevent session cookie theft and other attacks. 10 | 11 | `oembetter` intentionally sticks to the oembed standard so you can use it to implement straightforward proxies that provide "better oembed." 12 | 13 | ## Basic Usage 14 | 15 | ```javascript 16 | const oembetter = require('oembetter')(); 17 | 18 | oembetter.fetch(url, function(err, response) { 19 | if (!err) { 20 | // thumbnail_url points to an image 21 | console.log(response.thumbnail_url); 22 | 23 | // response.html contains markup to embed the video or 24 | // whatever it might be 25 | console.log(response.html); 26 | } 27 | }); 28 | ``` 29 | 30 | oembetter is not restricted to handling responses of type `video`. See the [oembed documentation](http://oembed.com) for other response types that may come down the pipe. 
31 | 32 | ## Usage with `maxwidth` and `maxheight` 33 | 34 | You can pass an object containing `maxwidth` and `maxheight` options. Sites vary in how well they support them. 35 | 36 | ```javascript 37 | const oembetter = require('oembetter')(); 38 | 39 | oembetter.fetch(url, { maxwidth: 480, maxheight: 480 }, function(err, response) { 40 | if (!err) { 41 | // response.html contains markup to embed the video or 42 | // whatever it might be 43 | } 44 | }); 45 | ``` 46 | 47 | ## Usage with custom headers 48 | 49 | You may pass on custom headers to be included in all HTTP requests made by oembetter. `Referer` is particularly useful to ensure that Vimeo allows you to embed videos when they are private and embedding is restricted on a per-domain basis: 50 | 51 | ```javascript 52 | const oembetter = require('oembetter')(); 53 | 54 | oembetter.fetch('https://vimeo.com/abc/def', { 55 | headers: { 56 | 'Referer': 'https://example.com' 57 | } 58 | }, function(err, response) { 59 | if (!err) { 60 | // response.html contains markup to embed the video 61 | } 62 | }); 63 | ``` 64 | 65 | ## Important security note: allowlisting 66 | 67 | **Trusting `oembed` completely isn't safe for your users,** especially if you are allowing untrusted users to embed things. The HTML returned by third party sites could do nasty things like running JavaScript that sniffs user sessions or just displaying a fake login prompt. 68 | 69 | But sites like YouTube, Vimeo and Flickr do play nicely with others. So we use an allowlist to decide which domains are OK: 70 | 71 | ```javascript 72 | oembetter.allowlist([ 'youtube.com', 'vimeo.com', 'wufoo.com' ]); 73 | ``` 74 | 75 | Just list acceptable domain names and `oembetter` will make sure URLs are in one of those domains (or a subdomain) before doing anything else. If not, an error is delivered to the callback. 76 | 77 | For your convenience, there is a standard allowlist available. 
Use it at your own risk: 78 | 79 | ```javascript 80 | oembetter.allowlist(oembetter.suggestedAllowlist); 81 | ``` 82 | 83 | ## Suggesting Endpoints 84 | 85 | Some services support `oembed` but aren't discoverable. You can tell oembetter where to point for the oembed api by passing in an array of endpoints. 86 | 87 | ```javascript 88 | oembetter.endpoints([ 89 | { domain: 'instagram.com', endpoint: 'http://api.instagram.com/oembed' }, 90 | { domain: 'facebook.com', path: /\/videos\//, endpoint: 'https://www.facebook.com/plugins/video/oembed.json/' }, 91 | ]); 92 | ``` 93 | 94 | There is also a list of suggested endpoints which you can elect to use: 95 | 96 | ```javascript 97 | oembetter.endpoints(oembetter.suggestedEndpoints); 98 | ``` 99 | 100 | (Hint: you can concatenate an additional array with your own endpoints if desired.) 101 | 102 | ## Adding Filters 103 | 104 | ### Filtering before the oembed request 105 | 106 | Some services don't support `oembed`. In these cases you'll want to fake it by providing a filter that substitutes its own response. 107 | 108 | Other services support `oembed`, but only for certain URLs. In these cases you'll want to change the URL that `oembed` will use. 109 | 110 | Pass a function to the `addBefore` method. This function will receive the URL, the options object (which might contain `maxwidth` and `maxheight`), the response so far (usually undefined at this point), and a callback function. Your function should invoke the callback with an error if any, and a new URL, options object and response object if desired. You may also modify the objects you are given and skip passing any arguments to the callback. 111 | 112 | "When would `response` already be defined?" If another `before` filter has already suggested one, you'll see it even though we haven't made a real oembed call yet. 113 | 114 | **If any filter provides a response object, then an actual oembed call is not made.** Not all `before` filters do this. 
Some just change the URL. 115 | 116 | **Your filter must begin by making sure this URL is relevant to its interests.** 117 | 118 | Here's an example: `hootenanny.com` (yes, I made it up) has pages like `/pages/50.html`. We know each one has a thumbnail at `/thumbnails/50.jpg` and a video page suitable for iframes at `/videos/50`. Let's create our own oembed response since `hootenanny.com` doesn't support it. 119 | 120 | ```javascript 121 | 122 | oembetter.addBefore(function(url, options, response, callback) { 123 | const parsed = new URL(url); 124 | if (!oembetter.inDomain('hootenanny.com', parsed.hostname)) { 125 | return setImmediate(callback); 126 | } 127 | const matches = parsed.pathname.match(/pages\/(\d+)\.html/); 128 | if (!matches) { 129 | return setImmediate(callback); 130 | } 131 | const id = matches[1]; 132 | const newResponse = { 133 | thumbnail_url: 'http://hootenanny.com/thumbnails/' + id + '.jpg', 134 | html: '<iframe src="http://hootenanny.com/videos/' + id + '"></iframe>' 135 | }; 136 | return callback(null, url, options, newResponse); 137 | }); 138 | ``` 139 | 140 | You can also write a filter that just adjusts URLs. This filter knows that `wiggypants.com` URLs will work better if we point them at `jiggypants.com`: 141 | 142 | ```javascript 143 | oembetter.addBefore(function(url, options, response, callback) { 144 | const parsed = new URL(url); 145 | if (!oembetter.inDomain('wiggypants.com', parsed.hostname)) { 146 | return setImmediate(callback); 147 | } 148 | url = url.replace(/wiggypants\.com/g, 'jiggypants.com'); 149 | return callback(null, url); 150 | }); 151 | ``` 152 | 153 | ### Filtering after the oembed request 154 | 155 | Some services support `oembed`, but not quite well enough. So you want to make a small adjustment to the standard response. You want an `after` filter. 156 | 157 | Here's an async filter that makes sure YouTube's embed codes use `wmode=opaque` and also upgrades to a high-res thumbnail if possible. 
158 | 159 | ```javascript 160 | oembetter.addAfter(function(url, options, response, callback) { 161 | if (!url.match(/youtube/)) { 162 | return setImmediate(callback); 163 | } 164 | 165 | // Fix YouTube iframes to use wmode=opaque so they don't 166 | // ignore z-index in Windows Chrome 167 | response.html = response.html.replace('feature=oembed', 'feature=oembed&wmode=opaque'); 168 | 169 | // Change thumbnail to be largest available if it exists 170 | const maxResImage = response.thumbnail_url.replace('hqdefault.jpg', 'maxresdefault.jpg'); 171 | 172 | return request.head(maxResImage, function(err, httpResponse) { 173 | if (!err && httpResponse.statusCode < 400) { 174 | response.thumbnail_url = maxResImage; 175 | } 176 | return callback(null); 177 | }); 178 | }); 179 | ``` 180 | 181 | This filter modifies the `response` object directly. You may also pass a new `response` object as the second argument to the callback. 182 | 183 | ### Fallback filters: when all else fails 184 | 185 | `after` filters are only called if there *is* a response. 186 | 187 | If you wish to provide a fallback solution for cases where there is **no** response from oembed, use a fallback filter. 188 | 189 | This only makes sense when you're hopeful that oembed will work some of the time. If not, write a `before` filter that supplies its own response. 190 | 191 | ```javascript 192 | // "fallback" filter can create a response when oembed fails 193 | oembetter.addFallback(function(url, options, callback) { 194 | const parsed = new URL(url); 195 | if (!oembetter.inDomain('wonkypants83742938.com', parsed.hostname)) { 196 | return setImmediate(callback); 197 | } 198 | return callback(null, { html: 'so wonky' }); 199 | }); 200 | ``` 201 | 202 | ## About P'unk Avenue and ApostropheCMS 203 | 204 | `oembetter` was created at [P'unk Avenue](https://punkave.com) for use in many projects built with ApostropheCMS, an open-source content management system built on node.js. 
`oembetter` isn't mandatory for Apostrophe and vice versa, but they play very well together. If you like `oembetter` you should definitely [check out Apostrophe](https://apostrophecms.com/). 205 | --------------------------------------------------------------------------------