├── .eslintignore ├── .eslintrc.json ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ └── node.js.yml ├── .gitignore ├── .snyk ├── CHANGELOG.md ├── LICENSE ├── README.md ├── example ├── Dockerfile ├── index.js └── package.json ├── index.ts ├── lib ├── extract.ts ├── fallback.ts ├── fields.ts ├── isUrl.ts ├── media.ts ├── openGraphScraper.ts ├── request.ts ├── types.ts └── utils.ts ├── package-lock.json ├── package.json ├── tests ├── .eslintrc.json ├── integration │ ├── basic.spec.ts │ ├── blacklist.spec.ts │ ├── encoding.spec.ts │ ├── fetch.spec.ts │ ├── html.spec.ts │ ├── image.spec.ts │ ├── onlyGetOpenGraphInfo.spec.ts │ ├── redirect.spec.ts │ ├── spotify.spec.ts │ ├── static.spec.ts │ ├── statusCode.spec.ts │ ├── twitter.spec.ts │ ├── url.spec.ts │ └── video.spec.ts └── unit │ ├── fallback.spec.ts │ ├── media.spec.ts │ ├── openGraphScraper.spec.ts │ ├── static.spec.ts │ └── utils.spec.ts ├── tsconfig.build.json ├── tsconfig.declaration.json ├── tsconfig.json ├── tsconfig.tests.json └── types ├── index.d.ts └── lib ├── extract.d.ts ├── fallback.d.ts ├── fields.d.ts ├── isUrl.d.ts ├── media.d.ts ├── openGraphScraper.d.ts ├── request.d.ts ├── types.d.ts └── utils.d.ts /.eslintignore: -------------------------------------------------------------------------------- 1 | coverage/ 2 | dist/ 3 | node_modules/ 4 | example/ 5 | types/ 6 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | "airbnb-base", 4 | "airbnb-typescript/base", 5 | "plugin:promise/recommended", 6 | "plugin:@typescript-eslint/recommended-type-checked", 7 | "plugin:@typescript-eslint/stylistic-type-checked" 8 | ], 9 | "plugins": [ 10 | "promise" 11 | ], 12 | "parserOptions": { 13 | "project": "./tsconfig.json" 14 | }, 15 | "rules": { 16 | "@typescript-eslint/no-explicit-any": "warn", 17 | 
"@typescript-eslint/no-unsafe-argument": "warn", 18 | "@typescript-eslint/no-unsafe-assignment": "warn", 19 | "@typescript-eslint/no-unsafe-call": "warn", 20 | "@typescript-eslint/no-unsafe-member-access": "warn", 21 | "@typescript-eslint/no-unsafe-return": "warn", 22 | "import/no-named-as-default": 0, 23 | "max-len": ["error", { 24 | "code": 120, 25 | "ignoreStrings": true, 26 | "ignoreTrailingComments": true 27 | }], 28 | "no-param-reassign": "off" 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **Actual behavior** 20 | A clear and concise description of what is happening. 21 | 22 | **Screenshots** 23 | If applicable, add screenshots to help explain your problem. 24 | 25 | **Additional context** 26 | Add any other context about the problem here. 27 | 28 | - OS: [e.g. iOS/Windows/Linux] 29 | - Node Version: [e.g. 18] 30 | - openGraphScraper Version: 31 | - tsconfig.json: 32 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 
12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/workflows/node.js.yml: -------------------------------------------------------------------------------- 1 | # This workflow will do a clean install of node dependencies, build the source code and run tests across different versions of node 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-nodejs-with-github-actions 3 | 4 | name: Node.js CI 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | buildAndTest: 14 | runs-on: ubuntu-latest 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | node-version: 19 | - 18 20 | - 20 21 | - 22 22 | steps: 23 | - uses: actions/checkout@v3 24 | - name: Use Node.js ${{ matrix.node-version }} 25 | uses: actions/setup-node@v3 26 | with: 27 | node-version: ${{ matrix.node-version }} 28 | - name: Install Dependencies 29 | run: npm ci 30 | - name: Linting 31 | run: npm run eslint 32 | - name: Running Typescript 33 | run: npm run build 34 | - name: Running Typescript Declaration 35 | run: npm run build:declaration 36 | - name: Unit Testing 37 | run: npm run mocha:unit 38 | - name: Integration Testing 39 | run: npm run mocha:int 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | lib-cov 2 | *.seed 3 | *.log 4 | *.csv 5 | *.dat 6 | *.out 7 | *.pid 8 | *.gz 9 | *.DS_Store 10 | 11 | pids 12 | logs 13 | results 14 | node_modules 15 | dist 16 | 17 | 
.nyc_output/ 18 | coverage/ 19 | 20 | npm-debug.log 21 | .idea/ 22 | -------------------------------------------------------------------------------- /.snyk: -------------------------------------------------------------------------------- 1 | # Snyk (https://snyk.io) policy file, patches or ignores known vulnerabilities. 2 | version: v1.14.1 3 | ignore: {} 4 | # patches apply the minimum changes required to fix a vulnerability 5 | patch: 6 | SNYK-JS-LODASH-567746: 7 | - lodash: 8 | patched: '2020-05-30T23:04:33.532Z' 9 | - cheerio > lodash: 10 | patched: '2020-05-30T23:04:33.532Z' 11 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | ## 6.10.0 4 | 5 | - Updating dependencies to fix npm vulnerabilities 6 | 7 | ## 6.9.0 8 | 9 | - Add `jsonLDOptions.throwOnJSONParseError` and change default behavior to not throw on JSON-LD string parse errors 10 | 11 | ## 6.8.4 12 | 13 | - Normalize `content-type` header check for case insensitivity 14 | - Updating dependencies 15 | 16 | ## 6.8.3 17 | 18 | - Fixed issue where empty jsonLD would caused an error 19 | - Updating dependencies 20 | 21 | ## 6.8.2 22 | 23 | - Remove new lines from jsonLD. 24 | - If url string is not `isLatin1` then encode it, otherwise you will run into `ByteString` errors within `fetch` 25 | - Updating dependencies 26 | 27 | ## 6.8.1 28 | 29 | - Fixing issue where setting `fetchOptions.headers` would replace the default `headers` 30 | - Updating dependencies 31 | 32 | ## 6.8.0 33 | 34 | - Updating how `onlyGetOpenGraphInfo` works. By default it is `false` but now it accepts an array of properties for which no fallback should be used. 35 | - Updating how you get types `import { SuccessResult } from 'open-graph-scraper/types';`. See readme for details. 36 | - Updating dependencies 37 | 38 | ## 6.7.2 39 | 40 | - Adding `types` to the npm export. 
You can now use `import { SuccessResult } from 'open-graph-scraper/types/lib/types';` 41 | - Updating dependencies 42 | 43 | ## 6.7.1 44 | 45 | - Remove `default` export off of the `run` function and just set `export` to `run`. 46 | - Updating dependencies 47 | 48 | ## 6.7.0 49 | 50 | - Replace `validator` with internal version of `isUrl` so we have better control on how that works. 51 | - Fix issue where `JSON` parsing fails when YouTube escapes '&' to '\x26'. 52 | - Updating dependencies 53 | 54 | ## 6.6.3 55 | 56 | - Fix issue with the `charset` fallback. Replace Buffer.from with Uint8Array since body is always html 57 | - Updating dependencies to fix npm vulnerabilities 58 | 59 | ## 6.6.2 60 | 61 | - Fixed issue where `package.json` `exports` was not working in `CommonJs` projects. 62 | - Fixed issue where if the `jsonLD` tag was empty, it would cause an error. 63 | 64 | ## 6.6.1 65 | 66 | - Use `node16` for `module`/`moduleResolution` ESM build 67 | - Fixed issue where `package.json` `exports` was not working in `NextJs` projects. 68 | 69 | ## 6.6.0 70 | 71 | - Updating the `tsc` build process to better support both `ESM` and `commonJS` 72 | - Fixed issue where some meta tags would always come back as an array even though there was only ever one meta tag. 
73 | - Removed the `dist` folder from version control 74 | - Start running node22 in the CI pipeline 75 | - General typescript clean up 76 | - Example service will only return the `result` of OGS now 77 | - Updating dependencies 78 | 79 | ## 6.5.2 80 | 81 | - Adding a new favicon fallback using appIcon 82 | - Updating dependencies to fix npm vulnerabilities 83 | 84 | ## 6.5.1 85 | 86 | - jsonLD is now an array of objects since there can be more than one jsonLD tag per page 87 | - Updating dependencies to fix npm vulnerabilities 88 | 89 | ## 6.5.0 90 | 91 | - Adding support for JSON LD 92 | - Adding support for `og:image:alt`, `twitterAccount`, `fbAppId` and extra og tags for `music` and `video` 93 | - Fixing jsdoc param name 94 | - Updating dependencies 95 | 96 | ## 6.4.0 97 | 98 | - Add character encoding detection and decoding logic using `iconv-lite` 99 | - Updating dependencies 100 | 101 | ## 6.3.4 102 | 103 | - Adding check to make sure `customMetaTags` are valid 104 | - Updating dependencies 105 | 106 | ## 6.3.3 107 | 108 | - Updating dependencies 109 | - Send the `Accept: text/html` header by default 110 | 111 | ## 6.3.2 112 | 113 | - Fixing issue with npm 114 | 115 | ## 6.3.1 116 | 117 | - Adding a fallback for `charset` using `http-equiv` 118 | - Updating dependencies to fix npm vulnerabilities 119 | 120 | ## 6.3.0 121 | 122 | - Export `SuccessResult` and `ErrorResult` types 123 | - Updating dependencies 124 | 125 | ## 6.2.2 126 | 127 | - Updating dependencies to fix npm vulnerabilities 128 | 129 | ## 6.2.1 130 | 131 | - Send back more details when there is a server error 132 | 133 | ## 6.2.0 134 | 135 | - Modified the `url` property in `OpenGraphScraperOptions` to be an optional property since you don't need this when using just `html` 136 | - `Type` can be optional in `ImageObject` since type is not set if it's invalid 137 | - Take all of the `customMetaTags` out of the base of `ogObject` and store them into `ogObject.customMetaTags` 138 | - The internal meta 
properties can be string arrays 139 | - Updating Dependencies 140 | 141 | ## 6.1.0 142 | 143 | - Setting the `origin` `header` to the request url since `fetch` runs in [cors mode by default](https://github.com/nodejs/undici/issues/1305). 144 | - Import `undici` for `fetch` so all versions of node18 are running the same version of `fetch`. Now ogs supports all versions of node18! 145 | - Updating Dependencies 146 | 147 | ## 6.0.1 148 | 149 | - `OpenGraphScraperOptions.fetchOptions` should be of type `RequestInit` instead of `Request`. 150 | - Updating Dependencies 151 | 152 | ## 6.0.0 (Has breaking changes!) 153 | 154 | - Replace `GOT` with [fetch](https://nodejs.org/docs/latest-v18.x/api/globals.html#fetch)! 155 | - Only supporting `node18` or higher going forward 156 | - Updated how options work. `Fetch` and `OGS` options no longer being mixed together, users can now set [fetch options](https://developer.mozilla.org/en-US/docs/Web/API/fetch#options) using `options.fetchOptions` 157 | - Remove any ogImages/ogVideos/twitterImages/twitterPlayers/musicSongs results that have no url 158 | - The `downloadLimit` option has been removed in favor of just using timeouts. 159 | - Limit ogImages/ogVideos/twitterImages/twitterPlayers/musicSongs to 10 items 160 | - Adding html to the `SuccessResult` of `OGS` 161 | - Adding `options.timeout` to set the fetch request timeout. 
(default is 10 seconds) 162 | - Remove `null` values from ogImages/ogVideos/twitterImages/twitterPlayers/musicSongs 163 | - Removing `options.allMedia`, you can just grab the first value of the array for the previous behavior 164 | - Removing `options.ogImageFallback`, you can set `options.onlyGetOpenGraphInfo` to `true` for the previous behavior 165 | - ogImages/ogVideos/twitterImages/twitterPlayers/musicSongs will always be an array now, you can just grab the first value of the array for the previous behavior 166 | - Updating Dependencies 167 | 168 | ## 5.2.3 169 | 170 | - Add in declaration files for typescript users. 171 | 172 | ## 5.2.2 173 | 174 | - Specify true/false to distinguish `SuccessResult` and `ErrorResult` by `error` field. 175 | 176 | ## 5.2.1 177 | 178 | - Adding the importsNotUsedAsValues flag and fixing type import issues 179 | 180 | ## 5.2.0 181 | 182 | - Remove the `charset` lib and just use `chardet` for finding the html encoding 183 | - Remove `peekSize` option since that was used by `charset` 184 | - Updating the `charset` fallback to be more reliable 185 | - Adding support for `article:published_date` and `article:modified_date` meta tags 186 | - Updating Dependencies 187 | 188 | ## 5.1.1 189 | 190 | - Fix issue where using `import` would cause typescript errors 191 | - Updating the `urlValidatorSettings` defaults to match `validatorjs` 192 | - Updating Dependencies 193 | 194 | ## 5.1.0 195 | 196 | - Convert source code to typescript 197 | - Changing `response.body` to be a `string` and `response.rawBody` to be a `buffer`. They now match the `node` type for `response`. 
198 | - Updating Dependencies 199 | 200 | ## 5.0.5 201 | 202 | - Adding `twitterImageObject` and `twitterPlayerObject` types 203 | - Updating Dependencies 204 | 205 | ## 5.0.4 206 | 207 | - The `options.downloadLimit` type now allows for `false` 208 | - Updating Dependencies 209 | 210 | ## 5.0.3 211 | 212 | - Adding successResult and errorResult types 213 | 214 | ## 5.0.2 215 | 216 | - TS export now has common GOT options. 217 | - Updating Dependencies 218 | 219 | ## 5.0.1 220 | 221 | - TS export run as a Promise 222 | - Updating Docs 223 | - Updating Dependencies 224 | 225 | ## 5.0.0 226 | 227 | - Updating to `got` version 12! 228 | - Adding typescript support. 229 | - The `retry` option is now an object -> https://github.com/sindresorhus/got/blob/main/documentation/7-retry.md#retry 230 | - The `timeout` option is now an object -> https://github.com/sindresorhus/got/blob/main/documentation/6-timeout.md#timeout-options 231 | - Dropping callback support. If you still want to use callbacks, you can use `callbackify` -> https://nodejs.org/api/util.html#util_util_callbackify_original 232 | - Auth errors will now be passed back to the client and will no longer just be `Page not found` errors. 233 | - Dropping support for node12 since `got` no longer supports it. 234 | - Removing `options.encoding`. 
235 | - Updating Dependencies 236 | 237 | ## 4.11.1 238 | 239 | - Updating Dependencies to fix a security vulnerability 240 | 241 | ## 4.11.0 242 | 243 | - Adding support for fetching the favicon 244 | - Updating Dependencies 245 | 246 | ## 4.10.0 247 | 248 | - Adding a check for the `content-type` header, it has to contain `text/html` 249 | - Adding `options.downloadLimit`, it sets the maximum size of the content downloaded from the server, in bytes 250 | - Updating Dependencies 251 | 252 | ## 4.9.2 253 | 254 | - Updating Dependencies to fix a security vulnerability 255 | 256 | ## 4.9.1 257 | 258 | - Updating Dependencies to fix a security vulnerability 259 | 260 | ## 4.9.0 261 | 262 | - Dropping support for Node10 since it has reached its end of life 263 | - Setting response.rawBody to the parsed body since response.body is a buffer 264 | - Updating Dependencies 265 | 266 | ## 4.8.2 267 | 268 | - Adding support for Node16 269 | - Updating Dependencies 270 | 271 | ## 4.8.1 272 | 273 | - Fixing bug where the title fallback would return multiple titles 274 | 275 | ## 4.8.0 276 | 277 | - Adding support for Proxies 278 | - Updating Dependencies 279 | 280 | ## 4.7.1 281 | 282 | - Updating Dependencies to fix a security vulnerability 283 | 284 | ## 4.7.0 285 | 286 | - Adding `options.urlValidatorSettings`, it sets the options used by validator.js for testing the URL 287 | - Updating Dependencies 288 | 289 | ## 4.6.0 290 | 291 | - Fixing issue where you would get false positive errors with pages that have `.tar` in them like `www.target.com` 292 | - Split extract and request into their own files 293 | - Updating Dependencies 294 | 295 | ## 4.5.1 296 | 297 | - Fixing issue where you couldn't set the `ogImageFallback` option to false 298 | - Fixing image type fallback so it works with arrays 299 | 300 | ## 4.5.0 301 | 302 | - Adding support for custom meta tags you want to scrape 303 | - If ogs thinks the URL isn't an HTML page, it will return a 'Must scrape an HTML page' 
error. 304 | - Updating Dependencies 305 | 306 | ## 4.4.0 307 | 308 | - Adding support for app links meta data 309 | - Removed the `withCharset` option, you can use `onlyGetOpenGraphInfo` now if you do not want charset 310 | - Removed the `runChar` option, this will always be turned on 311 | - `options.encoding === null` is now deprecated 312 | - Updating image fallback to only send back valid URLs 313 | - Updating Dependencies 314 | 315 | ## 4.3.1 316 | 317 | - Small code clean up and adding tests 318 | - Updating Dependencies 319 | 320 | ## 4.3.0 321 | 322 | - Adding support for request headers 323 | 324 | ## 4.2.1 325 | 326 | - Make sure item.fieldName exists before trying to use it 327 | - Updating devDependencies 328 | - Updating eslint rule set to be more simple 329 | - Fixed the badge icon in the readme 330 | 331 | ## 4.2.0 332 | 333 | - Checking for new tags like article, book, profile, business and restaurant 334 | - Adding support for Dublin Core tags! 335 | - Updating image fallback to send back width/height/type 336 | - Adding more title/description/locale/audio/other fallbacks 337 | - Fixed bug where if there was a weird casing on a meta, ogs would skip it 338 | - Will no longer return undefined values in some cases 339 | - Updating dependencies and removed lodash 340 | 341 | ## 4.1.1 342 | 343 | - Updating to use github actions for CI! 344 | 345 | ## 4.1.0 346 | 347 | - Updating to use `validators.js`'s `isURL` to check user input URLs 348 | - Moving snyk to be under devDependencies 349 | 350 | ## 4.0.0 (has breaking changes!) 351 | 352 | - Dropping support for any node version under 10 353 | - Open Graph values are no longer nested in a data object. 354 | - Stop using request.js(deprecated) and start using got.js 355 | - Using promises will now send the error/result/response back in one object. 
356 | - Options.gzip is now options.decompress 357 | - Options.followAllRedirects is now options.followRedirect 358 | - Drop support for options.jar 359 | - Options.timeout must be a number value 360 | - Updating error messaging 361 | - Updating dependencies 362 | 363 | ## 3.6.2 364 | 365 | - Updating lodash. 366 | 367 | ## 3.6.1 368 | 369 | - Updating dependencies to fix vulnerabilities. 370 | 371 | ## 3.6.0 372 | 373 | - Replaced jschardet with chardet. 374 | 375 | ## 3.5.1 376 | 377 | - Updating dependencies. 378 | 379 | ## 3.5.0 380 | 381 | - Adding Open Graph music tags! 382 | 383 | ## 3.4.0 384 | 385 | - Adding a new option for the 'jar' setting for requests. It will now be turned off by default. 386 | 387 | ## 3.3.0 388 | 389 | - Code refactor to work in an es5 environment! 390 | 391 | ## 3.2.0 392 | 393 | - Websites that don't have Open Graph images will now return an array of all of the images on the site 394 | 395 | ## 3.1.5 396 | 397 | - Updating lodash to fix a vulnerability 398 | 399 | ## 3.1.4 400 | 401 | - Returns more info on the error that occurred when using promises 402 | 403 | ## 3.1.3 404 | 405 | - Catch iconv exception to prevent unexpected charset 406 | 407 | ## 3.1.2 408 | 409 | - Checking for Open Graph price and availability info 410 | 411 | ## 3.1.1 412 | 413 | - Updating packages 414 | 415 | ## 3.1.0 416 | 417 | - Adding ability to extract meta from HTML string 418 | 419 | ## 3.0.2 420 | 421 | - Adding CHANGELOG.md 422 | 423 | ## 3.0.1 424 | 425 | - Fixing coverage reporter 426 | - Fixing tests 427 | 428 | ## 3.0.0 429 | 430 | - Updated dependencies to their latest version(s) 431 | - Officially now support Node.js v4 and up 432 | - Adds unit tests to ensure code quality 433 | - Adds options for encoding, blacklist, followAllRedirects, and maxRedirects 434 | - Module can now be used as a promise 435 | - `err` is now `error` 436 | - Adds check for Open Graph product info 437 | -------------------------------------------------------------------------------- 
/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # openGraphScraper 2 | 3 | [![Node.js CI](https://github.com/jshemas/openGraphScraper/workflows/Node.js%20CI/badge.svg?branch=master)](https://github.com/jshemas/openGraphScraper/actions?query=branch%3Amaster) 4 | [![Known Vulnerabilities](https://snyk.io/test/github/jshemas/openGraphScraper/badge.svg)](https://snyk.io/test/github/jshemas/openGraphScraper) 5 | 6 | A simple node module(with TypeScript declarations) for scraping Open Graph and Twitter Card and other metadata off a site. 
7 | 8 | Note: `open-graph-scraper` doesn't support browser usage at this time but you can use `open-graph-scraper-lite` if you already have the `HTML` and can't use Node's [Fetch API](https://nodejs.org/dist/latest-v18.x/docs/api/globals.html#fetch). 9 | 10 | ## Installation 11 | 12 | ```bash 13 | npm install open-graph-scraper --save 14 | ``` 15 | 16 | ## Usage 17 | 18 | ```javascript 19 | const ogs = require('open-graph-scraper'); 20 | const options = { url: 'http://ogp.me/' }; 21 | ogs(options) 22 | .then((data) => { 23 | const { error, html, result, response } = data; 24 | console.log('error:', error); // This returns true or false. True if there was an error. The error itself is inside the result object. 25 | console.log('html:', html); // This contains the HTML of page 26 | console.log('result:', result); // This contains all of the Open Graph results 27 | console.log('response:', response); // This contains response from the Fetch API 28 | }) 29 | ``` 30 | 31 | ## Results JSON 32 | 33 | Check the return for a ```success``` flag. If success is set to true, then the url input was valid. Otherwise it will be set to false. The above example will return something like... 
34 | 35 | ```javascript 36 | { 37 | ogTitle: 'Open Graph protocol', 38 | ogType: 'website', 39 | ogUrl: 'https://ogp.me/', 40 | ogDescription: 'The Open Graph protocol enables any web page to become a rich object in a social graph.', 41 | ogImage: [ 42 | { 43 | height: '300', 44 | type: 'image/png', 45 | url: 'https://ogp.me/logo.png', 46 | width: '300' 47 | } 48 | ], 49 | charset: 'utf-8', 50 | requestUrl: 'http://ogp.me/', 51 | success: true 52 | } 53 | ``` 54 | 55 | ## Options 56 | 57 | | Name | Info | Default Value | Required | 58 | |----------------------|-------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------|----------| 59 | | url | URL of the site. | | x | 60 | | html | You can pass in an HTML string to run ogs on it. (use without options.url) | | | 61 | | fetchOptions | Options that are used by the Fetch API | {} | | 62 | | timeout | Request timeout for Fetch (Default is 10 seconds) | 10 | | 63 | | blacklist | Pass in an array of sites you don't want ogs to run on. | [] | | 64 | | onlyGetOpenGraphInfo | Only fetch open graph info and don't fall back on anything else. Also accepts an array of properties for which no fallback should be used | false | | 65 | | customMetaTags | Here you can define custom meta tags you want to scrape. | [] | | 66 | | urlValidatorSettings | Sets the options used by validator.js for testing the URL | [Here](https://github.com/jshemas/openGraphScraper/blob/master/lib/utils.ts#L4-L17) | | 67 | | jsonLDOptions | Sets the options used when parsing JSON-LD data | | | 68 | 69 | Note: `open-graph-scraper` uses the [Fetch API](https://nodejs.org/dist/latest-v18.x/docs/api/globals.html#fetch) for requests and most of [Fetch's options](https://developer.mozilla.org/en-US/docs/Web/API/fetch#options) should work as `open-graph-scraper`'s `fetchOptions` options. 
70 | 71 | ## Types And Import Example 72 | 73 | ```javascript 74 | // example of how to get types 75 | import type { SuccessResult } from 'open-graph-scraper/types'; 76 | const example: SuccessResult = { 77 | result: { ogTitle: 'this is a title' }, 78 | error: false, 79 | response: {}, 80 | html: '' 81 | } 82 | 83 | // import example 84 | import ogs from 'open-graph-scraper'; 85 | const options = { url: 'http://ogp.me/' }; 86 | ogs(options) 87 | .then((data) => { 88 | const { error, html, result, response } = data; 89 | console.log('error:', error); // This returns true or false. True if there was an error. The error itself is inside the result object. 90 | console.log('html:', html); // This contains the HTML of page 91 | console.log('result:', result); // This contains all of the Open Graph results 92 | console.log('response:', response); // This contains response from the Fetch API 93 | }); 94 | ``` 95 | 96 | ## Custom Meta Tag Example 97 | 98 | ```javascript 99 | const ogs = require('open-graph-scraper'); 100 | const options = { 101 | url: 'https://github.com/jshemas/openGraphScraper', 102 | customMetaTags: [{ 103 | multiple: false, // is there more than one of these tags on a page (normally this is false) 104 | property: 'hostname', // meta tag name/property attribute 105 | fieldName: 'hostnameMetaTag', // name of the result variable 106 | }], 107 | }; 108 | ogs(options) 109 | .then((data) => { 110 | const { result } = data; 111 | console.log('hostnameMetaTag:', result.customMetaTags.hostnameMetaTag); // hostnameMetaTag: github.com 112 | }) 113 | ``` 114 | 115 | ## HTML Example 116 | 117 | ```javascript 118 | const ogs = require('open-graph-scraper'); 119 | const options = { 120 | html: ` 121 | 122 | 123 | 124 | 125 | 126 | 127 | ` 128 | }; 129 | ogs(options) 130 | .then((data) => { 131 | const { result } = data; 132 | console.log('result:', result); 133 | // result: { 134 | // ogDescription: 'html description example', 135 | // ogTitle: 'foobar', 136 | // 
ogType: 'website', 137 | // ogImage: [ { url: 'https://www.foo.com/bar.jpg', type: 'jpg' } ], 138 | // favicon: 'https://bar.com/foo.png', 139 | // charset: 'utf-8', 140 | // success: true 141 | // } 142 | }) 143 | 144 | ``` 145 | 146 | ## User Agent Example 147 | 148 | The request header is set to [undici](https://github.com/nodejs/undici) by default. Some sites might block this, and changing the `userAgent` might work. If not you can try [using a proxy](https://www.scrapingbee.com/blog/proxy-node-fetch/) for the request and then pass the `html` into `open-graph-scraper`. 149 | 150 | ```javascript 151 | const ogs = require("open-graph-scraper"); 152 | const userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36'; 153 | ogs({ url: 'https://www.wikipedia.org/', fetchOptions: { headers: { 'user-agent': userAgent } } }) 154 | .then((data) => { 155 | const { error, html, result, response } = data; 156 | console.log('error:', error); // This returns true or false. True if there was an error. The error itself is inside the result object. 157 | console.log('html:', html); // This contains the HTML of page 158 | console.log('result:', result); // This contains all of the Open Graph results 159 | console.log('response:', response); // This contains response from the Fetch API 160 | }) 161 | ``` 162 | 163 | ## JSON-LD Parsing Options Example 164 | 165 | `throwOnJSONParseError` and `logOnJSONParseError` properties control what happens if `JSON.parse` 166 | throws an error when parsing JSON-LD data. 167 | If `throwOnJSONParseError` is set to `true`, then the error will be thrown. 168 | If `logOnJSONParseError` is set to `true`, then the error will be logged to the console. 
169 | 170 | ```javascript 171 | const ogs = require("open-graph-scraper"); 172 | const userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36'; 173 | ogs({ url: 'https://www.wikipedia.org/', jsonLDOptions: { throwOnJSONParseError: true } }) 174 | .then((data) => { 175 | const { error, html, result, response } = data; 176 | console.log('error:', error); // This returns true or false. True if there was an error. The error itself is inside the result object. 177 | console.log('html:', html); // This contains the HTML of page 178 | console.log('result:', result); // This contains all of the Open Graph results 179 | console.log('response:', response); // This contains response from the Fetch API 180 | }) 181 | ``` 182 | 183 | ## Running the example app 184 | 185 | Inside the `example` folder contains a simple express app where you can run `npm ci && npm run start` to spin up. Once the app is running, open a web browser and go to `http://localhost:3000/scraper?url=http://ogp.me/` to test it out. There is also a `Dockerfile` if you want to run this example app in a docker container. 186 | -------------------------------------------------------------------------------- /example/Dockerfile: -------------------------------------------------------------------------------- 1 | # docker build -t open-graph-scraper . 2 | # docker run -dp 127.0.0.1:3000:3000 open-graph-scraper 3 | # http://127.0.0.1:3000/scraper?url=http://ogp.me/ 4 | FROM node:20 5 | 6 | WORKDIR /usr/src/app 7 | 8 | COPY package*.json ./ 9 | 10 | RUN npm install 11 | 12 | COPY . . 
/**
 * `open-graph-scraper` uses [fetch](https://developer.mozilla.org/en-US/docs/Web/API/fetch) for http requests
 * for scraping Open Graph and Twitter Card info off a website.
 *
 * This is the public entry point: it delegates all of the work to `setOptionsAndReturnOpenGraphResults`
 * and only reshapes the outcome into a `SuccessResult` (resolved) or an `ErrorResult` (rejected).
 *
 * @param {object} options - The options used by Open Graph Scraper
 * @param {boolean|string[]} [options.onlyGetOpenGraphInfo] - Only fetch open graph info and don't fall back on
 * anything else. Also accepts an array of properties for which no fallback should be used.
 * @param {object} [options.customMetaTags] - Here you can define custom meta tags you want to scrape.
 * @param {object} [options.fetchOptions] - Sets the options used by fetch for the http requests
 * @param {object} [options.urlValidatorSettings] - Sets the options used by validator.js for testing the URL
 * @param {object} [options.jsonLDOptions] - Sets the options used when parsing JSON-LD data
 * @param {string[]} [options.blacklist] - Pass in an array of sites you don't want ogs to run on.
 * @param {string} [options.html] - You can pass in an HTML string to run ogs on it. (use without options.url)
 * @param {number} [options.timeout] - Number of seconds before the fetch request ends. (default is 10 seconds)
 * @param {string} [options.url] - URL of the site. (Required unless options.html is used)
 * @returns {Promise<SuccessResult>} Promise Object with the Open Graph results
 * @throws {ErrorResult} Rejects with a plain `ErrorResult` object (not an `Error` instance) when scraping fails;
 * the underlying exception is available as `result.errorDetails`.
 */
async function run(options: OpenGraphScraperOptions): Promise<SuccessResult> {
  let results;
  try {
    results = await setOptionsAndReturnOpenGraphResults(options);
  } catch (error: unknown) {
    // Anything can be thrown in JavaScript. Narrow instead of asserting so that `result.error` is always a
    // string and `result.errorDetails` is always a real Error, even when a non-Error value was thrown.
    const exception = error instanceof Error ? error : new Error(String(error));
    const returnError: ErrorResult = {
      error: true,
      result: {
        success: false,
        requestUrl: options.url,
        error: exception.message,
        errorDetails: exception,
      },
      response: undefined,
      html: undefined,
    };
    // Callers depend on receiving the ErrorResult shape itself, so the object literal is thrown on purpose.
    // eslint-disable-next-line @typescript-eslint/no-throw-literal
    throw returnError;
  }

  const returnSuccess: SuccessResult = {
    error: false,
    result: results.ogObject,
    response: results.response,
    html: results.html,
  };
  return returnSuccess;
}
import { load } from 'cheerio'; 2 | 3 | import fallback from './fallback'; 4 | import fields from './fields'; 5 | import mediaSetup from './media'; 6 | import { unescapeScriptText } from './utils'; 7 | 8 | import type { OgObjectInternal, OpenGraphScraperOptions } from './types'; 9 | 10 | /** 11 | * extract all of the meta tags needed for ogs 12 | * 13 | * @param {string} body - the body of the fetch request 14 | * @param {object} options - options for ogs 15 | * @return {object} object with ogs results 16 | * 17 | */ 18 | export default function extractMetaTags(body: string, options: OpenGraphScraperOptions) { 19 | let ogObject: OgObjectInternal = { success: true }; 20 | const $ = load(body); 21 | const metaFields = fields; 22 | 23 | // find all of the open graph info in the meta tags 24 | $('meta').each((index, meta) => { 25 | if (!meta.attribs || (!meta.attribs.property && !meta.attribs.name)) return; 26 | const property = meta.attribs.property || meta.attribs.name; 27 | const content: any = meta.attribs.content || meta.attribs.value; 28 | metaFields.forEach((item) => { 29 | if (item && property.toLowerCase() === item.property.toLowerCase()) { 30 | // check if fieldName is one of mediaMapperProperties 31 | if ( 32 | item.fieldName === 'musicSongDisc' 33 | || item.fieldName === 'musicSongProperty' 34 | || item.fieldName === 'musicSongTrack' 35 | || item.fieldName === 'musicSongUrl' 36 | || item.fieldName === 'ogImageAlt' 37 | || item.fieldName === 'ogImageHeight' 38 | || item.fieldName === 'ogImageProperty' 39 | || item.fieldName === 'ogImageSecureURL' 40 | || item.fieldName === 'ogImageType' 41 | || item.fieldName === 'ogImageURL' 42 | || item.fieldName === 'ogImageWidth' 43 | || item.fieldName === 'ogVideoHeight' 44 | || item.fieldName === 'ogVideoProperty' 45 | || item.fieldName === 'ogVideoType' 46 | || item.fieldName === 'ogVideoWidth' 47 | || item.fieldName === 'twitterImageAlt' 48 | || item.fieldName === 'twitterImageHeight' 49 | || item.fieldName ===
'twitterImageProperty' 50 | || item.fieldName === 'twitterImageSrc' 51 | || item.fieldName === 'twitterImageWidth' 52 | || item.fieldName === 'twitterPlayerHeight' 53 | || item.fieldName === 'twitterPlayerProperty' 54 | || item.fieldName === 'twitterPlayerStream' 55 | || item.fieldName === 'twitterPlayerWidth' 56 | ) { 57 | if (!ogObject[item.fieldName]) { 58 | ogObject[item.fieldName] = [content]; 59 | } else { 60 | ogObject[item.fieldName]?.push(content); 61 | } 62 | } else { 63 | ogObject[item.fieldName] = content; 64 | } 65 | } 66 | }); 67 | 68 | if (options.customMetaTags) { 69 | options.customMetaTags.forEach((item) => { 70 | if (!ogObject.customMetaTags) ogObject.customMetaTags = {}; 71 | if (item && property.toLowerCase() === item.property.toLowerCase()) { 72 | if (!item.multiple) { 73 | ogObject.customMetaTags[item.fieldName] = content; 74 | } else if (!ogObject.customMetaTags[item.fieldName]) { 75 | ogObject.customMetaTags[item.fieldName] = [content]; 76 | } else if (Array.isArray(ogObject.customMetaTags[item.fieldName])) { 77 | ogObject.customMetaTags[item.fieldName] = [ 78 | ...ogObject.customMetaTags[item.fieldName], 79 | content, 80 | ]; 81 | } 82 | } 83 | }); 84 | if (ogObject.customMetaTags && Object.keys(ogObject.customMetaTags).length === 0) delete ogObject.customMetaTags; 85 | } 86 | }); 87 | 88 | // formats the multiple media values 89 | ogObject = mediaSetup(ogObject); 90 | 91 | // if onlyGetOpenGraphInfo isn't set, run the open graph fallbacks 92 | if (!options.onlyGetOpenGraphInfo || Array.isArray(options.onlyGetOpenGraphInfo)) { 93 | ogObject = fallback(ogObject, options, $, body); 94 | 95 | $('script').each((index, script) => { 96 | if (script.attribs.type && script.attribs.type === 'application/ld+json') { 97 | if (!ogObject.jsonLD) ogObject.jsonLD = []; 98 | let scriptText = $(script).text().trim(); 99 | if (scriptText) { 100 | scriptText = scriptText.replace(/(\r\n|\n|\r)/gm, ''); // remove newlines 101 | scriptText = 
unescapeScriptText(scriptText); 102 | try { 103 | ogObject.jsonLD.push(JSON.parse(scriptText)); 104 | } catch (error: unknown) { 105 | if (options.jsonLDOptions?.logOnJSONParseError) { 106 | console.error('Error parsing JSON-LD script tag:', error); 107 | } 108 | if (options.jsonLDOptions?.throwOnJSONParseError) { 109 | throw error; 110 | } 111 | } 112 | } 113 | } 114 | }); 115 | } 116 | 117 | return ogObject; 118 | } 119 | -------------------------------------------------------------------------------- /lib/fallback.ts: -------------------------------------------------------------------------------- 1 | import chardet from 'chardet'; 2 | import type { CheerioAPI } from 'cheerio'; 3 | 4 | import { 5 | defaultUrlValidatorSettings, 6 | findImageTypeFromUrl, 7 | isImageTypeValid, 8 | isUrlValid, 9 | } from './utils'; 10 | import type { 11 | OpenGraphScraperOptions, ImageObject, OgObjectInternal, OnlyGetOpenGraphInfoItem, 12 | } from './types'; 13 | 14 | const doesElementExist = (selector:string, attribute:string, $: CheerioAPI) => ( 15 | $(selector).attr(attribute) && ($(selector).attr(attribute)?.length ?? 
0) > 0 16 | ); 17 | 18 | /** 19 | * ogs fallbacks 20 | * 21 | * @param {object} ogObject - the current ogObject 22 | * @param {object} options - options for ogs 23 | * @param {object} $ - cheerio.load() of the current html 24 | * @return {object} object with ogs results with updated fallback values 25 | * 26 | */ 27 | export function fallback(ogObject: OgObjectInternal, options: OpenGraphScraperOptions, $: CheerioAPI, body: string) { 28 | const shouldFallback = (key: OnlyGetOpenGraphInfoItem): boolean => { 29 | if (!options.onlyGetOpenGraphInfo) { 30 | return true; 31 | } 32 | if (options.onlyGetOpenGraphInfo === true) { 33 | return false; 34 | } 35 | return !options.onlyGetOpenGraphInfo.includes(key); 36 | }; 37 | 38 | // title fallback 39 | if (!ogObject.ogTitle && shouldFallback('title')) { 40 | if ($('title').text() && $('title').text().length > 0) { 41 | ogObject.ogTitle = $('title').first().text(); 42 | } else if ($('head > meta[name="title"]').attr('content') && ($('head > meta[name="title"]').attr('content')?.length ?? 
0) > 0) { 43 | ogObject.ogTitle = $('head > meta[name="title"]').attr('content'); 44 | } else if ($('.post-title').text() && $('.post-title').text().length > 0) { 45 | ogObject.ogTitle = $('.post-title').text(); 46 | } else if ($('.entry-title').text() && $('.entry-title').text().length > 0) { 47 | ogObject.ogTitle = $('.entry-title').text(); 48 | } else if ($('h1[class*="title" i] a').text() && $('h1[class*="title" i] a').text().length > 0) { 49 | ogObject.ogTitle = $('h1[class*="title" i] a').text(); 50 | } else if ($('h1[class*="title" i]').text() && $('h1[class*="title" i]').text().length > 0) { 51 | ogObject.ogTitle = $('h1[class*="title" i]').text(); 52 | } 53 | } 54 | 55 | // Get meta description tag if og description was not provided 56 | if (!ogObject.ogDescription && shouldFallback('description')) { 57 | if (doesElementExist('head > meta[name="description"]', 'content', $)) { 58 | ogObject.ogDescription = $('head > meta[name="description"]').attr('content'); 59 | } else if (doesElementExist('head > meta[itemprop="description"]', 'content', $)) { 60 | ogObject.ogDescription = $('head > meta[itemprop="description"]').attr('content'); 61 | } else if ($('#description').text() && $('#description').text().length > 0) { 62 | ogObject.ogDescription = $('#description').text(); 63 | } 64 | } 65 | 66 | // Get all of images if there is no og:image info 67 | if (!ogObject.ogImage && shouldFallback('image')) { 68 | ogObject.ogImage = []; 69 | $('img').map((index, imageElement) => { 70 | const source: string = $(imageElement).attr('src') ?? ''; 71 | if (!source) return false; 72 | const type = findImageTypeFromUrl(source); 73 | if ( 74 | !isUrlValid(source, (options.urlValidatorSettings ?? 
defaultUrlValidatorSettings)) || !isImageTypeValid(type) 75 | ) return false; 76 | const fallbackImage: ImageObject = { 77 | url: source, 78 | type, 79 | }; 80 | if ($(imageElement).attr('width') && Number($(imageElement).attr('width'))) fallbackImage.width = Number($(imageElement).attr('width')); 81 | if ($(imageElement).attr('height') && Number($(imageElement).attr('height'))) fallbackImage.height = Number($(imageElement).attr('height')); 82 | ogObject.ogImage?.push(fallbackImage); 83 | return false; 84 | }); 85 | ogObject.ogImage = ogObject.ogImage 86 | .filter((value) => value.url !== undefined && value.url !== '') 87 | .filter((value, index) => index < 10); 88 | if (ogObject.ogImage.length === 0) delete ogObject.ogImage; 89 | } else if (ogObject.ogImage) { 90 | ogObject.ogImage.map((image) => { 91 | if (image.url && !image.type) { 92 | const type = findImageTypeFromUrl(image.url); 93 | if (isImageTypeValid(type)) image.type = type; 94 | } 95 | return false; 96 | }); 97 | } 98 | 99 | // audio fallback 100 | if (!ogObject.ogAudioURL && !ogObject.ogAudioSecureURL && shouldFallback('audioUrl')) { 101 | const audioElementValue: string = $('audio').attr('src') ?? ''; 102 | const audioSourceElementValue: string = $('audio > source').attr('src') ?? ''; 103 | if (doesElementExist('audio', 'src', $)) { 104 | if (audioElementValue.startsWith('https')) { 105 | ogObject.ogAudioSecureURL = audioElementValue; 106 | } else { 107 | ogObject.ogAudioURL = audioElementValue; 108 | } 109 | const audioElementTypeValue: string = $('audio').attr('type') ?? 
''; 110 | if (!ogObject.ogAudioType && doesElementExist('audio', 'type', $)) ogObject.ogAudioType = audioElementTypeValue; 111 | } else if (doesElementExist('audio > source', 'src', $)) { 112 | if (audioSourceElementValue.startsWith('https')) { 113 | ogObject.ogAudioSecureURL = audioSourceElementValue; 114 | } else { 115 | ogObject.ogAudioURL = audioSourceElementValue; 116 | } 117 | const audioSourceElementTypeValue: string = $('audio > source').attr('type') ?? ''; 118 | if (!ogObject.ogAudioType && doesElementExist('audio > source', 'type', $)) ogObject.ogAudioType = audioSourceElementTypeValue; 119 | } 120 | } 121 | 122 | // locale fallback 123 | if (!ogObject.ogLocale && shouldFallback('locale')) { 124 | if (doesElementExist('html', 'lang', $)) { 125 | ogObject.ogLocale = $('html').attr('lang'); 126 | } else if (doesElementExist('head > meta[itemprop="inLanguage"]', 'content', $)) { 127 | ogObject.ogLocale = $('head > meta[itemprop="inLanguage"]').attr('content'); 128 | } 129 | } 130 | 131 | // logo fallback 132 | if (!ogObject.ogLogo && shouldFallback('logo')) { 133 | if (doesElementExist('meta[itemprop="logo"]', 'content', $)) { 134 | ogObject.ogLogo = $('meta[itemprop="logo"]').attr('content'); 135 | } else if (doesElementExist('img[itemprop="logo"]', 'src', $)) { 136 | ogObject.ogLogo = $('img[itemprop="logo"]').attr('src'); 137 | } 138 | } 139 | 140 | // url fallback 141 | if (!ogObject.ogUrl && shouldFallback('url')) { 142 | if (doesElementExist('link[rel="canonical"]', 'href', $)) { 143 | ogObject.ogUrl = $('link[rel="canonical"]').attr('href'); 144 | } else if (doesElementExist('link[rel="alternate"][hreflang="x-default"]', 'href', $)) { 145 | ogObject.ogUrl = $('link[rel="alternate"][hreflang="x-default"]').attr('href'); 146 | } 147 | } 148 | 149 | // date fallback 150 | if (!ogObject.ogDate && shouldFallback('date')) { 151 | if (doesElementExist('head > meta[name="date"]', 'content', $)) { 152 | ogObject.ogDate = $('head > 
meta[name="date"]').attr('content'); 153 | } else if (doesElementExist('[itemprop*="datemodified" i]', 'content', $)) { 154 | ogObject.ogDate = $('[itemprop*="datemodified" i]').attr('content'); 155 | } else if (doesElementExist('[itemprop="datepublished" i]', 'content', $)) { 156 | ogObject.ogDate = $('[itemprop="datepublished" i]').attr('content'); 157 | } else if (doesElementExist('[itemprop*="date" i]', 'content', $)) { 158 | ogObject.ogDate = $('[itemprop*="date" i]').attr('content'); 159 | } else if (doesElementExist('time[itemprop*="date" i]', 'datetime', $)) { 160 | ogObject.ogDate = $('time[itemprop*="date" i]').attr('datetime'); 161 | } else if (doesElementExist('time[datetime]', 'datetime', $)) { 162 | ogObject.ogDate = $('time[datetime]').attr('datetime'); 163 | } 164 | } 165 | 166 | // favicon fallback 167 | if (!ogObject.favicon && shouldFallback('favicon')) { 168 | if (doesElementExist('link[rel="shortcut icon"]', 'href', $)) { 169 | ogObject.favicon = $('link[rel="shortcut icon"]').attr('href'); 170 | } else if (doesElementExist('link[rel="icon"]', 'href', $)) { 171 | ogObject.favicon = $('link[rel="icon"]').attr('href'); 172 | } else if (doesElementExist('link[rel="mask-icon"]', 'href', $)) { 173 | ogObject.favicon = $('link[rel="mask-icon"]').attr('href'); 174 | } else if (doesElementExist('link[rel="apple-touch-icon"]', 'href', $)) { 175 | ogObject.favicon = $('link[rel="apple-touch-icon"]').attr('href'); 176 | } else if (doesElementExist('link[type="image/png"]', 'href', $)) { 177 | ogObject.favicon = $('link[type="image/png"]').attr('href'); 178 | } else if (doesElementExist('link[type="image/ico"]', 'href', $)) { 179 | ogObject.favicon = $('link[type="image/ico"]').attr('href'); 180 | } else if (doesElementExist('link[type="image/x-icon"]', 'href', $)) { 181 | ogObject.favicon = $('link[type="image/x-icon"]').attr('href'); 182 | } else if (doesElementExist('head > meta[property*="appIcon"]', 'content', $)) { 183 | ogObject.favicon = $('head > 
meta[property*="appIcon"]').attr('content'); 184 | } 185 | } 186 | 187 | // set the charset 188 | if (doesElementExist('meta', 'charset', $)) { 189 | ogObject.charset = $('meta').attr('charset'); 190 | } else if (doesElementExist('head > meta[name="charset"]', 'content', $)) { 191 | ogObject.charset = $('head > meta[name="charset"]').attr('content'); 192 | } else if (doesElementExist('head > meta[http-equiv="content-type"]', 'content', $)) { 193 | const content = $('head > meta[http-equiv="content-type"]').attr('content') ?? ''; 194 | const charsetRegEx = /charset=([^()<>@,;:"/[\]?.=\s]*)/i; 195 | 196 | if (charsetRegEx.test(content)) { 197 | const charsetRegExExec = charsetRegEx.exec(content); 198 | if (charsetRegExExec?.[1]) ogObject.charset = charsetRegExExec[1] || 'utf-8'; 199 | } 200 | } else if (body) { 201 | const encoder = new TextEncoder(); 202 | const uint8Array = encoder.encode(body); 203 | ogObject.charset = chardet.detect(uint8Array) ?? ''; 204 | } 205 | 206 | return ogObject; 207 | } 208 | 209 | export default fallback; 210 | -------------------------------------------------------------------------------- /lib/isUrl.ts: -------------------------------------------------------------------------------- 1 | // eslint-disable-next-line @typescript-eslint/ban-ts-comment 2 | // @ts-nocheck 3 | /* eslint-disable @typescript-eslint/no-unsafe-argument */ 4 | /* eslint-disable @typescript-eslint/no-unsafe-call */ 5 | /* eslint-disable @typescript-eslint/no-unsafe-member-access */ 6 | /* eslint-disable @typescript-eslint/no-unsafe-assignment */ 7 | // This is from https://github.com/validatorjs/validator.js version: 13.12.0 8 | 9 | // https://github.com/validatorjs/validator.js/blob/master/src/lib/util/assertString.js 10 | function assertString(input) { 11 | const isString = typeof input === 'string' || input instanceof String; 12 | 13 | if (!isString) { 14 | let invalidType = typeof input; 15 | if (input === null) invalidType = 'null'; 16 | else if (invalidType 
=== 'object') invalidType = input.constructor.name; 17 | 18 | throw new TypeError(`Expected a string but received a ${invalidType}`); 19 | } 20 | } 21 | 22 | // https://github.com/validatorjs/validator.js/blob/master/src/lib/util/merge.js 23 | // eslint-disable-next-line @typescript-eslint/default-param-last 24 | function merge(obj = { }, defaults) { 25 | // eslint-disable-next-line no-restricted-syntax 26 | for (const key in defaults) { 27 | if (typeof obj[key] === 'undefined') { 28 | obj[key] = defaults[key]; 29 | } 30 | } 31 | return obj; 32 | } 33 | 34 | // https://github.com/validatorjs/validator.js/blob/master/src/lib/isFQDN.js 35 | const defaultFqdnOptions = { 36 | require_tld: true, 37 | allow_underscores: false, 38 | allow_trailing_dot: false, 39 | allow_numeric_tld: false, 40 | allow_wildcard: false, 41 | ignore_max_length: false, 42 | }; 43 | 44 | function isFQDN(str, options) { 45 | assertString(str); 46 | options = merge(options, defaultFqdnOptions); 47 | 48 | /* Remove the optional trailing dot before checking validity */ 49 | if (options.allow_trailing_dot && str[str.length - 1] === '.') { 50 | str = str.substring(0, str.length - 1); 51 | } 52 | 53 | /* Remove the optional wildcard before checking validity */ 54 | if (options.allow_wildcard === true && str.indexOf('*.') === 0) { 55 | str = str.substring(2); 56 | } 57 | 58 | const parts = str.split('.'); 59 | const tld = parts[parts.length - 1]; 60 | 61 | if (options.require_tld) { 62 | // disallow fqdns without tld 63 | if (parts.length < 2) { 64 | return false; 65 | } 66 | 67 | if ( 68 | !options.allow_numeric_tld 69 | // eslint-disable-next-line @typescript-eslint/no-unsafe-argument 70 | && !/^([a-z\u00A1-\u00A8\u00AA-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]{2,}|xn[a-z0-9-]{2,})$/i.test(tld) 71 | ) { 72 | return false; 73 | } 74 | 75 | // disallow spaces 76 | if (/\s/.test(tld)) { 77 | return false; 78 | } 79 | } 80 | 81 | // reject numeric TLDs 82 | if (!options.allow_numeric_tld && /^\d+$/.test(tld)) { 
83 | return false; 84 | } 85 | 86 | // eslint-disable-next-line @typescript-eslint/no-unsafe-return 87 | return parts.every((part) => { 88 | if (part.length > 63 && !options.ignore_max_length) { 89 | return false; 90 | } 91 | 92 | if (!/^[a-z_\u00a1-\uffff0-9-]+$/i.test(part)) { 93 | return false; 94 | } 95 | 96 | // disallow full-width chars 97 | if (/[\uff01-\uff5e]/.test(part)) { 98 | return false; 99 | } 100 | 101 | // disallow parts starting or ending with hyphen 102 | if (/^-|-$/.test(part)) { 103 | return false; 104 | } 105 | 106 | if (!options.allow_underscores && /_/.test(part)) { 107 | return false; 108 | } 109 | 110 | return true; 111 | }); 112 | } 113 | 114 | // https://github.com/validatorjs/validator.js/blob/master/src/lib/isIP.js 115 | const IPv4SegmentFormat = '(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])'; 116 | const IPv4AddressFormat = `(${IPv4SegmentFormat}[.]){3}${IPv4SegmentFormat}`; 117 | const IPv4AddressRegExp = new RegExp(`^${IPv4AddressFormat}$`); 118 | 119 | const IPv6SegmentFormat = '(?:[0-9a-fA-F]{1,4})'; 120 | const IPv6AddressRegExp = new RegExp('^(' 121 | + `(?:${IPv6SegmentFormat}:){7}(?:${IPv6SegmentFormat}|:)|` 122 | + `(?:${IPv6SegmentFormat}:){6}(?:${IPv4AddressFormat}|:${IPv6SegmentFormat}|:)|` 123 | + `(?:${IPv6SegmentFormat}:){5}(?::${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,2}|:)|` 124 | + `(?:${IPv6SegmentFormat}:){4}(?:(:${IPv6SegmentFormat}){0,1}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,3}|:)|` 125 | + `(?:${IPv6SegmentFormat}:){3}(?:(:${IPv6SegmentFormat}){0,2}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,4}|:)|` 126 | + `(?:${IPv6SegmentFormat}:){2}(?:(:${IPv6SegmentFormat}){0,3}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,5}|:)|` 127 | + `(?:${IPv6SegmentFormat}:){1}(?:(:${IPv6SegmentFormat}){0,4}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,6}|:)|` 128 | + `(?::((?::${IPv6SegmentFormat}){0,5}:${IPv4AddressFormat}|(?::${IPv6SegmentFormat}){1,7}|:))` 129 | + ')(%[0-9a-zA-Z-.:]{1,})?$'); 130 | 
131 | function isIP(str, version = '') { 132 | assertString(str); 133 | version = String(version); 134 | if (!version) { 135 | // eslint-disable-next-line @typescript-eslint/no-unsafe-return 136 | return isIP(str, 4) || isIP(str, 6); 137 | } 138 | if (version === '4') { 139 | return IPv4AddressRegExp.test(str); 140 | } 141 | if (version === '6') { 142 | return IPv6AddressRegExp.test(str); 143 | } 144 | return false; 145 | } 146 | 147 | // https://github.com/validatorjs/validator.js/blob/master/src/lib/isURL.js 148 | /* 149 | options for isURL method 150 | 151 | require_protocol - if set as true isURL will return false if protocol is not present in the URL 152 | require_valid_protocol - isURL will check if the URL's protocol is present in the protocols option 153 | protocols - valid protocols can be modified with this option 154 | require_host - if set as false isURL will not check if host is present in the URL 155 | require_port - if set as true isURL will check if port is present in the URL 156 | allow_protocol_relative_urls - if set as true protocol relative URLs will be allowed 157 | validate_length - if set as false isURL will skip string length validation (IE maximum is 2083) 158 | 159 | */ 160 | const defaultUrlOptions = { 161 | protocols: ['http', 'https', 'ftp'], 162 | require_tld: true, 163 | require_protocol: false, 164 | require_host: true, 165 | require_port: false, 166 | require_valid_protocol: true, 167 | allow_underscores: false, 168 | allow_trailing_dot: false, 169 | allow_protocol_relative_urls: false, 170 | allow_fragments: true, 171 | allow_query_components: true, 172 | validate_length: true, 173 | }; 174 | 175 | const wrappedIpv6 = /^\[([^\]]+)\](?::([0-9]+))?$/; 176 | 177 | function isRegExp(obj) { 178 | return Object.prototype.toString.call(obj) === '[object RegExp]'; 179 | } 180 | 181 | function checkHost(host, matches) { 182 | // eslint-disable-next-line @typescript-eslint/prefer-for-of 183 | for (let i = 0; i < matches.length; i += 1) { 184 
| const match = matches[i]; 185 | if (host === match || (isRegExp(match) && match.test(host))) { 186 | return true; 187 | } 188 | } 189 | return false; 190 | } 191 | 192 | export default function isURL(url, options) { 193 | assertString(url); 194 | if (!url || /[\s<>]/.test(url)) { 195 | return false; 196 | } 197 | if (url.indexOf('mailto:') === 0) { 198 | return false; 199 | } 200 | options = merge(options, defaultUrlOptions); 201 | 202 | if (options.validate_length && url.length >= 2083) { 203 | return false; 204 | } 205 | 206 | if (!options.allow_fragments && url.includes('#')) { 207 | return false; 208 | } 209 | 210 | if (!options.allow_query_components && (url.includes('?') || url.includes('&'))) { 211 | return false; 212 | } 213 | 214 | // eslint-disable-next-line @typescript-eslint/naming-convention 215 | let protocol; let auth; let host; let port; let port_str; let split; let 216 | ipv6; 217 | 218 | split = url.split('#'); 219 | url = split.shift(); 220 | 221 | split = url.split('?'); 222 | url = split.shift(); 223 | 224 | split = url.split('://'); 225 | if (split.length > 1) { 226 | protocol = split.shift().toLowerCase(); 227 | if (options.require_valid_protocol && options.protocols.indexOf(protocol) === -1) { 228 | return false; 229 | } 230 | } else if (options.require_protocol) { 231 | return false; 232 | } else if (url.slice(0, 2) === '//') { 233 | if (!options.allow_protocol_relative_urls) { 234 | return false; 235 | } 236 | split[0] = url.slice(2); 237 | } 238 | url = split.join('://'); 239 | 240 | if (url === '') { 241 | return false; 242 | } 243 | 244 | split = url.split('/'); 245 | url = split.shift(); 246 | 247 | if (url === '' && !options.require_host) { 248 | return true; 249 | } 250 | 251 | split = url.split('@'); 252 | if (split.length > 1) { 253 | if (options.disallow_auth) { 254 | return false; 255 | } 256 | if (split[0] === '') { 257 | return false; 258 | } 259 | auth = split.shift(); 260 | if (auth.indexOf(':') >= 0 && 
auth.split(':').length > 2) { 261 | return false; 262 | } 263 | const [user, password] = auth.split(':'); 264 | if (user === '' && password === '') { 265 | return false; 266 | } 267 | } 268 | const hostname = split.join('@'); 269 | 270 | port_str = null; 271 | ipv6 = null; 272 | // eslint-disable-next-line @typescript-eslint/naming-convention 273 | const ipv6_match = hostname.match(wrappedIpv6); 274 | if (ipv6_match) { 275 | host = ''; 276 | // eslint-disable-next-line prefer-destructuring 277 | ipv6 = ipv6_match[1]; 278 | port_str = ipv6_match[2] || null; 279 | } else { 280 | split = hostname.split(':'); 281 | host = split.shift(); 282 | if (split.length) { 283 | port_str = split.join(':'); 284 | } 285 | } 286 | 287 | if (port_str !== null && port_str.length > 0) { 288 | port = parseInt(port_str, 10); 289 | if (!/^[0-9]+$/.test(port_str) || port <= 0 || port > 65535) { 290 | return false; 291 | } 292 | } else if (options.require_port) { 293 | return false; 294 | } 295 | 296 | if (options.host_whitelist) { 297 | return checkHost(host, options.host_whitelist); 298 | } 299 | 300 | if (host === '' && !options.require_host) { 301 | return true; 302 | } 303 | 304 | if (!isIP(host) && !isFQDN(host, options) && (!ipv6 || !isIP(ipv6, 6))) { 305 | return false; 306 | } 307 | 308 | host = host || ipv6; 309 | 310 | if (options.host_blacklist && checkHost(host, options.host_blacklist)) { 311 | return false; 312 | } 313 | 314 | return true; 315 | } 316 | -------------------------------------------------------------------------------- /lib/media.ts: -------------------------------------------------------------------------------- 1 | import fields from './fields'; 2 | import { removeNestedUndefinedValues } from './utils'; 3 | import type { 4 | ImageObject, 5 | MusicSongObject, 6 | OgObjectInternal, 7 | TwitterImageObject, 8 | TwitterPlayerObject, 9 | VideoObject, 10 | } from './types'; 11 | 12 | const mediaMapperTwitterImage = (item: TwitterImageObject[]) => ({ 13 | alt: item[3], 
14 | height: item[2], 15 | url: item[0], 16 | width: item[1], 17 | }); 18 | 19 | const mediaMapperTwitterPlayer = (item: TwitterPlayerObject[]) => ({ 20 | height: item[2], 21 | stream: item[3], 22 | url: item[0], 23 | width: item[1], 24 | }); 25 | 26 | const mediaMapperMusicSong = (item: MusicSongObject[]) => ({ 27 | disc: item[2], 28 | track: item[1], 29 | url: item[0], 30 | }); 31 | 32 | const mediaMapper = (item: ImageObject[] | VideoObject[]) => ({ 33 | height: item[2], 34 | type: item[3], 35 | url: item[0], 36 | width: item[1], 37 | alt: item[4], 38 | }); 39 | 40 | const mediaSorter = ( 41 | a: ImageObject | TwitterImageObject | VideoObject | TwitterPlayerObject, 42 | b: ImageObject | TwitterImageObject | VideoObject | TwitterPlayerObject, 43 | ) => { 44 | if (!(a.url && b.url)) { 45 | return 0; 46 | } 47 | 48 | const aRes = a.url.match(/\.(\w{2,5})$/); 49 | const aExt = (aRes?.[1].toLowerCase()) ?? null; 50 | const bRes = b.url.match(/\.(\w{2,5})$/); 51 | const bExt = (bRes?.[1].toLowerCase()) ?? null; 52 | 53 | if (aExt === 'gif' && bExt !== 'gif') { 54 | return -1; 55 | } if (aExt !== 'gif' && bExt === 'gif') { 56 | return 1; 57 | } 58 | return Math.max(b.width ?? 0, b.height ?? 0) - Math.max(a.width ?? 0, a.height ?? 0); 59 | }; 60 | 61 | const mediaSorterMusicSong = (a: MusicSongObject, b: MusicSongObject) => { 62 | if (!(a.track && b.track)) { 63 | return 0; 64 | } if ((a.disc ?? 0) > (b.disc ?? 0)) { 65 | return 1; 66 | } if ((a.disc ?? 0) < (b.disc ?? 
0)) { 67 | return -1; 68 | } 69 | return a.track - b.track; 70 | }; 71 | 72 | // lodash zip replacement 73 | const zip = (array: any, ...args: any) => { 74 | if (array === undefined) return []; 75 | return array 76 | .map((value: any, idx: number) => [value, ...args.map((arr: []) => arr[idx])]); 77 | }; 78 | 79 | /** 80 | * formats the multiple media values 81 | * 82 | * @param {object} ogObject - the current ogObject 83 | * @param {object} options - options for ogs 84 | * @return {object} object with ogs results with updated media values 85 | * 86 | */ 87 | export function mediaSetup(ogObject: OgObjectInternal) { 88 | // sets ogImage property/width/height/type to empty array if one of these exists 89 | if ( 90 | ogObject.ogImageSecureURL 91 | ?? ogObject.ogImageURL 92 | ?? ogObject.ogImageProperty 93 | ?? ogObject.ogImageWidth 94 | ?? ogObject.ogImageHeight 95 | ?? ogObject.ogImageType 96 | ?? ogObject.ogImageAlt 97 | ) { 98 | ogObject.ogImageSecureURL = ogObject.ogImageSecureURL ? ogObject.ogImageSecureURL : []; 99 | ogObject.ogImageURL = ogObject.ogImageURL ? ogObject.ogImageURL : []; 100 | ogObject.ogImageProperty = ogObject.ogImageProperty ? ogObject.ogImageProperty : []; 101 | // set ogImageProperty to ogImageSecureURL if it exists 102 | // eslint-disable-next-line max-len 103 | ogObject.ogImageProperty = (ogObject.ogImageSecureURL.length !== 0) ? ogObject.ogImageSecureURL : ogObject.ogImageProperty; 104 | // fall back to ogImageURL if ogImageProperty isn't set 105 | ogObject.ogImageProperty = (ogObject.ogImageProperty.length !== 0) ? ogObject.ogImageProperty : ogObject.ogImageURL; 106 | ogObject.ogImageWidth = ogObject.ogImageWidth ? ogObject.ogImageWidth : []; 107 | ogObject.ogImageHeight = ogObject.ogImageHeight ? ogObject.ogImageHeight : []; 108 | ogObject.ogImageType = ogObject.ogImageType ? ogObject.ogImageType : []; 109 | ogObject.ogImageAlt = ogObject.ogImageAlt ?
ogObject.ogImageAlt : []; 110 | } 111 | 112 | // format images and limit to 10 113 | const ogImages: ImageObject[] = zip( 114 | ogObject.ogImageProperty, 115 | ogObject.ogImageWidth, 116 | ogObject.ogImageHeight, 117 | ogObject.ogImageType, 118 | ogObject.ogImageAlt, 119 | ) 120 | .map(mediaMapper) 121 | .filter((value:ImageObject) => value.url !== undefined && value.url !== '') 122 | .filter((value:ImageObject, index:number) => index < 10) 123 | .sort(mediaSorter); 124 | 125 | // sets ogVideo property/width/height/type to empty array if one of these exists 126 | if (ogObject.ogVideoProperty ?? ogObject.ogVideoWidth ?? ogObject.ogVideoHeight ?? ogObject.ogVideoType) { 127 | ogObject.ogVideoProperty = ogObject.ogVideoProperty ? ogObject.ogVideoProperty : []; 128 | ogObject.ogVideoWidth = ogObject.ogVideoWidth ? ogObject.ogVideoWidth : []; 129 | ogObject.ogVideoHeight = ogObject.ogVideoHeight ? ogObject.ogVideoHeight : []; 130 | ogObject.ogVideoType = ogObject.ogVideoType ? ogObject.ogVideoType : []; 131 | } 132 | 133 | // format videos and limit to 10 134 | const ogVideos: VideoObject[] = zip( 135 | ogObject.ogVideoProperty, 136 | ogObject.ogVideoWidth, 137 | ogObject.ogVideoHeight, 138 | ogObject.ogVideoType, 139 | ) 140 | .map(mediaMapper) 141 | .filter((value:VideoObject) => value.url !== undefined && value.url !== '') 142 | .filter((value:VideoObject, index:number) => index < 10) 143 | .sort(mediaSorter); 144 | 145 | // sets twitter image src/property/width/height/alt to empty array if one of these exists 146 | if ( 147 | ogObject.twitterImageSrc 148 | ?? ogObject.twitterImageProperty 149 | ?? ogObject.twitterImageWidth 150 | ?? ogObject.twitterImageHeight 151 | ?? ogObject.twitterImageAlt 152 | ) { 153 | ogObject.twitterImageSrc = ogObject.twitterImageSrc ? ogObject.twitterImageSrc : []; 154 | // eslint-disable-next-line max-len 155 | ogObject.twitterImageProperty = ogObject.twitterImageProperty ?
ogObject.twitterImageProperty : ogObject.twitterImageSrc; // default to twitterImageSrc 156 | ogObject.twitterImageWidth = ogObject.twitterImageWidth ? ogObject.twitterImageWidth : []; 157 | ogObject.twitterImageHeight = ogObject.twitterImageHeight ? ogObject.twitterImageHeight : []; 158 | ogObject.twitterImageAlt = ogObject.twitterImageAlt ? ogObject.twitterImageAlt : []; 159 | } 160 | 161 | // format twitter images and limit to 10 162 | const twitterImages: TwitterImageObject[] = zip( 163 | ogObject.twitterImageProperty, 164 | ogObject.twitterImageWidth, 165 | ogObject.twitterImageHeight, 166 | ogObject.twitterImageAlt, 167 | ) 168 | .map(mediaMapperTwitterImage) 169 | .filter((value:TwitterImageObject) => value.url !== undefined && value.url !== '') 170 | .filter((value:TwitterImageObject, index:number) => index < 10) 171 | .sort(mediaSorter); 172 | 173 | // sets twitter property/width/height/stream to empty array if one of these exists 174 | if (ogObject.twitterPlayerProperty 175 | ?? ogObject.twitterPlayerWidth 176 | ?? ogObject.twitterPlayerHeight 177 | ?? ogObject.twitterPlayerStream 178 | ) { 179 | ogObject.twitterPlayerProperty = ogObject.twitterPlayerProperty ? ogObject.twitterPlayerProperty : []; 180 | ogObject.twitterPlayerWidth = ogObject.twitterPlayerWidth ? ogObject.twitterPlayerWidth : []; 181 | ogObject.twitterPlayerHeight = ogObject.twitterPlayerHeight ? ogObject.twitterPlayerHeight : []; 182 | ogObject.twitterPlayerStream = ogObject.twitterPlayerStream ?
ogObject.twitterPlayerStream : []; 183 | } 184 | 185 | // format twitter player and limit to 10 186 | const twitterPlayers: TwitterPlayerObject[] = zip( 187 | ogObject.twitterPlayerProperty, 188 | ogObject.twitterPlayerWidth, 189 | ogObject.twitterPlayerHeight, 190 | ogObject.twitterPlayerStream, 191 | ).map(mediaMapperTwitterPlayer) 192 | .filter((value:TwitterPlayerObject) => value.url !== undefined && value.url !== '') 193 | .filter((value:TwitterPlayerObject, index:number) => index < 10) 194 | .sort(mediaSorter); 195 | 196 | // sets music property/songTrack/songDisc to empty array if one these exists 197 | if (ogObject.musicSongProperty ?? ogObject.musicSongTrack ?? ogObject.musicSongDisc ?? ogObject.musicSongUrl) { 198 | ogObject.musicSongUrl = ogObject.musicSongUrl ? ogObject.musicSongUrl : []; 199 | ogObject.musicSongProperty = ogObject.musicSongProperty ? ogObject.musicSongProperty : ogObject.musicSongUrl; // deafult to musicSongUrl 200 | ogObject.musicSongTrack = ogObject.musicSongTrack ? ogObject.musicSongTrack : []; 201 | ogObject.musicSongDisc = ogObject.musicSongDisc ? 
/**
 * Validates the ogs options and produces the Open Graph result, either from
 * the supplied HTML string or by fetching `options.url` and parsing the body.
 *
 * @param ogsOptions - options for ogs; exactly one of `url` / `html` is expected
 * @returns `{ ogObject, response, html }`. In HTML mode `response` is `{ body: html }`;
 *   in URL mode it is whatever `requestAndResultsFormatter` returned.
 * @throws Error 'Must specify either `url` or `html`, not both'
 * @throws Error 'Invalid Custom Meta Tags'
 * @throws Error 'Invalid URL'
 * @throws Error 'Must scrape an HTML page'
 * @throws Error 'Host name has been black listed'
 * @throws Error 'Page not found' for ENOTFOUND / EHOSTUNREACH / ENETUNREACH failures
 *   and for thrown values that are not `Error` instances
 * @throws Error 'The operation was aborted due to timeout' when the failure is named `AbortError`
 */
export default async function setOptionsAndReturnOpenGraphResults(ogsOptions: OpenGraphScraperOptions) {
  const { options } = optionSetup(ogsOptions);

  if (options.html && options.url) throw new Error('Must specify either `url` or `html`, not both');

  if (!isCustomMetaTagsValid(options.customMetaTags ?? [])) throw new Error('Invalid Custom Meta Tags');

  // HTML mode: nothing to fetch, parse the caller's markup directly
  if (options.html) {
    const ogObject = extractMetaTags(options.html, options);
    ogObject.success = true;
    return { ogObject, response: { body: options.html }, html: options.html };
  }

  const formattedUrl = validateAndFormatURL(
    options.url ?? '',
    options.urlValidatorSettings ?? defaultUrlValidatorSettings,
  );

  if (!formattedUrl.url) throw new Error('Invalid URL');

  const requestUrl = formattedUrl.url;
  options.url = requestUrl;

  // trying to limit non html pages
  if (isThisANonHTMLUrl(requestUrl)) throw new Error('Must scrape an HTML page');

  // an entry is matched as a substring of the whole formatted URL
  if (options.blacklist?.some((blacklistedHostname) => requestUrl.includes(blacklistedHostname))) {
    throw new Error('Host name has been black listed');
  }

  try {
    const { body, response } = await requestAndResultsFormatter(options);
    const ogObject = extractMetaTags(body, options);

    ogObject.requestUrl = options.url;

    return { ogObject, response, html: body };
  } catch (exception: unknown) {
    // Anything can be thrown, so read `code` / `name` only after narrowing to an object.
    const isObject = typeof exception === 'object' && exception !== null;
    const errorCode = isObject && 'code' in exception ? exception.code : undefined;
    const errorName = isObject && 'name' in exception ? exception.name : undefined;

    if (errorCode === 'ENOTFOUND' || errorCode === 'EHOSTUNREACH' || errorCode === 'ENETUNREACH') {
      throw new Error('Page not found');
    }
    if (errorName === 'AbortError') {
      throw new Error('The operation was aborted due to timeout');
    }
    if (exception instanceof Error) throw exception;
    throw new Error('Page not found');
  }
}
/**
 * Messages for the HTTP error statuses that get a dedicated error;
 * every other 4xx/5xx status falls back to a generic message.
 */
const HTTP_ERROR_MESSAGES: Readonly<Record<number, string>> = {
  400: '400 Bad Request',
  401: '401 Unauthorized',
  403: '403 Forbidden',
  404: '404 Not Found',
  408: '408 Request Timeout',
  410: '410 Gone',
  500: '500 Internal Server Error',
  502: '502 Bad Gateway',
  503: '503 Service Unavailable',
  504: '504 Gateway Timeout',
};

/**
 * Checks whether the first element matching `selector` has a non-empty `attribute`.
 *
 * Note: cheerio's `.attr(name)` getter only inspects the FIRST matched element,
 * so the selector has to be specific enough to land on the element of interest.
 */
const doesElementExist = (selector: string, attribute: string, $: CheerioAPI): boolean => (
  ($(selector).attr(attribute)?.length ?? 0) > 0
);

/**
 * Works out the charset of the html.
 *
 * Lookup order: `<meta charset>`, `<meta name="charset" content>`,
 * `<meta http-equiv="content-type" content="...charset=X">`, then byte-level
 * detection with chardet when there is a body, and finally 'utf-8'.
 *
 * @param body - the response decoded as utf-8 (only used as an emptiness check)
 * @param buffer - the raw response bytes handed to chardet
 * @param $ - cheerio instance loaded with `body`
 * @returns the charset name; may be null/undefined when detection yields nothing
 */
function getCharset(body: string, buffer: ArrayBuffer, $: CheerioAPI) {
  // Select `meta[charset]` rather than `meta`: `.attr()` reads only the first match, so a
  // plain `meta` selector misses the charset whenever another <meta> tag comes first.
  const charsetSelector = 'meta[charset]';
  if (doesElementExist(charsetSelector, 'charset', $)) {
    return $(charsetSelector).attr('charset');
  }

  const namedCharsetSelector = 'head > meta[name="charset"]';
  if (doesElementExist(namedCharsetSelector, 'content', $)) {
    return $(namedCharsetSelector).attr('content');
  }

  const contentTypeSelector = 'head > meta[http-equiv="content-type"]';
  if (doesElementExist(contentTypeSelector, 'content', $)) {
    const content = $(contentTypeSelector).attr('content') ?? '';
    const charsetRegEx = /charset=([^()<>@,;:"/[\]?.=\s]*)/i;
    const match = charsetRegEx.exec(content);
    if (match?.[1]) return match[1];
  }

  if (body) {
    return chardet.detect(Buffer.from(buffer));
  }

  return 'utf-8';
}

/**
 * Performs the fetch request and formats the body for ogs.
 *
 * The URL is percent-encoded with `encodeURI` when it contains characters outside
 * Latin-1. The request uses a timeout signal of `options.timeout` seconds (default 10);
 * because `options.fetchOptions` is spread after it, a caller-supplied `signal`
 * replaces the timeout signal. Caller headers are merged over the default
 * `Origin` / `Accept` headers.
 *
 * @param options - options for ogs
 * @returns the decoded body and the fetch response
 * @throws Error when the content-type header is present and does not contain `text/`
 * @throws Error describing the status for any 4xx/5xx response
 * @throws Error 'Page not found' when the body is empty
 * @throws the `cause` of the error when fetch rejects with the message 'fetch failed'
 */
export default async function requestAndResultsFormatter(options: OpenGraphScraperOptions) {
  try {
    // eslint-disable-next-line no-control-regex
    const isLatin1 = /^[\u0000-\u00ff]{0,}$/;

    let url = options.url ?? '';
    if (!isLatin1.test(url)) url = encodeURI(url);

    const response = await fetch(
      url,
      {
        signal: AbortSignal.timeout((options.timeout ?? 10) * 1000),
        ...options.fetchOptions,
        headers: { Origin: url, Accept: 'text/html', ...options.fetchOptions?.headers },
      },
    );

    // Decode as utf-8 first so the markup can be inspected for a declared charset,
    // then re-decode the raw bytes when another charset is in use.
    const bodyArrayBuffer = await response.arrayBuffer();
    const bodyText = Buffer.from(bodyArrayBuffer).toString('utf-8');
    const charset = getCharset(bodyText, bodyArrayBuffer, load(bodyText)) ?? 'utf-8';
    const body = charset.toLowerCase() === 'utf-8'
      ? bodyText
      : decode(Buffer.from(bodyArrayBuffer), charset);

    const contentType = response?.headers?.get('content-type')?.toLowerCase();
    if (contentType && !contentType.includes('text/')) {
      throw new Error('Page must return a header content-type with text/');
    }

    const { status } = response;
    if (status >= 400 && status < 600) {
      throw new Error(HTTP_ERROR_MESSAGES[status] ?? 'Server has returned a 400/500 error code');
    }

    if (body === undefined || body === '') {
      throw new Error('Page not found');
    }

    return { body, response };
  } catch (error) {
    // surface the underlying error (it carries the code the caller inspects) instead of the generic wrapper
    if (error instanceof Error && error.message === 'fetch failed') throw error.cause;
    throw error;
  }
}
(default is 10 seconds) 30 | * @typeParam {ValidatorSettings} [urlValidatorSettings] - Sets the options used by validator.js for testing the URL 31 | */ 32 | export interface OpenGraphScraperOptions { 33 | blacklist?: string[]; 34 | customMetaTags?: CustomMetaTags[]; 35 | fetchOptions?: RequestInit; 36 | html?: string; 37 | onlyGetOpenGraphInfo?: boolean | OnlyGetOpenGraphInfoItem[]; 38 | timeout?: number; 39 | url?: string; 40 | urlValidatorSettings?: ValidatorSettings; 41 | jsonLDOptions?: JSONLDOptions; 42 | } 43 | 44 | /** 45 | * Options for isURL method in Validator.js 46 | * 47 | * @typeParam allow_protocol_relative_urls - if set as true protocol relative URLs will be allowed 48 | * @typeParam protocols - valid protocols can be modified with this option 49 | * @typeParam require_host - if set as false isURL will not check if host is present in the URL 50 | * @typeParam require_port - if set as true isURL will check if port is present in the URL 51 | * @typeParam require_protocol - if set as true isURL will return false if protocol is not present in the URL 52 | * @typeParam require_valid_protocol - isURL will check if the URL's protocol is present in the protocols option 53 | * @typeParam validate_length - if set as false isURL will skip string length validation (IE maximum is 2083) 54 | * 55 | */ 56 | export interface ValidatorSettings { 57 | allow_fragments: boolean; 58 | allow_protocol_relative_urls: boolean; 59 | allow_query_components: boolean; 60 | allow_trailing_dot: boolean; 61 | allow_underscores: boolean; 62 | protocols: string[]; 63 | require_host: boolean; 64 | require_port: boolean; 65 | require_protocol: boolean; 66 | require_tld: boolean; 67 | require_valid_protocol: boolean; 68 | validate_length: boolean; 69 | } 70 | 71 | /** 72 | * Options for the JSON-LD parser 73 | */ 74 | export interface JSONLDOptions { 75 | throwOnJSONParseError?: boolean; 76 | logOnJSONParseError?: boolean; 77 | } 78 | 79 | /** 80 | * The type for user defined custom 
meta tags you want to scrape. 81 | * 82 | * @typeParam {boolean} multiple - is there more than one of these tags on a page (normally this is false) 83 | * @typeParam {string} property - meta tag name/property attribute 84 | * @typeParam {string} fieldName - name of the result variable 85 | */ 86 | export interface CustomMetaTags { 87 | fieldName: string; 88 | multiple: boolean; 89 | property: string; 90 | } 91 | 92 | export interface TwitterImageObject { 93 | alt?: string; 94 | height?: number; 95 | url: string; 96 | width?: number; 97 | } 98 | 99 | export interface TwitterPlayerObject { 100 | height?: number; 101 | stream?: string; 102 | url: string; 103 | width?: number; 104 | } 105 | 106 | export interface ImageObject { 107 | height?: number; 108 | type?: string; 109 | url: string; 110 | width?: number; 111 | alt?: string 112 | } 113 | 114 | export interface VideoObject { 115 | height?: number; 116 | type?: string; 117 | url: string; 118 | width?: number; 119 | } 120 | 121 | export interface MusicSongObject { 122 | disc?: string; 123 | track?: number; 124 | url: string; 125 | } 126 | 127 | export interface OgObjectInternal { 128 | alAndroidAppName?: string; 129 | alAndroidClass?: string; 130 | alAndroidPackage?: string; 131 | alAndroidUrl?: string; 132 | alIosAppName?: string; 133 | alIosAppStoreId?: string; 134 | alIosUrl?: string; 135 | alIpadAppName?: string; 136 | alIpadAppStoreId?: string; 137 | alIpadUrl?: string; 138 | alIphoneAppName?: string; 139 | alIphoneAppStoreId?: string; 140 | alIphoneUrl?: string; 141 | alWebShouldFallback?: string; 142 | alWebUrl?: string; 143 | alWindowsAppId?: string; 144 | alWindowsAppName?: string; 145 | alWindowsPhoneAppId?: string; 146 | alWindowsPhoneAppName?: string; 147 | alWindowsPhoneUrl?: string; 148 | alWindowsUniversalAppId?: string; 149 | alWindowsUniversalAppName?: string; 150 | alWindowsUniversalUrl?: string; 151 | alWindowsUrl?: string; 152 | articleAuthor?: string; 153 | articleExpirationTime?: string; 154 | 
articleModifiedTime?: string; 155 | articlePublishedDate?: string; 156 | articlePublishedTime?: string; 157 | articleModifiedDate?: string; 158 | articlePublisher?: string; 159 | articleSection?: string; 160 | articleTag?: string; 161 | author?: string; 162 | bookAuthor?: string; 163 | bookCanonicalName?: string; 164 | bookIsbn?: string; 165 | bookReleaseDate?: string; 166 | booksBook?: string; 167 | booksRatingScale?: string; 168 | booksRatingValue?: string; 169 | bookTag?: string; 170 | businessContactDataCountryName?: string; 171 | businessContactDataLocality?: string; 172 | businessContactDataPostalCode?: string; 173 | businessContactDataRegion?: string; 174 | businessContactDataStreetAddress?: string; 175 | charset?: string; 176 | customMetaTags?: Record 177 | dcContributor?: string; 178 | dcCoverage?: string; 179 | dcCreator?: string; 180 | dcDate?: string; 181 | dcDateCreated?: string; 182 | dcDateIssued?: string; 183 | dcDescription?: string; 184 | dcFormatMedia?: string; 185 | dcFormatSize?: string; 186 | dcIdentifier?: string; 187 | dcLanguage?: string; 188 | dcPublisher?: string; 189 | dcRelation?: string; 190 | dcRights?: string; 191 | dcSource?: string; 192 | dcSubject?: string; 193 | dcTitle?: string; 194 | dcType?: string; 195 | error?: string; 196 | errorDetails?: Error; 197 | favicon?: string; 198 | fbAppId?: string; 199 | jsonLD?: object[]; 200 | modifiedTime?: string; 201 | musicAlbum?: string; 202 | musicAlbumDisc?: string; 203 | musicAlbumTrack?: string; 204 | musicAlbumUrl?: string; 205 | musicCreator?: string; 206 | musicDuration?: string; 207 | musicMusician?: string; 208 | musicPlaylist?: string; 209 | musicRadioStation?: string; 210 | musicReleaseDate?: string; 211 | musicSong?: MusicSongObject[]; 212 | musicSongDisc?: string[]; 213 | musicSongProperty?: string[]; 214 | musicSongTrack?: string[]; 215 | musicSongUrl?: string[]; 216 | ogArticleAuthor?: string; 217 | ogArticleExpirationTime?: string; 218 | ogArticleModifiedTime?: string; 219 
| ogArticlePublishedTime?: string; 220 | ogArticlePublisher?: string; 221 | ogArticleSection?: string; 222 | ogArticleTag?: string; 223 | ogAudio?: string; 224 | ogAudioSecureURL?: string; 225 | ogAudioType?: string; 226 | ogAudioURL?: string; 227 | ogAvailability?: string; 228 | ogDate?: string; 229 | ogDescription?: string; 230 | ogDeterminer?: string; 231 | ogEpisode?: string; 232 | ogImage?: ImageObject[]; 233 | ogImageAlt?: string[]; 234 | ogImageHeight?: string[]; 235 | ogImageProperty?: string[]; 236 | ogImageSecureURL?: string[]; 237 | ogImageType?: string[]; 238 | ogImageURL?: string[]; 239 | ogImageWidth?: string[]; 240 | ogLocale?: string; 241 | ogLocaleAlternate?: string; 242 | ogLogo?: string; 243 | ogMovie?: string; 244 | ogPriceAmount?: string; 245 | ogPriceCurrency?: string; 246 | ogProductAvailability?: string; 247 | ogProductCondition?: string; 248 | ogProductPriceAmount?: string; 249 | ogProductPriceCurrency?: string; 250 | ogProductRetailerItemId?: string; 251 | ogSiteName?: string; 252 | ogTitle?: string; 253 | ogType?: string; 254 | ogUrl?: string; 255 | ogVideo?: VideoObject[]; 256 | ogVideoActor?: string; 257 | ogVideoActorId?: string; 258 | ogVideoActorRole?: string; 259 | ogVideoDirector?: string; 260 | ogVideoDuration?: string; 261 | ogVideoHeight?: string[]; 262 | ogVideoOther?: string; 263 | ogVideoProperty?: string[]; 264 | ogVideoReleaseDate?: string; 265 | ogVideoSecureURL?: string; 266 | ogVideoSeries?: string; 267 | ogVideoTag?: string; 268 | ogVideoTvShow?: string; 269 | ogVideoType?: string[]; 270 | ogVideoWidth?: string[]; 271 | ogVideoWriter?: string; 272 | ogWebsite?: string; 273 | placeLocationLatitude?: string; 274 | placeLocationLongitude?: string; 275 | profileFirstName?: string; 276 | profileGender?: string; 277 | profileLastName?: string; 278 | profileUsername?: string; 279 | publishedTime?: string; 280 | releaseDate?: string; 281 | requestUrl?: string; 282 | restaurantContactInfoCountryName?: string; 283 | 
restaurantContactInfoEmail?: string; 284 | restaurantContactInfoLocality?: string; 285 | restaurantContactInfoPhoneNumber?: string; 286 | restaurantContactInfoPostalCode?: string; 287 | restaurantContactInfoRegion?: string; 288 | restaurantContactInfoStreetAddress?: string; 289 | restaurantContactInfoWebsite?: string; 290 | restaurantMenu?: string; 291 | restaurantRestaurant?: string; 292 | restaurantSection?: string; 293 | restaurantVariationPriceAmount?: string; 294 | restaurantVariationPriceCurrency?: string; 295 | success?: boolean; 296 | twitterAccount?: string; 297 | twitterAppIdGooglePlay?: string; 298 | twitterAppIdiPad?: string; 299 | twitterAppIdiPhone?: string; 300 | twitterAppNameGooglePlay?: string; 301 | twitterAppNameiPad?: string; 302 | twitterAppNameiPhone?: string; 303 | twitterAppUrlGooglePlay?: string; 304 | twitterAppUrliPad?: string; 305 | twitterAppUrliPhone?: string; 306 | twitterCard?: string; 307 | twitterCreator?: string; 308 | twitterCreatorId?: string; 309 | twitterDescription?: string; 310 | twitterImage?: TwitterImageObject[]; 311 | twitterImageAlt?: string[]; 312 | twitterImageHeight?: string[]; 313 | twitterImageProperty?: string[]; 314 | twitterImageSrc?: string[]; 315 | twitterImageWidth?: string[]; 316 | twitterPlayer?: TwitterPlayerObject[]; 317 | twitterPlayerHeight?: string[]; 318 | twitterPlayerProperty?: string[]; 319 | twitterPlayerStream?: string[]; 320 | twitterPlayerStreamContentType?: string; 321 | twitterPlayerWidth?: string[]; 322 | twitterSite?: string; 323 | twitterSiteId?: string; 324 | twitterTitle?: string; 325 | twitterUrl?: string; 326 | updatedTime?: string; 327 | } 328 | 329 | // Omit values from mediaMapperProperties 330 | export type OgObject = Omit< 331 | OgObjectInternal, 332 | 'musicSongDisc' | 333 | 'musicSongProperty' | 334 | 'musicSongTrack' | 335 | 'musicSongUrl' | 336 | 'ogImageAlt' | 337 | 'ogImageHeight' | 338 | 'ogImageProperty' | 339 | 'ogImageSecureURL' | 340 | 'ogImageType' | 341 | 
/**
 * Default settings handed to the URL validator when the caller supplies none.
 */
export const defaultUrlValidatorSettings = {
  allow_fragments: true,
  allow_protocol_relative_urls: false,
  allow_query_components: true,
  allow_trailing_dot: false,
  allow_underscores: false,
  protocols: ['http', 'https'],
  require_host: true,
  require_port: false,
  require_protocol: false,
  require_tld: true,
  require_valid_protocol: true,
  validate_length: true,
};

/**
 * Checks if URL is valid
 *
 * @param url - url to be checked
 * @param urlValidatorSettings - settings object used by the validator
 * @returns true when `url` is a non-empty string that passes `isUrl`
 */
export function isUrlValid(url: string, urlValidatorSettings: ValidatorSettings): boolean {
  return typeof url === 'string' && url.length > 0 && isUrl(url, urlValidatorSettings);
}

/**
 * Prefixes the url with `http://` unless it already starts with
 * http://, https://, ftp:// or ftps:// (case-insensitive).
 *
 * @param url - url to be updated
 * @returns url that starts with a protocol
 */
const coerceUrl = (url: string): string => (/^(f|ht)tps?:\/\//i.test(url) ? url : `http://${url}`);

/**
 * Validates and formats url
 *
 * @param url - url to be checked and formatted
 * @param urlValidatorSettings - settings object used by the validator
 * @returns `{ url }` holding the protocol-prefixed url, or `{ url: null }` when invalid
 */
export function validateAndFormatURL(url: string, urlValidatorSettings: ValidatorSettings): { url: string | null } {
  return { url: isUrlValid(url, urlValidatorSettings) ? coerceUrl(url) : null };
}

/**
 * Finds the image type from a given url: the text after the last `.`,
 * cut at the first `?`.
 *
 * @param url - url to be checked
 * @returns image type from url (the whole input when it contains no `.`)
 */
export function findImageTypeFromUrl(url: string): string {
  const afterLastDot = url.split('.').pop() ?? '';
  const [type] = afterLastDot.split('?');
  return type;
}

/**
 * Checks if image type is valid
 *
 * @param type - type to be checked (extension without the dot)
 * @returns true when the type is one of the known image extensions
 */
export function isImageTypeValid(type: string): boolean {
  const validImageTypes: string[] = ['apng', 'bmp', 'gif', 'ico', 'cur', 'jpg', 'jpeg', 'jfif', 'pjpeg', 'pjp', 'png', 'svg', 'tif', 'tiff', 'webp'];
  return validImageTypes.includes(type);
}

/**
 * Checks if URL is a non html page
 *
 * @param url - url to be checked
 * @returns true when the url's extension contains one of the known non-HTML extensions
 */
export function isThisANonHTMLUrl(url: string): boolean {
  const nonHtmlExtensions: string[] = ['.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.3gp', '.avi', '.mov', '.mp4', '.m4v', '.m4a', '.mp3', '.mkv', '.ogv', '.ogm', '.ogg', '.oga', '.webm', '.wav', '.bmp', '.gif', '.jpg', '.jpeg', '.png', '.webp', '.zip', '.rar', '.tar', '.tar.gz', '.tgz', '.tar.bz2', '.tbz2', '.txt', '.pdf'];
  const extension: string = findImageTypeFromUrl(url);
  // NOTE(review): this is a substring match, not an exact one - anything after the last dot
  // that merely starts with a listed extension also matches. Confirm that is intended.
  return nonHtmlExtensions.some((type: string): boolean => `.${extension}`.includes(type));
}

/**
 * Find and delete nested undefineds. The object is cleaned in place
 * (nested objects and arrays are walked recursively) and then returned.
 *
 * @param object - object to be cleaned
 * @returns the same object without keys whose value is undefined
 */
export function removeNestedUndefinedValues(object: Record<string, any>): OgObjectInternal {
  Object.entries(object).forEach(([key, value]) => {
    if (value && typeof value === 'object') removeNestedUndefinedValues(value);
    else if (value === undefined) delete object[key];
  });
  return object;
}

/**
 * Applies the ogs defaults to the caller's options.
 *
 * @param ogsOptions - options supplied by the caller
 * @returns `{ options }` where `onlyGetOpenGraphInfo` defaults to false
 */
export function optionSetup(ogsOptions: OpenGraphScraperOptions): { options: OpenGraphScraperOptions } {
  const options: OpenGraphScraperOptions = {
    onlyGetOpenGraphInfo: false,
    ...ogsOptions,
  };

  return { options };
}

/**
 * Checks if the custom meta tag definitions are valid.
 *
 * Every entry must be a non-null object with a string `fieldName`,
 * a boolean `multiple` and a string `property`.
 *
 * @param customMetaTags - the user supplied custom meta tags
 * @returns true when the value is an array and every entry is well formed
 */
export function isCustomMetaTagsValid(customMetaTags: CustomMetaTags[]): boolean {
  if (!Array.isArray(customMetaTags)) return false;

  return customMetaTags.every((customMetaTag) => (
    // `typeof null` is 'object', so null has to be rejected explicitly before reading fields
    typeof customMetaTag === 'object'
    && customMetaTag !== null
    && typeof customMetaTag.fieldName === 'string'
    && typeof customMetaTag.multiple === 'boolean'
    && typeof customMetaTag.property === 'string'
  ));
}
/**
 * Unescape script text.
 *
 * Certain websites escape script text within script tags with `\xHH`
 * sequences, which `JSON.parse()` rejects (`\uHHHH` is fine, `\xHH` is not).
 * Each `\xHH` is replaced by the character it encodes. Characters that cannot
 * appear raw inside a JSON string are emitted as JSON escapes instead:
 * `"` becomes `\"`, `\` becomes `\\` and control characters (below 0x20)
 * become `\u00HH`.
 *
 * ```js
 * JSON.parse('"\\u2611"'); // '☑'
 * JSON.parse('"\\x26"');   // throws, hence this helper
 * ```
 *
 * @param scriptText - the text of the script tag
 * @returns unescaped script text
 */
export function unescapeScriptText(scriptText: string) {
  // https://stackoverflow.com/a/34056693
  return scriptText.replace(/\\x([0-9a-f]{2})/ig, (_match: string, pair: string) => {
    const charCode = parseInt(pair, 16);
    if (charCode === 34) return '\\"'; // double quote would terminate the JSON string
    if (charCode === 92) return '\\\\'; // a raw backslash would start a new (wrong) escape
    if (charCode < 0x20) return `\\u00${pair}`; // raw control characters are invalid in JSON strings
    return String.fromCharCode(charCode);
  });
}
tsconfig.build.json --module node16 --moduleResolution node16 --outDir dist/esm/", 20 | "build": "rm -rf dist/ && npm run build:cjs && npm run build:esm", 21 | "ci": "npm run eslint && npm run build && npm run build:declaration && npm run test", 22 | "eslint:fix": "eslint . --ext .js,.ts --fix", 23 | "eslint": "eslint . --ext .js,.ts", 24 | "mocha:int": "ts-mocha --recursive \"./tests/integration/**/*.spec.ts\" --timeout 10000", 25 | "mocha:unit": "nyc --reporter=html --reporter=text --exclude=tests/ ts-mocha --recursive \"./tests/unit/**/*.spec.ts\"", 26 | "prepare": "npm run snyk-protect && npm run build", 27 | "snyk-protect": "snyk-protect", 28 | "test": "npm run mocha:unit && npm run mocha:int" 29 | }, 30 | "engines": { 31 | "node": ">=18.0.0" 32 | }, 33 | "author": { 34 | "name": "Josh Shemas", 35 | "email": "jjs90jjs@gmail.com", 36 | "url": "https://github.com/jshemas" 37 | }, 38 | "dependencies": { 39 | "chardet": "^2.1.0", 40 | "cheerio": "^1.0.0-rc.12", 41 | "iconv-lite": "^0.6.3", 42 | "undici": "^6.21.2" 43 | }, 44 | "files": [ 45 | "/dist", 46 | "/types", 47 | "index.ts", 48 | "CHANGELOG.md" 49 | ], 50 | "devDependencies": { 51 | "@snyk/protect": "^1.1296.2", 52 | "@types/mocha": "^10.0.10", 53 | "@types/node": "^18.19.86", 54 | "@typescript-eslint/eslint-plugin": "^7.18.0", 55 | "@typescript-eslint/parser": "^7.18.0", 56 | "chai": "^4.5.0", 57 | "eslint": "^8.57.0", 58 | "eslint-config-airbnb-base": "^15.0.0", 59 | "eslint-config-airbnb-typescript": "^18.0.0", 60 | "eslint-plugin-import": "^2.31.0", 61 | "eslint-plugin-mocha": "^10.5.0", 62 | "eslint-plugin-promise": "^7.2.1", 63 | "mocha": "^11.1.0", 64 | "nyc": "^17.1.0", 65 | "sinon": "^19.0.2", 66 | "ts-mocha": "^11.1.0", 67 | "typescript": "5.5.4" 68 | }, 69 | "repository": { 70 | "type": "git", 71 | "url": "git+ssh://git@github.com/jshemas/openGraphScraper.git", 72 | "web": "http://github.com/jshemas/openGraphScraper" 73 | }, 74 | "keywords": [ 75 | "app links", 76 | "dublin core", 77 | "json 
ld", 78 | "meta tags", 79 | "metadata", 80 | "og", 81 | "ogp", 82 | "ogs", 83 | "open graph scraper", 84 | "open graph", 85 | "opengraph", 86 | "openGraphScraper", 87 | "scraper", 88 | "twitter card", 89 | "twitter" 90 | ], 91 | "snyk": true 92 | } 93 | -------------------------------------------------------------------------------- /tests/.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "root": true, 3 | "extends": [ 4 | "airbnb-base", 5 | "airbnb-typescript/base", 6 | "plugin:promise/recommended", 7 | "plugin:mocha/recommended" 8 | ], 9 | "plugins": [ 10 | "mocha" 11 | ], 12 | "env": { 13 | "mocha": true 14 | }, 15 | "parserOptions": { 16 | "project": "./tsconfig.tests.json" 17 | }, 18 | "rules": { 19 | "@typescript-eslint/no-unused-expressions": 0, 20 | "func-names": 0, 21 | "import/extensions": 0, 22 | "import/no-unresolved": 0, 23 | "max-len": ["error", { 24 | "code": 120, 25 | "ignoreStrings": true, 26 | "ignoreTrailingComments": true 27 | }], 28 | "no-console": 0, 29 | "no-param-reassign": "off", 30 | "no-unused-expressions": 0, 31 | "prefer-arrow-callback": 0, 32 | "promise/always-return": 0 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /tests/integration/blacklist.spec.ts: -------------------------------------------------------------------------------- 1 | import { expect } from 'chai'; 2 | 3 | import ogs from '../../index'; 4 | 5 | describe('blacklist', function () { 6 | it('when website is on the blacklist', function () { 7 | return ogs({ 8 | url: 'https://www.wikipedia.org/', 9 | blacklist: ['www.test.com', 'www.wikipedia.org'], 10 | }) 11 | .then(function () { 12 | expect().fail('this should not happen'); 13 | }) 14 | .catch(function ({ error, result, response }) { 15 | console.log('error:', error); 16 | console.log('result:', result); 17 | expect(error).to.be.eql(true); 18 | expect(result.success).to.be.eql(false); 19 | 
expect(result.requestUrl).to.be.eql('https://www.wikipedia.org/'); 20 | expect(result.error).to.eql('Host name has been black listed'); 21 | expect(result.errorDetails.toString()).to.eql('Error: Host name has been black listed'); 22 | expect(result).to.have.all.keys( 23 | 'error', 24 | 'errorDetails', 25 | 'requestUrl', 26 | 'success', 27 | ); 28 | expect(response).to.eql(undefined); 29 | }); 30 | }); 31 | 32 | it('when website is not on the blacklist', function () { 33 | return ogs({ 34 | url: 'https://www.wikipedia.org/', 35 | blacklist: ['www.test.com', 'www.google.org'], 36 | }).then(function ({ error, result, response }) { 37 | console.log('error:', error); 38 | console.log('result:', result); 39 | expect(error).to.be.eql(false); 40 | expect(result.ogTitle).to.be.eql('Wikipedia, the free encyclopedia'); 41 | expect(result.ogDescription).to.be.eql('Wikipedia is a free online encyclopedia, created and edited by volunteers around the world and hosted by the Wikimedia Foundation.'); 42 | expect(result.favicon).to.be.eql('/static/favicon/wikipedia.ico'); 43 | expect(result.ogLocale).to.be.eql('en'); 44 | expect(result.requestUrl).to.be.eql('https://www.wikipedia.org/'); 45 | expect(result.charset).to.be.eql('utf-8'); 46 | expect(result.success).to.be.eql(true); 47 | expect(result).to.have.all.keys( 48 | 'charset', 49 | 'favicon', 50 | 'ogDescription', 51 | 'ogImage', 52 | 'ogLocale', 53 | 'ogTitle', 54 | 'ogType', 55 | 'requestUrl', 56 | 'success', 57 | ); 58 | expect(response).to.be.an('Response'); 59 | }); 60 | }); 61 | 62 | it('when blacklist empty', function () { 63 | return ogs({ 64 | url: 'https://www.wikipedia.org/', 65 | blacklist: [], 66 | }).then(function ({ error, result, response }) { 67 | console.log('error:', error); 68 | console.log('result:', result); 69 | expect(error).to.be.eql(false); 70 | expect(result.ogTitle).to.be.eql('Wikipedia, the free encyclopedia'); 71 | expect(result.ogDescription).to.be.eql('Wikipedia is a free online encyclopedia, 
created and edited by volunteers around the world and hosted by the Wikimedia Foundation.'); 72 | expect(result.ogLocale).to.be.eql('en'); 73 | expect(result.requestUrl).to.be.eql('https://www.wikipedia.org/'); 74 | expect(result.favicon).to.be.eql('/static/favicon/wikipedia.ico'); 75 | expect(result.charset).to.be.eql('utf-8'); 76 | expect(result.success).to.be.eql(true); 77 | expect(result).to.have.all.keys( 78 | 'charset', 79 | 'favicon', 80 | 'ogDescription', 81 | 'ogImage', 82 | 'ogLocale', 83 | 'ogTitle', 84 | 'ogType', 85 | 'requestUrl', 86 | 'success', 87 | ); 88 | expect(response).to.be.an('Response'); 89 | }); 90 | }); 91 | }); 92 | -------------------------------------------------------------------------------- /tests/integration/fetch.spec.ts: -------------------------------------------------------------------------------- 1 | import { expect } from 'chai'; 2 | import { load } from 'cheerio'; 3 | 4 | import ogs from '../../index'; 5 | 6 | describe('fetch', function () { 7 | // TODO: Site keeps going offline, will need to find a new site 8 | // eslint-disable-next-line mocha/no-skipped-tests 9 | it.skip('setting the fetch headers', function () { 10 | // userAgent is undici by default 11 | const userAgent = 'Mozilla/5.0 (Linux; Android 10) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.5563.57 Mobile Safari/537.36'; 12 | return ogs({ url: 'https://www.whatsmyua.info/', fetchOptions: { headers: { 'user-agent': userAgent } } }) 13 | .then(async function ({ 14 | error, result, response, html, 15 | }) { 16 | console.log('error:', error); 17 | console.log('result:', result); 18 | expect(error).to.be.eql(false); 19 | expect(result.ogTitle).to.be.eql('What\'s my user agent?'); 20 | expect(result.ogDescription).to.be.eql('Detect user-agent, operating system, browser, and device using several libraries, including ua-parser, ua-parser-js, and platform.'); 21 | expect(result.requestUrl).to.be.eql('https://www.whatsmyua.info/'); 22 | 
expect(result.charset).to.be.eql('utf-8'); 23 | expect(result.success).to.be.eql(true); 24 | expect(result).to.have.all.keys( 25 | 'ogTitle', 26 | 'ogDescription', 27 | 'requestUrl', 28 | 'charset', 29 | 'success', 30 | ); 31 | expect(response).to.be.an('Response'); 32 | const $ = load(html || ''); 33 | const rawUa = $('li#rawUa').text(); 34 | expect(rawUa).to.be.eql(`rawUa: ${userAgent}`); 35 | }); 36 | }); 37 | 38 | it('setting a timeout', function () { 39 | return ogs({ url: 'https://releases.ubuntu.com/23.04/ubuntu-23.04-desktop-amd64.iso', timeout: 3 }) 40 | .then(function () { 41 | expect().fail('this should not happen'); 42 | }) 43 | .catch(function ({ error, result, response }) { 44 | console.log('error:', error); 45 | console.log('result:', result); 46 | expect(error).to.be.eql(true); 47 | expect(result.success).to.be.eql(false); 48 | expect(result.requestUrl).to.be.eql('https://releases.ubuntu.com/23.04/ubuntu-23.04-desktop-amd64.iso'); 49 | expect(result.error).to.eql('The operation was aborted due to timeout'); 50 | expect(result.errorDetails.toString()).to.eql('TimeoutError: The operation was aborted due to timeout'); 51 | expect(result).to.have.all.keys( 52 | 'error', 53 | 'errorDetails', 54 | 'requestUrl', 55 | 'success', 56 | ); 57 | expect(response).to.eql(undefined); 58 | }); 59 | }); 60 | // https://developer.mozilla.org/en-US/docs/Web/API/AbortSignal/timeout 61 | // eslint-disable-next-line mocha/no-skipped-tests 62 | it.skip('setting a timeout - using AbortSignal.timeout()', function () { 63 | return ogs({ url: 'https://releases.ubuntu.com/23.04/ubuntu-23.04-desktop-amd64.iso', fetchOptions: { signal: AbortSignal.timeout(3000) } }) 64 | .then(function () { 65 | expect().fail('this should not happen'); 66 | }) 67 | .catch(function ({ error, result, response }) { 68 | console.log('error:', error); 69 | console.log('result:', result); 70 | expect(error).to.be.eql(true); 71 | expect(result.success).to.be.eql(false); 72 | 
expect(result.requestUrl).to.be.eql('https://releases.ubuntu.com/23.04/ubuntu-23.04-desktop-amd64.iso'); 73 | expect(result.error).to.eql('The operation was aborted due to timeout'); 74 | expect(result.errorDetails.toString()).to.eql('Error: The operation was aborted due to timeout'); 75 | expect(result).to.have.all.keys( 76 | 'error', 77 | 'errorDetails', 78 | 'requestUrl', 79 | 'success', 80 | ); 81 | expect(response).to.eql(undefined); 82 | }); 83 | }); 84 | // https://developer.mozilla.org/en-US/docs/Web/API/AbortSignal/abort 85 | // eslint-disable-next-line mocha/no-skipped-tests 86 | it.skip('setting a timeout - using controller.abort()', function () { 87 | const controller = new AbortController(); 88 | setTimeout(() => controller.abort(), 3000); 89 | return ogs({ url: 'https://releases.ubuntu.com/23.04/ubuntu-23.04-desktop-amd64.iso', fetchOptions: { signal: controller.signal } }) 90 | .then(function () { 91 | expect().fail('this should not happen'); 92 | }) 93 | .catch(function ({ error, result, response }) { 94 | console.log('error:', error); 95 | console.log('result:', result); 96 | expect(error).to.be.eql(true); 97 | expect(result.success).to.be.eql(false); 98 | expect(result.requestUrl).to.be.eql('https://releases.ubuntu.com/23.04/ubuntu-23.04-desktop-amd64.iso'); 99 | expect(result.error).to.eql('The operation was aborted due to timeout'); 100 | expect(result.errorDetails.toString()).to.eql('Error: The operation was aborted due to timeout'); 101 | expect(result).to.have.all.keys( 102 | 'error', 103 | 'errorDetails', 104 | 'requestUrl', 105 | 'success', 106 | ); 107 | expect(response).to.eql(undefined); 108 | }); 109 | }); 110 | }); 111 | -------------------------------------------------------------------------------- /tests/integration/html.spec.ts: -------------------------------------------------------------------------------- 1 | import { expect } from 'chai'; 2 | 3 | import ogs from '../../index'; 4 | 5 | const HTML_STRING = ` 6 | <html> 7 | <head> 8 | <meta property="og:title" content="Test page"/> 9 | </head> 10 | <body></body>
11 | </html> 12 | `; 13 | 14 | describe('html', function () { 15 | it('pass in HTML string', function () { 16 | return ogs({ html: HTML_STRING }).then(function ({ error, result, response }) { 17 | console.log('error:', error); 18 | console.log('result:', result); 19 | expect(error).to.be.eql(false); 20 | expect(result.success).to.be.eql(true); 21 | expect(result.ogTitle).to.be.eql('Test page'); 22 | expect(result.charset).to.be.eql('ISO-8859-1'); 23 | expect(result).to.have.all.keys( 24 | 'charset', 25 | 'ogTitle', 26 | 'success', 27 | ); 28 | expect(response).to.be.an('object').and.to.not.be.empty; 29 | }); 30 | }); 31 | 32 | it('Invalid Call - Can\'t request URL and pass in HTML string', function () { 33 | return ogs({ 34 | url: 'https://upload.wikimedia.org/wikipedia/commons.jpg', 35 | html: HTML_STRING, 36 | }) 37 | .then(function () { 38 | expect().fail('this should not happen'); 39 | }) 40 | .catch(function ({ error, result, response }) { 41 | console.log('error:', error); 42 | console.log('result:', result); 43 | expect(error).to.be.eql(true); 44 | expect(result.success).to.be.eql(false); 45 | expect(result.requestUrl).to.be.eql('https://upload.wikimedia.org/wikipedia/commons.jpg'); 46 | expect(result.error).to.eql('Must specify either `url` or `html`, not both'); 47 | expect(result.errorDetails.toString()).to.eql('Error: Must specify either `url` or `html`, not both'); 48 | expect(result).to.have.all.keys( 49 | 'error', 50 | 'errorDetails', 51 | 'requestUrl', 52 | 'success', 53 | ); 54 | expect(response).to.eql(undefined); 55 | }); 56 | }); 57 | 58 | it('Invalid Call - Not a HTML page', function () { 59 | return ogs({ 60 | url: 'https://upload.wikimedia.org/wikipedia/commons.jpg', 61 | }) 62 | .then(function () { 63 | expect().fail('this should not happen'); 64 | }) 65 | .catch(function ({ error, result, response }) { 66 | console.log('error:', error); 67 | console.log('result:', result); 68 | expect(error).to.be.eql(true); 69 | 
expect(result.success).to.be.eql(false); 70 | expect(result.requestUrl).to.be.eql('https://upload.wikimedia.org/wikipedia/commons.jpg'); 71 | expect(result.error).to.eql('Must scrape an HTML page'); 72 | expect(result.errorDetails.toString()).to.eql('Error: Must scrape an HTML page'); 73 | expect(result).to.have.all.keys( 74 | 'error', 75 | 'errorDetails', 76 | 'requestUrl', 77 | 'success', 78 | ); 79 | expect(response).to.eql(undefined); 80 | }); 81 | }); 82 | }); 83 | -------------------------------------------------------------------------------- /tests/integration/image.spec.ts: -------------------------------------------------------------------------------- 1 | import { expect } from 'chai'; 2 | 3 | import ogs from '../../index'; 4 | 5 | describe('image', function () { 6 | it('Test Flickr Image - Should Return correct Open Graph Info', function () { 7 | return ogs({ url: 'https://jshemas.github.io/openGraphScraperPages/flickr' }).then(function ({ error, result, response }) { 8 | console.log('error:', error); 9 | console.log('result:', result); 10 | expect(error).to.be.eql(false); 11 | expect(result.alIosAppName).to.be.eql('Flickr'); 12 | expect(result.alIosAppStoreId).to.be.eql('328407587'); 13 | expect(result.alIosUrl).to.be.eql('flickr://flickr.com/photos/travelgraph/18791678505'); 14 | expect(result.ogSiteName).to.be.eql('Flickr'); 15 | expect(result.twitterAppNameiPhone).to.be.eql('Flickr'); 16 | expect(result.twitterAppIdiPhone).to.be.eql('328407587'); 17 | expect(result.twitterSite).to.be.eql('@flickr'); 18 | expect(result.ogTitle).to.be.eql('Heimgarten'); 19 | expect(result.ogDescription).to.be.eql('____________________ Press "L" to view on black Press "F" to favor Share, if you like :) You can leave a comment, if you like :) Not to use or publish without permission! 
© Christoph Wagner Photographie'); 20 | expect(result.ogType).to.be.eql('article'); 21 | expect(result.ogUrl).to.be.eql('https://www.flickr.com/photos/travelgraph/18791678505/'); 22 | expect(result.favicon).to.be.eql('https://s.yimg.com/pw/favicon.ico'); 23 | expect(result.ogLocale).to.be.eql('en-us'); 24 | expect(result.twitterCard).to.be.eql('photo'); 25 | expect(result.twitterDescription).to.be.eql('____________________ Press "L" to view on black Press "F" to favor Share, if you like :) You can leave a comment, if you like :) Not to use or publish without permission! © Christoph Wagner Photographie'); 26 | expect(result.twitterAppUrliPhone).to.be.eql('flickr://flickr.com/photos/travelgraph/18791678505/'); 27 | expect(result.ogImage).to.be.eql([{ 28 | url: 'https://c1.staticflickr.com/1/499/18791678505_5886fefcf7_b.jpg', 29 | width: '1024', 30 | height: '375', 31 | type: 'jpg', 32 | }]); 33 | expect(result.requestUrl).to.be.eql('https://jshemas.github.io/openGraphScraperPages/flickr'); 34 | expect(result.charset).to.be.eql('UTF-8'); 35 | expect(result.fbAppId).to.be.eql('137206539707334'); 36 | expect(result.jsonLD).to.be.an('array').and.to.not.be.empty; 37 | expect(result.success).to.be.eql(true); 38 | expect(result).to.have.all.keys( 39 | 'favicon', 40 | 'jsonLD', 41 | 'fbAppId', 42 | 'alIosAppName', 43 | 'alIosAppStoreId', 44 | 'alIosUrl', 45 | 'ogDescription', 46 | 'ogImage', 47 | 'ogLocale', 48 | 'ogSiteName', 49 | 'ogTitle', 50 | 'ogType', 51 | 'ogUrl', 52 | 'requestUrl', 53 | 'success', 54 | 'charset', 55 | 'twitterAppIdiPhone', 56 | 'twitterAppNameiPhone', 57 | 'twitterAppUrliPhone', 58 | 'twitterCard', 59 | 'twitterDescription', 60 | 'twitterSite', 61 | ); 62 | expect(response).to.be.an('Response'); 63 | }); 64 | }); 65 | 66 | it('Test getting the description and images from meta tags', function () { 67 | return ogs({ url: 'https://jshemas.github.io/openGraphScraperPages/twitter.html' }).then(function ({ error, result, response }) { 68 | 
console.log('error:', error); 69 | console.log('result:', result); 70 | expect(error).to.be.eql(false); 71 | expect(result.charset).to.be.eql('utf-8'); 72 | expect(result.success).to.be.eql(true); 73 | expect(result.requestUrl).to.be.eql('https://jshemas.github.io/openGraphScraperPages/twitter.html'); 74 | expect(result.ogTitle).to.be.eql('Twitter. It\'s what\'s happening.'); 75 | expect(result.ogDescription).to.be.eql('From breaking news and entertainment to sports and politics, get the full story with all the live commentary.'); 76 | expect(result.ogLocale).to.be.eql('en'); 77 | expect(result.ogUrl).to.be.eql('https://web.archive.org/web/20170608000236/https://twitter.com/i/hello'); 78 | expect(result.favicon).to.be.eql('//web.archive.org/web/20170608000236im_/https://abs.twimg.com/favicons/favicon.ico'); 79 | expect(result.ogImage).to.be.eql([ 80 | { 81 | url: 'https://web.archive.org/web/20170608000236im_/https://pbs.twimg.com/media/C_-sImTVYAEEJM5.jpg', 82 | type: 'jpg', 83 | }, 84 | { 85 | url: 'https://web.archive.org/web/20170608000236im_/https://pbs.twimg.com/tweet_video_thumb/DBp8umbVYAAWJuC.jpg', 86 | type: 'jpg', 87 | }, 88 | { 89 | url: 'https://web.archive.org/web/20170608000236im_/https://pbs.twimg.com/tweet_video_thumb/DBvjXJsWsAEAs90.jpg', 90 | type: 'jpg', 91 | }, 92 | { 93 | url: 'https://web.archive.org/web/20170608000236im_/https://pbs.twimg.com/media/CljfeELUoAUskW4.jpg', 94 | type: 'jpg', 95 | }, 96 | { 97 | url: 'https://web.archive.org/web/20170608000236im_/https://pbs.twimg.com/media/DBwVAyKUIAEmWVs.jpg', 98 | type: 'jpg', 99 | }, 100 | { 101 | url: 'https://web.archive.org/web/20170608000236im_/https://pbs.twimg.com/profile_images/508960761826131968/LnvhR8ED_bigger.png', 102 | type: 'png', 103 | }, 104 | { 105 | url: 'https://web.archive.org/web/20170608000236im_/https://pbs.twimg.com/profile_images/854430488777379840/zFdLhwbT_bigger.jpg', 106 | type: 'jpg', 107 | }, 108 | { 109 | url: 
'https://web.archive.org/web/20170608000236im_/https://pbs.twimg.com/profile_images/652596362073272320/Zv6K-clv_bigger.jpg', 110 | type: 'jpg', 111 | }, 112 | { 113 | url: 'https://web.archive.org/web/20170608000236im_/https://pbs.twimg.com/media/DBrlZk4UwAA9Zq-.jpg', 114 | type: 'jpg', 115 | }, 116 | { 117 | url: 'https://web.archive.org/web/20170608000236im_/https://pbs.twimg.com/media/DBrlZk2UQAAfAkd.jpg', 118 | type: 'jpg', 119 | }, 120 | ]); 121 | expect(result).to.have.all.keys( 122 | 'favicon', 123 | 'ogDescription', 124 | 'ogImage', 125 | 'ogTitle', 126 | 'ogLocale', 127 | 'requestUrl', 128 | 'ogUrl', 129 | 'success', 130 | 'charset', 131 | ); 132 | expect(response).to.be.an('Response'); 133 | }); 134 | }); 135 | }); 136 | -------------------------------------------------------------------------------- /tests/integration/onlyGetOpenGraphInfo.spec.ts: -------------------------------------------------------------------------------- 1 | import { expect } from 'chai'; 2 | 3 | import ogs from '../../index'; 4 | 5 | describe('onlyGetOpenGraphInfo', function () { 6 | // TODO: wikipedia now has og data by default, will need to find a replacement site for this test 7 | it.skip('should only get open graph info', function () { 8 | return ogs({ 9 | url: 'http://www.wikipedia.org/', 10 | onlyGetOpenGraphInfo: true, 11 | }).then(function ({ error, result, response }) { 12 | console.log('error:', error); 13 | console.log('result:', result); 14 | expect(error).to.be.eql(false); 15 | expect(result.ogTitle).to.eql(undefined); 16 | expect(result.ogDescription).to.eql(undefined); 17 | expect(result.ogImage).to.eql(undefined); 18 | expect(result.requestUrl).to.be.eql('http://www.wikipedia.org/'); 19 | expect(result.success).to.be.eql(true); 20 | expect(result).to.have.all.keys( 21 | 'requestUrl', 22 | 'success', 23 | ); 24 | expect(response).to.be.an('Response'); 25 | }); 26 | }); 27 | 28 | it('should get all open graph info', function () { 29 | return ogs({ 30 | url: 
'http://www.wikipedia.org/', 31 | onlyGetOpenGraphInfo: false, 32 | }).then(function ({ error, result, response }) { 33 | console.log('error:', error); 34 | console.log('result:', result); 35 | expect(error).to.be.eql(false); 36 | expect(result.ogTitle).to.be.eql('Wikipedia, the free encyclopedia'); 37 | expect(result.ogDescription).to.be.eql('Wikipedia is a free online encyclopedia, created and edited by volunteers around the world and hosted by the Wikimedia Foundation.'); 38 | expect(result.ogLocale).to.be.eql('en'); 39 | expect(result.requestUrl).to.be.eql('http://www.wikipedia.org/'); 40 | expect(result.favicon).to.be.eql('/static/favicon/wikipedia.ico'); 41 | expect(result.charset).to.be.eql('utf-8'); 42 | expect(result.success).to.be.eql(true); 43 | expect(result).to.have.all.keys( 44 | 'charset', 45 | 'favicon', 46 | 'ogDescription', 47 | 'ogImage', 48 | 'ogLocale', 49 | 'ogTitle', 50 | 'ogType', 51 | 'requestUrl', 52 | 'success', 53 | ); 54 | expect(response).to.be.an('Response'); 55 | }); 56 | }); 57 | }); 58 | -------------------------------------------------------------------------------- /tests/integration/redirect.spec.ts: -------------------------------------------------------------------------------- 1 | import { expect } from 'chai'; 2 | 3 | import ogs from '../../index'; 4 | 5 | describe('redirect', function () { 6 | context('should return correct Open Graph Info', function () { 7 | // nytimes keeps blocking requests; will need to find a way to bypass this 8 | it.skip('nytimes page', function () { 9 | const userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36'; 10 | return ogs({ 11 | url: 'https://www.nytimes.com/2016/09/01/arts/design/gallery-hopes-to-sell-kanye-wests-famous-sculpture-for-4-million.html?_r=0', 12 | fetchOptions: { headers: { 'user-agent': userAgent } }, 13 | }).then(function ({ error, result, response }) { 14 | console.log('error:', error); 15 | 
console.log('result:', result); 16 | expect(error).to.be.eql(false); 17 | expect(result.alAndroidUrl).to.be.eql('nyt://article/d07123d7-f6dc-5370-97cb-86dd6aa0b0de'); 18 | expect(result.alAndroidPackage).to.be.eql('com.nytimes.android'); 19 | expect(result.alAndroidAppName).to.be.eql('NYTimes'); 20 | expect(result.alIphoneUrl).to.be.eql('nytimes://www.nytimes.com/2016/09/01/arts/design/gallery-hopes-to-sell-kanye-wests-famous-sculpture-for-4-million.html'); 21 | expect(result.alIphoneAppStoreId).to.be.eql('284862083'); 22 | expect(result.alIphoneAppName).to.be.eql('NYTimes'); 23 | expect(result.alIpadUrl).to.be.eql('nytimes://www.nytimes.com/2016/09/01/arts/design/gallery-hopes-to-sell-kanye-wests-famous-sculpture-for-4-million.html'); 24 | expect(result.alIpadAppStoreId).to.be.eql('357066198'); 25 | expect(result.alIpadAppName).to.be.eql('NYTimes'); 26 | expect(result.ogUrl).to.be.eql('https://www.nytimes.com/2016/09/01/arts/design/gallery-hopes-to-sell-kanye-wests-famous-sculpture-for-4-million.html'); 27 | expect(result.favicon).to.be.eql('/vi-assets/static-assets/favicon-d2483f10ef688e6f89e23806b9700298.ico'); 28 | expect(result.ogType).to.be.eql('article'); 29 | expect(result.articleSection).to.be.eql('Arts'); 30 | expect(result.articleTag).to.be.eql('Blum & Poe (Los Angeles, Calif)'); 31 | expect(result.articleModifiedTime).to.be.eql('2016-09-01T17:37:39.000Z'); 32 | expect(result.articlePublishedTime).to.be.eql('2016-09-01T01:34:35.000Z'); 33 | expect(result.ogTitle).to.be.eql('Gallery Hopes to Sell Kanye West’s ‘Famous’ Sculpture for $4 Million (Published 2016)'); 34 | expect(result.ogDescription).to.be.eql('The Los Angeles gallery Blum & Poe, which hosted the ‘Famous’ exhibition, is projecting a hefty price tag for the work.'); 35 | expect(result.twitterTitle).to.be.eql('Gallery Hopes to Sell Kanye West’s ‘Famous’ Sculpture for $4 Million (Published 2016)'); 36 | expect(result.twitterDescription).to.be.eql('The Los Angeles gallery Blum & Poe, which hosted 
the ‘Famous’ exhibition, is projecting a hefty price tag for the work.'); 37 | expect(result.twitterCard).to.be.eql('summary_large_image'); 38 | expect(result.twitterAppNameGooglePlay).to.be.eql('NYTimes'); 39 | expect(result.twitterAppIdGooglePlay).to.be.eql('com.nytimes.android'); 40 | expect(result.twitterAppUrlGooglePlay).to.be.eql('nyt://article/d07123d7-f6dc-5370-97cb-86dd6aa0b0de'); 41 | expect(result.ogLocale).to.be.eql('en'); 42 | expect(result.twitterSite).to.be.eql('@nytimes'); 43 | expect(result.twitterUrl).to.be.eql('https://www.nytimes.com/2016/09/01/arts/design/gallery-hopes-to-sell-kanye-wests-famous-sculpture-for-4-million.html'); 44 | expect(result.ogImage).to.be.eql([{ 45 | url: 'https://static01.nyt.com/images/2016/09/02/arts/01KANYE1-web/01KANYE1-web-facebookJumbo.jpg?year=2016&h=550&w=1050&s=f15e16ac34e5bf83b85e4497c724e7bd5ba43994e780f23119610eba47cd726d&k=ZQJBKqZ0VN', 46 | type: 'jpg', 47 | }]); 48 | expect(result.twitterImage).to.be.eql([{ 49 | url: 'https://static01.nyt.com/images/2016/09/02/arts/01KANYE1-web/01KANYE1-web-videoSixteenByNineJumbo1600.jpg?year=2016&h=901&w=1600&s=a5f74a00775cb159c1978e3d3c89d7ea7f176aec59f6565fad3c377cf3b1bd7b&k=ZQJBKqZ0VN&tw=1', 50 | alt: 'Kim Kardashian West at the “Famous” exhibition at Blum & Poe in Los Angeles last week. 
The gallery is planning to sell the sculpture for a hefty price tag.', 51 | }]); 52 | expect(result.requestUrl).to.be.eql('https://www.nytimes.com/2016/09/01/arts/design/gallery-hopes-to-sell-kanye-wests-famous-sculpture-for-4-million.html?_r=0'); 53 | expect(result.charset).to.be.eql('utf-8'); 54 | expect(result.success).to.be.eql(true); 55 | expect(result).to.have.all.keys( 56 | 'favicon', 57 | 'alAndroidAppName', 58 | 'alAndroidPackage', 59 | 'alAndroidUrl', 60 | 'alIpadAppName', 61 | 'alIpadAppStoreId', 62 | 'alIpadUrl', 63 | 'alIphoneAppName', 64 | 'alIphoneAppStoreId', 65 | 'alIphoneUrl', 66 | 'articleModifiedTime', 67 | 'articlePublishedTime', 68 | 'articleSection', 69 | 'articleTag', 70 | 'ogDate', 71 | 'ogDescription', 72 | 'ogImage', 73 | 'ogLocale', 74 | 'ogTitle', 75 | 'ogType', 76 | 'ogUrl', 77 | 'requestUrl', 78 | 'success', 79 | 'charset', 80 | 'twitterAppIdGooglePlay', 81 | 'twitterAppNameGooglePlay', 82 | 'twitterAppUrlGooglePlay', 83 | 'twitterCard', 84 | 'twitterDescription', 85 | 'twitterImage', 86 | 'twitterSite', 87 | 'twitterTitle', 88 | 'twitterUrl', 89 | ); 90 | expect(response).to.be.an('Response'); 91 | }); 92 | }); 93 | it('forbes page', function () { 94 | return ogs({ 95 | url: 'https://www.forbes.com/sites/kenkam/2017/09/28/3-stocks-like-apple-was-10-years-ago-tesla-nvidia-and-alibaba/#2636f6c2f0fa', 96 | }).then(function ({ error, result, response }) { 97 | console.log('error:', error); 98 | console.log('result:', result); 99 | expect(error).to.be.eql(false); 100 | expect(result.ogTitle).to.be.eql('3 Stocks Like Apple Was 10 Years Ago: Tesla, Nvidia And Alibaba'); 101 | expect(result.ogSiteName).to.be.eql('Forbes'); 102 | expect(result.articleAuthor).to.be.eql('Ken Kam'); 103 | expect(result.articleSection).to.be.eql('Markets'); 104 | expect(result.author).to.be.eql('Ken Kam'); 105 | expect(result.ogType).to.be.eql('article'); 106 | 
expect(result.ogUrl).to.be.eql('https://www.forbes.com/sites/kenkam/2017/09/28/3-stocks-like-apple-was-10-years-ago-tesla-nvidia-and-alibaba/'); 107 | expect(result.favicon).to.be.eql('https://i.forbesimg.com/48X48-F.png'); 108 | expect(result.ogDescription).to.be.an('string').and.to.not.be.empty; 109 | expect(result.twitterCard).to.be.eql('summary_large_image'); 110 | expect(result.twitterSite).to.be.eql('@forbes'); 111 | expect(result.twitterCreator).to.be.eql('@MarketocracyInc'); 112 | expect(result.ogLocale).to.be.eql('en'); 113 | expect(result.twitterTitle).to.be.eql('3 Stocks Like Apple Was 10 Years Ago: Tesla, Nvidia And Alibaba'); 114 | expect(result.twitterDescription).to.be.an('string').and.to.not.be.empty; 115 | expect(result.ogImage).to.be.eql([{ 116 | url: 'https://imageio.forbes.com/specials-images/imageserve/825671570/0x0.jpg?format=jpg&height=900&width=1600&fit=bounds', 117 | type: 'image/jpeg', 118 | }]); 119 | expect(result.twitterImage).to.be.eql([{ 120 | url: 'https://imageio.forbes.com/specials-images/imageserve/825671570/0x0.jpg?format=jpg&height=600&width=1200&fit=bounds', 121 | }]); 122 | expect(result.requestUrl).to.be.eql('https://www.forbes.com/sites/kenkam/2017/09/28/3-stocks-like-apple-was-10-years-ago-tesla-nvidia-and-alibaba/#2636f6c2f0fa'); 123 | expect(result.charset).to.be.eql('utf-8'); 124 | expect(result.fbAppId).to.be.eql('123694841080850'); 125 | expect(result.success).to.be.eql(true); 126 | expect(result).to.have.all.keys( 127 | 'favicon', 128 | 'fbAppId', 129 | 'ogDate', 130 | 'articleAuthor', 131 | 'articleSection', 132 | 'author', 133 | 'ogDescription', 134 | 'ogImage', 135 | 'ogLocale', 136 | 'ogSiteName', 137 | 'ogTitle', 138 | 'ogType', 139 | 'ogUrl', 140 | 'requestUrl', 141 | 'success', 142 | 'charset', 143 | 'twitterCard', 144 | 'twitterCreator', 145 | 'twitterDescription', 146 | 'twitterImage', 147 | 'twitterSite', 148 | 'twitterTitle', 149 | ); 150 | expect(response).to.be.an('Response'); 151 | }); 152 | }); 153 | 
}); 154 | }); 155 | -------------------------------------------------------------------------------- /tests/integration/spotify.spec.ts: -------------------------------------------------------------------------------- 1 | import { expect } from 'chai'; 2 | 3 | import ogs from '../../index'; 4 | 5 | describe('spotify', function () { 6 | it('album should return music:album and associated tags', function () { 7 | return ogs({ url: 'https://open.spotify.com/album/5EBGCvO6upi3GNknMVe9x9' }).then(function ({ error, result, response }) { 8 | console.log('error:', error); 9 | console.log('result:', result); 10 | expect(error).to.be.eql(false); 11 | expect(result.alAndroidAppName).to.be.eql('Spotify'); 12 | expect(result.alAndroidPackage).to.be.eql('com.spotify.music'); 13 | expect(result.alAndroidUrl).to.be.eql('spotify://album/5EBGCvO6upi3GNknMVe9x9'); 14 | expect(result.alIosAppName).to.be.eql('Spotify'); 15 | expect(result.alIosAppStoreId).to.be.eql('324684580'); 16 | expect(result.alIosUrl).to.be.eql('spotify://album/5EBGCvO6upi3GNknMVe9x9'); 17 | expect(result.ogTitle).to.be.eql('ye'); 18 | // expect(result.ogDescription).to.be.eql('Album · Kanye West · 2018 · 7 songs'); 19 | // expect(result.favicon).to.be.eql('https://open.scdn.co/cdn/images/favicon.0c211e2e.png'); 20 | // expect(result.ogUrl).to.be.eql('https://open.spotify.com/album/5EBGCvO6upi3GNknMVe9x9'); 21 | expect(result.ogType).to.be.eql('music.album'); 22 | expect(result.musicMusician).to.be.eql('https://open.spotify.com/artist/5K4W6rqBFWDnAN6FQUkS6x'); 23 | expect(result.musicReleaseDate).to.be.eql('2018-06-01'); 24 | expect(result.twitterTitle).to.be.eql('ye'); 25 | expect(result.twitterSite).to.be.eql('@spotify'); 26 | // expect(result.twitterDescription).to.be.eql('Album · Kanye West · 2018 · 7 songs'); 27 | expect(result.twitterCard).to.be.eql('summary'); 28 | expect(result.ogLocale).to.be.eql('en'); 29 | expect(result.ogSiteName).to.be.eql('Spotify'); 30 | expect(result.ogImage).to.be.eql([{ 31 | 
url: 'https://i.scdn.co/image/ab67616d0000b27397508a4b756763370510bd44', 32 | }]); 33 | expect(result.twitterImage).to.be.eql([{ 34 | url: 'https://i.scdn.co/image/ab67616d0000b27397508a4b756763370510bd44', 35 | }]); 36 | expect(result.musicSong).to.be.eql([{ 37 | url: 'https://open.spotify.com/track/6EuE9M1viu9gkdFSafia9o', 38 | track: '1', 39 | disc: '1', 40 | }, { 41 | disc: '1', 42 | track: '2', 43 | url: 'https://open.spotify.com/track/2r4JRwcbIeuAzWjH4YXlLs', 44 | }, 45 | { 46 | disc: '1', 47 | track: '3', 48 | url: 'https://open.spotify.com/track/3qnoOm4fwZPBS116f5hpgF', 49 | }, 50 | { 51 | disc: '1', 52 | track: '4', 53 | url: 'https://open.spotify.com/track/3dG6tjetoR4GMmUGZUprLt', 54 | }, 55 | { 56 | disc: '1', 57 | track: '5', 58 | url: 'https://open.spotify.com/track/1DdgqEZk4Hqfpl3drdXJun', 59 | }, 60 | { 61 | disc: '1', 62 | track: '6', 63 | url: 'https://open.spotify.com/track/6Bg7MznA9X0dIhlAsLyBYj', 64 | }, 65 | { 66 | disc: '1', 67 | track: '7', 68 | url: 'https://open.spotify.com/track/2VYb3Fb5iK5Y8HGZ8oEEkp', 69 | }]); 70 | expect(result.requestUrl).to.be.eql('https://open.spotify.com/album/5EBGCvO6upi3GNknMVe9x9'); 71 | expect(result.charset).to.be.eql('utf-8'); 72 | expect(result.fbAppId).to.be.eql('174829003346'); 73 | expect(result.jsonLD).to.be.an('array').and.to.not.be.empty; 74 | expect(result.success).to.be.eql(true); 75 | expect(result).to.have.all.keys( 76 | 'favicon', 77 | 'fbAppId', 78 | 'jsonLD', 79 | 'alAndroidAppName', 80 | 'alAndroidPackage', 81 | 'alAndroidUrl', 82 | 'alIosAppName', 83 | 'alIosAppStoreId', 84 | 'alIosUrl', 85 | 'musicMusician', 86 | 'musicReleaseDate', 87 | 'musicSong', 88 | 'ogDescription', 89 | 'ogImage', 90 | 'ogLocale', 91 | 'ogSiteName', 92 | 'ogTitle', 93 | 'ogType', 94 | 'ogUrl', 95 | 'requestUrl', 96 | 'success', 97 | 'charset', 98 | 'twitterCard', 99 | 'twitterDescription', 100 | 'twitterImage', 101 | 'twitterSite', 102 | 'twitterTitle', 103 | ); 104 | expect(response).to.be.an('Response'); 105 | }); 
106 | }); 107 | 108 | it('artist should return music:musician', function () { 109 | return ogs({ url: 'https://open.spotify.com/artist/5K4W6rqBFWDnAN6FQUkS6x' }).then(function ({ error, result, response }) { 110 | console.log('error:', error); 111 | console.log('result:', result); 112 | expect(error).to.be.eql(false); 113 | expect(result.alAndroidAppName).to.be.eql('Spotify'); 114 | expect(result.alAndroidPackage).to.be.eql('com.spotify.music'); 115 | expect(result.alAndroidUrl).to.be.eql('spotify://artist/5K4W6rqBFWDnAN6FQUkS6x'); 116 | expect(result.alIosAppName).to.be.eql('Spotify'); 117 | expect(result.alIosAppStoreId).to.be.eql('324684580'); 118 | expect(result.alIosUrl).to.be.eql('spotify://artist/5K4W6rqBFWDnAN6FQUkS6x'); 119 | expect(result.ogTitle).to.be.eql('Kanye West'); 120 | // expect(result.favicon).to.be.eql('https://open.scdn.co/cdn/images/favicon.0c211e2e.png'); 121 | expect(result.ogDescription).to.be.an('string').and.to.not.be.empty; 122 | expect(result.ogUrl).to.be.eql('https://open.spotify.com/artist/5K4W6rqBFWDnAN6FQUkS6x'); 123 | expect(result.ogType).to.be.eql('profile'); 124 | expect(result.twitterTitle).to.be.eql('Kanye West'); 125 | expect(result.twitterSite).to.be.eql('@spotify'); 126 | expect(result.twitterDescription).to.be.an('string').and.to.not.be.empty; 127 | expect(result.twitterCard).to.be.eql('summary'); 128 | expect(result.ogSiteName).to.be.eql('Spotify'); 129 | expect(result.ogLocale).to.be.eql('en'); 130 | expect(result.ogImage).to.be.eql([{ 131 | url: 'https://i.scdn.co/image/ab6761610000e5eb6e835a500e791bf9c27a422a', 132 | }]); 133 | expect(result.twitterImage).to.be.eql([{ 134 | url: 'https://i.scdn.co/image/ab6761610000e5eb6e835a500e791bf9c27a422a', 135 | }]); 136 | expect(result.requestUrl).to.be.eql('https://open.spotify.com/artist/5K4W6rqBFWDnAN6FQUkS6x'); 137 | expect(result.charset).to.be.eql('utf-8'); 138 | expect(result.fbAppId).to.be.eql('174829003346'); 139 | 
expect(result.jsonLD).to.be.an('array').and.to.not.be.empty; 140 | expect(result.success).to.be.eql(true); 141 | expect(result).to.have.all.keys( 142 | 'favicon', 143 | 'fbAppId', 144 | 'jsonLD', 145 | 'alAndroidAppName', 146 | 'alAndroidPackage', 147 | 'alAndroidUrl', 148 | 'alIosAppName', 149 | 'alIosAppStoreId', 150 | 'alIosUrl', 151 | 'ogDescription', 152 | 'ogImage', 153 | 'ogLocale', 154 | 'ogSiteName', 155 | 'ogTitle', 156 | 'ogType', 157 | 'ogUrl', 158 | 'requestUrl', 159 | 'success', 160 | 'charset', 161 | 'twitterCard', 162 | 'twitterDescription', 163 | 'twitterImage', 164 | 'twitterSite', 165 | 'twitterTitle', 166 | ); 167 | expect(response).to.be.an('Response'); 168 | }); 169 | }); 170 | 171 | it('track should return music:song and associated tags', function () { 172 | return ogs({ url: 'https://open.spotify.com/track/3p6fkbeZDIVqapfdgQe6fm' }).then(function ({ error, result, response }) { 173 | console.log('error:', error); 174 | console.log('result:', result); 175 | expect(error).to.be.eql(false); 176 | expect(result.alAndroidAppName).to.be.eql('Spotify'); 177 | expect(result.alAndroidPackage).to.be.eql('com.spotify.music'); 178 | expect(result.alAndroidUrl).to.be.eql('spotify://track/3p6fkbeZDIVqapfdgQe6fm'); 179 | expect(result.alIosAppName).to.be.eql('Spotify'); 180 | expect(result.alIosAppStoreId).to.be.eql('324684580'); 181 | expect(result.alIosUrl).to.be.eql('spotify://track/3p6fkbeZDIVqapfdgQe6fm'); 182 | expect(result.ogTitle).to.be.eql('Famous'); 183 | // expect(result.ogDescription).to.be.eql('Song · Kanye West · 2016'); 184 | expect(result.ogUrl).to.be.eql('https://open.spotify.com/track/3p6fkbeZDIVqapfdgQe6fm'); 185 | // expect(result.favicon).to.be.eql('https://open.scdn.co/cdn/images/favicon32.a19b4f5b.png'); 186 | expect(result.ogType).to.be.eql('music.song'); 187 | expect(result.musicDuration).to.be.eql('196'); 188 | expect(result.musicAlbum).to.be.eql('https://open.spotify.com/album/4xM1pUHZp9HsuKNxyOQDR0'); 189 | 
expect(result.musicAlbumTrack).to.be.eql('4'); 190 | expect(result.musicMusician).to.be.eql('https://open.spotify.com/artist/5K4W6rqBFWDnAN6FQUkS6x'); 191 | expect(result.musicReleaseDate).to.be.eql('2016-06-10'); 192 | expect(result.ogAudio).to.be.an('string').and.to.not.be.empty; 193 | expect(result.ogAudioType).to.be.eql('audio/mpeg'); 194 | expect(result.twitterTitle).to.be.eql('Famous'); 195 | expect(result.twitterSite).to.be.eql('@spotify'); 196 | // expect(result.twitterDescription).to.be.eql('Song · Kanye West · 2016'); 197 | expect(result.twitterCard).to.be.eql('summary'); 198 | expect(result.ogSiteName).to.be.eql('Spotify'); 199 | expect(result.ogLocale).to.be.eql('en'); 200 | expect(result.ogImage).to.be.eql([{ 201 | url: 'https://i.scdn.co/image/ab67616d0000b2730939dadf614e70aeffc6710c', 202 | }]); 203 | expect(result.twitterImage).to.be.eql([{ 204 | url: 'https://i.scdn.co/image/ab67616d0000b2730939dadf614e70aeffc6710c', 205 | }]); 206 | expect(result.requestUrl).to.be.eql('https://open.spotify.com/track/3p6fkbeZDIVqapfdgQe6fm'); 207 | expect(result.charset).to.be.eql('utf-8'); 208 | expect(result.fbAppId).to.be.eql('174829003346'); 209 | expect(result.jsonLD).to.be.an('array').and.to.not.be.empty; 210 | expect(result.success).to.be.eql(true); 211 | expect(result).to.have.all.keys( 212 | 'favicon', 213 | 'fbAppId', 214 | 'jsonLD', 215 | 'alAndroidAppName', 216 | 'alAndroidPackage', 217 | 'alAndroidUrl', 218 | 'alIosAppName', 219 | 'alIosAppStoreId', 220 | 'alIosUrl', 221 | 'musicAlbum', 222 | 'musicAlbumTrack', 223 | 'musicDuration', 224 | 'musicMusician', 225 | 'musicReleaseDate', 226 | 'ogAudio', 227 | 'ogAudioType', 228 | 'ogDescription', 229 | 'ogImage', 230 | 'ogLocale', 231 | 'ogSiteName', 232 | 'ogTitle', 233 | 'ogType', 234 | 'ogUrl', 235 | 'requestUrl', 236 | 'success', 237 | 'charset', 238 | 'twitterCard', 239 | 'twitterDescription', 240 | 'twitterImage', 241 | 'twitterSite', 242 | 'twitterTitle', 243 | ); 244 | 
expect(response).to.be.an('Response'); 245 | }); 246 | }); 247 | 248 | it('playlist should return music:playlist and associated tags', function () { 249 | return ogs({ url: 'https://jshemas.github.io/openGraphScraperPages/spotifyPlayList' }).then(function ({ error, result, response }) { 250 | console.log('error:', error); 251 | console.log('result:', result); 252 | expect(error).to.be.eql(false); 253 | expect(result.alAndroidAppName).to.be.eql('Spotify'); 254 | expect(result.alAndroidPackage).to.be.eql('com.spotify.music'); 255 | expect(result.alAndroidUrl).to.be.eql('spotify://playlist/4BSIiLTu7qzDZLDdkHaty9'); 256 | expect(result.alIosAppName).to.be.eql('Spotify'); 257 | expect(result.alIosAppStoreId).to.be.eql('324684580'); 258 | expect(result.alIosUrl).to.be.eql('spotify://playlist/4BSIiLTu7qzDZLDdkHaty9'); 259 | expect(result.ogTitle).to.be.eql('Calm Hip Hop Mindset'); 260 | expect(result.ogDescription).to.be.an('string').and.to.not.be.empty; 261 | expect(result.ogUrl).to.be.eql('https://open.spotify.com/playlist/4BSIiLTu7qzDZLDdkHaty9'); 262 | expect(result.favicon).to.be.eql('https://open.scdn.co/cdn/images/favicon32.a19b4f5b.png'); 263 | expect(result.ogType).to.be.eql('music.playlist'); 264 | expect(result.musicCreator).to.be.eql('https://open.spotify.com/user/mjaschmidt'); 265 | expect(result.twitterTitle).to.be.eql('Calm Hip Hop Mindset'); 266 | expect(result.twitterAppIdiPhone).to.be.eql('324684580'); 267 | expect(result.twitterAppIdGooglePlay).to.be.eql('com.spotify.music'); 268 | expect(result.twitterSite).to.be.eql('@spotify'); 269 | expect(result.twitterDescription).to.be.an('string').and.to.not.be.empty; 270 | expect(result.twitterCard).to.be.eql('audio'); 271 | expect(result.ogSiteName).to.be.eql('Spotify'); 272 | expect(result.ogLocale).to.be.eql('en'); 273 | expect(result.ogImage).to.be.eql([{ 274 | url: 'https://i.scdn.co/image/ab67706c0000bebb5a535b3001ccf567ddef2927', 275 | }]); 276 | expect(result.twitterImage).to.be.eql([{ 277 | url: 
'https://i.scdn.co/image/ab67706c0000bebb5a535b3001ccf567ddef2927', 278 | }]); 279 | expect(result.twitterPlayer).to.be.eql([{ 280 | url: 'https://open.spotify.com/embed/playlist/4BSIiLTu7qzDZLDdkHaty9?utm_campaign=twitter-player&utm_source=open&utm_medium=twitter', 281 | width: '504', 282 | height: '584', 283 | }]); 284 | expect(result.musicSong).to.be.eql([{ 285 | url: 'https://open.spotify.com/track/2LTlO3NuNVN70lp2ZbVswF', 286 | track: '1', 287 | }, { 288 | track: '2', 289 | url: 'https://open.spotify.com/track/7hZoTr4ffMGqm9opMiGmBp', 290 | }, 291 | { 292 | track: '3', 293 | url: 'https://open.spotify.com/track/7FW4HUo39yRwGVxEoHqTa4', 294 | }, 295 | { 296 | track: '4', 297 | url: 'https://open.spotify.com/track/6MF4tRr5lU8qok8IKaFOBE', 298 | }, 299 | { 300 | track: '5', 301 | url: 'https://open.spotify.com/track/5TCBWmEBrin7etRa4Lswr1', 302 | }, 303 | { 304 | track: '6', 305 | url: 'https://open.spotify.com/track/77KMttn3Lic7ZQKDlPqp8v', 306 | }, 307 | { 308 | track: '7', 309 | url: 'https://open.spotify.com/track/6XH0KeCZ0nRysAeSJYRFFg', 310 | }, 311 | { 312 | track: '8', 313 | url: 'https://open.spotify.com/track/1pRjlrQMZYo1K5i1RcLGs7', 314 | }, 315 | { 316 | track: '9', 317 | url: 'https://open.spotify.com/track/1oOEkBNp4zWnkD7nWjJdog', 318 | }, 319 | { 320 | track: '10', 321 | url: 'https://open.spotify.com/track/15YteXcgC1tRTDrKKclPOp', 322 | }]); 323 | expect(result.requestUrl).to.be.eql('https://jshemas.github.io/openGraphScraperPages/spotifyPlayList'); 324 | expect(result.charset).to.be.eql('UTF-8'); 325 | expect(result.fbAppId).to.be.eql('174829003346'); 326 | expect(result.jsonLD).to.be.an('array').and.to.not.be.empty; 327 | expect(result.success).to.be.eql(true); 328 | expect(result).to.have.all.keys( 329 | 'favicon', 330 | 'fbAppId', 331 | 'jsonLD', 332 | 'alAndroidAppName', 333 | 'alAndroidPackage', 334 | 'alAndroidUrl', 335 | 'alIosAppName', 336 | 'alIosAppStoreId', 337 | 'alIosUrl', 338 | 'musicCreator', 339 | 'musicSong', 340 | 
'ogDescription', 341 | 'ogImage', 342 | 'ogLocale', 343 | 'ogSiteName', 344 | 'ogTitle', 345 | 'ogType', 346 | 'ogUrl', 347 | 'requestUrl', 348 | 'success', 349 | 'charset', 350 | 'twitterAppIdGooglePlay', 351 | 'twitterAppIdiPhone', 352 | 'twitterCard', 353 | 'twitterDescription', 354 | 'twitterImage', 355 | 'twitterPlayer', 356 | 'twitterSite', 357 | 'twitterTitle', 358 | ); 359 | expect(response).to.be.an('Response'); 360 | }); 361 | }); 362 | }); 363 | -------------------------------------------------------------------------------- /tests/integration/statusCode.spec.ts: -------------------------------------------------------------------------------- 1 | import { expect } from 'chai'; 2 | 3 | import ogs from '../../index'; 4 | 5 | // http://httpstat.us keeps going offline, we need to find a replacement 6 | // eslint-disable-next-line mocha/no-skipped-tests 7 | describe.skip('statusCode', function () { 8 | context('when the site returns', function () { 9 | it('403', function () { 10 | return ogs({ url: 'http://httpstat.us/403' }) 11 | .then(function () { 12 | expect().fail('this should not happen'); 13 | }) 14 | .catch(function ({ error, result, response }) { 15 | console.log('error:', error); 16 | console.log('result:', result); 17 | expect(error).to.be.eql(true); 18 | expect(result.success).to.be.eql(false); 19 | expect(result.requestUrl).to.be.eql('http://httpstat.us/403'); 20 | expect(result.error).to.eql('Server has returned a 400/500 error code'); 21 | expect(result.errorDetails.toString()).to.eql('Error: Server has returned a 400/500 error code'); 22 | expect(result).to.have.all.keys( 23 | 'error', 24 | 'errorDetails', 25 | 'requestUrl', 26 | 'success', 27 | ); 28 | expect(response).to.eql(undefined); 29 | }); 30 | }); 31 | it('500', function () { 32 | return ogs({ url: 'http://httpstat.us/500' }) 33 | .then(function () { 34 | expect().fail('this should not happen'); 35 | }) 36 | .catch(function ({ error, result, response }) { 37 | console.log('error:',
error); 38 | console.log('result:', result); 39 | expect(error).to.be.eql(true); 40 | expect(result.success).to.be.eql(false); 41 | expect(result.requestUrl).to.be.eql('http://httpstat.us/500'); 42 | expect(result.error).to.eql('Server has returned a 400/500 error code'); 43 | expect(result.errorDetails.toString()).to.eql('Error: Server has returned a 400/500 error code'); 44 | expect(result).to.have.all.keys( 45 | 'error', 46 | 'errorDetails', 47 | 'requestUrl', 48 | 'success', 49 | ); 50 | expect(response).to.eql(undefined); 51 | }); 52 | }); 53 | }); 54 | }); 55 | -------------------------------------------------------------------------------- /tests/integration/twitter.spec.ts: -------------------------------------------------------------------------------- 1 | import { expect } from 'chai'; 2 | 3 | import ogs from '../../index'; 4 | 5 | describe('twitter', function () { 6 | context('Should Return correct Open Graph Info + Some Twitter Info ', function () { 7 | it('On Twitter Site', function () { 8 | return ogs({ 9 | url: 'https://jshemas.github.io/openGraphScraperPages/twitter-dev', 10 | }).then(function ({ error, result, response }) { 11 | console.log('error:', error); 12 | console.log('result:', result); 13 | expect(error).to.be.eql(false); 14 | expect(result.twitterTitle).to.be.eql('Twitter Developers'); 15 | expect(result.favicon).to.be.eql('https://web.archive.org/web/20160303190414im_/https://abs.twimg.com/favicons/favicon.ico'); 16 | expect(result.ogTitle).to.be.eql('Twitter Developers'); 17 | expect(result.ogType).to.be.eql('website'); 18 | expect(result.ogUrl).to.be.eql('https://web.archive.org/web/20160303190414im_/https://dev.twitter.com/'); 19 | expect(result.twitterCard).to.be.eql('summary'); 20 | expect(result.ogSiteName).to.be.eql('Twitter Developers'); 21 | expect(result.ogLocale).to.be.eql('en'); 22 | expect(result.twitterUrl).to.be.eql('https://web.archive.org/web/20160303190414im_/https://dev.twitter.com/'); 23 | 
expect(result.twitterDescription).to.be.eql('The Twitter platform connects your website or application with the worldwide conversation happening on Twitter.'); 24 | expect(result.ogImage).to.be.eql([{ 25 | url: 'https://web.archive.org/web/20160303190414im_/https://pbs.twimg.com/profile_images/2284174872/7df3h38zabcvjylnyfe3.png', 26 | type: 'png', 27 | }]); 28 | expect(result.twitterImage).to.be.eql([{ 29 | url: 'https://web.archive.org/web/20160303190414im_/https://pbs.twimg.com/profile_images/2284174872/7df3h38zabcvjylnyfe3.png', 30 | width: '500', 31 | height: '500', 32 | }]); 33 | expect(result.requestUrl).to.be.eql('https://jshemas.github.io/openGraphScraperPages/twitter-dev'); 34 | expect(result.charset).to.be.eql('utf-8'); 35 | expect(result.success).to.be.eql(true); 36 | expect(result).to.have.all.keys( 37 | 'favicon', 38 | 'ogImage', 39 | 'ogSiteName', 40 | 'ogTitle', 41 | 'ogLocale', 42 | 'ogType', 43 | 'ogUrl', 44 | 'requestUrl', 45 | 'success', 46 | 'charset', 47 | 'twitterCard', 48 | 'twitterDescription', 49 | 'twitterImage', 50 | 'twitterTitle', 51 | 'twitterUrl', 52 | ); 53 | expect(response).to.be.an('Response'); 54 | }); 55 | }); 56 | it('On Github Site', function () { 57 | return ogs({ 58 | url: 'https://jshemas.github.io/openGraphScraperPages/github', 59 | }).then(function ({ error, result, response }) { 60 | console.log('error:', error); 61 | console.log('result:', result); 62 | expect(error).to.be.eql(false); 63 | expect(result.ogUrl).to.be.eql('https://web.archive.org/web/20170113081103/https://github.com/'); 64 | expect(result.favicon).to.be.eql('https://web.archive.org/web/20170113081103im_/https://assets-cdn.github.com/favicon.ico'); 65 | expect(result.ogSiteName).to.be.eql('GitHub'); 66 | expect(result.ogTitle).to.be.eql('Build software better, together'); 67 | expect(result.ogDescription).to.be.eql('GitHub is where people build software. 
More than 19 million people use GitHub to discover, fork, and contribute to over 50 million projects.'); 68 | expect(result.twitterSite).to.be.eql('github'); 69 | expect(result.twitterSiteId).to.be.eql('13334762'); 70 | expect(result.twitterCreator).to.be.eql('github'); 71 | expect(result.twitterCreatorId).to.be.eql('13334762'); 72 | expect(result.twitterCard).to.be.eql('summary_large_image'); 73 | expect(result.twitterTitle).to.be.eql('GitHub'); 74 | expect(result.ogLocale).to.be.eql('en'); 75 | expect(result.twitterDescription).to.be.eql('GitHub is where people build software. More than 19 million people use GitHub to discover, fork, and contribute to over 50 million projects.'); 76 | expect(result.ogImage).to.be.eql([{ 77 | url: 'https://web.archive.org/web/20170113081103im_/https://assets-cdn.github.com/images/modules/open_graph/github-logo.png', 78 | width: '1200', 79 | height: '1200', 80 | type: 'image/png', 81 | }, { 82 | height: '620', 83 | type: 'image/png', 84 | url: 'https://web.archive.org/web/20170113081103im_/https://assets-cdn.github.com/images/modules/open_graph/github-mark.png', 85 | width: '1200', 86 | }, 87 | { 88 | height: '620', 89 | type: 'image/png', 90 | url: 'https://web.archive.org/web/20170113081103im_/https://assets-cdn.github.com/images/modules/open_graph/github-octocat.png', 91 | width: '1200', 92 | }]); 93 | expect(result.twitterImage).to.be.eql([{ 94 | url: 'https://web.archive.org/web/20170113081103im_/https://assets-cdn.github.com/images/modules/open_graph/github-logo.png', 95 | width: '1200', 96 | height: '1200', 97 | }]); 98 | expect(result.requestUrl).to.be.eql('https://jshemas.github.io/openGraphScraperPages/github'); 99 | expect(result.charset).to.be.eql('utf-8'); 100 | expect(result.fbAppId).to.be.eql('1401488693436528'); 101 | expect(result.success).to.be.eql(true); 102 | expect(result).to.have.all.keys( 103 | 'favicon', 104 | 'fbAppId', 105 | 'ogDescription', 106 | 'ogImage', 107 | 'ogSiteName', 108 | 'ogLocale', 109 | 
'ogTitle', 110 | 'ogUrl', 111 | 'requestUrl', 112 | 'success', 113 | 'charset', 114 | 'twitterCard', 115 | 'twitterCreator', 116 | 'twitterCreatorId', 117 | 'twitterDescription', 118 | 'twitterImage', 119 | 'twitterSite', 120 | 'twitterSiteId', 121 | 'twitterTitle', 122 | ); 123 | expect(response).to.be.an('Response'); 124 | }); 125 | }); 126 | it('On Atom Site', function () { 127 | return ogs({ 128 | url: 'https://jshemas.github.io/openGraphScraperPages/atom.html', 129 | }).then(function ({ error, result, response }) { 130 | console.log('error:', error); 131 | console.log('result:', result); 132 | expect(error).to.be.eql(false); 133 | expect(result.ogUrl).to.be.eql('https://web.archive.org/web/20170913111314/https://atom.io/'); 134 | expect(result.favicon).to.be.eql('/web/20170913111314im_/https://atom.io/favicon.ico'); 135 | expect(result.ogSiteName).to.be.eql('Atom'); 136 | expect(result.ogTitle).to.be.eql('A hackable text editor for the 21st Century'); 137 | expect(result.ogLocale).to.be.eql('en'); 138 | expect(result.ogDescription).to.be.eql('At GitHub, we’re building the text editor we’ve always wanted: hackable to the core, but approachable on the first day without ever touching a config file. 
We can’t wait to see what you build with it.'); 139 | expect(result.ogType).to.be.eql('website'); 140 | expect(result.twitterCard).to.be.eql('summary_large_image'); 141 | expect(result.twitterSite).to.be.eql('@AtomEditor'); 142 | expect(result.twitterCreator).to.be.eql('@github'); 143 | expect(result.twitterTitle).to.be.eql('Atom'); 144 | expect(result.twitterDescription).to.be.eql('A hackable text editor for the 21st Century'); 145 | expect(result.ogImage).to.be.eql([{ 146 | url: 'https://web.archive.org/web/20170913111314im_/http://og.github.com/atom-mark/atom-mark@1200x630.png', 147 | width: '1200', 148 | height: '630', 149 | type: 'png', 150 | }]); 151 | expect(result.twitterImage).to.be.eql([{ 152 | url: 'https://web.archive.org/web/20170913111314im_/http://og.github.com/atom-logo/atom-logo@1200x630.png', 153 | width: '1200', 154 | height: '630', 155 | }]); 156 | expect(result.requestUrl).to.be.eql('https://jshemas.github.io/openGraphScraperPages/atom.html'); 157 | expect(result.charset).to.be.eql('UTF-8'); 158 | expect(result.success).to.be.eql(true); 159 | expect(result).to.have.all.keys( 160 | 'favicon', 161 | 'ogDescription', 162 | 'ogImage', 163 | 'ogLocale', 164 | 'ogSiteName', 165 | 'ogTitle', 166 | 'ogType', 167 | 'ogUrl', 168 | 'requestUrl', 169 | 'success', 170 | 'charset', 171 | 'twitterCard', 172 | 'twitterCreator', 173 | 'twitterDescription', 174 | 'twitterImage', 175 | 'twitterSite', 176 | 'twitterTitle', 177 | ); 178 | expect(response).to.be.an('Response'); 179 | }); 180 | }); 181 | }); 182 | }); 183 | -------------------------------------------------------------------------------- /tests/integration/url.spec.ts: -------------------------------------------------------------------------------- 1 | import { expect } from 'chai'; 2 | 3 | import ogs from '../../index'; 4 | 5 | describe('url', function () { 6 | it('http', function () { 7 | return ogs({ url: 'http://www.wikipedia.org/' }).then(function ({ error, result, response }) { 8 | 
console.log('error:', error); 9 | console.log('result:', result); 10 | expect(error).to.be.eql(false); 11 | expect(result.ogTitle).to.be.eql('Wikipedia, the free encyclopedia'); 12 | expect(result.ogDescription).to.be.eql('Wikipedia is a free online encyclopedia, created and edited by volunteers around the world and hosted by the Wikimedia Foundation.'); 13 | expect(result.ogLocale).to.be.eql('en'); 14 | expect(result.requestUrl).to.be.eql('http://www.wikipedia.org/'); 15 | expect(result.favicon).to.be.eql('/static/favicon/wikipedia.ico'); 16 | expect(result.charset).to.be.eql('utf-8'); 17 | expect(result.success).to.be.eql(true); 18 | expect(result).to.have.all.keys( 19 | 'charset', 20 | 'favicon', 21 | 'ogDescription', 22 | 'ogImage', 23 | 'ogLocale', 24 | 'ogTitle', 25 | 'ogType', 26 | 'requestUrl', 27 | 'success', 28 | ); 29 | expect(response).to.be.an('Response'); 30 | }); 31 | }); 32 | 33 | it('https', function () { 34 | return ogs({ url: 'https://www.wikipedia.org/' }).then(function ({ error, result, response }) { 35 | console.log('error:', error); 36 | console.log('result:', result); 37 | expect(error).to.be.eql(false); 38 | expect(result.ogTitle).to.be.eql('Wikipedia, the free encyclopedia'); 39 | expect(result.ogDescription).to.be.eql('Wikipedia is a free online encyclopedia, created and edited by volunteers around the world and hosted by the Wikimedia Foundation.'); 40 | expect(result.ogLocale).to.be.eql('en'); 41 | expect(result.requestUrl).to.be.eql('https://www.wikipedia.org/'); 42 | expect(result.favicon).to.be.eql('/static/favicon/wikipedia.ico'); 43 | expect(result.charset).to.be.eql('utf-8'); 44 | expect(result.success).to.be.eql(true); 45 | expect(result).to.have.all.keys( 46 | 'charset', 47 | 'favicon', 48 | 'ogDescription', 49 | 'ogImage', 50 | 'ogLocale', 51 | 'ogTitle', 52 | 'ogType', 53 | 'requestUrl', 54 | 'success', 55 | ); 56 | expect(response).to.be.an('Response'); 57 | }); 58 | }); 59 | 60 | it('no protocol', function () { 61 | return 
ogs({ url: 'www.wikipedia.org/' }).then(function ({ error, result, response }) { 62 | console.log('error:', error); 63 | console.log('result:', result); 64 | expect(error).to.be.eql(false); 65 | expect(result.ogTitle).to.be.eql('Wikipedia, the free encyclopedia'); 66 | expect(result.ogDescription).to.be.eql('Wikipedia is a free online encyclopedia, created and edited by volunteers around the world and hosted by the Wikimedia Foundation.'); 67 | expect(result.ogLocale).to.be.eql('en'); 68 | expect(result.requestUrl).to.be.eql('http://www.wikipedia.org/'); 69 | expect(result.favicon).to.be.eql('/static/favicon/wikipedia.ico'); 70 | expect(result.charset).to.be.eql('utf-8'); 71 | expect(result.success).to.be.eql(true); 72 | expect(result).to.have.all.keys( 73 | 'charset', 74 | 'favicon', 75 | 'ogDescription', 76 | 'ogImage', 77 | 'ogLocale', 78 | 'ogTitle', 79 | 'ogType', 80 | 'requestUrl', 81 | 'success', 82 | ); 83 | expect(response).to.be.an('Response'); 84 | }); 85 | }); 86 | 87 | it('no protocol and no wwww', function () { 88 | return ogs({ url: 'wikipedia.org/' }).then(function ({ error, result, response }) { 89 | console.log('error:', error); 90 | console.log('result:', result); 91 | expect(error).to.be.eql(false); 92 | expect(result.ogTitle).to.be.eql('Wikipedia, the free encyclopedia'); 93 | expect(result.ogDescription).to.be.eql('Wikipedia is a free online encyclopedia, created and edited by volunteers around the world and hosted by the Wikimedia Foundation.'); 94 | expect(result.ogLocale).to.be.eql('en'); 95 | expect(result.requestUrl).to.be.eql('http://wikipedia.org/'); 96 | expect(result.favicon).to.be.eql('/static/favicon/wikipedia.ico'); 97 | expect(result.charset).to.be.eql('utf-8'); 98 | expect(result.success).to.be.eql(true); 99 | expect(result).to.have.all.keys( 100 | 'charset', 101 | 'favicon', 102 | 'ogDescription', 103 | 'ogImage', 104 | 'ogLocale', 105 | 'ogTitle', 106 | 'ogType', 107 | 'requestUrl', 108 | 'success', 109 | ); 110 | 
expect(response).to.be.an('Response'); 111 | }); 112 | }); 113 | 114 | it('protocol with no wwww', function () { 115 | return ogs({ url: 'http://wikipedia.org/' }).then(function ({ error, result, response }) { 116 | console.log('error:', error); 117 | console.log('result:', result); 118 | expect(error).to.be.eql(false); 119 | expect(result.ogTitle).to.be.eql('Wikipedia, the free encyclopedia'); 120 | expect(result.ogLocale).to.be.eql('en'); 121 | expect(result.ogDescription).to.be.eql('Wikipedia is a free online encyclopedia, created and edited by volunteers around the world and hosted by the Wikimedia Foundation.'); 122 | expect(result.requestUrl).to.be.eql('http://wikipedia.org/'); 123 | expect(result.favicon).to.be.eql('/static/favicon/wikipedia.ico'); 124 | expect(result.charset).to.be.eql('utf-8'); 125 | expect(result.success).to.be.eql(true); 126 | expect(result).to.have.all.keys( 127 | 'charset', 128 | 'favicon', 129 | 'ogDescription', 130 | 'ogImage', 131 | 'ogLocale', 132 | 'ogTitle', 133 | 'ogType', 134 | 'requestUrl', 135 | 'success', 136 | ); 137 | expect(response).to.be.an('Response'); 138 | }); 139 | }); 140 | 141 | it('fake page', function () { 142 | return ogs({ url: 'http://testtesttest4564568.com' }) 143 | .then(function () { 144 | expect().fail('this should not happen'); 145 | }) 146 | .catch(function ({ error, result, response }) { 147 | console.log('error:', error); 148 | console.log('result:', result); 149 | expect(error).to.be.eql(true); 150 | expect(result.success).to.be.eql(false); 151 | expect(result.requestUrl).to.be.eql('http://testtesttest4564568.com'); 152 | expect(result.error).to.eql('Page not found'); 153 | expect(result.errorDetails.toString()).to.eql('Error: Page not found'); 154 | expect(result).to.have.all.keys( 155 | 'error', 156 | 'errorDetails', 157 | 'requestUrl', 158 | 'success', 159 | ); 160 | expect(response).to.eql(undefined); 161 | }); 162 | }); 163 | 164 | it('empty url', function () { 165 | return ogs({ url: '' }) 166 
| .then(function () { 167 | expect().fail('this should not happen'); 168 | }) 169 | .catch(function ({ error, result, response }) { 170 | console.log('error:', error); 171 | console.log('result:', result); 172 | expect(error).to.be.eql(true); 173 | expect(result.success).to.be.eql(false); 174 | expect(result.requestUrl).to.be.eql(''); 175 | expect(result.error).to.eql('Invalid URL'); 176 | expect(result.errorDetails.toString()).to.eql('Error: Invalid URL'); 177 | expect(result).to.have.all.keys( 178 | 'error', 179 | 'errorDetails', 180 | 'requestUrl', 181 | 'success', 182 | ); 183 | expect(response).to.eql(undefined); 184 | }); 185 | }); 186 | 187 | it('empty options', function () { 188 | return ogs({}) 189 | .then(function () { 190 | expect().fail('this should not happen'); 191 | }) 192 | .catch(function ({ error, result, response }) { 193 | console.log('error:', error); 194 | console.log('result:', result); 195 | expect(error).to.be.eql(true); 196 | expect(result.success).to.be.eql(false); 197 | expect(result.requestUrl).to.eql(undefined); 198 | expect(result.error).to.eql('Invalid URL'); 199 | expect(result.errorDetails.toString()).to.eql('Error: Invalid URL'); 200 | expect(result).to.have.all.keys( 201 | 'error', 202 | 'errorDetails', 203 | 'requestUrl', 204 | 'success', 205 | ); 206 | expect(response).to.eql(undefined); 207 | }); 208 | }); 209 | 210 | it('url is a string of numbers', function () { 211 | return ogs({ url: '2323233' }) 212 | .then(function () { 213 | expect().fail('this should not happen'); 214 | }) 215 | .catch(function ({ error, result, response }) { 216 | console.log('error:', error); 217 | console.log('result:', result); 218 | expect(error).to.be.eql(true); 219 | expect(result.success).to.be.eql(false); 220 | expect(result.requestUrl).to.be.eql('2323233'); 221 | expect(result.error).to.eql('Invalid URL'); 222 | expect(result.errorDetails.toString()).to.eql('Error: Invalid URL'); 223 | expect(result).to.have.all.keys( 224 | 'error', 225 | 
'errorDetails', 226 | 'requestUrl', 227 | 'success', 228 | ); 229 | expect(response).to.eql(undefined); 230 | }); 231 | }); 232 | 233 | it('url is a string of words', function () { 234 | return ogs({ url: 'this is a test' }) 235 | .then(function () { 236 | expect().fail('this should not happen'); 237 | }) 238 | .catch(function ({ error, result, response }) { 239 | console.log('error:', error); 240 | console.log('result:', result); 241 | expect(error).to.be.eql(true); 242 | expect(result.success).to.be.eql(false); 243 | expect(result.requestUrl).to.be.eql('this is a test'); 244 | expect(result.error).to.eql('Invalid URL'); 245 | expect(result.errorDetails.toString()).to.eql('Error: Invalid URL'); 246 | expect(result).to.have.all.keys( 247 | 'error', 248 | 'errorDetails', 249 | 'requestUrl', 250 | 'success', 251 | ); 252 | expect(response).to.eql(undefined); 253 | }); 254 | }); 255 | 256 | it('url is invalid because user disallows https with urlValidatorSettings', function () { 257 | return ogs({ 258 | url: 'https://www.wikipedia.org/', 259 | urlValidatorSettings: { 260 | allow_fragments: true, 261 | allow_protocol_relative_urls: false, 262 | allow_query_components: true, 263 | allow_trailing_dot: false, 264 | allow_underscores: false, 265 | protocols: ['http'], 266 | require_host: true, 267 | require_port: false, 268 | require_protocol: false, 269 | require_tld: true, 270 | require_valid_protocol: true, 271 | validate_length: true, 272 | }, 273 | }) 274 | .then(function () { 275 | expect().fail('this should not happen'); 276 | }) 277 | .catch(function ({ error, result, response }) { 278 | console.log('error:', error); 279 | console.log('result:', result); 280 | expect(error).to.be.eql(true); 281 | expect(result.success).to.be.eql(false); 282 | expect(result.requestUrl).to.be.eql('https://www.wikipedia.org/'); 283 | expect(result.error).to.eql('Invalid URL'); 284 | expect(result.errorDetails.toString()).to.eql('Error: Invalid URL'); 285 | 
expect(result).to.have.all.keys( 286 | 'error', 287 | 'errorDetails', 288 | 'requestUrl', 289 | 'success', 290 | ); 291 | expect(response).to.eql(undefined); 292 | }); 293 | }); 294 | 295 | it('url is to a pdf', function () { 296 | return ogs({ url: 'test.pdf?123' }) 297 | .then(function () { 298 | expect().fail('this should not happen'); 299 | }) 300 | .catch(function ({ error, result, response }) { 301 | console.log('error:', error); 302 | console.log('result:', result); 303 | expect(error).to.be.eql(true); 304 | expect(result.success).to.be.eql(false); 305 | expect(result.requestUrl).to.be.eql('test.pdf?123'); 306 | expect(result.error).to.eql('Must scrape an HTML page'); 307 | expect(result.errorDetails.toString()).to.eql('Error: Must scrape an HTML page'); 308 | expect(result).to.have.all.keys( 309 | 'error', 310 | 'errorDetails', 311 | 'requestUrl', 312 | 'success', 313 | ); 314 | expect(response).to.eql(undefined); 315 | }); 316 | }); 317 | }); 318 | -------------------------------------------------------------------------------- /tests/integration/video.spec.ts: -------------------------------------------------------------------------------- 1 | import { expect } from 'chai'; 2 | 3 | import ogs from '../../index'; 4 | 5 | // TODO: youtube is blocking requests from github, will need to find a way around this 6 | describe.skip('video', function () { 7 | it('Test Youtube Video - Should Return correct Open Graph Info', function () { 8 | const userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36'; 9 | return ogs({ url: 'https://www.youtube.com/watch?v=dQw4w9WgXcQ', fetchOptions: { headers: { 'user-agent': userAgent } } }).then(function ({ error, result, response }) { 10 | console.log('error:', error); 11 | console.log('result:', result); 12 | expect(error).to.be.eql(false); 13 | expect(result.alAndroidAppName).to.be.eql('YouTube'); 14 | 
expect(result.alAndroidPackage).to.be.eql('com.google.android.youtube'); 15 | expect(result.alAndroidUrl).to.be.eql('vnd.youtube://www.youtube.com/watch?v=dQw4w9WgXcQ&feature=applinks'); 16 | expect(result.alIosAppName).to.be.eql('YouTube'); 17 | expect(result.alIosAppStoreId).to.be.eql('544007664'); 18 | expect(result.alIosUrl).to.be.eql('vnd.youtube://www.youtube.com/watch?v=dQw4w9WgXcQ&feature=applinks'); 19 | expect(result.alWebUrl).to.be.oneOf(['https://www.youtube.com/watch?v=dQw4w9WgXcQ&feature=applinks', 'http://www.youtube.com/watch?v=dQw4w9WgXcQ&feature=applinks']); 20 | expect(result.ogSiteName).to.be.eql('YouTube'); 21 | expect(result.ogUrl).to.be.eql('https://www.youtube.com/watch?v=dQw4w9WgXcQ'); 22 | expect(result.ogTitle).to.be.eql('Rick Astley - Never Gonna Give You Up (Official Music Video)'); 23 | expect(result.ogDescription).to.be.an('string').and.to.not.be.empty; 24 | expect(result.ogType).to.be.eql('video.other'); 25 | expect(result.ogLocale).to.be.oneOf(['en', 'en-US', 'nl-NL']); 26 | expect(result.twitterCard).to.be.eql('player'); 27 | expect(result.twitterSite).to.be.eql('@youtube'); 28 | expect(result.twitterTitle).to.be.eql('Rick Astley - Never Gonna Give You Up (Official Music Video)'); 29 | expect(result.twitterDescription).to.be.an('string').and.to.not.be.empty; 30 | expect(result.twitterAppNameiPhone).to.be.eql('YouTube'); 31 | expect(result.twitterAppIdiPhone).to.be.eql('544007664'); 32 | expect(result.twitterAppNameiPad).to.be.eql('YouTube'); 33 | expect(result.twitterAppIdiPad).to.be.eql('544007664'); 34 | expect(result.twitterUrl).to.be.eql('https://www.youtube.com/watch?v=dQw4w9WgXcQ'); 35 | expect(result.ogDate).to.be.eql('2009-10-24T23:57:33-07:00'); 36 | expect(result.twitterAppUrliPhone).to.be.eql('vnd.youtube://www.youtube.com/watch?v=dQw4w9WgXcQ&feature=applinks'); 37 | expect(result.twitterAppUrliPad).to.be.eql('vnd.youtube://www.youtube.com/watch?v=dQw4w9WgXcQ&feature=applinks'); 38 | 
expect(result.twitterAppNameGooglePlay).to.be.eql('YouTube'); 39 | expect(result.twitterAppIdGooglePlay).to.be.eql('com.google.android.youtube'); 40 | expect(result.twitterAppUrlGooglePlay).to.be.eql('https://www.youtube.com/watch?v=dQw4w9WgXcQ'); 41 | expect(result.ogImage).to.be.eql([{ 42 | url: 'https://i.ytimg.com/vi/dQw4w9WgXcQ/maxresdefault.jpg', 43 | width: '1280', 44 | height: '720', 45 | type: 'jpg', 46 | }]); 47 | expect(result.ogVideo).to.be.eql([{ 48 | url: 'https://www.youtube.com/embed/dQw4w9WgXcQ', 49 | width: '1280', 50 | height: '720', 51 | type: 'text/html', 52 | }]); 53 | expect(result.twitterImage).to.be.eql([{ 54 | url: 'https://i.ytimg.com/vi/dQw4w9WgXcQ/maxresdefault.jpg', 55 | }]); 56 | expect(result.twitterPlayer).to.be.eql([{ 57 | url: 'https://www.youtube.com/embed/dQw4w9WgXcQ', 58 | width: '1280', 59 | height: '720', 60 | }]); 61 | expect(result.ogVideoTag).to.be.eql('never gonna give you up karaoke'); 62 | expect(result.ogVideoSecureURL).to.be.eql('https://www.youtube.com/embed/dQw4w9WgXcQ'); 63 | expect(result.requestUrl).to.be.eql('https://www.youtube.com/watch?v=dQw4w9WgXcQ'); 64 | expect(result.charset).to.be.eql('UTF-8'); 65 | expect(result.success).to.be.eql(true); 66 | expect(result.fbAppId).to.be.eql('87741124305'); 67 | expect(result.jsonLD).to.be.an('array').and.to.not.be.empty; 68 | if (result.ogDate === undefined) result.ogDate = 'hack because sometimes this does not come back for some reason'; 69 | expect(result).to.have.all.keys( 70 | 'favicon', 71 | 'fbAppId', 72 | 'jsonLD', 73 | 'alAndroidAppName', 74 | 'alAndroidPackage', 75 | 'alAndroidUrl', 76 | 'alIosAppName', 77 | 'alIosAppStoreId', 78 | 'alIosUrl', 79 | 'alWebUrl', 80 | 'ogDate', 81 | 'ogDescription', 82 | 'ogImage', 83 | 'ogLocale', 84 | 'ogSiteName', 85 | 'ogTitle', 86 | 'ogType', 87 | 'ogUrl', 88 | 'ogVideo', 89 | 'ogVideoTag', 90 | 'ogVideoSecureURL', 91 | 'requestUrl', 92 | 'success', 93 | 'charset', 94 | 'twitterAppIdGooglePlay', 95 | 'twitterAppIdiPad', 96 | 
'twitterAppIdiPhone', 97 | 'twitterAppNameGooglePlay', 98 | 'twitterAppNameiPad', 99 | 'twitterAppNameiPhone', 100 | 'twitterAppUrlGooglePlay', 101 | 'twitterAppUrliPad', 102 | 'twitterAppUrliPhone', 103 | 'twitterCard', 104 | 'twitterDescription', 105 | 'twitterImage', 106 | 'twitterPlayer', 107 | 'twitterSite', 108 | 'twitterTitle', 109 | 'twitterUrl', 110 | ); 111 | expect(response).to.be.an('Response'); 112 | }); 113 | }); 114 | 115 | it('Test Youtube Video with bad escape sequence - Should Return correct Open Graph Info', function () { 116 | const userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36'; 117 | return ogs({ url: 'https://www.youtube.com/watch?v=nFbKMg4E3JM', fetchOptions: { headers: { 'user-agent': userAgent } } }).then(function ({ error, result, response }) { 118 | console.log('error:', error); 119 | console.log('result:', result); 120 | expect(error).to.be.eql(false); 121 | expect(result.alAndroidAppName).to.be.eql('YouTube'); 122 | expect(result.alAndroidPackage).to.be.eql('com.google.android.youtube'); 123 | expect(result.alAndroidUrl).to.be.eql('vnd.youtube://www.youtube.com/watch?v=nFbKMg4E3JM&feature=applinks'); 124 | expect(result.alIosAppName).to.be.eql('YouTube'); 125 | expect(result.alIosAppStoreId).to.be.eql('544007664'); 126 | expect(result.alIosUrl).to.be.eql('vnd.youtube://www.youtube.com/watch?v=nFbKMg4E3JM&feature=applinks'); 127 | expect(result.alWebUrl).to.be.oneOf(['https://www.youtube.com/watch?v=nFbKMg4E3JM&feature=applinks', 'http://www.youtube.com/watch?v=nFbKMg4E3JM&feature=applinks']); 128 | expect(result.ogSiteName).to.be.eql('YouTube'); 129 | expect(result.ogUrl).to.be.eql('https://www.youtube.com/watch?v=nFbKMg4E3JM'); 130 | expect(result.ogTitle).to.be.eql('Force Class 10 in One Shot (Full Chapter) | ICSE 10 Physics Chapter 1 - Abhishek Sir |Vedantu 9 & 10'); 131 | expect(result.ogDescription).to.be.an('string').and.to.not.be.empty; 132 | 
expect(result.ogType).to.be.eql('video.other'); 133 | expect(result.ogLocale).to.be.oneOf(['en', 'en-US', 'nl-NL']); 134 | expect(result.twitterCard).to.be.eql('player'); 135 | expect(result.twitterSite).to.be.eql('@youtube'); 136 | expect(result.twitterTitle).to.be.eql('Force Class 10 in One Shot (Full Chapter) | ICSE 10 Physics Chapter 1 - Abhishek Sir |Vedantu 9 & 10'); 137 | expect(result.twitterDescription).to.be.an('string').and.to.not.be.empty; 138 | expect(result.twitterAppNameiPhone).to.be.eql('YouTube'); 139 | expect(result.twitterAppIdiPhone).to.be.eql('544007664'); 140 | expect(result.twitterAppNameiPad).to.be.eql('YouTube'); 141 | expect(result.twitterAppIdiPad).to.be.eql('544007664'); 142 | expect(result.twitterUrl).to.be.eql('https://www.youtube.com/watch?v=nFbKMg4E3JM'); 143 | expect(result.ogDate).to.be.eql('2021-06-11T09:14:37-07:00'); 144 | expect(result.twitterAppUrliPhone).to.be.eql('vnd.youtube://www.youtube.com/watch?v=nFbKMg4E3JM&feature=applinks'); 145 | expect(result.twitterAppUrliPad).to.be.eql('vnd.youtube://www.youtube.com/watch?v=nFbKMg4E3JM&feature=applinks'); 146 | expect(result.twitterAppNameGooglePlay).to.be.eql('YouTube'); 147 | expect(result.twitterAppIdGooglePlay).to.be.eql('com.google.android.youtube'); 148 | expect(result.twitterAppUrlGooglePlay).to.be.eql('https://www.youtube.com/watch?v=nFbKMg4E3JM'); 149 | expect(result.ogImage).to.be.eql([{ 150 | url: 'https://i.ytimg.com/vi/nFbKMg4E3JM/maxresdefault.jpg', 151 | width: '1280', 152 | height: '720', 153 | type: 'jpg', 154 | }]); 155 | expect(result.ogVideo).to.be.eql([{ 156 | url: 'https://www.youtube.com/embed/nFbKMg4E3JM', 157 | width: '1280', 158 | height: '720', 159 | type: 'text/html', 160 | }]); 161 | expect(result.twitterImage).to.be.eql([{ 162 | url: 'https://i.ytimg.com/vi/nFbKMg4E3JM/maxresdefault.jpg', 163 | }]); 164 | expect(result.twitterPlayer).to.be.eql([{ 165 | url: 'https://www.youtube.com/embed/nFbKMg4E3JM', 166 | width: '1280', 167 | height: '720', 168 | 
}]); 169 | expect(result.ogVideoTag).to.be.eql('vedantu'); 170 | expect(result.ogVideoSecureURL).to.be.eql('https://www.youtube.com/embed/nFbKMg4E3JM'); 171 | expect(result.requestUrl).to.be.eql('https://www.youtube.com/watch?v=nFbKMg4E3JM'); 172 | expect(result.charset).to.be.eql('UTF-8'); 173 | expect(result.success).to.be.eql(true); 174 | expect(result.fbAppId).to.be.eql('87741124305'); 175 | expect(result.jsonLD).to.be.an('array').and.to.not.be.empty; 176 | if (result.ogDate === undefined) result.ogDate = 'hack because sometimes this does not come back for some reason'; 177 | expect(result).to.have.all.keys( 178 | 'favicon', 179 | 'fbAppId', 180 | 'jsonLD', 181 | 'alAndroidAppName', 182 | 'alAndroidPackage', 183 | 'alAndroidUrl', 184 | 'alIosAppName', 185 | 'alIosAppStoreId', 186 | 'alIosUrl', 187 | 'alWebUrl', 188 | 'ogDate', 189 | 'ogDescription', 190 | 'ogImage', 191 | 'ogLocale', 192 | 'ogSiteName', 193 | 'ogTitle', 194 | 'ogType', 195 | 'ogUrl', 196 | 'ogVideo', 197 | 'ogVideoTag', 198 | 'ogVideoSecureURL', 199 | 'requestUrl', 200 | 'success', 201 | 'charset', 202 | 'twitterAppIdGooglePlay', 203 | 'twitterAppIdiPad', 204 | 'twitterAppIdiPhone', 205 | 'twitterAppNameGooglePlay', 206 | 'twitterAppNameiPad', 207 | 'twitterAppNameiPhone', 208 | 'twitterAppUrlGooglePlay', 209 | 'twitterAppUrliPad', 210 | 'twitterAppUrliPhone', 211 | 'twitterCard', 212 | 'twitterDescription', 213 | 'twitterImage', 214 | 'twitterPlayer', 215 | 'twitterSite', 216 | 'twitterTitle', 217 | 'twitterUrl', 218 | ); 219 | expect(response).to.be.an('Response'); 220 | }); 221 | }); 222 | 223 | it('Test Twitch.tv Video - Should Return correct Open Graph Info', function () { 224 | return ogs({ url: 'https://jshemas.github.io/openGraphScraperPages/twitch.html' }).then(function ({ error, result, response }) { 225 | console.log('error:', error); 226 | console.log('result:', result); 227 | expect(error).to.be.eql(false); 228 | expect(result.ogSiteName).to.be.eql('Twitch'); 229 | 
expect(result.twitterSite).to.be.eql('@twitch'); 230 | expect(result.ogLocale).to.be.eql('en-US'); 231 | expect(result.ogTitle).to.be.oneOf(['Twitch', 'AI Soundscapes, Trials of Mana', 'AI Soundscapes, Trials of Mana - Vinesauce on Twitch']); 232 | expect(result.ogDescription).to.be.an('string').and.to.not.be.empty; 233 | expect(result.ogUrl).to.be.eql('https://www.twitch.tv/videos/632214184'); 234 | expect(result.favicon).to.be.eql('https://static.twitchcdn.net/assets/favicon-32-d6025c14e900565d6177.png'); 235 | expect(result.ogType).to.be.oneOf(['website', 'video.other']); 236 | expect(result.ogImage).to.be.to.be.an('array').and.to.not.be.empty; 237 | expect(result.requestUrl).to.be.eql('https://jshemas.github.io/openGraphScraperPages/twitch.html'); 238 | expect(result.charset).to.be.eql('utf-8'); 239 | expect(result.fbAppId).to.be.eql('161273083968709'); 240 | expect(result.success).to.be.eql(true); 241 | expect(result).to.have.all.keys( 242 | 'favicon', 243 | 'fbAppId', 244 | 'ogDescription', 245 | 'ogImage', 246 | 'ogLocale', 247 | 'ogSiteName', 248 | 'ogTitle', 249 | 'ogType', 250 | 'ogUrl', 251 | 'requestUrl', 252 | 'success', 253 | 'charset', 254 | 'twitterSite', 255 | ); 256 | expect(response).to.be.an('Response'); 257 | }); 258 | }); 259 | }); 260 | -------------------------------------------------------------------------------- /tests/unit/media.spec.ts: -------------------------------------------------------------------------------- 1 | import { expect } from 'chai'; 2 | 3 | import { mediaSetup } from '../../lib/media'; 4 | 5 | describe('media', function () { 6 | it('has images and twitter images', function () { 7 | const ogMedia = mediaSetup({ 8 | ogImageProperty: ['http://test.com/logo.png'], 9 | ogImageType: ['image/png'], 10 | ogImageWidth: ['300'], 11 | ogImageHeight: ['300'], 12 | twitterImageProperty: ['http://test.com/logo.png'], 13 | twitterImageAlt: ['image/png'], 14 | twitterImageWidth: ['300'], 15 | twitterImageHeight: ['300'], 16 | }); 17 | 
18 | expect(ogMedia.ogImage).to.eql([{ 19 | url: 'http://test.com/logo.png', 20 | width: '300', 21 | height: '300', 22 | type: 'image/png', 23 | }]); 24 | 25 | expect(ogMedia.twitterImage).to.eql([{ 26 | url: 'http://test.com/logo.png', 27 | width: '300', 28 | height: '300', 29 | alt: 'image/png', 30 | }]); 31 | }); 32 | 33 | it('has twitter images but falls back to twitterImageSrc', function () { 34 | const ogMedia = mediaSetup({ 35 | twitterImageSrc: ['http://test.com/logoTwo.png'], 36 | twitterImageAlt: ['image/png'], 37 | twitterImageWidth: ['300'], 38 | twitterImageHeight: ['300'], 39 | }); 40 | 41 | expect(ogMedia.twitterImage).to.eql([{ 42 | url: 'http://test.com/logoTwo.png', 43 | width: '300', 44 | height: '300', 45 | alt: 'image/png', 46 | }]); 47 | }); 48 | 49 | it('has image/twitterImage but no type/height/width', function () { 50 | const ogMedia = mediaSetup({ 51 | ogImageProperty: ['http://test.com/logo.png'], 52 | twitterImageProperty: ['http://test.com/logo.png'], 53 | }); 54 | 55 | expect(ogMedia.ogImage).to.eql([{ 56 | url: 'http://test.com/logo.png', 57 | }]); 58 | 59 | expect(ogMedia.twitterImage).to.eql([{ 60 | url: 'http://test.com/logo.png', 61 | }]); 62 | }); 63 | 64 | it('has images and twitter images without property', function () { 65 | const ogMedia = mediaSetup({ 66 | ogImageType: ['image/png'], 67 | ogImageWidth: ['300'], 68 | ogImageHeight: ['300'], 69 | twitterImageAlt: ['image/png'], 70 | twitterImageWidth: ['300'], 71 | twitterImageHeight: ['300'], 72 | }); 73 | 74 | expect(ogMedia).to.eql({}); 75 | }); 76 | 77 | it('has image with ogImageSecureURL/ogImageURL/ogImageProperty', function () { 78 | const ogMedia = mediaSetup({ 79 | ogImageSecureURL: ['https://test.com/logo.png'], 80 | ogImageURL: ['http://test.com/logoTwo.png'], 81 | ogImageProperty: ['http://test.com/logo.png'], 82 | ogImageType: ['image/png'], 83 | ogImageWidth: ['300'], 84 | ogImageHeight: ['300'], 85 | }); 86 | 87 | expect(ogMedia.ogImage).to.eql([{ 88 | url: 
'https://test.com/logo.png', 89 | width: '300', 90 | height: '300', 91 | type: 'image/png', 92 | }]); 93 | }); 94 | 95 | it('has image with ogImageURL/ogImageProperty', function () { 96 | const ogMedia = mediaSetup({ 97 | ogImageURL: ['http://test.com/logoTwo.png'], 98 | ogImageProperty: ['http://test.com/logo.png'], 99 | ogImageType: ['image/png'], 100 | ogImageWidth: ['300'], 101 | ogImageHeight: ['300'], 102 | }); 103 | 104 | expect(ogMedia.ogImage).to.eql([{ 105 | url: 'http://test.com/logo.png', 106 | width: '300', 107 | height: '300', 108 | type: 'image/png', 109 | }]); 110 | }); 111 | 112 | it('has image with ogImageURL', function () { 113 | const ogMedia = mediaSetup({ 114 | ogImageURL: ['http://test.com/logoTwo.png'], 115 | ogImageType: ['image/png'], 116 | ogImageWidth: ['300'], 117 | ogImageHeight: ['300'], 118 | }); 119 | 120 | expect(ogMedia.ogImage).to.eql([{ 121 | url: 'http://test.com/logoTwo.png', 122 | width: '300', 123 | height: '300', 124 | type: 'image/png', 125 | }]); 126 | }); 127 | 128 | it('has many images and twitter images', function () { 129 | const ogMedia = mediaSetup({ 130 | ogImageProperty: ['http://test.com/logo_one.png', 'http://test.com/logo_two.png', 'http://test.com/logo_three.png', ''], 131 | ogImageType: ['image/png', 'image/png', 'image/png'], 132 | ogImageWidth: ['300'], 133 | ogImageHeight: ['300'], 134 | twitterImageProperty: ['http://test.com/logo_one.png', 'http://test.com/logo_two.png', 'http://test.com/logo_three.png', ''], 135 | twitterImageAlt: ['image/png', 'image/png', 'image/png'], 136 | twitterImageWidth: ['300'], 137 | twitterImageHeight: ['300'], 138 | }); 139 | 140 | expect(ogMedia.ogImage).to.eql([{ 141 | url: 'http://test.com/logo_one.png', 142 | width: '300', 143 | height: '300', 144 | type: 'image/png', 145 | }, { 146 | url: 'http://test.com/logo_two.png', 147 | type: 'image/png', 148 | }, { 149 | url: 'http://test.com/logo_three.png', 150 | type: 'image/png', 151 | }]); 152 | 153 | 
expect(ogMedia.twitterImage).to.eql([{ 154 | url: 'http://test.com/logo_one.png', 155 | width: '300', 156 | height: '300', 157 | alt: 'image/png', 158 | }, { 159 | url: 'http://test.com/logo_two.png', 160 | alt: 'image/png', 161 | }, { 162 | url: 'http://test.com/logo_three.png', 163 | alt: 'image/png', 164 | }]); 165 | }); 166 | 167 | it('has a .gif images and twitter images', function () { 168 | const ogMedia = mediaSetup({ 169 | ogImageProperty: ['http://test.com/logo_one.png', 'http://test.com/logo_two.gif'], 170 | ogImageType: ['image/png', 'image/gif'], 171 | ogImageWidth: ['300', '600'], 172 | ogImageHeight: ['300', '600'], 173 | twitterImageProperty: ['http://test.com/logo_two.gif', 'http://test.com/logo_one.png'], 174 | twitterImageAlt: ['image/gif', 'image/png'], 175 | twitterImageWidth: ['300', '600'], 176 | twitterImageHeight: ['300', '600'], 177 | }); 178 | 179 | expect(ogMedia.ogImage).to.eql([{ 180 | url: 'http://test.com/logo_two.gif', 181 | type: 'image/gif', 182 | width: '600', 183 | height: '600', 184 | }, { 185 | url: 'http://test.com/logo_one.png', 186 | type: 'image/png', 187 | width: '300', 188 | height: '300', 189 | }]); 190 | 191 | expect(ogMedia.twitterImage).to.eql([{ 192 | url: 'http://test.com/logo_two.gif', 193 | alt: 'image/gif', 194 | width: '300', 195 | height: '300', 196 | }, { 197 | url: 'http://test.com/logo_one.png', 198 | alt: 'image/png', 199 | width: '600', 200 | height: '600', 201 | }]); 202 | }); 203 | 204 | it('has no image or video', function () { 205 | const ogMedia = mediaSetup({ 206 | ogTitle: 'test site', 207 | ogType: 'website', 208 | ogUrl: 'http://test.com/', 209 | ogDescription: 'stuff', 210 | }); 211 | 212 | expect(ogMedia.ogImage).to.eql(undefined); 213 | expect(ogMedia.twitterImage).to.eql(undefined); 214 | expect(ogMedia.ogVideo).to.eql(undefined); 215 | expect(ogMedia.twitterPlayer).to.eql(undefined); 216 | }); 217 | 218 | it('has video and twitter video', function () { 219 | const ogMedia = mediaSetup({ 220 
| ogVideoProperty: ['http://test.com/logo.png'], 221 | ogVideoType: ['image/png'], 222 | ogVideoWidth: ['300'], 223 | ogVideoHeight: ['300'], 224 | twitterPlayerProperty: ['http://test.com/logo.png'], 225 | twitterPlayerStream: ['image/png'], 226 | twitterPlayerWidth: ['300'], 227 | twitterPlayerHeight: ['300'], 228 | }); 229 | 230 | expect(ogMedia.ogVideo).to.eql([{ 231 | url: 'http://test.com/logo.png', 232 | width: '300', 233 | height: '300', 234 | type: 'image/png', 235 | }]); 236 | 237 | expect(ogMedia.twitterPlayer).to.eql([{ 238 | url: 'http://test.com/logo.png', 239 | width: '300', 240 | height: '300', 241 | stream: 'image/png', 242 | }]); 243 | }); 244 | 245 | it('has video/twitterVideo but no type/width/height', function () { 246 | const ogMedia = mediaSetup({ 247 | ogVideoProperty: ['http://test.com/logo.png'], 248 | twitterPlayerProperty: ['http://test.com/logo.png'], 249 | }); 250 | 251 | expect(ogMedia.ogVideo).to.eql([{ 252 | url: 'http://test.com/logo.png', 253 | }]); 254 | 255 | expect(ogMedia.twitterPlayer).to.eql([{ 256 | url: 'http://test.com/logo.png', 257 | }]); 258 | }); 259 | 260 | it('has video and twitter video but with no property', function () { 261 | const ogMedia = mediaSetup({ 262 | ogVideoType: ['image/png'], 263 | ogVideoWidth: ['300'], 264 | ogVideoHeight: ['300'], 265 | twitterPlayerStream: ['image/png'], 266 | twitterPlayerWidth: ['300'], 267 | twitterPlayerHeight: ['300'], 268 | }); 269 | 270 | expect(ogMedia).to.eql({}); 271 | }); 272 | 273 | it('has music:song', function () { 274 | const ogMedia = mediaSetup({ 275 | musicSongProperty: ['http://test.com/songurl'], 276 | musicSongTrack: ['1'], 277 | musicSongDisc: ['1'], 278 | }); 279 | 280 | expect(ogMedia.musicSong).to.eql([{ 281 | url: 'http://test.com/songurl', 282 | track: '1', 283 | disc: '1', 284 | }]); 285 | }); 286 | 287 | it('has music:song but falls back to musicSongUrl', function () { 288 | const ogMedia = mediaSetup({ 289 | musicSongUrl: 
['http://test.com/songurlTwo'], 290 | musicSongTrack: ['1'], 291 | musicSongDisc: ['1'], 292 | }); 293 | 294 | expect(ogMedia.musicSong).to.eql([{ 295 | url: 'http://test.com/songurlTwo', 296 | track: '1', 297 | disc: '1', 298 | }]); 299 | }); 300 | 301 | it('has music:song but no track/disc', function () { 302 | const ogMedia = mediaSetup({ 303 | musicSongProperty: ['http://test.com/songurl'], 304 | }); 305 | 306 | expect(ogMedia.musicSong).to.eql([{ 307 | url: 'http://test.com/songurl', 308 | }]); 309 | }); 310 | 311 | it('has multiple music:songs', function () { 312 | const ogMedia = mediaSetup({ 313 | musicSongProperty: ['http://test.com/songurl', 'http://test.com/songurl3', 'http://test.com/songurl2', ''], 314 | musicSongTrack: ['1', '2', '4', ''], 315 | musicSongDisc: ['1', '2', '1', ''], 316 | }); 317 | 318 | expect(ogMedia.musicSong).to.eql([{ 319 | url: 'http://test.com/songurl', 320 | track: '1', 321 | disc: '1', 322 | }, 323 | { 324 | url: 'http://test.com/songurl2', 325 | track: '4', 326 | disc: '1', 327 | }, 328 | { 329 | url: 'http://test.com/songurl3', 330 | track: '2', 331 | disc: '2', 332 | }]); 333 | }); 334 | 335 | it('has ogImageProperty/twitterImage/ogVideo/twitterPlayer', function () { 336 | const ogMedia = mediaSetup({ 337 | ogImageProperty: ['http://test.com/logo.png'], 338 | ogImageType: ['image/png'], 339 | ogImageWidth: ['300'], 340 | ogImageHeight: ['300'], 341 | twitterImageProperty: ['http://test.com/logo.png'], 342 | twitterImageAlt: ['image/png'], 343 | twitterImageWidth: ['300'], 344 | twitterImageHeight: ['300'], 345 | ogVideoProperty: ['http://test.com/logo.png'], 346 | ogVideoType: ['image/png'], 347 | ogVideoWidth: ['300'], 348 | ogVideoHeight: ['300'], 349 | twitterPlayerProperty: ['http://test.com/logo.png'], 350 | twitterPlayerStream: ['image/png'], 351 | twitterPlayerWidth: ['300'], 352 | twitterPlayerHeight: ['300'], 353 | }); 354 | 355 | expect(ogMedia.ogImage).to.eql([{ 356 | url: 'http://test.com/logo.png', 357 | width: 
'300', 358 | height: '300', 359 | type: 'image/png', 360 | }]); 361 | 362 | expect(ogMedia.twitterImage).to.eql([{ 363 | url: 'http://test.com/logo.png', 364 | width: '300', 365 | height: '300', 366 | alt: 'image/png', 367 | }]); 368 | 369 | expect(ogMedia.ogVideo).to.eql([{ 370 | url: 'http://test.com/logo.png', 371 | width: '300', 372 | height: '300', 373 | type: 'image/png', 374 | }]); 375 | 376 | expect(ogMedia.twitterPlayer).to.eql([{ 377 | url: 'http://test.com/logo.png', 378 | width: '300', 379 | height: '300', 380 | stream: 'image/png', 381 | }]); 382 | }); 383 | 384 | it('has more then 10 images', function () { 385 | const ogMedia = mediaSetup({ 386 | ogImageProperty: ['http://test.com/logo1.png', 'http://test.com/logo2.png', 'http://test.com/logo3.png', 'http://test.com/logo4.png', 'http://test.com/logo5.png', 'http://test.com/logo6.png', 'http://test.com/logo7.png', 'http://test.com/logo8.png', 'http://test.com/logo9.png', 'http://test.com/logo10.png', 'http://test.com/logo11.png'], 387 | ogImageType: ['image/png', 'image/png', 'image/png', 'image/png', 'image/png', 'image/png', 'image/png', 'image/png', 'image/png', 'image/png', 'image/png'], 388 | ogImageWidth: ['300', '300', '300', '300', '300', '300', '300', '300', '300', '300', '300'], 389 | ogImageHeight: ['300', '300', '300', '300', '300', '300', '300', '300', '300', '300', '300'], 390 | }); 391 | 392 | expect(ogMedia.ogImage).to.eql([ 393 | { 394 | url: 'http://test.com/logo1.png', 395 | width: '300', 396 | height: '300', 397 | type: 'image/png', 398 | }, 399 | { 400 | url: 'http://test.com/logo2.png', 401 | width: '300', 402 | height: '300', 403 | type: 'image/png', 404 | }, 405 | { 406 | url: 'http://test.com/logo3.png', 407 | width: '300', 408 | height: '300', 409 | type: 'image/png', 410 | }, 411 | { 412 | url: 'http://test.com/logo4.png', 413 | width: '300', 414 | height: '300', 415 | type: 'image/png', 416 | }, 417 | { 418 | url: 'http://test.com/logo5.png', 419 | width: '300', 420 | 
height: '300', 421 | type: 'image/png', 422 | }, 423 | { 424 | url: 'http://test.com/logo6.png', 425 | width: '300', 426 | height: '300', 427 | type: 'image/png', 428 | }, 429 | { 430 | url: 'http://test.com/logo7.png', 431 | width: '300', 432 | height: '300', 433 | type: 'image/png', 434 | }, 435 | { 436 | url: 'http://test.com/logo8.png', 437 | width: '300', 438 | height: '300', 439 | type: 'image/png', 440 | }, 441 | { 442 | url: 'http://test.com/logo9.png', 443 | width: '300', 444 | height: '300', 445 | type: 'image/png', 446 | }, 447 | { 448 | url: 'http://test.com/logo10.png', 449 | width: '300', 450 | height: '300', 451 | type: 'image/png', 452 | }, 453 | ]); 454 | }); 455 | }); 456 | -------------------------------------------------------------------------------- /tests/unit/utils.spec.ts: -------------------------------------------------------------------------------- 1 | import { expect } from 'chai'; 2 | 3 | /* eslint-disable mocha/no-setup-in-describe */ 4 | import { 5 | findImageTypeFromUrl, 6 | isCustomMetaTagsValid, 7 | isImageTypeValid, 8 | isThisANonHTMLUrl, 9 | optionSetup, 10 | removeNestedUndefinedValues, 11 | unescapeScriptText, 12 | validateAndFormatURL, 13 | } from '../../lib/utils'; 14 | 15 | const validateUrl = (urls, valid, message, urlValidatorSettings) => { 16 | for (let index = 0; index < urls.length; index += 1) { 17 | // eslint-disable-next-line no-loop-func 18 | it(`${urls[index]} ${message}`, function () { 19 | const formattedUrl = validateAndFormatURL(urls[index], urlValidatorSettings); 20 | if (valid) { 21 | return expect(formattedUrl.url).to.not.be.eql(null); 22 | } 23 | return expect(formattedUrl.url).to.be.eql(null); 24 | }); 25 | } 26 | }; 27 | 28 | describe('utils', function () { 29 | describe('validateAndFormatURL', function () { 30 | context('validing URLs', function () { 31 | const defaultUrlValidatorSettings = { 32 | allow_fragments: true, 33 | allow_protocol_relative_urls: false, 34 | allow_query_components: true, 35 | 
allow_trailing_dot: false, 36 | allow_underscores: false, 37 | protocols: ['http', 'https'], 38 | require_host: true, 39 | require_port: false, 40 | require_protocol: false, 41 | require_tld: true, 42 | require_valid_protocol: true, 43 | validate_length: true, 44 | }; 45 | 46 | validateUrl([ 47 | 'foobar.com', 48 | 'foobar.com/', 49 | 'http://[::192.9.5.5]/ipng', 50 | 'http://[::FFFF:129.144.52.38]:80/index.html', 51 | 'http://[1080::8:800:200C:417A]/foo', 52 | 'http://[1080:0:0:0:8:800:200C:417A]/index.html', 53 | 'http://[2010:836B:4179::836B:4179]', 54 | 'http://[3ffe:2a00:100:7031::1]', 55 | 'http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html', 56 | 'http://10.0.0.0/', 57 | 'http://127.0.0.1/', 58 | 'http://189.123.14.13/', 59 | 'http://duckduckgo.com/?q=%2F', 60 | 'http://example.com/example.json#/foo/bar', 61 | 'http://foo--bar.com', 62 | 'http://foobar.com?foo=bar', 63 | 'http://foobar.com/?foo=bar#baz=qux', 64 | 'http://foobar.com/t$-_.+!*\'(),', 65 | 'http://foobar.com#baz=qux', 66 | 'http://høyfjellet.no', 67 | 'http://user:@www.foobar.com/', 68 | 'http://user:pass@www.foobar.com/', 69 | 'http://www.foobar.com:23/', 70 | 'http://www.foobar.com:5/', 71 | 'http://www.foobar.com:65535/', 72 | 'http://www.foobar.com/', 73 | 'HTTP://WWW.FOOBAR.COM/', 74 | 'http://www.foobar.com/~foobar', 75 | 'http://www.xn--froschgrn-x9a.net/', 76 | 'http://xn------eddceddeftq7bvv7c4ke4c.xn--p1ai', 77 | 'http://xn--froschgrn-x9a.com/', 78 | 'http://xn--j1aac5a4g.xn--j1amh', 79 | 'http://кулік.укр', 80 | 'https://www.foobar.com/', 81 | 'https://www.foobar.com/', 82 | 'HTTPS://WWW.FOOBAR.COM/', 83 | 'test.com?ref=http://test2.com', 84 | 'valid.au', 85 | 'www.foobar.com', 86 | ], true, 'should be valid', defaultUrlValidatorSettings); 87 | 88 | validateUrl([ 89 | '!.foo.com', 90 | '.com', 91 | '', 92 | '*.foo.com', 93 | '////foobar.com', 94 | '//foobar.com', 95 | 'ftp://www.foobar.com/', 96 | 'http://*.foo.com', 97 | 'http:////foobar.com', 98 | 
'http://\n@www.foobar.com/', 99 | 'http://300.0.0.1/', 100 | 'http://com/', 101 | 'http://example.com.', 102 | 'http://foobar/ lol/', 103 | 'http://foobar/? lol', 104 | 'http://foobar/# lol', 105 | 'http://localhost:3000/', 106 | 'http://localhost:61500this is an invalid url!!!!', 107 | 'http://lol @foobar.com/', 108 | 'http://lol: @foobar.com/', 109 | 'http://lol:lol @foobar.com/', 110 | 'http://lol:lol:lol@foobar.com/', 111 | 'http://www.-foobar.com/', 112 | 'http://www.foo_bar.com/', 113 | 'http://www.foobar-.com/', 114 | 'http://www.foobar.com:0/', 115 | 'http://www.foobar.com:70000/', 116 | 'http://www.foobar.com:99999/', 117 | 'http://www.foobar.com/\t', 118 | 'http://www.xn--.com/', 119 | 'http://xn--.com/', 120 | 'https://example.com/foo//', 121 | 'invalid.', 122 | 'invalid.x', 123 | 'invalid/', 124 | 'mailto:foo@bar.com', 125 | 'rtmp://foobar.com', 126 | 'xyz://foobar.com', 127 | `http://foobar.com/${new Array(2083).join('f')}`, 128 | ], false, 'should be invalid', defaultUrlValidatorSettings); 129 | }); 130 | 131 | context('validing URLs with options.urlValidatorSettings (https is invalid)', function () { 132 | const noHTTPSUrlValidatorSettings = { 133 | allow_fragments: true, 134 | allow_protocol_relative_urls: false, 135 | allow_query_components: true, 136 | allow_trailing_dot: false, 137 | allow_underscores: false, 138 | protocols: ['http'], 139 | require_host: true, 140 | require_port: false, 141 | require_protocol: false, 142 | require_tld: true, 143 | require_valid_protocol: true, 144 | validate_length: true, 145 | }; 146 | 147 | validateUrl([ 148 | 'http://www.foobar.com/', 149 | 'http://www.foobar.com/', 150 | 'HTTP://WWW.FOOBAR.COM/', 151 | ], true, 'should be valid', noHTTPSUrlValidatorSettings); 152 | 153 | validateUrl([ 154 | 'https://www.foobar.com/', 155 | 'https://www.foobar.com/', 156 | 'HTTPS://WWW.FOOBAR.COM/', 157 | ], false, 'should be invalid', noHTTPSUrlValidatorSettings); 158 | }); 159 | }); 160 | 161 | 
describe('findImageTypeFromUrl', function () { 162 | it('foobar.com/image.png?test=true', function () { 163 | const type = findImageTypeFromUrl('foobar.com/image.png?test=true'); 164 | expect(type).to.eql('png'); 165 | }); 166 | 167 | it('foobar.com/image.png', function () { 168 | const type = findImageTypeFromUrl('foobar.com/image.png'); 169 | expect(type).to.eql('png'); 170 | }); 171 | 172 | it('image.png', function () { 173 | const type = findImageTypeFromUrl('image.png'); 174 | expect(type).to.eql('png'); 175 | }); 176 | 177 | it('image', function () { 178 | const type = findImageTypeFromUrl('image'); 179 | expect(type).to.eql('image'); 180 | }); 181 | 182 | it('empty string', function () { 183 | const type = findImageTypeFromUrl(''); 184 | expect(type).to.eql(''); 185 | }); 186 | }); 187 | 188 | describe('isImageTypeValid', function () { 189 | it('when type is png', function () { 190 | const valid = isImageTypeValid('png'); 191 | expect(valid).to.eql(true); 192 | }); 193 | 194 | it('when type is foo', function () { 195 | const valid = isImageTypeValid('foo'); 196 | expect(valid).to.eql(false); 197 | }); 198 | }); 199 | 200 | describe('isThisANonHTMLUrl', function () { 201 | it('when url is type .png', function () { 202 | const valid = isThisANonHTMLUrl('www.foo.com/bar.png'); 203 | expect(valid).to.eql(true); 204 | }); 205 | 206 | it('when url is type .html', function () { 207 | const valid = isThisANonHTMLUrl('www.foo.com/bar.html'); 208 | expect(valid).to.eql(false); 209 | }); 210 | 211 | it('when url is type .pdf and has params', function () { 212 | const valid = isThisANonHTMLUrl('www.foo.com/bar.pdf?123'); 213 | expect(valid).to.eql(true); 214 | }); 215 | 216 | it('when domain in url contains a non HTML string (.txt)', function () { 217 | const valid = isThisANonHTMLUrl('www.txt.com/bar.html'); 218 | expect(valid).to.eql(false); 219 | }); 220 | 221 | it('when domain in url contains a non HTML string (.mov) no extension on path', function () { 222 | const 
valid = isThisANonHTMLUrl('www.mov.com/bar'); 223 | expect(valid).to.eql(false); 224 | }); 225 | }); 226 | 227 | describe('removeNestedUndefinedValues', function () { 228 | it('when there is no undef values', function () { 229 | const object = removeNestedUndefinedValues({ one: 1 }); 230 | expect(object).to.eql({ one: 1 }); 231 | }); 232 | 233 | it('when there is undef values', function () { 234 | const object = removeNestedUndefinedValues({ one: 1, two: undefined }); 235 | expect(object).to.eql({ one: 1 }); 236 | }); 237 | 238 | it('when there is a nested undef value', function () { 239 | const object = removeNestedUndefinedValues({ one: 1, two: { three: undefined } }); 240 | expect(object).to.eql({ one: 1, two: {} }); 241 | }); 242 | }); 243 | 244 | describe('optionSetup', function () { 245 | it('when passing nothing into optionSetup', function () { 246 | const { options } = optionSetup({}); 247 | expect(options).to.eql({ onlyGetOpenGraphInfo: false }); 248 | }); 249 | 250 | it('when passing onlyGetOpenGraphInfo into optionSetup', function () { 251 | const { options } = optionSetup({ onlyGetOpenGraphInfo: true }); 252 | expect(options).to.eql({ onlyGetOpenGraphInfo: true }); 253 | }); 254 | }); 255 | 256 | describe('isCustomMetaTagsValid', function () { 257 | it('when passing a valid custom tag into isCustomMetaTagsValid', function () { 258 | const response = isCustomMetaTagsValid([{ 259 | multiple: false, 260 | property: 'foo', 261 | fieldName: 'fooTag', 262 | }]); 263 | expect(response).to.eql(true); 264 | }); 265 | 266 | it('when passing a enpty array into isCustomMetaTagsValid', function () { 267 | const response = isCustomMetaTagsValid([]); 268 | expect(response).to.eql(true); 269 | }); 270 | 271 | it('when passing a custom tag missing property into isCustomMetaTagsValid', function () { 272 | // @ts-ignore 273 | const response = isCustomMetaTagsValid([{ 274 | multiple: false, 275 | fieldName: 'fooTag', 276 | }]); 277 | expect(response).to.eql(false); 278 | 
}); 279 | 280 | it('when passing a custom tag invalid property into isCustomMetaTagsValid', function () { 281 | const response = isCustomMetaTagsValid([{ 282 | multiple: false, 283 | property: 'foo', 284 | // @ts-ignore 285 | fieldName: true, 286 | }]); 287 | expect(response).to.eql(false); 288 | }); 289 | 290 | it('when passing a valid and invalid custom tag into isCustomMetaTagsValid', function () { 291 | // @ts-ignore 292 | const response = isCustomMetaTagsValid([{ 293 | multiple: false, 294 | property: 'foo', 295 | }, { 296 | multiple: false, 297 | property: 'foo', 298 | fieldName: 'fooTag', 299 | }]); 300 | expect(response).to.eql(false); 301 | }); 302 | 303 | it('when passing a invalid array into isCustomMetaTagsValid', function () { 304 | // @ts-ignore 305 | const response = isCustomMetaTagsValid(['foo', 'bar']); 306 | expect(response).to.eql(false); 307 | }); 308 | }); 309 | 310 | describe('unescapeScriptText', function () { 311 | it('is needed because `JSON.parse()` is not able to parse string with \\xHH', function () { 312 | expect(JSON.parse('"\\u2611"')).to.eql('☑'); 313 | expect(() => { 314 | JSON.parse('"\\x26"'); 315 | }).to.throw(SyntaxError); 316 | }); 317 | 318 | it('should unescape script text', function () { 319 | expect(unescapeScriptText('"\\x27"')).to.eql('"\'"'); 320 | expect(unescapeScriptText('"\\x26"')).to.eql('"&"'); 321 | expect(unescapeScriptText('"\\x22"')).to.eql('"\\""'); 322 | }); 323 | }); 324 | }); 325 | -------------------------------------------------------------------------------- /tsconfig.build.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [ 3 | "index.ts", 4 | "lib/**/*" 5 | ], 6 | "exclude": [ 7 | "tests/**/*" 8 | ], 9 | "compilerOptions": { 10 | "esModuleInterop": true, 11 | "forceConsistentCasingInFileNames": true, 12 | "lib": ["es2023"], 13 | "skipLibCheck": true, 14 | "strict": true, 15 | "target": "es2022", 16 | } 17 | } 18 | 
-------------------------------------------------------------------------------- /tsconfig.declaration.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [ 3 | "index.ts", 4 | "lib/**/*" 5 | ], 6 | "exclude": [ 7 | "tests/**/*" 8 | ], 9 | "compilerOptions": { 10 | "declaration": true, 11 | "declarationDir": "./types", 12 | "emitDeclarationOnly": true, 13 | "esModuleInterop": true, 14 | "forceConsistentCasingInFileNames": true, 15 | "lib": ["es2023"], 16 | "skipLibCheck": true, 17 | "strict": true, 18 | "target": "es2022", 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [ 3 | "index.ts", 4 | "lib/**/*" 5 | ], 6 | "exclude": [ 7 | "tests/**/*" 8 | ], 9 | "compilerOptions": { 10 | "esModuleInterop": true, 11 | "forceConsistentCasingInFileNames": true, 12 | "lib": ["es2023"], 13 | "module": "node16", 14 | "moduleResolution": "node16", 15 | "skipLibCheck": true, 16 | "strict": true, 17 | "target": "es2022", 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /tsconfig.tests.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.json", 3 | "include": [ 4 | "tests/**/*" 5 | ], 6 | "exclude": [ 7 | "index.ts", 8 | "lib/**/*" 9 | ] 10 | } 11 | -------------------------------------------------------------------------------- /types/index.d.ts: -------------------------------------------------------------------------------- 1 | import type { ErrorResult, OpenGraphScraperOptions, SuccessResult } from './lib/types'; 2 | /** 3 | * `open-graph-scraper` uses [fetch](https://developer.mozilla.org/en-US/docs/Web/API/fetch) for http requests 4 | * for scraping Open Graph and Twitter Card info off a website. 
5 | * 6 | * @param {object} options - The options used by Open Graph Scraper 7 | * @param {boolean|string[]} [options.onlyGetOpenGraphInfo] - Only fetch open graph info and don't fall back on 8 | * anything else. 9 | * @param {object} [options.customMetaTags] - Here you can define custom meta tags you want to scrape. 10 | * @param {object} [options.fetchOptions] - Sets the options used by fetch for the http requests 11 | * @param {object} [options.urlValidatorSettings] - Sets the options used by validator.js for testing the URL 12 | * @param {string[]} [options.blacklist] - Pass in an array of sites you don't want ogs to run on. 13 | * @param {string} [options.html] - You can pass in an HTML string to run ogs on it. (use without options.url) 14 | * @param {number} [options.timeout] - Number of seconds before the fetch request ends. (default is 10 seconds) 15 | * @param {string} options.url - URL of the site. (Required) 16 | * @returns {Promise} Promise Object with the Open Graph results 17 | */ 18 | declare function run(options: OpenGraphScraperOptions): Promise; 19 | export = run; 20 | -------------------------------------------------------------------------------- /types/lib/extract.d.ts: -------------------------------------------------------------------------------- 1 | import type { OgObjectInternal, OpenGraphScraperOptions } from './types'; 2 | /** 3 | * extract all of the meta tags needed for ogs 4 | * 5 | * @param {string} body - the body of the fetch request 6 | * @param {object} options - options for ogs 7 | * @return {object} object with ogs results 8 | * 9 | */ 10 | export default function extractMetaTags(body: string, options: OpenGraphScraperOptions): OgObjectInternal; 11 | -------------------------------------------------------------------------------- /types/lib/fallback.d.ts: -------------------------------------------------------------------------------- 1 | import type { CheerioAPI } from 'cheerio'; 2 | import type { OpenGraphScraperOptions, 
OgObjectInternal } from './types'; 3 | /** 4 | * ogs fallbacks 5 | * 6 | * @param {object} ogObject - the current ogObject 7 | * @param {object} options - options for ogs 8 | * @param {object} $ - cheerio.load() of the current html 9 | * @return {object} object with ogs results with updated fallback values 10 | * 11 | */ 12 | export declare function fallback(ogObject: OgObjectInternal, options: OpenGraphScraperOptions, $: CheerioAPI, body: string): OgObjectInternal; 13 | export default fallback; 14 | -------------------------------------------------------------------------------- /types/lib/fields.d.ts: -------------------------------------------------------------------------------- 1 | import type { OgObjectInternal } from './types'; 2 | type Fields = { 3 | multiple: boolean; 4 | property: string; 5 | fieldName: keyof OgObjectInternal; 6 | }[]; 7 | /** 8 | * array of meta tags ogs is looking for 9 | * 10 | * @return {array} array of meta tags 11 | * 12 | */ 13 | declare const fields: Fields; 14 | export default fields; 15 | -------------------------------------------------------------------------------- /types/lib/isUrl.d.ts: -------------------------------------------------------------------------------- 1 | export default function isURL(url: any, options: any): boolean; 2 | -------------------------------------------------------------------------------- /types/lib/media.d.ts: -------------------------------------------------------------------------------- 1 | import type { OgObjectInternal } from './types'; 2 | /** 3 | * formats the multiple media values 4 | * 5 | * @param {object} ogObject - the current ogObject 6 | * @param {object} options - options for ogs 7 | * @return {object} object with ogs results with updated media values 8 | * 9 | */ 10 | export declare function mediaSetup(ogObject: OgObjectInternal): OgObjectInternal; 11 | export default mediaSetup; 12 | -------------------------------------------------------------------------------- 
/types/lib/openGraphScraper.d.ts: -------------------------------------------------------------------------------- 1 | import type { OpenGraphScraperOptions } from './types'; 2 | /** 3 | * sets up options for the fetch request and calls extract on html 4 | * 5 | * @param {object} options - options for ogs 6 | * @return {object} object with ogs results 7 | * 8 | */ 9 | export default function setOptionsAndReturnOpenGraphResults(ogsOptions: OpenGraphScraperOptions): Promise<{ 10 | ogObject: import("./types").OgObjectInternal; 11 | response: { 12 | body: string; 13 | }; 14 | html: string; 15 | } | { 16 | ogObject: import("./types").OgObjectInternal; 17 | response: import("undici").Response; 18 | html: string; 19 | }>; 20 | -------------------------------------------------------------------------------- /types/lib/request.d.ts: -------------------------------------------------------------------------------- 1 | import type { OpenGraphScraperOptions } from './types'; 2 | /** 3 | * performs the fetch request and formats the body for ogs 4 | * 5 | * @param {object} options - options for ogs 6 | * @return {object} formatted request body and response 7 | * 8 | */ 9 | export default function requestAndResultsFormatter(options: OpenGraphScraperOptions): Promise<{ 10 | body: string; 11 | response: import("undici").Response; 12 | }>; 13 | -------------------------------------------------------------------------------- /types/lib/types.d.ts: -------------------------------------------------------------------------------- 1 | import type { RequestInit } from 'undici'; 2 | export interface SuccessResult { 3 | error: false; 4 | html: string; 5 | response: object; 6 | result: OgObject; 7 | } 8 | export interface ErrorResult { 9 | error: true; 10 | html: undefined; 11 | response: undefined; 12 | result: OgObject; 13 | } 14 | export type OnlyGetOpenGraphInfoItem = 'image' | 'title' | 'description' | 'locale' | 'logo' | 'url' | 'favicon' | 'audioUrl' | 'date'; 15 | /** 16 | * The 
options used by Open Graph Scraper 17 | * 18 | * @typeParam {string} url - URL of the site. (Required) 19 | * @typeParam {string} [html] - You can pass in an HTML string to run ogs on it. (use without options.url) 20 | * @typeParam {string[]} [blacklist] - Pass in an array of sites you don't want ogs to run on. 21 | * @typeParam {boolean | OnlyGetOpenGraphInfoItem[]} [onlyGetOpenGraphInfo] - Only fetch open graph info and don't fall back on anything else. 22 | * @typeParam {CustomMetaTags} [customMetaTags] - Here you can define custom meta tags you want to scrape. 23 | * @typeParam {Request} [fetchOptions] - The options passed into fetch. 24 | * @typeParam {number} [timeout] - Number of seconds before the fetch request ends. (default is 10 seconds) 25 | * @typeParam {ValidatorSettings} [urlValidatorSettings] - Sets the options used by validator.js for testing the URL 26 | */ 27 | export interface OpenGraphScraperOptions { 28 | blacklist?: string[]; 29 | customMetaTags?: CustomMetaTags[]; 30 | fetchOptions?: RequestInit; 31 | html?: string; 32 | onlyGetOpenGraphInfo?: boolean | OnlyGetOpenGraphInfoItem[]; 33 | timeout?: number; 34 | url?: string; 35 | urlValidatorSettings?: ValidatorSettings; 36 | } 37 | /** 38 | * Options for isURL method in Validator.js 39 | * 40 | * @typeParam allow_protocol_relative_urls - if set as true protocol relative URLs will be allowed 41 | * @typeParam protocols - valid protocols can be modified with this option 42 | * @typeParam require_host - if set as false isURL will not check if host is present in the URL 43 | * @typeParam require_port - if set as true isURL will check if port is present in the URL 44 | * @typeParam require_protocol - if set as true isURL will return false if protocol is not present in the URL 45 | * @typeParam require_valid_protocol - isURL will check if the URL's protocol is present in the protocols option 46 | * @typeParam validate_length - if set as false isURL will skip string length validation (IE maximum is 
2083) 47 | * 48 | */ 49 | export interface ValidatorSettings { 50 | allow_fragments: boolean; 51 | allow_protocol_relative_urls: boolean; 52 | allow_query_components: boolean; 53 | allow_trailing_dot: boolean; 54 | allow_underscores: boolean; 55 | protocols: string[]; 56 | require_host: boolean; 57 | require_port: boolean; 58 | require_protocol: boolean; 59 | require_tld: boolean; 60 | require_valid_protocol: boolean; 61 | validate_length: boolean; 62 | } 63 | /** 64 | * The type for user defined custom meta tags you want to scrape. 65 | * 66 | * @typeParam {boolean} multiple - is there more than one of these tags on a page (normally this is false) 67 | * @typeParam {string} property - meta tag name/property attribute 68 | * @typeParam {string} fieldName - name of the result variable 69 | */ 70 | export interface CustomMetaTags { 71 | fieldName: string; 72 | multiple: boolean; 73 | property: string; 74 | } 75 | export interface TwitterImageObject { 76 | alt?: string; 77 | height?: number; 78 | url: string; 79 | width?: number; 80 | } 81 | export interface TwitterPlayerObject { 82 | height?: number; 83 | stream?: string; 84 | url: string; 85 | width?: number; 86 | } 87 | export interface ImageObject { 88 | height?: number; 89 | type?: string; 90 | url: string; 91 | width?: number; 92 | alt?: string; 93 | } 94 | export interface VideoObject { 95 | height?: number; 96 | type?: string; 97 | url: string; 98 | width?: number; 99 | } 100 | export interface MusicSongObject { 101 | disc?: string; 102 | track?: number; 103 | url: string; 104 | } 105 | export interface OgObjectInternal { 106 | alAndroidAppName?: string; 107 | alAndroidClass?: string; 108 | alAndroidPackage?: string; 109 | alAndroidUrl?: string; 110 | alIosAppName?: string; 111 | alIosAppStoreId?: string; 112 | alIosUrl?: string; 113 | alIpadAppName?: string; 114 | alIpadAppStoreId?: string; 115 | alIpadUrl?: string; 116 | alIphoneAppName?: string; 117 | alIphoneAppStoreId?: string; 118 | alIphoneUrl?: string; 
119 | alWebShouldFallback?: string; 120 | alWebUrl?: string; 121 | alWindowsAppId?: string; 122 | alWindowsAppName?: string; 123 | alWindowsPhoneAppId?: string; 124 | alWindowsPhoneAppName?: string; 125 | alWindowsPhoneUrl?: string; 126 | alWindowsUniversalAppId?: string; 127 | alWindowsUniversalAppName?: string; 128 | alWindowsUniversalUrl?: string; 129 | alWindowsUrl?: string; 130 | articleAuthor?: string; 131 | articleExpirationTime?: string; 132 | articleModifiedTime?: string; 133 | articlePublishedDate?: string; 134 | articlePublishedTime?: string; 135 | articleModifiedDate?: string; 136 | articlePublisher?: string; 137 | articleSection?: string; 138 | articleTag?: string; 139 | author?: string; 140 | bookAuthor?: string; 141 | bookCanonicalName?: string; 142 | bookIsbn?: string; 143 | bookReleaseDate?: string; 144 | booksBook?: string; 145 | booksRatingScale?: string; 146 | booksRatingValue?: string; 147 | bookTag?: string; 148 | businessContactDataCountryName?: string; 149 | businessContactDataLocality?: string; 150 | businessContactDataPostalCode?: string; 151 | businessContactDataRegion?: string; 152 | businessContactDataStreetAddress?: string; 153 | charset?: string; 154 | customMetaTags?: Record; 155 | dcContributor?: string; 156 | dcCoverage?: string; 157 | dcCreator?: string; 158 | dcDate?: string; 159 | dcDateCreated?: string; 160 | dcDateIssued?: string; 161 | dcDescription?: string; 162 | dcFormatMedia?: string; 163 | dcFormatSize?: string; 164 | dcIdentifier?: string; 165 | dcLanguage?: string; 166 | dcPublisher?: string; 167 | dcRelation?: string; 168 | dcRights?: string; 169 | dcSource?: string; 170 | dcSubject?: string; 171 | dcTitle?: string; 172 | dcType?: string; 173 | error?: string; 174 | errorDetails?: Error; 175 | favicon?: string; 176 | fbAppId?: string; 177 | jsonLD?: object[]; 178 | modifiedTime?: string; 179 | musicAlbum?: string; 180 | musicAlbumDisc?: string; 181 | musicAlbumTrack?: string; 182 | musicAlbumUrl?: string; 183 | 
musicCreator?: string; 184 | musicDuration?: string; 185 | musicMusician?: string; 186 | musicPlaylist?: string; 187 | musicRadioStation?: string; 188 | musicReleaseDate?: string; 189 | musicSong?: MusicSongObject[]; 190 | musicSongDisc?: string[]; 191 | musicSongProperty?: string[]; 192 | musicSongTrack?: string[]; 193 | musicSongUrl?: string[]; 194 | ogArticleAuthor?: string; 195 | ogArticleExpirationTime?: string; 196 | ogArticleModifiedTime?: string; 197 | ogArticlePublishedTime?: string; 198 | ogArticlePublisher?: string; 199 | ogArticleSection?: string; 200 | ogArticleTag?: string; 201 | ogAudio?: string; 202 | ogAudioSecureURL?: string; 203 | ogAudioType?: string; 204 | ogAudioURL?: string; 205 | ogAvailability?: string; 206 | ogDate?: string; 207 | ogDescription?: string; 208 | ogDeterminer?: string; 209 | ogEpisode?: string; 210 | ogImage?: ImageObject[]; 211 | ogImageAlt?: string[]; 212 | ogImageHeight?: string[]; 213 | ogImageProperty?: string[]; 214 | ogImageSecureURL?: string[]; 215 | ogImageType?: string[]; 216 | ogImageURL?: string[]; 217 | ogImageWidth?: string[]; 218 | ogLocale?: string; 219 | ogLocaleAlternate?: string; 220 | ogLogo?: string; 221 | ogMovie?: string; 222 | ogPriceAmount?: string; 223 | ogPriceCurrency?: string; 224 | ogProductAvailability?: string; 225 | ogProductCondition?: string; 226 | ogProductPriceAmount?: string; 227 | ogProductPriceCurrency?: string; 228 | ogProductRetailerItemId?: string; 229 | ogSiteName?: string; 230 | ogTitle?: string; 231 | ogType?: string; 232 | ogUrl?: string; 233 | ogVideo?: VideoObject[]; 234 | ogVideoActor?: string; 235 | ogVideoActorId?: string; 236 | ogVideoActorRole?: string; 237 | ogVideoDirector?: string; 238 | ogVideoDuration?: string; 239 | ogVideoHeight?: string[]; 240 | ogVideoOther?: string; 241 | ogVideoProperty?: string[]; 242 | ogVideoReleaseDate?: string; 243 | ogVideoSecureURL?: string; 244 | ogVideoSeries?: string; 245 | ogVideoTag?: string; 246 | ogVideoTvShow?: string; 247 | 
ogVideoType?: string[]; 248 | ogVideoWidth?: string[]; 249 | ogVideoWriter?: string; 250 | ogWebsite?: string; 251 | placeLocationLatitude?: string; 252 | placeLocationLongitude?: string; 253 | profileFirstName?: string; 254 | profileGender?: string; 255 | profileLastName?: string; 256 | profileUsername?: string; 257 | publishedTime?: string; 258 | releaseDate?: string; 259 | requestUrl?: string; 260 | restaurantContactInfoCountryName?: string; 261 | restaurantContactInfoEmail?: string; 262 | restaurantContactInfoLocality?: string; 263 | restaurantContactInfoPhoneNumber?: string; 264 | restaurantContactInfoPostalCode?: string; 265 | restaurantContactInfoRegion?: string; 266 | restaurantContactInfoStreetAddress?: string; 267 | restaurantContactInfoWebsite?: string; 268 | restaurantMenu?: string; 269 | restaurantRestaurant?: string; 270 | restaurantSection?: string; 271 | restaurantVariationPriceAmount?: string; 272 | restaurantVariationPriceCurrency?: string; 273 | success?: boolean; 274 | twitterAccount?: string; 275 | twitterAppIdGooglePlay?: string; 276 | twitterAppIdiPad?: string; 277 | twitterAppIdiPhone?: string; 278 | twitterAppNameGooglePlay?: string; 279 | twitterAppNameiPad?: string; 280 | twitterAppNameiPhone?: string; 281 | twitterAppUrlGooglePlay?: string; 282 | twitterAppUrliPad?: string; 283 | twitterAppUrliPhone?: string; 284 | twitterCard?: string; 285 | twitterCreator?: string; 286 | twitterCreatorId?: string; 287 | twitterDescription?: string; 288 | twitterImage?: TwitterImageObject[]; 289 | twitterImageAlt?: string[]; 290 | twitterImageHeight?: string[]; 291 | twitterImageProperty?: string[]; 292 | twitterImageSrc?: string[]; 293 | twitterImageWidth?: string[]; 294 | twitterPlayer?: TwitterPlayerObject[]; 295 | twitterPlayerHeight?: string[]; 296 | twitterPlayerProperty?: string[]; 297 | twitterPlayerStream?: string[]; 298 | twitterPlayerStreamContentType?: string; 299 | twitterPlayerWidth?: string[]; 300 | twitterSite?: string; 301 | 
twitterSiteId?: string; 302 | twitterTitle?: string; 303 | twitterUrl?: string; 304 | updatedTime?: string; 305 | } 306 | export type OgObject = Omit; 307 | -------------------------------------------------------------------------------- /types/lib/utils.d.ts: -------------------------------------------------------------------------------- 1 | import type { CustomMetaTags, OgObjectInternal, OpenGraphScraperOptions, ValidatorSettings } from './types'; 2 | export declare const defaultUrlValidatorSettings: { 3 | allow_fragments: boolean; 4 | allow_protocol_relative_urls: boolean; 5 | allow_query_components: boolean; 6 | allow_trailing_dot: boolean; 7 | allow_underscores: boolean; 8 | protocols: string[]; 9 | require_host: boolean; 10 | require_port: boolean; 11 | require_protocol: boolean; 12 | require_tld: boolean; 13 | require_valid_protocol: boolean; 14 | validate_length: boolean; 15 | }; 16 | /** 17 | * Checks if URL is valid 18 | * 19 | * @param {string} url - url to be checked 20 | * @param {object} urlValidatorSettings - settings used by validator 21 | * @return {boolean} boolean value if the url is valid 22 | * 23 | */ 24 | export declare function isUrlValid(url: string, urlValidatorSettings: ValidatorSettings): boolean; 25 | /** 26 | * Validates and formats url 27 | * 28 | * @param {string} url - url to be checked and formatted 29 | * @param {object} urlValidatorSettings - settings used by validator 30 | * @return {object} object whose `url` is the proper url or null 31 | * 32 | */ 33 | export declare function validateAndFormatURL(url: string, urlValidatorSettings: ValidatorSettings): { 34 | url: string | null; 35 | }; 36 | /** 37 | * Finds the image type from a given url 38 | * 39 | * @param {string} url - url to be checked 40 | * @return {string} image type from url 41 | * 42 | */ 43 | export declare function findImageTypeFromUrl(url: string): string; 44 | /** 45 | * Checks if image type is valid 46 | * 47 | * @param {string} type - type to be checked 48 | * @return {boolean} 
boolean value if type is valid 49 | * 50 | */ 51 | export declare function isImageTypeValid(type: string): boolean; 52 | /** 53 | * Checks if URL is a non html page 54 | * 55 | * @param {string} url - url to be checked 56 | * @return {boolean} boolean value if url is non html 57 | * 58 | */ 59 | export declare function isThisANonHTMLUrl(url: string): boolean; 60 | /** 61 | * Find and delete nested undefineds 62 | * 63 | * @param {object} object - object to be cleaned 64 | * @return {object} object without nested undefineds 65 | * 66 | */ 67 | export declare function removeNestedUndefinedValues(object: Record): OgObjectInternal; 68 | /** 69 | * Set up the options object used by ogs 70 | * 71 | * @param {object} ogsOptions - options passed into ogs 72 | * @return {object} object with the nested options for ogs 73 | * 74 | */ 75 | export declare function optionSetup(ogsOptions: OpenGraphScraperOptions): { 76 | options: OpenGraphScraperOptions; 77 | }; 78 | /** 79 | * Checks if the custom meta tags are valid 80 | * 81 | * @param {object[]} customMetaTags - custom meta tags to be checked 82 | * @return {boolean} boolean value if the custom meta tags are valid 83 | * 84 | */ 85 | export declare function isCustomMetaTagsValid(customMetaTags: CustomMetaTags[]): boolean; 86 | /** 87 | * Unescape script text. 88 | * 89 | * Certain websites escape script text within script tags, which can 90 | * interfere with `JSON.parse()`. Therefore, we need to unescape it. 
91 | * 92 | * Known good escape sequences: 93 | * 94 | * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Character_escape#uhhhh 95 | * 96 | * ```js 97 | * JSON.parse('"\\u2611"'); // '☑' 98 | * ``` 99 | * 100 | * Known bad escape sequences: 101 | * 102 | * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Character_escape#xhh 103 | * 104 | * ```js 105 | * JSON.parse('"\\x26"'); // '&' 106 | * ``` 107 | * 108 | * @param {string} scriptText - the text of the script tag 109 | * @returns {string} unescaped script text 110 | */ 111 | export declare function unescapeScriptText(scriptText: string): string; 112 | --------------------------------------------------------------------------------