├── .eslintignore
├── .eslintrc.json
├── .github
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ └── feature_request.md
└── workflows
│ └── node.js.yml
├── .gitignore
├── .snyk
├── CHANGELOG.md
├── LICENSE
├── README.md
├── example
├── Dockerfile
├── index.js
└── package.json
├── index.ts
├── lib
├── extract.ts
├── fallback.ts
├── fields.ts
├── isUrl.ts
├── media.ts
├── openGraphScraper.ts
├── request.ts
├── types.ts
└── utils.ts
├── package-lock.json
├── package.json
├── tests
├── .eslintrc.json
├── integration
│ ├── basic.spec.ts
│ ├── blacklist.spec.ts
│ ├── encoding.spec.ts
│ ├── fetch.spec.ts
│ ├── html.spec.ts
│ ├── image.spec.ts
│ ├── onlyGetOpenGraphInfo.spec.ts
│ ├── redirect.spec.ts
│ ├── spotify.spec.ts
│ ├── static.spec.ts
│ ├── statusCode.spec.ts
│ ├── twitter.spec.ts
│ ├── url.spec.ts
│ └── video.spec.ts
└── unit
│ ├── fallback.spec.ts
│ ├── media.spec.ts
│ ├── openGraphScraper.spec.ts
│ ├── static.spec.ts
│ └── utils.spec.ts
├── tsconfig.build.json
├── tsconfig.declaration.json
├── tsconfig.json
├── tsconfig.tests.json
└── types
├── index.d.ts
└── lib
├── extract.d.ts
├── fallback.d.ts
├── fields.d.ts
├── isUrl.d.ts
├── media.d.ts
├── openGraphScraper.d.ts
├── request.d.ts
├── types.d.ts
└── utils.d.ts
/.eslintignore:
--------------------------------------------------------------------------------
1 | coverage/
2 | dist/
3 | node_modules/
4 | example/
5 | types/
6 |
--------------------------------------------------------------------------------
/.eslintrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": [
3 | "airbnb-base",
4 | "airbnb-typescript/base",
5 | "plugin:promise/recommended",
6 | "plugin:@typescript-eslint/recommended-type-checked",
7 | "plugin:@typescript-eslint/stylistic-type-checked"
8 | ],
9 | "plugins": [
10 | "promise"
11 | ],
12 | "parserOptions": {
13 | "project": "./tsconfig.json"
14 | },
15 | "rules": {
16 | "@typescript-eslint/no-explicit-any": "warn",
17 | "@typescript-eslint/no-unsafe-argument": "warn",
18 | "@typescript-eslint/no-unsafe-assignment": "warn",
19 | "@typescript-eslint/no-unsafe-call": "warn",
20 | "@typescript-eslint/no-unsafe-member-access": "warn",
21 | "@typescript-eslint/no-unsafe-return": "warn",
22 | "import/no-named-as-default": 0,
23 | "max-len": ["error", {
24 | "code": 120,
25 | "ignoreStrings": true,
26 | "ignoreTrailingComments": true
27 | }],
28 | "no-param-reassign": "off"
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 |
16 | **Expected behavior**
17 | A clear and concise description of what you expected to happen.
18 |
19 | **Actual behavior**
20 | A clear and concise description of what is happening.
21 |
22 | **Screenshots**
23 | If applicable, add screenshots to help explain your problem.
24 |
25 | **Additional context**
26 | Add any other context about the problem here.
27 |
28 | - OS: [e.g. iOS/Windows/Linux]
29 | - Node Version: [e.g. 18]
30 | - openGraphScraper Version:
31 | - tsconfig.json:
32 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/workflows/node.js.yml:
--------------------------------------------------------------------------------
1 | # This workflow will do a clean install of node dependencies, build the source code and run tests across different versions of node
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-nodejs-with-github-actions
3 |
4 | name: Node.js CI
5 |
6 | on:
7 | push:
8 | branches: [ master ]
9 | pull_request:
10 | branches: [ master ]
11 |
12 | jobs:
13 | buildAndTest:
14 | runs-on: ubuntu-latest
15 | strategy:
16 | fail-fast: false
17 | matrix:
18 | node-version:
19 | - 18
20 | - 20
21 | - 22
22 | steps:
23 | - uses: actions/checkout@v3
24 | - name: Use Node.js ${{ matrix.node-version }}
25 | uses: actions/setup-node@v3
26 | with:
27 | node-version: ${{ matrix.node-version }}
28 | - name: Install Dependencies
29 | run: npm ci
30 | - name: Linting
31 | run: npm run eslint
32 | - name: Running Typescript
33 | run: npm run build
34 | - name: Running Typescript Declaration
35 | run: npm run build:declaration
36 | - name: Unit Testing
37 | run: npm run mocha:unit
38 | - name: Integration Testing
39 | run: npm run mocha:int
40 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | lib-cov
2 | *.seed
3 | *.log
4 | *.csv
5 | *.dat
6 | *.out
7 | *.pid
8 | *.gz
9 | *.DS_Store
10 |
11 | pids
12 | logs
13 | results
14 | node_modules
15 | dist
16 |
17 | .nyc_output/
18 | coverage/
19 |
20 | npm-debug.log
21 | .idea/
22 |
--------------------------------------------------------------------------------
/.snyk:
--------------------------------------------------------------------------------
1 | # Snyk (https://snyk.io) policy file, patches or ignores known vulnerabilities.
2 | version: v1.14.1
3 | ignore: {}
4 | # patches apply the minimum changes required to fix a vulnerability
5 | patch:
6 | SNYK-JS-LODASH-567746:
7 | - lodash:
8 | patched: '2020-05-30T23:04:33.532Z'
9 | - cheerio > lodash:
10 | patched: '2020-05-30T23:04:33.532Z'
11 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Change Log
2 |
3 | ## 6.10.0
4 |
5 | - Updating dependencies to fix npm vulnerabilities
6 |
7 | ## 6.9.0
8 |
9 | - Add `jsonLDOptions.throwOnJSONParseError` and change default behavior to not throw on JSON-LD string parse errors
10 |
11 | ## 6.8.4
12 |
13 | - Normalize `content-type` header check for case insensitivity
14 | - Updating dependencies
15 |
16 | ## 6.8.3
17 |
18 | - Fixed issue where empty jsonLD would cause an error
19 | - Updating dependencies
20 |
21 | ## 6.8.2
22 |
23 | - Remove new lines from jsonLD.
24 | - If url string is not `isLatin1` then encode it, otherwise you will run into `ByteString` errors within `fetch`
25 | - Updating dependencies
26 |
27 | ## 6.8.1
28 |
29 | - Fixing issue where setting `fetchOptions.headers` would replace the default `headers`
30 | - Updating dependencies
31 |
32 | ## 6.8.0
33 |
34 | - Updating how `onlyGetOpenGraphInfo` works. By default it is `false` but now it accepts an array of properties for which no fallback should be used.
35 | - Updating how you get types `import { SuccessResult } from 'open-graph-scraper/types';`. See readme for details.
36 | - Updating dependencies
37 |
38 | ## 6.7.2
39 |
40 | - Adding `types` to the npm export. You can now use `import { SuccessResult } from 'open-graph-scraper/types/lib/types';`
41 | - Updating dependencies
42 |
43 | ## 6.7.1
44 |
45 | - Remove `default` export off of the `run` function and just set `export` to `run`.
46 | - Updating dependencies
47 |
48 | ## 6.7.0
49 |
50 | - Replace `validator` with internal version of `isUrl` so we have better control on how that works.
51 | - Fix issue where `JSON` parsing fails when Youtube escape '&' to '\x26'.
52 | - Updating dependencies
53 |
54 | ## 6.6.3
55 |
56 | - Fix issue with the `charset` fallback. Replace Buffer.from with Uint8Array since body is always html
57 | - Updating dependencies to fix npm vulnerabilities
58 |
59 | ## 6.6.2
60 |
61 | - Fixed issue with `package.json` `exports` was not working in `CommonJs` projects.
62 | - Fixed issue where if the `jsonLD` tag was empty, it would cause an error.
63 |
64 | ## 6.6.1
65 |
66 | - Use `node16` for `module`/`moduleResolution` ESM build
67 | - Fixed issue with `package.json` `exports` was not working in `NextJs` projects.
68 |
69 | ## 6.6.0
70 |
71 | - Updating the `tsc` build process to better support both `ESM` and `commonJS`
72 | - Fixed issue where some meta tags would always come back as an array even though there was only ever one meta tag.
73 | - Removed the `dist` folder from version control
74 | - Start running node22 in the CI pipeline
75 | - General typescript clean up
76 | - Example service will only return the `result` of OGS now
77 | - Updating dependencies
78 |
79 | ## 6.5.2
80 |
81 | - adding a new favicon fallback using appIcon
82 | - Updating dependencies to fix npm vulnerabilities
83 |
84 | ## 6.5.1
85 |
86 | - jsonLD is now an array of objects since there can be more than one jsonLD tag per page
87 | - Updating dependencies to fix npm vulnerabilities
88 |
89 | ## 6.5.0
90 |
91 | - Adding support for JSON LD
92 | - Adding support for `og:image:alt`, `twitterAccount`, `fbAppId` and extra og tags for `music` and `video`
93 | - Fixing jsdoc param name
94 | - Updating dependencies
95 |
96 | ## 6.4.0
97 |
98 | - Add character encoding detection and decoding logic using `iconv-lite`
99 | - Updating dependencies
100 |
101 | ## 6.3.4
102 |
103 | - Adding check to make sure `customMetaTags` are valid
104 | - Updating dependencies
105 |
106 | ## 6.3.3
107 |
108 | - Updating dependencies
109 | - Send the `Accept: text/html` header by default
110 |
111 | ## 6.3.2
112 |
113 | - Fixing issue with npm
114 |
115 | ## 6.3.1
116 |
117 | - Adding a fallback for `charset` using `http-equiv`
118 | - Updating dependencies to fix npm vulnerabilities
119 |
120 | ## 6.3.0
121 |
122 | - Export `SuccessResult` and `ErrorResult` types
123 | - Updating dependencies
124 |
125 | ## 6.2.2
126 |
127 | - Updating dependencies to fix npm vulnerabilities
128 |
129 | ## 6.2.1
130 |
131 | - Send back more details when there is a server error
132 |
133 | ## 6.2.0
134 |
135 | - Modified the `url` property in `OpenGraphScraperOptions` to be an optional property since you don't need this when using just `html`
136 | - `Type` can be optional in `ImageObject` since type is not set if it's invalid
137 | - Take all of the `customMetaTags` out of base of `ogObject` and store them into `ogObject.customMetaTags`
138 | - The internal meta properties can be string arrays
139 | - Updating Dependencies
140 |
141 | ## 6.1.0
142 |
143 | - Setting the `origin` `header` to the request url since `fetch` runs in [cors mode by default](https://github.com/nodejs/undici/issues/1305).
144 | - Import `undici` for `fetch` so all versions of node18 are running the same version of `fetch`. Now ogs supports all versions of node18!
145 | - Updating Dependencies
146 |
147 | ## 6.0.1
148 |
149 | - `OpenGraphScraperOptions.fetchOptions` should be of type `RequestInit` instead of `Request`.
150 | - Updating Dependencies
151 |
152 | ## 6.0.0 (Has breaking changes!)
153 |
154 | - Replace `GOT` with [fetch](https://nodejs.org/docs/latest-v18.x/api/globals.html#fetch)!
155 | - Only supporting `node18` or higher going forward
156 | - Updated how options work. `Fetch` and `OGS` options no longer being mixed together, users can now set [fetch options](https://developer.mozilla.org/en-US/docs/Web/API/fetch#options) using `options.fetchOptions`
157 | - Remove any ogImages/ogVideos/twitterImages/twitterPlayers/musicSongs results that have no url
158 | - The `downloadLimit` option has been removed in favor of just using timeouts.
159 | - Limit ogImages/ogVideos/twitterImages/twitterPlayers/musicSongs to 10 items
160 | - Adding html to the `SuccessResult` of `OGS`
161 | - Adding `options.timeout` to set the fetch request timeout. (default is 10 seconds)
162 | - Remove `null` values from ogImages/ogVideos/twitterImages/twitterPlayers/musicSongs
163 | - Removing `options.allMedia`, you can just grab the first value of the array for the previous behavior
164 | - Removing `options.ogImageFallback`, you can set `options.onlyGetOpenGraphInfo` to `true` for the previous behavior
165 | - ogImages/ogVideos/twitterImages/twitterPlayers/musicSongs will always be an array now, you can just grab the first value of the array for the previous behavior
166 | - Updating Dependencies
167 |
168 | ## 5.2.3
169 |
170 | - Add in declaration files for typescript users.
171 |
172 | ## 5.2.2
173 |
174 | - Specify true/false to distinguish `SuccessResult` and `ErrorResult` by `error` field.
175 |
176 | ## 5.2.1
177 |
178 | - Adding the importsNotUsedAsValues flag and fixing type import issues
179 |
180 | ## 5.2.0
181 |
182 | - Remove the `charset` lib and just use `chardet` for finding the html encoding
183 | - Remove `peekSize` option since that was used by `charset`
184 | - Updating the `charset` fallback to be more reliable
185 | - Adding support for `article:published_date` and `article:modified_date` meta tags
186 | - Updating Dependencies
187 |
188 | ## 5.1.1
189 |
190 | - Fix issue where using `import` would cause typescript errors
191 | - Updating the `urlValidatorSettings` defaults to match `validatorjs`
192 | - Updating Dependencies
193 |
194 | ## 5.1.0
195 |
196 | - Convert source code to typescript
197 | - Changing `response.body` to be a `string` and `response.rawBody` to be a `buffer`. They now match the `node` type for `response`.
198 | - Updating Dependencies
199 |
200 | ## 5.0.5
201 |
202 | - Adding `twitterImageObject` and `twitterPlayerObject` types
203 | - Updating Dependencies
204 |
205 | ## 5.0.4
206 |
207 | - The `options.downloadLimit` type now allows for `false`
208 | - Updating Dependencies
209 |
210 | ## 5.0.3
211 |
212 | - Adding successResult and errorResult types
213 |
214 | ## 5.0.2
215 |
216 | - TS export now has common GOT options.
217 | - Updating Dependencies
218 |
219 | ## 5.0.1
220 |
221 | - TS export run as a Promise
222 | - Updating Docs
223 | - Updating Dependencies
224 |
225 | ## 5.0.0
226 |
227 | - Updating to `got` version 12!
228 | - Adding typescript support.
229 | - The `retry` option is now an object -> https://github.com/sindresorhus/got/blob/main/documentation/7-retry.md#retry
230 | - The `timeout` option is now an object -> https://github.com/sindresorhus/got/blob/main/documentation/6-timeout.md#timeout-options
231 | - Dropping callback support. If you still want to use callbacks, you can use `callbackify` -> https://nodejs.org/api/util.html#util_util_callbackify_original
232 | - Auth errors will now be passed back to the client and will no longer just be `Page not found` errors.
233 | - Dropping support for node12 since `got` no longer supports it.
234 | - Removing `options.encoding`.
235 | - Updating Dependencies
236 |
237 | ## 4.11.1
238 |
239 | - Updating Dependencies to fix a security vulnerability
240 |
241 | ## 4.11.0
242 |
243 | - Adding support for fetching the favicon
244 | - Updating Dependencies
245 |
246 | ## 4.10.0
247 |
248 | - Adding a check for the `content-type` header, it has to contain `text/html`
249 | - Adding `options.downloadLimit`, it sets the maximum size of the content downloaded from the server, in bytes
250 | - Updating Dependencies
251 |
252 | ## 4.9.2
253 |
254 | - Updating Dependencies to fix a security vulnerability
255 |
256 | ## 4.9.1
257 |
258 | - Updating Dependencies to fix a security vulnerability
259 |
260 | ## 4.9.0
261 |
262 | - Dropping support for Node10 since it has reached its end of life
263 | - Setting response.rawBody to the parsed body since response.body is a buffer
264 | - Updating Dependencies
265 |
266 | ## 4.8.2
267 |
268 | - Adding support for Node16
269 | - Updating Dependencies
270 |
271 | ## 4.8.1
272 |
273 | - Fixing bug where the title fallback would return multiple titles
274 |
275 | ## 4.8.0
276 |
277 | - Adding support for Proxies
278 | - Updating Dependencies
279 |
280 | ## 4.7.1
281 |
282 | - Updating Dependencies to fix a security vulnerability
283 |
284 | ## 4.7.0
285 |
286 | - Adding `options.urlValidatorSettings`, it sets the options used by validator.js for testing the URL
287 | - Updating Dependencies
288 |
289 | ## 4.6.0
290 |
291 | - Fixing issue where you would get false positive errors with pages that have `.tar` in it like `www.target.com`
292 | - Split extract and request into their own files
293 | - Updating Dependencies
294 |
295 | ## 4.5.1
296 |
297 | - Fixing issue where you couldn't set the `ogImageFallback` option to false
298 | - Fixing image type fallback so it works with arrays
299 |
300 | ## 4.5.0
301 |
302 | - Adding support for custom meta tags you want to scrape
303 | - If ogs thinks the URL isn't a HTML page, it will return a 'Must scrape an HTML page' error.
304 | - Updating Dependencies
305 |
306 | ## 4.4.0
307 |
308 | - Adding support for app links meta data
309 | - Removed the `withCharset` option, you can use `onlyGetOpenGraphInfo` now if you do not want charset
310 | - Removed the `runChar` option, this will always be turned on
311 | - `options.encoding === null` is now deprecated
312 | - Updating image fallback to only send back valid URLs
313 | - Updating Dependencies
314 |
315 | ## 4.3.1
316 |
317 | - Small code clean up and adding tests
318 | - Updating Dependencies
319 |
320 | ## 4.3.0
321 |
322 | - Adding support for request headers
323 |
324 | ## 4.2.1
325 |
326 | - Make sure item.fieldName exists before trying to use it
327 | - Updating devDependencies
328 | - Updating eslint rule set to be more simple
329 | - Fixed the badge icon in the readme
330 |
331 | ## 4.2.0
332 |
333 | - Checking for new tags like article, book, profile, business and restaurant
334 | - Adding support for Dublin Core tags!
335 | - Updating image fallback to send back width/height/type
336 | - Adding more title/description/locale/audio/other fallbacks
337 | - Fixed bug where if there was a weird casing on a meta, ogs would skip it
338 | - Will no longer return undefined values in some cases
339 | - Updating dependencies and removed lodash
340 |
341 | ## 4.1.1
342 |
343 | - Updating to use github actions for CI!
344 |
345 | ## 4.1.0
346 |
347 | - Updating to use `validator.js`'s `isURL` to check user input URLs
348 | - Moving snyk to be under devDependencies
349 |
350 | ## 4.0.0 (has breaking changes!)
351 |
352 | - Dropping support for any node version under 10
353 | - Open Graph values are no longer nested in a data object.
354 | - Stop using request.js(deprecated) and start using got.js
355 | - Using promises will now send the error/result/response back in one object.
356 | - Options.gzip is now options.decompress
357 | - Options.followAllRedirects is now options.followRedirect
358 | - Drop support for options.jar
359 | - Options.timeout must be a number value
360 | - Updating error messaging
361 | - Updating dependencies
362 |
363 | ## 3.6.2
364 |
365 | - Updating lodash.
366 |
367 | ## 3.6.1
368 |
369 | - Updating dependencies to fix vulnerabilities.
370 |
371 | ## 3.6.0
372 |
373 | - Replaced jschardet with chardet.
374 |
375 | ## 3.5.1
376 |
377 | - Updating dependencies.
378 |
379 | ## 3.5.0
380 |
381 | - Adding Open Graph music tags!
382 |
383 | ## 3.4.0
384 |
385 | - Adding a new option for the 'jar' setting for requests. It will now be turned off by default.
386 |
387 | ## 3.3.0
388 |
389 | - Code refactor to work in an es5 environment!
390 |
391 | ## 3.2.0
392 |
393 | - Websites that don't have Open Graph images will now return an array of all of the images on the site
394 |
395 | ## 3.1.5
396 |
397 | - Updating lodash to fix a vulnerability
398 |
399 | ## 3.1.4
400 |
401 | - Returns more info on the error that occurred when using promises
402 |
403 | ## 3.1.3
404 |
405 | - Catch iconv exception to prevent unexpected charset
406 |
407 | ## 3.1.2
408 |
409 | - Checking for Open Graph price and availability info
410 |
411 | ## 3.1.1
412 |
413 | - Updating packages
414 |
415 | ## 3.1.0
416 |
417 | - Adding ability to extract meta from HTML string
418 |
419 | ## 3.0.2
420 |
421 | - Adding CHANGELOG.md
422 |
423 | ## 3.0.1
424 |
425 | - Fixing coverage reporter
426 | - Fixing tests
427 |
428 | ## 3.0.0
429 |
430 | - Updated dependencies to their latest version(s)
431 | - Officially now support Node.js v4 and up
432 | - Adds unit tests to ensure code quality
433 | - Adds options for encoding, blacklist, followAllRedirects, and maxRedirects
434 | - Module can now be used as a promise
435 | - `err` is now `error`
436 | - Adds check for Open Graph product info
437 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | this software and associated documentation files (the "Software"), to deal in
7 | the Software without restriction, including without limitation the rights to
8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9 | the Software, and to permit persons to whom the Software is furnished to do so,
10 | subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # openGraphScraper
2 |
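3 | [![Node.js CI](https://github.com/jshemas/openGraphScraper/actions/workflows/node.js.yml/badge.svg?branch=master)](https://github.com/jshemas/openGraphScraper/actions?query=branch%3Amaster)
4 | [![Known Vulnerabilities](https://snyk.io/test/github/jshemas/openGraphScraper/badge.svg)](https://snyk.io/test/github/jshemas/openGraphScraper)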
5 |
6 | A simple Node.js module (with TypeScript declarations) for scraping Open Graph, Twitter Card, and other metadata off a site.
7 |
8 | Note: `open-graph-scraper` doesn't support browser usage at this time but you can use `open-graph-scraper-lite` if you already have the `HTML` and can't use Node's [Fetch API](https://nodejs.org/dist/latest-v18.x/docs/api/globals.html#fetch).
9 |
10 | ## Installation
11 |
12 | ```bash
13 | npm install open-graph-scraper --save
14 | ```
15 |
16 | ## Usage
17 |
18 | ```javascript
19 | const ogs = require('open-graph-scraper');
20 | const options = { url: 'http://ogp.me/' };
21 | ogs(options)
22 | .then((data) => {
23 | const { error, html, result, response } = data;
24 | console.log('error:', error); // This returns true or false. True if there was an error. The error itself is inside the result object.
25 | console.log('html:', html); // This contains the HTML of page
26 | console.log('result:', result); // This contains all of the Open Graph results
27 | console.log('response:', response); // This contains response from the Fetch API
28 | })
29 | ```
30 |
31 | ## Results JSON
32 |
33 | Check the return for a ```success``` flag. If success is set to true, then the url input was valid. Otherwise it will be set to false. The above example will return something like...
34 |
35 | ```javascript
36 | {
37 | ogTitle: 'Open Graph protocol',
38 | ogType: 'website',
39 | ogUrl: 'https://ogp.me/',
40 | ogDescription: 'The Open Graph protocol enables any web page to become a rich object in a social graph.',
41 | ogImage: [
42 | {
43 | height: '300',
44 | type: 'image/png',
45 | url: 'https://ogp.me/logo.png',
46 | width: '300'
47 | }
48 | ],
49 | charset: 'utf-8',
50 | requestUrl: 'http://ogp.me/',
51 | success: true
52 | }
53 | ```
54 |
55 | ## Options
56 |
57 | | Name | Info | Default Value | Required |
58 | |----------------------|-------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------|----------|
59 | | url | URL of the site. | | x |
60 | | html | You can pass in an HTML string to run ogs on it. (use without options.url) | | |
61 | | fetchOptions | Options that are used by the Fetch API | {} | |
62 | | timeout | Request timeout for Fetch (Default is 10 seconds) | 10 | |
63 | | blacklist | Pass in an array of sites you don't want ogs to run on. | [] | |
64 | | onlyGetOpenGraphInfo | Only fetch open graph info and don't fall back on anything else. Also accepts an array of properties for which no fallback should be used | false | |
65 | | customMetaTags | Here you can define custom meta tags you want to scrape. | [] | |
66 | | urlValidatorSettings | Sets the options used by validator.js for testing the URL | [Here](https://github.com/jshemas/openGraphScraper/blob/master/lib/utils.ts#L4-L17) | |
67 | | jsonLDOptions | Sets the options used when parsing JSON-LD data | | |
68 |
69 | Note: `open-graph-scraper` uses the [Fetch API](https://nodejs.org/dist/latest-v18.x/docs/api/globals.html#fetch) for requests and most of [Fetch's options](https://developer.mozilla.org/en-US/docs/Web/API/fetch#options) should work as `open-graph-scraper`'s `fetchOptions` options.
70 |
71 | ## Types And Import Example
72 |
73 | ```javascript
74 | // example of how to get types
75 | import type { SuccessResult } from 'open-graph-scraper/types';
76 | const example: SuccessResult = {
77 | result: { ogTitle: 'this is a title' },
78 | error: false,
79 | response: {},
80 | html: ''
81 | }
82 |
83 | // import example
84 | import ogs from 'open-graph-scraper';
85 | const options = { url: 'http://ogp.me/' };
86 | ogs(options)
87 | .then((data) => {
88 | const { error, html, result, response } = data;
89 | console.log('error:', error); // This returns true or false. True if there was an error. The error itself is inside the result object.
90 | console.log('html:', html); // This contains the HTML of page
91 | console.log('result:', result); // This contains all of the Open Graph results
92 | console.log('response:', response); // This contains response from the Fetch API
93 | });
94 | ```
95 |
96 | ## Custom Meta Tag Example
97 |
98 | ```javascript
99 | const ogs = require('open-graph-scraper');
100 | const options = {
101 | url: 'https://github.com/jshemas/openGraphScraper',
102 | customMetaTags: [{
103 | multiple: false, // is there more than one of these tags on a page (normally this is false)
104 | property: 'hostname', // meta tag name/property attribute
105 | fieldName: 'hostnameMetaTag', // name of the result variable
106 | }],
107 | };
108 | ogs(options)
109 | .then((data) => {
110 | const { result } = data;
111 | console.log('hostnameMetaTag:', result.customMetaTags.hostnameMetaTag); // hostnameMetaTag: github.com
112 | })
113 | ```
114 |
115 | ## HTML Example
116 |
117 | ```javascript
118 | const ogs = require('open-graph-scraper');
119 | const options = {
120 | html: `
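121 | <link rel="icon" type="image/png" href="https://bar.com/foo.png" />
122 | <meta charset="utf-8" />
123 | <meta property="og:description" content="html description example" />
124 | <meta property="og:image" content="https://www.foo.com/bar.jpg" />
125 | <meta property="og:title" content="foobar" />
126 | <meta property="og:type" content="website" />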
127 | `
128 | };
129 | ogs(options)
130 | .then((data) => {
131 | const { result } = data;
132 | console.log('result:', result);
133 | // result: {
134 | // ogDescription: 'html description example',
135 | // ogTitle: 'foobar',
136 | // ogType: 'website',
137 | // ogImage: [ { url: 'https://www.foo.com/bar.jpg', type: 'jpg' } ],
138 | // favicon: 'https://bar.com/foo.png',
139 | // charset: 'utf-8',
140 | // success: true
141 | // }
142 | })
143 |
144 | ```
145 |
146 | ## User Agent Example
147 |
148 | The `user-agent` request header is set to [undici](https://github.com/nodejs/undici) by default. Some sites might block this, and changing the `user-agent` header (as shown below) might work. If not, you can try [using a proxy](https://www.scrapingbee.com/blog/proxy-node-fetch/) for the request and then pass the `html` into `open-graph-scraper`.
149 |
150 | ```javascript
151 | const ogs = require("open-graph-scraper");
152 | const userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36';
153 | ogs({ url: 'https://www.wikipedia.org/', fetchOptions: { headers: { 'user-agent': userAgent } } })
154 | .then((data) => {
155 | const { error, html, result, response } = data;
156 | console.log('error:', error); // This returns true or false. True if there was an error. The error itself is inside the result object.
157 | console.log('html:', html); // This contains the HTML of page
158 | console.log('result:', result); // This contains all of the Open Graph results
159 | console.log('response:', response); // This contains response from the Fetch API
160 | })
161 | ```
162 |
163 | ## JSON-LD Parsing Options Example
164 |
165 | `throwOnJSONParseError` and `logOnJSONParseError` properties control what happens if `JSON.parse`
166 | throws an error when parsing JSON-LD data.
167 | If `throwOnJSONParseError` is set to `true`, then the error will be thrown.
168 | If `logOnJSONParseError` is set to `true`, then the error will be logged to the console.
169 |
170 | ```javascript
171 | const ogs = require("open-graph-scraper");
172 | const userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36';
173 | ogs({ url: 'https://www.wikipedia.org/', jsonLDOptions: { throwOnJSONParseError: true } })
174 | .then((data) => {
175 | const { error, html, result, response } = data;
176 | console.log('error:', error); // This returns true or false. True if there was an error. The error itself is inside the result object.
177 | console.log('html:', html); // This contains the HTML of page
178 | console.log('result:', result); // This contains all of the Open Graph results
179 | console.log('response:', response); // This contains response from the Fetch API
180 | })
181 | ```
182 |
183 | ## Running the example app
184 |
185 | The `example` folder contains a simple express app that you can spin up by running `npm install && npm run start` inside that folder. Once the app is running, open a web browser and go to `http://localhost:3000/scraper?url=http://ogp.me/` to test it out. There is also a `Dockerfile` if you want to run this example app in a docker container.
186 |
--------------------------------------------------------------------------------
/example/Dockerfile:
--------------------------------------------------------------------------------
1 | # docker build -t open-graph-scraper .
2 | # docker run -dp 127.0.0.1:3000:3000 open-graph-scraper
3 | # http://127.0.0.1:3000/scraper?url=http://ogp.me/
4 | FROM node:20
5 |
6 | WORKDIR /usr/src/app
7 |
8 | COPY package*.json ./
9 |
10 | RUN npm install
11 |
12 | COPY . .
13 |
14 | EXPOSE 3000
15 |
16 | CMD [ "node", "index.js" ]
17 |
--------------------------------------------------------------------------------
/example/index.js:
--------------------------------------------------------------------------------
const express = require('express');
const ogs = require('open-graph-scraper');

const app = express();
const port = 3000;

// http://localhost:3000/scraper?url=http://ogp.me/
// Scrapes the page given in the `url` query parameter and responds with the ogs result.
app.get('/scraper', async (req, res) => {
  const { url } = req.query;
  // A repeated or bracketed `url` parameter is parsed into an array/object; ogs needs one string.
  if (typeof url !== 'string' || url === '') return res.send('Missing url query!');
  try {
    const data = await ogs({ url });
    return res.send(data);
  } catch (error) {
    // ogs rejects with an object carrying a `result` field; anything else gets a generic payload
    // so the response body is never empty.
    return res.send(error?.result ?? { success: false, error: String(error) });
  }
});

app.listen(port, () => {
  console.log(`Example open-graph-scraper app listening on port ${port}`);
});
21 |
--------------------------------------------------------------------------------
/example/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "example-app",
3 | "version": "1.0.0",
4 | "description": "Example open-graph-scraper express app",
5 | "main": "index.js",
6 | "scripts": {
7 | "start": "node index"
8 | },
9 | "license": "MIT",
10 | "dependencies": {
11 | "express": "^4.19.2",
12 | "open-graph-scraper": "^6.5.2"
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/index.ts:
--------------------------------------------------------------------------------
1 | /* eslint-disable import/no-import-module-exports */
2 | import setOptionsAndReturnOpenGraphResults from './lib/openGraphScraper';
3 | import type {
4 | ErrorResult,
5 | OpenGraphScraperOptions,
6 | SuccessResult,
7 | } from './lib/types';
8 |
/**
 * `open-graph-scraper` uses [fetch](https://developer.mozilla.org/en-US/docs/Web/API/fetch) for http requests
 * for scraping Open Graph and Twitter Card info off a website.
 *
 * @param {object} options - The options used by Open Graph Scraper
 * @param {boolean|string[]} [options.onlyGetOpenGraphInfo] - Only fetch open graph info and don't fall back on
 * anything else.
 * @param {object} [options.customMetaTags] - Here you can define custom meta tags you want to scrape.
 * @param {object} [options.fetchOptions] - Sets the options used by fetch for the http requests
 * @param {object} [options.urlValidatorSettings] - Sets the options used by validator.js for testing the URL
 * @param {string[]} [options.blacklist] - Pass in an array of sites you don't want ogs to run on.
 * @param {string} [options.html] - You can pass in an HTML string to run ogs on it. (use without options.url)
 * @param {number} [options.timeout] - Number of seconds before the fetch request ends. (default is 10 seconds)
 * @param {object} [options.jsonLDOptions] - Controls JSON-LD parse failures (`logOnJSONParseError`,
 * `throwOnJSONParseError`).
 * @param {string} options.url - URL of the site. (Required unless options.html is used)
 * @returns {Promise} Promise Object with the Open Graph results
 * @throws {ErrorResult} Rejects with `{ error: true, result, response, html }` when scraping fails; the
 * original exception is available as `result.errorDetails`.
 */
async function run(options: OpenGraphScraperOptions): Promise<SuccessResult> {
  let results;
  try {
    results = await setOptionsAndReturnOpenGraphResults(options);
  } catch (error) {
    // Anything can be thrown in JavaScript; normalise so `message` is always a string.
    const exception = error instanceof Error ? error : new Error(String(error));
    const returnError: ErrorResult = {
      error: true,
      result: {
        success: false,
        requestUrl: options.url,
        error: exception.message,
        errorDetails: exception,
      },
      response: undefined,
      html: undefined,
    };
    // The rejection value is a plain result object rather than an Error instance.
    // eslint-disable-next-line @typescript-eslint/no-throw-literal
    throw returnError;
  }
  const returnSuccess: SuccessResult = {
    error: false,
    result: results.ogObject,
    response: results.response,
    html: results.html,
  };
  return returnSuccess;
}
53 |
54 | export = run;
55 |
--------------------------------------------------------------------------------
/lib/extract.ts:
--------------------------------------------------------------------------------
1 | import { load } from 'cheerio';
2 |
3 | import fallback from './fallback';
4 | import fields from './fields';
5 | import mediaSetup from './media';
6 | import { unescapeScriptText } from './utils';
7 |
8 | import type { OgObjectInternal, OpenGraphScraperOptions } from './types';
9 |
/**
 * extract all of the meta tags needed for ogs
 *
 * @param {string} body - the body of the fetch request
 * @param {object} options - options for ogs
 * @return {object} object with ogs results
 *
 */
export default function extractMetaTags(body: string, options: OpenGraphScraperOptions) {
  let ogObject: OgObjectInternal = { success: true };
  const $ = load(body);
  const metaFields = fields;

  // visit every <meta> tag once, matching it against the known fields and any custom tags
  $('meta').each((index, meta) => {
    const { attribs } = meta;
    if (!attribs || (!attribs.property && !attribs.name)) return;
    const property = (attribs.property || attribs.name).toLowerCase();
    const content: any = attribs.content || attribs.value;

    metaFields.forEach((item) => {
      if (!item || property !== item.property.toLowerCase()) return;
      switch (item.fieldName) {
        // these fields can repeat, so their values are collected into arrays for the media mappers
        case 'musicSongDisc':
        case 'musicSongProperty':
        case 'musicSongTrack':
        case 'musicSongUrl':
        case 'ogImageAlt':
        case 'ogImageHeight':
        case 'ogImageProperty':
        case 'ogImageSecureURL':
        case 'ogImageType':
        case 'ogImageURL':
        case 'ogImageWidth':
        case 'ogVideoHeight':
        case 'ogVideoProperty':
        case 'ogVideoType':
        case 'ogVideoWidth':
        case 'twitterImageAlt':
        case 'twitterImageHeight':
        case 'twitterImageProperty':
        case 'twitterImageSrc':
        case 'twitterImageWidth':
        case 'twitterPlayerHeight':
        case 'twitterPlayerProperty':
        case 'twitterPlayerStream':
        case 'twitterPlayerWidth': {
          const collected = ogObject[item.fieldName];
          if (!collected) {
            ogObject[item.fieldName] = [content];
          } else {
            collected.push(content);
          }
          break;
        }
        // every other field keeps only the last value seen
        default:
          ogObject[item.fieldName] = content;
      }
    });

    if (!options.customMetaTags) return;

    options.customMetaTags.forEach((item) => {
      if (!ogObject.customMetaTags) ogObject.customMetaTags = {};
      if (!item || property !== item.property.toLowerCase()) return;
      const collected = ogObject.customMetaTags[item.fieldName];
      if (!item.multiple) {
        ogObject.customMetaTags[item.fieldName] = content;
      } else if (!collected) {
        ogObject.customMetaTags[item.fieldName] = [content];
      } else if (Array.isArray(collected)) {
        ogObject.customMetaTags[item.fieldName] = [...collected, content];
      }
    });
    // don't leave an empty customMetaTags object behind when nothing matched
    if (ogObject.customMetaTags && Object.keys(ogObject.customMetaTags).length === 0) delete ogObject.customMetaTags;
  });

  // formats the multiple media values
  ogObject = mediaSetup(ogObject);

  // fallbacks and JSON-LD are skipped only when onlyGetOpenGraphInfo is exactly `true`
  if (!options.onlyGetOpenGraphInfo || Array.isArray(options.onlyGetOpenGraphInfo)) {
    ogObject = fallback(ogObject, options, $, body);

    $('script').each((index, script) => {
      if (script.attribs.type !== 'application/ld+json') return;
      if (!ogObject.jsonLD) ogObject.jsonLD = [];
      const rawText = $(script).text().trim();
      if (!rawText) return;
      // remove newlines, then unescape before parsing
      const scriptText = unescapeScriptText(rawText.replace(/(\r\n|\n|\r)/gm, ''));
      try {
        ogObject.jsonLD.push(JSON.parse(scriptText));
      } catch (error: unknown) {
        // invalid JSON-LD is skipped unless the caller asked to log and/or rethrow
        if (options.jsonLDOptions?.logOnJSONParseError) {
          console.error('Error parsing JSON-LD script tag:', error);
        }
        if (options.jsonLDOptions?.throwOnJSONParseError) {
          throw error;
        }
      }
    });
  }

  return ogObject;
}
119 |
--------------------------------------------------------------------------------
/lib/fallback.ts:
--------------------------------------------------------------------------------
1 | import chardet from 'chardet';
2 | import type { CheerioAPI } from 'cheerio';
3 |
4 | import {
5 | defaultUrlValidatorSettings,
6 | findImageTypeFromUrl,
7 | isImageTypeValid,
8 | isUrlValid,
9 | } from './utils';
10 | import type {
11 | OpenGraphScraperOptions, ImageObject, OgObjectInternal, OnlyGetOpenGraphInfoItem,
12 | } from './types';
13 |
// truthy only when the first element matching `selector` has a non-empty `attribute`
const doesElementExist = (selector:string, attribute:string, $: CheerioAPI) => {
  const value = $(selector).attr(attribute);
  return value && value.length > 0;
};
17 |
/**
 * finds the first candidate whose attribute is present and non-empty
 *
 * @param {object} $ - cheerio.load() of the current html
 * @param {Array} candidates - `[selector, attribute]` pairs, tried in order
 * @return {string|undefined} the attribute value of the first match, if any
 *
 */
const firstAttributeValue = ($: CheerioAPI, candidates: [string, string][]) => {
  const winner = candidates.find(([selector, attribute]) => doesElementExist(selector, attribute, $));
  return winner ? $(winner[0]).attr(winner[1]) : undefined;
};

/**
 * ogs fallbacks
 *
 * @param {object} ogObject - the current ogObject
 * @param {object} options - options for ogs
 * @param {object} $ - cheerio.load() of the current html
 * @param {string} body - the raw html, used to detect the charset when the page does not declare one
 * @return {object} object with ogs results with updated fallback values
 *
 */
export function fallback(ogObject: OgObjectInternal, options: OpenGraphScraperOptions, $: CheerioAPI, body: string) {
  // onlyGetOpenGraphInfo: falsy -> every fallback runs, true -> none, array -> all but the listed keys
  const shouldFallback = (key: OnlyGetOpenGraphInfoItem): boolean => {
    if (!options.onlyGetOpenGraphInfo) {
      return true;
    }
    if (options.onlyGetOpenGraphInfo === true) {
      return false;
    }
    return !options.onlyGetOpenGraphInfo.includes(key);
  };

  // title fallback
  if (!ogObject.ogTitle && shouldFallback('title')) {
    if ($('title').text().length > 0) {
      ogObject.ogTitle = $('title').first().text();
    } else if (doesElementExist('head > meta[name="title"]', 'content', $)) {
      ogObject.ogTitle = $('head > meta[name="title"]').attr('content');
    } else {
      const titleSelector = ['.post-title', '.entry-title', 'h1[class*="title" i] a', 'h1[class*="title" i]']
        .find((selector) => $(selector).text().length > 0);
      if (titleSelector) ogObject.ogTitle = $(titleSelector).text();
    }
  }

  // Get meta description tag if og description was not provided
  if (!ogObject.ogDescription && shouldFallback('description')) {
    const metaDescription = firstAttributeValue($, [
      ['head > meta[name="description"]', 'content'],
      ['head > meta[itemprop="description"]', 'content'],
    ]);
    if (metaDescription) {
      ogObject.ogDescription = metaDescription;
    } else if ($('#description').text().length > 0) {
      ogObject.ogDescription = $('#description').text();
    }
  }

  // Get all of images if there is no og:image info
  if (!ogObject.ogImage && shouldFallback('image')) {
    const fallbackImages: ImageObject[] = [];
    // returning undefined (never `false`) keeps cheerio's each() iterating
    $('img').each((index, imageElement) => {
      const source: string = $(imageElement).attr('src') ?? '';
      if (!source) return;
      const type = findImageTypeFromUrl(source);
      if (
        !isUrlValid(source, (options.urlValidatorSettings ?? defaultUrlValidatorSettings)) || !isImageTypeValid(type)
      ) return;
      const fallbackImage: ImageObject = {
        url: source,
        type,
      };
      // Number() yields NaN/0 for missing, empty or non-numeric attributes, which are skipped
      const width = Number($(imageElement).attr('width'));
      const height = Number($(imageElement).attr('height'));
      if (width) fallbackImage.width = width;
      if (height) fallbackImage.height = height;
      fallbackImages.push(fallbackImage);
    });
    // limit to 10 and leave ogImage unset when nothing usable was found
    if (fallbackImages.length > 0) ogObject.ogImage = fallbackImages.slice(0, 10);
  } else if (ogObject.ogImage) {
    // fill in a missing image type from the url's file extension
    ogObject.ogImage.forEach((image) => {
      if (image.url && !image.type) {
        const type = findImageTypeFromUrl(image.url);
        if (isImageTypeValid(type)) image.type = type;
      }
    });
  }

  // audio fallback
  if (!ogObject.ogAudioURL && !ogObject.ogAudioSecureURL && shouldFallback('audioUrl')) {
    // prefer <audio src>, then the first <audio><source src>
    const audioSelector = ['audio', 'audio > source'].find((selector) => doesElementExist(selector, 'src', $));
    if (audioSelector) {
      const audioUrl: string = $(audioSelector).attr('src') ?? '';
      if (audioUrl.startsWith('https')) {
        ogObject.ogAudioSecureURL = audioUrl;
      } else {
        ogObject.ogAudioURL = audioUrl;
      }
      if (!ogObject.ogAudioType && doesElementExist(audioSelector, 'type', $)) {
        ogObject.ogAudioType = $(audioSelector).attr('type') ?? '';
      }
    }
  }

  // locale fallback
  if (!ogObject.ogLocale && shouldFallback('locale')) {
    const locale = firstAttributeValue($, [
      ['html', 'lang'],
      ['head > meta[itemprop="inLanguage"]', 'content'],
    ]);
    if (locale) ogObject.ogLocale = locale;
  }

  // logo fallback
  if (!ogObject.ogLogo && shouldFallback('logo')) {
    const logo = firstAttributeValue($, [
      ['meta[itemprop="logo"]', 'content'],
      ['img[itemprop="logo"]', 'src'],
    ]);
    if (logo) ogObject.ogLogo = logo;
  }

  // url fallback
  if (!ogObject.ogUrl && shouldFallback('url')) {
    const url = firstAttributeValue($, [
      ['link[rel="canonical"]', 'href'],
      ['link[rel="alternate"][hreflang="x-default"]', 'href'],
    ]);
    if (url) ogObject.ogUrl = url;
  }

  // date fallback
  if (!ogObject.ogDate && shouldFallback('date')) {
    const date = firstAttributeValue($, [
      ['head > meta[name="date"]', 'content'],
      ['[itemprop*="datemodified" i]', 'content'],
      ['[itemprop="datepublished" i]', 'content'],
      ['[itemprop*="date" i]', 'content'],
      ['time[itemprop*="date" i]', 'datetime'],
      ['time[datetime]', 'datetime'],
    ]);
    if (date) ogObject.ogDate = date;
  }

  // favicon fallback
  if (!ogObject.favicon && shouldFallback('favicon')) {
    const favicon = firstAttributeValue($, [
      ['link[rel="shortcut icon"]', 'href'],
      ['link[rel="icon"]', 'href'],
      ['link[rel="mask-icon"]', 'href'],
      ['link[rel="apple-touch-icon"]', 'href'],
      ['link[type="image/png"]', 'href'],
      ['link[type="image/ico"]', 'href'],
      ['link[type="image/x-icon"]', 'href'],
      ['head > meta[property*="appIcon"]', 'content'],
    ]);
    if (favicon) ogObject.favicon = favicon;
  }

  // set the charset
  // attr() only reads the first matched element, so select the <meta> that actually carries a
  // charset attribute instead of whichever <meta> happens to come first in the document
  const metaCharset = $('meta[charset]').attr('charset');
  const namedCharset = $('head > meta[name="charset"]').attr('content');
  const contentType = $('head > meta[http-equiv="content-type"]').attr('content');
  if (metaCharset) {
    ogObject.charset = metaCharset;
  } else if (namedCharset) {
    ogObject.charset = namedCharset;
  } else if (contentType) {
    // e.g. content="text/html; charset=ISO-8859-1"
    const charsetMatch = /charset=([^()<>@,;:"/[\]?.=\s]*)/i.exec(contentType);
    if (charsetMatch?.[1]) ogObject.charset = charsetMatch[1];
  } else if (body) {
    // nothing declared in the markup: let chardet guess from the body
    const encoder = new TextEncoder();
    const uint8Array = encoder.encode(body);
    ogObject.charset = chardet.detect(uint8Array) ?? '';
  }

  return ogObject;
}
208 |
209 | export default fallback;
210 |
--------------------------------------------------------------------------------
/lib/isUrl.ts:
--------------------------------------------------------------------------------
1 | // eslint-disable-next-line @typescript-eslint/ban-ts-comment
2 | // @ts-nocheck
3 | /* eslint-disable @typescript-eslint/no-unsafe-argument */
4 | /* eslint-disable @typescript-eslint/no-unsafe-call */
5 | /* eslint-disable @typescript-eslint/no-unsafe-member-access */
6 | /* eslint-disable @typescript-eslint/no-unsafe-assignment */
7 | // This is from https://github.com/validatorjs/validator.js version: 13.12.0
8 |
// https://github.com/validatorjs/validator.js/blob/master/src/lib/util/assertString.js
// Throws a TypeError naming the offending type unless `input` is a string primitive or a String object.
function assertString(input) {
  if (typeof input === 'string' || input instanceof String) return;

  let invalidType = typeof input;
  if (input === null) {
    invalidType = 'null';
  } else if (invalidType === 'object') {
    // report the concrete class (Array, Date, ...) rather than the generic "object"
    invalidType = input.constructor.name;
  }

  throw new TypeError(`Expected a string but received a ${invalidType}`);
}
21 |
// https://github.com/validatorjs/validator.js/blob/master/src/lib/util/merge.js
// Copies every default whose key is still undefined onto `obj` (mutating it) and returns `obj`.
// eslint-disable-next-line @typescript-eslint/default-param-last
function merge(obj = { }, defaults) {
  const target = obj;
  // eslint-disable-next-line no-restricted-syntax
  for (const name in defaults) {
    if (target[name] === undefined) target[name] = defaults[name];
  }
  return target;
}
33 |
// https://github.com/validatorjs/validator.js/blob/master/src/lib/isFQDN.js
const defaultFqdnOptions = {
  require_tld: true,
  allow_underscores: false,
  allow_trailing_dot: false,
  allow_numeric_tld: false,
  allow_wildcard: false,
  ignore_max_length: false,
};

// Checks whether `str` is a fully qualified domain name.
// `options` is completed in place with defaultFqdnOptions for any key left undefined.
function isFQDN(str, options) {
  assertString(str);
  options = merge(options, defaultFqdnOptions);

  let hostname = str;

  /* Remove the optional trailing dot before checking validity */
  if (options.allow_trailing_dot && hostname.endsWith('.')) {
    hostname = hostname.slice(0, -1);
  }

  /* Remove the optional wildcard before checking validity */
  if (options.allow_wildcard === true && hostname.startsWith('*.')) {
    hostname = hostname.slice(2);
  }

  const labels = hostname.split('.');
  const tld = labels[labels.length - 1];

  if (options.require_tld) {
    // disallow fqdns without tld
    if (labels.length < 2) return false;

    const tldPattern = /^([a-z\u00A1-\u00A8\u00AA-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]{2,}|xn[a-z0-9-]{2,})$/i;
    // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
    if (!options.allow_numeric_tld && !tldPattern.test(tld)) return false;

    // disallow spaces
    if (/\s/.test(tld)) return false;
  }

  // reject numeric TLDs
  if (!options.allow_numeric_tld && /^\d+$/.test(tld)) return false;

  // every label must pass all of: max length, allowed characters, no full-width chars,
  // no leading/trailing hyphen, and (unless allowed) no underscore
  // eslint-disable-next-line @typescript-eslint/no-unsafe-return
  return labels.every((label) => (
    (label.length <= 63 || options.ignore_max_length)
    && /^[a-z_\u00a1-\uffff0-9-]+$/i.test(label)
    && !/[\uff01-\uff5e]/.test(label)
    && !/^-|-$/.test(label)
    && (options.allow_underscores || !/_/.test(label))
  ));
}
113 |
// https://github.com/validatorjs/validator.js/blob/master/src/lib/isIP.js
const IPv4SegmentFormat = '(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])';
const IPv4AddressFormat = `(${IPv4SegmentFormat}[.]){3}${IPv4SegmentFormat}`;
const IPv4AddressRegExp = new RegExp(`^${IPv4AddressFormat}$`);

const IPv6SegmentFormat = '(?:[0-9a-fA-F]{1,4})';
// one alternative per number of leading "segment:" groups, from 7 down to 0
const ipv6Alternatives = [
  `(?:${IPv6SegmentFormat}:){7}(?:${IPv6SegmentFormat}|:)`,
  `(?:${IPv6SegmentFormat}:){6}(?:${IPv4AddressFormat}|:${IPv6SegmentFormat}|:)`,
  `(?:${IPv6SegmentFormat}:){5}(?::${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,2}|:)`,
  `(?:${IPv6SegmentFormat}:){4}(?:(:${IPv6SegmentFormat}){0,1}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,3}|:)`,
  `(?:${IPv6SegmentFormat}:){3}(?:(:${IPv6SegmentFormat}){0,2}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,4}|:)`,
  `(?:${IPv6SegmentFormat}:){2}(?:(:${IPv6SegmentFormat}){0,3}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,5}|:)`,
  `(?:${IPv6SegmentFormat}:){1}(?:(:${IPv6SegmentFormat}){0,4}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,6}|:)`,
  `(?::((?::${IPv6SegmentFormat}){0,5}:${IPv4AddressFormat}|(?::${IPv6SegmentFormat}){1,7}|:))`,
];
// the optional trailing group accepts a "%zone" suffix
const IPv6AddressRegExp = new RegExp(`^(${ipv6Alternatives.join('|')})(%[0-9a-zA-Z-.:]{1,})?$`);

// Tests `str` against the IPv4 and/or IPv6 patterns.
// `version` may be 4, 6 (number or string) or omitted/empty to accept either; anything else yields false.
function isIP(str, version = '') {
  assertString(str);
  switch (String(version)) {
    case '':
      // eslint-disable-next-line @typescript-eslint/no-unsafe-return
      return isIP(str, 4) || isIP(str, 6);
    case '4':
      return IPv4AddressRegExp.test(str);
    case '6':
      return IPv6AddressRegExp.test(str);
    default:
      return false;
  }
}
146 |
// https://github.com/validatorjs/validator.js/blob/master/src/lib/isURL.js
/*
  options for isURL method

  protocols - list of protocols accepted when require_valid_protocol is set
  require_tld - passed on to isFQDN: the host must have a top level domain
  require_protocol - if set as true isURL will return false if protocol is not present in the URL
  require_valid_protocol - isURL will check if the URL's protocol is present in the protocols option
  require_host - if set as false isURL will not check if host is present in the URL
  require_port - if set as true isURL will check if port is present in the URL
  allow_underscores - passed on to isFQDN: allow "_" inside host labels
  allow_trailing_dot - passed on to isFQDN: allow a single trailing "." on the host
  allow_protocol_relative_urls - if set as true protocol relative URLs will be allowed
  allow_fragments - if set as false URLs containing "#" are rejected
  allow_query_components - if set as false URLs containing "?" or "&" are rejected
  validate_length - if set as false isURL will skip string length validation (IE maximum is 2083)

  isURL also reads these options, which have no default:

  disallow_auth - if truthy, URLs with a "user[:password]@" part are rejected
  host_whitelist - if set, the result is solely whether the host matches one of its strings/RegExps
  host_blacklist - if set, hosts matching one of its strings/RegExps are rejected

*/
const defaultUrlOptions = {
  protocols: ['http', 'https', 'ftp'],
  require_tld: true,
  require_protocol: false,
  require_host: true,
  require_port: false,
  require_valid_protocol: true,
  allow_underscores: false,
  allow_trailing_dot: false,
  allow_protocol_relative_urls: false,
  allow_fragments: true,
  allow_query_components: true,
  validate_length: true,
};

// matches a bracketed IPv6 host with an optional port: "[addr]" or "[addr]:port"
const wrappedIpv6 = /^\[([^\]]+)\](?::([0-9]+))?$/;
176 |
// true when `obj` is a RegExp, judged by its Object.prototype.toString tag
function isRegExp(obj) {
  const tag = Object.prototype.toString.call(obj);
  return tag === '[object RegExp]';
}

// true when `host` equals one of the entries in `matches` or is matched by a RegExp entry
function checkHost(host, matches) {
  // eslint-disable-next-line @typescript-eslint/no-unsafe-return
  return matches.some((candidate) => host === candidate || (isRegExp(candidate) && candidate.test(host)));
}
191 |
// Validates `url` piece by piece: fragment, query, protocol, path, auth, host and port.
// `options` is completed in place with defaultUrlOptions for any key left undefined.
export default function isURL(url, options) {
  assertString(url);
  if (!url || /[\s<>]/.test(url)) return false;
  if (url.startsWith('mailto:')) return false;

  options = merge(options, defaultUrlOptions);

  if (options.validate_length && url.length >= 2083) return false;
  if (!options.allow_fragments && url.includes('#')) return false;
  if (!options.allow_query_components && (url.includes('?') || url.includes('&'))) return false;

  // drop the fragment, then the query string
  let remainder = url.split('#')[0];
  remainder = remainder.split('?')[0];

  // protocol
  const protocolParts = remainder.split('://');
  if (protocolParts.length > 1) {
    const protocol = protocolParts.shift().toLowerCase();
    if (options.require_valid_protocol && !options.protocols.includes(protocol)) return false;
  } else if (options.require_protocol) {
    return false;
  } else if (remainder.slice(0, 2) === '//') {
    if (!options.allow_protocol_relative_urls) return false;
    protocolParts[0] = remainder.slice(2);
  }
  remainder = protocolParts.join('://');

  if (remainder === '') return false;

  // everything before the first "/" is [auth@]host[:port]
  const authority = remainder.split('/')[0];

  if (authority === '' && !options.require_host) return true;

  // auth
  const authParts = authority.split('@');
  if (authParts.length > 1) {
    if (options.disallow_auth) return false;
    if (authParts[0] === '') return false;
    const credentials = authParts.shift().split(':');
    // at most one ":" is allowed between user and password
    if (credentials.length > 2) return false;
    const [user, password] = credentials;
    if (user === '' && password === '') return false;
  }
  const hostname = authParts.join('@');

  // host and port, with bracketed IPv6 handled separately
  let host;
  let ipv6 = null;
  let portText = null;
  const ipv6Match = wrappedIpv6.exec(hostname);
  if (ipv6Match) {
    host = '';
    [, ipv6] = ipv6Match;
    portText = ipv6Match[2] || null;
  } else {
    const hostParts = hostname.split(':');
    host = hostParts.shift();
    if (hostParts.length) portText = hostParts.join(':');
  }

  if (portText !== null && portText.length > 0) {
    const port = parseInt(portText, 10);
    if (!/^[0-9]+$/.test(portText) || port <= 0 || port > 65535) return false;
  } else if (options.require_port) {
    return false;
  }

  // a whitelist, when present, decides the outcome on its own
  if (options.host_whitelist) return checkHost(host, options.host_whitelist);

  if (host === '' && !options.require_host) return true;

  if (!isIP(host) && !isFQDN(host, options) && (!ipv6 || !isIP(ipv6, 6))) return false;

  host = host || ipv6;

  if (options.host_blacklist && checkHost(host, options.host_blacklist)) return false;

  return true;
}
316 |
--------------------------------------------------------------------------------
/lib/media.ts:
--------------------------------------------------------------------------------
1 | import fields from './fields';
2 | import { removeNestedUndefinedValues } from './utils';
3 | import type {
4 | ImageObject,
5 | MusicSongObject,
6 | OgObjectInternal,
7 | TwitterImageObject,
8 | TwitterPlayerObject,
9 | VideoObject,
10 | } from './types';
11 |
// turns one zipped [url, width, height, alt] row into a twitter image object
const mediaMapperTwitterImage = ([url, width, height, alt]: TwitterImageObject[]) => ({
  alt,
  height,
  url,
  width,
});
18 |
// turns one zipped [url, width, height, stream] row into a twitter player object
const mediaMapperTwitterPlayer = ([url, width, height, stream]: TwitterPlayerObject[]) => ({
  height,
  stream,
  url,
  width,
});
25 |
// turns one zipped [url, track, disc] row into a music song object
const mediaMapperMusicSong = ([url, track, disc]: MusicSongObject[]) => ({
  disc,
  track,
  url,
});
31 |
// turns one zipped [url, width, height, type, alt] row into an image/video object
const mediaMapper = ([url, width, height, type, alt]: ImageObject[] | VideoObject[]) => ({
  height,
  type,
  url,
  width,
  alt,
});
39 |
// sort comparator: gifs first, then by largest dimension (descending);
// entries are left in place when either url is missing
const mediaSorter = (
  a: ImageObject | TwitterImageObject | VideoObject | TwitterPlayerObject,
  b: ImageObject | TwitterImageObject | VideoObject | TwitterPlayerObject,
) => {
  if (!a.url || !b.url) {
    return 0;
  }

  // lower-cased 2-5 character extension at the very end of the url, or null
  const extensionOf = (url: string) => /\.(\w{2,5})$/.exec(url)?.[1].toLowerCase() ?? null;
  const aIsGif = extensionOf(a.url) === 'gif';
  const bIsGif = extensionOf(b.url) === 'gif';

  if (aIsGif !== bIsGif) {
    return aIsGif ? -1 : 1;
  }
  const aLargestSide = Math.max(a.width ?? 0, a.height ?? 0);
  const bLargestSide = Math.max(b.width ?? 0, b.height ?? 0);
  return bLargestSide - aLargestSide;
};
60 |
// sort comparator: by disc (missing disc counts as 0), then by track;
// entries are left in place when either track is missing
const mediaSorterMusicSong = (a: MusicSongObject, b: MusicSongObject) => {
  if (!a.track || !b.track) {
    return 0;
  }
  const aDisc = a.disc ?? 0;
  const bDisc = b.disc ?? 0;
  if (aDisc > bDisc) {
    return 1;
  }
  if (aDisc < bDisc) {
    return -1;
  }
  return a.track - b.track;
};
71 |
// lodash zip replacement: row i is [array[i], args[0][i], args[1][i], ...];
// the number of rows follows the first array, and an undefined first array gives []
const zip = (array: any, ...args: any) => {
  if (array === undefined) return [];
  return array.map((head: any, position: number) => {
    const rest = args.map((column: []) => column[position]);
    return [head, ...rest];
  });
};
78 |
/**
 * Merges the per-attribute media arrays (url/width/height/type/alt/...) into
 * arrays of media objects stored under ogImage, ogVideo, twitterImage,
 * twitterPlayer and musicSong. Each list keeps at most 10 entries that have a
 * non-empty url.
 *
 * @param {object} ogObject - the current ogObject
 * @return {object} object with ogs results with updated media values
 *
 */
export function mediaSetup(ogObject: OgObjectInternal) {
  // an entry is only kept when the mapper produced a non-empty url
  const hasUrl = (entry: { url?: string }) => entry.url !== undefined && entry.url !== '';

  // if any og:image attribute exists, make sure every og:image array exists
  const firstImageField = ogObject.ogImageSecureURL
    ?? ogObject.ogImageURL
    ?? ogObject.ogImageProperty
    ?? ogObject.ogImageWidth
    ?? ogObject.ogImageHeight
    ?? ogObject.ogImageType
    ?? ogObject.ogImageAlt;
  if (firstImageField) {
    if (!ogObject.ogImageSecureURL) ogObject.ogImageSecureURL = [];
    if (!ogObject.ogImageURL) ogObject.ogImageURL = [];
    if (!ogObject.ogImageProperty) ogObject.ogImageProperty = [];
    // secure urls win over the plain og:image values
    if (ogObject.ogImageSecureURL.length !== 0) ogObject.ogImageProperty = ogObject.ogImageSecureURL;
    // og:image:url is the last resort
    if (ogObject.ogImageProperty.length === 0) ogObject.ogImageProperty = ogObject.ogImageURL;
    if (!ogObject.ogImageWidth) ogObject.ogImageWidth = [];
    if (!ogObject.ogImageHeight) ogObject.ogImageHeight = [];
    if (!ogObject.ogImageType) ogObject.ogImageType = [];
    if (!ogObject.ogImageAlt) ogObject.ogImageAlt = [];
  }

  // build the image list (max 10)
  const ogImages: ImageObject[] = zip(
    ogObject.ogImageProperty,
    ogObject.ogImageWidth,
    ogObject.ogImageHeight,
    ogObject.ogImageType,
    ogObject.ogImageAlt,
  )
    .map(mediaMapper)
    .filter(hasUrl)
    .slice(0, 10)
    .sort(mediaSorter);

  // if any og:video attribute exists, make sure every og:video array exists
  const firstVideoField = ogObject.ogVideoProperty
    ?? ogObject.ogVideoWidth
    ?? ogObject.ogVideoHeight
    ?? ogObject.ogVideoType;
  if (firstVideoField) {
    if (!ogObject.ogVideoProperty) ogObject.ogVideoProperty = [];
    if (!ogObject.ogVideoWidth) ogObject.ogVideoWidth = [];
    if (!ogObject.ogVideoHeight) ogObject.ogVideoHeight = [];
    if (!ogObject.ogVideoType) ogObject.ogVideoType = [];
  }

  // build the video list (max 10)
  const ogVideos: VideoObject[] = zip(
    ogObject.ogVideoProperty,
    ogObject.ogVideoWidth,
    ogObject.ogVideoHeight,
    ogObject.ogVideoType,
  )
    .map(mediaMapper)
    .filter(hasUrl)
    .slice(0, 10)
    .sort(mediaSorter);

  // if any twitter:image attribute exists, make sure every twitter:image array exists
  const firstTwitterImageField = ogObject.twitterImageSrc
    ?? ogObject.twitterImageProperty
    ?? ogObject.twitterImageWidth
    ?? ogObject.twitterImageHeight
    ?? ogObject.twitterImageAlt;
  if (firstTwitterImageField) {
    if (!ogObject.twitterImageSrc) ogObject.twitterImageSrc = [];
    // default to twitterImageSrc
    if (!ogObject.twitterImageProperty) ogObject.twitterImageProperty = ogObject.twitterImageSrc;
    if (!ogObject.twitterImageWidth) ogObject.twitterImageWidth = [];
    if (!ogObject.twitterImageHeight) ogObject.twitterImageHeight = [];
    if (!ogObject.twitterImageAlt) ogObject.twitterImageAlt = [];
  }

  // build the twitter image list (max 10)
  const twitterImages: TwitterImageObject[] = zip(
    ogObject.twitterImageProperty,
    ogObject.twitterImageWidth,
    ogObject.twitterImageHeight,
    ogObject.twitterImageAlt,
  )
    .map(mediaMapperTwitterImage)
    .filter(hasUrl)
    .slice(0, 10)
    .sort(mediaSorter);

  // if any twitter:player attribute exists, make sure every twitter:player array exists
  const firstTwitterPlayerField = ogObject.twitterPlayerProperty
    ?? ogObject.twitterPlayerWidth
    ?? ogObject.twitterPlayerHeight
    ?? ogObject.twitterPlayerStream;
  if (firstTwitterPlayerField) {
    if (!ogObject.twitterPlayerProperty) ogObject.twitterPlayerProperty = [];
    if (!ogObject.twitterPlayerWidth) ogObject.twitterPlayerWidth = [];
    if (!ogObject.twitterPlayerHeight) ogObject.twitterPlayerHeight = [];
    if (!ogObject.twitterPlayerStream) ogObject.twitterPlayerStream = [];
  }

  // build the twitter player list (max 10)
  const twitterPlayers: TwitterPlayerObject[] = zip(
    ogObject.twitterPlayerProperty,
    ogObject.twitterPlayerWidth,
    ogObject.twitterPlayerHeight,
    ogObject.twitterPlayerStream,
  )
    .map(mediaMapperTwitterPlayer)
    .filter(hasUrl)
    .slice(0, 10)
    .sort(mediaSorter);

  // if any music:song attribute exists, make sure every music:song array exists
  const firstMusicSongField = ogObject.musicSongProperty
    ?? ogObject.musicSongTrack
    ?? ogObject.musicSongDisc
    ?? ogObject.musicSongUrl;
  if (firstMusicSongField) {
    if (!ogObject.musicSongUrl) ogObject.musicSongUrl = [];
    // default to musicSongUrl
    if (!ogObject.musicSongProperty) ogObject.musicSongProperty = ogObject.musicSongUrl;
    if (!ogObject.musicSongTrack) ogObject.musicSongTrack = [];
    if (!ogObject.musicSongDisc) ogObject.musicSongDisc = [];
  }

  // build the music song list (max 10)
  const musicSongs: MusicSongObject[] = zip(
    ogObject.musicSongProperty,
    ogObject.musicSongTrack,
    ogObject.musicSongDisc,
  )
    .map(mediaMapperMusicSong)
    .filter(hasUrl)
    .slice(0, 10)
    .sort(mediaSorterMusicSong);

  // drop the raw per-attribute values; everything now lives under the main property
  fields.forEach((item) => {
    if (item.multiple && item.fieldName?.match('(ogImage|ogVideo|twitter|musicSong).*')) {
      delete ogObject[item.fieldName];
    }
  });

  // only attach the lists that actually contain something
  if (ogImages.length) ogObject.ogImage = ogImages;
  if (ogVideos.length) ogObject.ogVideo = ogVideos;
  if (twitterImages.length) ogObject.twitterImage = twitterImages;
  if (twitterPlayers.length) ogObject.twitterPlayer = twitterPlayers;
  if (musicSongs.length) ogObject.musicSong = musicSongs;

  // strip any undefined values left behind by the mappers
  return removeNestedUndefinedValues(ogObject);
}
228 |
229 | export default mediaSetup;
230 |
--------------------------------------------------------------------------------
/lib/openGraphScraper.ts:
--------------------------------------------------------------------------------
1 | import extractMetaTags from './extract';
2 | import requestAndResultsFormatter from './request';
3 | import {
4 | defaultUrlValidatorSettings,
5 | isCustomMetaTagsValid,
6 | isThisANonHTMLUrl,
7 | optionSetup,
8 | validateAndFormatURL,
9 | } from './utils';
10 | import type { OpenGraphScraperOptions } from './types';
11 |
/**
 * Validates the ogs options, then either parses the supplied html directly or
 * fetches the url and parses the returned body.
 *
 * @param {object} ogsOptions - options for ogs
 * @return {object} object with the ogs results, the response and the html
 *
 */
export default async function setOptionsAndReturnOpenGraphResults(ogsOptions: OpenGraphScraperOptions) {
  const { options } = optionSetup(ogsOptions);

  if (options.html && options.url) {
    throw new Error('Must specify either `url` or `html`, not both');
  }

  const customMetaTags = options.customMetaTags ?? [];
  if (!isCustomMetaTagsValid(customMetaTags)) {
    throw new Error('Invalid Custom Meta Tags');
  }

  // html was handed in directly, so there is nothing to fetch
  if (options.html) {
    const ogObject = extractMetaTags(options.html, options);
    ogObject.success = true;
    return { ogObject, response: { body: options.html }, html: options.html };
  }

  const validatorSettings = options.urlValidatorSettings ?? defaultUrlValidatorSettings;
  const { url: formattedUrl } = validateAndFormatURL(options.url ?? '', validatorSettings);
  if (!formattedUrl) {
    throw new Error('Invalid URL');
  }
  options.url = formattedUrl;

  // trying to limit non html pages
  if (isThisANonHTMLUrl(formattedUrl)) {
    throw new Error('Must scrape an HTML page');
  }

  // a blacklist entry matches when it appears anywhere in the url
  const isBlacklisted = (options.blacklist ?? [])
    .some((blacklistedHostname) => formattedUrl.includes(blacklistedHostname));
  if (isBlacklisted) {
    throw new Error('Host name has been black listed');
  }

  try {
    const { body, response } = await requestAndResultsFormatter(options);
    const ogObject = extractMetaTags(body, options);

    ogObject.requestUrl = options.url;

    return { ogObject, response, html: body };
  } catch (exception: any) {
    // network level failures are reported as a missing page
    const unreachableCodes = ['ENOTFOUND', 'EHOSTUNREACH', 'ENETUNREACH'];
    if (exception && unreachableCodes.includes(exception.code)) {
      throw new Error('Page not found');
    }
    if (exception && exception.name === 'AbortError') {
      throw new Error('The operation was aborted due to timeout');
    }
    if (exception instanceof Error) throw exception;
    throw new Error('Page not found');
  }
}
63 |
--------------------------------------------------------------------------------
/lib/request.ts:
--------------------------------------------------------------------------------
1 | import { fetch } from 'undici';
2 | import { decode } from 'iconv-lite';
3 | import { CheerioAPI, load } from 'cheerio';
4 | import chardet from 'chardet';
5 | import type { OpenGraphScraperOptions } from './types';
6 |
/**
 * checks if an element exists and has a non-empty value for the given attribute
 */
const doesElementExist = (selector:string, attribute:string, $: CheerioAPI) => {
  const value = $(selector).attr(attribute);
  return value && value.length > 0;
};

/**
 * gets the charset of the html
 *
 * Lookup order: `<meta charset>`, `<meta name="charset">`, the charset part of
 * `<meta http-equiv="content-type">`, then detection on the raw bytes via
 * chardet. Falls back to 'utf-8' when the body is empty.
 *
 * @param {string} body - the response body decoded as utf-8
 * @param {ArrayBuffer} buffer - the raw response bytes
 * @param {object} $ - cheerio instance loaded with the body
 * @return {string} the charset name (the chardet result is returned as-is)
 */
function getCharset(body: string, buffer: ArrayBuffer, $: CheerioAPI) {
  // `meta[charset]` instead of `meta`: .attr() reads the first matched element only,
  // so a bare `meta` selector missed the charset whenever another <meta> tag came first
  if (doesElementExist('meta[charset]', 'charset', $)) {
    return $('meta[charset]').attr('charset');
  }
  if (doesElementExist('head > meta[name="charset"]', 'content', $)) {
    return $('head > meta[name="charset"]').attr('content');
  }
  if (doesElementExist('head > meta[http-equiv="content-type"]', 'content', $)) {
    const content = $('head > meta[http-equiv="content-type"]').attr('content') ?? '';
    const charsetMatch = /charset=([^()<>@,;:"/[\]?.=\s]*)/i.exec(content);
    if (charsetMatch?.[1]) return charsetMatch[1];
  }
  if (body) {
    return chardet.detect(Buffer.from(buffer));
  }

  return 'utf-8';
}
39 |
/**
 * Fetches the page, decodes the body with the detected charset and rejects
 * non-text responses, 4xx/5xx statuses and empty bodies.
 *
 * @param {object} options - options for ogs
 * @return {object} formatted request body and response
 *
 */
export default async function requestAndResultsFormatter(options: OpenGraphScraperOptions) {
  // statuses that get their own message; any other 4xx/5xx uses the generic one
  const statusMessages: Record<number, string> = {
    400: '400 Bad Request',
    401: '401 Unauthorized',
    403: '403 Forbidden',
    404: '404 Not Found',
    408: '408 Request Timeout',
    410: '410 Gone',
    500: '500 Internal Server Error',
    502: '502 Bad Gateway',
    503: '503 Service Unavailable',
    504: '504 Gateway Timeout',
  };

  try {
    // eslint-disable-next-line no-control-regex
    const isLatin1 = /^[\u0000-\u00ff]{0,}$/;

    // urls with characters outside latin1 are percent-encoded before the request
    const rawUrl = options.url ?? '';
    const url = isLatin1.test(rawUrl) ? rawUrl : encodeURI(rawUrl);

    const response = await fetch(url, {
      signal: AbortSignal.timeout((options.timeout ?? 10) * 1000),
      ...options.fetchOptions,
      headers: { Origin: url, Accept: 'text/html', ...options.fetchOptions?.headers },
    });

    // decode as utf-8 first so the charset can be looked up, then re-decode if needed
    const bodyArrayBuffer = await response.arrayBuffer();
    const bodyBuffer = Buffer.from(bodyArrayBuffer);
    const bodyText = bodyBuffer.toString('utf-8');
    const charset = getCharset(bodyText, bodyArrayBuffer, load(bodyText)) ?? 'utf-8';
    const body = charset.toLowerCase() === 'utf-8' ? bodyText : decode(bodyBuffer, charset);

    const contentType = response.headers?.get('content-type')?.toLowerCase();
    if (contentType && !contentType.includes('text/')) {
      throw new Error('Page must return a header content-type with text/');
    }

    const statusCode = response.status.toString();
    if (statusCode.startsWith('4') || statusCode.startsWith('5')) {
      throw new Error(statusMessages[response.status] ?? 'Server has returned a 400/500 error code');
    }

    if (body === undefined || body === '') {
      throw new Error('Page not found');
    }

    return { body, response };
  } catch (error) {
    // surface the underlying cause instead of the generic "fetch failed" wrapper
    if (error instanceof Error && error.message === 'fetch failed') throw error.cause;
    throw error;
  }
}
116 |
--------------------------------------------------------------------------------
/lib/types.ts:
--------------------------------------------------------------------------------
1 | /* eslint-disable max-len */
2 | import type { RequestInit } from 'undici';
3 |
/**
 * Result shape whose `error` flag is `false`: carries the `html` string,
 * the `response` object and the scraped `result`.
 */
4 | export interface SuccessResult {
5 | error: false;
6 | html: string;
7 | response: object;
8 | result: OgObject;
9 | }
10 |
/**
 * Result shape whose `error` flag is `true`: `html` and `response` are
 * undefined and `result` is still an OgObject (it declares optional `error`
 * and `errorDetails` fields).
 */
11 | export interface ErrorResult {
12 | error: true;
13 | html: undefined;
14 | response: undefined;
15 | result: OgObject;
16 | }
17 |
/**
 * Item names accepted when `onlyGetOpenGraphInfo` is given as an array instead of a boolean.
 * NOTE(review): presumably each name disables the fallback for that one field - confirm against the fallback code.
 */
18 | export type OnlyGetOpenGraphInfoItem = 'image' | 'title' | 'description' | 'locale' | 'logo' | 'url' | 'favicon' | 'audioUrl' | 'date';
19 |
20 | /**
21 | * The options used by Open Graph Scraper
22 | *
23 | * @typeParam {string} [url] - URL of the site. (Required unless options.html is used; passing both is rejected)
24 | * @typeParam {string} [html] - You can pass in an HTML string to run ogs on it. (use without options.url)
25 | * @typeParam {string[]} [blacklist] - Pass in an array of sites you don't want ogs to run on.
26 | * @typeParam {boolean | OnlyGetOpenGraphInfoItem[]} [onlyGetOpenGraphInfo] - Only fetch open graph info and don't fall back on anything else.
27 | * @typeParam {CustomMetaTags[]} [customMetaTags] - Here you can define custom meta tags you want to scrape.
28 | * @typeParam {RequestInit} [fetchOptions] - The options passed into fetch.
29 | * @typeParam {number} [timeout] - Number of seconds before the fetch request ends. (default is 10 seconds)
30 | * @typeParam {ValidatorSettings} [urlValidatorSettings] - Sets the options used by validator.js for testing the URL
* @typeParam {JSONLDOptions} [jsonLDOptions] - Options for the JSON-LD parser.
31 | */
32 | export interface OpenGraphScraperOptions {
33 | blacklist?: string[];
34 | customMetaTags?: CustomMetaTags[];
35 | fetchOptions?: RequestInit;
36 | html?: string;
37 | onlyGetOpenGraphInfo?: boolean | OnlyGetOpenGraphInfoItem[];
38 | timeout?: number;
39 | url?: string;
40 | urlValidatorSettings?: ValidatorSettings;
41 | jsonLDOptions?: JSONLDOptions;
42 | }
43 |
44 | /**
45 | * Options for isURL method in Validator.js
46 | *
47 | * @typeParam allow_protocol_relative_urls - if set as true protocol relative URLs will be allowed
48 | * @typeParam protocols - valid protocols can be modified with this option
49 | * @typeParam require_host - if set as false isURL will not check if host is present in the URL
50 | * @typeParam require_port - if set as true isURL will check if port is present in the URL
51 | * @typeParam require_protocol - if set as true isURL will return false if protocol is not present in the URL
52 | * @typeParam require_valid_protocol - isURL will check if the URL's protocol is present in the protocols option
53 | * @typeParam validate_length - if set as false isURL will skip string length validation (IE maximum is 2083)
54 | *
* Not described above: allow_fragments, allow_query_components, allow_trailing_dot,
* allow_underscores and require_tld. Every field of this interface is required.
55 | */
56 | export interface ValidatorSettings {
57 | allow_fragments: boolean;
58 | allow_protocol_relative_urls: boolean;
59 | allow_query_components: boolean;
60 | allow_trailing_dot: boolean;
61 | allow_underscores: boolean;
62 | protocols: string[];
63 | require_host: boolean;
64 | require_port: boolean;
65 | require_protocol: boolean;
66 | require_tld: boolean;
67 | require_valid_protocol: boolean;
68 | validate_length: boolean;
69 | }
70 |
71 | /**
72 | * Options for the JSON-LD parser
*
* Both flags are optional.
* NOTE(review): presumably they control whether a JSON.parse failure on a
* JSON-LD script is thrown and/or logged - confirm against the extract code.
73 | */
74 | export interface JSONLDOptions {
75 | throwOnJSONParseError?: boolean;
76 | logOnJSONParseError?: boolean;
77 | }
78 |
79 | /**
80 | * The type for a single user defined custom meta tag you want to scrape.
81 | *
82 | * @typeParam {boolean} multiple - is there more than one of these tags on a page (normally this is false)
83 | * @typeParam {string} property - meta tag name/property attribute
84 | * @typeParam {string} fieldName - name of the result variable
85 | */
86 | export interface CustomMetaTags {
87 | fieldName: string;
88 | multiple: boolean;
89 | property: string;
90 | }
91 |
/**
 * One entry of `twitterImage`: a required `url` plus optional `alt`, `width` and `height`.
 */
92 | export interface TwitterImageObject {
93 | alt?: string;
94 | height?: number;
95 | url: string;
96 | width?: number;
97 | }
98 |
/**
 * One entry of `twitterPlayer`: a required `url` plus optional `stream`, `width` and `height`.
 */
99 | export interface TwitterPlayerObject {
100 | height?: number;
101 | stream?: string;
102 | url: string;
103 | width?: number;
104 | }
105 |
/**
 * One entry of `ogImage`: a required `url` plus optional `type`, `alt`,
 * `width` and `height`.
 */
export interface ImageObject {
  height?: number;
  type?: string;
  url: string;
  width?: number;
  alt?: string;
}
113 |
/**
 * One entry of `ogVideo`: a required `url` plus optional `type`, `width` and `height`.
 */
114 | export interface VideoObject {
115 | height?: number;
116 | type?: string;
117 | url: string;
118 | width?: number;
119 | }
120 |
/**
 * One entry of `musicSong`: a required `url` plus optional `track` and `disc`.
 * Note that `disc` is a string while `track` is a number.
 */
121 | export interface MusicSongObject {
122 | disc?: string;
123 | track?: number;
124 | url: string;
125 | }
126 |
/**
 * Every field the scraper can produce, including the intermediate
 * per-attribute media arrays (e.g. `ogImageWidth`, `twitterPlayerStream`)
 * that the public `OgObject` type omits. All fields are optional.
 */
export interface OgObjectInternal {
  alAndroidAppName?: string;
  alAndroidClass?: string;
  alAndroidPackage?: string;
  alAndroidUrl?: string;
  alIosAppName?: string;
  alIosAppStoreId?: string;
  alIosUrl?: string;
  alIpadAppName?: string;
  alIpadAppStoreId?: string;
  alIpadUrl?: string;
  alIphoneAppName?: string;
  alIphoneAppStoreId?: string;
  alIphoneUrl?: string;
  alWebShouldFallback?: string;
  alWebUrl?: string;
  alWindowsAppId?: string;
  alWindowsAppName?: string;
  alWindowsPhoneAppId?: string;
  alWindowsPhoneAppName?: string;
  alWindowsPhoneUrl?: string;
  alWindowsUniversalAppId?: string;
  alWindowsUniversalAppName?: string;
  alWindowsUniversalUrl?: string;
  alWindowsUrl?: string;
  articleAuthor?: string;
  articleExpirationTime?: string;
  articleModifiedTime?: string;
  articlePublishedDate?: string;
  articlePublishedTime?: string;
  articleModifiedDate?: string;
  articlePublisher?: string;
  articleSection?: string;
  articleTag?: string;
  author?: string;
  bookAuthor?: string;
  bookCanonicalName?: string;
  bookIsbn?: string;
  bookReleaseDate?: string;
  booksBook?: string;
  booksRatingScale?: string;
  booksRatingValue?: string;
  bookTag?: string;
  businessContactDataCountryName?: string;
  businessContactDataLocality?: string;
  businessContactDataPostalCode?: string;
  businessContactDataRegion?: string;
  businessContactDataStreetAddress?: string;
  charset?: string;
  // keyed by the user supplied fieldName; assumed to hold one string, or a
  // string array for tags declared with `multiple: true` - TODO confirm
  customMetaTags?: Record<string, string | string[]>;
  dcContributor?: string;
  dcCoverage?: string;
  dcCreator?: string;
  dcDate?: string;
  dcDateCreated?: string;
  dcDateIssued?: string;
  dcDescription?: string;
  dcFormatMedia?: string;
  dcFormatSize?: string;
  dcIdentifier?: string;
  dcLanguage?: string;
  dcPublisher?: string;
  dcRelation?: string;
  dcRights?: string;
  dcSource?: string;
  dcSubject?: string;
  dcTitle?: string;
  dcType?: string;
  error?: string;
  errorDetails?: Error;
  favicon?: string;
  fbAppId?: string;
  jsonLD?: object[];
  modifiedTime?: string;
  musicAlbum?: string;
  musicAlbumDisc?: string;
  musicAlbumTrack?: string;
  musicAlbumUrl?: string;
  musicCreator?: string;
  musicDuration?: string;
  musicMusician?: string;
  musicPlaylist?: string;
  musicRadioStation?: string;
  musicReleaseDate?: string;
  musicSong?: MusicSongObject[];
  musicSongDisc?: string[];
  musicSongProperty?: string[];
  musicSongTrack?: string[];
  musicSongUrl?: string[];
  ogArticleAuthor?: string;
  ogArticleExpirationTime?: string;
  ogArticleModifiedTime?: string;
  ogArticlePublishedTime?: string;
  ogArticlePublisher?: string;
  ogArticleSection?: string;
  ogArticleTag?: string;
  ogAudio?: string;
  ogAudioSecureURL?: string;
  ogAudioType?: string;
  ogAudioURL?: string;
  ogAvailability?: string;
  ogDate?: string;
  ogDescription?: string;
  ogDeterminer?: string;
  ogEpisode?: string;
  ogImage?: ImageObject[];
  ogImageAlt?: string[];
  ogImageHeight?: string[];
  ogImageProperty?: string[];
  ogImageSecureURL?: string[];
  ogImageType?: string[];
  ogImageURL?: string[];
  ogImageWidth?: string[];
  ogLocale?: string;
  ogLocaleAlternate?: string;
  ogLogo?: string;
  ogMovie?: string;
  ogPriceAmount?: string;
  ogPriceCurrency?: string;
  ogProductAvailability?: string;
  ogProductCondition?: string;
  ogProductPriceAmount?: string;
  ogProductPriceCurrency?: string;
  ogProductRetailerItemId?: string;
  ogSiteName?: string;
  ogTitle?: string;
  ogType?: string;
  ogUrl?: string;
  ogVideo?: VideoObject[];
  ogVideoActor?: string;
  ogVideoActorId?: string;
  ogVideoActorRole?: string;
  ogVideoDirector?: string;
  ogVideoDuration?: string;
  ogVideoHeight?: string[];
  ogVideoOther?: string;
  ogVideoProperty?: string[];
  ogVideoReleaseDate?: string;
  ogVideoSecureURL?: string;
  ogVideoSeries?: string;
  ogVideoTag?: string;
  ogVideoTvShow?: string;
  ogVideoType?: string[];
  ogVideoWidth?: string[];
  ogVideoWriter?: string;
  ogWebsite?: string;
  placeLocationLatitude?: string;
  placeLocationLongitude?: string;
  profileFirstName?: string;
  profileGender?: string;
  profileLastName?: string;
  profileUsername?: string;
  publishedTime?: string;
  releaseDate?: string;
  requestUrl?: string;
  restaurantContactInfoCountryName?: string;
  restaurantContactInfoEmail?: string;
  restaurantContactInfoLocality?: string;
  restaurantContactInfoPhoneNumber?: string;
  restaurantContactInfoPostalCode?: string;
  restaurantContactInfoRegion?: string;
  restaurantContactInfoStreetAddress?: string;
  restaurantContactInfoWebsite?: string;
  restaurantMenu?: string;
  restaurantRestaurant?: string;
  restaurantSection?: string;
  restaurantVariationPriceAmount?: string;
  restaurantVariationPriceCurrency?: string;
  success?: boolean;
  twitterAccount?: string;
  twitterAppIdGooglePlay?: string;
  twitterAppIdiPad?: string;
  twitterAppIdiPhone?: string;
  twitterAppNameGooglePlay?: string;
  twitterAppNameiPad?: string;
  twitterAppNameiPhone?: string;
  twitterAppUrlGooglePlay?: string;
  twitterAppUrliPad?: string;
  twitterAppUrliPhone?: string;
  twitterCard?: string;
  twitterCreator?: string;
  twitterCreatorId?: string;
  twitterDescription?: string;
  twitterImage?: TwitterImageObject[];
  twitterImageAlt?: string[];
  twitterImageHeight?: string[];
  twitterImageProperty?: string[];
  twitterImageSrc?: string[];
  twitterImageWidth?: string[];
  twitterPlayer?: TwitterPlayerObject[];
  twitterPlayerHeight?: string[];
  twitterPlayerProperty?: string[];
  twitterPlayerStream?: string[];
  twitterPlayerStreamContentType?: string;
  twitterPlayerWidth?: string[];
  twitterSite?: string;
  twitterSiteId?: string;
  twitterTitle?: string;
  twitterUrl?: string;
  updatedTime?: string;
}
328 |
329 | // Public result type: OgObjectInternal without the raw per-attribute media array
// fields listed below (the musicSong*, ogImage*, ogVideo*, twitterImage* and
// twitterPlayer* string arrays). The merged musicSong, ogImage, ogVideo,
// twitterImage and twitterPlayer object arrays remain part of the type.
330 | export type OgObject = Omit<
331 | OgObjectInternal,
332 | 'musicSongDisc' |
333 | 'musicSongProperty' |
334 | 'musicSongTrack' |
335 | 'musicSongUrl' |
336 | 'ogImageAlt' |
337 | 'ogImageHeight' |
338 | 'ogImageProperty' |
339 | 'ogImageSecureURL' |
340 | 'ogImageType' |
341 | 'ogImageURL' |
342 | 'ogImageWidth' |
343 | 'ogVideoHeight' |
344 | 'ogVideoProperty' |
345 | 'ogVideoType' |
346 | 'ogVideoWidth' |
347 | 'twitterImageAlt' |
348 | 'twitterImageHeight' |
349 | 'twitterImageProperty' |
350 | 'twitterImageSrc' |
351 | 'twitterImageWidth' |
352 | 'twitterPlayerHeight' |
353 | 'twitterPlayerProperty' |
354 | 'twitterPlayerStream' |
355 | 'twitterPlayerWidth'
356 | >;
357 |
--------------------------------------------------------------------------------
/lib/utils.ts:
--------------------------------------------------------------------------------
1 | import isUrl from './isUrl';
2 | import type {
3 | CustomMetaTags,
4 | OgObjectInternal,
5 | OpenGraphScraperOptions,
6 | ValidatorSettings,
7 | } from './types';
8 |
/**
 * Default settings for the URL validator. Only http and https are accepted as
 * protocols, and the protocol itself is optional (require_protocol is false).
 */
9 | export const defaultUrlValidatorSettings = {
10 | allow_fragments: true,
11 | allow_protocol_relative_urls: false,
12 | allow_query_components: true,
13 | allow_trailing_dot: false,
14 | allow_underscores: false,
15 | protocols: ['http', 'https'],
16 | require_host: true,
17 | require_port: false,
18 | require_protocol: false,
19 | require_tld: true,
20 | require_valid_protocol: true,
21 | validate_length: true,
22 | };
23 |
/**
 * Tells whether the given value is a non-empty string that passes the url validator
 *
 * @param {string} url - url to be checked
 * @param {object} urlValidatorSettings - settings used by validator
 * @return {boolean} boolean value if the url is valid
 *
 */
export function isUrlValid(url: string, urlValidatorSettings: ValidatorSettings): boolean {
  // anything that is not a non-empty string is rejected without calling the validator
  if (typeof url !== 'string' || url.length === 0) return false;
  return isUrl(url, urlValidatorSettings);
}
35 |
/**
 * Prefixes the url with http:// unless it already starts with an
 * http, https, ftp or ftps protocol
 *
 * @param {string} url - url to be updated
 * @return {string} url that starts with a protocol
 *
 */
const coerceUrl = (url: string): string => {
  const hasProtocol = /^(f|ht)tps?:\/\//i.test(url);
  return hasProtocol ? url : `http://${url}`;
};
44 |
/**
 * Validates the url and, when valid, makes sure it starts with a protocol
 *
 * @param {string} url - url to be checked and formatted
 * @param {object} urlValidatorSettings - settings used by validator
 * @return {object} object whose url is the formatted url, or null when invalid
 *
 */
export function validateAndFormatURL(url: string, urlValidatorSettings: ValidatorSettings): { url: string | null } {
  if (!isUrlValid(url, urlValidatorSettings)) return { url: null };
  return { url: coerceUrl(url) };
}
56 |
/**
 * Finds the image type from a given url
 *
 * The query string and fragment are removed before the extension is read, so
 * a dot inside the query (`img.png?v=1.2`) or a trailing fragment
 * (`img.png#top`) no longer changes the result. The type is whatever follows
 * the last dot of the remaining text; a url without any dot is returned whole.
 *
 * @param {string} url - url to be checked
 * @return {string} image type from url
 *
 */
export function findImageTypeFromUrl(url: string): string {
  const [path = ''] = url.split(/[?#]/);
  return path.split('.').pop() ?? '';
}
69 |
/**
 * Tells whether the given type is one of the known image file extensions
 *
 * @param {string} type - type to be checked (extension without the dot)
 * @return {boolean} boolean value if type is valid
 *
 */
export function isImageTypeValid(type: string): boolean {
  const knownImageTypes = new Set<string>([
    'apng', 'bmp', 'gif', 'ico', 'cur', 'jpg', 'jpeg', 'jfif', 'pjpeg', 'pjp', 'png', 'svg', 'tif', 'tiff', 'webp',
  ]);
  return knownImageTypes.has(type);
}
81 |
/**
 * Checks if URL is a non html page
 *
 * The query string and fragment are ignored and matching is case-insensitive.
 * A url counts as non-HTML when the text after its last dot contains one of
 * the known extensions, or when the url ends with one of them. The second
 * check is what lets multi-part extensions such as `.tar.gz` match.
 *
 * @param {string} url - url to be checked
 * @return {boolean} boolean value if url is non html
 *
 */
export function isThisANonHTMLUrl(url: string): boolean {
  const nonHtmlExtensions: string[] = ['.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.3gp', '.avi', '.mov', '.mp4', '.m4v', '.m4a', '.mp3', '.mkv', '.ogv', '.ogm', '.ogg', '.oga', '.webm', '.wav', '.bmp', '.gif', '.jpg', '.jpeg', '.png', '.webp', '.zip', '.rar', '.tar', '.tar.gz', '.tgz', '.tar.bz2', '.tbz2', '.txt', '.pdf'];
  // a dot inside the query string or fragment must not be read as the file extension
  const [rawPath = ''] = url.split(/[?#]/);
  const path = rawPath.toLowerCase();
  const lastDotPart = `.${path.split('.').pop() ?? ''}`;
  return nonHtmlExtensions.some((type: string): boolean => lastDotPart.includes(type) || path.endsWith(type));
}
94 |
/**
 * Find and delete nested undefineds
 *
 * Mutates the given object in place: every key whose value is `undefined` is
 * deleted, and every nested object (arrays included) is cleaned recursively.
 * `null` and other falsy values are left untouched.
 *
 * @param {object} object - object to be cleaned
 * @return {object} the same object, without nested undefineds
 *
 */
export function removeNestedUndefinedValues(object: Record<string, any>): OgObjectInternal {
  Object.entries(object).forEach(([key, value]) => {
    if (value && typeof value === 'object') removeNestedUndefinedValues(value);
    else if (value === undefined) delete object[key];
  });
  return object;
}
109 |
/**
 * Applies the ogs defaults to the user supplied options
 *
 * @param {object} ogsOptions - options supplied by the caller
 * @return {object} object holding the merged options under `options`
 *
 */
export function optionSetup(ogsOptions: OpenGraphScraperOptions): { options: OpenGraphScraperOptions } {
  // caller supplied values win over the defaults
  const defaults: OpenGraphScraperOptions = { onlyGetOpenGraphInfo: false };
  return { options: { ...defaults, ...ogsOptions } };
}
125 |
/**
 * Checks if the custom meta tags are valid
 *
 * Valid means: an array in which every entry is a non-null object with a
 * string `fieldName`, a boolean `multiple` and a string `property`.
 * An empty array is valid.
 *
 * @param {object[]} customMetaTags - custom meta tags to be checked
 * @return {boolean} boolean value if the custom meta tags are valid
 *
 */
export function isCustomMetaTagsValid(customMetaTags: CustomMetaTags[]): boolean {
  if (!Array.isArray(customMetaTags)) return false;

  return customMetaTags.every((customMetaTag) => (
    typeof customMetaTag === 'object'
    // typeof null is 'object'; reject it here instead of throwing on property access
    && customMetaTag !== null
    && typeof customMetaTag.fieldName === 'string'
    && typeof customMetaTag.multiple === 'boolean'
    && typeof customMetaTag.property === 'string'
  ));
}
149 |
/**
 * Unescape script text.
 *
 * Some websites escape the text inside script tags with `\xHH` sequences.
 * `JSON.parse()` accepts `\uHHHH` escapes but rejects `\xHH`, so every `\xHH`
 * sequence is replaced with the character it stands for before parsing.
 *
 * Accepted by JSON.parse:
 *
 * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Character_escape#uhhhh
 *
 * ```js
 * JSON.parse('"\\u2611"'); // '☑'
 * ```
 *
 * Rejected by JSON.parse:
 *
 * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Character_escape#xhh
 *
 * ```js
 * JSON.parse('"\\x26"'); // '&'
 * ```
 *
 * @param {string} scriptText - the text of the script tag
 * @returns {string} unescaped script text
 */
export function unescapeScriptText(scriptText: string) {
  // https://stackoverflow.com/a/34056693
  const hexEscape = /\\x([0-9a-f]{2})/ig;
  return scriptText.replace(hexEscape, (_match, hexPair) => {
    const charCode = parseInt(hexPair, 16);
    // a double quote has to stay escaped, otherwise it would end the JSON string early
    return charCode === 34 ? '\\"' : String.fromCharCode(charCode);
  });
}
185 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "open-graph-scraper",
3 | "description": "Node.js scraper module for Open Graph and Twitter Card info",
4 | "version": "6.10.0",
5 | "license": "MIT",
6 | "main": "./dist/cjs/index.js",
7 | "types": "./types/index.d.ts",
8 | "exports": {
9 | ".": {
10 | "types": "./types/index.d.ts",
11 | "import": "./dist/esm/index.js",
12 | "require": "./dist/cjs/index.js"
13 | },
14 | "./types": "./types/lib/types.d.ts"
15 | },
16 | "scripts": {
17 | "build:cjs": "tsc --project tsconfig.build.json --module commonjs --outDir dist/cjs/",
18 | "build:declaration": "tsc --project tsconfig.declaration.json --module node16 --moduleResolution node16",
19 | "build:esm": "tsc --project tsconfig.build.json --module node16 --moduleResolution node16 --outDir dist/esm/",
20 | "build": "rm -rf dist/ && npm run build:cjs && npm run build:esm",
21 | "ci": "npm run eslint && npm run build && npm run build:declaration && npm run test",
22 | "eslint:fix": "eslint . --ext .js,.ts --fix",
23 | "eslint": "eslint . --ext .js,.ts",
24 | "mocha:int": "ts-mocha --recursive \"./tests/integration/**/*.spec.ts\" --timeout 10000",
25 | "mocha:unit": "nyc --reporter=html --reporter=text --exclude=tests/ ts-mocha --recursive \"./tests/unit/**/*.spec.ts\"",
26 | "prepare": "npm run snyk-protect && npm run build",
27 | "snyk-protect": "snyk-protect",
28 | "test": "npm run mocha:unit && npm run mocha:int"
29 | },
30 | "engines": {
31 | "node": ">=18.0.0"
32 | },
33 | "author": {
34 | "name": "Josh Shemas",
35 | "email": "jjs90jjs@gmail.com",
36 | "url": "https://github.com/jshemas"
37 | },
38 | "dependencies": {
39 | "chardet": "^2.1.0",
40 | "cheerio": "^1.0.0-rc.12",
41 | "iconv-lite": "^0.6.3",
42 | "undici": "^6.21.2"
43 | },
44 | "files": [
45 | "/dist",
46 | "/types",
47 | "index.ts",
48 | "CHANGELOG.md"
49 | ],
50 | "devDependencies": {
51 | "@snyk/protect": "^1.1296.2",
52 | "@types/mocha": "^10.0.10",
53 | "@types/node": "^18.19.86",
54 | "@typescript-eslint/eslint-plugin": "^7.18.0",
55 | "@typescript-eslint/parser": "^7.18.0",
56 | "chai": "^4.5.0",
57 | "eslint": "^8.57.0",
58 | "eslint-config-airbnb-base": "^15.0.0",
59 | "eslint-config-airbnb-typescript": "^18.0.0",
60 | "eslint-plugin-import": "^2.31.0",
61 | "eslint-plugin-mocha": "^10.5.0",
62 | "eslint-plugin-promise": "^7.2.1",
63 | "mocha": "^11.1.0",
64 | "nyc": "^17.1.0",
65 | "sinon": "^19.0.2",
66 | "ts-mocha": "^11.1.0",
67 | "typescript": "5.5.4"
68 | },
69 | "repository": {
70 | "type": "git",
71 | "url": "git+ssh://git@github.com/jshemas/openGraphScraper.git",
72 | "web": "http://github.com/jshemas/openGraphScraper"
73 | },
74 | "keywords": [
75 | "app links",
76 | "dublin core",
77 | "json ld",
78 | "meta tags",
79 | "metadata",
80 | "og",
81 | "ogp",
82 | "ogs",
83 | "open graph scraper",
84 | "open graph",
85 | "opengraph",
86 | "openGraphScraper",
87 | "scraper",
88 | "twitter card",
89 | "twitter"
90 | ],
91 | "snyk": true
92 | }
93 |
--------------------------------------------------------------------------------
/tests/.eslintrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "root": true,
3 | "extends": [
4 | "airbnb-base",
5 | "airbnb-typescript/base",
6 | "plugin:promise/recommended",
7 | "plugin:mocha/recommended"
8 | ],
9 | "plugins": [
10 | "mocha"
11 | ],
12 | "env": {
13 | "mocha": true
14 | },
15 | "parserOptions": {
16 | "project": "./tsconfig.tests.json"
17 | },
18 | "rules": {
19 | "@typescript-eslint/no-unused-expressions": 0,
20 | "func-names": 0,
21 | "import/extensions": 0,
22 | "import/no-unresolved": 0,
23 | "max-len": ["error", {
24 | "code": 120,
25 | "ignoreStrings": true,
26 | "ignoreTrailingComments": true
27 | }],
28 | "no-console": 0,
29 | "no-param-reassign": "off",
30 | "no-unused-expressions": 0,
31 | "prefer-arrow-callback": 0,
32 | "promise/always-return": 0
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/tests/integration/blacklist.spec.ts:
--------------------------------------------------------------------------------
1 | import { expect } from 'chai';
2 |
3 | import ogs from '../../index';
4 |
describe('blacklist', function () {
  // All three cases request the same live page; only the `blacklist` option varies.
  const wikipediaUrl = 'https://www.wikipedia.org/';
  const wikipediaDescription = 'Wikipedia is a free online encyclopedia, created and edited by volunteers around the world and hosted by the Wikimedia Foundation.';
  // Exact set of keys expected on a successful scrape of that page.
  const successKeys = [
    'charset',
    'favicon',
    'ogDescription',
    'ogImage',
    'ogLocale',
    'ogTitle',
    'ogType',
    'requestUrl',
    'success',
  ];

  it('when website is on the blacklist', function () {
    const options = {
      url: wikipediaUrl,
      blacklist: ['www.test.com', 'www.wikipedia.org'],
    };
    // The request is expected to reject, so reaching the fulfilment handler is a failure.
    return ogs(options)
      .then(() => {
        expect().fail('this should not happen');
      })
      .catch(({ error, result, response }) => {
        console.log('error:', error);
        console.log('result:', result);
        expect(error).to.be.eql(true);
        expect(result.success).to.be.eql(false);
        expect(result.requestUrl).to.be.eql(wikipediaUrl);
        expect(result.error).to.eql('Host name has been black listed');
        expect(result.errorDetails.toString()).to.eql('Error: Host name has been black listed');
        expect(result).to.have.all.keys('error', 'errorDetails', 'requestUrl', 'success');
        expect(response).to.eql(undefined);
      });
  });

  it('when website is not on the blacklist', async function () {
    const { error, result, response } = await ogs({
      url: wikipediaUrl,
      blacklist: ['www.test.com', 'www.google.org'],
    });
    console.log('error:', error);
    console.log('result:', result);
    expect(error).to.be.eql(false);
    expect(result.ogTitle).to.be.eql('Wikipedia, the free encyclopedia');
    expect(result.ogDescription).to.be.eql(wikipediaDescription);
    expect(result.favicon).to.be.eql('/static/favicon/wikipedia.ico');
    expect(result.ogLocale).to.be.eql('en');
    expect(result.requestUrl).to.be.eql(wikipediaUrl);
    expect(result.charset).to.be.eql('utf-8');
    expect(result.success).to.be.eql(true);
    expect(result).to.have.all.keys(...successKeys);
    expect(response).to.be.an('Response');
  });

  it('when blacklist empty', async function () {
    const { error, result, response } = await ogs({
      url: wikipediaUrl,
      blacklist: [],
    });
    console.log('error:', error);
    console.log('result:', result);
    expect(error).to.be.eql(false);
    expect(result.ogTitle).to.be.eql('Wikipedia, the free encyclopedia');
    expect(result.ogDescription).to.be.eql(wikipediaDescription);
    expect(result.ogLocale).to.be.eql('en');
    expect(result.requestUrl).to.be.eql(wikipediaUrl);
    expect(result.favicon).to.be.eql('/static/favicon/wikipedia.ico');
    expect(result.charset).to.be.eql('utf-8');
    expect(result.success).to.be.eql(true);
    expect(result).to.have.all.keys(...successKeys);
    expect(response).to.be.an('Response');
  });
});
92 |
--------------------------------------------------------------------------------
/tests/integration/fetch.spec.ts:
--------------------------------------------------------------------------------
1 | import { expect } from 'chai';
2 | import { load } from 'cheerio';
3 |
4 | import ogs from '../../index';
5 |
describe('fetch', function () {
  // Target of the timeout cases. Presumably a large download, so the request
  // should still be in flight when the timeout or abort fires.
  const isoUrl = 'https://releases.ubuntu.com/23.04/ubuntu-23.04-desktop-amd64.iso';
  // Exact set of keys expected on every rejected result in this suite.
  const failureKeys = ['error', 'errorDetails', 'requestUrl', 'success'];

  // TODO: Site keeps going offline, will need to find a new site
  // eslint-disable-next-line mocha/no-skipped-tests
  it.skip('setting the fetch headers', async function () {
    // userAgent is undici by default
    const userAgent = 'Mozilla/5.0 (Linux; Android 10) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.5563.57 Mobile Safari/537.36';
    const {
      error, result, response, html,
    } = await ogs({
      url: 'https://www.whatsmyua.info/',
      fetchOptions: { headers: { 'user-agent': userAgent } },
    });
    console.log('error:', error);
    console.log('result:', result);
    expect(error).to.be.eql(false);
    expect(result.ogTitle).to.be.eql('What\'s my user agent?');
    expect(result.ogDescription).to.be.eql('Detect user-agent, operating system, browser, and device using several libraries, including ua-parser, ua-parser-js, and platform.');
    expect(result.requestUrl).to.be.eql('https://www.whatsmyua.info/');
    expect(result.charset).to.be.eql('utf-8');
    expect(result.success).to.be.eql(true);
    expect(result).to.have.all.keys(
      'ogTitle',
      'ogDescription',
      'requestUrl',
      'charset',
      'success',
    );
    expect(response).to.be.an('Response');
    // The page echoes the user agent it received; read it back out of the returned HTML.
    const $ = load(html || '');
    const rawUa = $('li#rawUa').text();
    expect(rawUa).to.be.eql(`rawUa: ${userAgent}`);
  });

  it('setting a timeout', function () {
    const options = { url: isoUrl, timeout: 3 };
    return ogs(options)
      .then(() => {
        expect().fail('this should not happen');
      })
      .catch(({ error, result, response }) => {
        console.log('error:', error);
        console.log('result:', result);
        expect(error).to.be.eql(true);
        expect(result.success).to.be.eql(false);
        expect(result.requestUrl).to.be.eql(isoUrl);
        expect(result.error).to.eql('The operation was aborted due to timeout');
        expect(result.errorDetails.toString()).to.eql('TimeoutError: The operation was aborted due to timeout');
        expect(result).to.have.all.keys(...failureKeys);
        expect(response).to.eql(undefined);
      });
  });

  // https://developer.mozilla.org/en-US/docs/Web/API/AbortSignal/timeout
  // eslint-disable-next-line mocha/no-skipped-tests
  it.skip('setting a timeout - using AbortSignal.timeout()', function () {
    const options = { url: isoUrl, fetchOptions: { signal: AbortSignal.timeout(3000) } };
    return ogs(options)
      .then(() => {
        expect().fail('this should not happen');
      })
      .catch(({ error, result, response }) => {
        console.log('error:', error);
        console.log('result:', result);
        expect(error).to.be.eql(true);
        expect(result.success).to.be.eql(false);
        expect(result.requestUrl).to.be.eql(isoUrl);
        expect(result.error).to.eql('The operation was aborted due to timeout');
        expect(result.errorDetails.toString()).to.eql('Error: The operation was aborted due to timeout');
        expect(result).to.have.all.keys(...failureKeys);
        expect(response).to.eql(undefined);
      });
  });

  // https://developer.mozilla.org/en-US/docs/Web/API/AbortSignal/abort
  // eslint-disable-next-line mocha/no-skipped-tests
  it.skip('setting a timeout - using controller.abort()', function () {
    // Abort manually after three seconds instead of relying on a timeout signal.
    const controller = new AbortController();
    setTimeout(() => controller.abort(), 3000);
    const options = { url: isoUrl, fetchOptions: { signal: controller.signal } };
    return ogs(options)
      .then(() => {
        expect().fail('this should not happen');
      })
      .catch(({ error, result, response }) => {
        console.log('error:', error);
        console.log('result:', result);
        expect(error).to.be.eql(true);
        expect(result.success).to.be.eql(false);
        expect(result.requestUrl).to.be.eql(isoUrl);
        expect(result.error).to.eql('The operation was aborted due to timeout');
        expect(result.errorDetails.toString()).to.eql('Error: The operation was aborted due to timeout');
        expect(result).to.have.all.keys(...failureKeys);
        expect(response).to.eql(undefined);
      });
  });
});
111 |
--------------------------------------------------------------------------------
/tests/integration/html.spec.ts:
--------------------------------------------------------------------------------
1 | import { expect } from 'chai';
2 |
3 | import ogs from '../../index';
4 |
5 | const HTML_STRING = `
6 |
7 |
8 |
9 |
10 |
11 |
12 | `;
13 |
describe('html', function () {
  // URL used by both invalid-call cases.
  const imageUrl = 'https://upload.wikimedia.org/wikipedia/commons.jpg';
  // Exact set of keys expected on every rejected result in this suite.
  const failureKeys = ['error', 'errorDetails', 'requestUrl', 'success'];

  it('pass in HTML string', async function () {
    // No URL is given: the scraper works directly on the supplied markup.
    const { error, result, response } = await ogs({ html: HTML_STRING });
    console.log('error:', error);
    console.log('result:', result);
    expect(error).to.be.eql(false);
    expect(result.success).to.be.eql(true);
    expect(result.ogTitle).to.be.eql('Test page');
    expect(result.charset).to.be.eql('ISO-8859-1');
    expect(result).to.have.all.keys('charset', 'ogTitle', 'success');
    expect(response).to.be.an('object').and.to.not.be.empty;
  });

  it('Invalid Call - Can\'t request URL and pass in HTML string', function () {
    const options = { url: imageUrl, html: HTML_STRING };
    return ogs(options)
      .then(() => {
        expect().fail('this should not happen');
      })
      .catch(({ error, result, response }) => {
        console.log('error:', error);
        console.log('result:', result);
        expect(error).to.be.eql(true);
        expect(result.success).to.be.eql(false);
        expect(result.requestUrl).to.be.eql(imageUrl);
        expect(result.error).to.eql('Must specify either `url` or `html`, not both');
        expect(result.errorDetails.toString()).to.eql('Error: Must specify either `url` or `html`, not both');
        expect(result).to.have.all.keys(...failureKeys);
        expect(response).to.eql(undefined);
      });
  });

  it('Invalid Call - Not a HTML page', function () {
    const options = { url: imageUrl };
    return ogs(options)
      .then(() => {
        expect().fail('this should not happen');
      })
      .catch(({ error, result, response }) => {
        console.log('error:', error);
        console.log('result:', result);
        expect(error).to.be.eql(true);
        expect(result.success).to.be.eql(false);
        expect(result.requestUrl).to.be.eql(imageUrl);
        expect(result.error).to.eql('Must scrape an HTML page');
        expect(result.errorDetails.toString()).to.eql('Error: Must scrape an HTML page');
        expect(result).to.have.all.keys(...failureKeys);
        expect(response).to.eql(undefined);
      });
  });
});
83 |
--------------------------------------------------------------------------------
/tests/integration/image.spec.ts:
--------------------------------------------------------------------------------
1 | import { expect } from 'chai';
2 |
3 | import ogs from '../../index';
4 |
describe('image', function () {
  it('Test Flickr Image - Should Return correct Open Graph Info', async function () {
    const pageUrl = 'https://jshemas.github.io/openGraphScraperPages/flickr';
    // The page is expected to carry the same text as its Open Graph and Twitter descriptions.
    const flickrDescription = '____________________ Press "L" to view on black Press "F" to favor Share, if you like :) You can leave a comment, if you like :) Not to use or publish without permission! © Christoph Wagner Photographie';
    const expectedKeys = [
      'favicon',
      'jsonLD',
      'fbAppId',
      'alIosAppName',
      'alIosAppStoreId',
      'alIosUrl',
      'ogDescription',
      'ogImage',
      'ogLocale',
      'ogSiteName',
      'ogTitle',
      'ogType',
      'ogUrl',
      'requestUrl',
      'success',
      'charset',
      'twitterAppIdiPhone',
      'twitterAppNameiPhone',
      'twitterAppUrliPhone',
      'twitterCard',
      'twitterDescription',
      'twitterSite',
    ];

    const { error, result, response } = await ogs({ url: pageUrl });
    console.log('error:', error);
    console.log('result:', result);
    expect(error).to.be.eql(false);
    expect(result.alIosAppName).to.be.eql('Flickr');
    expect(result.alIosAppStoreId).to.be.eql('328407587');
    expect(result.alIosUrl).to.be.eql('flickr://flickr.com/photos/travelgraph/18791678505');
    expect(result.ogSiteName).to.be.eql('Flickr');
    expect(result.twitterAppNameiPhone).to.be.eql('Flickr');
    expect(result.twitterAppIdiPhone).to.be.eql('328407587');
    expect(result.twitterSite).to.be.eql('@flickr');
    expect(result.ogTitle).to.be.eql('Heimgarten');
    expect(result.ogDescription).to.be.eql(flickrDescription);
    expect(result.ogType).to.be.eql('article');
    expect(result.ogUrl).to.be.eql('https://www.flickr.com/photos/travelgraph/18791678505/');
    expect(result.favicon).to.be.eql('https://s.yimg.com/pw/favicon.ico');
    expect(result.ogLocale).to.be.eql('en-us');
    expect(result.twitterCard).to.be.eql('photo');
    expect(result.twitterDescription).to.be.eql(flickrDescription);
    expect(result.twitterAppUrliPhone).to.be.eql('flickr://flickr.com/photos/travelgraph/18791678505/');
    expect(result.ogImage).to.be.eql([{
      url: 'https://c1.staticflickr.com/1/499/18791678505_5886fefcf7_b.jpg',
      width: '1024',
      height: '375',
      type: 'jpg',
    }]);
    expect(result.requestUrl).to.be.eql(pageUrl);
    expect(result.charset).to.be.eql('UTF-8');
    expect(result.fbAppId).to.be.eql('137206539707334');
    expect(result.jsonLD).to.be.an('array').and.to.not.be.empty;
    expect(result.success).to.be.eql(true);
    expect(result).to.have.all.keys(...expectedKeys);
    expect(response).to.be.an('Response');
  });

  it('Test getting the description and images from meta tags', async function () {
    const pageUrl = 'https://jshemas.github.io/openGraphScraperPages/twitter.html';
    // Images expected in `ogImage`, in this exact order.
    const expectedImages = [
      { url: 'https://web.archive.org/web/20170608000236im_/https://pbs.twimg.com/media/C_-sImTVYAEEJM5.jpg', type: 'jpg' },
      { url: 'https://web.archive.org/web/20170608000236im_/https://pbs.twimg.com/tweet_video_thumb/DBp8umbVYAAWJuC.jpg', type: 'jpg' },
      { url: 'https://web.archive.org/web/20170608000236im_/https://pbs.twimg.com/tweet_video_thumb/DBvjXJsWsAEAs90.jpg', type: 'jpg' },
      { url: 'https://web.archive.org/web/20170608000236im_/https://pbs.twimg.com/media/CljfeELUoAUskW4.jpg', type: 'jpg' },
      { url: 'https://web.archive.org/web/20170608000236im_/https://pbs.twimg.com/media/DBwVAyKUIAEmWVs.jpg', type: 'jpg' },
      { url: 'https://web.archive.org/web/20170608000236im_/https://pbs.twimg.com/profile_images/508960761826131968/LnvhR8ED_bigger.png', type: 'png' },
      { url: 'https://web.archive.org/web/20170608000236im_/https://pbs.twimg.com/profile_images/854430488777379840/zFdLhwbT_bigger.jpg', type: 'jpg' },
      { url: 'https://web.archive.org/web/20170608000236im_/https://pbs.twimg.com/profile_images/652596362073272320/Zv6K-clv_bigger.jpg', type: 'jpg' },
      { url: 'https://web.archive.org/web/20170608000236im_/https://pbs.twimg.com/media/DBrlZk4UwAA9Zq-.jpg', type: 'jpg' },
      { url: 'https://web.archive.org/web/20170608000236im_/https://pbs.twimg.com/media/DBrlZk2UQAAfAkd.jpg', type: 'jpg' },
    ];
    const expectedKeys = [
      'favicon',
      'ogDescription',
      'ogImage',
      'ogTitle',
      'ogLocale',
      'requestUrl',
      'ogUrl',
      'success',
      'charset',
    ];

    const { error, result, response } = await ogs({ url: pageUrl });
    console.log('error:', error);
    console.log('result:', result);
    expect(error).to.be.eql(false);
    expect(result.charset).to.be.eql('utf-8');
    expect(result.success).to.be.eql(true);
    expect(result.requestUrl).to.be.eql(pageUrl);
    expect(result.ogTitle).to.be.eql('Twitter. It\'s what\'s happening.');
    expect(result.ogDescription).to.be.eql('From breaking news and entertainment to sports and politics, get the full story with all the live commentary.');
    expect(result.ogLocale).to.be.eql('en');
    expect(result.ogUrl).to.be.eql('https://web.archive.org/web/20170608000236/https://twitter.com/i/hello');
    expect(result.favicon).to.be.eql('//web.archive.org/web/20170608000236im_/https://abs.twimg.com/favicons/favicon.ico');
    expect(result.ogImage).to.be.eql(expectedImages);
    expect(result).to.have.all.keys(...expectedKeys);
    expect(response).to.be.an('Response');
  });
});
136 |
--------------------------------------------------------------------------------
/tests/integration/onlyGetOpenGraphInfo.spec.ts:
--------------------------------------------------------------------------------
1 | import { expect } from 'chai';
2 |
3 | import ogs from '../../index';
4 |
describe('onlyGetOpenGraphInfo', function () {
  // Both cases request the same page and differ only in the `onlyGetOpenGraphInfo` flag.
  const wikipediaUrl = 'http://www.wikipedia.org/';

  // TODO: wikipedia now has og data by default, will need to find a replacement site for this test
  it.skip('should only get open graph info', async function () {
    const { error, result, response } = await ogs({
      url: wikipediaUrl,
      onlyGetOpenGraphInfo: true,
    });
    console.log('error:', error);
    console.log('result:', result);
    expect(error).to.be.eql(false);
    expect(result.ogTitle).to.eql(undefined);
    expect(result.ogDescription).to.eql(undefined);
    expect(result.ogImage).to.eql(undefined);
    expect(result.requestUrl).to.be.eql(wikipediaUrl);
    expect(result.success).to.be.eql(true);
    expect(result).to.have.all.keys('requestUrl', 'success');
    expect(response).to.be.an('Response');
  });

  it('should get all open graph info', async function () {
    const expectedKeys = [
      'charset',
      'favicon',
      'ogDescription',
      'ogImage',
      'ogLocale',
      'ogTitle',
      'ogType',
      'requestUrl',
      'success',
    ];
    const { error, result, response } = await ogs({
      url: wikipediaUrl,
      onlyGetOpenGraphInfo: false,
    });
    console.log('error:', error);
    console.log('result:', result);
    expect(error).to.be.eql(false);
    expect(result.ogTitle).to.be.eql('Wikipedia, the free encyclopedia');
    expect(result.ogDescription).to.be.eql('Wikipedia is a free online encyclopedia, created and edited by volunteers around the world and hosted by the Wikimedia Foundation.');
    expect(result.ogLocale).to.be.eql('en');
    expect(result.requestUrl).to.be.eql(wikipediaUrl);
    expect(result.favicon).to.be.eql('/static/favicon/wikipedia.ico');
    expect(result.charset).to.be.eql('utf-8');
    expect(result.success).to.be.eql(true);
    expect(result).to.have.all.keys(...expectedKeys);
    expect(response).to.be.an('Response');
  });
});
58 |
--------------------------------------------------------------------------------
/tests/integration/redirect.spec.ts:
--------------------------------------------------------------------------------
1 | import { expect } from 'chai';
2 |
3 | import ogs from '../../index';
4 |
describe('redirect', function () {
  context('should return correct Open Graph Info', function () {
    // nytimes keeps blocking requests; a way to bypass this still needs to be found
    it.skip('nytimes page', async function () {
      const userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36';
      const requestUrl = 'https://www.nytimes.com/2016/09/01/arts/design/gallery-hopes-to-sell-kanye-wests-famous-sculpture-for-4-million.html?_r=0';
      const expectedKeys = [
        'favicon',
        'alAndroidAppName',
        'alAndroidPackage',
        'alAndroidUrl',
        'alIpadAppName',
        'alIpadAppStoreId',
        'alIpadUrl',
        'alIphoneAppName',
        'alIphoneAppStoreId',
        'alIphoneUrl',
        'articleModifiedTime',
        'articlePublishedTime',
        'articleSection',
        'articleTag',
        'ogDate',
        'ogDescription',
        'ogImage',
        'ogLocale',
        'ogTitle',
        'ogType',
        'ogUrl',
        'requestUrl',
        'success',
        'charset',
        'twitterAppIdGooglePlay',
        'twitterAppNameGooglePlay',
        'twitterAppUrlGooglePlay',
        'twitterCard',
        'twitterDescription',
        'twitterImage',
        'twitterSite',
        'twitterTitle',
        'twitterUrl',
      ];

      const { error, result, response } = await ogs({
        url: requestUrl,
        fetchOptions: { headers: { 'user-agent': userAgent } },
      });
      console.log('error:', error);
      console.log('result:', result);
      expect(error).to.be.eql(false);
      expect(result.alAndroidUrl).to.be.eql('nyt://article/d07123d7-f6dc-5370-97cb-86dd6aa0b0de');
      expect(result.alAndroidPackage).to.be.eql('com.nytimes.android');
      expect(result.alAndroidAppName).to.be.eql('NYTimes');
      expect(result.alIphoneUrl).to.be.eql('nytimes://www.nytimes.com/2016/09/01/arts/design/gallery-hopes-to-sell-kanye-wests-famous-sculpture-for-4-million.html');
      expect(result.alIphoneAppStoreId).to.be.eql('284862083');
      expect(result.alIphoneAppName).to.be.eql('NYTimes');
      expect(result.alIpadUrl).to.be.eql('nytimes://www.nytimes.com/2016/09/01/arts/design/gallery-hopes-to-sell-kanye-wests-famous-sculpture-for-4-million.html');
      expect(result.alIpadAppStoreId).to.be.eql('357066198');
      expect(result.alIpadAppName).to.be.eql('NYTimes');
      expect(result.ogUrl).to.be.eql('https://www.nytimes.com/2016/09/01/arts/design/gallery-hopes-to-sell-kanye-wests-famous-sculpture-for-4-million.html');
      expect(result.favicon).to.be.eql('/vi-assets/static-assets/favicon-d2483f10ef688e6f89e23806b9700298.ico');
      expect(result.ogType).to.be.eql('article');
      expect(result.articleSection).to.be.eql('Arts');
      expect(result.articleTag).to.be.eql('Blum & Poe (Los Angeles, Calif)');
      expect(result.articleModifiedTime).to.be.eql('2016-09-01T17:37:39.000Z');
      expect(result.articlePublishedTime).to.be.eql('2016-09-01T01:34:35.000Z');
      expect(result.ogTitle).to.be.eql('Gallery Hopes to Sell Kanye West’s ‘Famous’ Sculpture for $4 Million (Published 2016)');
      expect(result.ogDescription).to.be.eql('The Los Angeles gallery Blum & Poe, which hosted the ‘Famous’ exhibition, is projecting a hefty price tag for the work.');
      expect(result.twitterTitle).to.be.eql('Gallery Hopes to Sell Kanye West’s ‘Famous’ Sculpture for $4 Million (Published 2016)');
      expect(result.twitterDescription).to.be.eql('The Los Angeles gallery Blum & Poe, which hosted the ‘Famous’ exhibition, is projecting a hefty price tag for the work.');
      expect(result.twitterCard).to.be.eql('summary_large_image');
      expect(result.twitterAppNameGooglePlay).to.be.eql('NYTimes');
      expect(result.twitterAppIdGooglePlay).to.be.eql('com.nytimes.android');
      expect(result.twitterAppUrlGooglePlay).to.be.eql('nyt://article/d07123d7-f6dc-5370-97cb-86dd6aa0b0de');
      expect(result.ogLocale).to.be.eql('en');
      expect(result.twitterSite).to.be.eql('@nytimes');
      expect(result.twitterUrl).to.be.eql('https://www.nytimes.com/2016/09/01/arts/design/gallery-hopes-to-sell-kanye-wests-famous-sculpture-for-4-million.html');
      expect(result.ogImage).to.be.eql([{
        url: 'https://static01.nyt.com/images/2016/09/02/arts/01KANYE1-web/01KANYE1-web-facebookJumbo.jpg?year=2016&h=550&w=1050&s=f15e16ac34e5bf83b85e4497c724e7bd5ba43994e780f23119610eba47cd726d&k=ZQJBKqZ0VN',
        type: 'jpg',
      }]);
      expect(result.twitterImage).to.be.eql([{
        url: 'https://static01.nyt.com/images/2016/09/02/arts/01KANYE1-web/01KANYE1-web-videoSixteenByNineJumbo1600.jpg?year=2016&h=901&w=1600&s=a5f74a00775cb159c1978e3d3c89d7ea7f176aec59f6565fad3c377cf3b1bd7b&k=ZQJBKqZ0VN&tw=1',
        alt: 'Kim Kardashian West at the “Famous” exhibition at Blum & Poe in Los Angeles last week. The gallery is planning to sell the sculpture for a hefty price tag.',
      }]);
      expect(result.requestUrl).to.be.eql(requestUrl);
      expect(result.charset).to.be.eql('utf-8');
      expect(result.success).to.be.eql(true);
      expect(result).to.have.all.keys(...expectedKeys);
      expect(response).to.be.an('Response');
    });

    it('forbes page', async function () {
      const requestUrl = 'https://www.forbes.com/sites/kenkam/2017/09/28/3-stocks-like-apple-was-10-years-ago-tesla-nvidia-and-alibaba/#2636f6c2f0fa';
      const expectedKeys = [
        'favicon',
        'fbAppId',
        'ogDate',
        'articleAuthor',
        'articleSection',
        'author',
        'ogDescription',
        'ogImage',
        'ogLocale',
        'ogSiteName',
        'ogTitle',
        'ogType',
        'ogUrl',
        'requestUrl',
        'success',
        'charset',
        'twitterCard',
        'twitterCreator',
        'twitterDescription',
        'twitterImage',
        'twitterSite',
        'twitterTitle',
      ];

      const { error, result, response } = await ogs({ url: requestUrl });
      console.log('error:', error);
      console.log('result:', result);
      expect(error).to.be.eql(false);
      expect(result.ogTitle).to.be.eql('3 Stocks Like Apple Was 10 Years Ago: Tesla, Nvidia And Alibaba');
      expect(result.ogSiteName).to.be.eql('Forbes');
      expect(result.articleAuthor).to.be.eql('Ken Kam');
      expect(result.articleSection).to.be.eql('Markets');
      expect(result.author).to.be.eql('Ken Kam');
      expect(result.ogType).to.be.eql('article');
      expect(result.ogUrl).to.be.eql('https://www.forbes.com/sites/kenkam/2017/09/28/3-stocks-like-apple-was-10-years-ago-tesla-nvidia-and-alibaba/');
      expect(result.favicon).to.be.eql('https://i.forbesimg.com/48X48-F.png');
      // The description texts are only checked for being non-empty strings.
      expect(result.ogDescription).to.be.an('string').and.to.not.be.empty;
      expect(result.twitterCard).to.be.eql('summary_large_image');
      expect(result.twitterSite).to.be.eql('@forbes');
      expect(result.twitterCreator).to.be.eql('@MarketocracyInc');
      expect(result.ogLocale).to.be.eql('en');
      expect(result.twitterTitle).to.be.eql('3 Stocks Like Apple Was 10 Years Ago: Tesla, Nvidia And Alibaba');
      expect(result.twitterDescription).to.be.an('string').and.to.not.be.empty;
      expect(result.ogImage).to.be.eql([{
        url: 'https://imageio.forbes.com/specials-images/imageserve/825671570/0x0.jpg?format=jpg&height=900&width=1600&fit=bounds',
        type: 'image/jpeg',
      }]);
      expect(result.twitterImage).to.be.eql([{
        url: 'https://imageio.forbes.com/specials-images/imageserve/825671570/0x0.jpg?format=jpg&height=600&width=1200&fit=bounds',
      }]);
      expect(result.requestUrl).to.be.eql(requestUrl);
      expect(result.charset).to.be.eql('utf-8');
      expect(result.fbAppId).to.be.eql('123694841080850');
      expect(result.success).to.be.eql(true);
      expect(result).to.have.all.keys(...expectedKeys);
      expect(response).to.be.an('Response');
    });
  });
});
155 |
--------------------------------------------------------------------------------
/tests/integration/spotify.spec.ts:
--------------------------------------------------------------------------------
1 | import { expect } from 'chai';
2 |
3 | import ogs from '../../index';
4 |
describe('spotify', function () {
  // Every case calls ogs() with a single URL, logs what came back, and then
  // pins the individual fields plus the exact set of keys on the result.

  it('album should return music:album and associated tags', async function () {
    const albumUrl = 'https://open.spotify.com/album/5EBGCvO6upi3GNknMVe9x9';
    const albumArt = [{ url: 'https://i.scdn.co/image/ab67616d0000b27397508a4b756763370510bd44' }];
    // Track ids in album order; every track is expected on disc 1.
    const expectedSongs = [
      '6EuE9M1viu9gkdFSafia9o',
      '2r4JRwcbIeuAzWjH4YXlLs',
      '3qnoOm4fwZPBS116f5hpgF',
      '3dG6tjetoR4GMmUGZUprLt',
      '1DdgqEZk4Hqfpl3drdXJun',
      '6Bg7MznA9X0dIhlAsLyBYj',
      '2VYb3Fb5iK5Y8HGZ8oEEkp',
    ].map((trackId, index) => ({
      url: `https://open.spotify.com/track/${trackId}`,
      track: String(index + 1),
      disc: '1',
    }));
    const expectedKeys = [
      'favicon',
      'fbAppId',
      'jsonLD',
      'alAndroidAppName',
      'alAndroidPackage',
      'alAndroidUrl',
      'alIosAppName',
      'alIosAppStoreId',
      'alIosUrl',
      'musicMusician',
      'musicReleaseDate',
      'musicSong',
      'ogDescription',
      'ogImage',
      'ogLocale',
      'ogSiteName',
      'ogTitle',
      'ogType',
      'ogUrl',
      'requestUrl',
      'success',
      'charset',
      'twitterCard',
      'twitterDescription',
      'twitterImage',
      'twitterSite',
      'twitterTitle',
    ];

    const { error, result, response } = await ogs({ url: albumUrl });
    console.log('error:', error);
    console.log('result:', result);

    expect(error).to.be.eql(false);
    expect(result.alAndroidAppName).to.be.eql('Spotify');
    expect(result.alAndroidPackage).to.be.eql('com.spotify.music');
    expect(result.alAndroidUrl).to.be.eql('spotify://album/5EBGCvO6upi3GNknMVe9x9');
    expect(result.alIosAppName).to.be.eql('Spotify');
    expect(result.alIosAppStoreId).to.be.eql('324684580');
    expect(result.alIosUrl).to.be.eql('spotify://album/5EBGCvO6upi3GNknMVe9x9');
    expect(result.ogTitle).to.be.eql('ye');
    // Disabled checks, kept for reference:
    // expect(result.ogDescription).to.be.eql('Album · Kanye West · 2018 · 7 songs');
    // expect(result.favicon).to.be.eql('https://open.scdn.co/cdn/images/favicon.0c211e2e.png');
    // expect(result.ogUrl).to.be.eql('https://open.spotify.com/album/5EBGCvO6upi3GNknMVe9x9');
    expect(result.ogType).to.be.eql('music.album');
    expect(result.musicMusician).to.be.eql('https://open.spotify.com/artist/5K4W6rqBFWDnAN6FQUkS6x');
    expect(result.musicReleaseDate).to.be.eql('2018-06-01');
    expect(result.twitterTitle).to.be.eql('ye');
    expect(result.twitterSite).to.be.eql('@spotify');
    // expect(result.twitterDescription).to.be.eql('Album · Kanye West · 2018 · 7 songs');
    expect(result.twitterCard).to.be.eql('summary');
    expect(result.ogLocale).to.be.eql('en');
    expect(result.ogSiteName).to.be.eql('Spotify');
    expect(result.ogImage).to.be.eql(albumArt);
    expect(result.twitterImage).to.be.eql(albumArt);
    expect(result.musicSong).to.be.eql(expectedSongs);
    expect(result.requestUrl).to.be.eql(albumUrl);
    expect(result.charset).to.be.eql('utf-8');
    expect(result.fbAppId).to.be.eql('174829003346');
    expect(result.jsonLD).to.be.an('array').and.to.not.be.empty;
    expect(result.success).to.be.eql(true);
    expect(result).to.have.all.keys(expectedKeys);
    expect(response).to.be.an('Response');
  });

  it('artist should return music:musician', async function () {
    const artistUrl = 'https://open.spotify.com/artist/5K4W6rqBFWDnAN6FQUkS6x';
    const artistImage = [{ url: 'https://i.scdn.co/image/ab6761610000e5eb6e835a500e791bf9c27a422a' }];
    const expectedKeys = [
      'favicon',
      'fbAppId',
      'jsonLD',
      'alAndroidAppName',
      'alAndroidPackage',
      'alAndroidUrl',
      'alIosAppName',
      'alIosAppStoreId',
      'alIosUrl',
      'ogDescription',
      'ogImage',
      'ogLocale',
      'ogSiteName',
      'ogTitle',
      'ogType',
      'ogUrl',
      'requestUrl',
      'success',
      'charset',
      'twitterCard',
      'twitterDescription',
      'twitterImage',
      'twitterSite',
      'twitterTitle',
    ];

    const { error, result, response } = await ogs({ url: artistUrl });
    console.log('error:', error);
    console.log('result:', result);

    expect(error).to.be.eql(false);
    expect(result.alAndroidAppName).to.be.eql('Spotify');
    expect(result.alAndroidPackage).to.be.eql('com.spotify.music');
    expect(result.alAndroidUrl).to.be.eql('spotify://artist/5K4W6rqBFWDnAN6FQUkS6x');
    expect(result.alIosAppName).to.be.eql('Spotify');
    expect(result.alIosAppStoreId).to.be.eql('324684580');
    expect(result.alIosUrl).to.be.eql('spotify://artist/5K4W6rqBFWDnAN6FQUkS6x');
    expect(result.ogTitle).to.be.eql('Kanye West');
    // Disabled check, kept for reference:
    // expect(result.favicon).to.be.eql('https://open.scdn.co/cdn/images/favicon.0c211e2e.png');
    expect(result.ogDescription).to.be.an('string').and.to.not.be.empty;
    expect(result.ogUrl).to.be.eql(artistUrl);
    expect(result.ogType).to.be.eql('profile');
    expect(result.twitterTitle).to.be.eql('Kanye West');
    expect(result.twitterSite).to.be.eql('@spotify');
    expect(result.twitterDescription).to.be.an('string').and.to.not.be.empty;
    expect(result.twitterCard).to.be.eql('summary');
    expect(result.ogSiteName).to.be.eql('Spotify');
    expect(result.ogLocale).to.be.eql('en');
    expect(result.ogImage).to.be.eql(artistImage);
    expect(result.twitterImage).to.be.eql(artistImage);
    expect(result.requestUrl).to.be.eql(artistUrl);
    expect(result.charset).to.be.eql('utf-8');
    expect(result.fbAppId).to.be.eql('174829003346');
    expect(result.jsonLD).to.be.an('array').and.to.not.be.empty;
    expect(result.success).to.be.eql(true);
    expect(result).to.have.all.keys(expectedKeys);
    expect(response).to.be.an('Response');
  });

  it('track should return music:song and associated tags', async function () {
    const trackUrl = 'https://open.spotify.com/track/3p6fkbeZDIVqapfdgQe6fm';
    const trackArt = [{ url: 'https://i.scdn.co/image/ab67616d0000b2730939dadf614e70aeffc6710c' }];
    const expectedKeys = [
      'favicon',
      'fbAppId',
      'jsonLD',
      'alAndroidAppName',
      'alAndroidPackage',
      'alAndroidUrl',
      'alIosAppName',
      'alIosAppStoreId',
      'alIosUrl',
      'musicAlbum',
      'musicAlbumTrack',
      'musicDuration',
      'musicMusician',
      'musicReleaseDate',
      'ogAudio',
      'ogAudioType',
      'ogDescription',
      'ogImage',
      'ogLocale',
      'ogSiteName',
      'ogTitle',
      'ogType',
      'ogUrl',
      'requestUrl',
      'success',
      'charset',
      'twitterCard',
      'twitterDescription',
      'twitterImage',
      'twitterSite',
      'twitterTitle',
    ];

    const { error, result, response } = await ogs({ url: trackUrl });
    console.log('error:', error);
    console.log('result:', result);

    expect(error).to.be.eql(false);
    expect(result.alAndroidAppName).to.be.eql('Spotify');
    expect(result.alAndroidPackage).to.be.eql('com.spotify.music');
    expect(result.alAndroidUrl).to.be.eql('spotify://track/3p6fkbeZDIVqapfdgQe6fm');
    expect(result.alIosAppName).to.be.eql('Spotify');
    expect(result.alIosAppStoreId).to.be.eql('324684580');
    expect(result.alIosUrl).to.be.eql('spotify://track/3p6fkbeZDIVqapfdgQe6fm');
    expect(result.ogTitle).to.be.eql('Famous');
    // Disabled check, kept for reference:
    // expect(result.ogDescription).to.be.eql('Song · Kanye West · 2016');
    expect(result.ogUrl).to.be.eql(trackUrl);
    // expect(result.favicon).to.be.eql('https://open.scdn.co/cdn/images/favicon32.a19b4f5b.png');
    expect(result.ogType).to.be.eql('music.song');
    expect(result.musicDuration).to.be.eql('196');
    expect(result.musicAlbum).to.be.eql('https://open.spotify.com/album/4xM1pUHZp9HsuKNxyOQDR0');
    expect(result.musicAlbumTrack).to.be.eql('4');
    expect(result.musicMusician).to.be.eql('https://open.spotify.com/artist/5K4W6rqBFWDnAN6FQUkS6x');
    expect(result.musicReleaseDate).to.be.eql('2016-06-10');
    expect(result.ogAudio).to.be.an('string').and.to.not.be.empty;
    expect(result.ogAudioType).to.be.eql('audio/mpeg');
    expect(result.twitterTitle).to.be.eql('Famous');
    expect(result.twitterSite).to.be.eql('@spotify');
    // expect(result.twitterDescription).to.be.eql('Song · Kanye West · 2016');
    expect(result.twitterCard).to.be.eql('summary');
    expect(result.ogSiteName).to.be.eql('Spotify');
    expect(result.ogLocale).to.be.eql('en');
    expect(result.ogImage).to.be.eql(trackArt);
    expect(result.twitterImage).to.be.eql(trackArt);
    expect(result.requestUrl).to.be.eql(trackUrl);
    expect(result.charset).to.be.eql('utf-8');
    expect(result.fbAppId).to.be.eql('174829003346');
    expect(result.jsonLD).to.be.an('array').and.to.not.be.empty;
    expect(result.success).to.be.eql(true);
    expect(result).to.have.all.keys(expectedKeys);
    expect(response).to.be.an('Response');
  });

  it('playlist should return music:playlist and associated tags', async function () {
    // This case requests a page under jshemas.github.io rather than open.spotify.com.
    const pageUrl = 'https://jshemas.github.io/openGraphScraperPages/spotifyPlayList';
    const playlistArt = [{ url: 'https://i.scdn.co/image/ab67706c0000bebb5a535b3001ccf567ddef2927' }];
    // Track ids in playlist order; playlist entries carry no disc number.
    const expectedSongs = [
      '2LTlO3NuNVN70lp2ZbVswF',
      '7hZoTr4ffMGqm9opMiGmBp',
      '7FW4HUo39yRwGVxEoHqTa4',
      '6MF4tRr5lU8qok8IKaFOBE',
      '5TCBWmEBrin7etRa4Lswr1',
      '77KMttn3Lic7ZQKDlPqp8v',
      '6XH0KeCZ0nRysAeSJYRFFg',
      '1pRjlrQMZYo1K5i1RcLGs7',
      '1oOEkBNp4zWnkD7nWjJdog',
      '15YteXcgC1tRTDrKKclPOp',
    ].map((trackId, index) => ({
      url: `https://open.spotify.com/track/${trackId}`,
      track: String(index + 1),
    }));
    const expectedKeys = [
      'favicon',
      'fbAppId',
      'jsonLD',
      'alAndroidAppName',
      'alAndroidPackage',
      'alAndroidUrl',
      'alIosAppName',
      'alIosAppStoreId',
      'alIosUrl',
      'musicCreator',
      'musicSong',
      'ogDescription',
      'ogImage',
      'ogLocale',
      'ogSiteName',
      'ogTitle',
      'ogType',
      'ogUrl',
      'requestUrl',
      'success',
      'charset',
      'twitterAppIdGooglePlay',
      'twitterAppIdiPhone',
      'twitterCard',
      'twitterDescription',
      'twitterImage',
      'twitterPlayer',
      'twitterSite',
      'twitterTitle',
    ];

    const { error, result, response } = await ogs({ url: pageUrl });
    console.log('error:', error);
    console.log('result:', result);

    expect(error).to.be.eql(false);
    expect(result.alAndroidAppName).to.be.eql('Spotify');
    expect(result.alAndroidPackage).to.be.eql('com.spotify.music');
    expect(result.alAndroidUrl).to.be.eql('spotify://playlist/4BSIiLTu7qzDZLDdkHaty9');
    expect(result.alIosAppName).to.be.eql('Spotify');
    expect(result.alIosAppStoreId).to.be.eql('324684580');
    expect(result.alIosUrl).to.be.eql('spotify://playlist/4BSIiLTu7qzDZLDdkHaty9');
    expect(result.ogTitle).to.be.eql('Calm Hip Hop Mindset');
    expect(result.ogDescription).to.be.an('string').and.to.not.be.empty;
    expect(result.ogUrl).to.be.eql('https://open.spotify.com/playlist/4BSIiLTu7qzDZLDdkHaty9');
    expect(result.favicon).to.be.eql('https://open.scdn.co/cdn/images/favicon32.a19b4f5b.png');
    expect(result.ogType).to.be.eql('music.playlist');
    expect(result.musicCreator).to.be.eql('https://open.spotify.com/user/mjaschmidt');
    expect(result.twitterTitle).to.be.eql('Calm Hip Hop Mindset');
    expect(result.twitterAppIdiPhone).to.be.eql('324684580');
    expect(result.twitterAppIdGooglePlay).to.be.eql('com.spotify.music');
    expect(result.twitterSite).to.be.eql('@spotify');
    expect(result.twitterDescription).to.be.an('string').and.to.not.be.empty;
    expect(result.twitterCard).to.be.eql('audio');
    expect(result.ogSiteName).to.be.eql('Spotify');
    expect(result.ogLocale).to.be.eql('en');
    expect(result.ogImage).to.be.eql(playlistArt);
    expect(result.twitterImage).to.be.eql(playlistArt);
    expect(result.twitterPlayer).to.be.eql([{
      url: 'https://open.spotify.com/embed/playlist/4BSIiLTu7qzDZLDdkHaty9?utm_campaign=twitter-player&utm_source=open&utm_medium=twitter',
      width: '504',
      height: '584',
    }]);
    expect(result.musicSong).to.be.eql(expectedSongs);
    expect(result.requestUrl).to.be.eql(pageUrl);
    expect(result.charset).to.be.eql('UTF-8');
    expect(result.fbAppId).to.be.eql('174829003346');
    expect(result.jsonLD).to.be.an('array').and.to.not.be.empty;
    expect(result.success).to.be.eql(true);
    expect(result).to.have.all.keys(expectedKeys);
    expect(response).to.be.an('Response');
  });
});
363 |
--------------------------------------------------------------------------------
/tests/integration/statusCode.spec.ts:
--------------------------------------------------------------------------------
1 | import { expect } from 'chai';
2 |
3 | import ogs from '../../index';
4 |
5 | // http://httpstat.us keeps going offline, we need to find a replacement
6 | // eslint-disable-next-line mocha/no-skipped-tests
describe.skip('statusCode', function () {
  // Each case expects ogs() to reject for an error status page and pins the
  // shape of the rejection value ({ error, result, response }).
  context('when the site returns', function () {
    it('403', function () {
      // The two-argument form of then() is deliberate: a failure raised by the
      // "should not resolve" branch is reported as-is instead of being handed
      // to the rejection assertions, where it would surface as a misleading
      // error about undefined fields.
      return ogs({ url: 'http://httpstat.us/403' }).then(
        function () {
          expect.fail('this should not happen');
        },
        function ({ error, result, response }) {
          console.log('error:', error);
          console.log('result:', result);
          expect(error).to.be.eql(true);
          expect(result.success).to.be.eql(false);
          expect(result.requestUrl).to.be.eql('http://httpstat.us/403');
          expect(result.error).to.eql('Server has returned a 400/500 error code');
          expect(result.errorDetails.toString()).to.eql('Error: Server has returned a 400/500 error code');
          expect(result).to.have.all.keys(
            'error',
            'errorDetails',
            'requestUrl',
            'success',
          );
          expect(response).to.eql(undefined);
        },
      );
    });

    it('500', function () {
      // Same structure as the 403 case: resolve => explicit failure,
      // reject => assertions on the rejection value.
      return ogs({ url: 'http://httpstat.us/500' }).then(
        function () {
          expect.fail('this should not happen');
        },
        function ({ error, result, response }) {
          console.log('error:', error);
          console.log('result:', result);
          expect(error).to.be.eql(true);
          expect(result.success).to.be.eql(false);
          expect(result.requestUrl).to.be.eql('http://httpstat.us/500');
          expect(result.error).to.eql('Server has returned a 400/500 error code');
          expect(result.errorDetails.toString()).to.eql('Error: Server has returned a 400/500 error code');
          expect(result).to.have.all.keys(
            'error',
            'errorDetails',
            'requestUrl',
            'success',
          );
          expect(response).to.eql(undefined);
        },
      );
    });
  });
});
55 |
--------------------------------------------------------------------------------
/tests/integration/twitter.spec.ts:
--------------------------------------------------------------------------------
1 | import { expect } from 'chai';
2 |
3 | import ogs from '../../index';
4 |
describe('twitter', function () {
  // Each case requests a page under jshemas.github.io/openGraphScraperPages,
  // logs the outcome, and pins both the Open Graph and the Twitter card fields.
  context('Should Return correct Open Graph Info + Some Twitter Info ', function () {
    it('On Twitter Site', async function () {
      const pageUrl = 'https://jshemas.github.io/openGraphScraperPages/twitter-dev';
      const profileImageUrl = 'https://web.archive.org/web/20160303190414im_/https://pbs.twimg.com/profile_images/2284174872/7df3h38zabcvjylnyfe3.png';
      const expectedKeys = [
        'favicon',
        'ogImage',
        'ogSiteName',
        'ogTitle',
        'ogLocale',
        'ogType',
        'ogUrl',
        'requestUrl',
        'success',
        'charset',
        'twitterCard',
        'twitterDescription',
        'twitterImage',
        'twitterTitle',
        'twitterUrl',
      ];

      const { error, result, response } = await ogs({ url: pageUrl });
      console.log('error:', error);
      console.log('result:', result);

      expect(error).to.be.eql(false);
      expect(result.twitterTitle).to.be.eql('Twitter Developers');
      expect(result.favicon).to.be.eql('https://web.archive.org/web/20160303190414im_/https://abs.twimg.com/favicons/favicon.ico');
      expect(result.ogTitle).to.be.eql('Twitter Developers');
      expect(result.ogType).to.be.eql('website');
      expect(result.ogUrl).to.be.eql('https://web.archive.org/web/20160303190414im_/https://dev.twitter.com/');
      expect(result.twitterCard).to.be.eql('summary');
      expect(result.ogSiteName).to.be.eql('Twitter Developers');
      expect(result.ogLocale).to.be.eql('en');
      expect(result.twitterUrl).to.be.eql('https://web.archive.org/web/20160303190414im_/https://dev.twitter.com/');
      expect(result.twitterDescription).to.be.eql('The Twitter platform connects your website or application with the worldwide conversation happening on Twitter.');
      expect(result.ogImage).to.be.eql([{ url: profileImageUrl, type: 'png' }]);
      expect(result.twitterImage).to.be.eql([{ url: profileImageUrl, width: '500', height: '500' }]);
      expect(result.requestUrl).to.be.eql(pageUrl);
      expect(result.charset).to.be.eql('utf-8');
      expect(result.success).to.be.eql(true);
      expect(result).to.have.all.keys(expectedKeys);
      expect(response).to.be.an('Response');
    });

    it('On Github Site', async function () {
      const pageUrl = 'https://jshemas.github.io/openGraphScraperPages/github';
      const imageBase = 'https://web.archive.org/web/20170113081103im_/https://assets-cdn.github.com/images/modules/open_graph';
      const tagline = 'GitHub is where people build software. More than 19 million people use GitHub to discover, fork, and contribute to over 50 million projects.';
      const expectedKeys = [
        'favicon',
        'fbAppId',
        'ogDescription',
        'ogImage',
        'ogSiteName',
        'ogLocale',
        'ogTitle',
        'ogUrl',
        'requestUrl',
        'success',
        'charset',
        'twitterCard',
        'twitterCreator',
        'twitterCreatorId',
        'twitterDescription',
        'twitterImage',
        'twitterSite',
        'twitterSiteId',
        'twitterTitle',
      ];

      const { error, result, response } = await ogs({ url: pageUrl });
      console.log('error:', error);
      console.log('result:', result);

      expect(error).to.be.eql(false);
      expect(result.ogUrl).to.be.eql('https://web.archive.org/web/20170113081103/https://github.com/');
      expect(result.favicon).to.be.eql('https://web.archive.org/web/20170113081103im_/https://assets-cdn.github.com/favicon.ico');
      expect(result.ogSiteName).to.be.eql('GitHub');
      expect(result.ogTitle).to.be.eql('Build software better, together');
      expect(result.ogDescription).to.be.eql(tagline);
      expect(result.twitterSite).to.be.eql('github');
      expect(result.twitterSiteId).to.be.eql('13334762');
      expect(result.twitterCreator).to.be.eql('github');
      expect(result.twitterCreatorId).to.be.eql('13334762');
      expect(result.twitterCard).to.be.eql('summary_large_image');
      expect(result.twitterTitle).to.be.eql('GitHub');
      expect(result.ogLocale).to.be.eql('en');
      expect(result.twitterDescription).to.be.eql(tagline);
      expect(result.ogImage).to.be.eql([
        {
          url: `${imageBase}/github-logo.png`,
          width: '1200',
          height: '1200',
          type: 'image/png',
        },
        {
          url: `${imageBase}/github-mark.png`,
          width: '1200',
          height: '620',
          type: 'image/png',
        },
        {
          url: `${imageBase}/github-octocat.png`,
          width: '1200',
          height: '620',
          type: 'image/png',
        },
      ]);
      expect(result.twitterImage).to.be.eql([{
        url: `${imageBase}/github-logo.png`,
        width: '1200',
        height: '1200',
      }]);
      expect(result.requestUrl).to.be.eql(pageUrl);
      expect(result.charset).to.be.eql('utf-8');
      expect(result.fbAppId).to.be.eql('1401488693436528');
      expect(result.success).to.be.eql(true);
      expect(result).to.have.all.keys(expectedKeys);
      expect(response).to.be.an('Response');
    });

    it('On Atom Site', async function () {
      const pageUrl = 'https://jshemas.github.io/openGraphScraperPages/atom.html';
      const headline = 'A hackable text editor for the 21st Century';
      const expectedKeys = [
        'favicon',
        'ogDescription',
        'ogImage',
        'ogLocale',
        'ogSiteName',
        'ogTitle',
        'ogType',
        'ogUrl',
        'requestUrl',
        'success',
        'charset',
        'twitterCard',
        'twitterCreator',
        'twitterDescription',
        'twitterImage',
        'twitterSite',
        'twitterTitle',
      ];

      const { error, result, response } = await ogs({ url: pageUrl });
      console.log('error:', error);
      console.log('result:', result);

      expect(error).to.be.eql(false);
      expect(result.ogUrl).to.be.eql('https://web.archive.org/web/20170913111314/https://atom.io/');
      expect(result.favicon).to.be.eql('/web/20170913111314im_/https://atom.io/favicon.ico');
      expect(result.ogSiteName).to.be.eql('Atom');
      expect(result.ogTitle).to.be.eql(headline);
      expect(result.ogLocale).to.be.eql('en');
      expect(result.ogDescription).to.be.eql('At GitHub, we’re building the text editor we’ve always wanted: hackable to the core, but approachable on the first day without ever touching a config file. We can’t wait to see what you build with it.');
      expect(result.ogType).to.be.eql('website');
      expect(result.twitterCard).to.be.eql('summary_large_image');
      expect(result.twitterSite).to.be.eql('@AtomEditor');
      expect(result.twitterCreator).to.be.eql('@github');
      expect(result.twitterTitle).to.be.eql('Atom');
      expect(result.twitterDescription).to.be.eql(headline);
      // The Open Graph image is the "atom-mark" asset; the Twitter image is "atom-logo".
      expect(result.ogImage).to.be.eql([{
        url: 'https://web.archive.org/web/20170913111314im_/http://og.github.com/atom-mark/atom-mark@1200x630.png',
        width: '1200',
        height: '630',
        type: 'png',
      }]);
      expect(result.twitterImage).to.be.eql([{
        url: 'https://web.archive.org/web/20170913111314im_/http://og.github.com/atom-logo/atom-logo@1200x630.png',
        width: '1200',
        height: '630',
      }]);
      expect(result.requestUrl).to.be.eql(pageUrl);
      expect(result.charset).to.be.eql('UTF-8');
      expect(result.success).to.be.eql(true);
      expect(result).to.have.all.keys(expectedKeys);
      expect(response).to.be.an('Response');
    });
  });
});
183 |
--------------------------------------------------------------------------------
/tests/integration/url.spec.ts:
--------------------------------------------------------------------------------
1 | import { expect } from 'chai';
2 |
3 | import ogs from '../../index';
4 |
5 | describe('url', function () {
6 | it('http', function () {
7 | return ogs({ url: 'http://www.wikipedia.org/' }).then(function ({ error, result, response }) {
8 | console.log('error:', error);
9 | console.log('result:', result);
10 | expect(error).to.be.eql(false);
11 | expect(result.ogTitle).to.be.eql('Wikipedia, the free encyclopedia');
12 | expect(result.ogDescription).to.be.eql('Wikipedia is a free online encyclopedia, created and edited by volunteers around the world and hosted by the Wikimedia Foundation.');
13 | expect(result.ogLocale).to.be.eql('en');
14 | expect(result.requestUrl).to.be.eql('http://www.wikipedia.org/');
15 | expect(result.favicon).to.be.eql('/static/favicon/wikipedia.ico');
16 | expect(result.charset).to.be.eql('utf-8');
17 | expect(result.success).to.be.eql(true);
18 | expect(result).to.have.all.keys(
19 | 'charset',
20 | 'favicon',
21 | 'ogDescription',
22 | 'ogImage',
23 | 'ogLocale',
24 | 'ogTitle',
25 | 'ogType',
26 | 'requestUrl',
27 | 'success',
28 | );
29 | expect(response).to.be.an('Response');
30 | });
31 | });
32 |
33 | it('https', function () {
34 | return ogs({ url: 'https://www.wikipedia.org/' }).then(function ({ error, result, response }) {
35 | console.log('error:', error);
36 | console.log('result:', result);
37 | expect(error).to.be.eql(false);
38 | expect(result.ogTitle).to.be.eql('Wikipedia, the free encyclopedia');
39 | expect(result.ogDescription).to.be.eql('Wikipedia is a free online encyclopedia, created and edited by volunteers around the world and hosted by the Wikimedia Foundation.');
40 | expect(result.ogLocale).to.be.eql('en');
41 | expect(result.requestUrl).to.be.eql('https://www.wikipedia.org/');
42 | expect(result.favicon).to.be.eql('/static/favicon/wikipedia.ico');
43 | expect(result.charset).to.be.eql('utf-8');
44 | expect(result.success).to.be.eql(true);
45 | expect(result).to.have.all.keys(
46 | 'charset',
47 | 'favicon',
48 | 'ogDescription',
49 | 'ogImage',
50 | 'ogLocale',
51 | 'ogTitle',
52 | 'ogType',
53 | 'requestUrl',
54 | 'success',
55 | );
56 | expect(response).to.be.an('Response');
57 | });
58 | });
59 |
60 | it('no protocol', function () {
61 | return ogs({ url: 'www.wikipedia.org/' }).then(function ({ error, result, response }) {
62 | console.log('error:', error);
63 | console.log('result:', result);
64 | expect(error).to.be.eql(false);
65 | expect(result.ogTitle).to.be.eql('Wikipedia, the free encyclopedia');
66 | expect(result.ogDescription).to.be.eql('Wikipedia is a free online encyclopedia, created and edited by volunteers around the world and hosted by the Wikimedia Foundation.');
67 | expect(result.ogLocale).to.be.eql('en');
68 | expect(result.requestUrl).to.be.eql('http://www.wikipedia.org/');
69 | expect(result.favicon).to.be.eql('/static/favicon/wikipedia.ico');
70 | expect(result.charset).to.be.eql('utf-8');
71 | expect(result.success).to.be.eql(true);
72 | expect(result).to.have.all.keys(
73 | 'charset',
74 | 'favicon',
75 | 'ogDescription',
76 | 'ogImage',
77 | 'ogLocale',
78 | 'ogTitle',
79 | 'ogType',
80 | 'requestUrl',
81 | 'success',
82 | );
83 | expect(response).to.be.an('Response');
84 | });
85 | });
86 |
87 | it('no protocol and no wwww', function () {
88 | return ogs({ url: 'wikipedia.org/' }).then(function ({ error, result, response }) {
89 | console.log('error:', error);
90 | console.log('result:', result);
91 | expect(error).to.be.eql(false);
92 | expect(result.ogTitle).to.be.eql('Wikipedia, the free encyclopedia');
93 | expect(result.ogDescription).to.be.eql('Wikipedia is a free online encyclopedia, created and edited by volunteers around the world and hosted by the Wikimedia Foundation.');
94 | expect(result.ogLocale).to.be.eql('en');
95 | expect(result.requestUrl).to.be.eql('http://wikipedia.org/');
96 | expect(result.favicon).to.be.eql('/static/favicon/wikipedia.ico');
97 | expect(result.charset).to.be.eql('utf-8');
98 | expect(result.success).to.be.eql(true);
99 | expect(result).to.have.all.keys(
100 | 'charset',
101 | 'favicon',
102 | 'ogDescription',
103 | 'ogImage',
104 | 'ogLocale',
105 | 'ogTitle',
106 | 'ogType',
107 | 'requestUrl',
108 | 'success',
109 | );
110 | expect(response).to.be.an('Response');
111 | });
112 | });
113 |
114 | it('protocol with no wwww', function () {
115 | return ogs({ url: 'http://wikipedia.org/' }).then(function ({ error, result, response }) {
116 | console.log('error:', error);
117 | console.log('result:', result);
118 | expect(error).to.be.eql(false);
119 | expect(result.ogTitle).to.be.eql('Wikipedia, the free encyclopedia');
120 | expect(result.ogLocale).to.be.eql('en');
121 | expect(result.ogDescription).to.be.eql('Wikipedia is a free online encyclopedia, created and edited by volunteers around the world and hosted by the Wikimedia Foundation.');
122 | expect(result.requestUrl).to.be.eql('http://wikipedia.org/');
123 | expect(result.favicon).to.be.eql('/static/favicon/wikipedia.ico');
124 | expect(result.charset).to.be.eql('utf-8');
125 | expect(result.success).to.be.eql(true);
126 | expect(result).to.have.all.keys(
127 | 'charset',
128 | 'favicon',
129 | 'ogDescription',
130 | 'ogImage',
131 | 'ogLocale',
132 | 'ogTitle',
133 | 'ogType',
134 | 'requestUrl',
135 | 'success',
136 | );
137 | expect(response).to.be.an('Response');
138 | });
139 | });
140 |
141 | it('fake page', function () {
142 | return ogs({ url: 'http://testtesttest4564568.com' })
143 | .then(function () {
144 | expect().fail('this should not happen');
145 | })
146 | .catch(function ({ error, result, response }) {
147 | console.log('error:', error);
148 | console.log('result:', result);
149 | expect(error).to.be.eql(true);
150 | expect(result.success).to.be.eql(false);
151 | expect(result.requestUrl).to.be.eql('http://testtesttest4564568.com');
152 | expect(result.error).to.eql('Page not found');
153 | expect(result.errorDetails.toString()).to.eql('Error: Page not found');
154 | expect(result).to.have.all.keys(
155 | 'error',
156 | 'errorDetails',
157 | 'requestUrl',
158 | 'success',
159 | );
160 | expect(response).to.eql(undefined);
161 | });
162 | });
163 |
164 | it('empty url', function () {
165 | return ogs({ url: '' })
166 | .then(function () {
167 | expect().fail('this should not happen');
168 | })
169 | .catch(function ({ error, result, response }) {
170 | console.log('error:', error);
171 | console.log('result:', result);
172 | expect(error).to.be.eql(true);
173 | expect(result.success).to.be.eql(false);
174 | expect(result.requestUrl).to.be.eql('');
175 | expect(result.error).to.eql('Invalid URL');
176 | expect(result.errorDetails.toString()).to.eql('Error: Invalid URL');
177 | expect(result).to.have.all.keys(
178 | 'error',
179 | 'errorDetails',
180 | 'requestUrl',
181 | 'success',
182 | );
183 | expect(response).to.eql(undefined);
184 | });
185 | });
186 |
187 | it('empty options', function () {
188 | return ogs({})
189 | .then(function () {
190 | expect().fail('this should not happen');
191 | })
192 | .catch(function ({ error, result, response }) {
193 | console.log('error:', error);
194 | console.log('result:', result);
195 | expect(error).to.be.eql(true);
196 | expect(result.success).to.be.eql(false);
197 | expect(result.requestUrl).to.eql(undefined);
198 | expect(result.error).to.eql('Invalid URL');
199 | expect(result.errorDetails.toString()).to.eql('Error: Invalid URL');
200 | expect(result).to.have.all.keys(
201 | 'error',
202 | 'errorDetails',
203 | 'requestUrl',
204 | 'success',
205 | );
206 | expect(response).to.eql(undefined);
207 | });
208 | });
209 |
210 | it('url is a string of numbers', function () {
211 | return ogs({ url: '2323233' })
212 | .then(function () {
213 | expect().fail('this should not happen');
214 | })
215 | .catch(function ({ error, result, response }) {
216 | console.log('error:', error);
217 | console.log('result:', result);
218 | expect(error).to.be.eql(true);
219 | expect(result.success).to.be.eql(false);
220 | expect(result.requestUrl).to.be.eql('2323233');
221 | expect(result.error).to.eql('Invalid URL');
222 | expect(result.errorDetails.toString()).to.eql('Error: Invalid URL');
223 | expect(result).to.have.all.keys(
224 | 'error',
225 | 'errorDetails',
226 | 'requestUrl',
227 | 'success',
228 | );
229 | expect(response).to.eql(undefined);
230 | });
231 | });
232 |
// Free text with spaces is not a URL: ogs must reject with 'Invalid URL' and echo the input
// back as `requestUrl`.
it('url is a string of words', function () {
  return ogs({ url: 'this is a test' }).then(
    // The rejection handler is passed as the second argument (instead of a trailing `.catch`)
    // so that this deliberate failure is reported as-is rather than being caught and
    // destructured as if it were an ogs rejection.
    () => expect.fail('this should not happen'),
    ({ error, result, response }) => {
      console.log('error:', error);
      console.log('result:', result);
      expect(error).to.be.eql(true);
      expect(result.success).to.be.eql(false);
      expect(result.requestUrl).to.be.eql('this is a test');
      expect(result.error).to.eql('Invalid URL');
      expect(result.errorDetails.toString()).to.eql('Error: Invalid URL');
      expect(result).to.have.all.keys(
        'error',
        'errorDetails',
        'requestUrl',
        'success',
      );
      expect(response).to.eql(undefined);
    },
  );
});
255 |
// An https URL must be rejected as 'Invalid URL' when the caller's urlValidatorSettings
// only list 'http' under `protocols`.
it('url is invalid because user disallows https with urlValidatorSettings', function () {
  return ogs({
    url: 'https://www.wikipedia.org/',
    urlValidatorSettings: {
      allow_fragments: true,
      allow_protocol_relative_urls: false,
      allow_query_components: true,
      allow_trailing_dot: false,
      allow_underscores: false,
      protocols: ['http'],
      require_host: true,
      require_port: false,
      require_protocol: false,
      require_tld: true,
      require_valid_protocol: true,
      validate_length: true,
    },
  }).then(
    // The rejection handler is passed as the second argument (instead of a trailing `.catch`)
    // so that this deliberate failure is reported as-is rather than being caught and
    // destructured as if it were an ogs rejection.
    () => expect.fail('this should not happen'),
    ({ error, result, response }) => {
      console.log('error:', error);
      console.log('result:', result);
      expect(error).to.be.eql(true);
      expect(result.success).to.be.eql(false);
      expect(result.requestUrl).to.be.eql('https://www.wikipedia.org/');
      expect(result.error).to.eql('Invalid URL');
      expect(result.errorDetails.toString()).to.eql('Error: Invalid URL');
      expect(result).to.have.all.keys(
        'error',
        'errorDetails',
        'requestUrl',
        'success',
      );
      expect(response).to.eql(undefined);
    },
  );
});
294 |
// A URL whose path ends in `.pdf` (even with a query string) must be rejected with
// 'Must scrape an HTML page' and no `response`.
it('url is to a pdf', function () {
  return ogs({ url: 'test.pdf?123' }).then(
    // The rejection handler is passed as the second argument (instead of a trailing `.catch`)
    // so that this deliberate failure is reported as-is rather than being caught and
    // destructured as if it were an ogs rejection.
    () => expect.fail('this should not happen'),
    ({ error, result, response }) => {
      console.log('error:', error);
      console.log('result:', result);
      expect(error).to.be.eql(true);
      expect(result.success).to.be.eql(false);
      expect(result.requestUrl).to.be.eql('test.pdf?123');
      expect(result.error).to.eql('Must scrape an HTML page');
      expect(result.errorDetails.toString()).to.eql('Error: Must scrape an HTML page');
      expect(result).to.have.all.keys(
        'error',
        'errorDetails',
        'requestUrl',
        'success',
      );
      expect(response).to.eql(undefined);
    },
  );
});
317 | });
318 |
--------------------------------------------------------------------------------
/tests/integration/video.spec.ts:
--------------------------------------------------------------------------------
1 | import { expect } from 'chai';
2 |
3 | import ogs from '../../index';
4 |
5 | // TODO: youtube is blocking requests from github, will need to find a way around this
6 | describe.skip('video', function () {
// Scrapes a live YouTube watch page with a desktop Chrome user agent and pins every field
// the scraper is expected to return for it. All URLs are derived from the single video id.
it('Test Youtube Video - Should Return correct Open Graph Info', async function () {
  const videoId = 'dQw4w9WgXcQ';
  const watchUrl = `https://www.youtube.com/watch?v=${videoId}`;
  const embedUrl = `https://www.youtube.com/embed/${videoId}`;
  const thumbnailUrl = `https://i.ytimg.com/vi/${videoId}/maxresdefault.jpg`;
  const appLinkTarget = `www.youtube.com/watch?v=${videoId}&feature=applinks`;
  const appUrl = `vnd.youtube://${appLinkTarget}`;
  const title = 'Rick Astley - Never Gonna Give You Up (Official Music Video)';
  const headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
  };

  const { error, result, response } = await ogs({ url: watchUrl, fetchOptions: { headers } });
  console.log('error:', error);
  console.log('result:', result);
  expect(error).to.eql(false);

  // App Links metadata.
  expect(result.alAndroidAppName).to.eql('YouTube');
  expect(result.alAndroidPackage).to.eql('com.google.android.youtube');
  expect(result.alAndroidUrl).to.eql(appUrl);
  expect(result.alIosAppName).to.eql('YouTube');
  expect(result.alIosAppStoreId).to.eql('544007664');
  expect(result.alIosUrl).to.eql(appUrl);
  expect(result.alWebUrl).to.be.oneOf([`https://${appLinkTarget}`, `http://${appLinkTarget}`]);

  // Scalar Open Graph and Twitter card fields.
  expect(result.ogSiteName).to.eql('YouTube');
  expect(result.ogUrl).to.eql(watchUrl);
  expect(result.ogTitle).to.eql(title);
  expect(result.ogDescription).to.be.a('string').and.to.not.be.empty;
  expect(result.ogType).to.eql('video.other');
  expect(result.ogLocale).to.be.oneOf(['en', 'en-US', 'nl-NL']);
  expect(result.twitterCard).to.eql('player');
  expect(result.twitterSite).to.eql('@youtube');
  expect(result.twitterTitle).to.eql(title);
  expect(result.twitterDescription).to.be.a('string').and.to.not.be.empty;
  expect(result.twitterAppNameiPhone).to.eql('YouTube');
  expect(result.twitterAppIdiPhone).to.eql('544007664');
  expect(result.twitterAppNameiPad).to.eql('YouTube');
  expect(result.twitterAppIdiPad).to.eql('544007664');
  expect(result.twitterUrl).to.eql(watchUrl);
  expect(result.ogDate).to.eql('2009-10-24T23:57:33-07:00');
  expect(result.twitterAppUrliPhone).to.eql(appUrl);
  expect(result.twitterAppUrliPad).to.eql(appUrl);
  expect(result.twitterAppNameGooglePlay).to.eql('YouTube');
  expect(result.twitterAppIdGooglePlay).to.eql('com.google.android.youtube');
  expect(result.twitterAppUrlGooglePlay).to.eql(watchUrl);

  // Media arrays.
  expect(result.ogImage).to.eql([{
    url: thumbnailUrl, width: '1280', height: '720', type: 'jpg',
  }]);
  expect(result.ogVideo).to.eql([{
    url: embedUrl, width: '1280', height: '720', type: 'text/html',
  }]);
  expect(result.twitterImage).to.eql([{ url: thumbnailUrl }]);
  expect(result.twitterPlayer).to.eql([{ url: embedUrl, width: '1280', height: '720' }]);

  expect(result.ogVideoTag).to.eql('never gonna give you up karaoke');
  expect(result.ogVideoSecureURL).to.eql(embedUrl);
  expect(result.requestUrl).to.eql(watchUrl);
  expect(result.charset).to.eql('UTF-8');
  expect(result.success).to.eql(true);
  expect(result.fbAppId).to.eql('87741124305');
  expect(result.jsonLD).to.be.an('array').and.to.not.be.empty;

  // NOTE(review): this guard runs after the strict ogDate assertion above, so it cannot take
  // effect as written - confirm which of the two is the intended behaviour.
  if (result.ogDate === undefined) {
    result.ogDate = 'hack because sometimes this does not come back for some reason';
  }

  // The result must contain exactly these keys and no others.
  expect(result).to.have.all.keys(
    'favicon', 'fbAppId', 'jsonLD',
    'alAndroidAppName', 'alAndroidPackage', 'alAndroidUrl',
    'alIosAppName', 'alIosAppStoreId', 'alIosUrl', 'alWebUrl',
    'ogDate', 'ogDescription', 'ogImage', 'ogLocale', 'ogSiteName',
    'ogTitle', 'ogType', 'ogUrl', 'ogVideo', 'ogVideoTag', 'ogVideoSecureURL',
    'requestUrl', 'success', 'charset',
    'twitterAppIdGooglePlay', 'twitterAppIdiPad', 'twitterAppIdiPhone',
    'twitterAppNameGooglePlay', 'twitterAppNameiPad', 'twitterAppNameiPhone',
    'twitterAppUrlGooglePlay', 'twitterAppUrliPad', 'twitterAppUrliPhone',
    'twitterCard', 'twitterDescription', 'twitterImage', 'twitterPlayer',
    'twitterSite', 'twitterTitle', 'twitterUrl',
  );
  expect(response).to.be.an('Response');
});
114 |
// Same field-by-field check as the other YouTube test, for a second video id (the test title
// says this page contains a bad escape sequence). All URLs are derived from the video id.
it('Test Youtube Video with bad escape sequence - Should Return correct Open Graph Info', async function () {
  const videoId = 'nFbKMg4E3JM';
  const watchUrl = `https://www.youtube.com/watch?v=${videoId}`;
  const embedUrl = `https://www.youtube.com/embed/${videoId}`;
  const thumbnailUrl = `https://i.ytimg.com/vi/${videoId}/maxresdefault.jpg`;
  const appLinkTarget = `www.youtube.com/watch?v=${videoId}&feature=applinks`;
  const appUrl = `vnd.youtube://${appLinkTarget}`;
  const title = 'Force Class 10 in One Shot (Full Chapter) | ICSE 10 Physics Chapter 1 - Abhishek Sir |Vedantu 9 & 10';
  const headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
  };

  const { error, result, response } = await ogs({ url: watchUrl, fetchOptions: { headers } });
  console.log('error:', error);
  console.log('result:', result);
  expect(error).to.eql(false);

  // App Links metadata.
  expect(result.alAndroidAppName).to.eql('YouTube');
  expect(result.alAndroidPackage).to.eql('com.google.android.youtube');
  expect(result.alAndroidUrl).to.eql(appUrl);
  expect(result.alIosAppName).to.eql('YouTube');
  expect(result.alIosAppStoreId).to.eql('544007664');
  expect(result.alIosUrl).to.eql(appUrl);
  expect(result.alWebUrl).to.be.oneOf([`https://${appLinkTarget}`, `http://${appLinkTarget}`]);

  // Scalar Open Graph and Twitter card fields.
  expect(result.ogSiteName).to.eql('YouTube');
  expect(result.ogUrl).to.eql(watchUrl);
  expect(result.ogTitle).to.eql(title);
  expect(result.ogDescription).to.be.a('string').and.to.not.be.empty;
  expect(result.ogType).to.eql('video.other');
  expect(result.ogLocale).to.be.oneOf(['en', 'en-US', 'nl-NL']);
  expect(result.twitterCard).to.eql('player');
  expect(result.twitterSite).to.eql('@youtube');
  expect(result.twitterTitle).to.eql(title);
  expect(result.twitterDescription).to.be.a('string').and.to.not.be.empty;
  expect(result.twitterAppNameiPhone).to.eql('YouTube');
  expect(result.twitterAppIdiPhone).to.eql('544007664');
  expect(result.twitterAppNameiPad).to.eql('YouTube');
  expect(result.twitterAppIdiPad).to.eql('544007664');
  expect(result.twitterUrl).to.eql(watchUrl);
  expect(result.ogDate).to.eql('2021-06-11T09:14:37-07:00');
  expect(result.twitterAppUrliPhone).to.eql(appUrl);
  expect(result.twitterAppUrliPad).to.eql(appUrl);
  expect(result.twitterAppNameGooglePlay).to.eql('YouTube');
  expect(result.twitterAppIdGooglePlay).to.eql('com.google.android.youtube');
  expect(result.twitterAppUrlGooglePlay).to.eql(watchUrl);

  // Media arrays.
  expect(result.ogImage).to.eql([{
    url: thumbnailUrl, width: '1280', height: '720', type: 'jpg',
  }]);
  expect(result.ogVideo).to.eql([{
    url: embedUrl, width: '1280', height: '720', type: 'text/html',
  }]);
  expect(result.twitterImage).to.eql([{ url: thumbnailUrl }]);
  expect(result.twitterPlayer).to.eql([{ url: embedUrl, width: '1280', height: '720' }]);

  expect(result.ogVideoTag).to.eql('vedantu');
  expect(result.ogVideoSecureURL).to.eql(embedUrl);
  expect(result.requestUrl).to.eql(watchUrl);
  expect(result.charset).to.eql('UTF-8');
  expect(result.success).to.eql(true);
  expect(result.fbAppId).to.eql('87741124305');
  expect(result.jsonLD).to.be.an('array').and.to.not.be.empty;

  // NOTE(review): this guard runs after the strict ogDate assertion above, so it cannot take
  // effect as written - confirm which of the two is the intended behaviour.
  if (result.ogDate === undefined) {
    result.ogDate = 'hack because sometimes this does not come back for some reason';
  }

  // The result must contain exactly these keys and no others.
  expect(result).to.have.all.keys(
    'favicon', 'fbAppId', 'jsonLD',
    'alAndroidAppName', 'alAndroidPackage', 'alAndroidUrl',
    'alIosAppName', 'alIosAppStoreId', 'alIosUrl', 'alWebUrl',
    'ogDate', 'ogDescription', 'ogImage', 'ogLocale', 'ogSiteName',
    'ogTitle', 'ogType', 'ogUrl', 'ogVideo', 'ogVideoTag', 'ogVideoSecureURL',
    'requestUrl', 'success', 'charset',
    'twitterAppIdGooglePlay', 'twitterAppIdiPad', 'twitterAppIdiPhone',
    'twitterAppNameGooglePlay', 'twitterAppNameiPad', 'twitterAppNameiPhone',
    'twitterAppUrlGooglePlay', 'twitterAppUrliPad', 'twitterAppUrliPhone',
    'twitterCard', 'twitterDescription', 'twitterImage', 'twitterPlayer',
    'twitterSite', 'twitterTitle', 'twitterUrl',
  );
  expect(response).to.be.an('Response');
});
222 |
// Scrapes the Twitch fixture page hosted on GitHub Pages and checks the fields it exposes.
it('Test Twitch.tv Video - Should Return correct Open Graph Info', async function () {
  const pageUrl = 'https://jshemas.github.io/openGraphScraperPages/twitch.html';
  // Titles and types accepted for this page.
  const acceptedTitles = ['Twitch', 'AI Soundscapes, Trials of Mana', 'AI Soundscapes, Trials of Mana - Vinesauce on Twitch'];
  const acceptedTypes = ['website', 'video.other'];

  const { error, result, response } = await ogs({ url: pageUrl });
  console.log('error:', error);
  console.log('result:', result);
  expect(error).to.eql(false);
  expect(result.ogSiteName).to.eql('Twitch');
  expect(result.twitterSite).to.eql('@twitch');
  expect(result.ogLocale).to.eql('en-US');
  expect(result.ogTitle).to.be.oneOf(acceptedTitles);
  expect(result.ogDescription).to.be.a('string').and.to.not.be.empty;
  expect(result.ogUrl).to.eql('https://www.twitch.tv/videos/632214184');
  expect(result.favicon).to.eql('https://static.twitchcdn.net/assets/favicon-32-d6025c14e900565d6177.png');
  expect(result.ogType).to.be.oneOf(acceptedTypes);
  expect(result.ogImage).to.be.an('array').and.to.not.be.empty;
  expect(result.requestUrl).to.eql(pageUrl);
  expect(result.charset).to.eql('utf-8');
  expect(result.fbAppId).to.eql('161273083968709');
  expect(result.success).to.eql(true);
  // The result must contain exactly these keys and no others.
  expect(result).to.have.all.keys(
    'favicon', 'fbAppId',
    'ogDescription', 'ogImage', 'ogLocale', 'ogSiteName', 'ogTitle', 'ogType', 'ogUrl',
    'requestUrl', 'success', 'charset', 'twitterSite',
  );
  expect(response).to.be.an('Response');
});
259 | });
260 |
--------------------------------------------------------------------------------
/tests/unit/media.spec.ts:
--------------------------------------------------------------------------------
1 | import { expect } from 'chai';
2 |
3 | import { mediaSetup } from '../../lib/media';
4 |
5 | describe('media', function () {
// One og image and one twitter image, each with full metadata: ogImage should carry
// url/width/height/type and twitterImage url/width/height/alt.
it('has images and twitter images', function () {
  const { ogImage, twitterImage } = mediaSetup({
    ogImageProperty: ['http://test.com/logo.png'],
    ogImageType: ['image/png'],
    ogImageWidth: ['300'],
    ogImageHeight: ['300'],
    twitterImageProperty: ['http://test.com/logo.png'],
    twitterImageAlt: ['image/png'],
    twitterImageWidth: ['300'],
    twitterImageHeight: ['300'],
  });
  const url = 'http://test.com/logo.png';

  expect(ogImage).to.deep.equal([{
    url, width: '300', height: '300', type: 'image/png',
  }]);
  expect(twitterImage).to.deep.equal([{
    url, width: '300', height: '300', alt: 'image/png',
  }]);
});
32 |
// With no twitterImageProperty, the url is expected to come from twitterImageSrc.
it('has twitter images but falls back to twitterImageSrc', function () {
  const { twitterImage } = mediaSetup({
    twitterImageSrc: ['http://test.com/logoTwo.png'],
    twitterImageAlt: ['image/png'],
    twitterImageWidth: ['300'],
    twitterImageHeight: ['300'],
  });
  const expectedImage = {
    url: 'http://test.com/logoTwo.png',
    width: '300',
    height: '300',
    alt: 'image/png',
  };

  expect(twitterImage).to.deep.equal([expectedImage]);
});
48 |
// When only the image urls are supplied, each result entry should contain just `url`.
it('has image/twitterImage but no type/height/width', function () {
  const { ogImage, twitterImage } = mediaSetup({
    ogImageProperty: ['http://test.com/logo.png'],
    twitterImageProperty: ['http://test.com/logo.png'],
  });
  const urlOnly = [{ url: 'http://test.com/logo.png' }];

  expect(ogImage).to.deep.equal(urlOnly);
  expect(twitterImage).to.deep.equal(urlOnly);
});
63 |
// Image metadata without any image url should produce no media at all (an empty object).
it('has images and twitter images without property', function () {
  const media = mediaSetup({
    ogImageType: ['image/png'],
    ogImageWidth: ['300'],
    ogImageHeight: ['300'],
    twitterImageAlt: ['image/png'],
    twitterImageWidth: ['300'],
    twitterImageHeight: ['300'],
  });

  expect(media).to.deep.equal({});
});
76 |
// When ogImageSecureURL, ogImageURL and ogImageProperty are all present, the expected url
// is the one from ogImageSecureURL.
it('has image with ogImageSecureURL/ogImageURL/ogImageProperty', function () {
  const { ogImage } = mediaSetup({
    ogImageSecureURL: ['https://test.com/logo.png'],
    ogImageURL: ['http://test.com/logoTwo.png'],
    ogImageProperty: ['http://test.com/logo.png'],
    ogImageType: ['image/png'],
    ogImageWidth: ['300'],
    ogImageHeight: ['300'],
  });
  const expectedImage = {
    url: 'https://test.com/logo.png',
    width: '300',
    height: '300',
    type: 'image/png',
  };

  expect(ogImage).to.deep.equal([expectedImage]);
});
94 |
// When both ogImageURL and ogImageProperty are present, the expected url is the one from
// ogImageProperty.
it('has image with ogImageURL/ogImageProperty', function () {
  const { ogImage } = mediaSetup({
    ogImageURL: ['http://test.com/logoTwo.png'],
    ogImageProperty: ['http://test.com/logo.png'],
    ogImageType: ['image/png'],
    ogImageWidth: ['300'],
    ogImageHeight: ['300'],
  });
  const expectedImage = {
    url: 'http://test.com/logo.png',
    width: '300',
    height: '300',
    type: 'image/png',
  };

  expect(ogImage).to.deep.equal([expectedImage]);
});
111 |
// With ogImageURL as the only url source, that url is expected in the result.
it('has image with ogImageURL', function () {
  const { ogImage } = mediaSetup({
    ogImageURL: ['http://test.com/logoTwo.png'],
    ogImageType: ['image/png'],
    ogImageWidth: ['300'],
    ogImageHeight: ['300'],
  });
  const expectedImage = {
    url: 'http://test.com/logoTwo.png',
    width: '300',
    height: '300',
    type: 'image/png',
  };

  expect(ogImage).to.deep.equal([expectedImage]);
});
127 |
// Four urls are supplied (the last one empty) with three types/alts and a single width and
// height. Expected: three entries, the empty url dropped, and width/height only on the first.
it('has many images and twitter images', function () {
  const [first, second, third] = [
    'http://test.com/logo_one.png',
    'http://test.com/logo_two.png',
    'http://test.com/logo_three.png',
  ];
  const { ogImage, twitterImage } = mediaSetup({
    ogImageProperty: [first, second, third, ''],
    ogImageType: ['image/png', 'image/png', 'image/png'],
    ogImageWidth: ['300'],
    ogImageHeight: ['300'],
    twitterImageProperty: [first, second, third, ''],
    twitterImageAlt: ['image/png', 'image/png', 'image/png'],
    twitterImageWidth: ['300'],
    twitterImageHeight: ['300'],
  });

  expect(ogImage).to.deep.equal([
    {
      url: first, width: '300', height: '300', type: 'image/png',
    },
    { url: second, type: 'image/png' },
    { url: third, type: 'image/png' },
  ]);
  expect(twitterImage).to.deep.equal([
    {
      url: first, width: '300', height: '300', alt: 'image/png',
    },
    { url: second, alt: 'image/png' },
    { url: third, alt: 'image/png' },
  ]);
});
166 |
// For ogImage the .gif is listed second in the input but expected first in the output, with
// its own type/width/height. The twitter input already lists the .gif first.
it('has a .gif images and twitter images', function () {
  const pngUrl = 'http://test.com/logo_one.png';
  const gifUrl = 'http://test.com/logo_two.gif';
  const { ogImage, twitterImage } = mediaSetup({
    ogImageProperty: [pngUrl, gifUrl],
    ogImageType: ['image/png', 'image/gif'],
    ogImageWidth: ['300', '600'],
    ogImageHeight: ['300', '600'],
    twitterImageProperty: [gifUrl, pngUrl],
    twitterImageAlt: ['image/gif', 'image/png'],
    twitterImageWidth: ['300', '600'],
    twitterImageHeight: ['300', '600'],
  });

  expect(ogImage).to.deep.equal([
    {
      url: gifUrl, type: 'image/gif', width: '600', height: '600',
    },
    {
      url: pngUrl, type: 'image/png', width: '300', height: '300',
    },
  ]);
  expect(twitterImage).to.deep.equal([
    {
      url: gifUrl, alt: 'image/gif', width: '300', height: '300',
    },
    {
      url: pngUrl, alt: 'image/png', width: '600', height: '600',
    },
  ]);
});
203 |
// Input without any media fields: none of the four media properties should be set.
it('has no image or video', function () {
  const {
    ogImage, twitterImage, ogVideo, twitterPlayer,
  } = mediaSetup({
    ogTitle: 'test site',
    ogType: 'website',
    ogUrl: 'http://test.com/',
    ogDescription: 'stuff',
  });

  expect(ogImage).to.equal(undefined);
  expect(twitterImage).to.equal(undefined);
  expect(ogVideo).to.equal(undefined);
  expect(twitterPlayer).to.equal(undefined);
});
217 |
// One og video and one twitter player with full metadata: ogVideo should carry
// url/width/height/type and twitterPlayer url/width/height/stream.
it('has video and twitter video', function () {
  const { ogVideo, twitterPlayer } = mediaSetup({
    ogVideoProperty: ['http://test.com/logo.png'],
    ogVideoType: ['image/png'],
    ogVideoWidth: ['300'],
    ogVideoHeight: ['300'],
    twitterPlayerProperty: ['http://test.com/logo.png'],
    twitterPlayerStream: ['image/png'],
    twitterPlayerWidth: ['300'],
    twitterPlayerHeight: ['300'],
  });
  const url = 'http://test.com/logo.png';

  expect(ogVideo).to.deep.equal([{
    url, width: '300', height: '300', type: 'image/png',
  }]);
  expect(twitterPlayer).to.deep.equal([{
    url, width: '300', height: '300', stream: 'image/png',
  }]);
});
244 |
// When only the video urls are supplied, each result entry should contain just `url`.
it('has video/twitterVideo but no type/width/height', function () {
  const { ogVideo, twitterPlayer } = mediaSetup({
    ogVideoProperty: ['http://test.com/logo.png'],
    twitterPlayerProperty: ['http://test.com/logo.png'],
  });
  const urlOnly = [{ url: 'http://test.com/logo.png' }];

  expect(ogVideo).to.deep.equal(urlOnly);
  expect(twitterPlayer).to.deep.equal(urlOnly);
});
259 |
// Video metadata without any video url should produce no media at all (an empty object).
it('has video and twitter video but with no property', function () {
  const media = mediaSetup({
    ogVideoType: ['image/png'],
    ogVideoWidth: ['300'],
    ogVideoHeight: ['300'],
    twitterPlayerStream: ['image/png'],
    twitterPlayerWidth: ['300'],
    twitterPlayerHeight: ['300'],
  });

  expect(media).to.deep.equal({});
});
272 |
// A single song with url, track and disc should come back as one { url, track, disc } entry.
it('has music:song', function () {
  const { musicSong } = mediaSetup({
    musicSongProperty: ['http://test.com/songurl'],
    musicSongTrack: ['1'],
    musicSongDisc: ['1'],
  });
  const expectedSong = { url: 'http://test.com/songurl', track: '1', disc: '1' };

  expect(musicSong).to.deep.equal([expectedSong]);
});
286 |
// With no musicSongProperty, the url is expected to come from musicSongUrl.
it('has music:song but falls back to musicSongUrl', function () {
  const { musicSong } = mediaSetup({
    musicSongUrl: ['http://test.com/songurlTwo'],
    musicSongTrack: ['1'],
    musicSongDisc: ['1'],
  });
  const expectedSong = { url: 'http://test.com/songurlTwo', track: '1', disc: '1' };

  expect(musicSong).to.deep.equal([expectedSong]);
});
300 |
// A song url without track/disc should come back as an entry containing only `url`.
it('has music:song but no track/disc', function () {
  const { musicSong } = mediaSetup({ musicSongProperty: ['http://test.com/songurl'] });

  expect(musicSong).to.deep.equal([{ url: 'http://test.com/songurl' }]);
});
310 |
// Three songs plus an empty trailing entry. The expected output drops the empty entry and
// lists the songs as disc 1/track 1, disc 1/track 4, disc 2/track 2 - not in input order.
it('has multiple music:songs', function () {
  const { musicSong } = mediaSetup({
    musicSongProperty: ['http://test.com/songurl', 'http://test.com/songurl3', 'http://test.com/songurl2', ''],
    musicSongTrack: ['1', '2', '4', ''],
    musicSongDisc: ['1', '2', '1', ''],
  });
  const expectedSongs = [
    { url: 'http://test.com/songurl', track: '1', disc: '1' },
    { url: 'http://test.com/songurl2', track: '4', disc: '1' },
    { url: 'http://test.com/songurl3', track: '2', disc: '2' },
  ];

  expect(musicSong).to.deep.equal(expectedSongs);
});
334 |
// All four media kinds supplied together: each should be returned with its own metadata
// (type for og image/video, alt for twitter image, stream for twitter player).
it('has ogImageProperty/twitterImage/ogVideo/twitterPlayer', function () {
  const url = 'http://test.com/logo.png';
  const {
    ogImage, twitterImage, ogVideo, twitterPlayer,
  } = mediaSetup({
    ogImageProperty: [url],
    ogImageType: ['image/png'],
    ogImageWidth: ['300'],
    ogImageHeight: ['300'],
    twitterImageProperty: [url],
    twitterImageAlt: ['image/png'],
    twitterImageWidth: ['300'],
    twitterImageHeight: ['300'],
    ogVideoProperty: [url],
    ogVideoType: ['image/png'],
    ogVideoWidth: ['300'],
    ogVideoHeight: ['300'],
    twitterPlayerProperty: [url],
    twitterPlayerStream: ['image/png'],
    twitterPlayerWidth: ['300'],
    twitterPlayerHeight: ['300'],
  });
  // Fields shared by every expected entry.
  const common = { url, width: '300', height: '300' };

  expect(ogImage).to.deep.equal([{ ...common, type: 'image/png' }]);
  expect(twitterImage).to.deep.equal([{ ...common, alt: 'image/png' }]);
  expect(ogVideo).to.deep.equal([{ ...common, type: 'image/png' }]);
  expect(twitterPlayer).to.deep.equal([{ ...common, stream: 'image/png' }]);
});
383 |
// Eleven fully described images go in; only the first ten are expected back.
it('has more then 10 images', function () {
  // http://test.com/logo1.png ... http://test.com/logo11.png
  const logoUrls = Array.from({ length: 11 }, (_, index) => `http://test.com/logo${index + 1}.png`);
  // Builds an 11-element array that repeats `value`, one entry per logo url.
  const elevenOf = (value: string): string[] => logoUrls.map(() => value);

  const { ogImage } = mediaSetup({
    ogImageProperty: logoUrls,
    ogImageType: elevenOf('image/png'),
    ogImageWidth: elevenOf('300'),
    ogImageHeight: elevenOf('300'),
  });

  const firstTen = logoUrls.slice(0, 10).map((url) => ({
    url,
    width: '300',
    height: '300',
    type: 'image/png',
  }));
  expect(ogImage).to.deep.equal(firstTen);
});
455 | });
456 |
--------------------------------------------------------------------------------
/tests/unit/utils.spec.ts:
--------------------------------------------------------------------------------
1 | import { expect } from 'chai';
2 |
3 | /* eslint-disable mocha/no-setup-in-describe */
4 | import {
5 | findImageTypeFromUrl,
6 | isCustomMetaTagsValid,
7 | isImageTypeValid,
8 | isThisANonHTMLUrl,
9 | optionSetup,
10 | removeNestedUndefinedValues,
11 | unescapeScriptText,
12 | validateAndFormatURL,
13 | } from '../../lib/utils';
14 |
// Registers one mocha test per entry in `urls`, titled `<url> <message>`. Each test runs
// validateAndFormatURL with `urlValidatorSettings` and passes when the returned `url` is
// non-null (for `valid` truthy) or null (otherwise).
const validateUrl = (urls, valid, message, urlValidatorSettings) => {
  urls.forEach((candidate) => {
    it(`${candidate} ${message}`, function () {
      const { url } = validateAndFormatURL(candidate, urlValidatorSettings);
      return valid
        ? expect(url).to.not.be.eql(null)
        : expect(url).to.be.eql(null);
    });
  });
};
27 |
28 | describe('utils', function () {
29 | describe('validateAndFormatURL', function () {
30 | context('validing URLs', function () {
31 | const defaultUrlValidatorSettings = {
32 | allow_fragments: true,
33 | allow_protocol_relative_urls: false,
34 | allow_query_components: true,
35 | allow_trailing_dot: false,
36 | allow_underscores: false,
37 | protocols: ['http', 'https'],
38 | require_host: true,
39 | require_port: false,
40 | require_protocol: false,
41 | require_tld: true,
42 | require_valid_protocol: true,
43 | validate_length: true,
44 | };
45 |
46 | validateUrl([
47 | 'foobar.com',
48 | 'foobar.com/',
49 | 'http://[::192.9.5.5]/ipng',
50 | 'http://[::FFFF:129.144.52.38]:80/index.html',
51 | 'http://[1080::8:800:200C:417A]/foo',
52 | 'http://[1080:0:0:0:8:800:200C:417A]/index.html',
53 | 'http://[2010:836B:4179::836B:4179]',
54 | 'http://[3ffe:2a00:100:7031::1]',
55 | 'http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html',
56 | 'http://10.0.0.0/',
57 | 'http://127.0.0.1/',
58 | 'http://189.123.14.13/',
59 | 'http://duckduckgo.com/?q=%2F',
60 | 'http://example.com/example.json#/foo/bar',
61 | 'http://foo--bar.com',
62 | 'http://foobar.com?foo=bar',
63 | 'http://foobar.com/?foo=bar#baz=qux',
64 | 'http://foobar.com/t$-_.+!*\'(),',
65 | 'http://foobar.com#baz=qux',
66 | 'http://høyfjellet.no',
67 | 'http://user:@www.foobar.com/',
68 | 'http://user:pass@www.foobar.com/',
69 | 'http://www.foobar.com:23/',
70 | 'http://www.foobar.com:5/',
71 | 'http://www.foobar.com:65535/',
72 | 'http://www.foobar.com/',
73 | 'HTTP://WWW.FOOBAR.COM/',
74 | 'http://www.foobar.com/~foobar',
75 | 'http://www.xn--froschgrn-x9a.net/',
76 | 'http://xn------eddceddeftq7bvv7c4ke4c.xn--p1ai',
77 | 'http://xn--froschgrn-x9a.com/',
78 | 'http://xn--j1aac5a4g.xn--j1amh',
79 | 'http://кулік.укр',
80 | 'https://www.foobar.com/',
81 | 'https://www.foobar.com/',
82 | 'HTTPS://WWW.FOOBAR.COM/',
83 | 'test.com?ref=http://test2.com',
84 | 'valid.au',
85 | 'www.foobar.com',
86 | ], true, 'should be valid', defaultUrlValidatorSettings);
87 |
88 | validateUrl([
89 | '!.foo.com',
90 | '.com',
91 | '',
92 | '*.foo.com',
93 | '////foobar.com',
94 | '//foobar.com',
95 | 'ftp://www.foobar.com/',
96 | 'http://*.foo.com',
97 | 'http:////foobar.com',
98 | 'http://\n@www.foobar.com/',
99 | 'http://300.0.0.1/',
100 | 'http://com/',
101 | 'http://example.com.',
102 | 'http://foobar/ lol/',
103 | 'http://foobar/? lol',
104 | 'http://foobar/# lol',
105 | 'http://localhost:3000/',
106 | 'http://localhost:61500this is an invalid url!!!!',
107 | 'http://lol @foobar.com/',
108 | 'http://lol: @foobar.com/',
109 | 'http://lol:lol @foobar.com/',
110 | 'http://lol:lol:lol@foobar.com/',
111 | 'http://www.-foobar.com/',
112 | 'http://www.foo_bar.com/',
113 | 'http://www.foobar-.com/',
114 | 'http://www.foobar.com:0/',
115 | 'http://www.foobar.com:70000/',
116 | 'http://www.foobar.com:99999/',
117 | 'http://www.foobar.com/\t',
118 | 'http://www.xn--.com/',
119 | 'http://xn--.com/',
120 | 'https://example.com/foo//',
121 | 'invalid.',
122 | 'invalid.x',
123 | 'invalid/',
124 | 'mailto:foo@bar.com',
125 | 'rtmp://foobar.com',
126 | 'xyz://foobar.com',
127 | `http://foobar.com/${new Array(2083).join('f')}`,
128 | ], false, 'should be invalid', defaultUrlValidatorSettings);
129 | });
130 |
131 | context('validing URLs with options.urlValidatorSettings (https is invalid)', function () { // custom settings whose protocols list contains only 'http'
132 | const noHTTPSUrlValidatorSettings = {
133 | allow_fragments: true,
134 | allow_protocol_relative_urls: false,
135 | allow_query_components: true,
136 | allow_trailing_dot: false,
137 | allow_underscores: false,
138 | protocols: ['http'], // 'https' is deliberately left out of the allowed schemes
139 | require_host: true,
140 | require_port: false,
141 | require_protocol: false,
142 | require_tld: true,
143 | require_valid_protocol: true,
144 | validate_length: true,
145 | };
146 |
147 | validateUrl([ // http URLs, including an upper-case variant, are expected to pass
148 | 'http://www.foobar.com/',
149 | 'http://www.foobar.com/',
150 | 'HTTP://WWW.FOOBAR.COM/',
151 | ], true, 'should be valid', noHTTPSUrlValidatorSettings);
152 |
153 | validateUrl([ // the same hosts over https are expected to be rejected
154 | 'https://www.foobar.com/',
155 | 'https://www.foobar.com/',
156 | 'HTTPS://WWW.FOOBAR.COM/',
157 | ], false, 'should be invalid', noHTTPSUrlValidatorSettings);
158 | });
159 | });
160 |
161 | describe('findImageTypeFromUrl', function () { // pins the value returned for inputs with and without a file extension
162 | it('foobar.com/image.png?test=true', function () { // the query string must not end up in the type
163 | const type = findImageTypeFromUrl('foobar.com/image.png?test=true');
164 | expect(type).to.eql('png');
165 | });
166 |
167 | it('foobar.com/image.png', function () {
168 | const type = findImageTypeFromUrl('foobar.com/image.png');
169 | expect(type).to.eql('png');
170 | });
171 |
172 | it('image.png', function () { // bare file name, no host or path
173 | const type = findImageTypeFromUrl('image.png');
174 | expect(type).to.eql('png');
175 | });
176 |
177 | it('image', function () { // no '.' in the input: the whole input is expected back
178 | const type = findImageTypeFromUrl('image');
179 | expect(type).to.eql('image');
180 | });
181 |
182 | it('empty string', function () { // empty in, empty out
183 | const type = findImageTypeFromUrl('');
184 | expect(type).to.eql('');
185 | });
186 | });
187 |
188 | describe('isImageTypeValid', function () { // one accepted type and one rejected type
189 | it('when type is png', function () {
190 | const valid = isImageTypeValid('png');
191 | expect(valid).to.eql(true);
192 | });
193 |
194 | it('when type is foo', function () { // 'foo' stands in for any unknown type
195 | const valid = isImageTypeValid('foo');
196 | expect(valid).to.eql(false);
197 | });
198 | });
199 |
200 | describe('isThisANonHTMLUrl', function () { // true is expected for non-HTML resources, false for HTML pages
201 | it('when url is type .png', function () {
202 | const valid = isThisANonHTMLUrl('www.foo.com/bar.png');
203 | expect(valid).to.eql(true);
204 | });
205 |
206 | it('when url is type .html', function () {
207 | const valid = isThisANonHTMLUrl('www.foo.com/bar.html');
208 | expect(valid).to.eql(false);
209 | });
210 |
211 | it('when url is type .pdf and has params', function () { // a trailing query string must not hide the .pdf extension
212 | const valid = isThisANonHTMLUrl('www.foo.com/bar.pdf?123');
213 | expect(valid).to.eql(true);
214 | });
215 |
216 | it('when domain in url contains a non HTML string (.txt)', function () { // '.txt' appears only in the host name, so the URL must still count as HTML
217 | const valid = isThisANonHTMLUrl('www.txt.com/bar.html');
218 | expect(valid).to.eql(false);
219 | });
220 |
221 | it('when domain in url contains a non HTML string (.mov) no extension on path', function () { // same host-name case, with an extension-less path
222 | const valid = isThisANonHTMLUrl('www.mov.com/bar');
223 | expect(valid).to.eql(false);
224 | });
225 | });
226 |
227 | describe('removeNestedUndefinedValues', function () { // undefined-valued keys are expected to be dropped at every depth
228 | it('when there is no undef values', function () { // nothing to remove: input comes back unchanged
229 | const object = removeNestedUndefinedValues({ one: 1 });
230 | expect(object).to.eql({ one: 1 });
231 | });
232 |
233 | it('when there is undef values', function () {
234 | const object = removeNestedUndefinedValues({ one: 1, two: undefined });
235 | expect(object).to.eql({ one: 1 });
236 | });
237 |
238 | it('when there is a nested undef value', function () { // the nested object is emptied but the key itself is kept
239 | const object = removeNestedUndefinedValues({ one: 1, two: { three: undefined } });
240 | expect(object).to.eql({ one: 1, two: {} });
241 | });
242 | });
243 |
244 | describe('optionSetup', function () { // checks the options object returned under the `options` key
245 | it('when passing nothing into optionSetup', function () { // onlyGetOpenGraphInfo is expected to default to false
246 | const { options } = optionSetup({});
247 | expect(options).to.eql({ onlyGetOpenGraphInfo: false });
248 | });
249 |
250 | it('when passing onlyGetOpenGraphInfo into optionSetup', function () { // an explicit value must survive unchanged
251 | const { options } = optionSetup({ onlyGetOpenGraphInfo: true });
252 | expect(options).to.eql({ onlyGetOpenGraphInfo: true });
253 | });
254 | });
255 |
256 | describe('isCustomMetaTagsValid', function () { // valid lists return true; any malformed entry makes the whole list invalid
257 | it('when passing a valid custom tag into isCustomMetaTagsValid', function () { // all three fields present with the declared types
258 | const response = isCustomMetaTagsValid([{
259 | multiple: false,
260 | property: 'foo',
261 | fieldName: 'fooTag',
262 | }]);
263 | expect(response).to.eql(true);
264 | });
265 |
266 | it('when passing a enpty array into isCustomMetaTagsValid', function () { // an empty list counts as valid
267 | const response = isCustomMetaTagsValid([]);
268 | expect(response).to.eql(true);
269 | });
270 |
271 | it('when passing a custom tag missing property into isCustomMetaTagsValid', function () { // `property` is omitted on purpose, which is why the type check is suppressed
272 | // @ts-ignore
273 | const response = isCustomMetaTagsValid([{
274 | multiple: false,
275 | fieldName: 'fooTag',
276 | }]);
277 | expect(response).to.eql(false);
278 | });
279 |
280 | it('when passing a custom tag invalid property into isCustomMetaTagsValid', function () { // fieldName is a boolean instead of a string
281 | const response = isCustomMetaTagsValid([{
282 | multiple: false,
283 | property: 'foo',
284 | // @ts-ignore
285 | fieldName: true,
286 | }]);
287 | expect(response).to.eql(false);
288 | });
289 |
290 | it('when passing a valid and invalid custom tag into isCustomMetaTagsValid', function () { // first entry lacks fieldName; one bad entry is enough to fail
291 | // @ts-ignore
292 | const response = isCustomMetaTagsValid([{
293 | multiple: false,
294 | property: 'foo',
295 | }, {
296 | multiple: false,
297 | property: 'foo',
298 | fieldName: 'fooTag',
299 | }]);
300 | expect(response).to.eql(false);
301 | });
302 |
303 | it('when passing a invalid array into isCustomMetaTagsValid', function () { // entries are plain strings rather than objects
304 | // @ts-ignore
305 | const response = isCustomMetaTagsValid(['foo', 'bar']);
306 | expect(response).to.eql(false);
307 | });
308 | });
309 |
310 | describe('unescapeScriptText', function () { // first test records the motivation, second pins the conversions
311 | it('is needed because `JSON.parse()` is not able to parse string with \\xHH', function () { // exercises JSON.parse only, not unescapeScriptText
312 | expect(JSON.parse('"\\u2611"')).to.eql('☑'); // \uHHHH escapes are accepted by JSON.parse
313 | expect(() => {
314 | JSON.parse('"\\x26"'); // \xHH escapes are not
315 | }).to.throw(SyntaxError);
316 | });
317 |
318 | it('should unescape script text', function () {
319 | expect(unescapeScriptText('"\\x27"')).to.eql('"\'"'); // \x27 becomes a single quote
320 | expect(unescapeScriptText('"\\x26"')).to.eql('"&"'); // \x26 becomes an ampersand
321 | expect(unescapeScriptText('"\\x22"')).to.eql('"\\""'); // \x22 becomes a backslash-escaped double quote, not a bare one
322 | });
323 | });
324 | });
325 |
--------------------------------------------------------------------------------
/tsconfig.build.json:
--------------------------------------------------------------------------------
1 | {
2 | "include": [
3 | "index.ts",
4 | "lib/**/*"
5 | ],
6 | "exclude": [
7 | "tests/**/*"
8 | ],
9 | "compilerOptions": {
10 | "esModuleInterop": true,
11 | "forceConsistentCasingInFileNames": true,
12 | "lib": ["es2023"],
13 | "skipLibCheck": true,
14 | "strict": true,
15 | "target": "es2022",
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/tsconfig.declaration.json:
--------------------------------------------------------------------------------
1 | {
2 | "include": [
3 | "index.ts",
4 | "lib/**/*"
5 | ],
6 | "exclude": [
7 | "tests/**/*"
8 | ],
9 | "compilerOptions": {
10 | "declaration": true,
11 | "declarationDir": "./types",
12 | "emitDeclarationOnly": true,
13 | "esModuleInterop": true,
14 | "forceConsistentCasingInFileNames": true,
15 | "lib": ["es2023"],
16 | "skipLibCheck": true,
17 | "strict": true,
18 | "target": "es2022",
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "include": [
3 | "index.ts",
4 | "lib/**/*"
5 | ],
6 | "exclude": [
7 | "tests/**/*"
8 | ],
9 | "compilerOptions": {
10 | "esModuleInterop": true,
11 | "forceConsistentCasingInFileNames": true,
12 | "lib": ["es2023"],
13 | "module": "node16",
14 | "moduleResolution": "node16",
15 | "skipLibCheck": true,
16 | "strict": true,
17 | "target": "es2022",
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/tsconfig.tests.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "./tsconfig.json",
3 | "include": [
4 | "tests/**/*"
5 | ],
6 | "exclude": [
7 | "index.ts",
8 | "lib/**/*"
9 | ]
10 | }
11 |
--------------------------------------------------------------------------------
/types/index.d.ts:
--------------------------------------------------------------------------------
1 | import type { ErrorResult, OpenGraphScraperOptions, SuccessResult } from './lib/types';
2 | /**
3 | * `open-graph-scraper` uses [fetch](https://developer.mozilla.org/en-US/docs/Web/API/fetch) for http requests
4 | * for scraping Open Graph and Twitter Card info off a website.
5 | *
6 | * @param {object} options - The options used by Open Graph Scraper
7 | * @param {boolean|string[]} [options.onlyGetOpenGraphInfo] - Only fetch open graph info and don't fall back on
8 | * anything else.
9 | * @param {object} [options.customMetaTags] - Here you can define custom meta tags you want to scrape.
10 | * @param {object} [options.fetchOptions] - Sets the options used by fetch for the http requests
11 | * @param {object} [options.urlValidatorSettings] - Sets the options used by validator.js for testing the URL
12 | * @param {string[]} [options.blacklist] - Pass in an array of sites you don't want ogs to run on.
13 | * @param {string} [options.html] - You can pass in an HTML string to run ogs on it. (use without options.url)
14 | * @param {number} [options.timeout] - Number of seconds before the fetch request ends. (default is 10 seconds)
15 | * @param {string} options.url - URL of the site. (Required)
16 | * @returns {Promise<ErrorResult | SuccessResult>} Promise Object with the Open Graph results (type argument restored from the imported result types; confirm against index.ts)
17 | */
18 | declare function run(options: OpenGraphScraperOptions): Promise<ErrorResult | SuccessResult>;
19 | export = run;
20 |
--------------------------------------------------------------------------------
/types/lib/extract.d.ts:
--------------------------------------------------------------------------------
1 | import type { OgObjectInternal, OpenGraphScraperOptions } from './types';
2 | /**
3 | * extract all of the meta tags needed for ogs
4 | *
5 | * @param {string} body - the body of the fetch request
6 | * @param {object} options - options for ogs
7 | * @return {object} object with ogs results
8 | *
9 | */
10 | export default function extractMetaTags(body: string, options: OpenGraphScraperOptions): OgObjectInternal;
11 |
--------------------------------------------------------------------------------
/types/lib/fallback.d.ts:
--------------------------------------------------------------------------------
1 | import type { CheerioAPI } from 'cheerio';
2 | import type { OpenGraphScraperOptions, OgObjectInternal } from './types';
3 | /**
4 | * ogs fallbacks
5 | *
6 | * @param {object} ogObject - the current ogObject
7 | * @param {object} options - options for ogs
8 | * @param {object} $ - cheerio.load() of the current html
9 | * @return {object} object with ogs results with updated fallback values
10 | *
11 | * NOTE(review): the fourth parameter, body (string), is undocumented - presumably the raw html that $ was loaded from; confirm against lib/fallback.ts
12 | */
13 | export declare function fallback(ogObject: OgObjectInternal, options: OpenGraphScraperOptions, $: CheerioAPI, body: string): OgObjectInternal;
13 | export default fallback;
14 |
--------------------------------------------------------------------------------
/types/lib/fields.d.ts:
--------------------------------------------------------------------------------
1 | import type { OgObjectInternal } from './types';
2 | type Fields = { // an array type: one entry per meta tag
3 | multiple: boolean;
4 | property: string;
5 | fieldName: keyof OgObjectInternal; // restricted to property names of OgObjectInternal
6 | }[];
7 | /**
8 | * array of meta tags ogs is looking for
9 | *
10 | * Each entry is a { multiple, property, fieldName } record; this is a constant, not a function.
11 | *
12 | */
13 | declare const fields: Fields;
14 | export default fields;
15 |
--------------------------------------------------------------------------------
/types/lib/isUrl.d.ts:
--------------------------------------------------------------------------------
1 | export default function isURL(url: any, options: any): boolean; // NOTE(review): both parameters are typed any; presumably url is the candidate string and options has the ValidatorSettings shape - confirm against lib/isUrl.ts
2 |
--------------------------------------------------------------------------------
/types/lib/media.d.ts:
--------------------------------------------------------------------------------
1 | import type { OgObjectInternal } from './types';
2 | /**
3 | * formats the multiple media values
4 | *
5 | * @param {object} ogObject - the current ogObject
6 | * (ogObject is the only parameter; there is no options argument)
7 | * @return {object} object with ogs results with updated media values
8 | *
9 | */
10 | export declare function mediaSetup(ogObject: OgObjectInternal): OgObjectInternal;
11 | export default mediaSetup;
12 |
--------------------------------------------------------------------------------
/types/lib/openGraphScraper.d.ts:
--------------------------------------------------------------------------------
1 | import type { OpenGraphScraperOptions } from './types';
2 | /**
3 | * sets up options for the fetch request and calls extract on html
4 | *
5 | * @param {object} ogsOptions - options for ogs
6 | * @return {Promise<object>} promise of an object holding ogObject, response and html
7 | *
8 | */
9 | export default function setOptionsAndReturnOpenGraphResults(ogsOptions: OpenGraphScraperOptions): Promise<{
10 | ogObject: import("./types").OgObjectInternal;
11 | response: { // NOTE(review): body-only response - presumably the options.html path, where no network response exists; confirm
12 | body: string;
13 | };
14 | html: string;
15 | } | {
16 | ogObject: import("./types").OgObjectInternal;
17 | response: import("undici").Response; // full undici Response in this variant
18 | html: string;
19 | }>;
20 |
--------------------------------------------------------------------------------
/types/lib/request.d.ts:
--------------------------------------------------------------------------------
1 | import type { OpenGraphScraperOptions } from './types';
2 | /**
3 | * performs the fetch request and formats the body for ogs
4 | *
5 | * @param {object} options - options for ogs
6 | * @return {Promise<object>} promise of the formatted request body (string) and the undici response
7 | *
8 | */
9 | export default function requestAndResultsFormatter(options: OpenGraphScraperOptions): Promise<{
10 | body: string;
11 | response: import("undici").Response;
12 | }>;
13 |
--------------------------------------------------------------------------------
/types/lib/types.d.ts:
--------------------------------------------------------------------------------
1 | import type { RequestInit } from 'undici';
2 | export interface SuccessResult { // result shape with error fixed to false
3 | error: false; // literal false distinguishes this shape from ErrorResult
4 | html: string;
5 | response: object;
6 | result: OgObject;
7 | }
8 | export interface ErrorResult { // result shape with error fixed to true
9 | error: true; // literal true distinguishes this shape from SuccessResult
10 | html: undefined; // always undefined in this shape
11 | response: undefined; // always undefined in this shape
12 | result: OgObject;
13 | }
14 | export type OnlyGetOpenGraphInfoItem = 'image' | 'title' | 'description' | 'locale' | 'logo' | 'url' | 'favicon' | 'audioUrl' | 'date'; // element type of the array form of OpenGraphScraperOptions.onlyGetOpenGraphInfo
15 | /**
16 | * The options used by Open Graph Scraper
17 | *
18 | * @typeParam {string} [url] - URL of the site. (Required unless html is passed; the property itself is typed optional)
19 | * @typeParam {string} [html] - You can pass in an HTML string to run ogs on it. (use without options.url)
20 | * @typeParam {string[]} [blacklist] - Pass in an array of sites you don't want ogs to run on.
21 | * @typeParam {boolean | OnlyGetOpenGraphInfoItem[]} [onlyGetOpenGraphInfo] - Only fetch open graph info and don't fall back on anything else.
22 | * @typeParam {CustomMetaTags[]} [customMetaTags] - Here you can define custom meta tags you want to scrape.
23 | * @typeParam {RequestInit} [fetchOptions] - The options passed into fetch.
24 | * @typeParam {number} [timeout] - Number of seconds before the fetch request ends. (default is 10 seconds)
25 | * @typeParam {ValidatorSettings} [urlValidatorSettings] - Sets the options used by validator.js for testing the URL
26 | */
27 | export interface OpenGraphScraperOptions {
28 | blacklist?: string[];
29 | customMetaTags?: CustomMetaTags[];
30 | fetchOptions?: RequestInit;
31 | html?: string;
32 | onlyGetOpenGraphInfo?: boolean | OnlyGetOpenGraphInfoItem[];
33 | timeout?: number;
34 | url?: string;
35 | urlValidatorSettings?: ValidatorSettings;
36 | }
37 | /**
38 | * Options for isURL method in Validator.js
39 | *
40 | * @typeParam allow_protocol_relative_urls - if set as true protocol relative URLs will be allowed
41 | * @typeParam protocols - valid protocols can be modified with this option
42 | * @typeParam require_host - if set as false isURL will not check if host is present in the URL
43 | * @typeParam require_port - if set as true isURL will check if port is present in the URL
44 | * @typeParam require_protocol - if set as true isURL will return false if protocol is not present in the URL
45 | * @typeParam require_valid_protocol - isURL will check if the URL's protocol is present in the protocols option
46 | * @typeParam validate_length - if set as false isURL will skip string length validation (IE maximum is 2083)
47 | * (the remaining fields are described inline below, following the validator.js isURL documentation)
48 | */
49 | export interface ValidatorSettings {
50 | allow_fragments: boolean; // if false, URLs containing a fragment are rejected
51 | allow_protocol_relative_urls: boolean;
52 | allow_query_components: boolean; // if false, URLs containing query components are rejected
53 | allow_trailing_dot: boolean; // if true, a trailing dot on the host is allowed
54 | allow_underscores: boolean; // if true, underscores are allowed in the host
55 | protocols: string[];
56 | require_host: boolean;
57 | require_port: boolean;
58 | require_protocol: boolean;
59 | require_tld: boolean; // if false, the host is not required to include a top-level domain
60 | require_valid_protocol: boolean;
61 | validate_length: boolean;
62 | }
63 | /**
64 | * The type for user-defined custom meta tags you want to scrape.
65 | *
66 | * @typeParam {boolean} multiple - whether more than one of these tags can be on a page (normally this is false)
67 | * @typeParam {string} property - meta tag name/property attribute
68 | * @typeParam {string} fieldName - name of the result variable
69 | */
70 | export interface CustomMetaTags {
71 | fieldName: string;
72 | multiple: boolean;
73 | property: string;
74 | }
75 | export interface TwitterImageObject { // element type of OgObjectInternal.twitterImage
76 | alt?: string;
77 | height?: number;
78 | url: string; // the only required field
79 | width?: number;
80 | }
81 | export interface TwitterPlayerObject { // element type of OgObjectInternal.twitterPlayer
82 | height?: number;
83 | stream?: string;
84 | url: string; // the only required field
85 | width?: number;
86 | }
87 | export interface ImageObject { // element type of OgObjectInternal.ogImage
88 | height?: number;
89 | type?: string;
90 | url: string; // the only required field
91 | width?: number;
92 | alt?: string;
93 | }
94 | export interface VideoObject { // element type of OgObjectInternal.ogVideo
95 | height?: number;
96 | type?: string;
97 | url: string; // the only required field
98 | width?: number;
99 | }
100 | export interface MusicSongObject { // element type of OgObjectInternal.musicSong
101 | disc?: string;
102 | track?: number;
103 | url: string; // the only required field
104 | }
105 | export interface OgObjectInternal { // every field is optional; holds structured media lists (ogImage, ogVideo, twitterImage, twitterPlayer, musicSong) as well as raw string[] per-attribute lists
106 | alAndroidAppName?: string;
107 | alAndroidClass?: string;
108 | alAndroidPackage?: string;
109 | alAndroidUrl?: string;
110 | alIosAppName?: string;
111 | alIosAppStoreId?: string;
112 | alIosUrl?: string;
113 | alIpadAppName?: string;
114 | alIpadAppStoreId?: string;
115 | alIpadUrl?: string;
116 | alIphoneAppName?: string;
117 | alIphoneAppStoreId?: string;
118 | alIphoneUrl?: string;
119 | alWebShouldFallback?: string;
120 | alWebUrl?: string;
121 | alWindowsAppId?: string;
122 | alWindowsAppName?: string;
123 | alWindowsPhoneAppId?: string;
124 | alWindowsPhoneAppName?: string;
125 | alWindowsPhoneUrl?: string;
126 | alWindowsUniversalAppId?: string;
127 | alWindowsUniversalAppName?: string;
128 | alWindowsUniversalUrl?: string;
129 | alWindowsUrl?: string;
130 | articleAuthor?: string;
131 | articleExpirationTime?: string;
132 | articleModifiedTime?: string;
133 | articlePublishedDate?: string;
134 | articlePublishedTime?: string;
135 | articleModifiedDate?: string;
136 | articlePublisher?: string;
137 | articleSection?: string;
138 | articleTag?: string;
139 | author?: string;
140 | bookAuthor?: string;
141 | bookCanonicalName?: string;
142 | bookIsbn?: string;
143 | bookReleaseDate?: string;
144 | booksBook?: string;
145 | booksRatingScale?: string;
146 | booksRatingValue?: string;
147 | bookTag?: string;
148 | businessContactDataCountryName?: string;
149 | businessContactDataLocality?: string;
150 | businessContactDataPostalCode?: string;
151 | businessContactDataRegion?: string;
152 | businessContactDataStreetAddress?: string;
153 | charset?: string;
154 | customMetaTags?: Record<string, string | string[]>; // NOTE(review): type arguments restored - presumably keyed by CustomMetaTags.fieldName, with string[] when multiple is true; confirm against lib/types.ts
155 | dcContributor?: string;
156 | dcCoverage?: string;
157 | dcCreator?: string;
158 | dcDate?: string;
159 | dcDateCreated?: string;
160 | dcDateIssued?: string;
161 | dcDescription?: string;
162 | dcFormatMedia?: string;
163 | dcFormatSize?: string;
164 | dcIdentifier?: string;
165 | dcLanguage?: string;
166 | dcPublisher?: string;
167 | dcRelation?: string;
168 | dcRights?: string;
169 | dcSource?: string;
170 | dcSubject?: string;
171 | dcTitle?: string;
172 | dcType?: string;
173 | error?: string;
174 | errorDetails?: Error;
175 | favicon?: string;
176 | fbAppId?: string;
177 | jsonLD?: object[];
178 | modifiedTime?: string;
179 | musicAlbum?: string;
180 | musicAlbumDisc?: string;
181 | musicAlbumTrack?: string;
182 | musicAlbumUrl?: string;
183 | musicCreator?: string;
184 | musicDuration?: string;
185 | musicMusician?: string;
186 | musicPlaylist?: string;
187 | musicRadioStation?: string;
188 | musicReleaseDate?: string;
189 | musicSong?: MusicSongObject[];
190 | musicSongDisc?: string[];
191 | musicSongProperty?: string[];
192 | musicSongTrack?: string[];
193 | musicSongUrl?: string[];
194 | ogArticleAuthor?: string;
195 | ogArticleExpirationTime?: string;
196 | ogArticleModifiedTime?: string;
197 | ogArticlePublishedTime?: string;
198 | ogArticlePublisher?: string;
199 | ogArticleSection?: string;
200 | ogArticleTag?: string;
201 | ogAudio?: string;
202 | ogAudioSecureURL?: string;
203 | ogAudioType?: string;
204 | ogAudioURL?: string;
205 | ogAvailability?: string;
206 | ogDate?: string;
207 | ogDescription?: string;
208 | ogDeterminer?: string;
209 | ogEpisode?: string;
210 | ogImage?: ImageObject[];
211 | ogImageAlt?: string[];
212 | ogImageHeight?: string[];
213 | ogImageProperty?: string[];
214 | ogImageSecureURL?: string[];
215 | ogImageType?: string[];
216 | ogImageURL?: string[];
217 | ogImageWidth?: string[];
218 | ogLocale?: string;
219 | ogLocaleAlternate?: string;
220 | ogLogo?: string;
221 | ogMovie?: string;
222 | ogPriceAmount?: string;
223 | ogPriceCurrency?: string;
224 | ogProductAvailability?: string;
225 | ogProductCondition?: string;
226 | ogProductPriceAmount?: string;
227 | ogProductPriceCurrency?: string;
228 | ogProductRetailerItemId?: string;
229 | ogSiteName?: string;
230 | ogTitle?: string;
231 | ogType?: string;
232 | ogUrl?: string;
233 | ogVideo?: VideoObject[];
234 | ogVideoActor?: string;
235 | ogVideoActorId?: string;
236 | ogVideoActorRole?: string;
237 | ogVideoDirector?: string;
238 | ogVideoDuration?: string;
239 | ogVideoHeight?: string[];
240 | ogVideoOther?: string;
241 | ogVideoProperty?: string[];
242 | ogVideoReleaseDate?: string;
243 | ogVideoSecureURL?: string;
244 | ogVideoSeries?: string;
245 | ogVideoTag?: string;
246 | ogVideoTvShow?: string;
247 | ogVideoType?: string[];
248 | ogVideoWidth?: string[];
249 | ogVideoWriter?: string;
250 | ogWebsite?: string;
251 | placeLocationLatitude?: string;
252 | placeLocationLongitude?: string;
253 | profileFirstName?: string;
254 | profileGender?: string;
255 | profileLastName?: string;
256 | profileUsername?: string;
257 | publishedTime?: string;
258 | releaseDate?: string;
259 | requestUrl?: string;
260 | restaurantContactInfoCountryName?: string;
261 | restaurantContactInfoEmail?: string;
262 | restaurantContactInfoLocality?: string;
263 | restaurantContactInfoPhoneNumber?: string;
264 | restaurantContactInfoPostalCode?: string;
265 | restaurantContactInfoRegion?: string;
266 | restaurantContactInfoStreetAddress?: string;
267 | restaurantContactInfoWebsite?: string;
268 | restaurantMenu?: string;
269 | restaurantRestaurant?: string;
270 | restaurantSection?: string;
271 | restaurantVariationPriceAmount?: string;
272 | restaurantVariationPriceCurrency?: string;
273 | success?: boolean;
274 | twitterAccount?: string;
275 | twitterAppIdGooglePlay?: string;
276 | twitterAppIdiPad?: string;
277 | twitterAppIdiPhone?: string;
278 | twitterAppNameGooglePlay?: string;
279 | twitterAppNameiPad?: string;
280 | twitterAppNameiPhone?: string;
281 | twitterAppUrlGooglePlay?: string;
282 | twitterAppUrliPad?: string;
283 | twitterAppUrliPhone?: string;
284 | twitterCard?: string;
285 | twitterCreator?: string;
286 | twitterCreatorId?: string;
287 | twitterDescription?: string;
288 | twitterImage?: TwitterImageObject[];
289 | twitterImageAlt?: string[];
290 | twitterImageHeight?: string[];
291 | twitterImageProperty?: string[];
292 | twitterImageSrc?: string[];
293 | twitterImageWidth?: string[];
294 | twitterPlayer?: TwitterPlayerObject[];
295 | twitterPlayerHeight?: string[];
296 | twitterPlayerProperty?: string[];
297 | twitterPlayerStream?: string[];
298 | twitterPlayerStreamContentType?: string;
299 | twitterPlayerWidth?: string[];
300 | twitterSite?: string;
301 | twitterSiteId?: string;
302 | twitterTitle?: string;
303 | twitterUrl?: string;
304 | updatedTime?: string;
305 | }
306 | /** Public result shape: OgObjectInternal minus its string[] members. NOTE(review): the Omit type arguments were missing; the key list is every string[] field of OgObjectInternal - confirm against lib/types.ts. */ export type OgObject = Omit<OgObjectInternal, 'musicSongDisc' | 'musicSongProperty' | 'musicSongTrack' | 'musicSongUrl' | 'ogImageAlt' | 'ogImageHeight' | 'ogImageProperty' | 'ogImageSecureURL' | 'ogImageType' | 'ogImageURL' | 'ogImageWidth' | 'ogVideoHeight' | 'ogVideoProperty' | 'ogVideoType' | 'ogVideoWidth' | 'twitterImageAlt' | 'twitterImageHeight' | 'twitterImageProperty' | 'twitterImageSrc' | 'twitterImageWidth' | 'twitterPlayerHeight' | 'twitterPlayerProperty' | 'twitterPlayerStream' | 'twitterPlayerWidth'>;
307 |
--------------------------------------------------------------------------------
/types/lib/utils.d.ts:
--------------------------------------------------------------------------------
1 | import type { CustomMetaTags, OgObjectInternal, OpenGraphScraperOptions, ValidatorSettings } from './types';
2 | export declare const defaultUrlValidatorSettings: { // settings object with the same twelve fields as ValidatorSettings; the values live in the implementation, not in this declaration
3 | allow_fragments: boolean;
4 | allow_protocol_relative_urls: boolean;
5 | allow_query_components: boolean;
6 | allow_trailing_dot: boolean;
7 | allow_underscores: boolean;
8 | protocols: string[];
9 | require_host: boolean;
10 | require_port: boolean;
11 | require_protocol: boolean;
12 | require_tld: boolean;
13 | require_valid_protocol: boolean;
14 | validate_length: boolean;
15 | };
16 | /**
17 | * Checks if URL is valid
18 | *
19 | * @param {string} url - url to be checked
20 | * @param {ValidatorSettings} urlValidatorSettings - settings used by validator
21 | * @return {boolean} boolean value if the url is valid
22 | *
23 | */
24 | export declare function isUrlValid(url: string, urlValidatorSettings: ValidatorSettings): boolean;
25 | /**
26 | * Validates and formats url
27 | *
28 | * @param {string} url - url to be checked and formatted
29 | * @param {ValidatorSettings} urlValidatorSettings - settings used by validator
30 | * @return {object} object whose url property is the proper url or null
31 | *
32 | */
33 | export declare function validateAndFormatURL(url: string, urlValidatorSettings: ValidatorSettings): {
34 | url: string | null;
35 | };
36 | /**
37 | * Finds the image type from a given url
38 | *
39 | * @param {string} url - url to be checked
40 | * @return {string} image type from url, e.g. 'png' for 'foobar.com/image.png?test=true'
41 | *
42 | */
43 | export declare function findImageTypeFromUrl(url: string): string;
44 | /**
45 | * Checks if image type is valid
46 | *
47 | * @param {string} type - type to be checked
48 | * @return {boolean} boolean value if type is valid
49 | *
50 | */
51 | export declare function isImageTypeValid(type: string): boolean;
52 | /**
53 | * Checks if URL is a non html page
54 | *
55 | * @param {string} url - url to be checked
56 | * @return {boolean} true if the url is non html (e.g. 'www.foo.com/bar.png'), false otherwise
57 | *
58 | */
59 | export declare function isThisANonHTMLUrl(url: string): boolean;
60 | /**
61 | * Find and delete nested undefineds
62 | *
63 | * @param {object} object - object to be cleaned (typed Record<string, any> so interface-typed objects are accepted too; type arguments restored, confirm against lib/utils.ts)
64 | * @return {object} object without nested undefineds; a nested object emptied by the cleanup is kept as {}
65 | *
66 | */
67 | export declare function removeNestedUndefinedValues(object: Record<string, any>): OgObjectInternal;
68 | /**
69 | * Wraps the ogs options in an object under the `options` key (no separate request-client options are returned)
70 | *
71 | * @param {object} ogsOptions - options passed in by the caller
72 | * @return {object} object with the nested options for ogs
73 | *
74 | */
75 | export declare function optionSetup(ogsOptions: OpenGraphScraperOptions): {
76 | options: OpenGraphScraperOptions;
77 | };
78 | /**
79 | * Checks if the custom meta tags are valid
80 | *
81 | * @param {object[]} customMetaTags - custom meta tags to be checked
82 | * @return {boolean} boolean value if the custom meta tags are valid
83 | *
84 | */
85 | export declare function isCustomMetaTagsValid(customMetaTags: CustomMetaTags[]): boolean;
86 | /**
87 | * Unescape script text.
88 | *
89 | * Certain websites escape script text within script tags, which can
90 | * interfere with `JSON.parse()`. Therefore, we need to unescape it.
91 | *
92 | * Known good escape sequences:
93 | *
94 | * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Character_escape#uhhhh
95 | *
96 | * ```js
97 | * JSON.parse('"\\u2611"'); // '☑'
98 | * ```
99 | *
100 | * Known bad escape sequences:
101 | *
102 | * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Character_escape#xhh
103 | *
104 | * ```js
105 | * JSON.parse('"\\x26"'); // throws SyntaxError (\x26 stands for '&')
106 | * ```
107 | *
108 | * @param {string} scriptText - the text of the script tag
109 | * @returns {string} unescaped script text
110 | */
111 | export declare function unescapeScriptText(scriptText: string): string;
112 |
--------------------------------------------------------------------------------