├── .eslintrc ├── .github ├── ISSUE_TEMPLATE │ └── bug_report.md └── stale.yml ├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── LICENSE ├── README.md ├── docs ├── examples │ ├── README.md │ ├── banned.js │ ├── captcha-html.js │ ├── custom-headers-v2.js │ ├── custom-headers.js │ ├── custom-requester-v2.js │ ├── custom-requester.js │ ├── debugging.js │ ├── download-v2.js │ ├── download.js │ ├── ignore-error.js │ ├── redirects.js │ ├── session-persistence.js │ ├── solve-recaptcha-v2.js │ ├── solve-recaptcha.js │ └── unsupported-requester.js └── migration-guide.md ├── errors.d.ts ├── errors.js ├── index.d.ts ├── index.js ├── index.test-d.ts ├── lib ├── brotli.js ├── browsers.json ├── email-decode.js ├── headers.js └── sandbox.js ├── mocha.opts ├── package-lock.json ├── package.json └── test ├── common.js ├── fixtures ├── access_denied.html ├── captcha.html ├── cf_recaptcha_01_12_2019.html ├── cf_recaptcha_15_04_2019.html ├── invalid_js_challenge.html ├── js_challenge_03_12_2018_1.html ├── js_challenge_03_12_2018_2.html ├── js_challenge_09_06_2016.html ├── js_challenge_10_04_2019.html ├── js_challenge_13_03_2019.html ├── js_challenge_21_03_2019.html ├── js_challenge_21_05_2015.html ├── js_challenge_22_04_2020.html ├── js_challenge_28_11_2019.html ├── page_with_emails.html ├── page_with_recaptcha.html ├── requested_page.html ├── sucuri_waf_11_08_2019.html └── sucuri_waf_18_08_2016.html ├── helper.js ├── test-brotli.js ├── test-captcha.js ├── test-emails.js ├── test-errors.js ├── test-headers.js ├── test-index.js ├── test-rp.js ├── test-sandbox.js └── test-timeout.js /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | "standard", 4 | "plugin:promise/recommended" 5 | ], 6 | "plugins": [ 7 | "node", 8 | "json" 9 | ], 10 | "rules": { 11 | "semi": [2, "always"], 12 | "no-trailing-spaces": [0], 13 | "no-multi-spaces": [1, { 14 | "exceptions": { 15 | "VariableDeclarator": true 16 | } 17 | }] 18 | }, 19 | 
"overrides": [ 20 | { 21 | "files": ["*.ts"], 22 | "parser": "@typescript-eslint/parser", 23 | "extends": [ 24 | "standard", 25 | "plugin:promise/recommended", 26 | "plugin:@typescript-eslint/recommended" 27 | ], 28 | "rules": { 29 | "semi": [2, "always"] 30 | } 31 | } 32 | ] 33 | } 34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | _Please attempt to answer the following questions before submitting a new issue:_ 11 | 12 | * What version of Cloudscraper are you using? 13 | - `node -p 'require("cloudscraper/package.json").version'` 14 | * What version of Node.js are you using? (Please share the process information) 15 | - `node -p process` 16 | * When did the problem start occurring? 17 | * How often does the problem occur? 18 | * What is the URL? 19 | * Are there any similar issues? (Please share the links) 20 | 21 | - [ ] I have read the [README](https://github.com/codemanki/cloudscraper#readme). (Code [examples](https://github.com/codemanki/cloudscraper/tree/master/docs/examples#readme)) 22 | 23 | _Please share a minimal working code snippet that reproduces the problem._ 24 |
Code snippet 25 | 26 | ```js 27 | INSERT CODE HERE 28 | ``` 29 |
30 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 2 | daysUntilStale: 90 3 | # Number of days of inactivity before a stale issue is closed 4 | daysUntilClose: 14 5 | # Issues with these labels will never be considered stale 6 | exemptLabels: 7 | - pinned 8 | - security 9 | - to fix next 10 | # Label to use when marking an issue as stale 11 | staleLabel: wontfix 12 | # Comment to post when marking an issue as stale. Set to `false` to disable 13 | markComment: > 14 | This issue has been automatically marked as stale because it has not had 15 | recent activity. It will be closed if no further activity occurs. Thank you 16 | for your contributions. 17 | # Comment to post when closing a stale issue. Set to `false` to disable 18 | closeComment: false 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | 5 | # Runtime data 6 | pids 7 | *.pid 8 | *.seed 9 | 10 | # Directory for instrumented libs generated by jscoverage/JSCover 11 | lib-cov 12 | 13 | # Coverage directory used by tools like istanbul 14 | coverage 15 | 16 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 17 | .grunt 18 | 19 | # Compiled binary addons (http://nodejs.org/api/addons.html) 20 | build/Release 21 | 22 | # Dependency directory 23 | # Commenting this out is preferred by some people, see 24 | # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git- 25 | node_modules 26 | 27 | # Users Environment Variables 28 | .lock-wscript 29 | 30 | test.js 31 | .nyc_output/ -------------------------------------------------------------------------------- /.travis.yml: 
-------------------------------------------------------------------------------- 1 | sudo: false 2 | 3 | language: node_js 4 | 5 | node_js: 6 | - node 7 | - 11 8 | - 10 9 | - 8 10 | 11 | matrix: 12 | include: 13 | - node_js: node 14 | env: BROTLI=1 15 | - node_js: 8 16 | env: BROTLI=1 17 | before_install: npm i --save-only request brotli 18 | 19 | before_install: npm i --save-only request 20 | install: npm i 21 | after_success: npm run coverage 22 | 23 | notifications: 24 | webhooks: https://www.travisbuddy.com/?insertMode=update 25 | on_success: never 26 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## Change Log 2 | 3 | ### 4.6.0 (12/02/2020) 4 | - Replace `&amp;` in url with `&` 5 | 6 | ### 4.5.0 (03/12/2019) 7 | - [#293](https://github.com/codemanki/cloudscraper/pull/293) Update code to parse latest CF recaptcha. 8 | 9 | ### 4.4.0 (28/11/2019) 10 | - [#288](https://github.com/codemanki/cloudscraper/pull/288) Update code to parse latest CF challenge. 11 | 12 | ### 4.3.0 (28/09/2019) 13 | - [#267](https://github.com/codemanki/cloudscraper/pull/267) Typescript definitions. 14 | - [#271](https://github.com/codemanki/cloudscraper/pull/271) Fix brotli compressed JSON responses. 15 | 16 | ### 4.2.0 (24/09/2019) 17 | - [#260](https://github.com/codemanki/cloudscraper/pull/260) Update reCaptcha handling. Deprecate `captcha.url` in preference of `captcha.uri`. [Fix fallback siteKey handling](https://github.com/codemanki/cloudscraper/issues/259#issuecomment-531450844) 18 | 19 | ### 4.1.4 (24/08/2019) 20 | - [#247](https://github.com/codemanki/cloudscraper/pull/247) Optimize header checks. 21 | 22 | ### 4.1.3 (12/07/2019) 23 | - [#242](https://github.com/codemanki/cloudscraper/pull/242) Update Sucuri WAF Solving. 
24 | 25 | ### 4.1.2 (23/05/2019) 26 | - [#219](https://github.com/codemanki/cloudscraper/pull/219) Remove a few problematic TLSv1.0 ciphers. 27 | 28 | ### 4.1.1 (11/05/2019) 29 | - Improve CF challenge security by nullifying VM context's prototype chain. 30 | 31 | ### v4.1.0 (02/05/2019) 32 | - Backport TLSv1.3 secure ciphers to potentially avoid getting a CAPTCHA. 33 | 34 | ### v4.0.1 (25/04/2019) 35 | - Improve documentation 36 | - Add `url` to captcha 37 | - Add more examples for reCAPTCHA handling 38 | 39 | ### v4.0.0 (22/04/2019) 40 | - Randomize `User-Agent` header with random chrome browser 41 | - Recaptcha solving support 42 | - Brotli non-mandatory support 43 | - Various code changes and improvements 44 | 45 | ### v3.9.1 (11/04/2019) 46 | - Fix for the timeout parsing 47 | 48 | ### v3.9.0 (11/04/2019) 49 | - [#193](https://github.com/codemanki/cloudscraper/pull/193) Fix bug with setTimeout match length 50 | 51 | ### v3.8.0 (11/04/2019) 52 | - [#191](https://github.com/codemanki/cloudscraper/pull/191) Update code to parse latest CF challenge 53 | 54 | ### v3.7.0 (07/04/2019) 55 | - [#182](https://github.com/codemanki/cloudscraper/pull/182) Usage examples have been added. 56 | - [#169](https://github.com/codemanki/cloudscraper/pull/169) Cloudscraper now automatically parses out timeout for a CF challenge. 57 | 58 | ### v3.6.0 (03/04/2019) 59 | - [#180](https://github.com/codemanki/cloudscraper/pull/180) Update code to parse latest CF challenge 60 | 61 | ### v3.5.0 (31/03/2019) 62 | - [#174](https://github.com/codemanki/cloudscraper/pull/174) Update code to parse latest CF challenge 63 | 64 | ### v3.4.0 (27/03/2019) 65 | - [#165](https://github.com/codemanki/cloudscraper/pull/165) Fixing CF challenge parsing, respect `Retry-After` header when CF returns `429 Too Many Requests` error. 66 | - [#163](https://github.com/codemanki/cloudscraper/pull/163) Improve the accuracy of challenge timing. 
Throw error immediately without a delay 67 | - [#159](https://github.com/codemanki/cloudscraper/pull/159) Decode emails in the page protected by CF 68 | 69 | ### v3.3.0 (22/03/2019) 70 | - [#153](https://github.com/codemanki/cloudscraper/pull/153) Update code to parse latest CF challenge 71 | 72 | ### v3.2.0 (20/03/2019) 73 | - [#149](https://github.com/codemanki/cloudscraper/pull/149) Update code to parse latest CF challenge 74 | 75 | ### v3.1.0 (14/03/2019) 76 | - [#140](https://github.com/codemanki/cloudscraper/pull/140) Update code to parse new CF challenge 77 | 78 | ### v3.0.1 (11/03/2019) 79 | - [#135](https://github.com/codemanki/cloudscraper/pull/135) Handle non-challenge response bodies 80 | - [#127](https://github.com/codemanki/cloudscraper/pull/127) Improve cloudflare detection 81 | - [#137](https://github.com/codemanki/cloudscraper/pull/137) Handle baseUrl option 82 | - Various code style improvements 83 | 84 | ### v3.0.0 (07/03/2019) 85 | - **BREAKING CHANGE**: `get/post` methods together with their signatures are aligned with corresponding methods from [request](https://github.com/request/request#requestmethod) 86 | - **BREAKING CHANGE**: `cloudscraper.request` method is deprecated in favour of `cloudscraper(options)` 87 | - Promise support has been added by using `request-promise` 88 | - Error objects are inherited from Error and have additional properties. 
89 | * `options` - The request options 90 | * `cause` - An alias for `error` 91 | * `response` - The request response 92 | - Stacktraces are available in error objects 93 | - `cloudflareTimeout` option can be defined to speed up waiting time 94 | - Challenge evaluation is done in a sandbox to avoid potential security issues 95 | - Default [request methods](https://github.com/request/request#requestmethod) are available 96 | - Custom cookie jar can now be passed [#103](https://github.com/codemanki/cloudscraper/issues/102) 97 | - Proxies support [PR#101](https://github.com/codemanki/cloudscraper/pull/101) 98 | - MIT license 99 | 100 | ### v2.0.1 (02/03/2019) 101 | - Minor documentation changes 102 | 103 | ### v2.0.0 (09/12/2018) 104 | - [#2943](https://github.com/codemanki/cloudscraper/pull/66) Support recursive challenge solving. 105 | - **BREAKING CHANGE** Before this, when any error has been detected, the callback was called with an incorrect order: `callback(.., body, response);` instead of `return callback(..., response, body);` 106 | 107 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2014 Anorov 4 | Copyright (c) 2019 Oleksii Sribnyi 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | cloudscraper 2 | ============ 3 | 4 | Node.js library to bypass Cloudflare's anti-ddos page. 5 | 6 | [![js-semistandard-style](https://cdn.rawgit.com/flet/semistandard/master/badge.svg)](https://github.com/Flet/semistandard) 7 | 8 | [![Build status](https://img.shields.io/travis/codemanki/cloudscraper/master.svg?style=flat-square)](https://travis-ci.org/codemanki/cloudscraper) 9 | [![Coverage](https://img.shields.io/coveralls/codemanki/cloudscraper.svg?style=flat-square)](https://coveralls.io/r/codemanki/cloudscraper) 10 | [![Dependency Status](https://img.shields.io/david/codemanki/cloudscraper.svg?style=flat-square)](https://david-dm.org/codemanki/cloudscraper) 11 | [![Greenkeeper badge](https://badges.greenkeeper.io/codemanki/cloudscraper.svg?style=flat-square)](https://greenkeeper.io/) 12 | 13 | If the page you want to access is protected by Cloudflare, it will return special page, which expects client to support Javascript to solve challenge. 14 | 15 | This small library encapsulates logic which extracts challenge, solves it, submits and returns the request page body. 16 | 17 | You can use cloudscraper even if you are not sure if Cloudflare protection is turned on. 
18 | 19 | In general, Cloudflare has 4 types of _common_ anti-bot pages: 20 | - Simple html+javascript page with challenge 21 | - Page which redirects to original site 22 | - Page with reCAPTCHA 23 | - Page with error (your IP was banned, etc.) 24 | 25 | If you notice that for some reason cloudscraper stops working, do not hesitate and get in touch with me (by creating an issue [here](https://github.com/codemanki/cloudscraper/issues), for example), so I can update it. 26 | 27 | Install 28 | ============ 29 | ```sh 30 | npm install cloudscraper 31 | ``` 32 | 33 | Saving the `request` module as a dependency is compulsory. 34 | 35 | ```sh 36 | # Pin the request version 37 | npm install --save request 38 | ``` 39 | 40 | Support for Brotli encoded responses is enabled by default when using Node.js v10 or later. 41 | If you wish to enable support for older Node.js versions, you may install [brotli](https://npmjs.com/package/brotli). 42 | It is recommended but not required. 43 | 44 | Usage 45 | ============ 46 | Cloudscraper uses `request-promise` by default since v3. You can find the migration guide [here.](docs/migration-guide.md) 47 | 48 | ```javascript 49 | var cloudscraper = require('cloudscraper'); 50 | 51 | cloudscraper.get('https://website.com/').then(console.log, console.error); 52 | ``` 53 | 54 | or for `POST` action: 55 | 56 | ```javascript 57 | var options = { 58 | uri: 'https://website.com/', 59 | formData: { field1: 'value', field2: 2 } 60 | }; 61 | 62 | cloudscraper.post(options).then(console.log).catch(console.error); 63 | ``` 64 | 65 | *Examples live in the docs directory of the Github repo and can be found [here.](docs/examples)* 66 | 67 | A generic request can be made with `cloudscraper(options)`. The options object should follow [request's options](https://www.npmjs.com/package/request#request-options-callback). Not everything is supported however, for example http methods other than GET and POST. 
If you wanted to request an image as binary data, you could additionally set the `encoding` option to `null` (see [download-v2.js](docs/examples/download-v2.js)); a basic request looks like this: 68 | 69 | ```javascript 70 | var options = { 71 | method: 'GET', 72 | url:'http://website.com/', 73 | }; 74 | 75 | cloudscraper(options).then(console.log); 76 | ``` 77 | 78 | ## Advanced usage 79 | Cloudscraper allows you to specify your own requester, one of either `request` or `request-promise`. 80 | Cloudscraper wraps the requester and accepts the same options, so using cloudscraper is pretty much like using those two libraries. 81 | - Cloudscraper exposes [the same HTTP verb methods as request](https://github.com/request/request#requestmethod): 82 | * `cloudscraper.get(options, callback)` 83 | * `cloudscraper.post(options, callback)` 84 | * `cloudscraper(uri)` 85 | - Cloudscraper uses request-promise by default, promise chaining is done exactly the same as described in [docs](https://github.com/request/request-promise#cheat-sheet): 86 | ``` 87 | cloudscraper(options) 88 | .then(function (htmlString) { 89 | }) 90 | .catch(function (err) { 91 | }); 92 | ``` 93 | Please refer to the requester's documentation for further instructions. 94 | 95 | ## Sucuri 96 | Cloudscraper can also identify and automatically bypass [Sucuri WAF](https://sucuri.net/website-firewall/). No actions are required. 97 | 98 | ## ReCAPTCHA 99 | Cloudscraper may help you with the reCAPTCHA page. Take a look at [this example](docs/examples/solve-recaptcha.js) and an [example using promises](docs/examples/solve-recaptcha-v2.js). 100 | 101 | Cloudflare may send a reCAPTCHA depending on the negotiated TLS cipher suite and extensions. 
Reducing the default cipher suite to only ciphers supported by Cloudflare may mitigate the problem: https://developers.cloudflare.com/ssl/ssl-tls/cipher-suites/ 102 | 103 | Only specifying the Cloudflare preferred TLSv1.2 cipher is also an option: 104 | ```javascript 105 | var cloudscraper = require('cloudscraper').defaults({ 106 | agentOptions: { 107 | ciphers: 'ECDHE-ECDSA-AES128-GCM-SHA256' 108 | } 109 | }) 110 | ``` 111 | 112 | More information on TLS issues can be found [here](https://github.com/codemanki/cloudscraper/issues?utf8=%E2%9C%93&q=tls). 113 | 114 | ## Defaults method 115 | 116 | `cloudscraper.defaults` is a very convenient way of extending the cloudscraper requests with any of your settings. 117 | 118 | ```javascript 119 | var cloudscraper = require('cloudscraper').defaults({ 'proxy': 'http://localproxy.com' }); 120 | // Overriding headers to remove them or using uncommon headers will cause reCAPTCHA responses 121 | var headers = { /* ... */ }; 122 | var cloudscraper = require('cloudscraper').defaults({ headers: headers }); 123 | 124 | cloudscraper(options).then(console.log); 125 | ``` 126 | 127 | ## Configuration 128 | Cloudscraper exposes the following options that are required by default but might be changed. *Please note that the default values eliminate the chance of getting sent a CAPTCHA.* 129 | 130 | ```javascript 131 | var options = { 132 | uri: 'https://website', 133 | jar: requestModule.jar(), // Custom cookie jar 134 | headers: { 135 | // User agent, Cache Control and Accept headers are required 136 | // User agent is populated by a random UA. 137 | 'User-Agent': 'Ubuntu Chromium/34.0.1847.116 Chrome/34.0.1847.116 Safari/537.36', 138 | 'Cache-Control': 'private', 139 | 'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5' 140 | }, 141 | // Cloudscraper automatically parses out timeout required by Cloudflare. 142 | // Override cloudflareTimeout to adjust it. 
143 | cloudflareTimeout: 5000, 144 | // Reduce Cloudflare's timeout to cloudflareMaxTimeout if it is excessive 145 | cloudflareMaxTimeout: 30000, 146 | // followAllRedirects - follow non-GET HTTP 3xx responses as redirects 147 | followAllRedirects: true, 148 | // Support only this max challenges in row. If CF returns more, throw an error 149 | challengesToSolve: 3, 150 | // Remove Cloudflare's email protection, replace encoded email with decoded versions 151 | decodeEmails: false, 152 | // Support gzip encoded responses (Should be enabled unless using custom headers) 153 | gzip: true, 154 | // Removes a few problematic TLSv1.0 ciphers to avoid CAPTCHA 155 | agentOptions: { ciphers } 156 | }; 157 | 158 | cloudscraper(options).then(console.log); 159 | 160 | ``` 161 | You can access the default configuration with `cloudscraper.defaultParams` 162 | 163 | ## Error object 164 | The Cloudscraper error object inherits from `Error` and has the following fields: 165 | * `name` - `RequestError`/`CaptchaError`/`CloudflareError`/`ParserError` 166 | * `options` - The request options 167 | * `cause` - An alias for `error` 168 | * `response` - The request response 169 | * `errorType` - Custom error code 170 | Where `errorType` can be one of the following: 171 | - `0` if request to page failed due to some native reason as bad url, http connection or so. `error` in this case will be error [event](http://nodejs.org/api/http.html#http_class_http_server) 172 | - `1` Cloudflare returned CAPTCHA. Nothing to do here. Bad luck 173 | - `2` Cloudflare returned page with some inner error. `error` will be `Number` within this range `1012, 1011, 1002, 1000, 1004, 1010, 1006, 1007, 1008`. See more [here](https://support.cloudflare.com/hc/en-us/sections/200820298-Error-Pages) 174 | - `3` this error is returned when library failed to parse and solve js challenge. `error` will be `String` with some details. 
:warning: :warning: __Most likely it means that Cloudflare have changed their js challenge.__ 175 | - `4` CF went into a loop and started to return challenge after challenge. If number of solved challenges is greater than `3` and another challenge is returned, throw an error 176 | 177 | Errors are descriptive. You can find a list of all known errors [here.](errors.js) 178 | 179 | 180 | Do not always rely on `error.cause` to be an error, it can be a string. 181 | 182 | Running tests 183 | ============ 184 | Clone this repo, do `npm install` and then just `npm test` 185 | 186 | ### Unknown error? Library stopped working? ### 187 | Let me know, by opening an [issue](https://github.com/codemanki/cloudscraper/issues) in this repo and I will update library asap. Please, provide url and body of page where cloudscraper failed. 188 | 189 | WAT 190 | =========== 191 | Current Cloudflare implementation requires browser to respect the timeout of 5 seconds and cloudscraper mimics this behaviour. So every time you call `cloudscraper.get/post` you should expect it to return result after minimum 6 seconds. If you want to change this behaviour, you would need to make a generic request as described above and pass the `cloudflareTimeout` option with your value. 
But be aware that Cloudflare might track this timeout and use it against you ;) 192 | 193 | ## TODO 194 | - [x] Check for reCAPTCHA 195 | - [x] Support cookies, so challenge can be solved once per session 196 | - [x] Support page with simple redirects 197 | - [x] Add proper testing 198 | - [x] Remove manual 302 processing, replace with `followAllRedirects` param 199 | - [x] Parse out the timeout from challenge page 200 | - [x] Reorder the arguments in get/post/request methods and allow custom options to be passed in 201 | - [x] Support reCAPTCHA solving 202 | - [x] Promisification 203 | 204 | ## Kudos to contributors 205 | - [Dwayne](https://github.com/pro-src) 206 | - [drdokk](https://github.com/drdokk) 207 | - [Cole Faust](https://github.com/Colecf) 208 | - [Jeongbong Seo](https://github.com/jngbng) 209 | - [Mike van Rossum](https://github.com/askmike) 210 | - [Santiago Castro](https://github.com/bryant1410) 211 | - [Leonardo Gatica](https://github.com/lgaticaq) 212 | - [Michael](https://github.com/roflmuffin) 213 | - [Kamikadze4GAME](https://github.com/Kamikadze4GAME) 214 | - [Anorov](https://github.com/Anorov) :star: 215 | 216 | In the beginning cloudscraper was a port of python module [cloudflare-scrape](https://github.com/Anorov/cloudflare-scrape). Thank you [Anorov](https://github.com/Anorov) for an inspiration. 217 | 218 | ## Dependencies 219 | * [request-promise](https://github.com/request/request-promise) 220 | -------------------------------------------------------------------------------- /docs/examples/README.md: -------------------------------------------------------------------------------- 1 | Examples 2 | --- 3 | 4 | ***Not all of the examples are meant to work without modification.*** 5 | 6 | The version suffix is only meant to indicate an alternate version 7 | of the same example. e.g. `custom-headers-v2.js` is meant to be used 8 | with the latest version of Cloudscraper, ***not*** Cloudscraper v2.0.0. 
9 | 10 | If you've noticed for some reason that an example is outdated and/or misleading, please send a PR to correct it. 11 |
In fact, updating or adding examples is a great way to contribute! 12 |
If you don't have the time to send a PR, please consider opening an issue instead. 13 | -------------------------------------------------------------------------------- /docs/examples/banned.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | /* eslint-disable yoda */ 3 | 4 | // https://github.com/codemanki/cloudscraper/issues/155 5 | 6 | var cloudscraper = require('../..'); 7 | var CloudflareError = require('../../errors').CloudflareError; 8 | 9 | var uri = process.argv[2]; 10 | 11 | cloudscraper.get(uri) 12 | .catch(function (error) { 13 | if (error instanceof CloudflareError) { 14 | if (!isNaN(error.cause)) { 15 | if (1004 < error.cause && error.cause < 1009) { 16 | return cloudscraper.get({ uri: uri, proxy: 'http://example-proxy.com' }); 17 | } 18 | } 19 | } 20 | 21 | throw error; 22 | }) 23 | .then(console.log) 24 | .catch(console.error); 25 | -------------------------------------------------------------------------------- /docs/examples/captcha-html.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var cloudscraper = require('../..').defaults({ resolveWithFullResponse: true }); 4 | var CaptchaError = require('../../errors').CaptchaError; 5 | 6 | var uri = process.argv[2]; 7 | 8 | cloudscraper.get(uri).catch(function (error) { 9 | if (error instanceof CaptchaError) { 10 | console.log(error.response.body.toString('utf8')); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/examples/custom-headers-v2.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var cloudscraper = require('../..'); 4 | 5 | cloudscraper.defaultParams.headers = { 6 | Connection: 'keep-alive', 7 | 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36', 8 | Accept: 
'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3', 9 | 'Accept-Encoding': 'gzip, deflate', 10 | 'Accept-Language': 'en-US,en;q=0.9' 11 | }; 12 | 13 | var uri = process.argv[2]; 14 | 15 | cloudscraper.get({ gzip: true, uri: uri }).then(console.log).catch(console.error); 16 | -------------------------------------------------------------------------------- /docs/examples/custom-headers.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var cloudscraper = require('../..').defaults({ 4 | headers: { 5 | Connection: 'keep-alive', 6 | 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36', 7 | Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3', 8 | 'Accept-Language': 'en-US,en;q=0.9' 9 | } 10 | }); 11 | 12 | var uri = process.argv[2]; 13 | 14 | cloudscraper.get(uri, function (error, response, body) { 15 | if (error) { 16 | throw error; 17 | } 18 | 19 | console.log(body); 20 | }); 21 | -------------------------------------------------------------------------------- /docs/examples/custom-requester-v2.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var requester = require('request-promise'); 4 | var cloudscraper = require('../..').defaults({ requester: requester }); 5 | var uri = process.argv[2]; 6 | 7 | cloudscraper.get(uri).then(console.log).catch(console.error); 8 | -------------------------------------------------------------------------------- /docs/examples/custom-requester.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var requester = require('request'); 4 | var cloudscraper = require('../..').defaults({ requester: requester }); 5 | var uri = 
process.argv[2]; 6 | 7 | cloudscraper.get(uri, function (error, response, body) { 8 | if (error) { 9 | throw error; 10 | } 11 | 12 | console.log(body); 13 | }); 14 | -------------------------------------------------------------------------------- /docs/examples/debugging.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var cloudscraper = require('../..').defaults({ resolveWithFullResponse: true }); 4 | var fs = require('fs'); 5 | 6 | var uri = process.argv[2]; 7 | 8 | cloudscraper.debug = true; 9 | cloudscraper.get(uri).then(onResponse).catch(onError); 10 | 11 | function onResponse (response) { 12 | var request = JSON.stringify(response.request.toJSON(), null, 2); 13 | var headers = JSON.stringify(response.headers, null, 2); 14 | 15 | fs.writeFileSync('./request.json', request, 'utf8'); 16 | fs.writeFileSync('./headers.json', headers, 'utf8'); 17 | fs.writeFileSync('./body.html', response.body, 'utf8'); 18 | } 19 | 20 | function onError (error) { 21 | console.error(error.stack); 22 | 23 | fs.writeFileSync('./error.txt', error.stack, 'utf8'); 24 | 25 | if (error.cause) { 26 | console.log('Cause: ', error.cause); 27 | } 28 | 29 | if (error.response) { 30 | onResponse(error.response); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /docs/examples/download-v2.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | /* eslint-disable promise/always-return */ 3 | 4 | var cloudscraper = require('../..'); 5 | var fs = require('fs'); 6 | 7 | cloudscraper.get({ uri: 'https://subscene.com/content/images/logo.gif', encoding: null }) 8 | .then(function (bufferAsBody) { 9 | fs.writeFileSync('./test.gif', bufferAsBody); 10 | }) 11 | .catch(console.error); 12 | -------------------------------------------------------------------------------- /docs/examples/download.js: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var cloudscraper = require('../..').defaults({ resolveWithFullResponse: true }); 4 | var fs = require('fs'); 5 | 6 | var uri = process.argv[2]; 7 | 8 | cloudscraper.get({ uri: uri, encoding: null }).then(saveFile).catch(console.error); 9 | 10 | function saveFile (response) { 11 | var filename = process.argv[3]; 12 | 13 | if (!filename) { 14 | var header = response.caseless.get('content-disposition'); 15 | var match = ('' + header).match(/filename=(['"]?)(.*?)\1/i); 16 | 17 | filename = match !== null ? match[2] : 'example.bin'; 18 | } 19 | 20 | fs.writeFileSync(filename, response.body); 21 | } 22 | -------------------------------------------------------------------------------- /docs/examples/ignore-error.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | const cloudscraper = require('../..').defaults({ resolveWithFullResponse: true }); 4 | 5 | var uri = process.argv[2]; 6 | // Cloudscraper thinks this server's response is a Cloudflare response 7 | var server = 'cloudflare-april-fools'; 8 | 9 | getHeaders(uri).then(console.log).catch(console.error); 10 | 11 | function getHeaders (uri) { 12 | return cloudscraper.head(uri) 13 | .catch(error => { 14 | if (error.errorType === 2 && server === error.response.headers.Server) { 15 | // Ignoring the error and returning the response 16 | return error.response; 17 | } 18 | 19 | throw error; 20 | }) 21 | .then(response => response.headers); 22 | } 23 | -------------------------------------------------------------------------------- /docs/examples/redirects.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var cloudscraper = require('../..').defaults({ followAllRedirects: false, maxRedirects: 3 }); 4 | var uri = process.argv[2]; 5 | 6 | cloudscraper.get({ simple: false, uri: uri 
}).then(console.log).catch(console.error); 7 | -------------------------------------------------------------------------------- /docs/examples/session-persistence.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | // https://github.com/codemanki/cloudscraper/issues/246 4 | 5 | var cloudscraper = require('../..'); 6 | 7 | // npm install --save tough-cookie-file-store 8 | var CookieStore = require('tough-cookie-file-store'); 9 | var jar = cloudscraper.jar(new CookieStore('./cookie.json')); 10 | 11 | /* 12 | // It's recommended to reuse the same headers. 13 | var fs = require('fs'); 14 | var headers = cloudscraper.defaultParams.headers; 15 | fs.writeFileSync('./headers.json', JSON.stringify(headers), 'utf-8'); 16 | */ 17 | 18 | var uri = process.argv[2]; 19 | 20 | cloudscraper = cloudscraper.defaults({ jar, headers: require('./headers') }); 21 | cloudscraper.get(uri).then(console.log).catch(console.error); 22 | -------------------------------------------------------------------------------- /docs/examples/solve-recaptcha-v2.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | // Force a CAPTCHA response by sending bogus headers 4 | const headers = { /* headers without user-agent, etc. 
*/ }; 5 | const cloudscraper = require('../..').defaults({ onCaptcha: handler, headers }); 6 | 7 | // Pseudo function that returns a promise instead of calling captcha.submit() 8 | function handler (options, { captcha }) { 9 | return new Promise((resolve, reject) => { 10 | // Here you do some magic with the siteKey provided by cloudscraper 11 | console.error('The url is "' + captcha.uri.href + '"'); 12 | console.error('The site key is "' + captcha.siteKey + '"'); 13 | // captcha.form['g-recaptcha-response'] = /* Obtain from your service */ 14 | reject(new Error('This is a dummy function.')); 15 | }); 16 | } 17 | 18 | // An example handler with destructuring arguments 19 | function alternative (options, { captcha: { uri, siteKey } }) { 20 | // Here you do some magic with the siteKey provided by cloudscraper 21 | console.error('The url is "' + uri.href + '"'); 22 | console.error('The site key is "' + siteKey + '"'); 23 | return Promise.reject(new Error('This is a dummy function')); 24 | } 25 | 26 | const uri = process.argv[2]; 27 | cloudscraper.get({ uri, onCaptcha: alternative }).then(console.log).catch(console.warn); 28 | -------------------------------------------------------------------------------- /docs/examples/solve-recaptcha.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | function solveReCAPTCHA (url, sitekey, callback) { 4 | // Here you do some magic with the sitekey provided by cloudscraper 5 | } 6 | 7 | function onCaptcha (options, response, body) { 8 | const captcha = response.captcha; 9 | // solveReCAPTCHA is a function that you should implement; pass it the href and sitekey and it calls back with (error, response) 10 | solveReCAPTCHA(captcha.uri.href, captcha.siteKey, (error, gRes) => { 11 | // eslint-disable-next-line no-void 12 | if (error) return void captcha.submit(error); 13 | captcha.form['g-recaptcha-response'] = gRes; 14 | captcha.submit(); 15 | }); 16 | } 17 | 18 | const
cloudscraper = require('../..').defaults({ onCaptcha }); 19 | var uri = process.argv[2]; 20 | cloudscraper.get({ uri: uri, headers: { cookie: 'captcha=1' } }).catch(console.warn).then(console.log); // eslint-disable-line promise/catch-or-return 21 | -------------------------------------------------------------------------------- /docs/examples/unsupported-requester.js: -------------------------------------------------------------------------------- 1 | import { EventEmitter } from 'events'; 2 | import { URL } from 'url'; 3 | // `npm i --save caseless` although it's available if `request` is installed 4 | import caseless from 'caseless'; 5 | 6 | export default function (options) { 7 | return new Request(options); 8 | }; 9 | 10 | // All of the properties that are defined in this class are required. 11 | class Request extends EventEmitter { 12 | constructor (options) { 13 | super(); 14 | const self = this; 15 | 16 | self.uri = typeof options.uri === 'string' 17 | ? new URL(options.uri) : options.uri; 18 | 19 | // Use options.headers instead of `this.headers` if serializing 20 | self.headers = caseless(options.headers); 21 | 22 | // Cloudscraper will only call `request.callback` for the very last request 23 | self.callback = options.callback; 24 | 25 | // The actual request should be performed at this point. 
26 | // Pseudo error event 27 | const error = null; 28 | if (error) { 29 | self.emit('error', new Error('Request error')); 30 | } 31 | 32 | // Pseudo response arguments 33 | const body = Buffer.from('Response content', 'utf-8'); 34 | const status = 200; 35 | const headers = { 36 | // Response headers 37 | }; 38 | 39 | // Create a response object that `request` normally provides 40 | const response = new Response(headers, status, body); 41 | response.request = self; 42 | 43 | // Advanced, update the cookie jar, use `tough-cookie` if needed 44 | if (response.caseless.has('set-cookie')) { 45 | options.jar.setCookie( 46 | response.caseless['set-cookie'], 47 | self.uri.href, 48 | { ignoreError: true } 49 | ); 50 | } 51 | 52 | // Emit the complete event 53 | setImmediate(() => self.emit('complete', response, response.body)); 54 | } 55 | 56 | getHeader (name) { 57 | return this.headers.get(name); 58 | } 59 | 60 | setHeader (name, value) { 61 | this.headers.set(name, value); 62 | } 63 | } 64 | 65 | // All of the properties that are defined in this class are required. 
66 | class Response { 67 | constructor (headers, statusCode, body) { 68 | this.headers = headers; 69 | this.caseless = caseless(headers); 70 | this.statusCode = statusCode; 71 | this.body = body; 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /docs/migration-guide.md: -------------------------------------------------------------------------------- 1 | Migration from v2 to v3 2 | ============ 3 | - Replace `cloudscraper.request(options)` with `cloudscraper(options)` 4 | - `cloudscraper.get()` and `cloudscraper.post()` method signatures are aligned with corresponding methods from [request](https://github.com/request/request#requestmethod): 5 | ``` 6 | var options = { 7 | uri: 'https://website.com/', 8 | headers: {/*...*/} 9 | }; 10 | 11 | cloudscraper.get(options, function(error, response, body) { 12 | console.log(body); 13 | }); 14 | ``` 15 | or for **POST** 16 | ``` 17 | var options = { 18 | uri: 'https://website.com/', 19 | headers: {/*...*/}, 20 | formData: { field1: 'value', field2: 2 } 21 | }; 22 | 23 | cloudscraper.post(options, function(error, response, body) { 24 | console.log(body); 25 | }); 26 | ``` 27 | - If you are using custom promise support workarounds please remove them as cloudscraper now uses [request-promise](https://github.com/request/request-promise): 28 | 29 | ``` 30 | var cloudscraper = require('cloudscraper'); 31 | var options = { 32 | uri: 'https://website.com/', 33 | method: 'GET' 34 | }; 35 | 36 | cloudscraper(options).then(function(body) { 37 | console.log(body); 38 | }); 39 | ``` 40 | -------------------------------------------------------------------------------- /errors.d.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable @typescript-eslint/no-explicit-any */ 2 | import * as rp from 'request-promise/errors'; 3 | import cloudscraper = require('.'); 4 | import http = require('http'); 5 | 6 | export interface RequestError extends 
rp.RequestError { 7 | options: cloudscraper.Options; 8 | errorType: 0; 9 | } 10 | 11 | export interface RequestErrorConstructor extends Error { 12 | new(cause: any, options: cloudscraper.Options, response: http.IncomingMessage): RequestError; 13 | 14 | (cause: any, options: cloudscraper.Options, response: http.IncomingMessage): RequestError; 15 | 16 | prototype: RequestError; 17 | } 18 | 19 | export const RequestError: RequestErrorConstructor; 20 | 21 | export interface CaptchaError extends rp.RequestError { 22 | options: cloudscraper.Options; 23 | errorType: 1; 24 | } 25 | 26 | export interface CaptchaErrorConstructor extends Error { 27 | new(cause: any, options: cloudscraper.Options, response: http.IncomingMessage): CaptchaError; 28 | 29 | (cause: any, options: cloudscraper.Options, response: http.IncomingMessage): CaptchaError; 30 | 31 | prototype: CaptchaError; 32 | } 33 | 34 | export const CaptchaError: CaptchaErrorConstructor; 35 | 36 | export interface CloudflareError extends rp.RequestError { 37 | options: cloudscraper.Options; 38 | errorType: 2 | 4; 39 | } 40 | 41 | export interface CloudflareErrorConstructor extends Error { 42 | new(cause: any, options: cloudscraper.Options, response: http.IncomingMessage): CloudflareError; 43 | 44 | (cause: any, options: cloudscraper.Options, response: http.IncomingMessage): CloudflareError; 45 | 46 | prototype: CloudflareError; 47 | } 48 | 49 | export const CloudflareError: CloudflareErrorConstructor; 50 | 51 | export interface ParserError extends rp.RequestError { 52 | options: cloudscraper.Options; 53 | errorType: 3; 54 | } 55 | 56 | export interface ParserErrorConstructor extends Error { 57 | new(cause: any, options: cloudscraper.Options, response: http.IncomingMessage): ParserError; 58 | 59 | (cause: any, options: cloudscraper.Options, response: http.IncomingMessage): ParserError; 60 | 61 | prototype: ParserError; 62 | } 63 | 64 | export const ParserError: ParserErrorConstructor; 65 | 66 | export interface
StatusCodeError extends rp.RequestError { 67 | options: cloudscraper.Options; 68 | statusCode: number; 69 | errorType: 5; 70 | } 71 | 72 | export interface StatusCodeErrorConstructor extends Error { 73 | new(statusCode: number, body: any, options: cloudscraper.Options, response: http.IncomingMessage): StatusCodeError; 74 | 75 | (statusCode: number, body: any, options: cloudscraper.Options, response: http.IncomingMessage): StatusCodeError; 76 | 77 | prototype: StatusCodeError; 78 | } 79 | 80 | export const StatusCodeError: StatusCodeErrorConstructor; 81 | 82 | export interface TransformError extends rp.RequestError { 83 | options: cloudscraper.Options; 84 | errorType: 6; 85 | } 86 | 87 | export interface TransformErrorConstructor extends Error { 88 | new(cause: any, options: cloudscraper.Options, response: http.IncomingMessage): TransformError; 89 | 90 | (cause: any, options: cloudscraper.Options, response: http.IncomingMessage): TransformError; 91 | 92 | prototype: TransformError; 93 | } 94 | 95 | export const TransformError: TransformErrorConstructor; 96 | -------------------------------------------------------------------------------- /errors.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | // The purpose of this library: 4 | // 1. Have errors consistent with request/promise-core 5 | // 2. Prevent request/promise core from wrapping our errors 6 | // 3. Create descriptive errors. 7 | 8 | // There are two differences between these errors and the originals. 9 | // 1. There is a non-enumerable errorType attribute. 10 | // 2. The error constructor is hidden from the stacktrace. 
11 | 12 | const EOL = require('os').EOL; 13 | const original = require('request-promise-core/errors'); 14 | const http = require('http'); 15 | 16 | const BUG_REPORT = format([ 17 | '### Cloudflare may have changed their technique, or there may be a bug.', 18 | '### Bug Reports: https://github.com/codemanki/cloudscraper/issues', 19 | '### Check the detailed exception message that follows for the cause.' 20 | ]); 21 | 22 | const ERROR_CODES = { 23 | // Non-standard 5xx server error HTTP status codes 24 | 520: 'Web server is returning an unknown error', 25 | 521: 'Web server is down', 26 | 522: 'Connection timed out', 27 | 523: 'Origin is unreachable', 28 | 524: 'A timeout occurred', 29 | 525: 'SSL handshake failed', 30 | 526: 'Invalid SSL certificate', 31 | 527: 'Railgun Listener to Origin Error', 32 | 530: 'Origin DNS error', 33 | // Other codes 34 | 1000: 'DNS points to prohibited IP', 35 | 1001: 'DNS resolution error', 36 | 1002: 'Restricted or DNS points to Prohibited IP', 37 | 1003: 'Access Denied: Direct IP Access Not Allowed', 38 | 1004: 'Host Not Configured to Serve Web Traffic', 39 | 1005: 'Access Denied: IP of banned ASN/ISP', 40 | 1010: 'The owner of this website has banned your access based on your browser\'s signature', 41 | 1011: 'Access Denied (Hotlinking Denied)', 42 | 1012: 'Access Denied', 43 | 1013: 'HTTP hostname and TLS SNI hostname mismatch', 44 | 1016: 'Origin DNS error', 45 | 1018: 'Domain is misconfigured', 46 | 1020: 'Access Denied (Custom Firewall Rules)' 47 | }; 48 | 49 | ERROR_CODES[1006] = 50 | ERROR_CODES[1007] = 51 | ERROR_CODES[1008] = 'Access Denied: Your IP address has been banned'; 52 | 53 | const OriginalError = original.RequestError; 54 | 55 | const RequestError = create('RequestError', 0); 56 | const CaptchaError = create('CaptchaError', 1); 57 | 58 | // errorType 4 is a CloudflareError so this constructor is reused. 
59 | const CloudflareError = create('CloudflareError', 2, function (error) { 60 | if (!isNaN(error.cause)) { 61 | const description = ERROR_CODES[error.cause] || http.STATUS_CODES[error.cause]; 62 | if (description) { 63 | error.message = error.cause + ', ' + description; 64 | } 65 | } 66 | }); 67 | 68 | const ParserError = create('ParserError', 3, function (error) { 69 | error.message = BUG_REPORT + error.message; 70 | }); 71 | 72 | // The following errors originate from promise-core and its dependents. 73 | // Give them an errorType for consistency. 74 | original.StatusCodeError.prototype.errorType = 5; 75 | original.TransformError.prototype.errorType = 6; 76 | 77 | // This replaces the RequestError for all libraries using request/promise-core 78 | // and prevents silent failure. 79 | Object.defineProperty(original, 'RequestError', { 80 | configurable: true, 81 | enumerable: true, 82 | writable: true, 83 | value: RequestError 84 | }); 85 | 86 | // Export our custom errors along with StatusCodeError, etc. 87 | Object.assign(module.exports, original, { 88 | RequestError: RequestError, 89 | CaptchaError: CaptchaError, 90 | ParserError: ParserError, 91 | CloudflareError: CloudflareError 92 | }); 93 | 94 | const desc = { configurable: true, writable: true, enumerable: false }; 95 | const descriptors = { 96 | error: desc, 97 | cause: desc, 98 | response: desc, 99 | options: desc 100 | }; 101 | 102 | function create (name, errorType, customize) { 103 | function CustomError (cause, options, response) { 104 | // This prevents nasty things e.g. `error.cause.error` and 105 | // is why replacing the original RequestError is necessary. 
106 | if (cause instanceof OriginalError) { 107 | return cause; 108 | } 109 | 110 | // Cleanup error output 111 | Object.defineProperties(this, descriptors); 112 | 113 | OriginalError.apply(this, arguments); 114 | 115 | // Change the name to match this constructor 116 | this.name = name; 117 | 118 | if (typeof customize === 'function') { 119 | customize(this); 120 | } 121 | 122 | if (Error.captureStackTrace) { // required for non-V8 environments 123 | // Provide a proper stack trace that hides this constructor 124 | Error.captureStackTrace(this, CustomError); 125 | } 126 | } 127 | 128 | CustomError.prototype = Object.create(OriginalError.prototype); 129 | CustomError.prototype.constructor = CustomError; 130 | // Keeps things stealthy by defining errorType on the prototype. 131 | // This makes it non-enumerable and safer to add. 132 | CustomError.prototype.errorType = errorType; 133 | 134 | Object.setPrototypeOf(CustomError, Object.getPrototypeOf(OriginalError)); 135 | Object.defineProperty(CustomError, 'name', { 136 | configurable: true, 137 | value: name 138 | }); 139 | 140 | return CustomError; 141 | } 142 | 143 | function format (lines) { 144 | return EOL + lines.join(EOL) + EOL + EOL; 145 | } 146 | -------------------------------------------------------------------------------- /index.d.ts: -------------------------------------------------------------------------------- 1 | import { URL } from 'url'; 2 | import http = require('http'); 3 | import https = require('https'); 4 | import Promise = require('bluebird'); 5 | import request = require('request'); 6 | import rp = require('request-promise'); 7 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 8 | import errors = require('./errors'); 9 | 10 | declare namespace cloudscraper { 11 | interface Cloudscraper extends rp.RequestPromise, BaseOptions { 12 | cloudflareTimeout?: number; 13 | realEncoding: string | null; 14 | // Identify this request as a Cloudscraper request 15 | cloudscraper: boolean; 16 
| } 17 | 18 | interface Captcha { 19 | submit(error?: Error): void; 20 | 21 | url: string; // <- deprecated 22 | siteKey: string; 23 | uri: URL; 24 | form: { 25 | [key: string]: string; 26 | // Secret form value 27 | s: string; 28 | }; 29 | } 30 | 31 | interface Response extends request.Response { 32 | isCloudflare?: boolean; 33 | isHTML?: boolean; 34 | isCaptcha?: boolean; 35 | 36 | // JS Challenge 37 | challenge?: string; 38 | } 39 | 40 | interface CaptchaResponse extends Response { 41 | captcha: Captcha; 42 | isCaptcha: true; 43 | } 44 | 45 | type Requester = 46 | rp.RequestPromiseAPI 47 | | request.RequestAPI; 48 | 49 | // eslint-disable-next-line @typescript-eslint/no-explicit-any 50 | type CaptchaHandler = (options: Options, response: CaptchaResponse, body?: any) => Promise | void; 51 | 52 | interface BaseOptions { 53 | // The default export of either request or request-promise 54 | requester?: Requester; 55 | // Reduce Cloudflare's timeout to cloudflareMaxTimeout if it is excessive 56 | cloudflareMaxTimeout?: number; 57 | // Support only this max challenges in row. 
If CF returns more, throw an error 58 | challengesToSolve?: number; 59 | // Remove Cloudflare's email protection 60 | decodeEmails?: boolean; 61 | 62 | onCaptcha?: CaptchaHandler; 63 | } 64 | 65 | interface DefaultOptions extends Required, rp.RequestPromiseOptions { 66 | // Override the parsed timeout 67 | cloudflareTimeout?: number; 68 | agentOptions?: (http.AgentOptions | https.AgentOptions) & { 69 | ciphers?: string; 70 | }; 71 | } 72 | 73 | interface CoreOptions extends BaseOptions, rp.RequestPromiseOptions { 74 | cloudflareTimeout?: number; 75 | realEncoding?: string | null; 76 | } 77 | 78 | interface CloudscraperAPI extends request.RequestAPI { 79 | defaultParams: DefaultOptions; 80 | (options: OptionsWithUrl): Promise; 81 | } 82 | 83 | type OptionsWithUri = request.UriOptions & CoreOptions; 84 | type OptionsWithUrl = request.UrlOptions & CoreOptions; 85 | type Options = OptionsWithUri | OptionsWithUrl; 86 | } 87 | 88 | // eslint-disable-next-line no-redeclare 89 | declare const cloudscraper: cloudscraper.CloudscraperAPI; 90 | export = cloudscraper; 91 | -------------------------------------------------------------------------------- /index.test-d.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable @typescript-eslint/no-explicit-any */ 2 | import { expectType } from 'tsd'; 3 | import { URL } from 'url'; 4 | import { 5 | Options, Cloudscraper, CaptchaHandler, CoreOptions, DefaultOptions, 6 | CaptchaResponse, Captcha 7 | } from './index'; 8 | import Promise = require('bluebird'); 9 | import request = require('request'); 10 | import rp = require('request-promise'); 11 | import cloudscraper = require('./index'); 12 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 13 | import errors = require('./errors'); 14 | 15 | const noop = (): void => {}; 16 | 17 | expectType({ uri: '' }); 18 | expectType({ url: '' }); 19 | 20 | expectType({ uri: '', requester: request }); 21 | expectType({ uri: '', 
requester: rp }); 22 | 23 | expectType(cloudscraper({ uri: '' })); 24 | expectType(cloudscraper.get({ uri: '' })); 25 | expectType(cloudscraper.post({ uri: '' })); 26 | expectType(cloudscraper.put({ uri: '' })); 27 | expectType(cloudscraper.delete({ uri: '' })); 28 | expectType(cloudscraper.del({ uri: '' })); 29 | expectType(cloudscraper.head({ uri: '' })); 30 | expectType(cloudscraper.patch({ uri: '' })); 31 | 32 | expectType(cloudscraper('')); 33 | expectType(cloudscraper.get('')); 34 | expectType(cloudscraper.post('')); 35 | expectType(cloudscraper.put('')); 36 | expectType(cloudscraper.delete('')); 37 | expectType(cloudscraper.del('')); 38 | expectType(cloudscraper.head('')); 39 | expectType(cloudscraper.patch('')); 40 | 41 | // eslint-disable-next-line promise/always-return 42 | expectType>(cloudscraper.get({ uri: '' }).then(noop)); 43 | expectType>(cloudscraper.get({ uri: '' }).catch(noop)); 44 | expectType>(cloudscraper.get({ uri: '' }).finally(noop)); 45 | expectType>(cloudscraper.get({ uri: '' }).promise()); 46 | expectType(cloudscraper.get({ uri: '' }).cancel()); 47 | 48 | expectType((options: Options, response: CaptchaResponse) => { 49 | expectType(options); 50 | expectType(response); 51 | 52 | const { captcha, isCaptcha } = response; 53 | 54 | expectType(captcha); 55 | expectType(isCaptcha); 56 | 57 | expectType({ 58 | url: '', // <- deprecated 59 | uri: new URL(''), 60 | siteKey: '', 61 | submit: captcha.submit, 62 | form: { s: '' } 63 | }); 64 | 65 | captcha.submit(); 66 | }); 67 | 68 | expectType(cloudscraper.defaultParams); 69 | expectType({ 70 | requester: request, 71 | cloudflareMaxTimeout: 0, 72 | challengesToSolve: 0, 73 | decodeEmails: false, 74 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 75 | onCaptcha: (options: Options, response: CaptchaResponse) => {} 76 | }); 77 | 78 | expectType({ 79 | requester: request, 80 | cloudflareMaxTimeout: 0, 81 | challengesToSolve: 0, 82 | decodeEmails: false, 83 | // eslint-disable-next-line 
@typescript-eslint/no-unused-vars 84 | onCaptcha: (options: Options, response: CaptchaResponse) => {}, 85 | realEncoding: 'utf-8' 86 | }); 87 | -------------------------------------------------------------------------------- /lib/brotli.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const zlib = require('zlib'); 4 | 5 | const brotli = module.exports; 6 | // Convenience boolean used to check for brotli support 7 | brotli.isAvailable = false; 8 | // Exported for tests 9 | brotli.optional = optional; 10 | 11 | // Check for node's built-in brotli support 12 | if (typeof zlib.brotliDecompressSync === 'function') { 13 | brotli.decompress = function (buf) { 14 | return zlib.brotliDecompressSync(buf); 15 | }; 16 | 17 | brotli.isAvailable = true; 18 | } else if (optional(require)) { 19 | brotli.isAvailable = true; 20 | } 21 | 22 | function optional (require) { 23 | try { 24 | // Require the NPM installed brotli 25 | const decompress = require('brotli/decompress'); 26 | 27 | brotli.decompress = function (buf) { 28 | return Buffer.from(decompress(buf)); 29 | }; 30 | 31 | return typeof decompress === 'function'; 32 | } catch (error) { 33 | // Don't throw an exception if the module is not installed 34 | if (error.code !== 'MODULE_NOT_FOUND') { 35 | throw error; 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /lib/email-decode.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const pattern = ( 4 | // Opening tag 5 | // $1 = TAG_NAME 6 | '<([a-z]+)(?: [^>]*)?' 
+ '(?:' + 7 | // href attribute 8 | // $2 = /cdn-cgi/l/email-protection#HEX_STRING 9 | // $3 = HEX_STRING 10 | ' href=[\'"]?(\\/cdn-cgi\\/l\\/email-protection#([a-f0-9]{4,}))' + '|' + 11 | // data attribute 12 | // $4 = HEX_STRING 13 | ' data-cfemail=["\']?([a-f0-9]{4,})' + 14 | // Self-closing or innerHTML(disallow child nodes) followed by closing tag 15 | // \1 backreference to $1 16 | '(?:[^<]*\\/>|[^<]*?<\\/\\1>)' + ')' 17 | ); 18 | 19 | const re = new RegExp(pattern, 'gi'); 20 | 21 | module.exports = function (html) { 22 | let match, result; 23 | 24 | re.lastIndex = 0; 25 | 26 | while ((match = re.exec(html)) !== null) { 27 | if (match[2] !== undefined) { 28 | result = match[0].replace(match[2], 'mailto:' + decode(match[3])); 29 | } else { 30 | result = decode(match[4]); 31 | } 32 | 33 | html = html.substr(0, match.index) + result + html.substr(re.lastIndex); 34 | re.lastIndex = match.index + result.length - 1; 35 | } 36 | 37 | return html; 38 | }; 39 | 40 | function decode (hexStr) { 41 | const key = parseInt(hexStr.substr(0, 2), 16); 42 | let email = ''; 43 | 44 | // noinspection ES6ConvertVarToLetConst 45 | for (var codePoint, i = 2; i < hexStr.length; i += 2) { 46 | codePoint = parseInt(hexStr.substr(i, 2), 16) ^ key; 47 | email += String.fromCharCode(codePoint); 48 | } 49 | 50 | // noinspection JSDeprecatedSymbols 51 | return decodeURIComponent(escape(email)); 52 | } 53 | -------------------------------------------------------------------------------- /lib/headers.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const chromeData = require('./browsers').chrome; 4 | const useBrotli = require('./brotli').isAvailable; 5 | 6 | module.exports = { getDefaultHeaders, caseless }; 7 | 8 | function getDefaultHeaders (defaults) { 9 | const headers = getChromeHeaders(random(chromeData)); 10 | return Object.assign({}, defaults, headers); 11 | } 12 | 13 | function random (arr) { 14 | return 
arr[Math.floor(Math.random() * arr.length)]; 15 | } 16 | 17 | function getChromeHeaders (options) { 18 | const { headers } = options; 19 | 20 | headers['User-Agent'] = random(options['User-Agent']); 21 | 22 | if (!useBrotli && headers['Accept-Encoding']) { 23 | headers['Accept-Encoding'] = 24 | headers['Accept-Encoding'].replace(/,?\s*\bbr\b\s*/i, ''); 25 | } 26 | 27 | return headers; 28 | } 29 | 30 | function caseless (headers) { 31 | const result = {}; 32 | 33 | Object.keys(headers).forEach(key => { 34 | result[key.toLowerCase()] = headers[key]; 35 | }); 36 | 37 | return result; 38 | } 39 | -------------------------------------------------------------------------------- /lib/sandbox.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const vm = require('vm'); 4 | 5 | const VM_OPTIONS = { 6 | filename: 'iuam-challenge.js', 7 | contextOrigin: 'cloudflare:iuam-challenge.js', 8 | contextCodeGeneration: { strings: true, wasm: false }, 9 | timeout: 5000 10 | }; 11 | 12 | const VM_ENV = ` 13 | (function (global) { 14 | const cache = Object.create(null); 15 | const keys = []; 16 | const { body, href } = global; 17 | 18 | Object.defineProperties(global, { 19 | document: { 20 | value: { 21 | createElement: function () { 22 | return { firstChild: { href: href } }; 23 | }, 24 | getElementById: function (id) { 25 | if (keys.indexOf(id) === -1) { 26 | const re = new RegExp(' id=[\\'"]?' + id + '[^>]*>([^<]*)'); 27 | const match = body.match(re); 28 | 29 | keys.push(id); 30 | cache[id] = match === null ? 
match : { innerHTML: match[1] }; 31 | } 32 | 33 | return cache[id]; 34 | } 35 | } 36 | }, 37 | location: { value: { reload: function () {} } } 38 | }) 39 | }(this)); 40 | `; 41 | 42 | module.exports = { eval: evaluate, Context }; 43 | 44 | function evaluate (code, ctx) { 45 | return vm.runInNewContext(VM_ENV + code, ctx, VM_OPTIONS); 46 | } 47 | 48 | // Global context used to evaluate standard IUAM JS challenge 49 | function Context (options) { 50 | if (!options) options = { body: '', hostname: '' }; 51 | 52 | const atob = Object.setPrototypeOf(function (str) { 53 | try { 54 | return Buffer.from(str, 'base64').toString('binary'); 55 | } catch (e) {} 56 | }, null); 57 | 58 | return Object.setPrototypeOf({ 59 | body: options.body, 60 | href: 'http://' + options.hostname + '/', 61 | atob 62 | }, null); 63 | } 64 | -------------------------------------------------------------------------------- /mocha.opts: -------------------------------------------------------------------------------- 1 | --reporter spec 2 | --require tests/common -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cloudscraper", 3 | "version": "4.6.0", 4 | "description": "Bypasses cloudflare's anti-ddos page", 5 | "main": "index.js", 6 | "engines": { 7 | "node": ">=8" 8 | }, 9 | "files": [ 10 | "lib/", 11 | "index.js", 12 | "index.d.ts", 13 | "errors.js", 14 | "errors.d.ts" 15 | ], 16 | "scripts": { 17 | "test": "npm run lint && npm run test:typescript && nyc --reporter=html --reporter=text mocha", 18 | "test:typescript": "tsc *.ts --noEmit && tsd", 19 | "coverage": "nyc report --reporter=text-lcov | coveralls", 20 | "lint": "eslint --ext .json --ext .js --ext .ts ." 
21 | }, 22 | "repository": { 23 | "type": "git", 24 | "url": "https://github.com/codemanki/cloudscraper.git" 25 | }, 26 | "publishConfig": { 27 | "registry": "http://registry.npmjs.org" 28 | }, 29 | "keywords": [ 30 | "cloudflare", 31 | "ddos", 32 | "scrape", 33 | "webscraper", 34 | "anti-bot", 35 | "waf", 36 | "iuam", 37 | "bypass", 38 | "challenge" 39 | ], 40 | "author": "Oleksii Sribnyi", 41 | "license": "MIT", 42 | "homepage": "https://github.com/codemanki/cloudscraper", 43 | "dependencies": { 44 | "cheerio": "^1.0.0-rc.3", 45 | "request-promise": "^4.2.4" 46 | }, 47 | "devDependencies": { 48 | "@types/request-promise": "^4.1.44", 49 | "@typescript-eslint/eslint-plugin": "^2.3.1", 50 | "@typescript-eslint/parser": "^2.3.1", 51 | "chai": "^4.2.0", 52 | "chai-as-promised": "^7.1.1", 53 | "coveralls": "^3.0.3", 54 | "eslint": "^6.0.0", 55 | "eslint-config-standard": "^14.0.0", 56 | "eslint-plugin-import": "^2.16.0", 57 | "eslint-plugin-json": "^1.4.0", 58 | "eslint-plugin-node": "^10.0.0", 59 | "eslint-plugin-promise": "^4.0.1", 60 | "eslint-plugin-standard": "^4.0.0", 61 | "express": "^4.16.4", 62 | "mocha": "^6.1.1", 63 | "nyc": "^15.0.0", 64 | "sinon": "^7.2.4", 65 | "sinon-chai": "^3.3.0", 66 | "tsd": "^0.8.0", 67 | "typescript": "^3.6.3" 68 | }, 69 | "peerDependencies": { 70 | "brotli": "^1.3.2", 71 | "request": "^2.88.0" 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /test/common.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var chai = require('chai'); 4 | 5 | chai.use(require('sinon-chai')); 6 | chai.use(require('chai-as-promised')); 7 | 8 | chai.config.includeStack = true; 9 | -------------------------------------------------------------------------------- /test/fixtures/access_denied.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Access denied | site.com used CloudFlare to 
restrict access 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 |
23 | 24 |
25 |
26 |

27 | Error 28 | 1006 29 | Ray ID: 19400a3d29e30f8d 30 |

31 |

Access denied

32 |
33 | 34 |
35 | 36 |
37 |
38 |
39 |

What happened?

40 |

The owner of this website (site.com.com) has banned your IP address (91.91.111.11).

41 |
42 | 43 | 44 |
45 |
46 | 47 | 57 | 58 | 59 |
60 |
61 | 62 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /test/fixtures/captcha.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Attention Required! | CloudFlare 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 |
25 | 26 |
27 |
28 |

One more step

29 |

Please complete the security check to access site.com

30 |
31 | 32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 | 48 | 49 | 53 | 54 |
55 | 56 |
57 | 58 | 59 | 60 | 61 | 62 | 63 |
64 | 65 |
66 | 67 |
68 |
69 | 70 |
71 |
72 | 73 |
74 |
75 | 76 | 77 | 78 |
79 |
80 |
81 |
82 |
83 | 84 |
85 |
86 |
87 |

Why do I have to complete a CAPTCHA?

88 | 89 |

Completing the CAPTCHA proves you are a human and gives you temporary access to the web property.

90 |
91 | 92 |
93 |

What can I do to prevent this in the future?

94 | 95 |

If you are on a personal connection, like at home, you can run an anti-virus scan on your device to make sure it is not infected with malware.

96 | 97 |

If you are at an office or shared network, you can ask the network administrator to run a scan across the network looking for misconfigured or infected devices.

98 |
99 |
100 |
101 | 102 | 114 | 115 | 116 |
117 |
118 | 119 | 120 | 121 | 122 | 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /test/fixtures/cf_recaptcha_01_12_2019.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Attention Required! | Cloudflare 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 |
28 | 29 |
30 |
31 |

One more step

32 |

Please complete the security check to access www.cloudflare.com

33 |
34 | 35 |
36 |
37 |
38 |
39 | 40 |
41 |
42 | 43 | 44 |
45 | 56 |
57 | 58 | 59 |
60 |
61 | 62 |
63 |
64 | 65 | 66 | 67 |
68 |
69 |
70 |
71 |
72 | 73 |
74 |
75 |
76 |

Why do I have to complete a CAPTCHA?

77 | 78 |

Completing the CAPTCHA proves you are a human and gives you temporary access to the web property.

79 |
80 | 81 |
82 |

What can I do to prevent this in the future?

83 | 84 | 85 |

If you are on a personal connection, like at home, you can run an anti-virus scan on your device to make sure it is not infected with malware.

86 | 87 |

If you are at an office or shared network, you can ask the network administrator to run a scan across the network looking for misconfigured or infected devices.

88 | 89 | 90 |

Another way to prevent getting this page in the future is to use Privacy Pass. You may need to download version 2.0 now from the Chrome Web Store.

91 | 92 | 93 |
94 |
95 |
96 | 97 | 98 | 108 | 109 | 110 |
111 |
112 | 113 | 118 | 119 | 120 | 121 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /test/fixtures/cf_recaptcha_15_04_2019.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Attention Required! | Cloudflare 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 |
28 | 29 |
30 |
31 |

One more step

32 |

Please complete the security check to access example-site.dev

33 |
34 | 35 |
36 |
37 |
38 |
39 | 40 |
41 |
42 | 43 | 44 |
45 | 56 |
57 | 58 | 104 | 105 |
106 |
107 | 108 |
109 |
110 | 111 | 112 | 113 |
114 |
115 |
116 |
117 |
118 | 119 |
120 |
121 |
122 |

Why do I have to complete a CAPTCHA?

123 | 124 |

Completing the CAPTCHA proves you are a human and gives you temporary access to the web property.

125 |
126 | 127 |
128 |

What can I do to prevent this in the future?

129 | 130 | 131 |

If you are on a personal connection, like at home, you can run an anti-virus scan on your device to make sure it is not infected with malware.

132 | 133 |

If you are at an office or shared network, you can ask the network administrator to run a scan across the network looking for misconfigured or infected devices.

134 | 135 |
136 |
137 |
138 | 139 | 140 | 150 | 151 | 152 |
153 |
154 | 155 | 160 | 161 | 162 | 163 | -------------------------------------------------------------------------------- /test/fixtures/invalid_js_challenge.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Just a moment... 10 | 18 | 19 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 63 | 64 |
46 |
47 | 48 | 54 |
55 | 56 | 57 |
58 |
59 | 60 | 61 | 62 |
65 | 66 | 67 | -------------------------------------------------------------------------------- /test/fixtures/js_challenge_03_12_2018_1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Just a moment... 10 | 21 | 22 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 80 | 81 | 82 |
52 |
53 | 54 | 65 | 66 |
67 | 68 | 69 | 70 |
71 |
72 | 73 | 74 |
75 | DDoS protection by Cloudflare 76 |
77 | Ray ID: 4834ce407815974a 78 |
79 |
83 | 84 | -------------------------------------------------------------------------------- /test/fixtures/js_challenge_03_12_2018_2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Just a moment... 10 | 21 | 22 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 80 | 81 | 82 |
52 |
53 | 54 | 65 | 66 |
67 | 68 | 69 | 70 |
71 |
72 | 73 | 74 |
75 | DDoS protection by Cloudflare 76 |
77 | Ray ID: 4834ce66ab7b9706 78 |
79 |
83 | 84 | -------------------------------------------------------------------------------- /test/fixtures/js_challenge_09_06_2016.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Just a moment... 10 | 21 | 22 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 77 | 78 |
51 |
52 | 53 | 63 |
64 | 65 | 66 | 67 |
68 |
69 | 70 | 71 |
72 | DDoS protection by CloudFlare 73 |
74 | Ray ID: 2b05d3393e872d77 75 |
76 |
79 | 80 | 81 | -------------------------------------------------------------------------------- /test/fixtures/js_challenge_10_04_2019.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Just a moment... 10 | 21 | 22 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 92 | 93 | 94 |
66 |
67 | 68 | 79 | 80 |
81 | 82 | 83 | 84 | 85 |
86 | 87 | 88 | 89 |
90 | 91 |
95 | 96 | 97 | -------------------------------------------------------------------------------- /test/fixtures/js_challenge_13_03_2019.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Just a moment... 10 | 21 | 22 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 79 | 80 | 81 |
52 |
53 | 54 | 66 |
67 | 68 | 69 | 70 |
71 |
72 | 73 | 74 | 78 |
82 | 83 | 84 | -------------------------------------------------------------------------------- /test/fixtures/js_challenge_21_03_2019.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Just a moment... 10 | 21 | 22 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 76 | 77 | 78 |
52 |
53 | 54 | 65 | 66 |
67 | 68 | 69 | 70 | 71 |
72 |
73 | 74 | 75 |
79 | 80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /test/fixtures/js_challenge_22_04_2020.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | Just a moment... 12 | 31 | 32 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 121 | 122 | 123 |
85 |
86 | 89 | 101 | 102 |
103 | 104 | 105 | 106 | 107 |
108 | 109 | 110 | 111 |
112 |
113 | 114 | 115 |
116 | DDoS protection by Cloudflare 117 |
118 | Ray ID: 5882d60f1f99f2b4 119 |
120 |
124 | 125 | 126 | -------------------------------------------------------------------------------- /test/fixtures/js_challenge_28_11_2019.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Just a moment... 10 | 21 | 22 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 99 | 100 | 101 |
66 |
67 | 68 | 80 | 81 |
82 | 83 | 84 | 85 | 86 |
87 | 88 | 89 | 90 |
91 | 92 | 93 |
94 | DDoS protection by Cloudflare 95 |
96 | Ray ID: 53cb1af29bc6c2d6 97 |
98 |
102 | 103 | -------------------------------------------------------------------------------- /test/fixtures/page_with_emails.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Cloudscraper 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 |

The email is [email protected] 14 |

15 | 16 | 17 | -------------------------------------------------------------------------------- /test/fixtures/requested_page.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | The requested page 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | This is the page you want to parse 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /test/fixtures/sucuri_waf_11_08_2019.html: -------------------------------------------------------------------------------- 1 | You are being redirected... 2 | 3 | 4 | -------------------------------------------------------------------------------- /test/fixtures/sucuri_waf_18_08_2016.html: -------------------------------------------------------------------------------- 1 | You are being redirected... 2 | 3 | 4 | -------------------------------------------------------------------------------- /test/helper.js: -------------------------------------------------------------------------------- 1 | var request = require('request-promise'); 2 | var sinon = require('sinon'); 3 | var fs = require('fs'); 4 | var url = require('url'); 5 | var path = require('path'); 6 | var express = require('express'); 7 | 8 | // Clone a few defaults before testing 9 | var opts = require('../').defaultParams; 10 | var defaultHeaders = Object.assign({}, opts.headers); 11 | var agentOptions = Object.assign({}, opts.agentOptions); 12 | 13 | // Cache fixtures so they're only read from fs but once 14 | var cache = {}; 15 | 16 | var helper = { 17 | app: express(), 18 | reset: function () { 19 | helper.router = new express.Router(); 20 | 21 | helper.defaultParams = { 22 | // Since cloudscraper wraps the callback, just ensure callback is a function 23 | callback: sinon.match.func, 24 | requester: sinon.match.func, 25 | jar: request.jar(), 26 | uri: helper.resolve('/test'), 27 | headers: Object.assign({}, defaultHeaders), 28 | method: 'GET', 29 | encoding: null, 30 | 
realEncoding: 'utf8', 31 | followAllRedirects: true, 32 | cloudflareTimeout: 1, 33 | cloudflareMaxTimeout: 30000, 34 | challengesToSolve: 3, 35 | decodeEmails: false, 36 | gzip: true, 37 | agentOptions: Object.assign({}, agentOptions) 38 | }; 39 | }, 40 | getFixture: function (fileName) { 41 | var key = fileName; 42 | 43 | if (cache[key] === undefined) { 44 | fileName = path.join(__dirname, 'fixtures', fileName); 45 | cache[key] = fs.readFileSync(fileName, 'utf8'); 46 | } 47 | 48 | return cache[key]; 49 | }, 50 | extendParams: function (params) { 51 | var defaultParams = this.defaultParams; 52 | 53 | // Extend target with the default params and provided params 54 | var target = {}; 55 | Object.assign(target, defaultParams, params); 56 | // Extend target.headers with defaults headers and provided headers 57 | target.headers = {}; 58 | Object.assign(target.headers, defaultParams.headers, params.headers); 59 | 60 | return target; 61 | }, 62 | resolve: function (uri) { 63 | // eslint-disable-next-line node/no-deprecated-api 64 | return url.resolve(helper.uri.href, uri); 65 | }, 66 | listen: function (callback) { 67 | helper.server = helper.app.listen(0, '127.0.0.1', function () { 68 | var baseUrl = 'http://127.0.0.1:' + helper.server.address().port; 69 | 70 | // eslint-disable-next-line node/no-deprecated-api 71 | helper.uri = url.parse(baseUrl + '/'); 72 | helper.reset(); 73 | callback(); 74 | }); 75 | } 76 | }; 77 | 78 | helper.app.use(function (req, res, next) { 79 | helper.router(req, res, next); 80 | }); 81 | 82 | express.response.cloudflare = function () { 83 | this.header('Server', 'cloudflare'); 84 | this.header('Content-Type', 'text/html; charset=UTF-8'); 85 | return this; 86 | }; 87 | 88 | express.response.sendFixture = function (fileName) { 89 | return this.send(helper.getFixture(fileName)); 90 | }; 91 | 92 | express.response.sendChallenge = function (fileName) { 93 | return this.cloudflare().status(503).sendFixture(fileName); 94 | }; 95 | 96 | 
express.response.sendCaptcha = function (fileName) { 97 | return this.cloudflare().status(403).sendFixture(fileName); 98 | }; 99 | 100 | express.response.endAbruptly = function () { 101 | this.connection.write( 102 | 'HTTP/1.1 500\r\n' + 103 | 'Content-Type: text/plain\r\n' + 104 | 'Transfer-Encoding: chunked\r\n\r\n' 105 | ); 106 | this.end(); 107 | }; 108 | 109 | module.exports = helper; 110 | -------------------------------------------------------------------------------- /test/test-brotli.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-unused-expressions */ 2 | /* eslint-env node, mocha */ 3 | 'use strict'; 4 | 5 | const brotli = require('../lib/brotli'); 6 | const helper = require('./helper'); 7 | const zlib = require('zlib'); 8 | 9 | const sinon = require('sinon'); 10 | const expect = require('chai').expect; 11 | 12 | (process.env.BROTLI ? describe : describe.skip)('Brotli (lib)', function () { 13 | it('should be available', function () { 14 | expect(brotli.isAvailable).to.be.true; 15 | }); 16 | 17 | it('should have a decompress method', function () { 18 | expect(brotli.decompress).to.be.a('function'); 19 | }); 20 | 21 | it('decompress() should accept exactly 1 argument', function () { 22 | expect(brotli.decompress.length).to.equal(1); 23 | }); 24 | 25 | it('decompress() should accept buffer as input', function () { 26 | const data = Buffer.from([0x0b, 0x01, 0x80, 0x61, 0x62, 0x63, 0x03]); 27 | const result = brotli.decompress(data); 28 | 29 | expect(result).to.be.instanceof(Buffer); 30 | expect(result.toString('utf8')).to.equal('abc'); 31 | }); 32 | 33 | (zlib.brotliCompressSync ? 
it : it.skip)('[internal] decompress() should produce the expected result', function () { 34 | const input = helper.getFixture('captcha.html'); 35 | const data = zlib.brotliCompressSync(Buffer.from(input, 'utf8')); 36 | const result = brotli.decompress(data); 37 | 38 | expect(result).to.be.instanceof(Buffer); 39 | expect(result.toString('utf8')).to.equal(input); 40 | }); 41 | 42 | (zlib.brotliCompressSync ? it.skip : it)('[external] decompress() should produce the expected result', function () { 43 | const input = helper.getFixture('captcha.html'); 44 | // Try increasing the timeout if this fails on your system. 45 | const data = require('brotli').compress(Buffer.from(input, 'utf8')); 46 | const result = brotli.decompress(Buffer.from(data)); 47 | 48 | expect(result).to.be.instanceof(Buffer); 49 | expect(result.toString('utf8')).to.equal(input); 50 | }); 51 | 52 | it('optional() should throw an error if the module contains an error', function () { 53 | const spy = sinon.spy(function () { 54 | // This method should throw if called without arguments 55 | brotli.optional(); 56 | }); 57 | 58 | expect(spy).to.throw(); 59 | }); 60 | }); 61 | -------------------------------------------------------------------------------- /test/test-captcha.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-unused-expressions */ 2 | /* eslint-env node, mocha */ 3 | 'use strict'; 4 | 5 | const cloudscraper = require('../index'); 6 | const request = require('request-promise'); 7 | const errors = require('../errors'); 8 | const helper = require('./helper'); 9 | const http = require('http'); 10 | 11 | const sinon = require('sinon'); 12 | const expect = require('chai').expect; 13 | 14 | describe('Cloudscraper', function () { 15 | let sandbox; 16 | let Request; 17 | let uri; 18 | 19 | const requestedPage = helper.getFixture('requested_page.html'); 20 | 21 | before(function (done) { 22 | helper.listen(function () { 23 | uri = 
helper.resolve('/test'); 24 | 25 | // Speed up tests 26 | cloudscraper.defaultParams.cloudflareTimeout = 1; 27 | done(); 28 | }); 29 | }); 30 | 31 | after(function () { 32 | helper.server.close(); 33 | }); 34 | 35 | beforeEach(function () { 36 | // Prepare stubbed Request 37 | sandbox = sinon.createSandbox(); 38 | Request = sandbox.spy(request, 'Request'); 39 | }); 40 | 41 | afterEach(function () { 42 | helper.reset(); 43 | sandbox.restore(); 44 | }); 45 | 46 | it('should handle onCaptcha promise being rejected with a falsy error', function (done) { 47 | helper.router.get('/test', function (req, res) { 48 | res.sendCaptcha('cf_recaptcha_15_04_2019.html'); 49 | }); 50 | 51 | const options = { 52 | uri, 53 | onCaptcha: function () { 54 | // eslint-disable-next-line prefer-promise-reject-errors 55 | return Promise.reject(); 56 | } 57 | }; 58 | 59 | const promise = cloudscraper.get(options, function (error) { 60 | expect(error).to.be.instanceOf(errors.CaptchaError); 61 | expect(error.error).to.be.an('error'); 62 | expect(error).to.have.property('errorType', 1); 63 | expect(error.message).to.include('Falsy error'); 64 | expect(promise).to.be.rejectedWith(errors.CaptchaError).and.notify(done); 65 | }); 66 | }); 67 | describe('reCAPTCHA (version as on 10.04.2019)', () => { 68 | for (let stage = 0; stage < 4; stage++) { 69 | const desc = { 70 | 0: 'should resolve when user calls captcha.submit()', 71 | 1: 'should callback with an error if user calls captcha.submit(error)', 72 | 2: 'should resolve when the onCaptcha promise resolves', 73 | 3: 'should callback with an error if the onCaptcha promise is rejected' 74 | }; 75 | 76 | // Run this test 4 times 77 | it(desc[stage], function (done) { 78 | const secret = '6b132d85d185a8255f2451d48fe6a8bee7154ea2-1555377580-1800-AQ1azEkeDOnQP5ByOpwUU/RdbKrmMwHYpkaenRvjPXtB0w8Vbjn/Ceg62tfpp/lT799kjDLEMMuDkEMqQ7iO51kniWCQm00BQvDGl+D0h/WvXDWO96YXOUD3qrqUTuzO7QbUOinc8y8kedvOQkr4c0o='; 79 | const siteKey = 
'6LfBixYUAAAAABhdHynFUIMA_sa4s-XsJvnjtgB0'; 80 | const expectedError = new Error('anti-captcha failed!'); 81 | 82 | helper.router 83 | .get('/test', function (req, res) { 84 | res.sendCaptcha('cf_recaptcha_15_04_2019.html'); 85 | }) 86 | .get('/cdn-cgi/l/chk_captcha', function (req, res) { 87 | res.send(requestedPage); 88 | }); 89 | 90 | const onCaptcha = sinon.spy(function (options, response, body) { 91 | expect(options).to.be.an('object'); 92 | expect(response).to.be.instanceof(http.IncomingMessage); 93 | expect(body).to.be.a('string'); 94 | 95 | sinon.assert.match(response, { 96 | isCloudflare: true, 97 | isHTML: true, 98 | isCaptcha: true, 99 | captcha: sinon.match.object 100 | }); 101 | 102 | sinon.assert.match(response.captcha, { 103 | url: uri, // <-- Deprecated 104 | uri: sinon.match.same(response.request.uri), 105 | form: { s: secret }, 106 | siteKey: siteKey, 107 | submit: sinon.match.func 108 | }); 109 | 110 | // Simulate what the user should do here 111 | response.captcha.form['g-recaptcha-response'] = 'foobar'; 112 | 113 | switch (stage) { 114 | case 0: 115 | // User green lights form submission 116 | response.captcha.submit(); 117 | break; 118 | case 1: 119 | // User reports an error when solving the reCAPTCHA 120 | response.captcha.submit(expectedError); 121 | break; 122 | case 2: 123 | // User green lights form submission by resolving the returned promise 124 | return Promise.resolve(); 125 | case 3: 126 | // User reports an error by rejecting the returned promise 127 | return Promise.reject(expectedError); 128 | } 129 | }); 130 | 131 | const firstParams = helper.extendParams({ onCaptcha, uri }); 132 | const secondParams = helper.extendParams({ 133 | onCaptcha, 134 | method: 'GET', 135 | uri: helper.resolve('/cdn-cgi/l/chk_captcha'), 136 | headers: { 137 | Referer: uri 138 | }, 139 | qs: { 140 | s: secret, 141 | 'g-recaptcha-response': 'foobar' 142 | } 143 | }); 144 | 145 | const options = { onCaptcha, uri }; 146 | 147 | const promise = 
cloudscraper.get(options, function (error, response, body) { 148 | switch (stage) { 149 | case 0: 150 | case 2: 151 | expect(error).to.be.null; 152 | 153 | expect(onCaptcha).to.be.calledOnce; 154 | 155 | expect(Request).to.be.calledTwice; 156 | expect(Request.firstCall).to.be.calledWithExactly(firstParams); 157 | expect(Request.secondCall).to.be.calledWithExactly(secondParams); 158 | 159 | expect(body).to.be.equal(requestedPage); 160 | expect(promise).to.eventually.equal(requestedPage).and.notify(done); 161 | break; 162 | case 1: 163 | case 3: 164 | expect(error).to.be.instanceOf(errors.CaptchaError); 165 | expect(error.error).to.be.an('error'); 166 | expect(error).to.have.property('errorType', 1); 167 | expect(error.message).to.include(expectedError.message); 168 | expect(promise).to.be.rejectedWith(errors.CaptchaError).and.notify(done); 169 | break; 170 | } 171 | }); 172 | }); 173 | } 174 | }); 175 | 176 | describe('reCAPTCHA (version as on 01.12.2019)', () => { 177 | for (let stage = 0; stage < 4; stage++) { 178 | const desc = { 179 | 0: 'should resolve when user calls captcha.submit()', 180 | 1: 'should callback with an error if user calls captcha.submit(error)', 181 | 2: 'should resolve when the onCaptcha promise resolves', 182 | 3: 'should callback with an error if the onCaptcha promise is rejected' 183 | }; 184 | 185 | // Run this test 4 times 186 | it(desc[stage], function (done) { 187 | const secret = '0bd666f149acf02bbc05bba3b1bb'; 188 | const siteKey = '6LfBixYUAAAAABhdHynFUIMA_sa4s-XsJvnjtgB0'; 189 | const rayId = '53dfe8147d2a9e73'; 190 | const expectedError = new Error('anti-captcha failed!'); 191 | 192 | helper.router 193 | .get('/test', function (req, res) { 194 | res.sendCaptcha('cf_recaptcha_01_12_2019.html'); 195 | }) 196 | .post('/', function (req, res) { 197 | res.send(requestedPage); 198 | }); 199 | 200 | const onCaptcha = sinon.spy(function (options, response, body) { 201 | expect(options).to.be.an('object'); 202 | 
expect(response).to.be.instanceof(http.IncomingMessage); 203 | expect(body).to.be.a('string'); 204 | 205 | sinon.assert.match(response, { 206 | isCloudflare: true, 207 | isHTML: true, 208 | isCaptcha: true, 209 | captcha: sinon.match.object 210 | }); 211 | 212 | sinon.assert.match(response.captcha, { 213 | url: uri, // <-- Deprecated 214 | uri: sinon.match.same(response.request.uri), 215 | form: { r: secret, id: rayId }, 216 | siteKey: siteKey, 217 | submit: sinon.match.func 218 | }); 219 | 220 | // Simulate what the user should do here 221 | response.captcha.form['g-recaptcha-response'] = 'foobar'; 222 | 223 | switch (stage) { 224 | case 0: 225 | // User green lights form submission 226 | response.captcha.submit(); 227 | break; 228 | case 1: 229 | // User reports an error when solving the reCAPTCHA 230 | response.captcha.submit(expectedError); 231 | break; 232 | case 2: 233 | // User green lights form submission by resolving the returned promise 234 | return Promise.resolve(); 235 | case 3: 236 | // User reports an error by rejecting the returned promise 237 | return Promise.reject(expectedError); 238 | } 239 | }); 240 | 241 | const firstParams = helper.extendParams({ onCaptcha, uri }); 242 | const secondParams = helper.extendParams({ 243 | onCaptcha, 244 | method: 'POST', 245 | uri: helper.resolve('/?__cf_chl_captcha_tk__=e8844bdff35ae5e'), 246 | qs: { __cf_chl_captcha_tk__: 'e8844bdff35ae5e' }, 247 | headers: { 248 | Referer: helper.resolve('/test') 249 | }, 250 | form: { 251 | r: secret, 252 | id: rayId, 253 | 'g-recaptcha-response': 'foobar' 254 | } 255 | }); 256 | 257 | const options = { onCaptcha, uri }; 258 | 259 | const promise = cloudscraper.get(options, function (error, response, body) { 260 | switch (stage) { 261 | case 0: 262 | case 2: 263 | expect(error).to.be.null; 264 | 265 | expect(onCaptcha).to.be.calledOnce; 266 | 267 | expect(Request).to.be.calledTwice; 268 | expect(Request.firstCall).to.be.calledWithExactly(firstParams); 269 | 
expect(Request.secondCall).to.be.calledWithExactly(secondParams); 270 | 271 | expect(body).to.be.equal(requestedPage); 272 | expect(promise).to.eventually.equal(requestedPage).and.notify(done); 273 | break; 274 | case 1: 275 | case 3: 276 | expect(error).to.be.instanceOf(errors.CaptchaError); 277 | expect(error.error).to.be.an('error'); 278 | expect(error).to.have.property('errorType', 1); 279 | expect(error.message).to.include(expectedError.message); 280 | expect(promise).to.be.rejectedWith(errors.CaptchaError).and.notify(done); 281 | break; 282 | } 283 | }); 284 | }); 285 | }; 286 | }); 287 | }); 288 | -------------------------------------------------------------------------------- /test/test-emails.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-unused-expressions */ 2 | /* eslint-env node, mocha */ 3 | 'use strict'; 4 | 5 | const decode = require('../lib/email-decode'); 6 | const expect = require('chai').expect; 7 | 8 | const EMAIL = 'cloudscraper@example-site.dev'; 9 | const HEX_STRING = '6506090a10011606170415001725001d040815090048160c11004b010013'; 10 | 11 | function genHTML (body) { 12 | return '\n' + 13 | '\n' + 14 | '\n' + 15 | ' Cloudscraper\n' + 16 | ' \n' + 17 | ' \n' + 18 | ' \n\n' + 19 | ' ' + 20 | '' + 21 | '\n' + body + '\n' + 22 | '\n'; 23 | } 24 | 25 | describe('Email (lib)', function () { 26 | it('should not modify unprotected html', function () { 27 | const raw = genHTML(''); 28 | 29 | expect(decode(raw)).to.equal(raw); 30 | }); 31 | 32 | it('should remove email protection', function () { 33 | const protection = '!@#&*9^%()[]/\\'; 34 | 35 | expect(decode(protection)).to.equal(EMAIL); 36 | }); 37 | 38 | it('should replace anchors that have a data-cfemail attribute', function () { 39 | const protection = '[email protected]'; 41 | 42 | const raw = genHTML('

The email is ' + EMAIL + '

'); 43 | const enc = genHTML('

The email is ' + protection + '

'); 44 | 45 | expect(decode(enc)).to.equal(raw); 46 | }); 47 | 48 | it('should replace spans that have a data-cfemail attribute', function () { 49 | const protection = '[email protected]'; 51 | 52 | const raw = genHTML('

The email is ' + EMAIL + '

'); 53 | const enc = genHTML('

The email is ' + protection + '

'); 54 | 55 | expect(decode(enc)).to.equal(raw); 56 | }); 57 | 58 | it('should be space agnostic', function () { 59 | const protection = '\n[email protected]\r\n'; 61 | 62 | const raw = genHTML('\r\n

\n The email
is ' + EMAIL + '\r\n

\n'); 63 | const enc = genHTML('\r\n

\n The email
is ' + protection + '\r\n

\n'); 64 | 65 | expect(decode(enc)).to.equal(raw); 66 | }); 67 | 68 | it('should not replace nodes if they have children', function () { 69 | const protection = '[email protected]'; 71 | const enc = genHTML('

The email is ' + protection + '

'); 72 | 73 | expect(decode(enc)).to.equal(enc); 74 | }); 75 | 76 | it('should not replace malformed html', function () { 77 | const protection = '\n<\n'; 78 | const enc = genHTML('

The email is ' + protection + '

'); 79 | 80 | expect(decode(enc)).to.equal(enc); 81 | }); 82 | 83 | it('should account for self-closing nodes', function () { 84 | const protection = 'test'; 85 | 86 | expect(decode(protection)).to.equal(EMAIL + 'test'); 87 | }); 88 | 89 | it('should update href attribute values', function () { 90 | const protection = ''; 91 | 92 | const raw = genHTML(''); 93 | const enc = genHTML(protection); 94 | 95 | expect(decode(enc)).to.equal(raw); 96 | }); 97 | }); 98 | -------------------------------------------------------------------------------- /test/test-errors.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-unused-expressions */ 2 | /* eslint-env node, mocha */ 3 | 'use strict'; 4 | 5 | const cloudscraper = require('../index'); 6 | const request = require('request-promise'); 7 | const helper = require('./helper'); 8 | const brotli = require('../lib/brotli'); 9 | const errors = require('../errors'); 10 | 11 | const sinon = require('sinon'); 12 | const expect = require('chai').expect; 13 | const assert = require('chai').assert; 14 | 15 | describe('Cloudscraper', function () { 16 | let sandbox; 17 | let Request; 18 | let uri; 19 | 20 | before(function (done) { 21 | helper.listen(function () { 22 | uri = helper.resolve('/test'); 23 | 24 | // Speed up tests 25 | cloudscraper.defaultParams.cloudflareTimeout = 1; 26 | done(); 27 | }); 28 | }); 29 | 30 | after(function () { 31 | helper.server.close(); 32 | }); 33 | 34 | beforeEach(function () { 35 | // Prepare stubbed Request 36 | sandbox = sinon.createSandbox(); 37 | Request = sandbox.spy(request, 'Request'); 38 | }); 39 | 40 | afterEach(function () { 41 | helper.reset(); 42 | sandbox.restore(); 43 | }); 44 | 45 | it('should return error if it was thrown by request', function (done) { 46 | helper.router.get('/test', function (req, res) { 47 | res.endAbruptly(); 48 | }); 49 | 50 | const promise = cloudscraper.get(uri, function (error) { 51 | 
expect(error).to.be.instanceOf(errors.RequestError); 52 | expect(error.error).to.be.an('error'); 53 | expect(error).to.have.property('errorType', 0); 54 | 55 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 56 | expect(promise).to.be.rejectedWith(errors.RequestError).and.notify(done); 57 | }); 58 | }); 59 | 60 | it('should return error if cloudflare response is empty', function (done) { 61 | helper.router.get('/test', function (req, res) { 62 | res.cloudflare().status(504).end(); 63 | }); 64 | 65 | const promise = cloudscraper.get(uri, function (error) { 66 | // errorType 2, means inner cloudflare error 67 | expect(error).to.be.instanceOf(errors.CloudflareError); 68 | expect(error).to.have.property('error', 504); 69 | expect(error).to.have.property('errorType', 2); 70 | expect(error.message).to.be.equal('504, Gateway Timeout'); 71 | 72 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 73 | 74 | expect(error.response.body).to.be.eql(Buffer.alloc(0)); 75 | expect(promise).to.be.rejectedWith(errors.CloudflareError).and.notify(done); 76 | }); 77 | }); 78 | 79 | it('should return error if captcha is served by cloudflare', function (done) { 80 | helper.router.get('/test', function (req, res) { 81 | res.sendChallenge('captcha.html'); 82 | }); 83 | 84 | const promise = cloudscraper.get(uri, function (error) { 85 | // errorType 1, means captcha is served 86 | expect(error).to.be.instanceOf(errors.CaptchaError); 87 | expect(error).to.have.property('error', 'captcha'); 88 | expect(error).to.have.property('errorType', 1); 89 | 90 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 91 | expect(promise).to.be.rejectedWith(errors.CaptchaError).and.notify(done); 92 | }); 93 | }); 94 | 95 | it('should return error if cloudflare returned some inner error', function (done) { 96 | // https://support.cloudflare.com/hc/en-us/sections/200820298-Error-Pages 97 | // Error codes: 1012, 1011, 1002, 1000, 1004, 1010, 1006, 1007, 1008 98 | // 
Error codes can also be the same as the HTTP status code in the 5xx range. 99 | 100 | helper.router.get('/test', function (req, res) { 101 | res.cloudflare().status(500).sendFixture('access_denied.html'); 102 | }); 103 | 104 | const promise = cloudscraper.get(uri, function (error) { 105 | // errorType 2, means inner cloudflare error 106 | expect(error).to.be.instanceOf(errors.CloudflareError); 107 | expect(error).to.have.property('error', 1006); 108 | expect(error.message).to.equal('1006, Access Denied: Your IP address has been banned'); 109 | expect(error).to.have.property('errorType', 2); 110 | 111 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 112 | expect(promise).to.be.rejectedWith(errors.CloudflareError).and.notify(done); 113 | }); 114 | }); 115 | 116 | it('should add a description to 5xx range cloudflare errors', function (done) { 117 | const html = helper.getFixture('access_denied.html').toString('utf8'); 118 | 119 | helper.router.get('/test', function (req, res) { 120 | res.cloudflare().status(504).send(html.replace('1006', '504')); 121 | }); 122 | 123 | const promise = cloudscraper.get(uri, function (error) { 124 | // errorType 2, means inner cloudflare error 125 | expect(error).to.be.instanceOf(errors.CloudflareError); 126 | expect(error).to.have.property('error', 504); 127 | expect(error.message).to.equal('504, Gateway Timeout'); 128 | expect(error).to.have.property('errorType', 2); 129 | 130 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 131 | expect(promise).to.be.rejectedWith(errors.CloudflareError).and.notify(done); 132 | }); 133 | }); 134 | 135 | it('should not error if error description is unavailable', function (done) { 136 | const html = helper.getFixture('access_denied.html').toString('utf8'); 137 | 138 | helper.router.get('/test', function (req, res) { 139 | res.cloudflare().status(500).send(html.replace('1006', '5111')); 140 | }); 141 | 142 | const promise = cloudscraper.get(uri, function (error) { 
143 | // errorType 2, means inner cloudflare error 144 | expect(error).to.be.instanceOf(errors.CloudflareError); 145 | expect(error).to.have.property('error', 5111); 146 | expect(error.message).to.equal('5111'); 147 | expect(error).to.have.property('errorType', 2); 148 | 149 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 150 | expect(promise).to.be.rejectedWith(errors.CloudflareError).and.notify(done); 151 | }); 152 | }); 153 | 154 | it('should return error if cf presented more than 3 challenges in a row', function (done) { 155 | helper.router.get('*', function (req, res) { 156 | res.sendChallenge('js_challenge_09_06_2016.html'); 157 | }); 158 | 159 | // The expected params for all subsequent calls to Request 160 | const expectedParams = helper.extendParams({ 161 | uri: helper.resolve('/cdn-cgi/l/chk_jschl') 162 | }); 163 | 164 | // Perform less strict matching on headers and qs to simplify this test 165 | Object.assign(expectedParams, { 166 | headers: sinon.match.object, 167 | qs: sinon.match.object 168 | }); 169 | 170 | const promise = cloudscraper.get(uri, function (error) { 171 | expect(error).to.be.instanceOf(errors.CloudflareError); 172 | expect(error).to.have.property('error', 'Cloudflare challenge loop'); 173 | expect(error).to.have.property('errorType', 4); 174 | 175 | assert.equal(Request.callCount, 4, 'Request call count'); 176 | expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); 177 | 178 | const total = helper.defaultParams.challengesToSolve + 1; 179 | // noinspection ES6ConvertVarToLetConst 180 | for (var i = 1; i < total; i++) { 181 | // Decrement the number of challengesToSolve to match actual params 182 | expectedParams.challengesToSolve -= 1; 183 | expect(Request.getCall(i)).to.be.calledWithExactly(expectedParams); 184 | } 185 | 186 | expect(promise).to.be.rejectedWith(errors.CloudflareError).and.notify(done); 187 | }); 188 | }); 189 | 190 | it('should return error if body is undefined', function (done) 
{ 191 | helper.router.get('/test', function (req, res) { 192 | res.status(503).end(); 193 | }); 194 | 195 | const expectedParams = helper.extendParams({ json: true }); 196 | const options = { uri: uri, json: true }; 197 | 198 | const promise = cloudscraper.get(options, function (error) { 199 | expect(error).to.be.instanceOf(errors.RequestError); 200 | expect(error).to.have.property('error', null); 201 | expect(error).to.have.property('errorType', 0); 202 | 203 | assert.equal(error.response.statusCode, 503, 'status code'); 204 | 205 | expect(error.response.body).to.be.equal(undefined); 206 | expect(Request).to.be.calledOnceWithExactly(expectedParams); 207 | expect(promise).to.be.rejectedWith(errors.RequestError).and.notify(done); 208 | }); 209 | }); 210 | 211 | (brotli.isAvailable ? it.skip : it)('should return error if content-type is brotli and missing dep', function (done) { 212 | // Brotli compressed JSON: {"a":"test"} 213 | const compressed = Buffer.from([ 214 | 0x8b, 0x05, 0x80, 0x7b, 0x22, 0x61, 0x22, 0x3a, 215 | 0x22, 0x74, 0x65, 0x73, 0x74, 0x22, 0x7d, 0x03 216 | ]); 217 | 218 | helper.router.get('/test', function (req, res) { 219 | res.set('content-encoding', 'br'); 220 | res.status(503).end(compressed, 'binary'); 221 | }); 222 | 223 | const expectedParams = helper.extendParams({ json: true }); 224 | const options = { uri: uri, json: true }; 225 | 226 | const promise = cloudscraper.get(options, function (error) { 227 | expect(error).to.be.instanceOf(errors.RequestError); 228 | expect(error).to.have.property('error').that.is.ok; 229 | expect(error).to.have.property('errorType', 0); 230 | 231 | assert.equal(error.response.statusCode, 503, 'status code'); 232 | 233 | assert(Buffer.isBuffer(error.response.body), 'response type'); 234 | expect(error.response.body).to.be.eql(compressed); 235 | expect(Request).to.be.calledOnceWithExactly(expectedParams); 236 | expect(promise).to.be.rejectedWith(errors.RequestError).and.notify(done); 237 | }); 238 | }); 239 | 240 
| it('should return error if challenge page failed to be parsed', function (done) { 241 | helper.router.get('/test', function (req, res) { 242 | res.sendChallenge('invalid_js_challenge.html'); 243 | }); 244 | 245 | const promise = cloudscraper.get(uri, function (error) { 246 | expect(error).to.be.instanceOf(errors.ParserError); 247 | expect(error).to.have.property('error').that.is.ok; 248 | expect(error).to.have.property('errorType', 3); 249 | 250 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 251 | expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); 252 | }); 253 | }); 254 | 255 | it('should return error if js challenge has error during evaluation', function (done) { 256 | const html = helper.getFixture('js_challenge_03_12_2018_1.html'); 257 | 258 | helper.router.get('/test', function (req, res) { 259 | // Adds a syntax error near the end of line 37 260 | res.cloudflare().status(503).send(html.replace(/\.toFixed/gm, '..toFixed')); 261 | }); 262 | 263 | const promise = cloudscraper.get(uri, function (error) { 264 | expect(error).to.be.instanceOf(errors.ParserError); 265 | expect(error).to.have.property('error').that.is.an('error'); 266 | expect(error).to.have.property('errorType', 3); 267 | expect(error.message).to.include('Challenge evaluation failed'); 268 | 269 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 270 | expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); 271 | }); 272 | }); 273 | 274 | it('should return error if pass extraction fails', function (done) { 275 | const html = helper.getFixture('js_challenge_03_12_2018_1.html'); 276 | 277 | helper.router.get('/test', function (req, res) { 278 | res.cloudflare().status(503).send(html.replace(/name="pass"/gm, '')); 279 | }); 280 | 281 | const promise = cloudscraper.get(uri, function (error) { 282 | expect(error).to.be.instanceOf(errors.ParserError); 283 | expect(error).to.have.property('error', 'Attribute (pass) value 
extraction failed'); 284 | expect(error).to.have.property('errorType', 3); 285 | 286 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 287 | expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); 288 | }); 289 | }); 290 | 291 | it('should return error if challengeId extraction fails', function (done) { 292 | const html = helper.getFixture('js_challenge_03_12_2018_1.html'); 293 | 294 | helper.router.get('/test', function (req, res) { 295 | res.cloudflare().status(503).send(html.replace(/name="jschl_vc"/gm, '')); 296 | }); 297 | 298 | const promise = cloudscraper.get(uri, function (error) { 299 | expect(error).to.be.instanceOf(errors.ParserError); 300 | expect(error).to.have.property('error', 'challengeId (jschl_vc) extraction failed'); 301 | expect(error).to.have.property('errorType', 3); 302 | 303 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 304 | expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); 305 | }); 306 | }); 307 | 308 | it('should return error if challenge answer is not a number', function (done) { 309 | const html = helper.getFixture('js_challenge_03_12_2018_1.html'); 310 | 311 | helper.router.get('/test', function (req, res) { 312 | res.cloudflare().status(503) 313 | .send(html.replace(/a.value.*/, 'a.value="abc" + t.length')); 314 | }); 315 | 316 | const promise = cloudscraper.get(uri, function (error) { 317 | expect(error).to.be.instanceOf(errors.ParserError); 318 | expect(error).to.have.property('error', 'Challenge answer is not a number'); 319 | expect(error).to.have.property('errorType', 3); 320 | 321 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 322 | expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); 323 | }); 324 | }); 325 | 326 | it('should return error if it was thrown by request when solving challenge', function (done) { 327 | helper.router 328 | .get('/test', function (req, res) { 329 | 
res.sendChallenge('js_challenge_21_05_2015.html'); 330 | }) 331 | .get('/cdn-cgi/l/chk_jschl', function (req, res) { 332 | res.endAbruptly(); 333 | }); 334 | 335 | const promise = cloudscraper.get(uri, function (error) { 336 | // errorType 0, a connection error for example 337 | expect(error).to.be.instanceOf(errors.RequestError); 338 | expect(error.error).to.be.an('error'); 339 | expect(error).to.have.property('errorType', 0); 340 | 341 | expect(Request).to.be.calledTwice; 342 | expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); 343 | expect(promise).to.be.rejectedWith(errors.RequestError).and.notify(done); 344 | }); 345 | }); 346 | 347 | it('should properly handle a case when after a challenge another one is returned', function (done) { 348 | helper.router 349 | .get('/test', function (req, res) { 350 | res.sendChallenge('js_challenge_09_06_2016.html'); 351 | }) 352 | .get('/cdn-cgi/l/chk_jschl', function (req, res) { 353 | res.sendChallenge('captcha.html'); 354 | }); 355 | 356 | // Second call to request.get returns recaptcha 357 | const expectedParams = helper.extendParams({ 358 | uri: helper.resolve('/cdn-cgi/l/chk_jschl'), 359 | challengesToSolve: 2 360 | }); 361 | 362 | // Perform less strict matching on headers and qs to simplify this test 363 | Object.assign(expectedParams, { 364 | headers: sinon.match.object, 365 | qs: sinon.match.object 366 | }); 367 | 368 | const promise = cloudscraper.get(uri, function (error) { 369 | // errorType 1, means captcha is served 370 | expect(error).to.be.instanceOf(errors.CaptchaError); 371 | expect(error).to.have.property('error', 'captcha'); 372 | expect(error).to.have.property('errorType', 1); 373 | 374 | expect(Request).to.be.calledTwice; 375 | expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); 376 | expect(Request.secondCall).to.be.calledWithExactly(expectedParams); 377 | expect(promise).to.be.rejectedWith(errors.CaptchaError).and.notify(done); 378 | }); 379 | }); 380 | 381 | 
it('should return error if challenge page cookie extraction fails', function (done) { 382 | const html = helper.getFixture('sucuri_waf_18_08_2016.html').toString('utf8'); 383 | 384 | helper.router.get('/test', function (req, res) { 385 | // The cookie extraction codes looks for the `S` variable assignment 386 | res.cloudflare().status(503).send(html.replace(/S=/gm, 'Z=')); 387 | }); 388 | 389 | const promise = cloudscraper.get(uri, function (error) { 390 | expect(error).to.be.instanceOf(errors.ParserError); 391 | expect(error).to.have.property('error', 'Cookie code extraction failed'); 392 | expect(error).to.have.property('errorType', 3); 393 | 394 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 395 | expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); 396 | }); 397 | }); 398 | 399 | it('should throw a TypeError if callback is not a function', function (done) { 400 | const spy = sinon.spy(function () { 401 | // request-promise always provides a callback so change requester 402 | const options = { uri: uri, requester: require('request') }; 403 | cloudscraper.get(options); 404 | }); 405 | 406 | expect(spy).to.throw(TypeError, /Expected a callback function/); 407 | done(); 408 | }); 409 | 410 | it('should throw a TypeError if requester is not a function', function (done) { 411 | const spy = sinon.spy(function () { 412 | cloudscraper.get({ requester: null }); 413 | }); 414 | 415 | expect(spy).to.throw(TypeError, /`requester` option .*function/); 416 | done(); 417 | }); 418 | 419 | it('should throw a TypeError if challengesToSolve is not a number', function (done) { 420 | const spy = sinon.spy(function () { 421 | const options = { uri: uri, challengesToSolve: 'abc' }; 422 | 423 | cloudscraper.get(options); 424 | }); 425 | 426 | expect(spy).to.throw(TypeError, /`challengesToSolve` option .*number/); 427 | done(); 428 | }); 429 | 430 | it('should throw a TypeError if cloudflareMaxTimeout is not a number', function (done) { 431 | 
const spy = sinon.spy(function () { 432 | const options = { uri: uri, cloudflareMaxTimeout: 'abc' }; 433 | 434 | cloudscraper.get(options, function () {}); 435 | }); 436 | 437 | expect(spy).to.throw(TypeError, /`cloudflareMaxTimeout` option .*number/); 438 | done(); 439 | }); 440 | 441 | it('should return error if cookie setting code evaluation fails', function (done) { 442 | // Change the cookie setting code so the vm will throw an error 443 | const html = helper.getFixture('sucuri_waf_18_08_2016.html').toString('utf8'); 444 | const b64 = Buffer.from('throw new Error(\'vm eval failed\');').toString('base64'); 445 | 446 | helper.router.get('/test', function (req, res) { 447 | res.cloudflare().status(503).send(html.replace(/S='([^']+)'/, 'S=\'' + b64 + '\'')); 448 | }); 449 | 450 | const promise = cloudscraper.get(uri, function (error) { 451 | expect(error).to.be.instanceOf(errors.ParserError); 452 | expect(error).to.have.property('error').that.is.an('error'); 453 | expect(error).to.have.property('errorType', 3); 454 | expect(error.message).to.include('vm eval failed'); 455 | 456 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 457 | expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); 458 | }); 459 | }); 460 | 461 | it('should not error if Error.captureStackTrace is undefined', function () { 462 | const desc = Object.getOwnPropertyDescriptor(Error, 'captureStackTrace'); 463 | 464 | Object.defineProperty(Error, 'captureStackTrace', { 465 | configurable: true, 466 | value: undefined 467 | }); 468 | 469 | const spy = sinon.spy(function () { 470 | throw new errors.RequestError(); 471 | }); 472 | 473 | try { 474 | expect(spy).to.throw(errors.RequestError); 475 | } finally { 476 | Object.defineProperty(Error, 'captureStackTrace', desc); 477 | } 478 | }); 479 | }); 480 | -------------------------------------------------------------------------------- /test/test-headers.js: 
/* eslint-disable no-unused-expressions */
/* eslint-env node, mocha */
'use strict';

const sinon = require('sinon');
const expect = require('chai').expect;

/**
 * Specs for the helpers exported by lib/headers (`caseless` and
 * `getDefaultHeaders`), plus a sanity check of the chrome entries
 * found in lib/browsers.
 */
describe('Headers (lib)', function () {
  const headers = require('../lib/headers');
  const browsers = require('../lib/browsers');

  const getDefaultHeaders = headers.getDefaultHeaders;
  const caseless = headers.caseless;

  // Every getDefaultHeaders check is repeated this many times.
  // NOTE(review): presumably because the generated headers can differ
  // between calls; confirm against lib/headers.
  const RUNS = 100;

  // Invokes `check` RUNS times in a row, stopping at the first failure.
  function repeat (check) {
    for (let run = 0; run < RUNS; run++) {
      check();
    }
  }

  // Asserts that the first keys of `actualKeys` equal `expectedKeys`, in order.
  function expectLeadingKeys (actualKeys, expectedKeys) {
    expectedKeys.forEach(function (name, position) {
      expect(actualKeys[position]).to.equal(name);
    });
  }

  it('should export getDefaultHeaders function', function () {
    expect(getDefaultHeaders).to.be.a('function');
  });

  it('should export caseless function', function () {
    expect(caseless).to.be.a('function');
  });

  it('caseless should return an object with lowercase keys', function () {
    const lowered = caseless({ AbC: 'foobar' });

    sinon.assert.match(lowered, { abc: 'foobar' });
  });

  it('getDefaultHeaders should always return an object with user agent', function () {
    repeat(function () {
      sinon.assert.match(getDefaultHeaders(), { 'User-Agent': sinon.match.string });
    });

    // Every chrome entry must list at least one candidate user agent
    for (const profile of browsers.chrome) {
      try {
        const userAgents = profile['User-Agent'];

        expect(userAgents).to.be.an('array');
        expect(userAgents.length).to.be.above(0);
      } catch (error) {
        // Attach the offending entry so the failure is easy to locate
        error.message = error.message + '\n\n' + JSON.stringify(profile, null, 2);
        throw error;
      }
    }
  });

  it('getDefaultHeaders should always retain insertion order', function () {
    // Caller-supplied headers come first, followed by Connection
    repeat(function () {
      const names = Object.keys(getDefaultHeaders({ Host: 'foobar' }));

      expectLeadingKeys(names, ['Host', 'Connection']);
    });

    // A null-valued header still keeps its position
    repeat(function () {
      const names = Object.keys(getDefaultHeaders({ Host: 'foobar', 'N/A': null }));

      expectLeadingKeys(names, ['Host', 'N/A', 'Connection']);
    });
  });
});
-------------------------------------------------------------------------------- /test/test-rp.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable promise/always-return,promise/catch-or-return,promise/no-callback-in-promise */ 2 | /* eslint-env node, mocha */ 3 | 'use strict'; 4 | 5 | const cloudscraper = require('../index'); 6 | const request = require('request-promise'); 7 | const helper = require('./helper'); 8 | 9 | const sinon = require('sinon'); 10 | const expect = require('chai').expect; 11 | 12 | describe('Cloudscraper', function () { 13 | let sandbox; 14 | let Request; 15 | let uri; 16 | 17 | const requestedPage = helper.getFixture('requested_page.html'); 18 | 19 | before(function (done) { 20 | helper.listen(function () { 21 | uri = helper.resolve('/test'); 22 | 23 | // Speed up tests 24 | cloudscraper.defaultParams.cloudflareTimeout = 1; 25 | done(); 26 | }); 27 | }); 28 | 29 | after(function () { 30 | helper.server.close(); 31 | }); 32 | 33 | beforeEach(function () { 34 | // Prepare stubbed Request 35 | sandbox = sinon.createSandbox(); 36 | Request = sandbox.spy(request, 'Request'); 37 | }); 38 | 39 | afterEach(function () { 40 | helper.reset(); 41 | sandbox.restore(); 42 | }); 43 | 44 | it('should resolve with response body', function () { 45 | helper.router.get('/test', function (req, res) { 46 | res.send(requestedPage); 47 | }); 48 | 49 | const expectedParams = helper.extendParams({ callback: undefined }); 50 | 51 | return cloudscraper.get(uri).then(function (body) { 52 | expect(Request).to.be.calledOnceWithExactly(expectedParams); 53 | expect(body).to.be.equal(requestedPage); 54 | }); 55 | }); 56 | 57 | it('should resolve with full response', function () { 58 | helper.router.get('/test', function (req, res) { 59 | res.send(requestedPage); 60 | }); 61 | 62 | const expectedParams = helper.extendParams({ 63 | callback: undefined, 64 | resolveWithFullResponse: true 65 | }); 66 | 67 | // The method is 
implicitly GET 68 | delete expectedParams.method; 69 | 70 | const options = { 71 | uri: uri, 72 | resolveWithFullResponse: true 73 | }; 74 | 75 | return cloudscraper(options).then(function (response) { 76 | expect(Request).to.be.calledOnceWithExactly(expectedParams); 77 | expect(response.body).to.be.equal(requestedPage); 78 | }); 79 | }); 80 | 81 | // The helper calls the fake request callback synchronously. This results 82 | // in the promise being rejected before we catch it in the test. 83 | // This can be noticeable if we return the promise instead of calling done. 84 | it('should define catch', function (done) { 85 | helper.router.get('/test', function (req, res) { 86 | res.endAbruptly(); 87 | }); 88 | 89 | let caught = false; 90 | 91 | cloudscraper(uri) 92 | .catch(function () { 93 | caught = true; 94 | }) 95 | .then(function () { 96 | if (caught) done(); 97 | }); 98 | }); 99 | 100 | it('should define finally', function (done) { 101 | helper.router.get('/test', function (req, res) { 102 | res.endAbruptly(); 103 | }); 104 | 105 | let caught = false; 106 | 107 | cloudscraper(uri) 108 | .then(function () { 109 | caught = true; 110 | }) 111 | .finally(function () { 112 | if (!caught) done(); 113 | }) 114 | .catch(function () {}); 115 | }); 116 | }); 117 | -------------------------------------------------------------------------------- /test/test-sandbox.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-unused-expressions */ 2 | /* eslint-env node, mocha */ 3 | 'use strict'; 4 | 5 | const sandbox = require('../lib/sandbox'); 6 | const expect = require('chai').expect; 7 | 8 | describe('Sandbox (lib)', function () { 9 | it('should export Context', function () { 10 | expect(sandbox.Context).to.be.a('function'); 11 | }); 12 | 13 | it('should export eval', function () { 14 | expect(sandbox.eval).to.be.a('function'); 15 | expect(sandbox.eval('0')).to.equal(0); 16 | expect(sandbox.eval('true')).to.be.true; 17 | 
expect(sandbox.eval('undefined')).to.equal(undefined); 18 | expect(sandbox.eval('NaN')).to.be.a('number'); 19 | expect(String(sandbox.eval('NaN'))).to.equal('NaN'); 20 | }); 21 | 22 | it('new Context() should return an object', function () { 23 | expect(new sandbox.Context()).to.be.an('object'); 24 | }); 25 | 26 | it('Context() should define atob', function () { 27 | const ctx = new sandbox.Context(); 28 | 29 | expect(ctx.atob).to.be.a('function'); 30 | expect(ctx.atob('YWJj')).to.equal('abc'); 31 | expect(sandbox.eval('atob("YWJj")', ctx)).to.equal('abc'); 32 | }); 33 | 34 | it('Context() should define location.reload', function () { 35 | const ctx = new sandbox.Context(); 36 | 37 | expect(sandbox.eval('location.reload()', ctx)).to.equal(undefined); 38 | }); 39 | 40 | it('Context() should define document.createElement', function () { 41 | let ctx = new sandbox.Context(); 42 | let pseudoElement = { firstChild: { href: 'http:///' } }; 43 | 44 | expect(sandbox.eval('document.createElement("a")', ctx)).to.eql(pseudoElement); 45 | 46 | ctx = new sandbox.Context({ hostname: 'test.com' }); 47 | pseudoElement = { firstChild: { href: 'http://test.com/' } }; 48 | 49 | expect(sandbox.eval('document.createElement("a")', ctx)).to.eql(pseudoElement); 50 | }); 51 | 52 | it('Context() should define document.geElementById', function () { 53 | let ctx = new sandbox.Context(); 54 | expect(sandbox.eval('document.getElementById()', ctx)).to.be.null; 55 | 56 | // Missing element 57 | ctx = new sandbox.Context(); 58 | expect(sandbox.eval('document.getElementById("foobar")', ctx)).to.be.null; 59 | 60 | // Double quotes 61 | ctx = new sandbox.Context({ body: '
foobar
' }); 62 | expect(sandbox.eval('document.getElementById("test")', ctx)).eql({ innerHTML: 'foobar' }); 63 | 64 | // Single quotes 65 | ctx = new sandbox.Context({ body: '
foobar
' }); 66 | expect(sandbox.eval('document.getElementById(\'test\')', ctx)).eql({ innerHTML: 'foobar' }); 67 | 68 | // Empty 69 | ctx = new sandbox.Context({ body: '
' }); 70 | expect(sandbox.eval('document.getElementById("test")', ctx)).eql({ innerHTML: '' }); 71 | 72 | // Space agnostic tests 73 | ctx = new sandbox.Context({ body: '
\nabc\n\n
' }); 74 | expect(sandbox.eval('document.getElementById("test")', ctx)).eql({ innerHTML: '\nabc\n\n' }); 75 | 76 | ctx = new sandbox.Context({ body: '
abc
' }); 77 | expect(sandbox.eval('document.getElementById("test")', ctx)).eql({ innerHTML: ' abc ' }); 78 | 79 | ctx = new sandbox.Context({ body: 'foo="bar" id=\'test\' a=b > abc <' }); 80 | expect(sandbox.eval('document.getElementById("test")', ctx)).eql({ innerHTML: ' abc ' }); 81 | 82 | // Cache test 83 | ctx = new sandbox.Context({ body: '
foobar
' }); 84 | expect(sandbox.eval('document.getElementById("test")', ctx)).eql({ innerHTML: 'foobar' }); 85 | }); 86 | }); 87 | -------------------------------------------------------------------------------- /test/test-timeout.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lenivene/cloudflare-scraper-nodejs/bad27a5a553ba55814b7955e559f656a0615de00/test/test-timeout.js --------------------------------------------------------------------------------