├── .eslintrc ├── .github ├── ISSUE_TEMPLATE │ └── bug_report.md └── stale.yml ├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── LICENSE ├── README.md ├── docs ├── examples │ ├── README.md │ ├── banned.js │ ├── captcha-html.js │ ├── custom-headers-v2.js │ ├── custom-headers.js │ ├── custom-requester-v2.js │ ├── custom-requester.js │ ├── debugging.js │ ├── download-v2.js │ ├── download.js │ ├── ignore-error.js │ ├── redirects.js │ ├── session-persistence.js │ ├── solve-recaptcha-v2.js │ ├── solve-recaptcha.js │ └── unsupported-requester.js └── migration-guide.md ├── errors.d.ts ├── errors.js ├── index.d.ts ├── index.js ├── index.test-d.ts ├── lib ├── brotli.js ├── browsers.json ├── email-decode.js ├── headers.js └── sandbox.js ├── mocha.opts ├── package.json └── test ├── common.js ├── fixtures ├── access_denied.html ├── captcha.html ├── cf_recaptcha_01_12_2019.html ├── cf_recaptcha_15_04_2019.html ├── invalid_js_challenge.html ├── js_challenge_03_12_2018_1.html ├── js_challenge_03_12_2018_2.html ├── js_challenge_09_06_2016.html ├── js_challenge_10_04_2019.html ├── js_challenge_13_03_2019.html ├── js_challenge_21_03_2019.html ├── js_challenge_21_05_2015.html ├── js_challenge_28_11_2019.html ├── page_with_emails.html ├── page_with_recaptcha.html ├── requested_page.html ├── sucuri_waf_11_08_2019.html └── sucuri_waf_18_08_2016.html ├── helper.js ├── test-brotli.js ├── test-captcha.js ├── test-emails.js ├── test-errors.js ├── test-headers.js ├── test-index.js ├── test-rp.js ├── test-sandbox.js └── test-timeout.js /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | "standard", 4 | "plugin:promise/recommended" 5 | ], 6 | "plugins": [ 7 | "node", 8 | "json" 9 | ], 10 | "rules": { 11 | "semi": [2, "always"], 12 | "no-trailing-spaces": [0], 13 | "no-multi-spaces": [1, { 14 | "exceptions": { 15 | "VariableDeclarator": true 16 | } 17 | }] 18 | }, 19 | "overrides": [ 20 | { 21 | "files": ["*.ts"], 22 | 
"parser": "@typescript-eslint/parser", 23 | "extends": [ 24 | "standard", 25 | "plugin:promise/recommended", 26 | "plugin:@typescript-eslint/recommended" 27 | ], 28 | "rules": { 29 | "semi": [2, "always"] 30 | } 31 | } 32 | ] 33 | } 34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | _Please attempt to answer the following questions before submitting a new issue:_ 11 | 12 | * What version of Cloudscraper are you using? 13 | - `node -p 'require("cloudscraper/package.json").version'` 14 | * What version of Node.js are you using? (Please share the process information) 15 | - `node -p process` 16 | * When did the problem start occurring? 17 | * How often does the problem occur? 18 | * What is the URL? 19 | * Are there any similar issues? (Please share the links) 20 | 21 | - [ ] I have read the [README](https://github.com/codemanki/cloudscraper#readme). 
(Code [examples](https://github.com/codemanki/cloudscraper/tree/master/docs/examples#readme)) 22 | 23 | _Please share a minimal working code snippet that reproduces the problem._ 24 | Code snippet 25 | 26 | ```js 27 | INSERT CODE HERE 28 | ``` 29 | 30 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 2 | daysUntilStale: 90 3 | # Number of days of inactivity before a stale issue is closed 4 | daysUntilClose: 14 5 | # Issues with these labels will never be considered stale 6 | exemptLabels: 7 | - pinned 8 | - security 9 | - to fix next 10 | # Label to use when marking an issue as stale 11 | staleLabel: wontfix 12 | # Comment to post when marking an issue as stale. Set to `false` to disable 13 | markComment: > 14 | This issue has been automatically marked as stale because it has not had 15 | recent activity. It will be closed if no further activity occurs. Thank you 16 | for your contributions. 17 | # Comment to post when closing a stale issue. 
Set to `false` to disable 18 | closeComment: false 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | 5 | # Runtime data 6 | pids 7 | *.pid 8 | *.seed 9 | 10 | # Directory for instrumented libs generated by jscoverage/JSCover 11 | lib-cov 12 | 13 | # Coverage directory used by tools like istanbul 14 | coverage 15 | 16 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 17 | .grunt 18 | 19 | # Compiled binary addons (http://nodejs.org/api/addons.html) 20 | build/Release 21 | 22 | # Dependency directory 23 | # Commenting this out is preferred by some people, see 24 | # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git- 25 | node_modules 26 | 27 | # Users Environment Variables 28 | .lock-wscript 29 | 30 | test.js 31 | .nyc_output/ -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | 3 | language: node_js 4 | 5 | node_js: 6 | - node 7 | - 11 8 | - 10 9 | - 8 10 | 11 | matrix: 12 | include: 13 | - node_js: node 14 | env: BROTLI=1 15 | - node_js: 8 16 | env: BROTLI=1 17 | before_install: npm i --save-only request brotli 18 | 19 | before_install: npm i --save-only request 20 | install: npm i 21 | after_success: npm run coverage 22 | 23 | notifications: 24 | webhooks: https://www.travisbuddy.com/?insertMode=update 25 | on_success: never 26 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## Change Log 2 | 3 | ### 4.6.0 (12/02/2020) 4 | - Replace `&amp;` in url with `&` 5 | 6 | ### 4.5.0 (03/12/2019) 7 | - [#293](https://github.com/codemanki/cloudscraper/pull/293) Update 
code to parse latest CF recaptcha. 8 | 9 | ### 4.4.0 (28/11/2019) 10 | - [#288](https://github.com/codemanki/cloudscraper/pull/288) Update code to parse latest CF challenge. 11 | 12 | ### 4.3.0 (28/09/2019) 13 | - [#267](https://github.com/codemanki/cloudscraper/pull/267) Typescript definitions. 14 | - [#271](https://github.com/codemanki/cloudscraper/pull/271) Fix brotli compressed JSON responses. 15 | 16 | ### 4.2.0 (24/09/2019) 17 | - [#260](https://github.com/codemanki/cloudscraper/pull/260) Update reCaptcha handling. Deprecate `captcha.url` in preference of `captcha.uri`. [Fix fallback siteKey handling](https://github.com/codemanki/cloudscraper/issues/259#issuecomment-531450844) 18 | 19 | ### 4.1.4 (24/08/2019) 20 | - [#247](https://github.com/codemanki/cloudscraper/pull/247) Optimize header checks. 21 | 22 | ### 4.1.3 (12/07/2019) 23 | - [#242](https://github.com/codemanki/cloudscraper/pull/242) Update Sucuri WAF Solving. 24 | 25 | ### 4.1.2 (23/05/2019) 26 | - [#219](https://github.com/codemanki/cloudscraper/pull/219) Remove a few problematic TLSv1.0 ciphers. 27 | 28 | ### 4.1.1 (11/05/2019) 29 | - Improve CF challenge security by nullifying VM context's prototype chain. 30 | 31 | ### v4.1.0 (02/05/2019) 32 | - Backport TLSv1.3 secure ciphers to potentially avoid getting a CAPTCHA. 
33 | 34 | ### v4.0.1 (25/04/2019) 35 | - Improve documentation 36 | - Add `url` to captcha 37 | - Add more examples for reCAPTCHA handling 38 | 39 | ### v4.0.0 (22/04/2019) 40 | - Randomize `User-Agent` header with random chrome browser 41 | - Recaptcha solving support 42 | - Brotli non-mandatory support 43 | - Various code changes and improvements 44 | 45 | ### v3.9.1 (11/04/2019) 46 | - Fix for the timeout parsing 47 | 48 | ### v3.9.0 (11/04/2019) 49 | - [#193](https://github.com/codemanki/cloudscraper/pull/193) Fix bug with setTimeout match length 50 | 51 | ### v3.8.0 (11/04/2019) 52 | - [#191](https://github.com/codemanki/cloudscraper/pull/191) Update code to parse latest CF challenge 53 | 54 | ### v3.7.0 (07/04/2019) 55 | - [#182](https://github.com/codemanki/cloudscraper/pull/182) Usage examples have been added. 56 | - [#169](https://github.com/codemanki/cloudscraper/pull/169) Cloudscraper now automatically parses out timeout for a CF challenge. 57 | 58 | ### v3.6.0 (03/04/2019) 59 | - [#180](https://github.com/codemanki/cloudscraper/pull/180) Update code to parse latest CF challenge 60 | 61 | ### v3.5.0 (31/03/2019) 62 | - [#174](https://github.com/codemanki/cloudscraper/pull/174) Update code to parse latest CF challenge 63 | 64 | ### v3.4.0 (27/03/2019) 65 | - [#165](https://github.com/codemanki/cloudscraper/pull/165) Fixing CF challenge parsing, respect `Retry-After` header when CF returns `429 Too Many Requests` error. 66 | - [#163](https://github.com/codemanki/cloudscraper/pull/163) Improve the accuracy of challenge timing. 
Throw error immediately without a delay 67 | - [#159](https://github.com/codemanki/cloudscraper/pull/159) Decode emails in the page protected by CF 68 | 69 | ### v3.3.0 (22/03/2019) 70 | - [#153](https://github.com/codemanki/cloudscraper/pull/153) Update code to parse latest CF challenge 71 | 72 | ### v3.2.0 (20/03/2019) 73 | - [#149](https://github.com/codemanki/cloudscraper/pull/149) Update code to parse latest CF challenge 74 | 75 | ### v3.1.0 (14/03/2019) 76 | - [#140](https://github.com/codemanki/cloudscraper/pull/140) Update code to parse new CF challenge 77 | 78 | ### v3.0.1 (11/03/2019) 79 | - [#135](https://github.com/codemanki/cloudscraper/pull/135) Handle non-challenge response bodies 80 | - [#127](https://github.com/codemanki/cloudscraper/pull/127) Improve cloudflare detection 81 | - [#137](https://github.com/codemanki/cloudscraper/pull/137) Handle baseUrl option 82 | - Various code style improvements 83 | 84 | ### v3.0.0 (07/03/2019) 85 | - **BREAKING CHANGE**: `get/post` methods together with their signatures are aligned with corresponding methods from [request](https://github.com/request/request#requestmethod) 86 | - **BREAKING CHANGE**: `cloudscraper.request` method is deprecated in favour of `cloudscraper(options)` 87 | - Promise support has been added by using `request-promise` 88 | - Error objects are inherited from Error and have additional properties. 
89 | * `options` - The request options 90 | * `cause` - An alias for `error` 91 | * `response` - The request response 92 | - Stacktraces are available in error objects 93 | - `cloudflareTimeout` option can be defined to speed up waiting time 94 | - Challenge evaluation is done in a sandbox to avoid potential security issues 95 | - Default [request methods](https://github.com/request/request#requestmethod) are available 96 | - Custom cookie jar can now be passed [#103](https://github.com/codemanki/cloudscraper/issues/102) 97 | - Proxies support [PR#101](https://github.com/codemanki/cloudscraper/pull/101) 98 | - MIT license 99 | 100 | ### v2.0.1 (02/03/2019) 101 | - Minor documentation changes 102 | 103 | ### v2.0.0 (09/12/2018) 104 | - [#2943](https://github.com/codemanki/cloudscraper/pull/66) Support recursive challenge solving. 105 | - **BREAKING CHANGE** Before this, when any error has been detected, the callback was called with an incorrect order: `callback(.., body, response);` instead of `return callback(..., response, body);` 106 | 107 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2014 Anorov 4 | Copyright (c) 2019 Oleksii Sribnyi 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🛑 THIS LIBRARY IS NO LONGER SUPPORTED AND IS DEPRECATED 🛑 2 | 3 | 4 | 5 | cloudscraper 6 | ============ 7 | 8 | Node.js library to bypass Cloudflare's anti-ddos page. 9 | 10 | [](https://github.com/Flet/semistandard) 11 | 12 | [](https://travis-ci.org/codemanki/cloudscraper) 13 | [](https://coveralls.io/r/codemanki/cloudscraper) 14 | [](https://david-dm.org/codemanki/cloudscraper) 15 | [](https://greenkeeper.io/) 16 | 17 | If the page you want to access is protected by Cloudflare, it will return special page, which expects client to support Javascript to solve challenge. 18 | 19 | This small library encapsulates logic which extracts challenge, solves it, submits and returns the request page body. 20 | 21 | You can use cloudscraper even if you are not sure if Cloudflare protection is turned on. 22 | 23 | In general, Cloudflare has 4 types of _common_ anti-bot pages: 24 | - Simple html+javascript page with challenge 25 | - Page which redirects to original site 26 | - Page with reCAPTCHA 27 | - Page with error ( your ip was banned, etc) 28 | 29 | If you notice that for some reason cloudscraper stops working, do not hesitate and get in touch with me ( by creating an issue [here](https://github.com/codemanki/cloudscraper/issues), for example), so i can update it. 
30 | 31 | Install 32 | ============ 33 | ```sh 34 | npm install cloudscraper 35 | ``` 36 | 37 | Saving the `request` module as a dependency is compulsory. 38 | 39 | ```sh 40 | # Pin the request version 41 | npm install --save request 42 | ``` 43 | 44 | Support for Brotli encoded responses is enabled by default when using Node.js v10 or later. 45 | If you wish to enable support for older Node.js versions, you may install [brotli](https://npmjs.com/package/brotli). 46 | It is recommended but not required. 47 | 48 | Usage 49 | ============ 50 | Cloudscraper uses `request-promise` by default since v3. You can find the migration guide [here.](docs/migration-guide.md) 51 | 52 | ```javascript 53 | var cloudscraper = require('cloudscraper'); 54 | 55 | cloudscraper.get('https://website.com/').then(console.log, console.error); 56 | ``` 57 | 58 | or for `POST` action: 59 | 60 | ```javascript 61 | var options = { 62 | uri: 'https://website.com/', 63 | formData: { field1: 'value', field2: 2 } 64 | }; 65 | 66 | cloudscraper.post(options).then(console.log).catch(console.error); 67 | ``` 68 | 69 | *Examples live in the docs directory of the Github repo and can be found [here.](docs/examples)* 70 | 71 | A generic request can be made with `cloudscraper(options)`. The options object should follow [request's options](https://www.npmjs.com/package/request#request-options-callback). Not everything is supported however, for example http methods other than GET and POST. If you wanted to request an image in binary data you could use the encoding option: 72 | 73 | ```javascript 74 | var options = { 75 | method: 'GET', 76 | url:'http://website.com/', 77 | }; 78 | 79 | cloudscraper(options).then(console.log); 80 | ``` 81 | 82 | ## Advanced usage 83 | Cloudscraper allows you to specify your own requester, one of either `request` or `request-promise`. 84 | Cloudscraper wraps the requester and accepts the same options, so using cloudscraper is pretty much like using those two libraries. 
85 | - Cloudscraper exposes [the same HTTP verb methods as request](https://github.com/request/request#requestmethod): 86 | * `cloudscraper.get(options, callback)` 87 | * `cloudscraper.post(options, callback)` 88 | * `cloudscraper(uri)` 89 | - Cloudscraper uses request-promise by default, promise chaining is done exactly the same as described in [docs](https://github.com/request/request-promise#cheat-sheet): 90 | ``` 91 | cloudscraper(options) 92 | .then(function (htmlString) { 93 | }) 94 | .catch(function (err) { 95 | }); 96 | ``` 97 | Please refer to the requester's documentation for further instructions. 98 | 99 | ## Sucuri 100 | Cloudscraper can also identify and automatically bypass [Sucuri WAF](https://sucuri.net/website-firewall/). No actions are required. 101 | 102 | ## ReCAPTCHA 103 | Cloudscraper may help you with the reCAPTCHA page. Take a look at [this example](docs/examples/solve-recaptcha.js) and an [example using promises](docs/examples/solve-recaptcha-v2.js). 104 | 105 | Cloudflare may send a reCAPTCHA depending on the negotiated TLS cipher suite and extensions. Reducing the default cipher suite to only ciphers supported by Cloudflare may mitigate the problem: https://developers.cloudflare.com/ssl/ssl-tls/cipher-suites/ 106 | 107 | Only specifying the Cloudflare preferred TLSv1.2 cipher is also an option: 108 | ```javascript 109 | var cloudscraper = require('cloudscraper').defaults({ 110 | agentOptions: { 111 | ciphers: 'ECDHE-ECDSA-AES128-GCM-SHA256' 112 | } 113 | }) 114 | ``` 115 | 116 | More information on TLS issues can be found [here](https://github.com/codemanki/cloudscraper/issues?utf8=%E2%9C%93&q=tls). 117 | 118 | ## Defaults method 119 | 120 | `cloudscraper.defaults` is a very convenient way of extending the cloudscraper requests with any of your settings. 
121 | 122 | ```javascript 123 | var cloudscraper = require('cloudscraper').defaults({ 'proxy': 'http://localproxy.com' }); 124 | // Overriding headers to remove them or using uncommon headers will cause reCAPTCHA responses 125 | var headers = { /* ... */ }; 126 | var cloudscraper = require('cloudscraper').defaults({ headers: headers }); 127 | 128 | cloudscraper(options).then(console.log); 129 | ``` 130 | 131 | ## Configuration 132 | Cloudscraper exposes the following options that are required by default but might be changed. *Please note that the default values eliminate the chance of getting sent a CAPTCHA.* 133 | 134 | ```javascript 135 | var options = { 136 | uri: 'https://website', 137 | jar: requestModule.jar(), // Custom cookie jar 138 | headers: { 139 | // User agent, Cache Control and Accept headers are required 140 | // User agent is populated by a random UA. 141 | 'User-Agent': 'Ubuntu Chromium/34.0.1847.116 Chrome/34.0.1847.116 Safari/537.36', 142 | 'Cache-Control': 'private', 143 | 'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5' 144 | }, 145 | // Cloudscraper automatically parses out timeout required by Cloudflare. 146 | // Override cloudflareTimeout to adjust it. 147 | cloudflareTimeout: 5000, 148 | // Reduce Cloudflare's timeout to cloudflareMaxTimeout if it is excessive 149 | cloudflareMaxTimeout: 30000, 150 | // followAllRedirects - follow non-GET HTTP 3xx responses as redirects 151 | followAllRedirects: true, 152 | // Support only this max challenges in row. 
If CF returns more, throw an error 153 | challengesToSolve: 3, 154 | // Remove Cloudflare's email protection, replace encoded email with decoded versions 155 | decodeEmails: false, 156 | // Support gzip encoded responses (Should be enabled unless using custom headers) 157 | gzip: true, 158 | // Removes a few problematic TLSv1.0 ciphers to avoid CAPTCHA 159 | agentOptions: { ciphers } 160 | }; 161 | 162 | cloudscraper(options).then(console.log); 163 | 164 | ``` 165 | You can access the default configuration with `cloudscraper.defaultParams` 166 | 167 | ## Error object 168 | Cloudscraper error object inherits from `Error` and has the following fields: 169 | * `name` - `RequestError`/`CaptchaError`/`CloudflareError`/`ParserError` 170 | * `options` - The request options 171 | * `cause` - An alias for `error` 172 | * `response` - The request response 173 | * `errorType` - Custom error code 174 | Where `errorType` can be one of the following: 175 | - `0` if request to page failed due to some native reason such as a bad url, http connection or so. `error` in this case will be error [event](http://nodejs.org/api/http.html#http_class_http_server) 176 | - `1` Cloudflare returned CAPTCHA. Nothing to do here. Bad luck 177 | - `2` Cloudflare returned page with some inner error. `error` will be `Number` within this range `1012, 1011, 1002, 1000, 1004, 1010, 1006, 1007, 1008`. See more [here](https://support.cloudflare.com/hc/en-us/sections/200820298-Error-Pages) 178 | - `3` this error is returned when library failed to parse and solve js challenge. `error` will be `String` with some details. :warning: :warning: __Most likely it means that Cloudflare have changed their js challenge.__ 179 | - `4` CF went into a loop and started to return challenge after challenge. If number of solved challenges is greater than `3` and another challenge is returned, throw an error 180 | 181 | Errors are descriptive. 
You can find a list of all known errors [here.](errors.js) 182 | 183 | 184 | Do not always rely on `error.cause` to be an error, it can be a string. 185 | 186 | Running tests 187 | ============ 188 | Clone this repo, do `npm install` and then just `npm test` 189 | 190 | ### Unknown error? Library stopped working? ### 191 | Let me know, by opening an [issue](https://github.com/codemanki/cloudscraper/issues) in this repo and I will update library asap. Please, provide url and body of page where cloudscraper failed. 192 | 193 | WAT 194 | =========== 195 | Current Cloudflare implementation requires browser to respect the timeout of 5 seconds and cloudscraper mimics this behaviour. So every time you call `cloudscraper.get/post` you should expect it to return result after minimum 6 seconds. If you want to change this behaviour, you would need to make a generic request as described above and pass the `cloudflareTimeout` option with your value. But be aware that Cloudflare might track this timeout and use it against you ;) 196 | 197 | ## TODO 198 | - [x] Check for reCAPTCHA 199 | - [x] Support cookies, so challenge can be solved once per session 200 | - [x] Support page with simple redirects 201 | - [x] Add proper testing 202 | - [x] Remove manual 302 processing, replace with `followAllRedirects` param 203 | - [x] Parse out the timeout from challenge page 204 | - [x] Reorder the arguments in get/post/request methods and allow custom options to be passed in 205 | - [x] Support reCAPTCHA solving 206 | - [x] Promisification 207 | 208 | ## Kudos to contributors 209 | - [Dwayne](https://github.com/pro-src) 210 | - [drdokk](https://github.com/drdokk) 211 | - [Cole Faust](https://github.com/Colecf) 212 | - [Jeongbong Seo](https://github.com/jngbng) 213 | - [Mike van Rossum](https://github.com/askmike) 214 | - [Santiago Castro](https://github.com/bryant1410) 215 | - [Leonardo Gatica](https://github.com/lgaticaq) 216 | - [Michael](https://github.com/roflmuffin) 217 | - 
[Kamikadze4GAME](https://github.com/Kamikadze4GAME) 218 | - [Anorov](https://github.com/Anorov) :star: 219 | 220 | In the beginning cloudscraper was a port of python module [cloudflare-scrape](https://github.com/Anorov/cloudflare-scrape). Thank you [Anorov](https://github.com/Anorov) for an inspiration. 221 | 222 | ## Dependencies 223 | * [request-promise](https://github.com/request/request-promise) 224 | -------------------------------------------------------------------------------- /docs/examples/README.md: -------------------------------------------------------------------------------- 1 | Examples 2 | --- 3 | 4 | ***Not all of the examples are meant to work without modification.*** 5 | 6 | The version suffix is only meant to indicate an alternate version 7 | of the same example. e.g. `custom-headers-v2.js` is meant to be used 8 | with the latest version of Cloudscraper, ***not*** Cloudscraper v2.0.0. 9 | 10 | If you've noticed for some reason that an example is outdated and/or misleading, please send a PR to correct it. 11 | In fact, updating or adding examples is a great way to contribute! 12 | If you don't have the time to send a PR, please consider opening an issue instead. 
13 | -------------------------------------------------------------------------------- /docs/examples/banned.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | /* eslint-disable yoda */ 3 | 4 | // https://github.com/codemanki/cloudscraper/issues/155 5 | 6 | var cloudscraper = require('../..'); 7 | var CloudflareError = require('../../errors').CloudflareError; 8 | 9 | var uri = process.argv[2]; 10 | 11 | cloudscraper.get(uri) 12 | .catch(function (error) { 13 | if (error instanceof CloudflareError) { 14 | if (!isNaN(error.cause)) { 15 | if (1004 < error.cause && error.cause < 1009) { 16 | return cloudscraper.get({ uri: uri, proxy: 'http://example-proxy.com' }); 17 | } 18 | } 19 | } 20 | 21 | throw error; 22 | }) 23 | .then(console.log) 24 | .catch(console.error); 25 | -------------------------------------------------------------------------------- /docs/examples/captcha-html.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var cloudscraper = require('../..').defaults({ resolveWithFullResponse: true }); 4 | var CaptchaError = require('../../errors').CaptchaError; 5 | 6 | var uri = process.argv[2]; 7 | 8 | cloudscraper.get(uri).catch(function (error) { 9 | if (error instanceof CaptchaError) { 10 | console.log(error.response.body.toString('utf8')); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/examples/custom-headers-v2.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var cloudscraper = require('../..'); 4 | 5 | cloudscraper.defaultParams.headers = { 6 | Connection: 'keep-alive', 7 | 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36', 8 | Accept: 
'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3', 9 | 'Accept-Encoding': 'gzip, deflate', 10 | 'Accept-Language': 'en-US,en;q=0.9' 11 | }; 12 | 13 | var uri = process.argv[2]; 14 | 15 | cloudscraper.get({ gzip: true, uri: uri }).then(console.log).catch(console.error); 16 | -------------------------------------------------------------------------------- /docs/examples/custom-headers.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var cloudscraper = require('../..').defaults({ 4 | headers: { 5 | Connection: 'keep-alive', 6 | 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36', 7 | Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3', 8 | 'Accept-Language': 'en-US,en;q=0.9' 9 | } 10 | }); 11 | 12 | var uri = process.argv[2]; 13 | 14 | cloudscraper.get(uri, function (error, response, body) { 15 | if (error) { 16 | throw error; 17 | } 18 | 19 | console.log(body); 20 | }); 21 | -------------------------------------------------------------------------------- /docs/examples/custom-requester-v2.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var requester = require('request-promise'); 4 | var cloudscraper = require('../..').defaults({ requester: requester }); 5 | var uri = process.argv[2]; 6 | 7 | cloudscraper.get(uri).then(console.log).catch(console.error); 8 | -------------------------------------------------------------------------------- /docs/examples/custom-requester.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var requester = require('request'); 4 | var cloudscraper = require('../..').defaults({ requester: requester }); 5 | var uri = 
process.argv[2]; 6 | 7 | cloudscraper.get(uri, function (error, response, body) { 8 | if (error) { 9 | throw error; 10 | } 11 | 12 | console.log(body); 13 | }); 14 | -------------------------------------------------------------------------------- /docs/examples/debugging.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var cloudscraper = require('../..').defaults({ resolveWithFullResponse: true }); 4 | var fs = require('fs'); 5 | 6 | var uri = process.argv[2]; 7 | 8 | cloudscraper.debug = true; 9 | cloudscraper.get(uri).then(onResponse).catch(onError); 10 | 11 | function onResponse (response) { 12 | var request = JSON.stringify(response.request.toJSON(), null, 2); 13 | var headers = JSON.stringify(response.headers, null, 2); 14 | 15 | fs.writeFileSync('./request.json', request, 'utf8'); 16 | fs.writeFileSync('./headers.json', headers, 'utf8'); 17 | fs.writeFileSync('./body.html', response.body, 'utf8'); 18 | } 19 | 20 | function onError (error) { 21 | console.error(error.stack); 22 | 23 | fs.writeFileSync('./error.txt', error.stack, 'utf8'); 24 | 25 | if (error.cause) { 26 | console.log('Cause: ', error.cause); 27 | } 28 | 29 | if (error.response) { 30 | onResponse(error.response); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /docs/examples/download-v2.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | /* eslint-disable promise/always-return */ 3 | 4 | var cloudscraper = require('../..'); 5 | var fs = require('fs'); 6 | 7 | cloudscraper.get({ uri: 'https://subscene.com/content/images/logo.gif', encoding: null }) 8 | .then(function (bufferAsBody) { 9 | fs.writeFileSync('./test.gif', bufferAsBody); 10 | }) 11 | .catch(console.error); 12 | -------------------------------------------------------------------------------- /docs/examples/download.js: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var cloudscraper = require('../..').defaults({ resolveWithFullResponse: true }); 4 | var fs = require('fs'); 5 | 6 | var uri = process.argv[2]; 7 | 8 | cloudscraper.get({ uri: uri, encoding: null }).then(saveFile).catch(console.error); 9 | 10 | function saveFile (response) { 11 | var filename = process.argv[3]; 12 | 13 | if (!filename) { 14 | var header = response.caseless.get('content-disposition'); 15 | var match = ('' + header).match(/filename=(['"]?)(.*?)\1/i); 16 | 17 | filename = match !== null ? match[2] : 'example.bin'; 18 | } 19 | 20 | fs.writeFileSync(filename, response.body); 21 | } 22 | -------------------------------------------------------------------------------- /docs/examples/ignore-error.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | const cloudscraper = require('../..').defaults({ resolveWithFullResponse: true }); 4 | 5 | var uri = process.argv[2]; 6 | // Cloudscraper thinks this server's response is a Cloudflare response 7 | var server = 'cloudflare-april-fools'; 8 | 9 | getHeaders(uri).then(console.log).catch(console.error); 10 | 11 | function getHeaders (uri) { 12 | return cloudscraper.head(uri) 13 | .catch(error => { 14 | if (error.errorType === 2 && server === error.response.headers.Server) { 15 | // Ignoring the error and returning the response 16 | return error.response; 17 | } 18 | 19 | throw error; 20 | }) 21 | .then(response => response.headers); 22 | } 23 | -------------------------------------------------------------------------------- /docs/examples/redirects.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var cloudscraper = require('../..').defaults({ followAllRedirects: false, maxRedirects: 3 }); 4 | var uri = process.argv[2]; 5 | 6 | cloudscraper.get({ simple: false, uri: uri 
}).then(console.log).catch(console.error); 7 | -------------------------------------------------------------------------------- /docs/examples/session-persistence.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | // https://github.com/codemanki/cloudscraper/issues/246 4 | 5 | var cloudscraper = require('../..'); 6 | 7 | // npm install --save tough-cookie-file-store 8 | var CookieStore = require('tough-cookie-file-store'); 9 | var jar = cloudscraper.jar(new CookieStore('./cookie.json')); 10 | 11 | /* 12 | // It's recommended to reuse the same headers. 13 | var fs = require('fs'); 14 | var headers = cloudscraper.defaultParams.headers; 15 | fs.writeFileSync('./headers.json', JSON.stringify(headers), 'utf-8'); 16 | */ 17 | 18 | var uri = process.argv[2]; 19 | 20 | cloudscraper = cloudscraper.defaults({ jar, headers: require('./headers') }); 21 | cloudscraper.get(uri).then(console.log).catch(console.error); 22 | -------------------------------------------------------------------------------- /docs/examples/solve-recaptcha-v2.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | // Force a CAPTCHA response by sending bogus headers 4 | const headers = { /* headers without user-agent, etc. 
*/ }; 5 | const cloudscraper = require('../..').defaults({ onCaptcha: handler, headers }); 6 | 7 | // Pseudo function that returns a promise instead of calling captcha.submit() 8 | function handler (options, { captcha }) { 9 | return new Promise((resolve, reject) => { 10 | // Here you do some magic with the siteKey provided by cloudscraper 11 | console.error('The url is "' + captcha.uri.href + '"'); 12 | console.error('The site key is "' + captcha.siteKey + '"'); 13 | // captcha.form['g-recaptcha-response'] = /* Obtain from your service */ 14 | reject(new Error('This is a dummy function.')); 15 | }); 16 | } 17 | 18 | // An example handler with destructuring arguments 19 | function alternative (options, { captcha: { uri, siteKey } }) { 20 | // Here you do some magic with the siteKey provided by cloudscraper 21 | console.error('The url is "' + uri.href + '"'); 22 | console.error('The site key is "' + siteKey + '"'); 23 | return Promise.reject(new Error('This is a dummy function')); 24 | } 25 | 26 | const uri = process.argv[2]; 27 | cloudscraper.get({ uri, onCaptcha: alternative }).then(console.log).catch(console.warn); 28 | -------------------------------------------------------------------------------- /docs/examples/solve-recaptcha.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | function solveReCAPTCHA (url, sitekey, callback) { 4 | // Here you do some magic with the sitekey provided by cloudscraper 5 | } 6 | 7 | function onCaptcha (options, response, body) { 8 | const captcha = response.captcha; 9 | // solveReCAPTCHA is a method that you should come up with and pass it href and sitekey, in return it will return you a reponse 10 | solveReCAPTCHA(captcha.uri.href, captcha.siteKey, (error, gRes) => { 11 | // eslint-disable-next-line no-void 12 | if (error) return void captcha.submit(error); 13 | captcha.form['g-recaptcha-response'] = gRes; 14 | captcha.submit(); 15 | }); 16 | } 17 | 18 | const 
cloudscraper = require('../..').defaults({ onCaptcha }); 19 | var uri = process.argv[2]; 20 | cloudscraper.get({ uri: uri, headers: { cookie: 'captcha=1' } }).catch(console.warn).then(console.log); // eslint-disable-line promise/catch-or-return 21 | -------------------------------------------------------------------------------- /docs/examples/unsupported-requester.js: -------------------------------------------------------------------------------- 1 | import { EventEmitter } from 'events'; 2 | import { URL } from 'url'; 3 | // `npm i --save caseless` although it's available if `request` is installed 4 | import caseless from 'caseless'; 5 | 6 | export default function (options) { 7 | return new Request(options); 8 | }; 9 | 10 | // All of the properties that are defined in this class are required. 11 | class Request extends EventEmitter { 12 | constructor (options) { 13 | super(); 14 | const self = this; 15 | 16 | self.uri = typeof options.uri === 'string' 17 | ? new URL(options.uri) : options.uri; 18 | 19 | // Use options.headers instead of `this.headers` if serializing 20 | self.headers = caseless(options.headers); 21 | 22 | // Cloudscraper will only call `request.callback` for the very last request 23 | self.callback = options.callback; 24 | 25 | // The actual request should be performed at this point. 
26 | // Pseudo error event 27 | const error = null; 28 | if (error) { 29 | self.emit('error', new Error('Request error')); 30 | } 31 | 32 | // Pseudo response arguments 33 | const body = Buffer.from('Response content', 'utf-8'); 34 | const status = 200; 35 | const headers = { 36 | // Response headers 37 | }; 38 | 39 | // Create a response object that `request` normally provides 40 | const response = new Response(headers, status, body); 41 | response.request = self; 42 | 43 | // Advanced, update the cookie jar, use `tough-cookie` if needed 44 | if (response.caseless.has('set-cookie')) { 45 | options.jar.setCookie( 46 | response.caseless['set-cookie'], 47 | self.uri.href, 48 | { ignoreError: true } 49 | ); 50 | } 51 | 52 | // Emit the complete event 53 | setImmediate(() => self.emit('complete', response, response.body)); 54 | } 55 | 56 | getHeader (name) { 57 | return this.headers.get(name); 58 | } 59 | 60 | setHeader (name, value) { 61 | this.headers.set(name, value); 62 | } 63 | } 64 | 65 | // All of the properties that are defined in this class are required. 
66 | class Response { 67 | constructor (headers, statusCode, body) { 68 | this.headers = headers; 69 | this.caseless = caseless(headers); 70 | this.statusCode = statusCode; 71 | this.body = body; 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /docs/migration-guide.md: -------------------------------------------------------------------------------- 1 | Migration from v2 to v3 2 | ============ 3 | - Replace `cloudscraper.request(options)` with `cloudscraper(options)` 4 | - `cloudscraper.get()` and `cloudscraper.post()` method signatures are aligned with corresponding methods from [request](https://github.com/request/request#requestmethod): 5 | ``` 6 | var options = { 7 | uri: 'https://website.com/', 8 | headers: {/*...*/} 9 | }; 10 | 11 | cloudscraper.get(options, function(error, response, body) { 12 | console.log(body); 13 | }); 14 | ``` 15 | or for **POST** 16 | ``` 17 | var options = { 18 | uri: 'https://website.com/', 19 | headers: {/*...*/}, 20 | formData: { field1: 'value', field2: 2 } 21 | }; 22 | 23 | cloudscraper.post(options, function(error, response, body) { 24 | console.log(body); 25 | }); 26 | ``` 27 | - If you are using custom promise support workarounds please remove them as cloudscraper now uses [request-promise](https://github.com/request/request-promise): 28 | 29 | ``` 30 | var cloudscraper = require('cloudscraper'); 31 | var options = { 32 | uri: 'https://website.com/', 33 | method: 'GET' 34 | }; 35 | 36 | cloudscraper(options).then(function(body) { 37 | console.log(body); 38 | }); 39 | ``` 40 | -------------------------------------------------------------------------------- /errors.d.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable @typescript-eslint/no-explicit-any */ 2 | import * as rp from 'request-promise/errors'; 3 | import cloudscraper = require('.'); 4 | import http = require('http'); 5 | 6 | export interface RequestError extends 
rp.RequestError { 7 | options: cloudscraper.Options; 8 | errorType: 0; 9 | } 10 | 11 | export interface RequestErrorConstructor extends Error { 12 | new(cause: any, options: cloudscraper.Options, response: http.IncomingMessage): RequestError; 13 | 14 | (cause: any, options: cloudscraper.Options, response: http.IncomingMessage): RequestError; 15 | 16 | prototype: RequestError; 17 | } 18 | 19 | export const RequestError: RequestErrorConstructor; 20 | 21 | export interface CaptchaError extends rp.RequestError { 22 | options: cloudscraper.Options; 23 | errorType: 1; 24 | } 25 | 26 | export interface CaptchaErrorConstructor extends Error { 27 | new(cause: any, options: cloudscraper.Options, response: http.IncomingMessage): RequestError; 28 | 29 | (cause: any, options: cloudscraper.Options, response: http.IncomingMessage): RequestError; 30 | 31 | prototype: CaptchaError; 32 | } 33 | 34 | export const CaptchaError: CaptchaErrorConstructor; 35 | 36 | export interface CloudflareError extends rp.RequestError { 37 | options: cloudscraper.Options; 38 | errorType: 2 | 4; 39 | } 40 | 41 | export interface CloudflareErrorConstructor extends Error { 42 | new(cause: any, options: cloudscraper.Options, response: http.IncomingMessage): RequestError; 43 | 44 | (cause: any, options: cloudscraper.Options, response: http.IncomingMessage): RequestError; 45 | 46 | prototype: CloudflareError; 47 | } 48 | 49 | export const CloudflareError: CloudflareErrorConstructor; 50 | 51 | export interface ParserError extends rp.RequestError { 52 | options: cloudscraper.Options; 53 | errorType: 3; 54 | } 55 | 56 | export interface ParserErrorConstructor extends Error { 57 | new(cause: any, options: cloudscraper.Options, response: http.IncomingMessage): RequestError; 58 | 59 | (cause: any, options: cloudscraper.Options, response: http.IncomingMessage): RequestError; 60 | 61 | prototype: ParserError; 62 | } 63 | 64 | export const ParserError: ParserErrorConstructor; 65 | 66 | export interface 
StatusCodeError extends rp.RequestError { 67 | options: cloudscraper.Options; 68 | statusCode: number; 69 | errorType: 5; 70 | } 71 | 72 | export interface StatusCodeErrorConstructor extends Error { 73 | new(statusCode: number, body: any, options: cloudscraper.Options, response: http.IncomingMessage): StatusCodeError; 74 | 75 | (statusCode: number, body: any, options: cloudscraper.Options, response: http.IncomingMessage): StatusCodeError; 76 | 77 | prototype: StatusCodeError; 78 | } 79 | 80 | export const StatusCodeError: StatusCodeErrorConstructor; 81 | 82 | export interface TransformError extends rp.RequestError { 83 | options: cloudscraper.Options; 84 | errorType: 6; 85 | } 86 | 87 | export interface TransformErrorConstructor extends Error { 88 | new(cause: any, options: cloudscraper.Options, response: http.IncomingMessage): TransformError; 89 | 90 | (cause: any, options: cloudscraper.Options, response: http.IncomingMessage): TransformError; 91 | 92 | prototype: TransformError; 93 | } 94 | 95 | export const TransformError: TransformErrorConstructor; 96 | -------------------------------------------------------------------------------- /errors.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | // The purpose of this library: 4 | // 1. Have errors consistent with request/promise-core 5 | // 2. Prevent request/promise core from wrapping our errors 6 | // 3. Create descriptive errors. 7 | 8 | // There are two differences between these errors and the originals. 9 | // 1. There is a non-enumerable errorType attribute. 10 | // 2. The error constructor is hidden from the stacktrace. 
11 | 12 | const EOL = require('os').EOL; 13 | const original = require('request-promise-core/errors'); 14 | const http = require('http'); 15 | 16 | const BUG_REPORT = format([ 17 | '### Cloudflare may have changed their technique, or there may be a bug.', 18 | '### Bug Reports: https://github.com/codemanki/cloudscraper/issues', 19 | '### Check the detailed exception message that follows for the cause.' 20 | ]); 21 | 22 | const ERROR_CODES = { 23 | // Non-standard 5xx server error HTTP status codes 24 | 520: 'Web server is returning an unknown error', 25 | 521: 'Web server is down', 26 | 522: 'Connection timed out', 27 | 523: 'Origin is unreachable', 28 | 524: 'A timeout occurred', 29 | 525: 'SSL handshake failed', 30 | 526: 'Invalid SSL certificate', 31 | 527: 'Railgun Listener to Origin Error', 32 | 530: 'Origin DNS error', 33 | // Other codes 34 | 1000: 'DNS points to prohibited IP', 35 | 1001: 'DNS resolution error', 36 | 1002: 'Restricted or DNS points to Prohibited IP', 37 | 1003: 'Access Denied: Direct IP Access Not Allowed', 38 | 1004: 'Host Not Configured to Serve Web Traffic', 39 | 1005: 'Access Denied: IP of banned ASN/ISP', 40 | 1010: 'The owner of this website has banned your access based on your browser\'s signature', 41 | 1011: 'Access Denied (Hotlinking Denied)', 42 | 1012: 'Access Denied', 43 | 1013: 'HTTP hostname and TLS SNI hostname mismatch', 44 | 1016: 'Origin DNS error', 45 | 1018: 'Domain is misconfigured', 46 | 1020: 'Access Denied (Custom Firewall Rules)' 47 | }; 48 | 49 | ERROR_CODES[1006] = 50 | ERROR_CODES[1007] = 51 | ERROR_CODES[1008] = 'Access Denied: Your IP address has been banned'; 52 | 53 | const OriginalError = original.RequestError; 54 | 55 | const RequestError = create('RequestError', 0); 56 | const CaptchaError = create('CaptchaError', 1); 57 | 58 | // errorType 4 is a CloudflareError so this constructor is reused. 
59 | const CloudflareError = create('CloudflareError', 2, function (error) { 60 | if (!isNaN(error.cause)) { 61 | const description = ERROR_CODES[error.cause] || http.STATUS_CODES[error.cause]; 62 | if (description) { 63 | error.message = error.cause + ', ' + description; 64 | } 65 | } 66 | }); 67 | 68 | const ParserError = create('ParserError', 3, function (error) { 69 | error.message = BUG_REPORT + error.message; 70 | }); 71 | 72 | // The following errors originate from promise-core and it's dependents. 73 | // Give them an errorType for consistency. 74 | original.StatusCodeError.prototype.errorType = 5; 75 | original.TransformError.prototype.errorType = 6; 76 | 77 | // This replaces the RequestError for all libraries using request/promise-core 78 | // and prevents silent failure. 79 | Object.defineProperty(original, 'RequestError', { 80 | configurable: true, 81 | enumerable: true, 82 | writable: true, 83 | value: RequestError 84 | }); 85 | 86 | // Export our custom errors along with StatusCodeError, etc. 87 | Object.assign(module.exports, original, { 88 | RequestError: RequestError, 89 | CaptchaError: CaptchaError, 90 | ParserError: ParserError, 91 | CloudflareError: CloudflareError 92 | }); 93 | 94 | const desc = { configurable: true, writable: true, enumerable: false }; 95 | const descriptors = { 96 | error: desc, 97 | cause: desc, 98 | response: desc, 99 | options: desc 100 | }; 101 | 102 | function create (name, errorType, customize) { 103 | function CustomError (cause, options, response) { 104 | // This prevents nasty things e.g. `error.cause.error` and 105 | // is why replacing the original RequestError is necessary. 
106 | if (cause instanceof OriginalError) { 107 | return cause; 108 | } 109 | 110 | // Cleanup error output 111 | Object.defineProperties(this, descriptors); 112 | 113 | OriginalError.apply(this, arguments); 114 | 115 | // Change the name to match this constructor 116 | this.name = name; 117 | 118 | if (typeof customize === 'function') { 119 | customize(this); 120 | } 121 | 122 | if (Error.captureStackTrace) { // required for non-V8 environments 123 | // Provide a proper stack trace that hides this constructor 124 | Error.captureStackTrace(this, CustomError); 125 | } 126 | } 127 | 128 | CustomError.prototype = Object.create(OriginalError.prototype); 129 | CustomError.prototype.constructor = CustomError; 130 | // Keeps things stealthy by defining errorType on the prototype. 131 | // This makes it non-enumerable and safer to add. 132 | CustomError.prototype.errorType = errorType; 133 | 134 | Object.setPrototypeOf(CustomError, Object.getPrototypeOf(OriginalError)); 135 | Object.defineProperty(CustomError, 'name', { 136 | configurable: true, 137 | value: name 138 | }); 139 | 140 | return CustomError; 141 | } 142 | 143 | function format (lines) { 144 | return EOL + lines.join(EOL) + EOL + EOL; 145 | } 146 | -------------------------------------------------------------------------------- /index.d.ts: -------------------------------------------------------------------------------- 1 | import { URL } from 'url'; 2 | import http = require('http'); 3 | import https = require('https'); 4 | import Promise = require('bluebird'); 5 | import request = require('request'); 6 | import rp = require('request-promise'); 7 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 8 | import errors = require('./errors'); 9 | 10 | declare namespace cloudscraper { 11 | interface Cloudscraper extends rp.RequestPromise, BaseOptions { 12 | cloudflareTimeout?: number; 13 | realEncoding: string | null; 14 | // Identify this request as a Cloudscraper request 15 | cloudscraper: boolean; 16 
| } 17 | 18 | interface Captcha { 19 | submit(error?: Error): void; 20 | 21 | url: string; // <- deprecated 22 | siteKey: string; 23 | uri: URL; 24 | form: { 25 | [key: string]: string; 26 | // Secret form value 27 | s: string; 28 | }; 29 | } 30 | 31 | interface Response extends request.Response { 32 | isCloudflare?: boolean; 33 | isHTML?: boolean; 34 | isCaptcha?: boolean; 35 | 36 | // JS Challenge 37 | challenge?: string; 38 | } 39 | 40 | interface CaptchaResponse extends Response { 41 | captcha: Captcha; 42 | isCaptcha: true; 43 | } 44 | 45 | type Requester = 46 | rp.RequestPromiseAPI 47 | | request.RequestAPI; 48 | 49 | // eslint-disable-next-line @typescript-eslint/no-explicit-any 50 | type CaptchaHandler = (options: Options, response: CaptchaResponse, body?: any) => Promise | void; 51 | 52 | interface BaseOptions { 53 | // The default export of either request or request-promise 54 | requester?: Requester; 55 | // Reduce Cloudflare's timeout to cloudflareMaxTimeout if it is excessive 56 | cloudflareMaxTimeout?: number; 57 | // Support only this max challenges in row. 
'use strict';

const requestModule = require('request-promise');
const sandbox = require('./lib/sandbox');
const decodeEmails = require('./lib/email-decode.js');
const { getDefaultHeaders, caseless } = require('./lib/headers');
const brotli = require('./lib/brotli');
const crypto = require('crypto');
const { deprecate } = require('util');

const {
  RequestError,
  CaptchaError,
  CloudflareError,
  ParserError
} = require('./errors');

// Module-wide debug flag, exposed through the `debug` accessor that
// `defaults` installs on every cloudscraper instance.
let debugging = false;

// Placeholder value for the Host header; `performRequest` swaps it for the
// real host once the request's uri is known.
const HOST = Symbol('host');

module.exports = defaults.call(requestModule);

/**
 * Creates a cloudscraper instance whose default options are `params` merged
 * over the defaults of the instance it is called on.
 *
 * Must be invoked with `this` set to either the request-promise module
 * (first call, built-in defaults are used) or an existing cloudscraper
 * instance (its `defaultParams` are used as the base).
 *
 * @param {object} [params] - Options that override the inherited defaults.
 * @returns {Function} The new requester; it carries `defaultParams` and a
 *   `debug` accessor.
 */
function defaults (params) {
  // isCloudScraper === !isRequestModule
  const isRequestModule = this === requestModule;

  let defaultParams = (!isRequestModule && this.defaultParams) || {
    requester: requestModule,
    // Cookies should be enabled
    jar: requestModule.jar(),
    headers: getDefaultHeaders({ Host: HOST }),
    // Reduce Cloudflare's timeout to cloudflareMaxTimeout if it is excessive
    cloudflareMaxTimeout: 30000,
    // followAllRedirects - follow non-GET HTTP 3xx responses as redirects
    followAllRedirects: true,
    // Support at most this many challenges in a row. If CF returns more, throw an error
    challengesToSolve: 3,
    // Remove Cloudflare's email protection
    decodeEmails: false,
    // Support gzip encoded responses
    gzip: true,
    agentOptions: {
      // Removes a few problematic TLSv1.0 ciphers to avoid CAPTCHA
      ciphers: crypto.constants.defaultCipherList + ':!ECDHE+SHA:!AES128-SHA'
    }
  };

  // Object.assign requires at least nodejs v4, request only test/supports v6+
  // The merge is shallow: nested objects such as `headers` are replaced, not merged.
  defaultParams = Object.assign({}, defaultParams, params);

  const cloudscraper = requestModule.defaults
    .call(this, defaultParams, function (options) {
      validateRequest(options);
      return performRequest(options, true);
    });

  // There's no safety net here, any changes apply to all future requests
  // that are made with this instance and derived instances.
  cloudscraper.defaultParams = defaultParams;

  // Ensure this instance gets a copy of our custom defaults function
  // and afterwards, it will be copied over automatically.
  if (isRequestModule) {
    cloudscraper.defaults = defaults;
  }

  // Expose the debug option
  Object.defineProperty(cloudscraper, 'debug', {
    configurable: true,
    enumerable: true,
    set (value) {
      // Honour the assigned value so that `debug = false` switches
      // debugging off again instead of unconditionally enabling it.
      requestModule.debug = debugging = Boolean(value);
    },
    get () {
      return debugging;
    }
  });

  return cloudscraper;
}

/**
 * Normalises and validates the options of a new request (mutates `options`).
 *
 * The caller's encoding is remembered in `options.realEncoding` and
 * `options.encoding` is forced to `null`.
 *
 * @param {object} options - Request options.
 * @throws {TypeError} If `challengesToSolve` or `cloudflareMaxTimeout` is
 *   not numeric, or `requester` is not a function.
 */
function validateRequest (options) {
  // Prevent overwriting realEncoding in subsequent calls
  if (!('realEncoding' in options)) {
    // Can't just do the normal options.encoding || 'utf8'
    // because null is a valid encoding.
    if ('encoding' in options) {
      options.realEncoding = options.encoding;
    } else {
      options.realEncoding = 'utf8';
    }
  }

  options.encoding = null;

  // The global isNaN coerces its argument, so numeric strings are accepted.
  if (isNaN(options.challengesToSolve)) {
    throw new TypeError('Expected `challengesToSolve` option to be a number, ' +
      'got ' + typeof (options.challengesToSolve) + ' instead.');
  }

  if (isNaN(options.cloudflareMaxTimeout)) {
    throw new TypeError('Expected `cloudflareMaxTimeout` option to be a number, ' +
      'got ' + typeof (options.cloudflareMaxTimeout) + ' instead.');
  }

  if (typeof options.requester !== 'function') {
    throw new TypeError('Expected `requester` option to be a function, got ' +
      typeof (options.requester) + ' instead.');
  }
}

// This function is wrapped to ensure that we get new options on first call.
// The options object is reused in subsequent calls when calling it directly.
/**
 * Issues one request through `options.requester` and routes its `error` and
 * `complete` events to `onRequestResponse`.
 *
 * @param {object} options - Request options; on the first request the
 *   requester's callback is stored in `options.callback`.
 * @param {boolean} isFirstRequest - True for the user-initiated request,
 *   false for follow-up requests.
 * @returns {object} The request object, flagged with `cloudscraper = true`.
 * @throws {TypeError} If the created request has no callback function.
 */
function performRequest (options, isFirstRequest) {
  // This should be the default export of either request or request-promise.
  const requester = options.requester;

  // Note that request is always an instanceof ReadableStream, EventEmitter
  // If the requester is request-promise, it is also thenable.
  const request = requester(options);

  // We must define the host header ourselves to preserve case and order.
  if (request.getHeader('host') === HOST) {
    request.setHeader('host', request.uri.host);
  }

  // If the requester is not request-promise, ensure we get a callback.
  if (typeof request.callback !== 'function') {
    throw new TypeError('Expected a callback function, got ' +
      typeof (request.callback) + ' instead.');
  }

  // We only need the callback from the first request.
  // The other callbacks can be safely ignored.
  if (isFirstRequest) {
    // This should be a user supplied callback or request-promise's callback.
    // The callback is always wrapped/bound to the request instance.
    options.callback = request.callback;
  }

  // Replace the requester's own listeners so that every outcome goes
  // through onRequestResponse.
  request.removeAllListeners('error')
    .once('error', function (error) {
      onRequestResponse(options, error);
    });

  request.removeAllListeners('complete')
    .once('complete', function (response, body) {
      onRequestResponse(options, null, response, body);
    });

  // Indicate that this is a cloudscraper request
  request.cloudscraper = true;
  return request;
}

// The argument convention is options first where possible, options
// always before response, and body always after response.
/**
 * Handles the outcome of one request: reports request errors, decompresses
 * Brotli bodies, and hands the response to the Cloudflare or completion path.
 *
 * @param {object} options - Request options; `options.callback` receives the result.
 * @param {*} error - Error emitted by the request, or null.
 * @param {object} [response] - Response object; flagged with
 *   `responseStartTime`, `isCloudflare` and `isHTML`.
 * @param {*} [body] - Response body, normally a Buffer.
 */
function onRequestResponse (options, error, response, body) {
  const callback = options.callback;

  // Encoding is null so body should be a buffer object
  if (error || !body || !body.toString) {
    // Pure request error (bad connection, wrong url, etc)
    return callback(new RequestError(error, options, response));
  }

  const headers = caseless(response.headers);

  response.responseStartTime = Date.now();
  response.isCloudflare = /^(cloudflare|sucuri)/i.test('' + headers.server);
  response.isHTML = /text\/html/i.test('' + headers['content-type']);

  // If body isn't a buffer, this is a custom response body.
  if (!Buffer.isBuffer(body)) {
    return callback(null, response, body);
  }

  // Decompress brotli compressed responses
  if (/\bbr\b/i.test('' + headers['content-encoding'])) {
    if (!brotli.isAvailable) {
      const cause = 'Received a Brotli compressed response. Please install brotli';
      return callback(new RequestError(cause, options, response));
    }

    try {
      response.body = body = brotli.decompress(body);
    } catch (error) {
      return callback(new RequestError(error, options, response));
    }

    // Request doesn't handle brotli and would've failed to parse JSON.
    if (options.json) {
      try {
        response.body = body = JSON.parse(body, response.request._jsonReviver);
        // If successful, this isn't a challenge.
        return callback(null, response, body);
      } catch (error) {
        // Request's debug will log the failure, no need to duplicate.
        // Deliberately fall through: the body is then treated as a page.
      }
    }
  }

  if (response.isCloudflare && response.isHTML) {
    onCloudflareResponse(options, response, body);
  } else {
    onRequestComplete(options, response, body);
  }
}

/**
 * Classifies an HTML response from a Cloudflare/Sucuri server and dispatches
 * it to the captcha, challenge, redirect-challenge or completion handler.
 *
 * @param {object} options - Request options.
 * @param {object} response - Response object.
 * @param {Buffer} body - Response body.
 */
function onCloudflareResponse (options, response, body) {
  const callback = options.callback;

  if (body.length < 1) {
    // This is a 4xx-5xx Cloudflare response with an empty body.
    return callback(new CloudflareError(response.statusCode, options, response));
  }

  const stringBody = body.toString('utf8');

  try {
    validateResponse(options, response, stringBody);
  } catch (error) {
    if (error instanceof CaptchaError && typeof options.onCaptcha === 'function') {
      // Give users a chance to solve the reCAPTCHA via services such as anti-captcha.com
      return onCaptcha(options, response, stringBody);
    }

    return callback(error);
  }

  const isChallenge = stringBody.indexOf('a = document.getElementById(\'jschl-answer\');') !== -1;

  if (isChallenge) {
    return onChallenge(options, response, stringBody);
  }

  const isRedirectChallenge = stringBody.indexOf('You are being redirected') !== -1 ||
    stringBody.indexOf('sucuri_cloudproxy_js') !== -1;

  if (isRedirectChallenge) {
    return onRedirectChallenge(options, response, stringBody);
  }

  // 503 status is always a challenge
  if (response.statusCode === 503) {
    return onChallenge(options, response, stringBody);
  }

  // All is good
  onRequestComplete(options, response, body);
}

/**
 * Detects which reCAPTCHA page variant, if any, `body` contains.
 *
 * @param {string} body - Response body.
 * @returns {'ver1'|'ver2'|false} The detected variant, or false if none.
 */
function detectRecaptchaVersion (body) {
  // New version > Dec 2019
  if (/__cf_chl_captcha_tk__=(.*)/i.test(body)) { // Test for ver2 first, as it also has ver1 fields
    return 'ver2';
  }

  // Old version < Dec 2019
  if (body.indexOf('why_captcha') !== -1 || /cdn-cgi\/l\/chk_captcha/i.test(body)) {
    return 'ver1';
  }

  return false;
}

/**
 * Throws if `body` is a captcha page or a Cloudflare error page.
 *
 * @param {object} options - Request options, attached to the thrown error.
 * @param {object} response - Response object; `isCaptcha` is set to true
 *   when a captcha is detected.
 * @param {string} body - Response body.
 * @returns {false} When the body is neither a captcha nor an error page.
 * @throws {CaptchaError} When a reCAPTCHA page is detected.
 * @throws {CloudflareError} When a `cf-error-code` element is present; the
 *   cause is the parsed code.
 */
function validateResponse (options, response, body) {
  // Finding captcha
  const recaptchaVer = detectRecaptchaVersion(body);
  if (recaptchaVer) {
    // Convenience boolean
    response.isCaptcha = true;
    throw new CaptchaError('captcha', options, response);
  }

  // Trying to find '1006'
  const match = body.match(/<\w+\s+class="cf-error-code">(.*)<\/\w+>/i);

  if (match) {
    const code = parseInt(match[1], 10);
    throw new CloudflareError(code, options, response);
  }

  return false;
}

/**
 * Solves a JavaScript challenge page and re-issues the request with the answer.
 *
 * Extracts the hidden form fields and the challenge script from `body`,
 * evaluates the script in the sandbox, rewrites `options` (uri, headers,
 * form/qs, method) to submit the answer, and schedules `performRequest`
 * after the challenge delay. Failures are reported through
 * `options.callback`.
 *
 * @param {object} options - Request options (mutated).
 * @param {object} response - The challenge response; `challenge` is set to
 *   the extracted script.
 * @param {string} body - The challenge page.
 */
function onChallenge (options, response, body) {
  const callback = options.callback;
  const uri = response.request.uri;
  // The query string to send back to Cloudflare
  const payload = { /* s, jschl_vc, pass, jschl_answer */ };

  let cause;
  let error;

  if (options.challengesToSolve === 0) {
    cause = 'Cloudflare challenge loop';
    error = new CloudflareError(cause, options, response);
    error.errorType = 4;

    return callback(error);
  }

  // A user supplied cloudflareTimeout overrides the delay parsed from the page.
  let timeout = parseInt(options.cloudflareTimeout, 10);
  let match;

  // First hidden input of the form, whatever its name is.
  match = body.match(/name="(.+?)" value="(.+?)"/);

  if (match) {
    const hiddenInputName = match[1];
    payload[hiddenInputName] = match[2];
  }

  match = body.match(/name="jschl_vc" value="(\w+)"/);
  if (!match) {
    cause = 'challengeId (jschl_vc) extraction failed';
    return callback(new ParserError(cause, options, response));
  }

  payload.jschl_vc = match[1];

  match = body.match(/name="pass" value="(.+?)"/);
  if (!match) {
    cause = 'Attribute (pass) value extraction failed';
    return callback(new ParserError(cause, options, response));
  }

  payload.pass = match[1];

  // Group 1 is the challenge script, group 2 (optional) the delay in ms.
  match = body.match(/getElementById\('cf-content'\)[\s\S]+?setTimeout.+?\r?\n([\s\S]+?a\.value\s*=.+?)\r?\n(?:[^{<>]*},\s*(\d{4,}))?/);
  if (!match) {
    cause = 'setTimeout callback extraction failed';
    return callback(new ParserError(cause, options, response));
  }

  if (Number.isNaN(timeout)) {
    if (match[2] !== undefined) {
      timeout = parseInt(match[2], 10);

      if (timeout > options.cloudflareMaxTimeout) {
        if (debugging) {
          console.warn('Cloudflare\'s timeout is excessive: ' + (timeout / 1000) + 's');
        }

        timeout = options.cloudflareMaxTimeout;
      }
    } else {
      cause = 'Failed to parse challenge timeout';
      return callback(new ParserError(cause, options, response));
    }
  }

  // Append a.value so it's always returned from the vm
  response.challenge = match[1] + '; a.value';

  try {
    const ctx = new sandbox.Context({ hostname: uri.hostname, body });
    payload.jschl_answer = sandbox.eval(response.challenge, ctx);
  } catch (error) {
    error.message = 'Challenge evaluation failed: ' + error.message;
    return callback(new ParserError(error, options, response));
  }

  // The coercing global isNaN is intentional: it also rejects a
  // non-numeric string answer such as 'NaN'.
  if (isNaN(payload.jschl_answer)) {
    cause = 'Challenge answer is not a number';
    return callback(new ParserError(cause, options, response));
  }

  // Prevent reusing the headers object to simplify unit testing.
  options.headers = Object.assign({}, options.headers);
  // Use the original uri as the referer and to construct the answer uri.
  options.headers.Referer = uri.href;
  // Check is form to be submitted via GET or POST
  match = body.match(/id="challenge-form" action="(.+?)" method="(.+?)"/);
  if (match && match[2] === 'POST') {
    options.uri = uri.protocol + '//' + uri.host + match[1];
    // Pass the payload using body form
    options.form = payload;
    options.method = 'POST';
  } else {
    // Whatever is there, fallback to GET
    options.uri = uri.protocol + '//' + uri.host + '/cdn-cgi/l/chk_jschl';
    // Pass the payload using query string
    options.qs = payload;
  }
  // Decrement the number of challenges to solve.
  options.challengesToSolve -= 1;
  // baseUrl can't be used in conjunction with an absolute uri
  if (options.baseUrl !== undefined) {
    options.baseUrl = undefined;
  }
  // Change required by Cloudflare in Jan-Feb 2020: the form action is taken
  // from HTML, so escaped ampersands must be decoded before use as a uri.
  options.uri = options.uri.replace(/&(?:amp|#38);/g, '&');

  // Make request with answer after delay.
  timeout -= Date.now() - response.responseStartTime;
  // Never pass a negative delay to the timer.
  setTimeout(performRequest, Math.max(0, timeout), options, false);
}
id=["']?challenge-form['"]?(?: [^<>]*)?>([\S\s]*?)<\/form>/); 414 | if (!match) { 415 | cause = 'Challenge form extraction failed'; 416 | return callback(new ParserError(cause, options, response)); 417 | } 418 | 419 | const form = match[1]; 420 | 421 | let siteKey; 422 | let rayId; // only for ver 2 423 | 424 | if (isRecaptchaVer2) { 425 | match = body.match(/\sdata-ray=["']?([^\s"'<>&]+)/); 426 | if (!match) { 427 | cause = 'Unable to find cloudflare ray id'; 428 | return callback(new ParserError(cause, options, response)); 429 | } 430 | rayId = match[1]; 431 | } 432 | 433 | match = body.match(/\sdata-sitekey=["']?([^\s"'<>&]+)/); 434 | if (match) { 435 | siteKey = match[1]; 436 | } else { 437 | const keys = []; 438 | const re = /\/recaptcha\/api2?\/(?:fallback|anchor|bframe)\?(?:[^\s<>]+&(?:amp;)?)?[Kk]=["']?([^\s"'<>&]+)/g; 439 | 440 | while ((match = re.exec(body)) !== null) { 441 | // Prioritize the explicit fallback siteKey over other matches 442 | if (match[0].indexOf('fallback') !== -1) { 443 | keys.unshift(match[1]); 444 | if (!debugging) break; 445 | } else { 446 | keys.push(match[1]); 447 | } 448 | } 449 | 450 | siteKey = keys[0]; 451 | 452 | if (!siteKey) { 453 | cause = 'Unable to find the reCAPTCHA site key'; 454 | return callback(new ParserError(cause, options, response)); 455 | } 456 | 457 | if (debugging) { 458 | console.warn('Failed to find data-sitekey, using a fallback:', keys); 459 | } 460 | } 461 | 462 | // Everything that is needed to solve the reCAPTCHA 463 | response.captcha = { 464 | siteKey, 465 | uri: response.request.uri, 466 | form: payload, 467 | version: recaptchaVer 468 | }; 469 | 470 | if (isRecaptchaVer2) { 471 | response.rayId = rayId; 472 | 473 | match = body.match(/id="challenge-form" action="(.+?)" method="(.+?)"/); 474 | if (!match) { 475 | cause = 'Challenge form action and method extraction failed'; 476 | return callback(new ParserError(cause, options, response)); 477 | } 478 | response.captcha.formMethod = match[2]; 479 | 
match = match[1].match(/\/(.*)/); 480 | response.captcha.formActionUri = match[0]; 481 | payload.id = rayId; 482 | } 483 | 484 | Object.defineProperty(response.captcha, 'url', { 485 | configurable: true, 486 | enumerable: false, 487 | get: deprecate(function () { 488 | return response.request.uri.href; 489 | }, 'captcha.url is deprecated. Please use captcha.uri instead.') 490 | }); 491 | 492 | // Adding formData 493 | match = form.match(/]*)? name=[^<>]+>/g); 494 | if (!match) { 495 | cause = 'Challenge form is missing inputs'; 496 | return callback(new ParserError(cause, options, response)); 497 | } 498 | 499 | const inputs = match; 500 | // Only adding inputs that have both a name and value defined 501 | for (let name, value, i = 0; i < inputs.length; i++) { 502 | name = inputs[i].match(/name=["']?([^\s"'<>]*)/); 503 | if (name) { 504 | value = inputs[i].match(/value=["']?([^\s"'<>]*)/); 505 | if (value) { 506 | payload[name[1]] = value[1]; 507 | } 508 | } 509 | } 510 | 511 | // Sanity check 512 | if (!payload.s && !payload.r) { 513 | cause = 'Challenge form is missing secret input'; 514 | return callback(new ParserError(cause, options, response)); 515 | } 516 | 517 | if (debugging) { 518 | console.warn('Captcha:', response.captcha); 519 | } 520 | 521 | // The callback used to green light form submission 522 | const submit = function (error) { 523 | if (error) { 524 | // Pass an user defined error back to the original request call 525 | return callback(new CaptchaError(error, options, response)); 526 | } 527 | 528 | onSubmitCaptcha(options, response); 529 | }; 530 | 531 | // This seems like an okay-ish API (fewer arguments to the handler) 532 | response.captcha.submit = submit; 533 | 534 | // We're handing control over to the user now. 
535 | const thenable = handler(options, response, body); 536 | // Handle the case where the user returns a promise 537 | if (thenable && typeof thenable.then === 'function') { 538 | // eslint-disable-next-line promise/catch-or-return 539 | thenable.then(submit, function (error) { 540 | if (!error) { 541 | // The user broke their promise with a falsy error 542 | submit(new Error('Falsy error')); 543 | } else { 544 | submit(error); 545 | } 546 | }); 547 | } 548 | } 549 | 550 | function onSubmitCaptcha (options, response) { 551 | const callback = options.callback; 552 | const uri = response.request.uri; 553 | const isRecaptchaVer2 = response.captcha.version === 'ver2'; 554 | 555 | if (!response.captcha.form['g-recaptcha-response']) { 556 | const cause = 'Form submission without g-recaptcha-response'; 557 | return callback(new CaptchaError(cause, options, response)); 558 | } 559 | 560 | if (isRecaptchaVer2) { 561 | options.qs = { 562 | __cf_chl_captcha_tk__: response.captcha.formActionUri.match(/__cf_chl_captcha_tk__=(.*)/)[1] 563 | }; 564 | 565 | options.form = response.captcha.form; 566 | } else { 567 | options.qs = response.captcha.form; 568 | } 569 | 570 | options.method = response.captcha.formMethod || 'GET'; 571 | 572 | // Prevent reusing the headers object to simplify unit testing. 573 | options.headers = Object.assign({}, options.headers); 574 | // Use the original uri as the referer and to construct the form action. 
575 | options.headers.Referer = uri.href; 576 | if (isRecaptchaVer2) { 577 | options.uri = uri.protocol + '//' + uri.host + response.captcha.formActionUri; 578 | } else { 579 | options.uri = uri.protocol + '//' + uri.host + '/cdn-cgi/l/chk_captcha'; 580 | } 581 | 582 | performRequest(options, false); 583 | } 584 | 585 | function onRedirectChallenge (options, response, body) { 586 | const callback = options.callback; 587 | const uri = response.request.uri; 588 | 589 | const match = body.match(/S='([^']+)'/); 590 | if (!match) { 591 | const cause = 'Cookie code extraction failed'; 592 | return callback(new ParserError(cause, options, response)); 593 | } 594 | 595 | const base64EncodedCode = match[1]; 596 | response.challenge = Buffer.from(base64EncodedCode, 'base64').toString('ascii'); 597 | 598 | try { 599 | // Evaluate cookie setting code 600 | const ctx = new sandbox.Context(); 601 | sandbox.eval(response.challenge, ctx); 602 | 603 | options.jar.setCookie(ctx.document.cookie, uri.href, { ignoreError: true }); 604 | } catch (error) { 605 | error.message = 'Cookie code evaluation failed: ' + error.message; 606 | return callback(new ParserError(error, options, response)); 607 | } 608 | 609 | options.challengesToSolve -= 1; 610 | 611 | performRequest(options, false); 612 | } 613 | 614 | function onRequestComplete (options, response, body) { 615 | const callback = options.callback; 616 | 617 | if (typeof options.realEncoding === 'string') { 618 | body = body.toString(options.realEncoding); 619 | // The resolveWithFullResponse option will resolve with the response 620 | // object. This changes the response.body so it is as expected. 
621 | 622 | if (response.isHTML && options.decodeEmails) { 623 | body = decodeEmails(body); 624 | } 625 | 626 | response.body = body; 627 | } 628 | 629 | callback(null, response, body); 630 | } 631 | -------------------------------------------------------------------------------- /index.test-d.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable @typescript-eslint/no-explicit-any */ 2 | import { expectType } from 'tsd'; 3 | import { URL } from 'url'; 4 | import { 5 | Options, Cloudscraper, CaptchaHandler, CoreOptions, DefaultOptions, 6 | CaptchaResponse, Captcha 7 | } from './index'; 8 | import Promise = require('bluebird'); 9 | import request = require('request'); 10 | import rp = require('request-promise'); 11 | import cloudscraper = require('./index'); 12 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 13 | import errors = require('./errors'); 14 | 15 | const noop = (): void => {}; 16 | 17 | expectType({ uri: '' }); 18 | expectType({ url: '' }); 19 | 20 | expectType({ uri: '', requester: request }); 21 | expectType({ uri: '', requester: rp }); 22 | 23 | expectType(cloudscraper({ uri: '' })); 24 | expectType(cloudscraper.get({ uri: '' })); 25 | expectType(cloudscraper.post({ uri: '' })); 26 | expectType(cloudscraper.put({ uri: '' })); 27 | expectType(cloudscraper.delete({ uri: '' })); 28 | expectType(cloudscraper.del({ uri: '' })); 29 | expectType(cloudscraper.head({ uri: '' })); 30 | expectType(cloudscraper.patch({ uri: '' })); 31 | 32 | expectType(cloudscraper('')); 33 | expectType(cloudscraper.get('')); 34 | expectType(cloudscraper.post('')); 35 | expectType(cloudscraper.put('')); 36 | expectType(cloudscraper.delete('')); 37 | expectType(cloudscraper.del('')); 38 | expectType(cloudscraper.head('')); 39 | expectType(cloudscraper.patch('')); 40 | 41 | // eslint-disable-next-line promise/always-return 42 | expectType>(cloudscraper.get({ uri: '' }).then(noop)); 43 | 
expectType>(cloudscraper.get({ uri: '' }).catch(noop)); 44 | expectType>(cloudscraper.get({ uri: '' }).finally(noop)); 45 | expectType>(cloudscraper.get({ uri: '' }).promise()); 46 | expectType(cloudscraper.get({ uri: '' }).cancel()); 47 | 48 | expectType((options: Options, response: CaptchaResponse) => { 49 | expectType(options); 50 | expectType(response); 51 | 52 | const { captcha, isCaptcha } = response; 53 | 54 | expectType(captcha); 55 | expectType(isCaptcha); 56 | 57 | expectType({ 58 | url: '', // <- deprecated 59 | uri: new URL(''), 60 | siteKey: '', 61 | submit: captcha.submit, 62 | form: { s: '' } 63 | }); 64 | 65 | captcha.submit(); 66 | }); 67 | 68 | expectType(cloudscraper.defaultParams); 69 | expectType({ 70 | requester: request, 71 | cloudflareMaxTimeout: 0, 72 | challengesToSolve: 0, 73 | decodeEmails: false, 74 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 75 | onCaptcha: (options: Options, response: CaptchaResponse) => {} 76 | }); 77 | 78 | expectType({ 79 | requester: request, 80 | cloudflareMaxTimeout: 0, 81 | challengesToSolve: 0, 82 | decodeEmails: false, 83 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 84 | onCaptcha: (options: Options, response: CaptchaResponse) => {}, 85 | realEncoding: 'utf-8' 86 | }); 87 | -------------------------------------------------------------------------------- /lib/brotli.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const zlib = require('zlib'); 4 | 5 | const brotli = module.exports; 6 | // Convenience boolean used to check for brotli support 7 | brotli.isAvailable = false; 8 | // Exported for tests 9 | brotli.optional = optional; 10 | 11 | // Check for node's built-in brotli support 12 | if (typeof zlib.brotliDecompressSync === 'function') { 13 | brotli.decompress = function (buf) { 14 | return zlib.brotliDecompressSync(buf); 15 | }; 16 | 17 | brotli.isAvailable = true; 18 | } else if (optional(require)) { 19 | 
brotli.isAvailable = true; 20 | } 21 | 22 | function optional (require) { 23 | try { 24 | // Require the NPM installed brotli 25 | const decompress = require('brotli/decompress'); 26 | 27 | brotli.decompress = function (buf) { 28 | return Buffer.from(decompress(buf)); 29 | }; 30 | 31 | return typeof decompress === 'function'; 32 | } catch (error) { 33 | // Don't throw an exception if the module is not installed 34 | if (error.code !== 'MODULE_NOT_FOUND') { 35 | throw error; 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /lib/email-decode.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const pattern = ( 4 | // Opening tag 5 | // $1 = TAG_NAME 6 | '<([a-z]+)(?: [^>]*)?' + '(?:' + 7 | // href attribute 8 | // $2 = /cdn-cgi/l/email-protection#HEX_STRING 9 | // $3 = HEX_STRING 10 | ' href=[\'"]?(\\/cdn-cgi\\/l\\/email-protection#([a-f0-9]{4,}))' + '|' + 11 | // data attribute 12 | // $4 = HEX_STRING 13 | ' data-cfemail=["\']?([a-f0-9]{4,})' + 14 | // Self-closing or innerHTML(disallow child nodes) followed by closing tag 15 | // \1 backreference to $1 16 | '(?:[^<]*\\/>|[^<]*?<\\/\\1>)' + ')' 17 | ); 18 | 19 | const re = new RegExp(pattern, 'gi'); 20 | 21 | module.exports = function (html) { 22 | let match, result; 23 | 24 | re.lastIndex = 0; 25 | 26 | while ((match = re.exec(html)) !== null) { 27 | if (match[2] !== undefined) { 28 | result = match[0].replace(match[2], 'mailto:' + decode(match[3])); 29 | } else { 30 | result = decode(match[4]); 31 | } 32 | 33 | html = html.substr(0, match.index) + result + html.substr(re.lastIndex); 34 | re.lastIndex = match.index + result.length - 1; 35 | } 36 | 37 | return html; 38 | }; 39 | 40 | function decode (hexStr) { 41 | const key = parseInt(hexStr.substr(0, 2), 16); 42 | let email = ''; 43 | 44 | // noinspection ES6ConvertVarToLetConst 45 | for (var codePoint, i = 2; i < hexStr.length; i += 2) { 46 | 
codePoint = parseInt(hexStr.substr(i, 2), 16) ^ key; 47 | email += String.fromCharCode(codePoint); 48 | } 49 | 50 | // noinspection JSDeprecatedSymbols 51 | return decodeURIComponent(escape(email)); 52 | } 53 | -------------------------------------------------------------------------------- /lib/headers.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const chromeData = require('./browsers').chrome; 4 | const useBrotli = require('./brotli').isAvailable; 5 | 6 | module.exports = { getDefaultHeaders, caseless }; 7 | 8 | function getDefaultHeaders (defaults) { 9 | const headers = getChromeHeaders(random(chromeData)); 10 | return Object.assign({}, defaults, headers); 11 | } 12 | 13 | function random (arr) { 14 | return arr[Math.floor(Math.random() * arr.length)]; 15 | } 16 | 17 | function getChromeHeaders (options) { 18 | const { headers } = options; 19 | 20 | headers['User-Agent'] = random(options['User-Agent']); 21 | 22 | if (!useBrotli && headers['Accept-Encoding']) { 23 | headers['Accept-Encoding'] = 24 | headers['Accept-Encoding'].replace(/,?\s*\bbr\b\s*/i, ''); 25 | } 26 | 27 | return headers; 28 | } 29 | 30 | function caseless (headers) { 31 | const result = {}; 32 | 33 | Object.keys(headers).forEach(key => { 34 | result[key.toLowerCase()] = headers[key]; 35 | }); 36 | 37 | return result; 38 | } 39 | -------------------------------------------------------------------------------- /lib/sandbox.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const vm = require('vm'); 4 | 5 | const VM_OPTIONS = { 6 | filename: 'iuam-challenge.js', 7 | contextOrigin: 'cloudflare:iuam-challenge.js', 8 | contextCodeGeneration: { strings: true, wasm: false }, 9 | timeout: 5000 10 | }; 11 | 12 | const VM_ENV = ` 13 | (function (global) { 14 | const cache = Object.create(null); 15 | const keys = []; 16 | const { body, href } = global; 17 | 18 | 
Object.defineProperties(global, { 19 | document: { 20 | value: { 21 | createElement: function () { 22 | return { firstChild: { href: href } }; 23 | }, 24 | getElementById: function (id) { 25 | if (keys.indexOf(id) === -1) { 26 | const re = new RegExp(' id=[\\'"]?' + id + '[^>]*>([^<]*)'); 27 | const match = body.match(re); 28 | 29 | keys.push(id); 30 | cache[id] = match === null ? match : { innerHTML: match[1] }; 31 | } 32 | 33 | return cache[id]; 34 | } 35 | } 36 | }, 37 | location: { value: { reload: function () {} } } 38 | }) 39 | }(this)); 40 | `; 41 | 42 | module.exports = { eval: evaluate, Context }; 43 | 44 | function evaluate (code, ctx) { 45 | return vm.runInNewContext(VM_ENV + code, ctx, VM_OPTIONS); 46 | } 47 | 48 | // Global context used to evaluate standard IUAM JS challenge 49 | function Context (options) { 50 | if (!options) options = { body: '', hostname: '' }; 51 | 52 | const atob = Object.setPrototypeOf(function (str) { 53 | try { 54 | return Buffer.from(str, 'base64').toString('binary'); 55 | } catch (e) {} 56 | }, null); 57 | 58 | return Object.setPrototypeOf({ 59 | body: options.body, 60 | href: 'http://' + options.hostname + '/', 61 | atob 62 | }, null); 63 | } 64 | -------------------------------------------------------------------------------- /mocha.opts: -------------------------------------------------------------------------------- 1 | --reporter spec 2 | --require tests/common -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cloudscraper", 3 | "version": "4.6.0", 4 | "description": "Bypasses cloudflare's anti-ddos page", 5 | "main": "index.js", 6 | "engines": { 7 | "node": ">=8" 8 | }, 9 | "files": [ 10 | "lib/", 11 | "index.js", 12 | "index.d.ts", 13 | "errors.js", 14 | "errors.d.ts" 15 | ], 16 | "scripts": { 17 | "test": "npm run lint && npm run test:typescript && nyc --reporter=html 
--reporter=text mocha", 18 | "test:typescript": "tsc *.ts --noEmit && tsd", 19 | "coverage": "nyc report --reporter=text-lcov | coveralls", 20 | "lint": "eslint --ext .json --ext .js --ext .ts ." 21 | }, 22 | "repository": { 23 | "type": "git", 24 | "url": "https://github.com/codemanki/cloudscraper.git" 25 | }, 26 | "publishConfig": { 27 | "registry": "http://registry.npmjs.org" 28 | }, 29 | "keywords": [ 30 | "cloudflare", 31 | "ddos", 32 | "scrape", 33 | "webscraper", 34 | "anti-bot", 35 | "waf", 36 | "iuam", 37 | "bypass", 38 | "challenge" 39 | ], 40 | "author": "Oleksii Sribnyi", 41 | "license": "MIT", 42 | "homepage": "https://github.com/codemanki/cloudscraper", 43 | "dependencies": { 44 | "request-promise": "^4.2.4" 45 | }, 46 | "devDependencies": { 47 | "@types/request-promise": "^4.1.44", 48 | "@typescript-eslint/eslint-plugin": "^2.3.1", 49 | "@typescript-eslint/parser": "^2.3.1", 50 | "chai": "^4.2.0", 51 | "chai-as-promised": "^7.1.1", 52 | "coveralls": "^3.0.3", 53 | "eslint": "^6.0.0", 54 | "eslint-config-standard": "^14.0.0", 55 | "eslint-plugin-import": "^2.16.0", 56 | "eslint-plugin-json": "^1.4.0", 57 | "eslint-plugin-node": "^10.0.0", 58 | "eslint-plugin-promise": "^4.0.1", 59 | "eslint-plugin-standard": "^4.0.0", 60 | "express": "^4.16.4", 61 | "mocha": "^6.1.1", 62 | "nyc": "^15.0.0", 63 | "sinon": "^7.2.4", 64 | "sinon-chai": "^3.3.0", 65 | "tsd": "^0.8.0", 66 | "typescript": "^3.6.3" 67 | }, 68 | "peerDependencies": { 69 | "brotli": "^1.3.2", 70 | "request": "^2.88.0" 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /test/common.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var chai = require('chai'); 4 | 5 | chai.use(require('sinon-chai')); 6 | chai.use(require('chai-as-promised')); 7 | 8 | chai.config.includeStack = true; 9 | -------------------------------------------------------------------------------- 
/test/fixtures/access_denied.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Access denied | site.com used CloudFlare to restrict access 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | Please enable cookies. 24 | 25 | 26 | 27 | Error 28 | 1006 29 | Ray ID: 19400a3d29e30f8d 30 | 31 | Access denied 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | What happened? 40 | The owner of this website (site.com.com) has banned your IP address (91.91.111.11). 41 | 42 | 43 | 44 | 45 | 46 | 47 | 57 | 58 | 59 | 60 | 61 | 62 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /test/fixtures/captcha.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Attention Required! | CloudFlare 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | Please enable cookies. 26 | 27 | 28 | One more step 29 | Please complete the security check to access site.com 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | Refresh 42 | Show Image 43 | Announce 44 | Info 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | Enter above text 58 | 59 | 60 | Leave a message for the site owner? (100 characters left) 61 | 62 | 63 | 64 | 65 | 66 | Submit 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | Why do I have to complete a CAPTCHA? 88 | 89 | Completing the CAPTCHA proves you are a human and gives you temporary access to the web property. 90 | 91 | 92 | 93 | What can I do to prevent this in the future? 94 | 95 | If you are on a personal connection, like at home, you can run an anti-virus scan on your device to make sure it is not infected with malware. 
96 | 97 | If you are at an office or shared network, you can ask the network administrator to run a scan across the network looking for misconfigured or infected devices. 98 | 99 | 100 | 101 | 102 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /test/fixtures/cf_recaptcha_01_12_2019.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Attention Required! | Cloudflare 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | Please enable cookies. 29 | 30 | 31 | One more step 32 | Please complete the security check to access www.cloudflare.com 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | Why do I have to complete a CAPTCHA? 77 | 78 | Completing the CAPTCHA proves you are a human and gives you temporary access to the web property. 79 | 80 | 81 | 82 | What can I do to prevent this in the future? 83 | 84 | 85 | If you are on a personal connection, like at home, you can run an anti-virus scan on your device to make sure it is not infected with malware. 86 | 87 | If you are at an office or shared network, you can ask the network administrator to run a scan across the network looking for misconfigured or infected devices. 88 | 89 | 90 | Another way to prevent getting this page in the future is to use Privacy Pass. You may need to download version 2.0 now from the Chrome Web Store. 
91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 108 | 109 | 110 | 111 | 112 | 113 | 118 | 119 | 120 | 121 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /test/fixtures/cf_recaptcha_15_04_2019.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Attention Required! | Cloudflare 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | Please enable cookies. 29 | 30 | 31 | One more step 32 | Please complete the security check to access example-site.dev 33 | 34 | 35 | 36 | 37 | 38 | 39 | table 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | Why do I have to complete a CAPTCHA? 123 | 124 | Completing the CAPTCHA proves you are a human and gives you temporary access to the web property. 125 | 126 | 127 | 128 | What can I do to prevent this in the future? 129 | 130 | 131 | If you are on a personal connection, like at home, you can run an anti-virus scan on your device to make sure it is not infected with malware. 132 | 133 | If you are at an office or shared network, you can ask the network administrator to run a scan across the network looking for misconfigured or infected devices. 134 | 135 | 136 | 137 | 138 | 139 | 140 | 150 | 151 | 152 | 153 | 154 | 155 | 160 | 161 | 162 | 163 | -------------------------------------------------------------------------------- /test/fixtures/invalid_js_challenge.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Just a moment... 10 | 18 | 19 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | Please turn JavaScript on and reload the page. 48 | 49 | 50 | Checking your browser before accessing site.com. 51 | This process is automatic. 
Your browser will redirect to your requested content shortly. 52 | Please allow up to 5 seconds… 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | DDoS protection by CloudFlare 62 | 63 | 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /test/fixtures/js_challenge_03_12_2018_1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Just a moment... 10 | 21 | 22 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | Please turn JavaScript on and reload the page. 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | Checking your browser before accessing iload.to. 62 | This process is automatic. Your browser will redirect to your requested content shortly. 63 | Please allow up to 5 seconds… 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | DDoS protection by Cloudflare 76 | 77 | Ray ID: 4834ce407815974a 78 | 79 | 80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /test/fixtures/js_challenge_03_12_2018_2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Just a moment... 10 | 21 | 22 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | Please turn JavaScript on and reload the page. 54 | 55 | 56 | 57 | 58 | 59 | 60 | Checking your browser before accessing iload.to. 61 | 62 | This process is automatic. Your browser will redirect to your requested content shortly. 63 | Please allow up to 5 seconds… 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | DDoS protection by Cloudflare 76 | 77 | Ray ID: 4834ce66ab7b9706 78 | 79 | 80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /test/fixtures/js_challenge_09_06_2016.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Just a moment... 
10 | 21 | 22 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | Please turn JavaScript on and reload the page. 53 | 54 | 55 | 56 | 57 | 58 | 59 | Checking your browser before accessing cineblog01.cc. 60 | This process is automatic. Your browser will redirect to your requested content shortly. 61 | Please allow up to 5 seconds… 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | DDoS protection by CloudFlare 73 | 74 | Ray ID: 2b05d3393e872d77 75 | 76 | 77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /test/fixtures/js_challenge_10_04_2019.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Just a moment... 10 | 21 | 22 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | Please turn JavaScript on and reload the page. 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | This process is automatic. Your browser will redirect to your requested content shortly. 77 | Please allow up to 5 seconds… 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | +((!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+[])+(!+[]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(+[])+(!+[]+!![]+!![]+!![])+(+!![])+(!+[]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]))/+((!+[]+!![]+!![]+!![]+!![]+!![]+[])+(+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![])+(+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![])+(+[])) 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /test/fixtures/js_challenge_13_03_2019.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Just a moment... 10 | 21 | 22 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | Please turn JavaScript on and reload the page. 
54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | Checking your browser before accessing website.com. 62 | 63 | This process is automatic. Your browser will redirect to your requested content shortly. 64 | Please allow up to 5 seconds… 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | DDoS protection by Cloudflare 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /test/fixtures/js_challenge_21_03_2019.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Just a moment... 10 | 21 | 22 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | Please turn JavaScript on and reload the page. 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | This process is automatic. Your browser will redirect to your requested content shortly. 63 | Please allow up to 5 seconds… 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | +((!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+[])+(!+[]+!![]+!![]+!![])+(+!![])+(+[])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(+!![]))/+((!+[]+!![]+[])+(!+[]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(+[])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![])) 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /test/fixtures/js_challenge_28_11_2019.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Just a moment... 10 | 21 | 22 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | Please turn JavaScript on and reload the page. 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | Checking your browser before accessing. 76 | 77 | This process is automatic. 
Your browser will redirect to your requested content shortly. 78 | Please allow up to 5 seconds… 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | +((!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+[])+(!+[]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![])+(+[])+(!+[]+!![]+!![]))/+((!+[]+!![]+!![]+!![]+[])+(!+[]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(+!![])+(!+[]+!![]+!![])) 89 | 90 | 91 | 92 | 93 | 94 | DDoS protection by Cloudflare 95 | 96 | Ray ID: 53cb1af29bc6c2d6 97 | 98 | 99 | 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /test/fixtures/page_with_emails.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Cloudscraper 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | The email is [email protected] 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /test/fixtures/requested_page.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | The requested page 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | This is the page you want to parse 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /test/fixtures/sucuri_waf_11_08_2019.html: -------------------------------------------------------------------------------- 1 | You are being redirected... 2 | Javascript is required. Please enable javascript before you are allowed to see this page. 
3 | 4 | -------------------------------------------------------------------------------- /test/fixtures/sucuri_waf_18_08_2016.html: -------------------------------------------------------------------------------- 1 | You are being redirected... 2 | Javascript is required. Please enable javascript before you are allowed to see this page. 3 | 4 | -------------------------------------------------------------------------------- /test/helper.js: -------------------------------------------------------------------------------- 1 | var request = require('request-promise'); 2 | var sinon = require('sinon'); 3 | var fs = require('fs'); 4 | var url = require('url'); 5 | var path = require('path'); 6 | var express = require('express'); 7 | 8 | // Clone a few defaults before testing 9 | var opts = require('../').defaultParams; 10 | var defaultHeaders = Object.assign({}, opts.headers); 11 | var agentOptions = Object.assign({}, opts.agentOptions); 12 | 13 | // Cache fixtures so they're only read from fs but once 14 | var cache = {}; 15 | 16 | var helper = { 17 | app: express(), 18 | reset: function () { 19 | helper.router = new express.Router(); 20 | 21 | helper.defaultParams = { 22 | // Since cloudscraper wraps the callback, just ensure callback is a function 23 | callback: sinon.match.func, 24 | requester: sinon.match.func, 25 | jar: request.jar(), 26 | uri: helper.resolve('/test'), 27 | headers: Object.assign({}, defaultHeaders), 28 | method: 'GET', 29 | encoding: null, 30 | realEncoding: 'utf8', 31 | followAllRedirects: true, 32 | cloudflareTimeout: 1, 33 | cloudflareMaxTimeout: 30000, 34 | challengesToSolve: 3, 35 | decodeEmails: false, 36 | gzip: true, 37 | agentOptions: Object.assign({}, agentOptions) 38 | }; 39 | }, 40 | getFixture: function (fileName) { 41 | var key = fileName; 42 | 43 | if (cache[key] === undefined) { 44 | fileName = path.join(__dirname, 'fixtures', fileName); 45 | cache[key] = fs.readFileSync(fileName, 'utf8'); 46 | } 47 | 48 | return cache[key]; 49 | 
}, 50 | extendParams: function (params) { 51 | var defaultParams = this.defaultParams; 52 | 53 | // Extend target with the default params and provided params 54 | var target = {}; 55 | Object.assign(target, defaultParams, params); 56 | // Extend target.headers with defaults headers and provided headers 57 | target.headers = {}; 58 | Object.assign(target.headers, defaultParams.headers, params.headers); 59 | 60 | return target; 61 | }, 62 | resolve: function (uri) { 63 | // eslint-disable-next-line node/no-deprecated-api 64 | return url.resolve(helper.uri.href, uri); 65 | }, 66 | listen: function (callback) { 67 | helper.server = helper.app.listen(0, '127.0.0.1', function () { 68 | var baseUrl = 'http://127.0.0.1:' + helper.server.address().port; 69 | 70 | // eslint-disable-next-line node/no-deprecated-api 71 | helper.uri = url.parse(baseUrl + '/'); 72 | helper.reset(); 73 | callback(); 74 | }); 75 | } 76 | }; 77 | 78 | helper.app.use(function (req, res, next) { 79 | helper.router(req, res, next); 80 | }); 81 | 82 | express.response.cloudflare = function () { 83 | this.header('Server', 'cloudflare'); 84 | this.header('Content-Type', 'text/html; charset=UTF-8'); 85 | return this; 86 | }; 87 | 88 | express.response.sendFixture = function (fileName) { 89 | return this.send(helper.getFixture(fileName)); 90 | }; 91 | 92 | express.response.sendChallenge = function (fileName) { 93 | return this.cloudflare().status(503).sendFixture(fileName); 94 | }; 95 | 96 | express.response.sendCaptcha = function (fileName) { 97 | return this.cloudflare().status(403).sendFixture(fileName); 98 | }; 99 | 100 | express.response.endAbruptly = function () { 101 | this.connection.write( 102 | 'HTTP/1.1 500\r\n' + 103 | 'Content-Type: text/plain\r\n' + 104 | 'Transfer-Encoding: chunked\r\n\r\n' 105 | ); 106 | this.end(); 107 | }; 108 | 109 | module.exports = helper; 110 | -------------------------------------------------------------------------------- /test/test-brotli.js: 
-------------------------------------------------------------------------------- 1 | /* eslint-disable no-unused-expressions */ 2 | /* eslint-env node, mocha */ 3 | 'use strict'; 4 | 5 | const brotli = require('../lib/brotli'); 6 | const helper = require('./helper'); 7 | const zlib = require('zlib'); 8 | 9 | const sinon = require('sinon'); 10 | const expect = require('chai').expect; 11 | 12 | (process.env.BROTLI ? describe : describe.skip)('Brotli (lib)', function () { 13 | it('should be available', function () { 14 | expect(brotli.isAvailable).to.be.true; 15 | }); 16 | 17 | it('should have a decompress method', function () { 18 | expect(brotli.decompress).to.be.a('function'); 19 | }); 20 | 21 | it('decompress() should accept exactly 1 argument', function () { 22 | expect(brotli.decompress.length).to.equal(1); 23 | }); 24 | 25 | it('decompress() should accept buffer as input', function () { 26 | const data = Buffer.from([0x0b, 0x01, 0x80, 0x61, 0x62, 0x63, 0x03]); 27 | const result = brotli.decompress(data); 28 | 29 | expect(result).to.be.instanceof(Buffer); 30 | expect(result.toString('utf8')).to.equal('abc'); 31 | }); 32 | 33 | (zlib.brotliCompressSync ? it : it.skip)('[internal] decompress() should produce the expected result', function () { 34 | const input = helper.getFixture('captcha.html'); 35 | const data = zlib.brotliCompressSync(Buffer.from(input, 'utf8')); 36 | const result = brotli.decompress(data); 37 | 38 | expect(result).to.be.instanceof(Buffer); 39 | expect(result.toString('utf8')).to.equal(input); 40 | }); 41 | 42 | (zlib.brotliCompressSync ? it.skip : it)('[external] decompress() should produce the expected result', function () { 43 | const input = helper.getFixture('captcha.html'); 44 | // Try increasing the timeout if this fails on your system. 
45 | const data = require('brotli').compress(Buffer.from(input, 'utf8')); 46 | const result = brotli.decompress(Buffer.from(data)); 47 | 48 | expect(result).to.be.instanceof(Buffer); 49 | expect(result.toString('utf8')).to.equal(input); 50 | }); 51 | 52 | it('optional() should throw an error if the module contains an error', function () { 53 | const spy = sinon.spy(function () { 54 | // This method should throw if called without arguments 55 | brotli.optional(); 56 | }); 57 | 58 | expect(spy).to.throw(); 59 | }); 60 | }); 61 | -------------------------------------------------------------------------------- /test/test-captcha.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-unused-expressions */ 2 | /* eslint-env node, mocha */ 3 | 'use strict'; 4 | 5 | const cloudscraper = require('../index'); 6 | const request = require('request-promise'); 7 | const errors = require('../errors'); 8 | const helper = require('./helper'); 9 | const http = require('http'); 10 | 11 | const sinon = require('sinon'); 12 | const expect = require('chai').expect; 13 | 14 | describe('Cloudscraper', function () { 15 | let sandbox; 16 | let Request; 17 | let uri; 18 | 19 | const requestedPage = helper.getFixture('requested_page.html'); 20 | 21 | before(function (done) { 22 | helper.listen(function () { 23 | uri = helper.resolve('/test'); 24 | 25 | // Speed up tests 26 | cloudscraper.defaultParams.cloudflareTimeout = 1; 27 | done(); 28 | }); 29 | }); 30 | 31 | after(function () { 32 | helper.server.close(); 33 | }); 34 | 35 | beforeEach(function () { 36 | // Prepare stubbed Request 37 | sandbox = sinon.createSandbox(); 38 | Request = sandbox.spy(request, 'Request'); 39 | }); 40 | 41 | afterEach(function () { 42 | helper.reset(); 43 | sandbox.restore(); 44 | }); 45 | 46 | it('should handle onCaptcha promise being rejected with a falsy error', function (done) { 47 | helper.router.get('/test', function (req, res) { 48 | 
res.sendCaptcha('cf_recaptcha_15_04_2019.html'); 49 | }); 50 | 51 | const options = { 52 | uri, 53 | onCaptcha: function () { 54 | // eslint-disable-next-line prefer-promise-reject-errors 55 | return Promise.reject(); 56 | } 57 | }; 58 | 59 | const promise = cloudscraper.get(options, function (error) { 60 | expect(error).to.be.instanceOf(errors.CaptchaError); 61 | expect(error.error).to.be.an('error'); 62 | expect(error).to.have.property('errorType', 1); 63 | expect(error.message).to.include('Falsy error'); 64 | expect(promise).to.be.rejectedWith(errors.CaptchaError).and.notify(done); 65 | }); 66 | }); 
  // Captcha page served on GET /test; the solved form is expected to be
  // submitted as a GET to /cdn-cgi/l/chk_captcha with the fields in the
  // query string (see `secondParams`).
  //
  // The same test body is registered four times, once per `stage`:
  //   0 - onCaptcha calls captcha.submit()            -> success expected
  //   1 - onCaptcha calls captcha.submit(error)       -> CaptchaError expected
  //   2 - onCaptcha returns a resolved promise        -> success expected
  //   3 - onCaptcha returns a rejected promise        -> CaptchaError expected
  describe('reCAPTCHA (version as on 10.04.2019)', () => {
    for (let stage = 0; stage < 4; stage++) {
      // Test title for each stage
      const desc = {
        0: 'should resolve when user calls captcha.submit()',
        1: 'should callback with an error if user calls captcha.submit(error)',
        2: 'should resolve when the onCaptcha promise resolves',
        3: 'should callback with an error if the onCaptcha promise is rejected'
      };

      // Run this test 4 times
      it(desc[stage], function (done) {
        // Values the captcha handler is expected to receive in response.captcha
        const secret = '6b132d85d185a8255f2451d48fe6a8bee7154ea2-1555377580-1800-AQ1azEkeDOnQP5ByOpwUU/RdbKrmMwHYpkaenRvjPXtB0w8Vbjn/Ceg62tfpp/lT799kjDLEMMuDkEMqQ7iO51kniWCQm00BQvDGl+D0h/WvXDWO96YXOUD3qrqUTuzO7QbUOinc8y8kedvOQkr4c0o=';
        const siteKey = '6LfBixYUAAAAABhdHynFUIMA_sa4s-XsJvnjtgB0';
        const expectedError = new Error('anti-captcha failed!');

        helper.router
          .get('/test', function (req, res) {
            res.sendCaptcha('cf_recaptcha_15_04_2019.html');
          })
          .get('/cdn-cgi/l/chk_captcha', function (req, res) {
            res.send(requestedPage);
          });

        // Spy standing in for the user-supplied captcha handler
        const onCaptcha = sinon.spy(function (options, response, body) {
          expect(options).to.be.an('object');
          expect(response).to.be.instanceof(http.IncomingMessage);
          expect(body).to.be.a('string');

          sinon.assert.match(response, {
            isCloudflare: true,
            isHTML: true,
            isCaptcha: true,
            captcha: sinon.match.object
          });

          sinon.assert.match(response.captcha, {
            url: uri, // <-- Deprecated
            uri: sinon.match.same(response.request.uri),
            form: { s: secret },
            siteKey: siteKey,
            submit: sinon.match.func
          });

          // Simulate what the user should do here
          response.captcha.form['g-recaptcha-response'] = 'foobar';

          // Stages 0 and 1 signal through submit() and return undefined;
          // stages 2 and 3 signal through the returned promise instead.
          switch (stage) {
            case 0:
              // User green lights form submission
              response.captcha.submit();
              break;
            case 1:
              // User reports an error when solving the reCAPTCHA
              response.captcha.submit(expectedError);
              break;
            case 2:
              // User green lights form submission by resolving the returned promise
              return Promise.resolve();
            case 3:
              // User reports an error by rejecting the returned promise
              return Promise.reject(expectedError);
          }
        });

        // Exact parameters expected for the first and second Request calls
        const firstParams = helper.extendParams({ onCaptcha, uri });
        const secondParams = helper.extendParams({
          onCaptcha,
          method: 'GET',
          uri: helper.resolve('/cdn-cgi/l/chk_captcha'),
          headers: {
            Referer: uri
          },
          qs: {
            s: secret,
            'g-recaptcha-response': 'foobar'
          }
        });

        const options = { onCaptcha, uri };

        // Both the callback arguments and the returned promise are checked;
        // `done` is signalled through the final promise assertion.
        const promise = cloudscraper.get(options, function (error, response, body) {
          switch (stage) {
            case 0:
            case 2:
              expect(error).to.be.null;

              expect(onCaptcha).to.be.calledOnce;

              expect(Request).to.be.calledTwice;
              expect(Request.firstCall).to.be.calledWithExactly(firstParams);
              expect(Request.secondCall).to.be.calledWithExactly(secondParams);

              expect(body).to.be.equal(requestedPage);
              expect(promise).to.eventually.equal(requestedPage).and.notify(done);
              break;
            case 1:
            case 3:
              expect(error).to.be.instanceOf(errors.CaptchaError);
              expect(error.error).to.be.an('error');
              expect(error).to.have.property('errorType', 1);
              expect(error.message).to.include(expectedError.message);
              expect(promise).to.be.rejectedWith(errors.CaptchaError).and.notify(done);
              break;
          }
        });
      });
    }
  });

  // Same four-stage scenario as the suite above, for a different captcha
  // fixture: the form carries `r` and `id` fields, and the solved form is
  // expected to be sent as a POST to /?__cf_chl_captcha_tk__=... with the
  // fields in the request body (see `secondParams`).
  describe('reCAPTCHA (version as on 01.12.2019)', () => {
    for (let stage = 0; stage < 4; stage++) {
      // Test title for each stage
      const desc = {
        0: 'should resolve when user calls captcha.submit()',
        1: 'should callback with an error if user calls captcha.submit(error)',
        2: 'should resolve when the onCaptcha promise resolves',
        3: 'should callback with an error if the onCaptcha promise is rejected'
      };

      // Run this test 4 times
      it(desc[stage], function (done) {
        // Values the captcha handler is expected to receive in response.captcha
        const secret = '0bd666f149acf02bbc05bba3b1bb';
        const siteKey = '6LfBixYUAAAAABhdHynFUIMA_sa4s-XsJvnjtgB0';
        const rayId = '53dfe8147d2a9e73';
        const expectedError = new Error('anti-captcha failed!');

        helper.router
          .get('/test', function (req, res) {
            res.sendCaptcha('cf_recaptcha_01_12_2019.html');
          })
          .post('/', function (req, res) {
            res.send(requestedPage);
          });

        // Spy standing in for the user-supplied captcha handler
        const onCaptcha = sinon.spy(function (options, response, body) {
          expect(options).to.be.an('object');
          expect(response).to.be.instanceof(http.IncomingMessage);
          expect(body).to.be.a('string');

          sinon.assert.match(response, {
            isCloudflare: true,
            isHTML: true,
            isCaptcha: true,
            captcha: sinon.match.object
          });

          sinon.assert.match(response.captcha, {
            url: uri, // <-- Deprecated
            uri: sinon.match.same(response.request.uri),
            form: { r: secret, id: rayId },
            siteKey: siteKey,
            submit: sinon.match.func
          });

          // Simulate what the user should do here
          response.captcha.form['g-recaptcha-response'] = 'foobar';

          // Stages 0 and 1 signal through submit() and return undefined;
          // stages 2 and 3 signal through the returned promise instead.
          switch (stage) {
            case 0:
              // User green lights form submission
              response.captcha.submit();
              break;
            case 1:
              // User reports an error when solving the reCAPTCHA
              response.captcha.submit(expectedError);
              break;
            case 2:
              // User green lights form submission by resolving the returned promise
              return Promise.resolve();
            case 3:
              // User reports an error by rejecting the returned promise
              return Promise.reject(expectedError);
          }
        });

        // Exact parameters expected for the first and second Request calls
        const firstParams = helper.extendParams({ onCaptcha, uri });
        const secondParams = helper.extendParams({
          onCaptcha,
          method: 'POST',
          uri: helper.resolve('/?__cf_chl_captcha_tk__=e8844bdff35ae5e'),
          qs: { __cf_chl_captcha_tk__: 'e8844bdff35ae5e' },
          headers: {
            Referer: helper.resolve('/test')
          },
          form: {
            r: secret,
            id: rayId,
            'g-recaptcha-response': 'foobar'
          }
        });

        const options = { onCaptcha, uri };

        // Both the callback arguments and the returned promise are checked;
        // `done` is signalled through the final promise assertion.
        const promise = cloudscraper.get(options, function (error, response, body) {
          switch (stage) {
            case 0:
            case 2:
              expect(error).to.be.null;

              expect(onCaptcha).to.be.calledOnce;

              expect(Request).to.be.calledTwice;
              expect(Request.firstCall).to.be.calledWithExactly(firstParams);
              expect(Request.secondCall).to.be.calledWithExactly(secondParams);

              expect(body).to.be.equal(requestedPage);
              expect(promise).to.eventually.equal(requestedPage).and.notify(done);
              break;
            case 1:
            case 3:
              expect(error).to.be.instanceOf(errors.CaptchaError);
              expect(error.error).to.be.an('error');
              expect(error).to.have.property('errorType', 1);
              expect(error.message).to.include(expectedError.message);
              expect(promise).to.be.rejectedWith(errors.CaptchaError).and.notify(done);
              break;
          }
        });
      });
    };
  });
});
-------------------------------------------------------------------------------- 
/test/test-emails.js: --------------------------------------------------------------------------------
/* eslint-disable no-unused-expressions */
/* eslint-env node, mocha */
'use strict';

const decode = require('../lib/email-decode');
const expect = require('chai').expect;

// NOTE(review): several string literals in this file look like they lost
// their HTML markup when this listing was produced (e.g. genHTML() emits only
// whitespace and the title text, and HEX_STRING is never referenced).
// Confirm the literals against the repository before relying on them.

// Plain-text address the decoder is expected to produce
const EMAIL = 'cloudscraper@example-site.dev';
// presumably the encoded form of EMAIL -- TODO confirm; unused in the code visible here
const HEX_STRING = '6506090a10011606170415001725001d040815090048160c11004b010013';

/**
 * Builds the page used by the tests by concatenating a fixed prefix, `body`,
 * and a fixed suffix.
 *
 * @param {string} body - text placed between the fixed prefix and suffix
 * @returns {string} the assembled page
 */
function genHTML (body) {
  return '\n' +
    '\n' +
    '\n' +
    ' Cloudscraper\n' +
    ' \n' +
    ' \n' +
    ' \n\n' +
    ' ' +
    '' +
    '\n' + body + '\n' +
    '\n';
}

// Each test feeds a string to decode() and compares the result either with
// the expected decoded text or with the unchanged input.
describe('Email (lib)', function () {
  it('should not modify unprotected html', function () {
    const raw = genHTML('');

    // Input without protected content must come back unchanged
    expect(decode(raw)).to.equal(raw);
  });

  it('should remove email protection', function () {
    const protection = '!@#&*9^%()[]/\\';

    expect(decode(protection)).to.equal(EMAIL);
  });

  it('should replace anchors that have a data-cfemail attribute', function () {
    const protection = '[email protected]';

    // `raw` is the expected output, `enc` the protected input
    const raw = genHTML(' The email is ' + EMAIL + '');
    const enc = genHTML(' The email is ' + protection + '');

    expect(decode(enc)).to.equal(raw);
  });

  it('should replace spans that have a data-cfemail attribute', function () {
    const protection = '[email protected]';

    const raw = genHTML(' The email is ' + EMAIL + '');
    const enc = genHTML(' The email is ' + protection + '');

    expect(decode(enc)).to.equal(raw);
  });

  it('should be space agnostic', function () {
    const protection = '\n[email protected]\r\n';

    const raw = genHTML('\r\n\n The email is ' + EMAIL + '\r\n\n');
    const enc = genHTML('\r\n\n The email is ' + protection + '\r\n\n');

    expect(decode(enc)).to.equal(raw);
  });

  it('should not replace nodes if they have children', function () {
    const protection = '[email protected]';
    const enc = genHTML(' The email is ' + protection + '');

    // Input is expected to pass through untouched
    expect(decode(enc)).to.equal(enc);
  });

  it('should not replace malformed html', function () {
    const protection = '\n<\n';
    const enc = genHTML(' The email is ' + protection + '');

    // Input is expected to pass through untouched
    expect(decode(enc)).to.equal(enc);
  });

  it('should account for self-closing nodes', function () {
    const protection = 'test';

    expect(decode(protection)).to.equal(EMAIL + 'test');
  });

  it('should update href attribute values', function () {
    const protection = '';

    const raw = genHTML('');
    const enc = genHTML(protection);

    expect(decode(enc)).to.equal(raw);
  });
});
-------------------------------------------------------------------------------- /test/test-errors.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-unused-expressions */ 2 | /* eslint-env node, mocha */ 3 | 'use strict'; 4 | 5 | const cloudscraper = require('../index'); 6 | const request = require('request-promise'); 7 | const helper = require('./helper'); 8 | const brotli = require('../lib/brotli'); 9 | const errors = require('../errors'); 10 | 11 | const sinon = require('sinon'); 12 | const expect = require('chai').expect; 13 | const assert = require('chai').assert; 14 | 15 | describe('Cloudscraper', function () { 16 | let sandbox; 17 | let Request; 18 | let uri; 19 | 20 | before(function (done) { 21 | helper.listen(function () { 22 | uri = helper.resolve('/test'); 23 | 24 | // Speed up tests 25 | cloudscraper.defaultParams.cloudflareTimeout = 1; 26 | done(); 27 | }); 28 | }); 29 | 30 | after(function () { 31 | helper.server.close(); 32 | }); 33 | 34 | beforeEach(function () { 35 | // Prepare stubbed Request 36 | sandbox = sinon.createSandbox(); 37 | Request = sandbox.spy(request, 'Request'); 38 | }); 39 | 40 | 
afterEach(function () { 41 | helper.reset(); 42 | sandbox.restore(); 43 | }); 44 | 45 | it('should return error if it was thrown by request', function (done) { 46 | helper.router.get('/test', function (req, res) { 47 | res.endAbruptly(); 48 | }); 49 | 50 | const promise = cloudscraper.get(uri, function (error) { 51 | expect(error).to.be.instanceOf(errors.RequestError); 52 | expect(error.error).to.be.an('error'); 53 | expect(error).to.have.property('errorType', 0); 54 | 55 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 56 | expect(promise).to.be.rejectedWith(errors.RequestError).and.notify(done); 57 | }); 58 | }); 59 | 60 | it('should return error if cloudflare response is empty', function (done) { 61 | helper.router.get('/test', function (req, res) { 62 | res.cloudflare().status(504).end(); 63 | }); 64 | 65 | const promise = cloudscraper.get(uri, function (error) { 66 | // errorType 1, means captcha is served 67 | expect(error).to.be.instanceOf(errors.CloudflareError); 68 | expect(error).to.have.property('error', 504); 69 | expect(error).to.have.property('errorType', 2); 70 | expect(error.message).to.be.equal('504, Gateway Timeout'); 71 | 72 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 73 | 74 | expect(error.response.body).to.be.eql(Buffer.alloc(0)); 75 | expect(promise).to.be.rejectedWith(errors.CloudflareError).and.notify(done); 76 | }); 77 | }); 78 | 79 | it('should return error if captcha is served by cloudflare', function (done) { 80 | helper.router.get('/test', function (req, res) { 81 | res.sendChallenge('captcha.html'); 82 | }); 83 | 84 | const promise = cloudscraper.get(uri, function (error) { 85 | // errorType 1, means captcha is served 86 | expect(error).to.be.instanceOf(errors.CaptchaError); 87 | expect(error).to.have.property('error', 'captcha'); 88 | expect(error).to.have.property('errorType', 1); 89 | 90 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 91 | 
expect(promise).to.be.rejectedWith(errors.CaptchaError).and.notify(done); 92 | }); 93 | }); 94 | 95 | it('should return error if cloudflare returned some inner error', function (done) { 96 | // https://support.cloudflare.com/hc/en-us/sections/200820298-Error-Pages 97 | // Error codes: 1012, 1011, 1002, 1000, 1004, 1010, 1006, 1007, 1008 98 | // Error codes can also be the same as the HTTP status code in the 5xx range. 99 | 100 | helper.router.get('/test', function (req, res) { 101 | res.cloudflare().status(500).sendFixture('access_denied.html'); 102 | }); 103 | 104 | const promise = cloudscraper.get(uri, function (error) { 105 | // errorType 2, means inner cloudflare error 106 | expect(error).to.be.instanceOf(errors.CloudflareError); 107 | expect(error).to.have.property('error', 1006); 108 | expect(error.message).to.equal('1006, Access Denied: Your IP address has been banned'); 109 | expect(error).to.have.property('errorType', 2); 110 | 111 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 112 | expect(promise).to.be.rejectedWith(errors.CloudflareError).and.notify(done); 113 | }); 114 | }); 115 | 116 | it('should add a description to 5xx range cloudflare errors', function (done) { 117 | const html = helper.getFixture('access_denied.html').toString('utf8'); 118 | 119 | helper.router.get('/test', function (req, res) { 120 | res.cloudflare().status(504).send(html.replace('1006', '504')); 121 | }); 122 | 123 | const promise = cloudscraper.get(uri, function (error) { 124 | // errorType 2, means inner cloudflare error 125 | expect(error).to.be.instanceOf(errors.CloudflareError); 126 | expect(error).to.have.property('error', 504); 127 | expect(error.message).to.equal('504, Gateway Timeout'); 128 | expect(error).to.have.property('errorType', 2); 129 | 130 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 131 | expect(promise).to.be.rejectedWith(errors.CloudflareError).and.notify(done); 132 | }); 133 | }); 134 | 135 | it('should not 
error if error description is unavailable', function (done) { 136 | const html = helper.getFixture('access_denied.html').toString('utf8'); 137 | 138 | helper.router.get('/test', function (req, res) { 139 | res.cloudflare().status(500).send(html.replace('1006', '5111')); 140 | }); 141 | 142 | const promise = cloudscraper.get(uri, function (error) { 143 | // errorType 2, means inner cloudflare error 144 | expect(error).to.be.instanceOf(errors.CloudflareError); 145 | expect(error).to.have.property('error', 5111); 146 | expect(error.message).to.equal('5111'); 147 | expect(error).to.have.property('errorType', 2); 148 | 149 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 150 | expect(promise).to.be.rejectedWith(errors.CloudflareError).and.notify(done); 151 | }); 152 | }); 153 | 154 | it('should return error if cf presented more than 3 challenges in a row', function (done) { 155 | helper.router.get('*', function (req, res) { 156 | res.sendChallenge('js_challenge_09_06_2016.html'); 157 | }); 158 | 159 | // The expected params for all subsequent calls to Request 160 | const expectedParams = helper.extendParams({ 161 | uri: helper.resolve('/cdn-cgi/l/chk_jschl') 162 | }); 163 | 164 | // Perform less strict matching on headers and qs to simplify this test 165 | Object.assign(expectedParams, { 166 | headers: sinon.match.object, 167 | qs: sinon.match.object 168 | }); 169 | 170 | const promise = cloudscraper.get(uri, function (error) { 171 | expect(error).to.be.instanceOf(errors.CloudflareError); 172 | expect(error).to.have.property('error', 'Cloudflare challenge loop'); 173 | expect(error).to.have.property('errorType', 4); 174 | 175 | assert.equal(Request.callCount, 4, 'Request call count'); 176 | expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); 177 | 178 | const total = helper.defaultParams.challengesToSolve + 1; 179 | // noinspection ES6ConvertVarToLetConst 180 | for (var i = 1; i < total; i++) { 181 | // Decrement the number of 
challengesToSolve to match actual params 182 | expectedParams.challengesToSolve -= 1; 183 | expect(Request.getCall(i)).to.be.calledWithExactly(expectedParams); 184 | } 185 | 186 | expect(promise).to.be.rejectedWith(errors.CloudflareError).and.notify(done); 187 | }); 188 | }); 189 | 190 | it('should return error if body is undefined', function (done) { 191 | helper.router.get('/test', function (req, res) { 192 | res.status(503).end(); 193 | }); 194 | 195 | const expectedParams = helper.extendParams({ json: true }); 196 | const options = { uri: uri, json: true }; 197 | 198 | const promise = cloudscraper.get(options, function (error) { 199 | expect(error).to.be.instanceOf(errors.RequestError); 200 | expect(error).to.have.property('error', null); 201 | expect(error).to.have.property('errorType', 0); 202 | 203 | assert.equal(error.response.statusCode, 503, 'status code'); 204 | 205 | expect(error.response.body).to.be.equal(undefined); 206 | expect(Request).to.be.calledOnceWithExactly(expectedParams); 207 | expect(promise).to.be.rejectedWith(errors.RequestError).and.notify(done); 208 | }); 209 | }); 210 | 211 | (brotli.isAvailable ? 
it.skip : it)('should return error if content-type is brotli and missing dep', function (done) { 212 | // Brotli compressed JSON: {"a":"test"} 213 | const compressed = Buffer.from([ 214 | 0x8b, 0x05, 0x80, 0x7b, 0x22, 0x61, 0x22, 0x3a, 215 | 0x22, 0x74, 0x65, 0x73, 0x74, 0x22, 0x7d, 0x03 216 | ]); 217 | 218 | helper.router.get('/test', function (req, res) { 219 | res.set('content-encoding', 'br'); 220 | res.status(503).end(compressed, 'binary'); 221 | }); 222 | 223 | const expectedParams = helper.extendParams({ json: true }); 224 | const options = { uri: uri, json: true }; 225 | 226 | const promise = cloudscraper.get(options, function (error) { 227 | expect(error).to.be.instanceOf(errors.RequestError); 228 | expect(error).to.have.property('error').that.is.ok; 229 | expect(error).to.have.property('errorType', 0); 230 | 231 | assert.equal(error.response.statusCode, 503, 'status code'); 232 | 233 | assert(Buffer.isBuffer(error.response.body), 'response type'); 234 | expect(error.response.body).to.be.eql(compressed); 235 | expect(Request).to.be.calledOnceWithExactly(expectedParams); 236 | expect(promise).to.be.rejectedWith(errors.RequestError).and.notify(done); 237 | }); 238 | }); 239 | 240 | it('should return error if challenge page failed to be parsed', function (done) { 241 | helper.router.get('/test', function (req, res) { 242 | res.sendChallenge('invalid_js_challenge.html'); 243 | }); 244 | 245 | const promise = cloudscraper.get(uri, function (error) { 246 | expect(error).to.be.instanceOf(errors.ParserError); 247 | expect(error).to.have.property('error').that.is.ok; 248 | expect(error).to.have.property('errorType', 3); 249 | 250 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 251 | expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); 252 | }); 253 | }); 254 | 255 | it('should return error if js challenge has error during evaluation', function (done) { 256 | const html = helper.getFixture('js_challenge_03_12_2018_1.html'); 
257 | 258 | helper.router.get('/test', function (req, res) { 259 | // Adds a syntax error near the end of line 37 260 | res.cloudflare().status(503).send(html.replace(/\.toFixed/gm, '..toFixed')); 261 | }); 262 | 263 | const promise = cloudscraper.get(uri, function (error) { 264 | expect(error).to.be.instanceOf(errors.ParserError); 265 | expect(error).to.have.property('error').that.is.an('error'); 266 | expect(error).to.have.property('errorType', 3); 267 | expect(error.message).to.include('Challenge evaluation failed'); 268 | 269 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 270 | expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); 271 | }); 272 | }); 273 | 274 | it('should return error if pass extraction fails', function (done) { 275 | const html = helper.getFixture('js_challenge_03_12_2018_1.html'); 276 | 277 | helper.router.get('/test', function (req, res) { 278 | res.cloudflare().status(503).send(html.replace(/name="pass"/gm, '')); 279 | }); 280 | 281 | const promise = cloudscraper.get(uri, function (error) { 282 | expect(error).to.be.instanceOf(errors.ParserError); 283 | expect(error).to.have.property('error', 'Attribute (pass) value extraction failed'); 284 | expect(error).to.have.property('errorType', 3); 285 | 286 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 287 | expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); 288 | }); 289 | }); 290 | 291 | it('should return error if challengeId extraction fails', function (done) { 292 | const html = helper.getFixture('js_challenge_03_12_2018_1.html'); 293 | 294 | helper.router.get('/test', function (req, res) { 295 | res.cloudflare().status(503).send(html.replace(/name="jschl_vc"/gm, '')); 296 | }); 297 | 298 | const promise = cloudscraper.get(uri, function (error) { 299 | expect(error).to.be.instanceOf(errors.ParserError); 300 | expect(error).to.have.property('error', 'challengeId (jschl_vc) extraction failed'); 301 | 
expect(error).to.have.property('errorType', 3); 302 | 303 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 304 | expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); 305 | }); 306 | }); 307 | 308 | it('should return error if challenge answer is not a number', function (done) { 309 | const html = helper.getFixture('js_challenge_03_12_2018_1.html'); 310 | 311 | helper.router.get('/test', function (req, res) { 312 | res.cloudflare().status(503) 313 | .send(html.replace(/a.value.*/, 'a.value="abc" + t.length')); 314 | }); 315 | 316 | const promise = cloudscraper.get(uri, function (error) { 317 | expect(error).to.be.instanceOf(errors.ParserError); 318 | expect(error).to.have.property('error', 'Challenge answer is not a number'); 319 | expect(error).to.have.property('errorType', 3); 320 | 321 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 322 | expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); 323 | }); 324 | }); 325 | 326 | it('should return error if it was thrown by request when solving challenge', function (done) { 327 | helper.router 328 | .get('/test', function (req, res) { 329 | res.sendChallenge('js_challenge_21_05_2015.html'); 330 | }) 331 | .get('/cdn-cgi/l/chk_jschl', function (req, res) { 332 | res.endAbruptly(); 333 | }); 334 | 335 | const promise = cloudscraper.get(uri, function (error) { 336 | // errorType 0, a connection error for example 337 | expect(error).to.be.instanceOf(errors.RequestError); 338 | expect(error.error).to.be.an('error'); 339 | expect(error).to.have.property('errorType', 0); 340 | 341 | expect(Request).to.be.calledTwice; 342 | expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); 343 | expect(promise).to.be.rejectedWith(errors.RequestError).and.notify(done); 344 | }); 345 | }); 346 | 347 | it('should properly handle a case when after a challenge another one is returned', function (done) { 348 | helper.router 349 | .get('/test', 
function (req, res) { 350 | res.sendChallenge('js_challenge_09_06_2016.html'); 351 | }) 352 | .get('/cdn-cgi/l/chk_jschl', function (req, res) { 353 | res.sendChallenge('captcha.html'); 354 | }); 355 | 356 | // Second call to request.get returns recaptcha 357 | const expectedParams = helper.extendParams({ 358 | uri: helper.resolve('/cdn-cgi/l/chk_jschl'), 359 | challengesToSolve: 2 360 | }); 361 | 362 | // Perform less strict matching on headers and qs to simplify this test 363 | Object.assign(expectedParams, { 364 | headers: sinon.match.object, 365 | qs: sinon.match.object 366 | }); 367 | 368 | const promise = cloudscraper.get(uri, function (error) { 369 | // errorType 1, means captcha is served 370 | expect(error).to.be.instanceOf(errors.CaptchaError); 371 | expect(error).to.have.property('error', 'captcha'); 372 | expect(error).to.have.property('errorType', 1); 373 | 374 | expect(Request).to.be.calledTwice; 375 | expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); 376 | expect(Request.secondCall).to.be.calledWithExactly(expectedParams); 377 | expect(promise).to.be.rejectedWith(errors.CaptchaError).and.notify(done); 378 | }); 379 | }); 380 | 381 | it('should return error if challenge page cookie extraction fails', function (done) { 382 | const html = helper.getFixture('sucuri_waf_18_08_2016.html').toString('utf8'); 383 | 384 | helper.router.get('/test', function (req, res) { 385 | // The cookie extraction code looks for the `S` variable assignment 386 | res.cloudflare().status(503).send(html.replace(/S=/gm, 'Z=')); 387 | }); 388 | 389 | const promise = cloudscraper.get(uri, function (error) { 390 | expect(error).to.be.instanceOf(errors.ParserError); 391 | expect(error).to.have.property('error', 'Cookie code extraction failed'); 392 | expect(error).to.have.property('errorType', 3); 393 | 394 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 395 | expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); 396 | 
}); 397 | }); 398 | 399 | it('should throw a TypeError if callback is not a function', function (done) { 400 | const spy = sinon.spy(function () { 401 | // request-promise always provides a callback so change requester 402 | const options = { uri: uri, requester: require('request') }; 403 | cloudscraper.get(options); 404 | }); 405 | 406 | expect(spy).to.throw(TypeError, /Expected a callback function/); 407 | done(); 408 | }); 409 | 410 | it('should throw a TypeError if requester is not a function', function (done) { 411 | const spy = sinon.spy(function () { 412 | cloudscraper.get({ requester: null }); 413 | }); 414 | 415 | expect(spy).to.throw(TypeError, /`requester` option .*function/); 416 | done(); 417 | }); 418 | 419 | it('should throw a TypeError if challengesToSolve is not a number', function (done) { 420 | const spy = sinon.spy(function () { 421 | const options = { uri: uri, challengesToSolve: 'abc' }; 422 | 423 | cloudscraper.get(options); 424 | }); 425 | 426 | expect(spy).to.throw(TypeError, /`challengesToSolve` option .*number/); 427 | done(); 428 | }); 429 | 430 | it('should throw a TypeError if cloudflareMaxTimeout is not a number', function (done) { 431 | const spy = sinon.spy(function () { 432 | const options = { uri: uri, cloudflareMaxTimeout: 'abc' }; 433 | 434 | cloudscraper.get(options, function () {}); 435 | }); 436 | 437 | expect(spy).to.throw(TypeError, /`cloudflareMaxTimeout` option .*number/); 438 | done(); 439 | }); 440 | 441 | it('should return error if cookie setting code evaluation fails', function (done) { 442 | // Change the cookie setting code so the vm will throw an error 443 | const html = helper.getFixture('sucuri_waf_18_08_2016.html').toString('utf8'); 444 | const b64 = Buffer.from('throw new Error(\'vm eval failed\');').toString('base64'); 445 | 446 | helper.router.get('/test', function (req, res) { 447 | res.cloudflare().status(503).send(html.replace(/S='([^']+)'/, 'S=\'' + b64 + '\'')); 448 | }); 449 | 450 | const promise = 
cloudscraper.get(uri, function (error) { 451 | expect(error).to.be.instanceOf(errors.ParserError); 452 | expect(error).to.have.property('error').that.is.an('error'); 453 | expect(error).to.have.property('errorType', 3); 454 | expect(error.message).to.include('vm eval failed'); 455 | 456 | expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); 457 | expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); 458 | }); 459 | }); 460 | 461 | it('should not error if Error.captureStackTrace is undefined', function () { 462 | const desc = Object.getOwnPropertyDescriptor(Error, 'captureStackTrace'); 463 | 464 | Object.defineProperty(Error, 'captureStackTrace', { 465 | configurable: true, 466 | value: undefined 467 | }); 468 | 469 | const spy = sinon.spy(function () { 470 | throw new errors.RequestError(); 471 | }); 472 | 473 | try { 474 | expect(spy).to.throw(errors.RequestError); 475 | } finally { 476 | Object.defineProperty(Error, 'captureStackTrace', desc); 477 | } 478 | }); 479 | }); 480 | -------------------------------------------------------------------------------- /test/test-headers.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-unused-expressions */ 2 | /* eslint-env node, mocha */ 3 | 'use strict'; 4 | 5 | const sinon = require('sinon'); 6 | const expect = require('chai').expect; 7 | 8 | describe('Headers (lib)', function () { 9 | const { getDefaultHeaders, caseless } = require('../lib/headers'); 10 | const browsers = require('../lib/browsers'); 11 | 12 | it('should export getDefaultHeaders function', function () { 13 | expect(getDefaultHeaders).to.be.a('function'); 14 | }); 15 | 16 | it('should export caseless function', function () { 17 | expect(caseless).to.be.a('function'); 18 | }); 19 | 20 | it('caseless should return an object with lowercase keys', function () { 21 | sinon.assert.match(caseless({ AbC: 'foobar' }), { abc: 'foobar' }); 22 | }); 23 | 24 | 
it('getDefaultHeaders should always return an object with user agent', function () { 25 | for (let i = 0; i < 100; i++) { 26 | sinon.assert.match(getDefaultHeaders(), { 'User-Agent': sinon.match.string }); 27 | } 28 | 29 | browsers.chrome.forEach(function (options) { 30 | try { 31 | expect(options['User-Agent']).to.be.an('array'); 32 | expect(options['User-Agent'].length).to.be.above(0); 33 | } catch (error) { 34 | error.message += '\n\n' + JSON.stringify(options, null, 2); 35 | throw error; 36 | } 37 | }); 38 | }); 39 | 40 | it('getDefaultHeaders should always retain insertion order', function () { 41 | for (let keys, i = 0; i < 100; i++) { 42 | keys = Object.keys(getDefaultHeaders({ Host: 'foobar' })); 43 | expect(keys[0]).to.equal('Host'); 44 | expect(keys[1]).to.equal('Connection'); 45 | } 46 | 47 | for (let keys, i = 0; i < 100; i++) { 48 | keys = Object.keys(getDefaultHeaders({ Host: 'foobar', 'N/A': null })); 49 | expect(keys[0]).to.equal('Host'); 50 | expect(keys[1]).to.equal('N/A'); 51 | expect(keys[2]).to.equal('Connection'); 52 | } 53 | }); 54 | }); 55 | -------------------------------------------------------------------------------- /test/test-rp.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable promise/always-return,promise/catch-or-return,promise/no-callback-in-promise */ 2 | /* eslint-env node, mocha */ 3 | 'use strict'; 4 | 5 | const cloudscraper = require('../index'); 6 | const request = require('request-promise'); 7 | const helper = require('./helper'); 8 | 9 | const sinon = require('sinon'); 10 | const expect = require('chai').expect; 11 | 12 | describe('Cloudscraper', function () { 13 | let sandbox; 14 | let Request; 15 | let uri; 16 | 17 | const requestedPage = helper.getFixture('requested_page.html'); 18 | 19 | before(function (done) { 20 | helper.listen(function () { 21 | uri = helper.resolve('/test'); 22 | 23 | // Speed up tests 24 | cloudscraper.defaultParams.cloudflareTimeout = 1; 25 
| done(); 26 | }); 27 | }); 28 | 29 | after(function () { 30 | helper.server.close(); 31 | }); 32 | 33 | beforeEach(function () { 34 | // Prepare stubbed Request 35 | sandbox = sinon.createSandbox(); 36 | Request = sandbox.spy(request, 'Request'); 37 | }); 38 | 39 | afterEach(function () { 40 | helper.reset(); 41 | sandbox.restore(); 42 | }); 43 | 44 | it('should resolve with response body', function () { 45 | helper.router.get('/test', function (req, res) { 46 | res.send(requestedPage); 47 | }); 48 | 49 | const expectedParams = helper.extendParams({ callback: undefined }); 50 | 51 | return cloudscraper.get(uri).then(function (body) { 52 | expect(Request).to.be.calledOnceWithExactly(expectedParams); 53 | expect(body).to.be.equal(requestedPage); 54 | }); 55 | }); 56 | 57 | it('should resolve with full response', function () { 58 | helper.router.get('/test', function (req, res) { 59 | res.send(requestedPage); 60 | }); 61 | 62 | const expectedParams = helper.extendParams({ 63 | callback: undefined, 64 | resolveWithFullResponse: true 65 | }); 66 | 67 | // The method is implicitly GET 68 | delete expectedParams.method; 69 | 70 | const options = { 71 | uri: uri, 72 | resolveWithFullResponse: true 73 | }; 74 | 75 | return cloudscraper(options).then(function (response) { 76 | expect(Request).to.be.calledOnceWithExactly(expectedParams); 77 | expect(response.body).to.be.equal(requestedPage); 78 | }); 79 | }); 80 | 81 | // The helper calls the fake request callback synchronously. This results 82 | // in the promise being rejected before we catch it in the test. 83 | // This can be noticeable if we return the promise instead of calling done. 
84 | it('should define catch', function (done) { 85 | helper.router.get('/test', function (req, res) { 86 | res.endAbruptly(); 87 | }); 88 | 89 | let caught = false; 90 | 91 | cloudscraper(uri) 92 | .catch(function () { 93 | caught = true; 94 | }) 95 | .then(function () { 96 | if (caught) done(); 97 | }); 98 | }); 99 | 100 | it('should define finally', function (done) { 101 | helper.router.get('/test', function (req, res) { 102 | res.endAbruptly(); 103 | }); 104 | 105 | let caught = false; 106 | 107 | cloudscraper(uri) 108 | .then(function () { 109 | caught = true; 110 | }) 111 | .finally(function () { 112 | if (!caught) done(); 113 | }) 114 | .catch(function () {}); 115 | }); 116 | }); 117 | -------------------------------------------------------------------------------- /test/test-sandbox.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-unused-expressions */ 2 | /* eslint-env node, mocha */ 3 | 'use strict'; 4 | 5 | const sandbox = require('../lib/sandbox'); 6 | const expect = require('chai').expect; 7 | 8 | describe('Sandbox (lib)', function () { 9 | it('should export Context', function () { 10 | expect(sandbox.Context).to.be.a('function'); 11 | }); 12 | 13 | it('should export eval', function () { 14 | expect(sandbox.eval).to.be.a('function'); 15 | expect(sandbox.eval('0')).to.equal(0); 16 | expect(sandbox.eval('true')).to.be.true; 17 | expect(sandbox.eval('undefined')).to.equal(undefined); 18 | expect(sandbox.eval('NaN')).to.be.a('number'); 19 | expect(String(sandbox.eval('NaN'))).to.equal('NaN'); 20 | }); 21 | 22 | it('new Context() should return an object', function () { 23 | expect(new sandbox.Context()).to.be.an('object'); 24 | }); 25 | 26 | it('Context() should define atob', function () { 27 | const ctx = new sandbox.Context(); 28 | 29 | expect(ctx.atob).to.be.a('function'); 30 | expect(ctx.atob('YWJj')).to.equal('abc'); 31 | expect(sandbox.eval('atob("YWJj")', ctx)).to.equal('abc'); 32 | }); 33 
| 34 | it('Context() should define location.reload', function () { 35 | const ctx = new sandbox.Context(); 36 | 37 | expect(sandbox.eval('location.reload()', ctx)).to.equal(undefined); 38 | }); 39 | 40 | it('Context() should define document.createElement', function () { 41 | let ctx = new sandbox.Context(); 42 | let pseudoElement = { firstChild: { href: 'http:///' } }; 43 | 44 | expect(sandbox.eval('document.createElement("a")', ctx)).to.eql(pseudoElement); 45 | 46 | ctx = new sandbox.Context({ hostname: 'test.com' }); 47 | pseudoElement = { firstChild: { href: 'http://test.com/' } }; 48 | 49 | expect(sandbox.eval('document.createElement("a")', ctx)).to.eql(pseudoElement); 50 | }); 51 | 52 | it('Context() should define document.getElementById', function () { 53 | let ctx = new sandbox.Context(); 54 | expect(sandbox.eval('document.getElementById()', ctx)).to.be.null; 55 | 56 | // Missing element 57 | ctx = new sandbox.Context(); 58 | expect(sandbox.eval('document.getElementById("foobar")', ctx)).to.be.null; 59 | 60 | // Double quotes 61 | ctx = new sandbox.Context({ body: 'foobar' }); 62 | expect(sandbox.eval('document.getElementById("test")', ctx)).eql({ innerHTML: 'foobar' }); 63 | 64 | // Single quotes 65 | ctx = new sandbox.Context({ body: 'foobar' }); 66 | expect(sandbox.eval('document.getElementById(\'test\')', ctx)).eql({ innerHTML: 'foobar' }); 67 | 68 | // Empty 69 | ctx = new sandbox.Context({ body: '' }); 70 | expect(sandbox.eval('document.getElementById("test")', ctx)).eql({ innerHTML: '' }); 71 | 72 | // Space agnostic tests 73 | ctx = new sandbox.Context({ body: '\nabc\n\n' }); 74 | expect(sandbox.eval('document.getElementById("test")', ctx)).eql({ innerHTML: '\nabc\n\n' }); 75 | 76 | ctx = new sandbox.Context({ body: ' abc ' }); 77 | expect(sandbox.eval('document.getElementById("test")', ctx)).eql({ innerHTML: ' abc ' }); 78 | 79 | ctx = new sandbox.Context({ body: 'foo="bar" id=\'test\' a=b > abc <' }); 80 | 
expect(sandbox.eval('document.getElementById("test")', ctx)).eql({ innerHTML: ' abc ' }); 81 | 82 | // Cache test 83 | ctx = new sandbox.Context({ body: 'foobar' }); 84 | expect(sandbox.eval('document.getElementById("test")', ctx)).eql({ innerHTML: 'foobar' }); 85 | }); 86 | }); 87 | -------------------------------------------------------------------------------- /test/test-timeout.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codemanki/cloudscraper/829e4d3c1fd21d8b8b940ca6e56757a970b56ada/test/test-timeout.js --------------------------------------------------------------------------------
The owner of this website (site.com.com) has banned your IP address (91.91.111.11).
Completing the CAPTCHA proves you are a human and gives you temporary access to the web property.
If you are on a personal connection, like at home, you can run an anti-virus scan on your device to make sure it is not infected with malware.
If you are at an office or shared network, you can ask the network administrator to run a scan across the network looking for misconfigured or infected devices.
Another way to prevent getting this page in the future is to use Privacy Pass. You may need to download version 2.0 now from the Chrome Web Store.
This process is automatic. Your browser will redirect to your requested content shortly.
Please allow up to 5 seconds…
The email is [email protected] 14 |
The email is ' + EMAIL + '
The email is ' + protection + '
\n The email is ' + EMAIL + '\r\n
\n The email is ' + protection + '\r\n