├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ └── general_issue.yaml └── workflows │ ├── check_test.yaml │ └── docker_hub.yaml ├── .gitignore ├── Dockerfile ├── LICENSE.md ├── README.md ├── package-lock.json ├── package.json ├── src ├── data │ ├── capsolver.png │ ├── fakePage.html │ └── sdo.gif ├── endpoints │ ├── getSource.js │ ├── solveTurnstile.max.js │ ├── solveTurnstile.min.js │ └── wafSession.js ├── index.js └── module │ ├── createBrowser.js │ └── reqValidate.js └── tests ├── endpoints.test.js └── validate.test.js /.dockerignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | npm-debug.log 3 | Dockerfile 4 | .dockerignore 5 | .git 6 | .gitignore 7 | README.md 8 | .env 9 | .env.* -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/general_issue.yaml: -------------------------------------------------------------------------------- 1 | name: Report Issue 2 | description: Please use this to report any issue 3 | labels: [triage] 4 | assignees: 5 | - zfcsoftware 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: | 10 | Please take care to fill in all fields. Recreating the issue will speed up its resolution. Thank you for contributing to the betterment of the library by reporting issues. 11 | - type: textarea 12 | id: issue-detail 13 | attributes: 14 | label: Description 15 | description: Please describe the problem you are experiencing. You only need to provide information about the problem in this field. 16 | validations: 17 | required: true 18 | - type: textarea 19 | id: issue-recreate 20 | attributes: 21 | label: Full steps to reproduce the issue 22 | description: Please provide a full working code to reproduce the issue. Make sure that the code you provide is directly executable. This step is very important to resolve the issue. 23 | validations: 24 | required: true 25 | - type: dropdown 26 | id: issue-type 27 | attributes: 28 | label: Issue Type 29 | description: What type of issue would you like to report? 30 | multiple: true 31 | options: 32 | - Bug 33 | - Build/Install 34 | - Performance 35 | - Support 36 | - Feature Request 37 | - Documentation Request 38 | - Others 39 | - type: dropdown 40 | id: Operating-System 41 | attributes: 42 | label: Operating System 43 | description: What OS are you seeing the issue in? If you don't see your OS listed, please provide more details in the "Description" section above. 44 | multiple: true 45 | options: 46 | - Windows 10 47 | - Linux 48 | - Mac OS 49 | - Other 50 | - type: dropdown 51 | id: use-type 52 | attributes: 53 | label: Do you use Docker? 54 | description: Are you running it with Docker or on your local computer? 55 | multiple: false 56 | options: 57 | - Docker 58 | - I don't use Docker -------------------------------------------------------------------------------- /.github/workflows/check_test.yaml: -------------------------------------------------------------------------------- 1 | name: Run Test 2 | 3 | on: 4 | push: 5 | branches: ["main"] 6 | pull_request: 7 | branches: ["main"] 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | 12 | strategy: 13 | matrix: 14 | node-version: [20.x] 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Use Node.js ${{ matrix.node-version }} 18 | uses: actions/setup-node@v4 19 | with: 20 | node-version: ${{ matrix.node-version }} 21 | cache: "npm" 22 | - name: Install dependencies 23 | run: | 24 | npm install 25 | sudo apt-get install -y libnss3 libatk-bridge2.0-0 libxcomposite1 libxdamage1 libxrandr2 libgbm1 libasound2t64 libpangocairo-1.0-0 libatk1.0-0 libatk-bridge2.0-0 libgtk-3-0 xvfb 26 | 27 | - name: Run a test 28 | run: npm test 29 | -------------------------------------------------------------------------------- /.github/workflows/docker_hub.yaml: -------------------------------------------------------------------------------- 1 | name: Publish Docker Image 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v2 15 | 16 | - name: Set up Node.js 17 | uses: actions/setup-node@v2 18 | with: 19 | node-version: '20.x' 20 | 21 | - name: Install dependencies 22 | run: npm install 23 | 24 | - name: Get current version from package.json 25 | id: get_version 26 | run: echo "VERSION=$(node -p "require('./package.json').version")" >> $GITHUB_ENV 27 | 28 | - name: Get latest version from Docker Hub 29 | id: get_docker_version 30 | run: | 31 | LATEST_VERSION=$(curl -s "https://hub.docker.com/v2/repositories/zfcsoftware/cf-clearance-scraper/tags?page_size=1" | jq -r '.results[0].name') 32 | echo "DOCKER_VERSION=$LATEST_VERSION" >> $GITHUB_ENV 33 | 34 | - name: Compare versions 35 | id: compare_versions 36 | run: | 37 | if [ "${{ env.VERSION }}" != "${{ env.DOCKER_VERSION }}" ]; then 38 | echo "Versions are different. Proceeding to build and publish." 39 | echo "publish=true" >> $GITHUB_ENV 40 | else 41 | echo "Versions are the same. Skipping publish." 42 | echo "publish=false" >> $GITHUB_ENV 43 | fi 44 | 45 | - name: Build and Push Docker Image 46 | if: env.publish == 'true' 47 | env: 48 | DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} 49 | DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} 50 | run: | 51 | echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin 52 | docker build -t zfcsoftware/cf-clearance-scraper:$VERSION . 53 | docker tag zfcsoftware/cf-clearance-scraper:${{ env.VERSION }} zfcsoftware/cf-clearance-scraper:latest 54 | docker push zfcsoftware/cf-clearance-scraper:latest 55 | docker push zfcsoftware/cf-clearance-scraper:$VERSION 56 | - name: Logout from Docker Hub 57 | run: docker logout 58 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:latest 2 | 3 | RUN apt-get update && apt-get install -y \ 4 | wget \ 5 | gnupg \ 6 | ca-certificates \ 7 | apt-transport-https \ 8 | chromium \ 9 | chromium-driver \ 10 | xvfb \ 11 | && rm -rf /var/lib/apt/lists/* 12 | 13 | ENV CHROME_BIN=/usr/bin/chromium 14 | 15 | WORKDIR /app 16 | 17 | COPY package*.json ./ 18 | 19 | RUN npm update 20 | RUN npm install 21 | RUN npm i -g pm2 22 | COPY . . 23 | 24 | EXPOSE 3000 25 | 26 | CMD ["pm2-runtime", "src/index.js"] 27 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 - 2024 @zfcsoftware 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | > [!WARNING] 2 | > This repo will no longer receive updates. Thank you to everyone who supported it. 3 | 4 | # CF Clearance Scraper 5 | 6 | This library was created for testing and training purposes to retrieve the page source of websites, create Cloudflare Turnstile tokens and create Cloudflare WAF sessions. 7 | 8 | Cloudflare protection not only checks cookies in the request. It also checks variables in the header. For this reason, it is recommended to use it with the sample code in this readme file. 9 | 10 | Cookies with cf in the name belong to Cloudflare. You can find out what these cookies do and how long they are valid by **[Clicking Here](https://developers.cloudflare.com/fundamentals/reference/policies-compliances/cloudflare-cookies/)**. 11 | 12 | ## Sponsor 13 | 14 | [![Capsolver](src/data/capsolver.png)](https://www.capsolver.com/?utm_source=github&utm_medium=repo&utm_campaign=scraping&utm_term=cf-clearance-scraper) 15 | 16 | [![ScrapeDo](src/data/sdo.gif)](https://scrape.do/?utm_source=github&utm_medium=repo_ccs) 17 | 18 | ## Installation 19 | 20 | Installation with Docker is recommended. 21 | 22 | **Docker** 23 | 24 | Please make sure you have installed the latest image. If you get an error, try downloading the latest version by going to Docker Hub. 25 | 26 | ```bash 27 | sudo docker rmi zfcsoftware/cf-clearance-scraper:latest --force 28 | ``` 29 | 30 | ```bash 31 | docker run -d -p 3000:3000 \ 32 | -e PORT=3000 \ 33 | -e browserLimit=20 \ 34 | -e timeOut=60000 \ 35 | zfcsoftware/cf-clearance-scraper:latest 36 | ``` 37 | 38 | **Github** 39 | 40 | ```bash 41 | git clone https://github.com/zfcsoftware/cf-clearance-scraper 42 | cd cf-clearance-scraper 43 | npm install 44 | npm run start 45 | ``` 46 | 47 | ## Create Cloudflare WAF Session 48 | 49 | By creating a session as in the example, you can send multiple requests to the same site without being blocked. Since sites may have TLS protection, it is recommended to send requests with the library in the example. 50 | 51 | ```js 52 | const initCycleTLS = require('cycletls'); 53 | async function test() { 54 | const session = await fetch('http://localhost:3000/cf-clearance-scraper', { 55 | method: 'POST', 56 | headers: { 57 | 'Content-Type': 'application/json' 58 | }, 59 | body: JSON.stringify({ 60 | url: 'https://nopecha.com/demo/cloudflare', 61 | mode: "waf-session", 62 | // proxy:{ 63 | // host: '127.0.0.1', 64 | // port: 3000, 65 | // username: 'username', 66 | // password: 'password' 67 | // } 68 | }) 69 | }).then(res => res.json()).catch(err => { console.error(err); return null }); 70 | 71 | if (!session || session.code != 200) return console.error(session); 72 | 73 | const cycleTLS = await initCycleTLS(); 74 | const response = await cycleTLS('https://nopecha.com/demo/cloudflare', { 75 | body: '', 76 | ja3: '772,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,23-27-65037-43-51-45-16-11-13-17513-5-18-65281-0-10-35,25497-29-23-24,0', // https://scrapfly.io/web-scraping-tools/ja3-fingerprint 77 | userAgent: session.headers["user-agent"], 78 | // proxy: 'http://username:password@hostname.com:443', 79 | headers: { 80 | ...session.headers, 81 | cookie: session.cookies.map(cookie => `${cookie.name}=${cookie.value}`).join('; ') 82 | } 83 | }, 'get'); 84 | 85 | console.log(response.status); 86 | cycleTLS.exit().catch(err => { }); 87 | } 88 | test() 89 | ``` 90 | 91 | ## Create Turnstile Token with Little Resource Consumption 92 | 93 | This endpoint allows you to generate tokens for a Cloudflare Turnstile Captcha. It blocks the request that fetches the page resource and instead makes the page resource a simple Turnstile render page. This allows you to generate tokens without having to load any additional css or js files. 94 | 95 | However, in this method, the siteKey variable must be sent to Turnstile along with the site to create the token. If this does not work, you can examine the token generation system by loading the full page resource described in the next section. 96 | 97 | ```js 98 | fetch('http://localhost:3000/cf-clearance-scraper', { 99 | method: 'POST', 100 | headers: { 101 | 'Content-Type': 'application/json' 102 | }, 103 | body: JSON.stringify({ 104 | url: 'https://turnstile.zeroclover.io/', 105 | siteKey: "0x4AAAAAAAEwzhD6pyKkgXC0", 106 | mode: "turnstile-min", 107 | // proxy:{ 108 | // host: '127.0.0.1', 109 | // port: 3000, 110 | // username: 'username', 111 | // password: 'password' 112 | // } 113 | }) 114 | }) 115 | .then(res => res.json()) 116 | .then(console.log) 117 | .catch(console.log); 118 | ``` 119 | 120 | ## Creating Turnstile Token with Full Page Load 121 | 122 | This example request goes to the page at the given url address with a real browser, resolves the Turnstile and returns you the token. 123 | 124 | ```js 125 | fetch('http://localhost:3000/cf-clearance-scraper', { 126 | method: 'POST', 127 | headers: { 128 | 'Content-Type': 'application/json' 129 | }, 130 | body: JSON.stringify({ 131 | url: 'https://turnstile.zeroclover.io/', 132 | mode: "turnstile-max", 133 | // proxy:{ 134 | // host: '127.0.0.1', 135 | // port: 3000, 136 | // username: 'username', 137 | // password: 'password' 138 | // } 139 | }) 140 | }) 141 | .then(res => res.json()) 142 | .then(console.log) 143 | .catch(console.log); 144 | ``` 145 | 146 | ## Getting Page Source from a Site Protected with Cloudflare WAF 147 | 148 | With this request you can scrape the page source of a website protected with CF WAF. 149 | 150 | ```js 151 | fetch('http://localhost:3000/cf-clearance-scraper', { 152 | method: 'POST', 153 | headers: { 154 | 'Content-Type': 'application/json' 155 | }, 156 | body: JSON.stringify({ 157 | url: 'https://nopecha.com/demo/cloudflare', 158 | mode: "source" 159 | // proxy:{ 160 | // host: '127.0.0.1', 161 | // port: 3000, 162 | // username: 'username', 163 | // password: 'password' 164 | // } 165 | }) 166 | }) 167 | .then(res => res.json()) 168 | .then(console.log) 169 | .catch(console.log); 170 | ``` 171 | 172 | ## Quick Questions and Answers 173 | 174 | ### Does It Open A New Browser On Every Request? 175 | No, a new context is started with each request and closed when the job is finished. Processes are executed with isolated contexts through a single browser. 176 | 177 | ### How Do I Limit the Browser Context to Open? 178 | You can do this by changing the process.env.browserLimit value. The default is 20 179 | 180 | ### How Do I Add Authentication to Api? 181 | You can add authorisation by changing the process.env.authToken variable. If this variable is added, it returns 401 if the authToken variable in the request body is not equal to the token you specify. 182 | 183 | ### How Do I Set The Timeout Time? 184 | You can give the variable process.env.timeOut a value in milliseconds. The default is 60000. 185 | 186 | ## Disclaimer of Liability 187 | This repository was created purely for testing and training purposes. The user is responsible for any prohibited liability that may arise from its use. 188 | The library is not intended to harm any site or company. The user is responsible for any damage that may arise. 189 | Users of this repository are deemed to have accepted this disclaimer. 190 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cf-clearance-scraper", 3 | "version": "2.1.3", 4 | "main": "index.js", 5 | "scripts": { 6 | "start": "node src/index.js", 7 | "test": "node --experimental-vm-modules ./node_modules/.bin/jest --detectOpenHandles --verbose" 8 | }, 9 | "jest": { 10 | "testMatch": [ 11 | "**/tests/**/*.js" 12 | ], 13 | "verbose": true 14 | }, 15 | "keywords": [ 16 | "cf-clearance", 17 | "cloudflare", 18 | "waf", 19 | "scraper", 20 | "puppeteer", 21 | "xvfb", 22 | "turnstile", 23 | "bypass", 24 | "undetected", 25 | "stealth" 26 | ], 27 | "author": "zfcsoftware", 28 | "license": "ISC", 29 | "description": "This package is an experimental and educational package created for Cloudflare protections.", 30 | "dependencies": { 31 | "ajv": "^8.17.1", 32 | "ajv-formats": "^3.0.1", 33 | "body-parser": "^1.20.3", 34 | "cors": "^2.8.5", 35 | "dotenv": "^16.4.5", 36 | "express": "^4.21.0", 37 | "jest": "^29.7.0", 38 | "puppeteer-real-browser": "^1.4.0", 39 | "supertest": "^7.0.0" 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/data/capsolver.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZFC-Digital/cf-clearance-scraper/c94563410342eacae92a1f2921f6000d84539ad7/src/data/capsolver.png -------------------------------------------------------------------------------- /src/data/fakePage.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 |
12 | 13 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/data/sdo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZFC-Digital/cf-clearance-scraper/c94563410342eacae92a1f2921f6000d84539ad7/src/data/sdo.gif -------------------------------------------------------------------------------- /src/endpoints/getSource.js: -------------------------------------------------------------------------------- 1 | function getSource({ url, proxy }) { 2 | return new Promise(async (resolve, reject) => { 3 | if (!url) return reject("Missing url parameter"); 4 | const context = await global.browser 5 | .createBrowserContext({ 6 | proxyServer: proxy ? `http://${proxy.host}:${proxy.port}` : undefined, // https://pptr.dev/api/puppeteer.browsercontextoptions 7 | }) 8 | .catch(() => null); 9 | if (!context) return reject("Failed to create browser context"); 10 | 11 | let isResolved = false; 12 | 13 | var cl = setTimeout(async () => { 14 | if (!isResolved) { 15 | await context.close(); 16 | reject("Timeout Error"); 17 | } 18 | }, global.timeOut || 60000); 19 | 20 | try { 21 | const page = await context.newPage(); 22 | 23 | if (proxy?.username && proxy?.password) 24 | await page.authenticate({ 25 | username: proxy.username, 26 | password: proxy.password, 27 | }); 28 | 29 | await page.setRequestInterception(true); 30 | page.on("request", async (request) => request.continue()); 31 | page.on("response", async (res) => { 32 | try { 33 | if ( 34 | [200, 302].includes(res.status()) && 35 | [url, url + "/"].includes(res.url()) 36 | ) { 37 | await page 38 | .waitForNavigation({ waitUntil: "load", timeout: 5000 }) 39 | .catch(() => {}); 40 | const html = await page.content(); 41 | await context.close(); 42 | isResolved = true; 43 | clearInterval(cl); 44 | resolve(html); 45 | } 46 | } catch (e) {} 47 | }); 48 | await page.goto(url, { 49 | waitUntil: "domcontentloaded", 50 | }); 51 | } catch (e) { 52 | if (!isResolved) { 53 | await context.close(); 54 | clearInterval(cl); 55 | reject(e.message); 56 | } 57 | } 58 | }); 59 | } 60 | module.exports = getSource; 61 | -------------------------------------------------------------------------------- /src/endpoints/solveTurnstile.max.js: -------------------------------------------------------------------------------- 1 | const fs = require("fs"); 2 | function solveTurnstileMin({ url, proxy }) { 3 | return new Promise(async (resolve, reject) => { 4 | if (!url) return reject("Missing url parameter"); 5 | 6 | const context = await global.browser 7 | .createBrowserContext({ 8 | proxyServer: proxy ? `http://${proxy.host}:${proxy.port}` : undefined, // https://pptr.dev/api/puppeteer.browsercontextoptions 9 | }) 10 | .catch(() => null); 11 | 12 | if (!context) return reject("Failed to create browser context"); 13 | 14 | let isResolved = false; 15 | 16 | var cl = setTimeout(async () => { 17 | if (!isResolved) { 18 | await context.close(); 19 | reject("Timeout Error"); 20 | } 21 | }, global.timeOut || 60000); 22 | 23 | try { 24 | const page = await context.newPage(); 25 | 26 | if (proxy?.username && proxy?.password) 27 | await page.authenticate({ 28 | username: proxy.username, 29 | password: proxy.password, 30 | }); 31 | 32 | await page.evaluateOnNewDocument(() => { 33 | let token = null; 34 | async function waitForToken() { 35 | while (!token) { 36 | try { 37 | token = window.turnstile.getResponse(); 38 | } catch (e) {} 39 | await new Promise((resolve) => setTimeout(resolve, 500)); 40 | } 41 | var c = document.createElement("input"); 42 | c.type = "hidden"; 43 | c.name = "cf-response"; 44 | c.value = token; 45 | document.body.appendChild(c); 46 | } 47 | waitForToken(); 48 | }); 49 | 50 | await page.goto(url, { 51 | waitUntil: "domcontentloaded", 52 | }); 53 | 54 | await page.waitForSelector('[name="cf-response"]', { 55 | timeout: 60000, 56 | }); 57 | const token = await page.evaluate(() => { 58 | try { 59 | return document.querySelector('[name="cf-response"]').value; 60 | } catch (e) { 61 | return null; 62 | } 63 | }); 64 | isResolved = true; 65 | clearInterval(cl); 66 | await context.close(); 67 | if (!token || token.length < 10) return reject("Failed to get token"); 68 | return resolve(token); 69 | } catch (e) { 70 | console.log(e); 71 | 72 | if (!isResolved) { 73 | await context.close(); 74 | clearInterval(cl); 75 | reject(e.message); 76 | } 77 | } 78 | }); 79 | } 80 | module.exports = solveTurnstileMin; 81 | -------------------------------------------------------------------------------- /src/endpoints/solveTurnstile.min.js: -------------------------------------------------------------------------------- 1 | function solveTurnstileMin({ url, proxy, siteKey }) { 2 | return new Promise(async (resolve, reject) => { 3 | if (!url) return reject("Missing url parameter"); 4 | if (!siteKey) return reject("Missing siteKey parameter"); 5 | 6 | const context = await global.browser 7 | .createBrowserContext({ 8 | proxyServer: proxy ? `http://${proxy.host}:${proxy.port}` : undefined, // https://pptr.dev/api/puppeteer.browsercontextoptions 9 | }) 10 | .catch(() => null); 11 | if (!context) return reject("Failed to create browser context"); 12 | 13 | let isResolved = false; 14 | 15 | var cl = setTimeout(async () => { 16 | if (!isResolved) { 17 | await context.close(); 18 | reject("Timeout Error"); 19 | } 20 | }, global.timeOut || 60000); 21 | 22 | try { 23 | const page = await context.newPage(); 24 | 25 | if (proxy?.username && proxy?.password) 26 | await page.authenticate({ 27 | username: proxy.username, 28 | password: proxy.password, 29 | }); 30 | 31 | await page.setRequestInterception(true); 32 | 33 | page.on("request", async (request) => { 34 | if ( 35 | [url, url + "/"].includes(request.url()) && 36 | request.resourceType() === "document" 37 | ) { 38 | const response = await request.respond({ 39 | status: 200, 40 | contentType: "text/html", 41 | body: String( 42 | require("fs").readFileSync("./src/data/fakePage.html") 43 | ).replace(//g, siteKey), 44 | }); 45 | } else { 46 | await request.continue(); 47 | } 48 | }); 49 | 50 | await page.goto(url, { 51 | waitUntil: "domcontentloaded", 52 | }); 53 | 54 | await page.waitForSelector('[name="cf-response"]', { 55 | timeout: 60000, 56 | }); 57 | 58 | const token = await page.evaluate(() => { 59 | try { 60 | return document.querySelector('[name="cf-response"]').value; 61 | } catch (e) { 62 | return null; 63 | } 64 | }); 65 | 66 | isResolved = true; 67 | clearInterval(cl); 68 | await context.close(); 69 | if (!token || token.length < 10) return reject("Failed to get token"); 70 | return resolve(token); 71 | } catch (e) { 72 | if (!isResolved) { 73 | await context.close(); 74 | clearInterval(cl); 75 | reject(e.message); 76 | } 77 | } 78 | }); 79 | } 80 | module.exports = solveTurnstileMin; 81 | -------------------------------------------------------------------------------- /src/endpoints/wafSession.js: -------------------------------------------------------------------------------- 1 | async function findAcceptLanguage(page) { 2 | return await page.evaluate(async () => { 3 | const result = await fetch("https://httpbin.org/get") 4 | .then((res) => res.json()) 5 | .then( 6 | (res) => 7 | res.headers["Accept-Language"] || res.headers["accept-language"] 8 | ) 9 | .catch(() => null); 10 | return result; 11 | }); 12 | } 13 | 14 | function getSource({ url, proxy }) { 15 | return new Promise(async (resolve, reject) => { 16 | if (!url) return reject("Missing url parameter"); 17 | const context = await global.browser 18 | .createBrowserContext({ 19 | proxyServer: proxy ? `http://${proxy.host}:${proxy.port}` : undefined, // https://pptr.dev/api/puppeteer.browsercontextoptions 20 | }) 21 | .catch(() => null); 22 | if (!context) return reject("Failed to create browser context"); 23 | 24 | let isResolved = false; 25 | 26 | var cl = setTimeout(async () => { 27 | if (!isResolved) { 28 | await context.close(); 29 | reject("Timeout Error"); 30 | } 31 | }, global.timeOut || 60000); 32 | 33 | try { 34 | const page = await context.newPage(); 35 | 36 | if (proxy?.username && proxy?.password) 37 | await page.authenticate({ 38 | username: proxy.username, 39 | password: proxy.password, 40 | }); 41 | let acceptLanguage = await findAcceptLanguage(page); 42 | await page.setRequestInterception(true); 43 | page.on("request", async (request) => request.continue()); 44 | page.on("response", async (res) => { 45 | try { 46 | if ( 47 | [200, 302].includes(res.status()) && 48 | [url, url + "/"].includes(res.url()) 49 | ) { 50 | await page 51 | .waitForNavigation({ waitUntil: "load", timeout: 5000 }) 52 | .catch(() => {}); 53 | const cookies = await page.cookies(); 54 | let headers = await res.request().headers(); 55 | delete headers["content-type"]; 56 | delete headers["accept-encoding"]; 57 | delete headers["accept"]; 58 | delete headers["content-length"]; 59 | headers["accept-language"] = acceptLanguage; 60 | await context.close(); 61 | isResolved = true; 62 | clearInterval(cl); 63 | resolve({ cookies, headers }); 64 | } 65 | } catch (e) {} 66 | }); 67 | 68 | await page.goto(url, { 69 | waitUntil: "domcontentloaded", 70 | }); 71 | } catch (e) { 72 | if (!isResolved) { 73 | await context.close(); 74 | clearInterval(cl); 75 | reject(e.message); 76 | } 77 | } 78 | }); 79 | } 80 | module.exports = getSource; 81 | -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | const express = require('express') 2 | const app = express() 3 | const port = process.env.PORT || 3000 4 | const bodyParser = require('body-parser') 5 | const authToken = process.env.authToken || null 6 | const cors = require('cors') 7 | const reqValidate = require('./module/reqValidate') 8 | 9 | global.browserLength = 0 10 | global.browserLimit = Number(process.env.browserLimit) || 20 11 | global.timeOut = Number(process.env.timeOut || 60000) 12 | 13 | app.use(bodyParser.json({})) 14 | app.use(bodyParser.urlencoded({ extended: true })) 15 | app.use(cors()) 16 | if (process.env.NODE_ENV !== 'development') { 17 | let server = app.listen(port, () => { console.log(`Server running on port ${port}`) }) 18 | try { 19 | server.timeout = global.timeOut 20 | } catch (e) { } 21 | } 22 | if (process.env.SKIP_LAUNCH != 'true') require('./module/createBrowser') 23 | 24 | const getSource = require('./endpoints/getSource') 25 | const solveTurnstileMin = require('./endpoints/solveTurnstile.min') 26 | const solveTurnstileMax = require('./endpoints/solveTurnstile.max') 27 | const wafSession = require('./endpoints/wafSession') 28 | 29 | 30 | app.post('/cf-clearance-scraper', async (req, res) => { 31 | 32 | const data = req.body 33 | 34 | const check = reqValidate(data) 35 | 36 | if (check !== true) return res.status(400).json({ code: 400, message: 'Bad Request', schema: check }) 37 | 38 | if (authToken && data.authToken !== authToken) return res.status(401).json({ code: 401, message: 'Unauthorized' }) 39 | 40 | if (global.browserLength >= global.browserLimit) return res.status(429).json({ code: 429, message: 'Too Many Requests' }) 41 | 42 | if (process.env.SKIP_LAUNCH != 'true' && !global.browser) return res.status(500).json({ code: 500, message: 'The scanner is not ready yet. Please try again a little later.' }) 43 | 44 | var result = { code: 500 } 45 | 46 | global.browserLength++ 47 | 48 | switch (data.mode) { 49 | case "source": 50 | result = await getSource(data).then(res => { return { source: res, code: 200 } }).catch(err => { return { code: 500, message: err.message } }) 51 | break; 52 | case "turnstile-min": 53 | result = await solveTurnstileMin(data).then(res => { return { token: res, code: 200 } }).catch(err => { return { code: 500, message: err.message } }) 54 | break; 55 | case "turnstile-max": 56 | result = await solveTurnstileMax(data).then(res => { return { token: res, code: 200 } }).catch(err => { return { code: 500, message: err.message } }) 57 | break; 58 | case "waf-session": 59 | result = await wafSession(data).then(res => { return { ...res, code: 200 } }).catch(err => { return { code: 500, message: err.message } }) 60 | break; 61 | } 62 | 63 | global.browserLength-- 64 | 65 | res.status(result.code ?? 500).send(result) 66 | }) 67 | 68 | app.use((req, res) => { res.status(404).json({ code: 404, message: 'Not Found' }) }) 69 | 70 | if (process.env.NODE_ENV == 'development') module.exports = app 71 | -------------------------------------------------------------------------------- /src/module/createBrowser.js: -------------------------------------------------------------------------------- 1 | const { connect } = require("puppeteer-real-browser") 2 | async function createBrowser() { 3 | try { 4 | if (global.finished == true) return 5 | 6 | global.browser = null 7 | 8 | // console.log('Launching the browser...'); 9 | 10 | const { browser } = await connect({ 11 | headless: false, 12 | turnstile: true, 13 | connectOption: { defaultViewport: null }, 14 | disableXvfb: false, 15 | }) 16 | 17 | // console.log('Browser launched'); 18 | 19 | global.browser = browser; 20 | 21 | browser.on('disconnected', async () => { 22 | if (global.finished == true) return 23 | console.log('Browser disconnected'); 24 | await new Promise(resolve => setTimeout(resolve, 3000)); 25 | await createBrowser(); 26 | }) 27 | 28 | } catch (e) { 29 | console.log(e.message); 30 | if (global.finished == true) return 31 | await new Promise(resolve => setTimeout(resolve, 3000)); 32 | await createBrowser(); 33 | } 34 | } 35 | createBrowser() -------------------------------------------------------------------------------- /src/module/reqValidate.js: -------------------------------------------------------------------------------- 1 | const Ajv = require("ajv") 2 | const addFormats = require("ajv-formats") 3 | 4 | const ajv = new Ajv() 5 | addFormats(ajv) 6 | 7 | const schema = { 8 | "type": "object", 9 | "properties": { 10 | "mode": { 11 | "type": "string", 12 | "enum": ["source", "turnstile-min", "turnstile-max", "waf-session"], 13 | }, 14 | "proxy": { 15 | "type": "object", 16 | "properties": { 17 | "host": { "type": "string" }, 18 | "port": { "type": "integer" }, 19 | "username": { "type": "string" }, 20 | "password": { "type": "string" } 21 | }, 22 | "additionalProperties": false 23 | }, 24 | "url": { 25 | "type": "string", 26 | "format": "uri", 27 | }, 28 | "authToken": { 29 | "type": "string" 30 | }, 31 | "siteKey": { 32 | "type": "string" 33 | } 34 | }, 35 | "required": ["mode", "url"], 36 | "additionalProperties": false 37 | } 38 | 39 | // const data = { 40 | // mode: "source", 41 | // url: "https://example.com", 42 | // proxy: { 43 | // host: "localhost", 44 | // port: 8080, 45 | // username: "test", 46 | // password: "test" 47 | // }, 48 | // authToken: "123456" 49 | // } 50 | 51 | 52 | function validate(data) { 53 | const valid = ajv.validate(schema, data) 54 | if (!valid) return ajv.errors 55 | else return true 56 | } 57 | 58 | module.exports = validate -------------------------------------------------------------------------------- /tests/endpoints.test.js: -------------------------------------------------------------------------------- 1 | process.env.NODE_ENV = 'development' 2 | const server = require('../src/index') 3 | const request = require("supertest") 4 | 5 | beforeAll(async () => { 6 | while (!global.browser) { 7 | await new Promise(resolve => setTimeout(resolve, 1000)); 8 | } 9 | }, 30000); 10 | 11 | 12 | afterAll(async () => { 13 | global.finished = true 14 | await global.browser.close() 15 | }) 16 | 17 | 18 | test('Scraping Page Source from Cloudflare Protection', async () => { 19 | return request(server) 20 | .post("/cf-clearance-scraper") 21 | .send({ 22 | url: 'https://nopecha.com/demo/cloudflare', 23 | mode: "source" 24 | }) 25 | .expect(200) 26 | .then(response => { expect(response.body.code).toEqual(200); }) 27 | }, 60000) 28 | 29 | 30 | test('Creating a Turnstile Token With Site Key [min]', async () => { 31 | return request(server) 32 | .post("/cf-clearance-scraper") 33 | .send({ 34 | url: 'https://turnstile.zeroclover.io/', 35 | siteKey: "0x4AAAAAAAEwzhD6pyKkgXC0", 36 | mode: "turnstile-min" 37 | }) 38 | .expect(200) 39 | .then(response => { expect(response.body.code).toEqual(200); }) 40 | }, 60000) 41 | 42 | test('Creating a Turnstile Token With Site Key [max]', async () => { 43 | return request(server) 44 | .post("/cf-clearance-scraper") 45 | .send({ 46 | url: 'https://turnstile.zeroclover.io/', 47 | mode: "turnstile-max" 48 | }) 49 | .expect(200) 50 | .then(response => { expect(response.body.code).toEqual(200); }) 51 | }, 60000) 52 | 53 | test('Create Cloudflare WAF Session', async () => { 54 | return request(server) 55 | .post("/cf-clearance-scraper") 56 | .send({ 57 | url: 'https://nopecha.com/demo/cloudflare', 58 | mode: "waf-session" 59 | }) 60 | .expect(200) 61 | .then(response => { expect(response.body.code).toEqual(200); }) 62 | }, 60000) -------------------------------------------------------------------------------- /tests/validate.test.js: -------------------------------------------------------------------------------- 1 | process.env.NODE_ENV = 'development' 2 | process.env.SKIP_LAUNCH = "true" 3 | process.env.authToken = "123456" 4 | process.env.browserLimit = -1 5 | 6 | const server = require('../src/index') 7 | const request = require("supertest") 8 | 9 | test('Request Authorisation Control Test', async () => { 10 | return request(server) 11 | .post("/cf-clearance-scraper") 12 | .send({ 13 | url: 'https://nopecha.com/demo/cloudflare', 14 | mode: "source" 15 | }) 16 | .expect(401) 17 | }, 10000) 18 | 19 | test('Browser Context Limit Control Test', async () => { 20 | return request(server) 21 | .post("/cf-clearance-scraper") 22 | .send({ 23 | url: 'https://nopecha.com/demo/cloudflare', 24 | mode: "source", 25 | authToken: "123456" 26 | }) 27 | .expect(429) 28 | }, 10000) --------------------------------------------------------------------------------