├── .dockerignore
├── .github
├── ISSUE_TEMPLATE
│ └── general_issue.yaml
└── workflows
│ ├── check_test.yaml
│ └── docker_hub.yaml
├── .gitignore
├── Dockerfile
├── LICENSE.md
├── README.md
├── package-lock.json
├── package.json
├── src
├── data
│ ├── capsolver.png
│ ├── fakePage.html
│ └── sdo.gif
├── endpoints
│ ├── getSource.js
│ ├── solveTurnstile.max.js
│ ├── solveTurnstile.min.js
│ └── wafSession.js
├── index.js
└── module
│ ├── createBrowser.js
│ └── reqValidate.js
└── tests
├── endpoints.test.js
└── validate.test.js
/.dockerignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | npm-debug.log
3 | Dockerfile
4 | .dockerignore
5 | .git
6 | .gitignore
7 | README.md
8 | .env
9 | .env.*
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/general_issue.yaml:
--------------------------------------------------------------------------------
1 | name: Report Issue
2 | description: Please use this to report any issue
3 | labels: [triage]
4 | assignees:
5 | - zfcsoftware
6 | body:
7 | - type: markdown
8 | attributes:
9 | value: |
10 | Please take care to fill in all fields. Recreating the issue will speed up its resolution. Thank you for contributing to the betterment of the library by reporting issues.
11 | - type: textarea
12 | id: issue-detail
13 | attributes:
14 | label: Description
15 | description: Please describe the problem you are experiencing. You only need to provide information about the problem in this field.
16 | validations:
17 | required: true
18 | - type: textarea
19 | id: issue-recreate
20 | attributes:
21 | label: Full steps to reproduce the issue
22 | description: Please provide a full working code to reproduce the issue. Make sure that the code you provide is directly executable. This step is very important to resolve the issue.
23 | validations:
24 | required: true
25 | - type: dropdown
26 | id: issue-type
27 | attributes:
28 | label: Issue Type
29 | description: What type of issue would you like to report?
30 | multiple: true
31 | options:
32 | - Bug
33 | - Build/Install
34 | - Performance
35 | - Support
36 | - Feature Request
37 | - Documentation Request
38 | - Others
39 | - type: dropdown
40 | id: Operating-System
41 | attributes:
42 | label: Operating System
43 | description: What OS are you seeing the issue in? If you don't see your OS listed, please provide more details in the "Description" section above.
44 | multiple: true
45 | options:
46 | - Windows 10
47 | - Linux
48 | - Mac OS
49 | - Other
50 | - type: dropdown
51 | id: use-type
52 | attributes:
53 | label: Do you use Docker?
54 | description: Are you running it with Docker or on your local computer?
55 | multiple: false
56 | options:
57 | - Docker
58 | - I don't use Docker
--------------------------------------------------------------------------------
/.github/workflows/check_test.yaml:
--------------------------------------------------------------------------------
1 | name: Run Test
2 |
3 | on:
4 | push:
5 | branches: ["main"]
6 | pull_request:
7 | branches: ["main"]
8 | jobs:
9 | build:
10 | runs-on: ubuntu-latest
11 |
12 | strategy:
13 | matrix:
14 | node-version: [20.x]
15 | steps:
16 | - uses: actions/checkout@v4
17 | - name: Use Node.js ${{ matrix.node-version }}
18 | uses: actions/setup-node@v4
19 | with:
20 | node-version: ${{ matrix.node-version }}
21 | cache: "npm"
22 | - name: Install dependencies
23 | run: |
24 | npm install
25 | sudo apt-get install -y libnss3 libatk-bridge2.0-0 libxcomposite1 libxdamage1 libxrandr2 libgbm1 libasound2t64 libpangocairo-1.0-0 libatk1.0-0 libatk-bridge2.0-0 libgtk-3-0 xvfb
26 |
27 | - name: Run a test
28 | run: npm test
29 |
--------------------------------------------------------------------------------
/.github/workflows/docker_hub.yaml:
--------------------------------------------------------------------------------
# Publishes the Docker image whenever the version in package.json differs from
# the most recently pushed tag on Docker Hub.
name: Publish Docker Image

on:
  push:
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      # Action versions match check_test.yaml (v4).
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20.x'

      - name: Install dependencies
        run: npm install

      - name: Get current version from package.json
        id: get_version
        run: echo "VERSION=$(node -p "require('./package.json').version")" >> "$GITHUB_ENV"

      - name: Get latest version from Docker Hub
        id: get_docker_version
        run: |
          LATEST_VERSION=$(curl -s "https://hub.docker.com/v2/repositories/zfcsoftware/cf-clearance-scraper/tags?page_size=1" | jq -r '.results[0].name')
          echo "DOCKER_VERSION=$LATEST_VERSION" >> "$GITHUB_ENV"

      # Values written to GITHUB_ENV are read as ordinary shell variables here
      # rather than being interpolated into the script text with ${{ }}.
      - name: Compare versions
        id: compare_versions
        run: |
          if [ "$VERSION" != "$DOCKER_VERSION" ]; then
            echo "Versions are different. Proceeding to build and publish."
            echo "publish=true" >> "$GITHUB_ENV"
          else
            echo "Versions are the same. Skipping publish."
            echo "publish=false" >> "$GITHUB_ENV"
          fi

      # `latest` is pushed before the version tag, so the version tag is the
      # last one pushed. NOTE(review): the "Get latest version" step appears to
      # rely on this ordering via `page_size=1` — confirm before reordering.
      - name: Build and Push Docker Image
        if: env.publish == 'true'
        env:
          DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
        run: |
          echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin
          docker build -t "zfcsoftware/cf-clearance-scraper:$VERSION" .
          docker tag "zfcsoftware/cf-clearance-scraper:$VERSION" zfcsoftware/cf-clearance-scraper:latest
          docker push zfcsoftware/cf-clearance-scraper:latest
          docker push "zfcsoftware/cf-clearance-scraper:$VERSION"
      - name: Logout from Docker Hub
        run: docker logout
58 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# Image for the cf-clearance-scraper HTTP service.
# Pinned to the Node.js 20 line (the version the CI workflow tests against)
# instead of the moving `latest` tag, so rebuilds do not silently change the
# runtime.
FROM node:20

# System Chromium plus Xvfb. NOTE(review): the browser is launched with
# `headless: false` and `disableXvfb: false`, so a virtual display is
# presumably required inside the container — confirm.
RUN apt-get update && apt-get install -y \
    wget \
    gnupg \
    ca-certificates \
    apt-transport-https \
    chromium \
    chromium-driver \
    xvfb \
    && rm -rf /var/lib/apt/lists/*

ENV CHROME_BIN=/usr/bin/chromium

WORKDIR /app

# Copy only the manifests first so the dependency layers stay cached when only
# application sources change.
COPY package*.json ./

# NOTE(review): `npm update` moves dependencies to the newest versions allowed
# by package.json at build time; kept as-is because it may be intentional.
RUN npm update
RUN npm install
RUN npm i -g pm2
COPY . .

EXPOSE 3000

CMD ["pm2-runtime", "src/index.js"]
27 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 - 2024 @zfcsoftware
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | > [!WARNING]
2 | > This repo will no longer receive updates. Thank you to everyone who supported it.
3 |
4 | # CF Clearance Scraper
5 |
6 | This library was created for testing and training purposes to retrieve the page source of websites, create Cloudflare Turnstile tokens and create Cloudflare WAF sessions.
7 |
8 | Cloudflare protection checks not only the cookies in a request but also the request headers. For this reason, it is recommended to send your requests using the sample code in this README.
9 |
10 | Cookies with cf in the name belong to Cloudflare. You can find out what these cookies do and how long they are valid by **[Clicking Here](https://developers.cloudflare.com/fundamentals/reference/policies-compliances/cloudflare-cookies/)**.
11 |
12 | ## Sponsor
13 |
14 | [![CapSolver](src/data/capsolver.png)](https://www.capsolver.com/?utm_source=github&utm_medium=repo&utm_campaign=scraping&utm_term=cf-clearance-scraper)
15 |
16 | [![Scrape.do](src/data/sdo.gif)](https://scrape.do/?utm_source=github&utm_medium=repo_ccs)
17 |
18 | ## Installation
19 |
20 | Installation with Docker is recommended.
21 |
22 | **Docker**
23 |
24 | Please make sure you have installed the latest image. If you get an error, try downloading the latest version by going to Docker Hub.
25 |
26 | ```bash
27 | sudo docker rmi zfcsoftware/cf-clearance-scraper:latest --force
28 | ```
29 |
30 | ```bash
31 | docker run -d -p 3000:3000 \
32 | -e PORT=3000 \
33 | -e browserLimit=20 \
34 | -e timeOut=60000 \
35 | zfcsoftware/cf-clearance-scraper:latest
36 | ```
37 |
38 | **Github**
39 |
40 | ```bash
41 | git clone https://github.com/zfcsoftware/cf-clearance-scraper
42 | cd cf-clearance-scraper
43 | npm install
44 | npm run start
45 | ```
46 |
47 | ## Create Cloudflare WAF Session
48 |
49 | By creating a session as in the example, you can send multiple requests to the same site without being blocked. Since sites may have TLS protection, it is recommended to send requests with the library in the example.
50 |
51 | ```js
52 | const initCycleTLS = require('cycletls');
53 | async function test() {
54 | const session = await fetch('http://localhost:3000/cf-clearance-scraper', {
55 | method: 'POST',
56 | headers: {
57 | 'Content-Type': 'application/json'
58 | },
59 | body: JSON.stringify({
60 | url: 'https://nopecha.com/demo/cloudflare',
61 | mode: "waf-session",
62 | // proxy:{
63 | // host: '127.0.0.1',
64 | // port: 3000,
65 | // username: 'username',
66 | // password: 'password'
67 | // }
68 | })
69 | }).then(res => res.json()).catch(err => { console.error(err); return null });
70 |
71 | if (!session || session.code != 200) return console.error(session);
72 |
73 | const cycleTLS = await initCycleTLS();
74 | const response = await cycleTLS('https://nopecha.com/demo/cloudflare', {
75 | body: '',
76 | ja3: '772,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,23-27-65037-43-51-45-16-11-13-17513-5-18-65281-0-10-35,25497-29-23-24,0', // https://scrapfly.io/web-scraping-tools/ja3-fingerprint
77 | userAgent: session.headers["user-agent"],
78 | // proxy: 'http://username:password@hostname.com:443',
79 | headers: {
80 | ...session.headers,
81 | cookie: session.cookies.map(cookie => `${cookie.name}=${cookie.value}`).join('; ')
82 | }
83 | }, 'get');
84 |
85 | console.log(response.status);
86 | cycleTLS.exit().catch(err => { });
87 | }
88 | test()
89 | ```
90 |
91 | ## Create Turnstile Token with Little Resource Consumption
92 |
93 | This endpoint allows you to generate tokens for a Cloudflare Turnstile Captcha. It blocks the request that fetches the page resource and instead makes the page resource a simple Turnstile render page. This allows you to generate tokens without having to load any additional css or js files.
94 |
95 | However, in this method, the siteKey variable must be sent to Turnstile along with the site to create the token. If this does not work, you can examine the token generation system by loading the full page resource described in the next section.
96 |
97 | ```js
98 | fetch('http://localhost:3000/cf-clearance-scraper', {
99 | method: 'POST',
100 | headers: {
101 | 'Content-Type': 'application/json'
102 | },
103 | body: JSON.stringify({
104 | url: 'https://turnstile.zeroclover.io/',
105 | siteKey: "0x4AAAAAAAEwzhD6pyKkgXC0",
106 | mode: "turnstile-min",
107 | // proxy:{
108 | // host: '127.0.0.1',
109 | // port: 3000,
110 | // username: 'username',
111 | // password: 'password'
112 | // }
113 | })
114 | })
115 | .then(res => res.json())
116 | .then(console.log)
117 | .catch(console.log);
118 | ```
119 |
120 | ## Creating Turnstile Token with Full Page Load
121 |
122 | This example request goes to the page at the given url address with a real browser, resolves the Turnstile and returns you the token.
123 |
124 | ```js
125 | fetch('http://localhost:3000/cf-clearance-scraper', {
126 | method: 'POST',
127 | headers: {
128 | 'Content-Type': 'application/json'
129 | },
130 | body: JSON.stringify({
131 | url: 'https://turnstile.zeroclover.io/',
132 | mode: "turnstile-max",
133 | // proxy:{
134 | // host: '127.0.0.1',
135 | // port: 3000,
136 | // username: 'username',
137 | // password: 'password'
138 | // }
139 | })
140 | })
141 | .then(res => res.json())
142 | .then(console.log)
143 | .catch(console.log);
144 | ```
145 |
146 | ## Getting Page Source from a Site Protected with Cloudflare WAF
147 |
148 | With this request you can scrape the page source of a website protected with CF WAF.
149 |
150 | ```js
151 | fetch('http://localhost:3000/cf-clearance-scraper', {
152 | method: 'POST',
153 | headers: {
154 | 'Content-Type': 'application/json'
155 | },
156 | body: JSON.stringify({
157 | url: 'https://nopecha.com/demo/cloudflare',
158 | mode: "source"
159 | // proxy:{
160 | // host: '127.0.0.1',
161 | // port: 3000,
162 | // username: 'username',
163 | // password: 'password'
164 | // }
165 | })
166 | })
167 | .then(res => res.json())
168 | .then(console.log)
169 | .catch(console.log);
170 | ```
171 |
172 | ## Quick Questions and Answers
173 |
174 | ### Does It Open A New Browser On Every Request?
175 | No, a new context is started with each request and closed when the job is finished. Processes are executed with isolated contexts through a single browser.
176 |
177 | ### How Do I Limit the Browser Context to Open?
178 | You can do this by setting the `browserLimit` environment variable (`process.env.browserLimit`). The default is 20.
179 |
180 | ### How Do I Add Authentication to the API?
181 | You can require authorisation by setting the `authToken` environment variable (`process.env.authToken`). When it is set, any request whose body does not contain a matching `authToken` field is rejected with a 401 response.
182 |
183 | ### How Do I Set the Timeout?
184 | Set the `timeOut` environment variable (`process.env.timeOut`) to a value in milliseconds. The default is 60000.
185 |
186 | ## Disclaimer of Liability
187 | This repository was created purely for testing and training purposes. The user bears sole responsibility for any liability that may arise from its use, including any prohibited use.
188 | The library is not intended to harm any site or company. The user is responsible for any damage that may arise.
189 | Users of this repository are deemed to have accepted this disclaimer.
190 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "cf-clearance-scraper",
3 | "version": "2.1.3",
4 | "main": "index.js",
5 | "scripts": {
6 | "start": "node src/index.js",
7 | "test": "node --experimental-vm-modules ./node_modules/.bin/jest --detectOpenHandles --verbose"
8 | },
9 | "jest": {
10 | "testMatch": [
11 | "**/tests/**/*.js"
12 | ],
13 | "verbose": true
14 | },
15 | "keywords": [
16 | "cf-clearance",
17 | "cloudflare",
18 | "waf",
19 | "scraper",
20 | "puppeteer",
21 | "xvfb",
22 | "turnstile",
23 | "bypass",
24 | "undetected",
25 | "stealth"
26 | ],
27 | "author": "zfcsoftware",
28 | "license": "MIT",
29 | "description": "This package is an experimental and educational package created for Cloudflare protections.",
30 | "dependencies": {
31 | "ajv": "^8.17.1",
32 | "ajv-formats": "^3.0.1",
33 | "body-parser": "^1.20.3",
34 | "cors": "^2.8.5",
35 | "dotenv": "^16.4.5",
36 | "express": "^4.21.0",
37 | "jest": "^29.7.0",
38 | "puppeteer-real-browser": "^1.4.0",
39 | "supertest": "^7.0.0"
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/src/data/capsolver.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZFC-Digital/cf-clearance-scraper/c94563410342eacae92a1f2921f6000d84539ad7/src/data/capsolver.png
--------------------------------------------------------------------------------
/src/data/fakePage.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/src/data/sdo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZFC-Digital/cf-clearance-scraper/c94563410342eacae92a1f2921f6000d84539ad7/src/data/sdo.gif
--------------------------------------------------------------------------------
/src/endpoints/getSource.js:
--------------------------------------------------------------------------------
/**
 * Loads `url` in a fresh browser context and returns the page's HTML.
 *
 * The HTML is captured from the first response whose status is 200 or 302 and
 * whose URL equals `url` (with or without a trailing slash). If no such
 * response arrives, the call fails with "Timeout Error" after
 * `global.timeOut` milliseconds (60000 when unset).
 *
 * Expects `global.browser` to expose `createBrowserContext()`.
 *
 * @param {object} options
 * @param {string} options.url - Address of the page to load.
 * @param {{host: string, port: number, username?: string, password?: string}} [options.proxy]
 *   Optional HTTP proxy; credentials are applied only when both are present.
 * @returns {Promise<string>} The page source.
 * @throws {Error} Rejects with an Error whose `message` describes the failure
 *   ("Missing url parameter", "Failed to create browser context",
 *   "Timeout Error", or the underlying navigation error).
 */
async function getSource({ url, proxy }) {
  if (!url) throw new Error("Missing url parameter");

  const context = await global.browser
    .createBrowserContext({
      proxyServer: proxy ? `http://${proxy.host}:${proxy.port}` : undefined, // https://pptr.dev/api/puppeteer.browsercontextoptions
    })
    .catch(() => null);
  if (!context) throw new Error("Failed to create browser context");

  let timer;
  try {
    return await new Promise((resolve, reject) => {
      timer = setTimeout(
        () => reject(new Error("Timeout Error")),
        global.timeOut || 60000
      );

      const capture = async () => {
        const page = await context.newPage();

        if (proxy?.username && proxy?.password)
          await page.authenticate({
            username: proxy.username,
            password: proxy.password,
          });

        await page.setRequestInterception(true);
        // A request may already be settled or torn down with the context; a
        // failed continue() must not become an unhandled rejection.
        page.on("request", (request) => request.continue().catch(() => {}));
        page.on("response", async (res) => {
          try {
            if (
              [200, 302].includes(res.status()) &&
              [url, url + "/"].includes(res.url())
            ) {
              await page
                .waitForNavigation({ waitUntil: "load", timeout: 5000 })
                .catch(() => {});
              resolve(await page.content());
            }
          } catch (e) {
            // Best effort: presumably a later matching response can still
            // succeed; otherwise the timeout rejects the call.
          }
        });

        await page.goto(url, {
          waitUntil: "domcontentloaded",
        });
      };

      capture().catch((err) =>
        reject(err instanceof Error ? err : new Error(String(err)))
      );
    });
  } finally {
    // Single cleanup path: the timer is always cleared and the context is
    // closed exactly once, whichever way the promise settled.
    clearTimeout(timer);
    await context.close().catch(() => {});
  }
}
60 | module.exports = getSource;
61 |
--------------------------------------------------------------------------------
/src/endpoints/solveTurnstile.max.js:
--------------------------------------------------------------------------------
1 | const fs = require("fs");
/**
 * Solves the Turnstile widget on `url` by loading the real page
 * (the "turnstile-max" mode) and returns the resulting token.
 *
 * A script injected before page load polls `window.turnstile.getResponse()`
 * and, once a token exists, appends a hidden `cf-response` input that this
 * function waits for. The whole operation is bounded by `global.timeOut`
 * milliseconds (60000 when unset).
 *
 * The function keeps the name `solveTurnstileMin` because the module's export
 * statement refers to it by that name.
 *
 * @param {object} options
 * @param {string} options.url - Page that hosts the Turnstile widget.
 * @param {{host: string, port: number, username?: string, password?: string}} [options.proxy]
 *   Optional HTTP proxy; credentials are applied only when both are present.
 * @returns {Promise<string>} The Turnstile token.
 * @throws {Error} Rejects with an Error whose `message` describes the failure
 *   ("Missing url parameter", "Failed to create browser context",
 *   "Timeout Error", "Failed to get token", or the underlying browser error).
 */
async function solveTurnstileMin({ url, proxy }) {
  if (!url) throw new Error("Missing url parameter");

  const context = await global.browser
    .createBrowserContext({
      proxyServer: proxy ? `http://${proxy.host}:${proxy.port}` : undefined, // https://pptr.dev/api/puppeteer.browsercontextoptions
    })
    .catch(() => null);

  if (!context) throw new Error("Failed to create browser context");

  const timeoutMs = global.timeOut || 60000;

  const solve = async () => {
    const page = await context.newPage();

    if (proxy?.username && proxy?.password)
      await page.authenticate({
        username: proxy.username,
        password: proxy.password,
      });

    await page.evaluateOnNewDocument(() => {
      let token = null;
      async function waitForToken() {
        while (!token) {
          try {
            token = window.turnstile.getResponse();
          } catch (e) {}
          await new Promise((resolve) => setTimeout(resolve, 500));
        }
        var c = document.createElement("input");
        c.type = "hidden";
        c.name = "cf-response";
        c.value = token;
        document.body.appendChild(c);
      }
      waitForToken();
    });

    await page.goto(url, {
      waitUntil: "domcontentloaded",
    });

    // Follows the configured timeout so a larger `timeOut` is not cut short
    // by a fixed 60 s selector wait.
    await page.waitForSelector('[name="cf-response"]', {
      timeout: timeoutMs,
    });
    const token = await page.evaluate(() => {
      try {
        return document.querySelector('[name="cf-response"]').value;
      } catch (e) {
        return null;
      }
    });
    if (!token || token.length < 10) throw new Error("Failed to get token");
    return token;
  };

  let timer;
  const deadline = new Promise((_, reject) => {
    timer = setTimeout(() => reject(new Error("Timeout Error")), timeoutMs);
  });

  try {
    return await Promise.race([solve(), deadline]);
  } finally {
    // Single cleanup path: clear the timer and close the context exactly once.
    clearTimeout(timer);
    await context.close().catch(() => {});
  }
}
80 | module.exports = solveTurnstileMin;
81 |
--------------------------------------------------------------------------------
/src/endpoints/solveTurnstile.min.js:
--------------------------------------------------------------------------------
1 | function solveTurnstileMin({ url, proxy, siteKey }) {
2 | return new Promise(async (resolve, reject) => {
3 | if (!url) return reject("Missing url parameter");
4 | if (!siteKey) return reject("Missing siteKey parameter");
5 |
6 | const context = await global.browser
7 | .createBrowserContext({
8 | proxyServer: proxy ? `http://${proxy.host}:${proxy.port}` : undefined, // https://pptr.dev/api/puppeteer.browsercontextoptions
9 | })
10 | .catch(() => null);
11 | if (!context) return reject("Failed to create browser context");
12 |
13 | let isResolved = false;
14 |
15 | var cl = setTimeout(async () => {
16 | if (!isResolved) {
17 | await context.close();
18 | reject("Timeout Error");
19 | }
20 | }, global.timeOut || 60000);
21 |
22 | try {
23 | const page = await context.newPage();
24 |
25 | if (proxy?.username && proxy?.password)
26 | await page.authenticate({
27 | username: proxy.username,
28 | password: proxy.password,
29 | });
30 |
31 | await page.setRequestInterception(true);
32 |
33 | page.on("request", async (request) => {
34 | if (
35 | [url, url + "/"].includes(request.url()) &&
36 | request.resourceType() === "document"
37 | ) {
38 | const response = await request.respond({
39 | status: 200,
40 | contentType: "text/html",
41 | body: String(
42 | require("fs").readFileSync("./src/data/fakePage.html")
43 | ).replace(//g, siteKey),
44 | });
45 | } else {
46 | await request.continue();
47 | }
48 | });
49 |
50 | await page.goto(url, {
51 | waitUntil: "domcontentloaded",
52 | });
53 |
54 | await page.waitForSelector('[name="cf-response"]', {
55 | timeout: 60000,
56 | });
57 |
58 | const token = await page.evaluate(() => {
59 | try {
60 | return document.querySelector('[name="cf-response"]').value;
61 | } catch (e) {
62 | return null;
63 | }
64 | });
65 |
66 | isResolved = true;
67 | clearInterval(cl);
68 | await context.close();
69 | if (!token || token.length < 10) return reject("Failed to get token");
70 | return resolve(token);
71 | } catch (e) {
72 | if (!isResolved) {
73 | await context.close();
74 | clearInterval(cl);
75 | reject(e.message);
76 | }
77 | }
78 | });
79 | }
80 | module.exports = solveTurnstileMin;
81 |
--------------------------------------------------------------------------------
/src/endpoints/wafSession.js:
--------------------------------------------------------------------------------
/**
 * Asks https://httpbin.org/get, from within `page`, which Accept-Language
 * header was sent with the request.
 *
 * @param {object} page - Object exposing `evaluate(fn)`; the callback is
 *   handed to it unchanged.
 * @returns {Promise<string|null|undefined>} The echoed header value, `null`
 *   when the lookup fails, or `undefined` when the echo carries no such header.
 */
async function findAcceptLanguage(page) {
  const readEchoedHeader = async () => {
    try {
      const response = await fetch("https://httpbin.org/get");
      const payload = await response.json();
      return (
        payload.headers["Accept-Language"] || payload.headers["accept-language"]
      );
    } catch (_err) {
      return null;
    }
  };

  return page.evaluate(readEchoedHeader);
}
13 |
/**
 * Opens `url` in a fresh browser context and returns what is needed to reuse
 * the resulting session elsewhere: the page cookies and the headers of the
 * request that produced the matching response.
 *
 * The session is captured from the first response whose status is 200 or 302
 * and whose URL equals `url` (with or without a trailing slash). If no such
 * response arrives, the call fails with "Timeout Error" after
 * `global.timeOut` milliseconds (60000 when unset).
 *
 * @param {object} options
 * @param {string} options.url - Address of the protected page.
 * @param {{host: string, port: number, username?: string, password?: string}} [options.proxy]
 *   Optional HTTP proxy; credentials are applied only when both are present.
 * @returns {Promise<{cookies: object[], headers: object}>} Cookies from
 *   `page.cookies()` and the request headers with `content-type`,
 *   `accept-encoding`, `accept` and `content-length` removed. `accept-language`
 *   is set to the detected value when one could be determined.
 * @throws {Error} Rejects with an Error whose `message` describes the failure
 *   ("Missing url parameter", "Failed to create browser context",
 *   "Timeout Error", or the underlying navigation error).
 */
async function getSource({ url, proxy }) {
  if (!url) throw new Error("Missing url parameter");

  const context = await global.browser
    .createBrowserContext({
      proxyServer: proxy ? `http://${proxy.host}:${proxy.port}` : undefined, // https://pptr.dev/api/puppeteer.browsercontextoptions
    })
    .catch(() => null);
  if (!context) throw new Error("Failed to create browser context");

  // Headers that describe one specific request body/encoding and therefore
  // should not be replayed on later requests.
  const droppedHeaders = [
    "content-type",
    "accept-encoding",
    "accept",
    "content-length",
  ];

  let timer;
  try {
    return await new Promise((resolve, reject) => {
      timer = setTimeout(
        () => reject(new Error("Timeout Error")),
        global.timeOut || 60000
      );

      const capture = async () => {
        const page = await context.newPage();

        if (proxy?.username && proxy?.password)
          await page.authenticate({
            username: proxy.username,
            password: proxy.password,
          });

        const acceptLanguage = await findAcceptLanguage(page);
        await page.setRequestInterception(true);
        // A request may already be settled or torn down with the context; a
        // failed continue() must not become an unhandled rejection.
        page.on("request", (request) => request.continue().catch(() => {}));
        page.on("response", async (res) => {
          try {
            if (
              [200, 302].includes(res.status()) &&
              [url, url + "/"].includes(res.url())
            ) {
              await page
                .waitForNavigation({ waitUntil: "load", timeout: 5000 })
                .catch(() => {});
              const cookies = await page.cookies();
              // Work on a copy so the browser's own request object is untouched.
              const headers = { ...res.request().headers() };
              for (const name of droppedHeaders) delete headers[name];
              // Only override when the lookup succeeded; a null header value
              // is of no use to the caller.
              if (acceptLanguage) headers["accept-language"] = acceptLanguage;
              resolve({ cookies, headers });
            }
          } catch (e) {
            // Best effort: presumably a later matching response can still
            // succeed; otherwise the timeout rejects the call.
          }
        });

        await page.goto(url, {
          waitUntil: "domcontentloaded",
        });
      };

      capture().catch((err) =>
        reject(err instanceof Error ? err : new Error(String(err)))
      );
    });
  } finally {
    // Single cleanup path: clear the timer and close the context exactly once.
    clearTimeout(timer);
    await context.close().catch(() => {});
  }
}
80 | module.exports = getSource;
81 |
--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
// HTTP entry point: exposes POST /cf-clearance-scraper and dispatches each
// request to the endpoint module selected by `mode`.
const express = require('express')
const app = express()
const port = process.env.PORT || 3000
const bodyParser = require('body-parser')
const authToken = process.env.authToken || null
const cors = require('cors')
const reqValidate = require('./module/reqValidate')

// Process-wide state shared with the endpoint modules.
global.browserLength = 0 // requests currently being processed
global.browserLimit = Number(process.env.browserLimit) || 20
global.timeOut = Number(process.env.timeOut || 60000)

app.use(bodyParser.json({}))
app.use(bodyParser.urlencoded({ extended: true }))
app.use(cors())

// In development the app is exported (see the end of the file) instead of
// being bound to a port.
if (process.env.NODE_ENV !== 'development') {
    const server = app.listen(port, () => { console.log(`Server running on port ${port}`) })
    try {
        server.timeout = global.timeOut
    } catch (e) { }
}
if (process.env.SKIP_LAUNCH != 'true') require('./module/createBrowser')

const getSource = require('./endpoints/getSource')
const solveTurnstileMin = require('./endpoints/solveTurnstile.min')
const solveTurnstileMax = require('./endpoints/solveTurnstile.max')
const wafSession = require('./endpoints/wafSession')

// Maps every accepted `mode` to the worker that serves it and to the shape
// its result takes in the response body.
const modeHandlers = {
    'source': { run: getSource, wrap: (source) => ({ source }) },
    'turnstile-min': { run: solveTurnstileMin, wrap: (token) => ({ token }) },
    'turnstile-max': { run: solveTurnstileMax, wrap: (token) => ({ token }) },
    'waf-session': { run: wafSession, wrap: (session) => ({ ...session }) },
}

// The endpoint modules may reject with either an Error or a plain string;
// both must reach the client as a readable message.
function describeError(err) {
    return err instanceof Error ? err.message : String(err)
}

/**
 * POST /cf-clearance-scraper
 *
 * Responds with 400 when the body fails schema validation, 401 when an
 * authToken is configured and does not match, 429 when the concurrency limit
 * is reached, and 500 when the browser is not ready or the worker fails.
 * On success the body carries `code: 200` plus the worker's result.
 */
app.post('/cf-clearance-scraper', async (req, res) => {

    const data = req.body

    const check = reqValidate(data)

    if (check !== true) return res.status(400).json({ code: 400, message: 'Bad Request', schema: check })

    if (authToken && data.authToken !== authToken) return res.status(401).json({ code: 401, message: 'Unauthorized' })

    if (global.browserLength >= global.browserLimit) return res.status(429).json({ code: 429, message: 'Too Many Requests' })

    if (process.env.SKIP_LAUNCH != 'true' && !global.browser) return res.status(500).json({ code: 500, message: 'The scanner is not ready yet. Please try again a little later.' })

    // `mode` has already been restricted to the known values by reqValidate.
    const handler = Object.hasOwn(modeHandlers, data.mode) ? modeHandlers[data.mode] : null

    let result = { code: 500 }

    global.browserLength++
    try {
        if (handler) {
            const value = await handler.run(data)
            result = { ...handler.wrap(value), code: 200 }
        }
    } catch (err) {
        result = { code: 500, message: describeError(err) }
    } finally {
        // Always release the slot, even if the worker throws unexpectedly.
        global.browserLength--
    }

    res.status(result.code ?? 500).send(result)
})

// Fallback for every other route.
app.use((req, res) => { res.status(404).json({ code: 404, message: 'Not Found' }) })

if (process.env.NODE_ENV == 'development') module.exports = app
71 |
--------------------------------------------------------------------------------
/src/module/createBrowser.js:
--------------------------------------------------------------------------------
1 | const { connect } = require("puppeteer-real-browser")
/**
 * Launches the shared browser and stores it in `global.browser`, retrying
 * every three seconds until a launch succeeds. A 'disconnected' listener
 * relaunches the browser after the same delay.
 *
 * Setting `global.finished` to true stops both the retry loop and the
 * relaunch-on-disconnect behaviour.
 *
 * @returns {Promise<void>}
 */
async function createBrowser() {
    const retryDelayMs = 3000
    const pause = () => new Promise(resolve => setTimeout(resolve, retryDelayMs))

    while (true) {
        try {
            if (global.finished == true) return

            // Cleared while a launch is in progress so callers can tell that
            // no browser is available yet.
            global.browser = null

            const launched = await connect({
                headless: false,
                turnstile: true,
                connectOption: { defaultViewport: null },
                disableXvfb: false,
            })

            global.browser = launched.browser

            launched.browser.on('disconnected', async () => {
                if (global.finished == true) return
                console.log('Browser disconnected')
                await pause()
                await createBrowser()
            })

            return
        } catch (e) {
            console.log(e.message)
            if (global.finished == true) return
            await pause()
        }
    }
}
35 | createBrowser()
--------------------------------------------------------------------------------
/src/module/reqValidate.js:
--------------------------------------------------------------------------------
1 | const Ajv = require("ajv")
2 | const addFormats = require("ajv-formats")
3 |
4 | const ajv = new Ajv()
5 | addFormats(ajv)
6 |
// JSON Schema for the body of POST /cf-clearance-scraper.
const schema = {
    "type": "object",
    "properties": {
        "mode": {
            "type": "string",
            "enum": ["source", "turnstile-min", "turnstile-max", "waf-session"],
        },
        "proxy": {
            "type": "object",
            "properties": {
                "host": { "type": "string", "minLength": 1 },
                "port": { "type": "integer", "minimum": 1, "maximum": 65535 },
                "username": { "type": "string" },
                "password": { "type": "string" }
            },
            // The endpoints build `http://${host}:${port}` from this object,
            // so a proxy without both fields could never work.
            "required": ["host", "port"],
            "additionalProperties": false
        },
        "url": {
            "type": "string",
            "format": "uri",
            // Only web addresses make sense here; this also keeps local
            // schemes such as file: away from the browser.
            "pattern": "^[hH][tT][tT][pP][sS]?://",
        },
        "authToken": {
            "type": "string"
        },
        "siteKey": {
            "type": "string"
        }
    },
    "required": ["mode", "url"],
    "additionalProperties": false
}
38 |
39 | // const data = {
40 | // mode: "source",
41 | // url: "https://example.com",
42 | // proxy: {
43 | // host: "localhost",
44 | // port: 8080,
45 | // username: "test",
46 | // password: "test"
47 | // },
48 | // authToken: "123456"
49 | // }
50 |
51 |
/**
 * Checks a request body against the request schema.
 *
 * @param {*} data - Parsed request body.
 * @returns {true|object[]} `true` when the body is valid, otherwise the
 *   validator's error list.
 */
function validate(data) {
    return ajv.validate(schema, data) ? true : ajv.errors
}
57 |
58 | module.exports = validate
--------------------------------------------------------------------------------
/tests/endpoints.test.js:
--------------------------------------------------------------------------------
// End-to-end tests: each case posts a real job to the app and expects a
// 200 response. NODE_ENV must be set before the app is required so that it is
// exported instead of bound to a port.
process.env.NODE_ENV = 'development'
const server = require('../src/index')
const request = require("supertest")

// Posts `payload` to the scraper endpoint and expects a successful result.
const expectSuccess = (payload) =>
    request(server)
        .post("/cf-clearance-scraper")
        .send(payload)
        .expect(200)
        .then(response => { expect(response.body.code).toEqual(200); })

// Wait until the browser module has published the shared browser.
beforeAll(async () => {
    while (!global.browser) {
        await new Promise(resolve => setTimeout(resolve, 1000));
    }
}, 30000);


afterAll(async () => {
    // Stop the relaunch logic first, then close the browser. The browser may
    // be missing if startup never completed; closing must not throw then, or
    // it would hide the original failure.
    global.finished = true
    await global.browser?.close()
})


test('Scraping Page Source from Cloudflare Protection', async () => {
    return expectSuccess({
        url: 'https://nopecha.com/demo/cloudflare',
        mode: "source"
    })
}, 60000)


test('Creating a Turnstile Token With Site Key [min]', async () => {
    return expectSuccess({
        url: 'https://turnstile.zeroclover.io/',
        siteKey: "0x4AAAAAAAEwzhD6pyKkgXC0",
        mode: "turnstile-min"
    })
}, 60000)

test('Creating a Turnstile Token With Site Key [max]', async () => {
    return expectSuccess({
        url: 'https://turnstile.zeroclover.io/',
        mode: "turnstile-max"
    })
}, 60000)

test('Create Cloudflare WAF Session', async () => {
    return expectSuccess({
        url: 'https://nopecha.com/demo/cloudflare',
        mode: "waf-session"
    })
}, 60000)
--------------------------------------------------------------------------------
/tests/validate.test.js:
--------------------------------------------------------------------------------
// Request-guard tests. The environment is prepared before the app is required
// because the app reads these variables while it loads: no browser is
// launched, a token is required, and the context limit is already exceeded.
process.env.NODE_ENV = 'development'
process.env.SKIP_LAUNCH = "true"
process.env.authToken = "123456"
process.env.browserLimit = -1

const server = require('../src/index')
const request = require("supertest")

// Sends `payload` to the scraper endpoint and returns the pending request.
const postJob = (payload) => request(server).post("/cf-clearance-scraper").send(payload)

// Without the token the request must be refused.
test('Request Authorisation Control Test', async () => {
    await postJob({
        url: 'https://nopecha.com/demo/cloudflare',
        mode: "source"
    }).expect(401)
}, 10000)

// With a valid token the request reaches the limit check and is refused there.
test('Browser Context Limit Control Test', async () => {
    await postJob({
        url: 'https://nopecha.com/demo/cloudflare',
        mode: "source",
        authToken: "123456"
    }).expect(429)
}, 10000)
--------------------------------------------------------------------------------