├── .prettierignore ├── .prettierrc.json ├── .babelrc ├── src ├── types.d.ts ├── test.ts └── botcheck.ts ├── .eslintrc.json ├── .gitignore ├── tsconfig.json ├── package.json └── readme.md /.prettierignore: -------------------------------------------------------------------------------- 1 | # Ignore artifacts: 2 | build 3 | logs 4 | storage 5 | vendor -------------------------------------------------------------------------------- /.prettierrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "semi": false, 3 | "plugins": ["prettier-plugin-sorted"] 4 | } 5 | -------------------------------------------------------------------------------- /.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": [ 3 | [ 4 | "@babel/preset-env", 5 | { 6 | "modules": true 7 | } 8 | ] 9 | ], 10 | "plugins": ["@babel/plugin-proposal-class-properties"] 11 | } 12 | -------------------------------------------------------------------------------- /src/types.d.ts: -------------------------------------------------------------------------------- 1 | import { Page } from "puppeteer" 2 | 3 | export interface BotCheck { 4 | page: Page 5 | 6 | isolatedWorld(): Promise 7 | 8 | behaviorMonitor(): Promise 9 | 10 | f5network(): Promise 11 | 12 | pixelscan(): Promise 13 | 14 | sannysoft(): Promise 15 | 16 | recaptcha(): Promise 17 | 18 | fingerprintJs(): Promise 19 | 20 | datadome(): Promise 21 | 22 | whiteOps(): Promise 23 | } 24 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "parser": "@typescript-eslint/parser", 3 | "extends": [ 4 | "plugin:@typescript-eslint/recommended", 5 | "plugin:prettier/recommended", 6 | "prettier/@typescript-eslint" 7 | ], 8 | "plugins": [ 9 | "@typescript-eslint", 10 | "prettier" 11 | ], 12 | "rules": { 13 | "semi": [ 14 | "error", 
15 | "never" 16 | ], 17 | "no-debugger": "off", 18 | "no-console": 0, 19 | "@typescript-eslint/member-ordering": "error" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # JS 2 | yarn.lock 3 | .yarn-integrity 4 | .npm 5 | .node_repl_history 6 | 7 | # Vendors 8 | node_modules/ 9 | bower_components/ 10 | 11 | # Build 12 | dist/ 13 | *.tgz 14 | .grunt 15 | 16 | # Environment 17 | .env 18 | 19 | # TypeScript 20 | src/**/*.js 21 | run.js 22 | typings/ 23 | *.tsbuildinfo 24 | 25 | # Temp Files 26 | cache/ 27 | *.zip 28 | 29 | # Runtime data 30 | pids 31 | *.pid 32 | *.seed 33 | *.pid.lock 34 | 35 | # Editors 36 | .idea/ 37 | .vscode/ 38 | .eslintcache 39 | 40 | # Logs 41 | *.log 42 | npm-debug.log* 43 | yarn-debug.log* 44 | yarn-error.log* 45 | *.dump 46 | 47 | # OS metadata 48 | .DS_Store 49 | Thumbs.db -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es6", 4 | "module": "commonjs", 5 | "moduleResolution": "node", 6 | "esModuleInterop": true, 7 | "noImplicitAny": true, 8 | "noImplicitReturns": true, 9 | "allowJs": false, 10 | "noFallthroughCasesInSwitch": true, 11 | "sourceMap": true, 12 | "allowSyntheticDefaultImports": true, 13 | "outDir": "dist", 14 | "baseUrl": "src", 15 | "resolveJsonModule": true, 16 | "strict": true 17 | }, 18 | "include": [ 19 | "src/**/*" 20 | ], 21 | "exclude": [ 22 | "node_modules", 23 | "node_modules/**/*", 24 | "dist", 25 | "src/**/*.spec.tsx", 26 | "src/**/*.spec.ts" 27 | ] 28 | } 29 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "puppeteer-botcheck", 3 | "version": "2.0.0", 
4 | "description": "Bot detection tests for Puppeteer", 5 | "keywords": [ 6 | "puppeteer", 7 | "puppeteer-extra", 8 | "scraping" 9 | ], 10 | "author": "prescience-data", 11 | "license": "MIT", 12 | "scripts": { 13 | "test": "npm run build && node dist/test.js", 14 | "build": "npx tsc", 15 | "update:packages": "npx npm-check-updates -u --packageFile package.json" 16 | }, 17 | "dependencies": { 18 | "chalk": "^2.4.2", 19 | "puppeteer": "^5.4.1", 20 | "puppeteer-extra": "^3.1.15", 21 | "puppeteer-extra-plugin-stealth": "^2.6.5", 22 | "yargs": "^16.1.0" 23 | }, 24 | "devDependencies": { 25 | "@babel/core": "^7.12.3", 26 | "@babel/plugin-proposal-class-properties": "^7.12.1", 27 | "@babel/preset-env": "^7.12.1", 28 | "@types/puppeteer": "^5.4.0", 29 | "@types/yargs": "^15.0.9", 30 | "@typescript-eslint/eslint-plugin": "^4.6.1", 31 | "@typescript-eslint/parser": "^4.6.1", 32 | "eslint": "7.12.1", 33 | "eslint-config-prettier": "^6.15.0", 34 | "eslint-plugin-prettier": "3.1.4", 35 | "prettier": "^2.1.2", 36 | "prettier-plugin-sorted": "^2.0.0", 37 | "typescript": "^4.0.5" 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # 🕵‍♂ A collection of bot detection tests for Puppeteer Extra 2 | 3 | 4 | > ### Note: A cleaner version of these tests will be getting migrated to Foundation: https://github.com/prescience-data/foundation 5 | > I still intend to keep these updated but will likely get less attention than the `Foundation` package. 6 | 7 | This class accepts a `Puppeteer` page instance and allows the user to run a series of tests against known bot detection tools and products. 8 | 9 | The scores or outcome will be shown in the console. 
10 | 11 | DanceContinues 12 | 13 | ### 🧰 Available Tests 14 | 15 | - SannySoft 16 | - `npm test -- --tag=sannysoft` 17 | - https://bot.sannysoft.com/ 18 | - Recaptcha Score 19 | - `npm test -- --tag=recaptcha` 20 | - https://antcpt.com/eng/information/demo-form/recaptcha-3-test-score.html 21 | - AreYouHeadless 22 | - `npm test -- --tag=areyouheadless` 23 | - https://arh.antoinevastel.com/bots/areyouheadless/ 24 | - FingerprintJS2 25 | - `npm test -- --tag=fingerprintjs` 26 | - https://fingerprintjs.com/demo/ 27 | - Datadome 28 | - `npm test -- --tag=datadome` 29 | - https://datadome.co/ 30 | - Execution Monitor 31 | - `npm test -- --tag=execution` 32 | - https://prescience-data.github.io/execution-monitor.html 33 | - Behavior Monitor 34 | - `npm test -- --tag=behaviour` 35 | - https://prescience-data.github.io/behavior-monitor.html 36 | - F5 Network 37 | - `npm test -- --tag=f5` 38 | - https://ib.bri.co.id/ib-bri/ 39 | - WhiteOps 40 | - `npm test -- --tag=whiteops` 41 | - https://www.whiteops.com/ _(Need better test case if available)_ 42 | - PixelScan 43 | - `npm test -- --tag=pixelscan` 44 | - https://pixelscan.net/ 45 | 46 | ### 🛠 Usage 47 | 48 | 1. Run `npm install` to pull down `puppeteer`, `puppeteer-extra`, `typescript`, and other dependencies. 49 | 2. Edit `src/test.ts` to adjust your configuration (for example the browser path) if required. 50 | 3. Run `npm test -- --tag={testName}`, where `{testName}` is one of the tags listed above. This builds the project and then runs `dist/test.js`. 51 | -------------------------------------------------------------------------------- /src/test.ts: -------------------------------------------------------------------------------- 1 | import chalk from "chalk" 2 | import { LaunchOptions } from "puppeteer" 3 | import Puppeteer from "puppeteer-extra" 4 | import Stealth from "puppeteer-extra-plugin-stealth" 5 | import { argv } from "yargs" 6 | 7 | import BotCheck from "./botcheck" 8 | 9 | /** 10 | * Set your browser path. 11 | * Must change if wanting to test on Linux or a browser other than Chrome. 
/**
 * Absolute path of the browser executable handed to Puppeteer.
 * The default is a Chrome install location on Windows; change it when
 * testing on Linux or with a browser other than Chrome.
 * @type {string}
 */
const BROWSER_PATH: string =
  "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe"

/**
 * Your Puppeteer launch options.
 * @type {LaunchOptions}
 */
const options: LaunchOptions = {
  headless: false,
  ignoreHTTPSErrors: true,
  executablePath: BROWSER_PATH,
  args: [
    "--no-sandbox",
    "--disable-setuid-sandbox",
    "--disable-sync",
    "--ignore-certificate-errors",
    "--user-agent=" +
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36",
    "--lang=en-US,en;q=0.9",
  ],
  defaultViewport: { width: 1366, height: 768 },
}

/**
 * Turns whatever was thrown into a printable message.
 * Anything can be thrown in JavaScript, so `.message` is only read from
 * real `Error` instances.
 *
 * @param {unknown} err
 * @return {string}
 */
const describeError = (err: unknown): string =>
  err instanceof Error ? err.message : String(err)

/**
 * Entry point: reads `--tag`, launches the browser, runs the selected test,
 * prints its result and exits the process.
 */
;(async () => {
  // `String(undefined)` is the truthy string "undefined", so a missing --tag
  // has to be detected before the value is stringified.
  const rawTag: unknown = argv.tag
  const tag: string =
    rawTag === undefined || rawTag === null ? "" : String(rawTag)
  if (!tag) {
    console.log(chalk.yellow(`⚠ No test defined.`))
    process.exit()
  }
  try {
    // Boot Puppeteer and BotCheck.
    Puppeteer.use(Stealth())
    const browser = await Puppeteer.launch(options)
    try {
      const page = await browser.newPage()
      // An unknown tag is reported by `run()` itself, which throws and is
      // printed by the catch below.
      const botcheck = new BotCheck(page)
      console.log(chalk.blue(`⏱ Attempting test: ${tag}...`))
      try {
        // Run test and display result.
        const result: string = await botcheck.run(tag)
        console.log(result)
      } catch (err) {
        console.warn(chalk.red(describeError(err)))
      }
    } finally {
      // Clean up even when opening the page failed.
      await browser.close()
    }
  } catch (err) {
    console.log(chalk.red(describeError(err)))
  }
  process.exit()
})()
--------------------------------------------------------------------------------
/src/botcheck.ts:
--------------------------------------------------------------------------------
import chalk from "chalk"
import { Page } from "puppeteer"

import * as Types from "./types"

/**
 * Enum list of available tests.
/**
 * Enum list of available tests.
 * Each value is the string that `BotCheck.run()` matches against.
 * @type {enum}
 */
export const enum TAGS {
  /** Misspelled member name, kept so existing references keep compiling. */
  AREYOURHEADLESS = "areyouheadless",
  /** Correctly spelled alias of `AREYOURHEADLESS` (same value). */
  AREYOUHEADLESS = "areyouheadless",
  BEHAVIOUR_MONITOR = "behaviour",
  DATADOME = "datadome",
  EXECUTION_MONITOR = "execution",
  F5NETWORK = "f5",
  FINGERPRINTJS = "fingerprintjs",
  PIXELSCAN = "pixelscan",
  RECAPTCHA = "recaptcha",
  SANNYSOFT = "sannysoft",
  WHITEOPS = "whiteops",
}

/**
 * One row scraped from the SannySoft result tables.
 */
interface SannysoftRow {
  name: string
  result: string | null
}

/**
 * @class BotCheck
 * @classdesc Wraps a Puppeteer page instance to execute tests against anti-bot vendor scripts
 */
export class BotCheck implements Types.BotCheck {
  /**
   * Provides the active page context.
   * @type {Page}
   */
  page: Page

  /**
   * Constructor
   *
   * @param {Page} page
   */
  constructor(page: Page) {
    this.page = page
  }

  /**
   * Runs the test for the selected tag and resolves with its printable result.
   *
   * @param {string} tag One of the `TAGS` values.
   * @return {Promise<string>}
   * @throws {Error} When the tag does not match any known test.
   */
  public async run(tag: string): Promise<string> {
    switch (tag) {
      case TAGS.AREYOURHEADLESS:
        return this.areYouHeadless()
      case TAGS.BEHAVIOUR_MONITOR:
        return this.behaviorMonitor()
      case TAGS.DATADOME:
        return this.datadome()
      case TAGS.EXECUTION_MONITOR:
        return this.isolatedWorld()
      case TAGS.F5NETWORK:
        return this.f5network()
      case TAGS.FINGERPRINTJS:
        return this.fingerprintJs()
      case TAGS.PIXELSCAN:
        return this.pixelscan()
      case TAGS.RECAPTCHA:
        return this.recaptcha()
      case TAGS.SANNYSOFT:
        return this.sannysoft()
      case TAGS.WHITEOPS:
        return this.whiteOps()
      default:
        throw new Error(`Invalid test selected.`)
    }
  }

  /**
   * Test your scripts execution output.
   *
   * @return {Promise<string>} Text content of the page's `#result` element.
   * @throws {Error} When `#result` is missing after the interactions.
   */
  public async isolatedWorld(): Promise<string> {
    await this.page.goto(
      "https://prescience-data.github.io/execution-monitor.html",
      { waitUntil: "networkidle2" }
    )
    // Test abstracted getElementById (the lookup itself is the test, the
    // handle is not needed).
    await this.page.$("#result")
    // Add any other tests you need here...
    await this.page.evaluate(() => {
      // Test createElement execution
      const newDiv = document.createElement("div")
      const newContent = document.createTextNode(
        "Creating an element on the page."
      )
      newDiv.appendChild(newContent)
      // Test getElementById execution
      const currentDiv = document.getElementById("div1")
      document.body.insertBefore(newDiv, currentDiv)
    })
    await this.page.waitForTimeout(2000)
    return this._readText("#result", `Could not find final element.`)
  }

  /**
   * Test your scripts behaviour output by hovering, clicking, typing and
   * scrolling on the monitor page.
   *
   * @return {Promise<string>} Text content of the page's `#result` element.
   * @throws {Error} When the result or input element cannot be found.
   */
  public async behaviorMonitor(): Promise<string> {
    await this.page.goto(
      "https://prescience-data.github.io/behavior-monitor.html",
      { waitUntil: "networkidle2" }
    )
    // Hover and click an element
    const resultElement = await this.page.$("#result")
    if (!resultElement) {
      throw new Error(`Could not find result element.`)
    }
    await resultElement.hover()
    await resultElement.click({ delay: 10 })
    await this.page.waitForTimeout(200)
    // Type string
    const inputElement = await this.page.$("input#test-input")
    if (!inputElement) {
      throw new Error(`Could not find input element.`)
    }
    await inputElement.click()
    await inputElement.type("Hello world...", { delay: 3 })
    // `_client` is reached through a suppressed type error; it is used here
    // to send a raw `Input.synthesizeScrollGesture` command.
    //@ts-ignore
    await this.page._client.send("Input.synthesizeScrollGesture", {
      x: 0,
      y: 0,
      xDistance: 0,
      yDistance: -100,
    })
    await this.page.waitForTimeout(2500)
    return this._readText("#result", `Could not find final element.`)
  }

  /**
   * Test F5/Shape deployment.
   * Passes when the login form is present after loading the page.
   *
   * @return {Promise<string>}
   */
  public async f5network(): Promise<string> {
    await this.page.goto("https://ib.bri.co.id/ib-bri", {
      waitUntil: "networkidle2",
      timeout: 5000,
    })
    await this.page.waitForTimeout(2000)
    const element = await this.page.$("form#loginForm")
    return this._makeResult(!!element)
  }

  /**
   * Test PixelScan page.
   *
   * @return {Promise<string>} Text of the consistency status element.
   * @throws {Error} When the status element cannot be found.
   */
  public async pixelscan(): Promise<string> {
    await this.page.goto("https://pixelscan.net", { waitUntil: "networkidle2" })
    await this.page.waitForTimeout(1000)
    return this._readText(
      "span.consistency-status-text",
      `Could not find result element.`
    )
  }

  /**
   * Run suite of SannySoft tests.
   * Scrapes the first three tables on the page and colours every row by
   * whether its result mentions FAIL or WARN.
   *
   * @return {Promise<string>}
   */
  public async sannysoft(): Promise<string> {
    await this.page.goto("https://bot.sannysoft.com", {
      waitUntil: "networkidle2",
    })
    await this.page.waitForTimeout(5000)
    const rows: SannysoftRow[] = await this.page.evaluate(() => {
      // Runs inside the browser: only page globals are available here.
      const collected: { name: string; result: string | null }[] = []
      const tables = Array.from(document.querySelectorAll("table")).slice(0, 3)
      for (const table of tables) {
        for (const row of Array.from(table.querySelectorAll("tr"))) {
          const cells = row.querySelectorAll("td")
          // Header rows have no <td> cells and are skipped.
          if (!cells[0]) {
            continue
          }
          collected.push({
            name: (cells[0].textContent || "").replace(/\s/g, " ").trim(),
            result: cells[1]
              ? (cells[1].textContent || "")
                  .replace(/\s/g, " ")
                  .replace(/ +/g, " ")
                  .replace(/['"]+/g, "")
                  .trim()
              : null,
          })
        }
      }
      return collected
    })

    if (!rows || !rows.length) {
      return `⚠ No data found on page.`
    }
    return rows
      .map(({ name, result }: SannysoftRow) => {
        const printed = JSON.stringify(result)
        return printed.includes("FAIL") || printed.includes("WARN")
          ? chalk.bgRed(`🚫 ${name}:\n ${printed}`)
          : chalk.green(`✅ ${name}:\n ${printed}`)
      })
      .join(`\n\n`)
  }

  /**
   * Test your recaptcha score.
   * Scores containing 0.1, 0.2 or 0.3 are highlighted as failures.
   *
   * @return {Promise<string>}
   * @throws {Error} When the score element cannot be found.
   */
  public async recaptcha(): Promise<string> {
    await this.page.goto(
      "https://antcpt.com/eng/information/demo-form/recaptcha-3-test-score.html",
      { waitUntil: "networkidle2" }
    )
    await this.page.waitForTimeout(15000)
    const score = (
      await this._readText("#score", `Could not find score element.`)
    ).trim()
    const isLowScore = ["0.3", "0.2", "0.1"].some((low) => score.includes(low))
    return isLowScore ? chalk.bgRed(score) : chalk.green(score)
  }

  /**
   * Test original AreYouHeadless test by Antoine Vastel.
   *
   * @return {Promise<string>} Text content of the `#res` element.
   * @throws {Error} When the result element cannot be found.
   */
  public async areYouHeadless(): Promise<string> {
    await this.page.goto("https://arh.antoinevastel.com/bots/areyouheadless", {
      waitUntil: "networkidle2",
    })
    await this.page.waitForTimeout(2000)
    await this.page.waitForSelector("#res")
    return this._readText("#res", `Could not find result element.`)
  }

  /**
   * Test for FingerprintJS demo.
   * Passes when the inspected table cell reads "NO".
   *
   * @return {Promise<string>}
   * @throws {Error} When the result cell cannot be found.
   */
  public async fingerprintJs(): Promise<string> {
    await this.page.goto("https://fingerprintjs.com/demo", {
      waitUntil: "networkidle2",
    })
    await this.page.waitForTimeout(5000)
    await this.page.waitForSelector("table.table-compact")
    const text = await this._readText(
      "table.table-compact > tbody > tr:nth-child(4) > td.miriam",
      `Could not find result table.`
    )
    // Surrounding whitespace in the cell must not turn a pass into a failure.
    return this._makeResult(text.trim() === "NO")
  }

  /**
   * Interact with the DataDome homepage and test for captcha.
   * Passes when no captcha iframe is present after navigating.
   *
   * @return {Promise<string>}
   * @throws {Error} When the menu button cannot be found.
   */
  public async datadome(): Promise<string> {
    await this.page.goto("https://datadome.co", { waitUntil: "networkidle2" })
    await this.page.waitForTimeout(2000)
    await this._clickAndWaitForNavigation("#menu-item-18474", 10)
    await this.page.waitForTimeout(500)
    const captcha = await this.page.$(
      `iframe[src^="https://geo.captcha-delivery.com/captcha/"]`
    )
    return this._makeResult(!captcha)
  }

  /**
   * Test for WhiteOps homepage.
   * Passes when the company overview link is present after navigating to
   * the about page.
   *
   * @return {Promise<string>}
   * @throws {Error} When the about link cannot be found.
   */
  public async whiteOps(): Promise<string> {
    await this.page.goto("https://www.whiteops.com", {
      waitUntil: "networkidle2",
    })
    await this.page.waitForTimeout(3000)
    await this._clickAndWaitForNavigation(
      'a[href="https://www.whiteops.com/company/about"]',
      8
    )
    await this.page.waitForTimeout(500)
    const test = await this.page.$(
      `a[href="https://resources.whiteops.com/data-sheets/white-ops-company-overview"]`
    )
    return this._makeResult(!!test)
  }

  /**
   * Helper function to make chalked results strings.
   *
   * @param {boolean} result
   * @return {string}
   * @protected
   */
  protected _makeResult(result: boolean): string {
    return result ? chalk.green(`✅ Passed`) : chalk.bgRed(`🚫 Failed`)
  }

  /**
   * Reads the text content of the first element matching a selector.
   * A `null` text content is returned as an empty string so callers always
   * receive a string.
   *
   * @param {string} selector CSS selector of the element to read.
   * @param {string} notFoundMessage Error message used when nothing matches.
   * @return {Promise<string>}
   * @throws {Error} When no element matches the selector.
   * @protected
   */
  protected async _readText(
    selector: string,
    notFoundMessage: string
  ): Promise<string> {
    const element = await this.page.$(selector)
    if (!element) {
      throw new Error(notFoundMessage)
    }
    const text = await this.page.evaluate(
      (node: Element) => node.textContent,
      element
    )
    return typeof text === "string" ? text : ""
  }

  /**
   * Clicks the first element matching a selector and waits for the
   * navigation it triggers.
   * The wait is registered before the click: waiting only after the click
   * can miss a navigation that has already finished and then time out.
   *
   * @param {string} selector CSS selector of the element to click.
   * @param {number} delay Milliseconds between mouse down and mouse up.
   * @return {Promise<void>}
   * @throws {Error} When no element matches the selector.
   * @protected
   */
  protected async _clickAndWaitForNavigation(
    selector: string,
    delay: number
  ): Promise<void> {
    const button = await this.page.$(selector)
    if (!button) {
      throw new Error("Could not find the button!")
    }
    await Promise.all([
      this.page.waitForNavigation({ waitUntil: "networkidle2" }),
      button.click({ delay }),
    ])
  }
}

export default BotCheck