├── .github └── workflows │ └── pypi-release.yml ├── README.md ├── assets └── last-screenshot.png ├── pyppeteerstealth └── __init__.py └── pyproject.toml /.github/workflows/pypi-release.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python 🐍distribution 📦 to PyPI and TestPyPI 2 | 3 | on: push 4 | jobs: 5 | build: 6 | name: Build distribution 📦 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v4 11 | - name: Set up Python 12 | uses: actions/setup-python@v5 13 | with: 14 | python-version: "3.x" 15 | - name: Install pypa/build 16 | run: >- 17 | python3 -m 18 | pip install 19 | build 20 | --user 21 | - name: Build a binary wheel and a source tarball 22 | run: python3 -m build 23 | - name: Store the distribution packages 24 | uses: actions/upload-artifact@v4 25 | with: 26 | name: python-package-distributions 27 | path: dist/ 28 | 29 | 30 | publish-to-pypi: 31 | name: >- 32 | Publish Python 🐍 distribution 📦 to PyPI 33 | if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes 34 | needs: 35 | - build 36 | runs-on: ubuntu-latest 37 | environment: 38 | name: release 39 | url: https://pypi.org/p/pyppeteerstealth 40 | permissions: 41 | id-token: write # IMPORTANT: mandatory for trusted publishing 42 | 43 | steps: 44 | - name: Download all the dists 45 | uses: actions/download-artifact@v4 46 | with: 47 | name: python-package-distributions 48 | path: dist/ 49 | - name: Publish distribution 📦 to PyPI 50 | uses: pypa/gh-action-pypi-publish@release/v1 51 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pyppeteerstealth 2 | 3 | A bunch of hacks from different websites 4 | 5 | ## Note! 6 | - **Does not yet pass https://arh.antoinevastel.com/bots/areyouheadless** please help! 
7 | - Probably won't help where sites are using https://developer.mozilla.org/en-US/docs/Web/API/User-Agent_Client_Hints_API (`navigator.userAgentData.brands` etc) 8 | - Designed to work with the puppeteer fetcher at https://changedetection.io 9 | 10 | __**If you know what is missing, please make a PR!!!**__ 11 | 12 | If you compare loading https://arh.antoinevastel.com/bots/ in your application, versus in your browser you might be able 13 | to see what is required to get the fingerprint closer to a "normal" browser (further away from a "headless" browser) 14 | 15 | This is intended to be used with https://github.com/dgtlmoon/pyppeteer-ng and is also part of the 16 | https://changedetection.io project. 17 | 18 | ```python 19 | browser = await pyppeteer_instance.connect(browserWSEndpoint="ws://127.0.0.1:3000", 20 | ignoreHTTPSErrors=True 21 | ) 22 | 23 | self.page = (pages := await browser.pages) and len(pages) or await browser.newPage() 24 | # Should be set with setUserAgent 25 | user_agent = next((value for key, value in request_headers.items() if key.lower().strip() == 'user-agent'), DEFAULT_USER_AGENT) 26 | await self.page.setUserAgent(user_agent) 27 | 28 | try: 29 | from pyppeteerstealth import inject_evasions_into_page 30 | except ImportError: 31 | logger.debug("pyppeteerstealth module not available, skipping") 32 | pass 33 | else: 34 | await inject_evasions_into_page(self.page) 35 | 36 | response = await self.page.goto("https://example.com", waitUntil="load") 37 | ``` 38 | 39 | Last report from https://bot.sannysoft.com/ 40 | 41 | ![last fingerprint status](assets/last-screenshot.png "Last fingerprint status") 42 | 43 | 44 | ### Other related interesting projects/alternatives 45 | - https://github.com/MeiK2333/pyppeteer_stealth 46 | 47 | -------------------------------------------------------------------------------- /assets/last-screenshot.png: -------------------------------------------------------------------------------- 
# Best here is maybe Page.addScriptToEvaluateOnNewDocument
# https://chromedevtools.github.io/devtools-protocol/tot/Page/#method-addScriptToEvaluateOnNewDocument
# "Evaluates given script in every frame upon creation (before loading frame's scripts)." inside of CDP
# https://arh.antoinevastel.com/bots/areyouheadless
# https://bot.sannysoft.com/
# https://www.f5.com/company/blog/detecting-phantomjs-based-visitors


def _wrap_in_iife(script: str) -> str:
    """Return *script* wrapped in an immediately-invoked arrow function.

    The evasion snippets are handed to the browser as raw source, so any
    top-level ``const``/``var``/``function`` in them would otherwise become a
    global of every page: a page script declaring the same name would fail
    with a redeclaration error, and the helpers themselves would be visible
    to fingerprinting code.
    """
    # The newline before the closing brace keeps a trailing `//` comment in
    # the snippet from swallowing the wrapper's tail.
    return "(() => {\n" + script + "\n})();"


# The annotation is quoted so it is never evaluated when the function is defined.
async def inject_evasions_into_page(page: "Page") -> None:
    """Register the browser-fingerprint evasion scripts on *page*.

    Every evasion is a JavaScript snippet passed to
    ``page.evaluateRawOnNewDocument``; nothing is evaluated on the Python
    side. Call this before navigating (the README example calls it right
    before ``page.goto``).

    Args:
        page: Object exposing an awaitable ``evaluateRawOnNewDocument(script)``
            method, i.e. a pyppeteer-ng ``Page``.

    Returns:
        None.
    """

    # /*
    # TODO: Add windows profile. Sophisticated bot detection vendors check TCP stack features (see p0f) so system level changes may be necessary.
    # Will likely need to add:
    #   - navigator.platform
    #   - Different vendor/renderer names (DirectX vs Mesa) for WebGL getParameter
    #   - Different codec support (untested)
    #   - Different avail/inner/outer widths/heights because Windows has a different size taskbar and other things
    #   - Widevine plugin
    # */

    # Doesn't really help and results in errors from scraping browser providers
    # await page.setExtraHTTPHeaders({"DNT": "1"});

    # Puppeteer defines languages to be "" for some reason
    # https://pptr.dev/api/puppeteer.page.evaluateonnewdocument
    await page.evaluateRawOnNewDocument(_wrap_in_iife('''Object.defineProperty(navigator, "languages", {
            get: function() {
                return ["en-GB", "en"]
            }
        });
    '''))

    # navigator.doNotTrack is a string ("1"/"0") or null in real browsers, never a number
    await page.evaluateRawOnNewDocument(_wrap_in_iife('''Object.defineProperty(navigator, "doNotTrack", {
            get: function() {
                return "1";
            }
        });
    '''))
    await page.evaluateRawOnNewDocument(_wrap_in_iife('''Object.defineProperty(navigator, "deviceMemory", {
            get: function() {
                return 4;
            }
        });
    '''))
    await page.evaluateRawOnNewDocument(_wrap_in_iife('''Object.defineProperty(navigator, "hardwareConcurrency", {
            get: function() {
                return 4;
            }
        });
    '''))
    await page.evaluateRawOnNewDocument(_wrap_in_iife('''
        Object.defineProperty(window, "chrome", {
            writable: true,
            enumerable: true,
            configurable: false, // note!
            value: {}, // We'll extend that later
        });
        /* Copied from Google Chrome v83 on Linux */
        var currentTime = new Date().getTime();
        var currentTimeDivided = currentTime / 1000;
        var randOffset = Math.random() * 3;

        Object.defineProperty(window.chrome, "csi", {
            value: function csi() {
                /* https://chromium.googlesource.com/chromium/src.git/+/master/chrome/renderer/loadtimes_extension_bindings.cc */
                return {
                    startE: currentTime,
                    onloadT: currentTime + 3 * randOffset,
                    pageT: 30000 * randOffset,
                    tran: 15,
                }
            },
        });

        Object.defineProperty(window.chrome, "loadTimes", {
            value: function loadTimes() {
                return {
                    requestTime: currentTimeDivided + 1 * randOffset,
                    startLoadTime: currentTimeDivided + 1 * randOffset,
                    commitLoadTime: currentTimeDivided + 2 * randOffset,
                    finishDocumentLoadTime: currentTimeDivided + 3 * randOffset,
                    firstPaintTime: currentTimeDivided + 4 * randOffset,
                    finishLoadTime: currentTimeDivided + 5 * randOffset,
                    firstPaintAfterLoadTime: 0,
                    navigationType: "Other",
                    wasFetchedViaSpdy: true,
                    wasNpnNegotiated: true,
                    npnNegotiatedProtocol: "h2",
                    wasAlternateProtocolAvailable: false,
                    connectionInfo: "h2",
                }
            },
        });

        const stripErrorWithAnchor = (err, anchor) => {
            const stackArr = err.stack.split(`\n`);
            const anchorIndex = stackArr.findIndex((line) =>
                line.trim().startsWith(anchor),
            );
            if (anchorIndex === -1) {
                return err; // 404, anchor not found
            }
            // Strip everything from the top until we reach the anchor line (remove anchor line as well)
            // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)
            stackArr.splice(1, anchorIndex);
            err.stack = stackArr.join(`\n`);
            return err;
        }

        const makeError = {
            ErrorInInvocation: (fn) => {
                const err = new TypeError(`Error in invocation of app.${fn}()`);
                return stripErrorWithAnchor(
                    err,
                    `at ${fn} (eval at `,
                );
            },
        }

        // https://github.com/berstend/puppeteer-extra/blob/9c3d4aace43cb44da984f1e2f581ad376ebefeea/packages/puppeteer-extra-plugin-stealth/evasions/chrome.app/index.js
        Object.defineProperty(window.chrome, "app", {
            value: {
                InstallState: {
                    DISABLED: "disabled",
                    INSTALLED: "installed",
                    NOT_INSTALLED: "not_installed",
                },
                RunningState: {
                    CANNOT_RUN: "cannot_run",
                    READY_TO_RUN: "ready_to_run",
                    RUNNING: "running",
                },
                get isInstalled() {
                    return false;
                },
                getDetails: function getDetails() {
                    if (arguments.length) {
                        throw makeError.ErrorInInvocation(`getDetails`);
                    }
                    return null;
                },
                getIsInstalled: function getIsInstalled() {
                    if (arguments.length) {
                        throw makeError.ErrorInInvocation(`getIsInstalled`);
                    }
                    return false;
                },
                runningState: function runningState() {
                    if (arguments.length) {
                        throw makeError.ErrorInInvocation(`runningState`);
                    }
                    return "cannot_run";
                },
            },
        });
    '''))

    # pass the Webdriver test
    # https://arh.antoinevastel.com/bots/ according to this .webDriver should be = true on my desktop
    # maybe not? https://github.com/microsoft/playwright-python/issues/527
    # https://stackoverflow.com/questions/53039551/selenium-webdriver-modifying-navigator-webdriver-flag-to-prevent-selenium-detec/69533548#69533548
    await page.evaluateRawOnNewDocument(_wrap_in_iife('''
        delete navigator.webdriver;
        delete Navigator.prototype.webdriver;
    '''))

    # NOTE(review): "webdrivervalue" is not a standard navigator property; this may have been
    # meant to redefine "webdriver" itself. Left unchanged until the intent is confirmed.
    await page.evaluateRawOnNewDocument(_wrap_in_iife('''

        Object.defineProperty(navigator, "webdrivervalue", {
            get: function() {
                return false
            }
        });
    '''))

    # pass the permissions test by denying all permissions
    await page.evaluateRawOnNewDocument(_wrap_in_iife('''
        const originalQuery = window.navigator.permissions.query;
        Permissions.prototype.query = function query(parameters) {
            // Malformed calls go to the native implementation, invoked with the
            // real receiver so it can raise its own error instead of "Illegal invocation".
            if (!parameters || !parameters.name)
                return originalQuery.call(this, parameters);
            return Promise.resolve({
                state: "denied",
                permission: "default"
            });
        }
    '''))

    # Fake standard visibility checks
    await page.evaluateRawOnNewDocument(_wrap_in_iife('''
        // https://adtechmadness.wordpress.com/2019/03/14/spoofing-viewability-measurements-technical-examples/
        Object.defineProperty(Document.prototype, "hasFocus", {
            value: function hasFocus(document) {
                return true;
            },
        });
        Object.defineProperty(Document.prototype, "hidden", {
            get: () => false,
        });
        Object.defineProperty(Document.prototype, "visibilityState", {
            get: () => "visible",
        });
        // window.locationbar.visible, window.menubar.visible, window.personalbar.visible, window.scrollbars.visible, window.statusbar.visible, window.toolbar.visible
        Object.defineProperty(BarProp.prototype, "visible", {
            get: () => true,
        });
        Object.defineProperty(Document.prototype, "onvisibilitychange", {
            set: (params) => function () {}, // ignore visibility changes even when an event handler is registered
        });
    '''))

    # Set plugins to Chrome's
    await page.evaluateRawOnNewDocument(_wrap_in_iife('''
        /* global MimeType MimeTypeArray PluginArray */

        const fakeData = {
            mimeTypes: [
                {
                    type: "application/pdf",
                    suffixes: "pdf",
                    description: "Portable Document Format",
                    __pluginName: "Chrome PDF Viewer",
                },
                {
                    type: "text/pdf",
                    suffixes: "pdf",
                    description: "Portable Document Format",
                    __pluginName: "Chrome PDF Viewer",
                },
            ],
            plugins: [
                {
                    name: "Chrome PDF Viewer",
                    filename: "internal-pdf-viewer",
                    description: "Portable Document Format",
                },
                {
                    name: "Widevine Content Decryption Module",
                    filename: "widevinecdmadapter",
                    description: "Enables Widevine licenses for playback of HTML audio/video content.",
                },
                {
                    name: "Chrome Remote Desktop Viewer",
                    filename: "remoting viewer",
                    description: "",
                },
            ],
            fns: {
                namedItem: (instanceName) => {
                    // Returns the Plugin/MimeType with the specified name.
                    return function namedItem(name) {
                        if (!arguments.length) {
                            throw new TypeError(
                                `Failed to execute 'namedItem' on '${instanceName}': 1 argument required, but only 0 present.`,
                            );
                        }
                        return this[name] || null;
                    };
                },
                item: (instanceName) => {
                    // Returns the Plugin/MimeType at the specified index into the array.
                    return function item(index) {
                        if (!arguments.length) {
                            throw new TypeError(
                                `Failed to execute 'item' on '${instanceName}': 1 argument required, but only 0 present.`,
                            );
                        }
                        return this[index] || null;
                    };
                },
                refresh: (instanceName) => {
                    // Refreshes all plugins on the current page, optionally reloading documents.
                    return function refresh() {
                        return undefined;
                    };
                },
            },
        };
        // Poor mans _.pluck
        const getSubset = (keys, obj) =>
            keys.reduce((a, c) => ({...a, [c]: obj[c]}), {});

        function generateMimeTypeArray() {
            const arr = fakeData.mimeTypes
                .map((obj) =>
                    getSubset(["type", "suffixes", "description"], obj),
                )
                .map((obj) => Object.setPrototypeOf(obj, MimeType.prototype));
            arr.forEach((obj) => {
                Object.defineProperty(arr, obj.type, {
                    value: obj,
                    enumerable: false, // make sure its not enumerable or distil networks will put duplicates in their list
                });
            });

            // Mock functions
            arr.namedItem = fakeData.fns.namedItem("MimeTypeArray");
            arr.item = fakeData.fns.item("MimeTypeArray");

            return Object.setPrototypeOf(arr, MimeTypeArray.prototype);
        }

        const mimeTypeArray = generateMimeTypeArray();
        Object.defineProperty(Object.getPrototypeOf(navigator), "mimeTypes", {
            get: () => mimeTypeArray,
        });

        function generatePluginArray() {
            const arr = fakeData.plugins
                .map((obj) =>
                    getSubset(["name", "filename", "description"], obj),
                )
                .map((obj) => {
                    const mimes = fakeData.mimeTypes.filter(
                        (m) => m.__pluginName === obj.name,
                    );
                    // Add mimetypes
                    mimes.forEach((mime, index) => {
                        navigator.mimeTypes[mime.type].enabledPlugin = obj;
                        obj[mime.type] = navigator.mimeTypes[mime.type];
                        obj[index] = navigator.mimeTypes[mime.type];
                    });
                    obj.length = mimes.length;
                    return obj;
                })
                .map((obj) => {
                    // Mock functions
                    obj.namedItem = fakeData.fns.namedItem("Plugin");
                    obj.item = fakeData.fns.item("Plugin");
                    return obj;
                })
                .map((obj) => Object.setPrototypeOf(obj, Plugin.prototype));
            arr.forEach((obj) => {
                Object.defineProperty(arr, obj.name, {
                    value: obj,
                    enumerable: false, // make sure its not enumerable or distil networks will put duplicates in their list
                });
            });

            // Mock functions
            arr.namedItem = fakeData.fns.namedItem("PluginArray");
            arr.item = fakeData.fns.item("PluginArray");
            arr.refresh = fakeData.fns.refresh("PluginArray");

            return Object.setPrototypeOf(arr, PluginArray.prototype);
        }

        const pluginArray = generatePluginArray();
        Object.defineProperty(Object.getPrototypeOf(navigator), "plugins", {
            get: () => pluginArray,
        });
    '''))

    # Fake resolution info
    await page.evaluateRawOnNewDocument(_wrap_in_iife('''
        const resolution = {
            width: 1366,
            height: 768,
        }
        Object.defineProperty(Screen.prototype, "width", {
            get: () => resolution.width,
        });
        Object.defineProperty(Screen.prototype, "height", {
            get: () => resolution.height,
        });
        Object.defineProperty(Screen.prototype, "availWidth", {
            get: () => resolution.width,
        });
        Object.defineProperty(Screen.prototype, "availHeight", {
            get: () => resolution.height,
        });

        Object.defineProperty(window, "innerWidth", {
            get: () => resolution.width,
        });
        Object.defineProperty(window, "innerHeight", {
            get: () => resolution.height - 72,
        });
        Object.defineProperty(window, "outerWidth", {
            get: () => resolution.width,
        });
        Object.defineProperty(window, "outerHeight", {
            get: () => resolution.height,
        });
        Object.defineProperty(HTMLHtmlElement.prototype, "clientWidth", {
            get: () => window.innerWidth,
        });
        Object.defineProperty(HTMLHtmlElement.prototype, "clientHeight", {
            get: () => window.innerHeight,
        });

        // Fake min-width based resolution checks
        const originalMatchMedia = window.matchMedia;
        Object.defineProperty(window, "matchMedia", {
            value: function matchMedia(query) {
                var lowerQuery = query.toLowerCase();
                var result = originalMatchMedia(query);
                if (lowerQuery.includes("min-width")) {
                    Object.defineProperty(result, "matches", {
                        get: () => true,
                    });
                }

                return result;
            },
        });
    '''))

    # Codec support
    await page.evaluateRawOnNewDocument(_wrap_in_iife('''
        // ACCEPTED CODECS UNUSED
        const acceptedCodecs = [
            'audio/aac;codecs="mp4a.E1"',
            'audio/m4a; codecs="aac"',
            'audio/mp4;codecs="mp4a.40.2"',
            'audio/mpeg;codecs="mp3"',
            'audio/ogg;codecs=flac',
            'audio/ogg;codecs=vorbis',
            'video/3gpp2;codecs="mp4v.20.9, mp4a.E1"',
            'video/mp4;codecs="avc1.42E01E"',
            'video/mp4;codecs="avc1.42E01E, mp4a.40.2"',
            'video/mp4;codecs="avc1.4D401E, mp4a.40.2"',
            'video/mp4;codecs="avc1.58A01E, mp4a.40.2"',
            'video/mp4;codecs="avc1.64001E, mp4a.40.2"',
            'video/ogg;codecs="theora, vorbis"',
            'video/webm; codecs="vorbis,vp8"',
        ];

        Object.defineProperty(HTMLVideoElement.prototype, "canPlayType", {
            value: function canPlayType(codec) {
                codec = codec.toLowerCase();
                if (
                    codec.includes("ogg") ||
                    codec.includes("mp4") ||
                    codec.includes("h264") ||
                    codec.includes("webm") ||
                    codec.includes("mp3") ||
                    codec.includes("mpeg")
                ) {
                    return "probably";
                } else if (codec.includes("wav")) {
                    return "maybe";
                } else {
                    return "";
                }
            },
        });
        Object.defineProperty(HTMLAudioElement.prototype, "canPlayType", {
            value: function canPlayType(codec) {
                codec = codec.toLowerCase();
                if (
                    codec.includes("aac") ||
                    codec.includes("mp3") ||
                    codec.includes("mp4") ||
                    codec.includes("mpeg") ||
                    codec.includes("ogg") ||
                    codec.includes("webm") ||
                    codec.includes("wav")
                ) {
                    return "probably";
                } else if (codec.includes("m4a")) {
                    return "maybe";
                } else {
                    return "";
                }
            },
        });
    '''))

    # Standard desktop screen orientation
    await page.evaluateRawOnNewDocument(_wrap_in_iife('''
        Object.defineProperty(ScreenOrientation.prototype, "type", {
            get: () => "landscape-primary",
        });
        Object.defineProperty(ScreenOrientation.prototype, "angle", {
            get: () => 0,
        });
    '''))

    # Fix HTMLElement animate toString (Puppeteer doesn't make it native code for some reason)
    await page.evaluateRawOnNewDocument(_wrap_in_iife('''
        const oldAnimate = HTMLElement.prototype.animate;
        Object.defineProperty(HTMLElement.prototype, "animate", {
            value: function animate(parameters) {
                // Forward the real receiver and every argument (keyframes AND options)
                return oldAnimate.apply(this, arguments);
            },
        });
    '''))

    await page.evaluateRawOnNewDocument(_wrap_in_iife('''
        WebGLRenderingContext.prototype.getParameter = (function getParameter(
            originalFunction,
        ) {
            // https://gist.github.com/cvan/042b2448fcecefafbb6a91469484cdf8?permalink_comment_id=4685136#gistcomment-4685136
            const paramMap = {}
            // UNMASKED_VENDOR_WEBGL
            paramMap[0x9245] = "Google Inc. (Intel)";
            // UNMASKED_RENDERER_WEBGL
            paramMap[0x9246] =
                "ANGLE (Intel, Mesa Intel(R) UHD Graphics (CML GT2), OpenGL 4.6)";
            // VENDOR
            paramMap[0x1f00] = "WebKit";
            // RENDERER
            paramMap[0x1f01] = "WebKit WebGL";
            // VERSION
            paramMap[0x1f02] = "WebGL 1.0 (OpenGL ES 2.0 Chromium)";

            return function getParameter(parameter) {
                return (
                    paramMap[parameter] ||
                    originalFunction.call(this, parameter)
                );
            }
        })(WebGLRenderingContext.prototype.getParameter);
    '''))

    # Overwrite iframe window object so we don't have to reapply the above evasions for every iframe
    # Stolen from https://github.com/berstend/puppeteer-extra/blob/ceca9c6fed0a9f39d6c80b71fd413f3656ebb704/packages/puppeteer-extra-plugin-stealth/evasions/iframe.contentWindow/index.js
    # Cant get this one to work
    # await page.evaluateRawOnNewDocument('''
    #   try {
    #     // Adds a contentWindow proxy to the provided iframe element
    #     const addContentWindowProxy = (iframe) => {
    #       const contentWindowProxy = {
    #         get(target, key) {
    #           // Now to the interesting part:
    #           // We actually make this thing behave like a regular iframe window,
    #           // by intercepting calls to e.g. `.self` and redirect it to the correct thing. :)
    #           // That makes it possible for these assertions to be correct:
    #           // iframe.contentWindow.self === window.top // must be false
    #           if (key === "self") {
    #             return this;
    #           }
    #           // iframe.contentWindow.frameElement === iframe // must be true
    #           if (key === "frameElement") {
    #             return iframe;
    #           }
    #           return Reflect.get(target, key);
    #         },
    #       }
    #
    #       if (!iframe.contentWindow) {
    #         const proxy = new Proxy(window, contentWindowProxy);
    #         Object.defineProperty(iframe, "contentWindow", {
    #           get() {
    #             return proxy;
    #           },
    #           set(newValue) {
    #             return newValue; // contentWindow is immutable
    #           },
    #           enumerable: true,
    #           configurable: false,
    #         });
    #       }
    #     }
    #
    #     // Handles iframe element creation, augments `srcdoc` property so we can intercept further
    #     const handleIframeCreation = (target, thisArg, args) => {
    #       const iframe = target.apply(thisArg, args);
    #
    #       // We need to keep the originals around
    #       const _iframe = iframe;
    #       const _srcdoc = _iframe.srcdoc;
    #
    #       // Add hook for the srcdoc property
    #       // We need to be very surgical here to not break other iframes by accident
    #       Object.defineProperty(iframe, "srcdoc", {
    #         configurable: true, // Important, so we can reset this later
    #         get: function () {
    #           return _iframe.srcdoc;
    #         },
    #         set: function (newValue) {
    #           addContentWindowProxy(this);
    #           // Reset property, the hook is only needed once
    #           Object.defineProperty(iframe, "srcdoc", {
    #             configurable: false,
    #             writable: false,
    #             value: _srcdoc,
    #           });
    #           _iframe.srcdoc = newValue;
    #         },
    #       });
    #       return iframe;
    #     }
    #
    #     // Adds a hook to intercept iframe creation events
    #     const addIframeCreationSniffer =
    #       /* global document */
    #       const createElement = {
    #         // Make toString() native
    #         get(target, key) {
    #           return Reflect.get(target, key);
    #         },
    #         apply: function (target, thisArg, args) {
    #           const isIframe =
    #             args &&
    #             args.length &&
    #             `${args[0]}`.toLowerCase() === "iframe";
    #           if (!isIframe) {
    #             // Everything as usual
    #             return target.apply(thisArg, args);
    #           } else {
    #             return handleIframeCreation(target, thisArg, args);
    #           }
    #         },
    #       }
    #       // All this just due to iframes with srcdoc bug
    #       document.createElement = new Proxy(
    #         document.createElement,
    #         createElement,
    #       );
    #     }
    #
    #     // Let's go
    #     addIframeCreationSniffer();
    #   } catch (err) {}
    # ''')

    # disable alert since it blocks
    await page.evaluateRawOnNewDocument(_wrap_in_iife('''
        Object.defineProperty(window, "alert", {
            value: function alert(parameter) {
                return undefined;
            },
        });
    '''))

    # default broken image test
    await page.evaluateRawOnNewDocument(_wrap_in_iife('''
        ["height", "width"].forEach((property) => {
            // store the existing descriptor
            const imageDescriptor = Object.getOwnPropertyDescriptor(
                HTMLImageElement.prototype,
                property,
            );

            // redefine the property with a patched descriptor
            Object.defineProperty(HTMLImageElement.prototype, property, {
                ...imageDescriptor,
                get: function () {
                    // return an arbitrary non-zero dimension if the image failed to load
                    if (this.complete && this.naturalHeight == 0) {
                        return 16;
                    }
                    // otherwise, return the actual dimension
                    return imageDescriptor.get.apply(this);
                },
            });
        });
    '''))

    # Evade toString detection
    # 3/oct/'24 - Seemed to cause some sites to crash that ran NextJS etc
    # await page.evaluateRawOnNewDocument('''
    #   // Spoofs the toString output of the following functions to native code. If you spoof another function, add it to this list.
    #   var functionList = [
    #     Permissions.prototype.query,
    #     window.alert,
    #     Document.prototype.hasFocus,
    #     WebGLRenderingContext.prototype.getParameter,
    #     navigator.mimeTypes.item,
    #     navigator.mimeTypes.namedItem,
    #     navigator.plugins.refresh,
    #     HTMLVideoElement.prototype.canPlayType,
    #     HTMLAudioElement.prototype.canPlayType,
    #     window.matchMedia,
    #     Object.getOwnPropertyDescriptor(Screen.prototype, "height").get,
    #     Object.getOwnPropertyDescriptor(Screen.prototype, "width").get,
    #     Object.getOwnPropertyDescriptor(Screen.prototype, "availHeight")
    #       .get,
    #     Object.getOwnPropertyDescriptor(ScreenOrientation.prototype, "type")
    #       .get,
    #     Object.getOwnPropertyDescriptor(
    #       ScreenOrientation.prototype,
    #       "angle",
    #     ).get,
    #     Object.getOwnPropertyDescriptor(Screen.prototype, "availWidth").get,
    #     Object.getOwnPropertyDescriptor(Document.prototype, "hidden").get,
    #     Object.getOwnPropertyDescriptor(
    #       Document.prototype,
    #       "visibilityState",
    #     ).get,
    #     Object.getOwnPropertyDescriptor(BarProp.prototype, "visible").get,
    #     Object.getOwnPropertyDescriptor(Navigator.prototype, "mimeTypes")
    #       .get,
    #     Object.getOwnPropertyDescriptor(Navigator.prototype, "plugins").get,
    #     Object.getOwnPropertyDescriptor(Navigator.prototype, "languages")
    #       .get,
    #     Object.getOwnPropertyDescriptor(window, "innerWidth").get,
    #     Object.getOwnPropertyDescriptor(window, "innerHeight").get,
    #     Object.getOwnPropertyDescriptor(window, "outerWidth").get,
    #     Object.getOwnPropertyDescriptor(window, "outerHeight").get,
    #     Object.getOwnPropertyDescriptor(
    #       HTMLHtmlElement.prototype,
    #       "clientWidth",
    #     ).get,
    #     Object.getOwnPropertyDescriptor(
    #       HTMLHtmlElement.prototype,
    #       "clientHeight",
    #     ).get,
    #     Object.getOwnPropertyDescriptor(HTMLImageElement.prototype, "width")
    #       .get,
    #     Object.getOwnPropertyDescriptor(
    #       HTMLImageElement.prototype,
    #       "height",
    #     ).get,
    #     HTMLElement.prototype.animate,
    #     window.chrome.csi,
    #     window.chrome.loadTimes,
    #     window.chrome.app.getDetails,
    #     window.chrome.app.getIsInstalled,
    #     window.chrome.app.runningState,
    #     Object.getOwnPropertyDescriptor(window.chrome.app, "isInstalled")
    #       .get,
    #     document.createElement,
    #   ];
    #
    #   // Undetectable toString modification - https://adtechmadness.wordpress.com/2019/03/23/javascript-tampering-detection-and-stealth/ */
    #   var toStringProxy = new Proxy(Function.prototype.toString, {
    #     apply: function toString(target, thisArg, args) {
    #       // Special functions we make always return "native code"
    #       // NOTE: This depends on the functions being named (see hasFocus example). Anonymous functions will not work (or at least will not show the proper output) because their name attribute is equal to "".
    #       if (functionList.includes(thisArg)) {
    #         return "function " + thisArg.name + "() { [native code] }";
    #       } else {
    #         return target.call(thisArg);
    #       }
    #     },
    #   });
    #
    #   Function.prototype.toString = toStringProxy;
    #   functionList.push(Function.prototype.toString); // now that its modified, we can add it
    # ''')
= ["poetry>=0.12"] 52 | build-backend = "poetry.masonry.api" --------------------------------------------------------------------------------