├── Dockerfile ├── DrissionPage_example.py ├── DrissionPage_example_docker.py ├── puppeteer-real-browser_example.js ├── readme.md ├── requirements.txt └── turnstilePatch ├── manifest.json └── script.js /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | RUN apt-get update && \ 3 | apt-get install -y python3 python3-pip wget xvfb 4 | 5 | RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - && \ 6 | sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list' && \ 7 | apt-get update && \ 8 | apt-get install -y google-chrome-stable 9 | 10 | COPY . . 11 | 12 | RUN pip install -r requirements.txt 13 | 14 | CMD ["python3", "DrissionPage_example_docker.py"] -------------------------------------------------------------------------------- /DrissionPage_example.py: -------------------------------------------------------------------------------- 1 | from DrissionPage import Chromium, ChromiumOptions 2 | import time 3 | import os 4 | 5 | co = ChromiumOptions() 6 | co.auto_port() 7 | 8 | co.set_timeouts(base=1) 9 | 10 | # change this to the path of the folder containing the extension 11 | EXTENSION_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "turnstilePatch")) 12 | co.add_extension(EXTENSION_PATH) 13 | 14 | # uncomment this if you want to use headless mode 15 | """ 16 | co.headless() 17 | 18 | from sys import platform 19 | if platform == "linux" or platform == "linux2": 20 | platformIdentifier = "X11; Linux x86_64" 21 | elif platform == "darwin": 22 | platformIdentifier = "Macintosh; Intel Mac OS X 10_15_7" 23 | elif platform == "win32": 24 | platformIdentifier = "Windows NT 10.0; Win64; x64" 25 | 26 | co.set_user_agent(f"Mozilla/5.0 ({platformIdentifier}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36") 27 | """ 28 | 29 | browser = Chromium(co) 30 | page = browser.get_tabs()[-1] 31 | page.get("https://turnstile.zeroclover.io/") 32 | 33 | def getTurnstileToken(): 34 | page.run_js("try { turnstile.reset() } catch(e) { }") 35 | 36 | turnstileResponse = None 37 | 38 | for i in range(0, 15): 39 | try: 40 | turnstileResponse = page.run_js("try { return turnstile.getResponse() } catch(e) { return null }") 41 | if turnstileResponse: 42 | return turnstileResponse 43 | 44 | challengeSolution = page.ele("@name=cf-turnstile-response") 45 | challengeWrapper = challengeSolution.parent() 46 | challengeIframe = challengeWrapper.shadow_root.ele("tag:iframe") 47 | challengeIframeBody = challengeIframe.ele("tag:body").shadow_root 48 | challengeButton = challengeIframeBody.ele("tag:input") 49 | challengeButton.click() 50 | except: 51 | pass 52 | time.sleep(1) 53 | page.refresh() 54 | raise Exception("failed to solve turnstile") 55 | 56 | while True: 57 | print(getTurnstileToken()) -------------------------------------------------------------------------------- /DrissionPage_example_docker.py: -------------------------------------------------------------------------------- 1 | from DrissionPage import Chromium, ChromiumOptions 2 | import time 3 | import os 4 | from pyvirtualdisplay import Display 5 | 6 | display = Display(size=(1920, 1080)) 7 | display.start() 8 | 9 | co = ChromiumOptions() 10 | co.set_argument("--no-sandbox") 11 | co.auto_port() 12 | co.set_timeouts(base=1) 13 | EXTENSION_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "turnstilePatch")) 14 | co.add_extension(EXTENSION_PATH) 15 | 16 | # headless is optional 17 | co.headless() 18 | co.set_user_agent(f"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36") 19 | 20 | browser = Chromium(co) 21 | page = browser.get_tabs()[-1] 22 | page.get("https://turnstile.zeroclover.io/") 23 | 24 | def getTurnstileToken(): 25 | page.run_js("try { turnstile.reset() } catch(e) { }") 26 | 27 | turnstileResponse = None 28 | 29 | for i in range(0, 15): 30 | try: 31 | turnstileResponse = page.run_js("try { return turnstile.getResponse() } catch(e) { return null }") 32 | if turnstileResponse: 33 | return turnstileResponse 34 | 35 | challengeSolution = page.ele("@name=cf-turnstile-response") 36 | challengeWrapper = challengeSolution.parent() 37 | challengeIframe = challengeWrapper.shadow_root.ele("tag:iframe") 38 | challengeIframeBody = challengeIframe.ele("tag:body").shadow_root 39 | challengeButton = challengeIframeBody.ele("tag:input") 40 | challengeButton.click() 41 | except: 42 | pass 43 | time.sleep(1) 44 | page.refresh() 45 | raise Exception("failed to solve turnstile") 46 | 47 | print(getTurnstileToken()) -------------------------------------------------------------------------------- /puppeteer-real-browser_example.js: -------------------------------------------------------------------------------- 1 | async function main() { 2 | const { connect } = await import('puppeteer-real-browser'); 3 | 4 | // not needed anymore cuz library already does it automatically 😉👌 5 | //const EXTENSION_PATH = `${__dirname}/turnstilePatch/`; 6 | 7 | const { page, browser } = await connect({ 8 | /* 9 | args: [ 10 | `--disable-extensions-except=${EXTENSION_PATH}`, 11 | `--load-extension=${EXTENSION_PATH}` 12 | ], 13 | */ 14 | turnstile: true, 15 | }); 16 | page.goto('https://nopecha.com/demo/cloudflare'); 17 | } 18 | 19 | main() -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # CDP bug MouseEvent .screenX .screenY patcher 2 | ### supports both headless and headful 3 | 4 | ## bug that this fixes: https://issues.chromium.org/issues/40280325 5 | When the CDP command `Input.dispatchMouseEvent` is ran, the MouseEvent/PointerEvent created will have a "fake" property for .screenX and .screenY (they will be same as the .x and .y properties respectively). Cloudflare Turnstile is able to detect this and will mark you as a bot, and you won't be able to get past (Interstitial) Turnstile. 6 | 7 | This extension fixes this by providing fake values for .screenX and .screenY. One downside of this is that now even real clicks will use fake values. However manually clicking in an automated browser is mostly non-existant so its fine, and .screenX and .screenY are rarely used anyways, so it should be fine. Let me know if it breaks something and I'll see if I can fix it. 8 | 9 | TL;DR: Chrome has a bug allowing fake clicks to be detected. This extension fixes those detections so you can continue scraping. 10 | 11 | ## how to use 12 | Load the extension in [./turnstilePatch/](/turnstilePatch/) 13 | 14 | tested libraries: 15 | - [DrissionPage](https://github.com/g1879/DrissionPage) ([example](/DrissionPage_example.py)) 16 | - ~~[puppeteer-real-browser](https://github.com/zfcsoftware/puppeteer-real-browser) ([example](/puppeteer-real-browser_example.js))~~ (already directly implemented in library) 17 | 18 | ## Docker: 19 | - [Dockerfile](/Dockerfile) 20 | - [Docker code version](/DrissionPage_example_docker.py) 21 | 22 | ## status 23 | Gets past Cloudflare Turnstile 24 | 25 | ![turnstile success](https://files.catbox.moe/hx2i15.gif) 26 | 27 | ## for the memes 28 | ![based web scraper gigachad](https://files.catbox.moe/sgou1o.png) 29 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | DrissionPage==4.1.0.9 2 | PyVirtualDisplay==3.0 3 | -------------------------------------------------------------------------------- /turnstilePatch/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "manifest_version": 3, 3 | "name": "Turnstile Patcher", 4 | "version": "2.1", 5 | "content_scripts": [ 6 | { 7 | "js": [ 8 | "./script.js" 9 | ], 10 | "matches": [ 11 | "" 12 | ], 13 | "run_at": "document_start", 14 | "all_frames": true, 15 | "world": "MAIN" 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /turnstilePatch/script.js: -------------------------------------------------------------------------------- 1 | function getRandomInt(min, max) { 2 | return Math.floor(Math.random() * (max - min + 1)) + min; 3 | } 4 | 5 | // old method wouldn't work on 4k screens 6 | 7 | let screenX = getRandomInt(800, 1200); 8 | let screenY = getRandomInt(400, 600); 9 | 10 | Object.defineProperty(MouseEvent.prototype, 'screenX', { value: screenX }); 11 | 12 | Object.defineProperty(MouseEvent.prototype, 'screenY', { value: screenY }); --------------------------------------------------------------------------------