├── assets └── solver.gif ├── main.py ├── readme.md └── requirements.txt /assets/solver.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Binit-Dhakal/Google-reCAPTCHA-v2-solver-using-playwright-python/b4fa56490cd7307bd1baf0083678cecc23be8243/assets/solver.gif -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # for playwright 2 | from playwright.sync_api import sync_playwright, TimeoutError 3 | from playwright_stealth import stealth_sync 4 | import datetime 5 | import json 6 | 7 | # for recaptcha 8 | import urllib 9 | import pydub 10 | from speech_recognition import Recognizer, AudioFile 11 | import random 12 | import os 13 | 14 | configs = { 15 | 'CHROME_BUNDLE': '/home/binit/driver/chrome-linux/chrome', 16 | 'HEADLESS': 'false', 17 | } 18 | 19 | 20 | def browsersetup(p): 21 | headless = True if configs["HEADLESS"] == "true" else False 22 | 23 | args = [ 24 | '--deny-permission-prompts', 25 | '--no-default-browser-check', 26 | '--no-first-run', 27 | '--deny-permission-prompts', 28 | '--disable-popup-blocking', 29 | '--ignore-certificate-errors', 30 | '--no-service-autorun', 31 | '--password-store=basic', 32 | '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36', 33 | '--window-size=640,480', 34 | '--disable-audio-output' 35 | ] 36 | browser = p.chromium.launch( 37 | headless=headless, executable_path=configs["CHROME_BUNDLE"], args=args) 38 | 39 | return browser 40 | 41 | 42 | class SolveCaptcha: 43 | def __init__(self, page): 44 | self.page = page 45 | self.main_frame = None 46 | self.recaptcha = None 47 | 48 | def delay(self): 49 | self.page.wait_for_timeout(random.randint(1, 3) * 1000) 50 | 51 | def presetup(self): 52 | name = self.page.locator( 53 | "//iframe[@title='reCAPTCHA']").get_attribute("name") 54 | self.recaptcha = self.page.frame(name=name) 55 | 56 | self.recaptcha.click("//div[@class='recaptcha-checkbox-border']") 57 | self.delay() 58 | s = self.recaptcha.locator("//span[@id='recaptcha-anchor']") 59 | if s.get_attribute("aria-checked") != "false": # solved already 60 | return 61 | 62 | self.main_frame = self.page.frame(name=page.locator( 63 | "//iframe[contains(@src,'https://www.google.com/recaptcha/api2/bframe?')]").get_attribute("name")) 64 | self.main_frame.click("id=recaptcha-audio-button") 65 | 66 | def start(self): 67 | self.presetup() 68 | tries = 0 69 | while (tries <= 5): 70 | self.delay() 71 | try: 72 | self.solve_captcha() 73 | except Exception as e: 74 | print(e) 75 | self.main_frame.click("id=recaptcha-reload-button") 76 | else: 77 | s = self.recaptcha.locator("//span[@id='recaptcha-anchor']") 78 | if s.get_attribute("aria-checked") != "false": 79 | self.page.click("id=recaptcha-demo-submit") 80 | self.delay() 81 | break 82 | tries += 1 83 | 84 | def solve_captcha(self): 85 | self.main_frame.click( 86 | "//button[@aria-labelledby='audio-instructions rc-response-label']") 87 | href = self.main_frame.locator( 88 | "//a[@class='rc-audiochallenge-tdownload-link']").get_attribute("href") 89 | 90 | urllib.request.urlretrieve(href, "audio.mp3") 91 | 92 | sound = pydub.AudioSegment.from_mp3( 93 | "audio.mp3").export("audio.wav", format="wav") 94 | 95 | recognizer = Recognizer() 96 | 97 | recaptcha_audio = AudioFile("audio.wav") 98 | with recaptcha_audio as source: 99 | audio = recognizer.record(source) 100 | 101 | text = recognizer.recognize_google(audio) 102 | print(text) 103 | self.main_frame.fill("id=audio-response", text) 104 | self.main_frame.click("id=recaptcha-verify-button") 105 | self.delay() 106 | 107 | def __del__(self): 108 | os.remove("audio.mp3") 109 | os.remove("audio.wav") 110 | 111 | 112 | if __name__ == "__main__": 113 | with sync_playwright() as p: 114 | browser = browsersetup(p) 115 | context = browser.new_context( 116 | record_video_dir="videos/", 117 | record_video_size={"width": 640, "height": 480} 118 | ) 119 | page = context.new_page() 120 | stealth_sync(page) 121 | 122 | try: 123 | page.goto("https://www.google.com/recaptcha/api2/demo") 124 | captcha_solver = SolveCaptcha(page) 125 | captcha_solver.start() 126 | del captcha_solver 127 | except Exception as e: 128 | print(e) 129 | browser.close() 130 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Google — reCAPTCHA v3 solver using playwright-python 2 | This script solve the google recaptcha using playwright-python with pydub and speech recognization. 3 | 4 | ![](assets/solver.gif) 5 | 6 | ## Motivation 7 | I hate captcha(as a scraper) 8 | 9 | ## Installation required 10 | - Install all the library we will need 11 | ```bash 12 | pip3 install -r requirements.txt 13 | ``` 14 | - We also have to run 15 | ```bash 16 | playwright install 17 | ``` 18 | to install browser bundle and everything for playwright. More [detail](https://playwright.dev/python/docs/intro) 19 | 20 | 21 | ## Run script 22 | ```bash 23 | python3 main.py 24 | ``` 25 | 26 | ## 27 | 28 | ## Notes 29 | This script tries for 5 times only but we can increase that for how much long we want. 30 | 31 | ## Future release plan 32 | - [ ] More testing on sites other than https://www.google.com/recaptcha/api2/demo 33 | - [ ] More corner cases catching 34 | - [ ] Find more accurate algorithm or library to solve the audio captcha problem 35 | 36 | ## Credits 37 | I will like to give all the credit to this blog https://medium.com/geekculture/how-to-solve-google-recaptcha-v3-with-python-9f92bb0212bf by Romik Kelesh(thank you). There he have written script in selenium and i found that working with iframe in selenium is such a pain. So I used one of my favourite weapon for web scraping - playwright. 38 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | playwright==1.17.0 2 | playwright_stealth==1.0.5 3 | pydub==0.22.1 4 | SpeechRecognition==3.8.1 5 | --------------------------------------------------------------------------------