├── .gitignore ├── .travis.yml ├── Dockerfile ├── LICENSE ├── README.md ├── entry.sh ├── requirements.txt └── src ├── captcha_bypass.py └── test.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.log 2 | *.pyc 3 | *.mp3 4 | *.wav 5 | 6 | __pycache__ -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | os: linux 4 | dist: xenial 5 | 6 | services: 7 | - docker 8 | 9 | before_install: 10 | - docker build -t captcha-bypass . 11 | - docker images 12 | 13 | script: 14 | - docker run -it captcha-bypass 15 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine 2 | 3 | RUN apk update 4 | RUN apk add chromium chromium-chromedriver ffmpeg python3 py3-pip flac 5 | 6 | COPY . /mnt/ 7 | 8 | RUN pip install -r /mnt/requirements.txt 9 | 10 | COPY entry.sh /entry.sh 11 | 12 | WORKDIR /mnt/src 13 | 14 | CMD [ "sh", "/entry.sh" ] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 1337 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # python-captcha-bypass 2 | 3 | [![License](https://img.shields.io/github/license/threadexio/python-captcha-bypass?style=for-the-badge)](https://github.com/threadexio/python-captcha-bypass/blob/master/LICENSE) 4 | [![Tests](https://img.shields.io/travis/com/threadexio/python-captcha-bypass?label=Tests&logo=python&logoColor=yellow&style=for-the-badge)](https://app.travis-ci.com/github/threadexio/python-captcha-bypass) 5 | 6 | A small and harmless utility written in Python used to solve CAPTCHAs with Selenium. 7 | 8 | # How to use 9 | 10 | 1. Clone this repo 11 | ```bash 12 | git clone https://github.com/threadexio/python-captcha-bypass 13 | ``` 14 | 15 | 2. Copy `src/captcha_bypass.py` to your project 16 | 17 | 3. 
Import with `from captcha_bypass import solve_captcha` 18 | 19 | ------- 20 | 21 | # Dependencies: 22 | - python3 23 | 24 | - chromium (or Google Chrome, others might work but are not tested) 25 | * Windows: `https://www.chromium.org/getting-involved/download-chromium` 26 | * Linux: 27 | - Debian-based: `sudo apt-get install chromium` 28 | - Arch-based: `sudo pacman -S chromium` 29 | - Fedora-based: `sudo dnf install chromium` 30 | 31 | - ffmpeg 32 | * Windows: `https://www.ffmpeg.org/download.html` 33 | * Linux: 34 | - Debian-based: `sudo apt-get install ffmpeg` 35 | - Arch-based: `sudo pacman -S ffmpeg` 36 | - Fedora-based: `sudo dnf install ffmpeg` 37 | 38 | ------- 39 | 40 | # Docs 41 | 42 | ```python 43 | solve_captcha(driver, iframe, t=5) 44 | ``` 45 | `driver`: the active webdriver instance (`selenium.webdriver`) 46 | 47 | `iframe`: a reference to the CAPTCHA's iframe; `t` (optional, defaults to 5): how many seconds to wait for each page element 48 | 49 | ### See `src/test.py` for a code example 50 | 51 | ------- 52 | 53 | ### Legal Disclaimer 54 | This was made for educational purposes only; nobody directly involved in this project is responsible for any damages caused. 
#
# Python Captcha Bypass
# https://github.com/threadexio/python-captcha-bypass
#
# MIT License
#

from enum import Enum
from typing import Tuple
import logging
import os
import tempfile
import time

from pydub import AudioSegment
import requests
import selenium
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
import speech_recognition as sr

logger = logging.getLogger(__name__)

# Upper bound (seconds) for downloading the audio challenge, so a stalled
# connection cannot block the caller forever.
_DOWNLOAD_TIMEOUT = 30


class status(Enum):
    """Outcome codes returned as the first item of `solve_captcha`'s result."""

    # The answer was typed in and the verify button was clicked.
    SUCCESS = 0
    # Any exception other than a wait timeout or a missing download link.
    UNKNOWN = 1
    # The audio download link never appeared within the timeout.
    RATELIMITED = 2
    # One of the element waits inside the challenge iframe timed out.
    TIMEOUT = 3


class NotExistent(Exception):
    """
    This exception is used internally.

    It carries the `status` member that `solve_captcha` should report when
    an expected element is missing.
    """
    err = None

    def __init__(self, error, *args: object) -> None:
        super().__init__(*args)

        # The status code to hand back to the caller.
        self.err = error


def solve_captcha(driver: WebDriver, iframe, t=5) -> Tuple[status, str]:
    """Solve the given captcha through its audio challenge

    #### Args:
        `driver` (`WebDriver`): The active webdriver instance
        `iframe` (`any`): A reference to the captcha's (checkbox) iframe
        `t` (`int`, optional): Timeout (in seconds) applied to every
            element wait. Defaults to 5.

    #### Returns:
        `Tuple(status, str)`: A `status` member (`status.SUCCESS` on
        success) and the answer that was typed in (empty string on error)

    #### Raises:
        `TimeoutException`: If the checkbox inside `iframe` does not show
        up within `t` seconds. Only this first step is outside the error
        handling; every later failure is reported through the return value.

    #### Notes:
        The driver is not switched back to the top-level document before
        returning. Call `driver.switch_to.default_content()` before you
        keep working with the page.
    """

    # Switch into the anchor iframe and tick the checkbox.
    driver.switch_to.frame(iframe)
    wait_for_elem(driver, By.CLASS_NAME,
                  "recaptcha-checkbox-border", t).click()

    # The actual challenge window lives in another iframe of the main page,
    # so go back to the top-level document first.
    driver.switch_to.default_content()

    try:
        driver.switch_to.frame(wait_for_elem(
            driver, By.XPATH, '//iframe[@title="recaptcha challenge"]', t))

        # Ask for the audio challenge instead of the image one.
        wait_for_elem(driver, By.ID, "recaptcha-audio-button", t).click()

        download_link = is_elem_present(
            driver, By.CLASS_NAME, "rc-audiochallenge-tdownload-link", t)

        # No download link: this is where the "Your computer is sending
        # automated requests..." case ends up.
        if not download_link:
            raise NotExistent(status.RATELIMITED)

        text = _transcribe_audio(download_link.get_attribute("href"))

        # Type out the answer and click "Verify" to complete.
        wait_for_elem(driver, By.ID, "audio-response", t).send_keys(text)
        wait_for_elem(driver, By.ID, "recaptcha-verify-button", t).click()

        return status.SUCCESS, text

    except TimeoutException:
        return status.TIMEOUT, ""

    except NotExistent as e:
        return e.err, ""

    except Exception:
        # Top-level boundary: report the failure through the return value,
        # but keep the traceback available through logging. There is no
        # `return` inside a `finally` any more, so KeyboardInterrupt and
        # SystemExit propagate instead of being turned into `None`.
        logger.exception("Unexpected error while solving the captcha")
        return status.UNKNOWN, ""


def _transcribe_audio(audio_url: str) -> str:
    """Download the mp3 at `audio_url`, convert it to wav and return the
    text recognised in it.

    The files live in a private temporary directory that is removed when
    the function returns, so concurrent calls cannot overwrite each other.
    """
    with tempfile.TemporaryDirectory() as work_dir:
        mp3_file = os.path.join(work_dir, "challenge.mp3")
        wav_file = os.path.join(work_dir, "challenge.wav")

        response = requests.get(
            audio_url, allow_redirects=True, timeout=_DOWNLOAD_TIMEOUT)
        # Fail early instead of saving an HTTP error page as the mp3.
        response.raise_for_status()

        with open(mp3_file, "wb") as f:
            f.write(response.content)

        # export() hands back the open output file; close it so the
        # directory can be deleted on every platform.
        AudioSegment.from_mp3(mp3_file).export(wav_file, format="wav").close()

        recognizer = sr.Recognizer()
        with sr.AudioFile(wav_file) as source:
            recorded_audio = recognizer.listen(source)
            return recognizer.recognize_google(recorded_audio)


def __cleanup(files: list):
    """Delete every path in `files`, ignoring the ones that do not exist.

    `solve_captcha` no longer needs this helper; it is kept so the
    module's existing names stay available.
    """
    for path in files:
        try:
            os.remove(path)
        except FileNotFoundError:
            pass


def wait_for_elem(driver: WebDriver, locator_type: str, locator: str, timeout: int):
    """
    Simple wrapper around selenium's find_element that waits until the
    wanted element is present and returns it.

    Raises `selenium.common.exceptions.TimeoutException` if the element
    has not appeared after `timeout` seconds.
    """
    return WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((locator_type, locator)))


def is_elem_present(driver: WebDriver, locator_type: str, locator: str, timeout: int):
    """
    Wait up to `timeout` seconds for an element. Return the element if it
    is present, otherwise False.
    """
    try:
        return wait_for_elem(driver, locator_type, locator, timeout)
    except TimeoutException:
        return False
"""Smoke test: run `solve_captcha` against Google's public reCAPTCHA demo.

Exits with status 0 when the solver reports success and 1 otherwise.
"""
import sys

from selenium import webdriver
from selenium.webdriver.common.by import By

import captcha_bypass

DEMO_URL = "https://www.google.com/recaptcha/api2/demo"
ANCHOR_PREFIX = "https://www.google.com/recaptcha/api2/anchor"


def find_captcha_iframe(browser):
    """Return the iframe whose `src` points at the reCAPTCHA anchor.

    Returns None when the page has no such iframe. If several iframes
    match, the last one wins.
    """
    captcha = None
    # find_elements(By.TAG_NAME, ...) replaces find_elements_by_tag_name,
    # which recent Selenium releases no longer provide.
    for iframe in browser.find_elements(By.TAG_NAME, "iframe"):
        # An iframe without a src attribute yields None here.
        src = iframe.get_attribute("src") or ""
        if src.startswith(ANCHOR_PREFIX):
            captcha = iframe
    return captcha


def main() -> int:
    """Drive a headless browser through the demo page; return the exit code."""
    # Selenium browser setup
    options = webdriver.ChromeOptions()

    # Headless?
    options.add_argument("--headless")

    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")

    browser = webdriver.Chrome(options=options)
    try:
        browser.get(DEMO_URL)

        captcha = find_captcha_iframe(browser)
        if captcha is None:
            print("Failed!")
            return 1

        result = captcha_bypass.solve_captcha(browser, captcha)
    finally:
        # Always shut the browser down so no chromium/chromedriver
        # processes are left behind.
        browser.quit()

    # result is (status, answer). Finer-grained handling is possible, e.g.
    #   result[0] == captcha_bypass.status.RATELIMITED  -> we are ratelimited
    #   result[0] == captcha_bypass.status.TIMEOUT      -> network/server too slow
    # A non-empty tuple is always truthy, so the status itself is checked.
    if result and result[0] == captcha_bypass.status.SUCCESS:
        print(result)
        return 0

    print("Failed!")
    return 1


if __name__ == "__main__":
    sys.exit(main())