├── .gitignore
├── .travis.yml
├── Dockerfile
├── LICENSE
├── README.md
├── entry.sh
├── requirements.txt
└── src
    ├── captcha_bypass.py
    └── test.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *.log
2 | *.pyc
3 | *.mp3
4 | *.wav
5 | 
6 | __pycache__


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | 
 3 | os: linux
 4 | dist: xenial
 5 | 
 6 | services:
 7 |   - docker
 8 | 
 9 | before_install:
10 |   - docker build -t captcha-bypass .
11 |   - docker images
12 | 
13 | script:
14 |   - docker run -it captcha-bypass
15 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM alpine
 2 | 
 3 | # One layer, and no apk index left behind in the image
 4 | RUN apk add --no-cache chromium chromium-chromedriver ffmpeg python3 py3-pip flac
 5 | 
 6 | COPY . /mnt/
 7 | 
 8 | RUN python3 -m venv /opt/venv && /opt/venv/bin/pip install -r /mnt/requirements.txt
 9 | ENV PATH="/opt/venv/bin:$PATH"
10 | COPY entry.sh /entry.sh
11 | 
12 | WORKDIR /mnt/src
13 | 
14 | CMD [ "sh", "/entry.sh" ]


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 1337
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # python-captcha-bypass
 2 | 
 3 | [![License](https://img.shields.io/github/license/threadexio/python-captcha-bypass?style=for-the-badge)](https://github.com/threadexio/python-captcha-bypass/blob/master/LICENSE)
 4 | [![Tests](https://img.shields.io/travis/com/threadexio/python-captcha-bypass?label=Tests&logo=python&logoColor=yellow&style=for-the-badge)](https://app.travis-ci.com/github/threadexio/python-captcha-bypass)
 5 | 
 6 | A small and harmless utility written in Python used to solve CAPTCHAs with Selenium.
 7 | 
 8 | # How to use
 9 | 
10 | 1. Clone this repo
11 | ```bash
12 | git clone https://github.com/threadexio/python-captcha-bypass
13 | ```
14 | 
15 | 2. Copy `src/captcha_bypass.py` to your project
16 | 
17 | 3. Import with `from captcha_bypass import solve_captcha`
18 | 
19 | -------
20 | 
21 | # Dependencies:
22 | - python3
23 | 
24 | - chromium (or Google Chrome, others might work but are not tested)
25 |   * Windows:  `https://www.chromium.org/getting-involved/download-chromium`
26 |   * Linux:
27 |     - Debian-based: `sudo apt-get install chromium`
28 |     - Arch-based:   `sudo pacman -S chromium`
29 |     - Fedora-based: `sudo dnf install chromium`
30 | 
31 | - ffmpeg
32 |   * Windows:  `https://www.ffmpeg.org/download.html`
33 |   * Linux:
34 |     - Debian-based: `sudo apt-get install ffmpeg`
35 |     - Arch-based:   `sudo pacman -S ffmpeg`
36 |     - Fedora-based: `sudo dnf install ffmpeg`
37 | 
38 | -------
39 | 
40 | # Docs
41 | 
42 | ```python
43 | solve_captcha(browser, captcha)
44 | ```
45 | `browser`: is the active webdriver instance (`selenium.webdriver`)
46 | 
47 | `captcha`: is a reference to the CAPTCHA's iframe
48 | 
49 | ### See `src/test.py` for a code example
50 | 
51 | -------
52 | 
53 | ### Legal Disclaimer
54 | This was made for educational purposes only. Nobody directly involved in this project is responsible for any damages caused; you are responsible for your own actions.
55 | 


--------------------------------------------------------------------------------
/entry.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | python3 test.py
4 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | SpeechRecognition
2 | requests
3 | selenium
4 | pydub


--------------------------------------------------------------------------------
/src/captcha_bypass.py:
--------------------------------------------------------------------------------
  1 | #
  2 | # Python Captcha Bypass
  3 | # https://github.com/threadexio/python-captcha-bypass
  4 | #
  5 | #	MIT License
  6 | #
  7 | 
  8 | from enum import Enum
  9 | from typing import Tuple
 10 | from pydub import AudioSegment
 11 | import speech_recognition as sr
 12 | import tempfile
 13 | import requests
 14 | import time
 15 | import os
 16 | 
 17 | from selenium.webdriver.support.ui import WebDriverWait
 18 | from selenium.webdriver.support import expected_conditions as EC
 19 | from selenium.webdriver.common.by import By
 20 | from selenium.common.exceptions import TimeoutException
 21 | import selenium
 22 | 
 23 | 
 24 | class status(Enum):  # result codes; first item of the tuple solve_captcha() returns
 25 |     SUCCESS = 0  # answer was typed in and the verify button clicked
 26 |     UNKNOWN = 1  # an unexpected exception was caught (it is printed)
 27 |     RATELIMITED = 2  # the audio download link never appeared
 28 |     TIMEOUT = 3  # a required element did not appear within the timeout
 29 | 
 30 | 
 31 | class NotExistent(Exception):
 32 |     """
 33 |     Internal signal raised and caught inside solve_captcha(); `err` holds the `status` code to report
 34 |     """
 35 |     err = None  # class-level default; overwritten per instance in __init__
 36 | 
 37 |     def __init__(self, error, *args: object) -> None:
 38 |         super().__init__(*args)
 39 | 
 40 |         self.err = error
 41 | 
 42 | 
 43 | def solve_captcha(driver: selenium.webdriver, iframe, t=5):
 44 |     """Solve the given captcha
 45 | 
 46 | #### Args:
 47 |     `driver` (`selenium.webdriver`): The active webdriver instance
 48 |     `iframe` (`any`): A reference to the captcha's iframe
 49 |     `t` (`int`, optional): Page load timeout (in seconds). Defaults to 5.
 50 | 
 51 | #### Returns:
 52 |         `Tuple(int, str)`: Error code (0 on success) and the answer (empty if error)
 53 |     """
 54 | 
 55 |     ret = None
 56 |     tmp_dir = tempfile.gettempdir()
 57 |     mp3_file = os.path.join(tmp_dir, "_tmp.mp3")
 58 |     wav_file = os.path.join(tmp_dir, "_tmp.wav")
 59 |     tmp_files = [mp3_file, wav_file]
 60 | 
 61 |     # Switch current context
 62 |     driver.switch_to.frame(iframe)
 63 | 
 64 |     # Click the checkbox
 65 |     wait_for_elem(driver, By.CLASS_NAME,
 66 |                   "recaptcha-checkbox-border", t).click()
 67 | 
 68 |     # Switch back to the main page
 69 |     # cause the actual captcha window
 70 |     # is on another iframe
 71 |     driver.switch_to.default_content()
 72 | 
 73 |     try:
 74 |         driver.switch_to.frame(wait_for_elem(
 75 |             driver, By.XPATH, '//iframe[@title="recaptcha challenge"]', t))
 76 | 
 77 |         # Get the audio challenge instead
 78 |         wait_for_elem(driver, By.ID, "recaptcha-audio-button", t).click()
 79 | 
 80 |         # Download & convert the file
 81 |         download_link = is_elem_present(
 82 |             driver, By.CLASS_NAME, "rc-audiochallenge-tdownload-link", t)
 83 | 
 84 |         if not download_link:
 85 |             raise NotExistent(status.RATELIMITED)
 86 | 
 87 |         with open(mp3_file, "wb") as f:
 88 |             link = download_link.get_attribute("href")
 89 |             r = requests.get(link, allow_redirects=True)
 90 |             f.write(r.content)
 91 |             f.close()
 92 | 
 93 |         # Convert to wav here
 94 |         AudioSegment.from_mp3(mp3_file).export(wav_file, format="wav")
 95 | 
 96 |         # Using google's own api against them
 97 |         recognizer = sr.Recognizer()
 98 | 
 99 |         with sr.AudioFile(wav_file) as source:
100 |             recorded_audio = recognizer.listen(source)
101 |             text = recognizer.recognize_google(recorded_audio)
102 | 
103 |         # Type out the answer
104 |         wait_for_elem(driver, By.ID, "audio-response", t).send_keys(text)
105 | 
106 |         # Click the "Verify" button to complete
107 |         wait_for_elem(driver, By.ID, "recaptcha-verify-button", t).click()
108 | 
109 |         # Return the text used for the answer
110 |         ret = (status.SUCCESS, text)
111 | 
112 |     except TimeoutException as e:
113 |         ret = (status.TIMEOUT, "")
114 | 
115 |     except NotExistent as e:
116 |         # If we encounter the "Your computer is sending automated requests...", catch here and return the appropriate error
117 |         ret = (e.err, "")
118 | 
119 |     except Exception as e:
120 |         print(e)
121 |         ret = (status.UNKNOWN, "")
122 | 
123 |     finally:
124 |         __cleanup(tmp_files)
125 |         return ret
126 | 
127 | 
128 | def __cleanup(files: list):
129 |     for x in files:
130 |         if os.path.exists(x):
131 |             os.remove(x)
132 | 
133 | 
134 | def wait_for_elem(driver: selenium.webdriver, locator_type: str, locator: str, timeout: int):
135 |     """
136 |     Simple wrapper around selenium's find_element -- added a simple mechanism to wait until the element we want is present. Use try/except with `selenium.common.exceptions.TimeoutException`
137 |     """
138 |     return WebDriverWait(driver, timeout).until(EC.presence_of_element_located((locator_type, locator)))
139 | 
140 | 
141 | def is_elem_present(driver: selenium.webdriver, locator_type: str, locator: str, timeout: int):
142 |     """
143 |     Check if an element is present or wait for a timeout. Return the element if present otherwise False
144 |     """
145 |     try:
146 |         return wait_for_elem(driver, locator_type, locator, timeout)
147 |     except TimeoutException:
148 |         return False
149 | 


--------------------------------------------------------------------------------
/src/test.py:
--------------------------------------------------------------------------------
 1 | from selenium import webdriver
 2 | from time import sleep
 3 | import selenium
 4 | 
 5 | import captcha_bypass
 6 | 
 7 | # Selenium browser setup
 8 | options = webdriver.ChromeOptions()
 9 | 
10 | 
11 | # Headless?
12 | options.add_argument("--headless")
13 | 
14 | options.add_argument("--no-sandbox")
15 | options.add_argument("--disable-dev-shm-usage")
16 | 
17 | browser = webdriver.Chrome(options=options)
18 | 
19 | browser.get("https://www.google.com/recaptcha/api2/demo")
20 | 
21 | 
22 | 
23 | # Filter through all the iframes on the page and find the one that corresponds to the captcha
24 | iframes = browser.find_elements_by_tag_name("iframe")
25 | for iframe in iframes:
26 |     if iframe.get_attribute("src").startswith("https://www.google.com/recaptcha/api2/anchor"):
27 |         captcha = iframe
28 | 
29 | result = captcha_bypass.solve_captcha(browser, captcha)
30 | 
31 | # do error checking here
32 | 
33 | """
34 | # are we ratelimited?
35 | if result[0] == captcha_bypass.status.RATELIMITED:
36 |     # do stuff here
37 | 
38 | # is the network or the server too slow?
39 | elif result[0] == captcha_bypass.status.TIMEOUT
40 |     # do stuff here
41 | 
42 | else:
43 | """
44 | 
45 | if result:
46 |     print(result)
47 |     exit(0)
48 | else:
49 |     print("Failed!")
50 |     exit(1)
51 | 


--------------------------------------------------------------------------------